% Working-notes paper for the Placing Task of the MediaEval 2012 benchmark.
% ADDRESS holds the conference city only; the full street address of the
% venue is kept in VENUE, a field that classic BibTeX styles ignore.
% Names in AUTHOR and EDITOR are separated with " and ", as BibTeX requires.
@INPROCEEDINGS{1387Kelm2012,
  AUTHOR    = {Pascal Kelm and Sebastian Schmiedeke and Thomas Sikora},
  TITLE     = {How Spatial Segmentation improves the Multimodal Geo-Tagging},
  BOOKTITLE = {Working Notes Proceedings of the {MediaEval} 2012 Workshop},
  YEAR      = {2012},
  MONTH     = oct,
  EDITOR    = {Martha A. Larson and Sebastian Schmiedeke and Pascal Kelm and Adam Rae and Vasileios Mezaris and Tomas Piatrik and Mohammad Soleymani and Florian Metze and Gareth J. F. Jones},
  PUBLISHER = {CEUR-WS.org},
  PAGES     = {9--10},
  ADDRESS   = {Pisa, Italy},
  VENUE     = {Santa Croce in Fossabanda, Piazza Santa Croce 5, 56125 Pisa, Toscana, Italia},
  NOTE      = {ISSN 1613-0073},
  PDF       = {http://elvera.nue.tu-berlin.de/files/1387Kelm2012.pdf},
  URL       = {http://elvera.nue.tu-berlin.de/files/1387Kelm2012.pdf},
  ABSTRACT  = {In this paper we present a hierarchical, multi-modal approach in combination with different granularity levels for the Placing Task at the MediaEval benchmark 2012. Our approach makes use of external resources like gazetteers to extract toponyms in the metadata and of visual and textual features to identify similar content. First, the boundaries detection recognizes the country and its dimension to speed up the estimation and to eliminate geographical ambiguity. Next, we prepared a training database to group them together into geographical regions and to build a hierarchical model. The fusion of visual and textual methods for different granularities is used to classify the videos' location into possible regions. At the end the Flickr videos are tagged with the geo-information of the most similar training image within the regions that is previously filtered by the probabilistic model for each test video.}
}