% Conference paper (marked "invited paper" in NOTE).
% ISBN and ISSN are kept in their own fields rather than in NOTE so that
% tools and styles can read them as structured data.
% PDF and URL intentionally carry the same link, as in the original record.
@INPROCEEDINGS{1039Goldmann2006,
  AUTHOR    = {Goldmann, Lutz and Samour, Amjad and Karaman, Mustafa and Sikora, Thomas},
  TITLE     = {Extracting High Level Semantics by Means of Speech, Audio, and Image Primitives in Surveillance Applications},
  BOOKTITLE = {{IEEE} Int. Conf. on Image Processing ({ICIP}'06)},
  YEAR      = {2006},
  MONTH     = oct,
  PAGES     = {2397--2400},
  ADDRESS   = {Atlanta, GA, USA},
  NOTE      = {invited paper},
  ISBN      = {1-4244-1437-7},
  ISSN      = {1522-4880},
  DOI       = {10.1109/ICIP.2006.312945},
  URL       = {http://elvera.nue.tu-berlin.de/files/1039Goldmann2006.pdf},
  PDF       = {http://elvera.nue.tu-berlin.de/files/1039Goldmann2006.pdf},
  ABSTRACT  = {Traditional surveillance systems are usually based on visual information only. With the emerging multimedia analysis techniques, interests are changing towards systems that incorporate multiple sensors and different modalities, which leads to new ways of analyzing this multimedia data and more sophisticated applications. This paper shortly reviews the ideas of traditional surveillance systems and explains actual research interests in this domain. Then, it focuses on the typical structure, goals, and applications of multimedia surveillance systems. These issues are supported by short descriptions of selected analysis steps of such a system currently under development. Some experimental results are given to illustrate the extracted semantics and to assess the performance of the individual steps.},
}