% Kim and Sikora, ICASSP 2004: MPEG-7 ASP features compared with MFCC.
% Acronyms and the descriptor name in the title are brace-protected so that
% sentence-casing bibliography styles do not lowercase them.
@INPROCEEDINGS{0781Kim2004,
	AUTHOR = {Kim, Hyoung-Gook and Sikora, Thomas},
	TITLE = {Comparison of {MPEG-7} {Audio Spectrum Projection} Features and {MFCC} applied to Speaker Recognition, Sound Classification and Audio Segmentation},
	BOOKTITLE = {ICASSP 2004},
	YEAR = {2004},
	MONTH = may,
	ORGANIZATION = {IEEE},
	ADDRESS = {Montreal, Canada},
	PDF = {http://elvera.nue.tu-berlin.de/files/0781Kim2004.pdf},
	ABSTRACT = {Our purpose is to evaluate the MPEG-7 Audio Spectrum Projection (ASP) features for general sound recognition performance vs. well established MFCC. The recognition tasks of interest are speaker recognition, sound classification, and segmentation of audio using sound/speaker identification. For the sound classification we use three approaches: the direct approach, the hierarchical approach without hints, and the hierarchical approach with hints. For audio segmentation the MPEG-7 ASP features and MFCCs are used to train hidden Markov models (HMM) for individual speakers and sounds. The trained sound/speaker models are then used to segment conversational speech involving a given subset of people in panel discussion television programs. Results show that MFCC approach yields sound/speaker recognition rate superior to MPEG-7 implementations.}
}