% Kim, Moreau and Sikora (2004): journal article in IEEE Trans. CSVT 14(5); the special-issue title is kept in NOTE, not JOURNAL.
@ARTICLE{0795Kim2004,
	AUTHOR = {Kim, Hyoung-Gook and Moreau, Nicolas and Sikora, Thomas},
	TITLE = {Audio Classification Based on {MPEG-7} Spectral Basis Representations},
	JOURNAL = {IEEE Transactions on Circuits and Systems for Video Technology},
	YEAR = {2004},
	MONTH = may,
	PAGES = {716--725},
	VOLUME = {14},
	NUMBER = {5},
	NOTE = {Special Issue on Audio and Video Analysis for Multimedia Interactive Services},
	PDF = {http://elvera.nue.tu-berlin.de/files/0795Kim2004.pdf},
	ABSTRACT = {In this paper, we present an MPEG-7-based audio classification and retrieval technique targeted for analysis of film material. The technique consists of low-level descriptors and high-level description schemes. For low-level descriptors, low-dimensional features such as audio spectrum projection based on audio spectrum basis descriptors is produced in order to find a balanced tradeoff between reducing dimensionality and retaining maximum information content. High-level description schemes are used to describe the modeling of reduced-dimension features, the procedure of audio classification, and retrieval. A classifier based on continuous hidden Markov models is applied. The sound model state path, which is selected according to the maximum-likelihood model, is stored in an MPEG-7 sound database and used as an index for query applications. Various experiments are presented where the speaker- and sound-recognition rates are compared for different feature extraction methods. Using independent component analysis, we achieved better results than normalized audio spectrum envelope and principal component analysis in a speaker recognition system. In audio classification experiments, audio sounds are classified into selected sound classes in real time with an accuracy of 96%.}
}