[{"keyword":["computational complexity","reverberation","speech recognition","automatic speech recognition","background noise","clean speech","computational complexity","energy compensation","logarithmic mel power spectral domain","mel frequency cepstral coefficients","microphone input signals","model-based feature compensation schemes","noisy reverberant speech automatic recognition","noisy reverberant speech features","reverberation","Atmospheric modeling","Computational modeling","Noise","Noise measurement","Reverberation","Speech","Vectors","Model-based feature compensation","observation model for reverberant and noisy speech","recursive observation model","robust automatic speech recognition"],"language":[{"iso":"eng"}],"_id":"11861","user_id":"44006","department":[{"_id":"54"}],"abstract":[{"lang":"eng","text":"In this contribution we present a theoretical and experimental investigation into the effects of reverberation and noise on features in the logarithmic mel power spectral domain, an intermediate stage in the computation of the mel frequency cepstral coefficients, prevalent in automatic speech recognition (ASR). Gaining insight into the complex interaction between clean speech, noise, and noisy reverberant speech features is essential for any ASR system to be robust against noise and reverberation present in distant microphone input signals. The findings are gathered in a probabilistic formulation of an observation model which may be used in model-based feature compensation schemes. The proposed observation model extends previous models in three major directions: First, the contribution of additive background noise to the observation error is explicitly taken into account. Second, an energy compensation constant is introduced which ensures an unbiased estimate of the reverberant speech features, and, third, a recursive variant of the observation model is developed resulting in reduced computational complexity when used in model-based feature compensation. The experimental section is used to evaluate the accuracy of the model and to describe how its parameters can be determined from test data."}],"status":"public","type":"journal_article","publication":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","title":"A New Observation Model in the Logarithmic Mel Power Spectral Domain for the Automatic Recognition of Noisy Reverberant Speech","doi":"10.1109/TASLP.2013.2285480","date_updated":"2022-01-06T06:51:11Z","date_created":"2019-07-12T05:29:41Z","author":[{"last_name":"Leutnant","full_name":"Leutnant, Volker","first_name":"Volker"},{"first_name":"Alexander","last_name":"Krueger","full_name":"Krueger, Alexander"},{"id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach","first_name":"Reinhold"}],"volume":22,"year":"2014","citation":{"chicago":"Leutnant, Volker, Alexander Krueger, and Reinhold Haeb-Umbach. “A New Observation Model in the Logarithmic Mel Power Spectral Domain for the Automatic Recognition of Noisy Reverberant Speech.” <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i> 22, no. 1 (2014): 95–109. <a href=\"https://doi.org/10.1109/TASLP.2013.2285480\">https://doi.org/10.1109/TASLP.2013.2285480</a>.","ieee":"V. Leutnant, A. Krueger, and R. Haeb-Umbach, “A New Observation Model in the Logarithmic Mel Power Spectral Domain for the Automatic Recognition of Noisy Reverberant Speech,” <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i>, vol. 22, no. 1, pp. 95–109, 2014.","ama":"Leutnant V, Krueger A, Haeb-Umbach R. A New Observation Model in the Logarithmic Mel Power Spectral Domain for the Automatic Recognition of Noisy Reverberant Speech. <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i>. 2014;22(1):95-109. doi:<a href=\"https://doi.org/10.1109/TASLP.2013.2285480\">10.1109/TASLP.2013.2285480</a>","apa":"Leutnant, V., Krueger, A., &#38; Haeb-Umbach, R. (2014). A New Observation Model in the Logarithmic Mel Power Spectral Domain for the Automatic Recognition of Noisy Reverberant Speech. <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i>, <i>22</i>(1), 95–109. <a href=\"https://doi.org/10.1109/TASLP.2013.2285480\">https://doi.org/10.1109/TASLP.2013.2285480</a>","mla":"Leutnant, Volker, et al. “A New Observation Model in the Logarithmic Mel Power Spectral Domain for the Automatic Recognition of Noisy Reverberant Speech.” <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i>, vol. 22, no. 1, 2014, pp. 95–109, doi:<a href=\"https://doi.org/10.1109/TASLP.2013.2285480\">10.1109/TASLP.2013.2285480</a>.","bibtex":"@article{Leutnant_Krueger_Haeb-Umbach_2014, title={A New Observation Model in the Logarithmic Mel Power Spectral Domain for the Automatic Recognition of Noisy Reverberant Speech}, volume={22}, DOI={<a href=\"https://doi.org/10.1109/TASLP.2013.2285480\">10.1109/TASLP.2013.2285480</a>}, number={1}, journal={IEEE/ACM Transactions on Audio, Speech, and Language Processing}, author={Leutnant, Volker and Krueger, Alexander and Haeb-Umbach, Reinhold}, year={2014}, pages={95–109} }","short":"V. Leutnant, A. Krueger, R. Haeb-Umbach, IEEE/ACM Transactions on Audio, Speech, and Language Processing 22 (2014) 95–109."},"page":"95-109","intvolume":"        22","publication_identifier":{"issn":["2329-9290"]},"issue":"1"},{"oa":"1","date_updated":"2022-01-06T06:51:08Z","date_created":"2019-07-12T05:27:20Z","author":[{"full_name":"Chinaev, Aleksej","last_name":"Chinaev","first_name":"Aleksej"},{"full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach","first_name":"Reinhold"}],"title":"MAP-based Estimation of the Parameters of a Gaussian Mixture Model in the Presence of Noisy Observations","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2013/ChHa13.pdf","open_access":"1"}],"doi":"10.1109/ICASSP.2013.6638279","publication_identifier":{"issn":["1520-6149"]},"related_material":{"link":[{"relation":"supplementary_material","description":"Poster","url":"https://groups.uni-paderborn.de/nt/pubs/2013/ChHa13_Poster.pdf"}]},"year":"2013","citation":{"ama":"Chinaev A, Haeb-Umbach R. MAP-based Estimation of the Parameters of a Gaussian Mixture Model in the Presence of Noisy Observations. In: <i>38th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013)</i>. ; 2013:3352-3356. doi:<a href=\"https://doi.org/10.1109/ICASSP.2013.6638279\">10.1109/ICASSP.2013.6638279</a>","chicago":"Chinaev, Aleksej, and Reinhold Haeb-Umbach. “MAP-Based Estimation of the Parameters of a Gaussian Mixture Model in the Presence of Noisy Observations.” In <i>38th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013)</i>, 3352–56, 2013. <a href=\"https://doi.org/10.1109/ICASSP.2013.6638279\">https://doi.org/10.1109/ICASSP.2013.6638279</a>.","ieee":"A. Chinaev and R. Haeb-Umbach, “MAP-based Estimation of the Parameters of a Gaussian Mixture Model in the Presence of Noisy Observations,” in <i>38th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013)</i>, 2013, pp. 3352–3356.","bibtex":"@inproceedings{Chinaev_Haeb-Umbach_2013, title={MAP-based Estimation of the Parameters of a Gaussian Mixture Model in the Presence of Noisy Observations}, DOI={<a href=\"https://doi.org/10.1109/ICASSP.2013.6638279\">10.1109/ICASSP.2013.6638279</a>}, booktitle={38th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013)}, author={Chinaev, Aleksej and Haeb-Umbach, Reinhold}, year={2013}, pages={3352–3356} }","short":"A. Chinaev, R. Haeb-Umbach, in: 38th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013), 2013, pp. 3352–3356.","mla":"Chinaev, Aleksej, and Reinhold Haeb-Umbach. “MAP-Based Estimation of the Parameters of a Gaussian Mixture Model in the Presence of Noisy Observations.” <i>38th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013)</i>, 2013, pp. 3352–56, doi:<a href=\"https://doi.org/10.1109/ICASSP.2013.6638279\">10.1109/ICASSP.2013.6638279</a>.","apa":"Chinaev, A., &#38; Haeb-Umbach, R. (2013). MAP-based Estimation of the Parameters of a Gaussian Mixture Model in the Presence of Noisy Observations. In <i>38th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013)</i> (pp. 3352–3356). <a href=\"https://doi.org/10.1109/ICASSP.2013.6638279\">https://doi.org/10.1109/ICASSP.2013.6638279</a>"},"page":"3352-3356","_id":"11740","user_id":"44006","department":[{"_id":"54"}],"keyword":["Gaussian noise","maximum likelihood estimation","parameter estimation","GMM parameter","Gaussian mixture model","MAP estimation","Map-based estimation","maximum a-posteriori estimation","maximum likelihood technique","noisy observation","sequential estimation framework","white Gaussian noise","Additive noise","Gaussian mixture model","Maximum likelihood estimation","Noise measurement","Gaussian mixture model","Maximum a posteriori estimation","Maximum likelihood estimation"],"language":[{"iso":"eng"}],"type":"conference","publication":"38th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013)","abstract":[{"text":"In this contribution we derive the Maximum A-Posteriori (MAP) estimates of the parameters of a Gaussian Mixture Model (GMM) in the presence of noisy observations. We assume the distortion to be white Gaussian noise of known mean and variance. An approximate conjugate prior of the GMM parameters is derived allowing for a computationally efficient implementation in a sequential estimation framework. Simulations on artificially generated data demonstrate the superiority of the proposed method compared to the Maximum Likelihood technique and to the ordinary MAP approach, whose estimates are corrected by the known statistics of the distortion in a straightforward manner.","lang":"eng"}],"status":"public"}]