[{"publication":"Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on","type":"conference","abstract":[{"lang":"eng","text":"The parametric Bayesian Feature Enhancement (BFE) and a data-driven Denoising Autoencoder (DA) both bring performance gains in severe single-channel speech recognition conditions. The first can be adjusted to different conditions by an appropriate parameter setting, while the latter needs to be trained on conditions similar to the ones expected at decoding time, making it vulnerable to a mismatch between training and test conditions. We use a DNN backend and study reverberant ASR under three types of mismatch conditions: different room reverberation times, different speaker to microphone distances and the difference between artificially reverberated data and the recordings in a reverberant environment. We show that for these mismatch conditions BFE can provide the targets for a DA. This unsupervised adaptation provides a performance gain over the direct use of BFE and even enables to compensate for the mismatch of real and simulated reverberant data."}],"status":"public","_id":"11813","department":[{"_id":"54"}],"user_id":"44006","keyword":["codecs","signal denoising","speech recognition","Bayesian feature enhancement","denoising autoencoder","reverberant ASR","single-channel speech recognition","speaker to microphone distances","unsupervised adaptation","Adaptation models","Noise reduction","Reverberation","Speech","Speech recognition","Training","deep neuronal networks","denoising autoencoder","feature enhancement","robust speech recognition"],"language":[{"iso":"eng"}],"year":"2015","page":"5053-5057","citation":{"chicago":"Heymann, Jahn, Reinhold Haeb-Umbach, P. Golik, and R. Schlueter. 
“Unsupervised Adaptation of a Denoising Autoencoder by Bayesian Feature Enhancement for Reverberant Asr under Mismatch Conditions.” In <i>Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference On</i>, 5053–57, 2015. <a href=\"https://doi.org/10.1109/ICASSP.2015.7178933\">https://doi.org/10.1109/ICASSP.2015.7178933</a>.","ieee":"J. Heymann, R. Haeb-Umbach, P. Golik, and R. Schlueter, “Unsupervised adaptation of a denoising autoencoder by Bayesian Feature Enhancement for reverberant asr under mismatch conditions,” in <i>Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on</i>, 2015, pp. 5053–5057.","ama":"Heymann J, Haeb-Umbach R, Golik P, Schlueter R. Unsupervised adaptation of a denoising autoencoder by Bayesian Feature Enhancement for reverberant asr under mismatch conditions. In: <i>Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference On</i>. ; 2015:5053-5057. doi:<a href=\"https://doi.org/10.1109/ICASSP.2015.7178933\">10.1109/ICASSP.2015.7178933</a>","apa":"Heymann, J., Haeb-Umbach, R., Golik, P., &#38; Schlueter, R. (2015). Unsupervised adaptation of a denoising autoencoder by Bayesian Feature Enhancement for reverberant asr under mismatch conditions. In <i>Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on</i> (pp. 5053–5057). <a href=\"https://doi.org/10.1109/ICASSP.2015.7178933\">https://doi.org/10.1109/ICASSP.2015.7178933</a>","short":"J. Heymann, R. Haeb-Umbach, P. Golik, R. Schlueter, in: Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference On, 2015, pp. 
5053–5057.","bibtex":"@inproceedings{Heymann_Haeb-Umbach_Golik_Schlueter_2015, title={Unsupervised adaptation of a denoising autoencoder by Bayesian Feature Enhancement for reverberant asr under mismatch conditions}, DOI={<a href=\"https://doi.org/10.1109/ICASSP.2015.7178933\">10.1109/ICASSP.2015.7178933</a>}, booktitle={Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on}, author={Heymann, Jahn and Haeb-Umbach, Reinhold and Golik, P. and Schlueter, R.}, year={2015}, pages={5053–5057} }","mla":"Heymann, Jahn, et al. “Unsupervised Adaptation of a Denoising Autoencoder by Bayesian Feature Enhancement for Reverberant Asr under Mismatch Conditions.” <i>Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference On</i>, 2015, pp. 5053–57, doi:<a href=\"https://doi.org/10.1109/ICASSP.2015.7178933\">10.1109/ICASSP.2015.7178933</a>."},"oa":"1","date_updated":"2022-01-06T06:51:09Z","date_created":"2019-07-12T05:28:45Z","author":[{"full_name":"Heymann, Jahn","id":"9168","last_name":"Heymann","first_name":"Jahn"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"},{"last_name":"Golik","full_name":"Golik, P.","first_name":"P."},{"first_name":"R.","last_name":"Schlueter","full_name":"Schlueter, R."}],"title":"Unsupervised adaptation of a denoising autoencoder by Bayesian Feature Enhancement for reverberant asr under mismatch conditions","doi":"10.1109/ICASSP.2015.7178933","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2015/hey_icassp_2015.pdf","open_access":"1"}]},{"title":"The reverb challenge: a common evaluation framework for dereverberation and recognition of reverberant speech","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2013/Reverb2013.pdf","open_access":"1"}],"date_updated":"2022-01-06T06:51:11Z","oa":"1","author":[{"full_name":"Kinoshita, 
Keisuke","last_name":"Kinoshita","first_name":"Keisuke"},{"first_name":"Marc","last_name":"Delcroix","full_name":"Delcroix, Marc"},{"full_name":"Yoshioka, Takuya","last_name":"Yoshioka","first_name":"Takuya"},{"first_name":"Tomohiro","last_name":"Nakatani","full_name":"Nakatani, Tomohiro"},{"full_name":"Habets, Emanuel","last_name":"Habets","first_name":"Emanuel"},{"full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach","first_name":"Reinhold"},{"first_name":"Volker","last_name":"Leutnant","full_name":"Leutnant, Volker"},{"last_name":"Sehr","full_name":"Sehr, Armin","first_name":"Armin"},{"full_name":"Kellermann, Walter","last_name":"Kellermann","first_name":"Walter"},{"first_name":"Roland","last_name":"Maas","full_name":"Maas, Roland"},{"first_name":"Sharon","full_name":"Gannot, Sharon","last_name":"Gannot"},{"full_name":"Raj, Bhiksha","last_name":"Raj","first_name":"Bhiksha"}],"date_created":"2019-07-12T05:29:17Z","year":"2013","citation":{"ieee":"K. Kinoshita <i>et al.</i>, “The reverb challenge: a common evaluation framework for dereverberation and recognition of reverberant speech,” in <i>IEEE Workshop on Applications of Signal Processing to Audio and Acoustics</i>, 2013, pp. 22–23.","chicago":"Kinoshita, Keisuke, Marc Delcroix, Takuya Yoshioka, Tomohiro Nakatani, Emanuel Habets, Reinhold Haeb-Umbach, Volker Leutnant, et al. “The Reverb Challenge: A Common Evaluation Framework for Dereverberation and Recognition of Reverberant Speech.” In <i>IEEE Workshop on Applications of Signal Processing to Audio and Acoustics</i>, 22–23, 2013.","ama":"Kinoshita K, Delcroix M, Yoshioka T, et al. The reverb challenge: a common evaluation framework for dereverberation and recognition of reverberant speech. In: <i>IEEE Workshop on Applications of Signal Processing to Audio and Acoustics</i>. ; 2013:22-23.","apa":"Kinoshita, K., Delcroix, M., Yoshioka, T., Nakatani, T., Habets, E., Haeb-Umbach, R., … Raj, B. (2013). 
The reverb challenge: a common evaluation framework for dereverberation and recognition of reverberant speech. In <i>IEEE Workshop on Applications of Signal Processing to Audio and Acoustics</i> (pp. 22–23).","mla":"Kinoshita, Keisuke, et al. “The Reverb Challenge: A Common Evaluation Framework for Dereverberation and Recognition of Reverberant Speech.” <i>IEEE Workshop on Applications of Signal Processing to Audio and Acoustics</i>, 2013, pp. 22–23.","short":"K. Kinoshita, M. Delcroix, T. Yoshioka, T. Nakatani, E. Habets, R. Haeb-Umbach, V. Leutnant, A. Sehr, W. Kellermann, R. Maas, S. Gannot, B. Raj, in: IEEE Workshop on Applications of Signal Processing to Audio and Acoustics, 2013, pp. 22–23.","bibtex":"@inproceedings{Kinoshita_Delcroix_Yoshioka_Nakatani_Habets_Haeb-Umbach_Leutnant_Sehr_Kellermann_Maas_et al._2013, title={The reverb challenge: a common evaluation framework for dereverberation and recognition of reverberant speech}, booktitle={IEEE Workshop on Applications of Signal Processing to Audio and Acoustics}, author={Kinoshita, Keisuke and Delcroix, Marc and Yoshioka, Takuya and Nakatani, Tomohiro and Habets, Emanuel and Haeb-Umbach, Reinhold and Leutnant, Volker and Sehr, Armin and Kellermann, Walter and Maas, Roland and et al.}, year={2013}, pages={22–23} }"},"page":"22-23","keyword":["Reverberant speech","dereverberation","ASR","evaluation","challenge"],"language":[{"iso":"eng"}],"_id":"11841","user_id":"44006","department":[{"_id":"54"}],"abstract":[{"lang":"eng","text":"Recently, substantial progress has been made in the field of reverberant speech signal processing, including both single- and multichannel de-reverberation techniques, and automatic speech recognition (ASR) techniques robust to reverberation. 
To evaluate state-of-the-art algorithms and obtain new insights regarding potential future research directions, we propose a common evaluation framework including datasets, tasks, and evaluation metrics for both speech enhancement and ASR techniques. The proposed framework will be used as a common basis for the REVERB (REverberant Voice Enhancement and Recognition Benchmark) challenge. This paper describes the rationale behind the challenge, and provides a detailed description of the evaluation framework and benchmark results."}],"status":"public","type":"conference","publication":"IEEE Workshop on Applications of Signal Processing to Audio and Acoustics"},{"date_created":"2019-07-12T05:29:23Z","title":"Model-Based Feature Enhancement for Reverberant Speech Recognition","issue":"7","year":"2010","language":[{"iso":"eng"}],"keyword":["ASR","AURORA5 database","automatic speech recognition","Bayesian inference","belief networks","CMLLR","computational complexity","constrained maximum likelihood linear regression","least mean squares methods","LMPSC computation","logarithmic Mel power spectrum","maximum likelihood estimation","Mel frequency cepstral coefficients","MFCC feature vectors","microphone signal","minimum mean square error estimation","model-based feature enhancement","regression analysis","reverberant speech recognition","reverberation","RIR energy","room impulse response","speech recognition","stochastic observation model","stochastic processes"],"publication":"IEEE Transactions on Audio, Speech, and Language Processing","abstract":[{"text":"In this paper, we present a new technique for automatic speech recognition (ASR) in reverberant environments. Our approach is aimed at the enhancement of the logarithmic Mel power spectrum, which is computed at an intermediate stage to obtain the widely used Mel frequency cepstral coefficients (MFCCs). 
Given the reverberant logarithmic Mel power spectral coefficients (LMPSCs), a minimum mean square error estimate of the clean LMPSCs is computed by carrying out Bayesian inference. We employ switching linear dynamical models as an a priori model for the dynamics of the clean LMPSCs. Further, we derive a stochastic observation model which relates the clean to the reverberant LMPSCs through a simplified model of the room impulse response (RIR). This model requires only two parameters, namely RIR energy and reverberation time, which can be estimated from the captured microphone signal. The performance of the proposed enhancement technique is studied on the AURORA5 database and compared to that of constrained maximum-likelihood linear regression (CMLLR). It is shown by experimental results that our approach significantly outperforms CMLLR and that up to 80% of the errors caused by the reverberation are recovered. In addition to the fact that the approach is compatible with the standard MFCC feature vectors, it leaves the ASR back-end unchanged. It is of moderate computational complexity and suitable for real time applications.","lang":"eng"}],"author":[{"last_name":"Krueger","full_name":"Krueger, Alexander","first_name":"Alexander"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242"}],"volume":18,"date_updated":"2022-01-06T06:51:11Z","oa":"1","main_file_link":[{"open_access":"1","url":"https://groups.uni-paderborn.de/nt/pubs/2010/KrHa10.pdf"}],"doi":"10.1109/TASL.2010.2049684","citation":{"ama":"Krueger A, Haeb-Umbach R. Model-Based Feature Enhancement for Reverberant Speech Recognition. <i>IEEE Transactions on Audio, Speech, and Language Processing</i>. 2010;18(7):1692-1707. doi:<a href=\"https://doi.org/10.1109/TASL.2010.2049684\">10.1109/TASL.2010.2049684</a>","chicago":"Krueger, Alexander, and Reinhold Haeb-Umbach. 
“Model-Based Feature Enhancement for Reverberant Speech Recognition.” <i>IEEE Transactions on Audio, Speech, and Language Processing</i> 18, no. 7 (2010): 1692–1707. <a href=\"https://doi.org/10.1109/TASL.2010.2049684\">https://doi.org/10.1109/TASL.2010.2049684</a>.","ieee":"A. Krueger and R. Haeb-Umbach, “Model-Based Feature Enhancement for Reverberant Speech Recognition,” <i>IEEE Transactions on Audio, Speech, and Language Processing</i>, vol. 18, no. 7, pp. 1692–1707, 2010.","bibtex":"@article{Krueger_Haeb-Umbach_2010, title={Model-Based Feature Enhancement for Reverberant Speech Recognition}, volume={18}, DOI={<a href=\"https://doi.org/10.1109/TASL.2010.2049684\">10.1109/TASL.2010.2049684</a>}, number={7}, journal={IEEE Transactions on Audio, Speech, and Language Processing}, author={Krueger, Alexander and Haeb-Umbach, Reinhold}, year={2010}, pages={1692–1707} }","short":"A. Krueger, R. Haeb-Umbach, IEEE Transactions on Audio, Speech, and Language Processing 18 (2010) 1692–1707.","mla":"Krueger, Alexander, and Reinhold Haeb-Umbach. “Model-Based Feature Enhancement for Reverberant Speech Recognition.” <i>IEEE Transactions on Audio, Speech, and Language Processing</i>, vol. 18, no. 7, 2010, pp. 1692–707, doi:<a href=\"https://doi.org/10.1109/TASL.2010.2049684\">10.1109/TASL.2010.2049684</a>.","apa":"Krueger, A., &#38; Haeb-Umbach, R. (2010). Model-Based Feature Enhancement for Reverberant Speech Recognition. <i>IEEE Transactions on Audio, Speech, and Language Processing</i>, <i>18</i>(7), 1692–1707. <a href=\"https://doi.org/10.1109/TASL.2010.2049684\">https://doi.org/10.1109/TASL.2010.2049684</a>"},"page":"1692-1707","intvolume":"        18","user_id":"44006","department":[{"_id":"54"}],"_id":"11846","type":"journal_article","status":"public"},{"issue":"5","citation":{"mla":"Ion, Valentin, and Reinhold Haeb-Umbach. 
“A Novel Uncertainty Decoding Rule With Applications to Transmission Error Robust Speech Recognition.” <i>IEEE Transactions on Audio, Speech, and Language Processing</i>, vol. 16, no. 5, 2008, pp. 1047–60, doi:<a href=\"https://doi.org/10.1109/TASL.2008.925879\">10.1109/TASL.2008.925879</a>.","short":"V. Ion, R. Haeb-Umbach, IEEE Transactions on Audio, Speech, and Language Processing 16 (2008) 1047–1060.","bibtex":"@article{Ion_Haeb-Umbach_2008, title={A Novel Uncertainty Decoding Rule With Applications to Transmission Error Robust Speech Recognition}, volume={16}, DOI={<a href=\"https://doi.org/10.1109/TASL.2008.925879\">10.1109/TASL.2008.925879</a>}, number={5}, journal={IEEE Transactions on Audio, Speech, and Language Processing}, author={Ion, Valentin and Haeb-Umbach, Reinhold}, year={2008}, pages={1047–1060} }","apa":"Ion, V., &#38; Haeb-Umbach, R. (2008). A Novel Uncertainty Decoding Rule With Applications to Transmission Error Robust Speech Recognition. <i>IEEE Transactions on Audio, Speech, and Language Processing</i>, <i>16</i>(5), 1047–1060. <a href=\"https://doi.org/10.1109/TASL.2008.925879\">https://doi.org/10.1109/TASL.2008.925879</a>","ama":"Ion V, Haeb-Umbach R. A Novel Uncertainty Decoding Rule With Applications to Transmission Error Robust Speech Recognition. <i>IEEE Transactions on Audio, Speech, and Language Processing</i>. 2008;16(5):1047-1060. doi:<a href=\"https://doi.org/10.1109/TASL.2008.925879\">10.1109/TASL.2008.925879</a>","chicago":"Ion, Valentin, and Reinhold Haeb-Umbach. “A Novel Uncertainty Decoding Rule With Applications to Transmission Error Robust Speech Recognition.” <i>IEEE Transactions on Audio, Speech, and Language Processing</i> 16, no. 5 (2008): 1047–60. <a href=\"https://doi.org/10.1109/TASL.2008.925879\">https://doi.org/10.1109/TASL.2008.925879</a>.","ieee":"V. Ion and R. 
Haeb-Umbach, “A Novel Uncertainty Decoding Rule With Applications to Transmission Error Robust Speech Recognition,” <i>IEEE Transactions on Audio, Speech, and Language Processing</i>, vol. 16, no. 5, pp. 1047–1060, 2008."},"intvolume":"        16","page":"1047-1060","year":"2008","date_created":"2019-07-12T05:28:53Z","author":[{"first_name":"Valentin","last_name":"Ion","full_name":"Ion, Valentin"},{"full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach","first_name":"Reinhold"}],"volume":16,"oa":"1","date_updated":"2022-01-06T06:51:10Z","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2008/IoHa08-1.pdf","open_access":"1"}],"doi":"10.1109/TASL.2008.925879","title":"A Novel Uncertainty Decoding Rule With Applications to Transmission Error Robust Speech Recognition","type":"journal_article","publication":"IEEE Transactions on Audio, Speech, and Language Processing","status":"public","abstract":[{"lang":"eng","text":"In this paper, we derive an uncertainty decoding rule for automatic speech recognition (ASR), which accounts for both corrupted observations and inter-frame correlation. The conditional independence assumption, prevalent in hidden Markov model-based ASR, is relaxed to obtain a clean speech posterior that is conditioned on the complete observed feature vector sequence. This is a more informative posterior than one conditioned only on the current observation. The novel decoding is used to obtain a transmission-error robust remote ASR system, where the speech capturing unit is connected to the decoder via an error-prone communication network. We show how the clean speech posterior can be computed for communication links being characterized by either bit errors or packet loss. 
Recognition results are presented for both distributed and network speech recognition, where in the latter case common voice-over-IP codecs are employed."}],"user_id":"44006","department":[{"_id":"54"}],"_id":"11820","language":[{"iso":"eng"}],"keyword":["automatic speech recognition","bit errors","codecs","communication links","corrupted observations","decoding","distributed speech recognition","error-prone communication network","feature vector sequence","hidden Markov model-based ASR","hidden Markov models","inter-frame correlation","Internet telephony","network speech recognition","packet loss","speech posterior","speech recognition","transmission error robust speech recognition","uncertainty decoding","voice-over-IP codecs"]}]
