[{"doi":"10.1109/ICASSP.2015.7178933","main_file_link":[{"open_access":"1","url":"https://groups.uni-paderborn.de/nt/pubs/2015/hey_icassp_2015.pdf"}],"title":"Unsupervised adaptation of a denoising autoencoder by Bayesian Feature Enhancement for reverberant asr under mismatch conditions","date_created":"2019-07-12T05:28:45Z","author":[{"first_name":"Jahn","id":"9168","full_name":"Heymann, Jahn","last_name":"Heymann"},{"id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach","first_name":"Reinhold"},{"first_name":"P.","full_name":"Golik, P.","last_name":"Golik"},{"first_name":"R.","full_name":"Schlueter, R.","last_name":"Schlueter"}],"date_updated":"2022-01-06T06:51:09Z","oa":"1","page":"5053-5057","citation":{"apa":"Heymann, J., Haeb-Umbach, R., Golik, P., &#38; Schlueter, R. (2015). Unsupervised adaptation of a denoising autoencoder by Bayesian Feature Enhancement for reverberant asr under mismatch conditions. In <i>Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on</i> (pp. 5053–5057). <a href=\"https://doi.org/10.1109/ICASSP.2015.7178933\">https://doi.org/10.1109/ICASSP.2015.7178933</a>","mla":"Heymann, Jahn, et al. “Unsupervised Adaptation of a Denoising Autoencoder by Bayesian Feature Enhancement for Reverberant Asr under Mismatch Conditions.” <i>Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference On</i>, 2015, pp. 5053–57, doi:<a href=\"https://doi.org/10.1109/ICASSP.2015.7178933\">10.1109/ICASSP.2015.7178933</a>.","bibtex":"@inproceedings{Heymann_Haeb-Umbach_Golik_Schlueter_2015, title={Unsupervised adaptation of a denoising autoencoder by Bayesian Feature Enhancement for reverberant asr under mismatch conditions}, DOI={<a href=\"https://doi.org/10.1109/ICASSP.2015.7178933\">10.1109/ICASSP.2015.7178933</a>}, booktitle={Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on}, author={Heymann, Jahn and Haeb-Umbach, Reinhold and Golik, P. and Schlueter, R.}, year={2015}, pages={5053–5057} }","short":"J. Heymann, R. Haeb-Umbach, P. Golik, R. Schlueter, in: Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference On, 2015, pp. 5053–5057.","chicago":"Heymann, Jahn, Reinhold Haeb-Umbach, P. Golik, and R. Schlueter. “Unsupervised Adaptation of a Denoising Autoencoder by Bayesian Feature Enhancement for Reverberant Asr under Mismatch Conditions.” In <i>Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference On</i>, 5053–57, 2015. <a href=\"https://doi.org/10.1109/ICASSP.2015.7178933\">https://doi.org/10.1109/ICASSP.2015.7178933</a>.","ieee":"J. Heymann, R. Haeb-Umbach, P. Golik, and R. Schlueter, “Unsupervised adaptation of a denoising autoencoder by Bayesian Feature Enhancement for reverberant asr under mismatch conditions,” in <i>Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on</i>, 2015, pp. 5053–5057.","ama":"Heymann J, Haeb-Umbach R, Golik P, Schlueter R. Unsupervised adaptation of a denoising autoencoder by Bayesian Feature Enhancement for reverberant asr under mismatch conditions. In: <i>Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference On</i>. ; 2015:5053-5057. doi:<a href=\"https://doi.org/10.1109/ICASSP.2015.7178933\">10.1109/ICASSP.2015.7178933</a>"},"year":"2015","language":[{"iso":"eng"}],"keyword":["codecs","signal denoising","speech recognition","Bayesian feature enhancement","denoising autoencoder","reverberant ASR","single-channel speech recognition","speaker to microphone distances","unsupervised adaptation","Adaptation models","Noise reduction","Reverberation","Speech","Speech recognition","Training","deep neuronal networks","denoising autoencoder","feature enhancement","robust speech recognition"],"department":[{"_id":"54"}],"user_id":"44006","_id":"11813","status":"public","abstract":[{"text":"The parametric Bayesian Feature Enhancement (BFE) and a datadriven Denoising Autoencoder (DA) both bring performance gains in severe single-channel speech recognition conditions. The first can be adjusted to different conditions by an appropriate parameter setting, while the latter needs to be trained on conditions similar to the ones expected at decoding time, making it vulnerable to a mismatch between training and test conditions. We use a DNN backend and study reverberant ASR under three types of mismatch conditions: different room reverberation times, different speaker to microphone distances and the difference between artificially reverberated data and the recordings in a reverberant environment. We show that for these mismatch conditions BFE can provide the targets for a DA. This unsupervised adaptation provides a performance gain over the direct use of BFE and even enables to compensate for the mismatch of real and simulated reverberant data.","lang":"eng"}],"publication":"Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on","type":"conference"},{"language":[{"iso":"eng"}],"user_id":"44006","department":[{"_id":"54"}],"_id":"11814","status":"public","abstract":[{"lang":"eng","text":"In this paper we present an algorithm for the unsupervised segmentation of a lattice produced by a phoneme recognizer into words. Using a lattice rather than a single phoneme string accounts for the uncertainty of the recognizer about the true label sequence. An example application is the discovery of lexical units from the output of an error-prone phoneme recognizer in a zero-resource setting, where neither the lexicon nor the language model (LM) is known. We propose a computationally efficient iterative approach, which alternates between the following two steps: First, the most probable string is extracted from the lattice using a phoneme LM learned on the segmentation result of the previous iteration. Second, word segmentation is performed on the extracted string using a word and phoneme LM which is learned alongside the new segmentation. We present results on lattices produced by a phoneme recognizer on the WSJCAM0 dataset. We show that our approach delivers superior segmentation performance than an earlier approach found in the literature, in particular for higher-order language models."}],"type":"conference","publication":"39th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2014)","main_file_link":[{"open_access":"1","url":"https://groups.uni-paderborn.de/nt/pubs/2014/HeWaHa2014.pdf"}],"title":"Iterative Bayesian Word Segmentation for Unsupervised Vocabulary Discovery from Phoneme Lattices","author":[{"first_name":"Jahn","last_name":"Heymann","full_name":"Heymann, Jahn","id":"9168"},{"last_name":"Walter","full_name":"Walter, Oliver","first_name":"Oliver"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242"},{"last_name":"Raj","full_name":"Raj, Bhiksha","first_name":"Bhiksha"}],"date_created":"2019-07-12T05:28:46Z","oa":"1","date_updated":"2022-01-06T06:51:09Z","citation":{"ieee":"J. Heymann, O. Walter, R. Haeb-Umbach, and B. Raj, “Iterative Bayesian Word Segmentation for Unsupervised Vocabulary Discovery from Phoneme Lattices,” in <i>39th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2014)</i>, 2014.","chicago":"Heymann, Jahn, Oliver Walter, Reinhold Haeb-Umbach, and Bhiksha Raj. “Iterative Bayesian Word Segmentation for Unsupervised Vocabulary Discovery from Phoneme Lattices.” In <i>39th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2014)</i>, 2014.","ama":"Heymann J, Walter O, Haeb-Umbach R, Raj B. Iterative Bayesian Word Segmentation for Unsupervised Vocabulary Discovery from Phoneme Lattices. In: <i>39th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2014)</i>. ; 2014.","apa":"Heymann, J., Walter, O., Haeb-Umbach, R., &#38; Raj, B. (2014). Iterative Bayesian Word Segmentation for Unsupervised Vocabulary Discovery from Phoneme Lattices. In <i>39th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2014)</i>.","mla":"Heymann, Jahn, et al. “Iterative Bayesian Word Segmentation for Unsupervised Vocabulary Discovery from Phoneme Lattices.” <i>39th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2014)</i>, 2014.","short":"J. Heymann, O. Walter, R. Haeb-Umbach, B. Raj, in: 39th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2014), 2014.","bibtex":"@inproceedings{Heymann_Walter_Haeb-Umbach_Raj_2014, title={Iterative Bayesian Word Segmentation for Unsupervised Vocabulary Discovery from Phoneme Lattices}, booktitle={39th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2014)}, author={Heymann, Jahn and Walter, Oliver and Haeb-Umbach, Reinhold and Raj, Bhiksha}, year={2014} }"},"year":"2014","related_material":{"link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2014/HeWaHa2014_Poster.pdf","relation":"supplementary_material","description":"Poster"}]}},{"date_created":"2019-07-12T05:28:47Z","author":[{"id":"9168","full_name":"Heymann, Jahn","last_name":"Heymann","first_name":"Jahn"},{"first_name":"Oliver","full_name":"Walter, Oliver","last_name":"Walter"},{"first_name":"Reinhold","id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach"},{"first_name":"Bhiksha","full_name":"Raj, Bhiksha","last_name":"Raj"}],"oa":"1","date_updated":"2022-01-06T06:51:09Z","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2013/HeWaHaRa13.pdf","open_access":"1"}],"title":"Unsupervised Word Segmentation from Noisy Input","related_material":{"link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2013/HeWaHaRa_Poster.pdf","relation":"supplementary_material","description":"Poster"}]},"citation":{"bibtex":"@inproceedings{Heymann_Walter_Haeb-Umbach_Raj_2013, title={Unsupervised Word Segmentation from Noisy Input}, booktitle={Automatic Speech Recognition and Understanding Workshop (ASRU 2013)}, author={Heymann, Jahn and Walter, Oliver and Haeb-Umbach, Reinhold and Raj, Bhiksha}, year={2013} }","mla":"Heymann, Jahn, et al. “Unsupervised Word Segmentation from Noisy Input.” <i>Automatic Speech Recognition and Understanding Workshop (ASRU 2013)</i>, 2013.","short":"J. Heymann, O. Walter, R. Haeb-Umbach, B. Raj, in: Automatic Speech Recognition and Understanding Workshop (ASRU 2013), 2013.","apa":"Heymann, J., Walter, O., Haeb-Umbach, R., &#38; Raj, B. (2013). Unsupervised Word Segmentation from Noisy Input. In <i>Automatic Speech Recognition and Understanding Workshop (ASRU 2013)</i>.","ama":"Heymann J, Walter O, Haeb-Umbach R, Raj B. Unsupervised Word Segmentation from Noisy Input. In: <i>Automatic Speech Recognition and Understanding Workshop (ASRU 2013)</i>. ; 2013.","chicago":"Heymann, Jahn, Oliver Walter, Reinhold Haeb-Umbach, and Bhiksha Raj. “Unsupervised Word Segmentation from Noisy Input.” In <i>Automatic Speech Recognition and Understanding Workshop (ASRU 2013)</i>, 2013.","ieee":"J. Heymann, O. Walter, R. Haeb-Umbach, and B. Raj, “Unsupervised Word Segmentation from Noisy Input,” in <i>Automatic Speech Recognition and Understanding Workshop (ASRU 2013)</i>, 2013."},"year":"2013","department":[{"_id":"54"}],"user_id":"44006","_id":"11815","language":[{"iso":"eng"}],"publication":"Automatic Speech Recognition and Understanding Workshop (ASRU 2013)","type":"conference","status":"public"}]
