[{"type":"conference","publication":"Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on","status":"public","abstract":[{"lang":"eng","text":"The parametric Bayesian Feature Enhancement (BFE) and a datadriven Denoising Autoencoder (DA) both bring performance gains in severe single-channel speech recognition conditions. The first can be adjusted to different conditions by an appropriate parameter setting, while the latter needs to be trained on conditions similar to the ones expected at decoding time, making it vulnerable to a mismatch between training and test conditions. We use a DNN backend and study reverberant ASR under three types of mismatch conditions: different room reverberation times, different speaker to microphone distances and the difference between artificially reverberated data and the recordings in a reverberant environment. We show that for these mismatch conditions BFE can provide the targets for a DA. This unsupervised adaptation provides a performance gain over the direct use of BFE and even enables to compensate for the mismatch of real and simulated reverberant data."}],"user_id":"44006","department":[{"_id":"54"}],"_id":"11813","language":[{"iso":"eng"}],"keyword":["codecs","signal denoising","speech recognition","Bayesian feature enhancement","denoising autoencoder","reverberant ASR","single-channel speech recognition","speaker to microphone distances","unsupervised adaptation","Adaptation models","Noise reduction","Reverberation","Speech","Speech recognition","Training","deep neuronal networks","denoising autoencoder","feature enhancement","robust speech recognition"],"citation":{"mla":"Heymann, Jahn, et al. “Unsupervised Adaptation of a Denoising Autoencoder by Bayesian Feature Enhancement for Reverberant Asr under Mismatch Conditions.” <i>Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference On</i>, 2015, pp. 5053–57, doi:<a href=\"https://doi.org/10.1109/ICASSP.2015.7178933\">10.1109/ICASSP.2015.7178933</a>.","bibtex":"@inproceedings{Heymann_Haeb-Umbach_Golik_Schlueter_2015, title={Unsupervised adaptation of a denoising autoencoder by Bayesian Feature Enhancement for reverberant asr under mismatch conditions}, DOI={<a href=\"https://doi.org/10.1109/ICASSP.2015.7178933\">10.1109/ICASSP.2015.7178933</a>}, booktitle={Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on}, author={Heymann, Jahn and Haeb-Umbach, Reinhold and Golik, P. and Schlueter, R.}, year={2015}, pages={5053–5057} }","short":"J. Heymann, R. Haeb-Umbach, P. Golik, R. Schlueter, in: Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference On, 2015, pp. 5053–5057.","apa":"Heymann, J., Haeb-Umbach, R., Golik, P., &#38; Schlueter, R. (2015). Unsupervised adaptation of a denoising autoencoder by Bayesian Feature Enhancement for reverberant asr under mismatch conditions. In <i>Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on</i> (pp. 5053–5057). <a href=\"https://doi.org/10.1109/ICASSP.2015.7178933\">https://doi.org/10.1109/ICASSP.2015.7178933</a>","ieee":"J. Heymann, R. Haeb-Umbach, P. Golik, and R. Schlueter, “Unsupervised adaptation of a denoising autoencoder by Bayesian Feature Enhancement for reverberant asr under mismatch conditions,” in <i>Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on</i>, 2015, pp. 5053–5057.","chicago":"Heymann, Jahn, Reinhold Haeb-Umbach, P. Golik, and R. Schlueter. “Unsupervised Adaptation of a Denoising Autoencoder by Bayesian Feature Enhancement for Reverberant Asr under Mismatch Conditions.” In <i>Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference On</i>, 5053–57, 2015. <a href=\"https://doi.org/10.1109/ICASSP.2015.7178933\">https://doi.org/10.1109/ICASSP.2015.7178933</a>.","ama":"Heymann J, Haeb-Umbach R, Golik P, Schlueter R. Unsupervised adaptation of a denoising autoencoder by Bayesian Feature Enhancement for reverberant asr under mismatch conditions. In: <i>Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference On</i>. ; 2015:5053-5057. doi:<a href=\"https://doi.org/10.1109/ICASSP.2015.7178933\">10.1109/ICASSP.2015.7178933</a>"},"page":"5053-5057","year":"2015","date_created":"2019-07-12T05:28:45Z","author":[{"first_name":"Jahn","last_name":"Heymann","id":"9168","full_name":"Heymann, Jahn"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold"},{"full_name":"Golik, P.","last_name":"Golik","first_name":"P."},{"last_name":"Schlueter","full_name":"Schlueter, R.","first_name":"R."}],"date_updated":"2022-01-06T06:51:09Z","oa":"1","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2015/hey_icassp_2015.pdf","open_access":"1"}],"doi":"10.1109/ICASSP.2015.7178933","title":"Unsupervised adaptation of a denoising autoencoder by Bayesian Feature Enhancement for reverberant asr under mismatch conditions"},{"doi":"10.1109/TASL.2013.2258013","title":"Bayesian Feature Enhancement for Reverberation and Noise Robust Speech Recognition","date_created":"2019-07-12T05:29:42Z","author":[{"full_name":"Leutnant, Volker","last_name":"Leutnant","first_name":"Volker"},{"last_name":"Krueger","full_name":"Krueger, Alexander","first_name":"Alexander"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold"}],"volume":21,"date_updated":"2022-01-06T06:51:11Z","citation":{"chicago":"Leutnant, Volker, Alexander Krueger, and Reinhold Haeb-Umbach. “Bayesian Feature Enhancement for Reverberation and Noise Robust Speech Recognition.” <i>IEEE Transactions on Audio, Speech, and Language Processing</i> 21, no. 8 (2013): 1640–52. <a href=\"https://doi.org/10.1109/TASL.2013.2258013\">https://doi.org/10.1109/TASL.2013.2258013</a>.","ieee":"V. Leutnant, A. Krueger, and R. Haeb-Umbach, “Bayesian Feature Enhancement for Reverberation and Noise Robust Speech Recognition,” <i>IEEE Transactions on Audio, Speech, and Language Processing</i>, vol. 21, no. 8, pp. 1640–1652, 2013.","ama":"Leutnant V, Krueger A, Haeb-Umbach R. Bayesian Feature Enhancement for Reverberation and Noise Robust Speech Recognition. <i>IEEE Transactions on Audio, Speech, and Language Processing</i>. 2013;21(8):1640-1652. doi:<a href=\"https://doi.org/10.1109/TASL.2013.2258013\">10.1109/TASL.2013.2258013</a>","apa":"Leutnant, V., Krueger, A., &#38; Haeb-Umbach, R. (2013). Bayesian Feature Enhancement for Reverberation and Noise Robust Speech Recognition. <i>IEEE Transactions on Audio, Speech, and Language Processing</i>, <i>21</i>(8), 1640–1652. <a href=\"https://doi.org/10.1109/TASL.2013.2258013\">https://doi.org/10.1109/TASL.2013.2258013</a>","mla":"Leutnant, Volker, et al. “Bayesian Feature Enhancement for Reverberation and Noise Robust Speech Recognition.” <i>IEEE Transactions on Audio, Speech, and Language Processing</i>, vol. 21, no. 8, 2013, pp. 1640–52, doi:<a href=\"https://doi.org/10.1109/TASL.2013.2258013\">10.1109/TASL.2013.2258013</a>.","bibtex":"@article{Leutnant_Krueger_Haeb-Umbach_2013, title={Bayesian Feature Enhancement for Reverberation and Noise Robust Speech Recognition}, volume={21}, DOI={<a href=\"https://doi.org/10.1109/TASL.2013.2258013\">10.1109/TASL.2013.2258013</a>}, number={8}, journal={IEEE Transactions on Audio, Speech, and Language Processing}, author={Leutnant, Volker and Krueger, Alexander and Haeb-Umbach, Reinhold}, year={2013}, pages={1640–1652} }","short":"V. Leutnant, A. Krueger, R. Haeb-Umbach, IEEE Transactions on Audio, Speech, and Language Processing 21 (2013) 1640–1652."},"page":"1640-1652","intvolume":"        21","year":"2013","issue":"8","language":[{"iso":"eng"}],"keyword":["Bayes methods","compensation","error statistics","reverberation","speech recognition","Bayesian feature enhancement","background noise","clean speech feature vectors","compensation","connected digits recognition task","error statistics","memory requirements","noisy reverberant data","posteriori probability density function","recursive formulation","reverberant logarithmic mel power spectral coefficients","robust automatic speech recognition","signal-to-noise ratios","time-variant observation","word error rate reduction","Robust automatic speech recognition","model-based Bayesian feature enhancement","observation model for reverberant and noisy speech","recursive observation model"],"user_id":"44006","department":[{"_id":"54"}],"_id":"11862","status":"public","abstract":[{"lang":"eng","text":"In this contribution we extend a previously proposed Bayesian approach for the enhancement of reverberant logarithmic mel power spectral coefficients for robust automatic speech recognition to the additional compensation of background noise. A recently proposed observation model is employed whose time-variant observation error statistics are obtained as a side product of the inference of the a posteriori probability density function of the clean speech feature vectors. Further a reduction of the computational effort and the memory requirements are achieved by using a recursive formulation of the observation model. The performance of the proposed algorithms is first experimentally studied on a connected digits recognition task with artificially created noisy reverberant data. It is shown that the use of the time-variant observation error model leads to a significant error rate reduction at low signal-to-noise ratios compared to a time-invariant model. Further experiments were conducted on a 5000 word task recorded in a reverberant and noisy environment. A significant word error rate reduction was obtained demonstrating the effectiveness of the approach on real-world data."}],"type":"journal_article","publication":"IEEE Transactions on Audio, Speech, and Language Processing"},{"department":[{"_id":"54"}],"user_id":"44006","_id":"11864","language":[{"iso":"eng"}],"keyword":["Robust Automatic Speech Recognition","Bayesian feature enhancement","observation model for reverberant and noisy speech"],"publication":"Signal Processing, Communications and Computing (ICSPCC), 2012 IEEE International Conference on","type":"conference","status":"public","abstract":[{"lang":"eng","text":"In this work, an observation model for the joint compensation of noise and reverberation in the logarithmic mel power spectral density domain is considered. It relates the features of the noisy reverberant speech to those of the non-reverberant speech and the noise. In contrast to enhancement of features only corrupted by reverberation (reverberant features), enhancement of noisy reverberant features requires a more sophisticated model for the error introduced by the proposed observation model. In a first consideration, it will be shown that this error is highly dependent on the instantaneous ratio of the power of reverberant speech to the power of the noise and, moreover, sensitive to the phase between reverberant speech and noise in the short-time discrete Fourier domain. Afterwards, a statistically motivated approach will be presented allowing for the model of the observation error to be inferred from the error model previously used for the reverberation only case. Finally, the developed observation error model will be utilized in a Bayesian feature enhancement scheme, leading to improvements in word accuracy on the AURORA5 database."}],"author":[{"first_name":"Volker","full_name":"Leutnant, Volker","last_name":"Leutnant"},{"full_name":"Krueger, Alexander","last_name":"Krueger","first_name":"Alexander"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold"}],"date_created":"2019-07-12T05:29:44Z","oa":"1","date_updated":"2022-01-06T06:51:11Z","main_file_link":[{"url":"http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6335731","open_access":"1"}],"title":"A Statistical Observation Model For Noisy Reverberant Speech Features and its Application to Robust ASR","citation":{"ieee":"V. Leutnant, A. Krueger, and R. Haeb-Umbach, “A Statistical Observation Model For Noisy Reverberant Speech Features and its Application to Robust ASR,” in <i>Signal Processing, Communications and Computing (ICSPCC), 2012 IEEE International Conference on</i>, 2012.","chicago":"Leutnant, Volker, Alexander Krueger, and Reinhold Haeb-Umbach. “A Statistical Observation Model For Noisy Reverberant Speech Features and Its Application to Robust ASR.” In <i>Signal Processing, Communications and Computing (ICSPCC), 2012 IEEE International Conference On</i>, 2012.","ama":"Leutnant V, Krueger A, Haeb-Umbach R. A Statistical Observation Model For Noisy Reverberant Speech Features and its Application to Robust ASR. In: <i>Signal Processing, Communications and Computing (ICSPCC), 2012 IEEE International Conference On</i>. ; 2012.","bibtex":"@inproceedings{Leutnant_Krueger_Haeb-Umbach_2012, title={A Statistical Observation Model For Noisy Reverberant Speech Features and its Application to Robust ASR}, booktitle={Signal Processing, Communications and Computing (ICSPCC), 2012 IEEE International Conference on}, author={Leutnant, Volker and Krueger, Alexander and Haeb-Umbach, Reinhold}, year={2012} }","mla":"Leutnant, Volker, et al. “A Statistical Observation Model For Noisy Reverberant Speech Features and Its Application to Robust ASR.” <i>Signal Processing, Communications and Computing (ICSPCC), 2012 IEEE International Conference On</i>, 2012.","short":"V. Leutnant, A. Krueger, R. Haeb-Umbach, in: Signal Processing, Communications and Computing (ICSPCC), 2012 IEEE International Conference On, 2012.","apa":"Leutnant, V., Krueger, A., &#38; Haeb-Umbach, R. (2012). A Statistical Observation Model For Noisy Reverberant Speech Features and its Application to Robust ASR. In <i>Signal Processing, Communications and Computing (ICSPCC), 2012 IEEE International Conference on</i>."},"year":"2012"},{"_id":"11846","department":[{"_id":"54"}],"user_id":"44006","type":"journal_article","status":"public","date_updated":"2022-01-06T06:51:11Z","oa":"1","volume":18,"author":[{"full_name":"Krueger, Alexander","last_name":"Krueger","first_name":"Alexander"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242"}],"doi":"10.1109/TASL.2010.2049684","main_file_link":[{"open_access":"1","url":"https://groups.uni-paderborn.de/nt/pubs/2010/KrHa10.pdf"}],"page":"1692-1707","intvolume":"        18","citation":{"bibtex":"@article{Krueger_Haeb-Umbach_2010, title={Model-Based Feature Enhancement for Reverberant Speech Recognition}, volume={18}, DOI={<a href=\"https://doi.org/10.1109/TASL.2010.2049684\">10.1109/TASL.2010.2049684</a>}, number={7}, journal={IEEE Transactions on Audio, Speech, and Language Processing}, author={Krueger, Alexander and Haeb-Umbach, Reinhold}, year={2010}, pages={1692–1707} }","short":"A. Krueger, R. Haeb-Umbach, IEEE Transactions on Audio, Speech, and Language Processing 18 (2010) 1692–1707.","mla":"Krueger, Alexander, and Reinhold Haeb-Umbach. “Model-Based Feature Enhancement for Reverberant Speech Recognition.” <i>IEEE Transactions on Audio, Speech, and Language Processing</i>, vol. 18, no. 7, 2010, pp. 1692–707, doi:<a href=\"https://doi.org/10.1109/TASL.2010.2049684\">10.1109/TASL.2010.2049684</a>.","apa":"Krueger, A., &#38; Haeb-Umbach, R. (2010). Model-Based Feature Enhancement for Reverberant Speech Recognition. <i>IEEE Transactions on Audio, Speech, and Language Processing</i>, <i>18</i>(7), 1692–1707. <a href=\"https://doi.org/10.1109/TASL.2010.2049684\">https://doi.org/10.1109/TASL.2010.2049684</a>","ama":"Krueger A, Haeb-Umbach R. Model-Based Feature Enhancement for Reverberant Speech Recognition. <i>IEEE Transactions on Audio, Speech, and Language Processing</i>. 2010;18(7):1692-1707. doi:<a href=\"https://doi.org/10.1109/TASL.2010.2049684\">10.1109/TASL.2010.2049684</a>","ieee":"A. Krueger and R. Haeb-Umbach, “Model-Based Feature Enhancement for Reverberant Speech Recognition,” <i>IEEE Transactions on Audio, Speech, and Language Processing</i>, vol. 18, no. 7, pp. 1692–1707, 2010.","chicago":"Krueger, Alexander, and Reinhold Haeb-Umbach. “Model-Based Feature Enhancement for Reverberant Speech Recognition.” <i>IEEE Transactions on Audio, Speech, and Language Processing</i> 18, no. 7 (2010): 1692–1707. <a href=\"https://doi.org/10.1109/TASL.2010.2049684\">https://doi.org/10.1109/TASL.2010.2049684</a>."},"keyword":["ASR","AURORA5 database","automatic speech recognition","Bayesian inference","belief networks","CMLLR","computational complexity","constrained maximum likelihood linear regression","least mean squares methods","LMPSC computation","logarithmic Mel power spectrum","maximum likelihood estimation","Mel frequency cepstral coefficients","MFCC feature vectors","microphone signal","minimum mean square error estimation","model-based feature enhancement","regression analysis","reverberant speech recognition","reverberation","RIR energy","room impulse response","speech recognition","stochastic observation model","stochastic processes"],"language":[{"iso":"eng"}],"publication":"IEEE Transactions on Audio, Speech, and Language Processing","abstract":[{"lang":"eng","text":"In this paper, we present a new technique for automatic speech recognition (ASR) in reverberant environments. Our approach is aimed at the enhancement of the logarithmic Mel power spectrum, which is computed at an intermediate stage to obtain the widely used Mel frequency cepstral coefficients (MFCCs). Given the reverberant logarithmic Mel power spectral coefficients (LMPSCs), a minimum mean square error estimate of the clean LMPSCs is computed by carrying out Bayesian inference. We employ switching linear dynamical models as an a priori model for the dynamics of the clean LMPSCs. Further, we derive a stochastic observation model which relates the clean to the reverberant LMPSCs through a simplified model of the room impulse response (RIR). This model requires only two parameters, namely RIR energy and reverberation time, which can be estimated from the captured microphone signal. The performance of the proposed enhancement technique is studied on the AURORA5 database and compared to that of constrained maximum-likelihood linear regression (CMLLR). It is shown by experimental results that our approach significantly outperforms CMLLR and that up to 80\\% of the errors caused by the reverberation are recovered. In addition to the fact that the approach is compatible with the standard MFCC feature vectors, it leaves the ASR back-end unchanged. It is of moderate computational complexity and suitable for real time applications."}],"date_created":"2019-07-12T05:29:23Z","title":"Model-Based Feature Enhancement for Reverberant Speech Recognition","issue":"7","year":"2010"},{"_id":"11937","user_id":"44006","department":[{"_id":"54"}],"type":"journal_article","status":"public","oa":"1","date_updated":"2022-01-06T06:51:12Z","author":[{"full_name":"Windmann, Stefan","last_name":"Windmann","first_name":"Stefan"},{"first_name":"Reinhold","full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach"}],"volume":17,"main_file_link":[{"open_access":"1","url":"https://groups.uni-paderborn.de/nt/pubs/2009/WiHa09-1.pdf"}],"doi":"10.1109/TASL.2009.2014894","citation":{"mla":"Windmann, Stefan, and Reinhold Haeb-Umbach. “Approaches to Iterative Speech Feature Enhancement and Recognition.” <i>IEEE Transactions on Audio, Speech, and Language Processing</i>, vol. 17, no. 5, 2009, pp. 974–84, doi:<a href=\"https://doi.org/10.1109/TASL.2009.2014894\">10.1109/TASL.2009.2014894</a>.","bibtex":"@article{Windmann_Haeb-Umbach_2009, title={Approaches to Iterative Speech Feature Enhancement and Recognition}, volume={17}, DOI={<a href=\"https://doi.org/10.1109/TASL.2009.2014894\">10.1109/TASL.2009.2014894</a>}, number={5}, journal={IEEE Transactions on Audio, Speech, and Language Processing}, author={Windmann, Stefan and Haeb-Umbach, Reinhold}, year={2009}, pages={974–984} }","short":"S. Windmann, R. Haeb-Umbach, IEEE Transactions on Audio, Speech, and Language Processing 17 (2009) 974–984.","apa":"Windmann, S., &#38; Haeb-Umbach, R. (2009). Approaches to Iterative Speech Feature Enhancement and Recognition. <i>IEEE Transactions on Audio, Speech, and Language Processing</i>, <i>17</i>(5), 974–984. <a href=\"https://doi.org/10.1109/TASL.2009.2014894\">https://doi.org/10.1109/TASL.2009.2014894</a>","ama":"Windmann S, Haeb-Umbach R. Approaches to Iterative Speech Feature Enhancement and Recognition. <i>IEEE Transactions on Audio, Speech, and Language Processing</i>. 2009;17(5):974-984. doi:<a href=\"https://doi.org/10.1109/TASL.2009.2014894\">10.1109/TASL.2009.2014894</a>","chicago":"Windmann, Stefan, and Reinhold Haeb-Umbach. “Approaches to Iterative Speech Feature Enhancement and Recognition.” <i>IEEE Transactions on Audio, Speech, and Language Processing</i> 17, no. 5 (2009): 974–84. <a href=\"https://doi.org/10.1109/TASL.2009.2014894\">https://doi.org/10.1109/TASL.2009.2014894</a>.","ieee":"S. Windmann and R. Haeb-Umbach, “Approaches to Iterative Speech Feature Enhancement and Recognition,” <i>IEEE Transactions on Audio, Speech, and Language Processing</i>, vol. 17, no. 5, pp. 974–984, 2009."},"page":"974-984","intvolume":"        17","keyword":["AURORA2 databases","AURORA4 databases","automatic speech recognition","feedback structures","hidden Markov models","HMM","iterative methods","iterative speech feature enhancement","model probabilities","speech decoding","speech enhancement","speech feature distribution","speech recognition","switching linear dynamic models"],"language":[{"iso":"eng"}],"publication":"IEEE Transactions on Audio, Speech, and Language Processing","abstract":[{"lang":"eng","text":"In automatic speech recognition, hidden Markov models (HMMs) are commonly used for speech decoding, while switching linear dynamic models (SLDMs) can be employed for a preceding model-based speech feature enhancement. In this paper, these model types are combined in order to obtain a novel iterative speech feature enhancement and recognition architecture. It is shown that speech feature enhancement with SLDMs can be improved by feeding back information from the HMM to the enhancement stage. Two different feedback structures are derived. In the first, the posteriors of the HMM states are used to control the model probabilities of the SLDMs, while in the second they are employed to directly influence the estimate of the speech feature distribution. Both approaches lead to improvements in recognition accuracy both on the AURORA2 and AURORA4 databases compared to non-iterative speech feature enhancement with SLDMs. It is also shown that a combination with uncertainty decoding further enhances performance."}],"date_created":"2019-07-12T05:31:08Z","title":"Approaches to Iterative Speech Feature Enhancement and Recognition","issue":"5","year":"2009"},{"citation":{"apa":"Windmann, S., &#38; Haeb-Umbach, R. (2008). Modeling the dynamics of speech and noise for speech feature enhancement in ASR. In <i>IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2008)</i> (pp. 4409–4412). <a href=\"https://doi.org/10.1109/ICASSP.2008.4518633\">https://doi.org/10.1109/ICASSP.2008.4518633</a>","bibtex":"@inproceedings{Windmann_Haeb-Umbach_2008, title={Modeling the dynamics of speech and noise for speech feature enhancement in ASR}, DOI={<a href=\"https://doi.org/10.1109/ICASSP.2008.4518633\">10.1109/ICASSP.2008.4518633</a>}, booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2008)}, author={Windmann, Stefan and Haeb-Umbach, Reinhold}, year={2008}, pages={4409–4412} }","short":"S. Windmann, R. Haeb-Umbach, in: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2008), 2008, pp. 4409–4412.","mla":"Windmann, Stefan, and Reinhold Haeb-Umbach. “Modeling the Dynamics of Speech and Noise for Speech Feature Enhancement in ASR.” <i>IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2008)</i>, 2008, pp. 4409–12, doi:<a href=\"https://doi.org/10.1109/ICASSP.2008.4518633\">10.1109/ICASSP.2008.4518633</a>.","ama":"Windmann S, Haeb-Umbach R. Modeling the dynamics of speech and noise for speech feature enhancement in ASR. In: <i>IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2008)</i>. ; 2008:4409-4412. doi:<a href=\"https://doi.org/10.1109/ICASSP.2008.4518633\">10.1109/ICASSP.2008.4518633</a>","ieee":"S. Windmann and R. Haeb-Umbach, “Modeling the dynamics of speech and noise for speech feature enhancement in ASR,” in <i>IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2008)</i>, 2008, pp. 4409–4412.","chicago":"Windmann, Stefan, and Reinhold Haeb-Umbach. “Modeling the Dynamics of Speech and Noise for Speech Feature Enhancement in ASR.” In <i>IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2008)</i>, 4409–12, 2008. <a href=\"https://doi.org/10.1109/ICASSP.2008.4518633\">https://doi.org/10.1109/ICASSP.2008.4518633</a>."},"page":"4409-4412","year":"2008","date_created":"2019-07-12T05:31:11Z","author":[{"first_name":"Stefan","full_name":"Windmann, Stefan","last_name":"Windmann"},{"id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach","first_name":"Reinhold"}],"oa":"1","date_updated":"2022-01-06T06:51:12Z","main_file_link":[{"open_access":"1","url":"https://groups.uni-paderborn.de/nt/pubs/2008/WiHa08-1.pdf"}],"doi":"10.1109/ICASSP.2008.4518633","title":"Modeling the dynamics of speech and noise for speech feature enhancement in ASR","type":"conference","publication":"IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2008)","status":"public","abstract":[{"text":"In this paper a switching linear dynamical model (SLDM) approach for speech feature enhancement is improved by employing more accurate models for the dynamics of speech and noise. The model of the clean speech feature trajectory is improved by augmenting the state vector to capture information derived from the delta features. Further a hidden noise state variable is introduced to obtain a more elaborated model for the noise dynamics. Approximate Bayesian inference in the SLDM is carried out by a bank of extended Kalman filters, whose outputs are combined according to the a posteriori probability of the individual state models. Experimental results on the AURORA2 database show improved recognition accuracy.","lang":"eng"}],"user_id":"44006","department":[{"_id":"54"}],"_id":"11939","language":[{"iso":"eng"}],"keyword":["a posteriori probability","AURORA2 database","Bayesian inference","Bayes methods","channel bank filters","extended Kalman filter banks","hidden noise state variable","Kalman filters","noise dynamics","speech enhancement","speech feature enhancement","speech feature trajectory","switching linear dynamical model approach"]}]
