[{"title":"Using the turbo principle for exploiting temporal and spectral correlations in speech presence probability estimation","doi":"10.1109/ICASSP.2013.6637771","date_updated":"2022-01-06T06:51:12Z","date_created":"2019-07-12T05:30:45Z","author":[{"full_name":"Vu, Dang Hai Tran","last_name":"Vu","first_name":"Dang Hai Tran"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold"}],"year":"2013","page":"863-867","citation":{"apa":"Vu, D. H. T., &#38; Haeb-Umbach, R. (2013). Using the turbo principle for exploiting temporal and spectral correlations in speech presence probability estimation. In <i>38th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013)</i> (pp. 863–867). <a href=\"https://doi.org/10.1109/ICASSP.2013.6637771\">https://doi.org/10.1109/ICASSP.2013.6637771</a>","mla":"Vu, Dang Hai Tran, and Reinhold Haeb-Umbach. “Using the Turbo Principle for Exploiting Temporal and Spectral Correlations in Speech Presence Probability Estimation.” <i>38th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013)</i>, 2013, pp. 863–67, doi:<a href=\"https://doi.org/10.1109/ICASSP.2013.6637771\">10.1109/ICASSP.2013.6637771</a>.","bibtex":"@inproceedings{Vu_Haeb-Umbach_2013, title={Using the turbo principle for exploiting temporal and spectral correlations in speech presence probability estimation}, DOI={<a href=\"https://doi.org/10.1109/ICASSP.2013.6637771\">10.1109/ICASSP.2013.6637771</a>}, booktitle={38th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013)}, author={Vu, Dang Hai Tran and Haeb-Umbach, Reinhold}, year={2013}, pages={863–867} }","short":"D.H.T. Vu, R. Haeb-Umbach, in: 38th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013), 2013, pp. 863–867.","ama":"Vu DHT, Haeb-Umbach R. Using the turbo principle for exploiting temporal and spectral correlations in speech presence probability estimation. 
In: <i>38th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013)</i>. ; 2013:863-867. doi:<a href=\"https://doi.org/10.1109/ICASSP.2013.6637771\">10.1109/ICASSP.2013.6637771</a>","ieee":"D. H. T. Vu and R. Haeb-Umbach, “Using the turbo principle for exploiting temporal and spectral correlations in speech presence probability estimation,” in <i>38th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013)</i>, 2013, pp. 863–867.","chicago":"Vu, Dang Hai Tran, and Reinhold Haeb-Umbach. “Using the Turbo Principle for Exploiting Temporal and Spectral Correlations in Speech Presence Probability Estimation.” In <i>38th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013)</i>, 863–67, 2013. <a href=\"https://doi.org/10.1109/ICASSP.2013.6637771\">https://doi.org/10.1109/ICASSP.2013.6637771</a>."},"publication_identifier":{"issn":["1520-6149"]},"keyword":["correlation methods","estimation theory","hidden Markov models","iterative methods","probability","spectral analysis","speech processing","2D HMM","SPP estimates","iterative algorithm","posterior probability estimation","spectral correlation","speech presence probability estimation","state-of-the-art SPP estimation algorithm","temporal correlation","turbo principle","two-dimensional hidden Markov model","Correlation","Decoding","Estimation","Iterative decoding","Noise","Speech","Vectors"],"language":[{"iso":"eng"}],"_id":"11917","department":[{"_id":"54"}],"user_id":"44006","abstract":[{"text":"In this paper we present a speech presence probability (SPP) estimation algorithm which exploits both temporal and spectral correlations of speech. To this end, the SPP estimation is formulated as the posterior probability estimation of the states of a two-dimensional (2D) Hidden Markov Model (HMM). We derive an iterative algorithm to decode the 2D-HMM which is based on the turbo principle. 
The experimental results show that indeed the SPP estimates improve from iteration to iteration, and further clearly outperform another state-of-the-art SPP estimation algorithm.","lang":"eng"}],"status":"public","publication":"38th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013)","type":"conference"},{"issue":"5","year":"2008","citation":{"mla":"Ion, Valentin, and Reinhold Haeb-Umbach. “A Novel Uncertainty Decoding Rule With Applications to Transmission Error Robust Speech Recognition.” <i>IEEE Transactions on Audio, Speech, and Language Processing</i>, vol. 16, no. 5, 2008, pp. 1047–60, doi:<a href=\"https://doi.org/10.1109/TASL.2008.925879\">10.1109/TASL.2008.925879</a>.","short":"V. Ion, R. Haeb-Umbach, IEEE Transactions on Audio, Speech, and Language Processing 16 (2008) 1047–1060.","bibtex":"@article{Ion_Haeb-Umbach_2008, title={A Novel Uncertainty Decoding Rule With Applications to Transmission Error Robust Speech Recognition}, volume={16}, DOI={<a href=\"https://doi.org/10.1109/TASL.2008.925879\">10.1109/TASL.2008.925879</a>}, number={5}, journal={IEEE Transactions on Audio, Speech, and Language Processing}, author={Ion, Valentin and Haeb-Umbach, Reinhold}, year={2008}, pages={1047–1060} }","apa":"Ion, V., &#38; Haeb-Umbach, R. (2008). A Novel Uncertainty Decoding Rule With Applications to Transmission Error Robust Speech Recognition. <i>IEEE Transactions on Audio, Speech, and Language Processing</i>, <i>16</i>(5), 1047–1060. <a href=\"https://doi.org/10.1109/TASL.2008.925879\">https://doi.org/10.1109/TASL.2008.925879</a>","ieee":"V. Ion and R. Haeb-Umbach, “A Novel Uncertainty Decoding Rule With Applications to Transmission Error Robust Speech Recognition,” <i>IEEE Transactions on Audio, Speech, and Language Processing</i>, vol. 16, no. 5, pp. 1047–1060, 2008.","chicago":"Ion, Valentin, and Reinhold Haeb-Umbach. 
“A Novel Uncertainty Decoding Rule With Applications to Transmission Error Robust Speech Recognition.” <i>IEEE Transactions on Audio, Speech, and Language Processing</i> 16, no. 5 (2008): 1047–60. <a href=\"https://doi.org/10.1109/TASL.2008.925879\">https://doi.org/10.1109/TASL.2008.925879</a>.","ama":"Ion V, Haeb-Umbach R. A Novel Uncertainty Decoding Rule With Applications to Transmission Error Robust Speech Recognition. <i>IEEE Transactions on Audio, Speech, and Language Processing</i>. 2008;16(5):1047-1060. doi:<a href=\"https://doi.org/10.1109/TASL.2008.925879\">10.1109/TASL.2008.925879</a>"},"intvolume":"        16","page":"1047-1060","oa":"1","date_updated":"2022-01-06T06:51:10Z","author":[{"first_name":"Valentin","full_name":"Ion, Valentin","last_name":"Ion"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242"}],"date_created":"2019-07-12T05:28:53Z","volume":16,"title":"A Novel Uncertainty Decoding Rule With Applications to Transmission Error Robust Speech Recognition","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2008/IoHa08-1.pdf","open_access":"1"}],"doi":"10.1109/TASL.2008.925879","type":"journal_article","publication":"IEEE Transactions on Audio, Speech, and Language Processing","abstract":[{"lang":"eng","text":"In this paper, we derive an uncertainty decoding rule for automatic speech recognition (ASR), which accounts for both corrupted observations and inter-frame correlation. The conditional independence assumption, prevalent in hidden Markov model-based ASR, is relaxed to obtain a clean speech posterior that is conditioned on the complete observed feature vector sequence. This is a more informative posterior than one conditioned only on the current observation. The novel decoding is used to obtain a transmission-error robust remote ASR system, where the speech capturing unit is connected to the decoder via an error-prone communication network. 
We show how the clean speech posterior can be computed for communication links being characterized by either bit errors or packet loss. Recognition results are presented for both distributed and network speech recognition, where in the latter case common voice-over-IP codecs are employed."}],"status":"public","_id":"11820","user_id":"44006","department":[{"_id":"54"}],"keyword":["automatic speech recognition","bit errors","codecs","communication links","corrupted observations","decoding","distributed speech recognition","error-prone communication network","feature vector sequence","hidden Markov model-based ASR","hidden Markov models","inter-frame correlation","Internet telephony","network speech recognition","packet loss","speech posterior","speech recognition","transmission error robust speech recognition","uncertainty decoding","voice-over-IP codecs"],"language":[{"iso":"eng"}]},{"user_id":"44006","department":[{"_id":"54"}],"_id":"11931","language":[{"iso":"eng"}],"keyword":["bimodal human-robot interface","binaural signal processing","enhanced single-channel input signal","filter-and-sum beamforming","filtering theory","FIR filter coefficient","generalized cross correlation method","microphones","microphone signal","nonlinear Bayesian tracking","particle filtering","robust adaptive algorithm","robust speaker direction estimation","signal processing","speech enhancement","speech recognition","speech recognizer","user interfaces"],"type":"conference","publication":"IEEE Workshop on Multimedia Signal Processing (MMSP 2004)","status":"public","abstract":[{"lang":"eng","text":"The paper is concerned with binaural signal processing for a bimodal human-robot interface with hearing and vision. The two microphone signals are processed to obtain an enhanced single-channel input signal for the subsequent speech recognizer and to localize the acoustic source, an important information for establishing a natural human-robot communication. 
We utilize a robust adaptive algorithm for filter-and-sum beamforming (FSB) and extract speaker direction information from the resulting FIR filter coefficients. Further, particle filtering is applied which conducts a nonlinear Bayesian tracking of speaker movement. Good location accuracy can be achieved even in highly reverberant environments. The results obtained outperform the conventional generalized cross correlation (GCC) method."}],"author":[{"first_name":"Ernst","last_name":"Warsitz","full_name":"Warsitz, Ernst"},{"last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242","first_name":"Reinhold"}],"date_created":"2019-07-12T05:31:01Z","oa":"1","date_updated":"2022-01-06T06:51:12Z","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2004/WaHa04.pdf","open_access":"1"}],"doi":"10.1109/MMSP.2004.1436569","title":"Robust speaker direction estimation with particle filtering","citation":{"ama":"Warsitz E, Haeb-Umbach R. Robust speaker direction estimation with particle filtering. In: <i>IEEE Workshop on Multimedia Signal Processing (MMSP 2004)</i>. ; 2004:367-370. doi:<a href=\"https://doi.org/10.1109/MMSP.2004.1436569\">10.1109/MMSP.2004.1436569</a>","chicago":"Warsitz, Ernst, and Reinhold Haeb-Umbach. “Robust Speaker Direction Estimation with Particle Filtering.” In <i>IEEE Workshop on Multimedia Signal Processing (MMSP 2004)</i>, 367–70, 2004. <a href=\"https://doi.org/10.1109/MMSP.2004.1436569\">https://doi.org/10.1109/MMSP.2004.1436569</a>.","ieee":"E. Warsitz and R. Haeb-Umbach, “Robust speaker direction estimation with particle filtering,” in <i>IEEE Workshop on Multimedia Signal Processing (MMSP 2004)</i>, 2004, pp. 367–370.","mla":"Warsitz, Ernst, and Reinhold Haeb-Umbach. “Robust Speaker Direction Estimation with Particle Filtering.” <i>IEEE Workshop on Multimedia Signal Processing (MMSP 2004)</i>, 2004, pp. 
367–70, doi:<a href=\"https://doi.org/10.1109/MMSP.2004.1436569\">10.1109/MMSP.2004.1436569</a>.","bibtex":"@inproceedings{Warsitz_Haeb-Umbach_2004, title={Robust speaker direction estimation with particle filtering}, DOI={<a href=\"https://doi.org/10.1109/MMSP.2004.1436569\">10.1109/MMSP.2004.1436569</a>}, booktitle={IEEE Workshop on Multimedia Signal Processing (MMSP 2004)}, author={Warsitz, Ernst and Haeb-Umbach, Reinhold}, year={2004}, pages={367–370} }","short":"E. Warsitz, R. Haeb-Umbach, in: IEEE Workshop on Multimedia Signal Processing (MMSP 2004), 2004, pp. 367–370.","apa":"Warsitz, E., &#38; Haeb-Umbach, R. (2004). Robust speaker direction estimation with particle filtering. In <i>IEEE Workshop on Multimedia Signal Processing (MMSP 2004)</i> (pp. 367–370). <a href=\"https://doi.org/10.1109/MMSP.2004.1436569\">https://doi.org/10.1109/MMSP.2004.1436569</a>"},"page":"367-370","year":"2004"},{"department":[{"_id":"54"}],"user_id":"44006","_id":"11778","language":[{"iso":"eng"}],"keyword":["acoustic space","adaptation experiments","automatic generation","bottom-up clustering","broad phonetic class regression trees","correlation criterion","correlation methods","maximum likelihood estimation","maximum likelihood linear regression based speaker adaptation","MLLR adaptation","pattern clustering","phonetic regression class trees","speaker-independent training data","speech recognition","speech units","statistical analysis","trees (mathematics)"],"publication":"IEEE Transactions on Speech and Audio Processing","type":"journal_article","status":"public","abstract":[{"lang":"eng","text":"In this paper, it is shown that a correlation criterion is the appropriate criterion for bottom-up clustering to obtain broad phonetic class regression trees for maximum likelihood linear regression (MLLR)-based speaker adaptation. The correlation structure among speech units is estimated on the speaker-independent training data. 
In adaptation experiments the tree outperformed a regression tree obtained from clustering according to closeness in acoustic space and achieved results comparable with those of a manually designed broad phonetic class tree."}],"volume":9,"author":[{"first_name":"Reinhold","last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242"}],"date_created":"2019-07-12T05:28:04Z","date_updated":"2022-01-06T06:51:08Z","oa":"1","doi":"10.1109/89.906003","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2001/Ha01.pdf","open_access":"1"}],"title":"Automatic generation of phonetic regression class trees for MLLR adaptation","issue":"3","intvolume":"         9","page":"299-302","citation":{"apa":"Haeb-Umbach, R. (2001). Automatic generation of phonetic regression class trees for MLLR adaptation. <i>IEEE Transactions on Speech and Audio Processing</i>, <i>9</i>(3), 299–302. <a href=\"https://doi.org/10.1109/89.906003\">https://doi.org/10.1109/89.906003</a>","short":"R. Haeb-Umbach, IEEE Transactions on Speech and Audio Processing 9 (2001) 299–302.","mla":"Haeb-Umbach, Reinhold. “Automatic Generation of Phonetic Regression Class Trees for MLLR Adaptation.” <i>IEEE Transactions on Speech and Audio Processing</i>, vol. 9, no. 3, 2001, pp. 299–302, doi:<a href=\"https://doi.org/10.1109/89.906003\">10.1109/89.906003</a>.","bibtex":"@article{Haeb-Umbach_2001, title={Automatic generation of phonetic regression class trees for MLLR adaptation}, volume={9}, DOI={<a href=\"https://doi.org/10.1109/89.906003\">10.1109/89.906003</a>}, number={3}, journal={IEEE Transactions on Speech and Audio Processing}, author={Haeb-Umbach, Reinhold}, year={2001}, pages={299–302} }","ama":"Haeb-Umbach R. Automatic generation of phonetic regression class trees for MLLR adaptation. <i>IEEE Transactions on Speech and Audio Processing</i>. 2001;9(3):299-302. doi:<a href=\"https://doi.org/10.1109/89.906003\">10.1109/89.906003</a>","ieee":"R. 
Haeb-Umbach, “Automatic generation of phonetic regression class trees for MLLR adaptation,” <i>IEEE Transactions on Speech and Audio Processing</i>, vol. 9, no. 3, pp. 299–302, 2001.","chicago":"Haeb-Umbach, Reinhold. “Automatic Generation of Phonetic Regression Class Trees for MLLR Adaptation.” <i>IEEE Transactions on Speech and Audio Processing</i> 9, no. 3 (2001): 299–302. <a href=\"https://doi.org/10.1109/89.906003\">https://doi.org/10.1109/89.906003</a>."},"year":"2001"}]
