[{"type":"conference","publication":"12. ITG Fachtagung Sprachkommunikation (ITG 2016)","status":"public","abstract":[{"text":"This paper is concerned with speech presence probability estimation employing an explicit model of the temporal and spectral correlations of speech. An undirected graphical model is introduced, based on a Factor Graph formulation. It is shown that this undirected model cures some of the theoretical issues of an earlier directed graphical model. Furthermore, we formulate a message passing inference scheme based on an approximate graph factorization, identify this inference scheme as a particular message passing schedule based on the turbo principle and suggest further alternative schedules. The experiments show an improved performance over speech presence probability estimation based on an IID assumption, and a slightly better performance of the turbo schedule over the alternatives.","lang":"eng"}],"user_id":"44006","department":[{"_id":"54"}],"_id":"11771","language":[{"iso":"eng"}],"related_material":{"link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2016/itgspeech2016_08_Glarner_slides.pdf","description":"Slides","relation":"supplementary_material"}]},"citation":{"apa":"Glarner, T., Mahdi Momenzadeh, M., Drude, L., &#38; Haeb-Umbach, R. (2016). Factor Graph Decoding for Speech Presence Probability Estimation. In <i>12. ITG Fachtagung Sprachkommunikation (ITG 2016)</i>.","mla":"Glarner, Thomas, et al. “Factor Graph Decoding for Speech Presence Probability Estimation.” <i>12. ITG Fachtagung Sprachkommunikation (ITG 2016)</i>, 2016.","short":"T. Glarner, M. Mahdi Momenzadeh, L. Drude, R. Haeb-Umbach, in: 12. ITG Fachtagung Sprachkommunikation (ITG 2016), 2016.","bibtex":"@inproceedings{Glarner_Mahdi Momenzadeh_Drude_Haeb-Umbach_2016, title={Factor Graph Decoding for Speech Presence Probability Estimation}, booktitle={12. 
ITG Fachtagung Sprachkommunikation (ITG 2016)}, author={Glarner, Thomas and Mahdi Momenzadeh, Mohammad and Drude, Lukas and Haeb-Umbach, Reinhold}, year={2016} }","ama":"Glarner T, Mahdi Momenzadeh M, Drude L, Haeb-Umbach R. Factor Graph Decoding for Speech Presence Probability Estimation. In: <i>12. ITG Fachtagung Sprachkommunikation (ITG 2016)</i>. ; 2016.","ieee":"T. Glarner, M. Mahdi Momenzadeh, L. Drude, and R. Haeb-Umbach, “Factor Graph Decoding for Speech Presence Probability Estimation,” in <i>12. ITG Fachtagung Sprachkommunikation (ITG 2016)</i>, 2016.","chicago":"Glarner, Thomas, Mohammad Mahdi Momenzadeh, Lukas Drude, and Reinhold Haeb-Umbach. “Factor Graph Decoding for Speech Presence Probability Estimation.” In <i>12. ITG Fachtagung Sprachkommunikation (ITG 2016)</i>, 2016."},"year":"2016","author":[{"last_name":"Glarner","id":"14169","full_name":"Glarner, Thomas","first_name":"Thomas"},{"full_name":"Mahdi Momenzadeh, Mohammad","last_name":"Mahdi Momenzadeh","first_name":"Mohammad"},{"last_name":"Drude","id":"11213","full_name":"Drude, Lukas","first_name":"Lukas"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242"}],"date_created":"2019-07-12T05:27:56Z","date_updated":"2022-01-06T06:51:08Z","oa":"1","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2016/itgspeech2016_08_Glarner.pdf","open_access":"1"}],"title":"Factor Graph Decoding for Speech Presence Probability Estimation"},{"status":"public","type":"conference","publication":"Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)","language":[{"iso":"eng"}],"_id":"11812","user_id":"44006","department":[{"_id":"54"}],"year":"2016","citation":{"bibtex":"@inproceedings{Heymann_Drude_Haeb-Umbach_2016, title={Neural Network Based Spectral Mask Estimation for Acoustic Beamforming}, booktitle={Proc. IEEE Intl. Conf. 
on Acoustics, Speech and Signal Processing (ICASSP)}, author={Heymann, Jahn and Drude, Lukas and Haeb-Umbach, Reinhold}, year={2016} }","short":"J. Heymann, L. Drude, R. Haeb-Umbach, in: Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP), 2016.","mla":"Heymann, Jahn, et al. “Neural Network Based Spectral Mask Estimation for Acoustic Beamforming.” <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2016.","apa":"Heymann, J., Drude, L., &#38; Haeb-Umbach, R. (2016). Neural Network Based Spectral Mask Estimation for Acoustic Beamforming. In <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>.","ama":"Heymann J, Drude L, Haeb-Umbach R. Neural Network Based Spectral Mask Estimation for Acoustic Beamforming. In: <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>. ; 2016.","ieee":"J. Heymann, L. Drude, and R. Haeb-Umbach, “Neural Network Based Spectral Mask Estimation for Acoustic Beamforming,” in <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2016.","chicago":"Heymann, Jahn, Lukas Drude, and Reinhold Haeb-Umbach. “Neural Network Based Spectral Mask Estimation for Acoustic Beamforming.” In <i>Proc. IEEE Intl. Conf. 
on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2016."},"related_material":{"link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2016/icassp_2016_heymann_slides.pdf","relation":"supplementary_material","description":"Slides"}]},"title":"Neural Network Based Spectral Mask Estimation for Acoustic Beamforming","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2016/icassp_2016_heymann_paper.pdf","open_access":"1"}],"date_updated":"2022-01-06T06:51:09Z","oa":"1","author":[{"full_name":"Heymann, Jahn","id":"9168","last_name":"Heymann","first_name":"Jahn"},{"last_name":"Drude","full_name":"Drude, Lukas","id":"11213","first_name":"Lukas"},{"id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach","first_name":"Reinhold"}],"date_created":"2019-07-12T05:28:44Z"},{"related_material":{"link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2016/JacobHaeb_ITG2016_poster.pdf","relation":"supplementary_material","description":"Poster"}]},"year":"2016","citation":{"apa":"Jacob, F., &#38; Haeb-Umbach, R. (2016). On the Bias of Direction of Arrival Estimation Using Linear Microphone Arrays. In <i>12. ITG Fachtagung Sprachkommunikation (ITG 2016)</i>.","short":"F. Jacob, R. Haeb-Umbach, in: 12. ITG Fachtagung Sprachkommunikation (ITG 2016), 2016.","mla":"Jacob, Florian, and Reinhold Haeb-Umbach. “On the Bias of Direction of Arrival Estimation Using Linear Microphone Arrays.” <i>12. ITG Fachtagung Sprachkommunikation (ITG 2016)</i>, 2016.","bibtex":"@inproceedings{Jacob_Haeb-Umbach_2016, title={On the Bias of Direction of Arrival Estimation Using Linear Microphone Arrays}, booktitle={12. ITG Fachtagung Sprachkommunikation (ITG 2016)}, author={Jacob, Florian and Haeb-Umbach, Reinhold}, year={2016} }","ieee":"F. Jacob and R. Haeb-Umbach, “On the Bias of Direction of Arrival Estimation Using Linear Microphone Arrays,” in <i>12. ITG Fachtagung Sprachkommunikation (ITG 2016)</i>, 2016.","chicago":"Jacob, Florian, and Reinhold Haeb-Umbach. 
“On the Bias of Direction of Arrival Estimation Using Linear Microphone Arrays.” In <i>12. ITG Fachtagung Sprachkommunikation (ITG 2016)</i>, 2016.","ama":"Jacob F, Haeb-Umbach R. On the Bias of Direction of Arrival Estimation Using Linear Microphone Arrays. In: <i>12. ITG Fachtagung Sprachkommunikation (ITG 2016)</i>. ; 2016."},"date_updated":"2022-01-06T06:51:10Z","oa":"1","date_created":"2019-07-12T05:29:03Z","author":[{"full_name":"Jacob, Florian","last_name":"Jacob","first_name":"Florian"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242"}],"title":"On the Bias of Direction of Arrival Estimation Using Linear Microphone Arrays","main_file_link":[{"open_access":"1","url":"https://groups.uni-paderborn.de/nt/pubs/2016/JacobHaeb_ITG2016.pdf"}],"type":"conference","publication":"12. ITG Fachtagung Sprachkommunikation (ITG 2016)","abstract":[{"lang":"eng","text":"This contribution investigates Direction of Arrival (DoA) estimation using linearly arranged microphone arrays. We are going to develop a model for the DoA estimation error in a reverberant scenario and show the existence of a bias, that is a consequence of the linear arrangement and limited field of view (FoV) bias: First, the limited FoV leading to a clipping of the measurements, and, second, the angular distribution of the signal energy of the reflections being non-uniform. Since both issues are a consequence of the linear arrangement of the sensors, the bias arises largely independent of the kind of DoA estimator. 
The experimental evaluation demonstrates the existence of the bias for a selected number of DoA estimation methods and proves that the prediction from the developed theoretical model matches the simulation results."}],"status":"public","_id":"11829","user_id":"44006","department":[{"_id":"54"}],"language":[{"iso":"eng"}]},{"abstract":[{"text":"We present a system for the 4th CHiME challenge which significantly increases the performance for all three tracks with respect to the provided baseline system. The front-end uses a bi-directional Long Short-Term Memory (BLSTM)-based neural network to estimate signal statistics. These then steer a Generalized Eigenvalue beamformer. The back-end consists of a 22 layer deep Wide Residual Network and two extra BLSTM layers. Working on a whole utterance instead of frames allows us to refine Batch-Normalization. We also train our own BLSTM-based language model. Adding a discriminative speaker adaptation leads to further gains. The final system achieves a word error rate on the six channel real test data of 3.48%. For the two channel track we achieve 5.96% and for the one channel track 9.34%. This is the best reported performance on the challenge achieved by a single system, i.e., a configuration, which does not combine multiple systems. At the same time, our system is independent of the microphone configuration. We can thus use the same components for all three tracks.","lang":"eng"}],"status":"public","publication":"Computer Speech and Language","type":"conference","language":[{"iso":"eng"}],"_id":"11834","department":[{"_id":"54"}],"user_id":"44006","year":"2016","citation":{"ama":"Heymann J, Drude L, Haeb-Umbach R. Wide Residual BLSTM Network with Discriminative Speaker Adaptation for Robust Speech Recognition. In: <i>Computer Speech and Language</i>. ; 2016.","chicago":"Heymann, Jahn, Lukas Drude, and Reinhold Haeb-Umbach. 
“Wide Residual BLSTM Network with Discriminative Speaker Adaptation for Robust Speech Recognition.” In <i>Computer Speech and Language</i>, 2016.","ieee":"J. Heymann, L. Drude, and R. Haeb-Umbach, “Wide Residual BLSTM Network with Discriminative Speaker Adaptation for Robust Speech Recognition,” in <i>Computer Speech and Language</i>, 2016.","apa":"Heymann, J., Drude, L., &#38; Haeb-Umbach, R. (2016). Wide Residual BLSTM Network with Discriminative Speaker Adaptation for Robust Speech Recognition. In <i>Computer Speech and Language</i>.","short":"J. Heymann, L. Drude, R. Haeb-Umbach, in: Computer Speech and Language, 2016.","bibtex":"@inproceedings{Heymann_Drude_Haeb-Umbach_2016, title={Wide Residual BLSTM Network with Discriminative Speaker Adaptation for Robust Speech Recognition}, booktitle={Computer Speech and Language}, author={Heymann, Jahn and Drude, Lukas and Haeb-Umbach, Reinhold}, year={2016} }","mla":"Heymann, Jahn, et al. “Wide Residual BLSTM Network with Discriminative Speaker Adaptation for Robust Speech Recognition.” <i>Computer Speech and Language</i>, 2016."},"related_material":{"link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2016/chime4_upbonly_poster.pdf","relation":"supplementary_material","description":"Poster"}]},"title":"Wide Residual BLSTM Network with Discriminative Speaker Adaptation for Robust Speech Recognition","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2016/chime4_upbonly_paper.pdf","open_access":"1"}],"date_updated":"2022-01-06T06:51:11Z","oa":"1","author":[{"first_name":"Jahn","last_name":"Heymann","id":"9168","full_name":"Heymann, Jahn"},{"id":"11213","full_name":"Drude, Lukas","last_name":"Drude","first_name":"Lukas"},{"last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, 
Reinhold","id":"242","first_name":"Reinhold"}],"date_created":"2019-07-12T05:29:09Z"},{"language":[{"iso":"eng"}],"user_id":"44006","department":[{"_id":"54"}],"_id":"11840","status":"public","type":"journal_article","publication":"EURASIP Journal on Advances in Signal Processing","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2016/REVERB_summary.pdf","open_access":"1"}],"title":"A summary of the REVERB challenge: state-of-the-art and remaining challenges in reverberant speech processing research","date_created":"2019-07-12T05:29:16Z","author":[{"last_name":"Kinoshita","full_name":"Kinoshita, Keisuke","first_name":"Keisuke"},{"full_name":"Delcroix, Marc","last_name":"Delcroix","first_name":"Marc"},{"last_name":"Gannot","full_name":"Gannot, Sharon","first_name":"Sharon"},{"first_name":"Emanuel A. P.","last_name":"Habets","full_name":"Habets, Emanuel A. P."},{"first_name":"Reinhold","id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach"},{"first_name":"Walter","full_name":"Kellermann, Walter","last_name":"Kellermann"},{"last_name":"Leutnant","full_name":"Leutnant, Volker","first_name":"Volker"},{"full_name":"Maas, Roland","last_name":"Maas","first_name":"Roland"},{"first_name":"Tomohiro","last_name":"Nakatani","full_name":"Nakatani, Tomohiro"},{"first_name":"Bhiksha","last_name":"Raj","full_name":"Raj, Bhiksha"},{"last_name":"Sehr","full_name":"Sehr, Armin","first_name":"Armin"},{"first_name":"Takuya","full_name":"Yoshioka, Takuya","last_name":"Yoshioka"}],"date_updated":"2022-01-06T06:51:11Z","oa":"1","citation":{"apa":"Kinoshita, K., Delcroix, M., Gannot, S., Habets, E. A. P., Haeb-Umbach, R., Kellermann, W., … Yoshioka, T. (2016). A summary of the REVERB challenge: state-of-the-art and remaining challenges in reverberant speech processing research. <i>EURASIP Journal on Advances in Signal Processing</i>.","short":"K. Kinoshita, M. Delcroix, S. Gannot, E.A.P. Habets, R. Haeb-Umbach, W. Kellermann, V. Leutnant, R. Maas, T. 
Nakatani, B. Raj, A. Sehr, T. Yoshioka, EURASIP Journal on Advances in Signal Processing (2016).","bibtex":"@article{Kinoshita_Delcroix_Gannot_Habets_Haeb-Umbach_Kellermann_Leutnant_Maas_Nakatani_Raj_et al._2016, title={A summary of the REVERB challenge: state-of-the-art and remaining challenges in reverberant speech processing research}, journal={EURASIP Journal on Advances in Signal Processing}, author={Kinoshita, Keisuke and Delcroix, Marc and Gannot, Sharon and Habets, Emanuel A. P. and Haeb-Umbach, Reinhold and Kellermann, Walter and Leutnant, Volker and Maas, Roland and Nakatani, Tomohiro and Raj, Bhiksha and et al.}, year={2016} }","mla":"Kinoshita, Keisuke, et al. “A Summary of the REVERB Challenge: State-of-the-Art and Remaining Challenges in Reverberant Speech Processing Research.” <i>EURASIP Journal on Advances in Signal Processing</i>, 2016.","ama":"Kinoshita K, Delcroix M, Gannot S, et al. A summary of the REVERB challenge: state-of-the-art and remaining challenges in reverberant speech processing research. <i>EURASIP Journal on Advances in Signal Processing</i>. 2016.","ieee":"K. Kinoshita <i>et al.</i>, “A summary of the REVERB challenge: state-of-the-art and remaining challenges in reverberant speech processing research,” <i>EURASIP Journal on Advances in Signal Processing</i>, 2016.","chicago":"Kinoshita, Keisuke, Marc Delcroix, Sharon Gannot, Emanuel A. P. Habets, Reinhold Haeb-Umbach, Walter Kellermann, Volker Leutnant, et al. 
“A Summary of the REVERB Challenge: State-of-the-Art and Remaining Challenges in Reverberant Speech Processing Research.” <i>EURASIP Journal on Advances in Signal Processing</i>, 2016."},"year":"2016"},{"title":"Acoustic Microphone Geometry Calibration: An overview and experimental evaluation of state-of-the-art algorithms","doi":"10.1109/MSP.2016.2555198","date_updated":"2022-01-06T06:51:11Z","volume":33,"date_created":"2019-07-12T05:30:09Z","author":[{"first_name":"Axel","last_name":"Plinge","full_name":"Plinge, Axel"},{"first_name":"Florian","full_name":"Jacob, Florian","last_name":"Jacob"},{"full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach","first_name":"Reinhold"},{"full_name":"Fink, Gernot A.","last_name":"Fink","first_name":"Gernot A."}],"year":"2016","intvolume":"        33","page":"14-29","citation":{"apa":"Plinge, A., Jacob, F., Haeb-Umbach, R., &#38; Fink, G. A. (2016). Acoustic Microphone Geometry Calibration: An overview and experimental evaluation of state-of-the-art algorithms. <i>IEEE Signal Processing Magazine</i>, <i>33</i>(4), 14–29. <a href=\"https://doi.org/10.1109/MSP.2016.2555198\">https://doi.org/10.1109/MSP.2016.2555198</a>","mla":"Plinge, Axel, et al. “Acoustic Microphone Geometry Calibration: An Overview and Experimental Evaluation of State-of-the-Art Algorithms.” <i>IEEE Signal Processing Magazine</i>, vol. 33, no. 4, 2016, pp. 14–29, doi:<a href=\"https://doi.org/10.1109/MSP.2016.2555198\">10.1109/MSP.2016.2555198</a>.","bibtex":"@article{Plinge_Jacob_Haeb-Umbach_Fink_2016, title={Acoustic Microphone Geometry Calibration: An overview and experimental evaluation of state-of-the-art algorithms}, volume={33}, DOI={<a href=\"https://doi.org/10.1109/MSP.2016.2555198\">10.1109/MSP.2016.2555198</a>}, number={4}, journal={IEEE Signal Processing Magazine}, author={Plinge, Axel and Jacob, Florian and Haeb-Umbach, Reinhold and Fink, Gernot A.}, year={2016}, pages={14–29} }","short":"A. Plinge, F. Jacob, R. Haeb-Umbach, G.A. 
Fink, IEEE Signal Processing Magazine 33 (2016) 14–29.","ama":"Plinge A, Jacob F, Haeb-Umbach R, Fink GA. Acoustic Microphone Geometry Calibration: An overview and experimental evaluation of state-of-the-art algorithms. <i>IEEE Signal Processing Magazine</i>. 2016;33(4):14-29. doi:<a href=\"https://doi.org/10.1109/MSP.2016.2555198\">10.1109/MSP.2016.2555198</a>","chicago":"Plinge, Axel, Florian Jacob, Reinhold Haeb-Umbach, and Gernot A. Fink. “Acoustic Microphone Geometry Calibration: An Overview and Experimental Evaluation of State-of-the-Art Algorithms.” <i>IEEE Signal Processing Magazine</i> 33, no. 4 (2016): 14–29. <a href=\"https://doi.org/10.1109/MSP.2016.2555198\">https://doi.org/10.1109/MSP.2016.2555198</a>.","ieee":"A. Plinge, F. Jacob, R. Haeb-Umbach, and G. A. Fink, “Acoustic Microphone Geometry Calibration: An overview and experimental evaluation of state-of-the-art algorithms,” <i>IEEE Signal Processing Magazine</i>, vol. 33, no. 4, pp. 14–29, 2016."},"publication_identifier":{"issn":["1053-5888"]},"issue":"4","keyword":["Acoustic sensors","Microphones","Portable computers","Smart phones","Wireless communication","Wireless sensor networks"],"language":[{"iso":"eng"}],"_id":"11886","department":[{"_id":"54"}],"user_id":"44006","abstract":[{"text":"Today, we are often surrounded by devices with one or more microphones, such as smartphones, laptops, and wireless microphones. If they are part of an acoustic sensor network, their distribution in the environment can be beneficially exploited for various speech processing tasks. However, applications like speaker localization, speaker tracking, and speech enhancement by beamforming avail themselves of the geometrical configuration of the sensors. Therefore, acoustic microphone geometry calibration has recently become a very active field of research. 
This article provides an application-oriented, comprehensive survey of existing methods for microphone position self-calibration, which will be categorized by the measurements they use and the scenarios they can calibrate. Selected methods will be evaluated comparatively with real-world recordings.","lang":"eng"}],"status":"public","publication":"IEEE Signal Processing Magazine","type":"journal_article"},{"department":[{"_id":"54"}],"user_id":"44006","_id":"11908","language":[{"iso":"eng"}],"publication":"Computer Speech and Language","type":"conference","status":"public","abstract":[{"text":"This paper describes automatic speech recognition (ASR) systems developed jointly by RWTH, UPB and FORTH for the 1ch, 2ch and 6ch track of the 4th CHiME Challenge. In the 2ch and 6ch tracks the final system output is obtained by a Confusion Network Combination (CNC) of multiple systems. The Acoustic Model (AM) is a deep neural network based on Bidirectional Long Short-Term Memory (BLSTM) units. The systems differ by front ends and training sets used for the acoustic training. The model for the 1ch track is trained without any preprocessing. For each front end we trained and evaluated individual acoustic models. We compare the ASR performance of different beamforming approaches: a conventional superdirective beamformer [1] and an MVDR beamformer as in [2], where the steering vector is estimated based on [3]. Furthermore we evaluated a BLSTM supported Generalized Eigenvalue beamformer using NN-GEV [4]. The back end is implemented using RWTH's open-source toolkits RASR [5], RETURNN [6] and rwthlm [7]. We rescore lattices with a Long Short-Term Memory (LSTM) based language model. The overall best results are obtained by a system combination that includes the lattices from the system of UPB's submission [8]. 
Our final submission scored second in each of the three tracks of the 4th CHiME Challenge.","lang":"eng"}],"date_created":"2019-07-12T05:30:35Z","author":[{"last_name":"Menne","full_name":"Menne, Tobias","first_name":"Tobias"},{"first_name":"Jahn","full_name":"Heymann, Jahn","id":"9168","last_name":"Heymann"},{"first_name":"Anastasios","full_name":"Alexandridis, Anastasios","last_name":"Alexandridis"},{"full_name":"Irie, Kazuki","last_name":"Irie","first_name":"Kazuki"},{"full_name":"Zeyer, Albert","last_name":"Zeyer","first_name":"Albert"},{"first_name":"Markus","full_name":"Kitza, Markus","last_name":"Kitza"},{"first_name":"Pavel","full_name":"Golik, Pavel","last_name":"Golik"},{"first_name":"Ilia","last_name":"Kulikov","full_name":"Kulikov, Ilia"},{"last_name":"Drude","id":"11213","full_name":"Drude, Lukas","first_name":"Lukas"},{"full_name":"Schlüter, Ralf","last_name":"Schlüter","first_name":"Ralf"},{"first_name":"Hermann","last_name":"Ney","full_name":"Ney, Hermann"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"},{"full_name":"Mouchtaris, Athanasios","last_name":"Mouchtaris","first_name":"Athanasios"}],"oa":"1","date_updated":"2022-01-06T06:51:12Z","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2016/chime4_rwthupbforth_paper.pdf","open_access":"1"}],"title":"The RWTH/UPB/FORTH System Combination for the 4th CHiME Challenge Evaluation","citation":{"apa":"Menne, T., Heymann, J., Alexandridis, A., Irie, K., Zeyer, A., Kitza, M., … Mouchtaris, A. (2016). The RWTH/UPB/FORTH System Combination for the 4th CHiME Challenge Evaluation. In <i>Computer Speech and Language</i>.","short":"T. Menne, J. Heymann, A. Alexandridis, K. Irie, A. Zeyer, M. Kitza, P. Golik, I. Kulikov, L. Drude, R. Schlüter, H. Ney, R. Haeb-Umbach, A. 
Mouchtaris, in: Computer Speech and Language, 2016.","bibtex":"@inproceedings{Menne_Heymann_Alexandridis_Irie_Zeyer_Kitza_Golik_Kulikov_Drude_Schlüter_et al._2016, title={The RWTH/UPB/FORTH System Combination for the 4th CHiME Challenge Evaluation}, booktitle={Computer Speech and Language}, author={Menne, Tobias and Heymann, Jahn and Alexandridis, Anastasios and Irie, Kazuki and Zeyer, Albert and Kitza, Markus and Golik, Pavel and Kulikov, Ilia and Drude, Lukas and Schlüter, Ralf and et al.}, year={2016} }","mla":"Menne, Tobias, et al. “The RWTH/UPB/FORTH System Combination for the 4th CHiME Challenge Evaluation.” <i>Computer Speech and Language</i>, 2016.","ieee":"T. Menne <i>et al.</i>, “The RWTH/UPB/FORTH System Combination for the 4th CHiME Challenge Evaluation,” in <i>Computer Speech and Language</i>, 2016.","chicago":"Menne, Tobias, Jahn Heymann, Anastasios Alexandridis, Kazuki Irie, Albert Zeyer, Markus Kitza, Pavel Golik, et al. “The RWTH/UPB/FORTH System Combination for the 4th CHiME Challenge Evaluation.” In <i>Computer Speech and Language</i>, 2016.","ama":"Menne T, Heymann J, Alexandridis A, et al. The RWTH/UPB/FORTH System Combination for the 4th CHiME Challenge Evaluation. In: <i>Computer Speech and Language</i>. ; 2016."},"year":"2016"},{"status":"public","abstract":[{"lang":"eng","text":"In this paper we demonstrate an algorithm to learn words from speech using non-parametric Bayesian hierarchical models in an unsupervised setting. We exploit the assumption of a hierarchical structure of speech, namely the formation of spoken words as a sequence of phonemes. We employ the Nested Hierarchical Pitman-Yor Language Model, which allows an a priori unknown and possibly unlimited number of words. We assume the n-gram probabilities of words, the m-gram probabilities of phoneme sequences in words and the phoneme sequences of the words themselves as latent variables to be learned. 
We evaluate the algorithm on a cross language task using an existing speech recognizer trained on English speech to decode speech in the Xitsonga language supplied for the 2015 ZeroSpeech challenge. We apply the learning algorithm on the resulting phoneme graphs and achieve the highest token precision and F score compared to present systems."}],"publication":"38th German Conference on Pattern Recognition (GCPR 2016)","type":"conference","language":[{"iso":"eng"}],"department":[{"_id":"54"}],"user_id":"44006","_id":"11920","citation":{"ama":"Walter O, Haeb-Umbach R. Unsupervised Word Discovery from Speech using Bayesian Hierarchical Models. In: <i>38th German Conference on Pattern Recognition (GCPR 2016)</i>. ; 2016.","chicago":"Walter, Oliver, and Reinhold Haeb-Umbach. “Unsupervised Word Discovery from Speech Using Bayesian Hierarchical Models.” In <i>38th German Conference on Pattern Recognition (GCPR 2016)</i>, 2016.","ieee":"O. Walter and R. Haeb-Umbach, “Unsupervised Word Discovery from Speech using Bayesian Hierarchical Models,” in <i>38th German Conference on Pattern Recognition (GCPR 2016)</i>, 2016.","apa":"Walter, O., &#38; Haeb-Umbach, R. (2016). Unsupervised Word Discovery from Speech using Bayesian Hierarchical Models. In <i>38th German Conference on Pattern Recognition (GCPR 2016)</i>.","mla":"Walter, Oliver, and Reinhold Haeb-Umbach. “Unsupervised Word Discovery from Speech Using Bayesian Hierarchical Models.” <i>38th German Conference on Pattern Recognition (GCPR 2016)</i>, 2016.","short":"O. Walter, R. 
Haeb-Umbach, in: 38th German Conference on Pattern Recognition (GCPR 2016), 2016.","bibtex":"@inproceedings{Walter_Haeb-Umbach_2016, title={Unsupervised Word Discovery from Speech using Bayesian Hierarchical Models}, booktitle={38th German Conference on Pattern Recognition (GCPR 2016)}, author={Walter, Oliver and Haeb-Umbach, Reinhold}, year={2016} }"},"year":"2016","related_material":{"link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2016/WaHa16_Talk.pdf","relation":"supplementary_material","description":"Presentation"}]},"main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2016/WaHa16.pdf","open_access":"1"}],"title":"Unsupervised Word Discovery from Speech using Bayesian Hierarchical Models","date_created":"2019-07-12T05:30:49Z","author":[{"last_name":"Walter","full_name":"Walter, Oliver","first_name":"Oliver"},{"full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach","first_name":"Reinhold"}],"date_updated":"2022-01-06T06:51:12Z","oa":"1"},{"user_id":"460","department":[{"_id":"54"}],"_id":"11890","language":[{"iso":"eng"}],"type":"conference","publication":"24th European Signal Processing Conference (EUSIPCO 2016)","status":"public","abstract":[{"text":"In this paper we study the influence of directional radio patterns of Bluetooth low energy (BLE) beacons on smartphone localization accuracy and beacon network planning. A two-dimensional model of the power emission characteristic is derived from measurements of the radiation pattern of BLE beacons carried out in an RF chamber. The Cramer-Rao lower bound (CRLB) for position estimation is then derived for this directional power emission model. With this lower bound on the RMS positioning error the coverage of different beacon network configurations can be evaluated. 
For near-optimal network planning an evolutionary optimization algorithm for finding the best beacon placement is presented.","lang":"eng"}],"author":[{"first_name":"Joerg","last_name":"Schmalenstroeer","id":"460","full_name":"Schmalenstroeer, Joerg"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold"}],"date_created":"2019-07-12T05:30:14Z","oa":"1","date_updated":"2023-10-26T08:11:52Z","main_file_link":[{"open_access":"1","url":"https://groups.uni-paderborn.de/nt/pubs/2016/SchHaeb16.pdf"}],"title":"Investigations into Bluetooth Low Energy Localization Precision Limits","related_material":{"link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2016/SchHaeb16_Poster.pdf","relation":"supplementary_material","description":"Poster"}]},"quality_controlled":"1","citation":{"apa":"Schmalenstroeer, J., &#38; Haeb-Umbach, R. (2016). Investigations into Bluetooth Low Energy Localization Precision Limits. <i>24th European Signal Processing Conference (EUSIPCO 2016)</i>.","mla":"Schmalenstroeer, Joerg, and Reinhold Haeb-Umbach. “Investigations into Bluetooth Low Energy Localization Precision Limits.” <i>24th European Signal Processing Conference (EUSIPCO 2016)</i>, 2016.","bibtex":"@inproceedings{Schmalenstroeer_Haeb-Umbach_2016, title={Investigations into Bluetooth Low Energy Localization Precision Limits}, booktitle={24th European Signal Processing Conference (EUSIPCO 2016)}, author={Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold}, year={2016} }","short":"J. Schmalenstroeer, R. Haeb-Umbach, in: 24th European Signal Processing Conference (EUSIPCO 2016), 2016.","ama":"Schmalenstroeer J, Haeb-Umbach R. Investigations into Bluetooth Low Energy Localization Precision Limits. In: <i>24th European Signal Processing Conference (EUSIPCO 2016)</i>. ; 2016.","chicago":"Schmalenstroeer, Joerg, and Reinhold Haeb-Umbach. 
“Investigations into Bluetooth Low Energy Localization Precision Limits.” In <i>24th European Signal Processing Conference (EUSIPCO 2016)</i>, 2016.","ieee":"J. Schmalenstroeer and R. Haeb-Umbach, “Investigations into Bluetooth Low Energy Localization Precision Limits,” 2016."},"year":"2016"},{"type":"conference","publication":"Interspeech 2015","status":"public","abstract":[{"lang":"eng","text":"Noise tracking is an important component of speech enhancement algorithms. Of the many noise trackers proposed, Minimum Statistics (MS) is a particularly popular one due to its simple parameterization and at the same time excellent performance. In this paper we propose to further reduce the number of MS parameters by giving an alternative derivation of an optimal smoothing constant. At the same time the noise tracking performance is improved as is demonstrated by experiments employing speech degraded by various noise types and at different SNR values."}],"user_id":"44006","department":[{"_id":"54"}],"_id":"11739","language":[{"iso":"eng"}],"keyword":["speech enhancement","noise tracking","optimal smoothing"],"related_material":{"link":[{"description":"Poster","relation":"supplementary_material","url":"https://groups.uni-paderborn.de/nt/pubs/2015/ChHa15_Poster.pdf"}]},"citation":{"short":"A. Chinaev, R. Haeb-Umbach, in: Interspeech 2015, 2015, pp. 1785–1789.","mla":"Chinaev, Aleksej, and Reinhold Haeb-Umbach. “On Optimal Smoothing in Minimum Statistics Based Noise Tracking.” <i>Interspeech 2015</i>, 2015, pp. 1785–89.","bibtex":"@inproceedings{Chinaev_Haeb-Umbach_2015, title={On Optimal Smoothing in Minimum Statistics Based Noise Tracking}, booktitle={Interspeech 2015}, author={Chinaev, Aleksej and Haeb-Umbach, Reinhold}, year={2015}, pages={1785–1789} }","apa":"Chinaev, A., &#38; Haeb-Umbach, R. (2015). On Optimal Smoothing in Minimum Statistics Based Noise Tracking. In <i>Interspeech 2015</i> (pp. 1785–1789).","ama":"Chinaev A, Haeb-Umbach R. 
On Optimal Smoothing in Minimum Statistics Based Noise Tracking. In: <i>Interspeech 2015</i>. ; 2015:1785-1789.","chicago":"Chinaev, Aleksej, and Reinhold Haeb-Umbach. “On Optimal Smoothing in Minimum Statistics Based Noise Tracking.” In <i>Interspeech 2015</i>, 1785–89, 2015.","ieee":"A. Chinaev and R. Haeb-Umbach, “On Optimal Smoothing in Minimum Statistics Based Noise Tracking,” in <i>Interspeech 2015</i>, 2015, pp. 1785–1789."},"page":"1785-1789","year":"2015","date_created":"2019-07-12T05:27:19Z","author":[{"first_name":"Aleksej","full_name":"Chinaev, Aleksej","last_name":"Chinaev"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242"}],"oa":"1","date_updated":"2022-01-06T06:51:08Z","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2015/ChHa15.pdf","open_access":"1"}],"title":"On Optimal Smoothing in Minimum Statistics Based Noise Tracking"},{"user_id":"44006","department":[{"_id":"54"}],"_id":"11748","language":[{"iso":"eng"}],"type":"conference","publication":"INTERSPEECH 2015","status":"public","abstract":[{"text":"We present a semantic analysis technique for spoken input using Markov Logic Networks (MLNs). MLNs combine graphical models with first-order logic. They are particularly suitable for providing inference in the presence of inconsistent and incomplete data, which are typical of an automatic speech recognizer's (ASR) output in the presence of degraded speech. The target application is a speech interface to a home automation system to be operated by people with speech impairments, where the ASR output is particularly noisy. In order to cater for dysarthric speech with non-canonical phoneme realizations, acoustic representations of the input speech are learned in an unsupervised fashion. While training data transcripts are not required for the acoustic model training, the MLN training requires supervision, however, at a rather loose and abstract level. 
Results on two databases, one of them for dysarthric speech, show that MLN-based semantic analysis clearly outperforms baseline approaches employing non-negative matrix factorization, multinomial naive Bayes models, or support vector machines.","lang":"eng"}],"date_created":"2019-07-12T05:27:30Z","author":[{"full_name":"Despotovic, Vladimir","last_name":"Despotovic","first_name":"Vladimir"},{"first_name":"Oliver","last_name":"Walter","full_name":"Walter, Oliver"},{"full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach","first_name":"Reinhold"}],"date_updated":"2022-01-06T06:51:08Z","oa":"1","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2015/DeWaHa.pdf","open_access":"1"}],"title":"Semantic Analysis of Spoken Input using Markov Logic Networks","related_material":{"link":[{"description":"Poster","relation":"supplementary_material","url":"https://groups.uni-paderborn.de/nt/pubs/2015/DeWaHa_Poster.pdf"}]},"citation":{"apa":"Despotovic, V., Walter, O., &#38; Haeb-Umbach, R. (2015). Semantic Analysis of Spoken Input using Markov Logic Networks. In <i>INTERSPEECH 2015</i>.","bibtex":"@inproceedings{Despotovic_Walter_Haeb-Umbach_2015, title={Semantic Analysis of Spoken Input using Markov Logic Networks}, booktitle={INTERSPEECH 2015}, author={Despotovic, Vladimir and Walter, Oliver and Haeb-Umbach, Reinhold}, year={2015} }","short":"V. Despotovic, O. Walter, R. Haeb-Umbach, in: INTERSPEECH 2015, 2015.","mla":"Despotovic, Vladimir, et al. “Semantic Analysis of Spoken Input Using Markov Logic Networks.” <i>INTERSPEECH 2015</i>, 2015.","ama":"Despotovic V, Walter O, Haeb-Umbach R. Semantic Analysis of Spoken Input using Markov Logic Networks. In: <i>INTERSPEECH 2015</i>. ; 2015.","ieee":"V. Despotovic, O. Walter, and R. Haeb-Umbach, “Semantic Analysis of Spoken Input using Markov Logic Networks,” in <i>INTERSPEECH 2015</i>, 2015.","chicago":"Despotovic, Vladimir, Oliver Walter, and Reinhold Haeb-Umbach. 
“Semantic Analysis of Spoken Input Using Markov Logic Networks.” In <i>INTERSPEECH 2015</i>, 2015."},"year":"2015"},{"year":"2015","citation":{"chicago":"Drude, Lukas, Florian Jacob, and Reinhold Haeb-Umbach. “DOA-Estimation Based on a Complex Watson Kernel Method.” In <i>23th European Signal Processing Conference (EUSIPCO 2015)</i>, 2015.","ieee":"L. Drude, F. Jacob, and R. Haeb-Umbach, “DOA-Estimation based on a Complex Watson Kernel Method,” in <i>23th European Signal Processing Conference (EUSIPCO 2015)</i>, 2015.","ama":"Drude L, Jacob F, Haeb-Umbach R. DOA-Estimation based on a Complex Watson Kernel Method. In: <i>23th European Signal Processing Conference (EUSIPCO 2015)</i>. ; 2015.","apa":"Drude, L., Jacob, F., &#38; Haeb-Umbach, R. (2015). DOA-Estimation based on a Complex Watson Kernel Method. In <i>23th European Signal Processing Conference (EUSIPCO 2015)</i>.","bibtex":"@inproceedings{Drude_Jacob_Haeb-Umbach_2015, title={DOA-Estimation based on a Complex Watson Kernel Method}, booktitle={23th European Signal Processing Conference (EUSIPCO 2015)}, author={Drude, Lukas and Jacob, Florian and Haeb-Umbach, Reinhold}, year={2015} }","short":"L. Drude, F. Jacob, R. Haeb-Umbach, in: 23th European Signal Processing Conference (EUSIPCO 2015), 2015.","mla":"Drude, Lukas, et al. 
“DOA-Estimation Based on a Complex Watson Kernel Method.” <i>23th European Signal Processing Conference (EUSIPCO 2015)</i>, 2015."},"related_material":{"link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2015/DrJaHa15_Presentation.pdf","description":"Presentation","relation":"supplementary_material"}]},"title":"DOA-Estimation based on a Complex Watson Kernel Method","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2015/DrJaHa15.pdf","open_access":"1"}],"date_updated":"2022-01-06T06:51:08Z","oa":"1","author":[{"first_name":"Lukas","last_name":"Drude","id":"11213","full_name":"Drude, Lukas"},{"first_name":"Florian","last_name":"Jacob","full_name":"Jacob, Florian"},{"id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach","first_name":"Reinhold"}],"date_created":"2019-07-12T05:27:38Z","abstract":[{"lang":"eng","text":"This contribution presents a Direction of Arrival (DoA) estimation algorithm based on the complex Watson distribution to incorporate both phase and level differences of captured microphone array signals. The derived algorithm is reviewed in the context of the Generalized State Coherence Transform (GSCT) on the one hand and a kernel density estimation method on the other hand. A thorough simulative evaluation yields insight into parameter selection and provides details on the performance for both directional and omni-directional microphones. 
A comparison to the well known Steered Response Power with Phase Transform (SRP-PHAT) algorithm and a state of the art DoA estimator which explicitly accounts for aliasing, shows in particular the advantages of presented algorithm if inter-sensor level differences are indicative of the DoA, as with directional microphones."}],"status":"public","type":"conference","publication":"23th European Signal Processing Conference (EUSIPCO 2015)","language":[{"iso":"eng"}],"_id":"11755","user_id":"44006","department":[{"_id":"54"}]},{"title":"BLSTM supported GEV Beamformer Front-End for the 3RD CHiME Challenge","author":[{"first_name":"Jahn","full_name":"Heymann, Jahn","id":"9168","last_name":"Heymann"},{"full_name":"Drude, Lukas","id":"11213","last_name":"Drude","first_name":"Lukas"},{"full_name":"Chinaev, Aleksej","last_name":"Chinaev","first_name":"Aleksej"},{"first_name":"Reinhold","id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach"}],"date_created":"2019-07-12T05:28:41Z","date_updated":"2022-01-06T06:51:09Z","citation":{"apa":"Heymann, J., Drude, L., Chinaev, A., &#38; Haeb-Umbach, R. (2015). BLSTM supported GEV Beamformer Front-End for the 3RD CHiME Challenge. In <i>Automatic Speech Recognition and Understanding Workshop (ASRU 2015)</i>.","bibtex":"@inproceedings{Heymann_Drude_Chinaev_Haeb-Umbach_2015, title={BLSTM supported GEV Beamformer Front-End for the 3RD CHiME Challenge}, booktitle={Automatic Speech Recognition and Understanding Workshop (ASRU 2015)}, author={Heymann, Jahn and Drude, Lukas and Chinaev, Aleksej and Haeb-Umbach, Reinhold}, year={2015} }","short":"J. Heymann, L. Drude, A. Chinaev, R. Haeb-Umbach, in: Automatic Speech Recognition and Understanding Workshop (ASRU 2015), 2015.","mla":"Heymann, Jahn, et al. “BLSTM Supported GEV Beamformer Front-End for the 3RD CHiME Challenge.” <i>Automatic Speech Recognition and Understanding Workshop (ASRU 2015)</i>, 2015.","ama":"Heymann J, Drude L, Chinaev A, Haeb-Umbach R. 
BLSTM supported GEV Beamformer Front-End for the 3RD CHiME Challenge. In: <i>Automatic Speech Recognition and Understanding Workshop (ASRU 2015)</i>. ; 2015.","ieee":"J. Heymann, L. Drude, A. Chinaev, and R. Haeb-Umbach, “BLSTM supported GEV Beamformer Front-End for the 3RD CHiME Challenge,” in <i>Automatic Speech Recognition and Understanding Workshop (ASRU 2015)</i>, 2015.","chicago":"Heymann, Jahn, Lukas Drude, Aleksej Chinaev, and Reinhold Haeb-Umbach. “BLSTM Supported GEV Beamformer Front-End for the 3RD CHiME Challenge.” In <i>Automatic Speech Recognition and Understanding Workshop (ASRU 2015)</i>, 2015."},"year":"2015","language":[{"iso":"eng"}],"department":[{"_id":"54"}],"user_id":"44006","_id":"11810","status":"public","publication":"Automatic Speech Recognition and Understanding Workshop (ASRU 2015)","type":"conference"},{"abstract":[{"lang":"eng","text":"The parametric Bayesian Feature Enhancement (BFE) and a datadriven Denoising Autoencoder (DA) both bring performance gains in severe single-channel speech recognition conditions. The first can be adjusted to different conditions by an appropriate parameter setting, while the latter needs to be trained on conditions similar to the ones expected at decoding time, making it vulnerable to a mismatch between training and test conditions. We use a DNN backend and study reverberant ASR under three types of mismatch conditions: different room reverberation times, different speaker to microphone distances and the difference between artificially reverberated data and the recordings in a reverberant environment. We show that for these mismatch conditions BFE can provide the targets for a DA. 
This unsupervised adaptation provides a performance gain over the direct use of BFE and even enables to compensate for the mismatch of real and simulated reverberant data."}],"status":"public","type":"conference","publication":"Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on","keyword":["codecs","signal denoising","speech recognition","Bayesian feature enhancement","denoising autoencoder","reverberant ASR","single-channel speech recognition","speaker to microphone distances","unsupervised adaptation","Adaptation models","Noise reduction","Reverberation","Speech","Speech recognition","Training","deep neuronal networks","denoising autoencoder","feature enhancement","robust speech recognition"],"language":[{"iso":"eng"}],"_id":"11813","user_id":"44006","department":[{"_id":"54"}],"year":"2015","citation":{"short":"J. Heymann, R. Haeb-Umbach, P. Golik, R. Schlueter, in: Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference On, 2015, pp. 5053–5057.","bibtex":"@inproceedings{Heymann_Haeb-Umbach_Golik_Schlueter_2015, title={Unsupervised adaptation of a denoising autoencoder by Bayesian Feature Enhancement for reverberant asr under mismatch conditions}, DOI={<a href=\"https://doi.org/10.1109/ICASSP.2015.7178933\">10.1109/ICASSP.2015.7178933</a>}, booktitle={Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on}, author={Heymann, Jahn and Haeb-Umbach, Reinhold and Golik, P. and Schlueter, R.}, year={2015}, pages={5053–5057} }","mla":"Heymann, Jahn, et al. “Unsupervised Adaptation of a Denoising Autoencoder by Bayesian Feature Enhancement for Reverberant Asr under Mismatch Conditions.” <i>Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference On</i>, 2015, pp. 5053–57, doi:<a href=\"https://doi.org/10.1109/ICASSP.2015.7178933\">10.1109/ICASSP.2015.7178933</a>.","apa":"Heymann, J., Haeb-Umbach, R., Golik, P., &#38; Schlueter, R. (2015). 
Unsupervised adaptation of a denoising autoencoder by Bayesian Feature Enhancement for reverberant asr under mismatch conditions. In <i>Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on</i> (pp. 5053–5057). <a href=\"https://doi.org/10.1109/ICASSP.2015.7178933\">https://doi.org/10.1109/ICASSP.2015.7178933</a>","ama":"Heymann J, Haeb-Umbach R, Golik P, Schlueter R. Unsupervised adaptation of a denoising autoencoder by Bayesian Feature Enhancement for reverberant asr under mismatch conditions. In: <i>Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference On</i>. ; 2015:5053-5057. doi:<a href=\"https://doi.org/10.1109/ICASSP.2015.7178933\">10.1109/ICASSP.2015.7178933</a>","ieee":"J. Heymann, R. Haeb-Umbach, P. Golik, and R. Schlueter, “Unsupervised adaptation of a denoising autoencoder by Bayesian Feature Enhancement for reverberant asr under mismatch conditions,” in <i>Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on</i>, 2015, pp. 5053–5057.","chicago":"Heymann, Jahn, Reinhold Haeb-Umbach, P. Golik, and R. Schlueter. “Unsupervised Adaptation of a Denoising Autoencoder by Bayesian Feature Enhancement for Reverberant Asr under Mismatch Conditions.” In <i>Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference On</i>, 5053–57, 2015. 
<a href=\"https://doi.org/10.1109/ICASSP.2015.7178933\">https://doi.org/10.1109/ICASSP.2015.7178933</a>."},"page":"5053-5057","title":"Unsupervised adaptation of a denoising autoencoder by Bayesian Feature Enhancement for reverberant asr under mismatch conditions","main_file_link":[{"open_access":"1","url":"https://groups.uni-paderborn.de/nt/pubs/2015/hey_icassp_2015.pdf"}],"doi":"10.1109/ICASSP.2015.7178933","oa":"1","date_updated":"2022-01-06T06:51:09Z","author":[{"first_name":"Jahn","full_name":"Heymann, Jahn","id":"9168","last_name":"Heymann"},{"first_name":"Reinhold","id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach"},{"full_name":"Golik, P.","last_name":"Golik","first_name":"P."},{"first_name":"R.","last_name":"Schlueter","full_name":"Schlueter, R."}],"date_created":"2019-07-12T05:28:45Z"},{"year":"2015","citation":{"ama":"Jacob F, Haeb-Umbach R. Absolute Geometry Calibration of Distributed Microphone Arrays in an Audio-Visual Sensor Network. <i>ArXiv e-prints</i>. 2015.","chicago":"Jacob, Florian, and Reinhold Haeb-Umbach. “Absolute Geometry Calibration of Distributed Microphone Arrays in an Audio-Visual Sensor Network.” <i>ArXiv E-Prints</i>, 2015.","ieee":"F. Jacob and R. Haeb-Umbach, “Absolute Geometry Calibration of Distributed Microphone Arrays in an Audio-Visual Sensor Network,” <i>ArXiv e-prints</i>, 2015.","short":"F. Jacob, R. Haeb-Umbach, ArXiv E-Prints (2015).","bibtex":"@article{Jacob_Haeb-Umbach_2015, title={Absolute Geometry Calibration of Distributed Microphone Arrays in an Audio-Visual Sensor Network}, journal={ArXiv e-prints}, author={Jacob, Florian and Haeb-Umbach, Reinhold}, year={2015} }","mla":"Jacob, Florian, and Reinhold Haeb-Umbach. “Absolute Geometry Calibration of Distributed Microphone Arrays in an Audio-Visual Sensor Network.” <i>ArXiv E-Prints</i>, 2015.","apa":"Jacob, F., &#38; Haeb-Umbach, R. (2015). Absolute Geometry Calibration of Distributed Microphone Arrays in an Audio-Visual Sensor Network. 
<i>ArXiv E-Prints</i>."},"date_updated":"2022-01-06T06:51:11Z","oa":"1","date_created":"2019-07-12T05:29:05Z","author":[{"first_name":"Florian","full_name":"Jacob, Florian","last_name":"Jacob"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold"}],"title":"Absolute Geometry Calibration of Distributed Microphone Arrays in an Audio-Visual Sensor Network","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2015/JaHa2015.pdf","open_access":"1"}],"type":"journal_article","publication":"ArXiv e-prints","abstract":[{"text":"Joint audio-visual speaker tracking requires that the locations of microphones and cameras are known and that they are given in a common coordinate system. Sensor self-localization algorithms, however, are usually separately developed for either the acoustic or the visual modality and return their positions in a modality specific coordinate system, often with an unknown rotation, scaling and translation between the two. In this paper we propose two techniques to determine the positions of acoustic sensors in a common coordinate system, based on audio-visual correlates, i.e., events that are localized by both, microphones and cameras separately. The first approach maps the output of an acoustic self-calibration algorithm by estimating rotation, scale and translation to the visual coordinate system, while the second solves a joint system of equations with acoustic and visual directions of arrival as input. 
The evaluation of the two strategies reveals that joint calibration outperforms the mapping approach and achieves an overall calibration error of 0.20m even in reverberant environments.","lang":"eng"}],"status":"public","_id":"11830","user_id":"44006","department":[{"_id":"54"}],"language":[{"iso":"eng"}]},{"type":"book","status":"public","_id":"11868","user_id":"44006","department":[{"_id":"54"}],"language":[{"iso":"eng"}],"related_material":{"link":[{"description":"Sample-Chapter","relation":"supplementary_material","url":"https://groups.uni-paderborn.de/nt/pubs/2015/RASR_Chap5.pdf"},{"description":"Store","relation":"supplementary_material","url":"http://store.elsevier.com/9780128023983"}]},"year":"2015","citation":{"short":"J. Li, L. Deng, R. Haeb-Umbach, Y. Gong, Robust Automatic Speech Recognition, Elsevier, 2015.","mla":"Li, Jinyu, et al. <i>Robust Automatic Speech Recognition</i>. Elsevier, 2015.","bibtex":"@book{Li_Deng_Haeb-Umbach_Gong_2015, title={Robust Automatic Speech Recognition}, publisher={Elsevier}, author={Li, Jinyu and Deng, Li and Haeb-Umbach, Reinhold and Gong, Y.}, year={2015} }","apa":"Li, J., Deng, L., Haeb-Umbach, R., &#38; Gong, Y. (2015). <i>Robust Automatic Speech Recognition</i>. Elsevier.","chicago":"Li, Jinyu, Li Deng, Reinhold Haeb-Umbach, and Y. Gong. <i>Robust Automatic Speech Recognition</i>. Elsevier, 2015.","ieee":"J. Li, L. Deng, R. Haeb-Umbach, and Y. Gong, <i>Robust Automatic Speech Recognition</i>. Elsevier, 2015.","ama":"Li J, Deng L, Haeb-Umbach R, Gong Y. <i>Robust Automatic Speech Recognition</i>. 
Elsevier; 2015."},"publisher":"Elsevier","oa":"1","date_updated":"2022-01-06T06:51:11Z","author":[{"full_name":"Li, Jinyu","last_name":"Li","first_name":"Jinyu"},{"full_name":"Deng, Li","last_name":"Deng","first_name":"Li"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold"},{"full_name":"Gong, Y.","last_name":"Gong","first_name":"Y."}],"date_created":"2019-07-12T05:29:49Z","title":"Robust Automatic Speech Recognition","main_file_link":[{"url":"http://store.elsevier.com/Robust-Automatic-Speech-Recognition/Jinyu-Li/isbn-9780128023983/","open_access":"1"}]},{"citation":{"ama":"Marchi E, Schuller B, Baron-Cohen S, et al. Typicality and Emotion in the Voice of Children with Autism Spectrum Condition: Evidence Across Three Languages. In: <i>INTERSPEECH 2015</i>. ; 2015.","ieee":"E. Marchi <i>et al.</i>, “Typicality and Emotion in the Voice of Children with Autism Spectrum Condition: Evidence Across Three Languages,” in <i>INTERSPEECH 2015</i>, 2015.","chicago":"Marchi, Erik, Bjoern Schuller, Simon Baron-Cohen, Ofer Golan, Sven Boelte, Prerna Arora, and Reinhold Haeb-Umbach. “Typicality and Emotion in the Voice of Children with Autism Spectrum Condition: Evidence Across Three Languages.” In <i>INTERSPEECH 2015</i>, 2015.","short":"E. Marchi, B. Schuller, S. Baron-Cohen, O. Golan, S. Boelte, P. Arora, R. Haeb-Umbach, in: INTERSPEECH 2015, 2015.","bibtex":"@inproceedings{Marchi_Schuller_Baron-Cohen_Golan_Boelte_Arora_Haeb-Umbach_2015, title={Typicality and Emotion in the Voice of Children with Autism Spectrum Condition: Evidence Across Three Languages}, booktitle={INTERSPEECH 2015}, author={Marchi, Erik and Schuller, Bjoern and Baron-Cohen, Simon and Golan, Ofer and Boelte, Sven and Arora, Prerna and Haeb-Umbach, Reinhold}, year={2015} }","mla":"Marchi, Erik, et al. 
“Typicality and Emotion in the Voice of Children with Autism Spectrum Condition: Evidence Across Three Languages.” <i>INTERSPEECH 2015</i>, 2015.","apa":"Marchi, E., Schuller, B., Baron-Cohen, S., Golan, O., Boelte, S., Arora, P., &#38; Haeb-Umbach, R. (2015). Typicality and Emotion in the Voice of Children with Autism Spectrum Condition: Evidence Across Three Languages. In <i>INTERSPEECH 2015</i>."},"year":"2015","author":[{"first_name":"Erik","full_name":"Marchi, Erik","last_name":"Marchi"},{"last_name":"Schuller","full_name":"Schuller, Bjoern","first_name":"Bjoern"},{"full_name":"Baron-Cohen, Simon","last_name":"Baron-Cohen","first_name":"Simon"},{"full_name":"Golan, Ofer","last_name":"Golan","first_name":"Ofer"},{"full_name":"Boelte, Sven","last_name":"Boelte","first_name":"Sven"},{"first_name":"Prerna","full_name":"Arora, Prerna","last_name":"Arora"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"}],"date_created":"2019-07-12T05:29:57Z","date_updated":"2022-01-06T06:51:11Z","oa":"1","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2015/MaScBaOfSvPrHa.pdf","open_access":"1"}],"title":"Typicality and Emotion in the Voice of Children with Autism Spectrum Condition: Evidence Across Three Languages","publication":"INTERSPEECH 2015","type":"conference","status":"public","abstract":[{"lang":"eng","text":"Only a few studies exist on automatic emotion analysis of speech from children with Autism Spectrum Conditions (ASC). Out of these, some preliminary studies have recently focused on comparing the relevance of selected prosodic features against large sets of acoustic, spectral, and cepstral features; however, no study so far provided a comparison of performances across different languages. 
The present contribution aims to fill this white spot in the literature and provide insight by extensive evaluations carried out on three databases of prompted phrases collected in English, Swedish, and Hebrew, inducing nine emotion categories embedded in short-stories. The datasets contain speech of children with ASC and typically developing children under the same conditions. We evaluate automatic diagnosis and recognition of emotions in atypical children's voice over the nine categories including binary valence/arousal discrimination."}],"department":[{"_id":"54"}],"user_id":"44006","_id":"11875","language":[{"iso":"eng"}]},{"status":"public","abstract":[{"text":"In this paper we present a source counting algorithm to determine the number of speakers in a speech mixture. In our proposed method, we model the histogram of estimated directions of arrival with a nonparametric Bayesian infinite Gaussian mixture model. As an alternative to classical model selection criteria and to avoid specifying the maximum number of mixture components in advance, a Dirichlet process prior is employed over the mixture components. This allows to automatically determine the optimal number of mixture components that most probably model the observations. We demonstrate by experiments that this model outperforms a parametric approach using a finite Gaussian mixture model with a Dirichlet distribution prior over the mixture weights.","lang":"eng"}],"type":"conference","publication":"40th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2015)","language":[{"iso":"eng"}],"user_id":"44006","department":[{"_id":"54"}],"_id":"11919","citation":{"apa":"Walter, O., Drude, L., &#38; Haeb-Umbach, R. (2015). Source Counting in Speech Mixtures by Nonparametric Bayesian Estimation of an infinite Gaussian Mixture Model. In <i>40th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2015)</i>.","mla":"Walter, Oliver, et al. 
“Source Counting in Speech Mixtures by Nonparametric Bayesian Estimation of an Infinite Gaussian Mixture Model.” <i>40th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2015)</i>, 2015.","short":"O. Walter, L. Drude, R. Haeb-Umbach, in: 40th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2015), 2015.","bibtex":"@inproceedings{Walter_Drude_Haeb-Umbach_2015, title={Source Counting in Speech Mixtures by Nonparametric Bayesian Estimation of an infinite Gaussian Mixture Model}, booktitle={40th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2015)}, author={Walter, Oliver and Drude, Lukas and Haeb-Umbach, Reinhold}, year={2015} }","ieee":"O. Walter, L. Drude, and R. Haeb-Umbach, “Source Counting in Speech Mixtures by Nonparametric Bayesian Estimation of an infinite Gaussian Mixture Model,” in <i>40th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2015)</i>, 2015.","chicago":"Walter, Oliver, Lukas Drude, and Reinhold Haeb-Umbach. “Source Counting in Speech Mixtures by Nonparametric Bayesian Estimation of an Infinite Gaussian Mixture Model.” In <i>40th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2015)</i>, 2015.","ama":"Walter O, Drude L, Haeb-Umbach R. Source Counting in Speech Mixtures by Nonparametric Bayesian Estimation of an infinite Gaussian Mixture Model. In: <i>40th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2015)</i>. 
; 2015."},"year":"2015","related_material":{"link":[{"relation":"supplementary_material","description":"Poster","url":"https://groups.uni-paderborn.de/nt/pubs/2015/WaDrHa15_Poster.pdf"}]},"main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2015/WaDrHa15.pdf","open_access":"1"}],"title":"Source Counting in Speech Mixtures by Nonparametric Bayesian Estimation of an infinite Gaussian Mixture Model","author":[{"first_name":"Oliver","full_name":"Walter, Oliver","last_name":"Walter"},{"first_name":"Lukas","last_name":"Drude","full_name":"Drude, Lukas","id":"11213"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"}],"date_created":"2019-07-12T05:30:47Z","oa":"1","date_updated":"2022-01-06T06:51:12Z"},{"type":"journal_article","publication":"KI - Kuenstliche Intelligenz","status":"public","abstract":[{"text":"Besides the core learning algorithm itself, one major question in machine learning is how to best encode given training data such that the learning technology can efficiently learn based thereon and generalize to novel data. While classical approaches often rely on a hand coded data representation, the topic of autonomous representation or feature learning plays a major role in modern learning architectures. The goal of this contribution is to give an overview about different principles of autonomous feature learning, and to exemplify two principles based on two recent examples: autonomous metric learning for sequences, and autonomous learning of a deep representation for spoken language, respectively.","lang":"eng"}],"user_id":"44006","department":[{"_id":"54"}],"_id":"11922","language":[{"iso":"eng"}],"keyword":["Representation learning","Metric learning","Deep representation","Spoken language"],"citation":{"apa":"Walter, O., Haeb-Umbach, R., Mokbel, B., Paassen, B., &#38; Hammer, B. (2015). Autonomous Learning of Representations. <i>KI - Kuenstliche Intelligenz</i>, 1–13. 
<a href=\"http://dx.doi.org/10.1007/s13218-015-0372-1\">http://dx.doi.org/10.1007/s13218-015-0372-1</a>","mla":"Walter, Oliver, et al. “Autonomous Learning of Representations.” <i>KI - Kuenstliche Intelligenz</i>, 2015, pp. 1–13, doi:<a href=\"http://dx.doi.org/10.1007/s13218-015-0372-1\">http://dx.doi.org/10.1007/s13218-015-0372-1</a>.","bibtex":"@article{Walter_Haeb-Umbach_Mokbel_Paassen_Hammer_2015, title={Autonomous Learning of Representations}, DOI={<a href=\"http://dx.doi.org/10.1007/s13218-015-0372-1\">http://dx.doi.org/10.1007/s13218-015-0372-1</a>}, journal={KI - Kuenstliche Intelligenz}, author={Walter, Oliver and Haeb-Umbach, Reinhold and Mokbel, Bassam and Paassen, Benjamin and Hammer, Barbara}, year={2015}, pages={1–13} }","short":"O. Walter, R. Haeb-Umbach, B. Mokbel, B. Paassen, B. Hammer, KI - Kuenstliche Intelligenz (2015) 1–13.","chicago":"Walter, Oliver, Reinhold Haeb-Umbach, Bassam Mokbel, Benjamin Paassen, and Barbara Hammer. “Autonomous Learning of Representations.” <i>KI - Kuenstliche Intelligenz</i>, 2015, 1–13. <a href=\"http://dx.doi.org/10.1007/s13218-015-0372-1\">http://dx.doi.org/10.1007/s13218-015-0372-1</a>.","ieee":"O. Walter, R. Haeb-Umbach, B. Mokbel, B. Paassen, and B. Hammer, “Autonomous Learning of Representations,” <i>KI - Kuenstliche Intelligenz</i>, pp. 1–13, 2015.","ama":"Walter O, Haeb-Umbach R, Mokbel B, Paassen B, Hammer B. Autonomous Learning of Representations. <i>KI - Kuenstliche Intelligenz</i>. 2015:1-13. 
doi:<a href=\"http://dx.doi.org/10.1007/s13218-015-0372-1\">http://dx.doi.org/10.1007/s13218-015-0372-1</a>"},"page":"1-13","year":"2015","author":[{"last_name":"Walter","full_name":"Walter, Oliver","first_name":"Oliver"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"},{"last_name":"Mokbel","full_name":"Mokbel, Bassam","first_name":"Bassam"},{"last_name":"Paassen","full_name":"Paassen, Benjamin","first_name":"Benjamin"},{"first_name":"Barbara","full_name":"Hammer, Barbara","last_name":"Hammer"}],"date_created":"2019-07-12T05:30:51Z","oa":"1","date_updated":"2022-01-06T06:51:12Z","main_file_link":[{"open_access":"1","url":"https://groups.uni-paderborn.de/nt/pubs/2015/WaHaMoPaHa15.pdf"}],"doi":"http://dx.doi.org/10.1007/s13218-015-0372-1","title":"Autonomous Learning of Representations"},{"date_created":"2019-07-12T05:30:52Z","author":[{"full_name":"Walter, Oliver","last_name":"Walter","first_name":"Oliver"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"},{"first_name":"Jan","last_name":"Strunk","full_name":"Strunk, Jan"},{"full_name":"P. Himmelmann, Nikolaus ","last_name":"P. Himmelmann","first_name":"Nikolaus "}],"oa":"1","date_updated":"2022-01-06T06:51:12Z","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2015/WaHaStHi.pdf","open_access":"1"}],"title":"Lexicon Discovery for Language Preservation using Unsupervised Word Segmentation with Pitman-Yor Language Models (FGNT-2015-01)","citation":{"ama":"Walter O, Haeb-Umbach R, Strunk J, P. Himmelmann N. <i>Lexicon Discovery for Language Preservation Using Unsupervised Word Segmentation with Pitman-Yor Language Models (FGNT-2015-01)</i>.; 2015.","chicago":"Walter, Oliver, Reinhold Haeb-Umbach, Jan Strunk, and Nikolaus  P. Himmelmann. <i>Lexicon Discovery for Language Preservation Using Unsupervised Word Segmentation with Pitman-Yor Language Models (FGNT-2015-01)</i>, 2015.","ieee":"O. Walter, R. 
Haeb-Umbach, J. Strunk, and N. P. Himmelmann, <i>Lexicon Discovery for Language Preservation using Unsupervised Word Segmentation with Pitman-Yor Language Models (FGNT-2015-01)</i>. 2015.","bibtex":"@book{Walter_Haeb-Umbach_Strunk_P. Himmelmann_2015, title={Lexicon Discovery for Language Preservation using Unsupervised Word Segmentation with Pitman-Yor Language Models (FGNT-2015-01)}, author={Walter, Oliver and Haeb-Umbach, Reinhold and Strunk, Jan and P. Himmelmann, Nikolaus }, year={2015} }","short":"O. Walter, R. Haeb-Umbach, J. Strunk, N. P. Himmelmann, Lexicon Discovery for Language Preservation Using Unsupervised Word Segmentation with Pitman-Yor Language Models (FGNT-2015-01), 2015.","mla":"Walter, Oliver, et al. <i>Lexicon Discovery for Language Preservation Using Unsupervised Word Segmentation with Pitman-Yor Language Models (FGNT-2015-01)</i>. 2015.","apa":"Walter, O., Haeb-Umbach, R., Strunk, J., &#38; P. Himmelmann, N. (2015). <i>Lexicon Discovery for Language Preservation using Unsupervised Word Segmentation with Pitman-Yor Language Models (FGNT-2015-01)</i>."},"year":"2015","user_id":"44006","department":[{"_id":"54"}],"_id":"11923","language":[{"iso":"eng"}],"type":"report","status":"public","abstract":[{"text":"In this paper we show that recently developed algorithms for unsupervised word segmentation can be a valuable tool for the documentation of endangered languages. We applied an unsupervised word segmentation algorithm based on a nested Pitman-Yor language model to two Austronesian languages, Wooi and Waima'a. The algorithm was then modified and parameterized to cater to the needs of linguists for high precision of lexical discovery: We obtained a lexicon precision of 69.2\\% and 67.5\\% for Wooi and Waima'a, respectively, if single-letter words and words found less than three times were discarded. 
A comparison with an English word segmentation task showed comparable performance, verifying that the assumptions underlying the Pitman-Yor language model, the universality of Zipf's law and the power of n-gram structures, do also hold for languages as exotic as Wooi and Waima'a.","lang":"eng"}]}]