[{"status":"public","publication":"Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)","type":"conference","language":[{"iso":"eng"}],"_id":"11751","department":[{"_id":"54"}],"user_id":"44006","year":"2016","citation":{"apa":"Drude, L., Boeddeker, C., &#38; Haeb-Umbach, R. (2016). Blind Speech Separation based on Complex Spherical k-Mode Clustering. In <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>.","mla":"Drude, Lukas, et al. “Blind Speech Separation Based on Complex Spherical K-Mode Clustering.” <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2016.","short":"L. Drude, C. Boeddeker, R. Haeb-Umbach, in: Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP), 2016.","bibtex":"@inproceedings{Drude_Boeddeker_Haeb-Umbach_2016, title={Blind Speech Separation based on Complex Spherical k-Mode Clustering}, booktitle={Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)}, author={Drude, Lukas and Boeddeker, Christoph and Haeb-Umbach, Reinhold}, year={2016} }","chicago":"Drude, Lukas, Christoph Boeddeker, and Reinhold Haeb-Umbach. “Blind Speech Separation Based on Complex Spherical K-Mode Clustering.” In <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2016.","ieee":"L. Drude, C. Boeddeker, and R. Haeb-Umbach, “Blind Speech Separation based on Complex Spherical k-Mode Clustering,” in <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2016.","ama":"Drude L, Boeddeker C, Haeb-Umbach R. Blind Speech Separation based on Complex Spherical k-Mode Clustering. In: <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>. 
; 2016."},"related_material":{"link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2016/icassp_2016_drude_slides.pdf","relation":"supplementary_material","description":"Slides"}]},"title":"Blind Speech Separation based on Complex Spherical k-Mode Clustering","main_file_link":[{"open_access":"1","url":"https://groups.uni-paderborn.de/nt/pubs/2016/icassp_2016_drude_paper.pdf"}],"oa":"1","date_updated":"2022-01-06T06:51:08Z","date_created":"2019-07-12T05:27:33Z","author":[{"first_name":"Lukas","id":"11213","full_name":"Drude, Lukas","last_name":"Drude"},{"last_name":"Boeddeker","full_name":"Boeddeker, Christoph","id":"40767","first_name":"Christoph"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242"}]},{"status":"public","abstract":[{"text":"Although complex-valued neural networks (CVNNs) – networks which can operate with complex arithmetic – have been around for a while, they have not been given reconsideration since the breakthrough of deep network architectures. This paper presents a critical assessment whether the novel tool set of deep neural networks (DNNs) should be extended to complex-valued arithmetic. Indeed, with DNNs making inroads in speech enhancement tasks, the use of complex-valued input data, specifically the short-time Fourier transform coefficients, is an obvious consideration. In particular when it comes to performing tasks that heavily rely on phase information, such as acoustic beamforming, complex-valued algorithms are omnipresent. In this contribution we recapitulate backpropagation in CVNNs, develop complex-valued network elements, such as the split-rectified non-linearity, and compare real- and complex-valued networks on a beamforming task. 
We find that CVNNs hardly provide a performance gain and conclude that the effort of developing the complex-valued counterparts of the building blocks of modern deep or recurrent neural networks can hardly be justified.","lang":"eng"}],"publication":"INTERSPEECH 2016, San Francisco, USA","type":"conference","language":[{"iso":"eng"}],"department":[{"_id":"54"}],"user_id":"44006","_id":"11756","citation":{"apa":"Drude, L., Raj, B., &#38; Haeb-Umbach, R. (2016). On the appropriateness of complex-valued neural networks for speech enhancement. In <i>INTERSPEECH 2016, San Francisco, USA</i>.","bibtex":"@inproceedings{Drude_Raj_Haeb-Umbach_2016, title={On the appropriateness of complex-valued neural networks for speech enhancement}, booktitle={INTERSPEECH 2016, San Francisco, USA}, author={Drude, Lukas and Raj, Bhiksha and Haeb-Umbach, Reinhold}, year={2016} }","short":"L. Drude, B. Raj, R. Haeb-Umbach, in: INTERSPEECH 2016, San Francisco, USA, 2016.","mla":"Drude, Lukas, et al. “On the Appropriateness of Complex-Valued Neural Networks for Speech Enhancement.” <i>INTERSPEECH 2016, San Francisco, USA</i>, 2016.","ama":"Drude L, Raj B, Haeb-Umbach R. On the appropriateness of complex-valued neural networks for speech enhancement. In: <i>INTERSPEECH 2016, San Francisco, USA</i>. ; 2016.","chicago":"Drude, Lukas, Bhiksha Raj, and Reinhold Haeb-Umbach. “On the Appropriateness of Complex-Valued Neural Networks for Speech Enhancement.” In <i>INTERSPEECH 2016, San Francisco, USA</i>, 2016.","ieee":"L. Drude, B. Raj, and R. 
Haeb-Umbach, “On the appropriateness of complex-valued neural networks for speech enhancement,” in <i>INTERSPEECH 2016, San Francisco, USA</i>, 2016."},"year":"2016","related_material":{"link":[{"relation":"supplementary_material","description":"Poster","url":"https://groups.uni-paderborn.de/nt/pubs/2016/interspeech_2016_drude_slides.pdf"}]},"main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2016/interspeech_2016_drude_paper.pdf","open_access":"1"}],"title":"On the appropriateness of complex-valued neural networks for speech enhancement","author":[{"first_name":"Lukas","id":"11213","full_name":"Drude, Lukas","last_name":"Drude"},{"last_name":"Raj","full_name":"Raj, Bhiksha","first_name":"Bhiksha"},{"id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach","first_name":"Reinhold"}],"date_created":"2019-07-12T05:27:39Z","date_updated":"2022-01-06T06:51:08Z","oa":"1"},{"main_file_link":[{"open_access":"1","url":"https://groups.uni-paderborn.de/nt/pubs/2016/itgspeech2016_08_Glarner.pdf"}],"title":"Factor Graph Decoding for Speech Presence Probability Estimation","date_created":"2019-07-12T05:27:56Z","author":[{"first_name":"Thomas","last_name":"Glarner","id":"14169","full_name":"Glarner, Thomas"},{"full_name":"Mahdi Momenzadeh, Mohammad","last_name":"Mahdi Momenzadeh","first_name":"Mohammad"},{"first_name":"Lukas","last_name":"Drude","full_name":"Drude, Lukas","id":"11213"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"}],"date_updated":"2022-01-06T06:51:08Z","oa":"1","citation":{"ieee":"T. Glarner, M. Mahdi Momenzadeh, L. Drude, and R. Haeb-Umbach, “Factor Graph Decoding for Speech Presence Probability Estimation,” in <i>12. ITG Fachtagung Sprachkommunikation (ITG 2016)</i>, 2016.","chicago":"Glarner, Thomas, Mohammad Mahdi Momenzadeh, Lukas Drude, and Reinhold Haeb-Umbach. “Factor Graph Decoding for Speech Presence Probability Estimation.” In <i>12. 
ITG Fachtagung Sprachkommunikation (ITG 2016)</i>, 2016.","ama":"Glarner T, Mahdi Momenzadeh M, Drude L, Haeb-Umbach R. Factor Graph Decoding for Speech Presence Probability Estimation. In: <i>12. ITG Fachtagung Sprachkommunikation (ITG 2016)</i>. ; 2016.","apa":"Glarner, T., Mahdi Momenzadeh, M., Drude, L., &#38; Haeb-Umbach, R. (2016). Factor Graph Decoding for Speech Presence Probability Estimation. In <i>12. ITG Fachtagung Sprachkommunikation (ITG 2016)</i>.","short":"T. Glarner, M. Mahdi Momenzadeh, L. Drude, R. Haeb-Umbach, in: 12. ITG Fachtagung Sprachkommunikation (ITG 2016), 2016.","bibtex":"@inproceedings{Glarner_Mahdi Momenzadeh_Drude_Haeb-Umbach_2016, title={Factor Graph Decoding for Speech Presence Probability Estimation}, booktitle={12. ITG Fachtagung Sprachkommunikation (ITG 2016)}, author={Glarner, Thomas and Mahdi Momenzadeh, Mohammad and Drude, Lukas and Haeb-Umbach, Reinhold}, year={2016} }","mla":"Glarner, Thomas, et al. “Factor Graph Decoding for Speech Presence Probability Estimation.” <i>12. ITG Fachtagung Sprachkommunikation (ITG 2016)</i>, 2016."},"year":"2016","related_material":{"link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2016/itgspeech2016_08_Glarner_slides.pdf","relation":"supplementary_material","description":"Slides"}]},"language":[{"iso":"eng"}],"user_id":"44006","department":[{"_id":"54"}],"_id":"11771","status":"public","abstract":[{"lang":"eng","text":"This paper is concerned with speech presence probability estimation employing an explicit model of the temporal and spectral correlations of speech. An undirected graphical model is introduced, based on a Factor Graph formulation. It is shown that this undirected model cures some of the theoretical issues of an earlier directed graphical model. 
Furthermore, we formulate a message passing inference scheme based on an approximate graph factorization, identify this inference scheme as a particular message passing schedule based on the turbo principle and suggest further alternative schedules. The experiments show an improved performance over speech presence probability estimation based on an IID assumption, and a slightly better performance of the turbo schedule over the alternatives."}],"type":"conference","publication":"12. ITG Fachtagung Sprachkommunikation (ITG 2016)"},{"status":"public","type":"conference","publication":"Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)","language":[{"iso":"eng"}],"user_id":"44006","department":[{"_id":"54"}],"_id":"11812","citation":{"apa":"Heymann, J., Drude, L., &#38; Haeb-Umbach, R. (2016). Neural Network Based Spectral Mask Estimation for Acoustic Beamforming. In <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>.","bibtex":"@inproceedings{Heymann_Drude_Haeb-Umbach_2016, title={Neural Network Based Spectral Mask Estimation for Acoustic Beamforming}, booktitle={Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)}, author={Heymann, Jahn and Drude, Lukas and Haeb-Umbach, Reinhold}, year={2016} }","short":"J. Heymann, L. Drude, R. Haeb-Umbach, in: Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP), 2016.","mla":"Heymann, Jahn, et al. “Neural Network Based Spectral Mask Estimation for Acoustic Beamforming.” <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2016.","ama":"Heymann J, Drude L, Haeb-Umbach R. Neural Network Based Spectral Mask Estimation for Acoustic Beamforming. In: <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>. ; 2016.","ieee":"J. Heymann, L. Drude, and R. Haeb-Umbach, “Neural Network Based Spectral Mask Estimation for Acoustic Beamforming,” in <i>Proc. IEEE Intl. Conf. 
on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2016.","chicago":"Heymann, Jahn, Lukas Drude, and Reinhold Haeb-Umbach. “Neural Network Based Spectral Mask Estimation for Acoustic Beamforming.” In <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2016."},"year":"2016","related_material":{"link":[{"relation":"supplementary_material","description":"Slides","url":"https://groups.uni-paderborn.de/nt/pubs/2016/icassp_2016_heymann_slides.pdf"}]},"main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2016/icassp_2016_heymann_paper.pdf","open_access":"1"}],"title":"Neural Network Based Spectral Mask Estimation for Acoustic Beamforming","date_created":"2019-07-12T05:28:44Z","author":[{"last_name":"Heymann","id":"9168","full_name":"Heymann, Jahn","first_name":"Jahn"},{"first_name":"Lukas","id":"11213","full_name":"Drude, Lukas","last_name":"Drude"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242"}],"oa":"1","date_updated":"2022-01-06T06:51:09Z"},{"main_file_link":[{"open_access":"1","url":"https://groups.uni-paderborn.de/nt/pubs/2016/JacobHaeb_ITG2016.pdf"}],"title":"On the Bias of Direction of Arrival Estimation Using Linear Microphone Arrays","date_created":"2019-07-12T05:29:03Z","author":[{"first_name":"Florian","last_name":"Jacob","full_name":"Jacob, Florian"},{"full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach","first_name":"Reinhold"}],"date_updated":"2022-01-06T06:51:10Z","oa":"1","citation":{"apa":"Jacob, F., &#38; Haeb-Umbach, R. (2016). On the Bias of Direction of Arrival Estimation Using Linear Microphone Arrays. In <i>12. ITG Fachtagung Sprachkommunikation (ITG 2016)</i>.","short":"F. Jacob, R. Haeb-Umbach, in: 12. ITG Fachtagung Sprachkommunikation (ITG 2016), 2016.","bibtex":"@inproceedings{Jacob_Haeb-Umbach_2016, title={On the Bias of Direction of Arrival Estimation Using Linear Microphone Arrays}, booktitle={12. 
ITG Fachtagung Sprachkommunikation (ITG 2016)}, author={Jacob, Florian and Haeb-Umbach, Reinhold}, year={2016} }","mla":"Jacob, Florian, and Reinhold Haeb-Umbach. “On the Bias of Direction of Arrival Estimation Using Linear Microphone Arrays.” <i>12. ITG Fachtagung Sprachkommunikation (ITG 2016)</i>, 2016.","ieee":"F. Jacob and R. Haeb-Umbach, “On the Bias of Direction of Arrival Estimation Using Linear Microphone Arrays,” in <i>12. ITG Fachtagung Sprachkommunikation (ITG 2016)</i>, 2016.","chicago":"Jacob, Florian, and Reinhold Haeb-Umbach. “On the Bias of Direction of Arrival Estimation Using Linear Microphone Arrays.” In <i>12. ITG Fachtagung Sprachkommunikation (ITG 2016)</i>, 2016.","ama":"Jacob F, Haeb-Umbach R. On the Bias of Direction of Arrival Estimation Using Linear Microphone Arrays. In: <i>12. ITG Fachtagung Sprachkommunikation (ITG 2016)</i>. ; 2016."},"year":"2016","related_material":{"link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2016/JacobHaeb_ITG2016_poster.pdf","relation":"supplementary_material","description":"Poster"}]},"language":[{"iso":"eng"}],"user_id":"44006","department":[{"_id":"54"}],"_id":"11829","status":"public","abstract":[{"lang":"eng","text":"This contribution investigates Direction of Arrival (DoA) estimation using linearly arranged microphone arrays. We are going to develop a model for the DoA estimation error in a reverberant scenario and show the existence of a bias, that is a consequence of the linear arrangement and limited field of view (FoV) bias: First, the limited FoV leading to a clipping of the measurements, and, second, the angular distribution of the signal energy of the reflections being non-uniform. Since both issues are a consequence of the linear arrangement of the sensors, the bias arises largely independent of the kind of DoA estimator. 
The experimental evaluation demonstrates the existence of the bias for a selected number of DoA estimation methods and proves that the prediction from the developed theoretical model matches the simulation results."}],"type":"conference","publication":"12. ITG Fachtagung Sprachkommunikation (ITG 2016)"},{"_id":"11834","department":[{"_id":"54"}],"user_id":"44006","language":[{"iso":"eng"}],"publication":"Computer Speech and Language","type":"conference","abstract":[{"lang":"eng","text":"We present a system for the 4th CHiME challenge which significantly increases the performance for all three tracks with respect to the provided baseline system. The front-end uses a bi-directional Long Short-Term Memory (BLSTM)-based neural network to estimate signal statistics. These then steer a Generalized Eigenvalue beamformer. The back-end consists of a 22 layer deep Wide Residual Network and two extra BLSTM layers. Working on a whole utterance instead of frames allows us to refine Batch-Normalization. We also train our own BLSTM-based language model. Adding a discriminative speaker adaptation leads to further gains. The final system achieves a word error rate on the six channel real test data of 3.48%. For the two channel track we achieve 5.96% and for the one channel track 9.34%. This is the best reported performance on the challenge achieved by a single system, i.e., a configuration, which does not combine multiple systems. At the same time, our system is independent of the microphone configuration. 
We can thus use the same components for all three tracks."}],"status":"public","oa":"1","date_updated":"2022-01-06T06:51:11Z","date_created":"2019-07-12T05:29:09Z","author":[{"last_name":"Heymann","full_name":"Heymann, Jahn","id":"9168","first_name":"Jahn"},{"first_name":"Lukas","id":"11213","full_name":"Drude, Lukas","last_name":"Drude"},{"full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach","first_name":"Reinhold"}],"title":"Wide Residual BLSTM Network with Discriminative Speaker Adaptation for Robust Speech Recognition","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2016/chime4_upbonly_paper.pdf","open_access":"1"}],"related_material":{"link":[{"relation":"supplementary_material","description":"Poster","url":"https://groups.uni-paderborn.de/nt/pubs/2016/chime4_upbonly_poster.pdf"}]},"year":"2016","citation":{"apa":"Heymann, J., Drude, L., &#38; Haeb-Umbach, R. (2016). Wide Residual BLSTM Network with Discriminative Speaker Adaptation for Robust Speech Recognition. In <i>Computer Speech and Language</i>.","mla":"Heymann, Jahn, et al. “Wide Residual BLSTM Network with Discriminative Speaker Adaptation for Robust Speech Recognition.” <i>Computer Speech and Language</i>, 2016.","short":"J. Heymann, L. Drude, R. Haeb-Umbach, in: Computer Speech and Language, 2016.","bibtex":"@inproceedings{Heymann_Drude_Haeb-Umbach_2016, title={Wide Residual BLSTM Network with Discriminative Speaker Adaptation for Robust Speech Recognition}, booktitle={Computer Speech and Language}, author={Heymann, Jahn and Drude, Lukas and Haeb-Umbach, Reinhold}, year={2016} }","chicago":"Heymann, Jahn, Lukas Drude, and Reinhold Haeb-Umbach. “Wide Residual BLSTM Network with Discriminative Speaker Adaptation for Robust Speech Recognition.” In <i>Computer Speech and Language</i>, 2016.","ieee":"J. Heymann, L. Drude, and R. 
Haeb-Umbach, “Wide Residual BLSTM Network with Discriminative Speaker Adaptation for Robust Speech Recognition,” in <i>Computer Speech and Language</i>, 2016.","ama":"Heymann J, Drude L, Haeb-Umbach R. Wide Residual BLSTM Network with Discriminative Speaker Adaptation for Robust Speech Recognition. In: <i>Computer Speech and Language</i>. ; 2016."}},{"year":"2016","citation":{"ieee":"K. Kinoshita <i>et al.</i>, “A summary of the REVERB challenge: state-of-the-art and remaining challenges in reverberant speech processing research,” <i>EURASIP Journal on Advances in Signal Processing</i>, 2016.","chicago":"Kinoshita, Keisuke, Marc Delcroix, Sharon Gannot, Emanuel A. P. Habets, Reinhold Haeb-Umbach, Walter Kellermann, Volker Leutnant, et al. “A Summary of the REVERB Challenge: State-of-the-Art and Remaining Challenges in Reverberant Speech Processing Research.” <i>EURASIP Journal on Advances in Signal Processing</i>, 2016.","ama":"Kinoshita K, Delcroix M, Gannot S, et al. A summary of the REVERB challenge: state-of-the-art and remaining challenges in reverberant speech processing research. <i>EURASIP Journal on Advances in Signal Processing</i>. 2016.","bibtex":"@article{Kinoshita_Delcroix_Gannot_Habets_Haeb-Umbach_Kellermann_Leutnant_Maas_Nakatani_Raj_et al._2016, title={A summary of the REVERB challenge: state-of-the-art and remaining challenges in reverberant speech processing research}, journal={EURASIP Journal on Advances in Signal Processing}, author={Kinoshita, Keisuke and Delcroix, Marc and Gannot, Sharon and Habets, Emanuel A. P. and Haeb-Umbach, Reinhold and Kellermann, Walter and Leutnant, Volker and Maas, Roland and Nakatani, Tomohiro and Raj, Bhiksha and et al.}, year={2016} }","mla":"Kinoshita, Keisuke, et al. “A Summary of the REVERB Challenge: State-of-the-Art and Remaining Challenges in Reverberant Speech Processing Research.” <i>EURASIP Journal on Advances in Signal Processing</i>, 2016.","short":"K. Kinoshita, M. Delcroix, S. Gannot, E.A.P. 
Habets, R. Haeb-Umbach, W. Kellermann, V. Leutnant, R. Maas, T. Nakatani, B. Raj, A. Sehr, T. Yoshioka, EURASIP Journal on Advances in Signal Processing (2016).","apa":"Kinoshita, K., Delcroix, M., Gannot, S., Habets, E. A. P., Haeb-Umbach, R., Kellermann, W., … Yoshioka, T. (2016). A summary of the REVERB challenge: state-of-the-art and remaining challenges in reverberant speech processing research. <i>EURASIP Journal on Advances in Signal Processing</i>."},"oa":"1","date_updated":"2022-01-06T06:51:11Z","author":[{"first_name":"Keisuke","last_name":"Kinoshita","full_name":"Kinoshita, Keisuke"},{"first_name":"Marc","last_name":"Delcroix","full_name":"Delcroix, Marc"},{"last_name":"Gannot","full_name":"Gannot, Sharon","first_name":"Sharon"},{"last_name":"Habets","full_name":"Habets, Emanuel A. P.","first_name":"Emanuel A. P."},{"first_name":"Reinhold","full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach"},{"first_name":"Walter","last_name":"Kellermann","full_name":"Kellermann, Walter"},{"first_name":"Volker","full_name":"Leutnant, Volker","last_name":"Leutnant"},{"last_name":"Maas","full_name":"Maas, Roland","first_name":"Roland"},{"full_name":"Nakatani, Tomohiro","last_name":"Nakatani","first_name":"Tomohiro"},{"first_name":"Bhiksha","last_name":"Raj","full_name":"Raj, Bhiksha"},{"first_name":"Armin","full_name":"Sehr, Armin","last_name":"Sehr"},{"last_name":"Yoshioka","full_name":"Yoshioka, Takuya","first_name":"Takuya"}],"date_created":"2019-07-12T05:29:16Z","title":"A summary of the REVERB challenge: state-of-the-art and remaining challenges in reverberant speech processing research","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2016/REVERB_summary.pdf","open_access":"1"}],"type":"journal_article","publication":"EURASIP Journal on Advances in Signal 
Processing","status":"public","_id":"11840","user_id":"44006","department":[{"_id":"54"}],"language":[{"iso":"eng"}]},{"user_id":"44006","department":[{"_id":"54"}],"_id":"11886","language":[{"iso":"eng"}],"keyword":["Acoustic sensors","Microphones","Portable computers","Smart phones","Wireless communication","Wireless sensor networks"],"type":"journal_article","publication":"IEEE Signal Processing Magazine","status":"public","abstract":[{"text":"Today, we are often surrounded by devices with one or more microphones, such as smartphones, laptops, and wireless microphones. If they are part of an acoustic sensor network, their distribution in the environment can be beneficially exploited for various speech processing tasks. However, applications like speaker localization, speaker tracking, and speech enhancement by beamforming avail themselves of the geometrical configuration of the sensors. Therefore, acoustic microphone geometry calibration has recently become a very active field of research. This article provides an application-oriented, comprehensive survey of existing methods for microphone position self-calibration, which will be categorized by the measurements they use and the scenarios they can calibrate. 
Selected methods will be evaluated comparatively with real-world recordings.","lang":"eng"}],"author":[{"first_name":"Axel","full_name":"Plinge, Axel","last_name":"Plinge"},{"first_name":"Florian","last_name":"Jacob","full_name":"Jacob, Florian"},{"first_name":"Reinhold","id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach"},{"last_name":"Fink","full_name":"Fink, Gernot A.","first_name":"Gernot A."}],"date_created":"2019-07-12T05:30:09Z","volume":33,"date_updated":"2022-01-06T06:51:11Z","doi":"10.1109/MSP.2016.2555198","title":"Acoustic Microphone Geometry Calibration: An overview and experimental evaluation of state-of-the-art algorithms","issue":"4","publication_identifier":{"issn":["1053-5888"]},"citation":{"apa":"Plinge, A., Jacob, F., Haeb-Umbach, R., &#38; Fink, G. A. (2016). Acoustic Microphone Geometry Calibration: An overview and experimental evaluation of state-of-the-art algorithms. <i>IEEE Signal Processing Magazine</i>, <i>33</i>(4), 14–29. <a href=\"https://doi.org/10.1109/MSP.2016.2555198\">https://doi.org/10.1109/MSP.2016.2555198</a>","bibtex":"@article{Plinge_Jacob_Haeb-Umbach_Fink_2016, title={Acoustic Microphone Geometry Calibration: An overview and experimental evaluation of state-of-the-art algorithms}, volume={33}, DOI={<a href=\"https://doi.org/10.1109/MSP.2016.2555198\">10.1109/MSP.2016.2555198</a>}, number={4}, journal={IEEE Signal Processing Magazine}, author={Plinge, Axel and Jacob, Florian and Haeb-Umbach, Reinhold and Fink, Gernot A.}, year={2016}, pages={14–29} }","mla":"Plinge, Axel, et al. “Acoustic Microphone Geometry Calibration: An Overview and Experimental Evaluation of State-of-the-Art Algorithms.” <i>IEEE Signal Processing Magazine</i>, vol. 33, no. 4, 2016, pp. 14–29, doi:<a href=\"https://doi.org/10.1109/MSP.2016.2555198\">10.1109/MSP.2016.2555198</a>.","short":"A. Plinge, F. Jacob, R. Haeb-Umbach, G.A. Fink, IEEE Signal Processing Magazine 33 (2016) 14–29.","ieee":"A. Plinge, F. Jacob, R. 
Haeb-Umbach, and G. A. Fink, “Acoustic Microphone Geometry Calibration: An overview and experimental evaluation of state-of-the-art algorithms,” <i>IEEE Signal Processing Magazine</i>, vol. 33, no. 4, pp. 14–29, 2016.","chicago":"Plinge, Axel, Florian Jacob, Reinhold Haeb-Umbach, and Gernot A. Fink. “Acoustic Microphone Geometry Calibration: An Overview and Experimental Evaluation of State-of-the-Art Algorithms.” <i>IEEE Signal Processing Magazine</i> 33, no. 4 (2016): 14–29. <a href=\"https://doi.org/10.1109/MSP.2016.2555198\">https://doi.org/10.1109/MSP.2016.2555198</a>.","ama":"Plinge A, Jacob F, Haeb-Umbach R, Fink GA. Acoustic Microphone Geometry Calibration: An overview and experimental evaluation of state-of-the-art algorithms. <i>IEEE Signal Processing Magazine</i>. 2016;33(4):14-29. doi:<a href=\"https://doi.org/10.1109/MSP.2016.2555198\">10.1109/MSP.2016.2555198</a>"},"page":"14-29","intvolume":"        33","year":"2016"},{"language":[{"iso":"eng"}],"department":[{"_id":"54"}],"user_id":"44006","_id":"11908","status":"public","abstract":[{"lang":"eng","text":"This paper describes automatic speech recognition (ASR) systems developed jointly by RWTH, UPB and FORTH for the 1ch, 2ch and 6ch track of the 4th CHiME Challenge. In the 2ch and 6ch tracks the final system output is obtained by a Confusion Network Combination (CNC) of multiple systems. The Acoustic Model (AM) is a deep neural network based on Bidirectional Long Short-Term Memory (BLSTM) units. The systems differ by front ends and training sets used for the acoustic training. The model for the 1ch track is trained without any preprocessing. For each front end we trained and evaluated individual acoustic models. We compare the ASR performance of different beamforming approaches: a conventional superdirective beamformer [1] and an MVDR beamformer as in [2], where the steering vector is estimated based on [3]. Furthermore we evaluated a BLSTM supported Generalized Eigenvalue beamformer using NN-GEV [4]. 
The back end is implemented using RWTH's open-source toolkits RASR [5], RETURNN [6] and rwthlm [7]. We rescore lattices with a Long Short-Term Memory (LSTM) based language model. The overall best results are obtained by a system combination that includes the lattices from the system of UPB's submission [8]. Our final submission scored second in each of the three tracks of the 4th CHiME Challenge."}],"publication":"Computer Speech and Language","type":"conference","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2016/chime4_rwthupbforth_paper.pdf","open_access":"1"}],"title":"The RWTH/UPB/FORTH System Combination for the 4th CHiME Challenge Evaluation","author":[{"first_name":"Tobias","full_name":"Menne, Tobias","last_name":"Menne"},{"full_name":"Heymann, Jahn","id":"9168","last_name":"Heymann","first_name":"Jahn"},{"full_name":"Alexandridis, Anastasios","last_name":"Alexandridis","first_name":"Anastasios"},{"last_name":"Irie","full_name":"Irie, Kazuki","first_name":"Kazuki"},{"first_name":"Albert","full_name":"Zeyer, Albert","last_name":"Zeyer"},{"first_name":"Markus","last_name":"Kitza","full_name":"Kitza, Markus"},{"last_name":"Golik","full_name":"Golik, Pavel","first_name":"Pavel"},{"first_name":"Ilia","full_name":"Kulikov, Ilia","last_name":"Kulikov"},{"last_name":"Drude","full_name":"Drude, Lukas","id":"11213","first_name":"Lukas"},{"last_name":"Schlüter","full_name":"Schlüter, Ralf","first_name":"Ralf"},{"full_name":"Ney, Hermann","last_name":"Ney","first_name":"Hermann"},{"last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242","first_name":"Reinhold"},{"first_name":"Athanasios","last_name":"Mouchtaris","full_name":"Mouchtaris, Athanasios"}],"date_created":"2019-07-12T05:30:35Z","oa":"1","date_updated":"2022-01-06T06:51:12Z","citation":{"apa":"Menne, T., Heymann, J., Alexandridis, A., Irie, K., Zeyer, A., Kitza, M., … Mouchtaris, A. (2016). The RWTH/UPB/FORTH System Combination for the 4th CHiME Challenge Evaluation. 
In <i>Computer Speech and Language</i>.","mla":"Menne, Tobias, et al. “The RWTH/UPB/FORTH System Combination for the 4th CHiME Challenge Evaluation.” <i>Computer Speech and Language</i>, 2016.","bibtex":"@inproceedings{Menne_Heymann_Alexandridis_Irie_Zeyer_Kitza_Golik_Kulikov_Drude_Schlüter_et al._2016, title={The RWTH/UPB/FORTH System Combination for the 4th CHiME Challenge Evaluation}, booktitle={Computer Speech and Language}, author={Menne, Tobias and Heymann, Jahn and Alexandridis, Anastasios and Irie, Kazuki and Zeyer, Albert and Kitza, Markus and Golik, Pavel and Kulikov, Ilia and Drude, Lukas and Schlüter, Ralf and et al.}, year={2016} }","short":"T. Menne, J. Heymann, A. Alexandridis, K. Irie, A. Zeyer, M. Kitza, P. Golik, I. Kulikov, L. Drude, R. Schlüter, H. Ney, R. Haeb-Umbach, A. Mouchtaris, in: Computer Speech and Language, 2016.","ieee":"T. Menne <i>et al.</i>, “The RWTH/UPB/FORTH System Combination for the 4th CHiME Challenge Evaluation,” in <i>Computer Speech and Language</i>, 2016.","chicago":"Menne, Tobias, Jahn Heymann, Anastasios Alexandridis, Kazuki Irie, Albert Zeyer, Markus Kitza, Pavel Golik, et al. “The RWTH/UPB/FORTH System Combination for the 4th CHiME Challenge Evaluation.” In <i>Computer Speech and Language</i>, 2016.","ama":"Menne T, Heymann J, Alexandridis A, et al. The RWTH/UPB/FORTH System Combination for the 4th CHiME Challenge Evaluation. In: <i>Computer Speech and Language</i>. ; 2016."},"year":"2016"},{"language":[{"iso":"eng"}],"_id":"11920","department":[{"_id":"54"}],"user_id":"44006","abstract":[{"lang":"eng","text":"In this paper we demonstrate an algorithm to learn words from speech using non-parametric Bayesian hierarchical models in an unsupervised setting. We exploit the assumption of a hierarchical structure of speech, namely the formation of spoken words as a sequence of phonemes. We employ the Nested Hierarchical Pitman-Yor Language Model, which allows an a priori unknown and possibly unlimited number of words. 
We assume the n-gram probabilities of words, the m-gram probabilities of phoneme sequences in words and the phoneme sequences of the words themselves as latent variables to be learned. We evaluate the algorithm on a cross language task using an existing speech recognizer trained on English speech to decode speech in the Xitsonga language supplied for the 2015 ZeroSpeech challenge. We apply the learning algorithm on the resulting phoneme graphs and achieve the highest token precision and F score compared to present systems."}],"status":"public","publication":"38th German Conference on Pattern Recognition (GCPR 2016)","type":"conference","title":"Unsupervised Word Discovery from Speech using Bayesian Hierarchical Models","main_file_link":[{"open_access":"1","url":"https://groups.uni-paderborn.de/nt/pubs/2016/WaHa16.pdf"}],"date_updated":"2022-01-06T06:51:12Z","oa":"1","date_created":"2019-07-12T05:30:49Z","author":[{"last_name":"Walter","full_name":"Walter, Oliver","first_name":"Oliver"},{"first_name":"Reinhold","id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach"}],"year":"2016","citation":{"ama":"Walter O, Haeb-Umbach R. Unsupervised Word Discovery from Speech using Bayesian Hierarchical Models. In: <i>38th German Conference on Pattern Recognition (GCPR 2016)</i>. ; 2016.","ieee":"O. Walter and R. Haeb-Umbach, “Unsupervised Word Discovery from Speech using Bayesian Hierarchical Models,” in <i>38th German Conference on Pattern Recognition (GCPR 2016)</i>, 2016.","chicago":"Walter, Oliver, and Reinhold Haeb-Umbach. “Unsupervised Word Discovery from Speech Using Bayesian Hierarchical Models.” In <i>38th German Conference on Pattern Recognition (GCPR 2016)</i>, 2016.","mla":"Walter, Oliver, and Reinhold Haeb-Umbach. “Unsupervised Word Discovery from Speech Using Bayesian Hierarchical Models.” <i>38th German Conference on Pattern Recognition (GCPR 2016)</i>, 2016.","short":"O. Walter, R. 
Haeb-Umbach, in: 38th German Conference on Pattern Recognition (GCPR 2016), 2016.","bibtex":"@inproceedings{Walter_Haeb-Umbach_2016, title={Unsupervised Word Discovery from Speech using Bayesian Hierarchical Models}, booktitle={38th German Conference on Pattern Recognition (GCPR 2016)}, author={Walter, Oliver and Haeb-Umbach, Reinhold}, year={2016} }","apa":"Walter, O., &#38; Haeb-Umbach, R. (2016). Unsupervised Word Discovery from Speech using Bayesian Hierarchical Models. In <i>38th German Conference on Pattern Recognition (GCPR 2016)</i>."},"related_material":{"link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2016/WaHa16_Talk.pdf","relation":"supplementary_material","description":"Presentation"}]}},{"_id":"11890","user_id":"460","department":[{"_id":"54"}],"language":[{"iso":"eng"}],"type":"conference","publication":"24th European Signal Processing Conference (EUSIPCO 2016)","abstract":[{"lang":"eng","text":"In this paper we study the influence of directional radio patterns of Bluetooth low energy (BLE) beacons on smartphone localization accuracy and beacon network planning. A two-dimensional model of the power emission characteristic is derived from measurements of the radiation pattern of BLE beacons carried out in an RF chamber. The Cramer-Rao lower bound (CRLB) for position estimation is then derived for this directional power emission model. With this lower bound on the RMS positioning error the coverage of different beacon network configurations can be evaluated. 
For near-optimal network planning an evolutionary optimization algorithm for finding the best beacon placement is presented."}],"status":"public","date_updated":"2023-10-26T08:11:52Z","oa":"1","date_created":"2019-07-12T05:30:14Z","author":[{"first_name":"Joerg","last_name":"Schmalenstroeer","id":"460","full_name":"Schmalenstroeer, Joerg"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold"}],"title":"Investigations into Bluetooth Low Energy Localization Precision Limits","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2016/SchHaeb16.pdf","open_access":"1"}],"quality_controlled":"1","related_material":{"link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2016/SchHaeb16_Poster.pdf","relation":"supplementary_material","description":"Poster"}]},"year":"2016","citation":{"ama":"Schmalenstroeer J, Haeb-Umbach R. Investigations into Bluetooth Low Energy Localization Precision Limits. In: <i>24th European Signal Processing Conference (EUSIPCO 2016)</i>. ; 2016.","ieee":"J. Schmalenstroeer and R. Haeb-Umbach, “Investigations into Bluetooth Low Energy Localization Precision Limits,” 2016.","chicago":"Schmalenstroeer, Joerg, and Reinhold Haeb-Umbach. “Investigations into Bluetooth Low Energy Localization Precision Limits.” In <i>24th European Signal Processing Conference (EUSIPCO 2016)</i>, 2016.","apa":"Schmalenstroeer, J., &#38; Haeb-Umbach, R. (2016). Investigations into Bluetooth Low Energy Localization Precision Limits. <i>24th European Signal Processing Conference (EUSIPCO 2016)</i>.","mla":"Schmalenstroeer, Joerg, and Reinhold Haeb-Umbach. 
“Investigations into Bluetooth Low Energy Localization Precision Limits.” <i>24th European Signal Processing Conference (EUSIPCO 2016)</i>, 2016.","bibtex":"@inproceedings{Schmalenstroeer_Haeb-Umbach_2016, title={Investigations into Bluetooth Low Energy Localization Precision Limits}, booktitle={24th European Signal Processing Conference (EUSIPCO 2016)}, author={Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold}, year={2016} }","short":"J. Schmalenstroeer, R. Haeb-Umbach, in: 24th European Signal Processing Conference (EUSIPCO 2016), 2016."}},{"date_updated":"2022-01-06T06:51:08Z","oa":"1","author":[{"first_name":"Aleksej","full_name":"Chinaev, Aleksej","last_name":"Chinaev"},{"last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242","first_name":"Reinhold"}],"date_created":"2019-07-12T05:27:19Z","title":"On Optimal Smoothing in Minimum Statistics Based Noise Tracking","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2015/ChHa15.pdf","open_access":"1"}],"related_material":{"link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2015/ChHa15_Poster.pdf","relation":"supplementary_material","description":"Poster"}]},"year":"2015","page":"1785-1789","citation":{"apa":"Chinaev, A., &#38; Haeb-Umbach, R. (2015). On Optimal Smoothing in Minimum Statistics Based Noise Tracking. In <i>Interspeech 2015</i> (pp. 1785–1789).","bibtex":"@inproceedings{Chinaev_Haeb-Umbach_2015, title={On Optimal Smoothing in Minimum Statistics Based Noise Tracking}, booktitle={Interspeech 2015}, author={Chinaev, Aleksej and Haeb-Umbach, Reinhold}, year={2015}, pages={1785–1789} }","mla":"Chinaev, Aleksej, and Reinhold Haeb-Umbach. “On Optimal Smoothing in Minimum Statistics Based Noise Tracking.” <i>Interspeech 2015</i>, 2015, pp. 1785–89.","short":"A. Chinaev, R. Haeb-Umbach, in: Interspeech 2015, 2015, pp. 1785–1789.","ieee":"A. Chinaev and R. Haeb-Umbach, “On Optimal Smoothing in Minimum Statistics Based Noise Tracking,” in <i>Interspeech 2015</i>, 2015, pp. 
1785–1789.","chicago":"Chinaev, Aleksej, and Reinhold Haeb-Umbach. “On Optimal Smoothing in Minimum Statistics Based Noise Tracking.” In <i>Interspeech 2015</i>, 1785–89, 2015.","ama":"Chinaev A, Haeb-Umbach R. On Optimal Smoothing in Minimum Statistics Based Noise Tracking. In: <i>Interspeech 2015</i>. ; 2015:1785-1789."},"_id":"11739","department":[{"_id":"54"}],"user_id":"44006","keyword":["speech enhancement","noise tracking","optimal smoothing"],"language":[{"iso":"eng"}],"publication":"Interspeech 2015","type":"conference","abstract":[{"text":"Noise tracking is an important component of speech enhancement algorithms. Of the many noise trackers proposed, Minimum Statistics (MS) is a particularly popular one due to its simple parameterization and at the same time excellent performance. In this paper we propose to further reduce the number of MS parameters by giving an alternative derivation of an optimal smoothing constant. At the same time the noise tracking performance is improved as is demonstrated by experiments employing speech degraded by various noise types and at different SNR values.","lang":"eng"}],"status":"public"},{"status":"public","abstract":[{"text":"We present a semantic analysis technique for spoken input using Markov Logic Networks (MLNs). MLNs combine graphical models with first-order logic. They are particularly suitable for providing inference in the presence of inconsistent and incomplete data, which are typical of an automatic speech recognizer's (ASR) output in the presence of degraded speech. The target application is a speech interface to a home automation system to be operated by people with speech impairments, where the ASR output is particularly noisy. In order to cater for dysarthric speech with non-canonical phoneme realizations, acoustic representations of the input speech are learned in an unsupervised fashion. 
While training data transcripts are not required for the acoustic model training, the MLN training requires supervision, however, at a rather loose and abstract level. Results on two databases, one of them for dysarthric speech, show that MLN-based semantic analysis clearly outperforms baseline approaches employing non-negative matrix factorization, multinomial naive Bayes models, or support vector machines.","lang":"eng"}],"publication":"INTERSPEECH 2015","type":"conference","language":[{"iso":"eng"}],"department":[{"_id":"54"}],"user_id":"44006","_id":"11748","citation":{"apa":"Despotovic, V., Walter, O., &#38; Haeb-Umbach, R. (2015). Semantic Analysis of Spoken Input using Markov Logic Networks. In <i>INTERSPEECH 2015</i>.","bibtex":"@inproceedings{Despotovic_Walter_Haeb-Umbach_2015, title={Semantic Analysis of Spoken Input using Markov Logic Networks}, booktitle={INTERSPEECH 2015}, author={Despotovic, Vladimir and Walter, Oliver and Haeb-Umbach, Reinhold}, year={2015} }","short":"V. Despotovic, O. Walter, R. Haeb-Umbach, in: INTERSPEECH 2015, 2015.","mla":"Despotovic, Vladimir, et al. “Semantic Analysis of Spoken Input Using Markov Logic Networks.” <i>INTERSPEECH 2015</i>, 2015.","ieee":"V. Despotovic, O. Walter, and R. Haeb-Umbach, “Semantic Analysis of Spoken Input using Markov Logic Networks,” in <i>INTERSPEECH 2015</i>, 2015.","chicago":"Despotovic, Vladimir, Oliver Walter, and Reinhold Haeb-Umbach. “Semantic Analysis of Spoken Input Using Markov Logic Networks.” In <i>INTERSPEECH 2015</i>, 2015.","ama":"Despotovic V, Walter O, Haeb-Umbach R. Semantic Analysis of Spoken Input using Markov Logic Networks. In: <i>INTERSPEECH 2015</i>. 
; 2015."},"year":"2015","related_material":{"link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2015/DeWaHa_Poster.pdf","relation":"supplementary_material","description":"Poster"}]},"main_file_link":[{"open_access":"1","url":"https://groups.uni-paderborn.de/nt/pubs/2015/DeWaHa.pdf"}],"title":"Semantic Analysis of Spoken Input using Markov Logic Networks","date_created":"2019-07-12T05:27:30Z","author":[{"full_name":"Despotovic, Vladimir","last_name":"Despotovic","first_name":"Vladimir"},{"full_name":"Walter, Oliver","last_name":"Walter","first_name":"Oliver"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"}],"date_updated":"2022-01-06T06:51:08Z","oa":"1"},{"title":"DOA-Estimation based on a Complex Watson Kernel Method","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2015/DrJaHa15.pdf","open_access":"1"}],"oa":"1","date_updated":"2022-01-06T06:51:08Z","author":[{"first_name":"Lukas","last_name":"Drude","id":"11213","full_name":"Drude, Lukas"},{"first_name":"Florian","last_name":"Jacob","full_name":"Jacob, Florian"},{"id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach","first_name":"Reinhold"}],"date_created":"2019-07-12T05:27:38Z","year":"2015","citation":{"ama":"Drude L, Jacob F, Haeb-Umbach R. DOA-Estimation based on a Complex Watson Kernel Method. In: <i>23th European Signal Processing Conference (EUSIPCO 2015)</i>. ; 2015.","ieee":"L. Drude, F. Jacob, and R. Haeb-Umbach, “DOA-Estimation based on a Complex Watson Kernel Method,” in <i>23th European Signal Processing Conference (EUSIPCO 2015)</i>, 2015.","chicago":"Drude, Lukas, Florian Jacob, and Reinhold Haeb-Umbach. 
“DOA-Estimation Based on a Complex Watson Kernel Method.” In <i>23th European Signal Processing Conference (EUSIPCO 2015)</i>, 2015.","bibtex":"@inproceedings{Drude_Jacob_Haeb-Umbach_2015, title={DOA-Estimation based on a Complex Watson Kernel Method}, booktitle={23th European Signal Processing Conference (EUSIPCO 2015)}, author={Drude, Lukas and Jacob, Florian and Haeb-Umbach, Reinhold}, year={2015} }","short":"L. Drude, F. Jacob, R. Haeb-Umbach, in: 23th European Signal Processing Conference (EUSIPCO 2015), 2015.","mla":"Drude, Lukas, et al. “DOA-Estimation Based on a Complex Watson Kernel Method.” <i>23th European Signal Processing Conference (EUSIPCO 2015)</i>, 2015.","apa":"Drude, L., Jacob, F., &#38; Haeb-Umbach, R. (2015). DOA-Estimation based on a Complex Watson Kernel Method. In <i>23th European Signal Processing Conference (EUSIPCO 2015)</i>."},"related_material":{"link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2015/DrJaHa15_Presentation.pdf","description":"Presentation","relation":"supplementary_material"}]},"language":[{"iso":"eng"}],"_id":"11755","department":[{"_id":"54"}],"user_id":"44006","abstract":[{"text":"This contribution presents a Direction of Arrival (DoA) estimation algorithm based on the complex Watson distribution to incorporate both phase and level differences of captured microphone array signals. The derived algorithm is reviewed in the context of the Generalized State Coherence Transform (GSCT) on the one hand and a kernel density estimation method on the other hand. A thorough simulative evaluation yields insight into parameter selection and provides details on the performance for both directional and omni-directional microphones. 
A comparison to the well known Steered Response Power with Phase Transform (SRP-PHAT) algorithm and a state of the art DoA estimator which explicitly accounts for aliasing, shows in particular the advantages of presented algorithm if inter-sensor level differences are indicative of the DoA, as with directional microphones.","lang":"eng"}],"status":"public","publication":"23th European Signal Processing Conference (EUSIPCO 2015)","type":"conference"},{"citation":{"ieee":"J. Heymann, L. Drude, A. Chinaev, and R. Haeb-Umbach, “BLSTM supported GEV Beamformer Front-End for the 3RD CHiME Challenge,” in <i>Automatic Speech Recognition and Understanding Workshop (ASRU 2015)</i>, 2015.","chicago":"Heymann, Jahn, Lukas Drude, Aleksej Chinaev, and Reinhold Haeb-Umbach. “BLSTM Supported GEV Beamformer Front-End for the 3RD CHiME Challenge.” In <i>Automatic Speech Recognition and Understanding Workshop (ASRU 2015)</i>, 2015.","ama":"Heymann J, Drude L, Chinaev A, Haeb-Umbach R. BLSTM supported GEV Beamformer Front-End for the 3RD CHiME Challenge. In: <i>Automatic Speech Recognition and Understanding Workshop (ASRU 2015)</i>. ; 2015.","apa":"Heymann, J., Drude, L., Chinaev, A., &#38; Haeb-Umbach, R. (2015). BLSTM supported GEV Beamformer Front-End for the 3RD CHiME Challenge. In <i>Automatic Speech Recognition and Understanding Workshop (ASRU 2015)</i>.","bibtex":"@inproceedings{Heymann_Drude_Chinaev_Haeb-Umbach_2015, title={BLSTM supported GEV Beamformer Front-End for the 3RD CHiME Challenge}, booktitle={Automatic Speech Recognition and Understanding Workshop (ASRU 2015)}, author={Heymann, Jahn and Drude, Lukas and Chinaev, Aleksej and Haeb-Umbach, Reinhold}, year={2015} }","mla":"Heymann, Jahn, et al. “BLSTM Supported GEV Beamformer Front-End for the 3RD CHiME Challenge.” <i>Automatic Speech Recognition and Understanding Workshop (ASRU 2015)</i>, 2015.","short":"J. Heymann, L. Drude, A. Chinaev, R. 
Haeb-Umbach, in: Automatic Speech Recognition and Understanding Workshop (ASRU 2015), 2015."},"year":"2015","title":"BLSTM supported GEV Beamformer Front-End for the 3RD CHiME Challenge","author":[{"first_name":"Jahn","full_name":"Heymann, Jahn","id":"9168","last_name":"Heymann"},{"first_name":"Lukas","id":"11213","full_name":"Drude, Lukas","last_name":"Drude"},{"first_name":"Aleksej","last_name":"Chinaev","full_name":"Chinaev, Aleksej"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"}],"date_created":"2019-07-12T05:28:41Z","date_updated":"2022-01-06T06:51:09Z","status":"public","publication":"Automatic Speech Recognition and Understanding Workshop (ASRU 2015)","type":"conference","language":[{"iso":"eng"}],"department":[{"_id":"54"}],"user_id":"44006","_id":"11810"},{"department":[{"_id":"54"}],"user_id":"44006","_id":"11813","language":[{"iso":"eng"}],"keyword":["codecs","signal denoising","speech recognition","Bayesian feature enhancement","denoising autoencoder","reverberant ASR","single-channel speech recognition","speaker to microphone distances","unsupervised adaptation","Adaptation models","Noise reduction","Reverberation","Speech","Speech recognition","Training","deep neuronal networks","denoising autoencoder","feature enhancement","robust speech recognition"],"publication":"Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on","type":"conference","status":"public","abstract":[{"lang":"eng","text":"The parametric Bayesian Feature Enhancement (BFE) and a data-driven Denoising Autoencoder (DA) both bring performance gains in severe single-channel speech recognition conditions. The first can be adjusted to different conditions by an appropriate parameter setting, while the latter needs to be trained on conditions similar to the ones expected at decoding time, making it vulnerable to a mismatch between training and test conditions. 
We use a DNN backend and study reverberant ASR under three types of mismatch conditions: different room reverberation times, different speaker to microphone distances and the difference between artificially reverberated data and the recordings in a reverberant environment. We show that for these mismatch conditions BFE can provide the targets for a DA. This unsupervised adaptation provides a performance gain over the direct use of BFE and even enables to compensate for the mismatch of real and simulated reverberant data."}],"author":[{"full_name":"Heymann, Jahn","id":"9168","last_name":"Heymann","first_name":"Jahn"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242"},{"first_name":"P.","last_name":"Golik","full_name":"Golik, P."},{"first_name":"R.","full_name":"Schlueter, R.","last_name":"Schlueter"}],"date_created":"2019-07-12T05:28:45Z","date_updated":"2022-01-06T06:51:09Z","oa":"1","doi":"10.1109/ICASSP.2015.7178933","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2015/hey_icassp_2015.pdf","open_access":"1"}],"title":"Unsupervised adaptation of a denoising autoencoder by Bayesian Feature Enhancement for reverberant asr under mismatch conditions","page":"5053-5057","citation":{"ama":"Heymann J, Haeb-Umbach R, Golik P, Schlueter R. Unsupervised adaptation of a denoising autoencoder by Bayesian Feature Enhancement for reverberant asr under mismatch conditions. In: <i>Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference On</i>. ; 2015:5053-5057. doi:<a href=\"https://doi.org/10.1109/ICASSP.2015.7178933\">10.1109/ICASSP.2015.7178933</a>","chicago":"Heymann, Jahn, Reinhold Haeb-Umbach, P. Golik, and R. Schlueter. “Unsupervised Adaptation of a Denoising Autoencoder by Bayesian Feature Enhancement for Reverberant Asr under Mismatch Conditions.” In <i>Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference On</i>, 5053–57, 2015. 
<a href=\"https://doi.org/10.1109/ICASSP.2015.7178933\">https://doi.org/10.1109/ICASSP.2015.7178933</a>.","ieee":"J. Heymann, R. Haeb-Umbach, P. Golik, and R. Schlueter, “Unsupervised adaptation of a denoising autoencoder by Bayesian Feature Enhancement for reverberant asr under mismatch conditions,” in <i>Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on</i>, 2015, pp. 5053–5057.","apa":"Heymann, J., Haeb-Umbach, R., Golik, P., &#38; Schlueter, R. (2015). Unsupervised adaptation of a denoising autoencoder by Bayesian Feature Enhancement for reverberant asr under mismatch conditions. In <i>Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on</i> (pp. 5053–5057). <a href=\"https://doi.org/10.1109/ICASSP.2015.7178933\">https://doi.org/10.1109/ICASSP.2015.7178933</a>","short":"J. Heymann, R. Haeb-Umbach, P. Golik, R. Schlueter, in: Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference On, 2015, pp. 5053–5057.","bibtex":"@inproceedings{Heymann_Haeb-Umbach_Golik_Schlueter_2015, title={Unsupervised adaptation of a denoising autoencoder by Bayesian Feature Enhancement for reverberant asr under mismatch conditions}, DOI={<a href=\"https://doi.org/10.1109/ICASSP.2015.7178933\">10.1109/ICASSP.2015.7178933</a>}, booktitle={Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on}, author={Heymann, Jahn and Haeb-Umbach, Reinhold and Golik, P. and Schlueter, R.}, year={2015}, pages={5053–5057} }","mla":"Heymann, Jahn, et al. “Unsupervised Adaptation of a Denoising Autoencoder by Bayesian Feature Enhancement for Reverberant Asr under Mismatch Conditions.” <i>Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference On</i>, 2015, pp. 
5053–57, doi:<a href=\"https://doi.org/10.1109/ICASSP.2015.7178933\">10.1109/ICASSP.2015.7178933</a>."},"year":"2015"},{"_id":"11830","user_id":"44006","department":[{"_id":"54"}],"language":[{"iso":"eng"}],"type":"journal_article","publication":"ArXiv e-prints","abstract":[{"lang":"eng","text":"Joint audio-visual speaker tracking requires that the locations of microphones and cameras are known and that they are given in a common coordinate system. Sensor self-localization algorithms, however, are usually separately developed for either the acoustic or the visual modality and return their positions in a modality specific coordinate system, often with an unknown rotation, scaling and translation between the two. In this paper we propose two techniques to determine the positions of acoustic sensors in a common coordinate system, based on audio-visual correlates, i.e., events that are localized by both, microphones and cameras separately. The first approach maps the output of an acoustic self-calibration algorithm by estimating rotation, scale and translation to the visual coordinate system, while the second solves a joint system of equations with acoustic and visual directions of arrival as input. The evaluation of the two strategies reveals that joint calibration outperforms the mapping approach and achieves an overall calibration error of 0.20m even in reverberant environments."}],"status":"public","oa":"1","date_updated":"2022-01-06T06:51:11Z","author":[{"first_name":"Florian","full_name":"Jacob, Florian","last_name":"Jacob"},{"first_name":"Reinhold","full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach"}],"date_created":"2019-07-12T05:29:05Z","title":"Absolute Geometry Calibration of Distributed Microphone Arrays in an Audio-Visual Sensor Network","main_file_link":[{"open_access":"1","url":"https://groups.uni-paderborn.de/nt/pubs/2015/JaHa2015.pdf"}],"year":"2015","citation":{"ama":"Jacob F, Haeb-Umbach R. 
Absolute Geometry Calibration of Distributed Microphone Arrays in an Audio-Visual Sensor Network. <i>ArXiv e-prints</i>. 2015.","chicago":"Jacob, Florian, and Reinhold Haeb-Umbach. “Absolute Geometry Calibration of Distributed Microphone Arrays in an Audio-Visual Sensor Network.” <i>ArXiv E-Prints</i>, 2015.","ieee":"F. Jacob and R. Haeb-Umbach, “Absolute Geometry Calibration of Distributed Microphone Arrays in an Audio-Visual Sensor Network,” <i>ArXiv e-prints</i>, 2015.","bibtex":"@article{Jacob_Haeb-Umbach_2015, title={Absolute Geometry Calibration of Distributed Microphone Arrays in an Audio-Visual Sensor Network}, journal={ArXiv e-prints}, author={Jacob, Florian and Haeb-Umbach, Reinhold}, year={2015} }","short":"F. Jacob, R. Haeb-Umbach, ArXiv E-Prints (2015).","mla":"Jacob, Florian, and Reinhold Haeb-Umbach. “Absolute Geometry Calibration of Distributed Microphone Arrays in an Audio-Visual Sensor Network.” <i>ArXiv E-Prints</i>, 2015.","apa":"Jacob, F., &#38; Haeb-Umbach, R. (2015). Absolute Geometry Calibration of Distributed Microphone Arrays in an Audio-Visual Sensor Network. <i>ArXiv E-Prints</i>."}},{"related_material":{"link":[{"relation":"supplementary_material","description":"Sample-Chapter","url":"https://groups.uni-paderborn.de/nt/pubs/2015/RASR_Chap5.pdf"},{"url":"http://store.elsevier.com/9780128023983","relation":"supplementary_material","description":"Store"}]},"year":"2015","citation":{"ama":"Li J, Deng L, Haeb-Umbach R, Gong Y. <i>Robust Automatic Speech Recognition</i>. Elsevier; 2015.","chicago":"Li, Jinyu, Li Deng, Reinhold Haeb-Umbach, and Y. Gong. <i>Robust Automatic Speech Recognition</i>. Elsevier, 2015.","ieee":"J. Li, L. Deng, R. Haeb-Umbach, and Y. Gong, <i>Robust Automatic Speech Recognition</i>. Elsevier, 2015.","apa":"Li, J., Deng, L., Haeb-Umbach, R., &#38; Gong, Y. (2015). <i>Robust Automatic Speech Recognition</i>. Elsevier.","mla":"Li, Jinyu, et al. <i>Robust Automatic Speech Recognition</i>. Elsevier, 2015.","short":"J. 
Li, L. Deng, R. Haeb-Umbach, Y. Gong, Robust Automatic Speech Recognition, Elsevier, 2015.","bibtex":"@book{Li_Deng_Haeb-Umbach_Gong_2015, title={Robust Automatic Speech Recognition}, publisher={Elsevier}, author={Li, Jinyu and Deng, Li and Haeb-Umbach, Reinhold and Gong, Y.}, year={2015} }"},"publisher":"Elsevier","oa":"1","date_updated":"2022-01-06T06:51:11Z","date_created":"2019-07-12T05:29:49Z","author":[{"first_name":"Jinyu","full_name":"Li, Jinyu","last_name":"Li"},{"first_name":"Li","last_name":"Deng","full_name":"Deng, Li"},{"first_name":"Reinhold","id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach"},{"first_name":"Y.","full_name":"Gong, Y.","last_name":"Gong"}],"title":"Robust Automatic Speech Recognition","main_file_link":[{"open_access":"1","url":"http://store.elsevier.com/Robust-Automatic-Speech-Recognition/Jinyu-Li/isbn-9780128023983/"}],"type":"book","status":"public","_id":"11868","department":[{"_id":"54"}],"user_id":"44006","language":[{"iso":"eng"}]},{"abstract":[{"text":"Only a few studies exist on automatic emotion analysis of speech from children with Autism Spectrum Conditions (ASC). Out of these, some preliminary studies have recently focused on comparing the relevance of selected prosodic features against large sets of acoustic, spectral, and cepstral features; however, no study so far provided a comparison of performances across different languages. The present contribution aims to fill this white spot in the literature and provide insight by extensive evaluations carried out on three databases of prompted phrases collected in English, Swedish, and Hebrew, inducing nine emotion categories embedded in short-stories. The datasets contain speech of children with ASC and typically developing children under the same conditions. 
We evaluate automatic diagnosis and recognition of emotions in atypical children's voice over the nine categories including binary valence/arousal discrimination.","lang":"eng"}],"status":"public","publication":"INTERSPEECH 2015","type":"conference","language":[{"iso":"eng"}],"_id":"11875","department":[{"_id":"54"}],"user_id":"44006","year":"2015","citation":{"ieee":"E. Marchi <i>et al.</i>, “Typicality and Emotion in the Voice of Children with Autism Spectrum Condition: Evidence Across Three Languages,” in <i>INTERSPEECH 2015</i>, 2015.","chicago":"Marchi, Erik, Bjoern Schuller, Simon Baron-Cohen, Ofer Golan, Sven Boelte, Prerna Arora, and Reinhold Haeb-Umbach. “Typicality and Emotion in the Voice of Children with Autism Spectrum Condition: Evidence Across Three Languages.” In <i>INTERSPEECH 2015</i>, 2015.","ama":"Marchi E, Schuller B, Baron-Cohen S, et al. Typicality and Emotion in the Voice of Children with Autism Spectrum Condition: Evidence Across Three Languages. In: <i>INTERSPEECH 2015</i>. ; 2015.","mla":"Marchi, Erik, et al. “Typicality and Emotion in the Voice of Children with Autism Spectrum Condition: Evidence Across Three Languages.” <i>INTERSPEECH 2015</i>, 2015.","bibtex":"@inproceedings{Marchi_Schuller_Baron-Cohen_Golan_Boelte_Arora_Haeb-Umbach_2015, title={Typicality and Emotion in the Voice of Children with Autism Spectrum Condition: Evidence Across Three Languages}, booktitle={INTERSPEECH 2015}, author={Marchi, Erik and Schuller, Bjoern and Baron-Cohen, Simon and Golan, Ofer and Boelte, Sven and Arora, Prerna and Haeb-Umbach, Reinhold}, year={2015} }","short":"E. Marchi, B. Schuller, S. Baron-Cohen, O. Golan, S. Boelte, P. Arora, R. Haeb-Umbach, in: INTERSPEECH 2015, 2015.","apa":"Marchi, E., Schuller, B., Baron-Cohen, S., Golan, O., Boelte, S., Arora, P., &#38; Haeb-Umbach, R. (2015). Typicality and Emotion in the Voice of Children with Autism Spectrum Condition: Evidence Across Three Languages. 
In <i>INTERSPEECH 2015</i>."},"title":"Typicality and Emotion in the Voice of Children with Autism Spectrum Condition: Evidence Across Three Languages","main_file_link":[{"open_access":"1","url":"https://groups.uni-paderborn.de/nt/pubs/2015/MaScBaOfSvPrHa.pdf"}],"oa":"1","date_updated":"2022-01-06T06:51:11Z","author":[{"first_name":"Erik","full_name":"Marchi, Erik","last_name":"Marchi"},{"first_name":"Bjoern","last_name":"Schuller","full_name":"Schuller, Bjoern"},{"last_name":"Baron-Cohen","full_name":"Baron-Cohen, Simon","first_name":"Simon"},{"first_name":"Ofer","full_name":"Golan, Ofer","last_name":"Golan"},{"first_name":"Sven","full_name":"Boelte, Sven","last_name":"Boelte"},{"last_name":"Arora","full_name":"Arora, Prerna","first_name":"Prerna"},{"last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242","first_name":"Reinhold"}],"date_created":"2019-07-12T05:29:57Z"},{"publication":"40th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2015)","type":"conference","abstract":[{"text":"In this paper we present a source counting algorithm to determine the number of speakers in a speech mixture. In our proposed method, we model the histogram of estimated directions of arrival with a nonparametric Bayesian infinite Gaussian mixture model. As an alternative to classical model selection criteria and to avoid specifying the maximum number of mixture components in advance, a Dirichlet process prior is employed over the mixture components. This allows to automatically determine the optimal number of mixture components that most probably model the observations. 
We demonstrate by experiments that this model outperforms a parametric approach using a finite Gaussian mixture model with a Dirichlet distribution prior over the mixture weights.","lang":"eng"}],"status":"public","_id":"11919","department":[{"_id":"54"}],"user_id":"44006","language":[{"iso":"eng"}],"related_material":{"link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2015/WaDrHa15_Poster.pdf","relation":"supplementary_material","description":"Poster"}]},"year":"2015","citation":{"short":"O. Walter, L. Drude, R. Haeb-Umbach, in: 40th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2015), 2015.","bibtex":"@inproceedings{Walter_Drude_Haeb-Umbach_2015, title={Source Counting in Speech Mixtures by Nonparametric Bayesian Estimation of an infinite Gaussian Mixture Model}, booktitle={40th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2015)}, author={Walter, Oliver and Drude, Lukas and Haeb-Umbach, Reinhold}, year={2015} }","mla":"Walter, Oliver, et al. “Source Counting in Speech Mixtures by Nonparametric Bayesian Estimation of an Infinite Gaussian Mixture Model.” <i>40th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2015)</i>, 2015.","apa":"Walter, O., Drude, L., &#38; Haeb-Umbach, R. (2015). Source Counting in Speech Mixtures by Nonparametric Bayesian Estimation of an infinite Gaussian Mixture Model. In <i>40th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2015)</i>.","ama":"Walter O, Drude L, Haeb-Umbach R. Source Counting in Speech Mixtures by Nonparametric Bayesian Estimation of an infinite Gaussian Mixture Model. In: <i>40th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2015)</i>. ; 2015.","ieee":"O. Walter, L. Drude, and R. 
Haeb-Umbach, “Source Counting in Speech Mixtures by Nonparametric Bayesian Estimation of an infinite Gaussian Mixture Model,” in <i>40th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2015)</i>, 2015.","chicago":"Walter, Oliver, Lukas Drude, and Reinhold Haeb-Umbach. “Source Counting in Speech Mixtures by Nonparametric Bayesian Estimation of an Infinite Gaussian Mixture Model.” In <i>40th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2015)</i>, 2015."},"date_updated":"2022-01-06T06:51:12Z","oa":"1","author":[{"full_name":"Walter, Oliver","last_name":"Walter","first_name":"Oliver"},{"first_name":"Lukas","last_name":"Drude","full_name":"Drude, Lukas","id":"11213"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"}],"date_created":"2019-07-12T05:30:47Z","title":"Source Counting in Speech Mixtures by Nonparametric Bayesian Estimation of an infinite Gaussian Mixture Model","main_file_link":[{"open_access":"1","url":"https://groups.uni-paderborn.de/nt/pubs/2015/WaDrHa15.pdf"}]}]