[{"_id":"11873","project":[{"_id":"52","name":"Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"department":[{"_id":"54"}],"user_id":"40767","language":[{"iso":"eng"}],"publication":"ITG 2018, Oldenburg, Germany","type":"conference","abstract":[{"lang":"eng","text":"NARA-WPE is a Python software package providing implementations of the weighted prediction error (WPE) dereverberation algorithm. WPE has been shown to be a highly effective tool for speech dereverberation, thus improving the perceptual quality of the signal and improving the recognition performance of downstream automatic speech recognition (ASR). It is suitable both for single-channel and multi-channel applications. The package consists of (1) a Numpy implementation which can easily be integrated into a custom Python toolchain, and (2) a TensorFlow implementation which allows integration into larger computational graphs and enables backpropagation through WPE to train more advanced front-ends. This package comprises an iterative offline (batch) version, a block-online version, and a frame-online version which can be used in moderately low latency applications, e.g. 
digital speech assistants."}],"status":"public","oa":"1","date_updated":"2022-01-06T06:51:11Z","date_created":"2019-07-12T05:29:54Z","author":[{"first_name":"Lukas","full_name":"Drude, Lukas","id":"11213","last_name":"Drude"},{"full_name":"Heymann, Jahn","id":"9168","last_name":"Heymann","first_name":"Jahn"},{"first_name":"Christoph","full_name":"Boeddeker, Christoph","id":"40767","last_name":"Boeddeker"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"}],"title":"NARA-WPE: A Python package for weighted prediction error dereverberation in Numpy and Tensorflow for online and offline processing","main_file_link":[{"open_access":"1","url":"https://groups.uni-paderborn.de/nt/pubs/2018/ITG_2018_Drude_Paper.pdf"}],"related_material":{"link":[{"relation":"supplementary_material","description":"Poster","url":"https://groups.uni-paderborn.de/nt/pubs/2018/ITG_2018_Drude_Poster.pdf"}]},"year":"2018","citation":{"mla":"Drude, Lukas, et al. “NARA-WPE: A Python Package for Weighted Prediction Error Dereverberation in Numpy and Tensorflow for Online and Offline Processing.” <i>ITG 2018, Oldenburg, Germany</i>, 2018.","bibtex":"@inproceedings{Drude_Heymann_Boeddeker_Haeb-Umbach_2018, title={NARA-WPE: A Python package for weighted prediction error dereverberation in Numpy and Tensorflow for online and offline processing}, booktitle={ITG 2018, Oldenburg, Germany}, author={Drude, Lukas and Heymann, Jahn and Boeddeker, Christoph and Haeb-Umbach, Reinhold}, year={2018} }","short":"L. Drude, J. Heymann, C. Boeddeker, R. Haeb-Umbach, in: ITG 2018, Oldenburg, Germany, 2018.","apa":"Drude, L., Heymann, J., Boeddeker, C., &#38; Haeb-Umbach, R. (2018). NARA-WPE: A Python package for weighted prediction error dereverberation in Numpy and Tensorflow for online and offline processing. In <i>ITG 2018, Oldenburg, Germany</i>.","ieee":"L. Drude, J. Heymann, C. Boeddeker, and R. 
Haeb-Umbach, “NARA-WPE: A Python package for weighted prediction error dereverberation in Numpy and Tensorflow for online and offline processing,” in <i>ITG 2018, Oldenburg, Germany</i>, 2018.","chicago":"Drude, Lukas, Jahn Heymann, Christoph Boeddeker, and Reinhold Haeb-Umbach. “NARA-WPE: A Python Package for Weighted Prediction Error Dereverberation in Numpy and Tensorflow for Online and Offline Processing.” In <i>ITG 2018, Oldenburg, Germany</i>, 2018.","ama":"Drude L, Heymann J, Boeddeker C, Haeb-Umbach R. NARA-WPE: A Python package for weighted prediction error dereverberation in Numpy and Tensorflow for online and offline processing. In: <i>ITG 2018, Oldenburg, Germany</i>. ; 2018."}},{"main_file_link":[{"open_access":"1","url":"https://groups.uni-paderborn.de/nt/pubs/2018/ICASSP_2018_Boeddeker_Paper.pdf"}],"title":"Exploring Practical Aspects of Neural Mask-Based Beamforming for Far-Field Speech Recognition","date_created":"2019-07-30T14:53:58Z","author":[{"first_name":"Christoph","last_name":"Boeddeker","id":"40767","full_name":"Boeddeker, Christoph"},{"first_name":"Hakan","last_name":"Erdogan","full_name":"Erdogan, Hakan"},{"full_name":"Yoshioka, Takuya","last_name":"Yoshioka","first_name":"Takuya"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold"}],"date_updated":"2022-01-06T06:51:24Z","oa":"1","citation":{"short":"C. Boeddeker, H. Erdogan, T. Yoshioka, R. Haeb-Umbach, in: ICASSP 2018, Calgary, Canada, 2018.","bibtex":"@inproceedings{Boeddeker_Erdogan_Yoshioka_Haeb-Umbach_2018, title={Exploring Practical Aspects of Neural Mask-Based Beamforming for Far-Field Speech Recognition}, booktitle={ICASSP 2018, Calgary, Canada}, author={Boeddeker, Christoph and Erdogan, Hakan and Yoshioka, Takuya and Haeb-Umbach, Reinhold}, year={2018} }","mla":"Boeddeker, Christoph, et al. 
“Exploring Practical Aspects of Neural Mask-Based Beamforming for Far-Field Speech Recognition.” <i>ICASSP 2018, Calgary, Canada</i>, 2018.","apa":"Boeddeker, C., Erdogan, H., Yoshioka, T., &#38; Haeb-Umbach, R. (2018). Exploring Practical Aspects of Neural Mask-Based Beamforming for Far-Field Speech Recognition. In <i>ICASSP 2018, Calgary, Canada</i>.","ama":"Boeddeker C, Erdogan H, Yoshioka T, Haeb-Umbach R. Exploring Practical Aspects of Neural Mask-Based Beamforming for Far-Field Speech Recognition. In: <i>ICASSP 2018, Calgary, Canada</i>. ; 2018.","ieee":"C. Boeddeker, H. Erdogan, T. Yoshioka, and R. Haeb-Umbach, “Exploring Practical Aspects of Neural Mask-Based Beamforming for Far-Field Speech Recognition,” in <i>ICASSP 2018, Calgary, Canada</i>, 2018.","chicago":"Boeddeker, Christoph, Hakan Erdogan, Takuya Yoshioka, and Reinhold Haeb-Umbach. “Exploring Practical Aspects of Neural Mask-Based Beamforming for Far-Field Speech Recognition.” In <i>ICASSP 2018, Calgary, Canada</i>, 2018."},"year":"2018","related_material":{"link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2018/ICASSP_2018_Boeddeker_Slides.pdf","description":"Slides","relation":"supplementary_material"}]},"language":[{"iso":"eng"}],"user_id":"44006","department":[{"_id":"54"}],"_id":"12901","status":"public","abstract":[{"lang":"eng","text":"This work examines acoustic beamformers employing neural networks (NNs) for mask prediction as front-end for automatic speech recognition (ASR) systems for practical scenarios like voice-enabled home devices. To test the versatility of the mask predicting network, the system is evaluated with different recording hardware, different microphone array designs, and different acoustic models of the downstream ASR system. Significant gains in recognition accuracy are obtained in all configurations despite the fact that the NN had been trained on mismatched data. 
Unlike previous work, the NN is trained on a feature level objective, which gives some performance advantage over a mask related criterion. Furthermore, different approaches for realizing online, or adaptive, NN-based beamforming are explored, where the online algorithms still show significant gains compared to the baseline performance."}],"type":"conference","publication":"ICASSP 2018, Calgary, Canada"},{"date_created":"2019-07-30T14:35:15Z","author":[{"first_name":"Christoph","id":"40767","full_name":"Boeddeker, Christoph","last_name":"Boeddeker"},{"first_name":"Jens","id":"27643","full_name":"Heitkaemper, Jens","last_name":"Heitkaemper"},{"last_name":"Schmalenstroeer","full_name":"Schmalenstroeer, Joerg","id":"460","first_name":"Joerg"},{"first_name":"Lukas","last_name":"Drude","id":"11213","full_name":"Drude, Lukas"},{"first_name":"Jahn","last_name":"Heymann","full_name":"Heymann, Jahn"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"}],"oa":"1","date_updated":"2023-10-26T08:14:15Z","main_file_link":[{"open_access":"1","url":"https://groups.uni-paderborn.de/nt/pubs/2018/INTERSPEECH_2018_Heitkaemper_Paper.pdf"}],"title":"Front-End Processing for the CHiME-5 Dinner Party Scenario","related_material":{"link":[{"description":"Poster","relation":"supplementary_material","url":"https://groups.uni-paderborn.de/nt/pubs/2018/INTERSPEECH_2018_Heitkaemper_Poster.pdf"}]},"quality_controlled":"1","citation":{"apa":"Boeddeker, C., Heitkaemper, J., Schmalenstroeer, J., Drude, L., Heymann, J., &#38; Haeb-Umbach, R. (2018). Front-End Processing for the CHiME-5 Dinner Party Scenario. <i>Proc. CHiME 2018 Workshop on Speech Processing in Everyday Environments, Hyderabad, India</i>.","bibtex":"@inproceedings{Boeddeker_Heitkaemper_Schmalenstroeer_Drude_Heymann_Haeb-Umbach_2018, title={Front-End Processing for the CHiME-5 Dinner Party Scenario}, booktitle={Proc. 
CHiME 2018 Workshop on Speech Processing in Everyday Environments, Hyderabad, India}, author={Boeddeker, Christoph and Heitkaemper, Jens and Schmalenstroeer, Joerg and Drude, Lukas and Heymann, Jahn and Haeb-Umbach, Reinhold}, year={2018} }","mla":"Boeddeker, Christoph, et al. “Front-End Processing for the CHiME-5 Dinner Party Scenario.” <i>Proc. CHiME 2018 Workshop on Speech Processing in Everyday Environments, Hyderabad, India</i>, 2018.","short":"C. Boeddeker, J. Heitkaemper, J. Schmalenstroeer, L. Drude, J. Heymann, R. Haeb-Umbach, in: Proc. CHiME 2018 Workshop on Speech Processing in Everyday Environments, Hyderabad, India, 2018.","chicago":"Boeddeker, Christoph, Jens Heitkaemper, Joerg Schmalenstroeer, Lukas Drude, Jahn Heymann, and Reinhold Haeb-Umbach. “Front-End Processing for the CHiME-5 Dinner Party Scenario.” In <i>Proc. CHiME 2018 Workshop on Speech Processing in Everyday Environments, Hyderabad, India</i>, 2018.","ieee":"C. Boeddeker, J. Heitkaemper, J. Schmalenstroeer, L. Drude, J. Heymann, and R. Haeb-Umbach, “Front-End Processing for the CHiME-5 Dinner Party Scenario,” 2018.","ama":"Boeddeker C, Heitkaemper J, Schmalenstroeer J, Drude L, Heymann J, Haeb-Umbach R. Front-End Processing for the CHiME-5 Dinner Party Scenario. In: <i>Proc. CHiME 2018 Workshop on Speech Processing in Everyday Environments, Hyderabad, India</i>. ; 2018."},"year":"2018","department":[{"_id":"54"}],"user_id":"460","_id":"12899","project":[{"_id":"52","name":"Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"language":[{"iso":"eng"}],"publication":"Proc. CHiME 2018 Workshop on Speech Processing in Everyday Environments, Hyderabad, India","type":"conference","status":"public","abstract":[{"lang":"eng","text":"This contribution presents a speech enhancement system for the CHiME-5 Dinner Party Scenario. The front-end employs multi-channel linear time-variant filtering and achieves its gains without the use of a neural network. 
We present an adaptation of blind source separation techniques to the CHiME-5 database which we call Guided Source Separation (GSS). Using the baseline acoustic and language model, the combination of Weighted Prediction Error based dereverberation, guided source separation, and beamforming reduces the WER by 10.54% (relative) for the single array track and by 21.12% (relative) on the multiple array track."}]},{"year":"2018","citation":{"short":"M. Kitza, W. Michel, C. Boeddeker, J. Heitkaemper, T. Menne, R. Schlüter, H. Ney, J. Schmalenstroeer, L. Drude, J. Heymann, R. Haeb-Umbach, in: Proc. CHiME 2018 Workshop on Speech Processing in Everyday Environments, Hyderabad, India, 2018.","bibtex":"@inproceedings{Kitza_Michel_Boeddeker_Heitkaemper_Menne_Schlüter_Ney_Schmalenstroeer_Drude_Heymann_et al._2018, title={The RWTH/UPB System Combination for the CHiME 2018 Workshop}, booktitle={Proc. CHiME 2018 Workshop on Speech Processing in Everyday Environments, Hyderabad, India}, author={Kitza, Markus and Michel, Wilfried and Boeddeker, Christoph and Heitkaemper, Jens and Menne, Tobias and Schlüter, Ralf and Ney, Hermann and Schmalenstroeer, Joerg and Drude, Lukas and Heymann, Jahn and et al.}, year={2018} }","mla":"Kitza, Markus, et al. “The RWTH/UPB System Combination for the CHiME 2018 Workshop.” <i>Proc. CHiME 2018 Workshop on Speech Processing in Everyday Environments, Hyderabad, India</i>, 2018.","apa":"Kitza, M., Michel, W., Boeddeker, C., Heitkaemper, J., Menne, T., Schlüter, R., Ney, H., Schmalenstroeer, J., Drude, L., Heymann, J., &#38; Haeb-Umbach, R. (2018). The RWTH/UPB System Combination for the CHiME 2018 Workshop. <i>Proc. CHiME 2018 Workshop on Speech Processing in Everyday Environments, Hyderabad, India</i>.","ama":"Kitza M, Michel W, Boeddeker C, et al. The RWTH/UPB System Combination for the CHiME 2018 Workshop. In: <i>Proc. CHiME 2018 Workshop on Speech Processing in Everyday Environments, Hyderabad, India</i>. 
; 2018.","chicago":"Kitza, Markus, Wilfried Michel, Christoph Boeddeker, Jens Heitkaemper, Tobias Menne, Ralf Schlüter, Hermann Ney, et al. “The RWTH/UPB System Combination for the CHiME 2018 Workshop.” In <i>Proc. CHiME 2018 Workshop on Speech Processing in Everyday Environments, Hyderabad, India</i>, 2018.","ieee":"M. Kitza <i>et al.</i>, “The RWTH/UPB System Combination for the CHiME 2018 Workshop,” 2018."},"quality_controlled":"1","title":"The RWTH/UPB System Combination for the CHiME 2018 Workshop","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2018/INTERSPEECH_2018_Heitkaemper_RWTH_Paper.pdf","open_access":"1"}],"date_updated":"2023-10-26T08:12:14Z","oa":"1","date_created":"2019-07-12T05:29:58Z","author":[{"first_name":"Markus","last_name":"Kitza","full_name":"Kitza, Markus"},{"full_name":"Michel, Wilfried","last_name":"Michel","first_name":"Wilfried"},{"last_name":"Boeddeker","full_name":"Boeddeker, Christoph","id":"40767","first_name":"Christoph"},{"last_name":"Heitkaemper","id":"27643","full_name":"Heitkaemper, Jens","first_name":"Jens"},{"first_name":"Tobias","full_name":"Menne, Tobias","last_name":"Menne"},{"full_name":"Schlüter, Ralf","last_name":"Schlüter","first_name":"Ralf"},{"last_name":"Ney","full_name":"Ney, Hermann","first_name":"Hermann"},{"first_name":"Joerg","last_name":"Schmalenstroeer","full_name":"Schmalenstroeer, Joerg","id":"460"},{"first_name":"Lukas","id":"11213","full_name":"Drude, Lukas","last_name":"Drude"},{"first_name":"Jahn","last_name":"Heymann","id":"9168","full_name":"Heymann, Jahn"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold"}],"abstract":[{"text":"This paper describes the systems for the single-array track and the multiple-array track of the 5th CHiME Challenge. The final system is a combination of multiple systems, using Confusion Network Combination (CNC). 
The different systems presented here are utilizing different front-ends and training sets for a Bidirectional Long Short-Term Memory (BLSTM) Acoustic Model (AM). The front-end was replaced by enhancements provided by Paderborn University [1]. The back-end has been implemented using RASR [2] and RETURNN [3]. Additionally, a system combination including the hypothesis word graphs from the system of the submission [1] has been performed, which results in the final best system.","lang":"eng"}],"status":"public","type":"conference","publication":"Proc. CHiME 2018 Workshop on Speech Processing in Everyday Environments, Hyderabad, India","language":[{"iso":"eng"}],"_id":"11876","user_id":"460","department":[{"_id":"54"}]},{"type":"report","abstract":[{"text":"This report describes the computation of gradients by algorithmic differentiation for statistically optimum beamforming operations. Especially the derivation of complex-valued functions is a key component of this approach. Therefore the real-valued algorithmic differentiation is extended via the complex-valued chain rule. In addition to the basic mathematic operations the derivative of the eigenvalue problem with complex-valued eigenvectors is one of the key results of this report. The potential of this approach is shown with experimental results on the CHiME-3 challenge database. There, the beamforming task is used as a front-end for an ASR system. With the developed derivatives a joint optimization of a speech enhancement and speech recognition system w.r.t. 
the recognition optimization criterion is possible.","lang":"eng"}],"status":"public","_id":"11735","user_id":"40767","department":[{"_id":"54"}],"language":[{"iso":"eng"}],"year":"2017","citation":{"bibtex":"@book{Boeddeker_Hanebrink_Drude_Heymann_Haeb-Umbach_2017, title={On the Computation of Complex-valued Gradients with Application to Statistically Optimum Beamforming}, author={Boeddeker, Christoph and Hanebrink, Patrick and Drude, Lukas and Heymann, Jahn and Haeb-Umbach, Reinhold}, year={2017} }","mla":"Boeddeker, Christoph, et al. <i>On the Computation of Complex-Valued Gradients with Application to Statistically Optimum Beamforming</i>. 2017.","short":"C. Boeddeker, P. Hanebrink, L. Drude, J. Heymann, R. Haeb-Umbach, On the Computation of Complex-Valued Gradients with Application to Statistically Optimum Beamforming, 2017.","apa":"Boeddeker, C., Hanebrink, P., Drude, L., Heymann, J., &#38; Haeb-Umbach, R. (2017). <i>On the Computation of Complex-valued Gradients with Application to Statistically Optimum Beamforming</i>.","ama":"Boeddeker C, Hanebrink P, Drude L, Heymann J, Haeb-Umbach R. <i>On the Computation of Complex-Valued Gradients with Application to Statistically Optimum Beamforming</i>.; 2017.","ieee":"C. Boeddeker, P. Hanebrink, L. Drude, J. Heymann, and R. Haeb-Umbach, <i>On the Computation of Complex-valued Gradients with Application to Statistically Optimum Beamforming</i>. 2017.","chicago":"Boeddeker, Christoph, Patrick Hanebrink, Lukas Drude, Jahn Heymann, and Reinhold Haeb-Umbach. 
<i>On the Computation of Complex-Valued Gradients with Application to Statistically Optimum Beamforming</i>, 2017."},"oa":"1","date_updated":"2022-01-06T06:51:08Z","author":[{"full_name":"Boeddeker, Christoph","id":"40767","last_name":"Boeddeker","first_name":"Christoph"},{"first_name":"Patrick","full_name":"Hanebrink, Patrick","last_name":"Hanebrink"},{"first_name":"Lukas","last_name":"Drude","full_name":"Drude, Lukas","id":"11213"},{"first_name":"Jahn","last_name":"Heymann","id":"9168","full_name":"Heymann, Jahn"},{"first_name":"Reinhold","full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach"}],"date_created":"2019-07-12T05:27:15Z","title":"On the Computation of Complex-valued Gradients with Application to Statistically Optimum Beamforming","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2017/ArXiv_2017_BoeddekerHanebrinkHaeb_Article.pdf","open_access":"1"}]},{"main_file_link":[{"open_access":"1","url":"https://groups.uni-paderborn.de/nt/pubs/2017/icassp_2017_boeddeker_paper.pdf"}],"title":"Optimizing Neural-Network Supported Acoustic Beamforming by Algorithmic Differentiation","author":[{"id":"40767","full_name":"Boeddeker, Christoph","last_name":"Boeddeker","first_name":"Christoph"},{"full_name":"Hanebrink, Patrick","last_name":"Hanebrink","first_name":"Patrick"},{"last_name":"Drude","full_name":"Drude, Lukas","id":"11213","first_name":"Lukas"},{"first_name":"Jahn","last_name":"Heymann","id":"9168","full_name":"Heymann, Jahn"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"}],"date_created":"2019-07-12T05:27:16Z","oa":"1","date_updated":"2022-01-06T06:51:08Z","citation":{"bibtex":"@inproceedings{Boeddeker_Hanebrink_Drude_Heymann_Haeb-Umbach_2017, title={Optimizing Neural-Network Supported Acoustic Beamforming by Algorithmic Differentiation}, booktitle={Proc. IEEE Intl. Conf. 
on Acoustics, Speech and Signal Processing (ICASSP)}, author={Boeddeker, Christoph and Hanebrink, Patrick and Drude, Lukas and Heymann, Jahn and Haeb-Umbach, Reinhold}, year={2017} }","short":"C. Boeddeker, P. Hanebrink, L. Drude, J. Heymann, R. Haeb-Umbach, in: Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP), 2017.","mla":"Boeddeker, Christoph, et al. “Optimizing Neural-Network Supported Acoustic Beamforming by Algorithmic Differentiation.” <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2017.","apa":"Boeddeker, C., Hanebrink, P., Drude, L., Heymann, J., &#38; Haeb-Umbach, R. (2017). Optimizing Neural-Network Supported Acoustic Beamforming by Algorithmic Differentiation. In <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>.","ama":"Boeddeker C, Hanebrink P, Drude L, Heymann J, Haeb-Umbach R. Optimizing Neural-Network Supported Acoustic Beamforming by Algorithmic Differentiation. In: <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>. ; 2017.","chicago":"Boeddeker, Christoph, Patrick Hanebrink, Lukas Drude, Jahn Heymann, and Reinhold Haeb-Umbach. “Optimizing Neural-Network Supported Acoustic Beamforming by Algorithmic Differentiation.” In <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2017.","ieee":"C. Boeddeker, P. Hanebrink, L. Drude, J. Heymann, and R. Haeb-Umbach, “Optimizing Neural-Network Supported Acoustic Beamforming by Algorithmic Differentiation,” in <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2017."},"year":"2017","language":[{"iso":"eng"}],"department":[{"_id":"54"}],"user_id":"44006","_id":"11736","status":"public","abstract":[{"text":"In this paper we show how a neural network for spectral mask estimation for an acoustic beamformer can be optimized by algorithmic differentiation. 
Using the beamformer output SNR as the objective function to maximize, the gradient is propagated through the beamformer all the way to the neural network which provides the clean speech and noise masks from which the beamformer coefficients are estimated by eigenvalue decomposition. A key theoretical result is the derivative of an eigenvalue problem involving complex-valued eigenvectors. Experimental results on the CHiME-3 challenge database demonstrate the effectiveness of the approach. The tools developed in this paper are a key component for an end-to-end optimization of speech enhancement and speech recognition.","lang":"eng"}],"publication":"Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)","type":"conference"},{"project":[{"_id":"52","name":"Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"_id":"11809","user_id":"40767","department":[{"_id":"54"}],"language":[{"iso":"eng"}],"type":"conference","publication":"Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)","abstract":[{"lang":"eng","text":"This paper presents an end-to-end training approach for a beamformer-supported multi-channel ASR system. A neural network which estimates masks for a statistically optimum beamformer is jointly trained with a network for acoustic modeling. To update its parameters, we propagate the gradients from the acoustic model all the way through feature extraction and the complex valued beamforming operation. Besides avoiding a mismatch between the front-end and the back-end, this approach also eliminates the need for stereo data, i.e., the parallel availability of clean and noisy versions of the signals. Instead, it can be trained with real noisy multichannel data only. Also, relying on the signal statistics for beamforming, the approach makes no assumptions on the configuration of the microphone array. 
We further observe a performance gain through joint training in terms of word error rate in an evaluation of the system on the CHiME 4 dataset."}],"status":"public","date_updated":"2022-01-06T06:51:09Z","oa":"1","date_created":"2019-07-12T05:28:40Z","author":[{"first_name":"Jahn","id":"9168","full_name":"Heymann, Jahn","last_name":"Heymann"},{"id":"11213","full_name":"Drude, Lukas","last_name":"Drude","first_name":"Lukas"},{"full_name":"Boeddeker, Christoph","id":"40767","last_name":"Boeddeker","first_name":"Christoph"},{"first_name":"Patrick","full_name":"Hanebrink, Patrick","last_name":"Hanebrink"},{"first_name":"Reinhold","id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach"}],"title":"BEAMNET: End-to-End Training of a Beamformer-Supported Multi-Channel ASR System","main_file_link":[{"open_access":"1","url":"https://groups.uni-paderborn.de/nt/pubs/2017/icassp_2017_heymann_paper.pdf"}],"related_material":{"link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2017/icassp_2017_heymann_poster.pdf","description":"Poster","relation":"supplementary_material"}]},"year":"2017","citation":{"ama":"Heymann J, Drude L, Boeddeker C, Hanebrink P, Haeb-Umbach R. BEAMNET: End-to-End Training of a Beamformer-Supported Multi-Channel ASR System. In: <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>. ; 2017.","chicago":"Heymann, Jahn, Lukas Drude, Christoph Boeddeker, Patrick Hanebrink, and Reinhold Haeb-Umbach. “BEAMNET: End-to-End Training of a Beamformer-Supported Multi-Channel ASR System.” In <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2017.","ieee":"J. Heymann, L. Drude, C. Boeddeker, P. Hanebrink, and R. Haeb-Umbach, “BEAMNET: End-to-End Training of a Beamformer-Supported Multi-Channel ASR System,” in <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2017.","apa":"Heymann, J., Drude, L., Boeddeker, C., Hanebrink, P., &#38; Haeb-Umbach, R. (2017). 
BEAMNET: End-to-End Training of a Beamformer-Supported Multi-Channel ASR System. In <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>.","bibtex":"@inproceedings{Heymann_Drude_Boeddeker_Hanebrink_Haeb-Umbach_2017, title={BEAMNET: End-to-End Training of a Beamformer-Supported Multi-Channel ASR System}, booktitle={Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)}, author={Heymann, Jahn and Drude, Lukas and Boeddeker, Christoph and Hanebrink, Patrick and Haeb-Umbach, Reinhold}, year={2017} }","mla":"Heymann, Jahn, et al. “BEAMNET: End-to-End Training of a Beamformer-Supported Multi-Channel ASR System.” <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2017.","short":"J. Heymann, L. Drude, C. Boeddeker, P. Hanebrink, R. Haeb-Umbach, in: Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP), 2017."}},{"oa":"1","date_updated":"2023-10-26T08:12:05Z","author":[{"last_name":"Schmalenstroeer","full_name":"Schmalenstroeer, Joerg","id":"460","first_name":"Joerg"},{"first_name":"Jahn","last_name":"Heymann","id":"9168","full_name":"Heymann, Jahn"},{"id":"11213","full_name":"Drude, Lukas","last_name":"Drude","first_name":"Lukas"},{"first_name":"Christoph","id":"40767","full_name":"Boeddeker, Christoph","last_name":"Boeddeker"},{"first_name":"Reinhold","id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach"}],"date_created":"2019-07-12T05:30:20Z","title":"Multi-Stage Coherence Drift Based Sampling Rate Synchronization for Acoustic Beamforming","main_file_link":[{"open_access":"1","url":"https://groups.uni-paderborn.de/nt/pubs/2017/MMSP_2017_SchHaeb.pdf"}],"quality_controlled":"1","related_material":{"link":[{"description":"Poster","relation":"supplementary_material","url":"https://groups.uni-paderborn.de/nt/pubs/2017/MMSP_2017_SchHaeb_poster.pdf"}]},"year":"2017","citation":{"apa":"Schmalenstroeer, J., Heymann, J., Drude, L., Boeddeker, C., &#38; 
Haeb-Umbach, R. (2017). Multi-Stage Coherence Drift Based Sampling Rate Synchronization for Acoustic Beamforming. <i>IEEE 19th International Workshop on Multimedia Signal Processing (MMSP)</i>.","mla":"Schmalenstroeer, Joerg, et al. “Multi-Stage Coherence Drift Based Sampling Rate Synchronization for Acoustic Beamforming.” <i>IEEE 19th International Workshop on Multimedia Signal Processing (MMSP)</i>, 2017.","short":"J. Schmalenstroeer, J. Heymann, L. Drude, C. Boeddeker, R. Haeb-Umbach, in: IEEE 19th International Workshop on Multimedia Signal Processing (MMSP), 2017.","bibtex":"@inproceedings{Schmalenstroeer_Heymann_Drude_Boeddeker_Haeb-Umbach_2017, title={Multi-Stage Coherence Drift Based Sampling Rate Synchronization for Acoustic Beamforming}, booktitle={IEEE 19th International Workshop on Multimedia Signal Processing (MMSP)}, author={Schmalenstroeer, Joerg and Heymann, Jahn and Drude, Lukas and Boeddeker, Christoph and Haeb-Umbach, Reinhold}, year={2017} }","ama":"Schmalenstroeer J, Heymann J, Drude L, Boeddeker C, Haeb-Umbach R. Multi-Stage Coherence Drift Based Sampling Rate Synchronization for Acoustic Beamforming. In: <i>IEEE 19th International Workshop on Multimedia Signal Processing (MMSP)</i>. ; 2017.","ieee":"J. Schmalenstroeer, J. Heymann, L. Drude, C. Boeddeker, and R. Haeb-Umbach, “Multi-Stage Coherence Drift Based Sampling Rate Synchronization for Acoustic Beamforming,” 2017.","chicago":"Schmalenstroeer, Joerg, Jahn Heymann, Lukas Drude, Christoph Boeddeker, and Reinhold Haeb-Umbach. 
“Multi-Stage Coherence Drift Based Sampling Rate Synchronization for Acoustic Beamforming.” In <i>IEEE 19th International Workshop on Multimedia Signal Processing (MMSP)</i>, 2017."},"_id":"11895","user_id":"460","department":[{"_id":"54"}],"language":[{"iso":"eng"}],"type":"conference","publication":"IEEE 19th International Workshop on Multimedia Signal Processing (MMSP)","abstract":[{"lang":"eng","text":"Multi-channel speech enhancement algorithms rely on a synchronous sampling of the microphone signals. This, however, cannot always be guaranteed, especially if the sensors are distributed in an environment. To avoid performance degradation the sampling rate offset needs to be estimated and compensated for. In this contribution we extend the recently proposed coherence drift based method in two important directions. First, the increasing phase shift in the short-time Fourier transform domain is estimated from the coherence drift in a Matched Filter-like fashion, where intermediate estimates are weighted by their instantaneous SNR. Second, an observed bias is removed by iterating between offset estimation and compensation by resampling a couple of times. The effectiveness of the proposed method is demonstrated by speech recognition results on the output of a beamformer with and without sampling rate offset compensation between the input channels. We compare MVDR and maximum-SNR beamformers in reverberant environments and further show that both benefit from a novel phase normalization, which we also propose in this contribution."}],"status":"public"},{"year":"2016","citation":{"ama":"Drude L, Boeddeker C, Haeb-Umbach R. Blind Speech Separation based on Complex Spherical k-Mode Clustering. In: <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>. ; 2016.","chicago":"Drude, Lukas, Christoph Boeddeker, and Reinhold Haeb-Umbach. “Blind Speech Separation Based on Complex Spherical K-Mode Clustering.” In <i>Proc. IEEE Intl. Conf. 
on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2016.","ieee":"L. Drude, C. Boeddeker, and R. Haeb-Umbach, “Blind Speech Separation based on Complex Spherical k-Mode Clustering,” in <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2016.","apa":"Drude, L., Boeddeker, C., &#38; Haeb-Umbach, R. (2016). Blind Speech Separation based on Complex Spherical k-Mode Clustering. In <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>.","short":"L. Drude, C. Boeddeker, R. Haeb-Umbach, in: Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP), 2016.","bibtex":"@inproceedings{Drude_Boeddeker_Haeb-Umbach_2016, title={Blind Speech Separation based on Complex Spherical k-Mode Clustering}, booktitle={Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)}, author={Drude, Lukas and Boeddeker, Christoph and Haeb-Umbach, Reinhold}, year={2016} }","mla":"Drude, Lukas, et al. “Blind Speech Separation Based on Complex Spherical K-Mode Clustering.” <i>Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2016."},"related_material":{"link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2016/icassp_2016_drude_slides.pdf","relation":"supplementary_material","description":"Slides"}]},"title":"Blind Speech Separation based on Complex Spherical k-Mode Clustering","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2016/icassp_2016_drude_paper.pdf","open_access":"1"}],"oa":"1","date_updated":"2022-01-06T06:51:08Z","date_created":"2019-07-12T05:27:33Z","author":[{"last_name":"Drude","full_name":"Drude, Lukas","id":"11213","first_name":"Lukas"},{"first_name":"Christoph","last_name":"Boeddeker","id":"40767","full_name":"Boeddeker, Christoph"},{"last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242","first_name":"Reinhold"}],"status":"public","publication":"Proc. IEEE Intl. Conf. 
on Acoustics, Speech and Signal Processing (ICASSP)","type":"conference","language":[{"iso":"eng"}],"_id":"11751","department":[{"_id":"54"}],"user_id":"44006"}]
