[{"citation":{"ieee":"T. Gburrek, C. Boeddeker, T. von Neumann, T. Cord-Landwehr, J. Schmalenstroeer, and R. Haeb-Umbach, <i>A Meeting Transcription System for an Ad-Hoc Acoustic Sensor Network</i>. arXiv, 2022.","chicago":"Gburrek, Tobias, Christoph Boeddeker, Thilo von Neumann, Tobias Cord-Landwehr, Joerg Schmalenstroeer, and Reinhold Haeb-Umbach. <i>A Meeting Transcription System for an Ad-Hoc Acoustic Sensor Network</i>. arXiv, 2022. <a href=\"https://doi.org/10.48550/ARXIV.2205.00944\">https://doi.org/10.48550/ARXIV.2205.00944</a>.","ama":"Gburrek T, Boeddeker C, von Neumann T, Cord-Landwehr T, Schmalenstroeer J, Haeb-Umbach R. <i>A Meeting Transcription System for an Ad-Hoc Acoustic Sensor Network</i>. arXiv; 2022. doi:<a href=\"https://doi.org/10.48550/ARXIV.2205.00944\">10.48550/ARXIV.2205.00944</a>","bibtex":"@book{Gburrek_Boeddeker_von Neumann_Cord-Landwehr_Schmalenstroeer_Haeb-Umbach_2022, title={A Meeting Transcription System for an Ad-Hoc Acoustic Sensor Network}, DOI={<a href=\"https://doi.org/10.48550/ARXIV.2205.00944\">10.48550/ARXIV.2205.00944</a>}, publisher={arXiv}, author={Gburrek, Tobias and Boeddeker, Christoph and von Neumann, Thilo and Cord-Landwehr, Tobias and Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold}, year={2022} }","short":"T. Gburrek, C. Boeddeker, T. von Neumann, T. Cord-Landwehr, J. Schmalenstroeer, R. Haeb-Umbach, A Meeting Transcription System for an Ad-Hoc Acoustic Sensor Network, arXiv, 2022.","mla":"Gburrek, Tobias, et al. <i>A Meeting Transcription System for an Ad-Hoc Acoustic Sensor Network</i>. arXiv, 2022, doi:<a href=\"https://doi.org/10.48550/ARXIV.2205.00944\">10.48550/ARXIV.2205.00944</a>.","apa":"Gburrek, T., Boeddeker, C., von Neumann, T., Cord-Landwehr, T., Schmalenstroeer, J., &#38; Haeb-Umbach, R. (2022). <i>A Meeting Transcription System for an Ad-Hoc Acoustic Sensor Network</i>. arXiv. 
<a href=\"https://doi.org/10.48550/ARXIV.2205.00944\">https://doi.org/10.48550/ARXIV.2205.00944</a>"},"year":"2022","has_accepted_license":"1","doi":"10.48550/ARXIV.2205.00944","title":"A Meeting Transcription System for an Ad-Hoc Acoustic Sensor Network","date_created":"2022-10-18T11:10:58Z","author":[{"id":"44006","full_name":"Gburrek, Tobias","last_name":"Gburrek","first_name":"Tobias"},{"first_name":"Christoph","last_name":"Boeddeker","id":"40767","full_name":"Boeddeker, Christoph"},{"orcid":"https://orcid.org/0000-0002-7717-8670","last_name":"von Neumann","full_name":"von Neumann, Thilo","id":"49870","first_name":"Thilo"},{"id":"44393","full_name":"Cord-Landwehr, Tobias","last_name":"Cord-Landwehr","first_name":"Tobias"},{"full_name":"Schmalenstroeer, Joerg","id":"460","last_name":"Schmalenstroeer","first_name":"Joerg"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"}],"oa":"1","publisher":"arXiv","date_updated":"2025-02-12T09:03:42Z","status":"public","file":[{"date_updated":"2023-11-17T06:42:04Z","creator":"tgburrek","date_created":"2023-11-17T06:42:04Z","file_size":199006,"file_id":"48992","access_level":"open_access","file_name":"meeting_transcription_22.pdf","content_type":"application/pdf","relation":"main_file"}],"type":"misc","file_date_updated":"2023-11-17T06:42:04Z","language":[{"iso":"eng"}],"ddc":["004"],"department":[{"_id":"54"}],"user_id":"40767","_id":"33816","project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing"},{"_id":"508","name":"Automatische Transkription von Gesprächssituationen","grant_number":"448568305"}]},{"publication_status":"published","citation":{"apa":"Boeddeker, C., Cord-Landwehr, T., von Neumann, T., &#38; Haeb-Umbach, R. (2022). An Initialization Scheme for Meeting Separation with Spatial Mixture Models. <i>Interspeech 2022</i>. 
<a href=\"https://doi.org/10.21437/interspeech.2022-10929\">https://doi.org/10.21437/interspeech.2022-10929</a>","mla":"Boeddeker, Christoph, et al. “An Initialization Scheme for Meeting Separation with Spatial Mixture Models.” <i>Interspeech 2022</i>, ISCA, 2022, doi:<a href=\"https://doi.org/10.21437/interspeech.2022-10929\">10.21437/interspeech.2022-10929</a>.","short":"C. Boeddeker, T. Cord-Landwehr, T. von Neumann, R. Haeb-Umbach, in: Interspeech 2022, ISCA, 2022.","bibtex":"@inproceedings{Boeddeker_Cord-Landwehr_von Neumann_Haeb-Umbach_2022, title={An Initialization Scheme for Meeting Separation with Spatial Mixture Models}, DOI={<a href=\"https://doi.org/10.21437/interspeech.2022-10929\">10.21437/interspeech.2022-10929</a>}, booktitle={Interspeech 2022}, publisher={ISCA}, author={Boeddeker, Christoph and Cord-Landwehr, Tobias and von Neumann, Thilo and Haeb-Umbach, Reinhold}, year={2022} }","chicago":"Boeddeker, Christoph, Tobias Cord-Landwehr, Thilo von Neumann, and Reinhold Haeb-Umbach. “An Initialization Scheme for Meeting Separation with Spatial Mixture Models.” In <i>Interspeech 2022</i>. ISCA, 2022. <a href=\"https://doi.org/10.21437/interspeech.2022-10929\">https://doi.org/10.21437/interspeech.2022-10929</a>.","ieee":"C. Boeddeker, T. Cord-Landwehr, T. von Neumann, and R. Haeb-Umbach, “An Initialization Scheme for Meeting Separation with Spatial Mixture Models,” 2022, doi: <a href=\"https://doi.org/10.21437/interspeech.2022-10929\">10.21437/interspeech.2022-10929</a>.","ama":"Boeddeker C, Cord-Landwehr T, von Neumann T, Haeb-Umbach R. An Initialization Scheme for Meeting Separation with Spatial Mixture Models. In: <i>Interspeech 2022</i>. ISCA; 2022. 
doi:<a href=\"https://doi.org/10.21437/interspeech.2022-10929\">10.21437/interspeech.2022-10929</a>"},"year":"2022","date_created":"2022-10-28T10:53:56Z","author":[{"last_name":"Boeddeker","id":"40767","full_name":"Boeddeker, Christoph","first_name":"Christoph"},{"first_name":"Tobias","last_name":"Cord-Landwehr","id":"44393","full_name":"Cord-Landwehr, Tobias"},{"first_name":"Thilo","full_name":"von Neumann, Thilo","id":"49870","last_name":"von Neumann","orcid":"https://orcid.org/0000-0002-7717-8670"},{"first_name":"Reinhold","id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach"}],"date_updated":"2025-02-12T09:06:56Z","oa":"1","publisher":"ISCA","main_file_link":[{"open_access":"1","url":"https://www.isca-archive.org/interspeech_2022/boeddeker22_interspeech.pdf"}],"doi":"10.21437/interspeech.2022-10929","title":"An Initialization Scheme for Meeting Separation with Spatial Mixture Models","type":"conference","publication":"Interspeech 2022","status":"public","user_id":"40767","department":[{"_id":"54"}],"project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"},{"name":"Automatische Transkription von Gesprächssituationen","_id":"508","grant_number":"448568305"}],"_id":"33954","language":[{"iso":"eng"}]},{"conference":{"name":"Interspeech 2022"},"doi":"10.21437/Interspeech.2022-11408","main_file_link":[{"url":"https://www.isca-archive.org/interspeech_2022/kinoshita22_interspeech.pdf"}],"title":"Utterance-by-utterance overlap-aware neural diarization with Graph-PIT","author":[{"first_name":"Keisuke","full_name":"Kinoshita, Keisuke","last_name":"Kinoshita"},{"first_name":"Thilo","id":"49870","full_name":"von Neumann, Thilo","last_name":"von Neumann","orcid":"https://orcid.org/0000-0002-7717-8670"},{"first_name":"Marc","full_name":"Delcroix, Marc","last_name":"Delcroix"},{"last_name":"Boeddeker","id":"40767","full_name":"Boeddeker, 
Christoph","first_name":"Christoph"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242"}],"date_created":"2022-10-28T12:07:57Z","date_updated":"2025-02-12T09:09:05Z","publisher":"ISCA","page":"1486-1490","citation":{"ieee":"K. Kinoshita, T. von Neumann, M. Delcroix, C. Boeddeker, and R. Haeb-Umbach, “Utterance-by-utterance overlap-aware neural diarization with Graph-PIT,” in <i>Proc. Interspeech 2022</i>, 2022, pp. 1486–1490, doi: <a href=\"https://doi.org/10.21437/Interspeech.2022-11408\">10.21437/Interspeech.2022-11408</a>.","chicago":"Kinoshita, Keisuke, Thilo von Neumann, Marc Delcroix, Christoph Boeddeker, and Reinhold Haeb-Umbach. “Utterance-by-Utterance Overlap-Aware Neural Diarization with Graph-PIT.” In <i>Proc. Interspeech 2022</i>, 1486–90. ISCA, 2022. <a href=\"https://doi.org/10.21437/Interspeech.2022-11408\">https://doi.org/10.21437/Interspeech.2022-11408</a>.","ama":"Kinoshita K, von Neumann T, Delcroix M, Boeddeker C, Haeb-Umbach R. Utterance-by-utterance overlap-aware neural diarization with Graph-PIT. In: <i>Proc. Interspeech 2022</i>. ISCA; 2022:1486-1490. doi:<a href=\"https://doi.org/10.21437/Interspeech.2022-11408\">10.21437/Interspeech.2022-11408</a>","bibtex":"@inproceedings{Kinoshita_von Neumann_Delcroix_Boeddeker_Haeb-Umbach_2022, title={Utterance-by-utterance overlap-aware neural diarization with Graph-PIT}, DOI={<a href=\"https://doi.org/10.21437/Interspeech.2022-11408\">10.21437/Interspeech.2022-11408</a>}, booktitle={Proc. Interspeech 2022}, publisher={ISCA}, author={Kinoshita, Keisuke and von Neumann, Thilo and Delcroix, Marc and Boeddeker, Christoph and Haeb-Umbach, Reinhold}, year={2022}, pages={1486–1490} }","short":"K. Kinoshita, T. von Neumann, M. Delcroix, C. Boeddeker, R. Haeb-Umbach, in: Proc. Interspeech 2022, ISCA, 2022, pp. 1486–1490.","mla":"Kinoshita, Keisuke, et al. “Utterance-by-Utterance Overlap-Aware Neural Diarization with Graph-PIT.” <i>Proc. 
Interspeech 2022</i>, ISCA, 2022, pp. 1486–90, doi:<a href=\"https://doi.org/10.21437/Interspeech.2022-11408\">10.21437/Interspeech.2022-11408</a>.","apa":"Kinoshita, K., von Neumann, T., Delcroix, M., Boeddeker, C., &#38; Haeb-Umbach, R. (2022). Utterance-by-utterance overlap-aware neural diarization with Graph-PIT. <i>Proc. Interspeech 2022</i>, 1486–1490. <a href=\"https://doi.org/10.21437/Interspeech.2022-11408\">https://doi.org/10.21437/Interspeech.2022-11408</a>"},"year":"2022","quality_controlled":"1","publication_status":"published","language":[{"iso":"eng"}],"department":[{"_id":"54"}],"user_id":"40767","_id":"33958","status":"public","abstract":[{"lang":"eng","text":"Recent speaker diarization studies showed that integration of end-to-end neural diarization (EEND) and clustering-based diarization is a promising approach for achieving state-of-the-art performance on various tasks. Such an approach first divides an observed signal into fixed-length segments, then performs {\\it segment-level} local diarization based on an EEND module, and merges the segment-level results via clustering to form a final global diarization result. The segmentation is done to limit the number of speakers in each segment since the current EEND cannot handle a large number of speakers. In this paper, we argue that such an approach involving the segmentation has several issues; for example, it inevitably faces a dilemma that larger segment sizes increase both the context available for enhancing the performance and the number of speakers for the local EEND module to handle. To resolve such a problem, this paper proposes a novel framework that performs diarization without segmentation. However, it can still handle challenging data containing many speakers and a significant amount of overlapping speech. The proposed method can take an entire meeting for inference and perform {\\it utterance-by-utterance} diarization that clusters utterance activities in terms of speakers. 
To this end, we leverage a neural network training scheme called Graph-PIT proposed recently for neural source separation. Experiments with simulated active-meeting-like data and CALLHOME data show the superiority of the proposed approach over the conventional methods."}],"publication":"Proc. Interspeech 2022","type":"conference"},{"doi":"10.1109/JPROC.2020.3018668","volume":109,"author":[{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"},{"first_name":"Jahn","full_name":"Heymann, Jahn","last_name":"Heymann"},{"full_name":"Drude, Lukas","last_name":"Drude","first_name":"Lukas"},{"full_name":"Watanabe, Shinji","last_name":"Watanabe","first_name":"Shinji"},{"full_name":"Delcroix, Marc","last_name":"Delcroix","first_name":"Marc"},{"full_name":"Nakatani, Tomohiro","last_name":"Nakatani","first_name":"Tomohiro"}],"date_updated":"2022-01-06T06:54:44Z","oa":"1","intvolume":"109","page":"124-148","citation":{"apa":"Haeb-Umbach, R., Heymann, J., Drude, L., Watanabe, S., Delcroix, M., &#38; Nakatani, T. (2021). Far-Field Automatic Speech Recognition. <i>Proceedings of the IEEE</i>, <i>109</i>(2), 124–148. <a href=\"https://doi.org/10.1109/JPROC.2020.3018668\">https://doi.org/10.1109/JPROC.2020.3018668</a>","short":"R. Haeb-Umbach, J. Heymann, L. Drude, S. Watanabe, M. Delcroix, T. Nakatani, Proceedings of the IEEE 109 (2021) 124–148.","bibtex":"@article{Haeb-Umbach_Heymann_Drude_Watanabe_Delcroix_Nakatani_2021, title={Far-Field Automatic Speech Recognition}, volume={109}, DOI={<a href=\"https://doi.org/10.1109/JPROC.2020.3018668\">10.1109/JPROC.2020.3018668</a>}, number={2}, journal={Proceedings of the IEEE}, author={Haeb-Umbach, Reinhold and Heymann, Jahn and Drude, Lukas and Watanabe, Shinji and Delcroix, Marc and Nakatani, Tomohiro}, year={2021}, pages={124–148} }","mla":"Haeb-Umbach, Reinhold, et al. “Far-Field Automatic Speech Recognition.” <i>Proceedings of the IEEE</i>, vol. 109, no. 2, 2021, pp. 
124–48, doi:<a href=\"https://doi.org/10.1109/JPROC.2020.3018668\">10.1109/JPROC.2020.3018668</a>.","chicago":"Haeb-Umbach, Reinhold, Jahn Heymann, Lukas Drude, Shinji Watanabe, Marc Delcroix, and Tomohiro Nakatani. “Far-Field Automatic Speech Recognition.” <i>Proceedings of the IEEE</i> 109, no. 2 (2021): 124–48. <a href=\"https://doi.org/10.1109/JPROC.2020.3018668\">https://doi.org/10.1109/JPROC.2020.3018668</a>.","ieee":"R. Haeb-Umbach, J. Heymann, L. Drude, S. Watanabe, M. Delcroix, and T. Nakatani, “Far-Field Automatic Speech Recognition,” <i>Proceedings of the IEEE</i>, vol. 109, no. 2, pp. 124–148, 2021.","ama":"Haeb-Umbach R, Heymann J, Drude L, Watanabe S, Delcroix M, Nakatani T. Far-Field Automatic Speech Recognition. <i>Proceedings of the IEEE</i>. 2021;109(2):124-148. doi:<a href=\"https://doi.org/10.1109/JPROC.2020.3018668\">10.1109/JPROC.2020.3018668</a>"},"has_accepted_license":"1","file_date_updated":"2021-01-25T08:17:23Z","department":[{"_id":"54"}],"user_id":"59789","_id":"21065","project":[{"name":"Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"status":"public","type":"journal_article","title":"Far-Field Automatic Speech Recognition","date_created":"2021-01-25T08:15:27Z","year":"2021","issue":"2","language":[{"iso":"eng"}],"ddc":["000"],"file":[{"date_created":"2021-01-25T08:17:23Z","creator":"huesera","date_updated":"2021-01-25T08:17:23Z","access_level":"open_access","file_name":"proceedings_2021_haebumbach_Paper.pdf","file_id":"21066","file_size":4173988,"content_type":"application/pdf","relation":"main_file"}],"abstract":[{"lang":"eng","text":"The machine recognition of speech spoken at a distance from the microphones, known as far-field automatic speech recognition (ASR), has received a significant increase of attention in science and industry, which caused or was caused by an equally significant improvement in recognition accuracy. 
Meanwhile it has entered the consumer market with digital home assistants with a spoken language interface being its most prominent application. Speech recorded at a distance is affected by various acoustic distortions and, consequently, quite different processing pipelines have emerged compared to ASR for close-talk speech. A signal enhancement front-end for dereverberation, source separation and acoustic beamforming is employed to clean up the speech, and the back-end ASR engine is robustified by multi-condition training and adaptation. We will also describe the so-called end-to-end approach to ASR, which is a new promising architecture that has recently been extended to the far-field scenario. This tutorial article gives an account of the algorithms used to enable accurate speech recognition from a distance, and it will be seen that, although deep learning has a significant share in the technological breakthroughs, a clever combination with traditional signal processing can lead to surprisingly effective solutions."}],"publication":"Proceedings of the IEEE"},{"type":"conference","publication":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","status":"public","user_id":"40767","department":[{"_id":"54"}],"_id":"28256","language":[{"iso":"eng"}],"publication_status":"published","citation":{"mla":"Zhang, Wangyou, et al. “End-to-End Dereverberation, Beamforming, and Speech Recognition with Improved Numerical Stability and Advanced Frontend.” <i>ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2021, doi:<a href=\"https://doi.org/10.1109/icassp39728.2021.9414464\">10.1109/icassp39728.2021.9414464</a>.","short":"W. Zhang, C. Boeddeker, S. Watanabe, T. Nakatani, M. Delcroix, K. Kinoshita, T. Ochiai, N. Kamo, R. Haeb-Umbach, Y. 
Qian, in: ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2021.","bibtex":"@inproceedings{Zhang_Boeddeker_Watanabe_Nakatani_Delcroix_Kinoshita_Ochiai_Kamo_Haeb-Umbach_Qian_2021, title={End-to-End Dereverberation, Beamforming, and Speech Recognition with Improved Numerical Stability and Advanced Frontend}, DOI={<a href=\"https://doi.org/10.1109/icassp39728.2021.9414464\">10.1109/icassp39728.2021.9414464</a>}, booktitle={ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, author={Zhang, Wangyou and Boeddeker, Christoph and Watanabe, Shinji and Nakatani, Tomohiro and Delcroix, Marc and Kinoshita, Keisuke and Ochiai, Tsubasa and Kamo, Naoyuki and Haeb-Umbach, Reinhold and Qian, Yanmin}, year={2021} }","apa":"Zhang, W., Boeddeker, C., Watanabe, S., Nakatani, T., Delcroix, M., Kinoshita, K., Ochiai, T., Kamo, N., Haeb-Umbach, R., &#38; Qian, Y. (2021). End-to-End Dereverberation, Beamforming, and Speech Recognition with Improved Numerical Stability and Advanced Frontend. <i>ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. <a href=\"https://doi.org/10.1109/icassp39728.2021.9414464\">https://doi.org/10.1109/icassp39728.2021.9414464</a>","ama":"Zhang W, Boeddeker C, Watanabe S, et al. End-to-End Dereverberation, Beamforming, and Speech Recognition with Improved Numerical Stability and Advanced Frontend. In: <i>ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. ; 2021. doi:<a href=\"https://doi.org/10.1109/icassp39728.2021.9414464\">10.1109/icassp39728.2021.9414464</a>","ieee":"W. 
Zhang <i>et al.</i>, “End-to-End Dereverberation, Beamforming, and Speech Recognition with Improved Numerical Stability and Advanced Frontend,” 2021, doi: <a href=\"https://doi.org/10.1109/icassp39728.2021.9414464\">10.1109/icassp39728.2021.9414464</a>.","chicago":"Zhang, Wangyou, Christoph Boeddeker, Shinji Watanabe, Tomohiro Nakatani, Marc Delcroix, Keisuke Kinoshita, Tsubasa Ochiai, Naoyuki Kamo, Reinhold Haeb-Umbach, and Yanmin Qian. “End-to-End Dereverberation, Beamforming, and Speech Recognition with Improved Numerical Stability and Advanced Frontend.” In <i>ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2021. <a href=\"https://doi.org/10.1109/icassp39728.2021.9414464\">https://doi.org/10.1109/icassp39728.2021.9414464</a>."},"year":"2021","author":[{"full_name":"Zhang, Wangyou","last_name":"Zhang","first_name":"Wangyou"},{"id":"40767","full_name":"Boeddeker, Christoph","last_name":"Boeddeker","first_name":"Christoph"},{"first_name":"Shinji","full_name":"Watanabe, Shinji","last_name":"Watanabe"},{"full_name":"Nakatani, Tomohiro","last_name":"Nakatani","first_name":"Tomohiro"},{"first_name":"Marc","last_name":"Delcroix","full_name":"Delcroix, Marc"},{"first_name":"Keisuke","full_name":"Kinoshita, Keisuke","last_name":"Kinoshita"},{"first_name":"Tsubasa","last_name":"Ochiai","full_name":"Ochiai, Tsubasa"},{"full_name":"Kamo, Naoyuki","last_name":"Kamo","first_name":"Naoyuki"},{"full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach","first_name":"Reinhold"},{"full_name":"Qian, Yanmin","last_name":"Qian","first_name":"Yanmin"}],"date_created":"2021-12-03T11:31:42Z","date_updated":"2022-01-13T08:31:27Z","doi":"10.1109/icassp39728.2021.9414464","title":"End-to-End Dereverberation, Beamforming, and Speech Recognition with Improved Numerical Stability and Advanced Frontend"},{"page":"1-5","citation":{"mla":"Heitkaemper, Jens, et al. 
“A Database for Research on Detection and Enhancement of Speech Transmitted over HF Links.” <i>Speech Communication; 14th ITG-Symposium</i>, 2021, pp. 1–5.","bibtex":"@inproceedings{Heitkaemper_Schmalenstroeer_Ion_Haeb-Umbach_2021, title={A Database for Research on Detection and Enhancement of Speech Transmitted over HF links}, booktitle={Speech Communication; 14th ITG-Symposium}, author={Heitkaemper, Jens and Schmalenstroeer, Joerg and Ion, Valentin and Haeb-Umbach, Reinhold}, year={2021}, pages={1–5} }","short":"J. Heitkaemper, J. Schmalenstroeer, V. Ion, R. Haeb-Umbach, in: Speech Communication; 14th ITG-Symposium, 2021, pp. 1–5.","apa":"Heitkaemper, J., Schmalenstroeer, J., Ion, V., &#38; Haeb-Umbach, R. (2021). A Database for Research on Detection and Enhancement of Speech Transmitted over HF links. <i>Speech Communication; 14th ITG-Symposium</i>, 1–5.","ama":"Heitkaemper J, Schmalenstroeer J, Ion V, Haeb-Umbach R. A Database for Research on Detection and Enhancement of Speech Transmitted over HF links. In: <i>Speech Communication; 14th ITG-Symposium</i>. ; 2021:1-5.","ieee":"J. Heitkaemper, J. Schmalenstroeer, V. Ion, and R. Haeb-Umbach, “A Database for Research on Detection and Enhancement of Speech Transmitted over HF links,” in <i>Speech Communication; 14th ITG-Symposium</i>, 2021, pp. 1–5.","chicago":"Heitkaemper, Jens, Joerg Schmalenstroeer, Valentin Ion, and Reinhold Haeb-Umbach. 
“A Database for Research on Detection and Enhancement of Speech Transmitted over HF Links.” In <i>Speech Communication; 14th ITG-Symposium</i>, 1–5, 2021."},"year":"2021","quality_controlled":"1","title":"A Database for Research on Detection and Enhancement of Speech Transmitted over HF links","author":[{"first_name":"Jens","full_name":"Heitkaemper, Jens","id":"27643","last_name":"Heitkaemper"},{"full_name":"Schmalenstroeer, Joerg","id":"460","last_name":"Schmalenstroeer","first_name":"Joerg"},{"last_name":"Ion","full_name":"Ion, Valentin","first_name":"Valentin"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"}],"date_created":"2021-09-09T08:41:25Z","date_updated":"2023-10-26T08:06:57Z","status":"public","publication":"Speech Communication; 14th ITG-Symposium","type":"conference","language":[{"iso":"eng"}],"department":[{"_id":"54"}],"user_id":"460","_id":"24000","project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}]},{"conference":{"location":"Kiel","name":"ITG Conference on Speech Communication"},"main_file_link":[{"url":"https://arxiv.org/pdf/2106.05627.pdf","open_access":"1"}],"author":[{"full_name":"Boeddeker, Christoph","id":"40767","last_name":"Boeddeker","first_name":"Christoph"},{"full_name":"Rautenberg, Frederik","id":"72602","last_name":"Rautenberg","first_name":"Frederik"},{"full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach","first_name":"Reinhold"}],"date_updated":"2023-11-15T15:29:32Z","oa":"1","citation":{"apa":"Boeddeker, C., Rautenberg, F., &#38; Haeb-Umbach, R. (2021). A Comparison and Combination of Unsupervised Blind Source Separation  Techniques. <i>ITG Conference on Speech Communication</i>. 
ITG Conference on Speech Communication, Kiel.","bibtex":"@inproceedings{Boeddeker_Rautenberg_Haeb-Umbach_2021, title={A Comparison and Combination of Unsupervised Blind Source Separation  Techniques}, booktitle={ITG Conference on Speech Communication}, author={Boeddeker, Christoph and Rautenberg, Frederik and Haeb-Umbach, Reinhold}, year={2021} }","short":"C. Boeddeker, F. Rautenberg, R. Haeb-Umbach, in: ITG Conference on Speech Communication, 2021.","mla":"Boeddeker, Christoph, et al. “A Comparison and Combination of Unsupervised Blind Source Separation  Techniques.” <i>ITG Conference on Speech Communication</i>, 2021.","ama":"Boeddeker C, Rautenberg F, Haeb-Umbach R. A Comparison and Combination of Unsupervised Blind Source Separation  Techniques. In: <i>ITG Conference on Speech Communication</i>. ; 2021.","ieee":"C. Boeddeker, F. Rautenberg, and R. Haeb-Umbach, “A Comparison and Combination of Unsupervised Blind Source Separation  Techniques,” presented at the ITG Conference on Speech Communication, Kiel, 2021.","chicago":"Boeddeker, Christoph, Frederik Rautenberg, and Reinhold Haeb-Umbach. 
“A Comparison and Combination of Unsupervised Blind Source Separation  Techniques.” In <i>ITG Conference on Speech Communication</i>, 2021."},"has_accepted_license":"1","file_date_updated":"2023-11-15T15:29:32Z","department":[{"_id":"54"}],"user_id":"40767","_id":"44843","status":"public","type":"conference","title":"A Comparison and Combination of Unsupervised Blind Source Separation  Techniques","date_created":"2023-05-15T07:59:33Z","year":"2021","language":[{"iso":"eng"}],"ddc":["000"],"external_id":{"arxiv":["2106.05627"]},"file":[{"date_updated":"2023-11-15T15:29:32Z","date_created":"2023-05-16T08:37:31Z","creator":"frra","file_size":295972,"file_name":"2106.05627.pdf","access_level":"open_access","file_id":"44856","content_type":"application/pdf","relation":"main_file"}],"abstract":[{"text":"Unsupervised blind source separation methods do not require a training phase\r\nand thus cannot suffer from a train-test mismatch, which is a common concern in\r\nneural network based source separation. The unsupervised techniques can be\r\ncategorized in two classes, those building upon the sparsity of speech in the\r\nShort-Time Fourier transform domain and those exploiting non-Gaussianity or\r\nnon-stationarity of the source signals. In this contribution, spatial mixture\r\nmodels which fall in the first category and independent vector analysis (IVA)\r\nas a representative of the second category are compared w.r.t. their separation\r\nperformance and the performance of a downstream speech recognizer on a\r\nreverberant dataset of reasonable size. 
Furthermore, we introduce a serial\r\nconcatenation of the two, where the result of the mixture model serves as\r\ninitialization of IVA, which achieves significantly better WER performance than\r\neach algorithm individually and even approaches the performance of a much more\r\ncomplex neural network based technique.","lang":"eng"}],"publication":"ITG Conference on Speech Communication"},{"publication":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","type":"conference","status":"public","file":[{"relation":"main_file","content_type":"application/pdf","file_size":228717,"file_name":"ICASSP2021_BSSEval.pdf","access_level":"open_access","file_id":"28260","date_updated":"2023-11-15T15:18:08Z","creator":"cbj","date_created":"2021-12-03T12:01:20Z"}],"department":[{"_id":"54"}],"user_id":"40767","_id":"28259","project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"file_date_updated":"2023-11-15T15:18:08Z","language":[{"iso":"eng"}],"ddc":["000"],"has_accepted_license":"1","publication_status":"published","citation":{"short":"C. Boeddeker, W. Zhang, T. Nakatani, K. Kinoshita, T. Ochiai, M. Delcroix, N. Kamo, Y. Qian, R. 
Haeb-Umbach, in: ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2021.","bibtex":"@inproceedings{Boeddeker_Zhang_Nakatani_Kinoshita_Ochiai_Delcroix_Kamo_Qian_Haeb-Umbach_2021, title={Convolutive Transfer Function Invariant SDR Training Criteria for Multi-Channel Reverberant Speech Separation}, DOI={<a href=\"https://doi.org/10.1109/icassp39728.2021.9414661\">10.1109/icassp39728.2021.9414661</a>}, booktitle={ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, author={Boeddeker, Christoph and Zhang, Wangyou and Nakatani, Tomohiro and Kinoshita, Keisuke and Ochiai, Tsubasa and Delcroix, Marc and Kamo, Naoyuki and Qian, Yanmin and Haeb-Umbach, Reinhold}, year={2021} }","mla":"Boeddeker, Christoph, et al. “Convolutive Transfer Function Invariant SDR Training Criteria for Multi-Channel Reverberant Speech Separation.” <i>ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2021, doi:<a href=\"https://doi.org/10.1109/icassp39728.2021.9414661\">10.1109/icassp39728.2021.9414661</a>.","apa":"Boeddeker, C., Zhang, W., Nakatani, T., Kinoshita, K., Ochiai, T., Delcroix, M., Kamo, N., Qian, Y., &#38; Haeb-Umbach, R. (2021). Convolutive Transfer Function Invariant SDR Training Criteria for Multi-Channel Reverberant Speech Separation. <i>ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. <a href=\"https://doi.org/10.1109/icassp39728.2021.9414661\">https://doi.org/10.1109/icassp39728.2021.9414661</a>","chicago":"Boeddeker, Christoph, Wangyou Zhang, Tomohiro Nakatani, Keisuke Kinoshita, Tsubasa Ochiai, Marc Delcroix, Naoyuki Kamo, Yanmin Qian, and Reinhold Haeb-Umbach. 
“Convolutive Transfer Function Invariant SDR Training Criteria for Multi-Channel Reverberant Speech Separation.” In <i>ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2021. <a href=\"https://doi.org/10.1109/icassp39728.2021.9414661\">https://doi.org/10.1109/icassp39728.2021.9414661</a>.","ieee":"C. Boeddeker <i>et al.</i>, “Convolutive Transfer Function Invariant SDR Training Criteria for Multi-Channel Reverberant Speech Separation,” 2021, doi: <a href=\"https://doi.org/10.1109/icassp39728.2021.9414661\">10.1109/icassp39728.2021.9414661</a>.","ama":"Boeddeker C, Zhang W, Nakatani T, et al. Convolutive Transfer Function Invariant SDR Training Criteria for Multi-Channel Reverberant Speech Separation. In: <i>ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. ; 2021. doi:<a href=\"https://doi.org/10.1109/icassp39728.2021.9414661\">10.1109/icassp39728.2021.9414661</a>"},"year":"2021","date_created":"2021-12-03T12:00:16Z","author":[{"full_name":"Boeddeker, Christoph","id":"40767","last_name":"Boeddeker","first_name":"Christoph"},{"first_name":"Wangyou","full_name":"Zhang, Wangyou","last_name":"Zhang"},{"last_name":"Nakatani","full_name":"Nakatani, Tomohiro","first_name":"Tomohiro"},{"first_name":"Keisuke","full_name":"Kinoshita, Keisuke","last_name":"Kinoshita"},{"full_name":"Ochiai, Tsubasa","last_name":"Ochiai","first_name":"Tsubasa"},{"first_name":"Marc","full_name":"Delcroix, Marc","last_name":"Delcroix"},{"first_name":"Naoyuki","last_name":"Kamo","full_name":"Kamo, Naoyuki"},{"first_name":"Yanmin","last_name":"Qian","full_name":"Qian, Yanmin"},{"first_name":"Reinhold","id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach"}],"oa":"1","date_updated":"2023-11-15T15:18:09Z","doi":"10.1109/icassp39728.2021.9414661","title":"Convolutive Transfer Function Invariant SDR Training Criteria for Multi-Channel Reverberant Speech 
Separation"},{"author":[{"first_name":"Joerg","last_name":"Schmalenstroeer","id":"460","full_name":"Schmalenstroeer, Joerg"},{"first_name":"Jens","last_name":"Heitkaemper","full_name":"Heitkaemper, Jens","id":"27643"},{"last_name":"Ullmann","full_name":"Ullmann, Joerg","id":"16256","first_name":"Joerg"},{"first_name":"Reinhold","full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach"}],"date_created":"2021-09-09T08:40:04Z","date_updated":"2023-11-15T14:56:38Z","oa":"1","main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2103.01599"}],"title":"Open Range Pitch Tracking for Carrier Frequency Difference Estimation from HF Transmitted Speech","page":"1-5","citation":{"apa":"Schmalenstroeer, J., Heitkaemper, J., Ullmann, J., &#38; Haeb-Umbach, R. (2021). Open Range Pitch Tracking for Carrier Frequency Difference Estimation from HF Transmitted Speech. <i>29th European Signal Processing Conference (EUSIPCO)</i>, 1–5.","short":"J. Schmalenstroeer, J. Heitkaemper, J. Ullmann, R. Haeb-Umbach, in: 29th European Signal Processing Conference (EUSIPCO), 2021, pp. 1–5.","mla":"Schmalenstroeer, Joerg, et al. “Open Range Pitch Tracking for Carrier Frequency Difference Estimation from HF Transmitted Speech.” <i>29th European Signal Processing Conference (EUSIPCO)</i>, 2021, pp. 1–5.","bibtex":"@inproceedings{Schmalenstroeer_Heitkaemper_Ullmann_Haeb-Umbach_2021, title={Open Range Pitch Tracking for Carrier Frequency Difference Estimation from HF Transmitted Speech}, booktitle={29th European Signal Processing Conference (EUSIPCO)}, author={Schmalenstroeer, Joerg and Heitkaemper, Jens and Ullmann, Joerg and Haeb-Umbach, Reinhold}, year={2021}, pages={1–5} }","ama":"Schmalenstroeer J, Heitkaemper J, Ullmann J, Haeb-Umbach R. Open Range Pitch Tracking for Carrier Frequency Difference Estimation from HF Transmitted Speech. In: <i>29th European Signal Processing Conference (EUSIPCO)</i>. 
; 2021:1-5.","chicago":"Schmalenstroeer, Joerg, Jens Heitkaemper, Joerg Ullmann, and Reinhold Haeb-Umbach. “Open Range Pitch Tracking for Carrier Frequency Difference Estimation from HF Transmitted Speech.” In <i>29th European Signal Processing Conference (EUSIPCO)</i>, 1–5, 2021.","ieee":"J. Schmalenstroeer, J. Heitkaemper, J. Ullmann, and R. Haeb-Umbach, “Open Range Pitch Tracking for Carrier Frequency Difference Estimation from HF Transmitted Speech,” in <i>29th European Signal Processing Conference (EUSIPCO)</i>, 2021, pp. 1–5."},"year":"2021","department":[{"_id":"54"}],"user_id":"460","_id":"23998","language":[{"iso":"eng"}],"extern":"1","publication":"29th European Signal Processing Conference (EUSIPCO)","type":"conference","status":"public"},{"title":"Geometry calibration in wireless acoustic sensor networks utilizing DoA and distance information","main_file_link":[{"open_access":"1","url":"https://asmp-eurasipjournals.springeropen.com/articles/10.1186/s13636-021-00210-x"}],"doi":"10.1186/s13636-021-00210-x","date_updated":"2023-11-17T06:36:17Z","oa":"1","author":[{"first_name":"Tobias","full_name":"Gburrek, Tobias","id":"44006","last_name":"Gburrek"},{"last_name":"Schmalenstroeer","id":"460","full_name":"Schmalenstroeer, Joerg","first_name":"Joerg"},{"id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach","first_name":"Reinhold"}],"date_created":"2021-07-05T05:30:15Z","year":"2021","citation":{"apa":"Gburrek, T., Schmalenstroeer, J., &#38; Haeb-Umbach, R. (2021). Geometry calibration in wireless acoustic sensor networks utilizing DoA and distance information. <i>EURASIP Journal on Audio, Speech, and Music Processing</i>. <a href=\"https://doi.org/10.1186/s13636-021-00210-x\">https://doi.org/10.1186/s13636-021-00210-x</a>","short":"T. Gburrek, J. Schmalenstroeer, R. Haeb-Umbach, EURASIP Journal on Audio, Speech, and Music Processing (2021).","mla":"Gburrek, Tobias, et al. 
“Geometry Calibration in Wireless Acoustic Sensor Networks Utilizing DoA and Distance Information.” <i>EURASIP Journal on Audio, Speech, and Music Processing</i>, 2021, doi:<a href=\"https://doi.org/10.1186/s13636-021-00210-x\">10.1186/s13636-021-00210-x</a>.","bibtex":"@article{Gburrek_Schmalenstroeer_Haeb-Umbach_2021, title={Geometry calibration in wireless acoustic sensor networks utilizing DoA and distance information}, DOI={<a href=\"https://doi.org/10.1186/s13636-021-00210-x\">10.1186/s13636-021-00210-x</a>}, journal={EURASIP Journal on Audio, Speech, and Music Processing}, author={Gburrek, Tobias and Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold}, year={2021} }","ama":"Gburrek T, Schmalenstroeer J, Haeb-Umbach R. Geometry calibration in wireless acoustic sensor networks utilizing DoA and distance information. <i>EURASIP Journal on Audio, Speech, and Music Processing</i>. Published online 2021. doi:<a href=\"https://doi.org/10.1186/s13636-021-00210-x\">10.1186/s13636-021-00210-x</a>","ieee":"T. Gburrek, J. Schmalenstroeer, and R. Haeb-Umbach, “Geometry calibration in wireless acoustic sensor networks utilizing DoA and distance information,” <i>EURASIP Journal on Audio, Speech, and Music Processing</i>, 2021, doi: <a href=\"https://doi.org/10.1186/s13636-021-00210-x\">10.1186/s13636-021-00210-x</a>.","chicago":"Gburrek, Tobias, Joerg Schmalenstroeer, and Reinhold Haeb-Umbach. “Geometry Calibration in Wireless Acoustic Sensor Networks Utilizing DoA and Distance Information.” <i>EURASIP Journal on Audio, Speech, and Music Processing</i>, 2021. 
<a href=\"https://doi.org/10.1186/s13636-021-00210-x\">https://doi.org/10.1186/s13636-021-00210-x</a>."},"publication_status":"published","publication_identifier":{"issn":["1687-4722"]},"quality_controlled":"1","language":[{"iso":"eng"}],"_id":"22528","user_id":"44006","department":[{"_id":"54"}],"abstract":[{"text":"Due to the ad hoc nature of wireless acoustic sensor networks, the position of the sensor nodes is typically unknown. This contribution proposes a technique to estimate the position and orientation of the sensor nodes from the recorded speech signals. The method assumes that a node comprises a microphone array with synchronously sampled microphones rather than a single microphone, but does not require the sampling clocks of the nodes to be synchronized. From the observed audio signals, the distances between the acoustic sources and arrays, as well as the directions of arrival, are estimated. They serve as input to a non-linear least squares problem, from which both the sensor nodes’ positions and orientations, as well as the source positions, are alternatingly estimated in an iterative process. Given one set of unknowns, i.e., either the source positions or the sensor nodes’ geometry, the other set of unknowns can be computed in closed-form. The proposed approach is computationally efficient and the first one, which employs both distance and directional information for geometry calibration in a common cost function. Since both distance and direction of arrival measurements suffer from outliers, e.g., caused by strong reflections of the sound waves on the surfaces of the room, we introduce measures to deemphasize or remove unreliable measurements. Additionally, we discuss modifications of our previously proposed deep neural network-based acoustic distance estimator, to account not only for omnidirectional sources but also for directional sources. 
Simulation results show good positioning accuracy and compare very favorably with alternative approaches from the literature.","lang":"eng"}],"status":"public","type":"journal_article","publication":"EURASIP Journal on Audio, Speech, and Music Processing"},{"publication":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","type":"conference","status":"public","file":[{"content_type":"application/pdf","relation":"main_file","date_updated":"2023-11-17T06:30:11Z","creator":"tgburrek","date_created":"2023-11-17T06:29:40Z","file_size":312400,"file_name":"icassp21.pdf","file_id":"48988","access_level":"open_access"}],"_id":"23994","department":[{"_id":"54"}],"user_id":"44006","ddc":["004"],"language":[{"iso":"eng"}],"file_date_updated":"2023-11-17T06:30:11Z","quality_controlled":"1","has_accepted_license":"1","publication_status":"published","year":"2021","citation":{"apa":"Gburrek, T., Schmalenstroeer, J., &#38; Haeb-Umbach, R. (2021). Iterative Geometry Calibration from Distance Estimates for Wireless Acoustic Sensor Networks. <i>ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. <a href=\"https://doi.org/10.1109/icassp39728.2021.9413831\">https://doi.org/10.1109/icassp39728.2021.9413831</a>","mla":"Gburrek, Tobias, et al. 
“Iterative Geometry Calibration from Distance Estimates for Wireless Acoustic Sensor Networks.” <i>ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2021, doi:<a href=\"https://doi.org/10.1109/icassp39728.2021.9413831\">10.1109/icassp39728.2021.9413831</a>.","bibtex":"@inproceedings{Gburrek_Schmalenstroeer_Haeb-Umbach_2021, title={Iterative Geometry Calibration from Distance Estimates for Wireless Acoustic Sensor Networks}, DOI={<a href=\"https://doi.org/10.1109/icassp39728.2021.9413831\">10.1109/icassp39728.2021.9413831</a>}, booktitle={ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, author={Gburrek, Tobias and Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold}, year={2021} }","short":"T. Gburrek, J. Schmalenstroeer, R. Haeb-Umbach, in: ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2021.","ama":"Gburrek T, Schmalenstroeer J, Haeb-Umbach R. Iterative Geometry Calibration from Distance Estimates for Wireless Acoustic Sensor Networks. In: <i>ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. ; 2021. doi:<a href=\"https://doi.org/10.1109/icassp39728.2021.9413831\">10.1109/icassp39728.2021.9413831</a>","chicago":"Gburrek, Tobias, Joerg Schmalenstroeer, and Reinhold Haeb-Umbach. “Iterative Geometry Calibration from Distance Estimates for Wireless Acoustic Sensor Networks.” In <i>ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2021. <a href=\"https://doi.org/10.1109/icassp39728.2021.9413831\">https://doi.org/10.1109/icassp39728.2021.9413831</a>.","ieee":"T. Gburrek, J. Schmalenstroeer, and R. 
Haeb-Umbach, “Iterative Geometry Calibration from Distance Estimates for Wireless Acoustic Sensor Networks,” 2021, doi: <a href=\"https://doi.org/10.1109/icassp39728.2021.9413831\">10.1109/icassp39728.2021.9413831</a>."},"date_updated":"2023-11-17T06:30:12Z","oa":"1","date_created":"2021-09-09T08:30:16Z","author":[{"id":"44006","full_name":"Gburrek, Tobias","last_name":"Gburrek","first_name":"Tobias"},{"id":"460","full_name":"Schmalenstroeer, Joerg","last_name":"Schmalenstroeer","first_name":"Joerg"},{"first_name":"Reinhold","full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach"}],"title":"Iterative Geometry Calibration from Distance Estimates for Wireless Acoustic Sensor Networks","doi":"10.1109/icassp39728.2021.9413831"},{"page":"1-5","citation":{"apa":"Gburrek, T., Schmalenstroeer, J., &#38; Haeb-Umbach, R. (2021). On Source-Microphone Distance Estimation Using Convolutional Recurrent Neural Networks. <i>Speech Communication; 14th ITG-Symposium</i>, 1–5.","short":"T. Gburrek, J. Schmalenstroeer, R. Haeb-Umbach, in: Speech Communication; 14th ITG-Symposium, 2021, pp. 1–5.","mla":"Gburrek, Tobias, et al. “On Source-Microphone Distance Estimation Using Convolutional Recurrent Neural Networks.” <i>Speech Communication; 14th ITG-Symposium</i>, 2021, pp. 1–5.","bibtex":"@inproceedings{Gburrek_Schmalenstroeer_Haeb-Umbach_2021, title={On Source-Microphone Distance Estimation Using Convolutional Recurrent Neural Networks}, booktitle={Speech Communication; 14th ITG-Symposium}, author={Gburrek, Tobias and Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold}, year={2021}, pages={1–5} }","chicago":"Gburrek, Tobias, Joerg Schmalenstroeer, and Reinhold Haeb-Umbach. “On Source-Microphone Distance Estimation Using Convolutional Recurrent Neural Networks.” In <i>Speech Communication; 14th ITG-Symposium</i>, 1–5, 2021.","ieee":"T. Gburrek, J. Schmalenstroeer, and R. 
Haeb-Umbach, “On Source-Microphone Distance Estimation Using Convolutional Recurrent Neural Networks,” in <i>Speech Communication; 14th ITG-Symposium</i>, 2021, pp. 1–5.","ama":"Gburrek T, Schmalenstroeer J, Haeb-Umbach R. On Source-Microphone Distance Estimation Using Convolutional Recurrent Neural Networks. In: <i>Speech Communication; 14th ITG-Symposium</i>. ; 2021:1-5."},"year":"2021","quality_controlled":"1","has_accepted_license":"1","title":"On Source-Microphone Distance Estimation Using Convolutional Recurrent Neural Networks","date_created":"2021-09-09T08:40:44Z","author":[{"first_name":"Tobias","last_name":"Gburrek","id":"44006","full_name":"Gburrek, Tobias"},{"last_name":"Schmalenstroeer","id":"460","full_name":"Schmalenstroeer, Joerg","first_name":"Joerg"},{"id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach","first_name":"Reinhold"}],"date_updated":"2023-11-17T06:32:20Z","oa":"1","status":"public","file":[{"date_updated":"2023-11-17T06:31:37Z","creator":"tgburrek","date_created":"2023-11-17T06:31:37Z","file_size":449694,"file_id":"48989","access_level":"open_access","file_name":"dist_est.pdf","content_type":"application/pdf","relation":"main_file"}],"publication":"Speech Communication; 14th ITG-Symposium","type":"conference","language":[{"iso":"eng"}],"file_date_updated":"2023-11-17T06:31:37Z","ddc":["004"],"department":[{"_id":"54"}],"user_id":"44006","_id":"23999"},{"title":"Contrastive Predictive Coding Supported Factorized Variational Autoencoder for Unsupervised Learning of Disentangled Speech Representations","date_created":"2022-01-13T07:55:29Z","year":"2021","quality_controlled":"1","language":[{"iso":"eng"}],"ddc":["000"],"file":[{"date_updated":"2022-01-13T08:19:19Z","creator":"ebbers","date_created":"2022-01-13T07:56:30Z","file_size":236628,"file_name":"Template.pdf","file_id":"29305","access_level":"open_access","content_type":"application/pdf","relation":"main_file"}],"abstract":[{"text":"In this work we address 
disentanglement of style and content in speech signals. We propose a fully convolutional variational autoencoder employing two encoders: a content encoder and a style encoder. To foster disentanglement, we propose adversarial contrastive predictive coding. This new disentanglement method does neither need parallel data nor any supervision. We show that the proposed technique is capable of separating speaker and content traits into the two different representations and show competitive speaker-content disentanglement performance compared to other unsupervised approaches. We further demonstrate an increased robustness of the content representation against a train-test mismatch compared to spectral features, when used for phone recognition.","lang":"eng"}],"publication":"Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","author":[{"first_name":"Janek","last_name":"Ebbers","full_name":"Ebbers, Janek","id":"34851"},{"first_name":"Michael","full_name":"Kuhlmann, Michael","id":"49871","last_name":"Kuhlmann"},{"last_name":"Cord-Landwehr","full_name":"Cord-Landwehr, Tobias","id":"44393","first_name":"Tobias"},{"last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242","first_name":"Reinhold"}],"date_updated":"2023-11-22T08:29:42Z","oa":"1","citation":{"ama":"Ebbers J, Kuhlmann M, Cord-Landwehr T, Haeb-Umbach R. Contrastive Predictive Coding Supported Factorized Variational Autoencoder for Unsupervised Learning of Disentangled Speech Representations. In: <i>Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. ; 2021:3860–3864.","chicago":"Ebbers, Janek, Michael Kuhlmann, Tobias Cord-Landwehr, and Reinhold Haeb-Umbach. 
“Contrastive Predictive Coding Supported Factorized Variational Autoencoder for Unsupervised Learning of Disentangled Speech Representations.” In <i>Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, 3860–3864, 2021.","ieee":"J. Ebbers, M. Kuhlmann, T. Cord-Landwehr, and R. Haeb-Umbach, “Contrastive Predictive Coding Supported Factorized Variational Autoencoder for Unsupervised Learning of Disentangled Speech Representations,” in <i>Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2021, pp. 3860–3864.","mla":"Ebbers, Janek, et al. “Contrastive Predictive Coding Supported Factorized Variational Autoencoder for Unsupervised Learning of Disentangled Speech Representations.” <i>Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2021, pp. 3860–3864.","short":"J. Ebbers, M. Kuhlmann, T. Cord-Landwehr, R. Haeb-Umbach, in: Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2021, pp. 3860–3864.","bibtex":"@inproceedings{Ebbers_Kuhlmann_Cord-Landwehr_Haeb-Umbach_2021, title={Contrastive Predictive Coding Supported Factorized Variational Autoencoder for Unsupervised Learning of Disentangled Speech Representations}, booktitle={Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, author={Ebbers, Janek and Kuhlmann, Michael and Cord-Landwehr, Tobias and Haeb-Umbach, Reinhold}, year={2021}, pages={3860–3864} }","apa":"Ebbers, J., Kuhlmann, M., Cord-Landwehr, T., &#38; Haeb-Umbach, R. (2021). Contrastive Predictive Coding Supported Factorized Variational Autoencoder for Unsupervised Learning of Disentangled Speech Representations. 
<i>Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, 3860–3864."},"page":"3860–3864","has_accepted_license":"1","file_date_updated":"2022-01-13T08:19:19Z","user_id":"34851","department":[{"_id":"54"}],"project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"_id":"29304","status":"public","type":"conference"},{"project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"_id":"26770","user_id":"49870","department":[{"_id":"54"}],"file_date_updated":"2021-12-06T10:48:30Z","type":"conference","status":"public","date_updated":"2023-11-15T12:14:40Z","oa":"1","author":[{"last_name":"von Neumann","orcid":"https://orcid.org/0000-0002-7717-8670","full_name":"von Neumann, Thilo","id":"49870","first_name":"Thilo"},{"first_name":"Keisuke","full_name":"Kinoshita, Keisuke","last_name":"Kinoshita"},{"first_name":"Christoph","full_name":"Boeddeker, Christoph","id":"40767","last_name":"Boeddeker"},{"full_name":"Delcroix, Marc","last_name":"Delcroix","first_name":"Marc"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold"}],"doi":"10.21437/interspeech.2021-1177","conference":{"name":"Interspeech"},"publication_status":"published","has_accepted_license":"1","related_material":{"link":[{"url":"https://github.com/fgnt/graph_pit","relation":"software"}]},"citation":{"ama":"von Neumann T, Kinoshita K, Boeddeker C, Delcroix M, Haeb-Umbach R. Graph-PIT: Generalized Permutation Invariant Training for Continuous Separation of Arbitrary Numbers of Speakers. In: <i>Interspeech 2021</i>. ; 2021. doi:<a href=\"https://doi.org/10.21437/interspeech.2021-1177\">10.21437/interspeech.2021-1177</a>","ieee":"T. von Neumann, K. Kinoshita, C. Boeddeker, M. Delcroix, and R. 
Haeb-Umbach, “Graph-PIT: Generalized Permutation Invariant Training for Continuous Separation of Arbitrary Numbers of Speakers,” presented at the Interspeech, 2021, doi: <a href=\"https://doi.org/10.21437/interspeech.2021-1177\">10.21437/interspeech.2021-1177</a>.","chicago":"Neumann, Thilo von, Keisuke Kinoshita, Christoph Boeddeker, Marc Delcroix, and Reinhold Haeb-Umbach. “Graph-PIT: Generalized Permutation Invariant Training for Continuous Separation of Arbitrary Numbers of Speakers.” In <i>Interspeech 2021</i>, 2021. <a href=\"https://doi.org/10.21437/interspeech.2021-1177\">https://doi.org/10.21437/interspeech.2021-1177</a>.","mla":"von Neumann, Thilo, et al. “Graph-PIT: Generalized Permutation Invariant Training for Continuous Separation of Arbitrary Numbers of Speakers.” <i>Interspeech 2021</i>, 2021, doi:<a href=\"https://doi.org/10.21437/interspeech.2021-1177\">10.21437/interspeech.2021-1177</a>.","short":"T. von Neumann, K. Kinoshita, C. Boeddeker, M. Delcroix, R. Haeb-Umbach, in: Interspeech 2021, 2021.","bibtex":"@inproceedings{von Neumann_Kinoshita_Boeddeker_Delcroix_Haeb-Umbach_2021, title={Graph-PIT: Generalized Permutation Invariant Training for Continuous Separation of Arbitrary Numbers of Speakers}, DOI={<a href=\"https://doi.org/10.21437/interspeech.2021-1177\">10.21437/interspeech.2021-1177</a>}, booktitle={Interspeech 2021}, author={von Neumann, Thilo and Kinoshita, Keisuke and Boeddeker, Christoph and Delcroix, Marc and Haeb-Umbach, Reinhold}, year={2021} }","apa":"von Neumann, T., Kinoshita, K., Boeddeker, C., Delcroix, M., &#38; Haeb-Umbach, R. (2021). Graph-PIT: Generalized Permutation Invariant Training for Continuous Separation of Arbitrary Numbers of Speakers. <i>Interspeech 2021</i>. Interspeech. 
<a href=\"https://doi.org/10.21437/interspeech.2021-1177\">https://doi.org/10.21437/interspeech.2021-1177</a>"},"ddc":["000"],"keyword":["Continuous speech separation","automatic speech recognition","overlapped speech","permutation invariant training"],"language":[{"iso":"eng"}],"publication":"Interspeech 2021","abstract":[{"lang":"eng","text":"Automatic transcription of meetings requires handling of overlapped speech, which calls for continuous speech separation (CSS) systems. The uPIT criterion was proposed for utterance-level separation with neural networks and introduces the constraint that the total number of speakers must not exceed the number of output channels. When processing meeting-like data in a segment-wise manner, i.e., by separating overlapping segments independently and stitching adjacent segments to continuous output streams, this constraint has to be fulfilled for any segment. In this contribution, we show that this constraint can be significantly relaxed. We propose a novel graph-based PIT criterion, which casts the assignment of utterances to output channels in a graph coloring problem. It only requires that the number of concurrently active speakers must not exceed the number of output channels. As a consequence, the system can process an arbitrary number of speakers and arbitrarily long segments and thus can handle more diverse scenarios.\r\nFurther, the stitching algorithm for obtaining a consistent output order in neighboring segments is of less importance and can even be eliminated completely, not the least reducing the computational effort. Experiments on meeting-style WSJ data show improvements in recognition performance over using the uPIT criterion. 
"}],"file":[{"content_type":"video/mp4","relation":"supplementary_material","date_updated":"2021-12-06T10:48:30Z","creator":"tvn","date_created":"2021-12-06T10:39:13Z","title":"Video for INTERSPEECH 2021","file_size":9550220,"access_level":"open_access","file_id":"28327","file_name":"Interspeech 2021 voiceover-002-compressed.mp4"},{"file_size":1337297,"title":"Slides from INTERSPEECH 2021","file_id":"28328","access_level":"open_access","file_name":"Graph-PIT-poster-presentation.pptx","date_updated":"2021-12-06T10:47:01Z","date_created":"2021-12-06T10:47:01Z","creator":"tvn","relation":"slides","content_type":"application/vnd.openxmlformats-officedocument.presentationml.presentation"},{"relation":"main_file","content_type":"application/pdf","file_size":226589,"file_name":"INTERSPEECH2021_Graph_PIT.pdf","access_level":"open_access","file_id":"28329","date_updated":"2021-12-06T10:48:21Z","creator":"tvn","date_created":"2021-12-06T10:48:21Z"}],"date_created":"2021-10-25T08:50:01Z","title":"Graph-PIT: Generalized Permutation Invariant Training for Continuous Separation of Arbitrary Numbers of Speakers","quality_controlled":"1","year":"2021"},{"file_date_updated":"2022-01-07T10:42:54Z","language":[{"iso":"eng"}],"ddc":["000"],"user_id":"49870","department":[{"_id":"54"}],"project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel 
Computing"}],"_id":"29173","file":[{"file_id":"29180","access_level":"open_access","file_name":"poster.pdf","file_size":191938,"date_created":"2022-01-06T13:23:27Z","creator":"tvn","date_updated":"2022-01-06T13:23:27Z","relation":"poster","content_type":"application/pdf"},{"date_created":"2022-01-07T10:42:54Z","creator":"tvn","date_updated":"2022-01-07T10:42:54Z","file_id":"29181","file_name":"ITG2021_Speeding_up_Permutation_Invariant_Training.pdf","access_level":"open_access","file_size":236670,"content_type":"application/pdf","relation":"main_file"}],"status":"public","type":"conference","publication":"Speech Communication; 14th ITG Conference","conference":{"end_date":"2021-10-01","location":"Kiel","name":"Speech Communication; 14th ITG Conference","start_date":"2021-09-29"},"title":"Speeding Up Permutation Invariant Training for Source Separation","author":[{"first_name":"Thilo","id":"49870","full_name":"von Neumann, Thilo","last_name":"von Neumann","orcid":"https://orcid.org/0000-0002-7717-8670"},{"first_name":"Christoph","full_name":"Boeddeker, Christoph","id":"40767","last_name":"Boeddeker"},{"first_name":"Keisuke","full_name":"Kinoshita, Keisuke","last_name":"Kinoshita"},{"first_name":"Marc","last_name":"Delcroix","full_name":"Delcroix, Marc"},{"first_name":"Reinhold","full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach"}],"date_created":"2022-01-07T10:40:56Z","oa":"1","date_updated":"2023-11-15T12:16:31Z","citation":{"apa":"von Neumann, T., Boeddeker, C., Kinoshita, K., Delcroix, M., &#38; Haeb-Umbach, R. (2021). Speeding Up Permutation Invariant Training for Source Separation. <i>Speech Communication; 14th ITG Conference</i>. 
Speech Communication; 14th ITG Conference, Kiel.","bibtex":"@inproceedings{von Neumann_Boeddeker_Kinoshita_Delcroix_Haeb-Umbach_2021, title={Speeding Up Permutation Invariant Training for Source Separation}, booktitle={Speech Communication; 14th ITG Conference}, author={von Neumann, Thilo and Boeddeker, Christoph and Kinoshita, Keisuke and Delcroix, Marc and Haeb-Umbach, Reinhold}, year={2021} }","short":"T. von Neumann, C. Boeddeker, K. Kinoshita, M. Delcroix, R. Haeb-Umbach, in: Speech Communication; 14th ITG Conference, 2021.","mla":"von Neumann, Thilo, et al. “Speeding Up Permutation Invariant Training for Source Separation.” <i>Speech Communication; 14th ITG Conference</i>, 2021.","ieee":"T. von Neumann, C. Boeddeker, K. Kinoshita, M. Delcroix, and R. Haeb-Umbach, “Speeding Up Permutation Invariant Training for Source Separation,” presented at the Speech Communication; 14th ITG Conference, Kiel, 2021.","chicago":"Neumann, Thilo von, Christoph Boeddeker, Keisuke Kinoshita, Marc Delcroix, and Reinhold Haeb-Umbach. “Speeding Up Permutation Invariant Training for Source Separation.” In <i>Speech Communication; 14th ITG Conference</i>, 2021.","ama":"von Neumann T, Boeddeker C, Kinoshita K, Delcroix M, Haeb-Umbach R. Speeding Up Permutation Invariant Training for Source Separation. In: <i>Speech Communication; 14th ITG Conference</i>. ; 2021."},"year":"2021","has_accepted_license":"1","quality_controlled":"1"},{"file":[{"content_type":"application/pdf","relation":"main_file","date_created":"2022-01-13T08:08:54Z","creator":"ebbers","date_updated":"2022-01-13T08:19:50Z","file_name":"template.pdf","file_id":"29309","access_level":"open_access","file_size":239462}],"abstract":[{"lang":"eng","text":"In this paper we present our system for the Detection and Classification of Acoustic Scenes and Events (DCASE) 2021 Challenge Task 4: Sound Event Detection and Separation in Domestic Environments, where it scored the fourth rank. 
Our presented solution is an advancement of our system used in the previous edition of the task. We use a forward-backward convolutional recurrent neural network (FBCRNN) for tagging and pseudo labeling followed by tag-conditioned sound event detection (SED) models which are trained using strong pseudo labels provided by the FBCRNN. Our advancement over our earlier model is threefold. First, we introduce a strong label loss in the objective of the FBCRNN to take advantage of the strongly labeled synthetic data during training. Second, we perform multiple iterations of self-training for both the FBCRNN and tag-conditioned SED models. Third, while we used only tag-conditioned CNNs as our SED model in the previous edition we here explore sophisticated tag-conditioned SED model architectures, namely, bidirectional CRNNs and bidirectional convolutional transformer neural networks (CTNNs), and combine them. With metric and class specific tuning of median filter lengths for post-processing, our final SED model, consisting of 6 submodels (2 of each architecture), achieves on the public evaluation set polyphonic sound event detection scores (PSDS) of 0.455 for scenario 1 and 0.684 for scenario 2 as well as a collar-based F1-score of 0.596 outperforming the baselines and our model from the previous edition by far. 
Source code is publicly available at https://github.com/fgnt/pb_sed."}],"publication":"Proceedings of the 6th Detection and Classification of Acoustic Scenes and Events 2021 Workshop (DCASE2021)","language":[{"iso":"eng"}],"ddc":["000"],"year":"2021","quality_controlled":"1","title":"Self-Trained Audio Tagging and Sound Event Detection in Domestic Environments","date_created":"2022-01-13T08:07:47Z","status":"public","type":"conference","file_date_updated":"2022-01-13T08:19:50Z","department":[{"_id":"54"}],"user_id":"34851","_id":"29308","project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"page":"226–230","citation":{"apa":"Ebbers, J., &#38; Haeb-Umbach, R. (2021). Self-Trained Audio Tagging and Sound Event Detection in Domestic Environments. <i>Proceedings of the 6th Detection and Classification of Acoustic Scenes and Events 2021 Workshop (DCASE2021)</i>, 226–230.","mla":"Ebbers, Janek, and Reinhold Haeb-Umbach. “Self-Trained Audio Tagging and Sound Event Detection in Domestic Environments.” <i>Proceedings of the 6th Detection and Classification of Acoustic Scenes and Events 2021 Workshop (DCASE2021)</i>, 2021, pp. 226–230.","short":"J. Ebbers, R. Haeb-Umbach, in: Proceedings of the 6th Detection and Classification of Acoustic Scenes and Events 2021 Workshop (DCASE2021), Barcelona, Spain, 2021, pp. 226–230.","bibtex":"@inproceedings{Ebbers_Haeb-Umbach_2021, place={Barcelona, Spain}, title={Self-Trained Audio Tagging and Sound Event Detection in Domestic Environments}, booktitle={Proceedings of the 6th Detection and Classification of Acoustic Scenes and Events 2021 Workshop (DCASE2021)}, author={Ebbers, Janek and Haeb-Umbach, Reinhold}, year={2021}, pages={226–230} }","ama":"Ebbers J, Haeb-Umbach R. Self-Trained Audio Tagging and Sound Event Detection in Domestic Environments. In: <i>Proceedings of the 6th Detection and Classification of Acoustic Scenes and Events 2021 Workshop (DCASE2021)</i>. 
; 2021:226–230.","chicago":"Ebbers, Janek, and Reinhold Haeb-Umbach. “Self-Trained Audio Tagging and Sound Event Detection in Domestic Environments.” In <i>Proceedings of the 6th Detection and Classification of Acoustic Scenes and Events 2021 Workshop (DCASE2021)</i>, 226–230. Barcelona, Spain, 2021.","ieee":"J. Ebbers and R. Haeb-Umbach, “Self-Trained Audio Tagging and Sound Event Detection in Domestic Environments,” in <i>Proceedings of the 6th Detection and Classification of Acoustic Scenes and Events 2021 Workshop (DCASE2021)</i>, 2021, pp. 226–230."},"place":"Barcelona, Spain","publication_identifier":{"isbn":["978-84-09-36072-7"]},"has_accepted_license":"1","author":[{"first_name":"Janek","full_name":"Ebbers, Janek","id":"34851","last_name":"Ebbers"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242"}],"date_updated":"2023-11-22T08:28:32Z","oa":"1"},{"user_id":"34851","department":[{"_id":"54"}],"project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"_id":"29306","language":[{"iso":"eng"}],"file_date_updated":"2022-01-13T08:19:35Z","ddc":["000"],"type":"conference","publication":"Proceedings of the 29th European Signal Processing Conference (EUSIPCO)","file":[{"relation":"main_file","content_type":"application/pdf","access_level":"open_access","file_id":"29307","file_name":"conference_101719.pdf","file_size":213938,"date_created":"2022-01-13T08:03:26Z","creator":"ebbers","date_updated":"2022-01-13T08:19:35Z"}],"status":"public","abstract":[{"text":"Recently, there has been a rising interest in sound recognition via Acoustic Sensor Networks to support applications such as ambient assisted living or environmental habitat monitoring. With state-of-the-art sound recognition being dominated by deep-learning-based approaches, there is a high demand for labeled training data. 
Despite the availability of large-scale data sets such as Google's AudioSet, acquiring training data matching a certain application environment is still often a problem. In this paper we are concerned with human activity monitoring in a domestic environment using an ASN consisting of multiple nodes each providing multichannel signals. We propose a self-training based domain adaptation approach, which only requires unlabeled data from the target environment. Here, a sound recognition system trained on AudioSet, the teacher, generates pseudo labels for data from the target environment on which a student network is trained. The student can furthermore glean information about the spatial arrangement of sensors and sound sources to further improve classification performance. It is shown that the student significantly improves recognition performance over the pre-trained teacher without relying on labeled data from the environment the system is deployed in.","lang":"eng"}],"date_created":"2022-01-13T08:01:21Z","author":[{"first_name":"Janek","last_name":"Ebbers","full_name":"Ebbers, Janek","id":"34851"},{"full_name":"Keyser, Moritz Curt","last_name":"Keyser","first_name":"Moritz Curt"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold"}],"date_updated":"2023-11-22T08:28:50Z","oa":"1","title":"Adapting Sound Recognition to A New Environment Via Self-Training","quality_controlled":"1","has_accepted_license":"1","citation":{"ama":"Ebbers J, Keyser MC, Haeb-Umbach R. Adapting Sound Recognition to A New Environment Via Self-Training. In: <i>Proceedings of the 29th European Signal Processing Conference (EUSIPCO)</i>. ; 2021:1135–1139.","chicago":"Ebbers, Janek, Moritz Curt Keyser, and Reinhold Haeb-Umbach. “Adapting Sound Recognition to A New Environment Via Self-Training.” In <i>Proceedings of the 29th European Signal Processing Conference (EUSIPCO)</i>, 1135–1139, 2021.","ieee":"J. Ebbers, M. C. Keyser, and R. 
Haeb-Umbach, “Adapting Sound Recognition to A New Environment Via Self-Training,” in <i>Proceedings of the 29th European Signal Processing Conference (EUSIPCO)</i>, 2021, pp. 1135–1139.","mla":"Ebbers, Janek, et al. “Adapting Sound Recognition to A New Environment Via Self-Training.” <i>Proceedings of the 29th European Signal Processing Conference (EUSIPCO)</i>, 2021, pp. 1135–1139.","bibtex":"@inproceedings{Ebbers_Keyser_Haeb-Umbach_2021, title={Adapting Sound Recognition to A New Environment Via Self-Training}, booktitle={Proceedings of the 29th European Signal Processing Conference (EUSIPCO)}, author={Ebbers, Janek and Keyser, Moritz Curt and Haeb-Umbach, Reinhold}, year={2021}, pages={1135–1139} }","short":"J. Ebbers, M.C. Keyser, R. Haeb-Umbach, in: Proceedings of the 29th European Signal Processing Conference (EUSIPCO), 2021, pp. 1135–1139.","apa":"Ebbers, J., Keyser, M. C., &#38; Haeb-Umbach, R. (2021). Adapting Sound Recognition to A New Environment Via Self-Training. <i>Proceedings of the 29th European Signal Processing Conference (EUSIPCO)</i>, 1135–1139."},"page":"1135–1139","year":"2021"},{"quality_controlled":"1","issue":"3","year":"2021","date_created":"2021-09-14T20:52:57Z","title":"Explanation as a Social Practice: Toward a Conceptual Framework for the Social Design of AI Systems","publication":"IEEE Transactions on Cognitive and Developmental Systems","abstract":[{"text":"One objective of current research in explainable intelligent systems is to implement social aspects in order to increase the relevance of explanations. In this paper, we argue that a novel conceptual framework is needed to overcome shortcomings of existing AI systems with little attention to processes of interaction and learning. Drawing from research in interaction and development, we first outline the novel conceptual framework that pushes the design of AI systems toward true interactivity with an emphasis on the role of the partner and social relevance. 
We propose that AI systems will be able to provide a meaningful and relevant explanation only if the process of explaining is extended to active contribution of both partners that brings about dynamics that is modulated by different levels of analysis. Accordingly, our conceptual framework comprises monitoring and scaffolding as key concepts and claims that the process of explaining is not only modulated by the interaction between explainee and explainer but is embedded into a larger social context in which conventionalized and routinized behaviors are established. We discuss our conceptual framework in relation to the established objectives of transparency and autonomy that are raised for the design of explainable AI systems currently.","lang":"eng"}],"file":[{"file_name":"2020-12-01_explainability_final_version.pdf","file_id":"49081","access_level":"open_access","file_size":626217,"date_created":"2023-11-20T16:33:51Z","creator":"haebumb","date_updated":"2023-11-20T16:33:51Z","relation":"main_file","content_type":"application/pdf"}],"keyword":["Explainability","process of explaining and understanding","explainable artificial systems"],"ddc":["300"],"language":[{"iso":"eng"}],"has_accepted_license":"1","publication_identifier":{"issn":["2379-8920","2379-8939"]},"publication_status":"published","intvolume":"        13","page":"717-728","citation":{"apa":"Rohlfing, K. J., Cimiano, P., Scharlau, I., Matzner, T., Buhl, H. M., Buschmeier, H., Esposito, E., Grimminger, A., Hammer, B., Haeb-Umbach, R., Horwath, I., Hüllermeier, E., Kern, F., Kopp, S., Thommes, K., Ngonga Ngomo, A.-C., Schulte, C., Wachsmuth, H., Wagner, P., &#38; Wrede, B. (2021). Explanation as a Social Practice: Toward a Conceptual Framework for the Social Design of AI Systems. <i>IEEE Transactions on Cognitive and Developmental Systems</i>, <i>13</i>(3), 717–728. <a href=\"https://doi.org/10.1109/tcds.2020.3044366\">https://doi.org/10.1109/tcds.2020.3044366</a>","short":"K.J. Rohlfing, P. Cimiano, I. 
Scharlau, T. Matzner, H.M. Buhl, H. Buschmeier, E. Esposito, A. Grimminger, B. Hammer, R. Haeb-Umbach, I. Horwath, E. Hüllermeier, F. Kern, S. Kopp, K. Thommes, A.-C. Ngonga Ngomo, C. Schulte, H. Wachsmuth, P. Wagner, B. Wrede, IEEE Transactions on Cognitive and Developmental Systems 13 (2021) 717–728.","mla":"Rohlfing, Katharina J., et al. “Explanation as a Social Practice: Toward a Conceptual Framework for the Social Design of AI Systems.” <i>IEEE Transactions on Cognitive and Developmental Systems</i>, vol. 13, no. 3, 2021, pp. 717–28, doi:<a href=\"https://doi.org/10.1109/tcds.2020.3044366\">10.1109/tcds.2020.3044366</a>.","bibtex":"@article{Rohlfing_Cimiano_Scharlau_Matzner_Buhl_Buschmeier_Esposito_Grimminger_Hammer_Haeb-Umbach_et al._2021, title={Explanation as a Social Practice: Toward a Conceptual Framework for the Social Design of AI Systems}, volume={13}, DOI={<a href=\"https://doi.org/10.1109/tcds.2020.3044366\">10.1109/tcds.2020.3044366</a>}, number={3}, journal={IEEE Transactions on Cognitive and Developmental Systems}, author={Rohlfing, Katharina J. and Cimiano, Philipp and Scharlau, Ingrid and Matzner, Tobias and Buhl, Heike M. and Buschmeier, Hendrik and Esposito, Elena and Grimminger, Angela and Hammer, Barbara and Haeb-Umbach, Reinhold and et al.}, year={2021}, pages={717–728} }","ama":"Rohlfing KJ, Cimiano P, Scharlau I, et al. Explanation as a Social Practice: Toward a Conceptual Framework for the Social Design of AI Systems. <i>IEEE Transactions on Cognitive and Developmental Systems</i>. 2021;13(3):717-728. doi:<a href=\"https://doi.org/10.1109/tcds.2020.3044366\">10.1109/tcds.2020.3044366</a>","ieee":"K. J. Rohlfing <i>et al.</i>, “Explanation as a Social Practice: Toward a Conceptual Framework for the Social Design of AI Systems,” <i>IEEE Transactions on Cognitive and Developmental Systems</i>, vol. 13, no. 3, pp. 
717–728, 2021, doi: <a href=\"https://doi.org/10.1109/tcds.2020.3044366\">10.1109/tcds.2020.3044366</a>.","chicago":"Rohlfing, Katharina J., Philipp Cimiano, Ingrid Scharlau, Tobias Matzner, Heike M. Buhl, Hendrik Buschmeier, Elena Esposito, et al. “Explanation as a Social Practice: Toward a Conceptual Framework for the Social Design of AI Systems.” <i>IEEE Transactions on Cognitive and Developmental Systems</i> 13, no. 3 (2021): 717–28. <a href=\"https://doi.org/10.1109/tcds.2020.3044366\">https://doi.org/10.1109/tcds.2020.3044366</a>."},"oa":"1","date_updated":"2023-12-05T10:15:02Z","volume":13,"author":[{"first_name":"Katharina J.","last_name":"Rohlfing","full_name":"Rohlfing, Katharina J.","id":"50352"},{"first_name":"Philipp","full_name":"Cimiano, Philipp","last_name":"Cimiano"},{"first_name":"Ingrid","full_name":"Scharlau, Ingrid","id":"451","last_name":"Scharlau","orcid":"0000-0003-2364-9489"},{"first_name":"Tobias","id":"65695","full_name":"Matzner, Tobias","last_name":"Matzner"},{"last_name":"Buhl","full_name":"Buhl, Heike M.","id":"27152","first_name":"Heike M."},{"first_name":"Hendrik","full_name":"Buschmeier, Hendrik","last_name":"Buschmeier"},{"last_name":"Esposito","full_name":"Esposito, Elena","first_name":"Elena"},{"id":"57578","full_name":"Grimminger, Angela","last_name":"Grimminger","first_name":"Angela"},{"first_name":"Barbara","full_name":"Hammer, Barbara","last_name":"Hammer"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold"},{"first_name":"Ilona","full_name":"Horwath, Ilona","id":"68836","last_name":"Horwath"},{"first_name":"Eyke","last_name":"Hüllermeier","id":"48129","full_name":"Hüllermeier, Eyke"},{"first_name":"Friederike","full_name":"Kern, Friederike","last_name":"Kern"},{"last_name":"Kopp","full_name":"Kopp, Stefan","first_name":"Stefan"},{"full_name":"Thommes, Kirsten","id":"72497","last_name":"Thommes","first_name":"Kirsten"},{"first_name":"Axel-Cyrille","last_name":"Ngonga 
Ngomo","id":"65716","full_name":"Ngonga Ngomo, Axel-Cyrille"},{"first_name":"Carsten","id":"60311","full_name":"Schulte, Carsten","last_name":"Schulte"},{"first_name":"Henning","full_name":"Wachsmuth, Henning","id":"3900","last_name":"Wachsmuth"},{"full_name":"Wagner, Petra","last_name":"Wagner","first_name":"Petra"},{"first_name":"Britta","full_name":"Wrede, Britta","last_name":"Wrede"}],"doi":"10.1109/tcds.2020.3044366","type":"journal_article","status":"public","_id":"24456","project":[{"grant_number":"438445824","_id":"109","name":"TRR 318: TRR 318 - Erklärbarkeit konstruieren"}],"department":[{"_id":"603"},{"_id":"749"},{"_id":"424"},{"_id":"67"},{"_id":"574"},{"_id":"184"},{"_id":"757"},{"_id":"54"},{"_id":"178"}],"user_id":"42933","article_type":"original","file_date_updated":"2023-11-20T16:33:51Z"},{"status":"public","editor":[{"first_name":"Ronald","last_name":"Böck","full_name":"Böck, Ronald"},{"last_name":"Siegert","full_name":"Siegert, Ingo","first_name":"Ingo"},{"last_name":"Wendemuth","full_name":"Wendemuth, Andreas","first_name":"Andreas"}],"publication":"Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2020","type":"conference","language":[{"iso":"eng"}],"keyword":["Poster"],"department":[{"_id":"54"}],"user_id":"44006","_id":"17763","page":"227-234","citation":{"apa":"Haeb-Umbach, R. (2020). Sprachtechnologien für Digitale Assistenten. In R. Böck, I. Siegert, &#38; A. Wendemuth (Eds.), <i>Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2020</i> (pp. 227–234). TUDpress, Dresden.","bibtex":"@inproceedings{Haeb-Umbach_2020, title={Sprachtechnologien für Digitale Assistenten}, booktitle={Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2020}, publisher={TUDpress, Dresden}, author={Haeb-Umbach, Reinhold}, editor={Böck, Ronald and Siegert, Ingo and Wendemuth, Andreas}, year={2020}, pages={227–234} }","mla":"Haeb-Umbach, Reinhold. 
“Sprachtechnologien Für Digitale Assistenten.” <i>Studientexte Zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2020</i>, edited by Ronald Böck et al., TUDpress, Dresden, 2020, pp. 227–34.","short":"R. Haeb-Umbach, in: R. Böck, I. Siegert, A. Wendemuth (Eds.), Studientexte Zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2020, TUDpress, Dresden, 2020, pp. 227–234.","ama":"Haeb-Umbach R. Sprachtechnologien für Digitale Assistenten. In: Böck R, Siegert I, Wendemuth A, eds. <i>Studientexte Zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2020</i>. TUDpress, Dresden; 2020:227-234.","ieee":"R. Haeb-Umbach, “Sprachtechnologien für Digitale Assistenten,” in <i>Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2020</i>, 2020, pp. 227–234.","chicago":"Haeb-Umbach, Reinhold. “Sprachtechnologien Für Digitale Assistenten.” In <i>Studientexte Zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2020</i>, edited by Ronald Böck, Ingo Siegert, and Andreas Wendemuth, 227–34. TUDpress, Dresden, 2020."},"year":"2020","publication_identifier":{"isbn":["978-3-959081-93-1"]},"main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2020/ESSV_2020_haeb_umbach.pdf","open_access":"1"}],"title":"Sprachtechnologien für Digitale Assistenten","author":[{"last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242","first_name":"Reinhold"}],"date_created":"2020-08-10T09:53:12Z","oa":"1","date_updated":"2022-01-06T06:53:19Z","publisher":"TUDpress, Dresden"},{"has_accepted_license":"1","year":"2020","citation":{"apa":"Boeddeker, C., Cord-Landwehr, T., Heitkaemper, J., Zorila, C., Hayakawa, D., Li, M., … Haeb-Umbach, R. (2020). Towards a speaker diarization system for the CHiME 2020 dinner party transcription. In <i>Proc. CHiME 2020 Workshop on Speech Processing in Everyday Environments</i>.","mla":"Boeddeker, Christoph, et al. 
“Towards a Speaker Diarization System for the CHiME 2020 Dinner Party Transcription.” <i>Proc. CHiME 2020 Workshop on Speech Processing in Everyday Environments</i>, 2020.","short":"C. Boeddeker, T. Cord-Landwehr, J. Heitkaemper, C. Zorila, D. Hayakawa, M. Li, M. Liu, R. Doddipatla, R. Haeb-Umbach, in: Proc. CHiME 2020 Workshop on Speech Processing in Everyday Environments, 2020.","bibtex":"@inproceedings{Boeddeker_Cord-Landwehr_Heitkaemper_Zorila_Hayakawa_Li_Liu_Doddipatla_Haeb-Umbach_2020, title={Towards a speaker diarization system for the CHiME 2020 dinner party transcription}, booktitle={Proc. CHiME 2020 Workshop on Speech Processing in Everyday Environments}, author={Boeddeker, Christoph and Cord-Landwehr, Tobias and Heitkaemper, Jens and Zorila, Catalin and Hayakawa, Daichi and Li, Mohan and Liu, Min and Doddipatla, Rama and Haeb-Umbach, Reinhold}, year={2020} }","ieee":"C. Boeddeker <i>et al.</i>, “Towards a speaker diarization system for the CHiME 2020 dinner party transcription,” in <i>Proc. CHiME 2020 Workshop on Speech Processing in Everyday Environments</i>, 2020.","chicago":"Boeddeker, Christoph, Tobias Cord-Landwehr, Jens Heitkaemper, Catalin Zorila, Daichi Hayakawa, Mohan Li, Min Liu, Rama Doddipatla, and Reinhold Haeb-Umbach. “Towards a Speaker Diarization System for the CHiME 2020 Dinner Party Transcription.” In <i>Proc. CHiME 2020 Workshop on Speech Processing in Everyday Environments</i>, 2020.","ama":"Boeddeker C, Cord-Landwehr T, Heitkaemper J, et al. Towards a speaker diarization system for the CHiME 2020 dinner party transcription. In: <i>Proc. CHiME 2020 Workshop on Speech Processing in Everyday Environments</i>. 
; 2020."},"date_updated":"2022-01-06T06:54:33Z","oa":"1","author":[{"first_name":"Christoph","id":"40767","full_name":"Boeddeker, Christoph","last_name":"Boeddeker"},{"first_name":"Tobias","last_name":"Cord-Landwehr","full_name":"Cord-Landwehr, Tobias","id":"44393"},{"first_name":"Jens","full_name":"Heitkaemper, Jens","id":"27643","last_name":"Heitkaemper"},{"first_name":"Catalin","full_name":"Zorila, Catalin","last_name":"Zorila"},{"first_name":"Daichi","last_name":"Hayakawa","full_name":"Hayakawa, Daichi"},{"first_name":"Mohan","last_name":"Li","full_name":"Li, Mohan"},{"last_name":"Liu","full_name":"Liu, Min","first_name":"Min"},{"first_name":"Rama","last_name":"Doddipatla","full_name":"Doddipatla, Rama"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold"}],"date_created":"2020-12-11T12:49:13Z","title":"Towards a speaker diarization system for the CHiME 2020 dinner party transcription","type":"conference","publication":"Proc. CHiME 2020 Workshop on Speech Processing in Everyday Environments","file":[{"content_type":"application/pdf","relation":"main_file","date_created":"2020-12-11T12:48:48Z","creator":"cbj","date_updated":"2020-12-11T12:48:48Z","file_id":"20702","file_name":"template.pdf","access_level":"open_access","file_size":115421}],"status":"public","project":[{"name":"Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"_id":"20700","user_id":"40767","department":[{"_id":"54"}],"ddc":["000"],"file_date_updated":"2020-12-11T12:48:48Z","language":[{"iso":"eng"}]}]
