[{"language":[{"iso":"eng"}],"_id":"59999","project":[{"_id":"129","name":"TRR 318 - C06: TRR 318 - Technisch unterstütztes Erklären von Stimmcharakteristika (Teilprojekt C06)","grant_number":"438445824"}],"department":[{"_id":"54"},{"_id":"660"}],"user_id":"72602","status":"public","publication":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","type":"conference","title":"Speech Synthesis along Perceptual Voice Quality Dimensions","conference":{"start_date":"2025-04-06","name":"IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Hyderabad, India ","end_date":"2025-04-11"},"doi":"10.1109/icassp49660.2025.10888012","publisher":"IEEE","date_updated":"2025-05-26T11:09:56Z","date_created":"2025-05-20T08:17:22Z","author":[{"first_name":"Frederik","last_name":"Rautenberg","id":"72602","full_name":"Rautenberg, Frederik"},{"id":"49871","full_name":"Kuhlmann, Michael","last_name":"Kuhlmann","first_name":"Michael"},{"full_name":"Seebauer, Fritz","last_name":"Seebauer","first_name":"Fritz"},{"last_name":"Wiechmann","full_name":"Wiechmann, Jana","first_name":"Jana"},{"first_name":"Petra","full_name":"Wagner, Petra","last_name":"Wagner"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242"}],"year":"2025","citation":{"apa":"Rautenberg, F., Kuhlmann, M., Seebauer, F., Wiechmann, J., Wagner, P., &#38; Haeb-Umbach, R. (2025). Speech Synthesis along Perceptual Voice Quality Dimensions. <i>ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Hyderabad, India . <a href=\"https://doi.org/10.1109/icassp49660.2025.10888012\">https://doi.org/10.1109/icassp49660.2025.10888012</a>","short":"F. Rautenberg, M. Kuhlmann, F. Seebauer, J. Wiechmann, P. Wagner, R. 
Haeb-Umbach, in: ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE, 2025.","bibtex":"@inproceedings{Rautenberg_Kuhlmann_Seebauer_Wiechmann_Wagner_Haeb-Umbach_2025, title={Speech Synthesis along Perceptual Voice Quality Dimensions}, DOI={<a href=\"https://doi.org/10.1109/icassp49660.2025.10888012\">10.1109/icassp49660.2025.10888012</a>}, booktitle={ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, publisher={IEEE}, author={Rautenberg, Frederik and Kuhlmann, Michael and Seebauer, Fritz and Wiechmann, Jana and Wagner, Petra and Haeb-Umbach, Reinhold}, year={2025} }","mla":"Rautenberg, Frederik, et al. “Speech Synthesis along Perceptual Voice Quality Dimensions.” <i>ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, IEEE, 2025, doi:<a href=\"https://doi.org/10.1109/icassp49660.2025.10888012\">10.1109/icassp49660.2025.10888012</a>.","ieee":"F. Rautenberg, M. Kuhlmann, F. Seebauer, J. Wiechmann, P. Wagner, and R. Haeb-Umbach, “Speech Synthesis along Perceptual Voice Quality Dimensions,” presented at the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Hyderabad, India , 2025, doi: <a href=\"https://doi.org/10.1109/icassp49660.2025.10888012\">10.1109/icassp49660.2025.10888012</a>.","chicago":"Rautenberg, Frederik, Michael Kuhlmann, Fritz Seebauer, Jana Wiechmann, Petra Wagner, and Reinhold Haeb-Umbach. “Speech Synthesis along Perceptual Voice Quality Dimensions.” In <i>ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. IEEE, 2025. <a href=\"https://doi.org/10.1109/icassp49660.2025.10888012\">https://doi.org/10.1109/icassp49660.2025.10888012</a>.","ama":"Rautenberg F, Kuhlmann M, Seebauer F, Wiechmann J, Wagner P, Haeb-Umbach R. Speech Synthesis along Perceptual Voice Quality Dimensions. 
In: <i>ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. IEEE; 2025. doi:<a href=\"https://doi.org/10.1109/icassp49660.2025.10888012\">10.1109/icassp49660.2025.10888012</a>"},"publication_status":"published"},{"user_id":"72602","department":[{"_id":"54"},{"_id":"660"}],"project":[{"_id":"129","name":"TRR 318; TP C06: Technisch unterstütztes Erklären von Stimmcharakteristika"}],"_id":"61047","language":[{"iso":"eng"}],"type":"conference","publication":"Interspeech 2025","status":"public","date_created":"2025-08-28T08:39:01Z","author":[{"id":"72602","full_name":"Rautenberg, Frederik","last_name":"Rautenberg","first_name":"Frederik"},{"full_name":"Seebauer, Fritz","last_name":"Seebauer","first_name":"Fritz"},{"last_name":"Wiechmann","full_name":"Wiechmann, Jana","first_name":"Jana"},{"first_name":"Michael","last_name":"Kuhlmann","id":"49871","full_name":"Kuhlmann, Michael"},{"last_name":"Wagner","full_name":"Wagner, Petra","first_name":"Petra"},{"full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach","first_name":"Reinhold"}],"date_updated":"2025-08-28T08:56:49Z","publisher":"ISCA","doi":"10.21437/Interspeech.2025-1443","conference":{"start_date":"2025-08-17","name":"Interspeech","location":"Rotterdam","end_date":"2025-08-21"},"title":"Synthesizing Speech with Selected Perceptual Voice Qualities – A Case Study with Creaky Voice","citation":{"bibtex":"@inproceedings{Rautenberg_Seebauer_Wiechmann_Kuhlmann_Wagner_Haeb-Umbach_2025, title={Synthesizing Speech with Selected Perceptual Voice Qualities – A Case Study with Creaky Voice}, DOI={<a href=\"https://doi.org/10.21437/Interspeech.2025-1443\">10.21437/Interspeech.2025-1443</a>}, booktitle={Interspeech 2025}, publisher={ISCA}, author={Rautenberg, Frederik and Seebauer, Fritz and Wiechmann, Jana and Kuhlmann, Michael and Wagner, Petra and Haeb-Umbach, Reinhold}, year={2025} }","short":"F. Rautenberg, F. Seebauer, J. Wiechmann, M. Kuhlmann, P. 
Wagner, R. Haeb-Umbach, in: Interspeech 2025, ISCA, 2025.","mla":"Rautenberg, Frederik, et al. “Synthesizing Speech with Selected Perceptual Voice Qualities – A Case Study with Creaky Voice.” <i>Interspeech 2025</i>, ISCA, 2025, doi:<a href=\"https://doi.org/10.21437/Interspeech.2025-1443\">10.21437/Interspeech.2025-1443</a>.","apa":"Rautenberg, F., Seebauer, F., Wiechmann, J., Kuhlmann, M., Wagner, P., &#38; Haeb-Umbach, R. (2025). Synthesizing Speech with Selected Perceptual Voice Qualities – A Case Study with Creaky Voice. <i>Interspeech 2025</i>. Interspeech, Rotterdam. <a href=\"https://doi.org/10.21437/Interspeech.2025-1443\">https://doi.org/10.21437/Interspeech.2025-1443</a>","ieee":"F. Rautenberg, F. Seebauer, J. Wiechmann, M. Kuhlmann, P. Wagner, and R. Haeb-Umbach, “Synthesizing Speech with Selected Perceptual Voice Qualities – A Case Study with Creaky Voice,” presented at the Interspeech, Rotterdam, 2025, doi: <a href=\"https://doi.org/10.21437/Interspeech.2025-1443\">10.21437/Interspeech.2025-1443</a>.","chicago":"Rautenberg, Frederik, Fritz Seebauer, Jana Wiechmann, Michael Kuhlmann, Petra Wagner, and Reinhold Haeb-Umbach. “Synthesizing Speech with Selected Perceptual Voice Qualities – A Case Study with Creaky Voice.” In <i>Interspeech 2025</i>. ISCA, 2025. <a href=\"https://doi.org/10.21437/Interspeech.2025-1443\">https://doi.org/10.21437/Interspeech.2025-1443</a>.","ama":"Rautenberg F, Seebauer F, Wiechmann J, Kuhlmann M, Wagner P, Haeb-Umbach R. Synthesizing Speech with Selected Perceptual Voice Qualities – A Case Study with Creaky Voice. In: <i>Interspeech 2025</i>. ISCA; 2025. doi:<a href=\"https://doi.org/10.21437/Interspeech.2025-1443\">10.21437/Interspeech.2025-1443</a>"},"year":"2025"},{"publication":"Proceedings of INTERSPEECH","abstract":[{"lang":"eng","text":"We propose a spatio-spectral, combined model-based and data-driven\r\ndiarization pipeline consisting of TDOA-based segmentation followed by\r\nembedding-based clustering. 
The proposed system requires neither access to\r\nmulti-channel training data nor prior knowledge about the number or placement\r\nof microphones. It works for both a compact microphone array and distributed\r\nmicrophones, with minor adjustments. Due to its superior handling of\r\noverlapping speech during segmentation, the proposed pipeline significantly\r\noutperforms the single-channel pyannote approach, both in a scenario with a\r\ncompact microphone array and in a setup with distributed microphones.\r\nAdditionally, we show that, unlike fully spatial diarization pipelines, the\r\nproposed system can correctly track speakers when they change positions."}],"file":[{"date_created":"2025-08-29T09:43:32Z","creator":"cord","date_updated":"2025-08-29T09:43:32Z","access_level":"open_access","file_id":"61085","file_name":"main.pdf","file_size":921918,"content_type":"application/pdf","relation":"main_file"}],"external_id":{"arxiv":["2506.16228"]},"ddc":["000"],"language":[{"iso":"eng"}],"year":"2025","date_created":"2025-08-29T09:39:01Z","title":"Spatio-spectral diarization of meetings by combining TDOA-based  segmentation and speaker embedding-based clustering","type":"conference","status":"public","project":[{"_id":"52","name":"Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"_id":"61079","user_id":"44393","department":[{"_id":"54"}],"file_date_updated":"2025-08-29T09:43:32Z","has_accepted_license":"1","citation":{"apa":"Cord-Landwehr, T., Gburrek, T., Deegen, M., &#38; Haeb-Umbach, R. (2025). Spatio-spectral diarization of meetings by combining TDOA-based  segmentation and speaker embedding-based clustering. <i>Proceedings of INTERSPEECH</i>. Interspeech 2025, Rotterdam. <a href=\"https://doi.org/10.21437/Interspeech.2025-1663\">https://doi.org/10.21437/Interspeech.2025-1663</a>","short":"T. Cord-Landwehr, T. Gburrek, M. Deegen, R. 
Haeb-Umbach, in: Proceedings of INTERSPEECH, 2025.","bibtex":"@inproceedings{Cord-Landwehr_Gburrek_Deegen_Haeb-Umbach_2025, title={Spatio-spectral diarization of meetings by combining TDOA-based  segmentation and speaker embedding-based clustering}, DOI={<a href=\"https://doi.org/10.21437/Interspeech.2025-1663\">10.21437/Interspeech.2025-1663</a>}, booktitle={Proceedings of INTERSPEECH}, author={Cord-Landwehr, Tobias and Gburrek, Tobias and Deegen, Marc and Haeb-Umbach, Reinhold}, year={2025} }","mla":"Cord-Landwehr, Tobias, et al. “Spatio-Spectral Diarization of Meetings by Combining TDOA-Based  Segmentation and Speaker Embedding-Based Clustering.” <i>Proceedings of INTERSPEECH</i>, 2025, doi:<a href=\"https://doi.org/10.21437/Interspeech.2025-1663\">10.21437/Interspeech.2025-1663</a>.","ieee":"T. Cord-Landwehr, T. Gburrek, M. Deegen, and R. Haeb-Umbach, “Spatio-spectral diarization of meetings by combining TDOA-based  segmentation and speaker embedding-based clustering,” presented at the Interspeech 2025, Rotterdam, 2025, doi: <a href=\"https://doi.org/10.21437/Interspeech.2025-1663\">10.21437/Interspeech.2025-1663</a>.","chicago":"Cord-Landwehr, Tobias, Tobias Gburrek, Marc Deegen, and Reinhold Haeb-Umbach. “Spatio-Spectral Diarization of Meetings by Combining TDOA-Based  Segmentation and Speaker Embedding-Based Clustering.” In <i>Proceedings of INTERSPEECH</i>, 2025. <a href=\"https://doi.org/10.21437/Interspeech.2025-1663\">https://doi.org/10.21437/Interspeech.2025-1663</a>.","ama":"Cord-Landwehr T, Gburrek T, Deegen M, Haeb-Umbach R. Spatio-spectral diarization of meetings by combining TDOA-based  segmentation and speaker embedding-based clustering. In: <i>Proceedings of INTERSPEECH</i>. ; 2025. 
doi:<a href=\"https://doi.org/10.21437/Interspeech.2025-1663\">10.21437/Interspeech.2025-1663</a>"},"date_updated":"2025-11-10T09:06:47Z","oa":"1","author":[{"id":"44393","full_name":"Cord-Landwehr, Tobias","last_name":"Cord-Landwehr","first_name":"Tobias"},{"last_name":"Gburrek","id":"44006","full_name":"Gburrek, Tobias","first_name":"Tobias"},{"first_name":"Marc","id":"70272","full_name":"Deegen, Marc","last_name":"Deegen"},{"id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach","first_name":"Reinhold"}],"conference":{"location":"Rotterdam","name":"Interspeech 2025"},"doi":"10.21437/Interspeech.2025-1663"},{"publication":"Interspeech 2025","type":"conference","status":"public","department":[{"_id":"54"}],"user_id":"49871","_id":"62164","language":[{"iso":"eng"}],"publication_status":"published","citation":{"bibtex":"@inproceedings{Kuhlmann_Seebauer_Wagner_Häb-Umbach_2025, title={Towards Frame-level Quality Predictions of Synthetic Speech}, DOI={<a href=\"https://doi.org/10.21437/interspeech.2025-2190\">10.21437/interspeech.2025-2190</a>}, booktitle={Interspeech 2025}, publisher={ISCA}, author={Kuhlmann, Michael and Seebauer, Fritz and Wagner, Petra and Häb-Umbach, Reinhold}, year={2025} }","short":"M. Kuhlmann, F. Seebauer, P. Wagner, R. Häb-Umbach, in: Interspeech 2025, ISCA, 2025.","mla":"Kuhlmann, Michael, et al. “Towards Frame-Level Quality Predictions of Synthetic Speech.” <i>Interspeech 2025</i>, ISCA, 2025, doi:<a href=\"https://doi.org/10.21437/interspeech.2025-2190\">10.21437/interspeech.2025-2190</a>.","apa":"Kuhlmann, M., Seebauer, F., Wagner, P., &#38; Häb-Umbach, R. (2025). Towards Frame-level Quality Predictions of Synthetic Speech. <i>Interspeech 2025</i>. <a href=\"https://doi.org/10.21437/interspeech.2025-2190\">https://doi.org/10.21437/interspeech.2025-2190</a>","ama":"Kuhlmann M, Seebauer F, Wagner P, Häb-Umbach R. Towards Frame-level Quality Predictions of Synthetic Speech. In: <i>Interspeech 2025</i>. ISCA; 2025. 
doi:<a href=\"https://doi.org/10.21437/interspeech.2025-2190\">10.21437/interspeech.2025-2190</a>","chicago":"Kuhlmann, Michael, Fritz Seebauer, Petra Wagner, and Reinhold Häb-Umbach. “Towards Frame-Level Quality Predictions of Synthetic Speech.” In <i>Interspeech 2025</i>. ISCA, 2025. <a href=\"https://doi.org/10.21437/interspeech.2025-2190\">https://doi.org/10.21437/interspeech.2025-2190</a>.","ieee":"M. Kuhlmann, F. Seebauer, P. Wagner, and R. Häb-Umbach, “Towards Frame-level Quality Predictions of Synthetic Speech,” 2025, doi: <a href=\"https://doi.org/10.21437/interspeech.2025-2190\">10.21437/interspeech.2025-2190</a>."},"year":"2025","date_created":"2025-11-11T11:43:20Z","author":[{"first_name":"Michael","full_name":"Kuhlmann, Michael","id":"49871","last_name":"Kuhlmann"},{"full_name":"Seebauer, Fritz","last_name":"Seebauer","first_name":"Fritz"},{"first_name":"Petra","id":"74505","full_name":"Wagner, Petra","last_name":"Wagner"},{"last_name":"Häb-Umbach","id":"242","full_name":"Häb-Umbach, Reinhold","first_name":"Reinhold"}],"publisher":"ISCA","date_updated":"2025-11-11T11:45:12Z","doi":"10.21437/interspeech.2025-2190","title":"Towards Frame-level Quality Predictions of Synthetic Speech"},{"title":"A Fully Zero-Shot Approach to Obtaining Specialized and Compact Audio Tagging Models","conference":{"name":"16th ITG Conference on Speech Communication","start_date":"2025-09-24","end_date":"2025-09-26","location":"Berlin"},"date_updated":"2025-11-28T13:20:17Z","date_created":"2025-11-11T11:46:42Z","author":[{"first_name":"Alexander","last_name":"Werning","full_name":"Werning, Alexander","id":"62152"},{"last_name":"Häb-Umbach","id":"242","full_name":"Häb-Umbach, Reinhold","first_name":"Reinhold"}],"year":"2025","place":"Berlin","page":"76-80","citation":{"chicago":"Werning, Alexander, and Reinhold Häb-Umbach. 
“A Fully Zero-Shot Approach to Obtaining Specialized and Compact Audio Tagging Models.” In <i>Proceedings of the 16th ITG Conference on Speech Communication</i>, edited by Sebastian Möller, Timo Gerkmann, and Dorothea Kolossa, 76–80. Berlin, 2025.","ieee":"A. Werning and R. Häb-Umbach, “A Fully Zero-Shot Approach to Obtaining Specialized and Compact Audio Tagging Models,” in <i>Proceedings of the 16th ITG Conference on Speech Communication</i>, Berlin, 2025, pp. 76–80.","ama":"Werning A, Häb-Umbach R. A Fully Zero-Shot Approach to Obtaining Specialized and Compact Audio Tagging Models. In: Möller S, Gerkmann T, Kolossa D, eds. <i>Proceedings of the 16th ITG Conference on Speech Communication</i>. ; 2025:76-80.","apa":"Werning, A., &#38; Häb-Umbach, R. (2025). A Fully Zero-Shot Approach to Obtaining Specialized and Compact Audio Tagging Models. In S. Möller, T. Gerkmann, &#38; D. Kolossa (Eds.), <i>Proceedings of the 16th ITG Conference on Speech Communication</i> (pp. 76–80).","bibtex":"@inproceedings{Werning_Häb-Umbach_2025, place={Berlin}, title={A Fully Zero-Shot Approach to Obtaining Specialized and Compact Audio Tagging Models}, booktitle={Proceedings of the 16th ITG Conference on Speech Communication}, author={Werning, Alexander and Häb-Umbach, Reinhold}, editor={Möller, Sebastian and Gerkmann, Timo and Kolossa, Dorothea}, year={2025}, pages={76–80} }","short":"A. Werning, R. Häb-Umbach, in: S. Möller, T. Gerkmann, D. Kolossa (Eds.), Proceedings of the 16th ITG Conference on Speech Communication, Berlin, 2025, pp. 76–80.","mla":"Werning, Alexander, and Reinhold Häb-Umbach. “A Fully Zero-Shot Approach to Obtaining Specialized and Compact Audio Tagging Models.” <i>Proceedings of the 16th ITG Conference on Speech Communication</i>, edited by Sebastian Möller et al., 2025, pp. 
76–80."},"quality_controlled":"1","publication_identifier":{"unknown":["978-3-8007-6617-8"]},"publication_status":"published","language":[{"iso":"eng"}],"_id":"62163","project":[{"_id":"512","name":"WestAI - AI Service Center West"}],"department":[{"_id":"54"}],"user_id":"62152","editor":[{"first_name":"Sebastian","last_name":"Möller","full_name":"Möller, Sebastian"},{"full_name":"Gerkmann, Timo","last_name":"Gerkmann","first_name":"Timo"},{"first_name":"Dorothea","last_name":"Kolossa","full_name":"Kolossa, Dorothea"}],"abstract":[{"lang":"eng","text":"Zero-shot classifiers based on Contrastive Language-Audio Pretraining (CLAP) models enable classification of given audio into classes defined at test time using text. These models are costly to run with respect to computation and memory requirements. In this work, we propose to build a specialized low-resource classifier for classes pre-defined using text, using a two-stage procedure consisting of zero-shot data set pruning and model compression. First, relevant in-domain data is selected from a source dataset using class label embeddings obtained from a pre-trained CLAP model. This data is then used to distill the audio encoder of a CLAP model. The proposed compression method produces compact audio encoders with slightly reduced accuracy. Note that neither labeled nor unlabeled in-domain audio data is required for its development. 
We verify by cross-dataset tests that the resulting classifiers are indeed specialized to their task."}],"status":"public","publication":"Proceedings of the 16th ITG Conference on Speech Communication","type":"conference"},{"conference":{"location":"Copenhagen","end_date":"2025-03-20","start_date":"2025-03-17","name":"DAS|DAGA 2025 - 51st Annual Meeting on Acoustics"},"date_updated":"2025-11-28T13:21:13Z","author":[{"first_name":"Alexander","last_name":"Werning","id":"62152","full_name":"Werning, Alexander"},{"last_name":"Häb-Umbach","full_name":"Häb-Umbach, Reinhold","id":"242","first_name":"Reinhold"}],"place":"Copenhagen","citation":{"chicago":"Werning, Alexander, and Reinhold Häb-Umbach. “Distilling Efficient Audio Models Using Data Pruning with CLAP.” In <i>Proceedings of DAS|DAGA 2025</i>, edited by Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025. Copenhagen, 2025.","ieee":"A. Werning and R. Häb-Umbach, “Distilling Efficient Audio Models using Data Pruning with CLAP,” in <i>Proceedings of DAS|DAGA 2025</i>, Copenhagen, 2025.","ama":"Werning A, Häb-Umbach R. Distilling Efficient Audio Models using Data Pruning with CLAP. In: Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025, ed. <i>Proceedings of DAS|DAGA 2025</i>. ; 2025.","short":"A. Werning, R. Häb-Umbach, in: Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025 (Ed.), Proceedings of DAS|DAGA 2025, Copenhagen, 2025.","bibtex":"@inproceedings{Werning_Häb-Umbach_2025, place={Copenhagen}, title={Distilling Efficient Audio Models using Data Pruning with CLAP}, booktitle={Proceedings of DAS|DAGA 2025}, author={Werning, Alexander and Häb-Umbach, Reinhold}, editor={Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025}, year={2025} }","mla":"Werning, Alexander, and Reinhold Häb-Umbach. “Distilling Efficient Audio Models Using Data Pruning with CLAP.” <i>Proceedings of DAS|DAGA 2025</i>, edited by Deutsche Gesellschaft für Akustik e.V. 
(DEGA), Berlin, 2025, 2025.","apa":"Werning, A., &#38; Häb-Umbach, R. (2025). Distilling Efficient Audio Models using Data Pruning with CLAP. In Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025 (Ed.), <i>Proceedings of DAS|DAGA 2025</i>."},"has_accepted_license":"1","publication_identifier":{"unknown":["978-3-939296-23-2"]},"publication_status":"published","_id":"59900","project":[{"_id":"512","name":"WestAI - AI Service Center West"}],"department":[{"_id":"54"}],"user_id":"62152","status":"public","type":"conference","title":"Distilling Efficient Audio Models using Data Pruning with CLAP","date_created":"2025-05-14T13:18:10Z","year":"2025","corporate_editor":["Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025"],"ddc":["004"],"language":[{"iso":"eng"}],"abstract":[{"text":"Running state-of-the-art large-scale audio models on edge devices is often infeasible due to their limited storage and computing resources. It is therefore necessary to compress and tune the models for the specific target task and hardware. This is commonly achieved by distilling the audio model, the teacher, to a small target model, the student. However, this approach can be improved by prepending a dataset pruning stage and training the teacher on the pruned data set only, which contains examples relevant to the target task. Recently, CLAP models have emerged that embed audio and text examples in a common embedding space. We use the audio embeddings of the CLAP model for the above pruning stage, which is realized using a domain classifier. After knowledge distillation, the student is eventually fine-tuned on some data from the target domain. The CLAP architecture combines text and audio embedding spaces, which allows to search for data given only a textual description, such as a class label. 
We show how this can help data pruning.","lang":"eng"}],"publication":"Proceedings of DAS|DAGA 2025"},{"status":"public","publication":" ITG Conference on Speech Communication","type":"conference","language":[{"iso":"eng"}],"_id":"62174","department":[{"_id":"54"}],"user_id":"44393","year":"2025","citation":{"ama":"Meise AT, Cord-Landwehr T, Haeb-Umbach R. On the Application of Diffusion Models for Simultaneous Denoising and Dereverberation. In: <i> ITG Conference on Speech Communication</i>. ; 2025.","ieee":"A. T. Meise, T. Cord-Landwehr, and R. Haeb-Umbach, “On the Application of Diffusion Models for Simultaneous Denoising and Dereverberation,” presented at the ITG Conference on Speech Communication, Berlin, 2025.","chicago":"Meise, Adrian Tobias, Tobias Cord-Landwehr, and Reinhold Haeb-Umbach. “On the Application of Diffusion Models for Simultaneous Denoising and Dereverberation.” In <i> ITG Conference on Speech Communication</i>, 2025.","bibtex":"@inproceedings{Meise_Cord-Landwehr_Haeb-Umbach_2025, title={On the Application of Diffusion Models for Simultaneous Denoising and Dereverberation}, booktitle={ ITG Conference on Speech Communication}, author={Meise, Adrian Tobias and Cord-Landwehr, Tobias and Haeb-Umbach, Reinhold}, year={2025} }","mla":"Meise, Adrian Tobias, et al. “On the Application of Diffusion Models for Simultaneous Denoising and Dereverberation.” <i> ITG Conference on Speech Communication</i>, 2025.","short":"A.T. Meise, T. Cord-Landwehr, R. Haeb-Umbach, in:  ITG Conference on Speech Communication, 2025.","apa":"Meise, A. T., Cord-Landwehr, T., &#38; Haeb-Umbach, R. (2025). On the Application of Diffusion Models for Simultaneous Denoising and Dereverberation. <i> ITG Conference on Speech Communication</i>. 
ITG Conference on Speech Communication, Berlin."},"publication_identifier":{"isbn":["978-3-8007-6617-8"]},"title":"On the Application of Diffusion Models for Simultaneous Denoising and Dereverberation","conference":{"location":"Berlin","name":"ITG Conference on Speech Communication"},"date_updated":"2026-01-05T09:05:14Z","author":[{"first_name":"Adrian Tobias","id":"79268","full_name":"Meise, Adrian Tobias","last_name":"Meise"},{"first_name":"Tobias","id":"44393","full_name":"Cord-Landwehr, Tobias","last_name":"Cord-Landwehr"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"}],"date_created":"2025-11-13T07:21:51Z"},{"year":"2024","citation":{"apa":"Cornell, S., Park, T., Huang, S., Boeddeker, C., Chang, X., Maciejewski, M., Wiesner, M., Garcia, P., &#38; Watanabe, S. (2024). The CHiME-8 DASR Challenge for Generalizable and Array Agnostic Distant  Automatic Speech Recognition and Diarization. In <i>arXiv:2407.16447</i>.","mla":"Cornell, Samuele, et al. “The CHiME-8 DASR Challenge for Generalizable and Array Agnostic Distant  Automatic Speech Recognition and Diarization.” <i>ArXiv:2407.16447</i>, 2024.","short":"S. Cornell, T. Park, S. Huang, C. Boeddeker, X. Chang, M. Maciejewski, M. Wiesner, P. Garcia, S. Watanabe, ArXiv:2407.16447 (2024).","bibtex":"@article{Cornell_Park_Huang_Boeddeker_Chang_Maciejewski_Wiesner_Garcia_Watanabe_2024, title={The CHiME-8 DASR Challenge for Generalizable and Array Agnostic Distant  Automatic Speech Recognition and Diarization}, journal={arXiv:2407.16447}, author={Cornell, Samuele and Park, Taejin and Huang, Steve and Boeddeker, Christoph and Chang, Xuankai and Maciejewski, Matthew and Wiesner, Matthew and Garcia, Paola and Watanabe, Shinji}, year={2024} }","ieee":"S. Cornell <i>et al.</i>, “The CHiME-8 DASR Challenge for Generalizable and Array Agnostic Distant  Automatic Speech Recognition and Diarization,” <i>arXiv:2407.16447</i>. 
2024.","chicago":"Cornell, Samuele, Taejin Park, Steve Huang, Christoph Boeddeker, Xuankai Chang, Matthew Maciejewski, Matthew Wiesner, Paola Garcia, and Shinji Watanabe. “The CHiME-8 DASR Challenge for Generalizable and Array Agnostic Distant  Automatic Speech Recognition and Diarization.” <i>ArXiv:2407.16447</i>, 2024.","ama":"Cornell S, Park T, Huang S, et al. The CHiME-8 DASR Challenge for Generalizable and Array Agnostic Distant  Automatic Speech Recognition and Diarization. <i>arXiv:240716447</i>. Published online 2024."},"title":"The CHiME-8 DASR Challenge for Generalizable and Array Agnostic Distant  Automatic Speech Recognition and Diarization","main_file_link":[{"open_access":"1","url":"https://arxiv.org/pdf/2407.16447"}],"oa":"1","date_updated":"2024-09-30T08:09:40Z","date_created":"2024-09-30T08:08:46Z","author":[{"first_name":"Samuele","last_name":"Cornell","full_name":"Cornell, Samuele"},{"last_name":"Park","full_name":"Park, Taejin","first_name":"Taejin"},{"first_name":"Steve","last_name":"Huang","full_name":"Huang, Steve"},{"last_name":"Boeddeker","id":"40767","full_name":"Boeddeker, Christoph","first_name":"Christoph"},{"last_name":"Chang","full_name":"Chang, Xuankai","first_name":"Xuankai"},{"full_name":"Maciejewski, Matthew","last_name":"Maciejewski","first_name":"Matthew"},{"first_name":"Matthew","last_name":"Wiesner","full_name":"Wiesner, Matthew"},{"first_name":"Paola","full_name":"Garcia, Paola","last_name":"Garcia"},{"last_name":"Watanabe","full_name":"Watanabe, Shinji","first_name":"Shinji"}],"abstract":[{"text":"This paper presents the CHiME-8 DASR challenge which carries on from the\r\nprevious edition CHiME-7 DASR (C7DASR) and the past CHiME-6 challenge. It\r\nfocuses on joint multi-channel distant speech recognition (DASR) and\r\ndiarization with one or more, possibly heterogeneous, devices. 
The main goal is\r\nto spur research towards meeting transcription approaches that can generalize\r\nacross arbitrary number of speakers, diverse settings (formal vs. informal\r\nconversations), meeting duration, wide-variety of acoustic scenarios and\r\ndifferent recording configurations. Novelties with respect to C7DASR include:\r\ni) the addition of NOTSOFAR-1, an additional office/corporate meeting scenario,\r\nii) a manually corrected Mixer 6 development set, iii) a new track in which we\r\nallow the use of large-language models (LLM) iv) a jury award mechanism to\r\nencourage participants to explore also more practical and innovative solutions.\r\nTo lower the entry barrier for participants, we provide a standalone toolkit\r\nfor downloading and preparing such datasets as well as performing text\r\nnormalization and scoring their submissions. Furthermore, this year we also\r\nprovide two baseline systems, one directly inherited from C7DASR and based on\r\nESPnet and another one developed on NeMo and based on NeMo team submission in\r\nlast year C7DASR. 
Baseline system results suggest that the addition of the\r\nNOTSOFAR-1 scenario significantly increases the task's difficulty due to its\r\nhigh number of speakers and very short duration.","lang":"eng"}],"status":"public","type":"preprint","publication":"arXiv:2407.16447","language":[{"iso":"eng"}],"_id":"56273","external_id":{"arxiv":["2407.16447"]},"user_id":"40767","department":[{"_id":"54"}]},{"file_date_updated":"2025-04-16T10:21:45Z","_id":"52958","project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing"},{"_id":"508","name":"Automatische Transkription von Gesprächssituationen","grant_number":"448568305"}],"department":[{"_id":"54"}],"user_id":"40767","status":"public","type":"journal_article","doi":"10.1109/taslp.2024.3350887","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/2303.03849"}],"oa":"1","date_updated":"2025-04-16T10:21:45Z","volume":32,"author":[{"first_name":"Christoph","last_name":"Boeddeker","id":"40767","full_name":"Boeddeker, Christoph"},{"first_name":"Aswin Shanmugam","full_name":"Subramanian, Aswin Shanmugam","last_name":"Subramanian"},{"first_name":"Gordon","last_name":"Wichern","full_name":"Wichern, Gordon"},{"last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242","first_name":"Reinhold"},{"full_name":"Le Roux, Jonathan","last_name":"Le Roux","first_name":"Jonathan"}],"intvolume":"        32","page":"1185-1197","citation":{"ieee":"C. Boeddeker, A. S. Subramanian, G. Wichern, R. Haeb-Umbach, and J. Le Roux, “TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings,” <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i>, vol. 32, pp. 1185–1197, 2024, doi: <a href=\"https://doi.org/10.1109/taslp.2024.3350887\">10.1109/taslp.2024.3350887</a>.","chicago":"Boeddeker, Christoph, Aswin Shanmugam Subramanian, Gordon Wichern, Reinhold Haeb-Umbach, and Jonathan Le Roux. 
“TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings.” <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i> 32 (2024): 1185–97. <a href=\"https://doi.org/10.1109/taslp.2024.3350887\">https://doi.org/10.1109/taslp.2024.3350887</a>.","ama":"Boeddeker C, Subramanian AS, Wichern G, Haeb-Umbach R, Le Roux J. TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings. <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i>. 2024;32:1185-1197. doi:<a href=\"https://doi.org/10.1109/taslp.2024.3350887\">10.1109/taslp.2024.3350887</a>","apa":"Boeddeker, C., Subramanian, A. S., Wichern, G., Haeb-Umbach, R., &#38; Le Roux, J. (2024). TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings. <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i>, <i>32</i>, 1185–1197. <a href=\"https://doi.org/10.1109/taslp.2024.3350887\">https://doi.org/10.1109/taslp.2024.3350887</a>","bibtex":"@article{Boeddeker_Subramanian_Wichern_Haeb-Umbach_Le Roux_2024, title={TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings}, volume={32}, DOI={<a href=\"https://doi.org/10.1109/taslp.2024.3350887\">10.1109/taslp.2024.3350887</a>}, journal={IEEE/ACM Transactions on Audio, Speech, and Language Processing}, publisher={Institute of Electrical and Electronics Engineers (IEEE)}, author={Boeddeker, Christoph and Subramanian, Aswin Shanmugam and Wichern, Gordon and Haeb-Umbach, Reinhold and Le Roux, Jonathan}, year={2024}, pages={1185–1197} }","mla":"Boeddeker, Christoph, et al. “TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings.” <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i>, vol. 32, Institute of Electrical and Electronics Engineers (IEEE), 2024, pp. 1185–97, doi:<a href=\"https://doi.org/10.1109/taslp.2024.3350887\">10.1109/taslp.2024.3350887</a>.","short":"C. Boeddeker, A.S. 
Subramanian, G. Wichern, R. Haeb-Umbach, J. Le Roux, IEEE/ACM Transactions on Audio, Speech, and Language Processing 32 (2024) 1185–1197."},"publication_identifier":{"issn":["2329-9290","2329-9304"]},"has_accepted_license":"1","publication_status":"published","keyword":["Electrical and Electronic Engineering","Acoustics and Ultrasonics","Computer Science (miscellaneous)","Computational Mathematics"],"ddc":["000"],"language":[{"iso":"eng"}],"file":[{"date_created":"2025-04-16T10:14:47Z","creator":"cbj","date_updated":"2025-04-16T10:21:45Z","file_id":"59602","file_name":"main.pdf","access_level":"open_access","file_size":3432879,"content_type":"application/pdf","relation":"main_file"},{"content_type":"application/pdf","relation":"main_file","creator":"cbj","date_created":"2025-04-16T10:15:08Z","date_updated":"2025-04-16T10:21:45Z","access_level":"open_access","file_id":"59603","file_name":"slides.pdf","file_size":2838635},{"relation":"main_file","content_type":"application/pdf","file_size":2038741,"file_name":"poster.pdf","file_id":"59604","access_level":"open_access","date_updated":"2025-04-16T10:21:45Z","creator":"cbj","date_created":"2025-04-16T10:15:22Z"}],"publication":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","title":"TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings","publisher":"Institute of Electrical and Electronics Engineers (IEEE)","date_created":"2024-03-26T16:11:54Z","year":"2024"},{"date_updated":"2024-11-18T09:45:14Z","author":[{"id":"62152","full_name":"Werning, Alexander","last_name":"Werning","first_name":"Alexander"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242"}],"date_created":"2024-11-18T09:44:46Z","title":"UPB-NT submission to DCASE24: Dataset pruning for targeted knowledge distillation","year":"2024","citation":{"mla":"Werning, Alexander, and Reinhold Haeb-Umbach. 
<i>UPB-NT Submission to DCASE24: Dataset Pruning for Targeted Knowledge Distillation</i>. 2024.","short":"A. Werning, R. Haeb-Umbach, UPB-NT Submission to DCASE24: Dataset Pruning for Targeted Knowledge Distillation, 2024.","bibtex":"@book{Werning_Haeb-Umbach_2024, title={UPB-NT submission to DCASE24: Dataset pruning for targeted knowledge distillation}, author={Werning, Alexander and Haeb-Umbach, Reinhold}, year={2024} }","apa":"Werning, A., &#38; Haeb-Umbach, R. (2024). <i>UPB-NT submission to DCASE24: Dataset pruning for targeted knowledge distillation</i>.","ama":"Werning A, Haeb-Umbach R. <i>UPB-NT Submission to DCASE24: Dataset Pruning for Targeted Knowledge Distillation</i>.; 2024.","ieee":"A. Werning and R. Haeb-Umbach, <i>UPB-NT submission to DCASE24: Dataset pruning for targeted knowledge distillation</i>. 2024.","chicago":"Werning, Alexander, and Reinhold Haeb-Umbach. <i>UPB-NT Submission to DCASE24: Dataset Pruning for Targeted Knowledge Distillation</i>, 2024."},"project":[{"_id":"512","name":"WestAI - AI Service Center West"}],"_id":"57161","user_id":"62152","department":[{"_id":"54"}],"language":[{"iso":"eng"}],"type":"report","status":"public"},{"page":"436–440","citation":{"ieee":"Y. Xie, M. Kuhlmann, F. Rautenberg, Z.-H. Tan, and R. Häb-Umbach, “Speaker and Style Disentanglement of Speech Based on Contrastive Predictive Coding Supported Factorized Variational Autoencoder,” in <i>2024 32nd European Signal Processing Conference (EUSIPCO)</i>, 2024, pp. 436–440.","chicago":"Xie, Yuying, Michael Kuhlmann, Frederik Rautenberg, Zheng-Hua Tan, and Reinhold Häb-Umbach. “Speaker and Style Disentanglement of Speech Based on Contrastive Predictive Coding Supported Factorized Variational Autoencoder.” In <i>2024 32nd European Signal Processing Conference (EUSIPCO)</i>, 436–440, 2024.","ama":"Xie Y, Kuhlmann M, Rautenberg F, Tan Z-H, Häb-Umbach R. 
Speaker and Style Disentanglement of Speech Based on Contrastive Predictive Coding Supported Factorized Variational Autoencoder. In: <i>2024 32nd European Signal Processing Conference (EUSIPCO)</i>. ; 2024:436–440.","apa":"Xie, Y., Kuhlmann, M., Rautenberg, F., Tan, Z.-H., &#38; Häb-Umbach, R. (2024). Speaker and Style Disentanglement of Speech Based on Contrastive Predictive Coding Supported Factorized Variational Autoencoder. <i>2024 32nd European Signal Processing Conference (EUSIPCO)</i>, 436–440.","bibtex":"@inproceedings{Xie_Kuhlmann_Rautenberg_Tan_Häb-Umbach_2024, title={Speaker and Style Disentanglement of Speech Based on Contrastive Predictive Coding Supported Factorized Variational Autoencoder}, booktitle={2024 32nd European Signal Processing Conference (EUSIPCO)}, author={Xie, Yuying and Kuhlmann, Michael and Rautenberg, Frederik and Tan, Zheng-Hua and Häb-Umbach, Reinhold}, year={2024}, pages={436–440} }","mla":"Xie, Yuying, et al. “Speaker and Style Disentanglement of Speech Based on Contrastive Predictive Coding Supported Factorized Variational Autoencoder.” <i>2024 32nd European Signal Processing Conference (EUSIPCO)</i>, 2024, pp. 436–440.","short":"Y. Xie, M. Kuhlmann, F. Rautenberg, Z.-H. Tan, R. Häb-Umbach, in: 2024 32nd European Signal Processing Conference (EUSIPCO), 2024, pp. 
436–440."},"year":"2024","date_created":"2024-11-15T06:52:54Z","author":[{"full_name":"Xie, Yuying","last_name":"Xie","first_name":"Yuying"},{"full_name":"Kuhlmann, Michael","id":"49871","last_name":"Kuhlmann","first_name":"Michael"},{"first_name":"Frederik","last_name":"Rautenberg","id":"72602","full_name":"Rautenberg, Frederik"},{"last_name":"Tan","full_name":"Tan, Zheng-Hua","first_name":"Zheng-Hua"},{"full_name":"Häb-Umbach, Reinhold","id":"242","last_name":"Häb-Umbach","first_name":"Reinhold"}],"date_updated":"2024-11-15T06:54:40Z","title":"Speaker and Style Disentanglement of Speech Based on Contrastive Predictive Coding Supported Factorized Variational Autoencoder","publication":"2024 32nd European Signal Processing Conference (EUSIPCO)","type":"conference","status":"public","department":[{"_id":"54"}],"user_id":"49871","_id":"57099","language":[{"iso":"eng"}]},{"year":"2024","citation":{"apa":"von Neumann, T., Boeddeker, C., Cord-Landwehr, T., Delcroix, M., &#38; Haeb-Umbach, R. (2024). Meeting Recognition with Continuous Speech Separation and Transcription-Supported Diarization. <i>2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)</i>. <a href=\"https://doi.org/10.1109/icasspw62465.2024.10625894\">https://doi.org/10.1109/icasspw62465.2024.10625894</a>","mla":"von Neumann, Thilo, et al. 
“Meeting Recognition with Continuous Speech Separation and Transcription-Supported Diarization.” <i>2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)</i>, IEEE, 2024, doi:<a href=\"https://doi.org/10.1109/icasspw62465.2024.10625894\">10.1109/icasspw62465.2024.10625894</a>.","bibtex":"@inproceedings{von_Neumann_Boeddeker_Cord-Landwehr_Delcroix_Haeb-Umbach_2024, title={Meeting Recognition with Continuous Speech Separation and Transcription-Supported Diarization}, DOI={<a href=\"https://doi.org/10.1109/icasspw62465.2024.10625894\">10.1109/icasspw62465.2024.10625894</a>}, booktitle={2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)}, publisher={IEEE}, author={von Neumann, Thilo and Boeddeker, Christoph and Cord-Landwehr, Tobias and Delcroix, Marc and Haeb-Umbach, Reinhold}, year={2024} }","short":"T. von Neumann, C. Boeddeker, T. Cord-Landwehr, M. Delcroix, R. Haeb-Umbach, in: 2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW), IEEE, 2024.","ama":"von Neumann T, Boeddeker C, Cord-Landwehr T, Delcroix M, Haeb-Umbach R. Meeting Recognition with Continuous Speech Separation and Transcription-Supported Diarization. In: <i>2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)</i>. IEEE; 2024. doi:<a href=\"https://doi.org/10.1109/icasspw62465.2024.10625894\">10.1109/icasspw62465.2024.10625894</a>","ieee":"T. von Neumann, C. Boeddeker, T. Cord-Landwehr, M. Delcroix, and R. Haeb-Umbach, “Meeting Recognition with Continuous Speech Separation and Transcription-Supported Diarization,” 2024, doi: <a href=\"https://doi.org/10.1109/icasspw62465.2024.10625894\">10.1109/icasspw62465.2024.10625894</a>.","chicago":"Neumann, Thilo von, Christoph Boeddeker, Tobias Cord-Landwehr, Marc Delcroix, and Reinhold Haeb-Umbach. 
“Meeting Recognition with Continuous Speech Separation and Transcription-Supported Diarization.” In <i>2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)</i>. IEEE, 2024. <a href=\"https://doi.org/10.1109/icasspw62465.2024.10625894\">https://doi.org/10.1109/icasspw62465.2024.10625894</a>."},"has_accepted_license":"1","publication_status":"published","title":"Meeting Recognition with Continuous Speech Separation and Transcription-Supported Diarization","doi":"10.1109/icasspw62465.2024.10625894","publisher":"IEEE","date_updated":"2025-02-12T09:20:07Z","oa":"1","author":[{"last_name":"von Neumann","orcid":"https://orcid.org/0000-0002-7717-8670","full_name":"von Neumann, Thilo","id":"49870","first_name":"Thilo"},{"first_name":"Christoph","full_name":"Boeddeker, Christoph","id":"40767","last_name":"Boeddeker"},{"first_name":"Tobias","id":"44393","full_name":"Cord-Landwehr, Tobias","last_name":"Cord-Landwehr"},{"first_name":"Marc","last_name":"Delcroix","full_name":"Delcroix, Marc"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"}],"date_created":"2024-09-04T07:26:02Z","status":"public","file":[{"date_updated":"2024-09-04T07:34:30Z","creator":"tvn","date_created":"2024-09-04T07:34:30Z","file_size":150432,"access_level":"open_access","file_name":"main.pdf","file_id":"56005","content_type":"application/pdf","relation":"main_file"}],"publication":"2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)","type":"conference","ddc":["000"],"file_date_updated":"2024-09-04T07:34:30Z","language":[{"iso":"eng"}],"_id":"56004","project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"},{"_id":"508","name":"Automatische Transkription von Gesprächssituationen","grant_number":"448568305"}],"department":[{"_id":"54"}],"user_id":"40767"},{"status":"public","publication":"Interspeech 
2024","type":"conference","language":[{"iso":"eng"}],"department":[{"_id":"54"}],"user_id":"40767","_id":"56272","project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing"},{"grant_number":"448568305","_id":"508","name":"Automatische Transkription von Gesprächssituationen"}],"citation":{"apa":"Boeddeker, C., Cord-Landwehr, T., &#38; Haeb-Umbach, R. (2024). Once more Diarization: Improving meeting transcription systems through segment-level speaker reassignment. <i>Interspeech 2024</i>. <a href=\"https://doi.org/10.21437/interspeech.2024-1286\">https://doi.org/10.21437/interspeech.2024-1286</a>","short":"C. Boeddeker, T. Cord-Landwehr, R. Haeb-Umbach, in: Interspeech 2024, ISCA, 2024.","bibtex":"@inproceedings{Boeddeker_Cord-Landwehr_Haeb-Umbach_2024, title={Once more Diarization: Improving meeting transcription systems through segment-level speaker reassignment}, DOI={<a href=\"https://doi.org/10.21437/interspeech.2024-1286\">10.21437/interspeech.2024-1286</a>}, booktitle={Interspeech 2024}, publisher={ISCA}, author={Boeddeker, Christoph and Cord-Landwehr, Tobias and Haeb-Umbach, Reinhold}, year={2024} }","mla":"Boeddeker, Christoph, et al. “Once More Diarization: Improving Meeting Transcription Systems through Segment-Level Speaker Reassignment.” <i>Interspeech 2024</i>, ISCA, 2024, doi:<a href=\"https://doi.org/10.21437/interspeech.2024-1286\">10.21437/interspeech.2024-1286</a>.","ama":"Boeddeker C, Cord-Landwehr T, Haeb-Umbach R. Once more Diarization: Improving meeting transcription systems through segment-level speaker reassignment. In: <i>Interspeech 2024</i>. ISCA; 2024. doi:<a href=\"https://doi.org/10.21437/interspeech.2024-1286\">10.21437/interspeech.2024-1286</a>","chicago":"Boeddeker, Christoph, Tobias Cord-Landwehr, and Reinhold Haeb-Umbach. “Once More Diarization: Improving Meeting Transcription Systems through Segment-Level Speaker Reassignment.” In <i>Interspeech 2024</i>. ISCA, 2024. 
<a href=\"https://doi.org/10.21437/interspeech.2024-1286\">https://doi.org/10.21437/interspeech.2024-1286</a>.","ieee":"C. Boeddeker, T. Cord-Landwehr, and R. Haeb-Umbach, “Once more Diarization: Improving meeting transcription systems through segment-level speaker reassignment,” 2024, doi: <a href=\"https://doi.org/10.21437/interspeech.2024-1286\">10.21437/interspeech.2024-1286</a>."},"publication_status":"published","doi":"10.21437/interspeech.2024-1286","main_file_link":[{"url":"https://www.isca-archive.org/interspeech_2024/boeddeker24_interspeech.pdf","open_access":"1"}],"title":"Once more Diarization: Improving meeting transcription systems through segment-level speaker reassignment","author":[{"first_name":"Christoph","last_name":"Boeddeker","full_name":"Boeddeker, Christoph","id":"40767"},{"last_name":"Cord-Landwehr","full_name":"Cord-Landwehr, Tobias","id":"44393","first_name":"Tobias"},{"last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242","first_name":"Reinhold"}],"date_created":"2024-09-30T08:04:47Z","oa":"1","publisher":"ISCA","date_updated":"2025-02-12T09:18:36Z"},{"citation":{"bibtex":"@inproceedings{Vieting_Berger_von_Neumann_Boeddeker_Schlüter_Haeb-Umbach_2024, title={Combining TF-GridNet and Mixture Encoder for Continuous Speech Separation for Meeting Transcription}, booktitle={2024 IEEE Spoken Language Technology Workshop (SLT)}, author={Vieting, Peter and Berger, Simon and von Neumann, Thilo and Boeddeker, Christoph and Schlüter, Ralf and Haeb-Umbach, Reinhold}, year={2024} }","short":"P. Vieting, S. Berger, T. von Neumann, C. Boeddeker, R. Schlüter, R. Haeb-Umbach, in: 2024 IEEE Spoken Language Technology Workshop (SLT), 2024.","mla":"Vieting, Peter, et al. 
“Combining TF-GridNet and Mixture Encoder for Continuous Speech Separation for Meeting Transcription.” <i>2024 IEEE Spoken Language Technology Workshop (SLT)</i>, 2024.","apa":"Vieting, P., Berger, S., von Neumann, T., Boeddeker, C., Schlüter, R., &#38; Haeb-Umbach, R. (2024). Combining TF-GridNet and Mixture Encoder for Continuous Speech Separation for Meeting Transcription. <i>2024 IEEE Spoken Language Technology Workshop (SLT)</i>.","ieee":"P. Vieting, S. Berger, T. von Neumann, C. Boeddeker, R. Schlüter, and R. Haeb-Umbach, “Combining TF-GridNet and Mixture Encoder for Continuous Speech Separation for Meeting Transcription,” 2024.","chicago":"Vieting, Peter, Simon Berger, Thilo von Neumann, Christoph Boeddeker, Ralf Schlüter, and Reinhold Haeb-Umbach. “Combining TF-GridNet and Mixture Encoder for Continuous Speech Separation for Meeting Transcription.” In <i>2024 IEEE Spoken Language Technology Workshop (SLT)</i>, 2024.","ama":"Vieting P, Berger S, von Neumann T, Boeddeker C, Schlüter R, Haeb-Umbach R. Combining TF-GridNet and Mixture Encoder for Continuous Speech Separation for Meeting Transcription. In: <i>2024 IEEE Spoken Language Technology Workshop (SLT)</i>. 
; 2024."},"year":"2024","date_created":"2024-12-09T11:46:18Z","author":[{"first_name":"Peter","last_name":"Vieting","full_name":"Vieting, Peter"},{"first_name":"Simon","last_name":"Berger","full_name":"Berger, Simon"},{"last_name":"von Neumann","orcid":"https://orcid.org/0000-0002-7717-8670","id":"49870","full_name":"von Neumann, Thilo","first_name":"Thilo"},{"first_name":"Christoph","last_name":"Boeddeker","full_name":"Boeddeker, Christoph","id":"40767"},{"full_name":"Schlüter, Ralf","last_name":"Schlüter","first_name":"Ralf"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold"}],"date_updated":"2025-02-12T09:20:59Z","oa":"1","main_file_link":[{"open_access":"1","url":"https://www-i6.informatik.rwth-aachen.de/publications/download/1259/VietingPeterBergerSimonNeumannThilovonBoeddekerChristophSchl%FCterRalfHaeb-UmbachReinhold--CombiningTF-GridNetMixtureEncoderforContinuousSpeechSeparationforMeetingTranscription--2024.pdf"}],"title":"Combining TF-GridNet and Mixture Encoder for Continuous Speech Separation for Meeting Transcription","publication":"2024 IEEE Spoken Language Technology Workshop (SLT)","type":"conference","status":"public","department":[{"_id":"54"}],"user_id":"40767","_id":"57659","project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"},{"_id":"508","name":"Automatische Transkription von Gesprächssituationen","grant_number":"448568305"}],"language":[{"iso":"eng"}]},{"file_date_updated":"2025-08-14T08:11:57Z","department":[{"_id":"54"}],"user_id":"44393","_id":"57085","project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing"},{"name":"Automatische Transkription von Gesprächssituationen","_id":"508"}],"status":"public","type":"conference","doi":"10.1109/ICASSP49660.2025.10888445","conference":{"location":"Hyderabad, India","name":"2025 IEEE International Conference on Acoustics, Speech and Signal 
Processing (ICASSP)"},"main_file_link":[{"open_access":"1","url":"https://arxiv.org/pdf/2410.21455"}],"author":[{"first_name":"Tobias","last_name":"Cord-Landwehr","id":"44393","full_name":"Cord-Landwehr, Tobias"},{"last_name":"Boeddeker","id":"40767","full_name":"Boeddeker, Christoph","first_name":"Christoph"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"}],"oa":"1","date_updated":"2025-08-14T08:12:22Z","citation":{"mla":"Cord-Landwehr, Tobias, et al. “Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models.” <i>ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2024, doi:<a href=\"https://doi.org/10.1109/ICASSP49660.2025.10888445\">10.1109/ICASSP49660.2025.10888445</a>.","short":"T. Cord-Landwehr, C. Boeddeker, R. Haeb-Umbach, in: ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2024.","bibtex":"@inproceedings{Cord-Landwehr_Boeddeker_Haeb-Umbach_2024, title={Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models}, DOI={<a href=\"https://doi.org/10.1109/ICASSP49660.2025.10888445\">10.1109/ICASSP49660.2025.10888445</a>}, booktitle={ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, author={Cord-Landwehr, Tobias and Boeddeker, Christoph and Haeb-Umbach, Reinhold}, year={2024} }","apa":"Cord-Landwehr, T., Boeddeker, C., &#38; Haeb-Umbach, R. (2024). Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models. <i>ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Hyderabad, India. 
<a href=\"https://doi.org/10.1109/ICASSP49660.2025.10888445\">https://doi.org/10.1109/ICASSP49660.2025.10888445</a>","chicago":"Cord-Landwehr, Tobias, Christoph Boeddeker, and Reinhold Haeb-Umbach. “Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models.” In <i>ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2024. <a href=\"https://doi.org/10.1109/ICASSP49660.2025.10888445\">https://doi.org/10.1109/ICASSP49660.2025.10888445</a>.","ieee":"T. Cord-Landwehr, C. Boeddeker, and R. Haeb-Umbach, “Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models,” presented at the 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Hyderabad, India, 2024, doi: <a href=\"https://doi.org/10.1109/ICASSP49660.2025.10888445\">10.1109/ICASSP49660.2025.10888445</a>.","ama":"Cord-Landwehr T, Boeddeker C, Haeb-Umbach R. Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models. In: <i>ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. ; 2024. doi:<a href=\"https://doi.org/10.1109/ICASSP49660.2025.10888445\">10.1109/ICASSP49660.2025.10888445</a>"},"has_accepted_license":"1","language":[{"iso":"eng"}],"keyword":["diarization","source separation","mixture model","meeting"],"ddc":["000"],"file":[{"file_name":"main.pdf","file_id":"60930","access_level":"closed","file_size":259907,"date_created":"2025-08-14T08:11:57Z","creator":"cord","date_updated":"2025-08-14T08:11:57Z","relation":"main_file","success":1,"content_type":"application/pdf"}],"abstract":[{"text":"We propose an approach for simultaneous diarization and separation of meeting data. 
It consists of a complex Angular Central Gaussian Mixture Model (cACGMM) for speech source separation, and a von-Mises-Fisher Mixture Model (VMFMM) for diarization in a joint statistical framework. Through the integration, both spatial and spectral information are exploited for diarization and separation. We also develop a method for counting the number of active speakers in a segment of a meeting to support block-wise processing. While the total number of speakers in a meeting may be known, it is usually not known on a per-segment level. With the proposed speaker counting, joint diarization and source separation can be done segment-by-segment, and the permutation problem across segments is solved, thus allowing for block-online processing in the future. Experimental results on the LibriCSS meeting corpus show that the integrated approach outperforms a cascaded approach of diarization and speech enhancement in terms of WER, both on a per-segment and on a per-meeting level.","lang":"eng"}],"publication":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","title":"Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models","date_created":"2024-11-14T09:32:38Z","year":"2024"},{"file":[{"file_size":254478,"file_name":"main.pdf","access_level":"closed","file_id":"60929","date_updated":"2025-08-14T08:09:52Z","date_created":"2025-08-14T08:09:52Z","creator":"cord","success":1,"relation":"main_file","content_type":"application/pdf"}],"publication":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","ddc":["000"],"language":[{"iso":"eng"}],"year":"2024","title":"Geodesic Interpolation of Frame-Wise Speaker Embeddings for the Diarization of Meeting Scenarios","publisher":"IEEE","date_created":"2024-04-25T12:57:22Z","status":"public","type":"conference","file_date_updated":"2025-08-14T08:09:52Z","project":[{"name":"PC2: Computing 
Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"},{"_id":"508","name":"Automatische Transkription von Gesprächssituationen"}],"_id":"53659","user_id":"44393","department":[{"_id":"54"}],"citation":{"ama":"Cord-Landwehr T, Boeddeker C, Zorilă C, Doddipatla R, Haeb-Umbach R. Geodesic Interpolation of Frame-Wise Speaker Embeddings for the Diarization of Meeting Scenarios. In: <i>ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. IEEE; 2024. doi:<a href=\"https://doi.org/10.1109/icassp48485.2024.10445911\">10.1109/icassp48485.2024.10445911</a>","ieee":"T. Cord-Landwehr, C. Boeddeker, C. Zorilă, R. Doddipatla, and R. Haeb-Umbach, “Geodesic Interpolation of Frame-Wise Speaker Embeddings for the Diarization of Meeting Scenarios,” presented at the 2024 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), Seoul, 2024, doi: <a href=\"https://doi.org/10.1109/icassp48485.2024.10445911\">10.1109/icassp48485.2024.10445911</a>.","chicago":"Cord-Landwehr, Tobias, Christoph Boeddeker, Cătălin Zorilă, Rama Doddipatla, and Reinhold Haeb-Umbach. “Geodesic Interpolation of Frame-Wise Speaker Embeddings for the Diarization of Meeting Scenarios.” In <i>ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. IEEE, 2024. <a href=\"https://doi.org/10.1109/icassp48485.2024.10445911\">https://doi.org/10.1109/icassp48485.2024.10445911</a>.","apa":"Cord-Landwehr, T., Boeddeker, C., Zorilă, C., Doddipatla, R., &#38; Haeb-Umbach, R. (2024). Geodesic Interpolation of Frame-Wise Speaker Embeddings for the Diarization of Meeting Scenarios. <i>ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. 2024 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), Seoul. 
<a href=\"https://doi.org/10.1109/icassp48485.2024.10445911\">https://doi.org/10.1109/icassp48485.2024.10445911</a>","bibtex":"@inproceedings{Cord-Landwehr_Boeddeker_Zorilă_Doddipatla_Haeb-Umbach_2024, title={Geodesic Interpolation of Frame-Wise Speaker Embeddings for the Diarization of Meeting Scenarios}, DOI={<a href=\"https://doi.org/10.1109/icassp48485.2024.10445911\">10.1109/icassp48485.2024.10445911</a>}, booktitle={ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, publisher={IEEE}, author={Cord-Landwehr, Tobias and Boeddeker, Christoph and Zorilă, Cătălin and Doddipatla, Rama and Haeb-Umbach, Reinhold}, year={2024} }","mla":"Cord-Landwehr, Tobias, et al. “Geodesic Interpolation of Frame-Wise Speaker Embeddings for the Diarization of Meeting Scenarios.” <i>ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, IEEE, 2024, doi:<a href=\"https://doi.org/10.1109/icassp48485.2024.10445911\">10.1109/icassp48485.2024.10445911</a>.","short":"T. Cord-Landwehr, C. Boeddeker, C. Zorilă, R. Doddipatla, R. 
Haeb-Umbach, in: ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE, 2024."},"publication_status":"published","has_accepted_license":"1","doi":"10.1109/icassp48485.2024.10445911","conference":{"name":"2024 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)","location":"Seoul"},"date_updated":"2025-08-14T08:11:07Z","author":[{"last_name":"Cord-Landwehr","id":"44393","full_name":"Cord-Landwehr, Tobias","first_name":"Tobias"},{"last_name":"Boeddeker","id":"40767","full_name":"Boeddeker, Christoph","first_name":"Christoph"},{"first_name":"Cătălin","last_name":"Zorilă","full_name":"Zorilă, Cătălin"},{"first_name":"Rama","last_name":"Doddipatla","full_name":"Doddipatla, Rama"},{"last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242","first_name":"Reinhold"}]},{"year":"2024","citation":{"ama":"Werning A, Haeb-Umbach R. Target-Specific Dataset Pruning for Compression of Audio Tagging Models. In: <i>32nd European Signal Processing Conference (EUSIPCO 2024)</i>. ; 2024.","chicago":"Werning, Alexander, and Reinhold Haeb-Umbach. “Target-Specific Dataset Pruning for Compression of Audio Tagging Models.” In <i>32nd European Signal Processing Conference (EUSIPCO 2024)</i>, 2024.","ieee":"A. Werning and R. Haeb-Umbach, “Target-Specific Dataset Pruning for Compression of Audio Tagging Models,” presented at the 32nd European Signal Processing Conference, Lyon, 2024.","apa":"Werning, A., &#38; Haeb-Umbach, R. (2024). Target-Specific Dataset Pruning for Compression of Audio Tagging Models. <i>32nd European Signal Processing Conference (EUSIPCO 2024)</i>. 
32nd European Signal Processing Conference, Lyon.","bibtex":"@inproceedings{Werning_Haeb-Umbach_2024, title={Target-Specific Dataset Pruning for Compression of Audio Tagging Models}, booktitle={32nd European Signal Processing Conference (EUSIPCO 2024)}, author={Werning, Alexander and Haeb-Umbach, Reinhold}, year={2024} }","short":"A. Werning, R. Haeb-Umbach, in: 32nd European Signal Processing Conference (EUSIPCO 2024), 2024.","mla":"Werning, Alexander, and Reinhold Haeb-Umbach. “Target-Specific Dataset Pruning for Compression of Audio Tagging Models.” <i>32nd European Signal Processing Conference (EUSIPCO 2024)</i>, 2024."},"quality_controlled":"1","has_accepted_license":"1","title":"Target-Specific Dataset Pruning for Compression of Audio Tagging Models","conference":{"location":"Lyon","name":"32nd European Signal Processing Conference"},"date_updated":"2025-11-28T13:22:00Z","date_created":"2024-11-18T09:29:16Z","author":[{"first_name":"Alexander","full_name":"Werning, Alexander","id":"62152","last_name":"Werning"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242"}],"abstract":[{"lang":"eng","text":"Large audio tagging models are usually trained or pre-trained on AudioSet, a dataset that encompasses a large amount of different sound classes and acoustic environments. Knowledge distillation has emerged as a method to compress such models without compromising their effectiveness. There are many different applications for audio tagging, some of which require a specialization to a narrow domain of sounds to be classified. For these scenarios, it is beneficial to distill the large audio tagger with respect to a specific subset of sounds of interest. A method to prune a general dataset with respect to a target dataset is presented. 
By distilling with such a specialized pruned dataset, we obtain a compressed model with better classification accuracy in the specific target domain than with target-agnostic distillation."}],"status":"public","file":[{"date_updated":"2024-11-18T12:10:09Z","date_created":"2024-11-18T12:10:09Z","creator":"awerning","file_size":183539,"file_name":"Eusipco__Target_specific_Dataset_Pruning_for_Compression_of_Audio_Tagging_Models.pdf","file_id":"57200","access_level":"closed","content_type":"application/pdf","success":1,"relation":"main_file"}],"publication":"32nd European Signal Processing Conference (EUSIPCO 2024)","type":"conference","keyword":["data pruning","knowledge distillation","audio tagging"],"ddc":["000"],"file_date_updated":"2024-11-18T12:10:09Z","language":[{"iso":"eng"}],"_id":"57160","project":[{"name":"WestAI - AI Service Center West","_id":"512"}],"department":[{"_id":"54"}],"user_id":"62152"},{"file_date_updated":"2024-11-13T08:42:13Z","language":[{"iso":"eng"}],"ddc":["006"],"user_id":"44006","department":[{"_id":"54"}],"_id":"57031","file":[{"creator":"tgburrek","date_created":"2024-11-13T08:42:13Z","date_updated":"2024-11-13T08:42:13Z","file_name":"dist_est_iwaenc24_gburrek.pdf","access_level":"open_access","file_id":"57033","file_size":300589,"content_type":"application/pdf","relation":"main_file"}],"status":"public","type":"conference","publication":"2024 18th International Workshop on Acoustic Signal Enhancement (IWAENC)","doi":"10.1109/iwaenc61483.2024.10694103","title":"Diminishing Domain Mismatch for DNN-Based Acoustic Distance Estimation via Stochastic Room Reverberation Models","author":[{"first_name":"Tobias","last_name":"Gburrek","full_name":"Gburrek, Tobias","id":"44006"},{"first_name":"Adrian Tobias","last_name":"Meise","id":"79268","full_name":"Meise, Adrian Tobias"},{"last_name":"Schmalenstroeer","id":"460","full_name":"Schmalenstroeer, Joerg","first_name":"Joerg"},{"last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, 
Reinhold","id":"242","first_name":"Reinhold"}],"date_created":"2024-11-13T08:39:57Z","publisher":"IEEE","date_updated":"2026-01-05T16:28:23Z","oa":"1","citation":{"ama":"Gburrek T, Meise AT, Schmalenstroeer J, Haeb-Umbach R. Diminishing Domain Mismatch for DNN-Based Acoustic Distance Estimation via Stochastic Room Reverberation Models. In: <i>2024 18th International Workshop on Acoustic Signal Enhancement (IWAENC)</i>. IEEE; 2024. doi:<a href=\"https://doi.org/10.1109/iwaenc61483.2024.10694103\">10.1109/iwaenc61483.2024.10694103</a>","chicago":"Gburrek, Tobias, Adrian Tobias Meise, Joerg Schmalenstroeer, and Reinhold Haeb-Umbach. “Diminishing Domain Mismatch for DNN-Based Acoustic Distance Estimation via Stochastic Room Reverberation Models.” In <i>2024 18th International Workshop on Acoustic Signal Enhancement (IWAENC)</i>. IEEE, 2024. <a href=\"https://doi.org/10.1109/iwaenc61483.2024.10694103\">https://doi.org/10.1109/iwaenc61483.2024.10694103</a>.","ieee":"T. Gburrek, A. T. Meise, J. Schmalenstroeer, and R. Haeb-Umbach, “Diminishing Domain Mismatch for DNN-Based Acoustic Distance Estimation via Stochastic Room Reverberation Models,” 2024, doi: <a href=\"https://doi.org/10.1109/iwaenc61483.2024.10694103\">10.1109/iwaenc61483.2024.10694103</a>.","apa":"Gburrek, T., Meise, A. T., Schmalenstroeer, J., &#38; Haeb-Umbach, R. (2024). Diminishing Domain Mismatch for DNN-Based Acoustic Distance Estimation via Stochastic Room Reverberation Models. <i>2024 18th International Workshop on Acoustic Signal Enhancement (IWAENC)</i>. <a href=\"https://doi.org/10.1109/iwaenc61483.2024.10694103\">https://doi.org/10.1109/iwaenc61483.2024.10694103</a>","short":"T. Gburrek, A.T. Meise, J. Schmalenstroeer, R. Haeb-Umbach, in: 2024 18th International Workshop on Acoustic Signal Enhancement (IWAENC), IEEE, 2024.","mla":"Gburrek, Tobias, et al. 
“Diminishing Domain Mismatch for DNN-Based Acoustic Distance Estimation via Stochastic Room Reverberation Models.” <i>2024 18th International Workshop on Acoustic Signal Enhancement (IWAENC)</i>, IEEE, 2024, doi:<a href=\"https://doi.org/10.1109/iwaenc61483.2024.10694103\">10.1109/iwaenc61483.2024.10694103</a>.","bibtex":"@inproceedings{Gburrek_Meise_Schmalenstroeer_Haeb-Umbach_2024, title={Diminishing Domain Mismatch for DNN-Based Acoustic Distance Estimation via Stochastic Room Reverberation Models}, DOI={<a href=\"https://doi.org/10.1109/iwaenc61483.2024.10694103\">10.1109/iwaenc61483.2024.10694103</a>}, booktitle={2024 18th International Workshop on Acoustic Signal Enhancement (IWAENC)}, publisher={IEEE}, author={Gburrek, Tobias and Meise, Adrian Tobias and Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold}, year={2024} }"},"year":"2024","publication_status":"published","quality_controlled":"1","has_accepted_license":"1"},{"quality_controlled":"1","year":"2023","citation":{"apa":"Gburrek, T., Schmalenstroeer, J., &#38; Haeb-Umbach, R. (2023). On the Integration of Sampling Rate Synchronization and Acoustic Beamforming. <i>European Signal Processing Conference (EUSIPCO)</i>. European Signal Processing Conference (EUSIPCO), Helsinki.","bibtex":"@inproceedings{Gburrek_Schmalenstroeer_Haeb-Umbach_2023, title={On the Integration of Sampling Rate Synchronization and Acoustic Beamforming}, booktitle={European Signal Processing Conference (EUSIPCO)}, author={Gburrek, Tobias and Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold}, year={2023} }","mla":"Gburrek, Tobias, et al. “On the Integration of Sampling Rate Synchronization and Acoustic Beamforming.” <i>European Signal Processing Conference (EUSIPCO)</i>, 2023.","short":"T. Gburrek, J. Schmalenstroeer, R. Haeb-Umbach, in: European Signal Processing Conference (EUSIPCO), 2023.","chicago":"Gburrek, Tobias, Joerg Schmalenstroeer, and Reinhold Haeb-Umbach. 
“On the Integration of Sampling Rate Synchronization and Acoustic Beamforming.” In <i>European Signal Processing Conference (EUSIPCO)</i>, 2023.","ieee":"T. Gburrek, J. Schmalenstroeer, and R. Haeb-Umbach, “On the Integration of Sampling Rate Synchronization and Acoustic Beamforming,” presented at the European Signal Processing Conference (EUSIPCO), Helsinki, 2023.","ama":"Gburrek T, Schmalenstroeer J, Haeb-Umbach R. On the Integration of Sampling Rate Synchronization and Acoustic Beamforming. In: <i>European Signal Processing Conference (EUSIPCO)</i>. ; 2023."},"date_updated":"2023-10-26T08:16:23Z","oa":"1","author":[{"id":"44006","full_name":"Gburrek, Tobias","last_name":"Gburrek","first_name":"Tobias"},{"last_name":"Schmalenstroeer","id":"460","full_name":"Schmalenstroeer, Joerg","first_name":"Joerg"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"}],"date_created":"2023-10-18T12:54:08Z","title":"On the Integration of Sampling Rate Synchronization and Acoustic Beamforming","conference":{"name":"European Signal Processing Conference (EUSIPCO)","location":"Helsinki"},"main_file_link":[{"open_access":"1","url":"https://eurasip.org/Proceedings/Eusipco/Eusipco2023/pdfs/0000011.pdf"}],"publication":"European Signal Processing Conference (EUSIPCO)","type":"conference","status":"public","_id":"48269","department":[{"_id":"54"}],"user_id":"460","language":[{"iso":"eng"}]},{"user_id":"460","department":[{"_id":"54"}],"_id":"48270","language":[{"iso":"eng"}],"file_date_updated":"2023-11-15T14:48:44Z","ddc":["004"],"type":"conference","publication":"ITG Conference on Speech 
Communication","file":[{"access_level":"open_access","file_name":"SchTgbHaeb2023Final.pdf","file_id":"48483","file_size":2844502,"creator":"schmalen","date_created":"2023-10-26T08:20:15Z","date_updated":"2023-11-15T14:48:44Z","relation":"main_file","content_type":"application/pdf"}],"status":"public","date_created":"2023-10-18T13:00:54Z","author":[{"last_name":"Schmalenstroeer","id":"460","full_name":"Schmalenstroeer, Joerg","first_name":"Joerg"},{"last_name":"Gburrek","id":"44006","full_name":"Gburrek, Tobias","first_name":"Tobias"},{"first_name":"Reinhold","id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach"}],"oa":"1","date_updated":"2023-11-15T14:48:45Z","conference":{"name":"ITG Conference on Speech Communication","location":"Aachen"},"title":"LibriWASN: A Data Set for Meeting Separation, Diarization, and Recognition with Asynchronous Recording Devices","has_accepted_license":"1","quality_controlled":"1","citation":{"apa":"Schmalenstroeer, J., Gburrek, T., &#38; Haeb-Umbach, R. (2023). LibriWASN: A Data Set for Meeting Separation, Diarization, and Recognition with Asynchronous Recording Devices. <i>ITG Conference on Speech Communication</i>. ITG Conference on Speech Communication, Aachen.","mla":"Schmalenstroeer, Joerg, et al. “LibriWASN: A Data Set for Meeting Separation, Diarization, and Recognition with Asynchronous Recording Devices.” <i>ITG Conference on Speech Communication</i>, 2023.","short":"J. Schmalenstroeer, T. Gburrek, R. Haeb-Umbach, in: ITG Conference on Speech Communication, 2023.","bibtex":"@inproceedings{Schmalenstroeer_Gburrek_Haeb-Umbach_2023, title={LibriWASN: A Data Set for Meeting Separation, Diarization, and Recognition with Asynchronous Recording Devices}, booktitle={ITG Conference on Speech Communication}, author={Schmalenstroeer, Joerg and Gburrek, Tobias and Haeb-Umbach, Reinhold}, year={2023} }","ieee":"J. Schmalenstroeer, T. Gburrek, and R. 
Haeb-Umbach, “LibriWASN: A Data Set for Meeting Separation, Diarization, and Recognition with Asynchronous Recording Devices,” presented at the ITG Conference on Speech Communication, Aachen, 2023.","chicago":"Schmalenstroeer, Joerg, Tobias Gburrek, and Reinhold Haeb-Umbach. “LibriWASN: A Data Set for Meeting Separation, Diarization, and Recognition with Asynchronous Recording Devices.” In <i>ITG Conference on Speech Communication</i>, 2023.","ama":"Schmalenstroeer J, Gburrek T, Haeb-Umbach R. LibriWASN: A Data Set for Meeting Separation, Diarization, and Recognition with Asynchronous Recording Devices. In: <i>ITG Conference on Speech Communication</i>. ; 2023."},"year":"2023"}]
