[{"status":"public","type":"conference","publication":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","language":[{"iso":"eng"}],"user_id":"72602","department":[{"_id":"54"},{"_id":"660"}],"project":[{"grant_number":"438445824","name":"TRR 318 - C06: TRR 318 - Technisch unterstütztes Erklären von Stimmcharakteristika (Teilprojekt C06)","_id":"129"}],"_id":"59999","citation":{"ama":"Rautenberg F, Kuhlmann M, Seebauer F, Wiechmann J, Wagner P, Haeb-Umbach R. Speech Synthesis along Perceptual Voice Quality Dimensions. In: <i>ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. IEEE; 2025. doi:<a href=\"https://doi.org/10.1109/icassp49660.2025.10888012\">10.1109/icassp49660.2025.10888012</a>","chicago":"Rautenberg, Frederik, Michael Kuhlmann, Fritz Seebauer, Jana Wiechmann, Petra Wagner, and Reinhold Haeb-Umbach. “Speech Synthesis along Perceptual Voice Quality Dimensions.” In <i>ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. IEEE, 2025. <a href=\"https://doi.org/10.1109/icassp49660.2025.10888012\">https://doi.org/10.1109/icassp49660.2025.10888012</a>.","ieee":"F. Rautenberg, M. Kuhlmann, F. Seebauer, J. Wiechmann, P. Wagner, and R. Haeb-Umbach, “Speech Synthesis along Perceptual Voice Quality Dimensions,” presented at the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Hyderabad, India , 2025, doi: <a href=\"https://doi.org/10.1109/icassp49660.2025.10888012\">10.1109/icassp49660.2025.10888012</a>.","mla":"Rautenberg, Frederik, et al. “Speech Synthesis along Perceptual Voice Quality Dimensions.” <i>ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, IEEE, 2025, doi:<a href=\"https://doi.org/10.1109/icassp49660.2025.10888012\">10.1109/icassp49660.2025.10888012</a>.","short":"F. Rautenberg, M. Kuhlmann, F. Seebauer, J. Wiechmann, P. Wagner, R. Haeb-Umbach, in: ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE, 2025.","bibtex":"@inproceedings{Rautenberg_Kuhlmann_Seebauer_Wiechmann_Wagner_Haeb-Umbach_2025, title={Speech Synthesis along Perceptual Voice Quality Dimensions}, DOI={<a href=\"https://doi.org/10.1109/icassp49660.2025.10888012\">10.1109/icassp49660.2025.10888012</a>}, booktitle={ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, publisher={IEEE}, author={Rautenberg, Frederik and Kuhlmann, Michael and Seebauer, Fritz and Wiechmann, Jana and Wagner, Petra and Haeb-Umbach, Reinhold}, year={2025} }","apa":"Rautenberg, F., Kuhlmann, M., Seebauer, F., Wiechmann, J., Wagner, P., &#38; Haeb-Umbach, R. (2025). Speech Synthesis along Perceptual Voice Quality Dimensions. <i>ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Hyderabad, India . <a href=\"https://doi.org/10.1109/icassp49660.2025.10888012\">https://doi.org/10.1109/icassp49660.2025.10888012</a>"},"year":"2025","publication_status":"published","conference":{"end_date":"2025-04-11","location":"Hyderabad, India ","name":"IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","start_date":"2025-04-06"},"doi":"10.1109/icassp49660.2025.10888012","title":"Speech Synthesis along Perceptual Voice Quality Dimensions","date_created":"2025-05-20T08:17:22Z","author":[{"id":"72602","full_name":"Rautenberg, Frederik","last_name":"Rautenberg","first_name":"Frederik"},{"id":"49871","full_name":"Kuhlmann, Michael","last_name":"Kuhlmann","first_name":"Michael"},{"last_name":"Seebauer","full_name":"Seebauer, Fritz","first_name":"Fritz"},{"first_name":"Jana","last_name":"Wiechmann","full_name":"Wiechmann, Jana"},{"full_name":"Wagner, Petra","last_name":"Wagner","first_name":"Petra"},{"first_name":"Reinhold","id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach"}],"date_updated":"2025-05-26T11:09:56Z","publisher":"IEEE"},{"status":"public","type":"conference","publication":"Interspeech 2025","language":[{"iso":"eng"}],"user_id":"72602","department":[{"_id":"54"},{"_id":"660"}],"project":[{"_id":"129","name":"TRR 318; TP C06: Technisch unterstütztes Erklären von Stimmcharakteristika"}],"_id":"61047","citation":{"ama":"Rautenberg F, Seebauer F, Wiechmann J, Kuhlmann M, Wagner P, Haeb-Umbach R. Synthesizing Speech with Selected Perceptual Voice Qualities – A Case Study with Creaky Voice. In: <i>Interspeech 2025</i>. ISCA; 2025. doi:<a href=\"https://doi.org/10.21437/Interspeech.2025-1443\">10.21437/Interspeech.2025-1443</a>","ieee":"F. Rautenberg, F. Seebauer, J. Wiechmann, M. Kuhlmann, P. Wagner, and R. Haeb-Umbach, “Synthesizing Speech with Selected Perceptual Voice Qualities – A Case Study with Creaky Voice,” presented at the Interspeech, Rotterdam, 2025, doi: <a href=\"https://doi.org/10.21437/Interspeech.2025-1443\">10.21437/Interspeech.2025-1443</a>.","chicago":"Rautenberg, Frederik, Fritz Seebauer, Jana Wiechmann, Michael Kuhlmann, Petra Wagner, and Reinhold Haeb-Umbach. “Synthesizing Speech with Selected Perceptual Voice Qualities – A Case Study with Creaky Voice.” In <i>Interspeech 2025</i>. ISCA, 2025. <a href=\"https://doi.org/10.21437/Interspeech.2025-1443\">https://doi.org/10.21437/Interspeech.2025-1443</a>.","apa":"Rautenberg, F., Seebauer, F., Wiechmann, J., Kuhlmann, M., Wagner, P., &#38; Haeb-Umbach, R. (2025). Synthesizing Speech with Selected Perceptual Voice Qualities – A Case Study with Creaky Voice. <i>Interspeech 2025</i>. Interspeech, Rotterdam. <a href=\"https://doi.org/10.21437/Interspeech.2025-1443\">https://doi.org/10.21437/Interspeech.2025-1443</a>","short":"F. Rautenberg, F. Seebauer, J. Wiechmann, M. Kuhlmann, P. Wagner, R. Haeb-Umbach, in: Interspeech 2025, ISCA, 2025.","bibtex":"@inproceedings{Rautenberg_Seebauer_Wiechmann_Kuhlmann_Wagner_Haeb-Umbach_2025, title={Synthesizing Speech with Selected Perceptual Voice Qualities – A Case Study with Creaky Voice}, DOI={<a href=\"https://doi.org/10.21437/Interspeech.2025-1443\">10.21437/Interspeech.2025-1443</a>}, booktitle={Interspeech 2025}, publisher={ISCA}, author={Rautenberg, Frederik and Seebauer, Fritz and Wiechmann, Jana and Kuhlmann, Michael and Wagner, Petra and Haeb-Umbach, Reinhold}, year={2025} }","mla":"Rautenberg, Frederik, et al. “Synthesizing Speech with Selected Perceptual Voice Qualities – A Case Study with Creaky Voice.” <i>Interspeech 2025</i>, ISCA, 2025, doi:<a href=\"https://doi.org/10.21437/Interspeech.2025-1443\">10.21437/Interspeech.2025-1443</a>."},"year":"2025","conference":{"start_date":"2025-08-17","name":"Interspeech","location":"Rotterdam","end_date":"2025-08-21"},"doi":"10.21437/Interspeech.2025-1443","title":"Synthesizing Speech with Selected Perceptual Voice Qualities – A Case Study with Creaky Voice","author":[{"first_name":"Frederik","id":"72602","full_name":"Rautenberg, Frederik","last_name":"Rautenberg"},{"first_name":"Fritz","last_name":"Seebauer","full_name":"Seebauer, Fritz"},{"first_name":"Jana","last_name":"Wiechmann","full_name":"Wiechmann, Jana"},{"first_name":"Michael","id":"49871","full_name":"Kuhlmann, Michael","last_name":"Kuhlmann"},{"first_name":"Petra","full_name":"Wagner, Petra","last_name":"Wagner"},{"first_name":"Reinhold","full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach"}],"date_created":"2025-08-28T08:39:01Z","date_updated":"2025-08-28T08:56:49Z","publisher":"ISCA"},{"year":"2025","title":"Spatio-spectral diarization of meetings by combining TDOA-based  segmentation and speaker embedding-based clustering","date_created":"2025-08-29T09:39:01Z","abstract":[{"lang":"eng","text":"We propose a spatio-spectral, combined model-based and data-driven\r\ndiarization pipeline consisting of TDOA-based segmentation followed by\r\nembedding-based clustering. The proposed system requires neither access to\r\nmulti-channel training data nor prior knowledge about the number or placement\r\nof microphones. It works for both a compact microphone array and distributed\r\nmicrophones, with minor adjustments. Due to its superior handling of\r\noverlapping speech during segmentation, the proposed pipeline significantly\r\noutperforms the single-channel pyannote approach, both in a scenario with a\r\ncompact microphone array and in a setup with distributed microphones.\r\nAdditionally, we show that, unlike fully spatial diarization pipelines, the\r\nproposed system can correctly track speakers when they change positions."}],"file":[{"date_updated":"2025-08-29T09:43:32Z","creator":"cord","date_created":"2025-08-29T09:43:32Z","file_size":921918,"file_id":"61085","file_name":"main.pdf","access_level":"open_access","content_type":"application/pdf","relation":"main_file"}],"publication":"Proceedings of INTERSPEECH","ddc":["000"],"language":[{"iso":"eng"}],"external_id":{"arxiv":["2506.16228"]},"citation":{"ama":"Cord-Landwehr T, Gburrek T, Deegen M, Haeb-Umbach R. Spatio-spectral diarization of meetings by combining TDOA-based  segmentation and speaker embedding-based clustering. In: <i>Proceedings of INTERSPEECH</i>. ; 2025. doi:<a href=\"https://doi.org/10.21437/Interspeech.2025-1663\">10.21437/Interspeech.2025-1663</a>","ieee":"T. Cord-Landwehr, T. Gburrek, M. Deegen, and R. Haeb-Umbach, “Spatio-spectral diarization of meetings by combining TDOA-based  segmentation and speaker embedding-based clustering,” presented at the Interspeech 2025, Rotterdam, 2025, doi: <a href=\"https://doi.org/10.21437/Interspeech.2025-1663\">10.21437/Interspeech.2025-1663</a>.","chicago":"Cord-Landwehr, Tobias, Tobias Gburrek, Marc Deegen, and Reinhold Haeb-Umbach. “Spatio-Spectral Diarization of Meetings by Combining TDOA-Based  Segmentation and Speaker Embedding-Based Clustering.” In <i>Proceedings of INTERSPEECH</i>, 2025. <a href=\"https://doi.org/10.21437/Interspeech.2025-1663\">https://doi.org/10.21437/Interspeech.2025-1663</a>.","short":"T. Cord-Landwehr, T. Gburrek, M. Deegen, R. Haeb-Umbach, in: Proceedings of INTERSPEECH, 2025.","bibtex":"@inproceedings{Cord-Landwehr_Gburrek_Deegen_Haeb-Umbach_2025, title={Spatio-spectral diarization of meetings by combining TDOA-based  segmentation and speaker embedding-based clustering}, DOI={<a href=\"https://doi.org/10.21437/Interspeech.2025-1663\">10.21437/Interspeech.2025-1663</a>}, booktitle={Proceedings of INTERSPEECH}, author={Cord-Landwehr, Tobias and Gburrek, Tobias and Deegen, Marc and Haeb-Umbach, Reinhold}, year={2025} }","mla":"Cord-Landwehr, Tobias, et al. “Spatio-Spectral Diarization of Meetings by Combining TDOA-Based  Segmentation and Speaker Embedding-Based Clustering.” <i>Proceedings of INTERSPEECH</i>, 2025, doi:<a href=\"https://doi.org/10.21437/Interspeech.2025-1663\">10.21437/Interspeech.2025-1663</a>.","apa":"Cord-Landwehr, T., Gburrek, T., Deegen, M., &#38; Haeb-Umbach, R. (2025). Spatio-spectral diarization of meetings by combining TDOA-based  segmentation and speaker embedding-based clustering. <i>Proceedings of INTERSPEECH</i>. Interspeech 2025, Rotterdam. <a href=\"https://doi.org/10.21437/Interspeech.2025-1663\">https://doi.org/10.21437/Interspeech.2025-1663</a>"},"has_accepted_license":"1","doi":"10.21437/Interspeech.2025-1663","conference":{"location":"Rotterdam","name":"Interspeech 2025"},"oa":"1","date_updated":"2025-11-10T09:06:47Z","author":[{"last_name":"Cord-Landwehr","full_name":"Cord-Landwehr, Tobias","id":"44393","first_name":"Tobias"},{"last_name":"Gburrek","id":"44006","full_name":"Gburrek, Tobias","first_name":"Tobias"},{"first_name":"Marc","id":"70272","full_name":"Deegen, Marc","last_name":"Deegen"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"}],"status":"public","type":"conference","file_date_updated":"2025-08-29T09:43:32Z","project":[{"name":"Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"_id":"61079","user_id":"44393","department":[{"_id":"54"}]},{"doi":"10.21437/interspeech.2025-2190","title":"Towards Frame-level Quality Predictions of Synthetic Speech","date_created":"2025-11-11T11:43:20Z","author":[{"full_name":"Kuhlmann, Michael","id":"49871","last_name":"Kuhlmann","first_name":"Michael"},{"first_name":"Fritz","full_name":"Seebauer, Fritz","last_name":"Seebauer"},{"id":"74505","full_name":"Wagner, Petra","last_name":"Wagner","first_name":"Petra"},{"full_name":"Häb-Umbach, Reinhold","id":"242","last_name":"Häb-Umbach","first_name":"Reinhold"}],"publisher":"ISCA","date_updated":"2025-11-11T11:45:12Z","citation":{"apa":"Kuhlmann, M., Seebauer, F., Wagner, P., &#38; Häb-Umbach, R. (2025). Towards Frame-level Quality Predictions of Synthetic Speech. <i>Interspeech 2025</i>. <a href=\"https://doi.org/10.21437/interspeech.2025-2190\">https://doi.org/10.21437/interspeech.2025-2190</a>","bibtex":"@inproceedings{Kuhlmann_Seebauer_Wagner_Häb-Umbach_2025, title={Towards Frame-level Quality Predictions of Synthetic Speech}, DOI={<a href=\"https://doi.org/10.21437/interspeech.2025-2190\">10.21437/interspeech.2025-2190</a>}, booktitle={Interspeech 2025}, publisher={ISCA}, author={Kuhlmann, Michael and Seebauer, Fritz and Wagner, Petra and Häb-Umbach, Reinhold}, year={2025} }","short":"M. Kuhlmann, F. Seebauer, P. Wagner, R. Häb-Umbach, in: Interspeech 2025, ISCA, 2025.","mla":"Kuhlmann, Michael, et al. “Towards Frame-Level Quality Predictions of Synthetic Speech.” <i>Interspeech 2025</i>, ISCA, 2025, doi:<a href=\"https://doi.org/10.21437/interspeech.2025-2190\">10.21437/interspeech.2025-2190</a>.","ieee":"M. Kuhlmann, F. Seebauer, P. Wagner, and R. Häb-Umbach, “Towards Frame-level Quality Predictions of Synthetic Speech,” 2025, doi: <a href=\"https://doi.org/10.21437/interspeech.2025-2190\">10.21437/interspeech.2025-2190</a>.","chicago":"Kuhlmann, Michael, Fritz Seebauer, Petra Wagner, and Reinhold Häb-Umbach. “Towards Frame-Level Quality Predictions of Synthetic Speech.” In <i>Interspeech 2025</i>. ISCA, 2025. <a href=\"https://doi.org/10.21437/interspeech.2025-2190\">https://doi.org/10.21437/interspeech.2025-2190</a>.","ama":"Kuhlmann M, Seebauer F, Wagner P, Häb-Umbach R. Towards Frame-level Quality Predictions of Synthetic Speech. In: <i>Interspeech 2025</i>. ISCA; 2025. doi:<a href=\"https://doi.org/10.21437/interspeech.2025-2190\">10.21437/interspeech.2025-2190</a>"},"year":"2025","publication_status":"published","language":[{"iso":"eng"}],"department":[{"_id":"54"}],"user_id":"49871","_id":"62164","status":"public","publication":"Interspeech 2025","type":"conference"},{"type":"conference","publication":"Proceedings of the 16th ITG Conference on Speech Communication","abstract":[{"lang":"eng","text":"Zero-shot classifiers based on Contrastive Language-Audio Pretraining (CLAP) models enable classification of given audio into classes defined at test time using text. These models are costly to run with respect to computation and memory requirements. In this work, we propose to build a specialized low-resource classifier for classes pre-defined using text, using a two-stage procedure consisting of zero-shot data set pruning and model compression. First, relevant in-domain data is selected from a source dataset using class label embeddings obtained from a pre-trained CLAP model. This data is then used to distill the audio encoder of a CLAP model. The proposed compression method produces compact audio encoders with slightly reduced accuracy. Note that neither labeled nor unlabeled in-domain audio data is required for its development. We verify by cross-dataset tests that the resulting classifiers are indeed specialized to their task."}],"editor":[{"first_name":"Sebastian","last_name":"Möller","full_name":"Möller, Sebastian"},{"last_name":"Gerkmann","full_name":"Gerkmann, Timo","first_name":"Timo"},{"full_name":"Kolossa, Dorothea","last_name":"Kolossa","first_name":"Dorothea"}],"status":"public","project":[{"name":"WestAI - AI Service Center West","_id":"512"}],"_id":"62163","user_id":"62152","department":[{"_id":"54"}],"language":[{"iso":"eng"}],"publication_status":"published","quality_controlled":"1","publication_identifier":{"unknown":["978-3-8007-6617-8"]},"year":"2025","place":"Berlin","citation":{"bibtex":"@inproceedings{Werning_Häb-Umbach_2025, place={Berlin}, title={A Fully Zero-Shot Approach to Obtaining Specialized and Compact Audio Tagging Models}, booktitle={Proceedings of the 16th ITG Conference on Speech Communication}, author={Werning, Alexander and Häb-Umbach, Reinhold}, editor={Möller, Sebastian and Gerkmann, Timo and Kolossa, Dorothea}, year={2025}, pages={76–80} }","short":"A. Werning, R. Häb-Umbach, in: S. Möller, T. Gerkmann, D. Kolossa (Eds.), Proceedings of the 16th ITG Conference on Speech Communication, Berlin, 2025, pp. 76–80.","mla":"Werning, Alexander, and Reinhold Häb-Umbach. “A Fully Zero-Shot Approach to Obtaining Specialized and Compact Audio Tagging Models.” <i>Proceedings of the 16th ITG Conference on Speech Communication</i>, edited by Sebastian Möller et al., 2025, pp. 76–80.","apa":"Werning, A., &#38; Häb-Umbach, R. (2025). A Fully Zero-Shot Approach to Obtaining Specialized and Compact Audio Tagging Models. In S. Möller, T. Gerkmann, &#38; D. Kolossa (Eds.), <i>Proceedings of the 16th ITG Conference on Speech Communication</i> (pp. 76–80).","ama":"Werning A, Häb-Umbach R. A Fully Zero-Shot Approach to Obtaining Specialized and Compact Audio Tagging Models. In: Möller S, Gerkmann T, Kolossa D, eds. <i>Proceedings of the 16th ITG Conference on Speech Communication</i>. ; 2025:76-80.","chicago":"Werning, Alexander, and Reinhold Häb-Umbach. “A Fully Zero-Shot Approach to Obtaining Specialized and Compact Audio Tagging Models.” In <i>Proceedings of the 16th ITG Conference on Speech Communication</i>, edited by Sebastian Möller, Timo Gerkmann, and Dorothea Kolossa, 76–80. Berlin, 2025.","ieee":"A. Werning and R. Häb-Umbach, “A Fully Zero-Shot Approach to Obtaining Specialized and Compact Audio Tagging Models,” in <i>Proceedings of the 16th ITG Conference on Speech Communication</i>, Berlin, 2025, pp. 76–80."},"page":"76-80","date_updated":"2025-11-28T13:20:17Z","date_created":"2025-11-11T11:46:42Z","author":[{"id":"62152","full_name":"Werning, Alexander","last_name":"Werning","first_name":"Alexander"},{"last_name":"Häb-Umbach","id":"242","full_name":"Häb-Umbach, Reinhold","first_name":"Reinhold"}],"title":"A Fully Zero-Shot Approach to Obtaining Specialized and Compact Audio Tagging Models","conference":{"start_date":"2025-09-24","name":"16th ITG Conference on Speech Communication","location":"Berlin","end_date":"2025-09-26"}},{"publication":"Proceedings of DAS|DAGA 2025","abstract":[{"text":"Running state-of-the-art large-scale audio models on edge devices is often infeasible due to their limited storage and computing resources. It is therefore necessary to compress and tune the models for the specific target task and hardware. This is commonly achieved by distilling the audio model, the teacher, to a small target model, the student. However, this approach can be improved by prepending a dataset pruning stage and training the teacher on the pruned data set only, which contains examples relevant to the target task. Recently, CLAP models have emerged that embed audio and text examples in a common embedding space. We use the audio embeddings of the CLAP model for the above pruning stage, which is realized using a domain classifier. After knowledge distillation, the student is eventually fine-tuned on some data from the target domain. The CLAP architecture combines text and audio embedding spaces, which allows to search for data given only a textual description, such as a class label. We show how this can help data pruning.","lang":"eng"}],"ddc":["004"],"language":[{"iso":"eng"}],"year":"2025","corporate_editor":["Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025"],"date_created":"2025-05-14T13:18:10Z","title":"Distilling Efficient Audio Models using Data Pruning with CLAP","type":"conference","status":"public","project":[{"_id":"512","name":"WestAI - AI Service Center West"}],"_id":"59900","user_id":"62152","department":[{"_id":"54"}],"publication_status":"published","has_accepted_license":"1","publication_identifier":{"unknown":["978-3-939296-23-2"]},"place":"Copenhagen","citation":{"ieee":"A. Werning and R. Häb-Umbach, “Distilling Efficient Audio Models using Data Pruning with CLAP,” in <i>Proceedings of DAS|DAGA 2025</i>, Copenhagen, 2025.","chicago":"Werning, Alexander, and Reinhold Häb-Umbach. “Distilling Efficient Audio Models Using Data Pruning with CLAP.” In <i>Proceedings of DAS|DAGA 2025</i>, edited by Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025. Copenhagen, 2025.","ama":"Werning A, Häb-Umbach R. Distilling Efficient Audio Models using Data Pruning with CLAP. In: Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025, ed. <i>Proceedings of DAS|DAGA 2025</i>. ; 2025.","apa":"Werning, A., &#38; Häb-Umbach, R. (2025). Distilling Efficient Audio Models using Data Pruning with CLAP. In Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025 (Ed.), <i>Proceedings of DAS|DAGA 2025</i>.","bibtex":"@inproceedings{Werning_Häb-Umbach_2025, place={Copenhagen}, title={Distilling Efficient Audio Models using Data Pruning with CLAP}, booktitle={Proceedings of DAS|DAGA 2025}, author={Werning, Alexander and Häb-Umbach, Reinhold}, editor={Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025}, year={2025} }","short":"A. Werning, R. Häb-Umbach, in: Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025 (Ed.), Proceedings of DAS|DAGA 2025, Copenhagen, 2025.","mla":"Werning, Alexander, and Reinhold Häb-Umbach. “Distilling Efficient Audio Models Using Data Pruning with CLAP.” <i>Proceedings of DAS|DAGA 2025</i>, edited by Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025, 2025."},"date_updated":"2025-11-28T13:21:13Z","author":[{"first_name":"Alexander","id":"62152","full_name":"Werning, Alexander","last_name":"Werning"},{"last_name":"Häb-Umbach","full_name":"Häb-Umbach, Reinhold","id":"242","first_name":"Reinhold"}],"conference":{"location":"Copenhagen","end_date":"2025-03-20","start_date":"2025-03-17","name":"DAS|DAGA 2025 - 51st Annual Meeting on Acoustics"}},{"title":"On the Application of Diffusion Models for Simultaneous Denoising and Dereverberation","conference":{"name":"ITG Conference on Speech Communication","location":"Berlin"},"date_updated":"2026-01-05T09:05:14Z","author":[{"first_name":"Adrian Tobias","last_name":"Meise","id":"79268","full_name":"Meise, Adrian Tobias"},{"first_name":"Tobias","full_name":"Cord-Landwehr, Tobias","id":"44393","last_name":"Cord-Landwehr"},{"id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach","first_name":"Reinhold"}],"date_created":"2025-11-13T07:21:51Z","year":"2025","citation":{"bibtex":"@inproceedings{Meise_Cord-Landwehr_Haeb-Umbach_2025, title={On the Application of Diffusion Models for Simultaneous Denoising and Dereverberation}, booktitle={ ITG Conference on Speech Communication}, author={Meise, Adrian Tobias and Cord-Landwehr, Tobias and Haeb-Umbach, Reinhold}, year={2025} }","mla":"Meise, Adrian Tobias, et al. “On the Application of Diffusion Models for Simultaneous Denoising and Dereverberation.” <i> ITG Conference on Speech Communication</i>, 2025.","short":"A.T. Meise, T. Cord-Landwehr, R. Haeb-Umbach, in:  ITG Conference on Speech Communication, 2025.","apa":"Meise, A. T., Cord-Landwehr, T., &#38; Haeb-Umbach, R. (2025). On the Application of Diffusion Models for Simultaneous Denoising and Dereverberation. <i> ITG Conference on Speech Communication</i>. ITG Conference on Speech Communication, Berlin.","ama":"Meise AT, Cord-Landwehr T, Haeb-Umbach R. On the Application of Diffusion Models for Simultaneous Denoising and Dereverberation. In: <i> ITG Conference on Speech Communication</i>. ; 2025.","ieee":"A. T. Meise, T. Cord-Landwehr, and R. Haeb-Umbach, “On the Application of Diffusion Models for Simultaneous Denoising and Dereverberation,” presented at the ITG Conference on Speech Communication, Berlin, 2025.","chicago":"Meise, Adrian Tobias, Tobias Cord-Landwehr, and Reinhold Haeb-Umbach. “On the Application of Diffusion Models for Simultaneous Denoising and Dereverberation.” In <i> ITG Conference on Speech Communication</i>, 2025."},"publication_identifier":{"isbn":["978-3-8007-6617-8"]},"language":[{"iso":"eng"}],"_id":"62174","user_id":"44393","department":[{"_id":"54"}],"status":"public","type":"conference","publication":" ITG Conference on Speech Communication"},{"publication":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","file":[{"relation":"main_file","content_type":"application/pdf","file_size":3432879,"file_name":"main.pdf","file_id":"59602","access_level":"open_access","date_updated":"2025-04-16T10:21:45Z","date_created":"2025-04-16T10:14:47Z","creator":"cbj"},{"date_updated":"2025-04-16T10:21:45Z","date_created":"2025-04-16T10:15:08Z","creator":"cbj","file_size":2838635,"access_level":"open_access","file_name":"slides.pdf","file_id":"59603","content_type":"application/pdf","relation":"main_file"},{"file_size":2038741,"file_name":"poster.pdf","access_level":"open_access","file_id":"59604","date_updated":"2025-04-16T10:21:45Z","date_created":"2025-04-16T10:15:22Z","creator":"cbj","relation":"main_file","content_type":"application/pdf"}],"language":[{"iso":"eng"}],"ddc":["000"],"keyword":["Electrical and Electronic Engineering","Acoustics and Ultrasonics","Computer Science (miscellaneous)","Computational Mathematics"],"year":"2024","date_created":"2024-03-26T16:11:54Z","publisher":"Institute of Electrical and Electronics Engineers (IEEE)","title":"TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings","type":"journal_article","status":"public","user_id":"40767","department":[{"_id":"54"}],"project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"},{"name":"Automatische Transkription von Gesprächssituationen","_id":"508","grant_number":"448568305"}],"_id":"52958","file_date_updated":"2025-04-16T10:21:45Z","publication_status":"published","publication_identifier":{"issn":["2329-9290","2329-9304"]},"has_accepted_license":"1","citation":{"apa":"Boeddeker, C., Subramanian, A. S., Wichern, G., Haeb-Umbach, R., &#38; Le Roux, J. (2024). TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings. <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i>, <i>32</i>, 1185–1197. <a href=\"https://doi.org/10.1109/taslp.2024.3350887\">https://doi.org/10.1109/taslp.2024.3350887</a>","mla":"Boeddeker, Christoph, et al. “TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings.” <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i>, vol. 32, Institute of Electrical and Electronics Engineers (IEEE), 2024, pp. 1185–97, doi:<a href=\"https://doi.org/10.1109/taslp.2024.3350887\">10.1109/taslp.2024.3350887</a>.","bibtex":"@article{Boeddeker_Subramanian_Wichern_Haeb-Umbach_Le Roux_2024, title={TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings}, volume={32}, DOI={<a href=\"https://doi.org/10.1109/taslp.2024.3350887\">10.1109/taslp.2024.3350887</a>}, journal={IEEE/ACM Transactions on Audio, Speech, and Language Processing}, publisher={Institute of Electrical and Electronics Engineers (IEEE)}, author={Boeddeker, Christoph and Subramanian, Aswin Shanmugam and Wichern, Gordon and Haeb-Umbach, Reinhold and Le Roux, Jonathan}, year={2024}, pages={1185–1197} }","short":"C. Boeddeker, A.S. Subramanian, G. Wichern, R. Haeb-Umbach, J. Le Roux, IEEE/ACM Transactions on Audio, Speech, and Language Processing 32 (2024) 1185–1197.","chicago":"Boeddeker, Christoph, Aswin Shanmugam Subramanian, Gordon Wichern, Reinhold Haeb-Umbach, and Jonathan Le Roux. “TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings.” <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i> 32 (2024): 1185–97. <a href=\"https://doi.org/10.1109/taslp.2024.3350887\">https://doi.org/10.1109/taslp.2024.3350887</a>.","ieee":"C. Boeddeker, A. S. Subramanian, G. Wichern, R. Haeb-Umbach, and J. Le Roux, “TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings,” <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i>, vol. 32, pp. 1185–1197, 2024, doi: <a href=\"https://doi.org/10.1109/taslp.2024.3350887\">10.1109/taslp.2024.3350887</a>.","ama":"Boeddeker C, Subramanian AS, Wichern G, Haeb-Umbach R, Le Roux J. TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings. <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i>. 2024;32:1185-1197. doi:<a href=\"https://doi.org/10.1109/taslp.2024.3350887\">10.1109/taslp.2024.3350887</a>"},"intvolume":"        32","page":"1185-1197","author":[{"first_name":"Christoph","id":"40767","full_name":"Boeddeker, Christoph","last_name":"Boeddeker"},{"last_name":"Subramanian","full_name":"Subramanian, Aswin Shanmugam","first_name":"Aswin Shanmugam"},{"last_name":"Wichern","full_name":"Wichern, Gordon","first_name":"Gordon"},{"full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach","first_name":"Reinhold"},{"last_name":"Le Roux","full_name":"Le Roux, Jonathan","first_name":"Jonathan"}],"volume":32,"date_updated":"2025-04-16T10:21:45Z","oa":"1","main_file_link":[{"url":"https://arxiv.org/abs/2303.03849","open_access":"1"}],"doi":"10.1109/taslp.2024.3350887"},{"year":"2024","citation":{"mla":"Werning, Alexander, and Reinhold Haeb-Umbach. <i>UPB-NT Submission to DCASE24: Dataset Pruning for Targeted Knowledge Distillation</i>. 2024.","short":"A. Werning, R. Haeb-Umbach, UPB-NT Submission to DCASE24: Dataset Pruning for Targeted Knowledge Distillation, 2024.","bibtex":"@book{Werning_Haeb-Umbach_2024, title={UPB-NT submission to DCASE24: Dataset pruning for targeted knowledge distillation}, author={Werning, Alexander and Haeb-Umbach, Reinhold}, year={2024} }","apa":"Werning, A., &#38; Haeb-Umbach, R. (2024). <i>UPB-NT submission to DCASE24: Dataset pruning for targeted knowledge distillation</i>.","ama":"Werning A, Haeb-Umbach R. <i>UPB-NT Submission to DCASE24: Dataset Pruning for Targeted Knowledge Distillation</i>.; 2024.","ieee":"A. Werning and R. Haeb-Umbach, <i>UPB-NT submission to DCASE24: Dataset pruning for targeted knowledge distillation</i>. 2024.","chicago":"Werning, Alexander, and Reinhold Haeb-Umbach. <i>UPB-NT Submission to DCASE24: Dataset Pruning for Targeted Knowledge Distillation</i>, 2024."},"title":"UPB-NT submission to DCASE24: Dataset pruning for targeted knowledge distillation","date_updated":"2024-11-18T09:45:14Z","author":[{"last_name":"Werning","full_name":"Werning, Alexander","id":"62152","first_name":"Alexander"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242"}],"date_created":"2024-11-18T09:44:46Z","status":"public","type":"report","language":[{"iso":"eng"}],"_id":"57161","project":[{"_id":"512","name":"WestAI - AI Service Center West"}],"department":[{"_id":"54"}],"user_id":"62152"},{"title":"Speaker and Style Disentanglement of Speech Based on Contrastive Predictive Coding Supported Factorized Variational Autoencoder","date_updated":"2024-11-15T06:54:40Z","date_created":"2024-11-15T06:52:54Z","author":[{"last_name":"Xie","full_name":"Xie, Yuying","first_name":"Yuying"},{"full_name":"Kuhlmann, Michael","id":"49871","last_name":"Kuhlmann","first_name":"Michael"},{"full_name":"Rautenberg, Frederik","id":"72602","last_name":"Rautenberg","first_name":"Frederik"},{"first_name":"Zheng-Hua","last_name":"Tan","full_name":"Tan, Zheng-Hua"},{"full_name":"Häb-Umbach, Reinhold","id":"242","last_name":"Häb-Umbach","first_name":"Reinhold"}],"year":"2024","citation":{"mla":"Xie, Yuying, et al. “Speaker and Style Disentanglement of Speech Based on Contrastive Predictive Coding Supported Factorized Variational Autoencoder.” <i>2024 32nd European Signal Processing Conference (EUSIPCO)</i>, 2024, pp. 436–440.","bibtex":"@inproceedings{Xie_Kuhlmann_Rautenberg_Tan_Häb-Umbach_2024, title={Speaker and Style Disentanglement of Speech Based on Contrastive Predictive Coding Supported Factorized Variational Autoencoder}, booktitle={2024 32nd European Signal Processing Conference (EUSIPCO)}, author={Xie, Yuying and Kuhlmann, Michael and Rautenberg, Frederik and Tan, Zheng-Hua and Häb-Umbach, Reinhold}, year={2024}, pages={436–440} }","short":"Y. Xie, M. Kuhlmann, F. Rautenberg, Z.-H. Tan, R. Häb-Umbach, in: 2024 32nd European Signal Processing Conference (EUSIPCO), 2024, pp. 436–440.","apa":"Xie, Y., Kuhlmann, M., Rautenberg, F., Tan, Z.-H., &#38; Häb-Umbach, R. (2024). Speaker and Style Disentanglement of Speech Based on Contrastive Predictive Coding Supported Factorized Variational Autoencoder. <i>2024 32nd European Signal Processing Conference (EUSIPCO)</i>, 436–440.","ama":"Xie Y, Kuhlmann M, Rautenberg F, Tan Z-H, Häb-Umbach R. Speaker and Style Disentanglement of Speech Based on Contrastive Predictive Coding Supported Factorized Variational Autoencoder. In: <i>2024 32nd European Signal Processing Conference (EUSIPCO)</i>. ; 2024:436–440.","ieee":"Y. Xie, M. Kuhlmann, F. Rautenberg, Z.-H. Tan, and R. Häb-Umbach, “Speaker and Style Disentanglement of Speech Based on Contrastive Predictive Coding Supported Factorized Variational Autoencoder,” in <i>2024 32nd European Signal Processing Conference (EUSIPCO)</i>, 2024, pp. 436–440.","chicago":"Xie, Yuying, Michael Kuhlmann, Frederik Rautenberg, Zheng-Hua Tan, and Reinhold Häb-Umbach. “Speaker and Style Disentanglement of Speech Based on Contrastive Predictive Coding Supported Factorized Variational Autoencoder.” In <i>2024 32nd European Signal Processing Conference (EUSIPCO)</i>, 436–440, 2024."},"page":"436–440","language":[{"iso":"eng"}],"_id":"57099","user_id":"49871","department":[{"_id":"54"}],"status":"public","type":"conference","publication":"2024 32nd European Signal Processing Conference (EUSIPCO)"},{"status":"public","file":[{"date_updated":"2024-09-04T07:34:30Z","date_created":"2024-09-04T07:34:30Z","creator":"tvn","file_size":150432,"access_level":"open_access","file_id":"56005","file_name":"main.pdf","content_type":"application/pdf","relation":"main_file"}],"publication":"2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)","type":"conference","ddc":["000"],"file_date_updated":"2024-09-04T07:34:30Z","language":[{"iso":"eng"}],"_id":"56004","project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"},{"grant_number":"448568305","_id":"508","name":"Automatische Transkription von Gesprächssituationen"}],"department":[{"_id":"54"}],"user_id":"40767","year":"2024","citation":{"ieee":"T. von Neumann, C. Boeddeker, T. Cord-Landwehr, M. Delcroix, and R. Haeb-Umbach, “Meeting Recognition with Continuous Speech Separation and Transcription-Supported Diarization,” 2024, doi: <a href=\"https://doi.org/10.1109/icasspw62465.2024.10625894\">10.1109/icasspw62465.2024.10625894</a>.","chicago":"Neumann, Thilo von, Christoph Boeddeker, Tobias Cord-Landwehr, Marc Delcroix, and Reinhold Haeb-Umbach. “Meeting Recognition with Continuous Speech Separation and Transcription-Supported Diarization.” In <i>2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)</i>. IEEE, 2024. <a href=\"https://doi.org/10.1109/icasspw62465.2024.10625894\">https://doi.org/10.1109/icasspw62465.2024.10625894</a>.","ama":"von Neumann T, Boeddeker C, Cord-Landwehr T, Delcroix M, Haeb-Umbach R. Meeting Recognition with Continuous Speech Separation and Transcription-Supported Diarization. In: <i>2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)</i>. IEEE; 2024. doi:<a href=\"https://doi.org/10.1109/icasspw62465.2024.10625894\">10.1109/icasspw62465.2024.10625894</a>","short":"T. von Neumann, C. Boeddeker, T. Cord-Landwehr, M. Delcroix, R. Haeb-Umbach, in: 2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW), IEEE, 2024.","mla":"von Neumann, Thilo, et al. “Meeting Recognition with Continuous Speech Separation and Transcription-Supported Diarization.” <i>2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)</i>, IEEE, 2024, doi:<a href=\"https://doi.org/10.1109/icasspw62465.2024.10625894\">10.1109/icasspw62465.2024.10625894</a>.","bibtex":"@inproceedings{von Neumann_Boeddeker_Cord-Landwehr_Delcroix_Haeb-Umbach_2024, title={Meeting Recognition with Continuous Speech Separation and Transcription-Supported Diarization}, DOI={<a href=\"https://doi.org/10.1109/icasspw62465.2024.10625894\">10.1109/icasspw62465.2024.10625894</a>}, booktitle={2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)}, publisher={IEEE}, author={von Neumann, Thilo and Boeddeker, Christoph and Cord-Landwehr, Tobias and Delcroix, Marc and Haeb-Umbach, Reinhold}, year={2024} }","apa":"von Neumann, T., Boeddeker, C., Cord-Landwehr, T., Delcroix, M., &#38; Haeb-Umbach, R. (2024). Meeting Recognition with Continuous Speech Separation and Transcription-Supported Diarization. <i>2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)</i>. <a href=\"https://doi.org/10.1109/icasspw62465.2024.10625894\">https://doi.org/10.1109/icasspw62465.2024.10625894</a>"},"has_accepted_license":"1","publication_status":"published","title":"Meeting Recognition with Continuous Speech Separation and Transcription-Supported Diarization","doi":"10.1109/icasspw62465.2024.10625894","date_updated":"2025-02-12T09:20:07Z","publisher":"IEEE","oa":"1","date_created":"2024-09-04T07:26:02Z","author":[{"first_name":"Thilo","full_name":"von Neumann, Thilo","id":"49870","orcid":"https://orcid.org/0000-0002-7717-8670","last_name":"von Neumann"},{"id":"40767","full_name":"Boeddeker, Christoph","last_name":"Boeddeker","first_name":"Christoph"},{"first_name":"Tobias","full_name":"Cord-Landwehr, Tobias","id":"44393","last_name":"Cord-Landwehr"},{"full_name":"Delcroix, Marc","last_name":"Delcroix","first_name":"Marc"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"}]},{"language":[{"iso":"eng"}],"user_id":"40767","department":[{"_id":"54"}],"project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing"},{"grant_number":"448568305","name":"Automatische Transkription von Gesprächssituationen","_id":"508"}],"_id":"56272","status":"public","type":"conference","publication":"Interspeech 2024","main_file_link":[{"url":"https://www.isca-archive.org/interspeech_2024/boeddeker24_interspeech.pdf","open_access":"1"}],"doi":"10.21437/interspeech.2024-1286","title":"Once more Diarization: Improving meeting transcription systems through segment-level speaker reassignment","author":[{"last_name":"Boeddeker","id":"40767","full_name":"Boeddeker, Christoph","first_name":"Christoph"},{"last_name":"Cord-Landwehr","full_name":"Cord-Landwehr, Tobias","id":"44393","first_name":"Tobias"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242"}],"date_created":"2024-09-30T08:04:47Z","date_updated":"2025-02-12T09:18:36Z","oa":"1","publisher":"ISCA","citation":{"chicago":"Boeddeker, Christoph, Tobias Cord-Landwehr, and Reinhold Haeb-Umbach. “Once More Diarization: Improving Meeting Transcription Systems through Segment-Level Speaker Reassignment.” In <i>Interspeech 2024</i>. ISCA, 2024. <a href=\"https://doi.org/10.21437/interspeech.2024-1286\">https://doi.org/10.21437/interspeech.2024-1286</a>.","ieee":"C. Boeddeker, T. Cord-Landwehr, and R. Haeb-Umbach, “Once more Diarization: Improving meeting transcription systems through segment-level speaker reassignment,” 2024, doi: <a href=\"https://doi.org/10.21437/interspeech.2024-1286\">10.21437/interspeech.2024-1286</a>.","ama":"Boeddeker C, Cord-Landwehr T, Haeb-Umbach R. Once more Diarization: Improving meeting transcription systems through segment-level speaker reassignment. In: <i>Interspeech 2024</i>. ISCA; 2024. doi:<a href=\"https://doi.org/10.21437/interspeech.2024-1286\">10.21437/interspeech.2024-1286</a>","apa":"Boeddeker, C., Cord-Landwehr, T., &#38; Haeb-Umbach, R. (2024). Once more Diarization: Improving meeting transcription systems through segment-level speaker reassignment. <i>Interspeech 2024</i>. <a href=\"https://doi.org/10.21437/interspeech.2024-1286\">https://doi.org/10.21437/interspeech.2024-1286</a>","bibtex":"@inproceedings{Boeddeker_Cord-Landwehr_Haeb-Umbach_2024, title={Once more Diarization: Improving meeting transcription systems through segment-level speaker reassignment}, DOI={<a href=\"https://doi.org/10.21437/interspeech.2024-1286\">10.21437/interspeech.2024-1286</a>}, booktitle={Interspeech 2024}, publisher={ISCA}, author={Boeddeker, Christoph and Cord-Landwehr, Tobias and Haeb-Umbach, Reinhold}, year={2024} }","mla":"Boeddeker, Christoph, et al. “Once More Diarization: Improving Meeting Transcription Systems through Segment-Level Speaker Reassignment.” <i>Interspeech 2024</i>, ISCA, 2024, doi:<a href=\"https://doi.org/10.21437/interspeech.2024-1286\">10.21437/interspeech.2024-1286</a>.","short":"C. Boeddeker, T. Cord-Landwehr, R. Haeb-Umbach, in: Interspeech 2024, ISCA, 2024."},"year":"2024","publication_status":"published"},{"language":[{"iso":"eng"}],"_id":"57659","project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing"},{"grant_number":"448568305","_id":"508","name":"Automatische Transkription von Gesprächssituationen"}],"department":[{"_id":"54"}],"user_id":"40767","status":"public","publication":"2024 IEEE Spoken Language Technology Workshop (SLT)","type":"conference","title":"Combining TF-GridNet and Mixture Encoder for Continuous Speech Separation for Meeting Transcription","main_file_link":[{"url":"https://www-i6.informatik.rwth-aachen.de/publications/download/1259/VietingPeterBergerSimonNeumannThilovonBoeddekerChristophSchl%FCterRalfHaeb-UmbachReinhold--CombiningTF-GridNetMixtureEncoderforContinuousSpeechSeparationforMeetingTranscription--2024.pdf","open_access":"1"}],"oa":"1","date_updated":"2025-02-12T09:20:59Z","author":[{"full_name":"Vieting, Peter","last_name":"Vieting","first_name":"Peter"},{"first_name":"Simon","last_name":"Berger","full_name":"Berger, Simon"},{"orcid":"https://orcid.org/0000-0002-7717-8670","last_name":"von Neumann","full_name":"von Neumann, Thilo","id":"49870","first_name":"Thilo"},{"last_name":"Boeddeker","full_name":"Boeddeker, Christoph","id":"40767","first_name":"Christoph"},{"first_name":"Ralf","full_name":"Schlüter, Ralf","last_name":"Schlüter"},{"last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242","first_name":"Reinhold"}],"date_created":"2024-12-09T11:46:18Z","year":"2024","citation":{"mla":"Vieting, Peter, et al. “Combining TF-GridNet and Mixture Encoder for Continuous Speech Separation for Meeting Transcription.” <i>2024 IEEE Spoken Language Technology Workshop (SLT)</i>, 2024.","bibtex":"@inproceedings{Vieting_Berger_von Neumann_Boeddeker_Schlüter_Haeb-Umbach_2024, title={Combining TF-GridNet and Mixture Encoder for Continuous Speech Separation for Meeting Transcription}, booktitle={2024 IEEE Spoken Language Technology Workshop (SLT)}, author={Vieting, Peter and Berger, Simon and von Neumann, Thilo and Boeddeker, Christoph and Schlüter, Ralf and Haeb-Umbach, Reinhold}, year={2024} }","short":"P. Vieting, S. Berger, T. von Neumann, C. Boeddeker, R. Schlüter, R. Haeb-Umbach, in: 2024 IEEE Spoken Language Technology Workshop (SLT), 2024.","apa":"Vieting, P., Berger, S., von Neumann, T., Boeddeker, C., Schlüter, R., &#38; Haeb-Umbach, R. (2024). Combining TF-GridNet and Mixture Encoder for Continuous Speech Separation for Meeting Transcription. <i>2024 IEEE Spoken Language Technology Workshop (SLT)</i>.","ama":"Vieting P, Berger S, von Neumann T, Boeddeker C, Schlüter R, Haeb-Umbach R. Combining TF-GridNet and Mixture Encoder for Continuous Speech Separation for Meeting Transcription. In: <i>2024 IEEE Spoken Language Technology Workshop (SLT)</i>. ; 2024.","chicago":"Vieting, Peter, Simon Berger, Thilo von Neumann, Christoph Boeddeker, Ralf Schlüter, and Reinhold Haeb-Umbach. “Combining TF-GridNet and Mixture Encoder for Continuous Speech Separation for Meeting Transcription.” In <i>2024 IEEE Spoken Language Technology Workshop (SLT)</i>, 2024.","ieee":"P. Vieting, S. Berger, T. von Neumann, C. Boeddeker, R. Schlüter, and R. Haeb-Umbach, “Combining TF-GridNet and Mixture Encoder for Continuous Speech Separation for Meeting Transcription,” 2024."}},{"type":"conference","status":"public","user_id":"44393","department":[{"_id":"54"}],"project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing"},{"_id":"508","name":"Automatische Transkription von Gesprächssituationen"}],"_id":"57085","file_date_updated":"2025-08-14T08:11:57Z","has_accepted_license":"1","citation":{"chicago":"Cord-Landwehr, Tobias, Christoph Boeddeker, and Reinhold Haeb-Umbach. “Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models.” In <i>ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2024. <a href=\"https://doi.org/10.1109/ICASSP49660.2025.10888445\">https://doi.org/10.1109/ICASSP49660.2025.10888445</a>.","ieee":"T. Cord-Landwehr, C. Boeddeker, and R. Haeb-Umbach, “Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models,” presented at the 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Hyderabad, India, 2024, doi: <a href=\"https://doi.org/10.1109/ICASSP49660.2025.10888445\">10.1109/ICASSP49660.2025.10888445</a>.","ama":"Cord-Landwehr T, Boeddeker C, Haeb-Umbach R. Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models. In: <i>ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. ; 2024. doi:<a href=\"https://doi.org/10.1109/ICASSP49660.2025.10888445\">10.1109/ICASSP49660.2025.10888445</a>","mla":"Cord-Landwehr, Tobias, et al. “Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models.” <i>ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2024, doi:<a href=\"https://doi.org/10.1109/ICASSP49660.2025.10888445\">10.1109/ICASSP49660.2025.10888445</a>.","bibtex":"@inproceedings{Cord-Landwehr_Boeddeker_Haeb-Umbach_2024, title={Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models}, DOI={<a href=\"https://doi.org/10.1109/ICASSP49660.2025.10888445\">10.1109/ICASSP49660.2025.10888445</a>}, booktitle={ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, author={Cord-Landwehr, Tobias and Boeddeker, Christoph and Haeb-Umbach, Reinhold}, year={2024} }","short":"T. Cord-Landwehr, C. Boeddeker, R. Haeb-Umbach, in: ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2024.","apa":"Cord-Landwehr, T., Boeddeker, C., &#38; Haeb-Umbach, R. (2024). Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models. <i>ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Hyderabad, India. <a href=\"https://doi.org/10.1109/ICASSP49660.2025.10888445\">https://doi.org/10.1109/ICASSP49660.2025.10888445</a>"},"author":[{"first_name":"Tobias","id":"44393","full_name":"Cord-Landwehr, Tobias","last_name":"Cord-Landwehr"},{"id":"40767","full_name":"Boeddeker, Christoph","last_name":"Boeddeker","first_name":"Christoph"},{"first_name":"Reinhold","id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach"}],"oa":"1","date_updated":"2025-08-14T08:12:22Z","main_file_link":[{"url":"https://arxiv.org/pdf/2410.21455","open_access":"1"}],"conference":{"location":"Hyderabad, India","name":"2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"},"doi":"10.1109/ICASSP49660.2025.10888445","publication":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","file":[{"relation":"main_file","success":1,"content_type":"application/pdf","file_name":"main.pdf","file_id":"60930","access_level":"closed","file_size":259907,"creator":"cord","date_created":"2025-08-14T08:11:57Z","date_updated":"2025-08-14T08:11:57Z"}],"abstract":[{"lang":"eng","text":"We propose an approach for simultaneous diarization and separation of meeting data. It consists of a complex Angular Central Gaussian Mixture Model (cACGMM) for speech source separation, and a von-Mises-Fisher Mixture Model (VMFMM) for diarization in a joint statistical framework. Through the integration, both spatial and spectral information are exploited for diarization and separation. We also develop a method for counting the number of active speakers in a segment of a meeting to support block-wise processing. While the total number of speakers in a meeting may be known, it is usually not known on a per-segment level. With the proposed speaker counting, joint diarization and source separation can be done segment-by-segment, and the permutation problem across segments is solved, thus allowing for block-online processing in the future. Experimental results on the LibriCSS meeting corpus show that the integrated approach outperforms a cascaded approach of diarization and speech enhancement in terms of WER, both on a per-segment and on a per-meeting level."}],"language":[{"iso":"eng"}],"ddc":["000"],"keyword":["diarization","source separation","mixture model","meeting"],"year":"2024","date_created":"2024-11-14T09:32:38Z","title":"Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models"},{"year":"2024","title":"Geodesic Interpolation of Frame-Wise Speaker Embeddings for the Diarization of Meeting Scenarios","publisher":"IEEE","date_created":"2024-04-25T12:57:22Z","file":[{"content_type":"application/pdf","relation":"main_file","success":1,"date_created":"2025-08-14T08:09:52Z","creator":"cord","date_updated":"2025-08-14T08:09:52Z","access_level":"closed","file_id":"60929","file_name":"main.pdf","file_size":254478}],"publication":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","ddc":["000"],"language":[{"iso":"eng"}],"citation":{"apa":"Cord-Landwehr, T., Boeddeker, C., Zorilă, C., Doddipatla, R., &#38; Haeb-Umbach, R. (2024). Geodesic Interpolation of Frame-Wise Speaker Embeddings for the Diarization of Meeting Scenarios. <i>ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. 2024 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), Seoul. <a href=\"https://doi.org/10.1109/icassp48485.2024.10445911\">https://doi.org/10.1109/icassp48485.2024.10445911</a>","mla":"Cord-Landwehr, Tobias, et al. “Geodesic Interpolation of Frame-Wise Speaker Embeddings for the Diarization of Meeting Scenarios.” <i>ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, IEEE, 2024, doi:<a href=\"https://doi.org/10.1109/icassp48485.2024.10445911\">10.1109/icassp48485.2024.10445911</a>.","short":"T. Cord-Landwehr, C. Boeddeker, C. Zorilă, R. Doddipatla, R. Haeb-Umbach, in: ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE, 2024.","bibtex":"@inproceedings{Cord-Landwehr_Boeddeker_Zorilă_Doddipatla_Haeb-Umbach_2024, title={Geodesic Interpolation of Frame-Wise Speaker Embeddings for the Diarization of Meeting Scenarios}, DOI={<a href=\"https://doi.org/10.1109/icassp48485.2024.10445911\">10.1109/icassp48485.2024.10445911</a>}, booktitle={ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, publisher={IEEE}, author={Cord-Landwehr, Tobias and Boeddeker, Christoph and Zorilă, Cătălin and Doddipatla, Rama and Haeb-Umbach, Reinhold}, year={2024} }","chicago":"Cord-Landwehr, Tobias, Christoph Boeddeker, Cătălin Zorilă, Rama Doddipatla, and Reinhold Haeb-Umbach. “Geodesic Interpolation of Frame-Wise Speaker Embeddings for the Diarization of Meeting Scenarios.” In <i>ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. IEEE, 2024. <a href=\"https://doi.org/10.1109/icassp48485.2024.10445911\">https://doi.org/10.1109/icassp48485.2024.10445911</a>.","ieee":"T. Cord-Landwehr, C. Boeddeker, C. Zorilă, R. Doddipatla, and R. Haeb-Umbach, “Geodesic Interpolation of Frame-Wise Speaker Embeddings for the Diarization of Meeting Scenarios,” presented at the 2024 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), Seoul, 2024, doi: <a href=\"https://doi.org/10.1109/icassp48485.2024.10445911\">10.1109/icassp48485.2024.10445911</a>.","ama":"Cord-Landwehr T, Boeddeker C, Zorilă C, Doddipatla R, Haeb-Umbach R. Geodesic Interpolation of Frame-Wise Speaker Embeddings for the Diarization of Meeting Scenarios. In: <i>ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. IEEE; 2024. doi:<a href=\"https://doi.org/10.1109/icassp48485.2024.10445911\">10.1109/icassp48485.2024.10445911</a>"},"publication_status":"published","has_accepted_license":"1","doi":"10.1109/icassp48485.2024.10445911","conference":{"location":"Seoul","name":"2024 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)"},"date_updated":"2025-08-14T08:11:07Z","author":[{"first_name":"Tobias","last_name":"Cord-Landwehr","id":"44393","full_name":"Cord-Landwehr, Tobias"},{"first_name":"Christoph","last_name":"Boeddeker","id":"40767","full_name":"Boeddeker, Christoph"},{"full_name":"Zorilă, Cătălin","last_name":"Zorilă","first_name":"Cătălin"},{"last_name":"Doddipatla","full_name":"Doddipatla, Rama","first_name":"Rama"},{"last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242","first_name":"Reinhold"}],"status":"public","type":"conference","file_date_updated":"2025-08-14T08:09:52Z","project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"},{"_id":"508","name":"Automatische Transkription von Gesprächssituationen"}],"_id":"53659","user_id":"44393","department":[{"_id":"54"}]},{"quality_controlled":"1","has_accepted_license":"1","citation":{"ama":"Werning A, Haeb-Umbach R. Target-Specific Dataset Pruning for Compression of Audio Tagging Models. In: <i>32nd European Signal Processing Conference (EUSIPCO 2024)</i>. ; 2024.","ieee":"A. Werning and R. Haeb-Umbach, “Target-Specific Dataset Pruning for Compression of Audio Tagging Models,” presented at the 32nd European Signal Processing Conference, Lyon, 2024.","chicago":"Werning, Alexander, and Reinhold Haeb-Umbach. “Target-Specific Dataset Pruning for Compression of Audio Tagging Models.” In <i>32nd European Signal Processing Conference (EUSIPCO 2024)</i>, 2024.","mla":"Werning, Alexander, and Reinhold Haeb-Umbach. “Target-Specific Dataset Pruning for Compression of Audio Tagging Models.” <i>32nd European Signal Processing Conference (EUSIPCO 2024)</i>, 2024.","short":"A. Werning, R. Haeb-Umbach, in: 32nd European Signal Processing Conference (EUSIPCO 2024), 2024.","bibtex":"@inproceedings{Werning_Haeb-Umbach_2024, title={Target-Specific Dataset Pruning for Compression of Audio Tagging Models}, booktitle={32nd European Signal Processing Conference (EUSIPCO 2024)}, author={Werning, Alexander and Haeb-Umbach, Reinhold}, year={2024} }","apa":"Werning, A., &#38; Haeb-Umbach, R. (2024). Target-Specific Dataset Pruning for Compression of Audio Tagging Models. <i>32nd European Signal Processing Conference (EUSIPCO 2024)</i>. 32nd European Signal Processing Conference, Lyon."},"year":"2024","author":[{"last_name":"Werning","id":"62152","full_name":"Werning, Alexander","first_name":"Alexander"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242"}],"date_created":"2024-11-18T09:29:16Z","date_updated":"2025-11-28T13:22:00Z","conference":{"location":"Lyon","name":"32nd European Signal Processing Conference"},"title":"Target-Specific Dataset Pruning for Compression of Audio Tagging Models","publication":"32nd European Signal Processing Conference (EUSIPCO 2024)","type":"conference","status":"public","file":[{"file_name":"Eusipco__Target_specific_Dataset_Pruning_for_Compression_of_Audio_Tagging_Models.pdf","file_id":"57200","access_level":"closed","file_size":183539,"date_created":"2024-11-18T12:10:09Z","creator":"awerning","date_updated":"2024-11-18T12:10:09Z","relation":"main_file","success":1,"content_type":"application/pdf"}],"abstract":[{"text":"Large audio tagging models are usually trained or pre-trained on AudioSet, a dataset that encompasses a large amount of different sound classes and acoustic environments. Knowledge distillation has emerged as a method to compress such models without compromising their effectiveness. There are many different applications for audio tagging, some of which require a specialization to a narrow domain of sounds to be classified. For these scenarios, it is beneficial to distill the large audio tagger with respect to a specific subset of sounds of interest. A method to prune a general dataset with respect to a target dataset is presented. By distilling with such a specialized pruned dataset, we obtain a compressed model with better classification accuracy in the specific target domain than with target-agnostic distillation.","lang":"eng"}],"department":[{"_id":"54"}],"user_id":"62152","_id":"57160","project":[{"_id":"512","name":"WestAI - AI Service Center West"}],"file_date_updated":"2024-11-18T12:10:09Z","language":[{"iso":"eng"}],"keyword":["data pruning","knowledge distillation","audio tagging"],"ddc":["000"]},{"publication_status":"published","has_accepted_license":"1","quality_controlled":"1","year":"2024","citation":{"ama":"Gburrek T, Meise AT, Schmalenstroeer J, Haeb-Umbach R. Diminishing Domain Mismatch for DNN-Based Acoustic Distance Estimation via Stochastic Room Reverberation Models. In: <i>2024 18th International Workshop on Acoustic Signal Enhancement (IWAENC)</i>. IEEE; 2024. doi:<a href=\"https://doi.org/10.1109/iwaenc61483.2024.10694103\">10.1109/iwaenc61483.2024.10694103</a>","chicago":"Gburrek, Tobias, Adrian Tobias Meise, Joerg Schmalenstroeer, and Reinhold Haeb-Umbach. “Diminishing Domain Mismatch for DNN-Based Acoustic Distance Estimation via Stochastic Room Reverberation Models.” In <i>2024 18th International Workshop on Acoustic Signal Enhancement (IWAENC)</i>. IEEE, 2024. <a href=\"https://doi.org/10.1109/iwaenc61483.2024.10694103\">https://doi.org/10.1109/iwaenc61483.2024.10694103</a>.","ieee":"T. Gburrek, A. T. Meise, J. Schmalenstroeer, and R. Haeb-Umbach, “Diminishing Domain Mismatch for DNN-Based Acoustic Distance Estimation via Stochastic Room Reverberation Models,” 2024, doi: <a href=\"https://doi.org/10.1109/iwaenc61483.2024.10694103\">10.1109/iwaenc61483.2024.10694103</a>.","apa":"Gburrek, T., Meise, A. T., Schmalenstroeer, J., &#38; Haeb-Umbach, R. (2024). Diminishing Domain Mismatch for DNN-Based Acoustic Distance Estimation via Stochastic Room Reverberation Models. <i>2024 18th International Workshop on Acoustic Signal Enhancement (IWAENC)</i>. <a href=\"https://doi.org/10.1109/iwaenc61483.2024.10694103\">https://doi.org/10.1109/iwaenc61483.2024.10694103</a>","mla":"Gburrek, Tobias, et al. “Diminishing Domain Mismatch for DNN-Based Acoustic Distance Estimation via Stochastic Room Reverberation Models.” <i>2024 18th International Workshop on Acoustic Signal Enhancement (IWAENC)</i>, IEEE, 2024, doi:<a href=\"https://doi.org/10.1109/iwaenc61483.2024.10694103\">10.1109/iwaenc61483.2024.10694103</a>.","bibtex":"@inproceedings{Gburrek_Meise_Schmalenstroeer_Haeb-Umbach_2024, title={Diminishing Domain Mismatch for DNN-Based Acoustic Distance Estimation via Stochastic Room Reverberation Models}, DOI={<a href=\"https://doi.org/10.1109/iwaenc61483.2024.10694103\">10.1109/iwaenc61483.2024.10694103</a>}, booktitle={2024 18th International Workshop on Acoustic Signal Enhancement (IWAENC)}, publisher={IEEE}, author={Gburrek, Tobias and Meise, Adrian Tobias and Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold}, year={2024} }","short":"T. Gburrek, A.T. Meise, J. Schmalenstroeer, R. Haeb-Umbach, in: 2024 18th International Workshop on Acoustic Signal Enhancement (IWAENC), IEEE, 2024."},"oa":"1","date_updated":"2026-01-05T16:28:23Z","publisher":"IEEE","date_created":"2024-11-13T08:39:57Z","author":[{"id":"44006","full_name":"Gburrek, Tobias","last_name":"Gburrek","first_name":"Tobias"},{"id":"79268","full_name":"Meise, Adrian Tobias","last_name":"Meise","first_name":"Adrian Tobias"},{"full_name":"Schmalenstroeer, Joerg","id":"460","last_name":"Schmalenstroeer","first_name":"Joerg"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242"}],"title":"Diminishing Domain Mismatch for DNN-Based Acoustic Distance Estimation via Stochastic Room Reverberation Models","doi":"10.1109/iwaenc61483.2024.10694103","type":"conference","publication":"2024 18th International Workshop on Acoustic Signal Enhancement (IWAENC)","file":[{"relation":"main_file","content_type":"application/pdf","file_id":"57033","access_level":"open_access","file_name":"dist_est_iwaenc24_gburrek.pdf","file_size":300589,"creator":"tgburrek","date_created":"2024-11-13T08:42:13Z","date_updated":"2024-11-13T08:42:13Z"}],"status":"public","_id":"57031","user_id":"44006","department":[{"_id":"54"}],"ddc":["006"],"file_date_updated":"2024-11-13T08:42:13Z","language":[{"iso":"eng"}]},{"publication":"European Signal Processing Conference (EUSIPCO)","type":"conference","status":"public","_id":"48269","department":[{"_id":"54"}],"user_id":"460","language":[{"iso":"eng"}],"quality_controlled":"1","year":"2023","citation":{"short":"T. Gburrek, J. Schmalenstroeer, R. Haeb-Umbach, in: European Signal Processing Conference (EUSIPCO), 2023.","bibtex":"@inproceedings{Gburrek_Schmalenstroeer_Haeb-Umbach_2023, title={On the Integration of Sampling Rate Synchronization and Acoustic Beamforming}, booktitle={European Signal Processing Conference (EUSIPCO)}, author={Gburrek, Tobias and Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold}, year={2023} }","mla":"Gburrek, Tobias, et al. “On the Integration of Sampling Rate Synchronization and Acoustic Beamforming.” <i>European Signal Processing Conference (EUSIPCO)</i>, 2023.","apa":"Gburrek, T., Schmalenstroeer, J., &#38; Haeb-Umbach, R. (2023). On the Integration of Sampling Rate Synchronization and Acoustic Beamforming. <i>European Signal Processing Conference (EUSIPCO)</i>. European Signal Processing Conference (EUSIPCO), Helsinki.","ama":"Gburrek T, Schmalenstroeer J, Haeb-Umbach R. On the Integration of Sampling Rate Synchronization and Acoustic Beamforming. In: <i>European Signal Processing Conference (EUSIPCO)</i>. ; 2023.","ieee":"T. Gburrek, J. Schmalenstroeer, and R. Haeb-Umbach, “On the Integration of Sampling Rate Synchronization and Acoustic Beamforming,” presented at the European Signal Processing Conference (EUSIPCO), Helsinki, 2023.","chicago":"Gburrek, Tobias, Joerg Schmalenstroeer, and Reinhold Haeb-Umbach. “On the Integration of Sampling Rate Synchronization and Acoustic Beamforming.” In <i>European Signal Processing Conference (EUSIPCO)</i>, 2023."},"oa":"1","date_updated":"2023-10-26T08:16:23Z","author":[{"last_name":"Gburrek","id":"44006","full_name":"Gburrek, Tobias","first_name":"Tobias"},{"id":"460","full_name":"Schmalenstroeer, Joerg","last_name":"Schmalenstroeer","first_name":"Joerg"},{"id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach","first_name":"Reinhold"}],"date_created":"2023-10-18T12:54:08Z","title":"On the Integration of Sampling Rate Synchronization and Acoustic Beamforming","conference":{"name":"European Signal Processing Conference (EUSIPCO)","location":"Helsinki"},"main_file_link":[{"open_access":"1","url":"https://eurasip.org/Proceedings/Eusipco/Eusipco2023/pdfs/0000011.pdf"}]},{"department":[{"_id":"54"}],"user_id":"460","_id":"48270","file_date_updated":"2023-11-15T14:48:44Z","language":[{"iso":"eng"}],"ddc":["004"],"publication":"ITG Conference on Speech Communication","type":"conference","status":"public","file":[{"relation":"main_file","content_type":"application/pdf","file_id":"48483","access_level":"open_access","file_name":"SchTgbHaeb2023Final.pdf","file_size":2844502,"date_created":"2023-10-26T08:20:15Z","creator":"schmalen","date_updated":"2023-11-15T14:48:44Z"}],"author":[{"full_name":"Schmalenstroeer, Joerg","id":"460","last_name":"Schmalenstroeer","first_name":"Joerg"},{"first_name":"Tobias","last_name":"Gburrek","id":"44006","full_name":"Gburrek, Tobias"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"}],"date_created":"2023-10-18T13:00:54Z","date_updated":"2023-11-15T14:48:45Z","oa":"1","conference":{"location":"Aachen","name":"ITG Conference on Speech Communication"},"title":"LibriWASN: A Data Set for Meeting Separation, Diarization, and Recognition with Asynchronous Recording Devices","has_accepted_license":"1","quality_controlled":"1","citation":{"ieee":"J. Schmalenstroeer, T. Gburrek, and R. Haeb-Umbach, “LibriWASN: A Data Set for Meeting Separation, Diarization, and Recognition with Asynchronous Recording Devices,” presented at the ITG Conference on Speech Communication, Aachen, 2023.","chicago":"Schmalenstroeer, Joerg, Tobias Gburrek, and Reinhold Haeb-Umbach. “LibriWASN: A Data Set for Meeting Separation, Diarization, and Recognition with Asynchronous Recording Devices.” In <i>ITG Conference on Speech Communication</i>, 2023.","ama":"Schmalenstroeer J, Gburrek T, Haeb-Umbach R. LibriWASN: A Data Set for Meeting Separation, Diarization, and Recognition with Asynchronous Recording Devices. In: <i>ITG Conference on Speech Communication</i>. ; 2023.","apa":"Schmalenstroeer, J., Gburrek, T., &#38; Haeb-Umbach, R. (2023). LibriWASN: A Data Set for Meeting Separation, Diarization, and Recognition with Asynchronous Recording Devices. <i>ITG Conference on Speech Communication</i>. ITG Conference on Speech Communication, Aachen.","short":"J. Schmalenstroeer, T. Gburrek, R. Haeb-Umbach, in: ITG Conference on Speech Communication, 2023.","bibtex":"@inproceedings{Schmalenstroeer_Gburrek_Haeb-Umbach_2023, title={LibriWASN: A Data Set for Meeting Separation, Diarization, and Recognition with Asynchronous Recording Devices}, booktitle={ITG Conference on Speech Communication}, author={Schmalenstroeer, Joerg and Gburrek, Tobias and Haeb-Umbach, Reinhold}, year={2023} }","mla":"Schmalenstroeer, Joerg, et al. “LibriWASN: A Data Set for Meeting Separation, Diarization, and Recognition with Asynchronous Recording Devices.” <i>ITG Conference on Speech Communication</i>, 2023."},"year":"2023"},{"title":"On Feature Importance and Interpretability of Speaker Representations","date_created":"2023-10-20T08:04:46Z","year":"2023","ddc":["000"],"language":[{"iso":"eng"}],"external_id":{"arxiv":["2310.12599"]},"abstract":[{"lang":"eng","text":"Unsupervised speech disentanglement aims at separating fast varying from\r\nslowly varying components of a speech signal. In this contribution, we take a\r\ncloser look at the embedding vector representing the slowly varying signal\r\ncomponents, commonly named the speaker embedding vector. We ask, which\r\nproperties of a speaker's voice are captured and investigate to which extent do\r\nindividual embedding vector components sign responsible for them, using the\r\nconcept of Shapley values. Our findings show that certain speaker-specific\r\nacoustic-phonetic properties can be fairly well predicted from the speaker\r\nembedding, while the investigated more abstract voice quality features cannot."}],"file":[{"success":1,"relation":"main_file","content_type":"application/pdf","file_size":272390,"file_name":"arxiv.pdf","file_id":"48359","access_level":"closed","date_updated":"2023-10-20T08:20:58Z","creator":"frra","date_created":"2023-10-20T08:20:58Z"}],"publication":"ITG Conference on Speech Communication","main_file_link":[{"url":"https://arxiv.org/abs/2310.12599","open_access":"1"}],"conference":{"location":"Aachen","end_date":"2023-09-22","start_date":"2023-09-20","name":"ITG Conference on Speech Communication"},"date_updated":"2023-11-22T13:44:33Z","oa":"1","author":[{"first_name":"Frederik","last_name":"Rautenberg","full_name":"Rautenberg, Frederik","id":"72602"},{"first_name":"Michael","last_name":"Kuhlmann","id":"49871","full_name":"Kuhlmann, Michael"},{"first_name":"Jana","full_name":"Wiechmann, Jana","last_name":"Wiechmann"},{"full_name":"Seebauer, Fritz","last_name":"Seebauer","first_name":"Fritz"},{"first_name":"Petra","last_name":"Wagner","full_name":"Wagner, Petra"},{"last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242","first_name":"Reinhold"}],"citation":{"ama":"Rautenberg F, Kuhlmann M, Wiechmann J, Seebauer F, Wagner P, Haeb-Umbach R. On Feature Importance and Interpretability of Speaker Representations. In: <i>ITG Conference on Speech Communication</i>. ; 2023.","ieee":"F. Rautenberg, M. Kuhlmann, J. Wiechmann, F. Seebauer, P. Wagner, and R. Haeb-Umbach, “On Feature Importance and Interpretability of Speaker Representations,” presented at the ITG Conference on Speech Communication, Aachen, 2023.","chicago":"Rautenberg, Frederik, Michael Kuhlmann, Jana Wiechmann, Fritz Seebauer, Petra Wagner, and Reinhold Haeb-Umbach. “On Feature Importance and Interpretability of Speaker Representations.” In <i>ITG Conference on Speech Communication</i>, 2023.","bibtex":"@inproceedings{Rautenberg_Kuhlmann_Wiechmann_Seebauer_Wagner_Haeb-Umbach_2023, title={On Feature Importance and Interpretability of Speaker Representations}, booktitle={ITG Conference on Speech Communication}, author={Rautenberg, Frederik and Kuhlmann, Michael and Wiechmann, Jana and Seebauer, Fritz and Wagner, Petra and Haeb-Umbach, Reinhold}, year={2023} }","mla":"Rautenberg, Frederik, et al. “On Feature Importance and Interpretability of Speaker Representations.” <i>ITG Conference on Speech Communication</i>, 2023.","short":"F. Rautenberg, M. Kuhlmann, J. Wiechmann, F. Seebauer, P. Wagner, R. Haeb-Umbach, in: ITG Conference on Speech Communication, 2023.","apa":"Rautenberg, F., Kuhlmann, M., Wiechmann, J., Seebauer, F., Wagner, P., &#38; Haeb-Umbach, R. (2023). On Feature Importance and Interpretability of Speaker Representations. <i>ITG Conference on Speech Communication</i>. ITG Conference on Speech Communication, Aachen."},"has_accepted_license":"1","file_date_updated":"2023-10-20T08:20:58Z","project":[{"grant_number":"438445824","name":"TRR 318 - C06: TRR 318 - Technisch unterstütztes Erklären von Stimmcharakteristika (Teilprojekt C06)","_id":"129"}],"_id":"48355","user_id":"72602","department":[{"_id":"54"},{"_id":"660"}],"status":"public","type":"conference"}]
