[{"user_id":"62152","department":[{"_id":"54"}],"project":[{"name":"WestAI - AI Service Center West","_id":"512"}],"_id":"59900","status":"public","type":"conference","conference":{"start_date":"2025-03-17","name":"DAS|DAGA 2025 - 51st Annual Meeting on Acoustics","location":"Copenhagen","end_date":"2025-03-20"},"author":[{"last_name":"Werning","full_name":"Werning, Alexander","id":"62152","first_name":"Alexander"},{"first_name":"Reinhold","id":"242","full_name":"Häb-Umbach, Reinhold","last_name":"Häb-Umbach"}],"date_updated":"2025-11-28T13:21:13Z","citation":{"short":"A. Werning, R. Häb-Umbach, in: Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025 (Ed.), Proceedings of DAS|DAGA 2025, Copenhagen, 2025.","bibtex":"@inproceedings{Werning_Häb-Umbach_2025, place={Copenhagen}, title={Distilling Efficient Audio Models using Data Pruning with CLAP}, booktitle={Proceedings of DAS|DAGA 2025}, author={Werning, Alexander and Häb-Umbach, Reinhold}, editor={Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025}, year={2025} }","mla":"Werning, Alexander, and Reinhold Häb-Umbach. “Distilling Efficient Audio Models Using Data Pruning with CLAP.” <i>Proceedings of DAS|DAGA 2025</i>, edited by Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025, 2025.","apa":"Werning, A., &#38; Häb-Umbach, R. (2025). Distilling Efficient Audio Models using Data Pruning with CLAP. In Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025 (Ed.), <i>Proceedings of DAS|DAGA 2025</i>.","ama":"Werning A, Häb-Umbach R. Distilling Efficient Audio Models using Data Pruning with CLAP. In: Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025, ed. <i>Proceedings of DAS|DAGA 2025</i>. ; 2025.","chicago":"Werning, Alexander, and Reinhold Häb-Umbach. “Distilling Efficient Audio Models Using Data Pruning with CLAP.” In <i>Proceedings of DAS|DAGA 2025</i>, edited by Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025. Copenhagen, 2025.","ieee":"A. Werning and R. 
Häb-Umbach, “Distilling Efficient Audio Models using Data Pruning with CLAP,” in <i>Proceedings of DAS|DAGA 2025</i>, Copenhagen, 2025."},"place":"Copenhagen","publication_status":"published","publication_identifier":{"unknown":["978-3-939296-23-2"]},"has_accepted_license":"1","language":[{"iso":"eng"}],"ddc":["004"],"abstract":[{"text":"Running state-of-the-art large-scale audio models on edge devices is often infeasible due to their limited storage and computing resources. It is therefore necessary to compress and tune the models for the specific target task and hardware. This is commonly achieved by distilling the audio model, the teacher, to a small target model, the student. However, this approach can be improved by prepending a dataset pruning stage and training the teacher on the pruned data set only, which contains examples relevant to the target task. Recently, CLAP models have emerged that embed audio and text examples in a common embedding space. We use the audio embeddings of the CLAP model for the above pruning stage, which is realized using a domain classifier. After knowledge distillation, the student is eventually fine-tuned on some data from the target domain. The CLAP architecture combines text and audio embedding spaces, which allows to search for data given only a textual description, such as a class label. We show how this can help data pruning.","lang":"eng"}],"publication":"Proceedings of DAS|DAGA 2025","title":"Distilling Efficient Audio Models using Data Pruning with CLAP","date_created":"2025-05-14T13:18:10Z","corporate_editor":["Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025"],"year":"2025"},{"publication_identifier":{"isbn":["978-3-8007-6617-8"]},"citation":{"apa":"Meise, A. T., Cord-Landwehr, T., &#38; Haeb-Umbach, R. (2025). On the Application of Diffusion Models for Simultaneous Denoising and Dereverberation. <i> ITG Conference on Speech Communication</i>. 
ITG Conference on Speech Communication, Berlin.","bibtex":"@inproceedings{Meise_Cord-Landwehr_Haeb-Umbach_2025, title={On the Application of Diffusion Models for Simultaneous Denoising and Dereverberation}, booktitle={ ITG Conference on Speech Communication}, author={Meise, Adrian Tobias and Cord-Landwehr, Tobias and Haeb-Umbach, Reinhold}, year={2025} }","mla":"Meise, Adrian Tobias, et al. “On the Application of Diffusion Models for Simultaneous Denoising and Dereverberation.” <i> ITG Conference on Speech Communication</i>, 2025.","short":"A.T. Meise, T. Cord-Landwehr, R. Haeb-Umbach, in:  ITG Conference on Speech Communication, 2025.","ama":"Meise AT, Cord-Landwehr T, Haeb-Umbach R. On the Application of Diffusion Models for Simultaneous Denoising and Dereverberation. In: <i> ITG Conference on Speech Communication</i>. ; 2025.","ieee":"A. T. Meise, T. Cord-Landwehr, and R. Haeb-Umbach, “On the Application of Diffusion Models for Simultaneous Denoising and Dereverberation,” presented at the ITG Conference on Speech Communication, Berlin, 2025.","chicago":"Meise, Adrian Tobias, Tobias Cord-Landwehr, and Reinhold Haeb-Umbach. 
“On the Application of Diffusion Models for Simultaneous Denoising and Dereverberation.” In <i> ITG Conference on Speech Communication</i>, 2024."},"year":"2025","author":[{"last_name":"Meise","full_name":"Meise, Adrian Tobias","id":"79268","first_name":"Adrian Tobias"},{"first_name":"Tobias","last_name":"Cord-Landwehr","full_name":"Cord-Landwehr, Tobias","id":"44393"},{"first_name":"Reinhold","full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach"}],"date_created":"2025-11-13T07:21:51Z","date_updated":"2026-01-05T09:05:14Z","conference":{"location":"Berlin","name":"ITG Conference on Speech Communication"},"title":"On the Application of Diffusion Models for Simultaneous Denoising and Dereverberation","publication":"ITG Conference on Speech Communication","type":"conference","status":"public","department":[{"_id":"54"}],"user_id":"44393","_id":"62174","language":[{"iso":"eng"}]},{"file_date_updated":"2025-04-16T10:21:45Z","user_id":"40767","department":[{"_id":"54"}],"project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"},{"grant_number":"448568305","_id":"508","name":"Automatische Transkription von Gesprächssituationen"}],"_id":"52958","status":"public","type":"journal_article","main_file_link":[{"open_access":"1","url":"https://arxiv.org/abs/2303.03849"}],"doi":"10.1109/taslp.2024.3350887","author":[{"id":"40767","full_name":"Boeddeker, Christoph","last_name":"Boeddeker","first_name":"Christoph"},{"first_name":"Aswin Shanmugam","last_name":"Subramanian","full_name":"Subramanian, Aswin Shanmugam"},{"first_name":"Gordon","last_name":"Wichern","full_name":"Wichern, Gordon"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242"},{"last_name":"Le Roux","full_name":"Le Roux, Jonathan","first_name":"Jonathan"}],"volume":32,"oa":"1","date_updated":"2025-04-16T10:21:45Z","citation":{"ieee":"C. Boeddeker, A. S. Subramanian, G. Wichern, R. 
Haeb-Umbach, and J. Le Roux, “TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings,” <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i>, vol. 32, pp. 1185–1197, 2024, doi: <a href=\"https://doi.org/10.1109/taslp.2024.3350887\">10.1109/taslp.2024.3350887</a>.","chicago":"Boeddeker, Christoph, Aswin Shanmugam Subramanian, Gordon Wichern, Reinhold Haeb-Umbach, and Jonathan Le Roux. “TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings.” <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i> 32 (2024): 1185–97. <a href=\"https://doi.org/10.1109/taslp.2024.3350887\">https://doi.org/10.1109/taslp.2024.3350887</a>.","ama":"Boeddeker C, Subramanian AS, Wichern G, Haeb-Umbach R, Le Roux J. TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings. <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i>. 2024;32:1185-1197. doi:<a href=\"https://doi.org/10.1109/taslp.2024.3350887\">10.1109/taslp.2024.3350887</a>","apa":"Boeddeker, C., Subramanian, A. S., Wichern, G., Haeb-Umbach, R., &#38; Le Roux, J. (2024). TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings. <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i>, <i>32</i>, 1185–1197. <a href=\"https://doi.org/10.1109/taslp.2024.3350887\">https://doi.org/10.1109/taslp.2024.3350887</a>","mla":"Boeddeker, Christoph, et al. “TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings.” <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i>, vol. 32, Institute of Electrical and Electronics Engineers (IEEE), 2024, pp. 
1185–97, doi:<a href=\"https://doi.org/10.1109/taslp.2024.3350887\">10.1109/taslp.2024.3350887</a>.","bibtex":"@article{Boeddeker_Subramanian_Wichern_Haeb-Umbach_Le Roux_2024, title={TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings}, volume={32}, DOI={<a href=\"https://doi.org/10.1109/taslp.2024.3350887\">10.1109/taslp.2024.3350887</a>}, journal={IEEE/ACM Transactions on Audio, Speech, and Language Processing}, publisher={Institute of Electrical and Electronics Engineers (IEEE)}, author={Boeddeker, Christoph and Subramanian, Aswin Shanmugam and Wichern, Gordon and Haeb-Umbach, Reinhold and Le Roux, Jonathan}, year={2024}, pages={1185–1197} }","short":"C. Boeddeker, A.S. Subramanian, G. Wichern, R. Haeb-Umbach, J. Le Roux, IEEE/ACM Transactions on Audio, Speech, and Language Processing 32 (2024) 1185–1197."},"page":"1185-1197","intvolume":"        32","publication_status":"published","has_accepted_license":"1","publication_identifier":{"issn":["2329-9290","2329-9304"]},"language":[{"iso":"eng"}],"ddc":["000"],"keyword":["Electrical and Electronic Engineering","Acoustics and Ultrasonics","Computer Science (miscellaneous)","Computational Mathematics"],"file":[{"content_type":"application/pdf","relation":"main_file","creator":"cbj","date_created":"2025-04-16T10:14:47Z","date_updated":"2025-04-16T10:21:45Z","file_id":"59602","access_level":"open_access","file_name":"main.pdf","file_size":3432879},{"relation":"main_file","content_type":"application/pdf","file_size":2838635,"file_name":"slides.pdf","access_level":"open_access","file_id":"59603","date_updated":"2025-04-16T10:21:45Z","creator":"cbj","date_created":"2025-04-16T10:15:08Z"},{"file_name":"poster.pdf","file_id":"59604","access_level":"open_access","file_size":2038741,"creator":"cbj","date_created":"2025-04-16T10:15:22Z","date_updated":"2025-04-16T10:21:45Z","relation":"main_file","content_type":"application/pdf"}],"publication":"IEEE/ACM Transactions on Audio, Speech, and 
Language Processing","title":"TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings","date_created":"2024-03-26T16:11:54Z","publisher":"Institute of Electrical and Electronics Engineers (IEEE)","year":"2024"},{"title":"UPB-NT submission to DCASE24: Dataset pruning for targeted knowledge distillation","date_created":"2024-11-18T09:44:46Z","author":[{"first_name":"Alexander","full_name":"Werning, Alexander","id":"62152","last_name":"Werning"},{"first_name":"Reinhold","id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach"}],"date_updated":"2024-11-18T09:45:14Z","citation":{"ama":"Werning A, Haeb-Umbach R. <i>UPB-NT Submission to DCASE24: Dataset Pruning for Targeted Knowledge Distillation</i>.; 2024.","ieee":"A. Werning and R. Haeb-Umbach, <i>UPB-NT submission to DCASE24: Dataset pruning for targeted knowledge distillation</i>. 2024.","chicago":"Werning, Alexander, and Reinhold Haeb-Umbach. <i>UPB-NT Submission to DCASE24: Dataset Pruning for Targeted Knowledge Distillation</i>, 2024.","apa":"Werning, A., &#38; Haeb-Umbach, R. (2024). <i>UPB-NT submission to DCASE24: Dataset pruning for targeted knowledge distillation</i>.","short":"A. Werning, R. Haeb-Umbach, UPB-NT Submission to DCASE24: Dataset Pruning for Targeted Knowledge Distillation, 2024.","mla":"Werning, Alexander, and Reinhold Haeb-Umbach. <i>UPB-NT Submission to DCASE24: Dataset Pruning for Targeted Knowledge Distillation</i>. 
2024.","bibtex":"@book{Werning_Haeb-Umbach_2024, title={UPB-NT submission to DCASE24: Dataset pruning for targeted knowledge distillation}, author={Werning, Alexander and Haeb-Umbach, Reinhold}, year={2024} }"},"year":"2024","language":[{"iso":"eng"}],"department":[{"_id":"54"}],"user_id":"62152","_id":"57161","project":[{"name":"WestAI - AI Service Center West","_id":"512"}],"status":"public","type":"report"},{"language":[{"iso":"eng"}],"_id":"57099","user_id":"49871","department":[{"_id":"54"}],"status":"public","type":"conference","publication":"2024 32nd European Signal Processing Conference (EUSIPCO)","title":"Speaker and Style Disentanglement of Speech Based on Contrastive Predictive Coding Supported Factorized Variational Autoencoder","date_updated":"2024-11-15T06:54:40Z","author":[{"first_name":"Yuying","last_name":"Xie","full_name":"Xie, Yuying"},{"first_name":"Michael","id":"49871","full_name":"Kuhlmann, Michael","last_name":"Kuhlmann"},{"id":"72602","full_name":"Rautenberg, Frederik","last_name":"Rautenberg","first_name":"Frederik"},{"first_name":"Zheng-Hua","last_name":"Tan","full_name":"Tan, Zheng-Hua"},{"id":"242","full_name":"Häb-Umbach, Reinhold","last_name":"Häb-Umbach","first_name":"Reinhold"}],"date_created":"2024-11-15T06:52:54Z","year":"2024","citation":{"apa":"Xie, Y., Kuhlmann, M., Rautenberg, F., Tan, Z.-H., &#38; Häb-Umbach, R. (2024). Speaker and Style Disentanglement of Speech Based on Contrastive Predictive Coding Supported Factorized Variational Autoencoder. 
<i>2024 32nd European Signal Processing Conference (EUSIPCO)</i>, 436–440.","bibtex":"@inproceedings{Xie_Kuhlmann_Rautenberg_Tan_Häb-Umbach_2024, title={Speaker and Style Disentanglement of Speech Based on Contrastive Predictive Coding Supported Factorized Variational Autoencoder}, booktitle={2024 32nd European Signal Processing Conference (EUSIPCO)}, author={Xie, Yuying and Kuhlmann, Michael and Rautenberg, Frederik and Tan, Zheng-Hua and Häb-Umbach, Reinhold}, year={2024}, pages={436–440} }","mla":"Xie, Yuying, et al. “Speaker and Style Disentanglement of Speech Based on Contrastive Predictive Coding Supported Factorized Variational Autoencoder.” <i>2024 32nd European Signal Processing Conference (EUSIPCO)</i>, 2024, pp. 436–440.","short":"Y. Xie, M. Kuhlmann, F. Rautenberg, Z.-H. Tan, R. Häb-Umbach, in: 2024 32nd European Signal Processing Conference (EUSIPCO), 2024, pp. 436–440.","chicago":"Xie, Yuying, Michael Kuhlmann, Frederik Rautenberg, Zheng-Hua Tan, and Reinhold Häb-Umbach. “Speaker and Style Disentanglement of Speech Based on Contrastive Predictive Coding Supported Factorized Variational Autoencoder.” In <i>2024 32nd European Signal Processing Conference (EUSIPCO)</i>, 436–440, 2024.","ieee":"Y. Xie, M. Kuhlmann, F. Rautenberg, Z.-H. Tan, and R. Häb-Umbach, “Speaker and Style Disentanglement of Speech Based on Contrastive Predictive Coding Supported Factorized Variational Autoencoder,” in <i>2024 32nd European Signal Processing Conference (EUSIPCO)</i>, 2024, pp. 436–440.","ama":"Xie Y, Kuhlmann M, Rautenberg F, Tan Z-H, Häb-Umbach R. Speaker and Style Disentanglement of Speech Based on Contrastive Predictive Coding Supported Factorized Variational Autoencoder. In: <i>2024 32nd European Signal Processing Conference (EUSIPCO)</i>. 
; 2024:436–440."},"page":"436-440"},{"publication":"2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)","type":"conference","status":"public","file":[{"content_type":"application/pdf","relation":"main_file","date_updated":"2024-09-04T07:34:30Z","creator":"tvn","date_created":"2024-09-04T07:34:30Z","file_size":150432,"file_id":"56005","access_level":"open_access","file_name":"main.pdf"}],"department":[{"_id":"54"}],"user_id":"40767","_id":"56004","project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing"},{"grant_number":"448568305","_id":"508","name":"Automatische Transkription von Gesprächssituationen"}],"language":[{"iso":"eng"}],"file_date_updated":"2024-09-04T07:34:30Z","ddc":["000"],"has_accepted_license":"1","publication_status":"published","citation":{"mla":"von Neumann, Thilo, et al. “Meeting Recognition with Continuous Speech Separation and Transcription-Supported Diarization.” <i>2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)</i>, IEEE, 2024, doi:<a href=\"https://doi.org/10.1109/icasspw62465.2024.10625894\">10.1109/icasspw62465.2024.10625894</a>.","short":"T. von Neumann, C. Boeddeker, T. Cord-Landwehr, M. Delcroix, R. 
Haeb-Umbach, in: 2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW), IEEE, 2024.","bibtex":"@inproceedings{von Neumann_Boeddeker_Cord-Landwehr_Delcroix_Haeb-Umbach_2024, title={Meeting Recognition with Continuous Speech Separation and Transcription-Supported Diarization}, DOI={<a href=\"https://doi.org/10.1109/icasspw62465.2024.10625894\">10.1109/icasspw62465.2024.10625894</a>}, booktitle={2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)}, publisher={IEEE}, author={von Neumann, Thilo and Boeddeker, Christoph and Cord-Landwehr, Tobias and Delcroix, Marc and Haeb-Umbach, Reinhold}, year={2024} }","apa":"von Neumann, T., Boeddeker, C., Cord-Landwehr, T., Delcroix, M., &#38; Haeb-Umbach, R. (2024). Meeting Recognition with Continuous Speech Separation and Transcription-Supported Diarization. <i>2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)</i>. <a href=\"https://doi.org/10.1109/icasspw62465.2024.10625894\">https://doi.org/10.1109/icasspw62465.2024.10625894</a>","chicago":"Neumann, Thilo von, Christoph Boeddeker, Tobias Cord-Landwehr, Marc Delcroix, and Reinhold Haeb-Umbach. “Meeting Recognition with Continuous Speech Separation and Transcription-Supported Diarization.” In <i>2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)</i>. IEEE, 2024. <a href=\"https://doi.org/10.1109/icasspw62465.2024.10625894\">https://doi.org/10.1109/icasspw62465.2024.10625894</a>.","ieee":"T. von Neumann, C. Boeddeker, T. Cord-Landwehr, M. Delcroix, and R. Haeb-Umbach, “Meeting Recognition with Continuous Speech Separation and Transcription-Supported Diarization,” 2024, doi: <a href=\"https://doi.org/10.1109/icasspw62465.2024.10625894\">10.1109/icasspw62465.2024.10625894</a>.","ama":"von Neumann T, Boeddeker C, Cord-Landwehr T, Delcroix M, Haeb-Umbach R. 
Meeting Recognition with Continuous Speech Separation and Transcription-Supported Diarization. In: <i>2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)</i>. IEEE; 2024. doi:<a href=\"https://doi.org/10.1109/icasspw62465.2024.10625894\">10.1109/icasspw62465.2024.10625894</a>"},"year":"2024","author":[{"first_name":"Thilo","last_name":"von Neumann","orcid":"https://orcid.org/0000-0002-7717-8670","id":"49870","full_name":"von Neumann, Thilo"},{"first_name":"Christoph","last_name":"Boeddeker","id":"40767","full_name":"Boeddeker, Christoph"},{"first_name":"Tobias","last_name":"Cord-Landwehr","full_name":"Cord-Landwehr, Tobias","id":"44393"},{"full_name":"Delcroix, Marc","last_name":"Delcroix","first_name":"Marc"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold"}],"date_created":"2024-09-04T07:26:02Z","oa":"1","date_updated":"2025-02-12T09:20:07Z","publisher":"IEEE","doi":"10.1109/icasspw62465.2024.10625894","title":"Meeting Recognition with Continuous Speech Separation and Transcription-Supported Diarization"},{"language":[{"iso":"eng"}],"_id":"56272","project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing"},{"_id":"508","name":"Automatische Transkription von Gesprächssituationen","grant_number":"448568305"}],"department":[{"_id":"54"}],"user_id":"40767","status":"public","publication":"Interspeech 2024","type":"conference","title":"Once more Diarization: Improving meeting transcription systems through segment-level speaker reassignment","doi":"10.21437/interspeech.2024-1286","main_file_link":[{"open_access":"1","url":"https://www.isca-archive.org/interspeech_2024/boeddeker24_interspeech.pdf"}],"date_updated":"2025-02-12T09:18:36Z","oa":"1","publisher":"ISCA","date_created":"2024-09-30T08:04:47Z","author":[{"first_name":"Christoph","id":"40767","full_name":"Boeddeker, 
Christoph","last_name":"Boeddeker"},{"full_name":"Cord-Landwehr, Tobias","id":"44393","last_name":"Cord-Landwehr","first_name":"Tobias"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold"}],"year":"2024","citation":{"ama":"Boeddeker C, Cord-Landwehr T, Haeb-Umbach R. Once more Diarization: Improving meeting transcription systems through segment-level speaker reassignment. In: <i>Interspeech 2024</i>. ISCA; 2024. doi:<a href=\"https://doi.org/10.21437/interspeech.2024-1286\">10.21437/interspeech.2024-1286</a>","chicago":"Boeddeker, Christoph, Tobias Cord-Landwehr, and Reinhold Haeb-Umbach. “Once More Diarization: Improving Meeting Transcription Systems through Segment-Level Speaker Reassignment.” In <i>Interspeech 2024</i>. ISCA, 2024. <a href=\"https://doi.org/10.21437/interspeech.2024-1286\">https://doi.org/10.21437/interspeech.2024-1286</a>.","ieee":"C. Boeddeker, T. Cord-Landwehr, and R. Haeb-Umbach, “Once more Diarization: Improving meeting transcription systems through segment-level speaker reassignment,” 2024, doi: <a href=\"https://doi.org/10.21437/interspeech.2024-1286\">10.21437/interspeech.2024-1286</a>.","apa":"Boeddeker, C., Cord-Landwehr, T., &#38; Haeb-Umbach, R. (2024). Once more Diarization: Improving meeting transcription systems through segment-level speaker reassignment. <i>Interspeech 2024</i>. <a href=\"https://doi.org/10.21437/interspeech.2024-1286\">https://doi.org/10.21437/interspeech.2024-1286</a>","mla":"Boeddeker, Christoph, et al. 
“Once More Diarization: Improving Meeting Transcription Systems through Segment-Level Speaker Reassignment.” <i>Interspeech 2024</i>, ISCA, 2024, doi:<a href=\"https://doi.org/10.21437/interspeech.2024-1286\">10.21437/interspeech.2024-1286</a>.","bibtex":"@inproceedings{Boeddeker_Cord-Landwehr_Haeb-Umbach_2024, title={Once more Diarization: Improving meeting transcription systems through segment-level speaker reassignment}, DOI={<a href=\"https://doi.org/10.21437/interspeech.2024-1286\">10.21437/interspeech.2024-1286</a>}, booktitle={Interspeech 2024}, publisher={ISCA}, author={Boeddeker, Christoph and Cord-Landwehr, Tobias and Haeb-Umbach, Reinhold}, year={2024} }","short":"C. Boeddeker, T. Cord-Landwehr, R. Haeb-Umbach, in: Interspeech 2024, ISCA, 2024."},"publication_status":"published"},{"department":[{"_id":"54"}],"user_id":"40767","_id":"57659","project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing"},{"grant_number":"448568305","name":"Automatische Transkription von Gesprächssituationen","_id":"508"}],"language":[{"iso":"eng"}],"publication":"2024 IEEE Spoken Language Technology Workshop (SLT)","type":"conference","status":"public","author":[{"first_name":"Peter","full_name":"Vieting, Peter","last_name":"Vieting"},{"last_name":"Berger","full_name":"Berger, Simon","first_name":"Simon"},{"orcid":"https://orcid.org/0000-0002-7717-8670","last_name":"von Neumann","id":"49870","full_name":"von Neumann, Thilo","first_name":"Thilo"},{"full_name":"Boeddeker, Christoph","id":"40767","last_name":"Boeddeker","first_name":"Christoph"},{"first_name":"Ralf","full_name":"Schlüter, Ralf","last_name":"Schlüter"},{"first_name":"Reinhold","full_name":"Haeb-Umbach, 
Reinhold","id":"242","last_name":"Haeb-Umbach"}],"date_created":"2024-12-09T11:46:18Z","date_updated":"2025-02-12T09:20:59Z","oa":"1","main_file_link":[{"open_access":"1","url":"https://www-i6.informatik.rwth-aachen.de/publications/download/1259/VietingPeterBergerSimonNeumannThilovonBoeddekerChristophSchl%FCterRalfHaeb-UmbachReinhold--CombiningTF-GridNetMixtureEncoderforContinuousSpeechSeparationforMeetingTranscription--2024.pdf"}],"title":"Combining TF-GridNet and Mixture Encoder for Continuous Speech Separation for Meeting Transcription","citation":{"bibtex":"@inproceedings{Vieting_Berger_von Neumann_Boeddeker_Schlüter_Haeb-Umbach_2024, title={Combining TF-GridNet and Mixture Encoder for Continuous Speech Separation for Meeting Transcription}, booktitle={2024 IEEE Spoken Language Technology Workshop (SLT)}, author={Vieting, Peter and Berger, Simon and von Neumann, Thilo and Boeddeker, Christoph and Schlüter, Ralf and Haeb-Umbach, Reinhold}, year={2024} }","short":"P. Vieting, S. Berger, T. von Neumann, C. Boeddeker, R. Schlüter, R. Haeb-Umbach, in: 2024 IEEE Spoken Language Technology Workshop (SLT), 2024.","mla":"Vieting, Peter, et al. “Combining TF-GridNet and Mixture Encoder for Continuous Speech Separation for Meeting Transcription.” <i>2024 IEEE Spoken Language Technology Workshop (SLT)</i>, 2024.","apa":"Vieting, P., Berger, S., von Neumann, T., Boeddeker, C., Schlüter, R., &#38; Haeb-Umbach, R. (2024). Combining TF-GridNet and Mixture Encoder for Continuous Speech Separation for Meeting Transcription. <i>2024 IEEE Spoken Language Technology Workshop (SLT)</i>.","ieee":"P. Vieting, S. Berger, T. von Neumann, C. Boeddeker, R. Schlüter, and R. Haeb-Umbach, “Combining TF-GridNet and Mixture Encoder for Continuous Speech Separation for Meeting Transcription,” 2024.","chicago":"Vieting, Peter, Simon Berger, Thilo von Neumann, Christoph Boeddeker, Ralf Schlüter, and Reinhold Haeb-Umbach. 
“Combining TF-GridNet and Mixture Encoder for Continuous Speech Separation for Meeting Transcription.” In <i>2024 IEEE Spoken Language Technology Workshop (SLT)</i>, 2024.","ama":"Vieting P, Berger S, von Neumann T, Boeddeker C, Schlüter R, Haeb-Umbach R. Combining TF-GridNet and Mixture Encoder for Continuous Speech Separation for Meeting Transcription. In: <i>2024 IEEE Spoken Language Technology Workshop (SLT)</i>. ; 2024."},"year":"2024"},{"department":[{"_id":"54"}],"user_id":"44393","_id":"57085","project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing"},{"name":"Automatische Transkription von Gesprächssituationen","_id":"508"}],"file_date_updated":"2025-08-14T08:11:57Z","type":"conference","status":"public","author":[{"first_name":"Tobias","last_name":"Cord-Landwehr","full_name":"Cord-Landwehr, Tobias","id":"44393"},{"first_name":"Christoph","id":"40767","full_name":"Boeddeker, Christoph","last_name":"Boeddeker"},{"first_name":"Reinhold","id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach"}],"date_updated":"2025-08-14T08:12:22Z","oa":"1","doi":"10.1109/ICASSP49660.2025.10888445","conference":{"location":"Hyderabad, India","name":"2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"},"main_file_link":[{"open_access":"1","url":"https://arxiv.org/pdf/2410.21455"}],"has_accepted_license":"1","citation":{"ama":"Cord-Landwehr T, Boeddeker C, Haeb-Umbach R. Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models. In: <i>ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. ; 2024. doi:<a href=\"https://doi.org/10.1109/ICASSP49660.2025.10888445\">10.1109/ICASSP49660.2025.10888445</a>","chicago":"Cord-Landwehr, Tobias, Christoph Boeddeker, and Reinhold Haeb-Umbach. 
“Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models.” In <i>ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2024. <a href=\"https://doi.org/10.1109/ICASSP49660.2025.10888445\">https://doi.org/10.1109/ICASSP49660.2025.10888445</a>.","ieee":"T. Cord-Landwehr, C. Boeddeker, and R. Haeb-Umbach, “Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models,” presented at the 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Hyderabad, India, 2024, doi: <a href=\"https://doi.org/10.1109/ICASSP49660.2025.10888445\">10.1109/ICASSP49660.2025.10888445</a>.","bibtex":"@inproceedings{Cord-Landwehr_Boeddeker_Haeb-Umbach_2024, title={Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models}, DOI={<a href=\"https://doi.org/10.1109/ICASSP49660.2025.10888445\">10.1109/ICASSP49660.2025.10888445</a>}, booktitle={ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, author={Cord-Landwehr, Tobias and Boeddeker, Christoph and Haeb-Umbach, Reinhold}, year={2024} }","mla":"Cord-Landwehr, Tobias, et al. “Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models.” <i>ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2024, doi:<a href=\"https://doi.org/10.1109/ICASSP49660.2025.10888445\">10.1109/ICASSP49660.2025.10888445</a>.","short":"T. Cord-Landwehr, C. Boeddeker, R. Haeb-Umbach, in: ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2024.","apa":"Cord-Landwehr, T., Boeddeker, C., &#38; Haeb-Umbach, R. (2024). Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models. 
<i>ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Hyderabad, India. <a href=\"https://doi.org/10.1109/ICASSP49660.2025.10888445\">https://doi.org/10.1109/ICASSP49660.2025.10888445</a>"},"language":[{"iso":"eng"}],"keyword":["diarization","source separation","mixture model","meeting"],"ddc":["000"],"publication":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","file":[{"content_type":"application/pdf","relation":"main_file","success":1,"date_created":"2025-08-14T08:11:57Z","creator":"cord","date_updated":"2025-08-14T08:11:57Z","access_level":"closed","file_name":"main.pdf","file_id":"60930","file_size":259907}],"abstract":[{"lang":"eng","text":"We propose an approach for simultaneous diarization and separation of meeting data. It consists of a complex Angular Central Gaussian Mixture Model (cACGMM) for speech source separation, and a von-Mises-Fisher Mixture Model (VMFMM) for diarization in a joint statistical framework. Through the integration, both spatial and spectral information are exploited for diarization and separation. We also develop a method for counting the number of active speakers in a segment of a meeting to support block-wise processing. While the total number of speakers in a meeting may be known, it is usually not known on a per-segment level. With the proposed speaker counting, joint diarization and source separation can be done segment-by-segment, and the permutation problem across segments is solved, thus allowing for block-online processing in the future. 
Experimental results on the LibriCSS meeting corpus show that the integrated approach outperforms a cascaded approach of diarization and speech enhancement in terms of WER, both on a per-segment and on a per-meeting level."}],"date_created":"2024-11-14T09:32:38Z","title":"Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models","year":"2024"},{"year":"2024","date_created":"2024-04-25T12:57:22Z","publisher":"IEEE","title":"Geodesic Interpolation of Frame-Wise Speaker Embeddings for the Diarization of Meeting Scenarios","publication":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","file":[{"access_level":"closed","file_id":"60929","file_name":"main.pdf","file_size":254478,"date_created":"2025-08-14T08:09:52Z","creator":"cord","date_updated":"2025-08-14T08:09:52Z","relation":"main_file","success":1,"content_type":"application/pdf"}],"language":[{"iso":"eng"}],"ddc":["000"],"publication_status":"published","has_accepted_license":"1","citation":{"ama":"Cord-Landwehr T, Boeddeker C, Zorilă C, Doddipatla R, Haeb-Umbach R. Geodesic Interpolation of Frame-Wise Speaker Embeddings for the Diarization of Meeting Scenarios. In: <i>ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. IEEE; 2024. doi:<a href=\"https://doi.org/10.1109/icassp48485.2024.10445911\">10.1109/icassp48485.2024.10445911</a>","chicago":"Cord-Landwehr, Tobias, Christoph Boeddeker, Cătălin Zorilă, Rama Doddipatla, and Reinhold Haeb-Umbach. “Geodesic Interpolation of Frame-Wise Speaker Embeddings for the Diarization of Meeting Scenarios.” In <i>ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. IEEE, 2024. <a href=\"https://doi.org/10.1109/icassp48485.2024.10445911\">https://doi.org/10.1109/icassp48485.2024.10445911</a>.","ieee":"T. Cord-Landwehr, C. Boeddeker, C. Zorilă, R. Doddipatla, and R. 
Haeb-Umbach, “Geodesic Interpolation of Frame-Wise Speaker Embeddings for the Diarization of Meeting Scenarios,” presented at the 2024 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), Seoul, 2024, doi: <a href=\"https://doi.org/10.1109/icassp48485.2024.10445911\">10.1109/icassp48485.2024.10445911</a>.","short":"T. Cord-Landwehr, C. Boeddeker, C. Zorilă, R. Doddipatla, R. Haeb-Umbach, in: ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE, 2024.","mla":"Cord-Landwehr, Tobias, et al. “Geodesic Interpolation of Frame-Wise Speaker Embeddings for the Diarization of Meeting Scenarios.” <i>ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, IEEE, 2024, doi:<a href=\"https://doi.org/10.1109/icassp48485.2024.10445911\">10.1109/icassp48485.2024.10445911</a>.","bibtex":"@inproceedings{Cord-Landwehr_Boeddeker_Zorilă_Doddipatla_Haeb-Umbach_2024, title={Geodesic Interpolation of Frame-Wise Speaker Embeddings for the Diarization of Meeting Scenarios}, DOI={<a href=\"https://doi.org/10.1109/icassp48485.2024.10445911\">10.1109/icassp48485.2024.10445911</a>}, booktitle={ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, publisher={IEEE}, author={Cord-Landwehr, Tobias and Boeddeker, Christoph and Zorilă, Cătălin and Doddipatla, Rama and Haeb-Umbach, Reinhold}, year={2024} }","apa":"Cord-Landwehr, T., Boeddeker, C., Zorilă, C., Doddipatla, R., &#38; Haeb-Umbach, R. (2024). Geodesic Interpolation of Frame-Wise Speaker Embeddings for the Diarization of Meeting Scenarios. <i>ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. 2024 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), Seoul. 
<a href=\"https://doi.org/10.1109/icassp48485.2024.10445911\">https://doi.org/10.1109/icassp48485.2024.10445911</a>"},"author":[{"last_name":"Cord-Landwehr","id":"44393","full_name":"Cord-Landwehr, Tobias","first_name":"Tobias"},{"first_name":"Christoph","id":"40767","full_name":"Boeddeker, Christoph","last_name":"Boeddeker"},{"full_name":"Zorilă, Cătălin","last_name":"Zorilă","first_name":"Cătălin"},{"first_name":"Rama","full_name":"Doddipatla, Rama","last_name":"Doddipatla"},{"first_name":"Reinhold","full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach"}],"date_updated":"2025-08-14T08:11:07Z","conference":{"location":"Seoul","name":"2024 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)"},"doi":"10.1109/icassp48485.2024.10445911","type":"conference","status":"public","user_id":"44393","department":[{"_id":"54"}],"project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"},{"_id":"508","name":"Automatische Transkription von Gesprächssituationen"}],"_id":"53659","file_date_updated":"2025-08-14T08:09:52Z"},{"abstract":[{"text":"Large audio tagging models are usually trained or pre-trained on AudioSet, a dataset that encompasses a large amount of different sound classes and acoustic environments. Knowledge distillation has emerged as a method to compress such models without compromising their effectiveness. There are many different applications for audio tagging, some of which require a specialization to a narrow domain of sounds to be classified. For these scenarios, it is beneficial to distill the large audio tagger with respect to a specific subset of sounds of interest. A method to prune a general dataset with respect to a target dataset is presented. 
By distilling with such a specialized pruned dataset, we obtain a compressed model with better classification accuracy in the specific target domain than with target-agnostic distillation.","lang":"eng"}],"file":[{"file_size":183539,"file_name":"Eusipco__Target_specific_Dataset_Pruning_for_Compression_of_Audio_Tagging_Models.pdf","file_id":"57200","access_level":"closed","date_updated":"2024-11-18T12:10:09Z","date_created":"2024-11-18T12:10:09Z","creator":"awerning","success":1,"relation":"main_file","content_type":"application/pdf"}],"status":"public","type":"conference","publication":"32nd European Signal Processing Conference (EUSIPCO 2024)","ddc":["000"],"keyword":["data pruning","knowledge distillation","audio tagging"],"file_date_updated":"2024-11-18T12:10:09Z","language":[{"iso":"eng"}],"project":[{"name":"WestAI - AI Service Center West","_id":"512"}],"_id":"57160","user_id":"62152","department":[{"_id":"54"}],"year":"2024","citation":{"ama":"Werning A, Haeb-Umbach R. Target-Specific Dataset Pruning for Compression of Audio Tagging Models. In: <i>32nd European Signal Processing Conference (EUSIPCO 2024)</i>. ; 2024.","ieee":"A. Werning and R. Haeb-Umbach, “Target-Specific Dataset Pruning for Compression of Audio Tagging Models,” presented at the 32nd European Signal Processing Conference, Lyon, 2024.","chicago":"Werning, Alexander, and Reinhold Haeb-Umbach. “Target-Specific Dataset Pruning for Compression of Audio Tagging Models.” In <i>32nd European Signal Processing Conference (EUSIPCO 2024)</i>, 2024.","short":"A. Werning, R. Haeb-Umbach, in: 32nd European Signal Processing Conference (EUSIPCO 2024), 2024.","mla":"Werning, Alexander, and Reinhold Haeb-Umbach. 
“Target-Specific Dataset Pruning for Compression of Audio Tagging Models.” <i>32nd European Signal Processing Conference (EUSIPCO 2024)</i>, 2024.","bibtex":"@inproceedings{Werning_Haeb-Umbach_2024, title={Target-Specific Dataset Pruning for Compression of Audio Tagging Models}, booktitle={32nd European Signal Processing Conference (EUSIPCO 2024)}, author={Werning, Alexander and Haeb-Umbach, Reinhold}, year={2024} }","apa":"Werning, A., &#38; Haeb-Umbach, R. (2024). Target-Specific Dataset Pruning for Compression of Audio Tagging Models. <i>32nd European Signal Processing Conference (EUSIPCO 2024)</i>. 32nd European Signal Processing Conference, Lyon."},"quality_controlled":"1","has_accepted_license":"1","title":"Target-Specific Dataset Pruning for Compression of Audio Tagging Models","conference":{"location":"Lyon","name":"32nd European Signal Processing Conference"},"date_updated":"2025-11-28T13:22:00Z","author":[{"first_name":"Alexander","last_name":"Werning","full_name":"Werning, Alexander","id":"62152"},{"first_name":"Reinhold","full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach"}],"date_created":"2024-11-18T09:29:16Z"},{"author":[{"last_name":"Gburrek","id":"44006","full_name":"Gburrek, Tobias","first_name":"Tobias"},{"last_name":"Meise","id":"79268","full_name":"Meise, Adrian Tobias","first_name":"Adrian Tobias"},{"first_name":"Joerg","last_name":"Schmalenstroeer","full_name":"Schmalenstroeer, Joerg","id":"460"},{"id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach","first_name":"Reinhold"}],"date_created":"2024-11-13T08:39:57Z","date_updated":"2026-01-05T16:28:23Z","publisher":"IEEE","oa":"1","doi":"10.1109/iwaenc61483.2024.10694103","title":"Diminishing Domain Mismatch for DNN-Based Acoustic Distance Estimation via Stochastic Room Reverberation Models","has_accepted_license":"1","quality_controlled":"1","publication_status":"published","citation":{"ama":"Gburrek T, Meise AT, Schmalenstroeer J, Haeb-Umbach R. 
Diminishing Domain Mismatch for DNN-Based Acoustic Distance Estimation via Stochastic Room Reverberation Models. In: <i>2024 18th International Workshop on Acoustic Signal Enhancement (IWAENC)</i>. IEEE; 2024. doi:<a href=\"https://doi.org/10.1109/iwaenc61483.2024.10694103\">10.1109/iwaenc61483.2024.10694103</a>","chicago":"Gburrek, Tobias, Adrian Tobias Meise, Joerg Schmalenstroeer, and Reinhold Haeb-Umbach. “Diminishing Domain Mismatch for DNN-Based Acoustic Distance Estimation via Stochastic Room Reverberation Models.” In <i>2024 18th International Workshop on Acoustic Signal Enhancement (IWAENC)</i>. IEEE, 2024. <a href=\"https://doi.org/10.1109/iwaenc61483.2024.10694103\">https://doi.org/10.1109/iwaenc61483.2024.10694103</a>.","ieee":"T. Gburrek, A. T. Meise, J. Schmalenstroeer, and R. Haeb-Umbach, “Diminishing Domain Mismatch for DNN-Based Acoustic Distance Estimation via Stochastic Room Reverberation Models,” 2024, doi: <a href=\"https://doi.org/10.1109/iwaenc61483.2024.10694103\">10.1109/iwaenc61483.2024.10694103</a>.","apa":"Gburrek, T., Meise, A. T., Schmalenstroeer, J., &#38; Haeb-Umbach, R. (2024). Diminishing Domain Mismatch for DNN-Based Acoustic Distance Estimation via Stochastic Room Reverberation Models. <i>2024 18th International Workshop on Acoustic Signal Enhancement (IWAENC)</i>. <a href=\"https://doi.org/10.1109/iwaenc61483.2024.10694103\">https://doi.org/10.1109/iwaenc61483.2024.10694103</a>","mla":"Gburrek, Tobias, et al. 
“Diminishing Domain Mismatch for DNN-Based Acoustic Distance Estimation via Stochastic Room Reverberation Models.” <i>2024 18th International Workshop on Acoustic Signal Enhancement (IWAENC)</i>, IEEE, 2024, doi:<a href=\"https://doi.org/10.1109/iwaenc61483.2024.10694103\">10.1109/iwaenc61483.2024.10694103</a>.","bibtex":"@inproceedings{Gburrek_Meise_Schmalenstroeer_Haeb-Umbach_2024, title={Diminishing Domain Mismatch for DNN-Based Acoustic Distance Estimation via Stochastic Room Reverberation Models}, DOI={<a href=\"https://doi.org/10.1109/iwaenc61483.2024.10694103\">10.1109/iwaenc61483.2024.10694103</a>}, booktitle={2024 18th International Workshop on Acoustic Signal Enhancement (IWAENC)}, publisher={IEEE}, author={Gburrek, Tobias and Meise, Adrian Tobias and Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold}, year={2024} }","short":"T. Gburrek, A.T. Meise, J. Schmalenstroeer, R. Haeb-Umbach, in: 2024 18th International Workshop on Acoustic Signal Enhancement (IWAENC), IEEE, 2024."},"year":"2024","department":[{"_id":"54"}],"user_id":"44006","_id":"57031","file_date_updated":"2024-11-13T08:42:13Z","language":[{"iso":"eng"}],"ddc":["006"],"publication":"2024 18th International Workshop on Acoustic Signal Enhancement (IWAENC)","type":"conference","status":"public","file":[{"date_updated":"2024-11-13T08:42:13Z","date_created":"2024-11-13T08:42:13Z","creator":"tgburrek","file_size":300589,"access_level":"open_access","file_name":"dist_est_iwaenc24_gburrek.pdf","file_id":"57033","content_type":"application/pdf","relation":"main_file"}]},{"citation":{"apa":"Gburrek, T., Schmalenstroeer, J., &#38; Haeb-Umbach, R. (2023). On the Integration of Sampling Rate Synchronization and Acoustic Beamforming. <i>European Signal Processing Conference (EUSIPCO)</i>. 
European Signal Processing Conference (EUSIPCO), Helsinki.","bibtex":"@inproceedings{Gburrek_Schmalenstroeer_Haeb-Umbach_2023, title={On the Integration of Sampling Rate Synchronization and Acoustic Beamforming}, booktitle={European Signal Processing Conference (EUSIPCO)}, author={Gburrek, Tobias and Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold}, year={2023} }","mla":"Gburrek, Tobias, et al. “On the Integration of Sampling Rate Synchronization and Acoustic Beamforming.” <i>European Signal Processing Conference (EUSIPCO)</i>, 2023.","short":"T. Gburrek, J. Schmalenstroeer, R. Haeb-Umbach, in: European Signal Processing Conference (EUSIPCO), 2023.","ieee":"T. Gburrek, J. Schmalenstroeer, and R. Haeb-Umbach, “On the Integration of Sampling Rate Synchronization and Acoustic Beamforming,” presented at the European Signal Processing Conference (EUSIPCO), Helsinki, 2023.","chicago":"Gburrek, Tobias, Joerg Schmalenstroeer, and Reinhold Haeb-Umbach. “On the Integration of Sampling Rate Synchronization and Acoustic Beamforming.” In <i>European Signal Processing Conference (EUSIPCO)</i>, 2023.","ama":"Gburrek T, Schmalenstroeer J, Haeb-Umbach R. On the Integration of Sampling Rate Synchronization and Acoustic Beamforming. In: <i>European Signal Processing Conference (EUSIPCO)</i>. 
; 2023."},"year":"2023","quality_controlled":"1","conference":{"location":"Helsinki","name":"European Signal Processing Conference (EUSIPCO)"},"main_file_link":[{"open_access":"1","url":"https://eurasip.org/Proceedings/Eusipco/Eusipco2023/pdfs/0000011.pdf"}],"title":"On the Integration of Sampling Rate Synchronization and Acoustic Beamforming","date_created":"2023-10-18T12:54:08Z","author":[{"first_name":"Tobias","id":"44006","full_name":"Gburrek, Tobias","last_name":"Gburrek"},{"full_name":"Schmalenstroeer, Joerg","id":"460","last_name":"Schmalenstroeer","first_name":"Joerg"},{"first_name":"Reinhold","id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach"}],"oa":"1","date_updated":"2023-10-26T08:16:23Z","status":"public","publication":"European Signal Processing Conference (EUSIPCO)","type":"conference","language":[{"iso":"eng"}],"department":[{"_id":"54"}],"user_id":"460","_id":"48269"},{"oa":"1","date_updated":"2023-11-15T14:48:45Z","author":[{"first_name":"Joerg","full_name":"Schmalenstroeer, Joerg","id":"460","last_name":"Schmalenstroeer"},{"first_name":"Tobias","full_name":"Gburrek, Tobias","id":"44006","last_name":"Gburrek"},{"last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242","first_name":"Reinhold"}],"date_created":"2023-10-18T13:00:54Z","title":"LibriWASN: A Data Set for Meeting Separation, Diarization, and Recognition with Asynchronous Recording Devices","conference":{"name":"ITG Conference on Speech Communication","location":"Aachen"},"quality_controlled":"1","has_accepted_license":"1","year":"2023","citation":{"apa":"Schmalenstroeer, J., Gburrek, T., &#38; Haeb-Umbach, R. (2023). LibriWASN: A Data Set for Meeting Separation, Diarization, and Recognition with Asynchronous Recording Devices. <i>ITG Conference on Speech Communication</i>. ITG Conference on Speech Communication, Aachen.","short":"J. Schmalenstroeer, T. Gburrek, R. 
Haeb-Umbach, in: ITG Conference on Speech Communication, 2023.","mla":"Schmalenstroeer, Joerg, et al. “LibriWASN: A Data Set for Meeting Separation, Diarization, and Recognition with Asynchronous Recording Devices.” <i>ITG Conference on Speech Communication</i>, 2023.","bibtex":"@inproceedings{Schmalenstroeer_Gburrek_Haeb-Umbach_2023, title={LibriWASN: A Data Set for Meeting Separation, Diarization, and Recognition with Asynchronous Recording Devices}, booktitle={ITG Conference on Speech Communication}, author={Schmalenstroeer, Joerg and Gburrek, Tobias and Haeb-Umbach, Reinhold}, year={2023} }","ama":"Schmalenstroeer J, Gburrek T, Haeb-Umbach R. LibriWASN: A Data Set for Meeting Separation, Diarization, and Recognition with Asynchronous Recording Devices. In: <i>ITG Conference on Speech Communication</i>. ; 2023.","ieee":"J. Schmalenstroeer, T. Gburrek, and R. Haeb-Umbach, “LibriWASN: A Data Set for Meeting Separation, Diarization, and Recognition with Asynchronous Recording Devices,” presented at the ITG Conference on Speech Communication, Aachen, 2023.","chicago":"Schmalenstroeer, Joerg, Tobias Gburrek, and Reinhold Haeb-Umbach. 
“LibriWASN: A Data Set for Meeting Separation, Diarization, and Recognition with Asynchronous Recording Devices.” In <i>ITG Conference on Speech Communication</i>, 2023."},"_id":"48270","department":[{"_id":"54"}],"user_id":"460","ddc":["004"],"file_date_updated":"2023-11-15T14:48:44Z","language":[{"iso":"eng"}],"publication":"ITG Conference on Speech Communication","type":"conference","status":"public","file":[{"relation":"main_file","content_type":"application/pdf","file_size":2844502,"access_level":"open_access","file_id":"48483","file_name":"SchTgbHaeb2023Final.pdf","date_updated":"2023-11-15T14:48:44Z","creator":"schmalen","date_created":"2023-10-26T08:20:15Z"}]},{"file_date_updated":"2023-10-20T08:20:58Z","_id":"48355","project":[{"grant_number":"438445824","name":"TRR 318 - C06: TRR 318 - Technisch unterstütztes Erklären von Stimmcharakteristika (Teilprojekt C06)","_id":"129"}],"department":[{"_id":"54"},{"_id":"660"}],"user_id":"72602","status":"public","type":"conference","conference":{"location":"Aachen","end_date":"2023-09-22","start_date":"2023-09-20","name":"ITG Conference on Speech Communication"},"main_file_link":[{"url":"https://arxiv.org/abs/2310.12599","open_access":"1"}],"oa":"1","date_updated":"2023-11-22T13:44:33Z","author":[{"id":"72602","full_name":"Rautenberg, Frederik","last_name":"Rautenberg","first_name":"Frederik"},{"last_name":"Kuhlmann","full_name":"Kuhlmann, Michael","id":"49871","first_name":"Michael"},{"first_name":"Jana","last_name":"Wiechmann","full_name":"Wiechmann, Jana"},{"first_name":"Fritz","full_name":"Seebauer, Fritz","last_name":"Seebauer"},{"last_name":"Wagner","full_name":"Wagner, Petra","first_name":"Petra"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"}],"citation":{"chicago":"Rautenberg, Frederik, Michael Kuhlmann, Jana Wiechmann, Fritz Seebauer, Petra Wagner, and Reinhold Haeb-Umbach. 
“On Feature Importance and Interpretability of Speaker Representations.” In <i>ITG Conference on Speech Communication</i>, 2023.","ieee":"F. Rautenberg, M. Kuhlmann, J. Wiechmann, F. Seebauer, P. Wagner, and R. Haeb-Umbach, “On Feature Importance and Interpretability of Speaker Representations,” presented at the ITG Conference on Speech Communication, Aachen, 2023.","ama":"Rautenberg F, Kuhlmann M, Wiechmann J, Seebauer F, Wagner P, Haeb-Umbach R. On Feature Importance and Interpretability of Speaker Representations. In: <i>ITG Conference on Speech Communication</i>. ; 2023.","mla":"Rautenberg, Frederik, et al. “On Feature Importance and Interpretability of Speaker Representations.” <i>ITG Conference on Speech Communication</i>, 2023.","short":"F. Rautenberg, M. Kuhlmann, J. Wiechmann, F. Seebauer, P. Wagner, R. Haeb-Umbach, in: ITG Conference on Speech Communication, 2023.","bibtex":"@inproceedings{Rautenberg_Kuhlmann_Wiechmann_Seebauer_Wagner_Haeb-Umbach_2023, title={On Feature Importance and Interpretability of Speaker Representations}, booktitle={ITG Conference on Speech Communication}, author={Rautenberg, Frederik and Kuhlmann, Michael and Wiechmann, Jana and Seebauer, Fritz and Wagner, Petra and Haeb-Umbach, Reinhold}, year={2023} }","apa":"Rautenberg, F., Kuhlmann, M., Wiechmann, J., Seebauer, F., Wagner, P., &#38; Haeb-Umbach, R. (2023). On Feature Importance and Interpretability of Speaker Representations. <i>ITG Conference on Speech Communication</i>. ITG Conference on Speech Communication, Aachen."},"has_accepted_license":"1","ddc":["000"],"language":[{"iso":"eng"}],"external_id":{"arxiv":["2310.12599"]},"abstract":[{"text":"Unsupervised speech disentanglement aims at separating fast varying from\r\nslowly varying components of a speech signal. In this contribution, we take a\r\ncloser look at the embedding vector representing the slowly varying signal\r\ncomponents, commonly named the speaker embedding vector. 
We ask, which\r\nproperties of a speaker's voice are captured and investigate to which extent do\r\nindividual embedding vector components sign responsible for them, using the\r\nconcept of Shapley values. Our findings show that certain speaker-specific\r\nacoustic-phonetic properties can be fairly well predicted from the speaker\r\nembedding, while the investigated more abstract voice quality features cannot.","lang":"eng"}],"file":[{"access_level":"closed","file_id":"48359","file_name":"arxiv.pdf","file_size":272390,"creator":"frra","date_created":"2023-10-20T08:20:58Z","date_updated":"2023-10-20T08:20:58Z","relation":"main_file","success":1,"content_type":"application/pdf"}],"publication":"ITG Conference on Speech Communication","title":"On Feature Importance and Interpretability of Speaker Representations","date_created":"2023-10-20T08:04:46Z","year":"2023"},{"project":[{"grant_number":"438445824","name":"TRR 318 - C06: TRR 318 - Technisch unterstütztes Erklären von Stimmcharakteristika (Teilprojekt C06)","_id":"129"}],"_id":"48410","user_id":"72602","department":[{"_id":"54"},{"_id":"660"}],"ddc":["040"],"language":[{"iso":"eng"}],"file_date_updated":"2023-10-24T08:03:27Z","type":"conference","publication":"20th International Congress of the Phonetic Sciences (ICPhS) ","file":[{"success":1,"relation":"main_file","content_type":"application/pdf","file_size":209980,"access_level":"closed","file_id":"48413","file_name":"188.pdf","date_updated":"2023-10-24T08:03:27Z","date_created":"2023-10-24T08:03:27Z","creator":"frra"}],"status":"public","date_updated":"2023-11-22T13:44:59Z","oa":"1","date_created":"2023-10-24T08:05:40Z","author":[{"full_name":"Wiechmann, Jana","last_name":"Wiechmann","first_name":"Jana"},{"last_name":"Rautenberg","full_name":"Rautenberg, Frederik","id":"72602","first_name":"Frederik"},{"first_name":"Petra","full_name":"Wagner, Petra","last_name":"Wagner"},{"first_name":"Reinhold","id":"242","full_name":"Haeb-Umbach, 
Reinhold","last_name":"Haeb-Umbach"}],"title":"Explaining voice characteristics to novice voice practitioners-How successful is it?","main_file_link":[{"open_access":"1"}],"conference":{"start_date":"2023-08-07","end_date":"2023-08-11"},"has_accepted_license":"1","year":"2023","citation":{"apa":"Wiechmann, J., Rautenberg, F., Wagner, P., &#38; Haeb-Umbach, R. (2023). Explaining voice characteristics to novice voice practitioners-How successful is it? <i>20th International Congress of the Phonetic Sciences (ICPhS) </i>.","bibtex":"@inproceedings{Wiechmann_Rautenberg_Wagner_Haeb-Umbach_2023, title={Explaining voice characteristics to novice voice practitioners-How successful is it?}, booktitle={20th International Congress of the Phonetic Sciences (ICPhS) }, author={Wiechmann, Jana and Rautenberg, Frederik and Wagner, Petra and Haeb-Umbach, Reinhold}, year={2023} }","short":"J. Wiechmann, F. Rautenberg, P. Wagner, R. Haeb-Umbach, in: 20th International Congress of the Phonetic Sciences (ICPhS) , 2023.","mla":"Wiechmann, Jana, et al. “Explaining Voice Characteristics to Novice Voice Practitioners-How Successful Is It?” <i>20th International Congress of the Phonetic Sciences (ICPhS) </i>, 2023.","chicago":"Wiechmann, Jana, Frederik Rautenberg, Petra Wagner, and Reinhold Haeb-Umbach. “Explaining Voice Characteristics to Novice Voice Practitioners-How Successful Is It?” In <i>20th International Congress of the Phonetic Sciences (ICPhS) </i>, 2023.","ieee":"J. Wiechmann, F. Rautenberg, P. Wagner, and R. Haeb-Umbach, “Explaining voice characteristics to novice voice practitioners-How successful is it?,” 2023.","ama":"Wiechmann J, Rautenberg F, Wagner P, Haeb-Umbach R. Explaining voice characteristics to novice voice practitioners-How successful is it? In: <i>20th International Congress of the Phonetic Sciences (ICPhS) </i>. ; 2023."}},{"year":"2023","citation":{"ama":"Seebauer F, Kuhlmann M, Haeb-Umbach R, Wagner P. Re-examining the quality dimensions of synthetic speech. 
In: <i>12th Speech Synthesis Workshop (SSW) 2023</i>. ; 2023.","ieee":"F. Seebauer, M. Kuhlmann, R. Haeb-Umbach, and P. Wagner, “Re-examining the quality dimensions of synthetic speech,” 2023.","chicago":"Seebauer, Fritz, Michael Kuhlmann, Reinhold Haeb-Umbach, and Petra Wagner. “Re-Examining the Quality Dimensions of Synthetic Speech.” In <i>12th Speech Synthesis Workshop (SSW) 2023</i>, 2023.","short":"F. Seebauer, M. Kuhlmann, R. Haeb-Umbach, P. Wagner, in: 12th Speech Synthesis Workshop (SSW) 2023, 2023.","bibtex":"@inproceedings{Seebauer_Kuhlmann_Haeb-Umbach_Wagner_2023, title={Re-examining the quality dimensions of synthetic speech}, booktitle={12th Speech Synthesis Workshop (SSW) 2023}, author={Seebauer, Fritz and Kuhlmann, Michael and Haeb-Umbach, Reinhold and Wagner, Petra}, year={2023} }","mla":"Seebauer, Fritz, et al. “Re-Examining the Quality Dimensions of Synthetic Speech.” <i>12th Speech Synthesis Workshop (SSW) 2023</i>, 2023.","apa":"Seebauer, F., Kuhlmann, M., Haeb-Umbach, R., &#38; Wagner, P. (2023). Re-examining the quality dimensions of synthetic speech. 
<i>12th Speech Synthesis Workshop (SSW) 2023</i>."},"has_accepted_license":"1","title":"Re-examining the quality dimensions of synthetic speech","date_updated":"2023-10-25T08:42:56Z","date_created":"2023-07-15T16:10:20Z","author":[{"first_name":"Fritz","full_name":"Seebauer, Fritz","last_name":"Seebauer"},{"first_name":"Michael","id":"49871","full_name":"Kuhlmann, Michael","last_name":"Kuhlmann"},{"first_name":"Reinhold","id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach"},{"full_name":"Wagner, Petra","last_name":"Wagner","first_name":"Petra"}],"status":"public","type":"conference","publication":"12th Speech Synthesis Workshop (SSW) 2023","language":[{"iso":"eng"}],"project":[{"grant_number":"438445824","_id":"129","name":"TRR 318 - C06: TRR 318 - Technisch unterstütztes Erklären von Stimmcharakteristika (Teilprojekt C06)"}],"_id":"46069","user_id":"242","department":[{"_id":"54"}]},{"publication":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","abstract":[{"lang":"eng","text":"Continuous Speech Separation (CSS) has been proposed to address speech overlaps during the analysis of realistic meeting-like conversations by eliminating any overlaps before further processing.\r\nCSS separates a recording of arbitrarily many speakers into a small number of overlap-free output channels, where each output channel may contain speech of multiple speakers.\r\nThis is often done by applying a conventional separation model trained with Utterance-level Permutation Invariant Training (uPIT), which exclusively maps a speaker to an output channel, in sliding window approach called stitching.\r\nRecently, we introduced an alternative training scheme called Graph-PIT that teaches the separation network to directly produce output streams in the required format without stitching.\r\nIt can handle an arbitrary number of speakers as long as never more of them overlap at the same time than the separator has output channels.\r\nIn this contribution, we 
further investigate the Graph-PIT training scheme.\r\nWe show in extended experiments that models trained with Graph-PIT also work in challenging reverberant conditions.\r\nModels trained in this way are able to perform segment-less CSS, i.e., without stitching, and achieve comparable and often better separation quality than the conventional CSS with uPIT and stitching.\r\nWe simplify the training schedule for Graph-PIT with the recently proposed Source Aggregated Signal-to-Distortion Ratio (SA-SDR) loss.\r\nIt eliminates unfavorable properties of the previously used A-SDR loss and thus enables training with Graph-PIT from scratch.\r\nGraph-PIT training relaxes the constraints w.r.t. the allowed numbers of speakers and speaking patterns which allows using a larger variety of training data.\r\nFurthermore, we introduce novel signal-level evaluation metrics for meeting scenarios, namely the source-aggregated scale- and convolution-invariant Signal-to-Distortion Ratio (SA-SI-SDR and SA-CI-SDR), which are generalizations of the commonly used SDR-based metrics for the CSS case."}],"file":[{"file_size":7185077,"access_level":"open_access","file_id":"35607","file_name":"main.pdf","date_updated":"2023-01-11T08:50:19Z","date_created":"2023-01-09T17:46:05Z","creator":"haebumb","relation":"main_file","content_type":"application/pdf"}],"ddc":["000"],"keyword":["Continuous Speech Separation","Source Separation","Graph-PIT","Dynamic Programming","Permutation Invariant Training"],"language":[{"iso":"eng"}],"quality_controlled":"1","year":"2023","publisher":"Institute of Electrical and Electronics Engineers (IEEE)","date_created":"2023-01-09T17:24:17Z","title":"Segment-Less Continuous Speech Separation of Meetings: Training and Evaluation Criteria","type":"journal_article","status":"public","project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel 
Computing"}],"_id":"35602","user_id":"49870","department":[{"_id":"54"}],"article_type":"original","file_date_updated":"2023-01-11T08:50:19Z","publication_status":"published","publication_identifier":{"issn":["2329-9290","2329-9304"]},"has_accepted_license":"1","citation":{"bibtex":"@article{von Neumann_Kinoshita_Boeddeker_Delcroix_Haeb-Umbach_2023, title={Segment-Less Continuous Speech Separation of Meetings: Training and Evaluation Criteria}, volume={31}, DOI={<a href=\"https://doi.org/10.1109/taslp.2022.3228629\">10.1109/taslp.2022.3228629</a>}, journal={IEEE/ACM Transactions on Audio, Speech, and Language Processing}, publisher={Institute of Electrical and Electronics Engineers (IEEE)}, author={von Neumann, Thilo and Kinoshita, Keisuke and Boeddeker, Christoph and Delcroix, Marc and Haeb-Umbach, Reinhold}, year={2023}, pages={576–589} }","short":"T. von Neumann, K. Kinoshita, C. Boeddeker, M. Delcroix, R. Haeb-Umbach, IEEE/ACM Transactions on Audio, Speech, and Language Processing 31 (2023) 576–589.","mla":"von Neumann, Thilo, et al. “Segment-Less Continuous Speech Separation of Meetings: Training and Evaluation Criteria.” <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i>, vol. 31, Institute of Electrical and Electronics Engineers (IEEE), 2023, pp. 576–89, doi:<a href=\"https://doi.org/10.1109/taslp.2022.3228629\">10.1109/taslp.2022.3228629</a>.","apa":"von Neumann, T., Kinoshita, K., Boeddeker, C., Delcroix, M., &#38; Haeb-Umbach, R. (2023). Segment-Less Continuous Speech Separation of Meetings: Training and Evaluation Criteria. <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i>, <i>31</i>, 576–589. <a href=\"https://doi.org/10.1109/taslp.2022.3228629\">https://doi.org/10.1109/taslp.2022.3228629</a>","ieee":"T. von Neumann, K. Kinoshita, C. Boeddeker, M. Delcroix, and R. 
Haeb-Umbach, “Segment-Less Continuous Speech Separation of Meetings: Training and Evaluation Criteria,” <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i>, vol. 31, pp. 576–589, 2023, doi: <a href=\"https://doi.org/10.1109/taslp.2022.3228629\">10.1109/taslp.2022.3228629</a>.","chicago":"Neumann, Thilo von, Keisuke Kinoshita, Christoph Boeddeker, Marc Delcroix, and Reinhold Haeb-Umbach. “Segment-Less Continuous Speech Separation of Meetings: Training and Evaluation Criteria.” <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i> 31 (2023): 576–89. <a href=\"https://doi.org/10.1109/taslp.2022.3228629\">https://doi.org/10.1109/taslp.2022.3228629</a>.","ama":"von Neumann T, Kinoshita K, Boeddeker C, Delcroix M, Haeb-Umbach R. Segment-Less Continuous Speech Separation of Meetings: Training and Evaluation Criteria. <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i>. 2023;31:576-589. doi:<a href=\"https://doi.org/10.1109/taslp.2022.3228629\">10.1109/taslp.2022.3228629</a>"},"intvolume":"        31","page":"576-589","date_updated":"2023-11-15T12:16:11Z","oa":"1","author":[{"full_name":"von Neumann, Thilo","id":"49870","orcid":"https://orcid.org/0000-0002-7717-8670","last_name":"von Neumann","first_name":"Thilo"},{"first_name":"Keisuke","full_name":"Kinoshita, Keisuke","last_name":"Kinoshita"},{"first_name":"Christoph","id":"40767","full_name":"Boeddeker, Christoph","last_name":"Boeddeker"},{"first_name":"Marc","full_name":"Delcroix, Marc","last_name":"Delcroix"},{"first_name":"Reinhold","id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach"}],"volume":31,"doi":"10.1109/taslp.2022.3228629"},{"file_date_updated":"2023-11-22T07:58:49Z","_id":"49109","department":[{"_id":"54"}],"user_id":"460","status":"public","type":"conference","conference":{"name":"57th Asilomar Conference on Signals, Systems, and 
Computers","start_date":"2023-10-31","end_date":"2023-11-01"},"date_updated":"2023-11-22T07:58:49Z","oa":"1","author":[{"full_name":"Gburrek, Tobias","id":"44006","last_name":"Gburrek","first_name":"Tobias"},{"last_name":"Schmalenstroeer","id":"460","full_name":"Schmalenstroeer, Joerg","first_name":"Joerg"},{"first_name":"Reinhold","full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach"}],"citation":{"ieee":"T. Gburrek, J. Schmalenstroeer, and R. Haeb-Umbach, “Spatial Diarization for Meeting Transcription with Ad-Hoc Acoustic Sensor Networks,” presented at the 57th Asilomar Conference on Signals, Systems, and Computers, 2023.","chicago":"Gburrek, Tobias, Joerg Schmalenstroeer, and Reinhold Haeb-Umbach. “Spatial Diarization for Meeting Transcription with Ad-Hoc Acoustic Sensor Networks.” In <i>Proc. Asilomar Conference on Signals, Systems, and Computers</i>, 2023.","ama":"Gburrek T, Schmalenstroeer J, Haeb-Umbach R. Spatial Diarization for Meeting Transcription with Ad-Hoc Acoustic Sensor Networks. In: <i>Proc. Asilomar Conference on Signals, Systems, and Computers</i>. ; 2023.","apa":"Gburrek, T., Schmalenstroeer, J., &#38; Haeb-Umbach, R. (2023). Spatial Diarization for Meeting Transcription with Ad-Hoc Acoustic Sensor Networks. <i>Proc. Asilomar Conference on Signals, Systems, and Computers</i>. 57th Asilomar Conference on Signals, Systems, and Computers.","mla":"Gburrek, Tobias, et al. “Spatial Diarization for Meeting Transcription with Ad-Hoc Acoustic Sensor Networks.” <i>Proc. Asilomar Conference on Signals, Systems, and Computers</i>, 2023.","bibtex":"@inproceedings{Gburrek_Schmalenstroeer_Haeb-Umbach_2023, title={Spatial Diarization for Meeting Transcription with Ad-Hoc Acoustic Sensor Networks}, booktitle={Proc. Asilomar Conference on Signals, Systems, and Computers}, author={Gburrek, Tobias and Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold}, year={2023} }","short":"T. Gburrek, J. Schmalenstroeer, R. Haeb-Umbach, in: Proc. 
Asilomar Conference on Signals, Systems, and Computers, 2023."},"has_accepted_license":"1","keyword":["Diarization","time difference of arrival","ad-hoc acoustic sensor network","meeting transcription"],"ddc":["004"],"language":[{"iso":"eng"}],"abstract":[{"text":"We propose a diarization system, that estimates “who spoke when” based on spatial information, to be used as a front-end of a meeting transcription system running on the signals gathered from an acoustic sensor network (ASN). Although the spatial distribution of the microphones is advantageous, exploiting the spatial diversity for diarization and signal enhancement is challenging, because the microphones’ positions are typically unknown, and the recorded signals are initially unsynchronized in general. Here, we approach these issues by first blindly synchronizing the signals and then estimating time differences of arrival (TDOAs). The TDOA information is exploited to estimate the speakers’ activity, even in the presence of multiple speakers being simultaneously active. This speaker activity information serves as a guide for a spatial mixture model, on which basis the individual speaker’s signals are extracted via beamforming. Finally, the extracted signals are forwarded to a speech recognizer. Additionally, a novel initialization scheme for spatial mixture models based on the TDOA estimates is proposed. Experiments conducted on real recordings from the LibriWASN data set have shown that our proposed system is advantageous compared to a system using a spatial mixture model, which does not make use of external diarization information.","lang":"eng"}],"file":[{"content_type":"application/pdf","relation":"main_file","creator":"schmalen","date_created":"2023-11-22T07:51:18Z","date_updated":"2023-11-22T07:58:49Z","file_name":"asilomar.pdf","access_level":"open_access","file_id":"49110","file_size":212317}],"publication":"Proc. 
Asilomar Conference on Signals, Systems, and Computers","title":"Spatial Diarization for Meeting Transcription with Ad-Hoc Acoustic Sensor Networks","date_created":"2023-11-22T07:52:29Z","year":"2023","quality_controlled":"1"},{"main_file_link":[{"open_access":"1","url":"https://pub.dega-akustik.de/DAGA_2023/data/articles/000105.pdf"}],"conference":{"start_date":"2023-03-06","name":"DAGA 2023 - 49. Jahrestagung für Akustik","location":"Hamburg","end_date":"2023-03-09"},"title":"Speech Disentanglement for Analysis and Modification of Acoustic and Perceptual Speaker Characteristics","date_created":"2023-05-15T08:48:54Z","author":[{"first_name":"Frederik","full_name":"Rautenberg, Frederik","id":"72602","last_name":"Rautenberg"},{"first_name":"Michael","full_name":"Kuhlmann, Michael","id":"49871","last_name":"Kuhlmann"},{"full_name":"Ebbers, Janek","id":"34851","last_name":"Ebbers","first_name":"Janek"},{"last_name":"Wiechmann","full_name":"Wiechmann, Jana","first_name":"Jana"},{"first_name":"Fritz","full_name":"Seebauer, Fritz","last_name":"Seebauer"},{"full_name":"Wagner, Petra","last_name":"Wagner","first_name":"Petra"},{"first_name":"Reinhold","full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach"}],"date_updated":"2024-02-29T17:05:16Z","oa":"1","citation":{"apa":"Rautenberg, F., Kuhlmann, M., Ebbers, J., Wiechmann, J., Seebauer, F., Wagner, P., &#38; Haeb-Umbach, R. (2023). Speech Disentanglement for Analysis and Modification of Acoustic and Perceptual Speaker Characteristics. 
<i>Fortschritte Der Akustik - DAGA 2023</i>, 1409–1412.","bibtex":"@inproceedings{Rautenberg_Kuhlmann_Ebbers_Wiechmann_Seebauer_Wagner_Haeb-Umbach_2023, title={Speech Disentanglement for Analysis and Modification of Acoustic and Perceptual Speaker Characteristics}, booktitle={Fortschritte der Akustik - DAGA 2023}, author={Rautenberg, Frederik and Kuhlmann, Michael and Ebbers, Janek and Wiechmann, Jana and Seebauer, Fritz and Wagner, Petra and Haeb-Umbach, Reinhold}, year={2023}, pages={1409–1412} }","mla":"Rautenberg, Frederik, et al. “Speech Disentanglement for Analysis and Modification of Acoustic and Perceptual Speaker Characteristics.” <i>Fortschritte Der Akustik - DAGA 2023</i>, 2023, pp. 1409–12.","short":"F. Rautenberg, M. Kuhlmann, J. Ebbers, J. Wiechmann, F. Seebauer, P. Wagner, R. Haeb-Umbach, in: Fortschritte Der Akustik - DAGA 2023, 2023, pp. 1409–1412.","ama":"Rautenberg F, Kuhlmann M, Ebbers J, et al. Speech Disentanglement for Analysis and Modification of Acoustic and Perceptual Speaker Characteristics. In: <i>Fortschritte Der Akustik - DAGA 2023</i>. ; 2023:1409-1412.","ieee":"F. Rautenberg <i>et al.</i>, “Speech Disentanglement for Analysis and Modification of Acoustic and Perceptual Speaker Characteristics,” in <i>Fortschritte der Akustik - DAGA 2023</i>, Hamburg, 2023, pp. 1409–1412.","chicago":"Rautenberg, Frederik, Michael Kuhlmann, Janek Ebbers, Jana Wiechmann, Fritz Seebauer, Petra Wagner, and Reinhold Haeb-Umbach. 
“Speech Disentanglement for Analysis and Modification of Acoustic and Perceptual Speaker Characteristics.” In <i>Fortschritte Der Akustik - DAGA 2023</i>, 1409–12, 2023."},"page":"1409-1412","year":"2023","publication_status":"published","has_accepted_license":"1","language":[{"iso":"eng"}],"file_date_updated":"2024-02-29T16:15:12Z","ddc":["000"],"user_id":"72602","department":[{"_id":"54"},{"_id":"660"}],"project":[{"name":"TRR 318 - C06: TRR 318 - Technisch unterstütztes Erklären von Stimmcharakteristika (Teilprojekt C06)","_id":"129","grant_number":"438445824"}],"_id":"44849","file":[{"file_size":289493,"access_level":"open_access","file_id":"52221","file_name":"Daga_2023_Rautenberg_Paper.pdf","date_updated":"2024-02-29T16:15:12Z","date_created":"2024-02-29T16:15:12Z","creator":"frra","relation":"main_file","content_type":"application/pdf"}],"status":"public","type":"conference","publication":"Fortschritte der Akustik - DAGA 2023"}]
