[{"title":"On Synchronization of Wireless Acoustic Sensor Networks in the Presence of Time-Varying Sampling Rate Offsets and Speaker Changes","date_created":"2022-10-18T09:25:51Z","publisher":"IEEE","year":"2022","quality_controlled":"1","language":[{"iso":"eng"}],"ddc":["004"],"file":[{"content_type":"application/pdf","relation":"main_file","date_updated":"2023-11-17T06:39:04Z","creator":"tgburrek","date_created":"2023-11-17T06:39:04Z","file_size":358015,"access_level":"open_access","file_id":"48990","file_name":"gburrek_icassp22.pdf"}],"publication":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","doi":"10.1109/icassp43922.2022.9746284","author":[{"first_name":"Tobias","id":"44006","full_name":"Gburrek, Tobias","last_name":"Gburrek"},{"full_name":"Schmalenstroeer, Joerg","id":"460","last_name":"Schmalenstroeer","first_name":"Joerg"},{"first_name":"Reinhold","full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach"}],"date_updated":"2023-11-17T06:39:28Z","oa":"1","citation":{"bibtex":"@inproceedings{Gburrek_Schmalenstroeer_Haeb-Umbach_2022, title={On Synchronization of Wireless Acoustic Sensor Networks in the Presence of Time-Varying Sampling Rate Offsets and Speaker Changes}, DOI={<a href=\"https://doi.org/10.1109/icassp43922.2022.9746284\">10.1109/icassp43922.2022.9746284</a>}, booktitle={ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, publisher={IEEE}, author={Gburrek, Tobias and Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold}, year={2022} }","mla":"Gburrek, Tobias, et al. “On Synchronization of Wireless Acoustic Sensor Networks in the Presence of Time-Varying Sampling Rate Offsets and Speaker Changes.” <i>ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, IEEE, 2022, doi:<a href=\"https://doi.org/10.1109/icassp43922.2022.9746284\">10.1109/icassp43922.2022.9746284</a>.","short":"T. 
Gburrek, J. Schmalenstroeer, R. Haeb-Umbach, in: ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE, 2022.","apa":"Gburrek, T., Schmalenstroeer, J., &#38; Haeb-Umbach, R. (2022). On Synchronization of Wireless Acoustic Sensor Networks in the Presence of Time-Varying Sampling Rate Offsets and Speaker Changes. <i>ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. <a href=\"https://doi.org/10.1109/icassp43922.2022.9746284\">https://doi.org/10.1109/icassp43922.2022.9746284</a>","ieee":"T. Gburrek, J. Schmalenstroeer, and R. Haeb-Umbach, “On Synchronization of Wireless Acoustic Sensor Networks in the Presence of Time-Varying Sampling Rate Offsets and Speaker Changes,” 2022, doi: <a href=\"https://doi.org/10.1109/icassp43922.2022.9746284\">10.1109/icassp43922.2022.9746284</a>.","chicago":"Gburrek, Tobias, Joerg Schmalenstroeer, and Reinhold Haeb-Umbach. “On Synchronization of Wireless Acoustic Sensor Networks in the Presence of Time-Varying Sampling Rate Offsets and Speaker Changes.” In <i>ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. IEEE, 2022. <a href=\"https://doi.org/10.1109/icassp43922.2022.9746284\">https://doi.org/10.1109/icassp43922.2022.9746284</a>.","ama":"Gburrek T, Schmalenstroeer J, Haeb-Umbach R. On Synchronization of Wireless Acoustic Sensor Networks in the Presence of Time-Varying Sampling Rate Offsets and Speaker Changes. In: <i>ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. IEEE; 2022. 
doi:<a href=\"https://doi.org/10.1109/icassp43922.2022.9746284\">10.1109/icassp43922.2022.9746284</a>"},"has_accepted_license":"1","publication_status":"published","file_date_updated":"2023-11-17T06:39:04Z","department":[{"_id":"54"}],"user_id":"44006","_id":"33807","status":"public","type":"conference"},{"citation":{"bibtex":"@article{Grimm_Fei_Warsitz_Farhoud_Breddermann_Haeb-Umbach_2022, title={Warping of Radar Data Into Camera Image for Cross-Modal Supervision in Automotive Applications}, volume={71}, DOI={<a href=\"https://doi.org/10.1109/TVT.2022.3182411\">10.1109/TVT.2022.3182411</a>}, number={9}, journal={IEEE Transactions on Vehicular Technology}, author={Grimm, Christopher and Fei, Tai and Warsitz, Ernst and Farhoud, Ridha and Breddermann, Tobias and Haeb-Umbach, Reinhold}, year={2022}, pages={9435–9449} }","short":"C. Grimm, T. Fei, E. Warsitz, R. Farhoud, T. Breddermann, R. Haeb-Umbach, IEEE Transactions on Vehicular Technology 71 (2022) 9435–9449.","mla":"Grimm, Christopher, et al. “Warping of Radar Data Into Camera Image for Cross-Modal Supervision in Automotive Applications.” <i>IEEE Transactions on Vehicular Technology</i>, vol. 71, no. 9, 2022, pp. 9435–49, doi:<a href=\"https://doi.org/10.1109/TVT.2022.3182411\">10.1109/TVT.2022.3182411</a>.","apa":"Grimm, C., Fei, T., Warsitz, E., Farhoud, R., Breddermann, T., &#38; Haeb-Umbach, R. (2022). Warping of Radar Data Into Camera Image for Cross-Modal Supervision in Automotive Applications. <i>IEEE Transactions on Vehicular Technology</i>, <i>71</i>(9), 9435–9449. <a href=\"https://doi.org/10.1109/TVT.2022.3182411\">https://doi.org/10.1109/TVT.2022.3182411</a>","ama":"Grimm C, Fei T, Warsitz E, Farhoud R, Breddermann T, Haeb-Umbach R. Warping of Radar Data Into Camera Image for Cross-Modal Supervision in Automotive Applications. <i>IEEE Transactions on Vehicular Technology</i>. 2022;71(9):9435-9449. 
doi:<a href=\"https://doi.org/10.1109/TVT.2022.3182411\">10.1109/TVT.2022.3182411</a>","chicago":"Grimm, Christopher, Tai Fei, Ernst Warsitz, Ridha Farhoud, Tobias Breddermann, and Reinhold Haeb-Umbach. “Warping of Radar Data Into Camera Image for Cross-Modal Supervision in Automotive Applications.” <i>IEEE Transactions on Vehicular Technology</i> 71, no. 9 (2022): 9435–49. <a href=\"https://doi.org/10.1109/TVT.2022.3182411\">https://doi.org/10.1109/TVT.2022.3182411</a>.","ieee":"C. Grimm, T. Fei, E. Warsitz, R. Farhoud, T. Breddermann, and R. Haeb-Umbach, “Warping of Radar Data Into Camera Image for Cross-Modal Supervision in Automotive Applications,” <i>IEEE Transactions on Vehicular Technology</i>, vol. 71, no. 9, pp. 9435–9449, 2022, doi: <a href=\"https://doi.org/10.1109/TVT.2022.3182411\">10.1109/TVT.2022.3182411</a>."},"intvolume":"71","page":"9435-9449","has_accepted_license":"1","doi":"10.1109/TVT.2022.3182411","author":[{"full_name":"Grimm, Christopher","last_name":"Grimm","first_name":"Christopher"},{"full_name":"Fei, Tai","last_name":"Fei","first_name":"Tai"},{"last_name":"Warsitz","full_name":"Warsitz, Ernst","first_name":"Ernst"},{"first_name":"Ridha","last_name":"Farhoud","full_name":"Farhoud, Ridha"},{"first_name":"Tobias","last_name":"Breddermann","full_name":"Breddermann, Tobias"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242"}],"volume":71,"oa":"1","date_updated":"2023-11-20T16:37:16Z","status":"public","type":"journal_article","file_date_updated":"2022-09-22T07:00:29Z","user_id":"242","department":[{"_id":"54"}],"_id":"33451","year":"2022","issue":"9","quality_controlled":"1","title":"Warping of Radar Data Into Camera Image for Cross-Modal Supervision in Automotive 
Applications","date_created":"2022-09-21T07:26:19Z","file":[{"relation":"main_file","content_type":"application/pdf","file_size":12117870,"file_name":"T-VT_AcceptedVersion.pdf","access_level":"open_access","file_id":"33460","date_updated":"2022-09-22T07:00:29Z","creator":"huesera","date_created":"2022-09-22T07:00:29Z"}],"abstract":[{"text":"We present an approach to automatically generate semantic labels for real recordings of automotive range-Doppler (RD) radar spectra. Such labels are required when training a neural network for object recognition from radar data. The automatic labeling approach rests on the simultaneous recording of camera and lidar data in addition to the radar spectrum. By warping radar spectra into the camera image, state-of-the-art object recognition algorithms can be applied to label relevant objects, such as cars, in the camera image. The warping operation is designed to be fully differentiable, which allows backpropagating the gradient computed on the camera image through the warping operation to the neural network operating on the radar data. As the warping operation relies on accurate scene flow estimation, we further propose a novel scene flow estimation algorithm which exploits information from camera, lidar and radar sensors. The\r\nproposed scene flow estimation approach is compared against a state-of-the-art scene flow algorithm, and it outperforms it by approximately 30% w.r.t. mean average error. 
The feasibility of the overall framework for automatic label generation for\r\nRD spectra is verified by evaluating the performance of neural networks trained with the proposed framework for Direction-of-Arrival estimation.","lang":"eng"}],"publication":"IEEE Transactions on Vehicular Technology","language":[{"iso":"eng"}],"ddc":["000"]},{"conference":{"start_date":"2022-10-06","end_date":"2022-10-07","location":"Bielefeld"},"title":"Technically enabled explaining of voice characteristics","date_created":"2022-10-12T07:10:03Z","author":[{"first_name":"Jana","full_name":"Wiechmann, Jana","last_name":"Wiechmann"},{"first_name":"Thomas","last_name":"Glarner","full_name":"Glarner, Thomas"},{"first_name":"Frederik","id":"72602","full_name":"Rautenberg, Frederik","last_name":"Rautenberg"},{"full_name":"Wagner, Petra","last_name":"Wagner","first_name":"Petra"},{"first_name":"Reinhold","id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach"}],"oa":"1","date_updated":"2023-11-22T13:45:30Z","citation":{"ama":"Wiechmann J, Glarner T, Rautenberg F, Wagner P, Haeb-Umbach R. Technically enabled explaining of voice characteristics. In: <i>18. Phonetik Und Phonologie Im Deutschsprachigen Raum (P&#38;P)</i>. ; 2022.","chicago":"Wiechmann, Jana, Thomas Glarner, Frederik Rautenberg, Petra Wagner, and Reinhold Haeb-Umbach. “Technically Enabled Explaining of Voice Characteristics.” In <i>18. Phonetik Und Phonologie Im Deutschsprachigen Raum (P&#38;P)</i>, 2022.","ieee":"J. Wiechmann, T. Glarner, F. Rautenberg, P. Wagner, and R. Haeb-Umbach, “Technically enabled explaining of voice characteristics,” Bielefeld, 2022.","apa":"Wiechmann, J., Glarner, T., Rautenberg, F., Wagner, P., &#38; Haeb-Umbach, R. (2022). Technically enabled explaining of voice characteristics. <i>18. 
Phonetik Und Phonologie Im Deutschsprachigen Raum (P&#38;P)</i>.","bibtex":"@inproceedings{Wiechmann_Glarner_Rautenberg_Wagner_Haeb-Umbach_2022, title={Technically enabled explaining of voice characteristics}, booktitle={18. Phonetik und Phonologie im deutschsprachigen Raum (P&#38;P)}, author={Wiechmann, Jana and Glarner, Thomas and Rautenberg, Frederik and Wagner, Petra and Haeb-Umbach, Reinhold}, year={2022} }","short":"J. Wiechmann, T. Glarner, F. Rautenberg, P. Wagner, R. Haeb-Umbach, in: 18. Phonetik Und Phonologie Im Deutschsprachigen Raum (P&#38;P), 2022.","mla":"Wiechmann, Jana, et al. “Technically Enabled Explaining of Voice Characteristics.” <i>18. Phonetik Und Phonologie Im Deutschsprachigen Raum (P&#38;P)</i>, 2022."},"year":"2022","has_accepted_license":"1","language":[{"iso":"eng"}],"file_date_updated":"2022-10-12T07:09:26Z","ddc":["000"],"department":[{"_id":"54"},{"_id":"660"}],"user_id":"72602","_id":"33696","project":[{"_id":"129","name":"TRR 318 - C06: TRR 318 - Technisch unterstütztes Erklären von Stimmcharakteristika (Teilprojekt C06)","grant_number":"438445824"}],"status":"public","file":[{"relation":"main_file","content_type":"application/pdf","file_size":109294,"access_level":"open_access","file_id":"33697","file_name":"PP_2022_paper_8911.pdf","date_updated":"2022-10-12T07:09:26Z","creator":"huesera","date_created":"2022-10-12T07:09:26Z"}],"publication":"18. 
Phonetik und Phonologie im deutschsprachigen Raum (P&P)","type":"conference"},{"language":[{"iso":"eng"}],"ddc":["000"],"publication":"Interspeech 2022","file":[{"success":1,"relation":"main_file","content_type":"application/pdf","file_size":303863,"file_id":"46070","access_level":"closed","file_name":"kuhlmann22_interspeech.pdf","date_updated":"2023-07-15T16:16:12Z","creator":"mikuhl","date_created":"2023-07-15T16:16:12Z"}],"date_created":"2022-10-21T06:50:59Z","publisher":"ISCA","title":"Investigation into Target Speaking Rate Adaptation for Voice Conversion","quality_controlled":"1","year":"2022","user_id":"34851","department":[{"_id":"54"}],"project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"_id":"33857","file_date_updated":"2023-07-15T16:16:12Z","type":"conference","status":"public","author":[{"full_name":"Kuhlmann, Michael","id":"49871","last_name":"Kuhlmann","first_name":"Michael"},{"last_name":"Seebauer","full_name":"Seebauer, Fritz","first_name":"Fritz"},{"first_name":"Janek","id":"34851","full_name":"Ebbers, Janek","last_name":"Ebbers"},{"last_name":"Wagner","full_name":"Wagner, Petra","first_name":"Petra"},{"first_name":"Reinhold","full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach"}],"date_updated":"2023-10-25T09:04:45Z","oa":"1","main_file_link":[{"url":"https://www.isca-speech.org/archive/pdfs/interspeech_2022/kuhlmann22_interspeech.pdf","open_access":"1"}],"doi":"10.21437/interspeech.2022-10740","publication_status":"published","has_accepted_license":"1","citation":{"chicago":"Kuhlmann, Michael, Fritz Seebauer, Janek Ebbers, Petra Wagner, and Reinhold Haeb-Umbach. “Investigation into Target Speaking Rate Adaptation for Voice Conversion.” In <i>Interspeech 2022</i>. ISCA, 2022. <a href=\"https://doi.org/10.21437/interspeech.2022-10740\">https://doi.org/10.21437/interspeech.2022-10740</a>.","ieee":"M. Kuhlmann, F. Seebauer, J. Ebbers, P. Wagner, and R. 
Haeb-Umbach, “Investigation into Target Speaking Rate Adaptation for Voice Conversion,” 2022, doi: <a href=\"https://doi.org/10.21437/interspeech.2022-10740\">10.21437/interspeech.2022-10740</a>.","ama":"Kuhlmann M, Seebauer F, Ebbers J, Wagner P, Haeb-Umbach R. Investigation into Target Speaking Rate Adaptation for Voice Conversion. In: <i>Interspeech 2022</i>. ISCA; 2022. doi:<a href=\"https://doi.org/10.21437/interspeech.2022-10740\">10.21437/interspeech.2022-10740</a>","apa":"Kuhlmann, M., Seebauer, F., Ebbers, J., Wagner, P., &#38; Haeb-Umbach, R. (2022). Investigation into Target Speaking Rate Adaptation for Voice Conversion. <i>Interspeech 2022</i>. <a href=\"https://doi.org/10.21437/interspeech.2022-10740\">https://doi.org/10.21437/interspeech.2022-10740</a>","short":"M. Kuhlmann, F. Seebauer, J. Ebbers, P. Wagner, R. Haeb-Umbach, in: Interspeech 2022, ISCA, 2022.","bibtex":"@inproceedings{Kuhlmann_Seebauer_Ebbers_Wagner_Haeb-Umbach_2022, title={Investigation into Target Speaking Rate Adaptation for Voice Conversion}, DOI={<a href=\"https://doi.org/10.21437/interspeech.2022-10740\">10.21437/interspeech.2022-10740</a>}, booktitle={Interspeech 2022}, publisher={ISCA}, author={Kuhlmann, Michael and Seebauer, Fritz and Ebbers, Janek and Wagner, Petra and Haeb-Umbach, Reinhold}, year={2022} }","mla":"Kuhlmann, Michael, et al. 
“Investigation into Target Speaking Rate Adaptation for Voice Conversion.” <i>Interspeech 2022</i>, ISCA, 2022, doi:<a href=\"https://doi.org/10.21437/interspeech.2022-10740\">10.21437/interspeech.2022-10740</a>."}},{"file_date_updated":"2023-11-17T06:40:40Z","_id":"33808","project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"department":[{"_id":"54"}],"user_id":"44006","status":"public","type":"conference","doi":"10.1109/IWAENC53105.2022.9914772","conference":{"start_date":"2022-09-05","name":"17th International Workshop on Acoustic Signal Enhancement (IWAENC 2022)","location":"Bamberg, Germany","end_date":"2022-09-08"},"oa":"1","date_updated":"2023-11-17T06:40:58Z","author":[{"full_name":"Gburrek, Tobias","id":"44006","last_name":"Gburrek","first_name":"Tobias"},{"first_name":"Joerg","id":"460","full_name":"Schmalenstroeer, Joerg","last_name":"Schmalenstroeer"},{"full_name":"Heitkaemper, Jens","id":"27643","last_name":"Heitkaemper","first_name":"Jens"},{"first_name":"Reinhold","id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach"}],"citation":{"apa":"Gburrek, T., Schmalenstroeer, J., Heitkaemper, J., &#38; Haeb-Umbach, R. (2022). Informed vs. Blind Beamforming in Ad-Hoc Acoustic Sensor Networks for Meeting Transcription. <i>2022 International Workshop on Acoustic Signal Enhancement (IWAENC)</i>. 17th International Workshop on Acoustic Signal Enhancement (IWAENC 2022), Bamberg, Germany. <a href=\"https://doi.org/10.1109/IWAENC53105.2022.9914772\">https://doi.org/10.1109/IWAENC53105.2022.9914772</a>","bibtex":"@inproceedings{Gburrek_Schmalenstroeer_Heitkaemper_Haeb-Umbach_2022, title={Informed vs. 
Blind Beamforming in Ad-Hoc Acoustic Sensor Networks for Meeting Transcription}, DOI={<a href=\"https://doi.org/10.1109/IWAENC53105.2022.9914772\">10.1109/IWAENC53105.2022.9914772</a>}, booktitle={2022 International Workshop on Acoustic Signal Enhancement (IWAENC)}, publisher={IEEE}, author={Gburrek, Tobias and Schmalenstroeer, Joerg and Heitkaemper, Jens and Haeb-Umbach, Reinhold}, year={2022} }","short":"T. Gburrek, J. Schmalenstroeer, J. Heitkaemper, R. Haeb-Umbach, in: 2022 International Workshop on Acoustic Signal Enhancement (IWAENC), IEEE, 2022.","mla":"Gburrek, Tobias, et al. “Informed vs. Blind Beamforming in Ad-Hoc Acoustic Sensor Networks for Meeting Transcription.” <i>2022 International Workshop on Acoustic Signal Enhancement (IWAENC)</i>, IEEE, 2022, doi:<a href=\"https://doi.org/10.1109/IWAENC53105.2022.9914772\">10.1109/IWAENC53105.2022.9914772</a>.","ama":"Gburrek T, Schmalenstroeer J, Heitkaemper J, Haeb-Umbach R. Informed vs. Blind Beamforming in Ad-Hoc Acoustic Sensor Networks for Meeting Transcription. In: <i>2022 International Workshop on Acoustic Signal Enhancement (IWAENC)</i>. IEEE; 2022. doi:<a href=\"https://doi.org/10.1109/IWAENC53105.2022.9914772\">10.1109/IWAENC53105.2022.9914772</a>","ieee":"T. Gburrek, J. Schmalenstroeer, J. Heitkaemper, and R. Haeb-Umbach, “Informed vs. Blind Beamforming in Ad-Hoc Acoustic Sensor Networks for Meeting Transcription,” presented at the 17th International Workshop on Acoustic Signal Enhancement (IWAENC 2022), Bamberg, Germany, 2022, doi: <a href=\"https://doi.org/10.1109/IWAENC53105.2022.9914772\">10.1109/IWAENC53105.2022.9914772</a>.","chicago":"Gburrek, Tobias, Joerg Schmalenstroeer, Jens Heitkaemper, and Reinhold Haeb-Umbach. “Informed vs. Blind Beamforming in Ad-Hoc Acoustic Sensor Networks for Meeting Transcription.” In <i>2022 International Workshop on Acoustic Signal Enhancement (IWAENC)</i>. IEEE, 2022. 
<a href=\"https://doi.org/10.1109/IWAENC53105.2022.9914772\">https://doi.org/10.1109/IWAENC53105.2022.9914772</a>."},"has_accepted_license":"1","ddc":["004"],"language":[{"iso":"eng"}],"file":[{"content_type":"application/pdf","relation":"main_file","date_updated":"2023-11-17T06:40:40Z","creator":"tgburrek","date_created":"2023-11-17T06:40:40Z","file_size":266475,"access_level":"open_access","file_id":"48991","file_name":"iwaenc_22_camera_ready_ieee_check.pdf"}],"publication":"2022 International Workshop on Acoustic Signal Enhancement (IWAENC)","title":"Informed vs. Blind Beamforming in Ad-Hoc Acoustic Sensor Networks for Meeting Transcription","publisher":"IEEE","date_created":"2022-10-18T09:30:24Z","year":"2022","quality_controlled":"1"},{"_id":"34072","department":[{"_id":"54"}],"user_id":"34851","ddc":["000"],"language":[{"iso":"eng"}],"file_date_updated":"2022-11-14T12:19:55Z","publication":"Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","type":"conference","abstract":[{"text":"Performing an adequate evaluation of sound event detection (SED) systems is far from trivial and is still subject to ongoing research. The recently proposed polyphonic sound detection (PSD)-receiver operating characteristic (ROC) and PSD score (PSDS) make an important step into the direction of an evaluation of SED systems which is independent from a certain decision threshold. This allows to obtain a more complete picture of the overall system behavior which is less biased by threshold tuning. Yet, the PSD-ROC is currently only approximated using a finite set of thresholds. The choice of\r\nthe thresholds used in approximation, however, can have a severe impact on the resulting PSDS. 
In this paper we propose a method which allows for computing system performance on an evaluation set for all possible thresholds jointly, enabling accurate computation not only of the PSD-ROC and PSDS but also of other collar-based\r\nand intersection-based performance curves. It further allows to select the threshold which best fulfills the requirements of a given application. Source code is publicly available in our SED evaluation package sed_scores_eval.","lang":"eng"}],"status":"public","file":[{"file_id":"34073","file_name":"Template.pdf","access_level":"open_access","file_size":214001,"creator":"ebbers","date_created":"2022-11-14T12:19:55Z","date_updated":"2022-11-14T12:19:55Z","relation":"main_file","content_type":"application/pdf"}],"oa":"1","date_updated":"2023-11-22T08:26:58Z","date_created":"2022-11-14T12:17:03Z","author":[{"full_name":"Ebbers, Janek","id":"34851","last_name":"Ebbers","first_name":"Janek"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold"},{"last_name":"Serizel","full_name":"Serizel, Romain","first_name":"Romain"}],"title":"Threshold Independent Evaluation of Sound Event Detection Scores","quality_controlled":"1","has_accepted_license":"1","year":"2022","citation":{"ieee":"J. Ebbers, R. Haeb-Umbach, and R. Serizel, “Threshold Independent Evaluation of Sound Event Detection Scores,” 2022.","chicago":"Ebbers, Janek, Reinhold Haeb-Umbach, and Romain Serizel. “Threshold Independent Evaluation of Sound Event Detection Scores.” In <i>Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2022.","ama":"Ebbers J, Haeb-Umbach R, Serizel R. Threshold Independent Evaluation of Sound Event Detection Scores. In: <i>Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. ; 2022.","apa":"Ebbers, J., Haeb-Umbach, R., &#38; Serizel, R. (2022). Threshold Independent Evaluation of Sound Event Detection Scores. 
<i>Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>.","short":"J. Ebbers, R. Haeb-Umbach, R. Serizel, in: Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2022.","mla":"Ebbers, Janek, et al. “Threshold Independent Evaluation of Sound Event Detection Scores.” <i>Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2022.","bibtex":"@inproceedings{Ebbers_Haeb-Umbach_Serizel_2022, title={Threshold Independent Evaluation of Sound Event Detection Scores}, booktitle={Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, author={Ebbers, Janek and Haeb-Umbach, Reinhold and Serizel, Romain}, year={2022} }"}},{"language":[{"iso":"eng"}],"file_date_updated":"2023-11-22T08:35:23Z","ddc":["000"],"user_id":"34851","department":[{"_id":"54"}],"project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"_id":"49113","file":[{"file_size":491650,"file_id":"49114","access_level":"closed","file_name":"dcase2022_tech_report_ebbers.pdf","date_updated":"2023-11-22T08:35:23Z","creator":"ebbers","date_created":"2023-11-22T08:35:23Z","success":1,"relation":"main_file","content_type":"application/pdf"}],"status":"public","abstract":[{"lang":"eng","text":"In this report we present our system for the Detection and Classification of Acoustic Scenes and Events (DCASE) 2022 Challenge Task 4: Sound Event Detection in Domestic Environments 1 . As in previous editions of the Challenge, we use forward-backward convolutional recurrent neural networks (FBCRNNs) [1, 2] for weakly labeled and semi-supervised sound event detection (SED) and eventually generate strong pseudo labels for weakly labeled and unlabeled data. 
Then, (tag-conditioned) bidirectional CRNNs (Bi-CRNNs) [1, 2] are trained in a strongly supervised manner as our final SED models. In each of the training stages we use multiple iterations of self-training. Compared to previous editions, we improved our system performance by 1) some tweaks regarding data augmentation, pseudo labeling and inference 2) using weakly labeled AudioSet data [3] for pretraining larger networks and 3) augmenting the DESED data [4] with strongly labeled AudioSet data [5] for finetuning of the networks. Source code is publicly available at https://github.com/fgnt/pb_sed."}],"type":"report","title":"Pre-Training And Self-Training For Sound Event Detection In Domestic Environments","author":[{"first_name":"Janek","full_name":"Ebbers, Janek","id":"34851","last_name":"Ebbers"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold"}],"date_created":"2023-11-22T08:34:23Z","date_updated":"2024-11-15T20:34:52Z","citation":{"ieee":"J. Ebbers and R. Haeb-Umbach, <i>Pre-Training And Self-Training For Sound Event Detection In Domestic Environments</i>. 2022.","chicago":"Ebbers, Janek, and Reinhold Haeb-Umbach. <i>Pre-Training And Self-Training For Sound Event Detection In Domestic Environments</i>, 2022.","ama":"Ebbers J, Haeb-Umbach R. <i>Pre-Training And Self-Training For Sound Event Detection In Domestic Environments</i>.; 2022.","apa":"Ebbers, J., &#38; Haeb-Umbach, R. (2022). <i>Pre-Training And Self-Training For Sound Event Detection In Domestic Environments</i>.","mla":"Ebbers, Janek, and Reinhold Haeb-Umbach. <i>Pre-Training And Self-Training For Sound Event Detection In Domestic Environments</i>. 2022.","short":"J. Ebbers, R. 
Haeb-Umbach, Pre-Training And Self-Training For Sound Event Detection In Domestic Environments, 2022.","bibtex":"@book{Ebbers_Haeb-Umbach_2022, title={Pre-Training And Self-Training For Sound Event Detection In Domestic Environments}, author={Ebbers, Janek and Haeb-Umbach, Reinhold}, year={2022} }"},"year":"2022","has_accepted_license":"1"},{"external_id":{"arxiv":["2111.07578"]},"language":[{"iso":"eng"}],"ddc":["000"],"publication":"2022 International Workshop on Acoustic Signal Enhancement (IWAENC)","file":[{"file_size":212890,"access_level":"open_access","file_id":"48930","file_name":"monaural_source_separation.pdf","date_updated":"2023-11-15T14:52:16Z","date_created":"2023-11-15T14:52:16Z","creator":"cord","relation":"main_file","content_type":"application/pdf"}],"abstract":[{"lang":"eng","text":"Impressive progress in neural network-based single-channel speech source\r\nseparation has been made in recent years. But those improvements have been\r\nmostly reported on anechoic data, a situation that is hardly met in practice.\r\nTaking the SepFormer as a starting point, which achieves state-of-the-art\r\nperformance on anechoic mixtures, we gradually modify it to optimize its\r\nperformance on reverberant mixtures. Although this leads to a word error rate\r\nimprovement by 7 percentage points compared to the standard SepFormer\r\nimplementation, the system ends up with only marginally better performance than\r\na PIT-BLSTM separation system, that is optimized with rather straightforward\r\nmeans. 
This is surprising and at the same time sobering, challenging the\r\npractical usefulness of many improvements reported in recent years for monaural\r\nsource separation on nonreverberant data."}],"date_created":"2022-10-20T14:07:28Z","publisher":"IEEE","title":"Monaural source separation: From anechoic to reverberant environments","year":"2022","user_id":"40767","department":[{"_id":"54"}],"project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"},{"grant_number":"448568305","name":"Automatische Transkription von Gesprächssituationen","_id":"508"}],"_id":"33848","file_date_updated":"2023-11-15T14:52:16Z","type":"conference","status":"public","author":[{"first_name":"Tobias","last_name":"Cord-Landwehr","full_name":"Cord-Landwehr, Tobias","id":"44393"},{"last_name":"Boeddeker","id":"40767","full_name":"Boeddeker, Christoph","first_name":"Christoph"},{"first_name":"Thilo","last_name":"von Neumann","orcid":"https://orcid.org/0000-0002-7717-8670","id":"49870","full_name":"von Neumann, Thilo"},{"full_name":"Zorila, Catalin","last_name":"Zorila","first_name":"Catalin"},{"last_name":"Doddipatla","full_name":"Doddipatla, Rama","first_name":"Rama"},{"full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach","first_name":"Reinhold"}],"date_updated":"2025-02-12T09:05:25Z","oa":"1","conference":{"name":"2022 International Workshop on Acoustic Signal Enhancement (IWAENC)"},"has_accepted_license":"1","citation":{"chicago":"Cord-Landwehr, Tobias, Christoph Boeddeker, Thilo von Neumann, Catalin Zorila, Rama Doddipatla, and Reinhold Haeb-Umbach. “Monaural Source Separation: From Anechoic to Reverberant Environments.” In <i>2022 International Workshop on Acoustic Signal Enhancement (IWAENC)</i>. Bamberg: IEEE, 2022.","ieee":"T. Cord-Landwehr, C. Boeddeker, T. von Neumann, C. Zorila, R. Doddipatla, and R. 
Haeb-Umbach, “Monaural source separation: From anechoic to reverberant environments,” presented at the 2022 International Workshop on Acoustic Signal Enhancement (IWAENC), 2022.","ama":"Cord-Landwehr T, Boeddeker C, von Neumann T, Zorila C, Doddipatla R, Haeb-Umbach R. Monaural source separation: From anechoic to reverberant environments. In: <i>2022 International Workshop on Acoustic Signal Enhancement (IWAENC)</i>. IEEE; 2022.","apa":"Cord-Landwehr, T., Boeddeker, C., von Neumann, T., Zorila, C., Doddipatla, R., &#38; Haeb-Umbach, R. (2022). Monaural source separation: From anechoic to reverberant environments. <i>2022 International Workshop on Acoustic Signal Enhancement (IWAENC)</i>. 2022 International Workshop on Acoustic Signal Enhancement (IWAENC).","bibtex":"@inproceedings{Cord-Landwehr_Boeddeker_von Neumann_Zorila_Doddipatla_Haeb-Umbach_2022, place={Bamberg}, title={Monaural source separation: From anechoic to reverberant environments}, booktitle={2022 International Workshop on Acoustic Signal Enhancement (IWAENC)}, publisher={IEEE}, author={Cord-Landwehr, Tobias and Boeddeker, Christoph and von Neumann, Thilo and Zorila, Catalin and Doddipatla, Rama and Haeb-Umbach, Reinhold}, year={2022} }","mla":"Cord-Landwehr, Tobias, et al. “Monaural Source Separation: From Anechoic to Reverberant Environments.” <i>2022 International Workshop on Acoustic Signal Enhancement (IWAENC)</i>, IEEE, 2022.","short":"T. Cord-Landwehr, C. Boeddeker, T. von Neumann, C. Zorila, R. Doddipatla, R. 
Haeb-Umbach, in: 2022 International Workshop on Acoustic Signal Enhancement (IWAENC), IEEE, Bamberg, 2022."},"place":"Bamberg"},{"file":[{"relation":"main_file","content_type":"application/pdf","access_level":"open_access","file_id":"33820","file_name":"main.pdf","file_size":228069,"creator":"tvn","date_created":"2022-10-20T05:33:10Z","date_updated":"2022-10-20T05:33:10Z"},{"date_created":"2022-10-20T05:35:32Z","creator":"tvn","date_updated":"2022-10-20T05:35:32Z","file_id":"33821","access_level":"open_access","file_name":"poster.pdf","file_size":229166,"content_type":"application/pdf","relation":"poster"}],"publication":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","language":[{"iso":"eng"}],"ddc":["000"],"year":"2022","quality_controlled":"1","title":"SA-SDR: A Novel Loss Function for Separation of Meeting Style Data","date_created":"2022-10-20T05:29:12Z","publisher":"IEEE","status":"public","type":"conference","file_date_updated":"2022-10-20T05:35:32Z","department":[{"_id":"54"}],"user_id":"40767","_id":"33819","project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"},{"grant_number":"448568305","_id":"508","name":"Automatische Transkription von Gesprächssituationen"}],"citation":{"short":"T. von Neumann, K. Kinoshita, C. Boeddeker, M. Delcroix, R. 
Haeb-Umbach, in: ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), IEEE, 2022.","bibtex":"@inproceedings{von Neumann_Kinoshita_Boeddeker_Delcroix_Haeb-Umbach_2022, title={SA-SDR: A Novel Loss Function for Separation of Meeting Style Data}, DOI={<a href=\"https://doi.org/10.1109/icassp43922.2022.9746757\">10.1109/icassp43922.2022.9746757</a>}, booktitle={ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, publisher={IEEE}, author={von Neumann, Thilo and Kinoshita, Keisuke and Boeddeker, Christoph and Delcroix, Marc and Haeb-Umbach, Reinhold}, year={2022} }","mla":"von Neumann, Thilo, et al. “SA-SDR: A Novel Loss Function for Separation of Meeting Style Data.” <i>ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, IEEE, 2022, doi:<a href=\"https://doi.org/10.1109/icassp43922.2022.9746757\">10.1109/icassp43922.2022.9746757</a>.","apa":"von Neumann, T., Kinoshita, K., Boeddeker, C., Delcroix, M., &#38; Haeb-Umbach, R. (2022). SA-SDR: A Novel Loss Function for Separation of Meeting Style Data. <i>ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. <a href=\"https://doi.org/10.1109/icassp43922.2022.9746757\">https://doi.org/10.1109/icassp43922.2022.9746757</a>","ama":"von Neumann T, Kinoshita K, Boeddeker C, Delcroix M, Haeb-Umbach R. SA-SDR: A Novel Loss Function for Separation of Meeting Style Data. In: <i>ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. IEEE; 2022. doi:<a href=\"https://doi.org/10.1109/icassp43922.2022.9746757\">10.1109/icassp43922.2022.9746757</a>","ieee":"T. von Neumann, K. Kinoshita, C. Boeddeker, M. Delcroix, and R. 
Haeb-Umbach, “SA-SDR: A Novel Loss Function for Separation of Meeting Style Data,” 2022, doi: <a href=\"https://doi.org/10.1109/icassp43922.2022.9746757\">10.1109/icassp43922.2022.9746757</a>.","chicago":"Neumann, Thilo von, Keisuke Kinoshita, Christoph Boeddeker, Marc Delcroix, and Reinhold Haeb-Umbach. “SA-SDR: A Novel Loss Function for Separation of Meeting Style Data.” In <i>ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. IEEE, 2022. <a href=\"https://doi.org/10.1109/icassp43922.2022.9746757\">https://doi.org/10.1109/icassp43922.2022.9746757</a>."},"related_material":{"link":[{"url":"https://github.com/fgnt/graph_pit","relation":"supplementary_material"}]},"has_accepted_license":"1","publication_status":"published","doi":"10.1109/icassp43922.2022.9746757","author":[{"full_name":"von Neumann, Thilo","id":"49870","orcid":"https://orcid.org/0000-0002-7717-8670","last_name":"von Neumann","first_name":"Thilo"},{"full_name":"Kinoshita, Keisuke","last_name":"Kinoshita","first_name":"Keisuke"},{"first_name":"Christoph","id":"40767","full_name":"Boeddeker, Christoph","last_name":"Boeddeker"},{"first_name":"Marc","full_name":"Delcroix, Marc","last_name":"Delcroix"},{"id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach","first_name":"Reinhold"}],"date_updated":"2025-02-12T09:08:14Z","oa":"1"},{"has_accepted_license":"1","year":"2022","citation":{"bibtex":"@book{Gburrek_Boeddeker_von Neumann_Cord-Landwehr_Schmalenstroeer_Haeb-Umbach_2022, title={A Meeting Transcription System for an Ad-Hoc Acoustic Sensor Network}, DOI={<a href=\"https://doi.org/10.48550/ARXIV.2205.00944\">10.48550/ARXIV.2205.00944</a>}, publisher={arXiv}, author={Gburrek, Tobias and Boeddeker, Christoph and von Neumann, Thilo and Cord-Landwehr, Tobias and Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold}, year={2022} }","short":"T. Gburrek, C. Boeddeker, T. von Neumann, T. Cord-Landwehr, J. Schmalenstroeer, R. 
Haeb-Umbach, A Meeting Transcription System for an Ad-Hoc Acoustic Sensor Network, arXiv, 2022.","mla":"Gburrek, Tobias, et al. <i>A Meeting Transcription System for an Ad-Hoc Acoustic Sensor Network</i>. arXiv, 2022, doi:<a href=\"https://doi.org/10.48550/ARXIV.2205.00944\">10.48550/ARXIV.2205.00944</a>.","apa":"Gburrek, T., Boeddeker, C., von Neumann, T., Cord-Landwehr, T., Schmalenstroeer, J., &#38; Haeb-Umbach, R. (2022). <i>A Meeting Transcription System for an Ad-Hoc Acoustic Sensor Network</i>. arXiv. <a href=\"https://doi.org/10.48550/ARXIV.2205.00944\">https://doi.org/10.48550/ARXIV.2205.00944</a>","chicago":"Gburrek, Tobias, Christoph Boeddeker, Thilo von Neumann, Tobias Cord-Landwehr, Joerg Schmalenstroeer, and Reinhold Haeb-Umbach. <i>A Meeting Transcription System for an Ad-Hoc Acoustic Sensor Network</i>. arXiv, 2022. <a href=\"https://doi.org/10.48550/ARXIV.2205.00944\">https://doi.org/10.48550/ARXIV.2205.00944</a>.","ieee":"T. Gburrek, C. Boeddeker, T. von Neumann, T. Cord-Landwehr, J. Schmalenstroeer, and R. Haeb-Umbach, <i>A Meeting Transcription System for an Ad-Hoc Acoustic Sensor Network</i>. arXiv, 2022.","ama":"Gburrek T, Boeddeker C, von Neumann T, Cord-Landwehr T, Schmalenstroeer J, Haeb-Umbach R. <i>A Meeting Transcription System for an Ad-Hoc Acoustic Sensor Network</i>. arXiv; 2022. 
doi:<a href=\"https://doi.org/10.48550/ARXIV.2205.00944\">10.48550/ARXIV.2205.00944</a>"},"oa":"1","publisher":"arXiv","date_updated":"2025-02-12T09:03:42Z","author":[{"id":"44006","full_name":"Gburrek, Tobias","last_name":"Gburrek","first_name":"Tobias"},{"first_name":"Christoph","last_name":"Boeddeker","id":"40767","full_name":"Boeddeker, Christoph"},{"first_name":"Thilo","last_name":"von Neumann","orcid":"https://orcid.org/0000-0002-7717-8670","full_name":"von Neumann, Thilo","id":"49870"},{"first_name":"Tobias","id":"44393","full_name":"Cord-Landwehr, Tobias","last_name":"Cord-Landwehr"},{"last_name":"Schmalenstroeer","id":"460","full_name":"Schmalenstroeer, Joerg","first_name":"Joerg"},{"first_name":"Reinhold","id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach"}],"date_created":"2022-10-18T11:10:58Z","title":"A Meeting Transcription System for an Ad-Hoc Acoustic Sensor Network","doi":"10.48550/ARXIV.2205.00944","type":"misc","file":[{"date_updated":"2023-11-17T06:42:04Z","date_created":"2023-11-17T06:42:04Z","creator":"tgburrek","file_size":199006,"file_name":"meeting_transcription_22.pdf","access_level":"open_access","file_id":"48992","content_type":"application/pdf","relation":"main_file"}],"status":"public","project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"},{"_id":"508","name":"Automatische Transkription von Gesprächssituationen","grant_number":"448568305"}],"_id":"33816","user_id":"40767","department":[{"_id":"54"}],"ddc":["004"],"file_date_updated":"2023-11-17T06:42:04Z","language":[{"iso":"eng"}]},{"publication_status":"published","year":"2022","citation":{"short":"C. Boeddeker, T. Cord-Landwehr, T. von Neumann, R. 
Haeb-Umbach, in: Interspeech 2022, ISCA, 2022.","bibtex":"@inproceedings{Boeddeker_Cord-Landwehr_von Neumann_Haeb-Umbach_2022, title={An Initialization Scheme for Meeting Separation with Spatial Mixture Models}, DOI={<a href=\"https://doi.org/10.21437/interspeech.2022-10929\">10.21437/interspeech.2022-10929</a>}, booktitle={Interspeech 2022}, publisher={ISCA}, author={Boeddeker, Christoph and Cord-Landwehr, Tobias and von Neumann, Thilo and Haeb-Umbach, Reinhold}, year={2022} }","mla":"Boeddeker, Christoph, et al. “An Initialization Scheme for Meeting Separation with Spatial Mixture Models.” <i>Interspeech 2022</i>, ISCA, 2022, doi:<a href=\"https://doi.org/10.21437/interspeech.2022-10929\">10.21437/interspeech.2022-10929</a>.","apa":"Boeddeker, C., Cord-Landwehr, T., von Neumann, T., &#38; Haeb-Umbach, R. (2022). An Initialization Scheme for Meeting Separation with Spatial Mixture Models. <i>Interspeech 2022</i>. <a href=\"https://doi.org/10.21437/interspeech.2022-10929\">https://doi.org/10.21437/interspeech.2022-10929</a>","ama":"Boeddeker C, Cord-Landwehr T, von Neumann T, Haeb-Umbach R. An Initialization Scheme for Meeting Separation with Spatial Mixture Models. In: <i>Interspeech 2022</i>. ISCA; 2022. doi:<a href=\"https://doi.org/10.21437/interspeech.2022-10929\">10.21437/interspeech.2022-10929</a>","chicago":"Boeddeker, Christoph, Tobias Cord-Landwehr, Thilo von Neumann, and Reinhold Haeb-Umbach. “An Initialization Scheme for Meeting Separation with Spatial Mixture Models.” In <i>Interspeech 2022</i>. ISCA, 2022. <a href=\"https://doi.org/10.21437/interspeech.2022-10929\">https://doi.org/10.21437/interspeech.2022-10929</a>.","ieee":"C. Boeddeker, T. Cord-Landwehr, T. von Neumann, and R. 
Haeb-Umbach, “An Initialization Scheme for Meeting Separation with Spatial Mixture Models,” 2022, doi: <a href=\"https://doi.org/10.21437/interspeech.2022-10929\">10.21437/interspeech.2022-10929</a>."},"date_updated":"2025-02-12T09:06:56Z","oa":"1","publisher":"ISCA","author":[{"last_name":"Boeddeker","id":"40767","full_name":"Boeddeker, Christoph","first_name":"Christoph"},{"last_name":"Cord-Landwehr","id":"44393","full_name":"Cord-Landwehr, Tobias","first_name":"Tobias"},{"full_name":"von Neumann, Thilo","id":"49870","orcid":"https://orcid.org/0000-0002-7717-8670","last_name":"von Neumann","first_name":"Thilo"},{"first_name":"Reinhold","full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach"}],"date_created":"2022-10-28T10:53:56Z","title":"An Initialization Scheme for Meeting Separation with Spatial Mixture Models","main_file_link":[{"open_access":"1","url":"https://www.isca-archive.org/interspeech_2022/boeddeker22_interspeech.pdf"}],"doi":"10.21437/interspeech.2022-10929","type":"conference","publication":"Interspeech 2022","status":"public","project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"},{"name":"Automatische Transkription von Gesprächssituationen","_id":"508","grant_number":"448568305"}],"_id":"33954","user_id":"40767","department":[{"_id":"54"}],"language":[{"iso":"eng"}]},{"language":[{"iso":"eng"}],"_id":"33958","department":[{"_id":"54"}],"user_id":"40767","abstract":[{"lang":"eng","text":"Recent speaker diarization studies showed that integration of end-to-end neural diarization (EEND) and clustering-based diarization is a promising approach for achieving state-of-the-art performance on various tasks. Such an approach first divides an observed signal into fixed-length segments, then performs {\\it segment-level} local diarization based on an EEND module, and merges the segment-level results via clustering to form a final global diarization result. 
The segmentation is done to limit the number of speakers in each segment since the current EEND cannot handle a large number of speakers. In this paper, we argue that such an approach involving the segmentation has several issues; for example, it inevitably faces a dilemma that larger segment sizes increase both the context available for enhancing the performance and the number of speakers for the local EEND module to handle. To resolve such a problem, this paper proposes a novel framework that performs diarization without segmentation. However, it can still handle challenging data containing many speakers and a significant amount of overlapping speech. The proposed method can take an entire meeting for inference and perform utterance-by-utterance diarization that clusters utterance activities in terms of speakers. To this end, we leverage a neural network training scheme called Graph-PIT proposed recently for neural source separation. Experiments with simulated active-meeting-like data and CALLHOME data show the superiority of the proposed approach over the conventional methods."}],"status":"public","publication":"Proc. 
Interspeech 2022","type":"conference","title":"Utterance-by-utterance overlap-aware neural diarization with Graph-PIT","doi":"10.21437/Interspeech.2022-11408","conference":{"name":"Interspeech 2022"},"main_file_link":[{"url":"https://www.isca-archive.org/interspeech_2022/kinoshita22_interspeech.pdf"}],"date_updated":"2025-02-12T09:09:05Z","publisher":"ISCA","author":[{"full_name":"Kinoshita, Keisuke","last_name":"Kinoshita","first_name":"Keisuke"},{"first_name":"Thilo","full_name":"von Neumann, Thilo","id":"49870","orcid":"https://orcid.org/0000-0002-7717-8670","last_name":"von Neumann"},{"last_name":"Delcroix","full_name":"Delcroix, Marc","first_name":"Marc"},{"first_name":"Christoph","id":"40767","full_name":"Boeddeker, Christoph","last_name":"Boeddeker"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"}],"date_created":"2022-10-28T12:07:57Z","year":"2022","page":"1486-1490","citation":{"ama":"Kinoshita K, von Neumann T, Delcroix M, Boeddeker C, Haeb-Umbach R. Utterance-by-utterance overlap-aware neural diarization with Graph-PIT. In: <i>Proc. Interspeech 2022</i>. ISCA; 2022:1486-1490. doi:<a href=\"https://doi.org/10.21437/Interspeech.2022-11408\">10.21437/Interspeech.2022-11408</a>","chicago":"Kinoshita, Keisuke, Thilo von Neumann, Marc Delcroix, Christoph Boeddeker, and Reinhold Haeb-Umbach. “Utterance-by-Utterance Overlap-Aware Neural Diarization with Graph-PIT.” In <i>Proc. Interspeech 2022</i>, 1486–90. ISCA, 2022. <a href=\"https://doi.org/10.21437/Interspeech.2022-11408\">https://doi.org/10.21437/Interspeech.2022-11408</a>.","ieee":"K. Kinoshita, T. von Neumann, M. Delcroix, C. Boeddeker, and R. Haeb-Umbach, “Utterance-by-utterance overlap-aware neural diarization with Graph-PIT,” in <i>Proc. Interspeech 2022</i>, 2022, pp. 
1486–1490, doi: <a href=\"https://doi.org/10.21437/Interspeech.2022-11408\">10.21437/Interspeech.2022-11408</a>.","bibtex":"@inproceedings{Kinoshita_von Neumann_Delcroix_Boeddeker_Haeb-Umbach_2022, title={Utterance-by-utterance overlap-aware neural diarization with Graph-PIT}, DOI={<a href=\"https://doi.org/10.21437/Interspeech.2022-11408\">10.21437/Interspeech.2022-11408</a>}, booktitle={Proc. Interspeech 2022}, publisher={ISCA}, author={Kinoshita, Keisuke and von Neumann, Thilo and Delcroix, Marc and Boeddeker, Christoph and Haeb-Umbach, Reinhold}, year={2022}, pages={1486–1490} }","mla":"Kinoshita, Keisuke, et al. “Utterance-by-Utterance Overlap-Aware Neural Diarization with Graph-PIT.” <i>Proc. Interspeech 2022</i>, ISCA, 2022, pp. 1486–90, doi:<a href=\"https://doi.org/10.21437/Interspeech.2022-11408\">10.21437/Interspeech.2022-11408</a>.","short":"K. Kinoshita, T. von Neumann, M. Delcroix, C. Boeddeker, R. Haeb-Umbach, in: Proc. Interspeech 2022, ISCA, 2022, pp. 1486–1490.","apa":"Kinoshita, K., von Neumann, T., Delcroix, M., Boeddeker, C., &#38; Haeb-Umbach, R. (2022). Utterance-by-utterance overlap-aware neural diarization with Graph-PIT. <i>Proc. Interspeech 2022</i>, 1486–1490. <a href=\"https://doi.org/10.21437/Interspeech.2022-11408\">https://doi.org/10.21437/Interspeech.2022-11408</a>"},"quality_controlled":"1","publication_status":"published"},{"has_accepted_license":"1","intvolume":"       109","page":"124-148","citation":{"chicago":"Haeb-Umbach, Reinhold, Jahn Heymann, Lukas Drude, Shinji Watanabe, Marc Delcroix, and Tomohiro Nakatani. “Far-Field Automatic Speech Recognition.” <i>Proceedings of the IEEE</i> 109, no. 2 (2021): 124–48. <a href=\"https://doi.org/10.1109/JPROC.2020.3018668\">https://doi.org/10.1109/JPROC.2020.3018668</a>.","ieee":"R. Haeb-Umbach, J. Heymann, L. Drude, S. Watanabe, M. Delcroix, and T. Nakatani, “Far-Field Automatic Speech Recognition,” <i>Proceedings of the IEEE</i>, vol. 109, no. 2, pp. 
124–148, 2021.","ama":"Haeb-Umbach R, Heymann J, Drude L, Watanabe S, Delcroix M, Nakatani T. Far-Field Automatic Speech Recognition. <i>Proceedings of the IEEE</i>. 2021;109(2):124-148. doi:<a href=\"https://doi.org/10.1109/JPROC.2020.3018668\">10.1109/JPROC.2020.3018668</a>","apa":"Haeb-Umbach, R., Heymann, J., Drude, L., Watanabe, S., Delcroix, M., &#38; Nakatani, T. (2021). Far-Field Automatic Speech Recognition. <i>Proceedings of the IEEE</i>, <i>109</i>(2), 124–148. <a href=\"https://doi.org/10.1109/JPROC.2020.3018668\">https://doi.org/10.1109/JPROC.2020.3018668</a>","mla":"Haeb-Umbach, Reinhold, et al. “Far-Field Automatic Speech Recognition.” <i>Proceedings of the IEEE</i>, vol. 109, no. 2, 2021, pp. 124–48, doi:<a href=\"https://doi.org/10.1109/JPROC.2020.3018668\">10.1109/JPROC.2020.3018668</a>.","bibtex":"@article{Haeb-Umbach_Heymann_Drude_Watanabe_Delcroix_Nakatani_2021, title={Far-Field Automatic Speech Recognition}, volume={109}, DOI={<a href=\"https://doi.org/10.1109/JPROC.2020.3018668\">10.1109/JPROC.2020.3018668</a>}, number={2}, journal={Proceedings of the IEEE}, author={Haeb-Umbach, Reinhold and Heymann, Jahn and Drude, Lukas and Watanabe, Shinji and Delcroix, Marc and Nakatani, Tomohiro}, year={2021}, pages={124–148} }","short":"R. Haeb-Umbach, J. Heymann, L. Drude, S. Watanabe, M. Delcroix, T. 
Nakatani, Proceedings of the IEEE 109 (2021) 124–148."},"date_updated":"2022-01-06T06:54:44Z","oa":"1","volume":109,"author":[{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"},{"full_name":"Heymann, Jahn","last_name":"Heymann","first_name":"Jahn"},{"full_name":"Drude, Lukas","last_name":"Drude","first_name":"Lukas"},{"first_name":"Shinji","full_name":"Watanabe, Shinji","last_name":"Watanabe"},{"first_name":"Marc","last_name":"Delcroix","full_name":"Delcroix, Marc"},{"first_name":"Tomohiro","last_name":"Nakatani","full_name":"Nakatani, Tomohiro"}],"doi":"10.1109/JPROC.2020.3018668","type":"journal_article","status":"public","_id":"21065","project":[{"name":"Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"department":[{"_id":"54"}],"user_id":"59789","file_date_updated":"2021-01-25T08:17:23Z","issue":"2","year":"2021","date_created":"2021-01-25T08:15:27Z","title":"Far-Field Automatic Speech Recognition","publication":"Proceedings of the IEEE","abstract":[{"text":"The machine recognition of speech spoken at a distance from the microphones, known as far-field automatic speech recognition (ASR), has received a significant increase of attention in science and industry, which caused or was caused by an equally significant improvement in recognition accuracy. Meanwhile it has entered the consumer market with digital home assistants with a spoken language interface being its most prominent application. Speech recorded at a distance is affected by various acoustic distortions and, consequently, quite different processing pipelines have emerged compared to ASR for close-talk speech. A signal enhancement front-end for dereverberation, source separation and acoustic beamforming is employed to clean up the speech, and the back-end ASR engine is robustified by multi-condition training and adaptation. 
We will also describe the so-called end-to-end approach to ASR, which is a new promising architecture that has recently been extended to the far-field scenario. This tutorial article gives an account of the algorithms used to enable accurate speech recognition from a distance, and it will be seen that, although deep learning has a significant share in the technological breakthroughs, a clever combination with traditional signal processing can lead to surprisingly effective solutions.","lang":"eng"}],"file":[{"relation":"main_file","content_type":"application/pdf","access_level":"open_access","file_name":"proceedings_2021_haebumbach_Paper.pdf","file_id":"21066","file_size":4173988,"date_created":"2021-01-25T08:17:23Z","creator":"huesera","date_updated":"2021-01-25T08:17:23Z"}],"ddc":["000"],"language":[{"iso":"eng"}]},{"title":"End-to-End Dereverberation, Beamforming, and Speech Recognition with Improved Numerical Stability and Advanced Frontend","doi":"10.1109/icassp39728.2021.9414464","date_updated":"2022-01-13T08:31:27Z","date_created":"2021-12-03T11:31:42Z","author":[{"full_name":"Zhang, Wangyou","last_name":"Zhang","first_name":"Wangyou"},{"first_name":"Christoph","last_name":"Boeddeker","id":"40767","full_name":"Boeddeker, Christoph"},{"full_name":"Watanabe, Shinji","last_name":"Watanabe","first_name":"Shinji"},{"first_name":"Tomohiro","last_name":"Nakatani","full_name":"Nakatani, Tomohiro"},{"first_name":"Marc","last_name":"Delcroix","full_name":"Delcroix, Marc"},{"full_name":"Kinoshita, Keisuke","last_name":"Kinoshita","first_name":"Keisuke"},{"full_name":"Ochiai, Tsubasa","last_name":"Ochiai","first_name":"Tsubasa"},{"last_name":"Kamo","full_name":"Kamo, Naoyuki","first_name":"Naoyuki"},{"last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242","first_name":"Reinhold"},{"first_name":"Yanmin","last_name":"Qian","full_name":"Qian, Yanmin"}],"year":"2021","citation":{"ama":"Zhang W, Boeddeker C, Watanabe S, et al. 
End-to-End Dereverberation, Beamforming, and Speech Recognition with Improved Numerical Stability and Advanced Frontend. In: <i>ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. ; 2021. doi:<a href=\"https://doi.org/10.1109/icassp39728.2021.9414464\">10.1109/icassp39728.2021.9414464</a>","ieee":"W. Zhang <i>et al.</i>, “End-to-End Dereverberation, Beamforming, and Speech Recognition with Improved Numerical Stability and Advanced Frontend,” 2021, doi: <a href=\"https://doi.org/10.1109/icassp39728.2021.9414464\">10.1109/icassp39728.2021.9414464</a>.","chicago":"Zhang, Wangyou, Christoph Boeddeker, Shinji Watanabe, Tomohiro Nakatani, Marc Delcroix, Keisuke Kinoshita, Tsubasa Ochiai, Naoyuki Kamo, Reinhold Haeb-Umbach, and Yanmin Qian. “End-to-End Dereverberation, Beamforming, and Speech Recognition with Improved Numerical Stability and Advanced Frontend.” In <i>ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2021. <a href=\"https://doi.org/10.1109/icassp39728.2021.9414464\">https://doi.org/10.1109/icassp39728.2021.9414464</a>.","apa":"Zhang, W., Boeddeker, C., Watanabe, S., Nakatani, T., Delcroix, M., Kinoshita, K., Ochiai, T., Kamo, N., Haeb-Umbach, R., &#38; Qian, Y. (2021). End-to-End Dereverberation, Beamforming, and Speech Recognition with Improved Numerical Stability and Advanced Frontend. <i>ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. 
<a href=\"https://doi.org/10.1109/icassp39728.2021.9414464\">https://doi.org/10.1109/icassp39728.2021.9414464</a>","bibtex":"@inproceedings{Zhang_Boeddeker_Watanabe_Nakatani_Delcroix_Kinoshita_Ochiai_Kamo_Haeb-Umbach_Qian_2021, title={End-to-End Dereverberation, Beamforming, and Speech Recognition with Improved Numerical Stability and Advanced Frontend}, DOI={<a href=\"https://doi.org/10.1109/icassp39728.2021.9414464\">10.1109/icassp39728.2021.9414464</a>}, booktitle={ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, author={Zhang, Wangyou and Boeddeker, Christoph and Watanabe, Shinji and Nakatani, Tomohiro and Delcroix, Marc and Kinoshita, Keisuke and Ochiai, Tsubasa and Kamo, Naoyuki and Haeb-Umbach, Reinhold and Qian, Yanmin}, year={2021} }","short":"W. Zhang, C. Boeddeker, S. Watanabe, T. Nakatani, M. Delcroix, K. Kinoshita, T. Ochiai, N. Kamo, R. Haeb-Umbach, Y. Qian, in: ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2021.","mla":"Zhang, Wangyou, et al. 
“End-to-End Dereverberation, Beamforming, and Speech Recognition with Improved Numerical Stability and Advanced Frontend.” <i>ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2021, doi:<a href=\"https://doi.org/10.1109/icassp39728.2021.9414464\">10.1109/icassp39728.2021.9414464</a>."},"publication_status":"published","language":[{"iso":"eng"}],"_id":"28256","department":[{"_id":"54"}],"user_id":"40767","status":"public","publication":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","type":"conference"},{"date_created":"2021-12-03T12:07:35Z","author":[{"first_name":"Chenda","last_name":"Li","full_name":"Li, Chenda"},{"first_name":"Jing","full_name":"Shi, Jing","last_name":"Shi"},{"first_name":"Wangyou","last_name":"Zhang","full_name":"Zhang, Wangyou"},{"first_name":"Aswin Shanmugam","last_name":"Subramanian","full_name":"Subramanian, Aswin Shanmugam"},{"first_name":"Xuankai","full_name":"Chang, Xuankai","last_name":"Chang"},{"first_name":"Naoyuki","full_name":"Kamo, Naoyuki","last_name":"Kamo"},{"first_name":"Moto","last_name":"Hira","full_name":"Hira, Moto"},{"first_name":"Tomoki","full_name":"Hayashi, Tomoki","last_name":"Hayashi"},{"first_name":"Christoph","last_name":"Boeddeker","id":"40767","full_name":"Boeddeker, Christoph"},{"first_name":"Zhuo","full_name":"Chen, Zhuo","last_name":"Chen"},{"first_name":"Shinji","last_name":"Watanabe","full_name":"Watanabe, Shinji"}],"date_updated":"2022-01-13T08:34:25Z","doi":"10.1109/slt48900.2021.9383615","title":"ESPnet-SE: End-To-End Speech Enhancement and Separation Toolkit Designed for ASR Integration","publication_status":"published","citation":{"apa":"Li, C., Shi, J., Zhang, W., Subramanian, A. S., Chang, X., Kamo, N., Hira, M., Hayashi, T., Boeddeker, C., Chen, Z., &#38; Watanabe, S. (2021). ESPnet-SE: End-To-End Speech Enhancement and Separation Toolkit Designed for ASR Integration. 
<i>2021 IEEE Spoken Language Technology Workshop (SLT)</i>. <a href=\"https://doi.org/10.1109/slt48900.2021.9383615\">https://doi.org/10.1109/slt48900.2021.9383615</a>","short":"C. Li, J. Shi, W. Zhang, A.S. Subramanian, X. Chang, N. Kamo, M. Hira, T. Hayashi, C. Boeddeker, Z. Chen, S. Watanabe, in: 2021 IEEE Spoken Language Technology Workshop (SLT), 2021.","bibtex":"@inproceedings{Li_Shi_Zhang_Subramanian_Chang_Kamo_Hira_Hayashi_Boeddeker_Chen_et al._2021, title={ESPnet-SE: End-To-End Speech Enhancement and Separation Toolkit Designed for ASR Integration}, DOI={<a href=\"https://doi.org/10.1109/slt48900.2021.9383615\">10.1109/slt48900.2021.9383615</a>}, booktitle={2021 IEEE Spoken Language Technology Workshop (SLT)}, author={Li, Chenda and Shi, Jing and Zhang, Wangyou and Subramanian, Aswin Shanmugam and Chang, Xuankai and Kamo, Naoyuki and Hira, Moto and Hayashi, Tomoki and Boeddeker, Christoph and Chen, Zhuo and et al.}, year={2021} }","mla":"Li, Chenda, et al. “ESPnet-SE: End-To-End Speech Enhancement and Separation Toolkit Designed for ASR Integration.” <i>2021 IEEE Spoken Language Technology Workshop (SLT)</i>, 2021, doi:<a href=\"https://doi.org/10.1109/slt48900.2021.9383615\">10.1109/slt48900.2021.9383615</a>.","ama":"Li C, Shi J, Zhang W, et al. ESPnet-SE: End-To-End Speech Enhancement and Separation Toolkit Designed for ASR Integration. In: <i>2021 IEEE Spoken Language Technology Workshop (SLT)</i>. ; 2021. doi:<a href=\"https://doi.org/10.1109/slt48900.2021.9383615\">10.1109/slt48900.2021.9383615</a>","chicago":"Li, Chenda, Jing Shi, Wangyou Zhang, Aswin Shanmugam Subramanian, Xuankai Chang, Naoyuki Kamo, Moto Hira, et al. “ESPnet-SE: End-To-End Speech Enhancement and Separation Toolkit Designed for ASR Integration.” In <i>2021 IEEE Spoken Language Technology Workshop (SLT)</i>, 2021. <a href=\"https://doi.org/10.1109/slt48900.2021.9383615\">https://doi.org/10.1109/slt48900.2021.9383615</a>.","ieee":"C. 
Li <i>et al.</i>, “ESPnet-SE: End-To-End Speech Enhancement and Separation Toolkit Designed for ASR Integration,” 2021, doi: <a href=\"https://doi.org/10.1109/slt48900.2021.9383615\">10.1109/slt48900.2021.9383615</a>."},"year":"2021","user_id":"40767","department":[{"_id":"54"}],"_id":"28262","language":[{"iso":"eng"}],"type":"conference","publication":"2021 IEEE Spoken Language Technology Workshop (SLT)","status":"public"},{"title":"Dual-Path RNN for Long Recording Speech Separation","doi":"10.1109/slt48900.2021.9383514","date_updated":"2022-01-13T08:34:07Z","author":[{"full_name":"Li, Chenda","last_name":"Li","first_name":"Chenda"},{"last_name":"Luo","full_name":"Luo, Yi","first_name":"Yi"},{"first_name":"Cong","full_name":"Han, Cong","last_name":"Han"},{"first_name":"Jinyu","full_name":"Li, Jinyu","last_name":"Li"},{"first_name":"Takuya","full_name":"Yoshioka, Takuya","last_name":"Yoshioka"},{"first_name":"Tianyan","last_name":"Zhou","full_name":"Zhou, Tianyan"},{"first_name":"Marc","full_name":"Delcroix, Marc","last_name":"Delcroix"},{"full_name":"Kinoshita, Keisuke","last_name":"Kinoshita","first_name":"Keisuke"},{"first_name":"Christoph","id":"40767","full_name":"Boeddeker, Christoph","last_name":"Boeddeker"},{"first_name":"Yanmin","last_name":"Qian","full_name":"Qian, Yanmin"},{"first_name":"Shinji","full_name":"Watanabe, Shinji","last_name":"Watanabe"},{"last_name":"Chen","full_name":"Chen, Zhuo","first_name":"Zhuo"}],"date_created":"2021-12-03T12:07:03Z","year":"2021","citation":{"mla":"Li, Chenda, et al. “Dual-Path RNN for Long Recording Speech Separation.” <i>2021 IEEE Spoken Language Technology Workshop (SLT)</i>, 2021, doi:<a href=\"https://doi.org/10.1109/slt48900.2021.9383514\">10.1109/slt48900.2021.9383514</a>.","short":"C. Li, Y. Luo, C. Han, J. Li, T. Yoshioka, T. Zhou, M. Delcroix, K. Kinoshita, C. Boeddeker, Y. Qian, S. Watanabe, Z. 
Chen, in: 2021 IEEE Spoken Language Technology Workshop (SLT), 2021.","bibtex":"@inproceedings{Li_Luo_Han_Li_Yoshioka_Zhou_Delcroix_Kinoshita_Boeddeker_Qian_et al._2021, title={Dual-Path RNN for Long Recording Speech Separation}, DOI={<a href=\"https://doi.org/10.1109/slt48900.2021.9383514\">10.1109/slt48900.2021.9383514</a>}, booktitle={2021 IEEE Spoken Language Technology Workshop (SLT)}, author={Li, Chenda and Luo, Yi and Han, Cong and Li, Jinyu and Yoshioka, Takuya and Zhou, Tianyan and Delcroix, Marc and Kinoshita, Keisuke and Boeddeker, Christoph and Qian, Yanmin and et al.}, year={2021} }","apa":"Li, C., Luo, Y., Han, C., Li, J., Yoshioka, T., Zhou, T., Delcroix, M., Kinoshita, K., Boeddeker, C., Qian, Y., Watanabe, S., &#38; Chen, Z. (2021). Dual-Path RNN for Long Recording Speech Separation. <i>2021 IEEE Spoken Language Technology Workshop (SLT)</i>. <a href=\"https://doi.org/10.1109/slt48900.2021.9383514\">https://doi.org/10.1109/slt48900.2021.9383514</a>","ieee":"C. Li <i>et al.</i>, “Dual-Path RNN for Long Recording Speech Separation,” 2021, doi: <a href=\"https://doi.org/10.1109/slt48900.2021.9383514\">10.1109/slt48900.2021.9383514</a>.","chicago":"Li, Chenda, Yi Luo, Cong Han, Jinyu Li, Takuya Yoshioka, Tianyan Zhou, Marc Delcroix, et al. “Dual-Path RNN for Long Recording Speech Separation.” In <i>2021 IEEE Spoken Language Technology Workshop (SLT)</i>, 2021. <a href=\"https://doi.org/10.1109/slt48900.2021.9383514\">https://doi.org/10.1109/slt48900.2021.9383514</a>.","ama":"Li C, Luo Y, Han C, et al. Dual-Path RNN for Long Recording Speech Separation. In: <i>2021 IEEE Spoken Language Technology Workshop (SLT)</i>. ; 2021. 
doi:<a href=\"https://doi.org/10.1109/slt48900.2021.9383514\">10.1109/slt48900.2021.9383514</a>"},"publication_status":"published","language":[{"iso":"eng"}],"_id":"28261","department":[{"_id":"54"}],"user_id":"40767","status":"public","publication":"2021 IEEE Spoken Language Technology Workshop (SLT)","type":"conference"},{"publication":"Speech Communication; 14th ITG-Symposium","type":"conference","status":"public","_id":"24000","project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"department":[{"_id":"54"}],"user_id":"460","language":[{"iso":"eng"}],"quality_controlled":"1","year":"2021","page":"1-5","citation":{"ieee":"J. Heitkaemper, J. Schmalenstroeer, V. Ion, and R. Haeb-Umbach, “A Database for Research on Detection and Enhancement of Speech Transmitted over HF links,” in <i>Speech Communication; 14th ITG-Symposium</i>, 2021, pp. 1–5.","chicago":"Heitkaemper, Jens, Joerg Schmalenstroeer, Valentin Ion, and Reinhold Haeb-Umbach. “A Database for Research on Detection and Enhancement of Speech Transmitted over HF Links.” In <i>Speech Communication; 14th ITG-Symposium</i>, 1–5, 2021.","ama":"Heitkaemper J, Schmalenstroeer J, Ion V, Haeb-Umbach R. A Database for Research on Detection and Enhancement of Speech Transmitted over HF links. In: <i>Speech Communication; 14th ITG-Symposium</i>. ; 2021:1-5.","apa":"Heitkaemper, J., Schmalenstroeer, J., Ion, V., &#38; Haeb-Umbach, R. (2021). A Database for Research on Detection and Enhancement of Speech Transmitted over HF links. <i>Speech Communication; 14th ITG-Symposium</i>, 1–5.","short":"J. Heitkaemper, J. Schmalenstroeer, V. Ion, R. Haeb-Umbach, in: Speech Communication; 14th ITG-Symposium, 2021, pp. 
1–5.","bibtex":"@inproceedings{Heitkaemper_Schmalenstroeer_Ion_Haeb-Umbach_2021, title={A Database for Research on Detection and Enhancement of Speech Transmitted over HF links}, booktitle={Speech Communication; 14th ITG-Symposium}, author={Heitkaemper, Jens and Schmalenstroeer, Joerg and Ion, Valentin and Haeb-Umbach, Reinhold}, year={2021}, pages={1–5} }","mla":"Heitkaemper, Jens, et al. “A Database for Research on Detection and Enhancement of Speech Transmitted over HF Links.” <i>Speech Communication; 14th ITG-Symposium</i>, 2021, pp. 1–5."},"date_updated":"2023-10-26T08:06:57Z","author":[{"first_name":"Jens","last_name":"Heitkaemper","full_name":"Heitkaemper, Jens","id":"27643"},{"first_name":"Joerg","id":"460","full_name":"Schmalenstroeer, Joerg","last_name":"Schmalenstroeer"},{"full_name":"Ion, Valentin","last_name":"Ion","first_name":"Valentin"},{"id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach","first_name":"Reinhold"}],"date_created":"2021-09-09T08:41:25Z","title":"A Database for Research on Detection and Enhancement of Speech Transmitted over HF links"},{"year":"2021","title":"A Comparison and Combination of Unsupervised Blind Source Separation  Techniques","date_created":"2023-05-15T07:59:33Z","file":[{"content_type":"application/pdf","relation":"main_file","creator":"frra","date_created":"2023-05-16T08:37:31Z","date_updated":"2023-11-15T15:29:32Z","file_name":"2106.05627.pdf","file_id":"44856","access_level":"open_access","file_size":295972}],"abstract":[{"text":"Unsupervised blind source separation methods do not require a training phase\r\nand thus cannot suffer from a train-test mismatch, which is a common concern in\r\nneural network based source separation. The unsupervised techniques can be\r\ncategorized in two classes, those building upon the sparsity of speech in the\r\nShort-Time Fourier transform domain and those exploiting non-Gaussianity or\r\nnon-stationarity of the source signals. 
In this contribution, spatial mixture\r\nmodels which fall in the first category and independent vector analysis (IVA)\r\nas a representative of the second category are compared w.r.t. their separation\r\nperformance and the performance of a downstream speech recognizer on a\r\nreverberant dataset of reasonable size. Furthermore, we introduce a serial\r\nconcatenation of the two, where the result of the mixture model serves as\r\ninitialization of IVA, which achieves significantly better WER performance than\r\neach algorithm individually and even approaches the performance of a much more\r\ncomplex neural network based technique.","lang":"eng"}],"publication":"ITG Conference on Speech Communication","language":[{"iso":"eng"}],"ddc":["000"],"external_id":{"arxiv":["2106.05627"]},"citation":{"ieee":"C. Boeddeker, F. Rautenberg, and R. Haeb-Umbach, “A Comparison and Combination of Unsupervised Blind Source Separation  Techniques,” presented at the ITG Conference on Speech Communication, Kiel, 2021.","chicago":"Boeddeker, Christoph, Frederik Rautenberg, and Reinhold Haeb-Umbach. “A Comparison and Combination of Unsupervised Blind Source Separation  Techniques.” In <i>ITG Conference on Speech Communication</i>, 2021.","ama":"Boeddeker C, Rautenberg F, Haeb-Umbach R. A Comparison and Combination of Unsupervised Blind Source Separation  Techniques. In: <i>ITG Conference on Speech Communication</i>. ; 2021.","mla":"Boeddeker, Christoph, et al. “A Comparison and Combination of Unsupervised Blind Source Separation  Techniques.” <i>ITG Conference on Speech Communication</i>, 2021.","bibtex":"@inproceedings{Boeddeker_Rautenberg_Haeb-Umbach_2021, title={A Comparison and Combination of Unsupervised Blind Source Separation  Techniques}, booktitle={ITG Conference on Speech Communication}, author={Boeddeker, Christoph and Rautenberg, Frederik and Haeb-Umbach, Reinhold}, year={2021} }","short":"C. Boeddeker, F. Rautenberg, R. 
Haeb-Umbach, in: ITG Conference on Speech Communication, 2021.","apa":"Boeddeker, C., Rautenberg, F., &#38; Haeb-Umbach, R. (2021). A Comparison and Combination of Unsupervised Blind Source Separation  Techniques. <i>ITG Conference on Speech Communication</i>. ITG Conference on Speech Communication, Kiel."},"has_accepted_license":"1","conference":{"name":"ITG Conference on Speech Communication","location":"Kiel"},"main_file_link":[{"open_access":"1","url":"https://arxiv.org/pdf/2106.05627.pdf"}],"author":[{"first_name":"Christoph","last_name":"Boeddeker","full_name":"Boeddeker, Christoph","id":"40767"},{"last_name":"Rautenberg","id":"72602","full_name":"Rautenberg, Frederik","first_name":"Frederik"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"}],"date_updated":"2023-11-15T15:29:32Z","oa":"1","status":"public","type":"conference","file_date_updated":"2023-11-15T15:29:32Z","department":[{"_id":"54"}],"user_id":"40767","_id":"44843"},{"user_id":"40767","department":[{"_id":"54"}],"project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"_id":"28259","file_date_updated":"2023-11-15T15:18:08Z","language":[{"iso":"eng"}],"ddc":["000"],"type":"conference","publication":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","file":[{"content_type":"application/pdf","relation":"main_file","date_created":"2021-12-03T12:01:20Z","creator":"cbj","date_updated":"2023-11-15T15:18:08Z","access_level":"open_access","file_id":"28260","file_name":"ICASSP2021_BSSEval.pdf","file_size":228717}],"status":"public","author":[{"full_name":"Boeddeker, Christoph","id":"40767","last_name":"Boeddeker","first_name":"Christoph"},{"last_name":"Zhang","full_name":"Zhang, Wangyou","first_name":"Wangyou"},{"first_name":"Tomohiro","full_name":"Nakatani, Tomohiro","last_name":"Nakatani"},{"first_name":"Keisuke","full_name":"Kinoshita, 
Keisuke","last_name":"Kinoshita"},{"first_name":"Tsubasa","last_name":"Ochiai","full_name":"Ochiai, Tsubasa"},{"first_name":"Marc","full_name":"Delcroix, Marc","last_name":"Delcroix"},{"last_name":"Kamo","full_name":"Kamo, Naoyuki","first_name":"Naoyuki"},{"last_name":"Qian","full_name":"Qian, Yanmin","first_name":"Yanmin"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold"}],"date_created":"2021-12-03T12:00:16Z","date_updated":"2023-11-15T15:18:09Z","oa":"1","doi":"10.1109/icassp39728.2021.9414661","title":"Convolutive Transfer Function Invariant SDR Training Criteria for Multi-Channel Reverberant Speech Separation","publication_status":"published","has_accepted_license":"1","citation":{"bibtex":"@inproceedings{Boeddeker_Zhang_Nakatani_Kinoshita_Ochiai_Delcroix_Kamo_Qian_Haeb-Umbach_2021, title={Convolutive Transfer Function Invariant SDR Training Criteria for Multi-Channel Reverberant Speech Separation}, DOI={<a href=\"https://doi.org/10.1109/icassp39728.2021.9414661\">10.1109/icassp39728.2021.9414661</a>}, booktitle={ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, author={Boeddeker, Christoph and Zhang, Wangyou and Nakatani, Tomohiro and Kinoshita, Keisuke and Ochiai, Tsubasa and Delcroix, Marc and Kamo, Naoyuki and Qian, Yanmin and Haeb-Umbach, Reinhold}, year={2021} }","short":"C. Boeddeker, W. Zhang, T. Nakatani, K. Kinoshita, T. Ochiai, M. Delcroix, N. Kamo, Y. Qian, R. Haeb-Umbach, in: ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2021.","mla":"Boeddeker, Christoph, et al. 
“Convolutive Transfer Function Invariant SDR Training Criteria for Multi-Channel Reverberant Speech Separation.” <i>ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2021, doi:<a href=\"https://doi.org/10.1109/icassp39728.2021.9414661\">10.1109/icassp39728.2021.9414661</a>.","apa":"Boeddeker, C., Zhang, W., Nakatani, T., Kinoshita, K., Ochiai, T., Delcroix, M., Kamo, N., Qian, Y., &#38; Haeb-Umbach, R. (2021). Convolutive Transfer Function Invariant SDR Training Criteria for Multi-Channel Reverberant Speech Separation. <i>ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. <a href=\"https://doi.org/10.1109/icassp39728.2021.9414661\">https://doi.org/10.1109/icassp39728.2021.9414661</a>","ieee":"C. Boeddeker <i>et al.</i>, “Convolutive Transfer Function Invariant SDR Training Criteria for Multi-Channel Reverberant Speech Separation,” 2021, doi: <a href=\"https://doi.org/10.1109/icassp39728.2021.9414661\">10.1109/icassp39728.2021.9414661</a>.","chicago":"Boeddeker, Christoph, Wangyou Zhang, Tomohiro Nakatani, Keisuke Kinoshita, Tsubasa Ochiai, Marc Delcroix, Naoyuki Kamo, Yanmin Qian, and Reinhold Haeb-Umbach. “Convolutive Transfer Function Invariant SDR Training Criteria for Multi-Channel Reverberant Speech Separation.” In <i>ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2021. <a href=\"https://doi.org/10.1109/icassp39728.2021.9414661\">https://doi.org/10.1109/icassp39728.2021.9414661</a>.","ama":"Boeddeker C, Zhang W, Nakatani T, et al. Convolutive Transfer Function Invariant SDR Training Criteria for Multi-Channel Reverberant Speech Separation. In: <i>ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. ; 2021. 
doi:<a href=\"https://doi.org/10.1109/icassp39728.2021.9414661\">10.1109/icassp39728.2021.9414661</a>"},"year":"2021"},{"date_updated":"2023-11-15T14:56:38Z","oa":"1","author":[{"first_name":"Joerg","last_name":"Schmalenstroeer","id":"460","full_name":"Schmalenstroeer, Joerg"},{"last_name":"Heitkaemper","id":"27643","full_name":"Heitkaemper, Jens","first_name":"Jens"},{"last_name":"Ullmann","id":"16256","full_name":"Ullmann, Joerg","first_name":"Joerg"},{"full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach","first_name":"Reinhold"}],"date_created":"2021-09-09T08:40:04Z","title":"Open Range Pitch Tracking for Carrier Frequency Difference Estimation from HF Transmitted Speech","main_file_link":[{"open_access":"1","url":"https://doi.org/10.48550/arXiv.2103.01599"}],"year":"2021","citation":{"chicago":"Schmalenstroeer, Joerg, Jens Heitkaemper, Joerg Ullmann, and Reinhold Haeb-Umbach. “Open Range Pitch Tracking for Carrier Frequency Difference Estimation from HF Transmitted Speech.” In <i>29th European Signal Processing Conference (EUSIPCO)</i>, 1–5, 2021.","ieee":"J. Schmalenstroeer, J. Heitkaemper, J. Ullmann, and R. Haeb-Umbach, “Open Range Pitch Tracking for Carrier Frequency Difference Estimation from HF Transmitted Speech,” in <i>29th European Signal Processing Conference (EUSIPCO)</i>, 2021, pp. 1–5.","ama":"Schmalenstroeer J, Heitkaemper J, Ullmann J, Haeb-Umbach R. Open Range Pitch Tracking for Carrier Frequency Difference Estimation from HF Transmitted Speech. In: <i>29th European Signal Processing Conference (EUSIPCO)</i>. ; 2021:1-5.","apa":"Schmalenstroeer, J., Heitkaemper, J., Ullmann, J., &#38; Haeb-Umbach, R. (2021). Open Range Pitch Tracking for Carrier Frequency Difference Estimation from HF Transmitted Speech. <i>29th European Signal Processing Conference (EUSIPCO)</i>, 1–5.","short":"J. Schmalenstroeer, J. Heitkaemper, J. Ullmann, R. Haeb-Umbach, in: 29th European Signal Processing Conference (EUSIPCO), 2021, pp. 
1–5.","mla":"Schmalenstroeer, Joerg, et al. “Open Range Pitch Tracking for Carrier Frequency Difference Estimation from HF Transmitted Speech.” <i>29th European Signal Processing Conference (EUSIPCO)</i>, 2021, pp. 1–5.","bibtex":"@inproceedings{Schmalenstroeer_Heitkaemper_Ullmann_Haeb-Umbach_2021, title={Open Range Pitch Tracking for Carrier Frequency Difference Estimation from HF Transmitted Speech}, booktitle={29th European Signal Processing Conference (EUSIPCO)}, author={Schmalenstroeer, Joerg and Heitkaemper, Jens and Ullmann, Joerg and Haeb-Umbach, Reinhold}, year={2021}, pages={1–5} }"},"page":"1-5","_id":"23998","user_id":"460","department":[{"_id":"54"}],"extern":"1","language":[{"iso":"eng"}],"type":"conference","publication":"29th European Signal Processing Conference (EUSIPCO)","status":"public"}]
