[{"date_created":"2024-11-14T09:32:38Z","title":"Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models","year":"2024","ddc":["000"],"keyword":["diarization","source separation","mixture model","meeting"],"language":[{"iso":"eng"}],"publication":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","abstract":[{"text":"We propose an approach for simultaneous diarization and separation of meeting data. It consists of a complex Angular Central Gaussian Mixture Model (cACGMM) for speech source separation, and a von-Mises-Fisher Mixture Model (VMFMM) for diarization in a joint statistical framework. Through the integration, both spatial and spectral information are exploited for diarization and separation. We also develop a method for counting the number of active speakers in a segment of a meeting to support block-wise processing. While the total number of speakers in a meeting may be known, it is usually not known on a per-segment level. With the proposed speaker counting, joint diarization and source separation can be done segment-by-segment, and the permutation problem across segments is solved, thus allowing for block-online processing in the future. Experimental results on the LibriCSS meeting corpus show that the integrated approach outperforms a cascaded approach of diarization and speech enhancement in terms of WER, both on a per-segment and on a per-meeting level.","lang":"eng"}],"file":[{"date_updated":"2025-08-14T08:11:57Z","date_created":"2025-08-14T08:11:57Z","creator":"cord","file_size":259907,"file_name":"main.pdf","file_id":"60930","access_level":"closed","content_type":"application/pdf","success":1,"relation":"main_file"}],"oa":"1","date_updated":"2025-08-14T08:12:22Z","author":[{"first_name":"Tobias","full_name":"Cord-Landwehr, Tobias","id":"44393","last_name":"Cord-Landwehr"},{"last_name":"Boeddeker","full_name":"Boeddeker, Christoph","id":"40767","first_name":"Christoph"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"}],"main_file_link":[{"url":"https://arxiv.org/pdf/2410.21455","open_access":"1"}],"doi":"10.1109/ICASSP49660.2025.10888445","conference":{"name":"2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Hyderabad, India"},"has_accepted_license":"1","citation":{"ama":"Cord-Landwehr T, Boeddeker C, Haeb-Umbach R. Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models. In: <i>ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. ; 2024. doi:<a href=\"https://doi.org/10.1109/ICASSP49660.2025.10888445\">10.1109/ICASSP49660.2025.10888445</a>","chicago":"Cord-Landwehr, Tobias, Christoph Boeddeker, and Reinhold Haeb-Umbach. “Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models.” In <i>ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2024. <a href=\"https://doi.org/10.1109/ICASSP49660.2025.10888445\">https://doi.org/10.1109/ICASSP49660.2025.10888445</a>.","ieee":"T. Cord-Landwehr, C. Boeddeker, and R. Haeb-Umbach, “Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models,” presented at the 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Hyderabad, India, 2024, doi: <a href=\"https://doi.org/10.1109/ICASSP49660.2025.10888445\">10.1109/ICASSP49660.2025.10888445</a>.","apa":"Cord-Landwehr, T., Boeddeker, C., &#38; Haeb-Umbach, R. (2024). Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models. <i>ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>. 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Hyderabad, India. <a href=\"https://doi.org/10.1109/ICASSP49660.2025.10888445\">https://doi.org/10.1109/ICASSP49660.2025.10888445</a>","mla":"Cord-Landwehr, Tobias, et al. “Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models.” <i>ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</i>, 2024, doi:<a href=\"https://doi.org/10.1109/ICASSP49660.2025.10888445\">10.1109/ICASSP49660.2025.10888445</a>.","bibtex":"@inproceedings{Cord-Landwehr_Boeddeker_Haeb-Umbach_2024, title={Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models}, DOI={<a href=\"https://doi.org/10.1109/ICASSP49660.2025.10888445\">10.1109/ICASSP49660.2025.10888445</a>}, booktitle={ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, author={Cord-Landwehr, Tobias and Boeddeker, Christoph and Haeb-Umbach, Reinhold}, year={2024} }","short":"T. Cord-Landwehr, C. Boeddeker, R. Haeb-Umbach, in: ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2024."},"project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing"},{"name":"Automatische Transkription von Gesprächssituationen","_id":"508"}],"_id":"57085","user_id":"44393","department":[{"_id":"54"}],"file_date_updated":"2025-08-14T08:11:57Z","type":"conference","status":"public"},{"project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"_id":"35602","user_id":"49870","department":[{"_id":"54"}],"article_type":"original","file_date_updated":"2023-01-11T08:50:19Z","type":"journal_article","status":"public","date_updated":"2023-11-15T12:16:11Z","oa":"1","author":[{"first_name":"Thilo","last_name":"von Neumann","orcid":"https://orcid.org/0000-0002-7717-8670","full_name":"von Neumann, Thilo","id":"49870"},{"last_name":"Kinoshita","full_name":"Kinoshita, Keisuke","first_name":"Keisuke"},{"id":"40767","full_name":"Boeddeker, Christoph","last_name":"Boeddeker","first_name":"Christoph"},{"full_name":"Delcroix, Marc","last_name":"Delcroix","first_name":"Marc"},{"first_name":"Reinhold","full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach"}],"volume":31,"doi":"10.1109/taslp.2022.3228629","publication_status":"published","has_accepted_license":"1","publication_identifier":{"issn":["2329-9290","2329-9304"]},"citation":{"apa":"von Neumann, T., Kinoshita, K., Boeddeker, C., Delcroix, M., &#38; Haeb-Umbach, R. (2023). Segment-Less Continuous Speech Separation of Meetings: Training and Evaluation Criteria. <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i>, <i>31</i>, 576–589. <a href=\"https://doi.org/10.1109/taslp.2022.3228629\">https://doi.org/10.1109/taslp.2022.3228629</a>","short":"T. von Neumann, K. Kinoshita, C. Boeddeker, M. Delcroix, R. Haeb-Umbach, IEEE/ACM Transactions on Audio, Speech, and Language Processing 31 (2023) 576–589.","bibtex":"@article{von Neumann_Kinoshita_Boeddeker_Delcroix_Haeb-Umbach_2023, title={Segment-Less Continuous Speech Separation of Meetings: Training and Evaluation Criteria}, volume={31}, DOI={<a href=\"https://doi.org/10.1109/taslp.2022.3228629\">10.1109/taslp.2022.3228629</a>}, journal={IEEE/ACM Transactions on Audio, Speech, and Language Processing}, publisher={Institute of Electrical and Electronics Engineers (IEEE)}, author={von Neumann, Thilo and Kinoshita, Keisuke and Boeddeker, Christoph and Delcroix, Marc and Haeb-Umbach, Reinhold}, year={2023}, pages={576–589} }","mla":"von Neumann, Thilo, et al. “Segment-Less Continuous Speech Separation of Meetings: Training and Evaluation Criteria.” <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i>, vol. 31, Institute of Electrical and Electronics Engineers (IEEE), 2023, pp. 576–89, doi:<a href=\"https://doi.org/10.1109/taslp.2022.3228629\">10.1109/taslp.2022.3228629</a>.","ieee":"T. von Neumann, K. Kinoshita, C. Boeddeker, M. Delcroix, and R. Haeb-Umbach, “Segment-Less Continuous Speech Separation of Meetings: Training and Evaluation Criteria,” <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i>, vol. 31, pp. 576–589, 2023, doi: <a href=\"https://doi.org/10.1109/taslp.2022.3228629\">10.1109/taslp.2022.3228629</a>.","chicago":"Neumann, Thilo von, Keisuke Kinoshita, Christoph Boeddeker, Marc Delcroix, and Reinhold Haeb-Umbach. “Segment-Less Continuous Speech Separation of Meetings: Training and Evaluation Criteria.” <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i> 31 (2023): 576–89. <a href=\"https://doi.org/10.1109/taslp.2022.3228629\">https://doi.org/10.1109/taslp.2022.3228629</a>.","ama":"von Neumann T, Kinoshita K, Boeddeker C, Delcroix M, Haeb-Umbach R. Segment-Less Continuous Speech Separation of Meetings: Training and Evaluation Criteria. <i>IEEE/ACM Transactions on Audio, Speech, and Language Processing</i>. 2023;31:576-589. doi:<a href=\"https://doi.org/10.1109/taslp.2022.3228629\">10.1109/taslp.2022.3228629</a>"},"intvolume":"        31","page":"576-589","ddc":["000"],"keyword":["Continuous Speech Separation","Source Separation","Graph-PIT","Dynamic Programming","Permutation Invariant Training"],"language":[{"iso":"eng"}],"publication":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","abstract":[{"text":"Continuous Speech Separation (CSS) has been proposed to address speech overlaps during the analysis of realistic meeting-like conversations by eliminating any overlaps before further processing.\r\nCSS separates a recording of arbitrarily many speakers into a small number of overlap-free output channels, where each output channel may contain speech of multiple speakers.\r\nThis is often done by applying a conventional separation model trained with Utterance-level Permutation Invariant Training (uPIT), which exclusively maps a speaker to an output channel, in sliding window approach called stitching.\r\nRecently, we introduced an alternative training scheme called Graph-PIT that teaches the separation network to directly produce output streams in the required format without stitching.\r\nIt can handle an arbitrary number of speakers as long as never more of them overlap at the same time than the separator has output channels.\r\nIn this contribution, we further investigate the Graph-PIT training scheme.\r\nWe show in extended experiments that models trained with Graph-PIT also work in challenging reverberant conditions.\r\nModels trained in this way are able to perform segment-less CSS, i.e., without stitching, and achieve comparable and often better separation quality than the conventional CSS with uPIT and stitching.\r\nWe simplify the training schedule for Graph-PIT with the recently proposed Source Aggregated Signal-to-Distortion Ratio (SA-SDR) loss.\r\nIt eliminates unfavorable properties of the previously used A-SDR loss and thus enables training with Graph-PIT from scratch.\r\nGraph-PIT training relaxes the constraints w.r.t. the allowed numbers of speakers and speaking patterns which allows using a larger variety of training data.\r\nFurthermore, we introduce novel signal-level evaluation metrics for meeting scenarios, namely the source-aggregated scale- and convolution-invariant Signal-to-Distortion Ratio (SA-SI-SDR and SA-CI-SDR), which are generalizations of the commonly used SDR-based metrics for the CSS case.","lang":"eng"}],"file":[{"file_size":7185077,"access_level":"open_access","file_name":"main.pdf","file_id":"35607","date_updated":"2023-01-11T08:50:19Z","date_created":"2023-01-09T17:46:05Z","creator":"haebumb","relation":"main_file","content_type":"application/pdf"}],"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","date_created":"2023-01-09T17:24:17Z","title":"Segment-Less Continuous Speech Separation of Meetings: Training and Evaluation Criteria","quality_controlled":"1","year":"2023"},{"author":[{"last_name":"Drude","full_name":"Drude, Lukas","id":"11213","first_name":"Lukas"},{"last_name":"Chinaev","full_name":"Chinaev, Aleksej","first_name":"Aleksej"},{"first_name":"Dang Hai","last_name":"Tran Vu","full_name":"Tran Vu, Dang Hai"},{"full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach","first_name":"Reinhold"}],"date_created":"2019-07-12T05:27:35Z","oa":"1","date_updated":"2022-01-06T06:51:08Z","main_file_link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2014/DrChTrHaeb14.pdf","open_access":"1"}],"title":"Towards Online Source Counting in Speech Mixtures Applying a Variational EM for Complex Watson Mixture Models","related_material":{"link":[{"url":"https://groups.uni-paderborn.de/nt/pubs/2014/DrChTrHaeb14_Poster.pdf","relation":"supplementary_material","description":"Poster"}]},"page":"213-217","citation":{"bibtex":"@inproceedings{Drude_Chinaev_Tran Vu_Haeb-Umbach_2014, title={Towards Online Source Counting in Speech Mixtures Applying a Variational EM for Complex Watson Mixture Models}, booktitle={14th International Workshop on Acoustic Signal Enhancement (IWAENC 2014)}, author={Drude, Lukas and Chinaev, Aleksej and Tran Vu, Dang Hai and Haeb-Umbach, Reinhold}, year={2014}, pages={213–217} }","short":"L. Drude, A. Chinaev, D.H. Tran Vu, R. Haeb-Umbach, in: 14th International Workshop on Acoustic Signal Enhancement (IWAENC 2014), 2014, pp. 213–217.","mla":"Drude, Lukas, et al. “Towards Online Source Counting in Speech Mixtures Applying a Variational EM for Complex Watson Mixture Models.” <i>14th International Workshop on Acoustic Signal Enhancement (IWAENC 2014)</i>, 2014, pp. 213–17.","apa":"Drude, L., Chinaev, A., Tran Vu, D. H., &#38; Haeb-Umbach, R. (2014). Towards Online Source Counting in Speech Mixtures Applying a Variational EM for Complex Watson Mixture Models. In <i>14th International Workshop on Acoustic Signal Enhancement (IWAENC 2014)</i> (pp. 213–217).","ieee":"L. Drude, A. Chinaev, D. H. Tran Vu, and R. Haeb-Umbach, “Towards Online Source Counting in Speech Mixtures Applying a Variational EM for Complex Watson Mixture Models,” in <i>14th International Workshop on Acoustic Signal Enhancement (IWAENC 2014)</i>, 2014, pp. 213–217.","chicago":"Drude, Lukas, Aleksej Chinaev, Dang Hai Tran Vu, and Reinhold Haeb-Umbach. “Towards Online Source Counting in Speech Mixtures Applying a Variational EM for Complex Watson Mixture Models.” In <i>14th International Workshop on Acoustic Signal Enhancement (IWAENC 2014)</i>, 213–17, 2014.","ama":"Drude L, Chinaev A, Tran Vu DH, Haeb-Umbach R. Towards Online Source Counting in Speech Mixtures Applying a Variational EM for Complex Watson Mixture Models. In: <i>14th International Workshop on Acoustic Signal Enhancement (IWAENC 2014)</i>. ; 2014:213-217."},"year":"2014","department":[{"_id":"54"}],"user_id":"44006","_id":"11753","language":[{"iso":"eng"}],"keyword":["Accuracy","Acoustics","Estimation","Mathematical model","Source separation","Speech","Vectors","Bayes methods","Blind source separation","Directional statistics","Number of speakers","Speaker diarization"],"publication":"14th International Workshop on Acoustic Signal Enhancement (IWAENC 2014)","type":"conference","status":"public","abstract":[{"lang":"eng","text":"This contribution describes a step-wise source counting algorithm to determine the number of speakers in an offline scenario. Each speaker is identified by a variational expectation maximization (VEM) algorithm for complex Watson mixture models and therefore directly yields beamforming vectors for a subsequent speech separation process. An observation selection criterion is proposed which improves the robustness of the source counting in noise. The algorithm is compared to an alternative VEM approach with Gaussian mixture models based on directions of arrival and shown to deliver improved source counting accuracy. The article concludes by extending the offline algorithm towards a low-latency online estimation of the number of active sources from the streaming input data."}]},{"year":"2010","page":"241-244","citation":{"apa":"Tran Vu, D. H., &#38; Haeb-Umbach, R. (2010). Blind speech separation employing directional statistics in an Expectation Maximization framework. In <i>IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2010)</i> (pp. 241–244). <a href=\"https://doi.org/10.1109/ICASSP.2010.5495994\">https://doi.org/10.1109/ICASSP.2010.5495994</a>","bibtex":"@inproceedings{Tran Vu_Haeb-Umbach_2010, title={Blind speech separation employing directional statistics in an Expectation Maximization framework}, DOI={<a href=\"https://doi.org/10.1109/ICASSP.2010.5495994\">10.1109/ICASSP.2010.5495994</a>}, booktitle={IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2010)}, author={Tran Vu, Dang Hai and Haeb-Umbach, Reinhold}, year={2010}, pages={241–244} }","short":"D.H. Tran Vu, R. Haeb-Umbach, in: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2010), 2010, pp. 241–244.","mla":"Tran Vu, Dang Hai, and Reinhold Haeb-Umbach. “Blind Speech Separation Employing Directional Statistics in an Expectation Maximization Framework.” <i>IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2010)</i>, 2010, pp. 241–44, doi:<a href=\"https://doi.org/10.1109/ICASSP.2010.5495994\">10.1109/ICASSP.2010.5495994</a>.","ama":"Tran Vu DH, Haeb-Umbach R. Blind speech separation employing directional statistics in an Expectation Maximization framework. In: <i>IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2010)</i>. ; 2010:241-244. doi:<a href=\"https://doi.org/10.1109/ICASSP.2010.5495994\">10.1109/ICASSP.2010.5495994</a>","ieee":"D. H. Tran Vu and R. Haeb-Umbach, “Blind speech separation employing directional statistics in an Expectation Maximization framework,” in <i>IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2010)</i>, 2010, pp. 241–244.","chicago":"Tran Vu, Dang Hai, and Reinhold Haeb-Umbach. “Blind Speech Separation Employing Directional Statistics in an Expectation Maximization Framework.” In <i>IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2010)</i>, 241–44, 2010. <a href=\"https://doi.org/10.1109/ICASSP.2010.5495994\">https://doi.org/10.1109/ICASSP.2010.5495994</a>."},"title":"Blind speech separation employing directional statistics in an Expectation Maximization framework","doi":"10.1109/ICASSP.2010.5495994","main_file_link":[{"open_access":"1","url":"https://groups.uni-paderborn.de/nt/pubs/2010/DaHa10-2.pdf"}],"date_updated":"2022-01-06T06:51:12Z","oa":"1","date_created":"2019-07-12T05:30:40Z","author":[{"first_name":"Dang Hai","last_name":"Tran Vu","full_name":"Tran Vu, Dang Hai"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"}],"abstract":[{"text":"In this paper we propose to employ directional statistics in a complex vector space to approach the problem of blind speech separation in the presence of spatially correlated noise. We interpret the values of the short time Fourier transform of the microphone signals to be draws from a mixture of complex Watson distributions, a probabilistic model which naturally accounts for spatial aliasing. The parameters of the density are related to the a priori source probabilities, the power of the sources and the transfer function ratios from sources to sensors. Estimation formulas are derived for these parameters by employing the Expectation Maximization (EM) algorithm. The E-step corresponds to the estimation of the source presence probabilities for each time-frequency bin, while the M-step leads to a maximum signal-to-noise ratio (MaxSNR) beamformer in the presence of uncertainty about the source activity. Experimental results are reported for an implementation in a generalized sidelobe canceller (GSC) like spatial beamforming configuration for 3 speech sources with significant coherent noise in reverberant environments, demonstrating the usefulness of the novel modeling framework.","lang":"eng"}],"status":"public","publication":"IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2010)","type":"conference","keyword":["array signal processing","blind source separation","blind speech separation","complex vector space","complex Watson distribution","directional statistics","expectation-maximisation algorithm","expectation maximization algorithm","Fourier transform","Fourier transforms","generalized sidelobe canceller","interference suppression","maximum signal-to-noise ratio beamformer","microphone signal","probabilistic model","spatial aliasing","spatial beamforming configuration","speech enhancement","statistical distributions"],"language":[{"iso":"eng"}],"_id":"11913","department":[{"_id":"54"}],"user_id":"44006"}]
