% ICASSP 2025 paper on joint diarization and source separation of meetings (cACGMM + VMFMM).
@inproceedings{57085,
  abstract     = {We propose an approach for simultaneous diarization and separation of meeting data. It consists of a complex Angular Central Gaussian Mixture Model (cACGMM) for speech source separation, and a von-Mises-Fisher Mixture Model (VMFMM) for diarization in a joint statistical framework. Through the integration, both spatial and spectral information are exploited for diarization and separation. We also develop a method for counting the number of active speakers in a segment of a meeting to support block-wise processing. While the total number of speakers in a meeting may be known, it is usually not known on a per-segment level. With the proposed speaker counting, joint diarization and source separation can be done segment-by-segment, and the permutation problem across segments is solved, thus allowing for block-online processing in the future. Experimental results on the LibriCSS meeting corpus show that the integrated approach outperforms a cascaded approach of diarization and speech enhancement in terms of WER, both on a per-segment and on a per-meeting level.},
  author       = {Cord-Landwehr, Tobias and Boeddeker, Christoph and Haeb-Umbach, Reinhold},
  booktitle    = {{ICASSP} 2025 - 2025 {IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  keywords     = {diarization, source separation, mixture model, meeting},
  location     = {Hyderabad, India},
  title        = {Simultaneous Diarization and Separation of Meetings through the Integration of Statistical Mixture Models},
  doi          = {10.1109/ICASSP49660.2025.10888445},
  year         = {2025},
}

% ICASSP 2013 paper on MAP estimation of GMM parameters from noisy observations.
@inproceedings{11740,
  abstract     = {In this contribution we derive the Maximum A-Posteriori (MAP) estimates of the parameters of a Gaussian Mixture Model (GMM) in the presence of noisy observations. We assume the distortion to be white Gaussian noise of known mean and variance. An approximate conjugate prior of the GMM parameters is derived allowing for a computationally efficient implementation in a sequential estimation framework. Simulations on artificially generated data demonstrate the superiority of the proposed method compared to the Maximum Likelihood technique and to the ordinary MAP approach, whose estimates are corrected by the known statistics of the distortion in a straightforward manner.},
  author       = {Chinaev, Aleksej and Haeb-Umbach, Reinhold},
  booktitle    = {38th International Conference on Acoustics, Speech and Signal Processing ({ICASSP} 2013)},
  issn         = {1520-6149},
  keywords     = {Gaussian noise, maximum likelihood estimation, parameter estimation, GMM parameter, Gaussian mixture model, MAP estimation, Map-based estimation, maximum a-posteriori estimation, maximum likelihood technique, noisy observation, sequential estimation framework, white Gaussian noise, Additive noise, Noise measurement, Maximum a posteriori estimation},
  pages        = {3352--3356},
  title        = {{MAP}-based Estimation of the Parameters of a {Gaussian} Mixture Model in the Presence of Noisy Observations},
  doi          = {10.1109/ICASSP.2013.6638279},
  year         = {2013},
}

