% Conference paper (INTERSPEECH 2025). The citation key is kept as exported
% so existing \cite{61079} references continue to resolve.
% Names are stored as a plain "Last, First and ..." list (not wrapped in an
% extra brace pair) so the style can split, abbreviate and sort them.
% Only the acronyms in title/booktitle are brace-protected against recasing.
% NOTE(review): `location` holds the conference city; biblatex treats
% `location` as the publisher's place and offers `venue` for the event
% site -- confirm against the target toolchain before renaming.
@inproceedings{61079,
  author       = {Cord-Landwehr, Tobias and Gburrek, Tobias and Deegen, Marc and Haeb-Umbach, Reinhold},
  title        = {Spatio-spectral diarization of meetings by combining {TDOA}-based segmentation and speaker embedding-based clustering},
  booktitle    = {Proceedings of {INTERSPEECH}},
  location     = {Rotterdam},
  year         = {2025},
  doi          = {10.21437/Interspeech.2025-1663},
  abstract     = {We propose a spatio-spectral, combined model-based and data-driven
diarization pipeline consisting of TDOA-based segmentation followed by
embedding-based clustering. The proposed system requires neither access to
multi-channel training data nor prior knowledge about the number or placement
of microphones. It works for both a compact microphone array and distributed
microphones, with minor adjustments. Due to its superior handling of
overlapping speech during segmentation, the proposed pipeline significantly
outperforms the single-channel pyannote approach, both in a scenario with a
compact microphone array and in a setup with distributed microphones.
Additionally, we show that, unlike fully spatial diarization pipelines, the
proposed system can correctly track speakers when they change positions.},
}