% ICASSP 2025 conference paper.
% The conference site is kept in `venue`; biblatex reads `location` as the publisher's city.
@inproceedings{59999,
  author    = {Rautenberg, Frederik and Kuhlmann, Michael and Seebauer, Fritz and Wiechmann, Jana and Wagner, Petra and Haeb-Umbach, Reinhold},
  title     = {Speech Synthesis along Perceptual Voice Quality Dimensions},
  booktitle = {{ICASSP} 2025 - 2025 {IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  venue     = {Hyderabad, India},
  publisher = {IEEE},
  year      = {2025},
  doi       = {10.1109/icassp49660.2025.10888012},
}

% Interspeech 2025 conference paper.
% The conference site is kept in `venue`; biblatex reads `location` as the publisher's city.
% The word after the dash is braced so sentence-casing styles keep its capital.
@inproceedings{61047,
  author    = {Rautenberg, Frederik and Seebauer, Fritz and Wiechmann, Jana and Kuhlmann, Michael and Wagner, Petra and Haeb-Umbach, Reinhold},
  title     = {Synthesizing Speech with Selected Perceptual Voice Qualities -- {A} Case Study with Creaky Voice},
  booktitle = {{Interspeech} 2025},
  venue     = {Rotterdam},
  publisher = {ISCA},
  year      = {2025},
  doi       = {10.21437/Interspeech.2025-1443},
}

% Interspeech 2025 conference paper.
% The surname uses the ASCII spelling Haeb-Umbach, matching the majority of records in this file.
% NOTE(review): venue copied from the other Interspeech 2025 record in this file -- confirm.
@inproceedings{62164,
  author    = {Kuhlmann, Michael and Seebauer, Fritz and Wagner, Petra and Haeb-Umbach, Reinhold},
  title     = {Towards Frame-level Quality Predictions of Synthetic Speech},
  booktitle = {{Interspeech} 2025},
  venue     = {Rotterdam},
  publisher = {ISCA},
  year      = {2025},
  doi       = {10.21437/interspeech.2025-2190},
}

% EUSIPCO 2024 conference paper.
% The surname uses the ASCII spelling Haeb-Umbach, matching the majority of records in this file.
@inproceedings{57099,
  author    = {Xie, Yuying and Kuhlmann, Michael and Rautenberg, Frederik and Tan, Zheng-Hua and Haeb-Umbach, Reinhold},
  title     = {Speaker and Style Disentanglement of Speech Based on Contrastive Predictive Coding Supported Factorized Variational Autoencoder},
  booktitle = {2024 32nd European Signal Processing Conference ({EUSIPCO})},
  pages     = {436--440},
  year      = {2024},
}

% ITG Speech Communication conference paper, 2023.
% The conference site is kept in `venue`; biblatex reads `location` as the publisher's city.
% NOTE(review): booktitle is spelled like the other 2023 ITG record in this file -- confirm both refer to the same proceedings.
@inproceedings{48355,
  author    = {Rautenberg, Frederik and Kuhlmann, Michael and Wiechmann, Jana and Seebauer, Fritz and Wagner, Petra and Haeb-Umbach, Reinhold},
  title     = {On Feature Importance and Interpretability of Speaker Representations},
  booktitle = {Speech Communication; 15th {ITG} Conference},
  venue     = {Aachen},
  year      = {2023},
  abstract  = {Unsupervised speech disentanglement aims at separating fast varying from
slowly varying components of a speech signal. In this contribution, we take a
closer look at the embedding vector representing the slowly varying signal
components, commonly named the speaker embedding vector. We ask, which
properties of a speaker's voice are captured and investigate to which extent do
individual embedding vector components sign responsible for them, using the
concept of Shapley values. Our findings show that certain speaker-specific
acoustic-phonetic properties can be fairly well predicted from the speaker
embedding, while the investigated more abstract voice quality features cannot.},
}

% Speech Synthesis Workshop (SSW) 2023 paper.
% The title is stored in Title Case so that the bibliography style decides the final casing.
@inproceedings{46069,
  author    = {Seebauer, Fritz and Kuhlmann, Michael and Haeb-Umbach, Reinhold and Wagner, Petra},
  title     = {Re-examining the Quality Dimensions of Synthetic Speech},
  booktitle = {12th Speech Synthesis Workshop ({SSW}) 2023},
  year      = {2023},
}

% DAGA 2023 conference paper.
% The conference site is kept in `venue`; biblatex reads `location` as the publisher's city.
@inproceedings{44849,
  author    = {Rautenberg, Frederik and Kuhlmann, Michael and Ebbers, Janek and Wiechmann, Jana and Seebauer, Fritz and Wagner, Petra and Haeb-Umbach, Reinhold},
  title     = {Speech Disentanglement for Analysis and Modification of Acoustic and Perceptual Speaker Characteristics},
  booktitle = {Fortschritte der Akustik - {DAGA} 2023},
  venue     = {Hamburg},
  pages     = {1409--1412},
  year      = {2023},
}

% International Congress of Phonetic Sciences (20th) paper, 2023.
% The title is stored in Title Case rather than all capitals so that the bibliography style decides the casing.
% The surname uses the ASCII spelling Haeb-Umbach, matching the majority of records in this file.
% The conference site is kept in `venue`; biblatex reads `location` as the publisher's city.
@inproceedings{57098,
  author    = {Seebauer, Fritz and Kuhlmann, Michael and Haeb-Umbach, Reinhold and Wagner, Petra},
  title     = {Discerning Dimensions of Quality for State of the Art Synthetic Speech},
  booktitle = {Proceedings of the 20th International Congress of Phonetic Sciences},
  venue     = {Prague},
  year      = {2023},
  isbn      = {978-80-908114-2-3},
}

% ITG Speech Communication conference paper, 2023.
% The surname uses the ASCII spelling Haeb-Umbach, matching the majority of records in this file.
% NOTE(review): venue copied from the other 2023 ITG record in this file -- confirm.
@inproceedings{57086,
  author    = {Kuhlmann, Michael and Meise, Adrian Tobias and Seebauer, Fritz and Wagner, Petra and Haeb-Umbach, Reinhold},
  title     = {Investigating Speaker Embedding Disentanglement on Natural Read Speech},
  booktitle = {Speech Communication; 15th {ITG} Conference},
  venue     = {Aachen},
  pages     = {121--125},
  year      = {2023},
}

% Interspeech 2022 conference paper.
@inproceedings{33857,
  author    = {Kuhlmann, Michael and Seebauer, Fritz and Ebbers, Janek and Wagner, Petra and Haeb-Umbach, Reinhold},
  title     = {Investigation into Target Speaking Rate Adaptation for Voice Conversion},
  booktitle = {{Interspeech} 2022},
  publisher = {ISCA},
  year      = {2022},
  doi       = {10.21437/interspeech.2022-10740},
}

% ICASSP conference paper, 2021.
@inproceedings{29304,
  author    = {Ebbers, Janek and Kuhlmann, Michael and Cord-Landwehr, Tobias and Haeb-Umbach, Reinhold},
  title     = {Contrastive Predictive Coding Supported Factorized Variational Autoencoder for Unsupervised Learning of Disentangled Speech Representations},
  booktitle = {Proceedings of the {IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  pages     = {3860--3864},
  year      = {2021},
  abstract  = {In this work we address disentanglement of style and content in speech signals. We propose a fully convolutional variational autoencoder employing two encoders: a content encoder and a style encoder. To foster disentanglement, we propose adversarial contrastive predictive coding. This new disentanglement method does neither need parallel data nor any supervision. We show that the proposed technique is capable of separating speaker and content traits into the two different representations and show competitive speaker-content disentanglement performance compared to other unsupervised approaches. We further demonstrate an increased robustness of the content representation against a train-test mismatch compared to spectral features, when used for phone recognition.},
}

