---
_id: '52958'
author:
- first_name: Christoph
full_name: Boeddeker, Christoph
id: '40767'
last_name: Boeddeker
- first_name: Aswin Shanmugam
full_name: Subramanian, Aswin Shanmugam
last_name: Subramanian
- first_name: Gordon
full_name: Wichern, Gordon
last_name: Wichern
- first_name: Reinhold
full_name: Haeb-Umbach, Reinhold
id: '242'
last_name: Haeb-Umbach
- first_name: Jonathan
full_name: Le Roux, Jonathan
last_name: Le Roux
citation:
ama: 'Boeddeker C, Subramanian AS, Wichern G, Haeb-Umbach R, Le Roux J. TS-SEP:
Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings.
IEEE/ACM Transactions on Audio, Speech, and Language Processing. 2024;32:1185-1197.
doi:10.1109/taslp.2024.3350887'
apa: 'Boeddeker, C., Subramanian, A. S., Wichern, G., Haeb-Umbach, R., & Le
Roux, J. (2024). TS-SEP: Joint Diarization and Separation Conditioned on Estimated
Speaker Embeddings. IEEE/ACM Transactions on Audio, Speech, and Language Processing,
32, 1185–1197. https://doi.org/10.1109/taslp.2024.3350887'
bibtex: '@article{Boeddeker_Subramanian_Wichern_Haeb-Umbach_Le Roux_2024, title={TS-SEP:
Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings},
volume={32}, DOI={10.1109/taslp.2024.3350887},
journal={IEEE/ACM Transactions on Audio, Speech, and Language Processing}, publisher={Institute
of Electrical and Electronics Engineers (IEEE)}, author={Boeddeker, Christoph
and Subramanian, Aswin Shanmugam and Wichern, Gordon and Haeb-Umbach, Reinhold
and Le Roux, Jonathan}, year={2024}, pages={1185–1197} }'
chicago: 'Boeddeker, Christoph, Aswin Shanmugam Subramanian, Gordon Wichern, Reinhold
Haeb-Umbach, and Jonathan Le Roux. “TS-SEP: Joint Diarization and Separation Conditioned
on Estimated Speaker Embeddings.” IEEE/ACM Transactions on Audio, Speech, and
Language Processing 32 (2024): 1185–97. https://doi.org/10.1109/taslp.2024.3350887.'
ieee: 'C. Boeddeker, A. S. Subramanian, G. Wichern, R. Haeb-Umbach, and J. Le Roux,
“TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings,”
IEEE/ACM Transactions on Audio, Speech, and Language Processing, vol. 32,
pp. 1185–1197, 2024, doi: 10.1109/taslp.2024.3350887.'
mla: 'Boeddeker, Christoph, et al. “TS-SEP: Joint Diarization and Separation Conditioned
on Estimated Speaker Embeddings.” IEEE/ACM Transactions on Audio, Speech, and
Language Processing, vol. 32, Institute of Electrical and Electronics Engineers
(IEEE), 2024, pp. 1185–97, doi:10.1109/taslp.2024.3350887.'
short: C. Boeddeker, A.S. Subramanian, G. Wichern, R. Haeb-Umbach, J. Le Roux, IEEE/ACM
Transactions on Audio, Speech, and Language Processing 32 (2024) 1185–1197.
date_created: 2024-03-26T16:11:54Z
date_updated: 2024-03-26T16:16:34Z
department:
- _id: '54'
doi: 10.1109/taslp.2024.3350887
intvolume: ' 32'
keyword:
- Electrical and Electronic Engineering
- Acoustics and Ultrasonics
- Computer Science (miscellaneous)
- Computational Mathematics
language:
- iso: eng
main_file_link:
- open_access: '1'
url: https://arxiv.org/abs/2303.03849
oa: '1'
page: 1185-1197
publication: IEEE/ACM Transactions on Audio, Speech, and Language Processing
publication_identifier:
issn:
- 2329-9290
- 2329-9304
publication_status: published
publisher: Institute of Electrical and Electronics Engineers (IEEE)
status: public
title: 'TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker
Embeddings'
type: journal_article
user_id: '40767'
volume: 32
year: '2024'
...
---
_id: '48269'
author:
- first_name: Tobias
full_name: Gburrek, Tobias
id: '44006'
last_name: Gburrek
- first_name: Joerg
full_name: Schmalenstroeer, Joerg
id: '460'
last_name: Schmalenstroeer
- first_name: Reinhold
full_name: Haeb-Umbach, Reinhold
id: '242'
last_name: Haeb-Umbach
citation:
ama: 'Gburrek T, Schmalenstroeer J, Haeb-Umbach R. On the Integration of Sampling
Rate Synchronization and Acoustic Beamforming. In: European Signal Processing
Conference (EUSIPCO). ; 2023.'
apa: Gburrek, T., Schmalenstroeer, J., & Haeb-Umbach, R. (2023). On the Integration
of Sampling Rate Synchronization and Acoustic Beamforming. European Signal
Processing Conference (EUSIPCO). European Signal Processing Conference (EUSIPCO),
Helsinki.
bibtex: '@inproceedings{Gburrek_Schmalenstroeer_Haeb-Umbach_2023, title={On the
Integration of Sampling Rate Synchronization and Acoustic Beamforming}, booktitle={European
Signal Processing Conference (EUSIPCO)}, author={Gburrek, Tobias and Schmalenstroeer,
Joerg and Haeb-Umbach, Reinhold}, year={2023} }'
chicago: Gburrek, Tobias, Joerg Schmalenstroeer, and Reinhold Haeb-Umbach. “On the
Integration of Sampling Rate Synchronization and Acoustic Beamforming.” In European
Signal Processing Conference (EUSIPCO), 2023.
ieee: T. Gburrek, J. Schmalenstroeer, and R. Haeb-Umbach, “On the Integration of
Sampling Rate Synchronization and Acoustic Beamforming,” presented at the European
Signal Processing Conference (EUSIPCO), Helsinki, 2023.
mla: Gburrek, Tobias, et al. “On the Integration of Sampling Rate Synchronization
and Acoustic Beamforming.” European Signal Processing Conference (EUSIPCO),
2023.
short: 'T. Gburrek, J. Schmalenstroeer, R. Haeb-Umbach, in: European Signal Processing
Conference (EUSIPCO), 2023.'
conference:
location: Helsinki
name: European Signal Processing Conference (EUSIPCO)
date_created: 2023-10-18T12:54:08Z
date_updated: 2023-10-26T08:16:23Z
department:
- _id: '54'
language:
- iso: eng
main_file_link:
- open_access: '1'
url: https://eurasip.org/Proceedings/Eusipco/Eusipco2023/pdfs/0000011.pdf
oa: '1'
publication: European Signal Processing Conference (EUSIPCO)
quality_controlled: '1'
status: public
title: On the Integration of Sampling Rate Synchronization and Acoustic Beamforming
type: conference
user_id: '460'
year: '2023'
...
---
_id: '47128'
author:
- first_name: Tobias
full_name: Cord-Landwehr, Tobias
id: '44393'
last_name: Cord-Landwehr
- first_name: Christoph
full_name: Boeddeker, Christoph
id: '40767'
last_name: Boeddeker
- first_name: Cătălin
full_name: Zorilă, Cătălin
last_name: Zorilă
- first_name: Rama
full_name: Doddipatla, Rama
last_name: Doddipatla
- first_name: Reinhold
full_name: Haeb-Umbach, Reinhold
id: '242'
last_name: Haeb-Umbach
citation:
ama: 'Cord-Landwehr T, Boeddeker C, Zorilă C, Doddipatla R, Haeb-Umbach R. Frame-Wise
and Overlap-Robust Speaker Embeddings for Meeting Diarization. In: ICASSP 2023
- 2023 IEEE International Conference on Acoustics, Speech and Signal Processing
(ICASSP). IEEE; 2023. doi:10.1109/icassp49357.2023.10095370'
apa: Cord-Landwehr, T., Boeddeker, C., Zorilă, C., Doddipatla, R., & Haeb-Umbach,
R. (2023). Frame-Wise and Overlap-Robust Speaker Embeddings for Meeting Diarization.
ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal
Processing (ICASSP). 2023 IEEE International Conference on Acoustics, Speech,
and Signal Processing (ICASSP), Rhodes. https://doi.org/10.1109/icassp49357.2023.10095370
bibtex: '@inproceedings{Cord-Landwehr_Boeddeker_Zorilă_Doddipatla_Haeb-Umbach_2023,
title={Frame-Wise and Overlap-Robust Speaker Embeddings for Meeting Diarization},
DOI={10.1109/icassp49357.2023.10095370},
booktitle={ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech
and Signal Processing (ICASSP)}, publisher={IEEE}, author={Cord-Landwehr, Tobias
and Boeddeker, Christoph and Zorilă, Cătălin and Doddipatla, Rama and Haeb-Umbach,
Reinhold}, year={2023} }'
chicago: Cord-Landwehr, Tobias, Christoph Boeddeker, Cătălin Zorilă, Rama Doddipatla,
and Reinhold Haeb-Umbach. “Frame-Wise and Overlap-Robust Speaker Embeddings for
Meeting Diarization.” In ICASSP 2023 - 2023 IEEE International Conference on
Acoustics, Speech and Signal Processing (ICASSP). IEEE, 2023. https://doi.org/10.1109/icassp49357.2023.10095370.
ieee: 'T. Cord-Landwehr, C. Boeddeker, C. Zorilă, R. Doddipatla, and R. Haeb-Umbach,
“Frame-Wise and Overlap-Robust Speaker Embeddings for Meeting Diarization,” presented
at the 2023 IEEE International Conference on Acoustics, Speech, and Signal Processing
(ICASSP), Rhodes, 2023, doi: 10.1109/icassp49357.2023.10095370.'
mla: Cord-Landwehr, Tobias, et al. “Frame-Wise and Overlap-Robust Speaker Embeddings
for Meeting Diarization.” ICASSP 2023 - 2023 IEEE International Conference
on Acoustics, Speech and Signal Processing (ICASSP), IEEE, 2023, doi:10.1109/icassp49357.2023.10095370.
short: 'T. Cord-Landwehr, C. Boeddeker, C. Zorilă, R. Doddipatla, R. Haeb-Umbach,
in: ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and
Signal Processing (ICASSP), IEEE, 2023.'
conference:
location: Rhodes
name: 2023 IEEE International Conference on Acoustics, Speech, and Signal Processing
(ICASSP)
date_created: 2023-09-19T14:01:20Z
date_updated: 2023-11-15T14:56:27Z
ddc:
- '000'
department:
- _id: '54'
doi: 10.1109/icassp49357.2023.10095370
file:
- access_level: open_access
content_type: application/pdf
creator: cord
date_created: 2023-11-15T14:56:18Z
date_updated: 2023-11-15T14:56:18Z
file_id: '48932'
file_name: teacher_student_embeddings.pdf
file_size: 246306
relation: main_file
file_date_updated: 2023-11-15T14:56:18Z
has_accepted_license: '1'
language:
- iso: eng
oa: '1'
project:
- _id: '52'
name: 'PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing'
publication: ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech
and Signal Processing (ICASSP)
publication_status: published
publisher: IEEE
status: public
title: Frame-Wise and Overlap-Robust Speaker Embeddings for Meeting Diarization
type: conference
user_id: '44393'
year: '2023'
...
---
_id: '48270'
author:
- first_name: Joerg
full_name: Schmalenstroeer, Joerg
id: '460'
last_name: Schmalenstroeer
- first_name: Tobias
full_name: Gburrek, Tobias
id: '44006'
last_name: Gburrek
- first_name: Reinhold
full_name: Haeb-Umbach, Reinhold
id: '242'
last_name: Haeb-Umbach
citation:
ama: 'Schmalenstroeer J, Gburrek T, Haeb-Umbach R. LibriWASN: A Data Set for Meeting
Separation, Diarization, and Recognition with Asynchronous Recording Devices.
In: ITG Conference on Speech Communication. ; 2023.'
apa: 'Schmalenstroeer, J., Gburrek, T., & Haeb-Umbach, R. (2023). LibriWASN:
A Data Set for Meeting Separation, Diarization, and Recognition with Asynchronous
Recording Devices. ITG Conference on Speech Communication. ITG Conference
on Speech Communication, Aachen.'
bibtex: '@inproceedings{Schmalenstroeer_Gburrek_Haeb-Umbach_2023, title={LibriWASN:
A Data Set for Meeting Separation, Diarization, and Recognition with Asynchronous
Recording Devices}, booktitle={ITG Conference on Speech Communication}, author={Schmalenstroeer,
Joerg and Gburrek, Tobias and Haeb-Umbach, Reinhold}, year={2023} }'
chicago: 'Schmalenstroeer, Joerg, Tobias Gburrek, and Reinhold Haeb-Umbach. “LibriWASN:
A Data Set for Meeting Separation, Diarization, and Recognition with Asynchronous
Recording Devices.” In ITG Conference on Speech Communication, 2023.'
ieee: 'J. Schmalenstroeer, T. Gburrek, and R. Haeb-Umbach, “LibriWASN: A Data Set
for Meeting Separation, Diarization, and Recognition with Asynchronous Recording
Devices,” presented at the ITG Conference on Speech Communication, Aachen, 2023.'
mla: 'Schmalenstroeer, Joerg, et al. “LibriWASN: A Data Set for Meeting Separation,
Diarization, and Recognition with Asynchronous Recording Devices.” ITG Conference
on Speech Communication, 2023.'
short: 'J. Schmalenstroeer, T. Gburrek, R. Haeb-Umbach, in: ITG Conference on Speech
Communication, 2023.'
conference:
location: Aachen
name: ITG Conference on Speech Communication
date_created: 2023-10-18T13:00:54Z
date_updated: 2023-11-15T14:48:45Z
ddc:
- '004'
department:
- _id: '54'
file:
- access_level: open_access
content_type: application/pdf
creator: schmalen
date_created: 2023-10-26T08:20:15Z
date_updated: 2023-11-15T14:48:44Z
file_id: '48483'
file_name: SchTgbHaeb2023Final.pdf
file_size: 2844502
relation: main_file
file_date_updated: 2023-11-15T14:48:44Z
has_accepted_license: '1'
language:
- iso: eng
oa: '1'
publication: ITG Conference on Speech Communication
quality_controlled: '1'
status: public
title: 'LibriWASN: A Data Set for Meeting Separation, Diarization, and Recognition
with Asynchronous Recording Devices'
type: conference
user_id: '460'
year: '2023'
...
---
_id: '47129'
author:
- first_name: Tobias
full_name: Cord-Landwehr, Tobias
id: '44393'
last_name: Cord-Landwehr
- first_name: Christoph
full_name: Boeddeker, Christoph
id: '40767'
last_name: Boeddeker
- first_name: Cătălin
full_name: Zorilă, Cătălin
last_name: Zorilă
- first_name: Rama
full_name: Doddipatla, Rama
last_name: Doddipatla
- first_name: Reinhold
full_name: Haeb-Umbach, Reinhold
id: '242'
last_name: Haeb-Umbach
citation:
ama: 'Cord-Landwehr T, Boeddeker C, Zorilă C, Doddipatla R, Haeb-Umbach R. A Teacher-Student
Approach for Extracting Informative Speaker Embeddings From Speech Mixtures. In:
INTERSPEECH 2023. ISCA; 2023. doi:10.21437/interspeech.2023-1379'
apa: Cord-Landwehr, T., Boeddeker, C., Zorilă, C., Doddipatla, R., & Haeb-Umbach,
R. (2023). A Teacher-Student Approach for Extracting Informative Speaker Embeddings
From Speech Mixtures. INTERSPEECH 2023. https://doi.org/10.21437/interspeech.2023-1379
bibtex: '@inproceedings{Cord-Landwehr_Boeddeker_Zorilă_Doddipatla_Haeb-Umbach_2023,
title={A Teacher-Student Approach for Extracting Informative Speaker Embeddings
From Speech Mixtures}, DOI={10.21437/interspeech.2023-1379},
booktitle={INTERSPEECH 2023}, publisher={ISCA}, author={Cord-Landwehr, Tobias
and Boeddeker, Christoph and Zorilă, Cătălin and Doddipatla, Rama and Haeb-Umbach,
Reinhold}, year={2023} }'
chicago: Cord-Landwehr, Tobias, Christoph Boeddeker, Cătălin Zorilă, Rama Doddipatla,
and Reinhold Haeb-Umbach. “A Teacher-Student Approach for Extracting Informative
Speaker Embeddings From Speech Mixtures.” In INTERSPEECH 2023. ISCA, 2023.
https://doi.org/10.21437/interspeech.2023-1379.
ieee: 'T. Cord-Landwehr, C. Boeddeker, C. Zorilă, R. Doddipatla, and R. Haeb-Umbach,
“A Teacher-Student Approach for Extracting Informative Speaker Embeddings From
Speech Mixtures,” 2023, doi: 10.21437/interspeech.2023-1379.'
mla: Cord-Landwehr, Tobias, et al. “A Teacher-Student Approach for Extracting Informative
Speaker Embeddings From Speech Mixtures.” INTERSPEECH 2023, ISCA, 2023,
doi:10.21437/interspeech.2023-1379.
short: 'T. Cord-Landwehr, C. Boeddeker, C. Zorilă, R. Doddipatla, R. Haeb-Umbach,
in: INTERSPEECH 2023, ISCA, 2023.'
date_created: 2023-09-19T14:34:37Z
date_updated: 2023-11-15T15:00:12Z
ddc:
- '000'
department:
- _id: '54'
doi: 10.21437/interspeech.2023-1379
file:
- access_level: open_access
content_type: application/pdf
creator: cord
date_created: 2023-11-15T15:00:02Z
date_updated: 2023-11-15T15:00:02Z
file_id: '48933'
file_name: multispeaker_embeddings.pdf
file_size: 303203
relation: main_file
file_date_updated: 2023-11-15T15:00:02Z
has_accepted_license: '1'
language:
- iso: eng
oa: '1'
project:
- _id: '52'
name: 'PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing'
publication: INTERSPEECH 2023
publication_status: published
publisher: ISCA
status: public
title: A Teacher-Student Approach for Extracting Informative Speaker Embeddings From
Speech Mixtures
type: conference
user_id: '44393'
year: '2023'
...
---
_id: '48355'
abstract:
- lang: eng
text: "Unsupervised speech disentanglement aims at separating fast varying from\r\nslowly
varying components of a speech signal. In this contribution, we take a\r\ncloser
look at the embedding vector representing the slowly varying signal\r\ncomponents,
commonly named the speaker embedding vector. We ask, which\r\nproperties of a
speaker's voice are captured and investigate to which extent do\r\nindividual
embedding vector components sign responsible for them, using the\r\nconcept of
Shapley values. Our findings show that certain speaker-specific\r\nacoustic-phonetic
properties can be fairly well predicted from the speaker\r\nembedding, while the
investigated more abstract voice quality features cannot."
author:
- first_name: Frederik
full_name: Rautenberg, Frederik
id: '72602'
last_name: Rautenberg
- first_name: Michael
full_name: Kuhlmann, Michael
id: '49871'
last_name: Kuhlmann
- first_name: Jana
full_name: Wiechmann, Jana
last_name: Wiechmann
- first_name: Fritz
full_name: Seebauer, Fritz
last_name: Seebauer
- first_name: Petra
full_name: Wagner, Petra
last_name: Wagner
- first_name: Reinhold
full_name: Haeb-Umbach, Reinhold
id: '242'
last_name: Haeb-Umbach
citation:
ama: 'Rautenberg F, Kuhlmann M, Wiechmann J, Seebauer F, Wagner P, Haeb-Umbach R.
On Feature Importance and Interpretability of Speaker Representations. In: ITG
Conference on Speech Communication. ; 2023.'
apa: Rautenberg, F., Kuhlmann, M., Wiechmann, J., Seebauer, F., Wagner, P., &
Haeb-Umbach, R. (2023). On Feature Importance and Interpretability of Speaker
Representations. ITG Conference on Speech Communication. ITG Conference
on Speech Communication, Aachen.
bibtex: '@inproceedings{Rautenberg_Kuhlmann_Wiechmann_Seebauer_Wagner_Haeb-Umbach_2023,
title={On Feature Importance and Interpretability of Speaker Representations},
booktitle={ITG Conference on Speech Communication}, author={Rautenberg, Frederik
and Kuhlmann, Michael and Wiechmann, Jana and Seebauer, Fritz and Wagner, Petra
and Haeb-Umbach, Reinhold}, year={2023} }'
chicago: Rautenberg, Frederik, Michael Kuhlmann, Jana Wiechmann, Fritz Seebauer,
Petra Wagner, and Reinhold Haeb-Umbach. “On Feature Importance and Interpretability
of Speaker Representations.” In ITG Conference on Speech Communication,
2023.
ieee: F. Rautenberg, M. Kuhlmann, J. Wiechmann, F. Seebauer, P. Wagner, and R. Haeb-Umbach,
“On Feature Importance and Interpretability of Speaker Representations,” presented
at the ITG Conference on Speech Communication, Aachen, 2023.
mla: Rautenberg, Frederik, et al. “On Feature Importance and Interpretability of
Speaker Representations.” ITG Conference on Speech Communication, 2023.
short: 'F. Rautenberg, M. Kuhlmann, J. Wiechmann, F. Seebauer, P. Wagner, R. Haeb-Umbach,
in: ITG Conference on Speech Communication, 2023.'
conference:
end_date: 2023-09-22
location: Aachen
name: ITG Conference on Speech Communication
start_date: 2023-09-20
date_created: 2023-10-20T08:04:46Z
date_updated: 2023-11-22T13:44:33Z
ddc:
- '000'
department:
- _id: '54'
- _id: '660'
external_id:
arxiv:
- '2310.12599'
file:
- access_level: closed
content_type: application/pdf
creator: frra
date_created: 2023-10-20T08:20:58Z
date_updated: 2023-10-20T08:20:58Z
file_id: '48359'
file_name: arxiv.pdf
file_size: 272390
relation: main_file
success: 1
file_date_updated: 2023-10-20T08:20:58Z
has_accepted_license: '1'
language:
- iso: eng
main_file_link:
- open_access: '1'
url: https://arxiv.org/abs/2310.12599
oa: '1'
project:
- _id: '129'
grant_number: '438445824'
name: 'TRR 318 - C06: TRR 318 - Technisch unterstütztes Erklären von Stimmcharakteristika
(Teilprojekt C06)'
publication: ITG Conference on Speech Communication
status: public
title: On Feature Importance and Interpretability of Speaker Representations
type: conference
user_id: '72602'
year: '2023'
...
---
_id: '48410'
author:
- first_name: Jana
full_name: Wiechmann, Jana
last_name: Wiechmann
- first_name: Frederik
full_name: Rautenberg, Frederik
id: '72602'
last_name: Rautenberg
- first_name: Petra
full_name: Wagner, Petra
last_name: Wagner
- first_name: Reinhold
full_name: Haeb-Umbach, Reinhold
id: '242'
last_name: Haeb-Umbach
citation:
ama: 'Wiechmann J, Rautenberg F, Wagner P, Haeb-Umbach R. Explaining voice characteristics
to novice voice practitioners-How successful is it? In: 20th International
Congress of the Phonetic Sciences (ICPhS) . ; 2023.'
apa: Wiechmann, J., Rautenberg, F., Wagner, P., & Haeb-Umbach, R. (2023). Explaining
voice characteristics to novice voice practitioners-How successful is it? 20th
International Congress of the Phonetic Sciences (ICPhS) .
bibtex: '@inproceedings{Wiechmann_Rautenberg_Wagner_Haeb-Umbach_2023, title={Explaining
voice characteristics to novice voice practitioners-How successful is it?}, booktitle={20th
International Congress of the Phonetic Sciences (ICPhS) }, author={Wiechmann,
Jana and Rautenberg, Frederik and Wagner, Petra and Haeb-Umbach, Reinhold}, year={2023}
}'
chicago: Wiechmann, Jana, Frederik Rautenberg, Petra Wagner, and Reinhold Haeb-Umbach.
“Explaining Voice Characteristics to Novice Voice Practitioners-How Successful
Is It?” In 20th International Congress of the Phonetic Sciences (ICPhS) ,
2023.
ieee: J. Wiechmann, F. Rautenberg, P. Wagner, and R. Haeb-Umbach, “Explaining voice
characteristics to novice voice practitioners-How successful is it?,” 2023.
mla: Wiechmann, Jana, et al. “Explaining Voice Characteristics to Novice Voice Practitioners-How
Successful Is It?” 20th International Congress of the Phonetic Sciences (ICPhS)
, 2023.
short: 'J. Wiechmann, F. Rautenberg, P. Wagner, R. Haeb-Umbach, in: 20th International
Congress of the Phonetic Sciences (ICPhS) , 2023.'
conference:
end_date: 2023-08-11
start_date: 2023-08-07
date_created: 2023-10-24T08:05:40Z
date_updated: 2023-11-22T13:44:59Z
ddc:
- '040'
department:
- _id: '54'
- _id: '660'
file:
- access_level: closed
content_type: application/pdf
creator: frra
date_created: 2023-10-24T08:03:27Z
date_updated: 2023-10-24T08:03:27Z
file_id: '48413'
file_name: 188.pdf
file_size: 209980
relation: main_file
success: 1
file_date_updated: 2023-10-24T08:03:27Z
has_accepted_license: '1'
language:
- iso: eng
main_file_link:
- open_access: '1'
oa: '1'
project:
- _id: '129'
grant_number: '438445824'
name: 'TRR 318 - C06: TRR 318 - Technisch unterstütztes Erklären von Stimmcharakteristika
(Teilprojekt C06)'
publication: '20th International Congress of the Phonetic Sciences (ICPhS) '
status: public
title: Explaining voice characteristics to novice voice practitioners-How successful
is it?
type: conference
user_id: '72602'
year: '2023'
...
---
_id: '48391'
author:
- first_name: Rohith
full_name: Aralikatti, Rohith
last_name: Aralikatti
- first_name: Christoph
full_name: Boeddeker, Christoph
id: '40767'
last_name: Boeddeker
- first_name: Gordon
full_name: Wichern, Gordon
last_name: Wichern
- first_name: Aswin
full_name: Subramanian, Aswin
last_name: Subramanian
- first_name: Jonathan
full_name: Le Roux, Jonathan
last_name: Le Roux
citation:
ama: 'Aralikatti R, Boeddeker C, Wichern G, Subramanian A, Le Roux J. Reverberation
as Supervision For Speech Separation. In: ICASSP 2023 - 2023 IEEE International
Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE; 2023.
doi:10.1109/icassp49357.2023.10095022'
apa: Aralikatti, R., Boeddeker, C., Wichern, G., Subramanian, A., & Le Roux,
J. (2023). Reverberation as Supervision For Speech Separation. ICASSP 2023
- 2023 IEEE International Conference on Acoustics, Speech and Signal Processing
(ICASSP). https://doi.org/10.1109/icassp49357.2023.10095022
bibtex: '@inproceedings{Aralikatti_Boeddeker_Wichern_Subramanian_Le Roux_2023, title={Reverberation
as Supervision For Speech Separation}, DOI={10.1109/icassp49357.2023.10095022},
booktitle={ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech
and Signal Processing (ICASSP)}, publisher={IEEE}, author={Aralikatti, Rohith
and Boeddeker, Christoph and Wichern, Gordon and Subramanian, Aswin and Le Roux,
Jonathan}, year={2023} }'
chicago: Aralikatti, Rohith, Christoph Boeddeker, Gordon Wichern, Aswin Subramanian,
and Jonathan Le Roux. “Reverberation as Supervision For Speech Separation.” In
ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal
Processing (ICASSP). IEEE, 2023. https://doi.org/10.1109/icassp49357.2023.10095022.
ieee: 'R. Aralikatti, C. Boeddeker, G. Wichern, A. Subramanian, and J. Le Roux,
“Reverberation as Supervision For Speech Separation,” 2023, doi: 10.1109/icassp49357.2023.10095022.'
mla: Aralikatti, Rohith, et al. “Reverberation as Supervision For Speech Separation.”
ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal
Processing (ICASSP), IEEE, 2023, doi:10.1109/icassp49357.2023.10095022.
short: 'R. Aralikatti, C. Boeddeker, G. Wichern, A. Subramanian, J. Le Roux, in:
ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal
Processing (ICASSP), IEEE, 2023.'
date_created: 2023-10-23T15:09:13Z
date_updated: 2023-10-23T15:10:16Z
department:
- _id: '54'
doi: 10.1109/icassp49357.2023.10095022
language:
- iso: eng
publication: ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech
and Signal Processing (ICASSP)
publication_status: published
publisher: IEEE
status: public
title: Reverberation as Supervision For Speech Separation
type: conference
user_id: '40767'
year: '2023'
...
---
_id: '48390'
author:
- first_name: Simon
full_name: Berger, Simon
last_name: Berger
- first_name: Peter
full_name: Vieting, Peter
last_name: Vieting
- first_name: Christoph
full_name: Boeddeker, Christoph
id: '40767'
last_name: Boeddeker
- first_name: Ralf
full_name: Schlüter, Ralf
last_name: Schlüter
- first_name: Reinhold
full_name: Haeb-Umbach, Reinhold
id: '242'
last_name: Haeb-Umbach
citation:
ama: 'Berger S, Vieting P, Boeddeker C, Schlüter R, Haeb-Umbach R. Mixture Encoder
for Joint Speech Separation and Recognition. In: INTERSPEECH 2023. ISCA;
2023. doi:10.21437/interspeech.2023-1815'
apa: Berger, S., Vieting, P., Boeddeker, C., Schlüter, R., & Haeb-Umbach, R.
(2023). Mixture Encoder for Joint Speech Separation and Recognition. INTERSPEECH
2023. https://doi.org/10.21437/interspeech.2023-1815
bibtex: '@inproceedings{Berger_Vieting_Boeddeker_Schlüter_Haeb-Umbach_2023, title={Mixture
Encoder for Joint Speech Separation and Recognition}, DOI={10.21437/interspeech.2023-1815},
booktitle={INTERSPEECH 2023}, publisher={ISCA}, author={Berger, Simon and Vieting,
Peter and Boeddeker, Christoph and Schlüter, Ralf and Haeb-Umbach, Reinhold},
year={2023} }'
chicago: Berger, Simon, Peter Vieting, Christoph Boeddeker, Ralf Schlüter, and Reinhold
Haeb-Umbach. “Mixture Encoder for Joint Speech Separation and Recognition.” In
INTERSPEECH 2023. ISCA, 2023. https://doi.org/10.21437/interspeech.2023-1815.
ieee: 'S. Berger, P. Vieting, C. Boeddeker, R. Schlüter, and R. Haeb-Umbach, “Mixture
Encoder for Joint Speech Separation and Recognition,” 2023, doi: 10.21437/interspeech.2023-1815.'
mla: Berger, Simon, et al. “Mixture Encoder for Joint Speech Separation and Recognition.”
INTERSPEECH 2023, ISCA, 2023, doi:10.21437/interspeech.2023-1815.
short: 'S. Berger, P. Vieting, C. Boeddeker, R. Schlüter, R. Haeb-Umbach, in: INTERSPEECH
2023, ISCA, 2023.'
date_created: 2023-10-23T15:06:39Z
date_updated: 2023-10-23T15:10:19Z
department:
- _id: '54'
doi: 10.21437/interspeech.2023-1815
language:
- iso: eng
publication: INTERSPEECH 2023
publication_status: published
publisher: ISCA
status: public
title: Mixture Encoder for Joint Speech Separation and Recognition
type: conference
user_id: '40767'
year: '2023'
...
---
_id: '46069'
author:
- first_name: Fritz
full_name: Seebauer, Fritz
last_name: Seebauer
- first_name: Michael
full_name: Kuhlmann, Michael
id: '49871'
last_name: Kuhlmann
- first_name: Reinhold
full_name: Haeb-Umbach, Reinhold
id: '242'
last_name: Haeb-Umbach
- first_name: Petra
full_name: Wagner, Petra
last_name: Wagner
citation:
ama: 'Seebauer F, Kuhlmann M, Haeb-Umbach R, Wagner P. Re-examining the quality
dimensions of synthetic speech. In: 12th Speech Synthesis Workshop (SSW) 2023.
; 2023.'
apa: Seebauer, F., Kuhlmann, M., Haeb-Umbach, R., & Wagner, P. (2023). Re-examining
the quality dimensions of synthetic speech. 12th Speech Synthesis Workshop
(SSW) 2023.
bibtex: '@inproceedings{Seebauer_Kuhlmann_Haeb-Umbach_Wagner_2023, title={Re-examining
the quality dimensions of synthetic speech}, booktitle={12th Speech Synthesis
Workshop (SSW) 2023}, author={Seebauer, Fritz and Kuhlmann, Michael and Haeb-Umbach,
Reinhold and Wagner, Petra}, year={2023} }'
chicago: Seebauer, Fritz, Michael Kuhlmann, Reinhold Haeb-Umbach, and Petra Wagner.
“Re-Examining the Quality Dimensions of Synthetic Speech.” In 12th Speech Synthesis
Workshop (SSW) 2023, 2023.
ieee: F. Seebauer, M. Kuhlmann, R. Haeb-Umbach, and P. Wagner, “Re-examining the
quality dimensions of synthetic speech,” 2023.
mla: Seebauer, Fritz, et al. “Re-Examining the Quality Dimensions of Synthetic Speech.”
12th Speech Synthesis Workshop (SSW) 2023, 2023.
short: 'F. Seebauer, M. Kuhlmann, R. Haeb-Umbach, P. Wagner, in: 12th Speech Synthesis
Workshop (SSW) 2023, 2023.'
date_created: 2023-07-15T16:10:20Z
date_updated: 2023-10-25T08:42:56Z
department:
- _id: '54'
has_accepted_license: '1'
language:
- iso: eng
project:
- _id: '129'
grant_number: '438445824'
name: 'TRR 318 - C06: TRR 318 - Technisch unterstütztes Erklären von Stimmcharakteristika
(Teilprojekt C06)'
publication: 12th Speech Synthesis Workshop (SSW) 2023
status: public
title: Re-examining the quality dimensions of synthetic speech
type: conference
user_id: '242'
year: '2023'
...
---
_id: '35602'
abstract:
- lang: eng
text: "Continuous Speech Separation (CSS) has been proposed to address speech overlaps
during the analysis of realistic meeting-like conversations by eliminating any
overlaps before further processing.\r\nCSS separates a recording of arbitrarily
many speakers into a small number of overlap-free output channels, where each
output channel may contain speech of multiple speakers.\r\nThis is often done
by applying a conventional separation model trained with Utterance-level Permutation
Invariant Training (uPIT), which exclusively maps a speaker to an output channel,
in sliding window approach called stitching.\r\nRecently, we introduced an alternative
training scheme called Graph-PIT that teaches the separation network to directly
produce output streams in the required format without stitching.\r\nIt can handle
an arbitrary number of speakers as long as never more of them overlap at the same
time than the separator has output channels.\r\nIn this contribution, we further
investigate the Graph-PIT training scheme.\r\nWe show in extended experiments
that models trained with Graph-PIT also work in challenging reverberant conditions.\r\nModels
trained in this way are able to perform segment-less CSS, i.e., without stitching,
and achieve comparable and often better separation quality than the conventional
CSS with uPIT and stitching.\r\nWe simplify the training schedule for Graph-PIT
with the recently proposed Source Aggregated Signal-to-Distortion Ratio (SA-SDR)
loss.\r\nIt eliminates unfavorable properties of the previously used A-SDR loss
and thus enables training with Graph-PIT from scratch.\r\nGraph-PIT training relaxes
the constraints w.r.t. the allowed numbers of speakers and speaking patterns which
allows using a larger variety of training data.\r\nFurthermore, we introduce novel
signal-level evaluation metrics for meeting scenarios, namely the source-aggregated
scale- and convolution-invariant Signal-to-Distortion Ratio (SA-SI-SDR and SA-CI-SDR),
which are generalizations of the commonly used SDR-based metrics for the CSS case."
article_type: original
author:
- first_name: Thilo
full_name: von Neumann, Thilo
id: '49870'
last_name: von Neumann
orcid: https://orcid.org/0000-0002-7717-8670
- first_name: Keisuke
full_name: Kinoshita, Keisuke
last_name: Kinoshita
- first_name: Christoph
full_name: Boeddeker, Christoph
id: '40767'
last_name: Boeddeker
- first_name: Marc
full_name: Delcroix, Marc
last_name: Delcroix
- first_name: Reinhold
full_name: Haeb-Umbach, Reinhold
id: '242'
last_name: Haeb-Umbach
citation:
ama: 'von Neumann T, Kinoshita K, Boeddeker C, Delcroix M, Haeb-Umbach R. Segment-Less
Continuous Speech Separation of Meetings: Training and Evaluation Criteria. IEEE/ACM
Transactions on Audio, Speech, and Language Processing. 2023;31:576-589. doi:10.1109/taslp.2022.3228629'
apa: 'von Neumann, T., Kinoshita, K., Boeddeker, C., Delcroix, M., & Haeb-Umbach,
R. (2023). Segment-Less Continuous Speech Separation of Meetings: Training and
Evaluation Criteria. IEEE/ACM Transactions on Audio, Speech, and Language Processing,
31, 576–589. https://doi.org/10.1109/taslp.2022.3228629'
bibtex: '@article{von Neumann_Kinoshita_Boeddeker_Delcroix_Haeb-Umbach_2023, title={Segment-Less
Continuous Speech Separation of Meetings: Training and Evaluation Criteria}, volume={31},
DOI={10.1109/taslp.2022.3228629},
journal={IEEE/ACM Transactions on Audio, Speech, and Language Processing}, publisher={Institute
of Electrical and Electronics Engineers (IEEE)}, author={von Neumann, Thilo and
Kinoshita, Keisuke and Boeddeker, Christoph and Delcroix, Marc and Haeb-Umbach,
Reinhold}, year={2023}, pages={576–589} }'
chicago: 'Neumann, Thilo von, Keisuke Kinoshita, Christoph Boeddeker, Marc Delcroix,
and Reinhold Haeb-Umbach. “Segment-Less Continuous Speech Separation of Meetings:
Training and Evaluation Criteria.” IEEE/ACM Transactions on Audio, Speech,
and Language Processing 31 (2023): 576–89. https://doi.org/10.1109/taslp.2022.3228629.'
ieee: 'T. von Neumann, K. Kinoshita, C. Boeddeker, M. Delcroix, and R. Haeb-Umbach,
“Segment-Less Continuous Speech Separation of Meetings: Training and Evaluation
Criteria,” IEEE/ACM Transactions on Audio, Speech, and Language Processing,
vol. 31, pp. 576–589, 2023, doi: 10.1109/taslp.2022.3228629.'
mla: 'von Neumann, Thilo, et al. “Segment-Less Continuous Speech Separation of Meetings:
Training and Evaluation Criteria.” IEEE/ACM Transactions on Audio, Speech,
and Language Processing, vol. 31, Institute of Electrical and Electronics
Engineers (IEEE), 2023, pp. 576–89, doi:10.1109/taslp.2022.3228629.'
short: T. von Neumann, K. Kinoshita, C. Boeddeker, M. Delcroix, R. Haeb-Umbach,
IEEE/ACM Transactions on Audio, Speech, and Language Processing 31 (2023) 576–589.
date_created: 2023-01-09T17:24:17Z
date_updated: 2023-11-15T12:16:11Z
ddc:
- '000'
department:
- _id: '54'
doi: 10.1109/taslp.2022.3228629
file:
- access_level: open_access
content_type: application/pdf
creator: haebumb
date_created: 2023-01-09T17:46:05Z
date_updated: 2023-01-11T08:50:19Z
file_id: '35607'
file_name: main.pdf
file_size: 7185077
relation: main_file
file_date_updated: 2023-01-11T08:50:19Z
has_accepted_license: '1'
intvolume: ' 31'
keyword:
- Continuous Speech Separation
- Source Separation
- Graph-PIT
- Dynamic Programming
- Permutation Invariant Training
language:
- iso: eng
oa: '1'
page: 576-589
project:
- _id: '52'
name: 'PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing'
publication: IEEE/ACM Transactions on Audio, Speech, and Language Processing
publication_identifier:
issn:
- 2329-9290
- 2329-9304
publication_status: published
publisher: Institute of Electrical and Electronics Engineers (IEEE)
quality_controlled: '1'
status: public
title: 'Segment-Less Continuous Speech Separation of Meetings: Training and Evaluation
Criteria'
type: journal_article
user_id: '49870'
volume: 31
year: '2023'
...
---
_id: '48281'
abstract:
- lang: eng
text: "We propose a general framework to compute the word error rate (WER) of
ASR systems that process recordings containing multiple speakers at their input
and that produce multiple output word sequences (MIMO).\r\nSuch ASR systems
are typically required, e.g., for meeting transcription.\r\nWe provide an efficient
implementation based on a dynamic programming search in a multi-dimensional Levenshtein
distance tensor under the constraint that a reference utterance must be matched
consistently with one hypothesis output.\r\nThis also results in an efficient
implementation of the ORC WER which previously suffered from exponential complexity.\r\nWe
give an overview of commonly used WER definitions for multi-speaker scenarios
and show that they are specializations of the above MIMO WER tuned to particular
application scenarios.\r\nWe conclude with a discussion of the pros and cons
of the various WER definitions and a recommendation when to use which."
author:
- first_name: Thilo
full_name: von Neumann, Thilo
id: '49870'
last_name: von Neumann
orcid: https://orcid.org/0000-0002-7717-8670
- first_name: Christoph
full_name: Boeddeker, Christoph
id: '40767'
last_name: Boeddeker
- first_name: Keisuke
full_name: Kinoshita, Keisuke
last_name: Kinoshita
- first_name: Marc
full_name: Delcroix, Marc
last_name: Delcroix
- first_name: Reinhold
full_name: Haeb-Umbach, Reinhold
id: '242'
last_name: Haeb-Umbach
citation:
ama: 'von Neumann T, Boeddeker C, Kinoshita K, Delcroix M, Haeb-Umbach R. On Word
Error Rate Definitions and Their Efficient Computation for Multi-Speaker Speech
Recognition Systems. In: ICASSP 2023 - 2023 IEEE International Conference on
Acoustics, Speech and Signal Processing (ICASSP). IEEE; 2023. doi:10.1109/icassp49357.2023.10094784'
apa: von Neumann, T., Boeddeker, C., Kinoshita, K., Delcroix, M., & Haeb-Umbach,
R. (2023). On Word Error Rate Definitions and Their Efficient Computation for
Multi-Speaker Speech Recognition Systems. ICASSP 2023 - 2023 IEEE International
Conference on Acoustics, Speech and Signal Processing (ICASSP). https://doi.org/10.1109/icassp49357.2023.10094784
bibtex: '@inproceedings{von Neumann_Boeddeker_Kinoshita_Delcroix_Haeb-Umbach_2023,
title={On Word Error Rate Definitions and Their Efficient Computation for Multi-Speaker
Speech Recognition Systems}, DOI={10.1109/icassp49357.2023.10094784},
booktitle={ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech
and Signal Processing (ICASSP)}, publisher={IEEE}, author={von Neumann, Thilo
and Boeddeker, Christoph and Kinoshita, Keisuke and Delcroix, Marc and Haeb-Umbach,
Reinhold}, year={2023} }'
chicago: Neumann, Thilo von, Christoph Boeddeker, Keisuke Kinoshita, Marc Delcroix,
and Reinhold Haeb-Umbach. “On Word Error Rate Definitions and Their Efficient
Computation for Multi-Speaker Speech Recognition Systems.” In ICASSP 2023 -
2023 IEEE International Conference on Acoustics, Speech and Signal Processing
(ICASSP). IEEE, 2023. https://doi.org/10.1109/icassp49357.2023.10094784.
ieee: 'T. von Neumann, C. Boeddeker, K. Kinoshita, M. Delcroix, and R. Haeb-Umbach,
“On Word Error Rate Definitions and Their Efficient Computation for Multi-Speaker
Speech Recognition Systems,” 2023, doi: 10.1109/icassp49357.2023.10094784.'
mla: von Neumann, Thilo, et al. “On Word Error Rate Definitions and Their Efficient
Computation for Multi-Speaker Speech Recognition Systems.” ICASSP 2023 - 2023
IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP),
IEEE, 2023, doi:10.1109/icassp49357.2023.10094784.
short: 'T. von Neumann, C. Boeddeker, K. Kinoshita, M. Delcroix, R. Haeb-Umbach,
in: ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and
Signal Processing (ICASSP), IEEE, 2023.'
date_created: 2023-10-19T07:38:31Z
date_updated: 2023-11-15T12:14:15Z
ddc:
- '000'
department:
- _id: '54'
doi: 10.1109/icassp49357.2023.10094784
file:
- access_level: open_access
content_type: application/pdf
creator: tvn
date_created: 2023-10-19T07:39:57Z
date_updated: 2023-10-19T07:41:56Z
file_id: '48282'
file_name: ICASSP_2023_Meeting_Evaluation.pdf
file_size: 204994
relation: main_file
file_date_updated: 2023-10-19T07:41:56Z
has_accepted_license: '1'
keyword:
- Word Error Rate
- Meeting Recognition
- Levenshtein Distance
language:
- iso: eng
main_file_link:
- url: https://ieeexplore.ieee.org/document/10094784
oa: '1'
publication: ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech
and Signal Processing (ICASSP)
publication_status: published
publisher: IEEE
quality_controlled: '1'
related_material:
link:
- relation: software
url: https://github.com/fgnt/meeteval
status: public
title: On Word Error Rate Definitions and Their Efficient Computation for Multi-Speaker
Speech Recognition Systems
type: conference
user_id: '49870'
year: '2023'
...
---
_id: '48275'
abstract:
- lang: eng
text: "MeetEval is an open-source toolkit to evaluate all kinds of meeting transcription
systems.\r\nIt provides a unified interface for the computation of commonly used
Word Error Rates (WERs), specifically cpWER, ORC WER and MIMO WER along other
WER definitions.\r\nWe extend the cpWER computation by a temporal constraint to
ensure that only words are identified as correct when the temporal alignment is
plausible.\r\nThis leads to a better quality of the matching of the hypothesis
string to the reference string that more closely resembles the actual transcription
quality, and a system is penalized if it provides poor time annotations.\r\nSince
word-level timing information is often not available, we present a way to approximate
exact word-level timings from segment-level timings (e.g., a sentence) and show
that the approximation leads to a similar WER as a matching with exact word-level
annotations.\r\nAt the same time, the time constraint leads to a speedup of the
matching algorithm, which outweighs the additional overhead caused by processing
the time stamps."
author:
- first_name: Thilo
full_name: von Neumann, Thilo
id: '49870'
last_name: von Neumann
orcid: https://orcid.org/0000-0002-7717-8670
- first_name: Christoph
full_name: Boeddeker, Christoph
id: '40767'
last_name: Boeddeker
- first_name: Marc
full_name: Delcroix, Marc
last_name: Delcroix
- first_name: Reinhold
full_name: Haeb-Umbach, Reinhold
id: '242'
last_name: Haeb-Umbach
citation:
ama: 'von Neumann T, Boeddeker C, Delcroix M, Haeb-Umbach R. MeetEval: A Toolkit
for Computation of Word Error Rates for Meeting Transcription Systems. In: Proc.
CHiME 2023 Workshop on Speech Processing in Everyday Environments. ; 2023.'
apa: 'von Neumann, T., Boeddeker, C., Delcroix, M., & Haeb-Umbach, R. (2023).
MeetEval: A Toolkit for Computation of Word Error Rates for Meeting Transcription
Systems. Proc. CHiME 2023 Workshop on Speech Processing in Everyday Environments.
CHiME 2023 Workshop on Speech Processing in Everyday Environments, Dublin.'
bibtex: '@inproceedings{von Neumann_Boeddeker_Delcroix_Haeb-Umbach_2023, title={MeetEval:
A Toolkit for Computation of Word Error Rates for Meeting Transcription Systems},
booktitle={Proc. CHiME 2023 Workshop on Speech Processing in Everyday Environments},
author={von Neumann, Thilo and Boeddeker, Christoph and Delcroix, Marc and Haeb-Umbach,
Reinhold}, year={2023} }'
chicago: 'Neumann, Thilo von, Christoph Boeddeker, Marc Delcroix, and Reinhold Haeb-Umbach.
“MeetEval: A Toolkit for Computation of Word Error Rates for Meeting Transcription
Systems.” In Proc. CHiME 2023 Workshop on Speech Processing in Everyday Environments,
2023.'
ieee: 'T. von Neumann, C. Boeddeker, M. Delcroix, and R. Haeb-Umbach, “MeetEval:
A Toolkit for Computation of Word Error Rates for Meeting Transcription Systems,”
presented at the CHiME 2023 Workshop on Speech Processing in Everyday Environments,
Dublin, 2023.'
mla: 'von Neumann, Thilo, et al. “MeetEval: A Toolkit for Computation of Word Error
Rates for Meeting Transcription Systems.” Proc. CHiME 2023 Workshop on Speech
Processing in Everyday Environments, 2023.'
short: 'T. von Neumann, C. Boeddeker, M. Delcroix, R. Haeb-Umbach, in: Proc. CHiME
2023 Workshop on Speech Processing in Everyday Environments, 2023.'
conference:
location: Dublin
name: CHiME 2023 Workshop on Speech Processing in Everyday Environments
date_created: 2023-10-19T07:24:51Z
date_updated: 2023-11-15T12:14:02Z
ddc:
- '000'
department:
- _id: '54'
file:
- access_level: open_access
content_type: application/pdf
creator: tvn
date_created: 2023-10-19T07:19:59Z
date_updated: 2023-10-19T07:19:59Z
file_id: '48276'
file_name: Chime_7__MeetEval.pdf
file_size: 263744
relation: main_file
file_date_updated: 2023-10-19T07:19:59Z
has_accepted_license: '1'
keyword:
- Speech Recognition
- Word Error Rate
- Meeting Transcription
language:
- iso: eng
main_file_link:
- open_access: '1'
url: https://arxiv.org/abs/2307.11394
oa: '1'
project:
- _id: '52'
name: 'PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing'
publication: Proc. CHiME 2023 Workshop on Speech Processing in Everyday Environments
quality_controlled: '1'
related_material:
link:
- relation: software
url: https://github.com/fgnt/meeteval
status: public
title: 'MeetEval: A Toolkit for Computation of Word Error Rates for Meeting Transcription
Systems'
type: conference
user_id: '49870'
year: '2023'
...
---
_id: '49109'
abstract:
- lang: eng
text: "We propose a diarization system, that estimates “who spoke when” based on
spatial information, to be used as a front-end of a meeting transcription system
running on the signals gathered from an acoustic sensor network (ASN). Although
the spatial distribution of the microphones is advantageous, exploiting the
spatial diversity for diarization and signal enhancement is challenging, because
the microphones’ positions are typically unknown, and the recorded signals are
initially unsynchronized in general. Here, we approach these issues by first blindly
synchronizing the signals and then estimating time differences of arrival (TDOAs).
The TDOA information is exploited to estimate the speakers’ activity, even in
the presence of multiple speakers being simultaneously active. This speaker activity
information serves as a guide for a spatial mixture model, on which basis the
individual speaker’s signals are extracted via beamforming. Finally, the extracted
signals are forwarded to a speech recognizer. Additionally, a novel initialization
scheme for spatial mixture models based on the TDOA estimates is proposed. Experiments
conducted on real recordings from the LibriWASN data set have shown that our proposed
system is advantageous compared to a system using a spatial mixture model, which
does not make use of external diarization information."
author:
- first_name: Tobias
full_name: Gburrek, Tobias
id: '44006'
last_name: Gburrek
- first_name: Joerg
full_name: Schmalenstroeer, Joerg
id: '460'
last_name: Schmalenstroeer
- first_name: Reinhold
full_name: Haeb-Umbach, Reinhold
id: '242'
last_name: Haeb-Umbach
citation:
ama: 'Gburrek T, Schmalenstroeer J, Haeb-Umbach R. Spatial Diarization for Meeting
Transcription with Ad-Hoc Acoustic Sensor Networks. In: Proc. Asilomar Conference
on Signals, Systems, and Computers. ; 2023.'
apa: Gburrek, T., Schmalenstroeer, J., & Haeb-Umbach, R. (2023). Spatial Diarization
for Meeting Transcription with Ad-Hoc Acoustic Sensor Networks. Proc. Asilomar
Conference on Signals, Systems, and Computers. 57th Asilomar Conference on
Signals, Systems, and Computers.
bibtex: '@inproceedings{Gburrek_Schmalenstroeer_Haeb-Umbach_2023, title={Spatial
Diarization for Meeting Transcription with Ad-Hoc Acoustic Sensor Networks}, booktitle={Proc.
Asilomar Conference on Signals, Systems, and Computers}, author={Gburrek, Tobias
and Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold}, year={2023} }'
chicago: Gburrek, Tobias, Joerg Schmalenstroeer, and Reinhold Haeb-Umbach. “Spatial
Diarization for Meeting Transcription with Ad-Hoc Acoustic Sensor Networks.” In
Proc. Asilomar Conference on Signals, Systems, and Computers, 2023.
ieee: T. Gburrek, J. Schmalenstroeer, and R. Haeb-Umbach, “Spatial Diarization for
Meeting Transcription with Ad-Hoc Acoustic Sensor Networks,” presented at the
57th Asilomar Conference on Signals, Systems, and Computers, 2023.
mla: Gburrek, Tobias, et al. “Spatial Diarization for Meeting Transcription with
Ad-Hoc Acoustic Sensor Networks.” Proc. Asilomar Conference on Signals, Systems,
and Computers, 2023.
short: 'T. Gburrek, J. Schmalenstroeer, R. Haeb-Umbach, in: Proc. Asilomar Conference
on Signals, Systems, and Computers, 2023.'
conference:
end_date: 2023-11-01
name: 57th Asilomar Conference on Signals, Systems, and Computers
start_date: 2023-10-31
date_created: 2023-11-22T07:52:29Z
date_updated: 2023-11-22T07:58:49Z
ddc:
- '004'
department:
- _id: '54'
file:
- access_level: open_access
content_type: application/pdf
creator: schmalen
date_created: 2023-11-22T07:51:18Z
date_updated: 2023-11-22T07:58:49Z
file_id: '49110'
file_name: asilomar.pdf
file_size: 212317
relation: main_file
file_date_updated: 2023-11-22T07:58:49Z
has_accepted_license: '1'
keyword:
- Diarization
- time difference of arrival
- ad-hoc acoustic sensor network
- meeting transcription
language:
- iso: eng
oa: '1'
publication: Proc. Asilomar Conference on Signals, Systems, and Computers
quality_controlled: '1'
status: public
title: Spatial Diarization for Meeting Transcription with Ad-Hoc Acoustic Sensor Networks
type: conference
user_id: '460'
year: '2023'
...
---
_id: '49111'
abstract:
- lang: eng
text: Due to the high variation in the application requirements of sound event detection
(SED) systems, it is not sufficient to evaluate systems only in a single operating
mode. Therefore, the community recently adopted the polyphonic sound detection
score (PSDS) as an evaluation metric, which is the normalized area under the PSD
receiver operating characteristic (PSD-ROC). It summarizes the system performance
over a range of operating modes resulting from varying the decision threshold
that is used to translate the system output scores into a binary detection output.
Hence, it provides a more complete picture of the overall system behavior and
is less biased by specific threshold tuning. However, besides the decision threshold
there is also the post-processing that can be changed to enter another operating
mode. In this paper we propose the post-processing independent PSDS (piPSDS) as
a generalization of the PSDS. Here, the post-processing independent PSD-ROC includes
operating points from varying post-processings with varying decision thresholds.
Thus, it summarizes even more operating modes of an SED system and allows for
system comparison without the need of implementing a post-processing and without
a bias due to different post-processings. While piPSDS can in principle combine
different types of post-processing, we here, as a first step, present median filter
independent PSDS (miPSDS) results for this year’s DCASE Challenge Task4a systems.
Source code is publicly available in our sed_scores_eval package (https://github.com/fgnt/sed_scores_eval).
author:
- first_name: Janek
full_name: Ebbers, Janek
last_name: Ebbers
- first_name: Reinhold
full_name: Haeb-Umbach, Reinhold
last_name: Haeb-Umbach
- first_name: Romain
full_name: Serizel, Romain
last_name: Serizel
citation:
ama: 'Ebbers J, Haeb-Umbach R, Serizel R. Post-Processing Independent Evaluation
of Sound Event Detection Systems. In: Proceedings of the 8th Detection and
Classification of Acoustic Scenes and Events 2023 Workshop (DCASE2023). ;
2023:36–40.'
apa: Ebbers, J., Haeb-Umbach, R., & Serizel, R. (2023). Post-Processing Independent
Evaluation of Sound Event Detection Systems. Proceedings of the 8th Detection
and Classification of Acoustic Scenes and Events 2023 Workshop (DCASE2023),
36–40.
bibtex: '@inproceedings{Ebbers_Haeb-Umbach_Serizel_2023, place={Tampere, Finland},
title={Post-Processing Independent Evaluation of Sound Event Detection Systems},
booktitle={Proceedings of the 8th Detection and Classification of Acoustic Scenes
and Events 2023 Workshop (DCASE2023)}, author={Ebbers, Janek and Haeb-Umbach,
Reinhold and Serizel, Romain}, year={2023}, pages={36–40} }'
chicago: Ebbers, Janek, Reinhold Haeb-Umbach, and Romain Serizel. “Post-Processing
Independent Evaluation of Sound Event Detection Systems.” In Proceedings of
the 8th Detection and Classification of Acoustic Scenes and Events 2023 Workshop
(DCASE2023), 36–40. Tampere, Finland, 2023.
ieee: J. Ebbers, R. Haeb-Umbach, and R. Serizel, “Post-Processing Independent Evaluation
of Sound Event Detection Systems,” in Proceedings of the 8th Detection and
Classification of Acoustic Scenes and Events 2023 Workshop (DCASE2023), 2023,
pp. 36–40.
mla: Ebbers, Janek, et al. “Post-Processing Independent Evaluation of Sound Event
Detection Systems.” Proceedings of the 8th Detection and Classification of
Acoustic Scenes and Events 2023 Workshop (DCASE2023), 2023, pp. 36–40.
short: 'J. Ebbers, R. Haeb-Umbach, R. Serizel, in: Proceedings of the 8th Detection
and Classification of Acoustic Scenes and Events 2023 Workshop (DCASE2023), Tampere,
Finland, 2023, pp. 36–40.'
date_created: 2023-11-22T08:20:26Z
date_updated: 2023-11-22T08:26:14Z
ddc:
- '000'
department:
- _id: '54'
file:
- access_level: closed
content_type: application/pdf
creator: ebbers
date_created: 2023-11-22T08:25:08Z
date_updated: 2023-11-22T08:25:08Z
file_id: '49112'
file_name: dcase2023_ebbers.pdf
file_size: 221875
relation: main_file
success: 1
file_date_updated: 2023-11-22T08:25:08Z
has_accepted_license: '1'
language:
- iso: eng
page: 36-40
place: Tampere, Finland
project:
- _id: '52'
name: 'PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing'
publication: Proceedings of the 8th Detection and Classification of Acoustic Scenes
and Events 2023 Workshop (DCASE2023)
quality_controlled: '1'
status: public
title: Post-Processing Independent Evaluation of Sound Event Detection Systems
type: conference
user_id: '34851'
year: '2023'
...
---
_id: '44849'
author:
- first_name: Frederik
full_name: Rautenberg, Frederik
id: '72602'
last_name: Rautenberg
- first_name: Michael
full_name: Kuhlmann, Michael
id: '49871'
last_name: Kuhlmann
- first_name: Janek
full_name: Ebbers, Janek
id: '34851'
last_name: Ebbers
- first_name: Jana
full_name: Wiechmann, Jana
last_name: Wiechmann
- first_name: Fritz
full_name: Seebauer, Fritz
last_name: Seebauer
- first_name: Petra
full_name: Wagner, Petra
last_name: Wagner
- first_name: Reinhold
full_name: Haeb-Umbach, Reinhold
id: '242'
last_name: Haeb-Umbach
citation:
ama: 'Rautenberg F, Kuhlmann M, Ebbers J, et al. Speech Disentanglement for Analysis
and Modification of Acoustic and Perceptual Speaker Characteristics. In: Fortschritte
Der Akustik - DAGA 2023. ; 2023:1409-1412.'
apa: Rautenberg, F., Kuhlmann, M., Ebbers, J., Wiechmann, J., Seebauer, F., Wagner,
P., & Haeb-Umbach, R. (2023). Speech Disentanglement for Analysis and Modification
of Acoustic and Perceptual Speaker Characteristics. Fortschritte Der Akustik
- DAGA 2023, 1409–1412.
bibtex: '@inproceedings{Rautenberg_Kuhlmann_Ebbers_Wiechmann_Seebauer_Wagner_Haeb-Umbach_2023,
title={Speech Disentanglement for Analysis and Modification of Acoustic and Perceptual
Speaker Characteristics}, booktitle={Fortschritte der Akustik - DAGA 2023}, author={Rautenberg,
Frederik and Kuhlmann, Michael and Ebbers, Janek and Wiechmann, Jana and Seebauer,
Fritz and Wagner, Petra and Haeb-Umbach, Reinhold}, year={2023}, pages={1409–1412}
}'
chicago: Rautenberg, Frederik, Michael Kuhlmann, Janek Ebbers, Jana Wiechmann, Fritz
Seebauer, Petra Wagner, and Reinhold Haeb-Umbach. “Speech Disentanglement for
Analysis and Modification of Acoustic and Perceptual Speaker Characteristics.”
In Fortschritte Der Akustik - DAGA 2023, 1409–12, 2023.
ieee: F. Rautenberg et al., “Speech Disentanglement for Analysis and Modification
of Acoustic and Perceptual Speaker Characteristics,” in Fortschritte der Akustik
- DAGA 2023, Hamburg, 2023, pp. 1409–1412.
mla: Rautenberg, Frederik, et al. “Speech Disentanglement for Analysis and Modification
of Acoustic and Perceptual Speaker Characteristics.” Fortschritte Der Akustik
- DAGA 2023, 2023, pp. 1409–12.
short: 'F. Rautenberg, M. Kuhlmann, J. Ebbers, J. Wiechmann, F. Seebauer, P. Wagner,
R. Haeb-Umbach, in: Fortschritte Der Akustik - DAGA 2023, 2023, pp. 1409–1412.'
conference:
end_date: 2023-03-09
location: Hamburg
name: DAGA 2023 - 49. Jahrestagung für Akustik
start_date: 2023-03-06
date_created: 2023-05-15T08:48:54Z
date_updated: 2024-02-29T17:05:16Z
ddc:
- '000'
department:
- _id: '54'
- _id: '660'
file:
- access_level: open_access
content_type: application/pdf
creator: frra
date_created: 2024-02-29T16:15:12Z
date_updated: 2024-02-29T16:15:12Z
file_id: '52221'
file_name: Daga_2023_Rautenberg_Paper.pdf
file_size: 289493
relation: main_file
file_date_updated: 2024-02-29T16:15:12Z
has_accepted_license: '1'
language:
- iso: eng
main_file_link:
- open_access: '1'
url: https://pub.dega-akustik.de/DAGA_2023/data/articles/000105.pdf
oa: '1'
page: 1409-1412
project:
- _id: '129'
grant_number: '438445824'
name: 'TRR 318 - C06: TRR 318 - Technisch unterstütztes Erklären von Stimmcharakteristika
(Teilprojekt C06)'
publication: Fortschritte der Akustik - DAGA 2023
publication_status: published
status: public
title: Speech Disentanglement for Analysis and Modification of Acoustic and Perceptual
Speaker Characteristics
type: conference
user_id: '72602'
year: '2023'
...
---
_id: '33669'
abstract:
- lang: eng
text: Far-field multi-speaker automatic speech recognition (ASR) has drawn increasing
attention in recent years. Most existing methods feature a signal processing frontend
and an ASR backend. In realistic scenarios, these modules are usually trained
separately or progressively, which suffers from either inter-module mismatch or
a complicated training process. In this paper, we propose an end-to-end multi-channel
model that jointly optimizes the speech enhancement (including speech dereverberation,
denoising, and separation) frontend and the ASR backend as a single system. To
the best of our knowledge, this is the first work that proposes to optimize dereverberation,
beamforming, and multi-speaker ASR in a fully end-to-end manner. The frontend
module consists of a weighted prediction error (WPE) based submodule for dereverberation
and a neural beamformer for denoising and speech separation. For the backend,
we adopt a widely used end-to-end (E2E) ASR architecture. It is worth noting that
the entire model is differentiable and can be optimized in a fully end-to-end
manner using only the ASR criterion, without the need of parallel signal-level
labels. We evaluate the proposed model on several multi-speaker benchmark datasets,
and experimental results show that the fully E2E ASR model can achieve competitive
performance on both noisy and reverberant conditions, with over 30% relative word
error rate (WER) reduction over the single-channel baseline systems.
author:
- first_name: Wangyou
full_name: Zhang, Wangyou
last_name: Zhang
- first_name: Xuankai
full_name: Chang, Xuankai
last_name: Chang
- first_name: Christoph
full_name: Boeddeker, Christoph
id: '40767'
last_name: Boeddeker
- first_name: Tomohiro
full_name: Nakatani, Tomohiro
last_name: Nakatani
- first_name: Shinji
full_name: Watanabe, Shinji
last_name: Watanabe
- first_name: Yanmin
full_name: Qian, Yanmin
last_name: Qian
citation:
ama: Zhang W, Chang X, Boeddeker C, Nakatani T, Watanabe S, Qian Y. End-to-End Dereverberation,
Beamforming, and Speech Recognition in A Cocktail Party. IEEE/ACM Transactions
on Audio, Speech, and Language Processing. Published online 2022. doi:10.1109/TASLP.2022.3209942
apa: Zhang, W., Chang, X., Boeddeker, C., Nakatani, T., Watanabe, S., & Qian,
Y. (2022). End-to-End Dereverberation, Beamforming, and Speech Recognition in
A Cocktail Party. IEEE/ACM Transactions on Audio, Speech, and Language Processing.
https://doi.org/10.1109/TASLP.2022.3209942
bibtex: '@article{Zhang_Chang_Boeddeker_Nakatani_Watanabe_Qian_2022, title={End-to-End
Dereverberation, Beamforming, and Speech Recognition in A Cocktail Party}, DOI={10.1109/TASLP.2022.3209942},
journal={IEEE/ACM Transactions on Audio, Speech, and Language Processing}, author={Zhang,
Wangyou and Chang, Xuankai and Boeddeker, Christoph and Nakatani, Tomohiro and
Watanabe, Shinji and Qian, Yanmin}, year={2022} }'
chicago: Zhang, Wangyou, Xuankai Chang, Christoph Boeddeker, Tomohiro Nakatani,
Shinji Watanabe, and Yanmin Qian. “End-to-End Dereverberation, Beamforming, and
Speech Recognition in A Cocktail Party.” IEEE/ACM Transactions on Audio, Speech,
and Language Processing, 2022. https://doi.org/10.1109/TASLP.2022.3209942.
ieee: 'W. Zhang, X. Chang, C. Boeddeker, T. Nakatani, S. Watanabe, and Y. Qian,
“End-to-End Dereverberation, Beamforming, and Speech Recognition in A Cocktail
Party,” IEEE/ACM Transactions on Audio, Speech, and Language Processing,
2022, doi: 10.1109/TASLP.2022.3209942.'
mla: Zhang, Wangyou, et al. “End-to-End Dereverberation, Beamforming, and Speech
Recognition in A Cocktail Party.” IEEE/ACM Transactions on Audio, Speech, and
Language Processing, 2022, doi:10.1109/TASLP.2022.3209942.
short: W. Zhang, X. Chang, C. Boeddeker, T. Nakatani, S. Watanabe, Y. Qian, IEEE/ACM
Transactions on Audio, Speech, and Language Processing (2022).
date_created: 2022-10-11T07:27:51Z
date_updated: 2022-12-05T12:35:31Z
ddc:
- '000'
department:
- _id: '54'
doi: 10.1109/TASLP.2022.3209942
file:
- access_level: open_access
content_type: application/pdf
creator: huesera
date_created: 2022-10-11T07:23:13Z
date_updated: 2022-10-11T07:23:13Z
file_id: '33674'
file_name: End-to-End_Dereverberation_Beamforming_and_Speech_Recognition_in_A_Cocktail_Party.pdf
file_size: 6167931
relation: main_file
file_date_updated: 2022-10-11T07:23:13Z
has_accepted_license: '1'
language:
- iso: eng
oa: '1'
publication: IEEE/ACM Transactions on Audio, Speech, and Language Processing
publication_identifier:
issn:
- 2329-9290
- 2329-9304
publication_status: published
related_material:
link:
- relation: confirmation
url: https://ieeexplore.ieee.org/abstract/document/9904314
status: public
title: End-to-End Dereverberation, Beamforming, and Speech Recognition in A Cocktail
Party
type: journal_article
user_id: '40767'
year: '2022'
...
---
_id: '33954'
author:
- first_name: Christoph
full_name: Boeddeker, Christoph
id: '40767'
last_name: Boeddeker
- first_name: Tobias
full_name: Cord-Landwehr, Tobias
id: '44393'
last_name: Cord-Landwehr
- first_name: Thilo
full_name: von Neumann, Thilo
id: '49870'
last_name: von Neumann
orcid: https://orcid.org/0000-0002-7717-8670
- first_name: Reinhold
full_name: Haeb-Umbach, Reinhold
id: '242'
last_name: Haeb-Umbach
citation:
ama: 'Boeddeker C, Cord-Landwehr T, von Neumann T, Haeb-Umbach R. An Initialization
Scheme for Meeting Separation with Spatial Mixture Models. In: Interspeech
2022. ISCA; 2022. doi:10.21437/interspeech.2022-10929'
apa: Boeddeker, C., Cord-Landwehr, T., von Neumann, T., & Haeb-Umbach, R. (2022).
An Initialization Scheme for Meeting Separation with Spatial Mixture Models. Interspeech
2022. https://doi.org/10.21437/interspeech.2022-10929
bibtex: '@inproceedings{Boeddeker_Cord-Landwehr_von Neumann_Haeb-Umbach_2022, title={An
Initialization Scheme for Meeting Separation with Spatial Mixture Models}, DOI={10.21437/interspeech.2022-10929},
booktitle={Interspeech 2022}, publisher={ISCA}, author={Boeddeker, Christoph and
Cord-Landwehr, Tobias and von Neumann, Thilo and Haeb-Umbach, Reinhold}, year={2022}
}'
chicago: Boeddeker, Christoph, Tobias Cord-Landwehr, Thilo von Neumann, and Reinhold
Haeb-Umbach. “An Initialization Scheme for Meeting Separation with Spatial Mixture
Models.” In Interspeech 2022. ISCA, 2022. https://doi.org/10.21437/interspeech.2022-10929.
ieee: 'C. Boeddeker, T. Cord-Landwehr, T. von Neumann, and R. Haeb-Umbach, “An Initialization
Scheme for Meeting Separation with Spatial Mixture Models,” 2022, doi: 10.21437/interspeech.2022-10929.'
mla: Boeddeker, Christoph, et al. “An Initialization Scheme for Meeting Separation
with Spatial Mixture Models.” Interspeech 2022, ISCA, 2022, doi:10.21437/interspeech.2022-10929.
short: 'C. Boeddeker, T. Cord-Landwehr, T. von Neumann, R. Haeb-Umbach, in: Interspeech
2022, ISCA, 2022.'
date_created: 2022-10-28T10:53:56Z
date_updated: 2022-10-28T10:57:22Z
department:
- _id: '54'
doi: 10.21437/interspeech.2022-10929
language:
- iso: eng
main_file_link:
- open_access: '1'
url: https://www.isca-speech.org/archive/pdfs/interspeech_2022/boeddeker22_interspeech.pdf
oa: '1'
project:
- _id: '52'
name: 'PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing'
publication: Interspeech 2022
publication_status: published
publisher: ISCA
status: public
title: An Initialization Scheme for Meeting Separation with Spatial Mixture Models
type: conference
user_id: '40767'
year: '2022'
...
---
_id: '33471'
abstract:
- lang: eng
text: "The intelligibility of demodulated audio signals from analog high frequency
transmissions, e.g., using single-sideband (SSB) modulation, can be severely
degraded by channel distortions and/or a mismatch between modulation and demodulation
carrier frequency. In this work a neural network (NN)-based approach for carrier
frequency offset (CFO) estimation from demodulated SSB signals is proposed, whereby
a task specific architecture is presented. Additionally, a simulation framework
for SSB signals is introduced and utilized for training the NNs. The CFO estimator
is combined with a speech enhancement network to investigate its influence on
the enhancement performance. The NN-based system is compared to a recently proposed
pitch tracking based approach on publicly available data from real high frequency
transmissions. Experiments show that the NN exhibits good CFO estimation properties
and results in significant improvements in speech intelligibility, especially
when combined with a noise reduction network."
author:
- first_name: Jens
full_name: Heitkämper, Jens
id: '27643'
last_name: Heitkämper
- first_name: Joerg
full_name: Schmalenstroeer, Joerg
id: '460'
last_name: Schmalenstroeer
- first_name: Reinhold
full_name: Haeb-Umbach, Reinhold
id: '242'
last_name: Haeb-Umbach
citation:
ama: 'Heitkämper J, Schmalenstroeer J, Haeb-Umbach R. Neural Network Based Carrier
Frequency Offset Estimation From Speech Transmitted Over High Frequency Channels.
In: Proceedings of the 30th European Signal Processing Conference (EUSIPCO).'
apa: Heitkämper, J., Schmalenstroeer, J., & Haeb-Umbach, R. (n.d.). Neural Network
Based Carrier Frequency Offset Estimation From Speech Transmitted Over High Frequency
Channels. Proceedings of the 30th European Signal Processing Conference (EUSIPCO).
30th European Signal Processing Conference (EUSIPCO), Belgrad.
bibtex: '@inproceedings{Heitkämper_Schmalenstroeer_Haeb-Umbach, place={Belgrad},
title={Neural Network Based Carrier Frequency Offset Estimation From Speech Transmitted
Over High Frequency Channels}, booktitle={Proceedings of the 30th European Signal
Processing Conference (EUSIPCO)}, author={Heitkämper, Jens and Schmalenstroeer,
Joerg and Haeb-Umbach, Reinhold} }'
chicago: Heitkämper, Jens, Joerg Schmalenstroeer, and Reinhold Haeb-Umbach. “Neural
Network Based Carrier Frequency Offset Estimation From Speech Transmitted Over
High Frequency Channels.” In Proceedings of the 30th European Signal Processing
Conference (EUSIPCO). Belgrad, n.d.
ieee: J. Heitkämper, J. Schmalenstroeer, and R. Haeb-Umbach, “Neural Network Based
Carrier Frequency Offset Estimation From Speech Transmitted Over High Frequency
Channels,” presented at the 30th European Signal Processing Conference (EUSIPCO),
Belgrad.
mla: Heitkämper, Jens, et al. “Neural Network Based Carrier Frequency Offset Estimation
From Speech Transmitted Over High Frequency Channels.” Proceedings of the 30th
European Signal Processing Conference (EUSIPCO).
short: 'J. Heitkämper, J. Schmalenstroeer, R. Haeb-Umbach, in: Proceedings of the
30th European Signal Processing Conference (EUSIPCO), Belgrad, n.d.'
conference:
end_date: 2022-09-02
location: Belgrad
name: 30th European Signal Processing Conference (EUSIPCO)
start_date: 2022-08-29
date_created: 2022-09-22T10:56:13Z
date_updated: 2023-10-26T08:15:57Z
ddc:
- '000'
department:
- _id: '54'
file:
- access_level: closed
content_type: application/pdf
creator: jensheit
date_created: 2022-09-22T10:48:31Z
date_updated: 2022-09-22T10:48:31Z
file_id: '33472'
file_name: cfo.pdf
file_size: 1231379
relation: main_file
success: 1
file_date_updated: 2022-09-22T10:48:31Z
has_accepted_license: '1'
language:
- iso: eng
place: Belgrad
project:
- _id: '52'
name: 'PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing'
publication: Proceedings of the 30th European Signal Processing Conference (EUSIPCO)
publication_status: accepted
quality_controlled: '1'
status: public
title: Neural Network Based Carrier Frequency Offset Estimation From Speech Transmitted
Over High Frequency Channels
type: conference
user_id: '460'
year: '2022'
...
---
_id: '33806'
author:
- first_name: Haitham
full_name: Afifi, Haitham
last_name: Afifi
- first_name: Holger
full_name: Karl, Holger
last_name: Karl
- first_name: Tobias
full_name: Gburrek, Tobias
id: '44006'
last_name: Gburrek
- first_name: Joerg
full_name: Schmalenstroeer, Joerg
id: '460'
last_name: Schmalenstroeer
citation:
ama: 'Afifi H, Karl H, Gburrek T, Schmalenstroeer J. Data-driven Time Synchronization
in Wireless Multimedia Networks. In: 2022 International Wireless Communications
and Mobile Computing (IWCMC). IEEE; 2022. doi:10.1109/iwcmc55113.2022.9824980'
apa: Afifi, H., Karl, H., Gburrek, T., & Schmalenstroeer, J. (2022). Data-driven
Time Synchronization in Wireless Multimedia Networks. 2022 International Wireless
Communications and Mobile Computing (IWCMC). https://doi.org/10.1109/iwcmc55113.2022.9824980
bibtex: '@inproceedings{Afifi_Karl_Gburrek_Schmalenstroeer_2022, title={Data-driven
Time Synchronization in Wireless Multimedia Networks}, DOI={10.1109/iwcmc55113.2022.9824980},
booktitle={2022 International Wireless Communications and Mobile Computing (IWCMC)},
publisher={IEEE}, author={Afifi, Haitham and Karl, Holger and Gburrek, Tobias
and Schmalenstroeer, Joerg}, year={2022} }'
chicago: Afifi, Haitham, Holger Karl, Tobias Gburrek, and Joerg Schmalenstroeer.
“Data-Driven Time Synchronization in Wireless Multimedia Networks.” In 2022
International Wireless Communications and Mobile Computing (IWCMC). IEEE,
2022. https://doi.org/10.1109/iwcmc55113.2022.9824980.
ieee: 'H. Afifi, H. Karl, T. Gburrek, and J. Schmalenstroeer, “Data-driven Time
Synchronization in Wireless Multimedia Networks,” 2022, doi: 10.1109/iwcmc55113.2022.9824980.'
mla: Afifi, Haitham, et al. “Data-Driven Time Synchronization in Wireless Multimedia
Networks.” 2022 International Wireless Communications and Mobile Computing
(IWCMC), IEEE, 2022, doi:10.1109/iwcmc55113.2022.9824980.
short: 'H. Afifi, H. Karl, T. Gburrek, J. Schmalenstroeer, in: 2022 International
Wireless Communications and Mobile Computing (IWCMC), IEEE, 2022.'
date_created: 2022-10-18T09:24:17Z
date_updated: 2023-10-26T08:16:07Z
department:
- _id: '54'
doi: 10.1109/iwcmc55113.2022.9824980
language:
- iso: eng
publication: 2022 International Wireless Communications and Mobile Computing (IWCMC)
publication_status: published
publisher: IEEE
quality_controlled: '1'
status: public
title: Data-driven Time Synchronization in Wireless Multimedia Networks
type: conference
user_id: '460'
year: '2022'
...