% Leutnant, Krueger, Haeb-Umbach (2014): journal article on an observation model
% for noisy reverberant speech in the logarithmic mel power spectral domain.
% The author list is single-braced so that each " and " separates a name.
@article{11861,
  abstract     = {In this contribution we present a theoretical and experimental investigation into the effects of reverberation and noise on features in the logarithmic mel power spectral domain, an intermediate stage in the computation of the mel frequency cepstral coefficients, prevalent in automatic speech recognition (ASR). Gaining insight into the complex interaction between clean speech, noise, and noisy reverberant speech features is essential for any ASR system to be robust against noise and reverberation present in distant microphone input signals. The findings are gathered in a probabilistic formulation of an observation model which may be used in model-based feature compensation schemes. The proposed observation model extends previous models in three major directions: First, the contribution of additive background noise to the observation error is explicitly taken into account. Second, an energy compensation constant is introduced which ensures an unbiased estimate of the reverberant speech features, and, third, a recursive variant of the observation model is developed resulting in reduced computational complexity when used in model-based feature compensation. The experimental section is used to evaluate the accuracy of the model and to describe how its parameters can be determined from test data.},
  author       = {Leutnant, Volker and Krueger, Alexander and Haeb-Umbach, Reinhold},
  issn         = {2329-9290},
  journal      = {{IEEE/ACM} Transactions on Audio, Speech, and Language Processing},
  keywords     = {computational complexity, reverberation, speech recognition, automatic speech recognition, background noise, clean speech, energy compensation, logarithmic mel power spectral domain, mel frequency cepstral coefficients, microphone input signals, model-based feature compensation schemes, noisy reverberant speech automatic recognition, noisy reverberant speech features, Atmospheric modeling, Computational modeling, Noise, Noise measurement, Speech, Vectors, Model-based feature compensation, observation model for reverberant and noisy speech, recursive observation model, robust automatic speech recognition},
  number       = {1},
  pages        = {95--109},
  title        = {A New Observation Model in the Logarithmic Mel Power Spectral Domain for the Automatic Recognition of Noisy Reverberant Speech},
  doi          = {10.1109/TASLP.2013.2285480},
  volume       = {22},
  year         = {2014},
}

% Kinoshita et al. (2013): workshop paper introducing the REVERB challenge
% evaluation framework for dereverberation and reverberant-speech ASR.
% NOTE(review): pages 22--23 looks unusually short for this paper and may not be
% a page range at all -- verify against the published proceedings.
@inproceedings{11841,
  abstract     = {Recently, substantial progress has been made in the field of reverberant speech signal processing, including both single- and multichannel de-reverberation techniques, and automatic speech recognition (ASR) techniques robust to reverberation. To evaluate state-of-the-art algorithms and obtain new insights regarding potential future research directions, we propose a common evaluation framework including datasets, tasks, and evaluation metrics for both speech enhancement and ASR techniques. The proposed framework will be used as a common basis for the REVERB (REverberant Voice Enhancement and Recognition Benchmark) challenge. This paper describes the rationale behind the challenge, and provides a detailed description of the evaluation framework and benchmark results.},
  author       = {Kinoshita, Keisuke and Delcroix, Marc and Yoshioka, Takuya and Nakatani, Tomohiro and Habets, Emanuel and Haeb-Umbach, Reinhold and Leutnant, Volker and Sehr, Armin and Kellermann, Walter and Maas, Roland and Gannot, Sharon and Raj, Bhiksha},
  booktitle    = {{IEEE} Workshop on Applications of Signal Processing to Audio and Acoustics},
  keywords     = {Reverberant speech, dereverberation, ASR, evaluation, challenge},
  pages        = {22--23},
  title        = {The {REVERB} challenge: a common evaluation framework for dereverberation and recognition of reverberant speech},
  year         = {2013},
}

% Krueger, Haeb-Umbach (2010): journal article on model-based enhancement of
% logarithmic Mel power spectral features for reverberant speech recognition.
% The author list is single-braced so that " and " separates the two names.
@article{11846,
  abstract     = {In this paper, we present a new technique for automatic speech recognition (ASR) in reverberant environments. Our approach is aimed at the enhancement of the logarithmic Mel power spectrum, which is computed at an intermediate stage to obtain the widely used Mel frequency cepstral coefficients (MFCCs). Given the reverberant logarithmic Mel power spectral coefficients (LMPSCs), a minimum mean square error estimate of the clean LMPSCs is computed by carrying out Bayesian inference. We employ switching linear dynamical models as an a priori model for the dynamics of the clean LMPSCs. Further, we derive a stochastic observation model which relates the clean to the reverberant LMPSCs through a simplified model of the room impulse response (RIR). This model requires only two parameters, namely RIR energy and reverberation time, which can be estimated from the captured microphone signal. The performance of the proposed enhancement technique is studied on the AURORA5 database and compared to that of constrained maximum-likelihood linear regression (CMLLR). It is shown by experimental results that our approach significantly outperforms CMLLR and that up to 80\% of the errors caused by the reverberation are recovered. In addition to the fact that the approach is compatible with the standard MFCC feature vectors, it leaves the ASR back-end unchanged. It is of moderate computational complexity and suitable for real time applications.},
  author       = {Krueger, Alexander and Haeb-Umbach, Reinhold},
  journal      = {{IEEE} Transactions on Audio, Speech, and Language Processing},
  keywords     = {ASR, AURORA5 database, automatic speech recognition, Bayesian inference, belief networks, CMLLR, computational complexity, constrained maximum likelihood linear regression, least mean squares methods, LMPSC computation, logarithmic Mel power spectrum, maximum likelihood estimation, Mel frequency cepstral coefficients, MFCC feature vectors, microphone signal, minimum mean square error estimation, model-based feature enhancement, regression analysis, reverberant speech recognition, reverberation, RIR energy, room impulse response, speech recognition, stochastic observation model, stochastic processes},
  number       = {7},
  pages        = {1692--1707},
  title        = {Model-Based Feature Enhancement for Reverberant Speech Recognition},
  doi          = {10.1109/TASL.2010.2049684},
  volume       = {18},
  year         = {2010},
}