% Leutnant, Krueger, Haeb-Umbach (2014): journal article on an observation model
% for noisy reverberant speech features in the logarithmic mel power spectral domain.
% Every value uses a single brace pair. The author list in particular must not be
% wrapped in a second pair, or BibTeX cannot split it on " and " and reads all
% three authors as one indivisible name. The title is left unprotected so the
% bibliography style decides its casing; it contains no proper nouns or acronyms.
@article{11861,
  author       = {Leutnant, Volker and Krueger, Alexander and Haeb-Umbach, Reinhold},
  title        = {A New Observation Model in the Logarithmic Mel Power Spectral Domain for the Automatic Recognition of Noisy Reverberant Speech},
  journal      = {{IEEE/ACM} Transactions on Audio, Speech, and Language Processing},
  volume       = {22},
  number       = {1},
  pages        = {95--109},
  year         = {2014},
  doi          = {10.1109/TASLP.2013.2285480},
  issn         = {2329-9290},
  keywords     = {computational complexity, reverberation, speech recognition, automatic speech recognition, background noise, clean speech, energy compensation, logarithmic mel power spectral domain, mel frequency cepstral coefficients, microphone input signals, model-based feature compensation schemes, noisy reverberant speech automatic recognition, noisy reverberant speech features, Atmospheric modeling, Computational modeling, Noise, Noise measurement, Reverberation, Speech, Vectors, Model-based feature compensation, observation model for reverberant and noisy speech, recursive observation model, robust automatic speech recognition},
  abstract     = {In this contribution we present a theoretical and experimental investigation into the effects of reverberation and noise on features in the logarithmic mel power spectral domain, an intermediate stage in the computation of the mel frequency cepstral coefficients, prevalent in automatic speech recognition (ASR). Gaining insight into the complex interaction between clean speech, noise, and noisy reverberant speech features is essential for any ASR system to be robust against noise and reverberation present in distant microphone input signals. The findings are gathered in a probabilistic formulation of an observation model which may be used in model-based feature compensation schemes. The proposed observation model extends previous models in three major directions: First, the contribution of additive background noise to the observation error is explicitly taken into account. Second, an energy compensation constant is introduced which ensures an unbiased estimate of the reverberant speech features, and, third, a recursive variant of the observation model is developed resulting in reduced computational complexity when used in model-based feature compensation. The experimental section is used to evaluate the accuracy of the model and to describe how its parameters can be determined from test data.},
}

% Leutnant, Krueger, Haeb-Umbach (2013): journal article on Bayesian feature
% enhancement for reverberation- and noise-robust speech recognition.
% Every value uses a single brace pair. The author list must not be wrapped in a
% second pair, or BibTeX cannot split it on " and " and reads all three authors
% as one indivisible name. In the title only the proper adjective "Bayesian" is
% brace-protected; the rest is left to the bibliography style's casing rules.
@article{11862,
  author       = {Leutnant, Volker and Krueger, Alexander and Haeb-Umbach, Reinhold},
  title        = {{Bayesian} Feature Enhancement for Reverberation and Noise Robust Speech Recognition},
  journal      = {{IEEE} Transactions on Audio, Speech, and Language Processing},
  volume       = {21},
  number       = {8},
  pages        = {1640--1652},
  year         = {2013},
  doi          = {10.1109/TASL.2013.2258013},
  keywords     = {Bayes methods, compensation, error statistics, reverberation, speech recognition, Bayesian feature enhancement, background noise, clean speech feature vectors, connected digits recognition task, memory requirements, noisy reverberant data, posteriori probability density function, recursive formulation, reverberant logarithmic mel power spectral coefficients, robust automatic speech recognition, signal-to-noise ratios, time-variant observation, word error rate reduction, Robust automatic speech recognition, model-based Bayesian feature enhancement, observation model for reverberant and noisy speech, recursive observation model},
  abstract     = {In this contribution we extend a previously proposed Bayesian approach for the enhancement of reverberant logarithmic mel power spectral coefficients for robust automatic speech recognition to the additional compensation of background noise. A recently proposed observation model is employed whose time-variant observation error statistics are obtained as a side product of the inference of the a posteriori probability density function of the clean speech feature vectors. Further a reduction of the computational effort and the memory requirements are achieved by using a recursive formulation of the observation model. The performance of the proposed algorithms is first experimentally studied on a connected digits recognition task with artificially created noisy reverberant data. It is shown that the use of the time-variant observation error model leads to a significant error rate reduction at low signal-to-noise ratios compared to a time-invariant model. Further experiments were conducted on a 5000 word task recorded in a reverberant and noisy environment. A significant word error rate reduction was obtained demonstrating the effectiveness of the approach on real-world data.},
}

% Windmann, Haeb-Umbach (2006): conference paper on iterative single channel
% speech enhancement with a marginalized particle filter.
% Every value uses a single brace pair. The author list must not be wrapped in a
% second pair, or BibTeX cannot split it on " and " and reads both authors as
% one indivisible name. Acronyms in the booktitle are brace-protected.
% NOTE(review): pages holds only "I", which looks like a truncated
% volume-prefixed page range; confirm the real range against the proceedings.
@inproceedings{11943,
  author       = {Windmann, Stefan and Haeb-Umbach, Reinhold},
  title        = {Iterative Speech Enhancement using a Non-Linear Dynamic State Model of Speech and its Parameters},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP} 2006)},
  volume       = {1},
  pages        = {I},
  year         = {2006},
  doi          = {10.1109/ICASSP.2006.1660058},
  keywords     = {clean speech training data, iterative methods, iterative speech enhancement, Kalman filter, Kalman filters, Kalman-LM-iterative algorithm, line spectral pair parameters, log-spectral distance, marginalized particle filter, noise level, nonlinear dynamic state speech model, particle filtering (numerical methods), single channel speech enhancement, SNR gains, speech enhancement, speech samples},
  abstract     = {A marginalized particle filter is proposed for performing single channel speech enhancement with a non-linear dynamic state model. The system consists of a particle filter for tracking line spectral pair (LSP) parameters and a Kalman filter per particle for speech enhancement. The state model for the LSPs has been learnt on clean speech training data. In our approach parameters and speech samples are processed at different time scales by assuming the parameters to be constant for small blocks of data. Further enhancement is obtained by an iteration which can be applied on these small blocks. The experiments show that similar SNR gains are obtained as with the Kalman-LM-iterative algorithm. However better values of the noise level and the log-spectral distance are achieved},
}