% Journal article by Windmann and Haeb-Umbach (2009).
% Values use a single brace level so that the author list is split on " and "
% and the bibliography style stays in control of title casing.
@article{11938,
  author       = {Windmann, Stefan and Haeb-Umbach, Reinhold},
  title        = {Parameter Estimation of a State-Space Model of Noise for Robust Speech Recognition},
  journal      = {{IEEE} Transactions on Audio, Speech, and Language Processing},
  year         = {2009},
  volume       = {17},
  number       = {8},
  pages        = {1577--1590},
  doi          = {10.1109/TASL.2009.2023172},
  keywords     = {AURORA4 database, blockwise EM algorithm, covariance analysis, linear state model, noise covariance, noise-robust automatic speech recognition, noisy speech cepstra, offline training mode, parameter estimation, speech recognition, speech recognition equipment, speech recognizer, state-space methods, state-space model},
  abstract     = {In this paper, parameter estimation of a state-space model of noise or noisy speech cepstra is investigated. A blockwise EM algorithm is derived for the estimation of the state and observation noise covariance from noise-only input data. It is supposed to be used during the offline training mode of a speech recognizer. Further a sequential online EM algorithm is developed to adapt the observation noise covariance on noisy speech cepstra at its input. The estimated parameters are then used in model-based speech feature enhancement for noise-robust automatic speech recognition. Experiments on the AURORA4 database lead to improved recognition results with a linear state model compared to the assumption of stationary noise.},
}

% Workshop paper by Warsitz and Haeb-Umbach (2004).
% Values use a single brace level so that the author list is split on " and "
% and the bibliography style stays in control of title casing; only the
% acronyms in the booktitle are brace-protected.
@inproceedings{11931,
  author       = {Warsitz, Ernst and Haeb-Umbach, Reinhold},
  title        = {Robust speaker direction estimation with particle filtering},
  booktitle    = {{IEEE} Workshop on Multimedia Signal Processing ({MMSP} 2004)},
  year         = {2004},
  pages        = {367--370},
  doi          = {10.1109/MMSP.2004.1436569},
  keywords     = {bimodal human-robot interface, binaural signal processing, enhanced single-channel input signal, filter-and-sum beamforming, filtering theory, FIR filter coefficient, generalized cross correlation method, microphones, microphone signal, nonlinear Bayesian tracking, particle filtering, robust adaptive algorithm, robust speaker direction estimation, signal processing, speech enhancement, speech recognition, speech recognizer, user interfaces},
  abstract     = {The paper is concerned with binaural signal processing for a bimodal human-robot interface with hearing and vision. The two microphone signals are processed to obtain an enhanced single-channel input signal for the subsequent speech recognizer and to localize the acoustic source, an important information for establishing a natural human-robot communication. We utilize a robust adaptive algorithm for filter-and-sum beamforming (FSB) and extract speaker direction information from the resulting FIR filter coefficients. Further, particle filtering is applied which conducts a nonlinear Bayesian tracking of speaker movement. Good location accuracy can be achieved even in highly reverberant environments. The results obtained outperform the conventional generalized cross correlation (GCC) method.},
}

