% IWAENC 2014 workshop paper: step-wise source counting in speech mixtures using a variational EM for complex Watson mixture models.
@inproceedings{11753,
  author       = {Drude, Lukas and Chinaev, Aleksej and Tran Vu, Dang Hai and Haeb-Umbach, Reinhold},
  title        = {Towards Online Source Counting in Speech Mixtures Applying a Variational {EM} for Complex {Watson} Mixture Models},
  booktitle    = {14th International Workshop on Acoustic Signal Enhancement ({IWAENC} 2014)},
  pages        = {213--217},
  year         = {2014},
  keywords     = {Accuracy, Acoustics, Estimation, Mathematical model, Source separation, Speech, Vectors, Bayes methods, Blind source separation, Directional statistics, Number of speakers, Speaker diarization},
  abstract     = {This contribution describes a step-wise source counting algorithm to determine the number of speakers in an offline scenario. Each speaker is identified by a variational expectation maximization (VEM) algorithm for complex Watson mixture models and therefore directly yields beamforming vectors for a subsequent speech separation process. An observation selection criterion is proposed which improves the robustness of the source counting in noise. The algorithm is compared to an alternative VEM approach with Gaussian mixture models based on directions of arrival and shown to deliver improved source counting accuracy. The article concludes by extending the offline algorithm towards a low-latency online estimation of the number of active sources from the streaming input data.},
}

% ICASSP 2013 conference paper: Bayesian decision rule leading to significance decoding for Gaussian mixture models.
@inproceedings{11716,
  author       = {Abdelaziz, Ahmed H. and Zeiler, Steffen and Kolossa, Dorothea and Leutnant, Volker and Haeb-Umbach, Reinhold},
  title        = {{GMM}-based significance decoding},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP} 2013)},
  pages        = {6827--6831},
  year         = {2013},
  doi          = {10.1109/ICASSP.2013.6638984},
  issn         = {1520-6149},
  keywords     = {Bayes methods, Gaussian processes, convolution, decision theory, decoding, noise, reverberation, speech coding, speech recognition, Bayesian decision rule, GMM, Gaussian mixture models, additive noise scenarios, automatic speech recognition systems, convolutive noise scenarios, decoding approach, mathematical framework, reverberant environments, significance decoding, speech feature estimation, uncertainty-of-observation techniques, Hidden Markov models, Maximum likelihood decoding, Noise, Speech, Speech recognition, Uncertainty, Uncertainty-of-observation, modified imputation, noise robust speech recognition, uncertainty decoding},
  abstract     = {The accuracy of automatic speech recognition systems in noisy and reverberant environments can be improved notably by exploiting the uncertainty of the estimated speech features using so-called uncertainty-of-observation techniques. In this paper, we introduce a new Bayesian decision rule that can serve as a mathematical framework from which both known and new uncertainty-of-observation techniques can be either derived or approximated. The new decision rule in its direct form leads to the new significance decoding approach for Gaussian mixture models, which results in better performance compared to standard uncertainty-of-observation techniques in different additive and convolutive noise scenarios.},
}

% IEEE TASLP journal article (vol. 21, no. 8, 2013): Bayesian feature enhancement for reverberant and noisy speech recognition.
@article{11862,
  author       = {Leutnant, Volker and Krueger, Alexander and Haeb-Umbach, Reinhold},
  title        = {{Bayesian} Feature Enhancement for Reverberation and Noise Robust Speech Recognition},
  journal      = {{IEEE} Transactions on Audio, Speech, and Language Processing},
  volume       = {21},
  number       = {8},
  pages        = {1640--1652},
  year         = {2013},
  doi          = {10.1109/TASL.2013.2258013},
  keywords     = {Bayes methods, compensation, error statistics, reverberation, speech recognition, Bayesian feature enhancement, background noise, clean speech feature vectors, connected digits recognition task, memory requirements, noisy reverberant data, posteriori probability density function, recursive formulation, reverberant logarithmic mel power spectral coefficients, robust automatic speech recognition, signal-to-noise ratios, time-variant observation, word error rate reduction, Robust automatic speech recognition, model-based Bayesian feature enhancement, observation model for reverberant and noisy speech, recursive observation model},
  abstract     = {In this contribution we extend a previously proposed Bayesian approach for the enhancement of reverberant logarithmic mel power spectral coefficients for robust automatic speech recognition to the additional compensation of background noise. A recently proposed observation model is employed whose time-variant observation error statistics are obtained as a side product of the inference of the a posteriori probability density function of the clean speech feature vectors. Further a reduction of the computational effort and the memory requirements are achieved by using a recursive formulation of the observation model. The performance of the proposed algorithms is first experimentally studied on a connected digits recognition task with artificially created noisy reverberant data. It is shown that the use of the time-variant observation error model leads to a significant error rate reduction at low signal-to-noise ratios compared to a time-invariant model. Further experiments were conducted on a 5000 word task recorded in a reverberant and noisy environment. A significant word error rate reduction was obtained demonstrating the effectiveness of the approach on real-world data.},
}

% ICASSP 2008 conference paper: improved switching linear dynamical model (SLDM) for speech feature enhancement in ASR.
@inproceedings{11939,
  author       = {Windmann, Stefan and Haeb-Umbach, Reinhold},
  title        = {Modeling the dynamics of speech and noise for speech feature enhancement in {ASR}},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP} 2008)},
  pages        = {4409--4412},
  year         = {2008},
  doi          = {10.1109/ICASSP.2008.4518633},
  keywords     = {a posteriori probability, AURORA2 database, Bayesian inference, Bayes methods, channel bank filters, extended Kalman filter banks, hidden noise state variable, Kalman filters, noise dynamics, speech enhancement, speech feature enhancement, speech feature trajectory, switching linear dynamical model approach},
  abstract     = {In this paper a switching linear dynamical model (SLDM) approach for speech feature enhancement is improved by employing more accurate models for the dynamics of speech and noise. The model of the clean speech feature trajectory is improved by augmenting the state vector to capture information derived from the delta features. Further a hidden noise state variable is introduced to obtain a more elaborated model for the noise dynamics. Approximate Bayesian inference in the SLDM is carried out by a bank of extended Kalman filters, whose outputs are combined according to the a posteriori probability of the individual state models. Experimental results on the AURORA2 database show improved recognition accuracy.},
}

% IEEE TPAMI journal article (vol. 23, no. 7, 2001): weighted pairwise Fisher criteria for multiclass linear dimension reduction.
% NOTE(review): the abstract ends mid-sentence without terminal punctuation; it looks truncated upstream -- confirm against the publisher record.
@article{11870,
  author       = {Loog, M. and Duin, R. P. W. and Haeb-Umbach, Reinhold},
  title        = {Multiclass linear dimension reduction by weighted pairwise {Fisher} criteria},
  journal      = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
  volume       = {23},
  number       = {7},
  pages        = {762--766},
  year         = {2001},
  doi          = {10.1109/34.935849},
  keywords     = {approximate pairwise accuracy, Bayes error, Bayes methods, error statistics, Euclidean distance, Fisher criterion, linear dimension reduction, linear discriminant analysis, pattern classification, statistical analysis, statistical pattern classification, weighting function},
  abstract     = {We derive a class of computationally inexpensive linear dimension reduction criteria by introducing a weighted variant of the well-known K-class Fisher criterion associated with linear discriminant analysis (LDA). It can be seen that LDA weights contributions of individual class pairs according to the Euclidean distance of the respective class means. We generalize upon LDA by introducing a different weighting function},
}

