% Bibliography entries (all dated 2011). Citation keys are unchanged.
%
% Conventions used below:
%   - author/editor lists use single braces so that every name is parsed
%     individually (a second brace pair would turn the whole list into one name).
%   - Titles are stored in their original casing; only words that must keep
%     their capitalisation under sentence-casing styles are brace-protected.
%   - One field per line, in the order: names, title, venue, numbering, pages,
%     publisher, year, identifiers, keywords, abstract.

article{11807,
  author  = {Herbig, Tobias and Gerl, Franz and Minker, Wolfgang and Haeb-Umbach, Reinhold},
  title   = {Adaptive Systems for Unsupervised Speaker Tracking and Speech Recognition},
  journal = {Evolving Systems},
  volume  = {2},
  number  = {3},
  pages   = {199--214},
  year    = {2011},
}

% Chapter with its own authors inside an edited volume, hence incollection.
% The volume appears to be the book stored under key 11945; the editor order
% follows that entry.
@incollection{11843,
  author    = {Krueger, Alexander and Haeb-Umbach, Reinhold},
  title     = {A Model-Based Approach to Joint Compensation of Noise and Reverberation for Speech Recognition},
  booktitle = {Robust Speech Recognition of Uncertain or Missing Data},
  editor    = {Kolossa, Dorothea and Haeb-Umbach, Reinhold},
  publisher = {Springer},
  year      = {2011},
  abstract  = {Employing automatic speech recognition systems in hands-free communication applications is accompanied by performance degradation due to background noise and, in particular, due to reverberation. These two kinds of distortion alter the shape of the feature vector trajectory extracted from the microphone signal and consequently lead to a discrepancy between training and testing conditions for the recognizer. In this chapter we present a feature enhancement approach aiming at the joint compensation of noise and reverberation to improve the performance by restoring the training conditions. For the enhancement we concentrate on the logarithmic mel power spectral coefficients as features, which are computed at an intermediate stage to obtain the widely used mel frequency cepstral coefficients. The proposed technique is based on a Bayesian framework, to attempt to infer the posterior distribution of the clean features given the observation of all past corrupted features. It exploits information from a priori models describing the dynamics of clean speech and noise-only feature vector trajectories as well as from an observation model relating the reverberant noisy to the clean features. The observation model relies on a simplified stochastic model of the room impulse response (RIR) between the speaker and the microphone, having only two parameters, namely RIR energy and reverberation time, which can be estimated from the captured microphone signal.
               The performance of the proposed enhancement technique is finally experimentally studied by means of recognition accuracy obtained for a connected digits recognition task under different noise and reverberation conditions using the Aurora~5 database.},
}

@inproceedings{11845,
  author    = {Krueger, Alexander and Haeb-Umbach, Reinhold},
  title     = {{MAP}-based estimation of the parameters of non-stationary {Gaussian} processes from noisy observations},
  booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2011)},
  pages     = {3596--3599},
  year      = {2011},
  doi       = {10.1109/ICASSP.2011.5946256},
  keywords  = {Gaussian processes, MAP-based estimation, maximum a posteriori method, maximum likelihood estimation, nonstationary Gaussian processes},
  abstract  = {The paper proposes a modification of the standard maximum a posteriori (MAP) method for the estimation of the parameters of a Gaussian process for cases where the process is superposed by additive Gaussian observation errors of known variance. Simulations on artificially generated data demonstrate the superiority of the proposed method. While reducing to the ordinary MAP approach in the absence of observation noise, the improvement becomes the more pronounced the larger the variance of the observation noise. The method is further extended to track the parameters in case of non-stationary Gaussian processes.},
}

@article{11850,
  author   = {Krueger, Alexander and Warsitz, Ernst and Haeb-Umbach, Reinhold},
  title    = {Speech Enhancement With a {GSC}-Like Structure Employing Eigenvector-Based Transfer Function Ratios Estimation},
  journal  = {IEEE Transactions on Audio, Speech, and Language Processing},
  volume   = {19},
  number   = {1},
  pages    = {206--219},
  year     = {2011},
  doi      = {10.1109/TASL.2010.2047324},
  keywords = {acoustical transfer function ratio, adaptive eigenvector tracking, array signal processing, beamformer design, blocking matrix, eigenvalues and eigenfunctions, eigenvector-based transfer function ratios estimation, generalized sidelobe canceler, interference reduction, iterative methods, power iteration method, reduced speech distortions, reverberant enclosure, reverberation, speech enhancement, stationary noise},
  abstract = {In this paper, we present a novel blocking matrix and fixed beamformer design for a generalized sidelobe canceler for speech enhancement in a reverberant enclosure.
              They are based on a new method for estimating the acoustical transfer function ratios in the presence of stationary noise. The estimation method relies on solving a generalized eigenvalue problem in each frequency bin. An adaptive eigenvector tracking utilizing the power iteration method is employed and shown to achieve a high convergence speed. Simulation results demonstrate that the proposed beamformer leads to better noise and interference reduction and reduced speech distortions compared to other blocking matrix designs from the literature.},
}

% Chapter with its own authors inside an edited volume, hence incollection.
% The volume appears to be the book stored under key 11945; the editor order
% follows that entry.
@incollection{11856,
  author    = {Leutnant, Volker and Haeb-Umbach, Reinhold},
  title     = {Conditional {Bayesian} Estimation Employing a Phase-Sensitive Observation Model for Noise Robust Speech Recognition},
  booktitle = {Robust Speech Recognition of Uncertain or Missing Data},
  editor    = {Kolossa, Dorothea and Haeb-Umbach, Reinhold},
  publisher = {Springer},
  year      = {2011},
  abstract  = {In this contribution, conditional Bayesian estimation employing a phase-sensitive observation model for noise robust speech recognition will be studied. After a review of speech recognition under the presence of corrupted features, termed uncertainty decoding, the estimation of the posterior distribution of the uncorrupted (clean) feature vector will be shown to be a key element of noise robust speech recognition.
               The estimation process will be based on three major components: an a priori model of the unobservable data, an observation model relating the unobservable data to the corrupted observation and an inference algorithm, finally allowing for a computationally tractable solution. Special stress will be laid on a detailed derivation of the phase-sensitive observation model and the required moments of the phase factor distribution. Thereby, it will not only be proven analytically that the phase factor distribution is non-Gaussian but also that all central moments can (approximately) be computed solely based on the used mel filter bank, finally rendering the moments independent of noise type and signal-to-noise ratio. The phase-sensitive observation model will then be incorporated into a model-based feature enhancement scheme and recognition experiments will be carried out on the Aurora~2 and Aurora~4 databases. The importance of incorporating phase factor information into the enhancement scheme is pointed out by all recognition results. Application of the proposed scheme under the derived uncertainty decoding framework further leads to significant improvements in both recognition tasks, eventually reaching the performance achieved with the ETSI advanced front-end.},
}

@inproceedings{11866,
  author    = {Leutnant, Volker and Krueger, Alexander and Haeb-Umbach, Reinhold},
  title     = {A versatile {Gaussian} splitting approach to non-linear state estimation and its application to noise-robust {ASR}},
  booktitle = {Interspeech 2011},
  year      = {2011},
  abstract  = {In this work, a splitting and weighting scheme that allows for splitting a Gaussian density into a Gaussian mixture density (GMM) is extended to allow the mixture components to be arranged along arbitrary directions.
               The parameters of the Gaussian mixture are chosen such that the GMM and the original Gaussian still exhibit equal central moments up to an order of four. The resulting mixtures{\rq} covariances will have eigenvalues that are smaller than those of the covariance of the original distribution, which is a desirable property in the context of non-linear state estimation, since the underlying assumptions of the extended Kalman filter are better justified in this case. Application to speech feature enhancement in the context of noise-robust automatic speech recognition reveals the beneficial properties of the proposed approach in terms of a reduced word error rate on the Aurora 2 recognition task.},
}

@inproceedings{11911,
  author    = {Tran Vu, Dang Hai and Haeb-Umbach, Reinhold},
  title     = {On Initial Seed Selection for Frequency Domain Blind Speech Separation},
  booktitle = {Interspeech 2011},
  year      = {2011},
  abstract  = {In this paper we address the problem of initial seed selection for frequency domain iterative blind speech separation (BSS) algorithms. The derivation of the seeding algorithm is guided by the goal to select samples which are likely to be caused by source activity and not by noise and at the same time originate from different sources.
               The proposed algorithm has moderate computational complexity and finds better seed values than alternative schemes, as is demonstrated by experiments on the database of the SiSEC2010 challenge.},
}

@book{11945,
  editor    = {Kolossa, Dorothea and Haeb-Umbach, Reinhold},
  title     = {Robust Speech Recognition of Uncertain or Missing Data --- {Theory} and Applications},
  publisher = {Springer},
  year      = {2011},
}

% NOTE(review): the booktitle names GLOBECOM 2010 while year is 2011;
% left unchanged, please confirm which year is intended.
@inproceedings{11973,
  author    = {Ayaz, Serkan and Hoffmann, Felix and Sommer, Christoph and German, Reinhard and Dressler, Falko},
  title     = {Performance Evaluation of Network Mobility Handover over Future Aeronautical Data Link},
  booktitle = {2010 IEEE Global Telecommunications Conference GLOBECOM 2010},
  year      = {2011},
  doi       = {10.1109/glocom.2010.5684106},
  isbn      = {9781424456369},
}

@article{11993,
  author  = {Dressler, Falko and Sommer, Christoph and Eckhoff, David and Tonguz, Ozan},
  title   = {Toward Realistic Simulation of Intervehicle Communication},
  journal = {IEEE Vehicular Technology Magazine},
  pages   = {43--51},
  year    = {2011},
  doi     = {10.1109/mvt.2011.941898},
  issn    = {1556-6072},
}