@article{11825,
  abstract     = {In this paper, we propose an enhanced error concealment strategy at the server side of a distributed speech recognition (DSR) system, which is fully compatible with the existing DSR standard. It is based on a Bayesian approach, where the a posteriori probability density of the error-free feature vector is computed, given all received feature vectors which are possibly corrupted by transmission errors. Rather than computing a point estimate, such as the MMSE estimate, and plugging it into the Bayesian decision rule, we employ uncertainty decoding, which results in an integration over the uncertainty in the feature domain. In a typical scenario the communication between the thin client, often a mobile device, and the recognition server spreads across heterogeneous networks. Both bit errors on circuit-switched links and lost data packets on IP connections are mitigated by our approach in a unified manner. The experiments reveal improved robustness both for small- and large-vocabulary recognition tasks.},
  author       = {Ion, Valentin and Haeb-Umbach, Reinhold},
  journal      = {{Speech Communication}},
  keywords     = {Channel error robustness, Distributed speech recognition, Soft features, Uncertainty decoding},
  number       = {11},
  pages        = {1435--1446},
  title        = {{Uncertainty decoding for distributed speech recognition over error-prone networks}},
  doi          = {10.1016/j.specom.2006.03.007},
  volume       = {48},
  year         = {2006},
}

@inproceedings{11826,
  abstract     = {The accuracy of distributed speech recognition has been shown to be very sensitive to errors occurring during transmission. One reason for this is that the classifier, usually trained under error free conditions, is unable to cope with the mismatch between an error free and error prone channel. In this paper we present a novel decision rule for classification which is able to account for channel errors. To achieve this, the classical Bayesian speech recognition approach has been reformulated for the server side, where the observation is known only to the extent, as is given by its a posteriori density function. We present a method to estimate the a posteriori density which is based on a Markov model of the source, which captures correlations of both static and dynamic features. A practical implementation is given, accompanied by experimental results for distributed speech recognition over an IP-network.},
  author       = {Ion, Valentin and Haeb-Umbach, Reinhold},
  booktitle    = {{Interspeech 2006}},
  title        = {{Improved Source Modeling and Predictive Classification for Channel Robust Speech Recognition}},
  year         = {2006},
}

@inproceedings{11884,
  abstract     = {In this paper we present the design of a particle filter for post filtering instantaneous positioning estimates of GSM mobile terminals. The instantaneous estimates are obtained by comparing signal power levels, which are reported by the mobile terminal to the base station, with a database of predictions using a novel statistically motivated similarity measure. Unlike a simple Euclidian distance measure, the proposed scheme incorporates inherent information about signal power level measurements requested by the serving base station but not reported by the mobile terminal. Furthermore, we show how the Monte Carlo method of particle filtering helps to obtain better position estimates and, surprisingly, also helps to reduce the computational complexity. Results are presented for real field data.},
  author       = {Peschke, Sven and Haeb-Umbach, Reinhold},
  booktitle    = {{European Navigation Conference \& Exhibition (ENC 2006)}},
  title        = {{A Probabilistic Similarity Measure and a Non-Linear Post-Filter for Mobile Phone Positioning using GSM Signal Power Measurements}},
  year         = {2006},
}

@inproceedings{11885,
  abstract     = {In this paper we present a novel and statistically motivated similarity measure for database assisted positioning of GSM mobile terminals by evaluating signal power level reports which are transmitted regularly. Unlike a simple Euclidian distance measure, the proposed scheme incorporates inherent information about signal power level measurements requested by the serving base station but not reported by the mobile terminal. Furthermore we show how the Monte Carlo method of nonlinear post filtering using particle filtering helps to obtain better position estimates and surprisingly also helps to reduce the computational complexity. Results are presented for real field data.},
  author       = {Peschke, Sven and Haeb-Umbach, Reinhold},
  booktitle    = {{3rd Workshop on Positioning Navigation and Communication (WPNC 2006)}},
  title        = {{Particle Filtering of Database assisted Positioning Estimates using a novel Similarity Measure for GSM Signal Power Level Measurements}},
  year         = {2006},
}

@inproceedings{11928,
  abstract     = {Broadband adaptive beamformers, which use a narrowband SNR-maximization optimization criterion for noise reduction, typically cause distortions of the desired speech signal at the beamformer output. In this paper two methods are investigated to control the speech distortion by comparing the eigenvector beamformer with a maximum likelihood beamformer: One is an analytic solution for the ideal case of absence of reverberation and the other one is a statistically motivated approach. We use the recently introduced gradient-ascent algorithm for adaptive principal eigenvector beamforming and then normalize the filter coefficients by the proposed distortion control methods. Experimental results in terms of the achievable SNR gain and a perceptual speech quality measure are given for the normalized eigenvector beamformer and are compared to standard beamforming methods.},
  author       = {Warsitz, Ernst and Haeb-Umbach, Reinhold},
  booktitle    = {{32. Deutsche Jahrestagung fuer Akustik (DAGA 2006)}},
  title        = {{Mehrkanalige Sprachsignalverarbeitung durch adaptives Eigenbeamforming fuer Freisprecheinrichtungen im Kraftfahrzeug}},
  year         = {2006},
}

@inproceedings{11929,
  abstract     = {Broadband adaptive beamformers, which use a narrowband SNR-maximization optimization criterion for noise reduction, typically cause distortions of the desired speech signal at the beamformer output. In this paper two methods are investigated to control the speech distortion by comparing the eigenvector beamformer with a maximum likelihood beamformer: One is an analytic solution for the ideal case of absence of reverberation and the other one is a statistically motivated approach. We use the recently introduced gradient-ascent algorithm for adaptive principal eigenvector beamforming and then normalize the filter coefficients by the proposed distortion control methods. Experimental results in terms of the achievable SNR gain and a perceptual speech quality measure are given for the normalized eigenvector beamformer and are compared to standard beamforming methods.},
  author       = {Warsitz, Ernst and Haeb-Umbach, Reinhold},
  booktitle    = {{International Workshop on Acoustic Echo and Noise Control (IWAENC 2006)}},
  title        = {{Controlling Speech Distortion in Adaptive Frequency-Domain Principal Eigenvector Beamforming}},
  year         = {2006},
}

@inproceedings{11942,
  abstract     = {Es wird ein marginalisiertes Partikelfilter beschrieben, das zur einkanaligen Sprachsignalverbesserung mit einem nichtlinearen dynamischen Zustandsmodell eingesetzt werden soll. Das System besteht aus einem Partikelfilter zum Tracking von LSP-Parametern und einem Kalman-Filter fuer jedes Partikel, das zur Sprachsignalverbesserung verwendet wird. In unserem Ansatz wird angenommen, dass die Parameter in kurzen Sprachsignalbloecken konstant sind, waehrend das Sprachsignal sich mit jedem Abtastwert aendert. Bei weissem Rauschen werden aehnliche SNR-Gewinne wie mit einem Kalman-EM-iterative Algorithmus erzielt, waehrend das Hintergrundrauschen und die Log-spektrale Distanz etwas geringer sind. Mit einem erweiterten Zustandsmodell wurden auch Untersuchungen fuer farbiges Rauschen durchgefuehrt.},
  author       = {Windmann, Stefan and Haeb-Umbach, Reinhold},
  booktitle    = {{7. ITG-Fachtagung Sprachkommunikation}},
  title        = {{Einkanalige Sprachsignalverbesserung mit Hilfe eines marginalisierten Partikelfilters}},
  year         = {2006},
}

@inproceedings{11943,
  abstract     = {A marginalized particle filter is proposed for performing single channel speech enhancement with a non-linear dynamic state model. The system consists of a particle filter for tracking line spectral pair (LSP) parameters and a Kalman filter per particle for speech enhancement. The state model for the LSPs has been learnt on clean speech training data. In our approach parameters and speech samples are processed at different time scales by assuming the parameters to be constant for small blocks of data. Further enhancement is obtained by an iteration which can be applied on these small blocks. The experiments show that similar SNR gains are obtained as with the Kalman-EM-iterative algorithm. However better values of the noise level and the log-spectral distance are achieved.},
  author       = {Windmann, Stefan and Haeb-Umbach, Reinhold},
  booktitle    = {{IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2006)}},
  keywords     = {clean speech training data, iterative methods, iterative speech enhancement, Kalman filter, Kalman filters, Kalman-EM-iterative algorithm, line spectral pair parameters, log-spectral distance, marginalized particle filter, noise level, nonlinear dynamic state speech model, particle filtering (numerical methods), single channel speech enhancement, SNR gains, speech enhancement, speech samples},
  pages        = {I},
  title        = {{Iterative Speech Enhancement using a Non-Linear Dynamic State Model of Speech and its Parameters}},
  doi          = {10.1109/ICASSP.2006.1660058},
  volume       = {1},
  year         = {2006},
}

@inproceedings{11894,
  abstract     = {In this paper we consider the problem of detecting speaker changes in audio signals recorded by distant microphones. It is shown that the possibility to exploit the spatial separation of speakers more than makes up the degradation in detection accuracy due to the increased source-to-sensor distance compared to close-talking microphones. Speaker direction information is derived from the filter coefficients of an adaptive Filter-and-Sum Beamformer and is combined with BIC analysis. The experimental results reveal significant improvements compared to BIC-only change detection, be it with the distant or close-talking microphone.},
  author       = {Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold},
  booktitle    = {{Interspeech 2006}},
  title        = {{Online Speaker Change Detection by Combining BIC with Microphone Array Beamforming}},
  year         = {2006},
}

@inproceedings{11803,
  abstract     = {In this paper we propose a novel adaptation algorithm for Filter-and-Sum beamforming in spatially correlated noise. Deterministic and stochastic gradient ascent algorithms are derived from a constrained optimization problem, which iteratively estimate the principal eigenvector of a generalized eigenvalue problem. The method does not require an explicit estimation of the speaker location. It is shown that the well-known Delay-and-Sum beamformer and the previously introduced Filter-and-Sum beamformer in spatially white noise are obtained as special cases. Further, bounds on the maximally achievable SNR gains are derived and it is shown that the proposed adaptation algorithm is able to approach these performance bounds.},
  author       = {Haeb-Umbach, Reinhold and Warsitz, Ernst},
  booktitle    = {{International Workshop on Acoustic Echo and Noise Control (IWAENC 2005)}},
  title        = {{Adaptive Filter-and-Sum Beamforming in Spatially Correlated Noise}},
  year         = {2005},
}

@inproceedings{11827,
  abstract     = {The transmission errors in a wireless or packet oriented network may dramatically decrease the performance of a distributed speech recognition (DSR) system. Error concealment has been shown to be an effective way to maintain an acceptable word error rate when dealing with error prone communication channels. In this paper we propose an extension of our previously introduced soft features approach for the case that the soft-output of the channel decoder is not available at the server side of the DSR system. We found a simple method to estimate bit reliability information which still gives good speech recognition results. It is shown that some other error concealment schemes turn out to be special cases of the method proposed here.},
  author       = {Ion, Valentin and Haeb-Umbach, Reinhold},
  booktitle    = {{Interspeech 2005}},
  title        = {{A Unified Probabilistic Approach to Error Concealment for Distributed Speech Recognition}},
  year         = {2005},
}

@inproceedings{11828,
  abstract     = {In this paper we present a comparison of the recently proposed Soft-Feature Distributed Speech Recognition (SFDSR) with the two evaluated candidate codecs for Speech Enabled Services over wireless networks: Adaptive Multirate Codec (AMR) and the ETSI Extended Advanced Front-End for Distributed Speech Recognition (XAFE). It is shown that SFDSR achieves the best recognition performance on a simulated GSM transmission, followed by XAFE and AMR. We also present some new results concerning SFDSR which demonstrate the versatility of the approach. Further, a simple method is introduced which considerably reduces the computational effort.},
  author       = {Ion, Valentin and Haeb-Umbach, Reinhold},
  booktitle    = {{IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2005)}},
  keywords     = {adaptive codes, adaptive multirate codec, AMR, distributed speech recognition, ETSI, extended advanced front-end, recognition performance, SFDSR, simulated GSM transmission, soft-feature distributed speech recognition, speech codecs, speech coding, speech recognition, variable rate codes, XAFE},
  pages        = {333--336},
  title        = {{A Comparison of Soft-Feature Distributed Speech Recognition with Candidate Codecs for Speech Enabled Mobile Services}},
  doi          = {10.1109/ICASSP.2005.1415118},
  volume       = {1},
  year         = {2005},
}

@inproceedings{11930,
  abstract     = {For human-machine interfaces in distant-talking environments multichannel signal processing is often employed to obtain an enhanced signal for subsequent processing. In this paper we propose a novel adaptation algorithm for a filter-and-sum beamformer to adjust the coefficients of FIR filters to changing acoustic room impulses, e.g. due to speaker movement. A deterministic and a stochastic gradient ascent algorithm are derived from a constrained optimization problem, which iteratively estimates the eigenvector corresponding to the largest eigenvalue of the cross power spectral density of the microphone signals. The method does not require an explicit estimation of the speaker location. The experimental results show fast adaptation and excellent robustness of the proposed algorithm.},
  author       = {Warsitz, Ernst and Haeb-Umbach, Reinhold},
  booktitle    = {{IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2005)}},
  keywords     = {acoustic filter-and-sum beamforming, acoustic room impulses, acoustic signal processing, adaptive principal component analysis, adaptive signal processing, architectural acoustics, constrained optimization problem, cross power spectral density, deterministic algorithm, deterministic algorithms, distant-talking environments, eigenvalues and eigenfunctions, eigenvector, enhanced signal, filter-and-sum beamformer, FIR filter coefficients, FIR filters, gradient methods, human-machine interfaces, iterative estimation, iterative methods, largest eigenvalue, microphone signals, multichannel signal processing, optimisation, principal component analysis, spectral analysis, stochastic gradient ascent algorithm, stochastic processes},
  pages        = {iv/797--iv/800},
  title        = {{Acoustic filter-and-sum beamforming by adaptive principal component analysis}},
  doi          = {10.1109/ICASSP.2005.1416129},
  volume       = {4},
  year         = {2005},
}

@inproceedings{11802,
  author       = {Haeb-Umbach, Reinhold and Schmalenstroeer, Joerg},
  booktitle    = {{Interspeech 2005}},
  title        = {{Speech Processing in the Networked Home Environment - A View on the Amigo Project}},
  year         = {2005},
}

@inproceedings{11801,
  author       = {Haeb-Umbach, Reinhold and Schmalenstroeer, Joerg},
  booktitle    = {{Interspeech 2005}},
  title        = {{A Comparison of Particle Filtering Variants for Speech Feature Enhancement}},
  year         = {2005},
}

@article{11732,
  abstract     = {Satellite positioning systems, such as GPS or the future European system Galileo, employ direct-sequence spread-spectrum signals. The positioning accuracy is strongly affected by the quality of the pseudo range measurements. These measurements necessitate code and carrier synchronization of the received signal with the internally generated reference signals. In this type of systems one major error source is the multipath phenomenon, which results in a sum of delayed and weighted copies of the original signal to be present at the receiver input. This can result in a systematic error of the code tracking loop resulting in range errors in the order of several tens of meters. In this paper we propose an extension of the standard code tracking loop capable of estimating the parameters of the line-of-sight (LOS) signal and separating the LOS from the reflected signal portions. It is based on an analysis of the cross correlation of the received signal with a locally generated code sequence in the vicinity of the tracking point of a Delay-Locked Loop (DLL). For this reason, we call this method Cross Correlation Function (CCF) Analysis. The proposed method achieves considerably more accurate estimates than a DLL. Its performance is comparable to the Multipath Estimating Delay-Locked Loop (MEDLL) which is considered to be the best method for reducing multipath induced errors, so far. However, the computational complexity of the CCF Analysis is by a factor of three smaller compared to the MEDLL. Extensive simulations have been conducted for the proposed method and the MEDLL in order to assess the robustness of the two approaches under various signal constellations.},
  author       = {Bischoff, R. and Haeb-Umbach, Reinhold and Nammi, Sai Ramesh},
  journal      = {{AEU - International Journal of Electronics and Communications}},
  number       = {1},
  title        = {{Multipath-Resistant Time of Arrival Estimation for Satellite Positioning}},
  volume       = {58},
  year         = {2004},
}

@inproceedings{11790,
  abstract     = {A major drawback of distributed versus terminal-based speech recognition is the fact that transmission errors can lead to degraded recognition performance. In this paper we employ soft features to mitigate the effect of bit errors on wireless transmission links: At the receiver a posteriori probabilities of the transmitted feature vectors are computed by combining bit reliability information provided by the channel decoder and a priori knowledge about residual redundancy in the feature vectors. While the first-order moment of the a posteriori probability function is the MMSE estimate, the second-order moment is a measure of the uncertainty in the reconstructed features. We conducted realistic simulations of GSM transmission and achieved significant improvements in word accuracy compared to the error mitigation strategy described in the ETSI standard.},
  author       = {Haeb-Umbach, Reinhold and Ion, Valentin},
  booktitle    = {{International Conference on Spoken Language Processing (ICSLP 2004)}},
  title        = {{Soft Features for Improved Distributed Speech Recognition over Wireless Networks}},
  year         = {2004},
}

@inproceedings{11931,
  abstract     = {The paper is concerned with binaural signal processing for a bimodal human-robot interface with hearing and vision. The two microphone signals are processed to obtain an enhanced single-channel input signal for the subsequent speech recognizer and to localize the acoustic source, an important information for establishing a natural human-robot communication. We utilize a robust adaptive algorithm for filter-and-sum beamforming (FSB) and extract speaker direction information from the resulting FIR filter coefficients. Further, particle filtering is applied which conducts a nonlinear Bayesian tracking of speaker movement. Good location accuracy can be achieved even in highly reverberant environments. The results obtained outperform the conventional generalized cross correlation (GCC) method.},
  author       = {Warsitz, Ernst and Haeb-Umbach, Reinhold},
  booktitle    = {{IEEE Workshop on Multimedia Signal Processing (MMSP 2004)}},
  keywords     = {bimodal human-robot interface, binaural signal processing, enhanced single-channel input signal, filter-and-sum beamforming, filtering theory, FIR filter coefficient, generalized cross correlation method, microphones, microphone signal, nonlinear Bayesian tracking, particle filtering, robust adaptive algorithm, robust speaker direction estimation, signal processing, speech enhancement, speech recognition, speech recognizer, user interfaces},
  pages        = {367--370},
  title        = {{Robust speaker direction estimation with particle filtering}},
  doi          = {10.1109/MMSP.2004.1436569},
  year         = {2004},
}

@inproceedings{11932,
  abstract     = {While the main objective of adaptive Filter-and-Sum beamforming is to obtain an enhanced speech signal for subsequent processing like speech recognition, we show how speaker localization information can be derived from the filter coefficients. To increase localization accuracy, speaker tracking is performed by non-linear Bayesian state estimation, which is realized by sequential Monte Carlo methods. Improved acquisition and tracking performance was achieved even in highly reverberant environments, in comparison with both a Kalman Filter and a recently proposed Particle Filter operating on the output of a nonadaptive Delay-and-Sum beamformer.},
  author       = {Warsitz, Ernst and Haeb-Umbach, Reinhold and Peschke, Sven},
  booktitle    = {{International Conference on Spoken Language Processing (ICSLP 2004)}},
  title        = {{Adaptive Beamforming Combined with Particle Filtering for Acoustic Source Localization}},
  year         = {2004},
}

@article{11777,
  author       = {Haeb-Umbach, Reinhold},
  journal      = {{Forschungsforum Paderborn}},
  pages        = {68--71},
  title        = {{Auf ein Wort - Moeglichkeiten und Grenzen der automatischen Spracherkennung}},
  year         = {2003},
}

