% Journal article by Ion and Haeb-Umbach (2008).
% Values use a single brace level: a doubly-braced author list is parsed as one
% indivisible name, and a fully braced title cannot be recased by the style.
@article{11820,
  author       = {Ion, Valentin and Haeb-Umbach, Reinhold},
  title        = {A Novel Uncertainty Decoding Rule With Applications to Transmission Error Robust Speech Recognition},
  journal      = {{IEEE} Transactions on Audio, Speech, and Language Processing},
  volume       = {16},
  number       = {5},
  pages        = {1047--1060},
  year         = {2008},
  doi          = {10.1109/TASL.2008.925879},
  keywords     = {automatic speech recognition, bit errors, codecs, communication links, corrupted observations, decoding, distributed speech recognition, error-prone communication network, feature vector sequence, hidden Markov model-based ASR, hidden Markov models, inter-frame correlation, Internet telephony, network speech recognition, packet loss, speech posterior, speech recognition, transmission error robust speech recognition, uncertainty decoding, voice-over-IP codecs},
  abstract     = {In this paper, we derive an uncertainty decoding rule for automatic speech recognition (ASR), which accounts for both corrupted observations and inter-frame correlation. The conditional independence assumption, prevalent in hidden Markov model-based ASR, is relaxed to obtain a clean speech posterior that is conditioned on the complete observed feature vector sequence. This is a more informative posterior than one conditioned only on the current observation. The novel decoding is used to obtain a transmission-error robust remote ASR system, where the speech capturing unit is connected to the decoder via an error-prone communication network. We show how the clean speech posterior can be computed for communication links being characterized by either bit errors or packet loss. Recognition results are presented for both distributed and network speech recognition, where in the latter case common voice-over-IP codecs are employed.},
}

% Conference paper by Ion and Haeb-Umbach (ICASSP 2006).
% Values use a single brace level: a doubly-braced author list is parsed as one
% indivisible name, and a fully braced title cannot be recased by the style.
% NOTE(review): pages is carried over unchanged from the export; the bare
% value "I" is presumably a truncated volume-prefixed range -- verify against
% the published proceedings before relying on it.
@inproceedings{11824,
  author       = {Ion, Valentin and Haeb-Umbach, Reinhold},
  title        = {An Inexpensive Packet Loss Compensation Scheme for Distributed Speech Recognition Based on Soft-Features},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP} 2006)},
  volume       = {1},
  pages        = {I},
  year         = {2006},
  doi          = {10.1109/ICASSP.2006.1659984},
  keywords     = {distributed speech recognition, least mean squares methods, MAP estimate, maximum likelihood estimation, MMSE estimate, packet loss compensation scheme, packet switched communication, posteriori probability density function, robust error mitigation method, soft-features, speech recognition, table lookup, voice communication, wireless channels},
  abstract     = {Soft-feature based speech recognition, which is an example of uncertainty decoding, has been proven to be a robust error mitigation method for distributed speech recognition over wireless channels exhibiting bit errors. In this paper we extend this concept to packet-oriented transmissions. The a posteriori probability density function of the lost feature vector, given the closest received neighbours, is computed. In the experiments, the nearest frame repetition, which is shown to be equivalent to the MAP estimate, outperforms the MMSE estimate for long bursts. Taking the variance into account at the speech recognition stage results in superior performance compared to classical schemes using point estimates. A computationally and memory efficient implementation of the proposed packet loss compensation scheme based on table lookup is presented.},
}

