@article{33398,
  author       = {{Mulder, E. and Clément, G. and Linnarsson, D. and Paloski, W. H. and Wuyts, F. P. and Zange, J. and Frings-Meuthen, P. and Johannes, B. and Shushakov, V. and Grunewald, M. and Maassen, N. and Bühlmeier, Judith and Rittweger, J.}},
  issn         = {{1439-6319}},
  journal      = {{European Journal of Applied Physiology}},
  keywords     = {{Physiology (medical), Public Health, Environmental and Occupational Health, Orthopedics and Sports Medicine, General Medicine, Physiology}},
  number       = {{4}},
  pages        = {{727--738}},
  publisher    = {{Springer Science and Business Media LLC}},
  title        = {{{Musculoskeletal effects of 5 days of bed rest with and without locomotion replacement training}}},
  doi          = {{10.1007/s00421-014-3045-0}},
  volume       = {{115}},
  year         = {{2015}},
}

@inproceedings{11716,
  abstract     = {{The accuracy of automatic speech recognition systems in noisy and reverberant environments can be improved notably by exploiting the uncertainty of the estimated speech features using so-called uncertainty-of-observation techniques. In this paper, we introduce a new Bayesian decision rule that can serve as a mathematical framework from which both known and new uncertainty-of-observation techniques can be either derived or approximated. The new decision rule in its direct form leads to the new significance decoding approach for Gaussian mixture models, which results in better performance compared to standard uncertainty-of-observation techniques in different additive and convolutive noise scenarios.}},
  author       = {{Abdelaziz, Ahmed H. and Zeiler, Steffen and Kolossa, Dorothea and Leutnant, Volker and Haeb-Umbach, Reinhold}},
  booktitle    = {{2013 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}},
  issn         = {{1520-6149}},
  keywords     = {{Bayes methods, Gaussian processes, convolution, decision theory, decoding, noise, reverberation, speech coding, speech recognition, Bayesian decision rule, GMM, Gaussian mixture models, additive noise scenarios, automatic speech recognition systems, convolutive noise scenarios, decoding approach, mathematical framework, reverberant environments, significance decoding, speech feature estimation, uncertainty-of-observation techniques, Hidden Markov models, Maximum likelihood decoding, Noise, Speech, Speech recognition, Uncertainty, Uncertainty-of-observation, modified imputation, noise robust speech recognition, uncertainty decoding}},
  pages        = {{6827--6831}},
  title        = {{{GMM-based significance decoding}}},
  doi          = {{10.1109/ICASSP.2013.6638984}},
  year         = {{2013}},
}

@inproceedings{11841,
  abstract     = {{Recently, substantial progress has been made in the field of reverberant speech signal processing, including both single- and multichannel de-reverberation techniques, and automatic speech recognition (ASR) techniques robust to reverberation. To evaluate state-of-the-art algorithms and obtain new insights regarding potential future research directions, we propose a common evaluation framework including datasets, tasks, and evaluation metrics for both speech enhancement and ASR techniques. The proposed framework will be used as a common basis for the REVERB (REverberant Voice Enhancement and Recognition Benchmark) challenge. This paper describes the rationale behind the challenge, and provides a detailed description of the evaluation framework and benchmark results.}},
  author       = {{Kinoshita, Keisuke and Delcroix, Marc and Yoshioka, Takuya and Nakatani, Tomohiro and Habets, Emanuel and Haeb-Umbach, Reinhold and Leutnant, Volker and Sehr, Armin and Kellermann, Walter and Maas, Roland and Gannot, Sharon and Raj, Bhiksha}},
  booktitle    = {{IEEE Workshop on Applications of Signal Processing to Audio and Acoustics}},
  keywords     = {{Reverberant speech, dereverberation, ASR, evaluation, challenge}},
  pages        = {{22--23}},
  title        = {{{The REVERB challenge: A common evaluation framework for dereverberation and recognition of reverberant speech}}},
  year         = {{2013}},
}

@article{11862,
  abstract     = {{In this contribution we extend a previously proposed Bayesian approach for the enhancement of reverberant logarithmic mel power spectral coefficients for robust automatic speech recognition to the additional compensation of background noise. A recently proposed observation model is employed whose time-variant observation error statistics are obtained as a side product of the inference of the a posteriori probability density function of the clean speech feature vectors. Further a reduction of the computational effort and the memory requirements are achieved by using a recursive formulation of the observation model. The performance of the proposed algorithms is first experimentally studied on a connected digits recognition task with artificially created noisy reverberant data. It is shown that the use of the time-variant observation error model leads to a significant error rate reduction at low signal-to-noise ratios compared to a time-invariant model. Further experiments were conducted on a 5000 word task recorded in a reverberant and noisy environment. A significant word error rate reduction was obtained demonstrating the effectiveness of the approach on real-world data.}},
  author       = {{Leutnant, Volker and Krueger, Alexander and Haeb-Umbach, Reinhold}},
  journal      = {{IEEE Transactions on Audio, Speech, and Language Processing}},
  keywords     = {{Bayes methods, compensation, error statistics, reverberation, speech recognition, Bayesian feature enhancement, background noise, clean speech feature vectors, connected digits recognition task, memory requirements, noisy reverberant data, posteriori probability density function, recursive formulation, reverberant logarithmic mel power spectral coefficients, robust automatic speech recognition, signal-to-noise ratios, time-variant observation, word error rate reduction, Robust automatic speech recognition, model-based Bayesian feature enhancement, observation model for reverberant and noisy speech, recursive observation model}},
  number       = {{8}},
  pages        = {{1640--1652}},
  title        = {{{Bayesian Feature Enhancement for Reverberation and Noise Robust Speech Recognition}}},
  doi          = {{10.1109/TASL.2013.2258013}},
  volume       = {{21}},
  year         = {{2013}},
}

@inproceedings{11917,
  abstract     = {{In this paper we present a speech presence probability (SPP) estimation algorithm which exploits both temporal and spectral correlations of speech. To this end, the SPP estimation is formulated as the posterior probability estimation of the states of a two-dimensional (2D) Hidden Markov Model (HMM). We derive an iterative algorithm to decode the 2D-HMM which is based on the turbo principle. The experimental results show that indeed the SPP estimates improve from iteration to iteration, and further clearly outperform another state-of-the-art SPP estimation algorithm.}},
  author       = {{Vu, Dang Hai Tran and Haeb-Umbach, Reinhold}},
  booktitle    = {{38th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013)}},
  issn         = {{1520-6149}},
  keywords     = {{correlation methods, estimation theory, hidden Markov models, iterative methods, probability, spectral analysis, speech processing, 2D HMM, SPP estimates, iterative algorithm, posterior probability estimation, spectral correlation, speech presence probability estimation, state-of-the-art SPP estimation algorithm, temporal correlation, turbo principle, two-dimensional hidden Markov model, Correlation, Decoding, Estimation, Iterative decoding, Noise, Speech, Vectors}},
  pages        = {{863--867}},
  title        = {{{Using the turbo principle for exploiting temporal and spectral correlations in speech presence probability estimation}}},
  doi          = {{10.1109/ICASSP.2013.6637771}},
  year         = {{2013}},
}

@inproceedings{11745,
  abstract     = {{In this paper we present a novel noise power spectral density tracking algorithm and its use in single-channel speech enhancement. It has the unique feature that it is able to track the noise statistics even if speech is dominant in a given time-frequency bin. As a consequence it can follow non-stationary noise superposed by speech, even in the critical case of rising noise power. The algorithm requires an initial estimate of the power spectrum of speech and is thus meant to be used as a postprocessor to a first speech enhancement stage. An experimental comparison with a state-of-the-art noise tracking algorithm demonstrates lower estimation errors under low SNR conditions and smaller fluctuations of the estimated values, resulting in improved speech quality as measured by PESQ scores.}},
  author       = {{Chinaev, Aleksej and Krueger, Alexander and Tran Vu, Dang Hai and Haeb-Umbach, Reinhold}},
  booktitle    = {{37th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2012)}},
  keywords     = {{MAP parameter estimation, noise power estimation, speech enhancement}},
  title        = {{{Improved Noise Power Spectral Density Tracking by a MAP-based Postprocessor}}},
  year         = {{2012}},
}

@inproceedings{11864,
  abstract     = {{In this work, an observation model for the joint compensation of noise and reverberation in the logarithmic mel power spectral density domain is considered. It relates the features of the noisy reverberant speech to those of the non-reverberant speech and the noise. In contrast to enhancement of features only corrupted by reverberation (reverberant features), enhancement of noisy reverberant features requires a more sophisticated model for the error introduced by the proposed observation model. In a first consideration, it will be shown that this error is highly dependent on the instantaneous ratio of the power of reverberant speech to the power of the noise and, moreover, sensitive to the phase between reverberant speech and noise in the short-time discrete Fourier domain. Afterwards, a statistically motivated approach will be presented allowing for the model of the observation error to be inferred from the error model previously used for the reverberation only case. Finally, the developed observation error model will be utilized in a Bayesian feature enhancement scheme, leading to improvements in word accuracy on the AURORA5 database.}},
  author       = {{Leutnant, Volker and Krueger, Alexander and Haeb-Umbach, Reinhold}},
  booktitle    = {{2012 IEEE International Conference on Signal Processing, Communications and Computing (ICSPCC)}},
  keywords     = {{Robust Automatic Speech Recognition, Bayesian feature enhancement, observation model for reverberant and noisy speech}},
  title        = {{{A Statistical Observation Model For Noisy Reverberant Speech Features and its Application to Robust ASR}}},
  year         = {{2012}},
}

@article{11850,
  abstract     = {{In this paper, we present a novel blocking matrix and fixed beamformer design for a generalized sidelobe canceler for speech enhancement in a reverberant enclosure. They are based on a new method for estimating the acoustical transfer function ratios in the presence of stationary noise. The estimation method relies on solving a generalized eigenvalue problem in each frequency bin. An adaptive eigenvector tracking utilizing the power iteration method is employed and shown to achieve a high convergence speed. Simulation results demonstrate that the proposed beamformer leads to better noise and interference reduction and reduced speech distortions compared to other blocking matrix designs from the literature.}},
  author       = {{Krueger, Alexander and Warsitz, Ernst and Haeb-Umbach, Reinhold}},
  journal      = {{IEEE Transactions on Audio, Speech, and Language Processing}},
  keywords     = {{acoustical transfer function ratio, adaptive eigenvector tracking, array signal processing, beamformer design, blocking matrix, eigenvalues and eigenfunctions, eigenvector-based transfer function ratios estimation, generalized sidelobe canceler, interference reduction, iterative methods, power iteration method, reduced speech distortions, reverberant enclosure, reverberation, speech enhancement, stationary noise}},
  number       = {{1}},
  pages        = {{206--219}},
  title        = {{{Speech Enhancement With a GSC-Like Structure Employing Eigenvector-Based Transfer Function Ratios Estimation}}},
  doi          = {{10.1109/TASL.2010.2047324}},
  volume       = {{19}},
  year         = {{2011}},
}

@article{33400,
  abstract     = {{We examined, in immobilization, the effect of a diet high in sodium chloride (NaCl) on bone markers, nitrogen balance, and acid-base status. Eight healthy male test subjects participated in a 14-day head-down-tilt bed rest (HDBR) study. During the bed rest period they received, in a randomized crossover design, a high (7.7 meq Na$^+$/kg body wt per day) and a low (0.7 meq Na$^+$/kg body wt per day) NaCl diet. As expected, 24-h excretion of urinary calcium was significantly greater in the high-NaCl-intake HDBR phase than in the low-NaCl-intake HDBR phase (P $<$ 0.001). High NaCl intake caused a 43--50\% greater excretion of the bone resorption markers COOH- (CTX) and NH$_2$- (NTX) terminal telopeptide of type I collagen in HDBR than low NaCl in HDBR (CTX/NTX: P $<$ 0.001). Serum concentrations of the bone formation markers bone-specific alkaline phosphatase (bAP) and NH$_2$-terminal propeptide of type I procollagen (PINP) were identical in both NaCl intake phases. High NaCl intake led to a more negative nitrogen balance in HDBR (P $<$ 0.001). Changes were accompanied by increased serum chloride concentration (P = 0.008), reduced blood bicarbonate (P = 0.017), and base excess (P = 0.009) whereas net acid excretion was lower during high than during low NaCl intake in immobilization (P $<$ 0.001). High NaCl intake during immobilization exacerbates disuse-induced bone and muscle loss by causing further protein wasting and an increase in bone resorption. Changes in the acid-base status, mainly caused by disturbances in electrolyte metabolism, seem to determine NaCl-induced degradation processes.}},
  author       = {{Frings-Meuthen, Petra and Bühlmeier, Judith and Baecker, Natalie and Stehle, Peter and Fimmers, Rolf and May, Francisca and Kluge, Goetz and Heer, Martina}},
  issn         = {{8750-7587}},
  journal      = {{Journal of Applied Physiology}},
  keywords     = {{Physiology (medical), Physiology}},
  number       = {{2}},
  pages        = {{537--542}},
  publisher    = {{American Physiological Society}},
  title        = {{{High sodium chloride intake exacerbates immobilization-induced bone resorption and protein losses}}},
  doi          = {{10.1152/japplphysiol.00454.2011}},
  volume       = {{111}},
  year         = {{2011}},
}

@article{17233,
  abstract     = {{It has been proposed that the design of robots might benefit from interactions that are similar to caregiver–child interactions, which is tailored to children’s respective capacities to a high degree. However, so far little is known about how people adapt their tutoring behaviour to robots and whether robots can evoke input that is similar to child-directed interaction. The paper presents detailed analyses of speakers’ linguistic and non-linguistic behaviour, such as action demonstration, in two comparable situations: In one experiment, parents described and explained to their nonverbal infants the use of certain everyday objects; in the other experiment, participants tutored a simulated robot on the same objects. The results, which show considerable differences between the two situations on almost all measures, are discussed in the light of the computer-as-social-actor paradigm and the register hypothesis.}},
  author       = {{Fischer, Kerstin and Foth, Kilian and Rohlfing, Katharina and Wrede, Britta}},
  issn         = {{1572-0381}},
  journal      = {{Interaction Studies}},
  keywords     = {{human–robot interaction (HRI), social communication, register theory, motionese, robotese, child-directed speech (CDS), motherese, mindless transfer, computers-as-social-actors}},
  number       = {{1}},
  pages        = {{134--161}},
  publisher    = {{John Benjamins Publishing Company}},
  title        = {{{Mindful tutors: Linguistic choice and action demonstration in speech to infants and a simulated robot}}},
  doi          = {{10.1075/is.12.1.06fis}},
  volume       = {{12}},
  year         = {{2011}},
}

@article{11846,
  abstract     = {{In this paper, we present a new technique for automatic speech recognition (ASR) in reverberant environments. Our approach is aimed at the enhancement of the logarithmic Mel power spectrum, which is computed at an intermediate stage to obtain the widely used Mel frequency cepstral coefficients (MFCCs). Given the reverberant logarithmic Mel power spectral coefficients (LMPSCs), a minimum mean square error estimate of the clean LMPSCs is computed by carrying out Bayesian inference. We employ switching linear dynamical models as an a priori model for the dynamics of the clean LMPSCs. Further, we derive a stochastic observation model which relates the clean to the reverberant LMPSCs through a simplified model of the room impulse response (RIR). This model requires only two parameters, namely RIR energy and reverberation time, which can be estimated from the captured microphone signal. The performance of the proposed enhancement technique is studied on the AURORA5 database and compared to that of constrained maximum-likelihood linear regression (CMLLR). It is shown by experimental results that our approach significantly outperforms CMLLR and that up to 80\% of the errors caused by the reverberation are recovered. In addition to the fact that the approach is compatible with the standard MFCC feature vectors, it leaves the ASR back-end unchanged. It is of moderate computational complexity and suitable for real time applications.}},
  author       = {{Krueger, Alexander and Haeb-Umbach, Reinhold}},
  journal      = {{IEEE Transactions on Audio, Speech, and Language Processing}},
  keywords     = {{ASR, AURORA5 database, automatic speech recognition, Bayesian inference, belief networks, CMLLR, computational complexity, constrained maximum likelihood linear regression, least mean squares methods, LMPSC computation, logarithmic Mel power spectrum, maximum likelihood estimation, Mel frequency cepstral coefficients, MFCC feature vectors, microphone signal, minimum mean square error estimation, model-based feature enhancement, regression analysis, reverberant speech recognition, reverberation, RIR energy, room impulse response, speech recognition, stochastic observation model, stochastic processes}},
  number       = {{7}},
  pages        = {{1692--1707}},
  title        = {{{Model-Based Feature Enhancement for Reverberant Speech Recognition}}},
  doi          = {{10.1109/TASL.2010.2049684}},
  volume       = {{18}},
  year         = {{2010}},
}

@inproceedings{11913,
  abstract     = {{In this paper we propose to employ directional statistics in a complex vector space to approach the problem of blind speech separation in the presence of spatially correlated noise. We interpret the values of the short time Fourier transform of the microphone signals to be draws from a mixture of complex Watson distributions, a probabilistic model which naturally accounts for spatial aliasing. The parameters of the density are related to the a priori source probabilities, the power of the sources and the transfer function ratios from sources to sensors. Estimation formulas are derived for these parameters by employing the Expectation Maximization (EM) algorithm. The E-step corresponds to the estimation of the source presence probabilities for each time-frequency bin, while the M-step leads to a maximum signal-to-noise ratio (MaxSNR) beamformer in the presence of uncertainty about the source activity. Experimental results are reported for an implementation in a generalized sidelobe canceller (GSC) like spatial beamforming configuration for 3 speech sources with significant coherent noise in reverberant environments, demonstrating the usefulness of the novel modeling framework.}},
  author       = {{Tran Vu, Dang Hai and Haeb-Umbach, Reinhold}},
  booktitle    = {{IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2010)}},
  keywords     = {{array signal processing, blind source separation, blind speech separation, complex vector space, complex Watson distribution, directional statistics, expectation-maximisation algorithm, expectation maximization algorithm, Fourier transform, Fourier transforms, generalized sidelobe canceller, interference suppression, maximum signal-to-noise ratio beamformer, microphone signal, probabilistic model, spatial aliasing, spatial beamforming configuration, speech enhancement, statistical distributions}},
  pages        = {{241--244}},
  title        = {{{Blind speech separation employing directional statistics in an Expectation Maximization framework}}},
  doi          = {{10.1109/ICASSP.2010.5495994}},
  year         = {{2010}},
}

@article{11892,
  abstract     = {{For an environment to be perceived as being smart, contextual information has to be gathered to adapt the system's behavior and its interface towards the user. Being a rich source of context information speech can be acquired unobtrusively by microphone arrays and then processed to extract information about the user and his environment. In this paper, a system for joint temporal segmentation, speaker localization, and identification is presented, which is supported by face identification from video data obtained from a steerable camera. Special attention is paid to latency aspects and online processing capabilities, as they are important for the application under investigation, namely ambient communication. It describes the vision of terminal-less, session-less and multi-modal telecommunication with remote partners, where the user can move freely within his home while the communication follows him. The speaker diarization serves as a context source, which has been integrated in a service-oriented middleware architecture and provided to the application to select the most appropriate I/O device and to steer the camera towards the speaker during ambient communication.}},
  author       = {{Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold}},
  journal      = {{IEEE Journal of Selected Topics in Signal Processing}},
  keywords     = {{audio streaming, audio visual data streaming, context information speech, face identification, face recognition, image segmentation, middleware, multimodal telecommunication, online diarization, service oriented middleware architecture, sessionless telecommunication, software architecture, speaker identification, speaker localization, speaker recognition, steerable camera, telecommunication computing, temporal segmentation, terminal-less telecommunication, video streaming}},
  number       = {{5}},
  pages        = {{845--856}},
  title        = {{{Online Diarization of Streaming Audio-Visual Data for Smart Environments}}},
  doi          = {{10.1109/JSTSP.2010.2050519}},
  volume       = {{4}},
  year         = {{2010}},
}

@article{11937,
  abstract     = {{In automatic speech recognition, hidden Markov models (HMMs) are commonly used for speech decoding, while switching linear dynamic models (SLDMs) can be employed for a preceding model-based speech feature enhancement. In this paper, these model types are combined in order to obtain a novel iterative speech feature enhancement and recognition architecture. It is shown that speech feature enhancement with SLDMs can be improved by feeding back information from the HMM to the enhancement stage. Two different feedback structures are derived. In the first, the posteriors of the HMM states are used to control the model probabilities of the SLDMs, while in the second they are employed to directly influence the estimate of the speech feature distribution. Both approaches lead to improvements in recognition accuracy both on the AURORA2 and AURORA4 databases compared to non-iterative speech feature enhancement with SLDMs. It is also shown that a combination with uncertainty decoding further enhances performance.}},
  author       = {{Windmann, Stefan and Haeb-Umbach, Reinhold}},
  journal      = {{IEEE Transactions on Audio, Speech, and Language Processing}},
  keywords     = {{AURORA2 databases, AURORA4 databases, automatic speech recognition, feedback structures, hidden Markov models, HMM, iterative methods, iterative speech feature enhancement, model probabilities, speech decoding, speech enhancement, speech feature distribution, speech recognition, switching linear dynamic models}},
  number       = {{5}},
  pages        = {{974--984}},
  title        = {{{Approaches to Iterative Speech Feature Enhancement and Recognition}}},
  doi          = {{10.1109/TASL.2009.2014894}},
  volume       = {{17}},
  year         = {{2009}},
}

@article{11938,
  abstract     = {{In this paper, parameter estimation of a state-space model of noise or noisy speech cepstra is investigated. A blockwise EM algorithm is derived for the estimation of the state and observation noise covariance from noise-only input data. It is supposed to be used during the offline training mode of a speech recognizer. Further a sequential online EM algorithm is developed to adapt the observation noise covariance on noisy speech cepstra at its input. The estimated parameters are then used in model-based speech feature enhancement for noise-robust automatic speech recognition. Experiments on the AURORA4 database lead to improved recognition results with a linear state model compared to the assumption of stationary noise.}},
  author       = {{Windmann, Stefan and Haeb-Umbach, Reinhold}},
  journal      = {{IEEE Transactions on Audio, Speech, and Language Processing}},
  keywords     = {{AURORA4 database, blockwise EM algorithm, covariance analysis, linear state model, noise covariance, noise-robust automatic speech recognition, noisy speech cepstra, offline training mode, parameter estimation, speech recognition, speech recognition equipment, speech recognizer, state-space methods, state-space model}},
  number       = {{8}},
  pages        = {{1577--1590}},
  title        = {{{Parameter Estimation of a State-Space Model of Noise for Robust Speech Recognition}}},
  doi          = {{10.1109/TASL.2009.2023172}},
  volume       = {{17}},
  year         = {{2009}},
}

@inproceedings{17272,
  abstract     = {{In developmental research, tutoring behavior has been identified as scaffolding infants' learning processes. It has been defined in terms of child-directed speech (Motherese), child-directed motion (Motionese), and contingency. In the field of developmental robotics, research often assumes that in human-robot interaction (HRI), robots are treated similar to infants, because their immature cognitive capabilities benefit from this behavior. However, according to our knowledge, it has barely been studied whether this is true and how exactly humans alter their behavior towards a robotic interaction partner. In this paper, we present results concerning the acceptance of a robotic agent in a social learning scenario obtained via comparison to adults and 8-11 months old infants in equal conditions. These results constitute an important empirical basis for making use of tutoring behavior in social robotics. In our study, we performed a detailed multimodal analysis of HRI in a tutoring situation using the example of a robot simulation equipped with a bottom-up saliency-based attention model. Our results reveal significant differences in hand movement velocity, motion pauses, range of motion, and eye gaze suggesting that for example adults decrease their hand movement velocity in an Adult-Child Interaction (ACI), opposed to an Adult-Adult Interaction (AAI) and this decrease is even higher in the Adult-Robot Interaction (ARI). We also found important differences between ACI and ARI in how the behavior is modified over time as the interaction unfolds. These findings indicate the necessity of integrating top-down feedback structures into a bottom-up system for robots to be fully accepted as interaction partners.}},
  author       = {{Vollmer, Anna-Lisa and Lohan, Katrin Solveig and Fischer, Kerstin and Nagai, Yukie and Pitsch, Karola and Fritsch, Jannik and Rohlfing, Katharina and Wrede, Britta}},
  booktitle    = {{2009 IEEE 8th International Conference on Development and Learning (ICDL 2009)}},
  keywords     = {{robot simulation, hand movement velocity, robotic interaction partner, robotic agent, robot-directed interaction, multimodal analysis, Motionese, Motherese, intelligent tutoring systems, immature cognitive capability, human computer interaction, eye gaze, child-directed speech, child-directed motion, bottom-up system, bottom-up saliency-based attention model, adult-robot interaction, adult-child interaction, adult-adult interaction, human-robot interaction, action learning, social learning scenario, social robotics, software agents, top-down feedback structures, tutoring behavior}},
  pages        = {{1--6}},
  publisher    = {{IEEE}},
  title        = {{{People modify their tutoring behavior in robot-directed interaction for action learning}}},
  doi          = {{10.1109/DEVLRN.2009.5175516}},
  year         = {{2009}},
}

@article{11820,
  abstract     = {{In this paper, we derive an uncertainty decoding rule for automatic speech recognition (ASR), which accounts for both corrupted observations and inter-frame correlation. The conditional independence assumption, prevalent in hidden Markov model-based ASR, is relaxed to obtain a clean speech posterior that is conditioned on the complete observed feature vector sequence. This is a more informative posterior than one conditioned only on the current observation. The novel decoding is used to obtain a transmission-error robust remote ASR system, where the speech capturing unit is connected to the decoder via an error-prone communication network. We show how the clean speech posterior can be computed for communication links being characterized by either bit errors or packet loss. Recognition results are presented for both distributed and network speech recognition, where in the latter case common voice-over-IP codecs are employed.}},
  author       = {{Ion, Valentin and Haeb-Umbach, Reinhold}},
  journal      = {{IEEE Transactions on Audio, Speech, and Language Processing}},
  keywords     = {{automatic speech recognition, bit errors, codecs, communication links, corrupted observations, decoding, distributed speech recognition, error-prone communication network, feature vector sequence, hidden Markov model-based ASR, hidden Markov models, inter-frame correlation, Internet telephony, network speech recognition, packet loss, speech posterior, speech recognition, transmission error robust speech recognition, uncertainty decoding, voice-over-IP codecs}},
  number       = {{5}},
  pages        = {{1047--1060}},
  title        = {{{A Novel Uncertainty Decoding Rule With Applications to Transmission Error Robust Speech Recognition}}},
  doi          = {{10.1109/TASL.2008.925879}},
  volume       = {{16}},
  year         = {{2008}},
}

@inproceedings{11935,
  abstract     = {{The generalized sidelobe canceller by Griffith and Jim is a robust beamforming method to enhance a desired (speech) signal in the presence of stationary noise. Its performance depends to a high degree on the construction of the blocking matrix which produces noise reference signals for the subsequent adaptive interference canceller. Especially in reverberated environments the beamformer may suffer from signal leakage and reduced noise suppression. In this paper a new blocking matrix is proposed. It is based on a generalized eigenvalue problem whose solution provides an indirect estimation of the transfer functions from the source to the sensors. The quality of the new generalized eigenvector blocking matrix is studied in simulated rooms with different reverberation times and is compared to alternatives proposed in the literature.}},
  author       = {{Warsitz, Ernst and Krueger, Alexander and Haeb-Umbach, Reinhold}},
  booktitle    = {{IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2008)}},
  doi          = {{10.1109/ICASSP.2008.4517549}},
  keywords     = {{adaptive interference canceller, adaptive signal processing, array signal processing, beamforming method, eigenvalues and eigenfunctions, generalized eigenvector blocking matrix, generalized sidelobe canceller, interference suppression, matrix algebra, noise suppression, speech enhancement, transfer function estimation, transfer functions}},
  pages        = {{73--76}},
  title        = {{{Speech enhancement with a new generalized eigenvector blocking matrix for application in a generalized sidelobe canceller}}},
  year         = {{2008}},
}

@inproceedings{11939,
  abstract     = {{In this paper a switching linear dynamical model (SLDM) approach for speech feature enhancement is improved by employing more accurate models for the dynamics of speech and noise. The model of the clean speech feature trajectory is improved by augmenting the state vector to capture information derived from the delta features. Further a hidden noise state variable is introduced to obtain a more elaborated model for the noise dynamics. Approximate Bayesian inference in the SLDM is carried out by a bank of extended Kalman filters, whose outputs are combined according to the a posteriori probability of the individual state models. Experimental results on the AURORA2 database show improved recognition accuracy.}},
  author       = {{Windmann, Stefan and Haeb-Umbach, Reinhold}},
  booktitle    = {{IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2008)}},
  doi          = {{10.1109/ICASSP.2008.4518633}},
  keywords     = {{a posteriori probability, AURORA2 database, Bayesian inference, Bayes methods, channel bank filters, extended Kalman filter banks, hidden noise state variable, Kalman filters, noise dynamics, speech enhancement, speech feature enhancement, speech feature trajectory, switching linear dynamical model approach}},
  pages        = {{4409--4412}},
  title        = {{{Modeling the dynamics of speech and noise for speech feature enhancement in ASR}}},
  year         = {{2008}},
}

@inproceedings{17278,
  abstract     = {{This paper investigates the influence of feedback provided by an autonomous robot (BIRON) on users’ discursive behavior. A user study is described during which users show objects to the robot. The results of the experiment indicate, that the robot’s verbal feedback utterances cause the humans to adapt their own way of speaking. The changes in users’ verbal behavior are due to their beliefs about the robots knowledge and abilities. In this paper they are identified and grouped. Moreover, the data implies variations in user behavior regarding gestures. Unlike speech, the robot was not able to give feedback with gestures. Due to the lack of feedback, users did not seem to have a consistent mental representation of the robot’s abilities to recognize gestures. As a result, changes between different gestures are interpreted to be unconscious variations accompanying speech.}},
  author       = {{Lohse, Manja and Rohlfing, Katharina and Wrede, Britta and Sagerer, Gerhard}},
  issn         = {{1050-4729}},
  keywords     = {{discursive behavior, autonomous robot, BIRON, man-machine systems, robot abilities, robot knowledge, user gestures, robot verbal feedback utterance, speech processing, user verbal behavior, service robots, human-robot interaction, human computer interaction, gesture recognition}},
  pages        = {{3481--3486}},
  title        = {{{“Try something else!” — When users change their discursive behavior in human-robot interaction}}},
  doi          = {{10.1109/ROBOT.2008.4543743}},
  year         = {{2008}},
}

