@inproceedings{11753,
  abstract     = {{This contribution describes a step-wise source counting algorithm to determine the number of speakers in an offline scenario. Each speaker is identified by a variational expectation maximization (VEM) algorithm for complex Watson mixture models and therefore directly yields beamforming vectors for a subsequent speech separation process. An observation selection criterion is proposed which improves the robustness of the source counting in noise. The algorithm is compared to an alternative VEM approach with Gaussian mixture models based on directions of arrival and shown to deliver improved source counting accuracy. The article concludes by extending the offline algorithm towards a low-latency online estimation of the number of active sources from the streaming input data.}},
  author       = {Drude, Lukas and Chinaev, Aleksej and Tran Vu, Dang Hai and Haeb-Umbach, Reinhold},
  booktitle    = {{14th International Workshop on Acoustic Signal Enhancement (IWAENC 2014)}},
  keywords     = {{Accuracy, Acoustics, Estimation, Mathematical model, Source separation, Speech, Vectors, Bayes methods, Blind source separation, Directional statistics, Number of speakers, Speaker diarization}},
  pages        = {{213--217}},
  title        = {{{Towards Online Source Counting in Speech Mixtures Applying a Variational EM for Complex Watson Mixture Models}}},
  year         = {{2014}},
}

@inproceedings{11814,
  abstract     = {{In this paper we present an algorithm for the unsupervised segmentation of a lattice produced by a phoneme recognizer into words. Using a lattice rather than a single phoneme string accounts for the uncertainty of the recognizer about the true label sequence. An example application is the discovery of lexical units from the output of an error-prone phoneme recognizer in a zero-resource setting, where neither the lexicon nor the language model (LM) is known. We propose a computationally efficient iterative approach, which alternates between the following two steps: First, the most probable string is extracted from the lattice using a phoneme LM learned on the segmentation result of the previous iteration. Second, word segmentation is performed on the extracted string using a word and phoneme LM which is learned alongside the new segmentation. We present results on lattices produced by a phoneme recognizer on the WSJCAM0 dataset. We show that our approach delivers superior segmentation performance than an earlier approach found in the literature, in particular for higher-order language models.}},
  author       = {Heymann, Jahn and Walter, Oliver and Haeb-Umbach, Reinhold and Raj, Bhiksha},
  booktitle    = {{39th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2014)}},
  title        = {{{Iterative Bayesian Word Segmentation for Unsupervised Vocabulary Discovery from Phoneme Lattices}}},
  year         = {{2014}},
}

@inproceedings{11831,
  abstract     = {{Several self-localization algorithms have been proposed, that determine the positions of either acoustic or visual sensors autonomously. Usually these positions are given in a modality specific coordinate system, with an unknown rotation, translation and scale between the different systems. For a joint audiovisual tracking, where the different modalities support each other, the two modalities need to be mapped into a common coordinate system. In this paper we propose to estimate this mapping based on audiovisual correlates, i.e., a speaker that can be localized by both, a microphone and a camera network separately. The voice is tracked by a microphone network, which had to be calibrated by a self-localization algorithm at first, and the head is tracked by a calibrated camera network. Unlike existing Singular Value Decomposition based approaches to estimate the coordinate system mapping, we propose to perform an estimation in the shape domain, which turns out to be computationally more efficient. Simulations of the self-localization of an acoustic sensor network and a following coordinate mapping for a joint speaker localization showed a significant improvement of the localization performance, since the modalities were able to support each other.}},
  author       = {Jacob, Florian and Haeb-Umbach, Reinhold},
  booktitle    = {{11. ITG Fachtagung Sprachkommunikation (ITG 2014)}},
  title        = {{{Coordinate Mapping Between an Acoustic and Visual Sensor Network in the Shape Domain for a Joint Self-Calibrating Speaker Tracking}}},
  year         = {{2014}},
}

@article{11861,
  abstract     = {{In this contribution we present a theoretical and experimental investigation into the effects of reverberation and noise on features in the logarithmic mel power spectral domain, an intermediate stage in the computation of the mel frequency cepstral coefficients, prevalent in automatic speech recognition (ASR). Gaining insight into the complex interaction between clean speech, noise, and noisy reverberant speech features is essential for any ASR system to be robust against noise and reverberation present in distant microphone input signals. The findings are gathered in a probabilistic formulation of an observation model which may be used in model-based feature compensation schemes. The proposed observation model extends previous models in three major directions: First, the contribution of additive background noise to the observation error is explicitly taken into account. Second, an energy compensation constant is introduced which ensures an unbiased estimate of the reverberant speech features, and, third, a recursive variant of the observation model is developed resulting in reduced computational complexity when used in model-based feature compensation. The experimental section is used to evaluate the accuracy of the model and to describe how its parameters can be determined from test data.}},
  author       = {Leutnant, Volker and Krueger, Alexander and Haeb-Umbach, Reinhold},
  issn         = {{2329-9290}},
  journal      = {{IEEE/ACM Transactions on Audio, Speech, and Language Processing}},
  keywords     = {{computational complexity, reverberation, speech recognition, automatic speech recognition, background noise, clean speech, energy compensation, logarithmic mel power spectral domain, mel frequency cepstral coefficients, microphone input signals, model-based feature compensation schemes, noisy reverberant speech automatic recognition, noisy reverberant speech features, Atmospheric modeling, Computational modeling, Noise, Noise measurement, Reverberation, Speech, Vectors, Model-based feature compensation, observation model for reverberant and noisy speech, recursive observation model, robust automatic speech recognition}},
  number       = {{1}},
  pages        = {{95--109}},
  title        = {{{A New Observation Model in the Logarithmic Mel Power Spectral Domain for the Automatic Recognition of Noisy Reverberant Speech}}},
  doi          = {{10.1109/TASLP.2013.2285480}},
  volume       = {{22}},
  year         = {{2014}},
}

@article{11867,
  abstract     = {{New waves of consumer-centric applications, such as voice search and voice interaction with mobile devices and home entertainment systems, increasingly require automatic speech recognition (ASR) to be robust to the full range of real-world noise and other acoustic distorting conditions. Despite its practical importance, however, the inherent links between and distinctions among the myriad of methods for noise-robust ASR have yet to be carefully studied in order to advance the field further. To this end, it is critical to establish a solid, consistent, and common mathematical foundation for noise-robust ASR, which is lacking at present. This article is intended to fill this gap and to provide a thorough overview of modern noise-robust techniques for ASR developed over the past 30 years. We emphasize methods that are proven to be successful and that are likely to sustain or expand their future applicability. We distill key insights from our comprehensive overview in this field and take a fresh look at a few old problems, which nevertheless are still highly relevant today. Specifically, we have analyzed and categorized a wide range of noise-robust techniques using five different criteria: 1) feature-domain vs. model-domain processing, 2) the use of prior knowledge about the acoustic environment distortion, 3) the use of explicit environment-distortion models, 4) deterministic vs. uncertainty processing, and 5) the use of acoustic models trained jointly with the same feature enhancement or model adaptation process used in the testing stage. With this taxonomy-oriented review, we equip the reader with the insight to choose among techniques and with the awareness of the performance-complexity tradeoffs. The pros and cons of using different noise-robust ASR techniques in practical application scenarios are provided as a guide to interested practitioners. The current challenges and future research directions in this field is also carefully analyzed.}},
  author       = {Li, Jinyu and Deng, Li and Gong, Yifan and Haeb-Umbach, Reinhold},
  journal      = {{IEEE/ACM Transactions on Audio, Speech, and Language Processing}},
  keywords     = {{Speech recognition, compensation, distortion modeling, joint model training, noise, robustness, uncertainty processing}},
  number       = {{4}},
  pages        = {{745--777}},
  title        = {{{An Overview of Noise-Robust Automatic Speech Recognition}}},
  doi          = {{10.1109/TASLP.2014.2304637}},
  volume       = {{22}},
  year         = {{2014}},
}

@inproceedings{11918,
  abstract     = {{In this paper, we investigate unsupervised acoustic model training approaches for dysarthric-speech recognition. These models are first, frame-based Gaussian posteriorgrams, obtained from Vector Quantization (VQ), second, so-called Acoustic Unit Descriptors (AUDs), which are hidden Markov models of phone-like units, that are trained in an unsupervised fashion, and, third, posteriorgrams computed on the AUDs. Experiments were carried out on a database collected from a home automation task and containing nine speakers, of which seven are considered to utter dysarthric speech. All unsupervised modeling approaches delivered significantly better recognition rates than a speaker-independent phoneme recognition baseline, showing the suitability of unsupervised acoustic model training for dysarthric speech. While the AUD models led to the most compact representation of an utterance for the subsequent semantic inference stage, posteriorgram-based representations resulted in higher recognition rates, with the Gaussian posteriorgram achieving the highest slot filling F-score of 97.02\%. Index Terms: unsupervised learning, acoustic unit descriptors, dysarthric speech, non-negative matrix factorization}},
  author       = {Walter, Oliver and Despotovic, Vladimir and Haeb-Umbach, Reinhold and Gemmeke, Jort and Ons, Bart and Van hamme, Hugo},
  booktitle    = {{INTERSPEECH 2014}},
  title        = {{{An Evaluation of Unsupervised Acoustic Model Training for a Dysarthric Speech Interface}}},
  year         = {{2014}},
}

@inproceedings{11974,
  author       = {Berger, Mario and Erlacher, Felix and Sommer, Christoph and Dressler, Falko},
  booktitle    = {{2014 International Conference on Computing, Networking and Communications (ICNC)}},
  isbn         = {{9781479923588}},
  title        = {{{Adaptive load allocation for combining Anomaly Detectors using controlled skips}}},
  doi          = {{10.1109/iccnc.2014.6785438}},
  year         = {{2014}},
}

@inproceedings{11978,
  author       = {Bloessl, Bastian and Segata, Michele and Sommer, Christoph and Dressler, Falko},
  booktitle    = {{2013 IEEE Vehicular Networking Conference}},
  isbn         = {{9781479926879}},
  title        = {{{Towards an Open Source IEEE 802.11p stack: A full SDR-based transceiver in GNU Radio}}},
  doi          = {{10.1109/vnc.2013.6737601}},
  year         = {{2014}},
}

@inproceedings{11979,
  author       = {Bloessl, Bastian and Puschmann, Andre and Sommer, Christoph and Dressler, Falko},
  booktitle    = {{Proceedings of the 9th ACM international workshop on Wireless network testbeds, experimental evaluation and characterization - WiNTECH '14}},
  isbn         = {{9781450330725}},
  title        = {{{Timings matter}}},
  doi          = {{10.1145/2643230.2643240}},
  year         = {{2014}},
}

@inproceedings{11994,
  author       = {Dressler, Falko and Handle, Philipp and Sommer, Christoph},
  booktitle    = {{Proceedings of the 2014 ACM international workshop on Wireless and mobile technologies for smart cities - WiMobCity '14}},
  isbn         = {{9781450330367}},
  title        = {{{Towards a vehicular cloud - using parked vehicles as a temporary network and storage infrastructure}}},
  doi          = {{10.1145/2633661.2633671}},
  year         = {{2014}},
}

@inproceedings{11996,
  author       = {Eckert, Juergen and Sommer, Christoph and Eckhoff, David},
  booktitle    = {{Proceedings of the 11th ACM symposium on Performance evaluation of wireless ad hoc, sensor, \& ubiquitous networks - PE-WASUN '14}},
  isbn         = {{9781450330251}},
  title        = {{{Towards a simulation framework for paraglider networks}}},
  doi          = {{10.1145/2653481.2655754}},
  year         = {{2014}},
}

@inproceedings{12002,
  author       = {Eckhoff, David and Dressler, Falko and Sommer, Christoph},
  booktitle    = {{38th Annual IEEE Conference on Local Computer Networks}},
  isbn         = {{9781479905379}},
  title        = {{{SmartRevoc: An efficient and privacy preserving revocation system using parked vehicles}}},
  doi          = {{10.1109/lcn.2013.6761338}},
  year         = {{2014}},
}

@article{12003,
  author       = {Eckhoff, David and Sommer, Christoph},
  issn         = {{1540-7993}},
  journal      = {{IEEE Security \& Privacy}},
  pages        = {{77--79}},
  title        = {{{Driving for Big Data? Privacy Concerns in Vehicular Networking}}},
  doi          = {{10.1109/msp.2014.2}},
  year         = {{2014}},
}

@inproceedings{12008,
  author       = {Erlacher, Felix and Klingler, Florian and Sommer, Christoph and Dressler, Falko},
  booktitle    = {{2014 11th Annual Conference on Wireless On-demand Network Systems and Services (WONS)}},
  isbn         = {{9781479949373}},
  title        = {{{On the impact of street width on 5.9 GHz radio signal propagation in vehicular networks}}},
  doi          = {{10.1109/wons.2014.6814735}},
  year         = {{2014}},
}

@article{12038,
  author       = {Malandrino, Francesco and Casetti, Claudio and Chiasserini, Carla-Fabiana and Sommer, Christoph and Dressler, Falko},
  issn         = {{0018-9545}},
  journal      = {{IEEE Transactions on Vehicular Technology}},
  pages        = {{4606--4617}},
  title        = {{{The Role of Parked Cars in Content Downloading for Vehicular Networks}}},
  doi          = {{10.1109/tvt.2014.2316645}},
  year         = {{2014}},
}

@inproceedings{12046,
  author       = {Segata, Michele and Bloessl, Bastian and Joerer, Stefan and Sommer, Christoph and Lo Cigno, Renato and Dressler, Falko},
  booktitle    = {{2013 IEEE Vehicular Networking Conference}},
  isbn         = {{9781479926879}},
  title        = {{{Short paper: Vehicle shadowing distribution depends on vehicle type: Results of an experimental study}}},
  doi          = {{10.1109/vnc.2013.6737623}},
  year         = {{2014}},
}

@inproceedings{12049,
  author       = {Segata, Michele and Bloessl, Bastian and Sommer, Christoph and Dressler, Falko},
  booktitle    = {{2014 IEEE International Conference on Communications (ICC)}},
  isbn         = {{9781479920037}},
  title        = {{{Towards energy efficient smart phone applications: Energy models for offloading tasks into the cloud}}},
  doi          = {{10.1109/icc.2014.6883681}},
  year         = {{2014}},
}

@inproceedings{12067,
  author       = {Sommer, Christoph and Hagenauer, Florian and Dressler, Falko},
  booktitle    = {{2014 IEEE World Forum on Internet of Things (WF-IoT)}},
  isbn         = {{9781479934591}},
  title        = {{{A networking perspective on self-organizing intersection management}}},
  doi          = {{10.1109/wf-iot.2014.6803164}},
  year         = {{2014}},
}

@inproceedings{12073,
  author       = {Tung, Lung-Chih and Mena, Jorge and Gerla, Mario and Sommer, Christoph},
  booktitle    = {{2013 12th Annual Mediterranean Ad Hoc Networking Workshop (MED-HOC-NET)}},
  isbn         = {{9781479910045}},
  title        = {{{A cluster based architecture for intersection collision avoidance using heterogeneous networks}}},
  doi          = {{10.1109/medhocnet.2013.6767414}},
  year         = {{2014}},
}

@inproceedings{12977,
  author       = {Hellebrand, Sybille and Indlekofer, Thomas and Kampmann, Matthias and Kochte, Michael A. and Liu, Chang and Wunderlich, Hans-Joachim},
  booktitle    = {{IEEE International Test Conference (ITC'14)}},
  publisher    = {{IEEE}},
  title        = {{{FAST-BIST: Faster-than-at-Speed BIST Targeting Hidden Delay Defects}}},
  doi          = {{10.1109/test.2014.7035360}},
  year         = {{2014}},
}