@misc{10781,
  author       = {Hermansen, Sven},
  publisher    = {Paderborn University},
  title        = {Custom Memory Controller for {ReconOS}},
  year         = {2016},
}

@incollection{10785,
  author       = {Fürnkranz, Johannes and Hüllermeier, Eyke},
  booktitle    = {Encyclopedia of Machine Learning and Data Mining},
  editor       = {Sammut, Claude and Webb, Geoffrey I.},
  publisher    = {Springer},
  title        = {Preference Learning},
  year         = {2016},
}

@misc{1082,
  author       = {Handirk, Tobias},
  publisher    = {Universität Paderborn},
  title        = {Über die {Rolle} von {Informationen} in {Verkehrsnetzwerken}},
  year         = {2016},
}

@inproceedings{11738,
  abstract     = {In this contribution we investigate a priori signal-to-noise ratio (SNR) estimation, a crucial component of a single-channel speech enhancement system based on spectral subtraction. The majority of the state-of-the-art a priori SNR estimators work in the power spectral domain, which is, however, not confirmed to be the optimal domain for the estimation. Motivated by the generalized spectral subtraction rule, we show how the estimation of the a priori SNR can be formulated in the so called generalized SNR domain. This formulation allows to generalize the widely used decision directed (DD) approach. An experimental investigation with different noise types reveals the superiority of the generalized DD approach over the conventional DD approach in terms of both the mean opinion score - listening quality objective measure and the output global SNR in the medium to high input SNR regime, while we show that the power spectrum is the optimal domain for low SNR. We further develop a parameterization which adjusts the domain of estimation automatically according to the estimated input global SNR.},
  author       = {Chinaev, Aleksej and Haeb-Umbach, Reinhold},
  booktitle    = {INTERSPEECH 2016, San Francisco, USA},
  keywords     = {single-channel speech enhancement, a priori SNR estimation, generalized spectral subtraction},
  title        = {A Priori {SNR} Estimation Using a Generalized Decision Directed Approach},
  year         = {2016},
}

@inproceedings{11743,
  abstract     = {This contribution introduces a novel causal a priori signal-to-noise ratio (SNR) estimator for single-channel speech enhancement. To exploit the advantages of the generalized spectral subtraction, a normalized $\alpha$-order magnitude (NAOM) domain is introduced where an a priori SNR estimation is carried out. In this domain, the NAOM coefficients of noise and clean speech signals are modeled by a Weibull distribution and a Weibull mixture model (WMM), respectively. While the parameters of the noise model are calculated from the noise power spectral density estimates, the speech WMM parameters are estimated from the noisy signal by applying a causal Expectation-Maximization algorithm. Further a maximum a posteriori estimate of the a priori SNR is developed. The experiments in different noisy environments show the superiority of the proposed estimator compared to the well-known decision-directed approach in terms of estimation error, estimator variance and speech quality of the enhanced signals when used for speech enhancement.},
  author       = {Chinaev, Aleksej and Heitkaemper, Jens and Haeb-Umbach, Reinhold},
  booktitle    = {12. ITG Fachtagung Sprachkommunikation (ITG 2016)},
  title        = {A Priori {SNR} Estimation Using {Weibull} Mixture Model},
  year         = {2016},
}

@inproceedings{11744,
  abstract     = {A noise power spectral density (PSD) estimation is an indispensable component of speech spectral enhancement systems. In this paper we present a noise PSD tracking algorithm, which employs a noise presence probability estimate delivered by a deep neural network (DNN). The algorithm provides a causal noise PSD estimate and can thus be used in speech enhancement systems for communication purposes. An extensive performance comparison has been carried out with ten causal state-of-the-art noise tracking algorithms taken from the literature and categorized acc. to applied techniques. The experiments showed that the proposed DNN-based noise PSD tracker outperforms all competing methods with respect to all tested performance measures, which include the noise tracking performance and the performance of a speech enhancement system employing the noise tracking component.},
  author       = {Chinaev, Aleksej and Heymann, Jahn and Drude, Lukas and Haeb-Umbach, Reinhold},
  booktitle    = {12. ITG Fachtagung Sprachkommunikation (ITG 2016)},
  title        = {Noise-Presence-Probability-Based Noise {PSD} Estimation by Using {DNNs}},
  year         = {2016},
}

@inproceedings{11751,
  author       = {Drude, Lukas and Boeddeker, Christoph and Haeb-Umbach, Reinhold},
  booktitle    = {Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)},
  title        = {Blind Speech Separation based on Complex Spherical k-Mode Clustering},
  year         = {2016},
}

@inproceedings{11756,
  abstract     = {Although complex-valued neural networks (CVNNs) -- networks which can operate with complex arithmetic -- have been around for a while, they have not been given reconsideration since the breakthrough of deep network architectures. This paper presents a critical assessment whether the novel tool set of deep neural networks (DNNs) should be extended to complex-valued arithmetic. Indeed, with DNNs making inroads in speech enhancement tasks, the use of complex-valued input data, specifically the short-time Fourier transform coefficients, is an obvious consideration. In particular when it comes to performing tasks that heavily rely on phase information, such as acoustic beamforming, complex-valued algorithms are omnipresent. In this contribution we recapitulate backpropagation in CVNNs, develop complex-valued network elements, such as the split-rectified non-linearity, and compare real- and complex-valued networks on a beamforming task. We find that CVNNs hardly provide a performance gain and conclude that the effort of developing the complex-valued counterparts of the building blocks of modern deep or recurrent neural networks can hardly be justified.},
  author       = {Drude, Lukas and Raj, Bhiksha and Haeb-Umbach, Reinhold},
  booktitle    = {INTERSPEECH 2016, San Francisco, USA},
  title        = {On the appropriateness of complex-valued neural networks for speech enhancement},
  year         = {2016},
}

@inproceedings{11771,
  abstract     = {This paper is concerned with speech presence probability estimation employing an explicit model of the temporal and spectral correlations of speech. An undirected graphical model is introduced, based on a Factor Graph formulation. It is shown that this undirected model cures some of the theoretical issues of an earlier directed graphical model. Furthermore, we formulate a message passing inference scheme based on an approximate graph factorization, identify this inference scheme as a particular message passing schedule based on the turbo principle and suggest further alternative schedules. The experiments show an improved performance over speech presence probability estimation based on an IID assumption, and a slightly better performance of the turbo schedule over the alternatives.},
  author       = {Glarner, Thomas and Mahdi Momenzadeh, Mohammad and Drude, Lukas and Haeb-Umbach, Reinhold},
  booktitle    = {12. ITG Fachtagung Sprachkommunikation (ITG 2016)},
  title        = {Factor Graph Decoding for Speech Presence Probability Estimation},
  year         = {2016},
}

@inproceedings{11812,
  author       = {Heymann, Jahn and Drude, Lukas and Haeb-Umbach, Reinhold},
  booktitle    = {Proc. IEEE Intl. Conf. on Acoustics, Speech and Signal Processing (ICASSP)},
  title        = {Neural Network Based Spectral Mask Estimation for Acoustic Beamforming},
  year         = {2016},
}

@inproceedings{11829,
  abstract     = {This contribution investigates Direction of Arrival (DoA) estimation using linearly arranged microphone arrays. We are going to develop a model for the DoA estimation error in a reverberant scenario and show the existence of a bias, that is a consequence of the linear arrangement and limited field of view (FoV) bias: First, the limited FoV leading to a clipping of the measurements, and, second, the angular distribution of the signal energy of the reflections being non-uniform. Since both issues are a consequence of the linear arrangement of the sensors, the bias arises largely independent of the kind of DoA estimator. The experimental evaluation demonstrates the existence of the bias for a selected number of DoA estimation methods and proves that the prediction from the developed theoretical model matches the simulation results.},
  author       = {Jacob, Florian and Haeb-Umbach, Reinhold},
  booktitle    = {12. ITG Fachtagung Sprachkommunikation (ITG 2016)},
  title        = {On the Bias of Direction of Arrival Estimation Using Linear Microphone Arrays},
  year         = {2016},
}

@inproceedings{11834,
  abstract     = {We present a system for the 4th CHiME challenge which significantly increases the performance for all three tracks with respect to the provided baseline system. The front-end uses a bi-directional Long Short-Term Memory (BLSTM)-based neural network to estimate signal statistics. These then steer a Generalized Eigenvalue beamformer. The back-end consists of a 22 layer deep Wide Residual Network and two extra BLSTM layers. Working on a whole utterance instead of frames allows us to refine Batch-Normalization. We also train our own BLSTM-based language model. Adding a discriminative speaker adaptation leads to further gains. The final system achieves a word error rate on the six channel real test data of 3.48\%. For the two channel track we achieve 5.96\% and for the one channel track 9.34\%. This is the best reported performance on the challenge achieved by a single system, i.e., a configuration, which does not combine multiple systems. At the same time, our system is independent of the microphone configuration. We can thus use the same components for all three tracks.},
  author       = {Heymann, Jahn and Drude, Lukas and Haeb-Umbach, Reinhold},
  booktitle    = {Computer Speech and Language},
  title        = {Wide Residual {BLSTM} Network with Discriminative Speaker Adaptation for Robust Speech Recognition},
  year         = {2016},
}

@article{11840,
  author       = {Kinoshita, Keisuke and Delcroix, Marc and Gannot, Sharon and Habets, Emanuel A. P. and Haeb-Umbach, Reinhold and Kellermann, Walter and Leutnant, Volker and Maas, Roland and Nakatani, Tomohiro and Raj, Bhiksha and Sehr, Armin and Yoshioka, Takuya},
  journal      = {EURASIP Journal on Advances in Signal Processing},
  title        = {A summary of the {REVERB} challenge: state-of-the-art and remaining challenges in reverberant speech processing research},
  year         = {2016},
}

@article{11886,
  abstract     = {Today, we are often surrounded by devices with one or more microphones, such as smartphones, laptops, and wireless microphones. If they are part of an acoustic sensor network, their distribution in the environment can be beneficially exploited for various speech processing tasks. However, applications like speaker localization, speaker tracking, and speech enhancement by beamforming avail themselves of the geometrical configuration of the sensors. Therefore, acoustic microphone geometry calibration has recently become a very active field of research. This article provides an application-oriented, comprehensive survey of existing methods for microphone position self-calibration, which will be categorized by the measurements they use and the scenarios they can calibrate. Selected methods will be evaluated comparatively with real-world recordings.},
  author       = {Plinge, Axel and Jacob, Florian and Haeb-Umbach, Reinhold and Fink, Gernot A.},
  issn         = {1053-5888},
  journal      = {IEEE Signal Processing Magazine},
  keywords     = {Acoustic sensors, Microphones, Portable computers, Smart phones, Wireless communication, Wireless sensor networks},
  number       = {4},
  pages        = {14--29},
  title        = {Acoustic Microphone Geometry Calibration: An overview and experimental evaluation of state-of-the-art algorithms},
  doi          = {10.1109/MSP.2016.2555198},
  volume       = {33},
  year         = {2016},
}

@phdthesis{1190,
  author       = {Isenberg, Tobias},
  school       = {Universität Paderborn},
  title        = {Induction-based Verification of Timed Systems},
  year         = {2016},
}

@inproceedings{11908,
  abstract     = {This paper describes automatic speech recognition (ASR) systems developed jointly by RWTH, UPB and FORTH for the 1ch, 2ch and 6ch track of the 4th CHiME Challenge. In the 2ch and 6ch tracks the final system output is obtained by a Confusion Network Combination (CNC) of multiple systems. The Acoustic Model (AM) is a deep neural network based on Bidirectional Long Short-Term Memory (BLSTM) units. The systems differ by front ends and training sets used for the acoustic training. The model for the 1ch track is trained without any preprocessing. For each front end we trained and evaluated individual acoustic models. We compare the ASR performance of different beamforming approaches: a conventional superdirective beamformer [1] and an MVDR beamformer as in [2], where the steering vector is estimated based on [3]. Furthermore we evaluated a BLSTM supported Generalized Eigenvalue beamformer using NN-GEV [4]. The back end is implemented using RWTH's open-source toolkits RASR [5], RETURNN [6] and rwthlm [7]. We rescore lattices with a Long Short-Term Memory (LSTM) based language model. The overall best results are obtained by a system combination that includes the lattices from the system of UPB's submission [8]. Our final submission scored second in each of the three tracks of the 4th CHiME Challenge.},
  author       = {Menne, Tobias and Heymann, Jahn and Alexandridis, Anastasios and Irie, Kazuki and Zeyer, Albert and Kitza, Markus and Golik, Pavel and Kulikov, Ilia and Drude, Lukas and Schlüter, Ralf and Ney, Hermann and Haeb-Umbach, Reinhold and Mouchtaris, Athanasios},
  booktitle    = {Computer Speech and Language},
  title        = {The {RWTH/UPB/FORTH} System Combination for the 4th {CHiME} Challenge Evaluation},
  year         = {2016},
}

@inproceedings{11920,
  abstract     = {In this paper we demonstrate an algorithm to learn words from speech using non-parametric Bayesian hierarchical models in an unsupervised setting. We exploit the assumption of a hierarchical structure of speech, namely the formation of spoken words as a sequence of phonemes. We employ the Nested Hierarchical Pitman-Yor Language Model, which allows an a priori unknown and possibly unlimited number of words. We assume the n-gram probabilities of words, the m-gram probabilities of phoneme sequences in words and the phoneme sequences of the words themselves as latent variables to be learned. We evaluate the algorithm on a cross language task using an existing speech recognizer trained on English speech to decode speech in the Xitsonga language supplied for the 2015 ZeroSpeech challenge. We apply the learning algorithm on the resulting phoneme graphs and achieve the highest token precision and F score compared to present systems.},
  author       = {Walter, Oliver and Haeb-Umbach, Reinhold},
  booktitle    = {38th German Conference on Pattern Recognition (GCPR 2016)},
  title        = {Unsupervised Word Discovery from Speech using {Bayesian} Hierarchical Models},
  year         = {2016},
}

@inproceedings{11975,
  author       = {Blobel, Johannes and Sommer, Christoph and Dressler, Falko},
  booktitle    = {2016 IEEE International Conference on Communications (ICC)},
  isbn         = {9781479966646},
  title        = {Protocol options for low power sensor network {MAC} using wake-up receivers with duty cycling},
  doi          = {10.1109/icc.2016.7511318},
  year         = {2016},
}

@inproceedings{11988,
  author       = {Campolo, Claudia and Sommer, Christoph and Dressler, Falko and Molinaro, Antonella},
  booktitle    = {2016 IEEE International Conference on Communications (ICC)},
  isbn         = {9781479966646},
  title        = {On the impact of adjacent channel interference in multi-channel {VANETs}},
  doi          = {10.1109/icc.2016.7511085},
  year         = {2016},
}

@inproceedings{12011,
  author       = {Hagenauer, Florian and Sommer, Christoph and Merschjohann, Simon and Higuchi, Takamasa and Dressler, Falko and Altintas, Onur},
  booktitle    = {2016 IEEE Conference on Computer Communications Workshops (INFOCOM WKSHPS)},
  isbn         = {9781467399555},
  title        = {Cars as the base for service discovery and provision in highly dynamic networks},
  doi          = {10.1109/infcomw.2016.7562101},
  year         = {2016},
}

