% System description for the single-array and multiple-array tracks of the 5th CHiME Challenge.
% Fields are single-braced on purpose: an extra brace pair around the author list
% would stop BibTeX from splitting the names at " and ".
@inproceedings{11876,
  author    = {Kitza, Markus and Michel, Wilfried and Boeddeker, Christoph and Heitkaemper, Jens and Menne, Tobias and Schl{\"u}ter, Ralf and Ney, Hermann and Schmalenstroeer, Joerg and Drude, Lukas and Heymann, Jahn and Haeb-Umbach, Reinhold},
  title     = {The {RWTH/UPB} System Combination for the {CHiME} 2018 Workshop},
  booktitle = {Proc.\ {CHiME} 2018 Workshop on Speech Processing in Everyday Environments, Hyderabad, India},
  year      = {2018},
  abstract  = {This paper describes the systems for the single-array track and the multiple-array track of the 5th CHiME Challenge. The final system is a combination of multiple systems, using Confusion Network Combination (CNC). The different systems presented here are utilizing different front-ends and training sets for a Bidirectional Long Short-Term Memory (BLSTM) Acoustic Model (AM). The front-end was replaced by enhancements provided by Paderborn University [1]. The back-end has been implemented using RASR [2] and RETURNN [3]. Additionally, a system combination including the hypothesis word graphs from the system of the submission [1] has been performed, which results in the final best system.},
}

% Benchmark of neural network architectures on a Raspberry Pi based acoustic sensor node.
% Author list is single-braced so BibTeX can split it into individual names.
@inproceedings{11836,
  author    = {Ebbers, Janek and Heitkaemper, Jens and Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold},
  title     = {Benchmarking Neural Network Architectures for Acoustic Sensor Networks},
  booktitle = {{ITG} 2018, Oldenburg, Germany},
  year      = {2018},
  abstract  = {Due to their distributed nature wireless acoustic sensor networks offer great potential for improved signal acquisition, processing and classification for applications such as monitoring and surveillance, home automation, or hands-free telecommunication. To reduce the communication demand with a central server and to raise the privacy level it is desirable to perform processing at node level. The limited processing and memory capabilities on a sensor node, however, stand in contrast to the compute and memory intensive deep learning algorithms used in modern speech and audio processing. In this work, we perform benchmarking of commonly used convolutional and recurrent neural network architectures on a Raspberry Pi based acoustic sensor node. We show that it is possible to run medium-sized neural network topologies used for speech enhancement and speech recognition in real time. For acoustic event recognition, where predictions in a lower temporal resolution are sufficient, it is even possible to run current state-of-the-art deep convolutional models with a real-time-factor of 0.11.},
}

% Analytic study of sampling rate offsets (SROs) in MVDR beamforming.
% Only the acronym in the title is brace-protected; the style decides the remaining casing.
@inproceedings{11839,
  author    = {Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold},
  title     = {Insights into the Interplay of Sampling Rate Offsets and {MVDR} Beamforming},
  booktitle = {{ITG} 2018, Oldenburg, Germany},
  year      = {2018},
  abstract  = {It has been experimentally verified that sampling rate offsets (SROs) between the input channels of an acoustic beamformer have a detrimental effect on the achievable SNR gains. In this paper we derive an analytic model to study the impact of SRO on the estimation of the spatial noise covariance matrix used in MVDR beamforming. It is shown that a perfect compensation of the SRO is impossible if the noise covariance matrix is estimated by time averaging, even if the SRO is perfectly known. The SRO should therefore be compensated for prior to beamformer coefficient estimation. We present a novel scheme where SRO compensation and beamforming closely interact, saving some computational effort compared to separate SRO adjustment followed by acoustic beamforming.},
}

% FFT-based overlap-save realization of polyphase-Farrow resampling filters.
% The export carried a placeholder ISSN ("null"); it is omitted here -- TODO add the real ISSN/ISBN if known.
@inproceedings{15952,
  author    = {Schmalenstroeer, Joerg and Chinaev, Aleksej and Enzner, Gerald},
  title     = {Fast and Accurate Audio Resampling for Acoustic Sensor Networks by Polyphase-{Farrow} Filters with {FFT} Realization},
  booktitle = {Speech Communication; 13th {ITG}-Symposium},
  pages     = {1--5},
  year      = {2018},
  abstract  = {Arbitrary sampling rate conversion has already received considerable attention in the past, but still lacks an equivalent representation of the effective time-dilation process in the block frequency domain. Good sampling rate converters in the time domain have been known, for instance, in terms of time-varying 'Sinc' or fixed 'Farrow' polynomial filters. The former can deliver nearly exact conversion at high complexity, while the latter has pronounced computational efficiency with limited accuracy. Only recently, it was shown that a composite 'polyphase Farrow' form with high resampling precision can be implemented with quasi-fixed filters that operate at the input sampling rate. We therefore propose to capitalize from that fixed-filter architecture in that we translate the polyphase-Farrow filters into an equivalent FFT-based overlap-save form. Experimental evaluation and comparison with other state-of-the-art frequency-domain approaches then proves currently the best price-performance ratio of the proposed algorithm. It is thus an ideal candidate for the new framework of acoustic sensor networks that critically rests upon fast and accurate alignment of autonomous sampling processes.},
}

% Entry describing an invention (abstract reads like a patent).
% NOTE(review): no patent number, howpublished or url is present in the export -- TODO add if available.
@misc{12081,
  author   = {Jacob, Florian and Schmalenstroeer, Joerg},
  title    = {Building or Enclosure Termination Closing and/or Opening Apparatus, and Method for Operating a Building or Enclosure Termination},
  year     = {2017},
  abstract = {The invention relates to a building or enclosure termination opening and/or closing apparatus having communication signed or encrypted by means of a key, and to a method for operating such. To allow simple, convenient and secure use by exclusively authorised users, the apparatus comprises: a first and a second user terminal, with secure forwarding of a time-limited key from the first to the second user terminal being possible. According to an alternative, individual keys are generated by a user identification and a secret device key.},
}

% Coherence drift based sampling rate offset estimation, evaluated with MVDR and maximum-SNR beamformers.
% Author list is single-braced so BibTeX can split it into individual names.
@inproceedings{11895,
  author    = {Schmalenstroeer, Joerg and Heymann, Jahn and Drude, Lukas and Boeddeker, Christoph and Haeb-Umbach, Reinhold},
  title     = {Multi-Stage Coherence Drift Based Sampling Rate Synchronization for Acoustic Beamforming},
  booktitle = {{IEEE} 19th International Workshop on Multimedia Signal Processing ({MMSP})},
  year      = {2017},
  abstract  = {Multi-channel speech enhancement algorithms rely on a synchronous sampling of the microphone signals. This, however, cannot always be guaranteed, especially if the sensors are distributed in an environment. To avoid performance degradation the sampling rate offset needs to be estimated and compensated for. In this contribution we extend the recently proposed coherence drift based method in two important directions. First, the increasing phase shift in the short-time Fourier transform domain is estimated from the coherence drift in a Matched Filter-like fashion, where intermediate estimates are weighted by their instantaneous SNR. Second, an observed bias is removed by iterating between offset estimation and compensation by resampling a couple of times. The effectiveness of the proposed method is demonstrated by speech recognition results on the output of a beamformer with and without sampling rate offset compensation between the input channels. We compare MVDR and maximum-SNR beamformers in reverberant environments and further show that both benefit from a novel phase normalization, which we also propose in this contribution.},
}

% Localization precision limits (CRLB) for directional Bluetooth low energy beacons.
% The product name in the title is brace-protected so sentence-casing styles keep its capitals.
@inproceedings{11890,
  author    = {Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold},
  title     = {Investigations into {Bluetooth Low Energy} Localization Precision Limits},
  booktitle = {24th European Signal Processing Conference ({EUSIPCO} 2016)},
  year      = {2016},
  abstract  = {In this paper we study the influence of directional radio patterns of Bluetooth low energy (BLE) beacons on smartphone localization accuracy and beacon network planning. A two-dimensional model of the power emission characteristic is derived from measurements of the radiation pattern of BLE beacons carried out in an RF chamber. The Cramer-Rao lower bound (CRLB) for position estimation is then derived for this directional power emission model. With this lower bound on the RMS positioning error the coverage of different beacon network configurations can be evaluated. For near-optimal network planning an evolutionary optimization algorithm for finding the best beacon placement is presented.},
}

% WiFi fingerprinting based indoor localization (no abstract in the export).
% Author list is single-braced so BibTeX can split it into individual names.
@inproceedings{11874,
  author    = {Hoang, Manh Kha and Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold},
  title     = {Aligning Training Models with Smartphone Properties in {WiFi} Fingerprinting Based Indoor Localization},
  booktitle = {40th International Conference on Acoustics, Speech and Signal Processing ({ICASSP} 2015)},
  year      = {2015},
}

% Journal article on two-stage synchronization of a wireless acoustic sensor network.
% The doi field holds the bare DOI (no resolver URL); styles add the link prefix themselves.
% NOTE(review): volume and page range are missing from the export (pages was an empty " -- ") -- TODO add from the DOI record.
@article{11898,
  author   = {Schmalenstroeer, Joerg and Jebramcik, Patrick and Haeb-Umbach, Reinhold},
  title    = {A Combined Hardware-Software Approach for Acoustic Sensor Network Synchronization},
  journal  = {Signal Processing},
  year     = {2014},
  doi      = {10.1016/j.sigpro.2014.06.030},
  issn     = {0165-1684},
  keywords = {Gossip algorithm},
  abstract = {In this paper we present an approach for synchronizing a wireless acoustic sensor network using a two-stage procedure. First the clock frequency and phase differences between pairs of nodes are estimated employing a two-way message exchange protocol. The estimates are further improved in a Kalman filter with a dedicated observation error model. In the second stage network-wide synchronization is achieved by means of a gossiping algorithm which estimates the average clock frequency and phase of the sensor nodes. These averages are viewed as frequency and phase of a virtual master clock, to which the clocks of the sensor nodes have to be adjusted. The amount of adjustment is computed in a specific control loop. While these steps are done in software, the actual sampling rate correction is carried out in hardware by using an adjustable frequency synthesizer. Experimental results obtained from hardware devices and software simulations of large scale networks are presented.},
}

% Gossiping-based sampling clock synchronization for distributed microphones.
% The abstract is stored without surrounding quotation marks; they were an export artifact, not part of the text.
@inproceedings{11897,
  author    = {Schmalenstroeer, Joerg and Jebramcik, Patrick and Haeb-Umbach, Reinhold},
  title     = {A Gossiping Approach to Sampling Clock Synchronization in Wireless Acoustic Sensor Networks},
  booktitle = {39th International Conference on Acoustics, Speech and Signal Processing ({ICASSP} 2014)},
  year      = {2014},
  abstract  = {In this paper we present an approach for synchronizing the sampling clocks of distributed microphones over a wireless network. The proposed system uses a two stage procedure. It first employs a two-way message exchange algorithm to estimate the clock phase and frequency difference between two nodes and then uses a gossiping algorithm to estimate a virtual master clock, to which all sensor nodes synchronize. Simulation results are presented for networks of different topology and size, showing the effectiveness of our approach.},
}

% Online estimation of the measurement error model used in Kalman filter based clock synchronization.
% The abstract is stored without surrounding quotation marks; they were an export artifact, not part of the text.
@inproceedings{11903,
  author    = {Schmalenstroeer, Joerg and Zhao, Weile and Haeb-Umbach, Reinhold},
  title     = {Online Observation Error Model Estimation for Acoustic Sensor Network Synchronization},
  booktitle = {11. {ITG} Fachtagung Sprachkommunikation ({ITG} 2014)},
  year      = {2014},
  abstract  = {Acoustic sensor network clock synchronization via time stamp exchange between the sensor nodes is not accurate enough for many acoustic signal processing tasks, such as speaker localization. To improve synchronization accuracy it has therefore been proposed to employ a Kalman Filter to obtain improved frequency deviation and phase offset estimates. The estimation requires a statistical model of the errors of the measurements obtained from the time stamp exchange algorithm. These errors are caused by random transmission delays and hardware effects and are thus network specific. In this contribution we develop an algorithm to estimate the parameters of the measurement error model alongside the Kalman filter based sampling clock synchronization, employing the Expectation Maximization algorithm. Simulation results demonstrate that the online estimation of the error model parameters leads only to a small degradation of the synchronization performance compared to a perfectly known observation error model.},
}

% Technical report on unsupervised acoustic pattern discovery via dynamic time warping.
% The report identifier lives in the number field instead of being appended to the title.
% NOTE(review): techreport requires an institution, which the export does not provide -- TODO add.
@techreport{11926,
  author   = {Walter, Oliver and Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold},
  title    = {A Novel Initialization Method for Unsupervised Learning of Acoustic Patterns in Speech},
  number   = {FGNT-2013-01},
  year     = {2013},
  abstract = {In this paper we present a novel initialization method for unsupervised learning of acoustic patterns in recordings of continuous speech. The pattern discovery task is solved by dynamic time warping whose performance we improve by a smart starting point selection. This enables a more accurate discovery of patterns compared to conventional approaches. After graph-based clustering the patterns are employed for training hidden Markov models for an unsupervised speech acquisition. By iterating between model training and decoding in an EM-like framework the word accuracy is continuously improved. On the TIDIGITS corpus we achieve a word error rate of about 13 percent by the proposed unsupervised pattern discovery approach, which neither assumes knowledge of the acoustic units nor of the labels of the training data.},
}

% Maximum-likelihood geometry calibration of distributed microphone arrays from direction-of-arrival measurements.
% Venue spelling follows the other ICASSP entries of this file ("Acoustics, Speech and Signal Processing").
@inproceedings{11832,
  author    = {Jacob, Florian and Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold},
  title     = {{DoA}-Based Microphone Array Position Self-Calibration Using Circular Statistic},
  booktitle = {38th International Conference on Acoustics, Speech and Signal Processing ({ICASSP} 2013)},
  pages     = {116--120},
  year      = {2013},
  doi       = {10.1109/ICASSP.2013.6637620},
  issn      = {1520-6149},
  keywords  = {Geometry calibration, microphone arrays, position self-calibration},
  abstract  = {In this paper we propose an approach to retrieve the absolute geometry of an acoustic sensor network, consisting of spatially distributed microphone arrays, from reverberant speech input. The calibration relies on direction of arrival measurements of the individual arrays. The proposed calibration algorithm is derived from a maximum-likelihood approach employing circular statistics. Since a sensor node consists of a microphone array with known intra-array geometry, we are able to obtain an absolute geometry estimate, including angles and distances. Simulation results demonstrate the effectiveness of the approach.},
}

% Combined hardware/software sampling clock synchronization with a Kalman filter post-processing stage.
% Author list is single-braced so BibTeX can split it into individual names.
@inproceedings{11891,
  author    = {Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold},
  title     = {Sampling Rate Synchronisation in Acoustic Sensor Networks with a Pre-Trained Clock Skew Error Model},
  booktitle = {21st European Signal Processing Conference ({EUSIPCO} 2013)},
  year      = {2013},
  keywords  = {synchronization, acoustic sensor network},
  abstract  = {In this paper we present a combined hardware/software approach for synchronizing the sampling clocks of an acoustic sensor network. A first clock frequency offset estimate is obtained by a time stamp exchange protocol with a low data rate and computational requirements. The estimate is then postprocessed by a Kalman filter which exploits the specific properties of the statistics of the frequency offset estimation error. In long term experiments the deviation between the sampling oscillators of two sensor nodes never exceeded half a sample with a wired and with a wireless link between the nodes. The achieved precision enables the estimation of time difference of arrival values across different hardware devices without sharing a common sampling hardware.},
}

% Server based indoor navigation using Wireless-LAN RSSI and inertial smartphone sensors.
% NOTE(review): the fourth author is written "Tran Vu, Dang Hai" to match entry 11817 of this file;
% the export had "Vu, Dang Hai Tran" here -- confirm the preferred surname split.
@inproceedings{11818,
  author    = {Hoang, Manh Kha and Schmitz, Sarah and Drueke, Christian and Tran Vu, Dang Hai and Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold},
  title     = {Server Based Indoor Navigation Using {RSSI} and Inertial Sensor Information},
  booktitle = {10th Workshop on Positioning Navigation and Communication ({WPNC} 2013)},
  pages     = {1--6},
  year      = {2013},
  doi       = {10.1109/WPNC.2013.6533263},
  keywords  = {SQL, navigation, smart phones, wireless LAN, RSSI, SQL database, complete multilevel office building, inertial sensor information, inertial smartphone sensors, map renderer, received signal strength index information, relative position estimates, server based indoor navigation, step detection, wireless-LAN access points, Smartphone, fingerprint, indoor navigation, map tile},
  abstract  = {In this paper we present a system for indoor navigation based on received signal strength index information of Wireless-LAN access points and relative position estimates. The relative position information is gathered from inertial smartphone sensors using a step detection and an orientation estimate. Our map data is hosted on a server employing a map renderer and a SQL database. The database includes a complete multilevel office building, within which the user can navigate. During navigation, the client retrieves the position estimate from the server, together with the corresponding map tiles to visualize the user's position on the smartphone display.},
}

% Modified hidden Markov model with "pseudo" states for WiFi/inertial indoor positioning.
% The quotation marks around "pseudo" in the abstract use LaTeX quote syntax; the export had mis-encoded bytes there.
@inproceedings{11817,
  author    = {Hoang, Manh Kha and Schmalenstroeer, Joerg and Drueke, Christian and Tran Vu, Dang Hai and Haeb-Umbach, Reinhold},
  title     = {A Hidden {Markov} Model for Indoor User Tracking Based on {WiFi} Fingerprinting and Step Detection},
  booktitle = {21st European Signal Processing Conference ({EUSIPCO} 2013)},
  year      = {2013},
  abstract  = {In this paper we present a modified hidden Markov model (HMM) for the fusion of received signal strength index (RSSI) information of WiFi access points and relative position information which is obtained from the inertial sensors of a smartphone for indoor positioning. Since the states of the HMM represent the potential user locations, their number determines the quantization error introduced by discretizing the allowable user positions through the use of the HMM. To reduce this quantization error we introduce ``pseudo'' states, whose emission probability, which models the RSSI measurements at this location, is synthesized from those of the neighboring states of which a Gaussian emission probability has been estimated during the training phase. The experimental results demonstrate the effectiveness of this approach. By introducing on average two pseudo states per original HMM state the positioning error could be significantly reduced without increasing the training effort.},
}

% DoA-based geometry calibration of distributed microphone arrays with RANSAC outlier handling.
% Author list is single-braced so BibTeX can split it into individual names.
@inproceedings{11833,
  author    = {Jacob, Florian and Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold},
  title     = {Microphone Array Position Self-Calibration from Reverberant Speech Input},
  booktitle = {International Workshop on Acoustic Signal Enhancement ({IWAENC} 2012)},
  year      = {2012},
  keywords  = {Unsupervised, geometry calibration, microphone arrays, position self-calibration},
  abstract  = {In this paper we propose an approach to retrieve the geometry of an acoustic sensor network consisting of spatially distributed microphone arrays from unconstrained speech input. The calibration relies on Direction of Arrival (DoA) measurements which do not require a clock synchronization among the sensor nodes. The calibration problem is formulated as a cost function optimization task, which minimizes the squared differences between measured and predicted observations and additionally avoids the existence of minima that correspond to mirrored versions of the actual sensor orientations. Further, outlier measurements caused by reverberation are mitigated by a Random Sample Consensus (RANSAC) approach. The experimental results show a mean positioning error of at most 25 cm even in highly reverberant environments.},
}

% Car navigation by fusing smartphone sensors, CAN-Bus data and GPS in an error-state Kalman filter.
% Author list is single-braced so BibTeX can split it into individual names.
@inproceedings{11925,
  author    = {Walter, Oliver and Schmalenstroeer, Joerg and Engler, Andreas and Haeb-Umbach, Reinhold},
  title     = {Smartphone-Based Sensor Fusion for Improved Vehicular Navigation},
  booktitle = {9th Workshop on Positioning Navigation and Communication ({WPNC} 2012)},
  year      = {2012},
  keywords  = {Smartphone, navigation, sensor fusion},
  abstract  = {In this paper we present a system for car navigation by fusing sensor data on an Android smartphone. The key idea is to use both the internal sensors of the smartphone (e.g., gyroscope) and sensor data from the car (e.g., speed information) to support navigation via GPS. To this end we employ a CAN-Bus-to-Bluetooth adapter to establish a wireless connection between the smartphone and the CAN-Bus of the car. On the smartphone a strapdown algorithm and an error-state Kalman filter are used to fuse the different sensor data streams. The experimental results show that the system is able to maintain higher positioning accuracy during GPS dropouts, thus improving the availability and reliability, compared to GPS-only solutions.},
}

% Joint segmental dynamic time warping and K-means++ clustering for unsupervised acoustic event learning.
% The algorithm name in the title is brace-protected so sentence-casing styles keep the capital K.
@inproceedings{11889,
  author    = {Schmalenstroeer, Joerg and Bartek, Markus and Haeb-Umbach, Reinhold},
  title     = {Unsupervised Learning of Acoustic Events Using Dynamic Time Warping and Hierarchical {K-means++} Clustering},
  booktitle = {Interspeech 2011},
  year      = {2011},
  abstract  = {In this paper we propose to jointly consider Segmental Dynamic Time Warping and distance clustering for the unsupervised learning of acoustic events. As a result, the computational complexity increases only linearly with the database size compared to a quadratic increase in a sequential setup, where all pairwise SDTW distances between segments are computed prior to clustering. Further, we discuss options for seed value selection for clustering and show that drawing seeds with a probability proportional to the distance from the already drawn seeds, known as K-means++ clustering, results in a significantly higher probability of finding representatives of each of the underlying classes, compared to the commonly used draws from a uniform distribution. Experiments are performed on an acoustic event classification and an isolated digit recognition task, where on the latter the final word accuracy approaches that of supervised training.},
}

% Geometry calibration of microphone arrays in 2D from DoA and TDOA measurements.
% Author list is single-braced so BibTeX can split it into individual names.
@inproceedings{11896,
  author    = {Schmalenstroeer, Joerg and Jacob, Florian and Haeb-Umbach, Reinhold and Hennecke, Marius and Fink, Gernot A.},
  title     = {Unsupervised Geometry Calibration of Acoustic Sensor Networks Using Source Correspondences},
  booktitle = {Interspeech 2011},
  year      = {2011},
  abstract  = {In this paper we propose a procedure for estimating the geometric configuration of an arbitrary acoustic sensor placement. It determines the position and the orientation of microphone arrays in 2D while locating a source by direction-of-arrival (DoA) estimation. Neither artificial calibration signals nor unnatural user activity are required. The problem of scale indeterminacy inherent to DoA-only observations is solved by adding time difference of arrival (TDOA) measurements. The geometry calibration method is numerically stable and delivers precise results in moderately reverberated rooms. Simulation results are confirmed by laboratory experiments.},
}