@inproceedings{47605,
  abstract     = {{In diesem Beitrag werden die Elemente „Begleitseminar“ und „Begleitforschungsseminar“ des Praxissemesters für Lehramtsstudierende der beruflichen Fachrichtungen Maschinenbautechnik und Elektrotechnik an der Universität Paderborn im Hinblick auf die Veränderungen durch die Covid-19-Pandemie erläutert.}},
  author       = {{Jonas-Ahrend, Gabriela and Vernholz, Mats and Temmen, Katrin}},
  location     = {{Osnabrück}},
  title        = {{{Digitale Begleitseminare im Praxissemester der gewerblich-technischen Fachrichtungen}}},
  year         = {{2021}},
}

@inproceedings{24000,
  author       = {{Heitkaemper, Jens and Schmalenstroeer, Joerg and Ion, Valentin and Haeb-Umbach, Reinhold}},
  booktitle    = {{Speech Communication; 14th ITG-Symposium}},
  pages        = {{1--5}},
  title        = {{{A Database for Research on Detection and Enhancement of Speech Transmitted over HF links}}},
  year         = {{2021}},
}

@inproceedings{44843,
  abstract     = {{Unsupervised blind source separation methods do not require a training phase
and thus cannot suffer from a train-test mismatch, which is a common concern in
neural network based source separation. The unsupervised techniques can be
categorized in two classes, those building upon the sparsity of speech in the
Short-Time Fourier transform domain and those exploiting non-Gaussianity or
non-stationarity of the source signals. In this contribution, spatial mixture
models which fall in the first category and independent vector analysis (IVA)
as a representative of the second category are compared w.r.t. their separation
performance and the performance of a downstream speech recognizer on a
reverberant dataset of reasonable size. Furthermore, we introduce a serial
concatenation of the two, where the result of the mixture model serves as
initialization of IVA, which achieves significantly better WER performance than
each algorithm individually and even approaches the performance of a much more
complex neural network based technique.}},
  author       = {{Boeddeker, Christoph and Rautenberg, Frederik and Haeb-Umbach, Reinhold}},
  booktitle    = {{ITG Conference on Speech Communication}},
  location     = {{Kiel}},
  title        = {{{A Comparison and Combination of Unsupervised Blind Source Separation Techniques}}},
  year         = {{2021}},
}

@inproceedings{28259,
  author       = {{Boeddeker, Christoph and Zhang, Wangyou and Nakatani, Tomohiro and Kinoshita, Keisuke and Ochiai, Tsubasa and Delcroix, Marc and Kamo, Naoyuki and Qian, Yanmin and Haeb-Umbach, Reinhold}},
  booktitle    = {{ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}},
  title        = {{{Convolutive Transfer Function Invariant SDR Training Criteria for Multi-Channel Reverberant Speech Separation}}},
  doi          = {{10.1109/icassp39728.2021.9414661}},
  year         = {{2021}},
}

@inproceedings{23998,
  author       = {{Schmalenstroeer, Joerg and Heitkaemper, Jens and Ullmann, Joerg and Haeb-Umbach, Reinhold}},
  booktitle    = {{29th European Signal Processing Conference (EUSIPCO)}},
  pages        = {{1--5}},
  title        = {{{Open Range Pitch Tracking for Carrier Frequency Difference Estimation from HF Transmitted Speech}}},
  year         = {{2021}},
}

@article{22528,
  abstract     = {{Due to the ad hoc nature of wireless acoustic sensor networks, the position of the sensor nodes is typically unknown. This contribution proposes a technique to estimate the position and orientation of the sensor nodes from the recorded speech signals. The method assumes that a node comprises a microphone array with synchronously sampled microphones rather than a single microphone, but does not require the sampling clocks of the nodes to be synchronized. From the observed audio signals, the distances between the acoustic sources and arrays, as well as the directions of arrival, are estimated. They serve as input to a non-linear least squares problem, from which both the sensor nodes’ positions and orientations, as well as the source positions, are alternatingly estimated in an iterative process. Given one set of unknowns, i.e., either the source positions or the sensor nodes’ geometry, the other set of unknowns can be computed in closed-form. The proposed approach is computationally efficient and the first one, which employs both distance and directional information for geometry calibration in a common cost function. Since both distance and direction of arrival measurements suffer from outliers, e.g., caused by strong reflections of the sound waves on the surfaces of the room, we introduce measures to deemphasize or remove unreliable measurements. Additionally, we discuss modifications of our previously proposed deep neural network-based acoustic distance estimator, to account not only for omnidirectional sources but also for directional sources. Simulation results show good positioning accuracy and compare very favorably with alternative approaches from the literature.}},
  author       = {{Gburrek, Tobias and Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold}},
  issn         = {{1687-4722}},
  journal      = {{EURASIP Journal on Audio, Speech, and Music Processing}},
  title        = {{{Geometry calibration in wireless acoustic sensor networks utilizing DoA and distance information}}},
  doi          = {{10.1186/s13636-021-00210-x}},
  year         = {{2021}},
}

@inproceedings{23994,
  author       = {{Gburrek, Tobias and Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold}},
  booktitle    = {{ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}},
  title        = {{{Iterative Geometry Calibration from Distance Estimates for Wireless Acoustic Sensor Networks}}},
  doi          = {{10.1109/icassp39728.2021.9413831}},
  year         = {{2021}},
}

@inproceedings{23999,
  author       = {{Gburrek, Tobias and Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold}},
  booktitle    = {{Speech Communication; 14th ITG-Symposium}},
  pages        = {{1--5}},
  title        = {{{On Source-Microphone Distance Estimation Using Convolutional Recurrent Neural Networks}}},
  year         = {{2021}},
}

@inproceedings{23997,
  author       = {{Chinaev, Aleksej and Enzner, Gerald and Gburrek, Tobias and Schmalenstroeer, Joerg}},
  booktitle    = {{29th European Signal Processing Conference (EUSIPCO)}},
  pages        = {{1--5}},
  title        = {{{Online Estimation of Sampling Rate Offsets in Wireless Acoustic Sensor Networks with Packet Loss}}},
  year         = {{2021}},
}

@inproceedings{29304,
  abstract     = {{In this work we address disentanglement of style and content in speech signals. We propose a fully convolutional variational autoencoder employing two encoders: a content encoder and a style encoder. To foster disentanglement, we propose adversarial contrastive predictive coding. This new disentanglement method does neither need parallel data nor any supervision. We show that the proposed technique is capable of separating speaker and content traits into the two different representations and show competitive speaker-content disentanglement performance compared to other unsupervised approaches. We further demonstrate an increased robustness of the content representation against a train-test mismatch compared to spectral features, when used for phone recognition.}},
  author       = {{Ebbers, Janek and Kuhlmann, Michael and Cord-Landwehr, Tobias and Haeb-Umbach, Reinhold}},
  booktitle    = {{Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}},
  pages        = {{3860--3864}},
  title        = {{{Contrastive Predictive Coding Supported Factorized Variational Autoencoder for Unsupervised Learning of Disentangled Speech Representations}}},
  year         = {{2021}},
}

@inproceedings{29893,
  abstract     = {{Phase-shift modulated full bridge converters suffer from thermal imbalances of the inverter switches. The lagging leg switches are subject to larger commutation currents compared to those of the leading leg as the transformer current reduces in the freewheeling interval. Furthermore, after this interval, the energy in the series inductance may not be large enough to achieve zero-voltage switching (ZVS) for the leading leg. Both effects result in thermal imbalances. This paper analyzes the alternating-asymmetrical phase-shift modulation to achieve balanced conduction and switching losses for all four switches while showing that this modulation is easily implemented on standard DSPs. The modulation has been implemented to LLC converters where experimental measurement results proved its effectiveness for LLC converters by reducing the temperature deviation from 6.3 K to only 0.2 K such that the peak temperature is reduced from 95 °C to 92 °C. The paper also proves that the modulation can be utilized to improve the efficiency of LLC converters operated at very low gains while simultaneously reducing the junction temperature of all four switches compared to the conventional complementary modulation. Finally, EMI implications are analyzed, which show that the modulation may be beneficial for reducing the common-mode emissions around the operating frequency.}},
  author       = {{Rehlaender, Philipp and Unruh, Roland and Schafmeister, Frank and Böcker, Joachim}},
  booktitle    = {{2021 IEEE Applied Power Electronics Conference and Exposition (APEC)}},
  isbn         = {{978-1-7281-8950-5}},
  keywords     = {{Phase-Shifted Full Bridge, Full-Bridge Converter, Phase-Shift Control, Phase-Shift Modulation, LLC Converter, Thermal Balancing}},
  location     = {{Phoenix, AZ, USA}},
  publisher    = {{IEEE}},
  title        = {{{Alternating Asymmetrical Phase-Shift Modulation for Full-Bridge Converters with Balanced Switching Losses to Reduce Thermal Imbalances}}},
  doi          = {{10.1109/apec42165.2021.9487104}},
  year         = {{2021}},
}

@inproceedings{26770,
  abstract     = {{Automatic transcription of meetings requires handling of overlapped speech, which calls for continuous speech separation (CSS) systems. The uPIT criterion was proposed for utterance-level separation with neural networks and introduces the constraint that the total number of speakers must not exceed the number of output channels. When processing meeting-like data in a segment-wise manner, i.e., by separating overlapping segments independently and stitching adjacent segments to continuous output streams, this constraint has to be fulfilled for any segment. In this contribution, we show that this constraint can be significantly relaxed. We propose a novel graph-based PIT criterion, which casts the assignment of utterances to output channels in a graph coloring problem. It only requires that the number of concurrently active speakers must not exceed the number of output channels. As a consequence, the system can process an arbitrary number of speakers and arbitrarily long segments and thus can handle more diverse scenarios.
Further, the stitching algorithm for obtaining a consistent output order in neighboring segments is of less importance and can even be eliminated completely, not the least reducing the computational effort. Experiments on meeting-style WSJ data show improvements in recognition performance over using the uPIT criterion. }},
  author       = {{von Neumann, Thilo and Kinoshita, Keisuke and Boeddeker, Christoph and Delcroix, Marc and Haeb-Umbach, Reinhold}},
  booktitle    = {{Interspeech 2021}},
  keywords     = {{Continuous speech separation, automatic speech recognition, overlapped speech, permutation invariant training}},
  title        = {{{Graph-PIT: Generalized Permutation Invariant Training for Continuous Separation of Arbitrary Numbers of Speakers}}},
  doi          = {{10.21437/interspeech.2021-1177}},
  year         = {{2021}},
}

@inproceedings{29173,
  author       = {{von Neumann, Thilo and Boeddeker, Christoph and Kinoshita, Keisuke and Delcroix, Marc and Haeb-Umbach, Reinhold}},
  booktitle    = {{Speech Communication; 14th ITG Conference}},
  location     = {{Kiel}},
  title        = {{{Speeding Up Permutation Invariant Training for Source Separation}}},
  year         = {{2021}},
}

@inproceedings{29308,
  abstract     = {{In this paper we present our system for the Detection and Classification of Acoustic Scenes and Events (DCASE) 2021 Challenge Task 4: Sound Event Detection and Separation in Domestic Environments, where it scored the fourth rank. Our presented solution is an advancement of our system used in the previous edition of the task. We use a forward-backward convolutional recurrent neural network (FBCRNN) for tagging and pseudo labeling followed by tag-conditioned sound event detection (SED) models which are trained using strong pseudo labels provided by the FBCRNN. Our advancement over our earlier model is threefold. First, we introduce a strong label loss in the objective of the FBCRNN to take advantage of the strongly labeled synthetic data during training. Second, we perform multiple iterations of self-training for both the FBCRNN and tag-conditioned SED models. Third, while we used only tag-conditioned CNNs as our SED model in the previous edition we here explore sophisticated tag-conditioned SED model architectures, namely, bidirectional CRNNs and bidirectional convolutional transformer neural networks (CTNNs), and combine them. With metric and class specific tuning of median filter lengths for post-processing, our final SED model, consisting of 6 submodels (2 of each architecture), achieves on the public evaluation set polyphonic sound event detection scores (PSDS) of 0.455 for scenario 1 and 0.684 for scenario 2 as well as a collar-based F1-score of 0.596 outperforming the baselines and our model from the previous edition by far. Source code is publicly available at https://github.com/fgnt/pb_sed.}},
  author       = {{Ebbers, Janek and Haeb-Umbach, Reinhold}},
  booktitle    = {{Proceedings of the 6th Detection and Classification of Acoustic Scenes and Events 2021 Workshop (DCASE2021)}},
  isbn         = {{978-84-09-36072-7}},
  pages        = {{226--230}},
  title        = {{{Self-Trained Audio Tagging and Sound Event Detection in Domestic Environments}}},
  year         = {{2021}},
}

@inproceedings{29306,
  abstract     = {{Recently, there has been a rising interest in sound recognition via Acoustic Sensor Networks to support applications such as ambient assisted living or environmental habitat monitoring. With state-of-the-art sound recognition being dominated by deep-learning-based approaches, there is a high demand for labeled training data. Despite the availability of large-scale  data sets such as Google's AudioSet, acquiring training data matching a certain application environment is still often a problem. In this paper we are concerned with human activity monitoring in a domestic environment using an ASN consisting of multiple nodes each providing multichannel signals. We propose a self-training based domain adaptation approach, which only requires unlabeled data from the target environment. Here, a sound recognition system trained on AudioSet, the teacher, generates pseudo labels for data from the target environment on which a student network is trained. The student can furthermore glean information about the spatial arrangement of sensors and sound sources to further improve classification performance. It is shown that  the student significantly improves recognition performance over the pre-trained teacher without relying on labeled data from the environment the system is deployed in.}},
  author       = {{Ebbers, Janek and Keyser, Moritz Curt and Haeb-Umbach, Reinhold}},
  booktitle    = {{Proceedings of the 29th European Signal Processing Conference (EUSIPCO)}},
  pages        = {{1135--1139}},
  title        = {{{Adapting Sound Recognition to A New Environment Via Self-Training}}},
  year         = {{2021}},
}

@article{24456,
  abstract     = {{One objective of current research in explainable intelligent systems is to implement social aspects in order to increase the relevance of explanations. In this paper, we argue that a novel conceptual framework is needed to overcome shortcomings of existing AI systems with little attention to processes of interaction and learning. Drawing from research in interaction and development, we first outline the novel conceptual framework that pushes the design of AI systems toward true interactivity with an emphasis on the role of the partner and social relevance. We propose that AI systems will be able to provide a meaningful and relevant explanation only if the process of explaining is extended to active contribution of both partners that brings about dynamics that is modulated by different levels of analysis. Accordingly, our conceptual framework comprises monitoring and scaffolding as key concepts and claims that the process of explaining is not only modulated by the interaction between explainee and explainer but is embedded into a larger social context in which conventionalized and routinized behaviors are established. We discuss our conceptual framework in relation to the established objectives of transparency and autonomy that are raised for the design of explainable AI systems currently.}},
  author       = {{Rohlfing, Katharina J. and Cimiano, Philipp and Scharlau, Ingrid and Matzner, Tobias and Buhl, Heike M. and Buschmeier, Hendrik and Esposito, Elena and Grimminger, Angela and Hammer, Barbara and Haeb-Umbach, Reinhold and Horwath, Ilona and Hüllermeier, Eyke and Kern, Friederike and Kopp, Stefan and Thommes, Kirsten and Ngonga Ngomo, Axel-Cyrille and Schulte, Carsten and Wachsmuth, Henning and Wagner, Petra and Wrede, Britta}},
  issn         = {{2379-8920}},
  journal      = {{IEEE Transactions on Cognitive and Developmental Systems}},
  keywords     = {{Explainability, process of explaining and understanding, explainable artificial systems}},
  number       = {{3}},
  pages        = {{717--728}},
  title        = {{{Explanation as a Social Practice: Toward a Conceptual Framework for the Social Design of AI Systems}}},
  doi          = {{10.1109/tcds.2020.3044366}},
  volume       = {{13}},
  year         = {{2021}},
}

@inproceedings{22218,
  author       = {{Krauter, Stefan and Böcker, Joachim and Freitag, Christine and Hehenkamp, Burkhard and Hilleringmann, Ulrich and Temmen, Katrin and Klaus, Tobias and Rohrer, Nicolaus and Lehmann, Sven}},
  booktitle    = {{Tagungsband des 36. PV-Symposiums, 18.-26 Mai 2021}},
  isbn         = {{978-3-948176-14-3}},
  keywords     = {{Art-D, Afrika, Resilienz, Resilience, Grid stability, robustness, microgrids}},
  location     = {{Staffelstein / online}},
  pages        = {{305--309}},
  publisher    = {{Conexio}},
  title        = {{{Projekt Art-D Grids: Nachhaltige und stabile Microgrids in Afrika - eine Plattform für Forschung und Lehre für die Entwicklung}}},
  year         = {{2021}},
}

@article{53268,
  author       = {{Soleymani, Mohammad and Santamaria, Ignacio and Schreier, Peter J.}},
  issn         = {{2169-3536}},
  journal      = {{IEEE Access}},
  keywords     = {{General Engineering, General Materials Science, General Computer Science}},
  pages        = {{96948--96963}},
  publisher    = {{Institute of Electrical and Electronics Engineers (IEEE)}},
  title        = {{{Distributed Algorithms for Spectral and Energy-Efficiency Maximization of {$K$}-User Interference Channels}}},
  doi          = {{10.1109/access.2021.3094976}},
  volume       = {{9}},
  year         = {{2021}},
}

@article{21821,
  abstract     = {{We present a combined experimental and numerical study of the far-field emission properties of optical travelling wave antennas made from low-loss dielectric materials. The antennas considered here are composed of two simple building blocks, a director and a reflector, deposited on a glass substrate. Colloidal quantum dots placed in the feed gap between the two elements serve as internal light source. The emission profile of the antenna is mainly formed by the director while the reflector suppresses backward emission. Systematic studies of the director dimensions as well as variation of antenna material show that the effective refractive index of the director primarily governs the far-field emission pattern. Below cut off, i.e., if the director’s effective refractive index is smaller than the refractive index of the substrate, the main lobe results from leaky wave emission along the director. In contrast, if the director supports a guided mode, the emission predominately originates from the end facet of the director.}},
  author       = {{Leuteritz, T. and Farheen, Henna and Qiao, S. and Spreyer, F. and Schlickriede, Christian and Zentgraf, Thomas and Myroshnychenko, Viktor and Förstner, Jens and Linden, S.}},
  issn         = {{1094-4087}},
  journal      = {{Optics Express}},
  keywords     = {{tet_topic_opticalantenna}},
  number       = {{10}},
  title        = {{{Dielectric travelling wave antennas for directional light emission}}},
  doi          = {{10.1364/oe.422984}},
  volume       = {{29}},
  year         = {{2021}},
}

@article{37289,
  author       = {{Graefe, Grit and Temmen, Katrin}},
  journal      = {{Bildung und Beruf, Zeitschrift des Bundesverbandes der Lehrkräfte für Berufsbildung e.V.}},
  number       = {{02/2021}},
  pages        = {{46--54}},
  publisher    = {{DBB Verlag}},
  title        = {{{Rekrutierungspotenzial für das Lehramt gewerblich-technischer Fachrichtungen aus dem Beruflichen Gymnasium mit Schwerpunkt Technik?}}},
  year         = {{2021}},
}

