[{"user_id":"40767","department":[{"_id":"54"}],"project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"_id":"20504","file_date_updated":"2020-12-11T12:36:37Z","type":"conference","status":"public","author":[{"first_name":"Jens","id":"27643","full_name":"Heitkaemper, Jens","last_name":"Heitkaemper"},{"first_name":"Darius","full_name":"Jakobeit, Darius","last_name":"Jakobeit"},{"id":"40767","full_name":"Boeddeker, Christoph","last_name":"Boeddeker","first_name":"Christoph"},{"first_name":"Lukas","full_name":"Drude, Lukas","last_name":"Drude"},{"first_name":"Reinhold","full_name":"Haeb-Umbach, Reinhold","id":"242","last_name":"Haeb-Umbach"}],"date_updated":"2022-01-13T08:47:32Z","has_accepted_license":"1","citation":{"apa":"Heitkaemper, J., Jakobeit, D., Boeddeker, C., Drude, L., &#38; Haeb-Umbach, R. (2020). Demystifying TasNet: A Dissecting Approach. <i>ICASSP 2020 Virtual Barcelona Spain</i>.","short":"J. Heitkaemper, D. Jakobeit, C. Boeddeker, L. Drude, R. Haeb-Umbach, in: ICASSP 2020 Virtual Barcelona Spain, 2020.","bibtex":"@inproceedings{Heitkaemper_Jakobeit_Boeddeker_Drude_Haeb-Umbach_2020, title={Demystifying TasNet: A Dissecting Approach}, booktitle={ICASSP 2020 Virtual Barcelona Spain}, author={Heitkaemper, Jens and Jakobeit, Darius and Boeddeker, Christoph and Drude, Lukas and Haeb-Umbach, Reinhold}, year={2020} }","mla":"Heitkaemper, Jens, et al. “Demystifying TasNet: A Dissecting Approach.” <i>ICASSP 2020 Virtual Barcelona Spain</i>, 2020.","chicago":"Heitkaemper, Jens, Darius Jakobeit, Christoph Boeddeker, Lukas Drude, and Reinhold Haeb-Umbach. “Demystifying TasNet: A Dissecting Approach.” In <i>ICASSP 2020 Virtual Barcelona Spain</i>, 2020.","ieee":"J. Heitkaemper, D. Jakobeit, C. Boeddeker, L. Drude, and R. Haeb-Umbach, “Demystifying TasNet: A Dissecting Approach,” 2020.","ama":"Heitkaemper J, Jakobeit D, Boeddeker C, Drude L, Haeb-Umbach R. Demystifying TasNet: A Dissecting Approach. In: <i>ICASSP 2020 Virtual Barcelona Spain</i>. ; 2020."},"language":[{"iso":"eng"}],"ddc":["000"],"keyword":["voice activity detection","speech activity detection","neural network","statistical speech processing"],"publication":"ICASSP 2020 Virtual Barcelona Spain","file":[{"success":1,"relation":"main_file","content_type":"application/pdf","file_size":3871374,"access_level":"closed","file_id":"20699","file_name":"ms.pdf","date_updated":"2020-12-11T12:36:37Z","creator":"jensheit","date_created":"2020-12-11T12:36:37Z"}],"abstract":[{"text":"In recent years time domain speech separation has excelled over frequency domain separation in single channel scenarios and noise-free environments. In this paper we dissect the gains of the time-domain audio separation network (TasNet) approach by gradually replacing components of an utterance-level permutation invariant training (u-PIT) based separation system in the frequency domain until the TasNet system is reached, thus blending components of frequency domain approaches with those of time domain approaches. Some of the intermediate variants achieve comparable signal-to-distortion ratio (SDR) gains to TasNet, but retain the advantage of frequency domain processing: compatibility with classic signal processing tools such as frequency-domain beamforming and the human interpretability of the masks. Furthermore, we show that the scale invariant signal-to-distortion ratio (si-SDR) criterion used as loss function in TasNet is related to a logarithmic mean square error criterion and that it is this criterion which contributes most reliable to the performance advantage of TasNet. Finally, we critically assess which gains in a noise-free single channel environment generalize to more realistic reverberant conditions.","lang":"eng"}],"date_created":"2020-11-25T14:56:53Z","title":"Demystifying TasNet: A Dissecting Approach","quality_controlled":"1","year":"2020"},{"date_created":"2020-11-25T15:03:19Z","author":[{"last_name":"Heitkaemper","id":"27643","full_name":"Heitkaemper, Jens","first_name":"Jens"},{"first_name":"Joerg","full_name":"Schmalenstroeer, Joerg","id":"460","last_name":"Schmalenstroeer"},{"first_name":"Reinhold","id":"242","full_name":"Haeb-Umbach, Reinhold","last_name":"Haeb-Umbach"}],"date_updated":"2023-10-26T08:28:49Z","title":"Statistical and Neural Network Based Speech Activity Detection in Non-Stationary Acoustic Environments","has_accepted_license":"1","citation":{"chicago":"Heitkaemper, Jens, Joerg Schmalenstroeer, and Reinhold Haeb-Umbach. “Statistical and Neural Network Based Speech Activity Detection in Non-Stationary Acoustic Environments.” In <i>INTERSPEECH 2020 Virtual Shanghai China</i>, 2020.","ieee":"J. Heitkaemper, J. Schmalenstroeer, and R. Haeb-Umbach, “Statistical and Neural Network Based Speech Activity Detection in Non-Stationary Acoustic Environments,” 2020.","ama":"Heitkaemper J, Schmalenstroeer J, Haeb-Umbach R. Statistical and Neural Network Based Speech Activity Detection in Non-Stationary Acoustic Environments. In: <i>INTERSPEECH 2020 Virtual Shanghai China</i>. ; 2020.","apa":"Heitkaemper, J., Schmalenstroeer, J., &#38; Haeb-Umbach, R. (2020). Statistical and Neural Network Based Speech Activity Detection in Non-Stationary Acoustic Environments. <i>INTERSPEECH 2020 Virtual Shanghai China</i>.","bibtex":"@inproceedings{Heitkaemper_Schmalenstroeer_Haeb-Umbach_2020, title={Statistical and Neural Network Based Speech Activity Detection in Non-Stationary Acoustic Environments}, booktitle={INTERSPEECH 2020 Virtual Shanghai China}, author={Heitkaemper, Jens and Schmalenstroeer, Joerg and Haeb-Umbach, Reinhold}, year={2020} }","mla":"Heitkaemper, Jens, et al. “Statistical and Neural Network Based Speech Activity Detection in Non-Stationary Acoustic Environments.” <i>INTERSPEECH 2020 Virtual Shanghai China</i>, 2020.","short":"J. Heitkaemper, J. Schmalenstroeer, R. Haeb-Umbach, in: INTERSPEECH 2020 Virtual Shanghai China, 2020."},"year":"2020","user_id":"460","department":[{"_id":"54"}],"project":[{"name":"Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"_id":"20505","language":[{"iso":"eng"}],"file_date_updated":"2020-12-11T12:33:04Z","ddc":["000"],"keyword":["voice activity detection","speech activity detection","neural network","statistical speech processing"],"type":"conference","publication":"INTERSPEECH 2020 Virtual Shanghai China","file":[{"access_level":"closed","file_name":"ms.pdf","file_id":"20697","file_size":998706,"date_created":"2020-12-11T12:33:04Z","creator":"jensheit","date_updated":"2020-12-11T12:33:04Z","relation":"main_file","success":1,"content_type":"application/pdf"}],"status":"public","abstract":[{"text":"Speech activity detection (SAD), which often rests on the fact that the noise is \"more'' stationary than speech, is particularly challenging in non-stationary environments, because the time variance of the acoustic scene makes it difficult to discriminate  speech from noise. We propose two approaches to SAD, where one is based on statistical signal processing, while the other utilizes neural networks. The former employs sophisticated signal processing to track the noise and speech energies and is meant to support the case for a resource efficient, unsupervised signal processing approach.\r\nThe latter introduces a recurrent network layer that operates on short segments of the input speech to do temporal smoothing in the presence of non-stationary noise. The systems are tested on the Fearless Steps challenge database, which consists of the transmission data from the Apollo-11 space mission.\r\nThe statistical SAD  achieves comparable detection performance to earlier proposed neural network based SADs, while the neural network based approach leads to a decision cost function of 1.07% on the evaluation set of the 2020 Fearless Steps Challenge, which sets a new state of the art.","lang":"eng"}]},{"author":[{"full_name":"Vu, Dang Hai Tran","last_name":"Vu","first_name":"Dang Hai Tran"},{"last_name":"Haeb-Umbach","id":"242","full_name":"Haeb-Umbach, Reinhold","first_name":"Reinhold"}],"date_created":"2019-07-12T05:30:45Z","date_updated":"2022-01-06T06:51:12Z","doi":"10.1109/ICASSP.2013.6637771","title":"Using the turbo principle for exploiting temporal and spectral correlations in speech presence probability estimation","publication_identifier":{"issn":["1520-6149"]},"page":"863-867","citation":{"bibtex":"@inproceedings{Vu_Haeb-Umbach_2013, title={Using the turbo principle for exploiting temporal and spectral correlations in speech presence probability estimation}, DOI={<a href=\"https://doi.org/10.1109/ICASSP.2013.6637771\">10.1109/ICASSP.2013.6637771</a>}, booktitle={38th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013)}, author={Vu, Dang Hai Tran and Haeb-Umbach, Reinhold}, year={2013}, pages={863–867} }","short":"D.H.T. Vu, R. Haeb-Umbach, in: 38th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013), 2013, pp. 863–867.","mla":"Vu, Dang Hai Tran, and Reinhold Haeb-Umbach. “Using the Turbo Principle for Exploiting Temporal and Spectral Correlations in Speech Presence Probability Estimation.” <i>38th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013)</i>, 2013, pp. 863–67, doi:<a href=\"https://doi.org/10.1109/ICASSP.2013.6637771\">10.1109/ICASSP.2013.6637771</a>.","apa":"Vu, D. H. T., &#38; Haeb-Umbach, R. (2013). Using the turbo principle for exploiting temporal and spectral correlations in speech presence probability estimation. In <i>38th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013)</i> (pp. 863–867). <a href=\"https://doi.org/10.1109/ICASSP.2013.6637771\">https://doi.org/10.1109/ICASSP.2013.6637771</a>","ama":"Vu DHT, Haeb-Umbach R. Using the turbo principle for exploiting temporal and spectral correlations in speech presence probability estimation. In: <i>38th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013)</i>. ; 2013:863-867. doi:<a href=\"https://doi.org/10.1109/ICASSP.2013.6637771\">10.1109/ICASSP.2013.6637771</a>","chicago":"Vu, Dang Hai Tran, and Reinhold Haeb-Umbach. “Using the Turbo Principle for Exploiting Temporal and Spectral Correlations in Speech Presence Probability Estimation.” In <i>38th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013)</i>, 863–67, 2013. <a href=\"https://doi.org/10.1109/ICASSP.2013.6637771\">https://doi.org/10.1109/ICASSP.2013.6637771</a>.","ieee":"D. H. T. Vu and R. Haeb-Umbach, “Using the turbo principle for exploiting temporal and spectral correlations in speech presence probability estimation,” in <i>38th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013)</i>, 2013, pp. 863–867."},"year":"2013","department":[{"_id":"54"}],"user_id":"44006","_id":"11917","language":[{"iso":"eng"}],"keyword":["correlation methods","estimation theory","hidden Markov models","iterative methods","probability","spectral analysis","speech processing","2D HMM","SPP estimates","iterative algorithm","posterior probability estimation","spectral correlation","speech presence probability estimation","state-of-the-art SPP estimation algorithm","temporal correlation","turbo principle","two-dimensional hidden Markov model","Correlation","Decoding","Estimation","Iterative decoding","Noise","Speech","Vectors"],"publication":"38th International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013)","type":"conference","status":"public","abstract":[{"lang":"eng","text":"In this paper we present a speech presence probability (SPP) estimation algorithm which exploits both temporal and spectral correlations of speech. To this end, the SPP estimation is formulated as the posterior probability estimation of the states of a two-dimensional (2D) Hidden Markov Model (HMM). We derive an iterative algorithm to decode the 2D-HMM which is based on the turbo principle. The experimental results show that indeed the SPP estimates improve from iteration to iteration, and further clearly outperform another state-of-the-art SPP estimation algorithm."}]},{"abstract":[{"text":"This paper investigates the influence of feedback provided by an autonomous robot (BIRON) on users’ discursive behavior. A user study is described during which users show objects to the robot. The results of the experiment indicate, that the robot’s verbal feedback utterances cause the humans to adapt their own way of speaking. The changes in users’ verbal behavior are due to their beliefs about the robots knowledge and abilities. In this paper they are identified and grouped. Moreover, the data implies variations in user behavior regarding gestures. Unlike speech, the robot was not able to give feedback with gestures. Due to the lack of feedback, users did not seem to have a consistent mental representation of the robot’s abilities to recognize gestures. As a result, changes between different gestures are interpreted to be unconscious variations accompanying speech.","lang":"eng"}],"status":"public","type":"conference","keyword":["discursive behavior","autonomous robot","BIRON","man-machine systems","robot abilities","robot knowledge","user gestures","robot verbal feedback utterance","speech processing","user verbal behavior","service robots","human-robot interaction","human computer interaction","gesture recognition"],"language":[{"iso":"eng"}],"_id":"17278","department":[{"_id":"749"}],"user_id":"14931","year":"2008","page":"3481-3486","citation":{"mla":"Lohse, Manja, et al. <i>“Try Something Else!” — When Users Change Their Discursive Behavior in Human-Robot Interaction</i>. 2008, pp. 3481–86, doi:<a href=\"https://doi.org/10.1109/ROBOT.2008.4543743\">10.1109/ROBOT.2008.4543743</a>.","short":"M. Lohse, K. Rohlfing, B. Wrede, G. Sagerer, in: 2008, pp. 3481–3486.","bibtex":"@inproceedings{Lohse_Rohlfing_Wrede_Sagerer_2008, title={“Try something else!” — When users change their discursive behavior in human-robot interaction}, DOI={<a href=\"https://doi.org/10.1109/ROBOT.2008.4543743\">10.1109/ROBOT.2008.4543743</a>}, author={Lohse, Manja and Rohlfing, Katharina and Wrede, Britta and Sagerer, Gerhard}, year={2008}, pages={3481–3486} }","apa":"Lohse, M., Rohlfing, K., Wrede, B., &#38; Sagerer, G. (2008). <i>“Try something else!” — When users change their discursive behavior in human-robot interaction</i>. 3481–3486. <a href=\"https://doi.org/10.1109/ROBOT.2008.4543743\">https://doi.org/10.1109/ROBOT.2008.4543743</a>","ama":"Lohse M, Rohlfing K, Wrede B, Sagerer G. “Try something else!” — When users change their discursive behavior in human-robot interaction. In: ; 2008:3481-3486. doi:<a href=\"https://doi.org/10.1109/ROBOT.2008.4543743\">10.1109/ROBOT.2008.4543743</a>","ieee":"M. Lohse, K. Rohlfing, B. Wrede, and G. Sagerer, “‘Try something else!’ — When users change their discursive behavior in human-robot interaction,” 2008, pp. 3481–3486, doi: <a href=\"https://doi.org/10.1109/ROBOT.2008.4543743\">10.1109/ROBOT.2008.4543743</a>.","chicago":"Lohse, Manja, Katharina Rohlfing, Britta Wrede, and Gerhard Sagerer. “‘Try Something Else!’ — When Users Change Their Discursive Behavior in Human-Robot Interaction,” 3481–86, 2008. <a href=\"https://doi.org/10.1109/ROBOT.2008.4543743\">https://doi.org/10.1109/ROBOT.2008.4543743</a>."},"publication_identifier":{"isbn":["1050-4729"]},"title":"“Try something else!” — When users change their discursive behavior in human-robot interaction","doi":"10.1109/ROBOT.2008.4543743","date_updated":"2023-02-01T13:08:20Z","date_created":"2020-06-24T13:02:49Z","author":[{"full_name":"Lohse, Manja","last_name":"Lohse","first_name":"Manja"},{"first_name":"Katharina","last_name":"Rohlfing","id":"50352","full_name":"Rohlfing, Katharina"},{"last_name":"Wrede","full_name":"Wrede, Britta","first_name":"Britta"},{"full_name":"Sagerer, Gerhard","last_name":"Sagerer","first_name":"Gerhard"}]},{"citation":{"chicago":"Plessl, Christian, and Simon Maurer. <i>Hardware/Software Codesign in Speech Compression Applications</i>. Computer Engineering and Networks Lab, ETH Zurich, Switzerland, 2000.","ieee":"C. Plessl and S. Maurer, <i>Hardware/Software Codesign in Speech Compression Applications</i>. Computer Engineering and Networks Lab, ETH Zurich, Switzerland, 2000.","ama":"Plessl C, Maurer S. <i>Hardware/Software Codesign in Speech Compression Applications</i>. Computer Engineering and Networks Lab, ETH Zurich, Switzerland; 2000.","short":"C. Plessl, S. Maurer, Hardware/Software Codesign in Speech Compression Applications, Computer Engineering and Networks Lab, ETH Zurich, Switzerland, 2000.","mla":"Plessl, Christian, and Simon Maurer. <i>Hardware/Software Codesign in Speech Compression Applications</i>. Computer Engineering and Networks Lab, ETH Zurich, Switzerland, 2000.","bibtex":"@book{Plessl_Maurer_2000, title={Hardware/Software Codesign in Speech Compression Applications}, publisher={Computer Engineering and Networks Lab, ETH Zurich, Switzerland}, author={Plessl, Christian and Maurer, Simon}, year={2000} }","apa":"Plessl, C., &#38; Maurer, S. (2000). <i>Hardware/Software Codesign in Speech Compression Applications</i>. Computer Engineering and Networks Lab, ETH Zurich, Switzerland."},"year":"2000","date_created":"2018-04-17T15:56:00Z","author":[{"first_name":"Christian","orcid":"0000-0001-5728-9982","last_name":"Plessl","full_name":"Plessl, Christian","id":"16153"},{"first_name":"Simon","full_name":"Maurer, Simon","last_name":"Maurer"}],"publisher":"Computer Engineering and Networks Lab, ETH Zurich, Switzerland","date_updated":"2022-01-06T06:56:17Z","title":"Hardware/Software Codesign in Speech Compression Applications","type":"mastersthesis","status":"public","department":[{"_id":"518"}],"user_id":"24135","_id":"2433","keyword":["co-design","speech processing"]}]
