[{"project":[{"name":"WestAI - AI Service Center West","_id":"512"}],"_id":"62163","user_id":"62152","department":[{"_id":"54"}],"language":[{"iso":"eng"}],"type":"conference","publication":"Proceedings of the 16th ITG Conference on Speech Communication","editor":[{"full_name":"Möller, Sebastian","last_name":"Möller","first_name":"Sebastian"},{"first_name":"Timo","full_name":"Gerkmann, Timo","last_name":"Gerkmann"},{"full_name":"Kolossa, Dorothea","last_name":"Kolossa","first_name":"Dorothea"}],"abstract":[{"lang":"eng","text":"Zero-shot classifiers based on Contrastive Language-Audio Pretraining (CLAP) models enable classification of given audio into classes defined at test time using text. These models are costly to run with respect to computation and memory requirements. In this work, we propose to build a specialized low-resource classifier for classes pre-defined using text, using a two-stage procedure consisting of zero-shot data set pruning and model compression. First, relevant in-domain data is selected from a source dataset using class label embeddings obtained from a pre-trained CLAP model. This data is then used to distill the audio encoder of a CLAP model. The proposed compression method produces compact audio encoders with slightly reduced accuracy. Note that neither labeled nor unlabeled in-domain audio data is required for its development. We verify by cross-dataset tests that the resulting classifiers are indeed specialized to their task."}],"status":"public","date_updated":"2025-11-28T13:20:17Z","author":[{"last_name":"Werning","id":"62152","full_name":"Werning, Alexander","first_name":"Alexander"},{"last_name":"Häb-Umbach","full_name":"Häb-Umbach, Reinhold","id":"242","first_name":"Reinhold"}],"date_created":"2025-11-11T11:46:42Z","title":"A Fully Zero-Shot Approach to Obtaining Specialized and Compact Audio Tagging Models","conference":{"start_date":"2025-09-24","name":"16th ITG Conference on Speech Communication","location":"Berlin","end_date":"2025-09-26"},"publication_status":"published","quality_controlled":"1","publication_identifier":{"unknown":["978-3-8007-6617-8"]},"place":"Berlin","year":"2025","citation":{"chicago":"Werning, Alexander, and Reinhold Häb-Umbach. “A Fully Zero-Shot Approach to Obtaining Specialized and Compact Audio Tagging Models.” In <i>Proceedings of the 16th ITG Conference on Speech Communication</i>, edited by Sebastian Möller, Timo Gerkmann, and Dorothea Kolossa, 76–80. Berlin, 2025.","ieee":"A. Werning and R. Häb-Umbach, “A Fully Zero-Shot Approach to Obtaining Specialized and Compact Audio Tagging Models,” in <i>Proceedings of the 16th ITG Conference on Speech Communication</i>, Berlin, 2025, pp. 76–80.","ama":"Werning A, Häb-Umbach R. A Fully Zero-Shot Approach to Obtaining Specialized and Compact Audio Tagging Models. In: Möller S, Gerkmann T, Kolossa D, eds. <i>Proceedings of the 16th ITG Conference on Speech Communication</i>. ; 2025:76-80.","mla":"Werning, Alexander, and Reinhold Häb-Umbach. “A Fully Zero-Shot Approach to Obtaining Specialized and Compact Audio Tagging Models.” <i>Proceedings of the 16th ITG Conference on Speech Communication</i>, edited by Sebastian Möller et al., 2025, pp. 76–80.","short":"A. Werning, R. Häb-Umbach, in: S. Möller, T. Gerkmann, D. Kolossa (Eds.), Proceedings of the 16th ITG Conference on Speech Communication, Berlin, 2025, pp. 76–80.","bibtex":"@inproceedings{Werning_Häb-Umbach_2025, place={Berlin}, title={A Fully Zero-Shot Approach to Obtaining Specialized and Compact Audio Tagging Models}, booktitle={Proceedings of the 16th ITG Conference on Speech Communication}, author={Werning, Alexander and Häb-Umbach, Reinhold}, editor={Möller, Sebastian and Gerkmann, Timo and Kolossa, Dorothea}, year={2025}, pages={76–80} }","apa":"Werning, A., &#38; Häb-Umbach, R. (2025). A Fully Zero-Shot Approach to Obtaining Specialized and Compact Audio Tagging Models. In S. Möller, T. Gerkmann, &#38; D. Kolossa (Eds.), <i>Proceedings of the 16th ITG Conference on Speech Communication</i> (pp. 76–80)."},"page":"76-80"},{"publication":"Proceedings of DAS|DAGA 2025","abstract":[{"text":"Running state-of-the-art large-scale audio models on edge devices is often infeasible due to their limited storage and computing resources. It is therefore necessary to compress and tune the models for the specific target task and hardware. This is commonly achieved by distilling the audio model, the teacher, to a small target model, the student. However, this approach can be improved by prepending a dataset pruning stage and training the teacher on the pruned data set only, which contains examples relevant to the target task. Recently, CLAP models have emerged that embed audio and text examples in a common embedding space. We use the audio embeddings of the CLAP model for the above pruning stage, which is realized using a domain classifier. After knowledge distillation, the student is eventually fine-tuned on some data from the target domain. The CLAP architecture combines text and audio embedding spaces, which allows to search for data given only a textual description, such as a class label. We show how this can help data pruning.","lang":"eng"}],"ddc":["004"],"language":[{"iso":"eng"}],"year":"2025","corporate_editor":["Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025"],"date_created":"2025-05-14T13:18:10Z","title":"Distilling Efficient Audio Models using Data Pruning with CLAP","type":"conference","status":"public","_id":"59900","project":[{"_id":"512","name":"WestAI - AI Service Center West"}],"department":[{"_id":"54"}],"user_id":"62152","has_accepted_license":"1","publication_identifier":{"unknown":["978-3-939296-23-2"]},"publication_status":"published","place":"Copenhagen","citation":{"ama":"Werning A, Häb-Umbach R. Distilling Efficient Audio Models using Data Pruning with CLAP. In: Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025, ed. <i>Proceedings of DAS|DAGA 2025</i>. ; 2025.","ieee":"A. Werning and R. Häb-Umbach, “Distilling Efficient Audio Models using Data Pruning with CLAP,” in <i>Proceedings of DAS|DAGA 2025</i>, Copenhagen, 2025.","chicago":"Werning, Alexander, and Reinhold Häb-Umbach. “Distilling Efficient Audio Models Using Data Pruning with CLAP.” In <i>Proceedings of DAS|DAGA 2025</i>, edited by Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025. Copenhagen, 2025.","apa":"Werning, A., &#38; Häb-Umbach, R. (2025). Distilling Efficient Audio Models using Data Pruning with CLAP. In Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025 (Ed.), <i>Proceedings of DAS|DAGA 2025</i>.","mla":"Werning, Alexander, and Reinhold Häb-Umbach. “Distilling Efficient Audio Models Using Data Pruning with CLAP.” <i>Proceedings of DAS|DAGA 2025</i>, edited by Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025, 2025.","short":"A. Werning, R. Häb-Umbach, in: Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025 (Ed.), Proceedings of DAS|DAGA 2025, Copenhagen, 2025.","bibtex":"@inproceedings{Werning_Häb-Umbach_2025, place={Copenhagen}, title={Distilling Efficient Audio Models using Data Pruning with CLAP}, booktitle={Proceedings of DAS|DAGA 2025}, author={Werning, Alexander and Häb-Umbach, Reinhold}, editor={Deutsche Gesellschaft für Akustik e.V. (DEGA), Berlin, 2025}, year={2025} }"},"date_updated":"2025-11-28T13:21:13Z","author":[{"last_name":"Werning","id":"62152","full_name":"Werning, Alexander","first_name":"Alexander"},{"last_name":"Häb-Umbach","full_name":"Häb-Umbach, Reinhold","id":"242","first_name":"Reinhold"}],"conference":{"end_date":"2025-03-20","location":"Copenhagen","name":"DAS|DAGA 2025 - 51st Annual Meeting on Acoustics","start_date":"2025-03-17"}},{"citation":{"apa":"Werning, A., &#38; Haeb-Umbach, R. (2024). <i>UPB-NT submission to DCASE24: Dataset pruning for targeted knowledge distillation</i>.","short":"A. Werning, R. Haeb-Umbach, UPB-NT Submission to DCASE24: Dataset Pruning for Targeted Knowledge Distillation, 2024.","bibtex":"@book{Werning_Haeb-Umbach_2024, title={UPB-NT submission to DCASE24: Dataset pruning for targeted knowledge distillation}, author={Werning, Alexander and Haeb-Umbach, Reinhold}, year={2024} }","mla":"Werning, Alexander, and Reinhold Haeb-Umbach. <i>UPB-NT Submission to DCASE24: Dataset Pruning for Targeted Knowledge Distillation</i>. 2024.","ieee":"A. Werning and R. Haeb-Umbach, <i>UPB-NT submission to DCASE24: Dataset pruning for targeted knowledge distillation</i>. 2024.","chicago":"Werning, Alexander, and Reinhold Haeb-Umbach. <i>UPB-NT Submission to DCASE24: Dataset Pruning for Targeted Knowledge Distillation</i>, 2024.","ama":"Werning A, Haeb-Umbach R. <i>UPB-NT Submission to DCASE24: Dataset Pruning for Targeted Knowledge Distillation</i>.; 2024."},"year":"2024","author":[{"first_name":"Alexander","id":"62152","full_name":"Werning, Alexander","last_name":"Werning"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242"}],"date_created":"2024-11-18T09:44:46Z","date_updated":"2024-11-18T09:45:14Z","title":"UPB-NT submission to DCASE24: Dataset pruning for targeted knowledge distillation","type":"report","status":"public","department":[{"_id":"54"}],"user_id":"62152","_id":"57161","project":[{"name":"WestAI - AI Service Center West","_id":"512"}],"language":[{"iso":"eng"}]},{"file":[{"file_id":"57200","file_name":"Eusipco__Target_specific_Dataset_Pruning_for_Compression_of_Audio_Tagging_Models.pdf","access_level":"closed","file_size":183539,"date_created":"2024-11-18T12:10:09Z","creator":"awerning","date_updated":"2024-11-18T12:10:09Z","relation":"main_file","success":1,"content_type":"application/pdf"}],"status":"public","abstract":[{"lang":"eng","text":"Large audio tagging models are usually trained or pre-trained on AudioSet, a dataset that encompasses a large amount of different sound classes and acoustic environments. Knowledge distillation has emerged as a method to compress such models without compromising their effectiveness. There are many different applications for audio tagging, some of which require a specialization to a narrow domain of sounds to be classified. For these scenarios, it is beneficial to distill the large audio tagger with respect to a specific subset of sounds of interest. A method to prune a general dataset with respect to a target dataset is presented. By distilling with such a specialized pruned dataset, we obtain a compressed model with better classification accuracy in the specific target domain than with target-agnostic distillation."}],"type":"conference","publication":"32nd European Signal Processing Conference (EUSIPCO 2024)","language":[{"iso":"eng"}],"file_date_updated":"2024-11-18T12:10:09Z","ddc":["000"],"keyword":["data pruning","knowledge distillation","audio tagging"],"user_id":"62152","department":[{"_id":"54"}],"project":[{"name":"WestAI - AI Service Center West","_id":"512"}],"_id":"57160","citation":{"apa":"Werning, A., &#38; Haeb-Umbach, R. (2024). Target-Specific Dataset Pruning for Compression of Audio Tagging Models. <i>32nd European Signal Processing Conference (EUSIPCO 2024)</i>. 32nd European Signal Processing Conference, Lyon.","short":"A. Werning, R. Haeb-Umbach, in: 32nd European Signal Processing Conference (EUSIPCO 2024), 2024.","mla":"Werning, Alexander, and Reinhold Haeb-Umbach. “Target-Specific Dataset Pruning for Compression of Audio Tagging Models.” <i>32nd European Signal Processing Conference (EUSIPCO 2024)</i>, 2024.","bibtex":"@inproceedings{Werning_Haeb-Umbach_2024, title={Target-Specific Dataset Pruning for Compression of Audio Tagging Models}, booktitle={32nd European Signal Processing Conference (EUSIPCO 2024)}, author={Werning, Alexander and Haeb-Umbach, Reinhold}, year={2024} }","chicago":"Werning, Alexander, and Reinhold Haeb-Umbach. “Target-Specific Dataset Pruning for Compression of Audio Tagging Models.” In <i>32nd European Signal Processing Conference (EUSIPCO 2024)</i>, 2024.","ieee":"A. Werning and R. Haeb-Umbach, “Target-Specific Dataset Pruning for Compression of Audio Tagging Models,” presented at the 32nd European Signal Processing Conference, Lyon, 2024.","ama":"Werning A, Haeb-Umbach R. Target-Specific Dataset Pruning for Compression of Audio Tagging Models. In: <i>32nd European Signal Processing Conference (EUSIPCO 2024)</i>. ; 2024."},"year":"2024","has_accepted_license":"1","quality_controlled":"1","conference":{"location":"Lyon","name":"32nd European Signal Processing Conference"},"title":"Target-Specific Dataset Pruning for Compression of Audio Tagging Models","date_created":"2024-11-18T09:29:16Z","author":[{"first_name":"Alexander","last_name":"Werning","id":"62152","full_name":"Werning, Alexander"},{"first_name":"Reinhold","last_name":"Haeb-Umbach","full_name":"Haeb-Umbach, Reinhold","id":"242"}],"date_updated":"2025-11-28T13:22:00Z"}]
