% Conference paper, Interspeech 2011. The numeric citation key is kept unchanged
% so that existing citations of this entry keep resolving.
@inproceedings{11889,
  author    = {Schmalenstroeer, Joerg and Bartek, Markus and Haeb-Umbach, Reinhold},
  title     = {Unsupervised learning of acoustic events using dynamic time warping and hierarchical {K-means++} clustering},
  booktitle = {{Interspeech} 2011},
  year      = {2011},
  abstract  = {In this paper we propose to jointly consider Segmental Dynamic Time Warping and distance clustering for the unsupervised learning of acoustic events. As a result, the computational complexity increases only linearly with the database size compared to a quadratic increase in a sequential setup, where all pairwise SDTW distances between segments are computed prior to clustering. Further, we discuss options for seed value selection for clustering and show that drawing seeds with a probability proportional to the distance from the already drawn seeds, known as K-means++ clustering, results in a significantly higher probability of finding representatives of each of the underlying classes, compared to the commonly used draws from a uniform distribution. Experiments are performed on an acoustic event classification and an isolated digit recognition task, where on the latter the final word accuracy approaches that of supervised training.},
}