% Journal article. The author list is single-braced so that each name
% separated by the keyword and is parsed individually; the DOI is stored
% bare, without a resolver prefix.
@article{45673,
  author       = {Merten, Marie-Luis and Wever, Marcel and Tophinke, Doris and Geierhos, Michaela and Hüllermeier, Eyke},
  journal      = {{International Journal of Corpus Linguistics}},
  title        = {Annotation uncertainty in the context of grammatical change},
  doi          = {{10.1075/ijcl.20113.mer}},
  year         = {{2023}},
}

% Unpublished manuscript. The unpublished entry type requires a note field
% and ignores booktitle/publisher, so the venue is repeated in note to make
% it visible in the rendered reference.
% NOTE(review): the note wording assumes this is still a preprint for the
% journal named in booktitle; if it has appeared there, convert the entry
% to an article with journal/volume/pages instead.
@unpublished{17605,
  abstract     = {{Syntactic annotation of corpora in the form of part-of-speech (POS) tags is a key requirement for both linguistic research and subsequent automated natural language processing (NLP) tasks. This problem is commonly tackled using machine learning methods, i.e., by training a POS tagger on a sufficiently large corpus of labeled data. 
While the problem of POS tagging can essentially be considered as solved for modern languages, historical corpora turn out to be much more difficult, especially due to the lack of native speakers and sparsity of training data. Moreover, most texts have no sentences as we know them today, nor a common orthography.
These irregularities render the task of automated POS tagging more difficult and error-prone. Under these circumstances, instead  of forcing the POS tagger to predict and commit to a single tag, it should be enabled to express its uncertainty. In this paper, we consider POS tagging within the framework of set-valued prediction, which allows the POS tagger to express its uncertainty via predicting a set of candidate POS tags instead of guessing a single one. The goal is to guarantee a high confidence that the correct POS tag is included while keeping the number of candidates small.
In our experimental study, we find that extending state-of-the-art POS taggers to set-valued prediction yields more precise and robust taggings, especially for unknown words, i.e., words not occurring in the training data.}},
  author       = {Heid, Stefan Helmut and Wever, Marcel Dominik and Hüllermeier, Eyke},
  booktitle    = {{Journal of Data Mining and Digital Humanities}},
  note         = {{Preprint, Journal of Data Mining and Digital Humanities}},
  publisher    = {{episciences}},
  title        = {Reliable Part-of-Speech Tagging of Historical Corpora through Set-Valued Prediction},
  year         = {{2020}},
}

% Yearbook article. 142 is stored as the volume: a number field without a
% volume makes classic BibTeX styles warn and print a bare parenthesised
% issue. German nouns in the title are braced so sentence-casing styles
% keep their capitals.
@article{17565,
  author       = {Merten, Marie-Luis and Seemann, Nina and Wever, Marcel Dominik},
  journal      = {{Niederdeutsches Jahrbuch}},
  pages        = {{124--146}},
  title        = {Grammatikwandel digital-kulturwissenschaftlich erforscht. {Mittelniederdeutscher} {Sprachausbau} im interdisziplinären {Zugriff}},
  volume       = {{142}},
  year         = {{2019}},
}

% Conference abstract. The ampersand in booktitle is escaped because a raw
% one is an alignment-tab error when LaTeX typesets the bibliography.
% location stays double-braced so its literal word and is not treated as a
% list separator.
@inproceedings{8529,
  author       = {Seemann, Nina and Merten, Marie-Luis},
  booktitle    = {{DHd 2019 Digital Humanities: multimedial \& multimodal. Konferenzabstracts}},
  editor       = {Sahle, Patrick},
  isbn         = {{978-3-00-062166-6}},
  location     = {{Mainz and Frankfurt am Main, Germany}},
  pages        = {{352--353}},
  publisher    = {{Zenodo}},
  title        = {{UPB-Annotate}: Ein maßgeschneidertes {Toolkit} für historische {Texte}},
  doi          = {{10.5281/ZENODO.2596094}},
  year         = {{2019}},
}

% Journal article. Volume and issue are split into separate fields and the
% page range uses a plain double hyphen without surrounding spaces, so the
% style controls the formatting of both.
@article{18155,
  author       = {Merten, Marie-Luis and Tophinke, Doris},
  journal      = {{Jahrbuch für Germanistische Sprachgeschichte}},
  number       = {{1}},
  pages        = {{303--323}},
  title        = {Interaktive {Analyse} historischen {Grammatikwandels}. {Konstruktionsgrammatik} trifft auf machine learning},
  volume       = {{10}},
  year         = {{2019}},
}

% Conference paper. Author and editor lists are single-braced so every
% person is parsed as a separate name.
@inproceedings{4345,
  abstract     = {{This paper presents the various sources of uncertainty we encounter in our project. Our research focus lies on the investigation of language elaboration processes in Middle Low German. We are particularly interested in diachronic constructional changes and constructionalizations involving and affecting all linguistic dimensions. For this, it is necessary to annotate our corpus with Part-of-Speech and constructional tags. Here, we are confronted with gradualness, gradience, and ambiguity as potential sources of uncertainty that complicate the annotation process. Furthermore, due to the historicity of the investigated language, we expect cases of incomplete knowledge and comparative fallacy from the annotators. For this reason, we develop an interface that captures all annotators’ doubts.}},
  author       = {Merten, Marie-Luis and Seemann, Nina},
  booktitle    = {{Proceedings of the 6th International Conference on Technological Ecosystems for Enhancing Multiculturality (TEEM'18)}},
  editor       = {García-Peñalvo, Francisco José},
  isbn         = {{978-1-4503-6518-5}},
  keywords     = {{historical languages, linguistic annotations, gradience and gradualness, ambiguity, incomplete knowledge}},
  location     = {{Salamanca, Spain}},
  pages        = {{819--825}},
  publisher    = {{ACM}},
  title        = {Analysing Constructional Change: Linguistic Annotation and Sources of Uncertainty},
  doi          = {{10.1145/3284179.3284320}},
  year         = {{2018}},
}

% Poster contribution. Author and editor lists are single-braced and
% whitespace-normalised so every person is parsed as a separate name.
@inproceedings{1379,
  author       = {Seemann, Nina and Geierhos, Michaela and Merten, Marie-Luis and Tophinke, Doris and Wever, Marcel Dominik and Hüllermeier, Eyke},
  booktitle    = {{Postersession Computerlinguistik der 40. Jahrestagung der Deutschen Gesellschaft für Sprachwissenschaft}},
  editor       = {Eckart, Kerstin and Schlechtweg, Dominik},
  location     = {{Stuttgart, Germany}},
  title        = {Supporting the Cognitive Process in Annotation Tasks},
  year         = {{2018}},
}

% Workshop paper. Only the language name in the title is brace-protected,
% so sentence-casing styles can recase the remaining words.
@inproceedings{1158,
  abstract     = {{In this paper, we present the annotation challenges we have encountered when working on a historical language that was undergoing elaboration processes. We especially focus on syntactic ambiguity and gradience in Middle Low German, which causes uncertainty to some extent. Since current annotation tools consider construction contexts and the dynamics of the grammaticalization only partially, we plan to extend CorA – a web-based annotation tool for historical and other non-standard language data – to capture elaboration phenomena and annotator unsureness. Moreover, we seek to interactively learn morphological as well as syntactic annotations.}},
  author       = {Seemann, Nina and Merten, Marie-Luis and Geierhos, Michaela and Tophinke, Doris and Hüllermeier, Eyke},
  booktitle    = {{Proceedings of the Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature}},
  location     = {{Vancouver, BC, Canada}},
  pages        = {{40--45}},
  publisher    = {{Association for Computational Linguistics (ACL)}},
  title        = {Annotation Challenges for Reconstructing the Structural Elaboration of {Middle Low German}},
  doi          = {{10.18653/v1/W17-2206}},
  year         = {{2017}},
}

% Duplicate record of entry 1158 in this file (same authors, title,
% booktitle, pages and year). The key is kept so existing citations of
% 25246 still resolve; location, publisher and doi are copied from 1158 so
% both keys render the same reference.
% NOTE(review): consider retiring this key in favour of 1158.
@inproceedings{25246,
  author       = {Seemann, Nina and Merten, Marie-Luis and Geierhos, Michaela and Tophinke, Doris and Hüllermeier, Eyke},
  booktitle    = {{Proceedings of the Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature}},
  location     = {{Vancouver, BC, Canada}},
  pages        = {{40--45}},
  publisher    = {{Association for Computational Linguistics (ACL)}},
  title        = {Annotation Challenges for Reconstructing the Structural Elaboration of {Middle Low German}},
  doi          = {{10.18653/v1/W17-2206}},
  year         = {{2017}},
}

