@phdthesis{44323, abstract = {{Reading between the lines has so far been reserved for humans. The present dissertation addresses this research gap using machine learning methods. Implicit expressions are not comprehensible to computers and cannot be localized in the text. However, many texts deal with interpersonal topics and, unlike commercial review texts, often imply information only by means of longer phrases. Examples are the kindness and attentiveness of a doctor, which are only paraphrased (“he didn’t even look me in the eye”). The analysis of such data, especially the identification and localization of implicit statements, is a research gap (1). This work uses so-called Aspect-based Sentiment Analysis as a method for this purpose. It remains open how the aspect categories to be extracted can be discovered and thematically delineated based on the data (2). Furthermore, it has not yet been explored what a collection of tools should look like with which implicit phrases can be identified and thus made explicit (3). Lastly, it is an open question how to correlate the identified phrases from the text data with other data, including the investigation of the relationship between quantitative scores (e.g., school grades) and the thematically related text (4). Based on these research gaps, the research question is posed as follows: Using text mining methods, how can implicit rating content be properly interpreted and thus made explicit before it is automatically categorized and quantified? The uniqueness of this dissertation lies in the automated recognition of implicit linguistic statements alongside explicit statements. These are identified in unstructured text data so that features expressed only in the text can later be compared across data sources, even though they were not included in rating categories such as stars or school grades. German-language physician ratings from websites in three countries serve as the sample domain. The solution approach consists of data creation, a text processing pipeline, and analyses built on it. In the data creation, aspect classes are identified and delineated across platforms and marked in text data. This results in six datasets with over 70,000 annotated sentences and detailed guidelines. The models that were created based on the training data extract and categorize the aspects. In addition, the sentiment polarity and the evaluation weight, i.e., the importance of each phrase, are determined. The models, which are combined in a pipeline, are used in a prototype in the form of a web application. The analyses built on the pipeline quantify the rating contents by linking the obtained information with further data, thus allowing new insights. As a result, a toolbox is provided to identify quantifiable rating content and categories using text mining for a sample domain. This is used to evaluate the approach, which in principle can also be adapted to any other domain.}}, author = {{Kersting, Joschka}}, pages = {{208}}, publisher = {{Universität der Bundeswehr München}}, title = {{{Identifizierung quantifizierbarer Bewertungsinhalte und -kategorien mittels Text Mining}}}, year = {{2023}}, } @inbook{46205, abstract = {{We present a concept for quantifying evaluative phrases to later compare rating texts numerically instead of just relying on stars or grades.
We achieve this by combining deep learning models in an aspect-based sentiment analysis pipeline along with sentiment weighting, polarity, and correlation analyses that combine deep learning results with metadata. The results provide new insights for the medical field. Our application domain, physician reviews, shows that there are millions of review texts on the Internet that cannot yet be comprehensively analyzed because previous studies have focused on explicit aspects from other domains (e.g., products). We identify, extract, and classify implicit and explicit aspect phrases equally from German-language review texts. To do so, we annotated aspect phrases representing reviews on numerous aspects of a physician, medical practice, or practice staff. We apply the best-performing transformer model, XLM-RoBERTa, to a large physician review dataset and correlate the results with existing metadata. As a result, we can show different correlations between the sentiment polarity of certain aspect classes (e.g., friendliness, practice equipment) and physicians’ professions (e.g., surgeon, ophthalmologist). Ultimately, we obtain individual numerical scores that contain a variety of information based on deep learning algorithms that extract textual (evaluative) information and metadata from the Web.}}, author = {{Kersting, Joschka and Geierhos, Michaela}}, booktitle = {{Data Management Technologies and Applications}}, editor = {{Cuzzocrea, Alfredo and Gusikhin, Oleg and Hammoudi, Slimane and Quix, Christoph}}, isbn = {{9783031378898}}, issn = {{1865-0929}}, pages = {{45--65}}, publisher = {{Springer Nature Switzerland}}, title = {{{Towards Comparable Ratings: Quantifying Evaluative Phrases in Physician Reviews}}}, doi = {{10.1007/978-3-031-37890-4_3}}, volume = {{1860}}, year = {{2023}}, } @inbook{32179, abstract = {{This work addresses the automatic resolution of software requirements. In the vision of On-The-Fly Computing, software services should be composed on demand, based solely on natural language input from human users. To enable this, we build a chatbot solution that works with human-in-the-loop support to receive, analyze, correct, and complete users’ software requirements. The chatbot is equipped with a natural language processing pipeline and a large knowledge base, as well as sophisticated dialogue management skills to enhance the user experience. Previous solutions have focused on analyzing software requirements to point out errors such as vagueness, ambiguity, or incompleteness. Our work shows how apps can collaborate with users to efficiently produce correct requirements. We developed and compared three different chatbot apps that can work with built-in knowledge. We rely on ChatterBot, DialoGPT, and Rasa for this purpose. While DialoGPT provides its own knowledge base, Rasa is the best system to combine the text mining and knowledge solutions at our disposal.
The evaluation shows that users accept 73% of the suggested answers from Rasa, while they accept only 63% from DialoGPT and a mere 36% from ChatterBot.}}, author = {{Kersting, Joschka and Ahmed, Mobeen and Geierhos, Michaela}}, booktitle = {{HCI International 2022 Posters}}, editor = {{Stephanidis, Constantine and Antona, Margherita and Ntoa, Stavroula}}, isbn = {{9783031064166}}, issn = {{1865-0929}}, keywords = {{On-The-Fly Computing, Chatbot, Knowledge Base}}, location = {{Virtual}}, pages = {{419--426}}, publisher = {{Springer International Publishing}}, title = {{{Chatbot-Enhanced Requirements Resolution for Automated Service Compositions}}}, doi = {{10.1007/978-3-031-06417-3_56}}, volume = {{1580}}, year = {{2022}}, } @inbook{17905, abstract = {{This chapter concentrates on aspect-based sentiment analysis, a form of opinion mining where algorithms detect sentiments expressed about features of products, services, etc. We especially focus on novel approaches for aspect phrase extraction and classification trained on feature-rich datasets. Here, we present two new datasets, which we gathered from the linguistically rich domain of physician reviews, as other investigations have mainly concentrated on commercial reviews and social media reviews so far. To give readers a better understanding of the underlying datasets, we describe the annotation process and inter-annotator agreement in detail. In our research, we automatically assess implicit mentions or indications of specific aspects. To do this, we propose and utilize neural network models that perform the here-defined aspect phrase extraction and classification task, achieving F1-score values of about 80% and accuracy values of more than 90%. As we apply our models to a comparatively complex domain, we obtain promising results.}}, author = {{Kersting, Joschka and Geierhos, Michaela}}, booktitle = {{Natural Language Processing in Artificial Intelligence -- NLPinAI 2020}}, editor = {{Loukanova, Roussanka}}, pages = {{163--189}}, publisher = {{Springer}}, title = {{{Towards Aspect Extraction and Classification for Opinion Mining with Deep Sequence Networks}}}, doi = {{10.1007/978-3-030-63787-3_6}}, volume = {{939}}, year = {{2021}}, } @inproceedings{22051, author = {{Kersting, Joschka and Geierhos, Michaela}}, booktitle = {{Proceedings of the 10th International Conference on Data Science, Technology and Applications (DATA 2021)}}, location = {{Online}}, pages = {{275--284}}, publisher = {{SCITEPRESS}}, title = {{{Well-being in Plastic Surgery: Deep Learning Reveals Patients' Evaluations}}}, year = {{2021}}, } @inbook{22052, abstract = {{In this study, we describe a text processing pipeline that transforms user-generated text into structured data. To do this, we train neural and transformer-based models for aspect-based sentiment analysis. As most research deals with explicit aspects from product or service data, we extract and classify implicit and explicit aspect phrases from German-language physician review texts. Patients often rate on the basis of perceived friendliness or competence. The vocabulary is difficult, the topic sensitive, and the data user-generated. The aspect phrases appear in varied wordings with insertions and are not noun-based, which makes the presented case both relevant and realistic. To find complex, indirect aspect phrases, up-to-date deep learning approaches must be combined with supervised training data.
We describe three aspect phrase datasets, one of them new, as well as a newly annotated aspect polarity dataset. Alongside this, we build an algorithm to rate the aspect phrase importance. All in all, we train eight transformers on the new raw data domain, compare 54 neural aspect extraction models and, based on this, create eight aspect polarity models for our pipeline. These models are evaluated using precision, recall, and F-score measures. Finally, we evaluate our aspect phrase importance measure algorithm.}}, author = {{Kersting, Joschka and Geierhos, Michaela}}, booktitle = {{Natural Language Processing and Information Systems}}, editor = {{Kapetanios, Epaminondas and Horacek, Helmut and Métais, Elisabeth and Meziane, Farid}}, location = {{Saarbrücken, Germany}}, pages = {{231--242}}, publisher = {{Springer}}, title = {{{Human Language Comprehension in Aspect Phrase Extraction with Importance Weighting}}}, volume = {{12801}}, year = {{2021}}, } @inbook{17347, abstract = {{Peer-to-Peer news portals allow Internet users to write news articles and make them available online to interested readers. Although authors are free in their choice of topics, there are a number of quality characteristics that an article must meet before it is published. In addition to meaningful titles, comprehensibly written texts, and meaningful images, relevant tags are an important criterion for the quality of such news. In this case study, we discuss the challenges and common mistakes that Peer-to-Peer reporters face when tagging news and how incorrect information can be corrected through the orchestration of existing Natural Language Processing services. Lastly, we use this illustrative example to give insight into the challenges of dealing with bottom-up taxonomies.}}, author = {{Bäumer, Frederik Simon and Kersting, Joschka and Buff, Bianca and Geierhos, Michaela}}, booktitle = {{Information and Software Technologies}}, editor = {{Lopata, Audrius and Butkienė, Rita and Gudonienė, Daina and Sukackė, Vilma}}, location = {{Kaunas, Lithuania}}, pages = {{368--382}}, publisher = {{Springer}}, title = {{{Tag Me If You Can: Insights into the Challenges of Supporting Unrestricted P2P News Tagging}}}, doi = {{10.1007/978-3-030-59506-7_30}}, volume = {{1283}}, year = {{2020}}, } @inproceedings{18686, author = {{Kersting, Joschka and Bäumer, Frederik Simon}}, booktitle = {{Proceedings of the International Conference on Applied Computing 2020}}, keywords = {{Software Requirements, Natural Language Processing, Transfer Learning, On-The-Fly Computing}}, location = {{Lisbon, Portugal}}, pages = {{119--123}}, publisher = {{IADIS}}, title = {{{Semantic Tagging of Requirement Descriptions: A Transformer-based Approach}}}, year = {{2020}}, } @inproceedings{15580, abstract = {{This paper deals with aspect phrase extraction and classification in sentiment analysis. We summarize current approaches and datasets from the domain of aspect-based sentiment analysis. This domain detects sentiments expressed for individual aspects in unstructured text data. So far, mainly commercial user reviews for products or services such as restaurants have been investigated. Here, we present our dataset of German physician reviews, a sensitive and linguistically complex field. Furthermore, we describe the annotation process of a dataset for supervised learning with neural networks. Moreover, we introduce our model for extracting and classifying aspect phrases in one step, which obtains an F1-score of 80%.
Applied to a more complex domain, our approach outperforms previous ones.}}, author = {{Kersting, Joschka and Geierhos, Michaela}}, booktitle = {{Proceedings of the 12th International Conference on Agents and Artificial Intelligence (ICAART 2020) -- Special Session on Natural Language Processing in Artificial Intelligence (NLPinAI 2020)}}, keywords = {{Deep Learning, Natural Language Processing, Aspect-based Sentiment Analysis}}, location = {{Valletta, Malta}}, pages = {{391--400}}, publisher = {{SCITEPRESS}}, title = {{{Aspect Phrase Extraction in Sentiment Analysis with Deep Learning}}}, year = {{2020}}, } @inproceedings{15582, abstract = {{With increasing digitization in the health care domain, privacy is a highly relevant topic nowadays. This relates to patient data, electronic health records, or physician reviews published online, for instance. There exist different approaches to the protection of individuals’ privacy, which focus on the anonymization and masking of personal information subsequent to its mining. In the medical domain in particular, measures to protect the privacy of patients are of high importance due to the amount of sensitive data that is involved (e.g., age, gender, illnesses, medication). While privacy breaches in structured data can be detected more easily, disclosure in written texts is more difficult to find automatically due to the unstructured nature of natural language. Therefore, we take a detailed look at existing research on areas related to privacy protection. Likewise, we review approaches to the automatic detection of privacy disclosure in different types of medical data. We provide a survey of several studies concerned with privacy breaches in the medical domain with a focus on Physician Review Websites (PRWs). Finally, we briefly develop implications and directions for further research.}}, author = {{Buff, Bianca and Kersting, Joschka and Geierhos, Michaela}}, booktitle = {{Proceedings of the 9th International Conference on Pattern Recognition Applications and Methods (ICPRAM 2020)}}, keywords = {{Identity Disclosure, Privacy Protection, Physician Review Website, De-Anonymization, Medical Domain}}, location = {{Valletta, Malta}}, pages = {{630--637}}, publisher = {{SCITEPRESS}}, title = {{{Detection of Privacy Disclosure in the Medical Domain: A Survey}}}, year = {{2020}}, } @inproceedings{15635, author = {{Kersting, Joschka and Geierhos, Michaela}}, booktitle = {{Proceedings of the 33rd International Florida Artificial Intelligence Research Symposium (FLAIRS) Conference}}, location = {{North Miami Beach, FL, USA}}, pages = {{282--285}}, publisher = {{AAAI}}, title = {{{Neural Learning for Aspect Phrase Extraction and Classification in Sentiment Analysis}}}, year = {{2020}}, } @inproceedings{15256, abstract = {{This paper deals with online customer reviews of local multi-service providers. While many studies investigate product reviews and online labour markets with service providers delivering intangible products “over the wire”, we focus on websites where providers offer multiple distinct services that can be booked, paid, and reviewed online but are performed locally offline. This type of service provider has so far been neglected in the literature. This paper analyses reviews and applies sentiment analysis. It aims to gain new insights into local multi-service providers’ performance. A broad range of literature is presented with regard to the topics addressed.
The results show, among other things, that providers with good ratings continue to perform well over time. We find that many positive reviews seem to encourage sales. On average, quantitative star ratings and qualitative ratings in the form of review texts match. The study yields further findings beyond these.}}, author = {{Kersting, Joschka and Geierhos, Michaela}}, booktitle = {{Proceedings of the 9th International Conference on Pattern Recognition Applications and Methods}}, keywords = {{Customer Reviews, Sentiment Analysis, Online Labour Markets}}, location = {{Valletta, Malta}}, pages = {{263--272}}, publisher = {{SCITEPRESS}}, title = {{{What Reviews in Local Online Labour Markets Reveal about the Performance of Multi-Service Providers}}}, year = {{2020}}, } @misc{8312, author = {{Bäumer, Frederik Simon and Geierhos, Michaela}}, booktitle = {{encyclopedia.pub}}, keywords = {{OTF Computing, Natural Language Processing, Requirements Engineering}}, publisher = {{MDPI}}, title = {{{Requirements Engineering in OTF-Computing}}}, year = {{2019}}, } @article{8424, abstract = {{The vision of On-the-Fly (OTF) Computing is to compose and provide software services ad hoc, based on requirement descriptions in natural language. Since non-technical users write their software requirements themselves and in unrestricted natural language, deficits such as inaccuracy and incompleteness occur. These deficits are usually met by natural language processing methods, which have to face special challenges in OTF Computing because maximum automation is the goal. In this paper, we present current automatic approaches for resolving inaccuracy and incompleteness in natural language requirement descriptions and elaborate open challenges. In particular, we discuss the necessity of domain-specific resources and show why, despite far-reaching automation, an intelligent and guided integration of end users into the compensation process is required. In this context, we present our idea of a chatbot that integrates users into the compensation process depending on the given circumstances.}}, author = {{Bäumer, Frederik Simon and Kersting, Joschka and Geierhos, Michaela}}, issn = {{2073-431X}}, journal = {{Computers}}, keywords = {{Inaccuracy Detection, Natural Language Software Requirements, Chat Bot}}, location = {{Vilnius, Lithuania}}, number = {{1}}, publisher = {{MDPI AG, Basel, Switzerland}}, title = {{{Natural Language Processing in OTF Computing: Challenges and the Need for Interactive Approaches}}}, doi = {{10.3390/computers8010022}}, volume = {{8}}, year = {{2019}}, } @inproceedings{8529, author = {{Seemann, Nina and Merten, Marie-Luis}}, booktitle = {{DHd 2019 Digital Humanities: multimedial & multimodal. Konferenzabstracts}}, editor = {{Sahle, Patrick}}, isbn = {{978-3-00-062166-6}}, location = {{Mainz and Frankfurt am Main, Germany}}, pages = {{352--353}}, publisher = {{Zenodo}}, title = {{{UPB-Annotate: Ein maßgeschneidertes Toolkit für historische Texte}}}, doi = {{10.5281/zenodo.2596094}}, year = {{2019}}, } @inproceedings{8532, author = {{Bäumer, Frederik Simon and Buff, Bianca and Geierhos, Michaela}}, booktitle = {{DHd 2019 Digital Humanities: multimedial & multimodal.
Konferenzabstracts}}, editor = {{Sahle, Patrick}}, isbn = {{978-3-00-062166-6}}, location = {{Mainz and Frankfurt am Main, Germany}}, pages = {{192--193}}, publisher = {{Zenodo}}, title = {{{Potentielle Privatsphäreverletzungen aufdecken und automatisiert sichtbar machen}}}, doi = {{10.5281/zenodo.2596095}}, year = {{2019}}, } @inproceedings{9613, abstract = {{The ability to openly evaluate products, locations, and services is an achievement of Web 2.0. It has never been easier to inform oneself about the quality of products or services and possible alternatives. Forming one’s own opinion based on the impressions of other people can lead to better experiences. However, this presupposes trust in one’s fellows as well as in the quality of the review platforms. In previous work on physician reviews and the corresponding websites, it was observed that some reviewers behaved improperly and that there were noteworthy differences in the technical implementation of the portals and in the efforts of site operators to maintain high-quality reviews. These experiences raise new questions regarding what trust means on review platforms, how trust arises, and how easily it can be destroyed.}}, author = {{Kersting, Joschka and Bäumer, Frederik Simon and Geierhos, Michaela}}, booktitle = {{Proceedings of the 4th International Conference on Internet of Things, Big Data and Security}}, editor = {{Ramachandran, Muthu and Walters, Robert and Wills, Gary and Méndez Muñoz, Víctor and Chang, Victor}}, isbn = {{978-989-758-369-8}}, keywords = {{Trust, Physician Reviews, Network Analysis}}, location = {{Heraklion, Greece}}, pages = {{147--155}}, publisher = {{SCITEPRESS}}, title = {{{In Reviews We Trust: But Should We? Experiences with Physician Review Websites}}}, year = {{2019}}, } @inproceedings{12946, author = {{Bäumer, Frederik Simon and Buff, Bianca}}, booktitle = {{Proceedings of the 8th International Conference on Data Science, Technology and Applications}}, isbn = {{9789897583773}}, title = {{{How to Boost Customer Relationship Management via Web Mining Benefiting from the Glass Customer’s Openness}}}, doi = {{10.5220/0007828301290136}}, year = {{2019}}, } @misc{13435, author = {{Friesen, Edwin}}, publisher = {{Universität Paderborn}}, title = {{{Requirements Engineering im OTF-Computing: Informationsextraktion und Unvollständigkeitskompensation mittels domänenspezifischer Wissensbasis}}}, year = {{2019}}, } @inbook{2322, abstract = {{The vision of On-The-Fly Computing is an automatic composition of existing software services. Based on natural language software descriptions, end users will receive compositions tailored to their needs. For this reason, the quality of the initial software service description strongly determines whether a software composition really meets the expectations of end users.
In this paper, we expose open NLP challenges that need to be faced for service composition in On-The-Fly Computing.}}, author = {{Bäumer, Frederik Simon and Geierhos, Michaela}}, booktitle = {{Proceedings of the 23rd International Conference on Natural Language and Information Systems}}, editor = {{Silberztein, Max and Atigui, Faten and Kornyshova, Elena and Métais, Elisabeth and Meziane, Farid}}, isbn = {{978-3-319-91946-1}}, keywords = {{Requirements Extraction, Temporal Reordering of Software Functions, Inaccuracy Compensation}}, location = {{Paris, France}}, pages = {{509--513}}, publisher = {{Springer}}, title = {{{How to Deal with Inaccurate Service Descriptions in On-The-Fly Computing: Open Challenges}}}, doi = {{10.1007/978-3-319-91947-8_53}}, volume = {{10859}}, year = {{2018}}, } @article{2331, abstract = {{A user generally writes software requirements in ambiguous and incomplete form using natural language; therefore, a software developer may have difficulty clearly understanding what is meant. To solve this problem with automation, we propose a classifier for semantic annotation with manually pre-defined semantic categories. To improve our classifier, we carefully designed syntactic features extracted by constituency and dependency parsers. Even with a small dataset and a large number of classes, our proposed classifier records an accuracy of 0.75, which outperforms the previous model, REaCT.}}, author = {{Kim, Yeongsu and Lee, Seungwoo and Dollmann, Markus and Geierhos, Michaela}}, issn = {{2207-6360}}, journal = {{International Journal of Advanced Science and Technology}}, keywords = {{Software Engineering, Natural Language Processing, Semantic Annotation, Machine Learning, Feature Engineering, Syntactic Structure}}, pages = {{123--136}}, publisher = {{SERSC Australia}}, title = {{{Improving Classifiers for Semantic Annotation of Software Requirements with Elaborate Syntactic Structure}}}, doi = {{10.14257/ijast.2018.112.12}}, volume = {{112}}, year = {{2018}}, } @inbook{6436, author = {{Geierhos, Michaela}}, booktitle = {{Integration und Toleranz}}, editor = {{Schnebel, Karin B. and Taubenböck, Andrea}}, isbn = {{978-3-86281-135-9}}, pages = {{288--292}}, publisher = {{Klemm+Oelschläger}}, title = {{{Freiraum zur individuellen Reflexion gemeinsamer Werte}}}, year = {{2018}}, } @inbook{4338, abstract = {{Physician review websites are known around the world. Patients review the subjectively experienced quality of medical services supplied to them and publish an overall rating on the Internet, where quantitative grades and qualitative texts come together. On the one hand, these new possibilities reduce the imbalance of power between health care providers and patients; on the other hand, they can also damage the usually very intimate relationship between health care providers and patients. Review websites must meet these challenges with a high level of responsibility and service quality. In this paper, we look at the situation in Lithuania: in particular, we are interested in the available evaluation and interaction options and in the quality of a particular review website, measured against the available data.
We thereby identify quality weaknesses and lay the foundation for future research.}}, author = {{Bäumer, Frederik Simon and Kersting, Joschka and Kuršelis, Vytautas and Geierhos, Michaela}}, booktitle = {{Communications in Computer and Information Science}}, editor = {{Damaševičius, Robertas and Vasiljevienė, Giedrė}}, isbn = {{9783319999715}}, issn = {{1865-0929}}, keywords = {{Lithuanian physician review websites, Medical service ratings}}, location = {{Vilnius, Lithuania}}, pages = {{43--58}}, publisher = {{Springer}}, title = {{{Rate Your Physician: Findings from a Lithuanian Physician Rating Website}}}, doi = {{10.1007/978-3-319-99972-2_4}}, volume = {{920}}, year = {{2018}}, } @inproceedings{4339, abstract = {{On-The-Fly Computing is the vision of covering the software needs of end users by fully automatic compositions of existing software services. End users will receive so-called service compositions tailored to their very individual needs, based on natural language software descriptions. This everyday language may contain inaccuracies and incompleteness, which are well-known challenges in requirements engineering. In addition to existing approaches that try to automatically identify and correct these deficits, there are also new trends to involve users more in the elaboration and refinement process. In this paper, we present the relevant state of the art in the field of automated detection and compensation of multiple inaccuracies in natural language service descriptions and name open challenges that need to be tackled in NL-based software service composition.}}, author = {{Bäumer, Frederik Simon and Geierhos, Michaela}}, booktitle = {{Proceedings of the 24th International Conference on Information and Software Technologies (ICIST 2018)}}, editor = {{Damaševičius, Robertas and Vasiljevienė, Giedrė}}, isbn = {{9783319999715}}, issn = {{1865-0929}}, keywords = {{Inaccuracy detection, Natural language software requirements}}, location = {{Vilnius, Lithuania}}, pages = {{559--570}}, publisher = {{Springer}}, title = {{{NLP in OTF Computing: Current Approaches and Open Challenges}}}, doi = {{10.1007/978-3-319-99972-2_46}}, volume = {{920}}, year = {{2018}}, } @inproceedings{4341, author = {{Bäumer, Frederik Simon and Geierhos, Michaela}}, booktitle = {{European Conference on Data Analysis 2018: Multidisciplinary Facets of Data Science - Book of Abstracts}}, location = {{Paderborn, Germany}}, title = {{{Text Broom: A ML-based Tool to Detect and Highlight Privacy Breaches in Physician Reviews: An Insight into Our Current Work}}}, year = {{2018}}, } @inproceedings{4345, abstract = {{This paper presents the various sources of uncertainty we encounter in our project. Our research focus lies on the investigation of language elaboration processes in Middle Low German. We are particularly interested in diachronic constructional changes and constructionalizations involving and affecting all linguistic dimensions. For this, it is necessary to annotate our corpus with Part-of-Speech and constructional tags. Here, we are confronted with gradualness, gradience, and ambiguity as potential sources of uncertainty that complicate the annotation process. Furthermore, due to the historicity of the investigated language, we expect cases of incomplete knowledge and comparative fallacy from the annotators.
For this reason, we develop an interface that captures all annotators’ doubts.}}, author = {{Merten, Marie-Luis and Seemann, Nina}}, booktitle = {{Proceedings of the 6th International Conference on Technological Ecosystems for Enhancing Multiculturality (TEEM'18)}}, editor = {{García-Peñalvo, Francisco José}}, isbn = {{978-1-4503-6518-5}}, keywords = {{historical languages, linguistic annotations, gradience and gradualness, ambiguity, incomplete knowledge}}, location = {{Salamanca, Spain}}, pages = {{819--825}}, publisher = {{ACM}}, title = {{{Analysing Constructional Change: Linguistic Annotation and Sources of Uncertainty}}}, doi = {{10.1145/3284179.3284320}}, year = {{2018}}, } @inproceedings{4349, abstract = {{Physician Review Websites allow users to evaluate their experiences with health services. As these evaluations are regularly contextualized with facts from users’ private lives, they often accidentally disclose personal information on the Web. This poses a serious threat to users’ privacy. In this paper, we report on early work in progress on “Text Broom”, a tool to detect privacy breaches in user-generated texts. For this purpose, we conceptualize a pipeline that combines Natural Language Processing methods such as Named Entity Recognition, linguistic patterns, and domain-specific Machine Learning approaches, which have the potential to recognize privacy violations with wide coverage. A prototypical web application is openly accessible.}}, author = {{Bäumer, Frederik Simon and Kersting, Joschka and Orlikowski, Matthias and Geierhos, Michaela}}, booktitle = {{Proceedings of the Posters and Demos Track of the 14th International Conference on Semantic Systems co-located with the 14th International Conference on Semantic Systems (SEMANTiCS 2018)}}, editor = {{Khalili, Ali and Koutraki, Maria}}, issn = {{1613-0073}}, keywords = {{Detection of Privacy Violations, Physician Reviews}}, location = {{Vienna, Austria}}, publisher = {{CEUR-WS.org}}, title = {{{Towards a Multi-Stage Approach to Detect Privacy Breaches in Physician Reviews}}}, volume = {{2198}}, year = {{2018}}, } @inproceedings{44, abstract = {{Natural language software requirements descriptions enable end users to formulate their wishes and expectations for a future software product without much prior knowledge in requirements engineering. However, these descriptions are susceptible to linguistic inaccuracies such as ambiguities and incompleteness that can harm the development process. There are a number of software solutions that can detect deficits in requirements descriptions and partially solve them, but they are often hard to use and not suitable for end users. For this reason, we develop a software system that helps end users to create unambiguous and complete requirements descriptions by combining existing expert tools and controlling them using automatic compensation strategies. In order to recognize the necessity of individual compensation methods in the descriptions, we have developed linguistic indicators, which we present in this paper.
Based on these indicators, the whole text analysis pipeline is configured ad hoc and thus adapted to the individual circumstances of a requirements description.}}, author = {{Bäumer, Frederik Simon and Geierhos, Michaela}}, booktitle = {{Proceedings of the 51st Hawaii International Conference on System Sciences}}, isbn = {{978-0-9981331-1-9}}, keywords = {{Software Product Lines: Engineering, Services, and Management, Ambiguities, Incompleteness, Natural Language Processing, Software Requirements}}, location = {{Big Island, Waikoloa Village}}, pages = {{5746--5755}}, title = {{{Flexible Ambiguity Resolution and Incompleteness Detection in Requirements Descriptions via an Indicator-based Configuration of Text Analysis Pipelines}}}, doi = {{10125/50609}}, year = {{2018}}, } @inbook{1104, abstract = {{Precision is no accident. It is brought about by people striving for agreement with a standard or an accepted value, or by demanding that experiments be as reproducible as possible. But what can be done when precision cannot be established for lack of available information? How do science and art then deal with this missing unambiguity? The authors of this edited volume illuminate, from the perspective of their respective disciplines, the opportunities of taking fuzziness into account in their research and art. For fuzziness is reality.}}, author = {{Geierhos, Michaela}}, booktitle = {{Unschärfe - Der Umgang mit fehlender Eindeutigkeit}}, editor = {{Freitag, Steffen and Geierhos, Michaela and Asmani, Rozbeh and Haug, Judith I.}}, isbn = {{978-3-506-78896-2}}, pages = {{111--128}}, publisher = {{Ferdinand Schöningh}}, title = {{{Unschärfe bei der Interpretation natürlichsprachlicher Anforderungsbeschreibungen}}}, year = {{2018}}, } @inproceedings{1181, abstract = {{The main idea in On-The-Fly Computing is to automatically compose existing software services according to the wishes of end users. However, since user requirements are often ambiguous, vague, and incomplete, the selection and composition of suitable software services is a challenging task. In this paper, we present our current approach to improving requirement descriptions before they are used for software composition. This procedure is fully automated, but also has limitations, for example, if necessary information is missing. In addition, and in response to these limitations, we provide insights into our above-mentioned current work that combines the existing optimization approach with a chatbot solution.}}, author = {{Bäumer, Frederik Simon and Geierhos, Michaela}}, booktitle = {{Joint Proceedings of REFSQ-2018 Workshops, Doctoral Symposium, Live Studies Track, and Poster Track co-located with the 23rd International Conference on Requirements Engineering: Foundation for Software Quality (REFSQ 2018)}}, editor = {{Schmid, Klaus and Spoletini, Paola and Ben Charrada, Eya and Chisik, Yoram and Dalpiaz, Fabiano and Ferrari, Alessio and Forbrig, Peter and Franch, Xavier and Kirikova, Marite and Madhavji, Nazim and Palomares, Cristina and Ralyté, Jolita and Sabetzadeh, Mehrdad and Sawyer, Pete and van der Linden, Dirk and Zamansky, Anna}}, issn = {{1613-0073}}, location = {{Utrecht, The Netherlands}}, publisher = {{CEUR-WS.org}}, title = {{{How to Deal with Inaccurate Service Requirements?
Insights in Our Current Approach and New Ideas}}}, volume = {{2075}}, year = {{2018}}, } @inproceedings{1182, abstract = {{Natural language requirement descriptions are often unstructured, contradictory, and incomplete and are therefore challenging for automatic processing. Although many of these deficits can be compensated for by means of Natural Language Processing, there still remain cases where interaction with end users is necessary for clarification. In this paper, we present our idea of using chatbot technology to establish end-user communication in order to support the automatic compensation of some deficits in natural language requirement descriptions.}}, author = {{Friesen, Edwin and Bäumer, Frederik Simon and Geierhos, Michaela}}, booktitle = {{Joint Proceedings of REFSQ-2018 Workshops, Doctoral Symposium, Live Studies Track, and Poster Track co-located with the 23rd International Conference on Requirements Engineering: Foundation for Software Quality (REFSQ 2018)}}, editor = {{Schmid, Klaus and Spoletini, Paola and Ben Charrada, Eya and Chisik, Yoram and Dalpiaz, Fabiano and Ferrari, Alessio and Forbrig, Peter and Franch, Xavier and Kirikova, Marite and Madhavji, Nazim and Palomares, Cristina and Ralyté, Jolita and Sabetzadeh, Mehrdad and Sawyer, Pete and van der Linden, Dirk and Zamansky, Anna}}, issn = {{1613-0073}}, location = {{Utrecht, The Netherlands}}, publisher = {{CEUR-WS.org}}, title = {{{CORDULA: Software Requirements Extraction Utilizing Chatbot as Communication Interface}}}, volume = {{2075}}, year = {{2018}}, } @inproceedings{1183, abstract = {{As our world grows in complexity, companies and employees alike need, more than ever before, solutions tailored to their exact needs. Since such tools cannot always be purchased off the shelf and need to be designed from the ground up, developers rely on software requirements. In this paper, we present our vision of a syntactic rule-based extraction tool for software requirements specification documents. In contrast to other methods, our tool will allow stakeholders to express their needs and wishes in unfiltered natural language, which we believe is essential for non-expert users.}}, author = {{Caron, Matthew and Bäumer, Frederik Simon and Geierhos, Michaela}}, booktitle = {{Joint Proceedings of REFSQ-2018 Workshops, Doctoral Symposium, Live Studies Track, and Poster Track co-located with the 23rd International Conference on Requirements Engineering: Foundation for Software Quality (REFSQ 2018)}}, editor = {{Schmid, Klaus and Spoletini, Paola and Ben Charrada, Eya and Chisik, Yoram and Dalpiaz, Fabiano and Ferrari, Alessio and Forbrig, Peter and Franch, Xavier and Kirikova, Marite and Madhavji, Nazim and Palomares, Cristina and Ralyté, Jolita and Sabetzadeh, Mehrdad and Sawyer, Pete and van der Linden, Dirk and Zamansky, Anna}}, issn = {{1613-0073}}, location = {{Utrecht, The Netherlands}}, publisher = {{CEUR-WS.org}}, title = {{{Back to Basics: Extracting Software Requirements with a Syntactic Approach}}}, volume = {{2075}}, year = {{2018}}, } @inproceedings{1379, author = {{Seemann, Nina and Geierhos, Michaela and Merten, Marie-Luis and Tophinke, Doris and Wever, Marcel Dominik and Hüllermeier, Eyke}}, booktitle = {{Postersession Computerlinguistik der 40.
Jahrestagung der Deutschen Gesellschaft für Sprachwissenschaft}}, editor = {{Eckart, Kerstin and Schlechtweg, Dominik}}, location = {{Stuttgart, Germany}}, title = {{{Supporting the Cognitive Process in Annotation Tasks}}}, year = {{2018}}, } @inproceedings{1156, abstract = {{In this paper, we present an IoT architecture that handles streaming sensor data on air pollution. Particle pollution is known as a serious threat to human health. Along with developments in the use of wireless sensors and the IoT, we propose an architecture that flexibly measures and processes stream data collected in real time by movable and low-cost IoT sensors. Thus, it enables a widespread network of wireless sensors that can follow changes in human behavior. Apart from stating reasons for the need for such a development and its requirements, we provide a conceptual as well as a technological design of such an architecture. The technological design consists of Kaa and Apache Storm, which can collect air pollution information in real time and solve various data processing problems such as missing data and synchronization. This enables us to add a simulation in which we examine issues that might come up when our architecture is in use. Together with these issues, we state reasons for choosing specific modules among candidates. Our architecture combines wireless sensors with the Kaa IoT framework, an Apache Kafka pipeline, and an Apache Storm data stream management system, among others. We also provide freely available open-government datasets.}}, author = {{Kersting, Joschka and Geierhos, Michaela and Jung, Hanmin and Kim, Taehong}}, booktitle = {{Proceedings of the 2nd International Conference on Internet of Things, Big Data and Security}}, editor = {{Ramachandran, Muthu and Méndez Muñoz, Víctor and Kantere, Verena and Wills, Gary and Walters, Robert and Chang, Victor}}, isbn = {{978-989-758-245-5}}, keywords = {{Wireless Sensor Network, Internet of Things, Stream Data, Air Pollution, DSMS, Real-time Data Processing}}, location = {{Porto, Portugal}}, pages = {{117--124}}, publisher = {{SCITEPRESS}}, title = {{{Internet of Things Architecture for Handling Stream Air Pollution Data}}}, doi = {{10.5220/0006354801170124}}, year = {{2017}}, } @inproceedings{1158, abstract = {{In this paper, we present the annotation challenges we have encountered when working on a historical language that was undergoing elaboration processes. We especially focus on syntactic ambiguity and gradience in Middle Low German, which causes uncertainty to some extent. Since current annotation tools consider construction contexts and the dynamics of grammaticalization only partially, we plan to extend CorA – a web-based annotation tool for historical and other non-standard language data – to capture elaboration phenomena and annotator unsureness.
Moreover, we seek to interactively learn morphological as well as syntactic annotations.}}, author = {{Seemann, Nina and Merten, Marie-Luis and Geierhos, Michaela and Tophinke, Doris and Hüllermeier, Eyke}}, booktitle = {{Proceedings of the Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature}}, location = {{Vancouver, BC, Canada}}, pages = {{40--45}}, publisher = {{Association for Computational Linguistics (ACL)}}, title = {{{Annotation Challenges for Reconstructing the Structural Elaboration of Middle Low German}}}, doi = {{10.18653/v1/W17-2206}}, year = {{2017}}, } @inbook{1159, abstract = {{In this paper, we present a search solution that makes local news information easily accessible. In the era of fake news, we provide an approach for accessing news information through opinion mining. This enables users to view news on the same topics from different web sources. By applying sentiment analysis to social media posts, users can better understand how issues are captured and see people’s reactions. Therefore, we provide a local search service that first localizes news articles, then visualizes their occurrence according to the frequency of mentioned topics on a heatmap, and even shows the sentiment score for each text.}}, author = {{Kersting, Joschka and Geierhos, Michaela}}, booktitle = {{Information and Software Technologies: 23rd International Conference, ICIST 2017, Druskininkai, Lithuania, October 12–14, 2017, Proceedings}}, editor = {{Damaševičius, Robertas and Mikašytė, Vilma}}, isbn = {{978-3-319-67641-8}}, location = {{Druskininkai, Lithuania}}, pages = {{528--538}}, publisher = {{Springer}}, title = {{{Using Sentiment Analysis on Local Up-to-the-Minute News: An Integrated Approach}}}, doi = {{10.1007/978-3-319-67642-5}}, volume = {{756}}, year = {{2017}}, } @inbook{1161, abstract = {{Consulting a physician was long regarded as an intimate and private matter. The physician-patient relationship was perceived as sensitive and built on trust. Nowadays, there is a change, as medical procedures and physician consultations are reviewed like other services on the Internet. To allay users’ privacy doubts, physician review websites assure anonymity and the protection of private data. However, there are hundreds of reviews that reveal private information and hence enable physicians or the public to identify patients. Thus, we draw attention to cases in which de-anonymization is possible. We therefore introduce an approach that highlights private information in physician reviews so that users can avoid accidental disclosure. For this reason, we combine established natural-language-processing techniques such as named entity recognition as well as handcrafted patterns to achieve a high detection accuracy.
That way, we can help websites to increase privacy protection by recognizing and uncovering apparently uncritical information in user-generated texts.}}, author = {{Bäumer, Frederik Simon and Grote, Nicolai and Kersting, Joschka and Geierhos, Michaela}}, booktitle = {{Information and Software Technologies: 23rd International Conference, ICIST 2017, Druskininkai, Lithuania, October 12–14, 2017, Proceedings}}, editor = {{Damaševičius, Robertas and Mikašytė, Vilma}}, isbn = {{978-3-319-67641-8}}, keywords = {{Physician Reviews, User Privacy, Nocuous Data Exposure}}, location = {{Druskininkai, Lithuania}}, pages = {{77--89}}, publisher = {{Springer}}, title = {{{Privacy Matters: Detecting Nocuous Patient Data Exposure in Online Physician Reviews}}}, doi = {{10.1007/978-3-319-67642-5_7}}, volume = {{756}}, year = {{2017}}, } @inbook{93, abstract = {{In recent years, there has been a proliferation of technological developments that incorporate processing of human language. Hardware and software can be specialized for designated subject areas, and computational devices are designed for a widening variety of applications. At the same time, new areas and applications are emerging that demand intelligent technology enhanced by the processing of human language. These new applications often perform tasks which handle information, and they have a capacity to reason, using both formal and human language. Many sub-areas of Artificial Intelligence demand integration of Natural Language Processing, at least to some degree. Furthermore, technologies require coverage of known as well as unknown agents, and tasks with potential variations. All of this takes place in environments with unknown factors. The book covers theoretical work, advanced applications, approaches, and techniques for computational models of information, reasoning systems, and presentation in language. The book promotes work on intelligent natural language processing and related models of information, thought, reasoning, and other cognitive processes. The topics covered by the chapters prompt further research and developments of advanced systems in the areas of logic, computability, computational linguistics, cognitive science, neuroscience of language, robotics, and artificial intelligence, among others.}}, author = {{Geierhos, Michaela and Bäumer, Frederik Simon}}, booktitle = {{Partiality and Underspecification in Information, Languages, and Knowledge}}, editor = {{Christiansen, Henning and Jiménez-López, M. Dolores and Loukanova, Roussanka and Moss, Lawrence S.}}, isbn = {{978-1-4438-7947-7}}, pages = {{65--108}}, publisher = {{Cambridge Scholars Publishing}}, title = {{{Guesswork? Resolving Vagueness in User-Generated Software Requirements}}}, year = {{2017}}, } @inproceedings{57, abstract = {{Users prefer natural language software requirements because of their usability and accessibility. Many approaches exist to elaborate these requirements and to support the users during the elicitation process. But there is a lack of adequate resources, which are needed to train and evaluate approaches for requirement refinement. We are trying to close this gap by using online-available software descriptions from SourceForge and app stores. Thus, we present two real-life requirements collections based on online-available software descriptions. Our goal is to show the domain-specific characteristics of content words describing functional requirements.
On the one hand, we created a semantic role-labeled requirements set, which we use for requirements classification. On the other hand, we enriched software descriptions with linguistic features and dependencies to provide evidence for the context-awareness of software functionalities.}}, author = {{Bäumer, Frederik Simon and Dollmann, Markus and Geierhos, Michaela}}, booktitle = {{Proceedings of the 2nd ACM SIGSOFT International Workshop on App Market Analytics}}, editor = {{Sarro, Federica and Shihab, Emad and Nagappan, Meiyappan and Platenius, Marie Christin and Kaimann, Daniel}}, isbn = {{978-1-4503-5158-4}}, location = {{Paderborn, Germany}}, pages = {{19--25}}, publisher = {{ACM}}, title = {{{Studying Software Descriptions in SourceForge and App Stores for a better Understanding of real-life Requirements}}}, doi = {{10.1145/3121264.3121269}}, year = {{2017}}, } @phdthesis{89, abstract = {{The vision of OTF Computing is to have the software needs of end users in the future covered by an automatic composition of existing software services. Here we focus on natural language software requirements that end users formulate and submit to OTF providers as requirement specifications. These requirements serve as the sole foundation for the composition of software, but they can be inaccurate and incomplete. Up to now, software developers have identified and corrected these deficits by using a bidirectional consolidation process. However, this type of quality assurance is no longer included in OTF Computing - the classic consolidation process is dropped. This is where this work picks up, dealing with the inaccuracies of freely formulated software design requirements. To do this, we developed the CORDULA (Compensation of Requirements Descriptions Using Linguistic Analysis) system, which recognizes and compensates for language deficiencies (e.g., ambiguity, vagueness, and incompleteness) in requirements written by inexperienced end users. CORDULA supports the search for suitable software services that can be combined in a composition by transferring requirement specifications into canonical core functionalities. This dissertation provides the first-ever method for holistically recording and improving language deficiencies in user-generated requirement specifications by dealing with ambiguity, incompleteness, and vagueness in parallel and in sequence.}}, author = {{Bäumer, Frederik Simon}}, publisher = {{Universität Paderborn}}, title = {{{Indikatorbasierte Erkennung und Kompensation von ungenauen und unvollständig beschriebenen Softwareanforderungen}}}, doi = {{10.17619/UNIPB/1-157}}, year = {{2017}}, } @inproceedings{97, abstract = {{Bridging the gap between informal, imprecise, and vague user requirements descriptions and precise formalized specifications is the main task of requirements engineering. Techniques such as interviews or storytelling are used when requirements engineers try to identify a user's needs. The requirements specification process is typically done in a dialogue between users, domain experts, and requirements engineers. In our research, we aim at automating the specification of requirements. The idea is to distinguish between untrained users and trained users, and to exploit domain knowledge learned from previous runs of our system. We let untrained users provide unstructured natural language descriptions, while we allow trained users to provide examples of behavioral descriptions. In both cases, our goal is to synthesize formal requirements models similar to statecharts.
From requirements specification processes with trained users, behavioral ontologies are learned, which are later used to support the requirements specification process for untrained users. Our research method is original in combining natural language processing and search-based techniques for the synthesis of requirements specifications. Our work is embedded in a larger project that aims at automating the whole software development and deployment process in envisioned future software service markets.}}, author = {{van Rooijen, Lorijn and Bäumer, Frederik Simon and Platenius, Marie Christin and Geierhos, Michaela and Hamann, Heiko and Engels, Gregor}}, booktitle = {{2017 IEEE 25th International Requirements Engineering Conference Workshops (REW)}}, isbn = {{978-1-5386-3489-9}}, keywords = {{Software, Unified modeling language, Requirements engineering, Ontologies, Search problems, Natural languages}}, location = {{Lisbon, Portugal}}, pages = {{379--385}}, publisher = {{IEEE}}, title = {{{From User Demand to Software Service: Using Machine Learning to Automate the Requirements Specification Process}}}, doi = {{10.1109/REW.2017.26}}, year = {{2017}}, } @article{1098, abstract = {{An end user generally writes down software requirements in ambiguous expressions using natural language; hence, a software developer attuned to programming language finds it difficult to understand the meaning of the requirements. To solve this problem, we define semantic categories for disambiguation and classify/annotate the requirements into the categories by using machine-learning models. We extensively use a language frame closely related to such categories for designing features to overcome the problem of insufficient training data compared to the large number of classes. Our proposed model obtained a micro-average F1-score of 0.75, outperforming the previous model, REaCT.}}, author = {{Kim, Yeong-Su and Lee, Seung-Woo and Dollmann, Markus and Geierhos, Michaela}}, issn = {{2205-8494}}, journal = {{International Journal of Software Engineering for Smart Device}}, keywords = {{Natural Language Processing, Semantic Annotation, Machine Learning}}, number = {{2}}, pages = {{1--6}}, publisher = {{Global Vision School Publication}}, title = {{{Semantic Annotation of Software Requirements with Language Frame}}}, volume = {{4}}, year = {{2017}}, } @inproceedings{1151, abstract = {{Which information about company mergers is conveyed in newspaper reports, and how can this information be extracted automatically? This is to be determined using the example of shareholder behavior during a merger. To this end, the most important statements about the shareholders' vote are analyzed linguistically with a view to automatic recognition. The focus is on reports about shareholder votes regarding the acceptance or rejection of a takeover offer.}}, author = {{Stotz, Sophia and Geierhos, Michaela}}, booktitle = {{DHd 2016: Modellierung - Vernetzung - Visualisierung. Die Digital Humanities als fächerübergreifendes Forschungsparadigma. Konferenzabstracts, Universität Leipzig, 7. bis 12. März 2016}}, isbn = {{978-3-941379-05-3}}, location = {{Leipzig, Germany}}, pages = {{378--381}}, publisher = {{Nisaba-Verlag}}, title = {{{Wie verhalten sich Aktionäre bei Unternehmenszusammenschlüssen?
Modellierung sprachlicher Muster zur Analyse treibender Faktoren bei der Berichterstattung}}}, year = {{2016}}, } @misc{1152, author = {{Geierhos, Michaela}}, booktitle = {{Enzyklopädie der Wirtschaftsinformatik}}, editor = {{Gronau, Norbert and Becker, Jörg and Sinz, Elmar and Suhl, Leena and Leimeister, Jan M.}}, keywords = {{Webmonitoring}}, publisher = {{GITO-Verlag}}, title = {{{Webmonitoring}}}, year = {{2016}}, } @misc{1153, author = {{Geierhos, Michaela}}, booktitle = {{Enzyklopädie der Wirtschaftsinformatik}}, editor = {{Gronau, Norbert and Becker, Jörg and Sinz, Elmar and Suhl, Leena and Leimeister, Jan M.}}, keywords = {{Sentimentanalyse}}, publisher = {{GITO-Verlag}}, title = {{{Sentimentanalyse}}}, year = {{2016}}, } @misc{1154, author = {{Geierhos, Michaela}}, booktitle = {{Enzyklopädie der Wirtschaftsinformatik}}, editor = {{Gronau, Norbert and Becker, Jörg and Sinz, Elmar and Suhl, Leena and Leimeister, Jan M.}}, keywords = {{Text Mining}}, publisher = {{GITO-Verlag}}, title = {{{Text Mining}}}, year = {{2016}}, } @misc{1155, author = {{Geierhos, Michaela}}, booktitle = {{Enzyklopädie der Wirtschaftsinformatik}}, editor = {{Gronau, Norbert and Becker, Jörg and Sinz, Elmar and Suhl, Leena and Leimeister, Jan M.}}, keywords = {{Crawler}}, publisher = {{GITO-Verlag}}, title = {{{Crawler (fokussiert / nicht fokussiert)}}}, year = {{2016}}, } @inproceedings{176, abstract = {{Users prefer natural language software requirements because of their usability and accessibility. When they describe their wishes for software development, they often provide off-topic information. We therefore present an automated approach for identifying and semantically annotating the on-topic parts of the given descriptions. It is designed to support requirements engineers in detecting and analyzing requirements in user-generated content during the requirement elicitation process. Since no lexical resources with domain-specific information about requirements are available, we created a corpus of requirements written in controlled language by instructed users and in uncontrolled language by uninstructed users. We annotated these requirements regarding predicate-argument structures, conditions, priorities, motivations, and semantic roles, and used this information to train classifiers for information extraction purposes. The approach achieves an accuracy of 92% for the on- and off-topic classification task and an F1-measure of 72% for the semantic annotation.}}, author = {{Dollmann, Markus and Geierhos, Michaela}}, booktitle = {{Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing (EMNLP)}}, location = {{Austin, TX, USA}}, pages = {{1807--1816}}, publisher = {{Association for Computational Linguistics (ACL)}}, title = {{{On- and Off-Topic Classification and Semantic Annotation of User-Generated Software Requirements}}}, year = {{2016}}, } @inproceedings{191, abstract = {{One purpose of requirement refinement is to translate higher-level requirements into something usable by developers. Since customer requirements are often written in natural language by end users, they lack precision, completeness, and consistency. Although user stories are often used in the requirement elicitation process to describe the possible ways of interacting with the software, there is always something unspoken. Here, we present techniques for automatically refining vague software descriptions.
Thus, we can bridge the gap by first revising natural language utterances from higher-level to more detailed customer requirements before functionality matters. We therefore focus on the resolution of semantically incomplete user-generated sentences (i.e., non-instantiated arguments of predicates) and provide ontology-based gap-filling suggestions on how to complete unverbalized information in the user's demand.}}, author = {{Geierhos, Michaela and Bäumer, Frederik Simon}}, booktitle = {{Proceedings of the 21st International Conference on Applications of Natural Language to Information Systems (NLDB)}}, editor = {{Métais, Elisabeth and Meziane, Farid and Saraee, Mohamad and Sugumaran, Vijayan and Vadera, Sunil}}, isbn = {{978-3-319-41753-0}}, keywords = {{Requirement refinement, Concept expansion, Ontology-based instantiation of predicate-argument structure}}, location = {{Salford, UK}}, pages = {{37--47}}, publisher = {{Springer}}, title = {{{How to Complete Customer Requirements: Using Concept Expansion for Requirement Refinement}}}, doi = {{10.1007/978-3-319-41754-7_4}}, volume = {{9612}}, year = {{2016}}, } @inproceedings{158, abstract = {{While requirements focus on how the user interacts with the system, user stories concentrate on the purpose of software features. In practice, however, functional requirements are also described in user stories. For this reason, requirements clarification is needed, especially when requirements are written in natural language and do not stick to any template (e.g., "as an X, I want Y so that Z ..."). However, there is a lot of implicit knowledge that is not expressed in words. As a result, natural language requirements descriptions may suffer from incompleteness. Existing approaches try to formalize natural language or focus only on entirely missing, not on deficient, requirements. In this paper, we therefore present an approach to detect knowledge gaps in user-generated software requirements for interactive requirement clarification: we provide tailored suggestions to the users in order to get more precise descriptions. For this purpose, we identify not fully instantiated predicate-argument structures in requirements written in natural language and use context information to infer what was meant by the user.}}, author = {{Bäumer, Frederik Simon and Geierhos, Michaela}}, booktitle = {{Proceedings of the 22nd International Conference on Information and Software Technologies (ICIST)}}, editor = {{Dregvaite, Giedre and Damasevicius, Robertas}}, isbn = {{978-3-319-46253-0}}, keywords = {{Natural language requirements clarification, Syntactically incomplete requirements, Compensatory user stories}}, location = {{Druskininkai, Lithuania}}, pages = {{549--558}}, publisher = {{Springer}}, title = {{{Running out of Words: How Similar User Stories Can Help to Elaborate Individual Natural Language Requirement Descriptions}}}, doi = {{10.1007/978-3-319-46254-7_44}}, volume = {{639}}, year = {{2016}}, }