@article{45299,
  abstract  = {{Many applications are driven by Machine Learning (ML) today. While complex ML models lead to an accurate prediction, their inner decision-making is obfuscated. However, especially for high-stakes decisions, interpretability and explainability of the model are necessary. Therefore, we develop a holistic interpretability and explainability framework (HIEF) to objectively describe and evaluate an intelligent system’s explainable AI (XAI) capacities. This guides data scientists to create more transparent models. To evaluate our framework, we analyse 50 real estate appraisal papers to ensure the robustness of HIEF. Additionally, we identify six typical types of intelligent systems, so-called archetypes, which range from explanatory to predictive, and demonstrate how researchers can use the framework to identify blind-spot topics in their domain. Finally, regarding comprehensiveness, we used a random sample of six intelligent systems and conducted an applicability check to provide external validity.}},
  author    = {{Kucklick, Jan-Peter}},
  issn      = {{1246-0125}},
  journal   = {{Journal of Decision Systems}},
  keywords  = {{Explainable AI (XAI), machine learning, interpretability, real estate appraisal, framework, taxonomy}},
  pages     = {{1--41}},
  publisher = {{Taylor \& Francis}},
  title     = {{{HIEF: a holistic interpretability and explainability framework}}},
  doi       = {{10.1080/12460125.2023.2207268}},
  year      = {{2023}},
}

@inproceedings{34140,
  abstract  = {{In this paper, machine learning techniques will be used to classify different PCB layouts given their electromagnetic frequency spectra. These spectra result from a simulated near-field measurement of electric field strengths at different locations. Measured values consist of real and imaginary parts (amplitude and phase) in X, Y and Z directions. Training data was obtained in the time domain by varying transmission line geometries (size, distance and signaling). It was then transformed into the frequency domain and used as deep neural network input. Principal component analysis was applied to reduce the sample dimension. The results show that classifying different designs is possible with high accuracy based on synthetic data. Future work comprises measurements of real, custom-made PCB with varying parameters to adapt the simulation model and also test the neural network. Finally, the trained model could be used to give hints about the error’s cause when overshooting EMC limits.}},
  author    = {{Maalouly, Jad and Hemker, Dennis and Hedayat, Christian and Rückert, Christian and Kaufmann, Ivan and Olbrich, Marcel and Lange, Sven and Mathis, Harald}},
  booktitle = {{2022 Kleinheubach Conference}},
  keywords  = {{emc, pcb, electronic system development, machine learning, neural network}},
  location  = {{Miltenberg, Germany}},
  publisher = {{IEEE}},
  title     = {{{AI Assisted Interference Classification to Improve EMC Troubleshooting in Electronic System Development}}},
  year      = {{2022}},
}

@inproceedings{33510,
  abstract  = {{In the manufacture of real wood products, defects can quickly occur during the production process. To quickly sort out these defects, a system is needed that finds damage in the irregularly structured surfaces of the product. The difficulty in this task is that each surface is visually different and no standard defects can be defined. Thus, damage detection using correlation does not work, so this paper will test different machine learning methods. To evaluate different machine learning methods, a data set is needed. For this reason, the available samples were recorded manually using a static fixed camera. Subsequently, the images were divided into sub-images, which resulted in a relatively small data set. Next, a convolutional neural network (CNN) was constructed to classify the images. However, this approach did not lead to a generalized solution, so the dataset was hashed using the a- and pHash. These hash values were then trained with a fully supervised system that will later serve as a reference model, in the semi-supervised learning procedures. To improve the supervised model and not have to label every data point, semi-supervised learning methods are used in the following. For this purpose, the CEAL method (wrapper method) is considered in the first and then the Π-Model (intrinsically semi-supervised).}},
  author    = {{Sander, Tom and Lange, Sven and Hilleringmann, Ulrich and Geneiß, Volker and Hedayat, Christian and Kuhn, Harald}},
  booktitle = {{2022 Smart Systems Integration (SSI)}},
  keywords  = {{Machine Learning, CNN, Hashing, semi-supervised learning}},
  location  = {{Grenoble, France}},
  publisher = {{IEEE}},
  title     = {{{Detection of Defects on Irregularly Structured Surfaces using Supervised and Semi-Supervised Learning Methods}}},
  doi       = {{10.1109/ssi56489.2022.9901433}},
  year      = {{2022}},
}

@inproceedings{31066,
  abstract  = {{While trade-offs between modeling effort and model accuracy remain a major concern with system identification, resorting to data-driven methods often leads to a complete disregard for physical plausibility. To address this issue, we propose a physics-guided hybrid approach for modeling non-autonomous systems under control. Starting from a traditional physics-based model, this is extended by a recurrent neural network and trained using a sophisticated multi-objective strategy yielding physically plausible models. While purely data-driven methods fail to produce satisfying results, experiments conducted on real data reveal substantial accuracy improvements by our approach compared to a physics-based model.}},
  author    = {{Schön, Oliver and Götte, Ricarda-Samantha and Timmermann, Julia}},
  booktitle = {{14th IFAC Workshop on Adaptive and Learning Control Systems (ALCOS 2022)}},
  keywords  = {{neural networks, physics-guided, data-driven, multi-objective optimization, system identification, machine learning, dynamical systems}},
  location  = {{Casablanca, Morocco}},
  number    = {{12}},
  pages     = {{19--24}},
  title     = {{{Multi-Objective Physics-Guided Recurrent Neural Networks for Identifying Non-Autonomous Dynamical Systems}}},
  doi       = {{10.1016/j.ifacol.2022.07.282}},
  volume    = {{55}},
  year      = {{2022}},
}

@article{48878,
  abstract  = {{Due to the rise of continuous data-generating applications, analyzing data streams has gained increasing attention over the past decades. A core research area in stream data is stream classification, which categorizes or detects data points within an evolving stream of observations. Areas of stream classification are diverse\textemdash ranging, e.g., from monitoring sensor data to analyzing a wide range of (social) media applications. Research in stream classification is related to developing methods that adapt to the changing and potentially volatile data stream. It focuses on individual aspects of the stream classification pipeline, e.g., designing suitable algorithm architectures, an efficient train and test procedure, or detecting so-called concept drifts. As a result of the many different research questions and strands, the field is challenging to grasp, especially for beginners. This survey explores, summarizes, and categorizes work within the domain of stream classification and identifies core research threads over the past few years. It is structured based on the stream classification process to facilitate coordination within this complex topic, including common application scenarios and benchmarking data sets. Thus, both newcomers to the field and experts who want to widen their scope can gain (additional) insight into this research area and find starting points and pointers to more in-depth literature on specific issues and research directions in the field.}},
  author    = {{Clever, Lena and Pohl, Janina Susanne and Bossek, Jakob and Kerschke, Pascal and Trautmann, Heike}},
  issn      = {{2076-3417}},
  journal   = {{Applied Sciences}},
  keywords  = {{big data, data mining, data stream analysis, machine learning, stream classification, supervised learning}},
  number    = {{18}},
  pages     = {{9094}},
  publisher = {{Multidisciplinary Digital Publishing Institute}},
  title     = {{{Process-Oriented Stream Classification Pipeline: A Literature Review}}},
  doi       = {{10.3390/app12189094}},
  volume    = {{12}},
  year      = {{2022}},
}

@inproceedings{27491,
  abstract  = {{Students often have a lack of understanding and awareness of where, how, and why personal data about them is collected and processed. Especially, when interacting with data-driven digital artifacts, an appropriate perception of the data collection and processing is necessary for self-determination. This dissertation deals with the development and evaluation of a concept called data awareness which aims to foster students’ self-determination interacting with data-driven digital artifacts.}},
  author    = {{Höper, Lukas}},
  booktitle = {{21st Koli Calling International Conference on Computing Education Research}},
  isbn      = {{9781450384889}},
  keywords  = {{data awareness, machine learning, data science education, data-driven digital artifacts, artificial intelligence}},
  publisher = {{Association for Computing Machinery}},
  title     = {{{Developing and Evaluating the Concept Data Awareness for K12 Computing Education}}},
  doi       = {{10.1145/3488042.3490509}},
  year      = {{2021}},
}

@article{21004,
  abstract  = {{Automated machine learning (AutoML) supports the algorithmic construction and data-specific customization of machine learning pipelines, including the selection, combination, and parametrization of machine learning algorithms as main constituents. Generally speaking, AutoML approaches comprise two major components: a search space model and an optimizer for traversing the space. Recent approaches have shown impressive results in the realm of supervised learning, most notably (single-label) classification (SLC). Moreover, first attempts at extending these approaches towards multi-label classification (MLC) have been made. While the space of candidate pipelines is already huge in SLC, the complexity of the search space is raised to an even higher power in MLC. One may wonder, therefore, whether and to what extent optimizers established for SLC can scale to this increased complexity, and how they compare to each other. This paper makes the following contributions: First, we survey existing approaches to AutoML for MLC. Second, we augment these approaches with optimizers not previously tried for MLC. Third, we propose a benchmarking framework that supports a fair and systematic comparison. Fourth, we conduct an extensive experimental study, evaluating the methods on a suite of MLC problems. We find a grammar-based best-first search to compare favorably to other optimizers.}},
  author    = {{Wever, Marcel Dominik and Tornede, Alexander and Mohr, Felix and Hüllermeier, Eyke}},
  issn      = {{0162-8828}},
  journal   = {{IEEE Transactions on Pattern Analysis and Machine Intelligence}},
  keywords  = {{Automated Machine Learning, Multi Label Classification, Hierarchical Planning, Bayesian Optimization}},
  pages     = {{1--1}},
  title     = {{{AutoML for Multi-Label Classification: Overview and Empirical Evaluation}}},
  doi       = {{10.1109/tpami.2021.3051276}},
  year      = {{2021}},
}

@inproceedings{22481,
  abstract  = {{During the industrial processing of materials for the manufacture of new products, surface defects can quickly occur. In order to achieve high quality without a long time delay, it makes sense to inspect the work pieces so that defective work pieces can be sorted out right at the beginning of the process. At the same time, the evaluation unit should come close to the perception of the human eye regarding detection of defects in surfaces. Such defects often manifest themselves by a deviation of the existing structure. The only restriction should be that only matt surfaces should be considered here. Therefore in this work, different classification and image processing algorithms are applied to surface data to identify possible surface damages. For this purpose, the Gabor filter and the FST (Fused Structure and Texture) features generated with it, as well as the salience metric are used on the image processing side. On the classification side, however, deep neural networks, Convolutional Neural Networks (CNN), and autoencoders are used to make a decision. A distinction is also made between training using class labels and without. It turns out later that the salience metric are best performed by CNN. On the other hand, if there is no labeled training data available, a novelty classification can easily be achieved by using autoencoders as well as the salience metric and some filters.}},
  author    = {{Sander, Tom and Lange, Sven and Hilleringmann, Ulrich and Geneis, Volker and Hedayat, Christian and Kuhn, Harald and Gockel, Franz-Barthold}},
  booktitle = {{22nd IEEE International Conference on Industrial Technology (ICIT)}},
  isbn      = {{9781728157306}},
  keywords  = {{Image Processing, Defect Detection, wooden surfaces, Machine Learning, Neural Networks}},
  location  = {{Valencia, Spain}},
  publisher = {{IEEE}},
  title     = {{{Detection of Defects on Irregular Structured Surfaces by Image Processing Methods for Feature Extraction}}},
  doi       = {{10.1109/icit46573.2021.9453646}},
  year      = {{2021}},
}

@techreport{35889,
  abstract  = {{Network and service coordination is important to provide modern services consisting of multiple interconnected components, e.g., in 5G, network function virtualization (NFV), or cloud and edge computing. In this paper, I outline my dissertation research, which proposes six approaches to automate such network and service coordination. All approaches dynamically react to the current demand and optimize coordination for high service quality and low costs. The approaches range from centralized to distributed methods and from conventional heuristic algorithms and mixed-integer linear programs to machine learning approaches using supervised and reinforcement learning. I briefly discuss their main ideas and advantages over other state-of-the-art approaches and compare strengths and weaknesses.}},
  author    = {{Schneider, Stefan Balthasar}},
  keywords  = {{nfv, coordination, machine learning, reinforcement learning, phd, digest}},
  title     = {{{Conventional and Machine Learning Approaches for Network and Service Coordination}}},
  year      = {{2021}},
}

@inproceedings{15332,
  abstract  = {{Artificial intelligence (AI) has the potential for far-reaching – in our opinion – irreversible changes. They range from effects on the individual and society to new societal and social issues. The question arises as to how students can learn the basic functioning of AI systems, what areas of life and society are affected by these and – most important – how their own lives are affected by these changes. Therefore, we are developing and evaluating school materials for the German ”Science Year AI”. It can be used for students of all school types from the seventh grade upwards and will be distributed to about 2000 schools in autumn with the support of the Federal Ministry of Education and Research. The material deals with the following aspects of AI: Discussing everyday experiences with AI, how does machine learning work, historical development of AI concepts, difference between man and machine, future distribution of roles between man and machine, in which AI world do we want to live and how much AI would we like to have in our lives. Through an accompanying evaluation, high quality of the technical content and didactic preparation is achieved in order to guarantee the long-term applicability in the teaching context in the different age groups and school types. In this paper, we describe the current state of the material development, the challenges arising, and the results of tests with different classes to date. We also present first ideas for evaluating the results.}},
  author    = {{Schlichtig, Michael and Opel, Simone Anna and Budde, Lea and Schulte, Carsten}},
  booktitle = {{ISSEP 2019 - 12th International conference on informatics in schools: Situation, evaluation and perspectives, Local Proceedings}},
  editor    = {{Jasutė, Eglė and Pozdniakov, Sergei}},
  isbn      = {{978-9925-553-27-3}},
  keywords  = {{Artificial Intelligence, Machine Learning, Teaching Material, Societal Aspects, Ethics, Social Aspects, Science Year, Simulation Game}},
  location  = {{Larnaca}},
  pages     = {{65--73}},
  title     = {{{Understanding Artificial Intelligence – A Project for the Development of Comprehensive Teaching Material}}},
  volume    = {{12}},
  year      = {{2019}},
}

@article{48877,
  abstract  = {{OpenML is an online machine learning platform where researchers can easily share data, machine learning tasks and experiments as well as organize them online to work and collaborate more efficiently. In this paper, we present an R package to interface with the OpenML platform and illustrate its usage in combination with the machine learning R package mlr (Bischl et al. J Mach Learn Res 17(170):1—5, 2016). We show how the OpenML package allows R users to easily search, download and upload data sets and machine learning tasks. Furthermore, we also show how to upload results of experiments, share them with others and download results from other users. Beyond ensuring reproducibility of results, the OpenML platform automates much of the drudge work, speeds up research, facilitates collaboration and increases the users’ visibility online.}},
  author    = {{Casalicchio, Giuseppe and Bossek, Jakob and Lang, Michel and Kirchhoff, Dominik and Kerschke, Pascal and Hofner, Benjamin and Seibold, Heidi and Vanschoren, Joaquin and Bischl, Bernd}},
  issn      = {{0943-4062}},
  journal   = {{Computational Statistics}},
  keywords  = {{Databases, Machine learning, R, Reproducible research}},
  number    = {{3}},
  pages     = {{977--991}},
  title     = {{{OpenML: An R Package to Connect to the Machine Learning Platform OpenML}}},
  doi       = {{10.1007/s00180-017-0742-2}},
  volume    = {{34}},
  year      = {{2019}},
}

@inproceedings{3852,
  abstract  = {{In automated machine learning (AutoML), the process of engineering machine learning applications with respect to a specific problem is (partially) automated. Various AutoML tools have already been introduced to provide out-of-the-box machine learning functionality. More specifically, by selecting machine learning algorithms and optimizing their hyperparameters, these tools produce a machine learning pipeline tailored to the problem at hand. Except for TPOT, all of these tools restrict the maximum number of processing steps of such a pipeline. However, as TPOT follows an evolutionary approach, it suffers from performance issues when dealing with larger datasets. In this paper, we present an alternative approach leveraging a hierarchical planning to configure machine learning pipelines that are unlimited in length. We evaluate our approach and find its performance to be competitive with other AutoML tools, including TPOT.}},
  author    = {{Wever, Marcel Dominik and Mohr, Felix and Hüllermeier, Eyke}},
  booktitle = {{ICML 2018 AutoML Workshop}},
  keywords  = {{automated machine learning, complex pipelines, hierarchical planning}},
  location  = {{Stockholm, Sweden}},
  title     = {{{ML-Plan for Unlimited-Length Machine Learning Pipelines}}},
  year      = {{2018}},
}

@article{2331,
  abstract  = {{A user generally writes software requirements in ambiguous and incomplete form by using natural language; therefore, a software developer may have difficulty in clearly understanding what the meanings are. To solve this problem with automation, we propose a classifier for semantic annotation with manually pre-defined semantic categories. To improve our classifier, we carefully designed syntactic features extracted by constituency and dependency parsers. Even with a small dataset and a large number of classes, our proposed classifier records an accuracy of 0.75, which outperforms the previous model, REaCT.}},
  author    = {{Kim, Yeongsu and Lee, Seungwoo and Dollmann, Markus and Geierhos, Michaela}},
  issn      = {{2207-6360}},
  journal   = {{International Journal of Advanced Science and Technology}},
  keywords  = {{Software Engineering, Natural Language Processing, Semantic Annotation, Machine Learning, Feature Engineering, Syntactic Structure}},
  pages     = {{123--136}},
  publisher = {{SERSC Australia}},
  title     = {{{Improving Classifiers for Semantic Annotation of Software Requirements with Elaborate Syntactic Structure}}},
  doi       = {{10.14257/ijast.2018.112.12}},
  volume    = {{112}},
  year      = {{2018}},
}

@article{48884,
  abstract  = {{The Travelling Salesperson Problem (TSP) is one of the best-studied NP-hard problems. Over the years, many different solution approaches and solvers have been developed. For the first time, we directly compare five state-of-the-art inexact solvers\textemdash namely, LKH, EAX, restart variants of those, and MAOS\textemdash on a large set of well-known benchmark instances and demonstrate complementary performance, in that different instances may be solved most effectively by different algorithms. We leverage this complementarity to build an algorithm selector, which selects the best TSP solver on a per-instance basis and thus achieves significantly improved performance compared to the single best solver, representing an advance in the state of the art in solving the Euclidean TSP. Our in-depth analysis of the selectors provides insight into what drives this performance improvement.}},
  author    = {{Kerschke, Pascal and Kotthoff, Lars and Bossek, Jakob and Hoos, Holger H. and Trautmann, Heike}},
  issn      = {{1063-6560}},
  journal   = {{Evolutionary Computation}},
  keywords  = {{automated algorithm selection, machine learning, performance modeling, Travelling Salesperson Problem}},
  number    = {{4}},
  pages     = {{597--620}},
  title     = {{{Leveraging TSP Solver Complementarity through Machine Learning}}},
  doi       = {{10.1162/evco_a_00215}},
  volume    = {{26}},
  year      = {{2018}},
}

@article{1098,
  abstract  = {{An end user generally writes down software requirements in ambiguous expressions using natural language; hence, a software developer attuned to programming language finds it difficult to understand the meaning of the requirements. To solve this problem we define semantic categories for disambiguation and classify/annotate the requirement into the categories by using machine-learning models. We extensively use a language frame closely related to such categories for designing features to overcome the problem of insufficient training data compared to the large number of classes. Our proposed model obtained a micro-average F1-score of 0.75, outperforming the previous model, REaCT.}},
  author    = {{Kim, Yeong-Su and Lee, Seung-Woo and Dollmann, Markus and Geierhos, Michaela}},
  issn      = {{2205-8494}},
  journal   = {{International Journal of Software Engineering for Smart Device}},
  keywords  = {{Natural Language Processing, Semantic Annotation, Machine Learning}},
  number    = {{2}},
  pages     = {{1--6}},
  publisher = {{Global Vision School Publication}},
  title     = {{{Semantic Annotation of Software Requirements with Language Frame}}},
  volume    = {{4}},
  year      = {{2017}},
}

@inproceedings{46396,
  abstract  = {{The steady supply of new optimization methods makes the algorithm selection problem (ASP) an increasingly pressing and challenging task, specially for real-world black-box optimization problems. The introduced approach considers the ASP as a cost-sensitive classification task which is based on Exploratory Landscape Analysis. Low-level features gathered by systematic sampling of the function on the feasible set are used to predict a well-performing algorithm out of a given portfolio. Example-specific label costs are defined by the expected runtime of each candidate algorithm. We use one-sided support vector regression to solve this learning problem. The approach is illustrated by means of the optimization problems and algorithms of the BBOB’09/10 workshop.}},
  author    = {{Bischl, Bernd and Mersmann, Olaf and Trautmann, Heike and Preuß, Mike}},
  booktitle = {{Proceedings of the 14th Annual Conference on Genetic and Evolutionary Computation}},
  isbn      = {{9781450311779}},
  keywords  = {{machine learning, exploratory landscape analysis, fitness landscape, benchmarking, evolutionary optimization, bbob test set, algorithm selection}},
  pages     = {{313--320}},
  publisher = {{Association for Computing Machinery}},
  title     = {{{Algorithm Selection Based on Exploratory Landscape Analysis and Cost-Sensitive Learning}}},
  doi       = {{10.1145/2330163.2330209}},
  year      = {{2012}},
}