@inbook{45884, author = {{Hanselle, Jonas Manuel and Hüllermeier, Eyke and Mohr, Felix and Ngonga Ngomo, Axel-Cyrille and Sherif, Mohamed and Tornede, Alexander and Wever, Marcel Dominik}}, booktitle = {{On-The-Fly Computing -- Individualized IT-services in dynamic markets}}, editor = {{Haake, Claus-Jochen and Meyer auf der Heide, Friedhelm and Platzner, Marco and Wachsmuth, Henning and Wehrheim, Heike}}, pages = {{85--104}}, publisher = {{Heinz Nixdorf Institut, Universität Paderborn}}, title = {{{Configuration and Evaluation}}}, doi = {{10.5281/zenodo.8068466}}, volume = {{412}}, year = {{2023}}, } @book{45863, abstract = {{In the proposal for our CRC in 2011, we formulated a vision of markets for IT services that describes an approach to the provision of such services that was novel at that time and, to a large extent, remains so today: "Our vision of on-the-fly computing is that of IT services individually and automatically configured and brought to execution from flexibly combinable services traded on markets. At the same time, we aim at organizing markets whose participants maintain a lively market of services through appropriate entrepreneurial actions." Over the last 12 years, we have developed methods and techniques to address problems critical to the convenient, efficient, and secure use of on-the-fly computing. Among other things, we have made the description of services more convenient by allowing natural language input, increased the quality of configured services through (natural language) interaction and more efficient configuration processes and analysis procedures, made the quality of (the products of) providers in the marketplace transparent through reputation systems, and increased the resource efficiency of execution through reconfigurable heterogeneous computing nodes and an integrated treatment of service description and configuration. We have also developed network infrastructures that have a high degree of adaptivity, scalability, efficiency, and reliability, and provide cryptographic guarantees of anonymity and security for market participants and their products and services. To demonstrate the pervasiveness of the OTF computing approach, we have implemented a proof of concept for OTF computing that can run typical scenarios of an OTF market. We illustrated the approach using a cutting-edge application scenario: automated machine learning (AutoML). Finally, we have been working to perpetuate On-The-Fly Computing beyond the SFB, sharing the expertise gained in the SFB in events with industry partners as well as in transfer projects. This work required a broad spectrum of expertise.
Computer scientists and economists with research interests such as computer networks and distributed algorithms, security and cryptography, software engineering and verification, configuration and machine learning, computer engineering and HPC, microeconomics and game theory, business informatics and management have successfully collaborated here.}}, author = {{Haake, Claus-Jochen and Meyer auf der Heide, Friedhelm and Platzner, Marco and Wachsmuth, Henning and Wehrheim, Heike}}, pages = {{247}}, publisher = {{Heinz Nixdorf Institut, Universität Paderborn}}, title = {{{On-The-Fly Computing -- Individualized IT-services in dynamic markets}}}, doi = {{10.17619/UNIPB/1-1797}}, volume = {{412}}, year = {{2023}}, } @unpublished{30868, abstract = {{Algorithm configuration (AC) is concerned with the automated search for the most suitable parameter configuration of a parametrized algorithm. There is currently a wide variety of AC problem variants and methods proposed in the literature. Existing reviews do not take into account all derivatives of the AC problem, nor do they offer a complete classification scheme. To this end, we introduce taxonomies to describe the AC problem and features of configuration methods, respectively. We review existing AC literature through the lens of our taxonomies, outline relevant design choices of configuration approaches, contrast methods and problem variants against each other, and describe the state of AC in industry. Finally, our review provides researchers and practitioners with a look at future research directions in the field of AC.}}, author = {{Schede, Elias and Brandt, Jasmin and Tornede, Alexander and Wever, Marcel Dominik and Bengs, Viktor and Hüllermeier, Eyke and Tierney, Kevin}}, booktitle = {{arXiv:2202.01651}}, title = {{{A Survey of Methods for Automated Algorithm Configuration}}}, year = {{2022}}, } @inproceedings{34103, abstract = {{It is well known that different algorithms perform differently well on an instance of an algorithmic problem, motivating algorithm selection (AS): Given an instance of an algorithmic problem, which is the most suitable algorithm to solve it? As such, the AS problem has received considerable attention, resulting in various approaches, many of which either solve a regression or a ranking problem under the hood. Although both of these formulations yield very natural ways to tackle AS, they have considerable weaknesses. On the one hand, correctly predicting the performance of an algorithm on an instance is a sufficient, but not a necessary, condition to produce a correct ranking over algorithms and, in particular, to rank the best algorithm first. On the other hand, classical ranking approaches often do not account for concrete performance values available in the training data, but only leverage rankings composed from such data. We propose HARRIS (Hybrid rAnking and RegRessIon foreSts), a new algorithm selector leveraging special forests that combine the strengths of both approaches while alleviating their weaknesses. HARRIS' decisions are based on a forest model whose trees are created based on splits optimized on a hybrid ranking and regression loss function.
As our preliminary experimental study on ASlib shows, HARRIS improves over standard algorithm selection approaches in some scenarios, showing that combining ranking and regression in trees is indeed promising for AS.}}, author = {{Fehring, Lukass and Hanselle, Jonas Manuel and Tornede, Alexander}}, booktitle = {{Workshop on Meta-Learning (MetaLearn 2022) @ NeurIPS 2022}}, location = {{Baltimore}}, title = {{{HARRIS: Hybrid Ranking and Regression Forests for Algorithm Selection}}}, year = {{2022}}, } @inproceedings{31806, abstract = {{The creation of an RDF knowledge graph for a particular application commonly involves a pipeline of tools that transform a set of input data sources into an RDF knowledge graph in a process called dataset augmentation. The components of such augmentation pipelines often require extensive configuration to lead to satisfactory results. Thus, non-experts are often unable to use them. We present an efficient supervised algorithm based on genetic programming for learning knowledge graph augmentation pipelines of arbitrary length. Our approach uses multi-expression learning to learn augmentation pipelines able to achieve a high F-measure on the training data. Our evaluation suggests that our approach can efficiently learn a larger class of RDF dataset augmentation tasks than the state of the art while using only a single training example. Even on the most complex augmentation problem we posed, our approach consistently achieves an average F1-measure of 99% in under 500 iterations with an average runtime of 16 seconds.}}, author = {{Dreßler, Kevin and Sherif, Mohamed and Ngonga Ngomo, Axel-Cyrille}}, booktitle = {{Proceedings of the 33rd ACM Conference on Hypertext and Hypermedia}}, keywords = {{2022 RAKI SFB901 deer dice kevin knowgraphs limes ngonga sherif simba}}, location = {{Barcelona (Spain)}}, title = {{{ADAGIO - Automated Data Augmentation of Knowledge Graphs Using Multi-expression Learning}}}, doi = {{10.1145/3511095.3531287}}, year = {{2022}}, } @unpublished{30867, abstract = {{In online algorithm selection (OAS), instances of an algorithmic problem class are presented to an agent one after another, and the agent has to quickly select the presumably best algorithm from a fixed set of candidate algorithms. For decision problems such as satisfiability (SAT), quality typically refers to the algorithm's runtime. As the latter is known to exhibit a heavy-tail distribution, an algorithm is normally stopped when exceeding a predefined upper time limit. As a consequence, machine learning methods used to optimize an algorithm selection strategy in a data-driven manner need to deal with right-censored samples, a problem that has received little attention in the literature so far. In this work, we revisit multi-armed bandit algorithms for OAS and discuss their capability of dealing with the problem. Moreover, we adapt them towards runtime-oriented losses, allowing for partially censored data while keeping space and time complexity independent of the time horizon.
In an extensive experimental evaluation on an adapted version of the ASlib benchmark, we demonstrate that theoretically well-founded methods based on Thompson sampling perform particularly strongly and improve over existing methods.}}, author = {{Tornede, Alexander and Bengs, Viktor and Hüllermeier, Eyke}}, booktitle = {{Proceedings of the 36th AAAI Conference on Artificial Intelligence}}, publisher = {{AAAI}}, title = {{{Machine Learning for Online Algorithm Selection under Censored Feedback}}}, year = {{2022}}, } @unpublished{30865, abstract = {{The problem of selecting an algorithm that appears most suitable for a specific instance of an algorithmic problem class, such as the Boolean satisfiability problem, is called instance-specific algorithm selection. Over the past decade, the problem has received considerable attention, resulting in a number of different methods for algorithm selection. Although most of these methods are based on machine learning, surprisingly little work has been done on meta learning, that is, on taking advantage of the complementarity of existing algorithm selection methods in order to combine them into a single superior algorithm selector. In this paper, we introduce the problem of meta algorithm selection, which essentially asks for the best way to combine a given set of algorithm selectors. We present a general methodological framework for meta algorithm selection as well as several concrete learning methods as instantiations of this framework, essentially combining ideas of meta learning and ensemble learning. In an extensive experimental evaluation, we demonstrate that ensembles of algorithm selectors can significantly outperform single algorithm selectors and have the potential to form the new state of the art in algorithm selection.}}, author = {{Tornede, Alexander and Gehring, Lukas and Tornede, Tanja and Wever, Marcel Dominik and Hüllermeier, Eyke}}, booktitle = {{Machine Learning}}, title = {{{Algorithm Selection on a Meta Level}}}, year = {{2022}}, } @article{33090, abstract = {{Heated tool butt welding is a method often used for joining thermoplastics, especially when the components are made out of different materials. The quality of the connection between the components crucially depends on a suitable choice of the parameters of the welding process, such as heating time, temperature, and the precise way in which the parts are then welded. Moreover, when different materials are to be joined, the parameter values need to be tailored to the specifics of the respective material. To this end, in this paper, three approaches to tailoring the parameter values to optimize the quality of the connection are compared: a heuristic by Potente, statistical experimental design, and Bayesian optimization. With suitability for practice in mind, a series of experiments is carried out with these approaches, and their capabilities of proposing well-performing parameter values are investigated. As a result, Bayesian optimization is found to yield peak performance, but the costs for optimization are substantial.
In contrast, the Potente heuristic does not require any experimentation and recommends parameter values of competitive quality.}}, author = {{Gevers, Karina and Tornede, Alexander and Wever, Marcel Dominik and Schöppner, Volker and Hüllermeier, Eyke}}, issn = {{0043-2288}}, journal = {{Welding in the World}}, keywords = {{Metals and Alloys, Mechanical Engineering, Mechanics of Materials}}, publisher = {{Springer Science and Business Media LLC}}, title = {{{A comparison of heuristic, statistical, and machine learning methods for heated tool butt welding of two different materials}}}, doi = {{10.1007/s40194-022-01339-9}}, year = {{2022}}, } @inproceedings{28350, abstract = {{In recent years, an increasing amount of software with machine learning components has been deployed. This poses the question of quality assurance for such components: how can we validate whether specified requirements are fulfilled by machine-learned software? Current testing and verification approaches either focus on a single requirement (e.g., fairness) or specialize in a single type of machine learning model (e.g., neural networks). In this paper, we propose property-driven testing of machine learning models. Our approach MLCheck encompasses (1) a language for property specification and (2) a technique for systematic test case generation. The specification language is comparable to property-based testing languages. Test case generation employs advanced verification technology for a systematic, property-dependent construction of test suites, without additional user-supplied generator functions. We evaluate MLCheck using requirements and data sets from three different application areas (software discrimination, learning on knowledge graphs, and security). Our evaluation shows that, despite its generality, MLCheck can even outperform specialised testing approaches while having a comparable runtime.}}, author = {{Sharma, Arnab and Demir, Caglar and Ngonga Ngomo, Axel-Cyrille and Wehrheim, Heike}}, booktitle = {{Proceedings of the 20th IEEE International Conference on Machine Learning and Applications (ICMLA)}}, publisher = {{IEEE}}, title = {{{MLCHECK – Property-Driven Testing of Machine Learning Classifiers}}}, year = {{2021}}, } @article{21004, abstract = {{Automated machine learning (AutoML) supports the algorithmic construction and data-specific customization of machine learning pipelines, including the selection, combination, and parametrization of machine learning algorithms as main constituents. Generally speaking, AutoML approaches comprise two major components: a search space model and an optimizer for traversing the space. Recent approaches have shown impressive results in the realm of supervised learning, most notably (single-label) classification (SLC). Moreover, first attempts at extending these approaches towards multi-label classification (MLC) have been made. While the space of candidate pipelines is already huge in SLC, the complexity of the search space is raised to an even higher power in MLC. One may wonder, therefore, whether and to what extent optimizers established for SLC can scale to this increased complexity, and how they compare to each other. This paper makes the following contributions: First, we survey existing approaches to AutoML for MLC. Second, we augment these approaches with optimizers not previously tried for MLC. Third, we propose a benchmarking framework that supports a fair and systematic comparison.
Fourth, we conduct an extensive experimental study, evaluating the methods on a suite of MLC problems. We find a grammar-based best-first search to compare favorably to other optimizers.}}, author = {{Wever, Marcel Dominik and Tornede, Alexander and Mohr, Felix and Hüllermeier, Eyke}}, issn = {{0162-8828}}, journal = {{IEEE Transactions on Pattern Analysis and Machine Intelligence}}, keywords = {{Automated Machine Learning, Multi Label Classification, Hierarchical Planning, Bayesian Optimization}}, pages = {{1--1}}, title = {{{AutoML for Multi-Label Classification: Overview and Empirical Evaluation}}}, doi = {{10.1109/tpami.2021.3051276}}, year = {{2021}}, } @article{21092, abstract = {{Automated Machine Learning (AutoML) seeks to automatically find so-called machine learning pipelines that maximize prediction performance when used to train a model on a given dataset. One of the main and yet open challenges in AutoML is the effective use of computational resources: An AutoML process involves the evaluation of many candidate pipelines, which are costly but often ineffective because they are canceled due to a timeout. In this paper, we present an approach to predict the runtime of two-step machine learning pipelines with up to one pre-processor, which can be used to anticipate whether or not a pipeline will time out. Separate runtime models are trained offline for each algorithm that may be used in a pipeline, and an overall prediction is derived from these models. We empirically show that the approach increases the number of successful evaluations made by an AutoML tool while preserving or even improving on the previously best solutions.}}, author = {{Mohr, Felix and Wever, Marcel Dominik and Tornede, Alexander and Hüllermeier, Eyke}}, journal = {{IEEE Transactions on Pattern Analysis and Machine Intelligence}}, publisher = {{IEEE}}, title = {{{Predicting Machine Learning Pipeline Runtimes in the Context of Automated Machine Learning}}}, year = {{2021}}, } @inproceedings{21570, author = {{Tornede, Tanja and Tornede, Alexander and Wever, Marcel Dominik and Hüllermeier, Eyke}}, booktitle = {{Proceedings of the Genetic and Evolutionary Computation Conference}}, title = {{{Coevolution of Remaining Useful Lifetime Estimation Pipelines for Automated Predictive Maintenance}}}, year = {{2021}}, } @inproceedings{22913, author = {{Hüllermeier, Eyke and Mohr, Felix and Tornede, Alexander and Wever, Marcel Dominik}}, location = {{Bilbao (Virtual)}}, title = {{{Automated Machine Learning, Bounded Rationality, and Rational Metareasoning}}}, year = {{2021}}, } @unpublished{30866, abstract = {{Automated machine learning (AutoML) strives for the automatic configuration of machine learning algorithms and their composition into an overall (software) solution - a machine learning pipeline - tailored to the learning task (dataset) at hand. Over the last decade, AutoML has developed into an independent research field with hundreds of contributions. While AutoML offers many prospects, it is also known to be quite resource-intensive, which is one of its major points of criticism. The primary cause of its high resource consumption is that many approaches rely on the (costly) evaluation of many machine learning pipelines while searching for good candidates. This problem is amplified in the context of research on AutoML methods, due to large-scale experiments conducted with many datasets and approaches, each of them being run with several repetitions to rule out random effects.
In the spirit of recent work on Green AI, this paper is written in an attempt to raise AutoML researchers' awareness of the problem and to elaborate on possible remedies. To this end, we identify four categories of actions the community may take towards more sustainable research on AutoML, i.e., Green AutoML: the design of AutoML systems, benchmarking, transparency, and research incentives.}}, author = {{Tornede, Tanja and Tornede, Alexander and Hanselle, Jonas Manuel and Wever, Marcel Dominik and Mohr, Felix and Hüllermeier, Eyke}}, booktitle = {{arXiv:2111.05850}}, title = {{{Towards Green Automated Machine Learning: Status Quo and Future Directions}}}, year = {{2021}}, } @inproceedings{21198, author = {{Hanselle, Jonas Manuel and Tornede, Alexander and Wever, Marcel Dominik and Hüllermeier, Eyke}}, location = {{Delhi, India}}, title = {{{Algorithm Selection as Superset Learning: Constructing Algorithm Selectors from Imprecise Performance Data}}}, year = {{2021}}, } @inproceedings{17407, author = {{Tornede, Alexander and Wever, Marcel Dominik and Hüllermeier, Eyke}}, booktitle = {{Discovery Science}}, title = {{{Extreme Algorithm Selection with Dyadic Feature Representation}}}, year = {{2020}}, } @inproceedings{17408, author = {{Hanselle, Jonas Manuel and Tornede, Alexander and Wever, Marcel Dominik and Hüllermeier, Eyke}}, booktitle = {{KI 2020: Advances in Artificial Intelligence}}, title = {{{Hybrid Ranking and Regression for Algorithm Selection}}}, year = {{2020}}, } @inproceedings{17424, author = {{Tornede, Tanja and Tornede, Alexander and Wever, Marcel Dominik and Mohr, Felix and Hüllermeier, Eyke}}, booktitle = {{Proceedings of the ECMLPKDD 2020}}, title = {{{AutoML for Predictive Maintenance: One Tool to RUL Them All}}}, doi = {{10.1007/978-3-030-66770-2_8}}, year = {{2020}}, } @inproceedings{20306, author = {{Tornede, Alexander and Wever, Marcel Dominik and Hüllermeier, Eyke}}, booktitle = {{Workshop MetaLearn 2020 @ NeurIPS 2020}}, location = {{Online}}, title = {{{Towards Meta-Algorithm Selection}}}, year = {{2020}}, } @inproceedings{18276, abstract = {{Algorithm selection (AS) deals with the automatic selection of an algorithm from a fixed set of candidate algorithms most suitable for a specific instance of an algorithmic problem class, where "suitability" often refers to an algorithm's runtime. Due to possibly extremely long runtimes of candidate algorithms, training data for algorithm selection models is usually generated under time constraints in the sense that not all algorithms are run to completion on all instances. Thus, training data usually comprises censored information, as the true runtime of algorithms that timed out remains unknown. However, many standard AS approaches cannot handle such information properly. In contrast, survival analysis (SA) naturally supports censored data and offers appropriate ways to use such data for learning distributional models of algorithm runtime, as we demonstrate in this work. We leverage such models as the basis of a sophisticated decision-theoretic approach to algorithm selection, which we dub Run2Survive. Moreover, taking advantage of a framework of this kind, we advocate a risk-averse approach to algorithm selection, in which the avoidance of a timeout is given high priority.
In an extensive experimental study with the standard benchmark ASlib, our approach is shown to be highly competitive and in many cases even superior to state-of-the-art AS approaches.}}, author = {{Tornede, Alexander and Wever, Marcel Dominik and Werner, Stefan and Mohr, Felix and Hüllermeier, Eyke}}, booktitle = {{ACML 2020}}, location = {{Bangkok, Thailand}}, title = {{{Run2Survive: A Decision-theoretic Approach to Algorithm Selection based on Survival Analysis}}}, year = {{2020}}, } @inproceedings{15629, abstract = {{In multi-label classification (MLC), each instance is associated with a set of class labels, in contrast to standard classification, where an instance is assigned a single label. Binary relevance (BR) learning, which reduces a multi-label problem to a set of binary classification problems, one per label, is arguably the most straightforward approach to MLC. In spite of its simplicity, BR proved to be competitive with more sophisticated MLC methods, and still achieves state-of-the-art performance for many loss functions. Somewhat surprisingly, the optimal choice of the base learner for tackling the binary classification problems has received very little attention so far. Taking advantage of the label independence assumption inherent to BR, we propose a label-wise base learner selection method optimizing label-wise macro-averaged performance measures. In an extensive experimental evaluation, we find that our approach, called LiBRe, can significantly improve generalization performance.}}, author = {{Wever, Marcel Dominik and Tornede, Alexander and Mohr, Felix and Hüllermeier, Eyke}}, location = {{Konstanz, Germany}}, publisher = {{Springer}}, title = {{{LiBRe: Label-Wise Selection of Base Learners in Binary Relevance for Multi-Label Classification}}}, year = {{2020}}, } @article{15025, abstract = {{In software engineering, the imprecise requirements of a user are transformed into a formal requirements specification during the requirements elicitation process. This process is usually guided by requirements engineers interviewing the user. We want to partially automate this first step of the software engineering process in order to enable users to specify a desired software system on their own. With our approach, users are only asked to provide exemplary behavioral descriptions. The problem of synthesizing a requirements specification from examples can partially be reduced to the problem of grammatical inference, to which we apply an active coevolutionary learning approach. However, this approach would usually require many feedback queries to be sent to the user. In this work, we extend and generalize our active learning approach to receive knowledge from multiple oracles, also known as proactive learning. The ‘user oracle’ represents input received from the user and the ‘knowledge oracle’ represents available, formalized domain knowledge. We call our two-oracle approach the ‘first apply knowledge then query’ (FAKT/Q) algorithm. We compare FAKT/Q to the active learning approach and provide an extensive benchmark evaluation. As a result, we find that the number of required user queries is reduced and the inference process is sped up significantly.
Finally, with so-called On-The-Fly Markets, we present a motivation and an application of our approach where such knowledge is available.}}, author = {{Wever, Marcel Dominik and van Rooijen, Lorijn and Hamann, Heiko}}, journal = {{Evolutionary Computation}}, number = {{2}}, pages = {{165--193}}, publisher = {{MIT Press Journals}}, title = {{{Multi-Oracle Coevolutionary Learning of Requirements Specifications from Examples in On-The-Fly Markets}}}, doi = {{10.1162/evco_a_00266}}, volume = {{28}}, year = {{2020}}, } @article{13770, author = {{Karl, Holger and Kundisch, Dennis and Meyer auf der Heide, Friedhelm and Wehrheim, Heike}}, journal = {{Business & Information Systems Engineering}}, number = {{6}}, pages = {{467--481}}, publisher = {{Springer}}, title = {{{A Case for a New IT Ecosystem: On-The-Fly Computing}}}, doi = {{10.1007/s12599-019-00627-x}}, volume = {{62}}, year = {{2020}}, } @inproceedings{8868, author = {{Wever, Marcel Dominik and Mohr, Felix and Hüllermeier, Eyke and Hetzer, Alexander}}, location = {{Bayreuth, Germany}}, title = {{{Towards Automated Machine Learning for Multi-Label Classification}}}, year = {{2019}}, } @inproceedings{15007, author = {{Melnikov, Vitaly and Hüllermeier, Eyke}}, booktitle = {{Proceedings ACML, Asian Conference on Machine Learning (Proceedings of Machine Learning Research, 101)}}, title = {{{Learning to Aggregate: Tackling the Aggregation/Disaggregation Problem for OWA}}}, doi = {{10.1016/j.jmva.2019.02.017}}, year = {{2019}}, } @inproceedings{15011, author = {{Tornede, Alexander and Wever, Marcel Dominik and Hüllermeier, Eyke}}, booktitle = {{Proceedings - 29. Workshop Computational Intelligence, Dortmund, 28. - 29. November 2019}}, editor = {{Hoffmann, Frank and Hüllermeier, Eyke and Mikut, Ralf}}, isbn = {{978-3-7315-0979-0}}, location = {{Dortmund}}, pages = {{135--146}}, publisher = {{KIT Scientific Publishing, Karlsruhe}}, title = {{{Algorithm Selection as Recommendation: From Collaborative Filtering to Dyad Ranking}}}, year = {{2019}}, } @inproceedings{13132, author = {{Mohr, Felix and Wever, Marcel Dominik and Tornede, Alexander and Hüllermeier, Eyke}}, booktitle = {{INFORMATIK 2019: 50 Jahre Gesellschaft für Informatik – Informatik für Gesellschaft}}, location = {{Kassel}}, pages = {{273--274}}, publisher = {{Gesellschaft für Informatik e.V.}}, title = {{{From Automated to On-The-Fly Machine Learning}}}, year = {{2019}}, } @inproceedings{10232, abstract = {{Existing tools for automated machine learning, such as Auto-WEKA, TPOT, auto-sklearn, and more recently ML-Plan, have shown impressive results for the tasks of single-label classification and regression. Yet, there is little work on other types of machine learning problems so far. In particular, there is almost no work on automating the engineering of machine learning solutions for multi-label classification (MLC). We show how the scope of ML-Plan, an AutoML tool for multi-class classification, can be extended towards MLC using MEKA, which is a multi-label extension of the well-known Java library WEKA. The resulting approach recursively refines MEKA's multi-label classifiers, nesting other multi-label classifiers for meta algorithms and single-label classifiers provided by WEKA as base learners.
In our evaluation, we find that the proposed approach yields strong results and performs significantly better than a set of baselines we compare with.}}, author = {{Wever, Marcel Dominik and Mohr, Felix and Tornede, Alexander and Hüllermeier, Eyke}}, location = {{Long Beach, CA, USA}}, title = {{{Automating Multi-Label Classification Extending ML-Plan}}}, year = {{2019}}, } @inproceedings{2479, author = {{Mohr, Felix and Wever, Marcel Dominik and Hüllermeier, Eyke and Faez, Amin}}, booktitle = {{SCC}}, location = {{San Francisco, CA, USA}}, publisher = {{IEEE}}, title = {{{(WIP) Towards the Automated Composition of Machine Learning Services}}}, doi = {{10.1109/SCC.2018.00039}}, year = {{2018}}, } @inproceedings{2857, author = {{Mohr, Felix and Lettmann, Theodor and Hüllermeier, Eyke and Wever, Marcel Dominik}}, booktitle = {{Proceedings of the 1st ICAPS Workshop on Hierarchical Planning}}, location = {{Delft, Netherlands}}, pages = {{31--39}}, publisher = {{AAAI}}, title = {{{Programmatic Task Network Planning}}}, year = {{2018}}, } @inproceedings{2471, author = {{Mohr, Felix and Wever, Marcel Dominik and Hüllermeier, Eyke}}, booktitle = {{SCC}}, location = {{San Francisco, CA, USA}}, publisher = {{IEEE Computer Society}}, title = {{{On-The-Fly Service Construction with Prototypes}}}, doi = {{10.1109/SCC.2018.00036}}, year = {{2018}}, } @article{3510, abstract = {{Automated machine learning (AutoML) seeks to automatically select, compose, and parametrize machine learning algorithms, so as to achieve optimal performance on a given task (dataset). Although current approaches to AutoML have already produced impressive results, the field is still far from mature, and new techniques are still being developed. In this paper, we present ML-Plan, a new approach to AutoML based on hierarchical planning. To highlight the potential of this approach, we compare ML-Plan to the state-of-the-art frameworks Auto-WEKA, auto-sklearn, and TPOT. In an extensive series of experiments, we show that ML-Plan is highly competitive and often outperforms existing approaches.}}, author = {{Mohr, Felix and Wever, Marcel Dominik and Hüllermeier, Eyke}}, issn = {{1573-0565}}, journal = {{Machine Learning}}, keywords = {{AutoML, Hierarchical Planning, HTN planning, ML-Plan}}, location = {{Dublin, Ireland}}, pages = {{1495--1515}}, publisher = {{Springer}}, title = {{{ML-Plan: Automated Machine Learning via Hierarchical Planning}}}, doi = {{10.1007/s10994-018-5735-z}}, year = {{2018}}, } @inproceedings{3552, author = {{Mohr, Felix and Wever, Marcel Dominik and Hüllermeier, Eyke}}, booktitle = {{Proceedings of the Symposium on Intelligent Data Analysis}}, location = {{‘s-Hertogenbosch, the Netherlands}}, title = {{{Reduction Stumps for Multi-Class Classification}}}, doi = {{10.1007/978-3-030-01768-2_19}}, year = {{2018}}, } @inproceedings{3852, abstract = {{In automated machine learning (AutoML), the process of engineering machine learning applications with respect to a specific problem is (partially) automated. Various AutoML tools have already been introduced to provide out-of-the-box machine learning functionality. More specifically, by selecting machine learning algorithms and optimizing their hyperparameters, these tools produce a machine learning pipeline tailored to the problem at hand. Except for TPOT, all of these tools restrict the maximum number of processing steps of such a pipeline. However, as TPOT follows an evolutionary approach, it suffers from performance issues when dealing with larger datasets. 
In this paper, we present an alternative approach leveraging hierarchical planning to configure machine learning pipelines that are unlimited in length. We evaluate our approach and find its performance to be competitive with other AutoML tools, including TPOT.}}, author = {{Wever, Marcel Dominik and Mohr, Felix and Hüllermeier, Eyke}}, booktitle = {{ICML 2018 AutoML Workshop}}, keywords = {{automated machine learning, complex pipelines, hierarchical planning}}, location = {{Stockholm, Sweden}}, title = {{{ML-Plan for Unlimited-Length Machine Learning Pipelines}}}, year = {{2018}}, } @inproceedings{2109, abstract = {{In multinomial classification, reduction techniques are commonly used to decompose the original learning problem into several simpler problems. For example, by recursively bisecting the original set of classes, so-called nested dichotomies define a set of binary classification problems that are organized in the structure of a binary tree. In contrast to the existing one-shot heuristics for constructing nested dichotomies and motivated by recent work on algorithm configuration, we propose a genetic algorithm for optimizing the structure of such dichotomies. A key component of this approach is the proposed genetic representation that facilitates the application of standard genetic operators, while still supporting the exchange of partial solutions under recombination. We evaluate the approach in an extensive experimental study, showing that it yields classifiers with superior generalization performance.}}, author = {{Wever, Marcel Dominik and Mohr, Felix and Hüllermeier, Eyke}}, booktitle = {{Proceedings of the Genetic and Evolutionary Computation Conference, GECCO 2018, Kyoto, Japan, July 15-19, 2018}}, keywords = {{Classification, Hierarchical Decomposition, Indirect Encoding}}, location = {{Kyoto, Japan}}, publisher = {{ACM}}, title = {{{Ensembles of Evolved Nested Dichotomies for Classification}}}, doi = {{10.1145/3205455.3205562}}, year = {{2018}}, } @unpublished{17713, author = {{Wever, Marcel Dominik and Mohr, Felix and Hüllermeier, Eyke}}, publisher = {{arXiv}}, title = {{{Automated Multi-Label Classification based on ML-Plan}}}, year = {{2018}}, } @unpublished{17714, author = {{Mohr, Felix and Wever, Marcel Dominik and Hüllermeier, Eyke}}, title = {{{Automated machine learning service composition}}}, year = {{2018}}, } @inbook{6423, author = {{Schäfer, Dirk and Hüllermeier, Eyke}}, booktitle = {{Discovery Science}}, isbn = {{9783030017705}}, issn = {{0302-9743}}, pages = {{161--175}}, publisher = {{Springer International Publishing}}, title = {{{Preference-Based Reinforcement Learning Using Dyad Ranking}}}, doi = {{10.1007/978-3-030-01771-2_11}}, year = {{2018}}, } @inproceedings{115, abstract = {{Whenever customers have to decide between different instances of the same product, they are interested in buying the best product. In contrast, companies are interested in reducing the construction effort (and usually, as a consequence thereof, the quality) to gain profit. This setting of opposing preferences regarding product quality is widely known and also applies to the context of service-oriented computing. In general, service-oriented computing emphasizes the construction of large software systems out of existing services, where services are small and self-contained pieces of software that adhere to a specified interface. Several implementations of the same interface are considered as several instances of the same service.
Thereby, customers are interested in buying the best service implementation for their service composition with respect to metrics such as cost, energy, memory consumption, or execution time. One way to ensure service quality is to employ certificates, which can come in different kinds: Technical certificates proving correctness can be automatically constructed by the service provider and again be automatically checked by the user. Digital certificates allow proof of the integrity of a product. Other certificates might be rolled out if service providers follow a good software construction principle, which is checked in annual audits. Whereas all of these certificates are handled differently in service markets, what they have in common is that they influence the buying decisions of customers. In this paper, we review state-of-the-art developments in certification with respect to service-oriented computing. We not only discuss how certificates are constructed and handled in service-oriented computing but also review the effects of certificates on the market from an economic perspective.}}, author = {{Jakobs, Marie-Christine and Krämer, Julia and van Straaten, Dirk and Lettmann, Theodor}}, booktitle = {{The Ninth International Conference on Advanced Service Computing (SERVICE COMPUTATION)}}, editor = {{De Barros, Marcelo and Klink, Janusz and Uhl, Tadeus and Prinz, Thomas}}, pages = {{7--12}}, title = {{{Certification Matters for Service Markets}}}, year = {{2017}}, } @inproceedings{71, abstract = {{Today, software verification tools have reached the maturity to be used for large-scale programs. Different tools perform differently well on varying code. A software developer is hence faced with the problem of choosing a tool appropriate for her program at hand. A ranking of tools on programs could facilitate the choice. Such rankings can, however, so far only be obtained by running all considered tools on the program. In this paper, we present a machine learning approach to predicting rankings of tools on programs. The method builds upon so-called label ranking algorithms, which we complement with appropriate kernels providing a similarity measure for programs. Our kernels employ a graph representation for software source code that mixes elements of control flow and program dependence graphs with abstract syntax trees. Using data sets from the software verification competition SV-COMP, we demonstrate our rank prediction technique to generalize well and achieve a rather high predictive accuracy (rank correlation > 0.6).}}, author = {{Czech, Mike and Hüllermeier, Eyke and Jakobs, Marie-Christine and Wehrheim, Heike}}, booktitle = {{Proceedings of the 3rd International Workshop on Software Analytics}}, pages = {{23--26}}, title = {{{Predicting Rankings of Software Verification Tools}}}, doi = {{10.1145/3121257.3121262}}, year = {{2017}}, } @inproceedings{1180, abstract = {{These days, there is a strong rise in the need for machine learning applications, requiring an automation of machine learning engineering, which is referred to as AutoML. In AutoML, the selection, composition, and parametrization of machine learning algorithms are automated and tailored to a specific problem, resulting in a machine learning pipeline. Current approaches reduce the AutoML problem to the optimization of hyperparameters. Based on recursive task networks, in this paper we present one approach from the field of automated planning and one evolutionary optimization approach.
Instead of simply parametrizing a given pipeline, this also allows for optimizing the structure of machine learning pipelines. We compare the two approaches in an extensive evaluation, finding both to have their strengths in different areas. Moreover, the two approaches outperform the state-of-the-art tool Auto-WEKA in many settings.}}, author = {{Wever, Marcel Dominik and Mohr, Felix and Hüllermeier, Eyke}}, booktitle = {{27th Workshop Computational Intelligence}}, location = {{Dortmund}}, title = {{{Automatic Machine Learning: Hierarchical Planning Versus Evolutionary Optimization}}}, year = {{2017}}, } @article{190, abstract = {{Today, software components are provided by global markets in the form of services. In order to optimally satisfy service requesters and service providers, adequate techniques for automatic service matching are needed. However, a requester’s requirements may be vague and the information available about a provided service may be incomplete. As a consequence, fuzziness is induced into the matching procedure. The contribution of this paper is the development of a systematic matching procedure that leverages concepts and techniques from fuzzy logic and possibility theory, based on our formal distinction between different sources and types of fuzziness in the context of service matching. In contrast to existing methods, our approach is able to deal with imprecision and incompleteness in service specifications and to inform users about the extent of induced fuzziness in order to improve the user’s decision-making. We demonstrate our approach on the example of specifications for service reputation based on ratings given by previous users. Our evaluation based on real service ratings shows the utility and applicability of our approach.}}, author = {{Platenius, Marie Christin and Shaker, Ammar and Becker, Matthias and Hüllermeier, Eyke and Schäfer, Wilhelm}}, journal = {{IEEE Transactions on Software Engineering (TSE), presented at ICSE 2017}}, number = {{8}}, pages = {{739--759}}, publisher = {{IEEE}}, title = {{{Imprecise Matching of Requirements Specifications for Software Services using Fuzzy Logic}}}, doi = {{10.1109/TSE.2016.2632115}}, year = {{2016}}, } @inproceedings{225, abstract = {{Image Processing is fundamental for any camera-based vision system. In order to automate the prototyping process of image processing solutions to some extent, we propose a holistic, adaptive approach that comprises concepts for specification, composition, recommendation, execution, and rating of image processing functionality. The fundamental idea is to realize image processing applications according to Service-oriented Computing design principles. That is, distinct image processing functionality is encapsulated in terms of stateless services. Services are then used as building blocks for more complex image processing functionality. To automatically compose complex image processing functionality, our proposed approach incorporates a flexible, Artificial Intelligence planning-based forward search algorithm. Decision-making between alternative composition steps is supported by a learning recommendation system, which keeps track of valid composition steps by automatically constructing a composition grammar. In addition, it adapts to solutions of high quality by means of feedback-based Reinforcement Learning techniques.
A concrete use case serves as proof of concept and demonstrates the feasibility of our holistic, adaptive approach.}}, author = {{Jungmann, Alexander and Kleinjohann, Bernd}}, booktitle = {{Proceedings of the 21st IEEE International Conference on Emerging Technologies and Factory Automation (ETFA)}}, pages = {{1--8}}, title = {{{A Holistic and Adaptive Approach for Automated Prototyping of Image Processing Functionality}}}, doi = {{10.1109/ETFA.2016.7733522}}, year = {{2016}}, } @inproceedings{218, abstract = {{In the Image Processing domain, automated generation of complex Image Processing functionality is highly desirable, e.g., for rapid prototyping. Service composition techniques, in turn, facilitate automated generation of complex functionality based on building blocks in terms of services. For that reason, we aim to transfer the Service Composition paradigm into the Image Processing domain. In this paper, we present our symbolic composition approach that enables us to automatically generate Image Processing applications. The functionality of Image Processing services is described by means of a variant of first-order logic, which is grounded in domain knowledge operationalized in terms of ontologies. A Petri-net formalism serves as the basis for modeling the data flow of services and composed services. A planning-based composition algorithm automatically composes complex data flow for a required functionality. A brief evaluation serves as proof of concept.}}, author = {{Jungmann, Alexander and Kleinjohann, Bernd}}, booktitle = {{Proceedings of the 13th IEEE International Conference on Services Computing (SCC)}}, pages = {{106--113}}, title = {{{Automatic Composition of Service-based Image Processing Applications}}}, doi = {{10.1109/SCC.2016.21}}, year = {{2016}}, } @inproceedings{280, abstract = {{The Collaborative Research Centre "On-The-Fly Computing" works on foundations and principles for the vision of the Future Internet. It proposes the paradigm of On-The-Fly Computing, which tackles emerging worldwide service markets. In these markets, service providers trade software, platform, and infrastructure as a service. Service requesters state requirements on services. To satisfy these requirements, the new role of brokers, who are (human) actors building service compositions on the fly, is introduced. Brokers have to specify service compositions formally and comprehensively using a domain-specific language (DSL), and to use service matching for the discovery of the constituent services available in the market. The broker's choice of the DSL and matching approaches influences her success in building compositions, as distinctive properties of different service markets play a significant role. In this paper, we propose a new approach to engineering a situation-specific DSL by customizing a comprehensive, modular DSL and its matching for given service market properties. This enables the broker to create market-specific composition specifications and to perform market-specific service matching. As a result, the broker builds service compositions satisfying the requester's requirements more accurately.
We evaluated the presented concepts using case studies in service markets for tourism and university management.}}, author = {{Arifulina, Svetlana and Platenius, Marie Christin and Mohr, Felix and Engels, Gregor and Schäfer, Wilhelm}}, booktitle = {{Proceedings of the IEEE 11th World Congress on Services (SERVICES), Visionary Track: Service Composition for the Future Internet}}, pages = {{333--340}}, title = {{{Market-Specific Service Compositions: Specification and Matching}}}, doi = {{10.1109/SERVICES.2015.58}}, year = {{2015}}, } @inproceedings{245, abstract = {{In cloud computing, software architects develop systems for virtually unlimited resources that cloud providers bill on a pay-per-use basis. Elasticity management systems provision these resources autonomously to deal with changing workloads. Such changing workloads call for new objective metrics allowing architects to quantify quality properties like scalability, elasticity, and efficiency, e.g., for requirements/SLO engineering and software design analysis. In the literature, initial metrics for these properties have been proposed. However, current metrics lack a systematic derivation and assume knowledge of implementation details like resource handling. Therefore, these metrics are inapplicable where such knowledge is unavailable. To cope with these shortcomings, this short paper derives metrics for the scalability, elasticity, and efficiency properties of cloud computing systems using the goal question metric (GQM) method. Our derivation uses a running example that outlines characteristics of cloud computing systems. This example allows us to set up a systematic GQM plan and to derive an initial set of six new metrics. In particular, we show that our GQM plan allows us to classify existing metrics.}}, author = {{Becker, Matthias and Lehrig, Sebastian and Becker, Steffen}}, booktitle = {{Proceedings of the 6th ACM/SPEC International Conference on Performance Engineering}}, pages = {{169--174}}, title = {{{Systematically Deriving Quality Metrics for Cloud Computing Systems}}}, doi = {{10.1145/2668930.2688043}}, year = {{2015}}, } @article{323, abstract = {{On-the-fly composition of service-based software solutions is still a challenging task. Even more challenges emerge when facing automatic service composition in markets of composed services for end users. In this paper, we focus on the functional discrepancy between “what a user wants”, specified in terms of a request, and “what a user gets” when executing a composed service. To meet the challenge of functional discrepancy, we propose the combination of existing symbolic composition approaches with machine learning techniques. We developed a learning recommendation system that expands the capabilities of existing composition algorithms to facilitate adaptivity and consequently reduces functional discrepancy. As a representative of symbolic techniques, an Artificial Intelligence planning-based approach produces solutions that are correct with respect to formal specifications. Our learning recommendation system supports the symbolic approach in decision-making. Reinforcement Learning techniques enable the recommendation system to adjust its recommendation strategy over time based on user ratings. We implemented the proposed functionality in terms of a prototypical composition framework.
Preliminary results from experiments conducted in the image processing domain illustrate the benefit of combining both complementary techniques.}}, author = {{Jungmann, Alexander and Mohr, Felix}}, journal = {{Journal of Internet Services and Applications}}, number = {{1}}, pages = {{1--18}}, publisher = {{Springer}}, title = {{{An approach towards adaptive service composition in markets of composed services}}}, doi = {{10.1186/s13174-015-0022-8}}, year = {{2015}}, } @inproceedings{324, abstract = {{Services are self-contained software components that can be used platform-independently and that aim at maximizing software reuse. A basic concern in service-oriented architectures is to measure the reusability of services. One of the most important qualities is the functional reusability, which indicates how relevant the task is that a service solves. Current metrics for functional reusability of software, however, have very little explanatory power and do not accomplish this goal. This paper presents a new approach to estimating the functional reusability of services based on their relevance. To this end, it defines the degree to which a service enables the execution of other services as its contribution. Based on the contribution, the relevance of services is defined as an estimation of their functional reusability. Explanatory power is obtained by normalizing relevance values with a reference service. The application of the metric to a service test set confirms its supposed capabilities.}}, author = {{Mohr, Felix}}, booktitle = {{Proceedings of the 14th International Conference on Software Reuse (ICSR)}}, pages = {{298--313}}, title = {{{A Metric for Functional Reusability of Services}}}, doi = {{10.1007/978-3-319-14130-5_21}}, year = {{2015}}, } @article{3343, abstract = {{In this paper, we consider an extended variant of query learning where the hidden concept is embedded in some Boolean circuit. This additional processing layer modifies query arguments and answers by fixed transformation functions which are known to the learner. For this scenario, we provide a characterization of the solution space and an ordering on it. We give a compact representation of the minimal and maximal solutions as quantified Boolean formulas, and we adapt the original algorithms for exact learning of specific classes of propositional formulas.}}, author = {{Bubeck, Uwe and Kleine Büning, Hans}}, issn = {{0004-3702}}, journal = {{Artificial Intelligence}}, keywords = {{Query learning, Propositional logic}}, pages = {{246--257}}, publisher = {{Elsevier}}, title = {{{Learning Boolean Specifications}}}, doi = {{10.1016/j.artint.2015.09.003}}, year = {{2015}}, } @inproceedings{315, abstract = {{In this paper, we introduce an approach for combining embedded systems with Service-oriented Computing techniques, based on a concrete application scenario from the robotics domain. Our proposed Service-oriented Architecture allows for incorporating computationally expensive functionality as services into a distributed computing environment. Furthermore, our framework facilitates a seamless integration of embedded systems such as robots as service providers into the computing environment.
All communication is based on so-called recipes, which can be interpreted as autonomous messages that contain all information necessary for executing compositions of services.}}, author = {{Jungmann, Alexander and Jatzkowski, Jan and Kleinjohann, Bernd}}, booktitle = {{Proceedings of the 5th IFIP International Embedded Systems Symposium}}, title = {{{Combining Service-oriented Computing with Embedded Systems - A Robotics Case Study}}}, year = {{2015}}, }