@inproceedings{28350,
  abstract     = {In recent years, we observe an increasing amount of software with machine learning components being deployed. This poses the question of quality assurance for such components: how can we validate whether specified requirements are fulfilled by a machine learned software? Current testing and verification approaches either focus on a single requirement (e.g., fairness) or specialize on a single type of machine learning model (e.g., neural networks).
In this paper, we propose property-driven testing of machine learning models. Our approach MLCheck encompasses (1) a language for property specification, and (2) a technique for systematic test case generation. The specification language is comparable to property-based testing languages. Test case generation employs advanced verification technology for a systematic, property dependent construction of test suites, without additional user supplied generator functions. We evaluate MLCheck using requirements and data sets from three different application areas (software discrimination, learning on knowledge graphs and security). Our evaluation shows that despite its generality MLCheck can even outperform specialised testing approaches while having a comparable runtime.},
  author       = {Sharma, Arnab and Demir, Caglar and Ngonga Ngomo, Axel-Cyrille and Wehrheim, Heike},
  booktitle    = {{Proceedings of the 20th IEEE International Conference on Machine Learning and Applications (ICMLA)}},
  publisher    = {IEEE},
  title        = {{MLCHECK–Property-Driven Testing of Machine Learning Classifiers}},
  year         = {2021},
}

@inproceedings{26049,
  abstract     = {Content is the new oil. Users consume billions of terabytes a day while surfing on news sites or blogs, posting on social media sites, and sending chat messages around the globe. While content is heterogeneous, the dominant form of web content is text. There are situations where more diversity needs to be introduced into text content, for example, to reuse it on websites or to allow a chatbot to base its models on the information conveyed rather than of the language used. In order to achieve this, paraphrasing techniques have been developed: One example is Text spinning, a technique that automatically paraphrases text while leaving the intent intact. This makes it easier to reuse content, or to change the language generated by the bot more human. One method for modifying texts is a combination of translation and back-translation. This paper presents NATTS, a naive approach that uses transformer-based translation models to create diversified text, combining translation steps in one model. An advantage of this approach is that it can be fine-tuned and handle technical language.},
  author       = {Bäumer, Frederik Simon and Kersting, Joschka and Denisov, Sergej and Geierhos, Michaela},
  booktitle    = {{Proceedings of the International Conferences on WWW/Internet 2021 and Applied Computing 2021}},
  keywords     = {Software Requirements, Natural Language Processing, Transfer Learning, On-The-Fly Computing},
  location     = {Lisbon, Portugal},
  pages        = {221--225},
  publisher    = {IADIS},
  title        = {{In Other Words: A Naive Approach to Text Spinning}},
  year         = {2021},
}

% NOTE(review): pages holds a single number, presumably the total page count
% rather than a page range -- confirm.
@phdthesis{26746,
  abstract     = {Previous research in proof-carrying hardware has established the feasibility and utility of the approach, and provided a concrete solution for employing it for the certification of functional equivalence checking against a specification, but fell short in connecting it to state-of-the-art formal verification insights, methods and tools. Due to the immense complexity of modern circuits, and verification challenges such as the state explosion problem for sequential circuits, this restriction of readily-available verification solutions severely limited the applicability of the approach in wider contexts.

This thesis closes the gap between the PCH approach and current advances in formal hardware verification, provides methods and tools to express and certify a wide range of circuit properties, both functional and non-functional, and presents for the first time prototypes in which circuits that are implemented on actual reconfigurable hardware are verified with PCH methods. Using these results, designers can now apply PCH to establish trust in more complex circuits, by using more diverse properties which they can express using modern, efficient property specification techniques.},
  author       = {Wiersema, Tobias},
  keywords     = {Proof-Carrying Hardware, Formal Verification, Sequential Circuits, Non-Functional Properties, Functional Properties},
  pages        = {293},
  publisher    = {Paderborn University},
  school       = {Paderborn University},
  title        = {{Guaranteeing Properties of Reconfigurable Hardware Circuits with Proof-Carrying Hardware}},
  year         = {2021},
}

@misc{28998,
  author       = {Suermann, Dennis},
  title        = {{Schutz und Stabilisierung von Overlay-Netzwerken mithilfe des Relay-Layers}},
  year         = {2021},
}

% NOTE(review): the abstract describes this work as a master's thesis; consider
% the mastersthesis entry type with a school field -- confirm before changing.
@misc{29151,
  abstract     = {Automation becomes a vital part in the High-Performance computing system in situational dynamics to take the decisions on the fly. Heterogeneous compute nodes consist of computing resources such as CPU, GPU and FPGA and are the important components of the high-performance computing system that can adapt the automation to achieve the given goal. While implanting automation in the computing resources, management of the resources is one of the essential aspects that need to be taken care of. Tasks are continuously executed on the resources using its unique characteristics. Effective scheduling is essential to make the best use of the characteristics provided by each resource. Scheduling enables the execution of each task by allocating resources so that they take advantage of all the characteristics of the compute resources. Various scheduling heuristics can be used to create effective scheduling, which might require the execution time to schedule the task efficiently. Providing actual execution time is not possible in many cases; hence we can provide the estimations for the actual execution time. The purpose of this master's thesis is to design a predictive model or system that estimates the execution time required to execute tasks using historical execution time data on the heterogeneous compute nodes. In this thesis, regression techniques (SGD Regressor, Passive-Aggressive Regressor, MLP Regressor, and XCSF Regressor) are compared in terms of their prediction accuracy in order to determine which technique produces reliable predictions for the execution time. These estimations must be generated in an online learning environment in which data points arrive in any sequence, one by one, and the regression model must learn from them. After evaluating the regression algorithms, it is seen that the XCSF regressor provides the highest overall prediction accuracy for the supplied data sets.
The regression technique's parameters also play a significant role in achieving an acceptable prediction accuracy. As a remark, when using online learning in regression analysis, the accuracy depends upon both the order of sequential data points that are coming to train the model and the parameter configuration for each regression technique.},
  author       = {Kashikar, Chinmay},
  publisher    = {Paderborn University},
  title        = {{A Comparison of Machine Learning Techniques for the On-line Characterization of Tasks Executed on Heterogeneous Compute Nodes}},
  year         = {2021},
}

@article{27045,
  abstract     = {Due to the lack of established real-world benchmark suites for static taint analyses of Android applications, evaluations of these analyses are often restricted and hard to compare. Even in evaluations that do use real-world apps, details about the ground truth in those apps are rarely documented, which makes it difficult to compare and reproduce the results. To push Android taint analysis research forward, this paper thus recommends criteria for constructing real-world benchmark suites for this specific domain, and presents TaintBench, the first real-world malware benchmark suite with documented taint flows. TaintBench benchmark apps include taint flows with complex structures, and addresses static challenges that are commonly agreed on by the community. Together with the TaintBench suite, we introduce the TaintBench framework, whose goal is to simplify real-world benchmarking of Android taint analyses. First, a usability test shows that the framework improves experts’ performance and perceived usability when documenting and inspecting taint flows. Second, experiments using TaintBench reveal new insights for the taint analysis tools Amandroid and FlowDroid: (i) They are less effective on real-world malware apps than on synthetic benchmark apps. (ii) Predefined lists of sources and sinks heavily impact the tools’ accuracy. (iii) Surprisingly, up-to-date versions of both tools are less accurate than their predecessors.},
  author       = {Luo, Linghui and Pauck, Felix and Piskachev, Goran and Benz, Manuel and Pashchenko, Ivan and Mory, Martin and Bodden, Eric and Hermann, Ben and Massacci, Fabio},
  issn         = {1382-3256},
  journal      = {{Empirical Software Engineering}},
  title        = {{TaintBench: Automatic real-world malware benchmarking of Android taint analyses}},
  doi          = {10.1007/s10664-021-10013-5},
  year         = {2021},
}

@misc{27053,
  author       = {Everling, Leon},
  title        = {{Selbststabilisierender Bakery Algorithmus für verteilte Systeme}},
  year         = {2021},
}

@misc{27072,
  author       = {Adsul, Vaibhav},
  title        = {{Peer-to-Peer Matching for Distributed Systems}},
  year         = {2021},
}

% NOTE(review): no school is given, which phdthesis normally requires --
% supply the degree-granting institution if known.
@phdthesis{27503,
  author       = {Hasnain, Asif},
  title        = {{Automating Network Resource Allocation for Coflows with Deadlines}},
  doi          = {10.17619/UNIPB/1-1241},
  year         = {2021},
}

% NOTE(review): pages 1--1 looks like an early-access placeholder rather than a
% real page range -- confirm against the published issue and add volume/number.
@article{21004,
  abstract     = {Automated machine learning (AutoML) supports the algorithmic construction and data-specific customization of machine learning pipelines, including the selection, combination, and parametrization of machine learning algorithms as main constituents. Generally speaking, AutoML approaches comprise two major components: a search space model and an optimizer for traversing the space. Recent approaches have shown impressive results in the realm of supervised learning, most notably (single-label) classification (SLC). Moreover, first attempts at extending these approaches towards multi-label classification (MLC) have been made. While the space of candidate pipelines is already huge in SLC, the complexity of the search space is raised to an even higher power in MLC. One may wonder, therefore, whether and to what extent optimizers established for SLC can scale to this increased complexity, and how they compare to each other. This paper makes the following contributions: First, we survey existing approaches to AutoML for MLC. Second, we augment these approaches with optimizers not previously tried for MLC. Third, we propose a benchmarking framework that supports a fair and systematic comparison. Fourth, we conduct an extensive experimental study, evaluating the methods on a suite of MLC problems. We find a grammar-based best-first search to compare favorably to other optimizers.},
  author       = {Wever, Marcel Dominik and Tornede, Alexander and Mohr, Felix and Hüllermeier, Eyke},
  issn         = {0162-8828},
  journal      = {{IEEE Transactions on Pattern Analysis and Machine Intelligence}},
  keywords     = {Automated Machine Learning, Multi Label Classification, Hierarchical Planning, Bayesian Optimization},
  pages        = {1--1},
  title        = {{AutoML for Multi-Label Classification: Overview and Empirical Evaluation}},
  doi          = {10.1109/tpami.2021.3051276},
  year         = {2021},
}

@inproceedings{21005,
  abstract     = {Data-parallel applications are developed using different data programming models, e.g., MapReduce, partition/aggregate. These models represent diverse resource requirements of application in a datacenter network, which can be represented by the coflow abstraction. The conventional method of creating hand-crafted coflow heuristics for admission or scheduling for different workloads is practically infeasible. In this paper, we propose a deep reinforcement learning (DRL)-based coflow admission scheme -- LCS -- that can learn an admission policy for a higher-level performance objective, i.e., maximize successful coflow admissions, without manual feature engineering. LCS is trained on a production trace, which has online coflow arrivals. The evaluation results show that LCS is able to learn a reasonable admission policy that admits more coflows than state-of-the-art Varys heuristic while meeting their deadlines.},
  author       = {Hasnain, Asif and Karl, Holger},
  booktitle    = {{IEEE INFOCOM 2021 - IEEE Conference on Computer Communications Workshops (INFOCOM WKSHPS)}},
  keywords     = {Coflow scheduling, Reinforcement learning, Deadlines},
  location     = {Vancouver BC Canada},
  publisher    = {IEEE Communications Society},
  title        = {{Learning Coflow Admissions}},
  doi          = {10.1109/INFOCOMWKSHPS51825.2021.9484599},
  year         = {2021},
}

@misc{21084,
  author       = {Werthmann, Julian},
  title        = {{Derandomization and Local Graph Problems in the Node-Capacitated Clique}},
  year         = {2021},
}

@article{21092,
  abstract     = {Automated Machine Learning (AutoML) seeks to automatically find so-called machine learning pipelines that maximize the prediction performance when being used to train a model on a given dataset. One of the main and yet open challenges in AutoML is an effective use of computational resources: An AutoML process involves the evaluation of many candidate pipelines, which are costly but often ineffective because they are canceled due to a timeout.
In this paper, we present an approach to predict the runtime of two-step machine learning pipelines with up to one pre-processor, which can be used to anticipate whether or not a pipeline will time out. Separate runtime models are trained offline for each algorithm that may be used in a pipeline, and an overall prediction is derived from these models. We empirically show that the approach increases successful evaluations made by an AutoML tool while preserving or even improving on the previously best solutions.},
  author       = {Mohr, Felix and Wever, Marcel Dominik and Tornede, Alexander and Hüllermeier, Eyke},
  journal      = {{IEEE Transactions on Pattern Analysis and Machine Intelligence}},
  publisher    = {IEEE},
  title        = {{Predicting Machine Learning Pipeline Runtimes in the Context of Automated Machine Learning}},
  year         = {2021},
}

@misc{21197,
  author       = {Mengshi, Ma},
  title        = {{Self-stabilizing Arrow Protocol on Spanning Trees with a Low Diameter}},
  year         = {2021},
}

@article{21242,
  author       = {Lüttenberg, Hedda and Beverungen, Daniel and Poniatowski, Martin and Kundisch, Dennis and Wünderlich, Nancy},
  journal      = {{Wirtschaftsinformatik \& Management}},
  number       = {2},
  pages        = {120--131},
  title        = {{Drei Strategien zur Etablierung digitaler Plattformen in der Industrie}},
  volume       = {13},
  year         = {2021},
}

% NOTE(review): no booktitle is given, which inproceedings normally requires --
% supply the proceedings or event title if known.
@inproceedings{21525,
  author       = {Gutt, Dominik and Neumann, Jürgen and Jabr, Wael and Kundisch, Dennis},
  location     = {Virtual Conference/Workshop},
  title        = {{The Fate of the App: Economic Implications of Updating under Reputation Resetting}},
  year         = {2021},
}

@inproceedings{21543,
  abstract     = {Services often consist of multiple chained components such as microservices in a service mesh, or machine learning functions in a pipeline. Providing these services requires online coordination including scaling the service, placing instance of all components in the network, scheduling traffic to these instances, and routing traffic through the network. Optimized service coordination is still a hard problem due to many influencing factors such as rapidly arriving user demands and limited node and link capacity. Existing approaches to solve the problem are often built on rigid models and assumptions, tailored to specific scenarios. If the scenario changes and the assumptions no longer hold, they easily break and require manual adjustments by experts. Novel self-learning approaches using deep reinforcement learning (DRL) are promising but still have limitations as they only address simplified versions of the problem and are typically centralized and thus do not scale to practical large-scale networks.

To address these issues, we propose a distributed self-learning service coordination approach using DRL. After centralized training, we deploy a distributed DRL agent at each node in the network, making fast coordination decisions locally in parallel with the other nodes. Each agent only observes its direct neighbors and does not need global knowledge. Hence, our approach scales independently from the size of the network. In our extensive evaluation using real-world network topologies and traffic traces, we show that our proposed approach outperforms a state-of-the-art conventional heuristic as well as a centralized DRL approach (60\% higher throughput on average) while requiring less time per online decision (1 ms).},
  author       = {Schneider, Stefan Balthasar and Qarawlus, Haydar and Karl, Holger},
  booktitle    = {{IEEE International Conference on Distributed Computing Systems (ICDCS)}},
  keywords     = {network management, service management, coordination, reinforcement learning, distributed},
  location     = {Washington, DC, USA},
  publisher    = {IEEE},
  title        = {{Distributed Online Service Coordination Using Deep Reinforcement Learning}},
  year         = {2021},
}

@techreport{21569,
  abstract     = {Die kontinuierliche Weiterentwicklung des eigenen Geschäftsmodells ist für eine Organisation von entscheidender Bedeutung, um wettbewerbsfähig und somit nachhaltig erfolgreich zu bleiben. Während für die Entwicklung neuer Geschäftsmodelle häufig Workshops und einfache Software-Tools zur Visualisierung genutzt werden, wurden in der Forschung bereits erste Ansätze von datengetriebener Geschäftsmodellentwicklung (GME) vorgestellt. Diese Ansätze nutzen dabei Daten, Informationen oder auch Wissen aus internen und externen Unternehmensquellen, um den GME-Prozess zu unterstützen. Innerhalb dieses Beitrags zeigen wir einige Ansätze aus der aktuellen Literatur und analysieren wie ihre Datennutzung den GME-Prozess unterstützt. Weiterhin stellen wir mit dem BMDL Feature Modeler ein Tool vor, welches den GME-Prozess mit Expertenwissen unterstützt.},
  author       = {Gottschalk, Sebastian and Yigitbas, Enes},
  institution  = {Gesellschaft für Informatik},
  publisher    = {Gesellschaft für Informatik},
  title        = {{Von datenbasierter zu datengetriebener Geschäftsmodellentwicklung: Ein Überblick über Software-Tools und deren Datennutzung}},
  volume       = {1},
  year         = {2021},
}

@inproceedings{21570,
  author       = {Tornede, Tanja and Tornede, Alexander and Wever, Marcel Dominik and Hüllermeier, Eyke},
  booktitle    = {{Proceedings of the Genetic and Evolutionary Computation Conference}},
  title        = {{Coevolution of Remaining Useful Lifetime Estimation Pipelines for Automated Predictive Maintenance}},
  year         = {2021},
}

% NOTE(review): no school is given, which phdthesis normally requires --
% supply the degree-granting institution if known.
@phdthesis{21628,
  abstract     = {This thesis considers the realization of distributed data structures and the construction of distributed protocols for self-stabilizing overlay networks.

In the first part of this thesis, we provide distributed protocols for queues, stacks and priority queues that serve the insertion and deletion of elements within a logarithmic amount of rounds.
Our protocols respect semantic constraints such as sequential consistency or serializability and the individual semantic constraints given by the type (queue, stack, priority queue) of the data structure.
We furthermore provide a protocol that handles joining and leaving nodes.
As an important side product, we present a novel protocol solving the distributed $k$-selection problem in a logarithmic amount of rounds, that is, to find the $k$-smallest elements among a polynomial number of elements spread among $n$ nodes.

The second part of this thesis is devoted to the construction of protocols for self-stabilizing overlay networks, i.e., distributed protocols that transform an overlay network from any initial (potentially illegitimate) state into a legitimate state in finite time.
We present protocols for self-stabilizing generalized De Bruijn graphs, self-stabilizing quadtrees and self-stabilizing supervised skip rings.
Each of those protocols comes with unique properties that makes it interesting for certain distributed applications.
Generalized De Bruijn networks provide routing within a constant amount of hops, thus serving the interest in networks that require a low latency for requests.
The protocol for the quadtree guarantees monotonic searchability as well as a geometric variant of monotonic searchability, making it interesting for wireless networks or applications needed in the area of computational geometry.
The supervised skip ring can be used to construct a self-stabilizing publish-subscribe system.},
  author       = {Feldmann, Michael},
  title        = {{Algorithms for Distributed Data Structures and Self-Stabilizing Overlay Networks}},
  doi          = {10.17619/UNIPB/1-1113},
  year         = {2021},
}