@phdthesis{29672,
  author       = {{Schneider, Stefan Balthasar}},
  title        = {{{Network and Service Coordination: Conventional and Machine Learning Approaches}}},
  doi          = {{10.17619/UNIPB/1-1276}},
  year         = {{2022}},
}

@inproceedings{30236,
  author       = {{Schneider, Stefan Balthasar and Werner, Stefan and Khalili, Ramin and Hecker, Artur and Karl, Holger}},
  title        = {{{mobile-env: An Open Platform for Reinforcement Learning in Wireless Mobile Networks}}},
  booktitle    = {{IEEE/IFIP Network Operations and Management Symposium (NOMS)}},
  publisher    = {{IEEE}},
  location     = {{Budapest}},
  year         = {{2022}},
  keywords     = {{wireless mobile networks, network management, continuous control, cognitive networks, autonomous coordination, reinforcement learning, gym environment, simulation, open source}},
  abstract     = {{Recent reinforcement learning approaches for continuous control in wireless mobile networks have shown impressive
results. But due to the lack of open and compatible simulators, authors typically create their own simulation environments for training and evaluation. This is cumbersome and time-consuming for authors and limits reproducibility and comparability, ultimately impeding progress in the field.

To this end, we propose mobile-env, a simple and open platform for training, evaluating, and comparing reinforcement learning and conventional approaches for continuous control in mobile wireless networks. mobile-env is lightweight and implements the common OpenAI Gym interface and additional wrappers, which allows connecting virtually any single-agent or multi-agent reinforcement learning framework to the environment. While mobile-env provides sensible default values and can be used out of the box, it also has many configuration options and is easy to extend. We therefore believe mobile-env to be a valuable platform for driving meaningful progress in autonomous coordination of
wireless mobile networks.}},
}

@inproceedings{29220,
  abstract     = {{Modern services often comprise several components, such as chained virtual network functions, microservices, or
machine learning functions. Providing such services requires to decide how often to instantiate each component, where to place these instances in the network, how to chain them and route traffic through them.
To overcome limitations of conventional, hardwired heuristics, deep reinforcement learning (DRL) approaches for self-learning network and service management have emerged recently. These model-free DRL approaches are more flexible but typically learn tabula rasa, i.e., disregard existing understanding of networks, services, and their coordination.

Instead, we propose FutureCoord, a novel model-based AI approach that leverages existing understanding of networks and services for more efficient and effective coordination without time-intensive training. FutureCoord combines Monte Carlo Tree Search with a stochastic traffic model. This allows FutureCoord to estimate the impact of future incoming traffic and effectively optimize long-term effects, taking fluctuating demand and Quality of Service (QoS) requirements into account. Our extensive evaluation based on real-world network topologies, services, and traffic traces indicates that FutureCoord clearly outperforms state-of-the-art model-free and model-based approaches with up to 51\% higher flow success ratios.}},
  author       = {{Werner, Stefan and Schneider, Stefan Balthasar and Karl, Holger}},
  booktitle    = {{IEEE/IFIP Network Operations and Management Symposium (NOMS)}},
  keywords     = {{network management, service management, AI, Monte Carlo Tree Search, model-based, QoS}},
  location     = {{Budapest}},
  publisher    = {{IEEE}},
  title        = {{{Use What You Know: Network and Service Coordination Beyond Certainty}}},
  year         = {{2022}},
}

@inproceedings{21543,
  abstract     = {{Services often consist of multiple chained components such as microservices in a service mesh, or machine learning functions in a pipeline. Providing these services requires online coordination including scaling the service, placing instance of all components in the network, scheduling traffic to these instances, and routing traffic through the network. Optimized service coordination is still a hard problem due to many influencing factors such as rapidly arriving user demands and limited node and link capacity. Existing approaches to solve the problem are often built on rigid models and assumptions, tailored to specific scenarios. If the scenario changes and the assumptions no longer hold, they easily break and require manual adjustments by experts. Novel self-learning approaches using deep reinforcement learning (DRL) are promising but still have limitations as they only address simplified versions of the problem and are typically centralized and thus do not scale to practical large-scale networks.

To address these issues, we propose a distributed self-learning service coordination approach using DRL. After centralized training, we deploy a distributed DRL agent at each node in the network, making fast coordination decisions locally in parallel with the other nodes. Each agent only observes its direct neighbors and does not need global knowledge. Hence, our approach scales independently from the size of the network. In our extensive evaluation using real-world network topologies and traffic traces, we show that our proposed approach outperforms a state-of-the-art conventional heuristic as well as a centralized DRL approach (60\% higher throughput on average) while requiring less time per online decision (1 ms).}},
  author       = {{Schneider, Stefan Balthasar and Qarawlus, Haydar and Karl, Holger}},
  booktitle    = {{IEEE International Conference on Distributed Computing Systems (ICDCS)}},
  keywords     = {{network management, service management, coordination, reinforcement learning, distributed}},
  location     = {{Washington, DC, USA}},
  publisher    = {{IEEE}},
  title        = {{{Distributed Online Service Coordination Using Deep Reinforcement Learning}}},
  year         = {{2021}},
}

@inproceedings{20693,
  author       = {{Schneider, Stefan Balthasar and Jürgens, Mirko and Karl, Holger}},
  title        = {{{Divide and Conquer: Hierarchical Network and Service Coordination}}},
  booktitle    = {{IFIP/IEEE International Symposium on Integrated Network Management (IM)}},
  publisher    = {{IFIP/IEEE}},
  location     = {{Bordeaux, France}},
  year         = {{2021}},
  keywords     = {{network management, service management, coordination, hierarchical, scalability, nfv}},
  abstract     = {{In practical, large-scale networks, services are requested
by users across the globe, e.g., for video streaming.
Services consist of multiple interconnected components such as
microservices in a service mesh. Coordinating these services
requires scaling them according to continuously changing user
demand, deploying instances at the edge close to their users,
and routing traffic efficiently between users and connected instances.
Network and service coordination is commonly addressed
through centralized approaches, where a single coordinator
knows everything and coordinates the entire network globally.
While such centralized approaches can reach global optima, they
do not scale to large, realistic networks. In contrast, distributed
approaches scale well, but sacrifice solution quality due to their
limited scope of knowledge and coordination decisions.

To this end, we propose a hierarchical coordination approach
that combines the good solution quality of centralized approaches
with the scalability of distributed approaches. In doing so, we divide
the network into multiple hierarchical domains and optimize
coordination in a top-down manner. We compare our hierarchical
with a centralized approach in an extensive evaluation on a real-world
network topology. Our results indicate that hierarchical
coordination can find close-to-optimal solutions in a fraction of
the runtime of centralized approaches.}},
}

@article{21808,
  abstract     = {{Modern services consist of interconnected components, e.g., microservices in a service mesh or machine learning functions in a pipeline. These services can scale and run across multiple network nodes on demand. To process incoming traffic, service components have to be instantiated and traffic assigned to these instances, taking capacities, changing demands, and Quality of Service (QoS) requirements into account. This challenge is usually solved with custom approaches designed by experts. While this typically works well for the considered scenario, the models often rely on unrealistic assumptions or on knowledge that is not available in practice (e.g., a priori knowledge).

We propose DeepCoord, a novel deep reinforcement learning approach that learns how to best coordinate services and is geared towards realistic assumptions. It interacts with the network and relies on available, possibly delayed monitoring information. Rather than defining a complex model or an algorithm on how to achieve an objective, our model-free approach adapts to various objectives and traffic patterns. An agent is trained offline without expert knowledge and then applied online with minimal overhead. Compared to a state-of-the-art heuristic, DeepCoord significantly improves flow throughput (up to 76\%) and overall network utility (more than 2x) on real-world network topologies and traffic traces. It also supports optimizing multiple, possibly competing objectives, learns to respect QoS requirements, generalizes to scenarios with unseen, stochastic traffic, and scales to large real-world networks. For reproducibility and reuse, our code is publicly available.}},
  author       = {{Schneider, Stefan Balthasar and Khalili, Ramin and Manzoor, Adnan and Qarawlus, Haydar and Schellenberg, Rafael and Karl, Holger and Hecker, Artur}},
  journal      = {{Transactions on Network and Service Management}},
  keywords     = {{network management, service management, coordination, reinforcement learning, self-learning, self-adaptation, multi-objective}},
  publisher    = {{IEEE}},
  title        = {{{Self-Learning Multi-Objective Service Coordination Using Deep Reinforcement Learning}}},
  doi          = {{10.1109/TNSM.2021.3076503}},
  year         = {{2021}},
}

@techreport{33854,
  author       = {{Schneider, Stefan Balthasar and Karl, Holger and Khalili, Ramin and Hecker, Artur}},
  title        = {{{DeepCoMP: Coordinated Multipoint Using Multi-Agent Deep Reinforcement Learning}}},
  year         = {{2021}},
  keywords     = {{mobility management, coordinated multipoint, CoMP, cell selection, resource management, reinforcement learning, multi agent, MARL, self-learning, self-adaptation, QoE}},
  abstract     = {{Macrodiversity is a key technique to increase the capacity of mobile networks. It can be realized using coordinated multipoint (CoMP), simultaneously connecting users to multiple overlapping cells. Selecting which users to serve by how many and which cells is NP-hard but needs to happen continuously in real time as users move and channel state changes. Existing approaches often require strict assumptions about or perfect knowledge of the underlying radio system, its resource allocation scheme, or user movements, none of which is readily available in practice.

Instead, we propose three novel self-learning and self-adapting approaches using model-free deep reinforcement learning (DRL): DeepCoMP, DD-CoMP, and D3-CoMP. DeepCoMP leverages central observations and control of all users to select cells almost optimally. DD-CoMP and D3-CoMP use multi-agent DRL, which allows distributed, robust, and highly scalable coordination. All three approaches learn from experience and self-adapt to varying scenarios, reaching 2x higher Quality of Experience than other approaches. They have very few built-in assumptions and do not need prior system knowledge, making them more robust to change and better applicable in practice than existing approaches.}},
}

@techreport{35889,
  author       = {{Schneider, Stefan Balthasar}},
  title        = {{{Conventional and Machine Learning Approaches for Network and Service Coordination}}},
  year         = {{2021}},
  keywords     = {{nfv, coordination, machine learning, reinforcement learning, phd, digest}},
  abstract     = {{Network and service coordination is important to provide modern services consisting of multiple interconnected components, e.g., in 5G, network function virtualization (NFV), or cloud and edge computing. In this paper, I outline my dissertation research, which proposes six approaches to automate such network and service coordination. All approaches dynamically react to the current demand and optimize coordination for high service quality and low costs. The approaches range from centralized to distributed methods and from conventional heuristic algorithms and mixed-integer linear programs to machine learning approaches using supervised and reinforcement learning. I briefly discuss their main ideas and advantages over other state-of-the-art approaches and compare strengths and weaknesses.}},
}

@inproceedings{19607,
  author       = {{Schneider, Stefan Balthasar and Klenner, Lars Dietrich and Karl, Holger}},
  title        = {{{Every Node for Itself: Fully Distributed Service Coordination}}},
  booktitle    = {{IEEE International Conference on Network and Service Management (CNSM)}},
  publisher    = {{IEEE}},
  year         = {{2020}},
  keywords     = {{distributed management, service coordination, network coordination, nfv, softwarization, orchestration}},
  abstract     = {{Modern services consist of modular, interconnected
components, e.g., microservices forming a service mesh. To
dynamically adjust to ever-changing service demands, service
components have to be instantiated on nodes across the network.
Incoming flows requesting a service then need to be routed
through the deployed instances while considering node and link
capacities. Ultimately, the goal is to maximize the successfully
served flows and Quality of Service (QoS) through online service
coordination. Current approaches for service coordination are
usually centralized, assuming up-to-date global knowledge and
making global decisions for all nodes in the network. Such global
knowledge and centralized decisions are not realistic in practical
large-scale networks.

To solve this problem, we propose two algorithms for fully
distributed service coordination. The proposed algorithms can be
executed individually at each node in parallel and require only
very limited global knowledge. We compare and evaluate both
algorithms with a state-of-the-art centralized approach in extensive
simulations on a large-scale, real-world network topology.
Our results indicate that the two algorithms can compete with
centralized approaches in terms of solution quality but require
less global knowledge and are magnitudes faster (more than
100x).}},
}

@inproceedings{19609,
  author       = {{Schneider, Stefan Balthasar and Manzoor, Adnan and Qarawlus, Haydar and Schellenberg, Rafael and Karl, Holger and Khalili, Ramin and Hecker, Artur}},
  title        = {{{Self-Driving Network and Service Coordination Using Deep Reinforcement Learning}}},
  booktitle    = {{IEEE International Conference on Network and Service Management (CNSM)}},
  publisher    = {{IEEE}},
  year         = {{2020}},
  keywords     = {{self-driving networks, self-learning, network coordination, service coordination, reinforcement learning, deep learning, nfv}},
  abstract     = {{Modern services comprise interconnected components,
e.g., microservices in a service mesh, that can scale and
run on multiple nodes across the network on demand. To process
incoming traffic, service components have to be instantiated and
traffic assigned to these instances, taking capacities and changing
demands into account. This challenge is usually solved with
custom approaches designed by experts. While this typically
works well for the considered scenario, the models often rely
on unrealistic assumptions or on knowledge that is not available
in practice (e.g., a priori knowledge).

We propose a novel deep reinforcement learning approach that
learns how to best coordinate services and is geared towards
realistic assumptions. It interacts with the network and relies on
available, possibly delayed monitoring information. Rather than
defining a complex model or an algorithm how to achieve an
objective, our model-free approach adapts to various objectives
and traffic patterns. An agent is trained offline without expert
knowledge and then applied online with minimal overhead. Compared
to a state-of-the-art heuristic, it significantly improves flow
throughput and overall network utility on real-world network
topologies and traffic traces. It also learns to optimize different
objectives, generalizes to scenarios with unseen, stochastic traffic
patterns, and scales to large real-world networks.}},
}

@inproceedings{16219,
  abstract     = {{Network function virtualization (NFV) proposes
to replace physical middleboxes with more flexible virtual
network functions (VNFs). To dynamically adjust to ever-changing
traffic demands, VNFs have to be instantiated and
their allocated resources have to be adjusted on demand.
Deciding the amount of allocated resources is non-trivial.
Existing optimization approaches often assume fixed resource
requirements for each VNF instance. However, this can easily
lead to either waste of resources or bad service quality if too
many or too few resources are allocated.

To solve this problem, we train machine learning models
on real VNF data, containing measurements of performance
and resource requirements. For each VNF, the trained models
can then accurately predict the required resources to handle
a certain traffic load. We integrate these machine learning
models into an algorithm for joint VNF scaling and placement
and evaluate their impact on resulting VNF placements. Our
evaluation based on real-world data shows that using suitable
machine learning models effectively avoids over- and underallocation
of resources, leading to up to 12 times lower resource
consumption and better service quality with up to 4.5 times
lower total delay than using standard fixed resource allocation.}},
  author       = {{Schneider, Stefan Balthasar and Satheeschandran, Narayanan Puthenpurayil and Peuster, Manuel and Karl, Holger}},
  booktitle    = {{IEEE Conference on Network Softwarization (NetSoft)}},
  location     = {{Ghent, Belgium}},
  publisher    = {{IEEE}},
  title        = {{{Machine Learning for Dynamic Resource Allocation in Network Function Virtualization}}},
  year         = {{2020}},
}

@inproceedings{16222,
  author       = {{Zafeiropoulos, A. and Fotopoulou, E. and Peuster, Manuel and Schneider, Stefan Balthasar and Gouvas, P. and Behnke, Daniel and Müller, Marcel and Bök, Patrick-Benjamin and Trakadas, Panagiotis and Karkazis, P. and Karl, Holger}},
  booktitle    = {{IEEE Conference on Network Softwarization (NetSoft)}},
  location     = {{Ghent, Belgium}},
  publisher    = {{IEEE}},
  title        = {{{Benchmarking and Profiling 5G Verticals' Applications: An Industrial IoT Use Case}}},
  year         = {{2020}},
}

@inproceedings{16400,
  author       = {{Müller, Marcel and Behnke, Daniel and Bök, Patrick-Benjamin and Schneider, Stefan Balthasar and Peuster, Manuel and Karl, Holger}},
  title        = {{{Cloud-Native Threat Detection and Containment for Smart Manufacturing}}},
  booktitle    = {{IEEE Conference on Network Softwarization (NetSoft) Demo Track}},
  publisher    = {{IEEE}},
  location     = {{Ghent, Belgium}},
  year         = {{2020}},
  abstract     = {{Softwarization facilitates the introduction of smart
manufacturing applications in the industry. Manifold devices
such as machine computers, Industrial IoT devices, tablets,
smartphones and smart glasses are integrated into factory networks
to enable shop floor digitalization and big data analysis. To
handle the increasing number of devices and the resulting traffic,
a flexible and scalable factory network is necessary which can be
realized using softwarization technologies like Network Function
Virtualization (NFV). However, the security risks increase with
the increasing number of new devices, so that cyber security must
also be considered in NFV-based networks.

Therefore, extending our previous work, we showcase threat
detection using a cloud-native NFV-driven intrusion detection
system (IDS) that is integrated in our industrial-specific network
services. As a result of the threat detection, the affected network
service is put into quarantine via automatic network reconfiguration.
We use the 5GTANGO service platform to deploy our
developed network services on Kubernetes and to initiate the
network reconfiguration.}},
}

@inproceedings{3287,
  abstract     = {{For optimal placement and orchestration of network services, it is crucial
that their structure and semantics are specified clearly and comprehensively
and are available to an orchestrator. Existing specification approaches are
either ambiguous or miss important aspects regarding the behavior of virtual
network functions (VNFs) forming a service. We propose to formally and
unambiguously specify the behavior of these functions and services using
Queuing Petri Nets (QPNs). QPNs are an established method that allows to
express queuing, synchronization, stochastically distributed processing delays,
and changing traffic volume and characteristics at each VNF. With QPNs,
multiple VNFs can be connected to complete network services in any structure,
even specifying bidirectional network services containing loops.
  We discuss how management and orchestration systems can benefit from our
clear and comprehensive specification approach, leading to better placement of
VNFs and improved Quality of Service. Another benefit of formally specifying
network services with QPNs are diverse analysis options, which allow valuable
insights such as the distribution of end-to-end delay. We propose a tool-based
workflow that supports the specification of network services and the automatic
generation of corresponding simulation code to enable an in-depth analysis of
their behavior and performance.}},
  author       = {{Schneider, Stefan Balthasar and Sharma, Arnab and Karl, Holger and Wehrheim, Heike}},
  booktitle    = {{2019 IFIP/IEEE International Symposium on Integrated Network Management (IM)}},
  location     = {{Washington, DC, USA}},
  pages        = {{116--124}},
  publisher    = {{IFIP}},
  title        = {{{Specifying and Analyzing Virtual Network Services Using Queuing Petri Nets}}},
  year         = {{2019}},
}

@inproceedings{9270,
  abstract     = {{As 5G and network function virtualization (NFV) are maturing, it becomes crucial to demonstrate their feasibility and benefits by means of vertical scenarios. While 5GPPP has identified smart manufacturing as one of the most important vertical industries, there is still a lack of specific, practical use cases.

Using the experience from a large-scale manufacturing company, Weidm{\"u}ller Group, we present a detailed use case that reflects the needs of real-world manufacturers. We also propose an architecture with specific network services and virtual network functions (VNFs) that realize the use case in practice. As a proof of concept, we implement the required services and deploy them on an emulation-based prototyping platform. Our experimental results indicate that a fully virtualized smart manufacturing use case is not only feasible but also reduces machine interconnection and configuration time and thus improves productivity by orders of magnitude.}},
  author       = {{Schneider, Stefan Balthasar and Peuster, Manuel and Behnke, Daniel and Müller, Marcel and Bök, Patrick-Benjamin and Karl, Holger}},
  booktitle    = {{European Conference on Networks and Communications (EuCNC)}},
  keywords     = {{5g, vertical, smart manufacturing, nfv}},
  publisher    = {{IEEE}},
  title        = {{{Putting 5G into Production: Realizing a Smart Manufacturing Vertical Scenario}}},
  doi          = {{10.1109/eucnc.2019.8802016}},
  year         = {{2019}},
}

@inproceedings{8792,
  abstract     = {{5G together with software defined networking (SDN) and network function virtualisation (NFV) will enable a wide variety of vertical use cases. One of them is the smart manufacturing case which utilises 5G networks to interconnect production machines, machine parks, and factory sites to enable new possibilities in terms of flexibility, automation, and novel applications (industry 4.0). However, the availability of realistic and practical proof-of-concepts for those smart manufacturing scenarios is still limited.
This demo fills this gap by not only showing a real-world smart manufacturing application entirely implemented using NFV concepts, but also a lightweight prototyping framework that simplifies the realisation of vertical NFV proof-of-concepts. During the demo, we show how an NFV-based smart manufacturing scenario can be specified, on-boarded, and instantiated before we demonstrate how the presented NFV services simplify machine data collection, aggregation, and analysis.}},
  author       = {{Peuster, Manuel and Schneider, Stefan Balthasar and Behnke, Daniel and Müller, Marcel and Bök, Patrick-Benjamin and Karl, Holger}},
  booktitle    = {{5th IEEE International Conference on Network Softwarization (NetSoft 2019)}},
  location     = {{Paris}},
  title        = {{{Prototyping and Demonstrating 5G Verticals: The Smart Manufacturing Case}}},
  doi          = {{10.1109/NETSOFT.2019.8806685}},
  year         = {{2019}},
}

@article{9824,
  author       = {{Peuster, Manuel and Schneider, Stefan Balthasar and Zhao, Mengxuan and Xilouris, George and Trakadas, Panagiotis and Vicens, Felipe and Tavernier, Wouter and Soenen, Thomas and Vilalta, Ricard and Andreou, George and Kyriazis, Dimosthenis and Karl, Holger}},
  title        = {{{Introducing Automated Verification and Validation for Virtualized Network Functions and Services}}},
  journal      = {{IEEE Communications Magazine}},
  issn         = {{0163-6804}},
  pages        = {{96--102}},
  year         = {{2019}},
  doi          = {{10.1109/mcom.2019.1800873}},
}

@inproceedings{15369,
  author       = {{Müller, Marcel and Behnke, Daniel and Bök, Patrick-Benjamin and Peuster, Manuel and Schneider, Stefan Balthasar and Karl, Holger}},
  title        = {{{5G as Key Technology for Networked Factories: Application of Vertical-specific Network Services for Enabling Flexible Smart Manufacturing}}},
  booktitle    = {{IEEE 17th International Conference on Industrial Informatics (IEEE-INDIN)}},
  publisher    = {{IEEE}},
  year         = {{2019}},
}

@inproceedings{15371,
  abstract     = {{More and more management and orchestration approaches for (software) networks are based on machine learning paradigms and solutions. These approaches depend not only on their program code to operate properly, but also require enough input data to train their internal models. However, such training data is barely available for the software networking domain and most presented solutions rely on their own, sometimes not even published, data sets. This makes it hard, or even infeasible, to reproduce and compare many of the existing solutions. As a result, it ultimately slows down the adoption of machine learning approaches in softwarised networks. To this end, we introduce the "softwarised network data zoo" (SNDZoo), an open collection of software networking data sets aiming to streamline and ease machine learning research in the software networking domain. We present a general methodology to collect, archive, and publish those data sets for use by other researches and, as an example, eight initial data sets, focusing on the performance of virtualised network functions.}},
  author       = {{Peuster, Manuel and Schneider, Stefan Balthasar and Karl, Holger}},
  booktitle    = {{IEEE/IFIP 15th International Conference on Network and Service Management (CNSM)}},
  publisher    = {{IEEE/IFIP}},
  title        = {{{The Softwarised Network Data Zoo}}},
  year         = {{2019}},
}

@inproceedings{15372,
  author       = {{Nuriddinov, Askhat and Tavernier, Wouter and Colle, Didier and Pickavet, Mario and Peuster, Manuel and Schneider, Stefan Balthasar}},
  booktitle    = {{IEEE Conference on Network Function Virtualization and Software Defined Networks (NFV-SDN)}},
  publisher    = {{IEEE}},
  title        = {{{Reproducible Functional Tests for Multi-scale Network Services}}},
  year         = {{2019}},
}