@unpublished{51160, abstract = {{We rigorously derive novel and sharp finite-data error bounds for highly sample-efficient Extended Dynamic Mode Decomposition (EDMD) for both i.i.d. and ergodic sampling. In particular, we show all results in a very general setting removing most of the typically imposed assumptions such that, among others, discrete- and continuous-time stochastic processes as well as nonlinear partial differential equations are contained in the considered system class. Besides showing an exponential rate for i.i.d. sampling, we prove, to the best of our knowledge, the first superlinear convergence rates for ergodic sampling of deterministic systems. We verify sharpness of the derived error bounds by conducting numerical simulations for highly-complex applications from molecular dynamics and chaotic flame propagation.}}, author = {{Philipp, Friedrich M. and Schaller, Manuel and Boshoff, Septimus and Peitz, Sebastian and Nüske, Feliks and Worthmann, Karl}}, booktitle = {{arXiv:2402.02494}}, title = {{{Extended Dynamic Mode Decomposition: Sharp bounds on the sample efficiency}}}, year = {{2024}}, } @article{46019, abstract = {{We derive efficient algorithms to compute weakly Pareto optimal solutions for smooth, convex and unconstrained multiobjective optimization problems in general Hilbert spaces. To this end, we define a novel inertial gradient-like dynamical system in the multiobjective setting, whose trajectories converge weakly to Pareto optimal solutions. Discretization of this system yields an inertial multiobjective algorithm which generates sequences that converge weakly to Pareto optimal solutions. We employ Nesterov acceleration to define an algorithm with an improved convergence rate compared to the plain multiobjective steepest descent method (Algorithm 1). 
A further improvement in terms of efficiency is achieved by avoiding the solution of a quadratic subproblem to compute a common step direction for all objective functions, which is usually required in first-order methods. Using a different discretization of our inertial gradient-like dynamical system, we obtain an accelerated multiobjective gradient method that does not require the solution of a subproblem in each step (Algorithm 2). While this algorithm does not converge in general, it yields good results on test problems while being faster than standard steepest descent.}}, author = {{Sonntag, Konstantin and Peitz, Sebastian}}, journal = {{Journal of Optimization Theory and Applications}}, publisher = {{Springer}}, title = {{{Fast Multiobjective Gradient Methods with Nesterov Acceleration via Inertial Gradient-Like Systems}}}, doi = {{10.1007/s10957-024-02389-3}}, year = {{2024}}, } @unpublished{51334, abstract = {{The efficient optimization method for locally Lipschitz continuous multiobjective optimization problems from [1] is extended from finite-dimensional problems to general Hilbert spaces. The method iteratively computes Pareto critical points, where in each iteration, an approximation of the subdifferential is computed in an efficient manner and then used to compute a common descent direction for all objective functions. To prove convergence, we present some new optimality results for nonsmooth multiobjective optimization problems in Hilbert spaces. Using these, we can show that every accumulation point of the sequence generated by our algorithm is Pareto critical under common assumptions. 
Computational efficiency for finding Pareto critical points is numerically demonstrated for multiobjective optimal control of an obstacle problem.}}, author = {{Sonntag, Konstantin and Gebken, Bennet and Müller, Georg and Peitz, Sebastian and Volkwein, Stefan}}, booktitle = {{arXiv:2402.06376}}, title = {{{A Descent Method for Nonsmooth Multiobjective Optimization in Hilbert Spaces}}}, year = {{2024}}, } @article{40171, abstract = {{We present a convolutional framework which significantly reduces the complexity and thus, the computational effort for distributed reinforcement learning control of dynamical systems governed by partial differential equations (PDEs). Exploiting translational equivariances, the high-dimensional distributed control problem can be transformed into a multi-agent control problem with many identical, uncoupled agents. Furthermore, using the fact that information is transported with finite velocity in many cases, the dimension of the agents’ environment can be drastically reduced using a convolution operation over the state space of the PDE, by which we effectively tackle the curse of dimensionality otherwise present in deep reinforcement learning. In this setting, the complexity can be flexibly adjusted via the kernel width or by using a stride greater than one (meaning that we do not place an actuator at each sensor location). Moreover, scaling from smaller to larger domains – or the transfer between different domains – becomes a straightforward task requiring little effort. We demonstrate the performance of the proposed framework using several PDE examples with increasing complexity, where stabilization is achieved by training a low-dimensional deep deterministic policy gradient agent using minimal computing resources.}}, author = {{Peitz, Sebastian and Stenner, Jan and Chidananda, Vikas and Wallscheid, Oliver and Brunton, Steven L. 
and Taira, Kunihiko}}, journal = {{Physica D: Nonlinear Phenomena}}, pages = {{134096}}, publisher = {{Elsevier}}, title = {{{Distributed Control of Partial Differential Equations Using Convolutional Reinforcement Learning}}}, doi = {{10.1016/j.physd.2024.134096}}, volume = {{461}}, year = {{2024}}, } @article{33461, abstract = {{Data-driven models for nonlinear dynamical systems based on approximating the underlying Koopman operator or generator have proven to be successful tools for forecasting, feature learning, state estimation, and control. It has become well known that the Koopman generators for control-affine systems also have affine dependence on the input, leading to convenient finite-dimensional bilinear approximations of the dynamics. Yet there are still two main obstacles that limit the scope of current approaches for approximating the Koopman generators of systems with actuation. First, the performance of existing methods depends heavily on the choice of basis functions over which the Koopman generator is to be approximated; and there is currently no universal way to choose them for systems that are not measure preserving. Secondly, if we do not observe the full state, we may not gain access to a sufficiently rich collection of such functions to describe the dynamics. This is because the commonly used method of forming time-delayed observables fails when there is actuation. To remedy these issues, we write the dynamics of observables governed by the Koopman generator as a bilinear hidden Markov model, and determine the model parameters using the expectation-maximization (EM) algorithm. The E-step involves a standard Kalman filter and smoother, while the M-step resembles control-affine dynamic mode decomposition for the generator. 
We demonstrate the performance of this method on three examples, including recovery of a finite-dimensional Koopman-invariant subspace for an actuated system with a slow manifold; estimation of Koopman eigenfunctions for the unforced Duffing equation; and model-predictive control of a fluidic pinball system based only on noisy observations of lift and drag.}}, author = {{Otto, Samuel E. and Peitz, Sebastian and Rowley, Clarence W.}}, journal = {{SIAM Journal on Applied Dynamical Systems}}, number = {{1}}, pages = {{885--923}}, publisher = {{SIAM}}, title = {{{Learning Bilinear Models of Actuated Koopman Generators from Partially-Observed Trajectories}}}, doi = {{10.1137/22M1523601}}, volume = {{23}}, year = {{2024}}, } @article{21199, abstract = {{As in almost every other branch of science, the major advances in data science and machine learning have also resulted in significant improvements regarding the modeling and simulation of nonlinear dynamical systems. It is nowadays possible to make accurate medium to long-term predictions of highly complex systems such as the weather, the dynamics within a nuclear fusion reactor, of disease models or the stock market in a very efficient manner. In many cases, predictive methods are advertised to ultimately be useful for control, as the control of high-dimensional nonlinear systems is an engineering grand challenge with huge potential in areas such as clean and efficient energy production, or the development of advanced medical devices. However, the question of how to use a predictive model for control is often left unanswered due to the associated challenges, namely a significantly higher system complexity, the requirement of much larger data sets and an increased and often problem-specific modeling effort. 
To solve these issues, we present a universal framework (which we call QuaSiModO: Quantization-Simulation-Modeling-Optimization) to transform arbitrary predictive models into control systems and use them for feedback control. The advantages of our approach are a linear increase in data requirements with respect to the control dimension, performance guarantees that rely exclusively on the accuracy of the predictive model, and only little prior knowledge requirements in control theory to solve complex control problems. In particular the latter point is of key importance to enable a large number of researchers and practitioners to exploit the ever increasing capabilities of predictive models for control in a straight-forward and systematic fashion.}}, author = {{Peitz, Sebastian and Bieker, Katharina}}, journal = {{Automatica}}, publisher = {{Elsevier}}, title = {{{On the Universal Transformation of Data-Driven Models to Control Systems}}}, doi = {{10.1016/j.automatica.2022.110840}}, volume = {{149}}, year = {{2023}}, } @unpublished{38031, abstract = {{We consider the data-driven approximation of the Koopman operator for stochastic differential equations on reproducing kernel Hilbert spaces (RKHS). Our focus is on the estimation error if the data are collected from long-term ergodic simulations. We derive both an exact expression for the variance of the kernel cross-covariance operator, measured in the Hilbert-Schmidt norm, and probabilistic bounds for the finite-data estimation error. Moreover, we derive a bound on the prediction error of observables in the RKHS using a finite Mercer series expansion. Further, assuming Koopman-invariance of the RKHS, we provide bounds on the full approximation error. 
Numerical experiments using the Ornstein-Uhlenbeck process illustrate our results.}}, author = {{Philipp, Friedrich and Schaller, Manuel and Worthmann, Karl and Peitz, Sebastian and Nüske, Feliks}}, booktitle = {{arXiv:2301.08637}}, title = {{{Error bounds for kernel-based approximations of the Koopman operator}}}, year = {{2023}}, } @unpublished{42160, abstract = {{The goal of this paper is to make a strong point for the usage of dynamical models when using reinforcement learning (RL) for feedback control of dynamical systems governed by partial differential equations (PDEs). To bridge the gap between the immense promises we see in RL and the applicability in complex engineering systems, the main challenges are the massive requirements in terms of the training data, as well as the lack of performance guarantees. We present a solution for the first issue using a data-driven surrogate model in the form of a convolutional LSTM with actuation. We demonstrate that learning an actuated model in parallel to training the RL agent significantly reduces the total amount of required data sampled from the real system. Furthermore, we show that iteratively updating the model is of major importance to avoid biases in the RL training. Detailed ablation studies reveal the most important ingredients of the modeling process. We use the chaotic Kuramoto-Sivashinsky equation to demonstrate our findings.}}, author = {{Werner, Stefan and Peitz, Sebastian}}, booktitle = {{arXiv:2302.07160}}, title = {{{Learning a model is paramount for sample efficiency in reinforcement learning control of PDEs}}}, year = {{2023}}, } @article{27426, abstract = {{Regularization is used in many different areas of optimization when solutions are sought which not only minimize a given function, but also possess a certain degree of regularity. Popular applications are image denoising, sparse regression and machine learning. 
Since the choice of the regularization parameter is crucial but often difficult, path-following methods are used to approximate the entire regularization path, i.e., the set of all possible solutions for all regularization parameters. Due to their nature, the development of these methods requires structural results about the regularization path. The goal of this article is to derive these results for the case of a smooth objective function which is penalized by a piecewise differentiable regularization term. We do this by treating regularization as a multiobjective optimization problem. Our results suggest that even in this general case, the regularization path is piecewise smooth. Moreover, our theory allows for a classification of the nonsmooth features that occur in between smooth parts. This is demonstrated in two applications, namely support-vector machines and exact penalty methods.}}, author = {{Gebken, Bennet and Bieker, Katharina and Peitz, Sebastian}}, journal = {{Journal of Global Optimization}}, number = {{3}}, pages = {{709--741}}, title = {{{On the structure of regularization paths for piecewise differentiable regularization terms}}}, doi = {{10.1007/s10898-022-01223-2}}, volume = {{85}}, year = {{2023}}, } @inproceedings{30125, abstract = {{We present an approach for guaranteed constraint satisfaction by means of data-based optimal control, where the model is unknown and has to be obtained from measurement data. To this end, we utilize the Koopman framework and an eDMD-based bilinear surrogate modeling approach for control systems to show an error bound on predicted observables, i.e., functions of the state. 
This result is then applied to the constraints of the optimal control problem to show that satisfaction of tightened constraints in the purely data-based surrogate model implies constraint satisfaction for the original system.}}, author = {{Schaller, Manuel and Worthmann, Karl and Philipp, Friedrich and Peitz, Sebastian and Nüske, Feliks}}, booktitle = {{IFAC-PapersOnLine}}, number = {{1}}, pages = {{169--174}}, title = {{{Towards reliable data-based optimal and predictive control using extended DMD}}}, doi = {{10.1016/j.ifacol.2023.02.029}}, volume = {{56}}, year = {{2023}}, } @inproceedings{45695, author = {{Hotegni, Sedjro Salomon and Mahabadi, Sepideh and Vakilian, Ali}}, booktitle = {{Proceedings of the 40th International Conference on Machine Learning, Honolulu, Hawaii, USA. PMLR 202, 2023.}}, keywords = {{Fair range clustering}}, location = {{Honolulu, Hawaii, USA}}, title = {{{Approximation Algorithms for Fair Range Clustering}}}, year = {{2023}}, } @unpublished{46579, abstract = {{The Koopman operator has become an essential tool for data-driven analysis, prediction and control of complex systems, the main reason being the enormous potential of identifying linear function space representations of nonlinear dynamics from measurements. Until now, the situation where for large-scale systems, we (i) only have access to partial observations (i.e., measurements, as is very common for experimental data) or (ii) deliberately perform coarse graining (for efficiency reasons) has not been treated to its full extent. In this paper, we address the pitfall associated with this situation, that the classical EDMD algorithm does not automatically provide a Koopman operator approximation for the underlying system if we do not carefully select the number of observables. Moreover, we show that symmetries in the system dynamics can be carried over to the Koopman operator, which allows us to massively increase the model efficiency. 
We also briefly draw a connection to domain decomposition techniques for partial differential equations and present numerical evidence using the Kuramoto--Sivashinsky equation.}}, author = {{Peitz, Sebastian and Harder, Hans and Nüske, Feliks and Philipp, Friedrich and Schaller, Manuel and Worthmann, Karl}}, booktitle = {{arXiv:2307.15325}}, title = {{{Partial observations, coarse graining and equivariance in Koopman operator theory for large-scale dynamical systems}}}, year = {{2023}}, } @article{23428, abstract = {{The Koopman operator has become an essential tool for data-driven approximation of dynamical (control) systems in recent years, e.g., via extended dynamic mode decomposition. Despite its popularity, convergence results and, in particular, error bounds are still quite scarce. In this paper, we derive probabilistic bounds for the approximation error and the prediction error depending on the number of training data points; for both ordinary and stochastic differential equations. Moreover, we extend our analysis to nonlinear control-affine systems using either ergodic trajectories or i.i.d. samples. Here, we exploit the linearity of the Koopman generator to obtain a bilinear system and, thus, circumvent the curse of dimensionality since we do not autonomize the system by augmenting the state by the control inputs. To the best of our knowledge, this is the first finite-data error analysis in the stochastic and/or control setting. 
Finally, we demonstrate the effectiveness of the proposed approach by comparing it with state-of-the-art techniques showing its superiority whenever state and control are coupled.}}, author = {{Nüske, Feliks and Peitz, Sebastian and Philipp, Friedrich and Schaller, Manuel and Worthmann, Karl}}, journal = {{Journal of Nonlinear Science}}, title = {{{Finite-data error bounds for Koopman-based prediction and control}}}, doi = {{10.1007/s00332-022-09862-1}}, volume = {{33}}, year = {{2023}}, } @unpublished{46649, abstract = {{Different conflicting optimization criteria arise naturally in various Deep Learning scenarios. These can address different main tasks (i.e., in the setting of Multi-Task Learning), but also main and secondary tasks such as loss minimization versus sparsity. The usual approach is a simple weighting of the criteria, which formally only works in the convex setting. In this paper, we present a Multi-Objective Optimization algorithm using a modified Weighted Chebyshev scalarization for training Deep Neural Networks (DNNs) with respect to several tasks. By employing this scalarization technique, the algorithm can identify all optimal solutions of the original problem while reducing its complexity to a sequence of single-objective problems. The simplified problems are then solved using an Augmented Lagrangian method, enabling the use of popular optimization techniques such as Adam and Stochastic Gradient Descent, while efficaciously handling constraints. Our work aims to address the (economical and also ecological) sustainability issue of DNN models, with a particular focus on Deep Multi-Task models, which are typically designed with a very large number of weights to perform equally well on multiple tasks. 
Through experiments conducted on two Machine Learning datasets, we demonstrate the possibility of adaptively sparsifying the model during training without significantly impacting its performance, if we are willing to apply task-specific adaptations to the network weights. Code is available at https://github.com/salomonhotegni/MDMTN.}}, author = {{Hotegni, Sedjro Salomon and Peitz, Sebastian and Berkemeier, Manuel Bastian}}, booktitle = {{arXiv:2308.12243}}, pages = {{13}}, title = {{{Multi-Objective Optimization for Sparse Deep Neural Network Training}}}, year = {{2023}}, } @article{21600, abstract = {{Many problems in science and engineering require an efficient numerical approximation of integrals or solutions to differential equations. For systems with rapidly changing dynamics, an equidistant discretization is often inadvisable as it results in prohibitively large errors or computational effort. To this end, adaptive schemes, such as solvers based on Runge–Kutta pairs, have been developed which adapt the step size based on local error estimations at each step. While the classical schemes apply very generally and are highly efficient on regular systems, they can behave suboptimally when an inefficient step rejection mechanism is triggered by structurally complex systems such as chaotic systems. To overcome these issues, we propose a method to tailor numerical schemes to the problem class at hand. This is achieved by combining simple, classical quadrature rules or ODE solvers with data-driven time-stepping controllers. Compared with learning solution operators to ODEs directly, it generalizes better to unseen initial data as our approach employs classical numerical schemes as base methods. At the same time it can make use of identified structures of a problem class and, therefore, outperforms state-of-the-art adaptive schemes. Several examples demonstrate superior efficiency. 
Source code is available at https://github.com/lueckem/quadrature-ML.}}, author = {{Dellnitz, Michael and Hüllermeier, Eyke and Lücke, Marvin and Ober-Blöbaum, Sina and Offen, Christian and Peitz, Sebastian and Pfannschmidt, Karlson}}, journal = {{SIAM Journal on Scientific Computing}}, number = {{2}}, pages = {{A579--A595}}, title = {{{Efficient time stepping for numerical integration using reinforcement learning}}}, doi = {{10.1137/21M1412682}}, volume = {{45}}, year = {{2023}}, } @article{46784, author = {{Wallscheid, Oliver and Peitz, Sebastian and Stenner, Jan and Weber, Daniel and Boshoff, Septimus and Meyer, Marvin and Chidananda, Vikas and Schweins, Oliver}}, issn = {{2475-9066}}, journal = {{Journal of Open Source Software}}, keywords = {{General Earth and Planetary Sciences, General Environmental Science}}, number = {{89}}, publisher = {{The Open Journal}}, title = {{{ElectricGrid.jl - A Julia-based modeling and simulation tool for power electronics-driven electric energy grids}}}, doi = {{10.21105/joss.05616}}, volume = {{8}}, year = {{2023}}, } @inproceedings{46813, abstract = {{Modelling of dynamic systems plays an important role in many engineering disciplines. Two different approaches are physical modelling and data‐driven modelling, both of which have their respective advantages and disadvantages. By combining these two approaches, hybrid models can be created in which the respective disadvantages are mitigated, with discrepancy models being a particular subclass. Here, the basic system behaviour is described physically, that is, in the form of differential equations. Inaccuracies resulting from insufficient modelling or numerics lead to a discrepancy between the measurements and the model, which can be compensated by a data‐driven error correction term. 
Since discrepancy methods still require a large amount of measurement data, this paper investigates the extent to which a single discrepancy model can be trained for a physical model with additional parameter dependencies without the need for retraining. As an example, a damped electromagnetic oscillating circuit is used. The physical model is realised by a differential equation describing the electric current, considering only inductance and capacitance; dissipation due to resistance is neglected. This creates a discrepancy between measurement and model, which is corrected by a data‐driven model. In the experiments, the inductance and the capacity are varied. It is found that the same data‐driven model can only be used if additional parametric dependencies in the data‐driven term are considered as well.}}, author = {{Wohlleben, Meike Claudia and Muth, Lars and Peitz, Sebastian and Sextro, Walter}}, booktitle = {{Proceedings in Applied Mathematics and Mechanics}}, issn = {{1617-7061}}, keywords = {{Electrical and Electronic Engineering, Atomic and Molecular Physics, and Optics}}, publisher = {{Wiley}}, title = {{{Transferability of a discrepancy model for the dynamics of electromagnetic oscillating circuits}}}, doi = {{10.1002/pamm.202300039}}, year = {{2023}}, } @unpublished{48502, abstract = {{The prediction of photon echoes is an important technique for gaining an understanding of optical quantum systems. However, this requires a large number of simulations with varying parameters and/or input pulses, which renders numerical studies expensive. This article investigates how we can use data-driven surrogate models based on the Koopman operator to accelerate this process. In order to be successful, we require a model that is accurate over a large number of time steps. To this end, we employ a bilinear Koopman model using extended dynamic mode decomposition and simulate the optical Bloch equations for an ensemble of inhomogeneously broadened two-level systems. 
Such systems are well suited to describe the excitation of excitonic resonances in semiconductor nanostructures, for example, ensembles of semiconductor quantum dots. We perform a detailed study on the required number of system simulations such that the resulting data-driven Koopman model is sufficiently accurate for a wide range of parameter settings. We analyze the L2 error and the relative error of the photon echo peak and investigate how the control positions relate to the stabilization. After proper training, the dynamics of the quantum ensemble can be predicted accurately and numerically very efficiently by our methods.}}, author = {{Peitz, Sebastian and Hunstig, Anna and Rose, Hendrik and Meier, Torsten}}, title = {{{Accelerating the analysis of optical quantum systems using the Koopman operator}}}, year = {{2023}}, } @unpublished{51159, abstract = {{Sparsity is a highly desired feature in deep neural networks (DNNs) since it ensures numerical efficiency, improves the interpretability of models (due to the smaller number of relevant features), and robustness. In machine learning approaches based on linear models, it is well known that there exists a connecting path between the sparsest solution in terms of the $\ell^1$ norm, i.e., zero weights and the non-regularized solution, which is called the regularization path. Very recently, there was a first attempt to extend the concept of regularization paths to DNNs by means of treating the empirical loss and sparsity ($\ell^1$ norm) as two conflicting criteria and solving the resulting multiobjective optimization problem. However, due to the non-smoothness of the $\ell^1$ norm and the high number of parameters, this approach is not very efficient from a computational perspective. To overcome this limitation, we present an algorithm that allows for the approximation of the entire Pareto front for the above-mentioned objectives in a very efficient manner. 
We present numerical examples using both deterministic and stochastic gradients. We furthermore demonstrate that knowledge of the regularization path allows for a well-generalizing network parametrization.}}, author = {{Amakor, Augustina Chidinma and Sonntag, Konstantin and Peitz, Sebastian}}, booktitle = {{arXiv}}, title = {{{A multiobjective continuation method to compute the regularization path of deep neural networks}}}, year = {{2023}}, } @unpublished{51158, abstract = {{Extended Dynamic Mode Decomposition (EDMD) is a popular data-driven method to approximate the Koopman operator for deterministic and stochastic (control) systems. This operator is linear and encompasses full information on the (expected stochastic) dynamics. In this paper, we analyze a kernel-based EDMD algorithm, known as kEDMD, where the dictionary consists of the canonical kernel features at the data points. The latter are acquired by i.i.d. samples from a user-defined and application-driven distribution on a compact set. We prove bounds on the prediction error of the kEDMD estimator when sampling from this (not necessarily ergodic) distribution. The error analysis is further extended to control-affine systems, where the considered invariance of the Reproducing Kernel Hilbert Space is significantly less restrictive in comparison to invariance assumptions on an a-priori chosen dictionary.}}, author = {{Philipp, Friedrich and Schaller, Manuel and Worthmann, Karl and Peitz, Sebastian and Nüske, Feliks}}, booktitle = {{arXiv:2312.10460}}, title = {{{Error analysis of kernel EDMD for prediction and control in the Koopman framework}}}, year = {{2023}}, }