@inproceedings{46649,
  abstract     = {Different conflicting optimization criteria arise naturally in various Deep
Learning scenarios. These can address different main tasks (i.e., in the
setting of Multi-Task Learning), but also main and secondary tasks such as loss
minimization versus sparsity. The usual approach is a simple weighting of the
criteria, which formally only works in the convex setting. In this paper, we
present a Multi-Objective Optimization algorithm using a modified Weighted
Chebyshev scalarization for training Deep Neural Networks (DNNs) with respect
to several tasks. By employing this scalarization technique, the algorithm can
identify all optimal solutions of the original problem while reducing its
complexity to a sequence of single-objective problems. The simplified problems
are then solved using an Augmented Lagrangian method, enabling the use of
popular optimization techniques such as Adam and Stochastic Gradient Descent,
while efficaciously handling constraints. Our work aims to address the
(economical and also ecological) sustainability issue of DNN models, with a
particular focus on Deep Multi-Task models, which are typically designed with a
very large number of weights to perform equally well on multiple tasks. Through
experiments conducted on two Machine Learning datasets, we demonstrate the
possibility of adaptively sparsifying the model during training without
significantly impacting its performance, if we are willing to apply
task-specific adaptations to the network weights. Code is available at
https://github.com/salomonhotegni/MDMTN.},
  author       = {Hotegni, Sedjro Salomon and Berkemeier, Manuel Bastian and Peitz, Sebastian},
  booktitle    = {2024 International Joint Conference on Neural Networks ({IJCNN})},
  issn         = {2161-4407},
  location     = {Yokohama, Japan},
  pages        = {9},
  publisher    = {IEEE},
  title        = {Multi-Objective Optimization for Sparse Deep {Multi-Task} Learning},
  doi          = {10.1109/IJCNN60899.2024.10650994},
  year         = {2024},
}

@unpublished{33150,
  abstract     = {In this article, we build on previous work to present an optimization algorithm for nonlinearly constrained multi-objective optimization problems. The algorithm combines a surrogate-assisted derivative-free trust-region approach with the filter method known from single-objective optimization. Instead of the true objective and constraint functions, so-called fully linear models are employed and we show how to deal with the gradient inexactness in the composite step setting, adapted from single-objective optimization as well. Under standard assumptions, we prove convergence of a subset of iterates to a quasi-stationary point and if constraint qualifications hold, then the limit point is also a KKT-point of the multi-objective problem.},
  author       = {Berkemeier, Manuel Bastian and Peitz, Sebastian},
  eprint       = {2208.12094},
  eprinttype   = {arXiv},
  note         = {Preprint, arXiv:2208.12094},
  title        = {Multi-Objective Trust-Region Filter Method for Nonlinear Constraints using Inexact Gradients},
  year         = {2022},
}

@article{21337,
  abstract     = {We present a flexible trust region descend algorithm for unconstrained and
convexly constrained multiobjective optimization problems. It is targeted at
heterogeneous and expensive problems, i.e., problems that have at least one
objective function that is computationally expensive. The method is
derivative-free in the sense that neither need derivative information be
available for the expensive objectives nor are gradients approximated using
repeated function evaluations as is the case in finite-difference methods.
Instead, a multiobjective trust region approach is used that works similarly to
its well-known scalar pendants. Local surrogate models constructed from
evaluation data of the true objective functions are employed to compute
possible descent directions. In contrast to existing multiobjective trust
region algorithms, these surrogates are not polynomial but carefully
constructed radial basis function networks. This has the important advantage
that the number of data points scales linearly with the parameter space
dimension. The local models qualify as fully linear and the corresponding
general scalar framework is adapted for problems with multiple objectives.
Convergence to Pareto critical points is proven and numerical examples
illustrate our findings.},
  author       = {Berkemeier, Manuel Bastian and Peitz, Sebastian},
  issn         = {2297-8747},
  journal      = {Mathematical and Computational Applications},
  number       = {2},
  title        = {Derivative-Free Multiobjective Trust Region Descent Method Using Radial Basis Function Surrogate Models},
  doi          = {10.3390/mca26020031},
  volume       = {26},
  year         = {2021},
}

