@inproceedings{32811,
  abstract     = {{The decentralized nature of multi-agent systems requires continuous data exchange to achieve global objectives. In such scenarios, Age of Information (AoI) has become an important metric of the freshness of exchanged data due to the error-proneness and delays of communication systems. Communication systems usually possess dependencies: the process describing the success or failure of communication attempts is highly correlated when these attempts are ``close'' in some domain (e.g., in time, frequency, space or code, as in wireless communication) and is, in general, non-stationary. To study AoI in such scenarios, we consider an abstract event-based AoI process $\Delta(n)$, expressing the time since the last update: If, at time $n$, a monitoring node receives a status update from a source node (event $A(n-1)$ occurs), then $\Delta(n)$ is reset to one; otherwise, $\Delta(n)$ grows linearly in time. This AoI process can thus be viewed as a special random walk with resets. The event process $A(n)$ may be non-stationary, and we merely assume that its temporal dependencies decay sufficiently fast, as described by $\alpha$-mixing. We calculate moment bounds for the resulting AoI process as a function of the mixing rate of $A(n)$. Furthermore, we prove that the AoI process $\Delta(n)$ is itself $\alpha$-mixing, from which we conclude a strong law of large numbers for $\Delta(n)$. These results are new, since AoI processes have not been studied so far in this general strongly mixing setting. This opens up future work on renewal processes with non-independent interarrival times.}},
  author       = {{Redder, Adrian and Ramaswamy, Arunselvan and Karl, Holger}},
  booktitle    = {{Proceedings of the 58th Allerton Conference on Communication, Control, and Computing}},
  title        = {{{Age of Information Process under Strongly Mixing Communication -- Moment Bound, Mixing Rate and Strong Law}}},
  year         = {{2022}},
}
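The event-based AoI recursion described in the abstract is compact enough to state in code. Below is a minimal simulation sketch, assuming an i.i.d. Bernoulli stand-in for the update events A(n); the paper itself allows non-stationary, alpha-mixing event processes, so the constant success probability is purely a placeholder.

import random

def simulate_aoi(success_prob, horizon, seed=0):
    # AoI recursion from the abstract: Delta(n) resets to one when the
    # update event A(n-1) occurs and otherwise grows by one per step.
    # A(n) is i.i.d. Bernoulli here for illustration only.
    rng = random.Random(seed)
    delta, trace = 1, []
    for _ in range(horizon):
        event = rng.random() < success_prob  # placeholder for A(n-1)
        delta = 1 if event else delta + 1
        trace.append(delta)
    return trace

# The paper's strong law concerns time averages of Delta(n); in this
# i.i.d. special case the average tends to 1/success_prob.
avg_age = sum(simulate_aoi(0.3, 100000)) / 100000.0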

@inproceedings{30793,
  author       = {{Redder, Adrian and Ramaswamy, Arunselvan and Karl, Holger}},
  booktitle    = {{Proceedings of the 14th International Conference on Agents and Artificial Intelligence}},
  publisher    = {{SCITEPRESS - Science and Technology Publications}},
  title        = {{{Multi-agent Policy Gradient Algorithms for Cyber-physical Systems with Lossy Communication}}},
  doi          = {{10.5220/0010845400003116}},
  year         = {{2022}},
}

@unpublished{30790,
  abstract     = {{Iterative distributed optimization algorithms involve multiple agents that
communicate with each other, over time, in order to minimize/maximize a global
objective. In the presence of unreliable communication networks, the
Age-of-Information (AoI), which measures the freshness of data received, may be
large and hence hinder algorithmic convergence. In this paper, we study the
convergence of general distributed gradient-based optimization algorithms in
the presence of communication that neither happens periodically nor at
stochastically independent points in time. We show that convergence is
guaranteed provided the random variables associated with the AoI processes are
stochastically dominated by a random variable with finite first moment. This
improves on previous requirements of boundedness of more than the first moment.
We then introduce stochastically strongly connected (SSC) networks, a new
stochastic form of strong connectedness for time-varying networks. We show: If
for any $p \ge 0$ the processes that describe the success of communication
between agents in an SSC network are $\alpha$-mixing with $n^{p-1}\alpha(n)$
summable, then the associated AoI processes are stochastically dominated by a
random variable with finite $p$-th moment. In combination with our first
contribution, this implies that distributed stochastic gradient descent
converges in the presence of AoI, if $\alpha(n)$ is summable.}},
  author       = {{Redder, Adrian and Ramaswamy, Arunselvan and Karl, Holger}},
  note         = {{arXiv:2201.11343}},
  title        = {{{Distributed gradient-based optimization in the presence of dependent aperiodic communication}}},
  year         = {{2022}},
}
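A toy rendering of the setting studied above: two agents take gradient steps on local quadratic objectives while exchanging iterates over an unreliable link, and on a failed exchange the remote copy simply ages, which is exactly what the AoI process measures. The quadratic targets, step sizes and the i.i.d. success probability are invented stand-ins; the paper covers dependent, alpha-mixing communication processes.

import numpy as np

rng = np.random.default_rng(0)
targets = [np.array([1.0, 0.0]), np.array([0.0, -1.0])]  # placeholder local objectives
x = [np.zeros(2), np.zeros(2)]          # local iterates
last_seen = [x[1].copy(), x[0].copy()]  # possibly stale copy of the other agent's iterate

for n in range(1, 2001):
    step = 1.0 / (10 + n)  # diminishing step size
    for i in (0, 1):
        # Gradient of ||z - target_i||^2 evaluated at the average of the
        # local iterate and the (aged) remote iterate.
        z = 0.5 * (x[i] + last_seen[i])
        x[i] = x[i] - step * 2.0 * (z - targets[i])
    for i in (0, 1):
        # Communication succeeds with probability 0.7 (i.i.d. stand-in for
        # the alpha-mixing success processes); on failure the copy ages.
        if rng.random() < 0.7:
            last_seen[i] = x[1 - i].copy()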

@unpublished{30791,
  abstract     = {{We present sufficient conditions that ensure convergence of the multi-agent
Deep Deterministic Policy Gradient (DDPG) algorithm. It is an example of one of
the most popular paradigms of Deep Reinforcement Learning (DeepRL) for tackling
continuous action spaces: the actor-critic paradigm. In the setting considered
herein, each agent observes a part of the global state space in order to take
local actions, for which it receives local rewards. For every agent, DDPG
trains a local actor (policy) and a local critic (Q-function). The analysis
shows that multi-agent DDPG using neural networks to approximate the local
policies and critics converges to limits with the following properties: The
critic limits minimize the average squared Bellman loss; the actor limits
parameterize a policy that maximizes the local critic's approximation of
$Q_i^*$, where $i$ is the agent index. The averaging is with respect to a
probability distribution over the global state-action space. It captures the
asymptotics of all local training processes. Finally, we extend the analysis to
a fully decentralized setting where agents communicate over a wireless network
prone to delays and losses; a typical scenario in, e.g., robotic applications.}},
  author       = {{Redder, Adrian and Ramaswamy, Arunselvan and Karl, Holger}},
  note         = {{arXiv:2201.00570}},
  title        = {{{Asymptotic Convergence of Deep Multi-Agent Actor-Critic Algorithms}}},
  year         = {{2022}},
}
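In symbols (notation assumed here, not taken from the paper), the two local objects that multi-agent DDPG trains for agent i are the critic, fit by a squared Bellman loss, and the actor, which ascends the critic's value estimate:

\[
\mathcal{L}_i(\theta_i) = \mathbb{E}\big[\big(r_i + \gamma\, Q_{\theta_i^-}(s', \mu_{\phi_i^-}(s')) - Q_{\theta_i}(s, a_i)\big)^2\big],
\qquad
J_i(\phi_i) = \mathbb{E}\big[Q_{\theta_i}(s, \mu_{\phi_i}(s))\big],
\]

where \theta_i^- and \phi_i^- denote target-network copies and the expectations are with respect to a distribution over the global state-action space, matching the abstract's characterization of the critic and actor limits.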

@article{32854,
  author       = {{Redder, Adrian and Ramaswamy, Arunselvan and Karl, Holger}},
  journal      = {{IFAC-PapersOnLine}},
  number       = {{13}},
  pages        = {{133–138}},
  publisher    = {{Elsevier}},
  title        = {{{Practical Network Conditions for the Convergence of Distributed Optimization}}},
  volume       = {{55}},
  year         = {{2022}},
}

@article{24142,
  author       = {{Ramaswamy, Arunselvan and Bhatnagar, Shalabh}},
  journal      = {{Mathematics of Operations Research (to appear)}},
  title        = {{{Analyzing Approximate Value Iteration Algorithms}}},
  year         = {{2021}},
}

@inproceedings{24143,
  author       = {{Drees, Jan Peter and Gupta, Pritha and Hüllermeier, Eyke and Jager, Tibor and Konze, Alexander and Priesterjahn, Claudia and Ramaswamy, Arunselvan and Somorovsky, Juraj}},
  booktitle    = {{Proceedings of the 14th ACM Workshop on Artificial Intelligence and Security}},
  title        = {{{Automated Detection of Side Channels in Cryptographic Protocols: DROWN the ROBOTs!}}},
  year         = {{2021}},
}

@article{24148,
  author       = {{Ramaswamy, Arunselvan and Hüllermeier, Eyke}},
  journal      = {{IEEE Transactions on Artificial Intelligence (to appear)}},
  title        = {{{Deep Q-Learning: Theoretical Insights from an Asymptotic Analysis}}},
  year         = {{2021}},
}

@inproceedings{21478,
  abstract     = {{In this work, we use autonomous vehicles to improve the performance of Wireless Sensor Networks (WSNs). In contrast to other autonomous vehicle applications, WSNs have two metrics for performance evaluation: first, quality of information (QoI), which measures the quality of the sensed data (e.g., measurement uncertainties or signal strength); second, quality of service (QoS), which measures the network's performance in forwarding data (e.g., delay and packet losses). As a use case, we consider wireless acoustic sensor networks, where a group of speakers moves inside a room and autonomous vehicles equipped with microphones stream the audio data. We formulate the problem as a Markov decision problem (MDP) and solve it using Deep Q-Networks (DQN). Additionally, we compare the performance of the DQN solution to two different real-world implementations: speakers holding/passing microphones, and microphones preinstalled in fixed positions. We show that the performance of autonomous vehicles in terms of QoI and QoS is better than that of the real-world implementations in some scenarios. Moreover, we study the impact of the vehicles' speed on the learning process of the DQN solution and show how low speeds degrade performance. Finally, we compare the DQN solution to a heuristic one and provide a theoretical analysis of the performance with respect to dynamic WSNs.}},
  author       = {{Afifi, Haitham and Ramaswamy, Arunselvan and Karl, Holger}},
  booktitle    = {{2021 IEEE International Conference on Communications (ICC): IoT and Sensor Networks Symposium (IEEE ICC'21 - IoTSN Symposium)}},
  title        = {{{Reinforcement Learning for Autonomous Vehicle Movements in Wireless Sensor Networks}}},
  year         = {{2021}},
}
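The DQN machinery referenced in the abstract ultimately regresses Q(s, a) toward a bootstrapped target. A framework-free sketch of that target computation follows; array shapes and numbers are illustrative only.

import numpy as np

def dqn_targets(q_next, rewards, dones, gamma=0.99):
    # Standard DQN bootstrap target: r + gamma * max_a' Q_target(s', a'),
    # cut off at terminal states. q_next has shape (batch, num_actions).
    return rewards + gamma * (1.0 - dones) * q_next.max(axis=1)

# Example batch of 3 transitions with 2 actions (values are arbitrary).
q_next = np.array([[0.5, 1.0], [0.2, 0.1], [0.0, 0.3]])
targets = dqn_targets(q_next,
                      rewards=np.array([1.0, 0.0, -1.0]),
                      dones=np.array([0.0, 0.0, 1.0]))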

@inproceedings{21479,
  abstract     = {{Two of the most important metrics when developing Wireless Sensor Network (WSN) applications are Quality of Information (QoI) and Quality of Service (QoS). The former specifies the quality of the data collected by the sensors (e.g., measurement error or signal intensity), while the latter describes the network's performance and availability (e.g., packet losses and latency). In this paper, we consider an example of wireless acoustic sensor networks, where we select a subset of microphones for two different objectives. First, we maximize the recording quality under QoS constraints. Second, we apply a trade-off between QoI and QoS. We formulate the problem as a constrained Markov Decision Problem (MDP) and solve it using reinforcement learning (RL). We compare the RL solution to a baseline model and show that, for the QoS-guarantee objective, the RL solution has an optimality gap of at most 1\%. Meanwhile, for the trade-off objective, the RL solution outperforms the baseline by up to 23\%.}},
  author       = {{Afifi, Haitham and Ramaswamy, Arunselvan and Karl, Holger}},
  booktitle    = {{2021 IEEE 18th Annual Consumer Communications \& Networking Conference (CCNC) (CCNC 2021)}},
  keywords     = {{reinforcement learning, wireless sensor networks, resource allocation, acoustic sensor networks}},
  title        = {{{A Reinforcement Learning QoI/QoS-Aware Approach in Acoustic Sensor Networks}}},
  year         = {{2021}},
}
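One common way to handle a constrained MDP of this kind, though not necessarily the one used above, is Lagrangian relaxation: fold the QoS cost into the reward via a multiplier and update the multiplier by dual ascent. Both helpers below are hypothetical illustrations.

def lagrangian_reward(qoi_reward, qos_cost, lmbda):
    # Scalarized reward r - lambda * cost for a constrained MDP
    # (hypothetical; the paper's exact objective differs).
    return qoi_reward - lmbda * qos_cost

def dual_update(lmbda, avg_qos_cost, budget, step=0.01):
    # Dual ascent on lambda, projected back onto [0, infinity).
    return max(0.0, lmbda + step * (avg_qos_cost - budget))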

@article{24140,
  author       = {{Ramaswamy, Arunselvan and Redder, Adrian and Quevedo, Daniel E.}},
  journal      = {{IEEE Transactions on Automatic Control}},
  title        = {{{Distributed optimization over time-varying networks with stochastic information delays}}},
  doi          = {{10.1109/TAC.2021.3108492}},
  year         = {{2021}},
}

@inproceedings{24145,
  author       = {{Ramaswamy, Arunselvan}},
  booktitle    = {{2020 28th Euromicro International Conference on Parallel, Distributed and Network-Based Processing (PDP)}},
  pages        = {{54--62}},
  title        = {{{DSPG: Decentralized Simultaneous Perturbations Gradient Descent Scheme}}},
  year         = {{2020}},
}

@inproceedings{24146,
  author       = {{Heid, Stefan Helmut and Ramaswamy, Arunselvan and Hüllermeier, Eyke}},
  booktitle    = {{Proceedings of the 30th Workshop Computational Intelligence, Berlin, 26–27 November 2020}},
  pages        = {{247}},
  title        = {{{Constrained Multi-Agent Optimization with Unbounded Information Delay}}},
  volume       = {{26}},
  year         = {{2020}},
}

@article{24147,
  author       = {{Ramaswamy, Arunselvan and Bhatnagar, Shalabh and Quevedo, Daniel E.}},
  journal      = {{IEEE Transactions on Automatic Control}},
  publisher    = {{IEEE}},
  title        = {{{Asynchronous stochastic approximations with asymptotically biased errors and deep multi-agent learning}}},
  year         = {{2020}},
}

@inproceedings{24141,
  author       = {{König, Jonas and Malberg, Simon and Martens, Martin and Niehaus, Sebastian and Krohn-Grimberghe, Artus and Ramaswamy, Arunselvan}},
  booktitle    = {{Science and Information Conference}},
  pages        = {{178--191}},
  title        = {{{Multi-stage reinforcement learning for object detection}}},
  year         = {{2019}},
}

@article{15741,
  abstract     = {{In many cyber–physical systems, we encounter the problem of remote state estimation of geographically distributed and remote physical processes. This paper studies the scheduling of sensor transmissions to estimate the states of multiple remote, dynamic processes. Information from the different sensors has to be transmitted to a central gateway over a wireless network for monitoring purposes, where typically fewer wireless channels are available than there are processes to be monitored. For effective estimation at the gateway, the sensors need to be scheduled appropriately, i.e., at each time instant one needs to decide which sensors have network access and which ones do not. To address this scheduling problem, we formulate an associated Markov decision process (MDP). This MDP is then solved using a Deep Q-Network, a recent deep reinforcement learning algorithm that is at once scalable and model-free. We compare our scheduling algorithm to popular scheduling algorithms such as round-robin and reduced-waiting-time, among others. Our algorithm is shown to significantly outperform these algorithms for many example scenarios.}},
  author       = {{Leong, Alex S. and Ramaswamy, Arunselvan and Quevedo, Daniel E. and Karl, Holger and Shi, Ling}},
  issn         = {{0005-1098}},
  journal      = {{Automatica}},
  title        = {{{Deep reinforcement learning for wireless sensor scheduling in cyber–physical systems}}},
  doi          = {{10.1016/j.automatica.2019.108759}},
  year         = {{2019}},
}
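A toy rendering of the scheduling MDP from the abstract, using each process's time since the last successful update as state; the paper works with richer estimation-error information, and the drop probability is an invented stand-in for the wireless channel.

import random

def step_schedule(ages, scheduled, drop_prob=0.1, rng=random.Random(0)):
    # Advance each process's 'time since last successful update' by one;
    # a scheduled sensor resets its age if its transmission succeeds.
    return [1 if (i in scheduled and rng.random() > drop_prob) else age + 1
            for i, age in enumerate(ages)]

def schedule_oldest(ages, num_channels):
    # Greedy baseline in the spirit of reduced-waiting-time: give the few
    # available channels to the processes with the stalest information.
    return set(sorted(range(len(ages)), key=lambda i: -ages[i])[:num_channels])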

@inproceedings{13443,
  abstract     = {{This work considers the problem of control and resource allocation in networked
systems. To this end, we present DIRA, a Deep reinforcement learning-based Iterative Resource
Allocation algorithm, which is scalable and control-aware. Our algorithm is tailored towards
large-scale problems where control and scheduling need to act jointly to optimize performance.
DIRA can be used to schedule general time-domain optimization-based controllers. In the present
work, we focus on control designs based on suitably adapted linear quadratic regulators. We
apply our algorithm to networked systems with correlated fading communication channels. Our
simulations show that DIRA scales well to large scheduling problems.}},
  author       = {{Redder, Adrian and Ramaswamy, Arunselvan and Quevedo, Daniel}},
  booktitle    = {{Proceedings of the 8th IFAC Workshop on Distributed Estimation and Control in Networked Systems}},
  keywords     = {{Networked control systems, deep reinforcement learning, large-scale systems, resource scheduling, stochastic control}},
  location     = {{Chicago, USA}},
  title        = {{{Deep reinforcement learning for scheduling in large-scale networked control systems}}},
  year         = {{2019}},
}
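The controllers that DIRA schedules are, per the abstract, suitably adapted linear quadratic regulators. For reference, the standard infinite-horizon discrete-time LQR gain can be computed by iterating the Riccati equation, as sketched below; this is the textbook recursion, not the paper's networked adaptation.

import numpy as np

def lqr_gain(A, B, Q, R, iters=500):
    # Fixed-point iteration of the discrete-time Riccati equation for the
    # infinite-horizon LQR gain K in u = -K x.
    P = Q.copy()
    for _ in range(iters):
        K = np.linalg.solve(R + B.T.dot(P).dot(B), B.T.dot(P).dot(A))
        P = Q + A.T.dot(P).dot(A - B.dot(K))
    return np.linalg.solve(R + B.T.dot(P).dot(B), B.T.dot(P).dot(A))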

@article{24150,
  author       = {{Ramaswamy, Arunselvan and Bhatnagar, Shalabh}},
  journal      = {{IEEE Transactions on Automatic Control}},
  number       = {{6}},
  pages        = {{2614--2620}},
  publisher    = {{IEEE}},
  title        = {{{Stability of stochastic approximations with ``controlled Markov'' noise and temporal difference learning}}},
  volume       = {{64}},
  year         = {{2019}},
}

@article{24151,
  author       = {{Demirel, Burak and Ramaswamy, Arunselvan and Quevedo, Daniel E. and Karl, Holger}},
  journal      = {{IEEE Control Systems Letters}},
  number       = {{4}},
  pages        = {{737--742}},
  publisher    = {{IEEE}},
  title        = {{{DeepCAS: A deep reinforcement learning algorithm for control-aware scheduling}}},
  volume       = {{2}},
  year         = {{2018}},
}

@article{24152,
  author       = {{Ramaswamy, Arunselvan and Bhatnagar, Shalabh}},
  journal      = {{IEEE Transactions on Automatic Control}},
  number       = {{5}},
  pages        = {{1465--1471}},
  publisher    = {{IEEE}},
  title        = {{{Analysis of gradient descent methods with nondiminishing bounded errors}}},
  volume       = {{63}},
  year         = {{2018}},
}

