@inproceedings{20125,
  abstract     = {Datacenter applications have different resource requirements from network and developing flow scheduling heuristics for every workload is practically infeasible. In this paper, we show that deep reinforcement learning (RL) can be used to efficiently learn flow scheduling policies for different workloads without manual feature engineering. Specifically, we present LFS, which learns to optimize a high-level performance objective, e.g., maximize the number of flow admissions while meeting the deadlines. The LFS scheduler is trained through deep RL to learn a scheduling policy on continuous online flow arrivals. The evaluation results show that the trained LFS scheduler admits 1.05x more flows than the greedy flow scheduling heuristics under varying network load.},
  author       = {Hasnain, Asif and Karl, Holger},
  booktitle    = {2021 IEEE 18th Annual Consumer Communications \& Networking Conference (CCNC)},
  keywords     = {Flow scheduling, Deadlines, Reinforcement learning},
  location     = {Las Vegas, USA},
  publisher    = {IEEE Computer Society},
  title        = {Learning Flow Scheduling},
  doi          = {10.1109/CCNC49032.2021.9369514},
  year         = {2021},
}

@inproceedings{21005,
  abstract     = {Data-parallel applications are developed using different data programming models, e.g., MapReduce, partition/aggregate. These models represent diverse resource requirements of application in a datacenter network, which can be represented by the coflow abstraction. The conventional method of creating hand-crafted coflow heuristics for admission or scheduling for different workloads is practically infeasible. In this paper, we propose a deep reinforcement learning (DRL)-based coflow admission scheme -- LCS -- that can learn an admission policy for a higher-level performance objective, i.e., maximize successful coflow admissions, without manual feature engineering.  LCS is trained on a production trace, which has online coflow arrivals. The evaluation results show that LCS is able to learn a reasonable admission policy that admits more coflows than state-of-the-art Varys heuristic while meeting their deadlines.},
  author       = {Hasnain, Asif and Karl, Holger},
  booktitle    = {IEEE INFOCOM 2021 - IEEE Conference on Computer Communications Workshops (INFOCOM WKSHPS)},
  keywords     = {Coflow scheduling, Reinforcement learning, Deadlines},
  location     = {Vancouver, BC, Canada},
  publisher    = {IEEE Communications Society},
  title        = {Learning Coflow Admissions},
  doi          = {10.1109/INFOCOMWKSHPS51825.2021.9484599},
  year         = {2021},
}

@inproceedings{17082,
  abstract     = {Data-parallel applications run on cluster of servers in a datacenter and their communication triggers correlated resource demand on multiple links that can be abstracted as coflow. They often desire predictable network performance, which can be passed to network via coflow abstraction for application-aware network scheduling. In this paper, we propose a heuristic and an optimization algorithm for predictable network performance such that they guarantee coflows completion within their deadlines. The algorithms also ensure high network utilization, i.e., it's work-conserving, and avoids starvation of coflows. We evaluate both algorithms via trace-driven simulation and show that they admit 1.1x more coflows than the Varys scheme while meeting their deadlines.},
  author       = {Hasnain, Asif and Karl, Holger},
  booktitle    = {2020 20th IEEE/ACM International Symposium on Cluster, Cloud and Internet Computing (CCGRID)},
  keywords     = {Coflow, Scheduling, Deadlines, Data centers},
  location     = {Melbourne, Australia},
  publisher    = {IEEE Computer Society},
  title        = {Coflow Scheduling with Performance Guarantees for Data Center Applications},
  doi          = {10.1109/CCGrid49817.2020.00010},
  year         = {2020},
}

