2022
Zhiyuan Yao; Zihan Ding; Thomas Heide Clausen
Multi-agent reinforcement learning for network load balancing in data center Proceedings Article
In: 31st ACM International Conference on Information and Knowledge Management (CIKM'22), 2022.
Abstract | Links | BibTeX | Tags: Chaire Cisco, Infrastructure for Big Data, load balancing
% CIKM 2022 conference paper on multi-agent reinforcement learning for load balancing.
@inproceedings{nokeyh,
  author     = {Zhiyuan Yao and Zihan Ding and Thomas Heide Clausen},
  title      = {Multi-agent reinforcement learning for network load balancing in data center},
  booktitle  = {31st ACM International Conference on Information and Knowledge Management (CIKM'22)},
  year       = {2022},
  date       = {2022-10-17},
  doi        = {10.1145/3511808.3557133},
  url        = {https://www.researchgate.net/profile/Zhiyuan_Yao13/publication/358163217_Multi-Agent_Reinforcement_Learning_for_Network_Load_Balancing_in_Data_Center/links/62fe5fd3e3c7de4c34666311/Multi-Agent-Reinforcement-Learning-for-Network-Load-Balancing-in-Data-Center.pdf},
  urldate    = {2022-10-17},
  abstract   = {This paper presents the network load balancing problem, a challenging real-world task for multi-agent reinforcement learning (MARL) methods. Conventional heuristic solutions like Weighted-Cost Multi-Path (WCMP) and Local Shortest Queue (LSQ) are less flexible to the changing workload distributions and arrival rates, with a poor balance among multiple load balancers. The cooperative network load balancing task is formulated as a Dec-POMDP problem, which naturally induces the MARL methods. To bridge the reality gap for applying learning-based methods, all models are directly trained and evaluated on a real-world system from moderate- to large-scale setups. Experimental evaluations show that the independent and “selfish” load balancing strategies are not necessarily the globally optimal ones, while the proposed MARL solution has a superior performance over different realistic settings. Additionally, the potential difficulties of the application and deployment of MARL methods for network load balancing are analysed, which helps draw the attention of the learning and network communities to such challenges.},
  keywords   = {Chaire Cisco, Infrastructure for Big Data, load balancing},
  pubstate   = {published},
  tppubtype  = {inproceedings},
}
Zhiyuan Yao; Yoann Desmouceaux; Juan Antonio Cordero; Thomas Heide Clausen
HLB: Towards Load-Aware Load-Balancing Journal Article
In: IEEE/ACM Transactions on Networking, 2022, ISSN: 1558-2566.
Abstract | Links | BibTeX | Tags: Chaire Cisco, Infrastructure for Big Data, load balancing
% Journal article on the Hybrid Load Balancer. The acronym in the title is
% brace-protected so sentence-casing styles keep its capitals.
@article{nokey,
  author     = {Zhiyuan Yao and Yoann Desmouceaux and Juan Antonio Cordero and Thomas Heide Clausen},
  title      = {{HLB}: Towards Load-Aware Load-Balancing},
  journal    = {IEEE/ACM Transactions on Networking},
  year       = {2022},
  date       = {2022-06-05},
  urldate    = {2022-06-05},
  issn       = {1558-2566},
  doi        = {10.1109/TNET.2022.3177163},
  abstract   = {The purpose of network load balancers is to optimize quality of service to the users of a set of servers - basically, to improve response times and to reducing computing resources - by properly distributing workloads. This paper proposes a distributed, application-agnostic, Hybrid Load Balancer (HLB) that - without explicit monitoring or signaling - infers server occupancies and processing speeds, which allows making optimised workload placement decisions. This approach is evaluated both through simulations and extensive experiments, including synthetic workloads and Wikipedia replays on a real-world testbed. Results show significant performance gains, in terms of both response time and system utilisation, when compared to existing load-balancing algorithms.},
  keywords   = {Chaire Cisco, Infrastructure for Big Data, load balancing},
  pubstate   = {published},
  tppubtype  = {article},
}
2021
Zhiyuan Yao; Zihan Ding; Thomas Heide Clausen
Reinforced Workload Distribution Fairness Proceedings Article
In: Machine Learning for Systems at 35th Conference on Neural Information Processing Systems (NeurIPS 2021), 2021.
Abstract | Links | BibTeX | Tags: Chaire Cisco, Infrastructure for Big Data, load balancing
% NeurIPS 2021 ML for Systems workshop paper. The abstract has its extraction
% artifacts repaired (lost hyphens and parenthetical dashes).
@inproceedings{yao2021reinforced,
  author     = {Zhiyuan Yao and Zihan Ding and Thomas Heide Clausen},
  title      = {Reinforced Workload Distribution Fairness},
  booktitle  = {Machine Learning for Systems at 35th Conference on Neural Information Processing Systems (NeurIPS 2021)},
  year       = {2021},
  date       = {2021-12-01},
  url        = {https://www.thomasclausen.net/wp-content/uploads/2021/11/2111.00008-1.pdf},
  urldate    = {2021-12-01},
  abstract   = {Network load balancers are central components in data centers, that distributes workloads across multiple servers and thereby contribute to offering scalable services. However, when load balancers operate in dynamic environments with limited monitoring of application server loads, they rely on heuristic algorithms that require manual configurations for fairness and performance. To alleviate that, this paper proposes a distributed asynchronous reinforcement learning mechanism to---with no active load balancer state monitoring and limited network observations---improve the fairness of the workload distribution achieved by a load balancer. The performance of proposed mechanism is evaluated and compared with state-of-the-art load balancing algorithms in a simulator, under configurations with progressively increasing complexities. Preliminary results show promise in RL-based load balancing algorithms, and identify additional challenges and future research directions, including reward function design and model scalability.},
  keywords   = {Chaire Cisco, Infrastructure for Big Data, load balancing},
  pubstate   = {published},
  tppubtype  = {inproceedings},
}
Carmine Rizzi; Zhiyuan Yao; Yoann Desmouceaux; Mark Townsley; Thomas Heide Clausen
Charon: Load-Aware Load-Balancing in P4 Proceedings Article
In: 1st Joint International Workshop on Network Programmability & Automation (NetPA) at 17th International Conference on Network and Service Management (CNSM 2021), 2021.
Abstract | Links | BibTeX | Tags: Chaire Cisco, Infrastructure for Big Data, load balancing
% NetPA workshop paper at CNSM 2021. The ampersand in the venue name is escaped
% because a bare one is a LaTeX alignment character and aborts typesetting.
% NOTE(review): urldate is earlier than the publication date; confirm the access date.
@inproceedings{rizzi2021charon,
  author     = {Carmine Rizzi and Zhiyuan Yao and Yoann Desmouceaux and Mark Townsley and Thomas Heide Clausen},
  title      = {{Charon}: Load-Aware Load-Balancing in {P4}},
  booktitle  = {1st Joint International Workshop on Network Programmability \& Automation (NetPA) at 17th International Conference on Network and Service Management (CNSM 2021)},
  year       = {2021},
  date       = {2021-10-01},
  url        = {https://www.thomasclausen.net/wp-content/uploads/2021/11/2110.14389.pdf},
  urldate    = {2021-01-01},
  abstract   = {Load-Balancers play an important role in data centers as they distribute network flows across application servers and guarantee per-connection consistency. It is hard however to make fair load balancing decisions so that all resources are efficiently occupied yet not overloaded. Tracking connection states allows to infer server load states and make informed decisions, but at the cost of additional memory space consumption. This makes it hard to implement on programmable hardware, which has constrained memory but offers line-rate performance. This paper presents Charon, a stateless load-aware load balancer that has line-rate performance implemented in P4-NetFPGA. Charon passively collects load states from application servers and employs the power-of-2-choices scheme to make data-driven load balancing decisions and improve resource utilization. Per-connection consistency is preserved statelessly by encoding server ID in a covert channel. The prototype design and implementation details are described in this paper. Simulation results show performance gains in terms of load distribution fairness, quality of service, throughput and processing latency.},
  keywords   = {Chaire Cisco, Infrastructure for Big Data, load balancing},
  pubstate   = {published},
  tppubtype  = {inproceedings},
}
2018
Benoit Pit--Claudel; Yoann Desmouceaux; Pierre Pfister; Mark Townsley; Thomas Clausen
Stateless Load-Aware Load Balancing in P4 Proceedings Article
In: 1st P4 European Workshop (P4EU), 2018.
Abstract | Links | BibTeX | Tags: Chaire Cisco, Infrastructure for Big Data, load balancing, NetFPGA, segment routing
% P4EU 2018 workshop paper presenting SHELL. The workshop name is held in
% booktitle, the field that proceedings-paper entries require for the venue.
@inproceedings{Pit--Claudel2018,
  author     = {Benoit Pit--Claudel and Yoann Desmouceaux and Pierre Pfister and Mark Townsley and Thomas Clausen},
  title      = {Stateless Load-Aware Load Balancing in {P4}},
  booktitle  = {1st P4 European Workshop (P4EU)},
  year       = {2018},
  date       = {2018-09-24},
  url        = {http://www.thomasclausen.net/en/p4eu-2018/},
  abstract   = {Leveraging the performance opportunities offered by programmable hardware, stateless load-balancing architectures allowing line-rate processing are appealing. Moreover, it has been demonstrated that significantly fairer load-balancing can be achieved by an architecture that considers the actual load of application instances when dispatching connection requests. Architectures which maintain per-connection state for resiliency and/or track application load state for fairness are, however, at odds with hardware-imposed memory constraints. Thus, a desirable load-balancer for programmable hardware would be both stateless and able to dispatch queries to application instances according to their current load.
This paper presents SHELL, a stateless application-aware load-balancer combining (i) a power-of-choices scheme using IPv6 Segment Routing to dispatch new flows to a suitable application instance from among multiple candidates, and (ii) the use of a covert channel to record/report which flow was assigned to which candidate in a stateless fashion. In addition, consistent hashing versioning is used to ensure that connections are maintained to the correct application instance, using Segment Routing to “browse” through the history when needed. The stateless design of SHELL makes it suitable for hardware implementation, and this paper describes the implementation of a P4-NetFPGA prototype. A performance evaluation of this SHELL implementation demonstrates throughput and latency characteristics comparable to other stateless load-balancing implementations, while enabling application instance-load-aware dispatching and significantly increasing per-connection consistency resiliency.},
  keywords   = {Chaire Cisco, Infrastructure for Big Data, load balancing, NetFPGA, segment routing},
  pubstate   = {published},
  tppubtype  = {inproceedings},
}
This paper presents SHELL, a stateless application-aware load-balancer combining (i) a power-of-choices scheme using IPv6 Segment Routing to dispatch new flows to a suitable application instance from among multiple candidates, and (ii) the use of a covert channel to record/report which flow was assigned to which candidate in a stateless fashion. In addition, consistent hashing versioning is used to ensure that connections are maintained to the correct application instance, using Segment Routing to “browse” through the history when needed. The stateless design of SHELL makes it suitable for hardware implementation, and this paper describes the implementation of a P4-NetFPGA prototype. A performance evaluation of this SHELL implementation demonstrates throughput and latency characteristics comparable to other stateless load-balancing implementations, while enabling application instance-load-aware dispatching and significantly increasing per-connection consistency resiliency.
Yoann Desmouceaux; Pierre Pfister; Jérôme Tollet; Mark Townsley; Thomas Clausen
6LB: Scalable and Application-Aware Load Balancing with Segment Routing Journal Article
In: IEEE/ACM Transactions on Networking, vol. 26, no. 2, pp. 819-834, 2018, ISSN: 1063-6692.
Abstract | Links | BibTeX | Tags: load balancing, Scalability, segment routing
% Journal article on 6LB. The page range uses the double-hyphen form that
% styles typeset as an en dash; the title acronym is brace-protected.
@article{Desmouceaux2018,
  author     = {Yoann Desmouceaux and Pierre Pfister and Jérôme Tollet and Mark Townsley and Thomas Clausen},
  title      = {{6LB}: Scalable and Application-Aware Load Balancing with Segment Routing},
  journal    = {IEEE/ACM Transactions on Networking},
  volume     = {26},
  number     = {2},
  pages      = {819--834},
  year       = {2018},
  date       = {2018-02-16},
  issn       = {1063-6692},
  doi        = {10.1109/TNET.2018.2799242},
  url        = {http://www.thomasclausen.net/wp-content/uploads/2018/02/2018-IEEE-Transactions-on-Networking-6LB-Scalable-and-Application-Aware-Load-Balancing-with-Segment-Routing.pdf},
  abstract   = {Network load-balancers generally either do not take application state into account, or do so at the cost of a centralized monitoring system. This paper introduces a load-balancer running exclusively within the IP forwarding plane, i.e. in an application protocol agnostic fashion – yet which still provides application-awareness and makes real-time, decentralized decisions. To that end, IPv6 Segment Routing is used to direct data packets from a new flow through a chain of candidate servers, until one decides to accept the connection, based solely on its local state. This way, applications themselves naturally decide on how to fairly share incoming connections, while incurring minimal network overhead, and no out-of-band signaling. A consistent hashing algorithm, as well as an in-band stickiness protocol, allow for the proposed solution to be able to be reliably distributed across a large number of instances.
Performance evaluation by means of an analytical model and actual tests on different workloads (including a Wikipedia replay as a realistic workload) show significant performance benefits in terms of shorter response times, when compared to a traditional random load-balancer. In addition, this paper introduces and compares kernel bypass high-performance implementations of both 6LB and a state-of-the-art load-balancer, showing that the significant system-level benefits of 6LB are achievable with a negligible data-path CPU overhead.},
  keywords   = {load balancing, Scalability, segment routing},
  pubstate   = {published},
  tppubtype  = {article},
}
Performance evaluation by means of an analytical model and actual tests on different workloads (including a Wikipedia replay as a realistic workload) show significant performance benefits in terms of shorter response times, when compared to a traditional random load-balancer. In addition, this paper introduces and compares kernel bypass high-performance implementations of both 6LB and a state-of-the-art load-balancer, showing that the significant system-level benefits of 6LB are achievable with a negligible data-path CPU overhead.
2017
Yoann Desmouceaux; Pierre Pfister; Jerome Tollet; Mark Townsley; Thomas Clausen
SRLB: The Power of Choices in Load Balancing with Segment Routing Proceedings Article
In: Proceedings of the 37th IEEE International Conference on Distributed Computing Systems (ICDCS), 2017.
Abstract | Links | BibTeX | Tags: Chaire Cisco, dc-optimization, Infrastructure for Big Data, load balancing, segment routing
% ICDCS 2017 conference paper on SRLB. The booktitle carries no leading "In"
% because bibliography styles supply that word themselves.
@inproceedings{Desmouceaux2017b,
  author     = {Yoann Desmouceaux and Pierre Pfister and Jérôme Tollet and Mark Townsley and Thomas Clausen},
  title      = {{SRLB}: The Power of Choices in Load Balancing with Segment Routing},
  booktitle  = {Proceedings of the 37th IEEE International Conference on Distributed Computing Systems (ICDCS)},
  year       = {2017},
  date       = {2017-06-05},
  url        = {http://www.thomasclausen.net/wp-content/uploads/2017/05/camera-ready-ieeepdfexpress.pdf},
  abstract   = {Network load-balancers generally either do not take application state into account, or do so at the cost of a centralized monitoring system. This paper introduces a load-balancer running exclusively within the IP forwarding plane, i.e. in an application protocol agnostic fashion – yet which still provides application-awareness and makes real-time, decentralized decisions. To that end, IPv6 Segment Routing is used to direct data packets from a new flow through a chain of candidate servers, until one decides to accept the connection, based on its local state. This way, applications themselves naturally decide on how to share incoming connections, while incurring minimal network overhead, and no out-of-band signaling.
Tests on different workloads – including realistic workloads such as replaying actual Wikipedia access traffic towards a set of replica Wikipedia instances – show significant performance benefits, in terms of shorter response times, when compared to a traditional random load-balancer.},
  keywords   = {Chaire Cisco, dc-optimization, Infrastructure for Big Data, load balancing, segment routing},
  pubstate   = {published},
  tppubtype  = {inproceedings},
}
Tests on different workloads – including realistic workloads such as replaying actual Wikipedia access traffic towards a set of replica Wikipedia instances – show significant performance benefits, in terms of shorter response times, when compared to a traditional random load-balancer.