2023
Thomas Feltin; Léo Marché; Juan Antonio Cordero; Frank Brockners; Thomas Clausen
DNN Partitioning for Inference Throughput Acceleration at the Edge Journal Article
In: IEEE Access, 2023, ISSN: 2169-3536.
Abstract | Links | BibTeX | Tags: Chaire Cisco, Constrained Networks, Optimisation
@comment{Journal article in IEEE Access, 2023. IEEE is an organisation, so it is stored as the publisher and not as a personal editor name; the acronym in the title is braced so that sentence-casing styles keep it upper-case.}
@article{nokeyj,
  title      = {{DNN} Partitioning for Inference Throughput Acceleration at the Edge},
  author     = {Thomas Feltin and Léo Marché and Juan Antonio Cordero and Frank Brockners and Thomas Clausen},
  publisher  = {IEEE},
  url        = {https://ieeexplore.ieee.org/document/10042405},
  doi        = {10.1109/ACCESS.2023.3244497},
  issn       = {2169-3536},
  year       = {2023},
  date       = {2023-02-13},
  journal    = {IEEE Access},
  abstract   = {Deep neural network (DNN) inference on streaming data requires computing resources to satisfy inference throughput requirements. However, latency and privacy sensitive deep learning applications cannot afford to offload computation to remote clouds because of the implied transmission cost and lack of trust in third-party cloud providers. Among solutions to increase performance while keeping computation on a constrained environment, hardware acceleration can be onerous, and model optimization requires extensive design efforts while hindering accuracy. DNN partitioning is a third complementary approach, and consists of distributing the inference workload over several available edge devices, taking into account the edge network properties and the DNN structure, with the objective of maximizing the inference throughput (number of inferences per second). This paper introduces a method to predict inference and transmission latencies for multi-threaded distributed DNN deployments, and defines an optimization process to maximize the inference throughput. A branch and bound solver is then presented and analyzed to quantify the achieved performance and complexity. This analysis has led to the definition of the acceleration region, which describes deterministic conditions on the DNN and network properties under which DNN partitioning is beneficial. Finally, experimental results confirm the simulations and show inference throughput improvements in sample edge deployments.},
  keywords   = {Chaire Cisco, Constrained Networks, Optimisation},
  pubstate   = {published},
  tppubtype  = {article}
}
2022
Zhiyuan Yao; Yoann Desmouceaux; Juan Antonio Cordero; Mark Townsley; Thomas Heide Clausen
Aquarius—Enable Fast, Scalable, Data-Driven Service Management in the Cloud Journal Article
In: IEEE Transactions on Network and Service Management, 2022, ISSN: 1932-4537.
Abstract | Links | BibTeX | Tags: Chaire Cisco, Infrastructure for Big Data, Machine Learning, Network Monitoring
@comment{Journal article in IEEE Transactions on Network and Service Management, 2022. Dashes are written as --- so that LaTeX typesets them as em-dashes and not as hyphens; the system name in the title is braced to keep its capitalisation.}
@article{nokeyi,
  title      = {{Aquarius}---Enable Fast, Scalable, Data-Driven Service Management in the Cloud},
  author     = {Zhiyuan Yao and Yoann Desmouceaux and Juan Antonio Cordero and Mark Townsley and Thomas Heide Clausen},
  url        = {https://ieeexplore.ieee.org/abstract/document/9852806},
  doi        = {10.1109/TNSM.2022.3197130},
  issn       = {1932-4537},
  year       = {2022},
  date       = {2022-12-01},
  urldate    = {2022-12-01},
  journal    = {IEEE Transactions on Network and Service Management},
  abstract   = {In order to dynamically manage and update networking policies in cloud data centers, Virtual Network Functions (VNFs) use, and therefore actively collect, networking state information---and in the process, incur additional control signaling and management overhead, especially in larger data centers. In the meantime, VNFs in production prefer distributed and straightforward heuristics over advanced learning algorithms to avoid intractable additional processing latency under high-performance and low-latency networking constraints. This paper identifies the challenges of deploying learning algorithms in the context of cloud data centers, and proposes Aquarius to bridge the application of machine learning (ML) techniques on distributed systems and service management. Aquarius passively yet efficiently gathers reliable observations, and enables the use of ML techniques to collect, infer, and supply accurate networking state information---without incurring additional signaling and management overhead. It offers fine-grained and programmable visibility to distributed VNFs, and enables both open- and close-loop control over networking systems. This paper illustrates the use of Aquarius with a traffic classifier, an auto-scaling system, and a load balancer---and demonstrates the use of three different ML paradigms---unsupervised, supervised, and reinforcement learning, within Aquarius, for network state inference and service management. Testbed evaluations show that Aquarius suitably improves network state visibility and brings notable performance gains for various scenarios with low overhead.},
  keywords   = {Chaire Cisco, Infrastructure for Big Data, Machine Learning, Network Monitoring},
  pubstate   = {published},
  tppubtype  = {article}
}
Zhiyuan Yao; Yoann Desmouceaux; Juan Antonio Cordero; Thomas Heide Clausen
HLB: Towards Load-Aware Load-Balancing Journal Article
In: IEEE/ACM Transactions on Networking, 2022, ISSN: 1558-2566.
Abstract | Links | BibTeX | Tags: Chaire Cisco, Infrastructure for Big Data, load balancing
@comment{Journal article in IEEE/ACM Transactions on Networking, 2022, on the Hybrid Load Balancer (HLB). Fields are grouped as: who/what, where/when, identifiers, descriptive metadata.}
@article{nokey,
  author     = {Zhiyuan Yao and Yoann Desmouceaux and Juan Antonio Cordero and Thomas Heide Clausen},
  title      = {HLB: Towards Load-Aware Load-Balancing},
  journal    = {IEEE/ACM Transactions on Networking},
  year       = {2022},
  date       = {2022-06-05},
  urldate    = {2022-06-05},
  doi        = {10.1109/TNET.2022.3177163},
  issn       = {1558-2566},
  keywords   = {Chaire Cisco, Infrastructure for Big Data, load balancing},
  abstract   = {The purpose of network load balancers is to optimize quality of service to the users of a set of servers - basically, to improve response times and to reducing computing resources - by properly distributing workloads. This paper proposes a distributed, application-agnostic, Hybrid Load Balancer (HLB) that - without explicit monitoring or signaling - infers server occupancies and processing speeds, which allows making optimised workload placement decisions. This approach is evaluated both through simulations and extensive experiments, including synthetic workloads and Wikipedia replays on a real-world testbed. Results show significant performance gains, in terms of both response time and system utilisation, when compared to existing load-balancing algorithms.},
  pubstate   = {published},
  tppubtype  = {article}
}
2021
Yoann Desmouceaux; Marcel Enguehard; Thomas Clausen
Joint Monitorless Load-Balancing and Autoscaling for Zero-Wait-Time in Data Centers Journal Article
In: IEEE Transactions on Network and Service Management, vol. 18, no. 1, pp. 672-686, 2021, ISSN: 1932-4537.
Abstract | Links | BibTeX | Tags: Chaire Cisco, Infrastructure for Big Data
@comment{Journal article in IEEE Transactions on Network and Service Management, vol. 18 no. 1, 2021. The page range uses the double hyphen that BibTeX styles expect for an en-dash.}
@article{Desmouceaux2021,
  title      = {Joint Monitorless Load-Balancing and Autoscaling for Zero-Wait-Time in Data Centers},
  author     = {Yoann Desmouceaux and Marcel Enguehard and Thomas Clausen},
  url        = {https://www.thomasclausen.net/wp-content/uploads/2021/03/Joint-Monitorless-Load-Balancing-and-Autoscaling-for-Zero-Wait-Time-in-Data-Centers.pdf},
  doi        = {10.1109/TNSM.2020.3045059},
  issn       = {1932-4537},
  year       = {2021},
  date       = {2021-03-01},
  journal    = {IEEE Transactions on Network and Service Management},
  volume     = {18},
  number     = {1},
  pages      = {672--686},
  abstract   = {Cloud architectures achieve scaling through two main functions: (i) load-balancers, which dispatch queries among replicated virtualized application instances, and (ii) autoscalers, which automatically adjust the number of replicated instances to accommodate variations in load patterns. These functions are often provided through centralized load monitoring, incurring operational complexity. This article introduces a unified and centralized-monitoring-free architecture achieving both autoscaling and load-balancing, reducing operational overhead while increasing response time performance. Application instances are virtually ordered in a chain, and new queries are forwarded along this chain until an instance, based on its local load, accepts the query. Autoscaling is triggered by the last application instance, which inspects its average load and infers if its chain is under- or over-provisioned. An analytical model of the system is derived, and proves that the proposed technique can achieve asymptotic zero-wait time with high (and controllable) probability. This result is confirmed by extensive simulations, which highlight close-to-ideal performance in terms of both response time and resource costs.},
  keywords   = {Chaire Cisco, Infrastructure for Big Data},
  pubstate   = {published},
  tppubtype  = {article}
}
2018
Yoann Desmouceaux; Sonia Toubaline; Thomas Clausen
Flow-Aware Workload Migration in Data Centers Journal Article
In: Springer - Journal of Network and Systems Management (JONS), 2018.
Abstract | Links | BibTeX | Tags: Chaire Cisco, dc-optimization, Infrastructure for Big Data
@comment{Journal article in the Journal of Network and Systems Management, 2018. The journal field holds only the journal name; the publisher is kept in its own field. The url field intentionally lists two links, one per line, and is left as found.}
@article{Desmouceaux2018a,
  title      = {Flow-Aware Workload Migration in Data Centers},
  author     = {Yoann Desmouceaux and Sonia Toubaline and Thomas Clausen},
  url        = {https://link.springer.com/epdf/10.1007/s10922-018-9452-5?author_access_token=qm_40d91CsNLlZ_vZ0tZFPe4RwlQNchNByi7wbcMAY4xSrvbLplDMLQ3AN9vWEoUIxtZAIdnOGAzJH5W3YOrbGteOLvaEXsEE1xFv66lVxTKlL40BAS25fsaLf8w1RJAvY69owHWqhJkTmAZpvdCkQ%3D%3D
http://www.thomasclausen.net/wp-content/uploads/2018/03/jons-2018.pdf},
  doi        = {10.1007/s10922-018-9452-5},
  year       = {2018},
  date       = {2018-03-10},
  journal    = {Journal of Network and Systems Management},
  publisher  = {Springer},
  abstract   = {In data centers, subject to workloads with heterogeneous (and sometimes short) lifetimes, workload migration is a way of attaining a more efficient utilization of the underlying physical machines.
To not introduce performance degradation, such workload migration must take into account not only machine resources, and per-task resource requirements, but also application dependencies in terms of network communication.
This article presents a workload migration model capturing all of these constraints.
A linear programming framework is developed allowing accurate representation of per-task resources requirements and inter-task network demands. Using this, a multi-objective problem is formulated to compute a re-allocation of tasks that (i) maximizes the total inter-task throughput, while (ii) minimizing the cost incurred by migration and (iii) allocating the maximum number of new tasks.
A baseline algorithm, solving this multi-objective problem using the $\epsilon$-constraint method is proposed, in order to generate the set of Pareto-optimal solutions. As this algorithm is compute-intensive for large topologies, a heuristic, which computes an approximation of the Pareto front, is then developed, and evaluated on different topologies and with different machine load factors. These evaluations show that the heuristic can provide close-to-optimal solutions, while reducing the solving time by one to two orders of magnitude.},
  keywords   = {Chaire Cisco, dc-optimization, Infrastructure for Big Data},
  pubstate   = {published},
  tppubtype  = {article}
}
To not introduce performance degradation, such workload migration must take into account not only machine resources, and per-task resource requirements, but also application dependencies in terms of network communication.
This article presents a workload migration model capturing all of these constraints.
A linear programming framework is developed allowing accurate representation of per-task resources requirements and inter-task network demands. Using this, a multi-objective problem is formulated to compute a re-allocation of tasks that (i) maximizes the total inter-task throughput, while (ii) minimizing the cost incurred by migration and (iii) allocating the maximum number of new tasks.
A baseline algorithm, solving this multi-objective problem using the $\epsilon$-constraint method is proposed, in order to generate the set of Pareto-optimal solutions. As this algorithm is compute-intensive for large topologies, a heuristic, which computes an approximation of the Pareto front, is then developed, and evaluated on different topologies and with different machine load factors. These evaluations show that the heuristic can provide close-to-optimal solutions, while reducing the solving time by one to two orders of magnitude.
Yoann Desmouceaux; Thomas Clausen; Juan Antonio Cordero; Mark Townsley
Reliable Multicast with B.I.E.R. Journal Article
In: IEEE/KICS Journal of Communications and Networks (JCN), vol. 20, no. 2, pp. 182-197, 2018.
Abstract | Links | BibTeX | Tags: B.I.E.R., Chaire Cisco, Infrastructure for Big Data, Performance Evaluation, Reliable Content Distribution
@comment{Journal article in the Journal of Communications and Networks, vol. 20 no. 2, 2018. The journal field holds only the journal name; the sponsoring organisations are kept in the publisher field. The acronym in the title is braced to survive sentence-casing, and the page range uses a double hyphen.}
@article{Desmouceaux2018b,
  title      = {Reliable Multicast with {B.I.E.R.}},
  author     = {Yoann Desmouceaux and Thomas Clausen and Juan Antonio Cordero and Mark Townsley},
  url        = {http://www.thomasclausen.net/wp-content/uploads/2018/03/jcn-2018.pdf},
  year       = {2018},
  date       = {2018-03-01},
  journal    = {Journal of Communications and Networks},
  publisher  = {IEEE/KICS},
  volume     = {20},
  number     = {2},
  pages      = {182--197},
  abstract   = {Inter-network multicast protocols, which build and maintain multicast trees, incur both explicit protocol signalling, and maintenance of state in intermediate routers in the network. B.I.E.R. (Bit-Indexed Explicit Replication) is a technique which can provide a multicast service yet removes such complexities: intermediate routers are unencumbered by group management, and no per-group state is to be maintained.
This paper explores the use of B.I.E.R. as a basis for developing an efficient and reliable multicast mechanism, where redundant traffic is avoided, essential traffic is forwarded along shortest paths, and no per-flow state is required in intermediate routers. Evaluated by way of both an analytical model and network simulation both in generic and in real network topologies with varying background traffic loads, the proposed B.I.E.R.-based reliable multicast mechanism exhibits attractive performance attributes: it attains delivery success rates as high as any other reliable multicast service, but with significantly better link utilisation and no per-flow or per-group state in intermediate routers of the network.},
  keywords   = {B.I.E.R., Chaire Cisco, Infrastructure for Big Data, Performance Evaluation, Reliable Content Distribution},
  pubstate   = {published},
  tppubtype  = {article}
}
This paper explores the use of B.I.E.R. as a basis for developing an efficient and reliable multicast mechanism, where redundant traffic is avoided, essential traffic is forwarded along shortest paths, and no per-flow state is required in intermediate routers. Evaluated by way of both an analytical model and network simulation both in generic and in real network topologies with varying background traffic loads, the proposed B.I.E.R.-based reliable multicast mechanism exhibits attractive performance attributes: it attains delivery success rates as high as any other reliable multicast service, but with significantly better link utilisation and no per-flow or per-group state in intermediate routers of the network.
2016
Aloys Augustin; Jiazi Yi; Thomas Clausen; Mark Townsley
A Study of LoRa: Long Range & Low Power Networks for the Internet of Things Journal Article
In: MDPI Sensors, vol. 16, no. 9, pp. 1466, 2016, ISSN: 1424-8220, (5 yr Impact Factor: 2.437).
Abstract | Links | BibTeX | Tags: Chaire Cisco, IoT, LoRa, Sensor Networks
@comment{Journal article in Sensors, vol. 16 no. 9, 2016; the pages field carries the article number. The ampersand is escaped because a bare one is a LaTeX alignment character, and the note is stored without parentheses because the listing already wraps it in a pair.}
@article{Augustin2016,
  title      = {A Study of {LoRa}: Long Range \& Low Power Networks for the {Internet of Things}},
  author     = {Aloys Augustin and Jiazi Yi and Thomas Clausen and Mark Townsley},
  url        = {http://www.thomasclausen.net/2016-a-study-of-lora-long-range-low-power-networks-for-the-internet-of-things/},
  doi        = {10.3390/s16091466},
  issn       = {1424-8220},
  year       = {2016},
  date       = {2016-09-09},
  journal    = {Sensors},
  publisher  = {MDPI},
  volume     = {16},
  number     = {9},
  pages      = {1466},
  abstract   = {LoRa is a long-range, low-power, low-bitrate, wireless telecommunications system, promoted as an infrastructure solution for the Internet of Things: end-devices use LoRa across a single wireless hop to communicate to gateway(s), connected to the Internet and which act as transparent bridges and relay messages between these end-devices and a central network server. This paper provides an overview of LoRa and an in-depth analysis of its functional components. The physical and data link layer performance is evaluated by field tests and simulations. Based on the analysis and evaluations, some possible solutions for performance enhancements are proposed.},
  note       = {5 yr Impact Factor: 2.437},
  keywords   = {Chaire Cisco, IoT, LoRa, Sensor Networks},
  pubstate   = {published},
  tppubtype  = {article}
}