Creative Commons Attribution 4.0 International license
Safety-critical systems have to absorb accidental and malicious faults to obtain high mean-times-to-failures (MTTFs). Traditionally, this is achieved through re-execution or replication. However, both techniques come with significant overheads, in particular when cold-start effects are considered. Such effects occur after replicas resume from checkpoints or from their initial state. This work aims at improving on the performance of control-task replication by leveraging an inherent stability of many plants to tolerate occasional control-task deadline misses and suggests masking faults just with a detection quorum. To make this possible, we have to eliminate cold-start effects to allow replicas to rejuvenate during each control cycle. We do so by systematically turning stateful controllers into instants that can be recovered in a stateless manner. We highlight the mechanisms behind this transformation, how it achieves consensual resilient control, and demonstrate on the example of an inverted pendulum how accidental and maliciously-induced faults can be absorbed, even if control tasks run in less predictable environments.
@comment{ECRTS 2023 conference paper, article 14 in LIPIcs volume 262. The title is single-braced so the bibliography style controls casing; accented letters use the brace-wrapped form so BibTeX treats each as one character when sorting and building labels.}
@inproceedings{matovic_et_al:LIPIcs.ECRTS.2023.14,
  author    = {Matovic, Aleksandar and Graczyk, Rafal and Lucchetti, Federico and V{\"o}lp, Marcus},
  title     = {Consensual Resilient Control: Stateless Recovery of Stateful Controllers},
  booktitle = {35th Euromicro Conference on Real-Time Systems (ECRTS 2023)},
  pages     = {14:1--14:27},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  isbn      = {978-3-95977-280-8},
  issn      = {1868-8969},
  year      = {2023},
  volume    = {262},
  editor    = {Papadopoulos, Alessandro V.},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.ECRTS.2023.14},
  urn       = {urn:nbn:de:0030-drops-180430},
  doi       = {10.4230/LIPIcs.ECRTS.2023.14},
  annote    = {Keywords: resilience, control, replication}
}