Safety-critical systems have to absorb accidental and malicious faults to obtain high mean times to failure (MTTFs). Traditionally, this is achieved through re-execution or replication. However, both techniques come with significant overheads, in particular when cold-start effects are considered. Such effects occur after replicas resume from checkpoints or from their initial state. This work aims to improve the performance of control-task replication by leveraging the inherent stability of many plants to tolerate occasional control-task deadline misses, and suggests masking faults with just a detection quorum. To make this possible, we have to eliminate cold-start effects so that replicas can rejuvenate during each control cycle. We do so by systematically turning stateful controllers into instances that can be recovered in a stateless manner. We highlight the mechanisms behind this transformation and how it achieves consensual resilient control, and we demonstrate, using the example of an inverted pendulum, how accidental and maliciously induced faults can be absorbed, even if control tasks run in less predictable environments.
% ECRTS 2023 conference paper, published as article 14 of LIPIcs volume 262.
% The accented letter in the last author's surname is brace-wrapped as a unit
% so that BibTeX treats it as a single letter when sorting and building labels.
% The title is single-braced so the bibliography style controls its casing.
@inproceedings{matovic_et_al:LIPIcs.ECRTS.2023.14,
  author    = {Matovic, Aleksandar and Graczyk, Rafal and Lucchetti, Federico and V{\"o}lp, Marcus},
  title     = {Consensual Resilient Control: Stateless Recovery of Stateful Controllers},
  booktitle = {35th Euromicro Conference on Real-Time Systems (ECRTS 2023)},
  editor    = {Papadopoulos, Alessandro V.},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  volume    = {262},
  pages     = {14:1--14:27},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  year      = {2023},
  isbn      = {978-3-95977-280-8},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.ECRTS.2023.14},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.ECRTS.2023.14},
  urn       = {urn:nbn:de:0030-drops-180430},
  annote    = {Keywords: resilience, control, replication},
}
Feedback for Dagstuhl Publishing