Creative Commons Attribution 3.0 Unported license
In time-sensitive, safety-critical systems that must be fail-operational, active replication is commonly used to mitigate transient faults that arise due to electromagnetic interference (EMI). However, designing an effective and well-performing active replication scheme is challenging since replication conflicts with the size, weight, power, and cost constraints of embedded applications. To enable a systematic and rigorous exploration of the resulting tradeoffs, we present an analysis to quantify the resiliency of fail-operational networked control systems against EMI-induced memory corruption, host crashes, and retransmission delays. Since control systems are typically robust to a few failed iterations, e.g., one missed actuation does not crash an inverted pendulum, traditional solutions based on hard real-time assumptions are often too pessimistic. Our analysis reduces this pessimism by modeling a control system's inherent robustness as an (m,k)-firm specification. A case study with an active suspension workload indicates that the analytical bounds closely predict the failure rate estimates obtained through simulation, thereby enabling a meaningful design-space exploration, and also demonstrates the utility of the analysis in identifying non-trivial and non-obvious reliability tradeoffs.
@comment{Conference paper from ECRTS 2018, published in LIPIcs volume 106; pages use the LIPIcs article-number form (16:1--16:24).}
@inproceedings{gujarati_et_al:LIPIcs.ECRTS.2018.16,
  author    = {Gujarati, Arpan and Nasri, Mitra and Brandenburg, Bj{\"o}rn B.},
  title     = {Quantifying the Resiliency of Fail-Operational Real-Time Networked Control Systems},
  booktitle = {30th Euromicro Conference on Real-Time Systems (ECRTS 2018)},
  pages     = {16:1--16:24},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  isbn      = {978-3-95977-075-0},
  issn      = {1868-8969},
  year      = {2018},
  volume    = {106},
  editor    = {Altmeyer, Sebastian},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.ECRTS.2018.16},
  urn       = {urn:nbn:de:0030-drops-89884},
  doi       = {10.4230/LIPIcs.ECRTS.2018.16},
  annote    = {Keywords: probabilistic analysis, reliability analysis, networked control systems}
}