Creative Commons Attribution 3.0 Unported license
Modern Internet services commonly replicate critical data across several geographical locations using state-machine replication (SMR). Due to their reliance on a leader replica, classical SMR protocols offer limited scalability and availability in this setting. To solve this problem, recent protocols instead follow a leaderless approach, in which each replica is able to make progress using a quorum of its peers. In this paper, we study this new emerging class of SMR protocols and state some of their limits. We first propose a framework that captures the essence of leaderless state-machine replication (Leaderless SMR). Then, we introduce a set of desirable properties for these protocols: (R)eliability, (O)ptimal (L)atency and (L)oad Balancing. We show that protocols matching all of the ROLL properties are subject to a trade-off between performance and reliability. We also establish a lower bound on the message delay to execute a command in protocols optimal for the ROLL properties. This lower bound explains the persistent chaining effect observed in experimental results.
@comment{Conference paper in the DISC 2020 proceedings, LIPIcs volume 179. The cedilla in the first author's surname is written as a brace-wrapped accent group so that classic BibTeX sorts and labels the name correctly. The title is single-braced so the bibliography style controls its casing.}
@inproceedings{francarezende_et_al:LIPIcs.DISC.2020.24,
  author    = {Fran{\c{c}}a Rezende, Tuanir and Sutra, Pierre},
  title     = {Leaderless State-Machine Replication: Specification, Properties, Limits},
  booktitle = {34th International Symposium on Distributed Computing (DISC 2020)},
  pages     = {24:1--24:17},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  isbn      = {978-3-95977-168-9},
  issn      = {1868-8969},
  year      = {2020},
  volume    = {179},
  editor    = {Attiya, Hagit},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.DISC.2020.24},
  urn       = {urn:nbn:de:0030-drops-131024},
  doi       = {10.4230/LIPIcs.DISC.2020.24},
  annote    = {Keywords: Fault Tolerance, State Machine Replication, Consensus},
}