Creative Commons Attribution 4.0 International license
We consider an approximate version of the trace reconstruction problem, where the goal is to recover an unknown string s ∈ {0,1}ⁿ from m traces (each trace is generated independently by passing s through a probabilistic insertion-deletion channel with rate p). We present a deterministic near-linear time algorithm for the average-case model, where s is random, that uses only three traces. It runs in near-linear time Õ(n) and with high probability reports a string within edit distance Õ(p² n) from s, which significantly improves over the straightforward bound of O(pn).
Technically, our algorithm computes a (1+ε)-approximate median of the three input traces. To prove its correctness, our probabilistic analysis shows that an approximate median is indeed close to the unknown s. To achieve a near-linear time bound, we have to bypass the well-known dynamic programming algorithm that computes an optimal median in time O(n³).
@InProceedings{chakraborty_et_al:LIPIcs.FSTTCS.2021.11,
author = {Chakraborty, Diptarka and Das, Debarati and Krauthgamer, Robert},
title = {{Approximate Trace Reconstruction via Median String (In Average-Case)}},
booktitle = {41st IARCS Annual Conference on Foundations of Software Technology and Theoretical Computer Science (FSTTCS 2021)},
pages = {11:1--11:23},
series = {Leibniz International Proceedings in Informatics (LIPIcs)},
ISBN = {978-3-95977-215-0},
ISSN = {1868-8969},
year = {2021},
volume = {213},
editor = {Boja\'{n}czyk, Miko{\l}aj and Chekuri, Chandra},
publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
address = {Dagstuhl, Germany},
URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.FSTTCS.2021.11},
URN = {urn:nbn:de:0030-drops-155228},
doi = {10.4230/LIPIcs.FSTTCS.2021.11},
annote = {Keywords: Trace Reconstruction, Approximation Algorithms, Edit Distance, String Median}
}