We consider the problem of computing the Maximal Exact Matches (MEMs) of a given pattern P[1..m] on a large repetitive text collection T[1..n], which is represented as a (hopefully much smaller) run-length context-free grammar of size g_{rl}. We show that the problem can be solved in time O(m² log^ε n), for any constant ε > 0, on a data structure of size O(g_{rl}). Further, on a locally consistent grammar of size O(δ log n/δ), the time decreases to O(m log m(log m + log^ε n)). The value δ is a function of the substring complexity of T and Ω(δ log n/δ) is a tight lower bound on the compressibility of repetitive texts T, so our structure has optimal size in terms of n and δ.
@InProceedings{navarro:LIPIcs.CPM.2023.24, author = {Navarro, Gonzalo}, title = {{Computing MEMs on Repetitive Text Collections}}, booktitle = {34th Annual Symposium on Combinatorial Pattern Matching (CPM 2023)}, pages = {24:1--24:17}, series = {Leibniz International Proceedings in Informatics (LIPIcs)}, ISBN = {978-3-95977-276-1}, ISSN = {1868-8969}, year = {2023}, volume = {259}, editor = {Bulteau, Laurent and Lipt\'{a}k, Zsuzsanna}, publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik}, address = {Dagstuhl, Germany}, URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.CPM.2023.24}, URN = {urn:nbn:de:0030-drops-179787}, doi = {10.4230/LIPIcs.CPM.2023.24}, annote = {Keywords: grammar-based indices, maximal exact matches, locally consistent grammars, substring complexity} }
Feedback for Dagstuhl Publishing