The fundamental question considered in algorithms on strings is that of indexing, that is, preprocessing a given string for specific queries. By now we have a number of efficient solutions for this problem when the queries ask for an exact occurrence of a given pattern P. However, practical applications motivate the necessity of considering more complex queries, for example concerning near occurrences of two patterns. Recently, Bille et al. [CPM 2021] introduced a variant of such queries, called gapped consecutive occurrences, in which a query consists of two patterns P₁ and P₂ and a range [a,b], and one must find all consecutive occurrences (q₁,q₂) of P₁ and P₂ such that q₂-q₁ ∈ [a,b]. By their results, we cannot hope for a very efficient indexing structure for such queries, even if a = 0 is fixed (although at the same time they provided a non-trivial upper bound). Motivated by this, we focus on a text given as a straight-line program (SLP) and design an index taking space polynomial in the size of the grammar that answers such queries in time optimal up to polylog factors.
@comment{
  Conference paper in the CPM 2023 proceedings, LIPIcs volume 259.
  The title is single-braced so the bibliography style decides its casing.
  Accented letters use the brace-wrapped special-character form so that
  classic BibTeX sorts and labels the names correctly.
}
@inproceedings{gawrychowski_et_al:LIPIcs.CPM.2023.12,
  author    = {Gawrychowski, Pawe{\l} and Gourdel, Garance and Starikovskaya, Tatiana and Steiner, Teresa Anna},
  title     = {Compressed Indexing for Consecutive Occurrences},
  booktitle = {34th Annual Symposium on Combinatorial Pattern Matching ({CPM} 2023)},
  editor    = {Bulteau, Laurent and Lipt{\'a}k, Zsuzsanna},
  series    = {Leibniz International Proceedings in Informatics ({LIPIcs})},
  volume    = {259},
  pages     = {12:1--12:22},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  year      = {2023},
  isbn      = {978-3-95977-276-1},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.CPM.2023.12},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.CPM.2023.12},
  urn       = {urn:nbn:de:0030-drops-179666},
  annote    = {Keywords: Compressed indexing, two patterns, consecutive occurrences},
}
Feedback for Dagstuhl Publishing