Creative Commons Attribution 3.0 Unported license
Strings with don't care symbols, also called partial words, and more general indeterminate strings are a natural representation of strings containing uncertain symbols. A considerable effort has been made to obtain efficient algorithms for pattern matching and periodicity detection in such strings. Among those, a number of algorithms have been proposed that behave well on random data, but still their worst-case running time is Theta(n^2). We present the first truly subquadratic-time solutions for a number of such problems on partial words that can also be adapted to indeterminate strings over a constant-sized alphabet. We show that $n$ longest common compatible prefix queries (which correspond to longest common extension queries in regular strings) can be answered on-line in O(n * sqrt(n * log(n)) time after O(n * sqrt(n * log(n))-time preprocessing. We also present O(n * sqrt(n * log(n))-time algorithms for computing the prefix array and two types of border array of a partial word.
@InProceedings{iliopoulos_et_al:LIPIcs.CPM.2016.8,
author = {Iliopoulos, Costas S. and Radoszewski, Jakub},
title = {{Truly Subquadratic-Time Extension Queries and Periodicity Detection in Strings with Uncertainties}},
booktitle = {27th Annual Symposium on Combinatorial Pattern Matching (CPM 2016)},
pages = {8:1--8:12},
series = {Leibniz International Proceedings in Informatics (LIPIcs)},
ISBN = {978-3-95977-012-5},
ISSN = {1868-8969},
year = {2016},
volume = {54},
editor = {Grossi, Roberto and Lewenstein, Moshe},
publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
address = {Dagstuhl, Germany},
URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.CPM.2016.8},
URN = {urn:nbn:de:0030-drops-60842},
doi = {10.4230/LIPIcs.CPM.2016.8},
annote = {Keywords: string with don’t cares, partial word, indeterminate string, longest common conservative prefix queries, prefix array}
}