Strings with don't care symbols, also called partial words, and more general indeterminate strings are a natural representation of strings containing uncertain symbols. A considerable effort has been made to obtain efficient algorithms for pattern matching and periodicity detection in such strings. Among those, a number of algorithms have been proposed that behave well on random data, but still their worst-case running time is Theta(n^2). We present the first truly subquadratic-time solutions for a number of such problems on partial words that can also be adapted to indeterminate strings over a constant-sized alphabet. We show that $n$ longest common compatible prefix queries (which correspond to longest common extension queries in regular strings) can be answered on-line in O(n * sqrt(n * log(n)) time after O(n * sqrt(n * log(n))-time preprocessing. We also present O(n * sqrt(n * log(n))-time algorithms for computing the prefix array and two types of border array of a partial word.
@InProceedings{iliopoulos_et_al:LIPIcs.CPM.2016.8, author = {Iliopoulos, Costas S. and Radoszewski, Jakub}, title = {{Truly Subquadratic-Time Extension Queries and Periodicity Detection in Strings with Uncertainties}}, booktitle = {27th Annual Symposium on Combinatorial Pattern Matching (CPM 2016)}, pages = {8:1--8:12}, series = {Leibniz International Proceedings in Informatics (LIPIcs)}, ISBN = {978-3-95977-012-5}, ISSN = {1868-8969}, year = {2016}, volume = {54}, editor = {Grossi, Roberto and Lewenstein, Moshe}, publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik}, address = {Dagstuhl, Germany}, URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.CPM.2016.8}, URN = {urn:nbn:de:0030-drops-60842}, doi = {10.4230/LIPIcs.CPM.2016.8}, annote = {Keywords: string with don’t cares, partial word, indeterminate string, longest common conservative prefix queries, prefix array} }
Feedback for Dagstuhl Publishing