Longest common extension queries (often called longest common prefix queries) constitute a fundamental building block in multiple string algorithms, for example computing runs and approximate pattern matching. We show that a sequence of q LCE queries for a string of size n over a general ordered alphabet can be realized in O(q log log n + n log* n) time making only O(q + n) symbol comparisons. Consequently, all runs in a string over a general ordered alphabets can be computed in O(n log log n) time making O(n) symbol comparisons. Our results improve upon a solution by Kosolobov (Information Processing Letters, 2016), who designed an algorithm with O(n log^⅔ n) running time and conjectured that O(n) time is possible. Our paper makes a significant progress towards resolving this conjecture. Our techniques extend to the case of general unordered alphabets, when the time increases to O(q log n + n log* n). The main tools are difference covers and a variant of the disjoint-sets data structure by La Poutré (SODA 1990).
@InProceedings{gawrychowski_et_al:LIPIcs.CPM.2016.5, author = {Gawrychowski, Pawel and Kociumaka, Tomasz and Rytter, Wojciech and Walen, Tomasz}, title = {{Faster Longest Common Extension Queries in Strings over General Alphabets}}, booktitle = {27th Annual Symposium on Combinatorial Pattern Matching (CPM 2016)}, pages = {5:1--5:13}, series = {Leibniz International Proceedings in Informatics (LIPIcs)}, ISBN = {978-3-95977-012-5}, ISSN = {1868-8969}, year = {2016}, volume = {54}, editor = {Grossi, Roberto and Lewenstein, Moshe}, publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik}, address = {Dagstuhl, Germany}, URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.CPM.2016.5}, URN = {urn:nbn:de:0030-drops-60810}, doi = {10.4230/LIPIcs.CPM.2016.5}, annote = {Keywords: longest common extension, longest common prefix, maximal repetitions, difference cover} }