Creative Commons Attribution 3.0 Unported license
Given two positions i and j in a string T of length N, a longest common extension (LCE) query asks for the length of the longest common prefix between the suffixes beginning at i and j. A compressed LCE data structure stores T in a compressed form while supporting fast LCE queries. In this article we show that the recompression technique is a powerful tool for compressed LCE data structures. We present a new compressed LCE data structure of size O(z lg (N/z)) that supports LCE queries in O(lg N) time, where z is the size of the Lempel-Ziv 77 factorization of T without self-reference. Given T in uncompressed form, we show how to build our data structure in O(N) time and space. Given T in grammar-compressed form, i.e., as a straight-line program of size n generating T, we show how to build our data structure in O(n lg (N/n)) time and O(n + z lg (N/z)) space. Our algorithms are deterministic and always return correct answers.
% CPM 2017 paper (LIPIcs vol. 78, article 18): Longest Common Extensions with Recompression.
@InProceedings{i:LIPIcs.CPM.2017.18,
  author    = {I, Tomohiro},
  title     = {Longest Common Extensions with Recompression},
  booktitle = {28th Annual Symposium on Combinatorial Pattern Matching ({CPM} 2017)},
  pages     = {18:1--18:15},
  series    = {Leibniz International Proceedings in Informatics ({LIPIcs})},
  isbn      = {978-3-95977-039-2},
  issn      = {1868-8969},
  year      = {2017},
  volume    = {78},
  editor    = {K{\"a}rkk{\"a}inen, Juha and Radoszewski, Jakub and Rytter, Wojciech},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.CPM.2017.18},
  urn       = {urn:nbn:de:0030-drops-73234},
  doi       = {10.4230/LIPIcs.CPM.2017.18},
  annote    = {Keywords: Longest Common Extension (LCE) queries, compressed data structure, grammar compressed strings, Straight-Line Program (SLP)}
}