We introduce the first index that can be built in o(n) time for a text of length n, and can also be queried in o(q) time for a pattern of length q. On an alphabet of size σ, our index uses O(n log σ) bits, is built in O(n log σ / √{log n}) deterministic time, and computes the number of occurrences of the pattern in time O(q/log_σ n + log n log_σ n). Each such occurrence can then be found in O(log n) time. Other trade-offs between the space usage and the cost of reporting occurrences are also possible.
@InProceedings{munro_et_al:LIPIcs.CPM.2020.24, author = {Munro, J. Ian and Navarro, Gonzalo and Nekrich, Yakov}, title = {{Text Indexing and Searching in Sublinear Time}}, booktitle = {31st Annual Symposium on Combinatorial Pattern Matching (CPM 2020)}, pages = {24:1--24:15}, series = {Leibniz International Proceedings in Informatics (LIPIcs)}, ISBN = {978-3-95977-149-8}, ISSN = {1868-8969}, year = {2020}, volume = {161}, editor = {G{\o}rtz, Inge Li and Weimann, Oren}, publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik}, address = {Dagstuhl, Germany}, URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.CPM.2020.24}, URN = {urn:nbn:de:0030-drops-121497}, doi = {10.4230/LIPIcs.CPM.2020.24}, annote = {Keywords: data structures, string indexes} }
Feedback for Dagstuhl Publishing