Searching for patterns in long strings is a classical algorithmic problem with countless practical applications. Suffix trees and suffix arrays (and their variants) are a long-established solution that yields linear-time search (in the size of the pattern). In [Paolo Boldi and Sebastiano Vigna, 2018] it is shown that a z-map gadget can be attached to (enhanced) suffix arrays to improve their theoretical query time, obtaining a data structure called a zuffix array. The main contribution of this paper is to show that a carefully engineered implementation of the z-map gadget does provide significant speedups with respect to enhanced suffix arrays on real-world datasets, albeit doubling the required space. In particular, for large alphabets we observe a sevenfold improvement in query time with respect to enhanced suffix arrays; even in the worst case (small alphabets), the query time is almost halved. Thus, zuffix arrays provide a very interesting new point in the space-time tradeoff spectrum.
% Conference paper in the SEA 2024 proceedings (LIPIcs volume 301).
% The title carries no blanket brace protection, so the bibliography style decides its casing.
@inproceedings{boldi_et_al:LIPIcs.SEA.2024.2,
  author    = {Boldi, Paolo and Marchini, Stefano and Vigna, Sebastiano},
  title     = {Engineering Zuffix Arrays},
  booktitle = {22nd International Symposium on Experimental Algorithms ({SEA} 2024)},
  editor    = {Liberti, Leo},
  series    = {Leibniz International Proceedings in Informatics ({LIPIcs})},
  volume    = {301},
  pages     = {2:1--2:18},
  year      = {2024},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  isbn      = {978-3-95977-325-6},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.SEA.2024.2},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.SEA.2024.2},
  urn       = {urn:nbn:de:0030-drops-203677},
  annote    = {Keywords: Suffix trees, suffix arrays, z-fast tries},
}
Feedback for Dagstuhl Publishing