Indexing highly repetitive texts is important in fields such as bioinformatics and versioned repositories. The run-length compression of the Burrows-Wheeler transform (BWT) provides a compressed representation particularly well-suited to text indexing. The r-index is one such index. It enables fast locating of occurrences of a pattern within O(r) words of space, where r is the number of equal-letter runs in the BWT. Its mechanism of locating is to maintain one suffix array sample along the backward-search of the pattern, and to compute all the pattern positions from that sample once the backward-search is complete. In this paper we develop this algorithm further, and propose a new bi-directional text index called the br-index, which supports extending the matched pattern both in forward and backward directions, and locating the occurrences of the pattern at any step of the search, within O(r+r_R) words of space, where r_R is the number of equal-letter runs in the BWT of the reversed text. Our experiments show that the br-index captures the long repetitions of the text, and outperforms the existing indexes in text searching allowing some mismatches except in an internal part.
@InProceedings{arakawa_et_al:LIPIcs.CPM.2022.11, author = {Arakawa, Yuma and Navarro, Gonzalo and Sadakane, Kunihiko}, title = {{Bi-Directional r-Indexes}}, booktitle = {33rd Annual Symposium on Combinatorial Pattern Matching (CPM 2022)}, pages = {11:1--11:14}, series = {Leibniz International Proceedings in Informatics (LIPIcs)}, ISBN = {978-3-95977-234-1}, ISSN = {1868-8969}, year = {2022}, volume = {223}, editor = {Bannai, Hideo and Holub, Jan}, publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik}, address = {Dagstuhl, Germany}, URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.CPM.2022.11}, URN = {urn:nbn:de:0030-drops-161386}, doi = {10.4230/LIPIcs.CPM.2022.11}, annote = {Keywords: Compressed text indexes, Burrows-Wheeler Transform, highly repetitive text collections} }
Feedback for Dagstuhl Publishing