,
Lavinia Egidi
,
Veronica Guerrini
,
Felipe A. Louza
,
Giovanna Rosone
Creative Commons Attribution 4.0 International license
The Burrows-Wheeler Transform (BWT) is a fundamental string transformation that, although initially introduced for data compression, has been extensively utilized across various domains, including text indexing and pattern matching within large datasets. Although the BWT can be constructed in linear time, the constants involved make the task impractical for large datasets, and as highlighted by Ferragina et al. [Paolo Ferragina et al., 2012], "to use it, one must first build it!". Thus, the construction of the BWT remains a significant challenge. For these reasons, the past three decades have seen a succession of new algorithms for BWT construction, using techniques that work in external memory or that exploit text compression. In this survey, we review some of the most important advancements and tools presented in recent years for computing large BWTs by exploiting external memory or text compression approaches, without using additional information about the data.
@comment{Survey chapter in the Festschrift for Giovanni Manzini's 60th birthday (OASIcs volume 131). Only the acronym in the title is brace-protected, so bibliography styles remain free to apply their own casing to the rest.}
@InProceedings{diazdominguez_et_al:OASIcs.Manzini.7,
  author    = {D{\'\i}az-Dom{\'\i}nguez, Diego and Egidi, Lavinia and Guerrini, Veronica and Louza, Felipe A. and Rosone, Giovanna},
  title     = {Algorithms for Computing Very Large {BWTs}: a Short Survey},
  booktitle = {The Expanding World of Compressed Data: A Festschrift for Giovanni Manzini's 60th Birthday},
  editor    = {Ferragina, Paolo and Gagie, Travis and Navarro, Gonzalo},
  series    = {Open Access Series in Informatics (OASIcs)},
  volume    = {131},
  pages     = {7:1--7:28},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  year      = {2025},
  isbn      = {978-3-95977-390-4},
  issn      = {2190-6807},
  doi       = {10.4230/OASIcs.Manzini.7},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/OASIcs.Manzini.7},
  urn       = {urn:nbn:de:0030-drops-239151},
  annote    = {Keywords: Burrows-Wheeler transform, Extended Burrows-Wheeler transform, external memory, text compression, longest common prefix},
}