The FM-index is a fundamental data structure used in bioinformatics to efficiently search for strings and index genomes. However, the FM-index can pose computational challenges, particularly in the context of large-scale genomic datasets, due to the complexity of its underlying components and data encodings. In this paper, we present a comprehensive review of efficient variants of the FM-index and the encoding strategies used to improve performance. We examine hardware-accelerated techniques, such as memory-efficient data layouts and cache-aware structures, as well as software-level innovations, including algorithmic refinements and compact representations. The reviewed work demonstrates substantial gains in both speed and scalability, making methods that use the FM-index more practical for high-throughput genomic applications. By analyzing the trade-offs and design choices of these variants, we highlight how combining hardware-aware and software-centric strategies enables more efficient FM-index construction and usage across a range of bioinformatics tasks.
@comment{Chapter 6 (pages 6:1--6:21) of the Festschrift for Giovanni Manzini, OASIcs volume 131. In the title only the term FM-Index is brace-protected, so a bibliography style may recase the remaining words.}
@inproceedings{ferro_et_al:OASIcs.Manzini.6,
  author    = {Ferro, Eddie and Boucher, Christina},
  title     = {Optimizing the Performance of the {FM-Index} for Large-Scale Data},
  booktitle = {The Expanding World of Compressed Data: A Festschrift for Giovanni Manzini's 60th Birthday},
  editor    = {Ferragina, Paolo and Gagie, Travis and Navarro, Gonzalo},
  series    = {Open Access Series in Informatics (OASIcs)},
  volume    = {131},
  pages     = {6:1--6:21},
  year      = {2025},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  isbn      = {978-3-95977-390-4},
  issn      = {2190-6807},
  doi       = {10.4230/OASIcs.Manzini.6},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/OASIcs.Manzini.6},
  urn       = {urn:nbn:de:0030-drops-239140},
  annote    = {Keywords: FM-Index Acceleration, Run-Length Encoding, Suffix Array Optimization, Burrows-Wheeler Transform, Efficient Backward Search},
}
Feedback for Dagstuhl Publishing