The Burrows-Wheeler-Transform (BWT) is an invertible permutation of a text known to be highly compressible but also useful for sequence analysis, what makes the BWT highly attractive for lossless data compression. In this paper, we present a new technique to reduce the size of a BWT using its combinatorial properties, while keeping it invertible. The technique can be applied to any BWT-based compressor, and, as experiments show, is able to reduce the encoding size by 8-16 % on average and up to 33-57 % in the best cases (depending on the BWT-compressor used), making BWT-based compressors competitive or even superior to today's best lossless compressors.
@InProceedings{baier:LIPIcs.CPM.2018.3, author = {Baier, Uwe}, title = {{On Undetected Redundancy in the Burrows-Wheeler Transform}}, booktitle = {29th Annual Symposium on Combinatorial Pattern Matching (CPM 2018)}, pages = {3:1--3:15}, series = {Leibniz International Proceedings in Informatics (LIPIcs)}, ISBN = {978-3-95977-074-3}, ISSN = {1868-8969}, year = {2018}, volume = {105}, editor = {Navarro, Gonzalo and Sankoff, David and Zhu, Binhai}, publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik}, address = {Dagstuhl, Germany}, URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.CPM.2018.3}, URN = {urn:nbn:de:0030-drops-87049}, doi = {10.4230/LIPIcs.CPM.2018.3}, annote = {Keywords: Lossless data compression, BWT, Tunneling} }
Feedback for Dagstuhl Publishing