The Burrows-Wheeler transform (BWT) is a reversible transform that converts a string w into another string BWT(w). The size of the run-length encoded BWT (RLBWT) can be interpreted as a measure of repetitiveness in the class of representations called dictionary compression, which are essentially representations based on copy-and-paste operations. In this paper, we shed new light on the compressiveness of BWT and the bijective BWT (BBWT). We first extend previous results on the relations of their run-length compressed sizes r and r_B. We also show that the so-called "clustering effect" of BWT and BBWT can be captured by measures other than empirical entropy or run-length encoding. In particular, we show that BWT and BBWT do not increase the repetitiveness of the string with respect to various measures based on dictionary compression by more than a polylogarithmic factor. Furthermore, we show that there exists an infinite family of strings that are maximally incompressible by any dictionary compression measure, but become very compressible after applying BBWT. An interesting implication of this result is that it is possible to transcend dictionary compression in some cases by simply applying BBWT before applying dictionary compression.
% Conference paper in CPM 2025 (LIPIcs volume 331, article 17).
% Keywords are carried in the `annote` field, as in the original export.
@inproceedings{bannai_et_al:LIPIcs.CPM.2025.17,
  author    = {Bannai, Hideo and I, Tomohiro and Nakashima, Yuto},
  title     = {On the Compressiveness of the {Burrows-Wheeler} Transform},
  booktitle = {36th Annual Symposium on Combinatorial Pattern Matching (CPM 2025)},
  editor    = {Bonizzoni, Paola and M{\"a}kinen, Veli},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  volume    = {331},
  pages     = {17:1--17:15},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  year      = {2025},
  isbn      = {978-3-95977-369-0},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.CPM.2025.17},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.CPM.2025.17},
  urn       = {urn:nbn:de:0030-drops-231116},
  annote    = {Keywords: Data Compression, Bijective Burrows-Wheeler Transform, Fibonacci words},
}
Feedback for Dagstuhl Publishing