,
Estéban Gabory
,
Giuseppe Romana
,
Marinella Sciortino
Creative Commons Attribution 4.0 International license
We prove that for every integer n > 0 and for every alphabet Σ_k of size k ≥ 3, there exist words of length n whose Burrows-Wheeler Transform (BWT) is totally unclustered, i.e., it consists of exactly n runs with no two consecutive equal symbols. These words represent the worst-case behavior of the clustering effect of the BWT. We also establish a lower bound on their number. This contrasts with the binary case, where the existence of infinitely many totally unclustered BWT images is still an open problem, related to Artin’s conjecture on primitive roots.
@Comment{Conference paper in the CPM 2026 proceedings (LIPIcs volume 369).
  Accents are written as BibTeX special characters and only the acronyms in
  title/booktitle are brace-protected, so styles may recase the rest.}
@InProceedings{fici_et_al:LIPIcs.CPM.2026.13,
  author    = {Fici, Gabriele and Gabory, Est{\'e}ban and Romana, Giuseppe and Sciortino, Marinella},
  title     = {Totally Unclustered {BWT} Images of Any Length over Non-Binary Alphabets},
  booktitle = {37th Annual Symposium on Combinatorial Pattern Matching ({CPM} 2026)},
  pages     = {13:1--13:17},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  isbn      = {978-3-95977-420-8},
  issn      = {1868-8969},
  year      = {2026},
  volume    = {369},
  editor    = {Bille, Philip and Prezza, Nicola},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.CPM.2026.13},
  urn       = {urn:nbn:de:0030-drops-259399},
  doi       = {10.4230/LIPIcs.CPM.2026.13},
  annote    = {Keywords: Burrows-Wheeler Transform, BWT-runs, Repetitiveness Measure, Clustering Effect, Generalized de Bruijn Words}
}