A grammar compression is a context-free grammar (CFG) deriving a single string deterministically. For an input string of length $N$ over an alphabet of size $\sigma$, the smallest CFG is $O(\log N)$-approximable in the offline setting and $O(\log N \log^* N)$-approximable in the online setting. In addition, an information-theoretic lower bound for representing a CFG in Chomsky normal form of $n$ variables is $\log (n!/n^\sigma) + n + o(n)$ bits. Although there is an online grammar compression algorithm that directly computes the succinct encoding of its output CFG with an $O(\log N \log^* N)$ approximation guarantee, the problem of optimizing its working space has remained open. We propose a fully-online algorithm that requires the fewest bits of working space, asymptotically equal to the lower bound, in $O(N \log \log n)$ compression time. In addition, we propose several techniques to boost grammar compression and show their efficiency by computational experiments.
% ESA 2017 conference paper, published as LIPIcs volume 87.
% The title carries a single brace level so the bibliography style controls its casing.
@inproceedings{takabatake_et_al:LIPIcs.ESA.2017.67,
  author    = {Takabatake, Yoshimasa and I, Tomohiro and Sakamoto, Hiroshi},
  title     = {A Space-Optimal Grammar Compression},
  editor    = {Pruhs, Kirk and Sohler, Christian},
  booktitle = {25th Annual European Symposium on Algorithms (ESA 2017)},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  volume    = {87},
  pages     = {67:1--67:15},
  year      = {2017},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  isbn      = {978-3-95977-049-1},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.ESA.2017.67},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.ESA.2017.67},
  urn       = {urn:nbn:de:0030-drops-78640},
  annote    = {Keywords: Grammar compression, fully-online algorithm, succinct data structure},
}
Feedback for Dagstuhl Publishing