Creative Commons Attribution 3.0 Unported license
We generalise the fundamental concept of LZ77 factorisation from strings to trees. A tree is represented as a collection of edge-disjoint fragments that either consist of one node or has already occurred earlier (in the BFS order). Similarly as for strings, such a collection uniquely determines the tree, so by minimising the number of fragments we obtain a compressed representation of the tree. We show that our generalisation has several useful properties of the standard LZ77 factorisation: it can be computed in polynomial time and its simpler variant in linear time; its size is not larger than the smallest grammar for a tree; it can be transformed (in linear time) into a tree grammar of size O(rg log(n/(rg))), where n is the size of the tree, g the size of the smallest grammar for this tree and r the maximal arity of the nodes in the tree, which matches a recent bound of Jez and Lohrey [STACS 2014], but with a simpler and more modular proof.
@InProceedings{gawrychowski_et_al:LIPIcs.FSTTCS.2016.35,
author = {Gawrychowski, Pawel and Jez, Artur},
title = {{LZ77 Factorisation of Trees}},
booktitle = {36th IARCS Annual Conference on Foundations of Software Technology and Theoretical Computer Science (FSTTCS 2016)},
pages = {35:1--35:15},
series = {Leibniz International Proceedings in Informatics (LIPIcs)},
ISBN = {978-3-95977-027-9},
ISSN = {1868-8969},
year = {2016},
volume = {65},
editor = {Lal, Akash and Akshay, S. and Saurabh, Saket and Sen, Sandeep},
publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
address = {Dagstuhl, Germany},
URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.FSTTCS.2016.35},
URN = {urn:nbn:de:0030-drops-68700},
doi = {10.4230/LIPIcs.FSTTCS.2016.35},
annote = {Keywords: Tree grammars, Grammar compression, LZ77, SLP, Tree compression}
}