Creative Commons Attribution 3.0 Unported license
We study streaming algorithms for partitioning integer sequences and trees. In the case of trees, we suppose that the input tree is provided by a stream consisting of a depth-first-traversal of the input tree. This captures the problem of partitioning XML streams, among other problems. We show that both problems admit deterministic (1+epsilon)-approximation streaming algorithms, where a single pass is sufficient for integer sequences and two passes are required for trees. The space complexity for partitioning integer sequences is O((1/epsilon) * p * log(nm)) and for partitioning trees is O((1/epsilon) * p^2 * log(nm)), where n is the length of the input stream, m is the maximal weight of an element in the stream, and p is the number of partitions to be created. Furthermore, for the problem of partitioning integer sequences, we show that computing an optimal solution in one pass requires Omega(n) space, and computing a (1+epsilon)-approximation in one pass requires Omega((1/epsilon) * log(n)) space, rendering our algorithm tight for instances with p,m in O(1).
@InProceedings{konrad:LIPIcs.ICDT.2016.13,
author = {Konrad, Christian},
title = {{Streaming Partitioning of Sequences and Trees}},
booktitle = {19th International Conference on Database Theory (ICDT 2016)},
pages = {13:1--13:18},
series = {Leibniz International Proceedings in Informatics (LIPIcs)},
ISBN = {978-3-95977-002-6},
ISSN = {1868-8969},
year = {2016},
volume = {48},
editor = {Martens, Wim and Zeume, Thomas},
publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
address = {Dagstuhl, Germany},
URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.ICDT.2016.13},
URN = {urn:nbn:de:0030-drops-57829},
doi = {10.4230/LIPIcs.ICDT.2016.13},
annote = {Keywords: Streaming Algorithms, XML Documents, Data Partitioning, Communication Complexity}
}