We study streaming algorithms for partitioning integer sequences and trees. In the case of trees, we suppose that the input tree is provided by a stream consisting of a depth-first-traversal of the input tree. This captures the problem of partitioning XML streams, among other problems. We show that both problems admit deterministic (1+epsilon)-approximation streaming algorithms, where a single pass is sufficient for integer sequences and two passes are required for trees. The space complexity for partitioning integer sequences is O((1/epsilon) * p * log(nm)) and for partitioning trees is O((1/epsilon) * p^2 * log(nm)), where n is the length of the input stream, m is the maximal weight of an element in the stream, and p is the number of partitions to be created. Furthermore, for the problem of partitioning integer sequences, we show that computing an optimal solution in one pass requires Omega(n) space, and computing a (1+epsilon)-approximation in one pass requires Omega((1/epsilon) * log(n)) space, rendering our algorithm tight for instances with p,m in O(1).
@InProceedings{konrad:LIPIcs.ICDT.2016.13, author = {Konrad, Christian}, title = {{Streaming Partitioning of Sequences and Trees}}, booktitle = {19th International Conference on Database Theory (ICDT 2016)}, pages = {13:1--13:18}, series = {Leibniz International Proceedings in Informatics (LIPIcs)}, ISBN = {978-3-95977-002-6}, ISSN = {1868-8969}, year = {2016}, volume = {48}, editor = {Martens, Wim and Zeume, Thomas}, publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik}, address = {Dagstuhl, Germany}, URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.ICDT.2016.13}, URN = {urn:nbn:de:0030-drops-57829}, doi = {10.4230/LIPIcs.ICDT.2016.13}, annote = {Keywords: Streaming Algorithms, XML Documents, Data Partitioning, Communication Complexity} }
Feedback for Dagstuhl Publishing