Creative Commons Attribution 3.0 Unported license
In this paper, we provide the first optimal algorithm for the remaining open question from the seminal paper of Alon, Matias, and Szegedy: approximating large frequency moments. We give an upper bound on the space required to find a k-th frequency moment of O(n^(1-2/k)) bits that matches, up to a constant factor, the lower bound of Woodruff et al. for constant epsilon and constant k. Our algorithm makes a single pass over the stream and works for any constant k > 3. It is based upon two major technical accomplishments: first, we provide an optimal algorithm for finding the heavy elements in a stream; and second, we provide a technique using Martingale Sketches which gives an optimal reduction of the large frequency moment problem to the all heavy elements problem. We also provide a polylogarithmic improvement for frequency moments, frequency-based functions, spatial data streams, and measuring independence of data sets.
% Conference paper in LIPIcs volume 28 (APPROX/RANDOM 2014). The space bound in the title is kept in a braced math group so it typesets and keeps its case.
@InProceedings{braverman_et_al:LIPIcs.APPROX-RANDOM.2014.531,
  author    = {Braverman, Vladimir and Katzman, Jonathan and Seidell, Charles and Vorsanger, Gregory},
  title     = {An Optimal Algorithm for Large Frequency Moments Using {$O(n^{1-2/k})$} Bits},
  booktitle = {Approximation, Randomization, and Combinatorial Optimization. Algorithms and Techniques (APPROX/RANDOM 2014)},
  pages     = {531--544},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  ISBN      = {978-3-939897-74-3},
  ISSN      = {1868-8969},
  year      = {2014},
  volume    = {28},
  editor    = {Jansen, Klaus and Rolim, Jos{\'e} and Devanur, Nikhil R. and Moore, Cristopher},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  URL       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.APPROX-RANDOM.2014.531},
  URN       = {urn:nbn:de:0030-drops-47217},
  doi       = {10.4230/LIPIcs.APPROX-RANDOM.2014.531},
  annote    = {Keywords: Streaming Algorithms, Randomized Algorithms, Frequency Moments, Heavy Hitters}
}