Creative Commons Attribution 3.0 Unported license
We investigate the problem of testing the equivalence between two discrete histograms. A k-histogram over [n] is a probability distribution that is piecewise constant over some set of k intervals over [n]. Histograms have been extensively studied in computer science and statistics. Given a set of samples from two k-histogram distributions p, q over [n], we want to distinguish (with high probability) between the cases that p = q and ||p - q||_1 >= epsilon. The main contribution of this paper is a new algorithm for this testing problem and a nearly matching information-theoretic lower bound. Specifically, the sample complexity of our algorithm matches our lower bound up to a logarithmic factor, improving on previous work by polynomial factors in the relevant parameters. Our algorithmic approach applies in a more general setting and yields improved sample upper bounds for testing closeness of other structured distributions as well.
@comment{
  Conference paper from the ICALP 2017 proceedings (LIPIcs volume 80).
  The title is stored in Title Case without blanket brace protection so
  that the bibliography style decides its casing; only acronyms in
  booktitle and series are braced. URN and annote are not used by the
  standard styles and are kept as metadata.
}
@inproceedings{diakonikolas_et_al:LIPIcs.ICALP.2017.8,
  author    = {Diakonikolas, Ilias and Kane, Daniel M. and Nikishkin, Vladimir},
  title     = {Near-Optimal Closeness Testing of Discrete Histogram Distributions},
  booktitle = {44th International Colloquium on Automata, Languages, and Programming ({ICALP} 2017)},
  editor    = {Chatzigiannakis, Ioannis and Indyk, Piotr and Kuhn, Fabian and Muscholl, Anca},
  series    = {Leibniz International Proceedings in Informatics ({LIPIcs})},
  volume    = {80},
  pages     = {8:1--8:15},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  year      = {2017},
  isbn      = {978-3-95977-041-5},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.ICALP.2017.8},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.ICALP.2017.8},
  urn       = {urn:nbn:de:0030-drops-74937},
  annote    = {Keywords: distribution testing, histograms, closeness testing},
}