Creative Commons Attribution 3.0 Unported license
We construct near-optimal coresets for kernel density estimate for points in R^d when the kernel is positive definite. Specifically we show a polynomial time construction for a coreset of size O(sqrt{d log (1/epsilon)}/epsilon), and we show a near-matching lower bound of size Omega(sqrt{d}/epsilon). The upper bound is a polynomial in 1/epsilon improvement when d in [3,1/epsilon^2) (for all kernels except the Gaussian kernel which had a previous upper bound of O((1/epsilon) log^d (1/epsilon))) and the lower bound is the first known lower bound to depend on d for this problem. Moreover, the upper bound restriction that the kernel is positive definite is significant in that it applies to a wide-variety of kernels, specifically those most important for machine learning. This includes kernels for information distances and the sinc kernel which can be negative.
@InProceedings{phillips_et_al:LIPIcs.SoCG.2018.66,
author = {Phillips, Jeff M. and Tai, Wai Ming},
title = {{Near-Optimal Coresets of Kernel Density Estimates}},
booktitle = {34th International Symposium on Computational Geometry (SoCG 2018)},
pages = {66:1--66:13},
series = {Leibniz International Proceedings in Informatics (LIPIcs)},
ISBN = {978-3-95977-066-8},
ISSN = {1868-8969},
year = {2018},
volume = {99},
editor = {Speckmann, Bettina and T\'{o}th, Csaba D.},
publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
address = {Dagstuhl, Germany},
URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.SoCG.2018.66},
URN = {urn:nbn:de:0030-drops-87797},
doi = {10.4230/LIPIcs.SoCG.2018.66},
annote = {Keywords: Coresets, Kernel Density Estimate, Discrepancy}
}