We construct near-optimal coresets for kernel density estimate for points in R^d when the kernel is positive definite. Specifically we show a polynomial time construction for a coreset of size O(sqrt{d log (1/epsilon)}/epsilon), and we show a near-matching lower bound of size Omega(sqrt{d}/epsilon). The upper bound is a polynomial in 1/epsilon improvement when d in [3,1/epsilon^2) (for all kernels except the Gaussian kernel which had a previous upper bound of O((1/epsilon) log^d (1/epsilon))) and the lower bound is the first known lower bound to depend on d for this problem. Moreover, the upper bound restriction that the kernel is positive definite is significant in that it applies to a wide-variety of kernels, specifically those most important for machine learning. This includes kernels for information distances and the sinc kernel which can be negative.
@InProceedings{phillips_et_al:LIPIcs.SoCG.2018.66, author = {Phillips, Jeff M. and Tai, Wai Ming}, title = {{Near-Optimal Coresets of Kernel Density Estimates}}, booktitle = {34th International Symposium on Computational Geometry (SoCG 2018)}, pages = {66:1--66:13}, series = {Leibniz International Proceedings in Informatics (LIPIcs)}, ISBN = {978-3-95977-066-8}, ISSN = {1868-8969}, year = {2018}, volume = {99}, editor = {Speckmann, Bettina and T\'{o}th, Csaba D.}, publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik}, address = {Dagstuhl, Germany}, URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.SoCG.2018.66}, URN = {urn:nbn:de:0030-drops-87797}, doi = {10.4230/LIPIcs.SoCG.2018.66}, annote = {Keywords: Coresets, Kernel Density Estimate, Discrepancy} }
Feedback for Dagstuhl Publishing