The sparse regression problem, also known as best subset selection problem, can be cast as follows: Given a set S of n points in ℝ^d, a point y∈ ℝ^d, and an integer 2 ≤ k ≤ d, find an affine combination of at most k points of S that is nearest to y. We describe a O(n^{k-1} log^{d-k+2} n)-time randomized (1+ε)-approximation algorithm for this problem with d and ε constant. This is the first algorithm for this problem running in time o(n^k). Its running time is similar to the query time of a data structure recently proposed by Har-Peled, Indyk, and Mahabadi (ICALP'18), while not requiring any preprocessing. Up to polylogarithmic factors, it matches a conditional lower bound relying on a conjecture about affine degeneracy testing. In the special case where k = d = O(1), we provide a simple O_δ(n^{d-1+δ})-time deterministic exact algorithm, for any δ > 0. Finally, we show how to adapt the approximation algorithm for the sparse linear regression and sparse convex regression problems with the same running time, up to polylogarithmic factors.
@InProceedings{cardinal_et_al:LIPIcs.SWAT.2020.20, author = {Cardinal, Jean and Ooms, Aur\'{e}lien}, title = {{Sparse Regression via Range Counting}}, booktitle = {17th Scandinavian Symposium and Workshops on Algorithm Theory (SWAT 2020)}, pages = {20:1--20:17}, series = {Leibniz International Proceedings in Informatics (LIPIcs)}, ISBN = {978-3-95977-150-4}, ISSN = {1868-8969}, year = {2020}, volume = {162}, editor = {Albers, Susanne}, publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik}, address = {Dagstuhl, Germany}, URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.SWAT.2020.20}, URN = {urn:nbn:de:0030-drops-122677}, doi = {10.4230/LIPIcs.SWAT.2020.20}, annote = {Keywords: Sparse Linear Regression, Orthogonal Range Searching, Affine Degeneracy Testing, Nearest Neighbors, Hyperplane Arrangements} }
Feedback for Dagstuhl Publishing