Creative Commons Attribution 3.0 Unported license
We consider the classical k-means clustering problem in the setting of bi-criteria approximation, in which an algorithm is allowed to output beta*k > k clusters, and must produce a clustering with cost at most alpha times the to the cost of the optimal set of k clusters. We argue that this approach is natural in many settings, for which the exact number of clusters is a priori unknown, or unimportant up to a constant factor. We give new bi-criteria approximation algorithms, based on linear programming and local search, respectively, which attain a guarantee alpha(beta) depending on the number beta*k of clusters that may be opened. Our guarantee alpha(beta) is always at most 9 + epsilon and improves rapidly with beta (for example: alpha(2) < 2.59, and alpha(3) < 1.4). Moreover, our algorithms have only polynomial dependence on the dimension of the input data, and so are applicable in high-dimensional settings.
@InProceedings{makarychev_et_al:LIPIcs.APPROX-RANDOM.2016.14,
author = {Makarychev, Konstantin and Makarychev, Yury and Sviridenko, Maxim and Ward, Justin},
title = {{A Bi-Criteria Approximation Algorithm for k-Means}},
booktitle = {Approximation, Randomization, and Combinatorial Optimization. Algorithms and Techniques (APPROX/RANDOM 2016)},
pages = {14:1--14:20},
series = {Leibniz International Proceedings in Informatics (LIPIcs)},
ISBN = {978-3-95977-018-7},
ISSN = {1868-8969},
year = {2016},
volume = {60},
editor = {Jansen, Klaus and Mathieu, Claire and Rolim, Jos\'{e} D. P. and Umans, Chris},
publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
address = {Dagstuhl, Germany},
URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.APPROX-RANDOM.2016.14},
URN = {urn:nbn:de:0030-drops-66370},
doi = {10.4230/LIPIcs.APPROX-RANDOM.2016.14},
annote = {Keywords: k-means clustering, bicriteria approximation algorithms, linear programming, local search}
}