We consider the classical k-means clustering problem in the setting of bi-criteria approximation, in which an algorithm is allowed to output beta*k > k clusters, and must produce a clustering with cost at most alpha times the cost of the optimal set of k clusters. We argue that this approach is natural in many settings, for which the exact number of clusters is a priori unknown, or unimportant up to a constant factor. We give new bi-criteria approximation algorithms, based on linear programming and local search, respectively, which attain a guarantee alpha(beta) depending on the number beta*k of clusters that may be opened. Our guarantee alpha(beta) is always at most 9 + epsilon and improves rapidly with beta (for example: alpha(2) < 2.59, and alpha(3) < 1.4). Moreover, our algorithms have only polynomial dependence on the dimension of the input data, and so are applicable in high-dimensional settings.
% Conference paper from APPROX/RANDOM 2016, published as LIPIcs volume 60, pages 14:1--14:20.
% The title is stored in Title Case with a single pair of braces so that the
% bibliography style decides the final casing; only acronyms in booktitle and
% series are brace-protected. The accented letter in the editor list uses the
% brace-group form so BibTeX treats it as one letter when sorting.
@inproceedings{makarychev_et_al:LIPIcs.APPROX-RANDOM.2016.14,
  author    = {Makarychev, Konstantin and Makarychev, Yury and Sviridenko, Maxim and Ward, Justin},
  title     = {A Bi-Criteria Approximation Algorithm for k-Means},
  booktitle = {Approximation, Randomization, and Combinatorial Optimization. Algorithms and Techniques ({APPROX/RANDOM} 2016)},
  editor    = {Jansen, Klaus and Mathieu, Claire and Rolim, Jos{\'e} D. P. and Umans, Chris},
  series    = {Leibniz International Proceedings in Informatics ({LIPIcs})},
  volume    = {60},
  pages     = {14:1--14:20},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  year      = {2016},
  isbn      = {978-3-95977-018-7},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.APPROX-RANDOM.2016.14},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.APPROX-RANDOM.2016.14},
  urn       = {urn:nbn:de:0030-drops-66370},
  annote    = {Keywords: k-means clustering, bicriteria approximation algorithms, linear programming, local search},
}
Feedback for Dagstuhl Publishing