We study the problem of k-means clustering in the space of straight-line segments in ℝ² under the Hausdorff distance. For this problem, we give a (1+ε)-approximation algorithm that, for an input of n segments, for any fixed k, and with constant success probability, runs in time O(n + ε^{-O(k)} + ε^{-O(k)} ⋅ log^{O(k)}(ε^{-1})). The algorithm has two main ingredients. Firstly, we express the k-means objective in our metric space as a sum of algebraic functions and use the optimization technique of Vigneron [Antoine Vigneron, 2014] to approximate its minimum. Secondly, we reduce the input size by computing a small size coreset using the sensitivity-based sampling framework by Feldman and Langberg [Dan Feldman and Michael Langberg, 2011; Feldman et al., 2020]. Our results can be extended to polylines of constant complexity with a running time of O(n + ε^{-O(k)}).
% Conference paper in the ESA 2023 proceedings (LIPIcs volume 274).
% The title is stored in Title Case without an outer brace pair so that the
% bibliography style decides the casing; only the math variable k is protected.
@inproceedings{cabello_et_al:LIPIcs.ESA.2023.28,
  author    = {Cabello, Sergio and Giannopoulos, Panos},
  title     = {On {$k$}-Means for Segments and Polylines},
  booktitle = {31st Annual European Symposium on Algorithms (ESA 2023)},
  editor    = {G{\o}rtz, Inge Li and Farach-Colton, Martin and Puglisi, Simon J. and Herman, Grzegorz},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  volume    = {274},
  pages     = {28:1--28:14},
  year      = {2023},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  isbn      = {978-3-95977-295-2},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.ESA.2023.28},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.ESA.2023.28},
  urn       = {urn:nbn:de:0030-drops-186812},
  annote    = {Keywords: k-means clustering, segments, polylines, Hausdorff distance, Fr{\'e}chet mean},
}
Feedback for Dagstuhl Publishing