,
Monika Henzinger
,
David Saulpic
Creative Commons Attribution 4.0 International license
In this paper, we study the problem of maintaining a solution to k-median and k-means clustering in a fully dynamic setting. To do so, we present an algorithm to efficiently maintain a coreset, a compressed version of the dataset, that allows easy computation of a clustering solution at query time. Our coreset algorithm has near-optimal update time of Õ(k) in general metric spaces, which reduces to Õ(d) in the Euclidean space ℝ^d. The query time is O(k²) in general metrics, and O(kd) in ℝ^d. To maintain a constant-factor approximation for k-median and k-means clustering in Euclidean space, this directly leads to an algorithm with update time Õ(d), and query time Õ(kd + k²). To maintain an O(polylog k)-approximation, the query time is reduced to Õ(kd).
% Conference paper in the ESA 2024 proceedings (LIPIcs volume 308).
% The first author's family name is taken to be the compound "Dupr{\'e} la Tour";
% it is wrapped in braces so BibTeX keeps it whole instead of reading "la" as a
% von particle and "Max Dupr{\'e}" as the given names.
% NOTE(review): confirm the surname split against the published author list.
% The title is single-braced so that bibliography styles can apply their own casing.
@inproceedings{latour_et_al:LIPIcs.ESA.2024.100,
  author    = {{Dupr{\'e} la Tour}, Max and Henzinger, Monika and Saulpic, David},
  title     = {Fully Dynamic k-Means Coreset in Near-Optimal Update Time},
  booktitle = {32nd Annual European Symposium on Algorithms (ESA 2024)},
  editor    = {Chan, Timothy and Fischer, Johannes and Iacono, John and Herman, Grzegorz},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  volume    = {308},
  pages     = {100:1--100:16},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  year      = {2024},
  isbn      = {978-3-95977-338-6},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.ESA.2024.100},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.ESA.2024.100},
  urn       = {urn:nbn:de:0030-drops-211716},
  annote    = {Keywords: clustering, fully-dynamic, coreset, k-means},
}