In this paper, we study the problem of maintaining a solution to k-median and k-means clustering in a fully dynamic setting. To do so, we present an algorithm to efficiently maintain a coreset, a compressed version of the dataset, that allows easy computation of a clustering solution at query time. Our coreset algorithm has a near-optimal update time of Õ(k) in general metric spaces, which reduces to Õ(d) in the Euclidean space ℝ^d. The query time is O(k²) in general metrics, and O(kd) in ℝ^d. For maintaining a constant-factor approximation for k-median and k-means clustering in Euclidean space, this directly leads to an algorithm with update time Õ(d) and query time Õ(kd + k²). To maintain an O(polylog k)-approximation, the query time is reduced to Õ(kd).
@comment{
  ESA 2024 conference paper, LIPIcs volume 308, article 100.
  NOTE(review): the first author is parsed as von-part "la", surname "Tour",
  given names "Max Dupre". The surname may really be the compound
  "Dupre la Tour" - confirm against the paper before changing the name split.
}
@inproceedings{latour_et_al:LIPIcs.ESA.2024.100,
  author    = {la Tour, Max Dupr{\'e} and Henzinger, Monika and Saulpic, David},
  title     = {Fully Dynamic {k}-Means Coreset in Near-Optimal Update Time},
  booktitle = {32nd Annual European Symposium on Algorithms ({ESA} 2024)},
  editor    = {Chan, Timothy and Fischer, Johannes and Iacono, John and Herman, Grzegorz},
  series    = {Leibniz International Proceedings in Informatics ({LIPIcs})},
  volume    = {308},
  pages     = {100:1--100:16},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  year      = {2024},
  isbn      = {978-3-95977-338-6},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.ESA.2024.100},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.ESA.2024.100},
  urn       = {urn:nbn:de:0030-drops-211716},
  annote    = {Keywords: clustering, fully-dynamic, coreset, k-means},
}
Feedback for Dagstuhl Publishing