The center of mass of a point set lying on a manifold generalizes the celebrated Euclidean centroid, and is ubiquitous in statistical analysis in non Euclidean spaces. In this work, we give a complete characterization of the weighted p-mean of a finite set of angular values on S¹, based on a decomposition of S¹ such that the functional of interest has at most one local minimum per cell. This characterization is used to show that the problem is decidable for rational angular values -a consequence of Lindemann’s theorem on the transcendence of π, and to develop an effective algorithm parameterized by exact predicates. A robust implementation of this algorithm based on multi-precision interval arithmetic is also presented, and is shown to be effective for large values of n and p. We use it as building block to implement the k-means and k-means++ clustering algorithms on the flat torus, with applications to clustering protein molecular conformations. These algorithms are available in the Structural Bioinformatics Library (http://sbl.inria.fr). Our derivations are of interest in two respects. First, efficient p-mean calculations are relevant to develop principal components analysis on the flat torus encoding angular spaces-a particularly important case to describe molecular conformations. Second, our two-stage strategy stresses the interest of combinatorial methods for p-means, also emphasizing the role of numerical issues.
@InProceedings{cazals_et_al:LIPIcs.SEA.2021.15, author = {Cazals, Fr\'{e}d\'{e}ric and Delmas, Bernard and O'Donnell, Timothee}, title = {{Fr\'{e}chet Mean and p-Mean on the Unit Circle: Decidability, Algorithm, and Applications to Clustering on the Flat Torus}}, booktitle = {19th International Symposium on Experimental Algorithms (SEA 2021)}, pages = {15:1--15:16}, series = {Leibniz International Proceedings in Informatics (LIPIcs)}, ISBN = {978-3-95977-185-6}, ISSN = {1868-8969}, year = {2021}, volume = {190}, editor = {Coudert, David and Natale, Emanuele}, publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik}, address = {Dagstuhl, Germany}, URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.SEA.2021.15}, URN = {urn:nbn:de:0030-drops-137870}, doi = {10.4230/LIPIcs.SEA.2021.15}, annote = {Keywords: Frech\'{e}t mean, p-mean, circular statistics, decidability, robustness, multi-precision, angular spaces, flat torus, clustering, molecular conformations} }
Feedback for Dagstuhl Publishing