Creative Commons Attribution 3.0 Unported license
We study the complexity of finding an optimal hierarchical clustering of an unweighted similarity graph under the recently introduced Dasgupta objective function. We introduce a proof technique, called the normalization procedure, that takes any such clustering of a graph G and iteratively improves it until a desired target clustering of G is reached. We use this technique to show both a negative and a positive complexity result. Firstly, we show that in general the problem is NP-complete. Secondly, we consider min-well-behaved graphs, which are graphs H having the property that for any k the graph H^{(k)} being the join of k copies of H has an optimal hierarchical clustering that splits each copy of H in the same optimal way. To optimally cluster such a graph H^{(k)} we thus only need to optimally cluster the smaller graph H. Co-bipartite graphs are min-well-behaved, but otherwise they seem to be scarce. We use the normalization procedure to show that also the cycle on 6 vertices is min-well-behaved.
@InProceedings{hgemo_et_al:LIPIcs.MFCS.2020.47,
author = {H{\o}gemo, Svein and Paul, Christophe and Telle, Jan Arne},
title = {{Hierarchical Clusterings of Unweighted Graphs}},
booktitle = {45th International Symposium on Mathematical Foundations of Computer Science (MFCS 2020)},
pages = {47:1--47:13},
series = {Leibniz International Proceedings in Informatics (LIPIcs)},
ISBN = {978-3-95977-159-7},
ISSN = {1868-8969},
year = {2020},
volume = {170},
editor = {Esparza, Javier and Kr\'{a}l', Daniel},
publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
address = {Dagstuhl, Germany},
URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.MFCS.2020.47},
URN = {urn:nbn:de:0030-drops-127139},
doi = {10.4230/LIPIcs.MFCS.2020.47},
annote = {Keywords: Hierarchical Clustering}
}