,
Stefano Cavazzi
Creative Commons Attribution 4.0 International license
The concept of uniqueness can play an important role when the assessment of an observation’s distinctiveness is essential. This article introduces a distance-based uniqueness measure that quantifies the relative rarity or commonness of a multivariate observation within a dataset. Unique observations exhibit rare combinations of values, and not necessarily extreme values. Taking a cognitive psychological perspective, our measure defines uniqueness as the sum of distances between a target observation and all other observations. After presenting the measure u and its corresponding standardised version u_z, we propose a method to calculate a p value through a probability density function. We then demonstrate the measure’s behaviour in a case study on the uniqueness of Greater London boroughs, based on real-world socioeconomic variables. This initial investigation indicates that u can support exploratory data analysis.
% Conference paper in LIPIcs vol. 277. Braces in the title protect only the proper noun and the word that opens the second sentence, so bibliography styles remain free to apply their own casing.
@inproceedings{ballatore_et_al:LIPIcs.GIScience.2023.15,
  author    = {Ballatore, Andrea and Cavazzi, Stefano},
  title     = {Why Is {Greenwich} so Common? {Quantifying} the Uniqueness of Multivariate Observations},
  booktitle = {12th International Conference on Geographic Information Science ({GIScience} 2023)},
  editor    = {Beecham, Roger and Long, Jed A. and Smith, Dianna and Zhao, Qunshan and Wise, Sarah},
  series    = {Leibniz International Proceedings in Informatics ({LIPIcs})},
  volume    = {277},
  pages     = {15:1--15:6},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  year      = {2023},
  isbn      = {978-3-95977-288-4},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.GIScience.2023.15},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.GIScience.2023.15},
  urn       = {urn:nbn:de:0030-drops-189109},
  annote    = {Keywords: uniqueness, distinctiveness, similarity, outlier detection, multivariate data},
}