The concept of uniqueness can play an important role when the assessment of an observation’s distinctiveness is essential. This article introduces a distance-based uniqueness measure that quantifies the relative rarity or commonness of a multivariate observation within a dataset. Unique observations exhibit rare combinations of values, and not necessarily extreme values. Taking a cognitive psychological perspective, our measure defines uniqueness as the sum of distances between a target observation and all other observations. After presenting the measure u and its corresponding standardised version u_z, we propose a method to calculate a p value through a probability density function. We then demonstrate the measure’s behaviour in a case study on the uniqueness of Greater London boroughs, based on real-world socioeconomic variables. This initial investigation indicates that u can support exploratory data analysis.
% Conference paper from the GIScience 2023 proceedings (LIPIcs, volume 277).
% Only the title words that must keep their capital are braced, so a
% bibliography style remains free to recase the rest of the title.
@inproceedings{ballatore_et_al:LIPIcs.GIScience.2023.15,
  author    = {Ballatore, Andrea and Cavazzi, Stefano},
  title     = {Why Is {Greenwich} so Common? {Quantifying} the Uniqueness of Multivariate Observations},
  booktitle = {12th International Conference on Geographic Information Science ({GIScience} 2023)},
  editor    = {Beecham, Roger and Long, Jed A. and Smith, Dianna and Zhao, Qunshan and Wise, Sarah},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  volume    = {277},
  pages     = {15:1--15:6},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  year      = {2023},
  isbn      = {978-3-95977-288-4},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.GIScience.2023.15},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.GIScience.2023.15},
  urn       = {urn:nbn:de:0030-drops-189109},
  annote    = {Keywords: uniqueness, distinctiveness, similarity, outlier detection, multivariate data},
}
Feedback for Dagstuhl Publishing