Creative Commons Attribution 3.0 Unported license
The selection problem, in forms such as finding the median or choosing the k top-ranked items in a dataset, is a core task in computing with numerous applications in fields as diverse as statistics, databases, machine learning, finance, biology, and graphics. The selection algorithm MedianOfMedians, although a landmark theoretical achievement, is seldom used in practice because it is slower than simple approaches based on sampling. The main contribution of this paper is a fast linear-time deterministic selection algorithm MedianOfNinthers based on a refined definition of MedianOfMedians. A complementary algorithm MedianOfExtrema is also proposed. These algorithms work together to solve the selection problem in guaranteed linear time, faster than state-of-the-art baselines, and without resorting to randomization, heuristics, or fallback approaches for pathological cases. We demonstrate results on uniformly distributed random numbers, typical low-entropy artificial datasets, and real-world data. Measurements are open-sourced alongside the implementation at https://github.com/andralex/MedianOfNinthers.
@comment{Conference paper from SEA 2017, published in the LIPIcs series, volume 75.
  Pages use the article-number form 24:1--24:19. The annote field carries the keywords.}
@inproceedings{alexandrescu:LIPIcs.SEA.2017.24,
  author    = {Alexandrescu, Andrei},
  title     = {Fast Deterministic Selection},
  editor    = {Iliopoulos, Costas S. and Pissis, Solon P. and Puglisi, Simon J. and Raman, Rajeev},
  booktitle = {16th International Symposium on Experimental Algorithms ({SEA} 2017)},
  series    = {Leibniz International Proceedings in Informatics ({LIPIcs})},
  volume    = {75},
  pages     = {24:1--24:19},
  year      = {2017},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  isbn      = {978-3-95977-036-1},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.SEA.2017.24},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.SEA.2017.24},
  urn       = {urn:nbn:de:0030-drops-76122},
  annote    = {Keywords: Selection Problem, Quickselect, Median of Medians, Algorithm Engineering, Algorithmic Libraries},
}