The selection problem, in forms such as finding the median or choosing the k top-ranked items in a dataset, is a core task in computing with numerous applications in fields as diverse as statistics, databases, machine learning, finance, biology, and graphics. The selection algorithm Median of Medians, although a landmark theoretical achievement, is seldom used in practice because it is slower than simple approaches based on sampling. The main contribution of this paper is a fast linear-time deterministic selection algorithm, MedianOfNinthers, based on a refined definition of MedianOfMedians. A complementary algorithm, MedianOfExtrema, is also proposed. These algorithms work together to solve the selection problem in guaranteed linear time, faster than state-of-the-art baselines, and without resorting to randomization, heuristics, or fallback approaches for pathological cases. We demonstrate results on uniformly distributed random numbers, typical low-entropy artificial datasets, and real-world data. Measurements are open-sourced alongside the implementation at https://github.com/andralex/MedianOfNinthers.
@comment{
  Conference paper by Andrei Alexandrescu, "Fast Deterministic Selection",
  in the SEA 2017 proceedings (LIPIcs volume 75, pages 24:1--24:19).
  The title is single-braced so the bibliography style decides its casing;
  acronyms in booktitle and series are brace-protected to survive recasing.
}
@inproceedings{alexandrescu:LIPIcs.SEA.2017.24,
  author    = {Alexandrescu, Andrei},
  title     = {Fast Deterministic Selection},
  booktitle = {16th International Symposium on Experimental Algorithms ({SEA} 2017)},
  editor    = {Iliopoulos, Costas S. and Pissis, Solon P. and Puglisi, Simon J. and Raman, Rajeev},
  series    = {Leibniz International Proceedings in Informatics ({LIPIcs})},
  volume    = {75},
  pages     = {24:1--24:19},
  year      = {2017},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  isbn      = {978-3-95977-036-1},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.SEA.2017.24},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.SEA.2017.24},
  urn       = {urn:nbn:de:0030-drops-76122},
  annote    = {Keywords: Selection Problem, Quickselect, Median of Medians, Algorithm Engineering, Algorithmic Libraries},
}
Feedback for Dagstuhl Publishing