Data structures for efficient sampling from a set of weighted items are an important building block of many applications. However, few parallel solutions are known. We close many of these gaps both for shared-memory and distributed-memory machines. We give efficient, fast, and practicable algorithms for sampling single items, k items with/without replacement, permutations, subsets, and reservoirs. We also give improved sequential algorithms for alias table construction and for sampling with replacement. Experiments on shared-memory parallel machines with up to 158 threads show near linear speedups both for construction and queries.
@comment{Conference paper from ESA 2019, published in the LIPIcs series (volume 144) by Schloss Dagstuhl. The u-umlaut in the first author's surname is written as a brace group starting with the accent command so that classic BibTeX treats it as a single letter when sorting and building labels. The title is enclosed in a single brace pair only, leaving capitalisation to the bibliography style.}
@inproceedings{hubschleschneider_et_al:LIPIcs.ESA.2019.59,
  author    = {H{\"u}bschle-Schneider, Lorenz and Sanders, Peter},
  title     = {Parallel Weighted Random Sampling},
  booktitle = {27th Annual European Symposium on Algorithms (ESA 2019)},
  editor    = {Bender, Michael A. and Svensson, Ola and Herman, Grzegorz},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  volume    = {144},
  pages     = {59:1--59:24},
  year      = {2019},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  isbn      = {978-3-95977-124-5},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.ESA.2019.59},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.ESA.2019.59},
  urn       = {urn:nbn:de:0030-drops-111800},
  annote    = {Keywords: categorical distribution, multinoulli distribution, parallel algorithm, alias method, PRAM, communication efficient algorithm, subset sampling, reservoir sampling},
}
Feedback for Dagstuhl Publishing