String sorting is an important part of tasks such as building index data structures. Unfortunately, current string sorting algorithms do not scale to massively parallel distributed-memory machines since they either have latency (at least) proportional to the number of processors $p$ or communicate the data a large number of times (at least logarithmic). We present practical and efficient algorithms for distributed-memory string sorting that scale to large $p$. Similar to state-of-the-art sorters for atomic objects, the algorithms have latency of about $p^{1/k}$ when allowing the data to be communicated $k$ times. Experiments indicate good scaling behavior on a wide range of inputs on up to 49152 cores. Overall, we achieve speedups of up to 4.9 over the current state-of-the-art distributed string sorting algorithms.
@comment{ESA 2024 conference paper, published as article 83 in LIPIcs volume 308.}
@inproceedings{kurpicz_et_al:LIPIcs.ESA.2024.83,
  author    = {Kurpicz, Florian and Mehnert, Pascal and Sanders, Peter and Schimek, Matthias},
  title     = {Scalable Distributed String Sorting},
  editor    = {Chan, Timothy and Fischer, Johannes and Iacono, John and Herman, Grzegorz},
  booktitle = {32nd Annual European Symposium on Algorithms ({ESA} 2024)},
  series    = {Leibniz International Proceedings in Informatics ({LIPIcs})},
  volume    = {308},
  pages     = {83:1--83:17},
  year      = {2024},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  isbn      = {978-3-95977-338-6},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.ESA.2024.83},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.ESA.2024.83},
  urn       = {urn:nbn:de:0030-drops-211541},
  annote    = {Keywords: sorting, strings, distributed-memory computing, distributed membership filters, scalability},
}
Feedback for Dagstuhl Publishing