In PODS'17, Ketsman and Suciu gave an algorithm in the MPC model for computing the result of any natural join where every input relation has two attributes. Achieving an optimal load O(m/p^{1/ρ}) - where m is the total size of the input relations, p the number of machines, and ρ the fractional edge covering number of the join - their algorithm requires 7 rounds to finish. This paper presents a simpler algorithm that ensures the same load with 3 rounds (in fact, the second round incurs only a load of O(p²) to transmit certain statistics to assist machine allocation in the last round). Our algorithm is made possible by a new theorem that provides fresh insight on the structure of the problem, and brings us closer to understanding the intrinsic reason why joins on binary relations can be settled with load O(m/p^{1/ρ}).
@InProceedings{tao:LIPIcs.ICDT.2020.25, author = {Tao, Yufei}, title = {{A Simple Parallel Algorithm for Natural Joins on Binary Relations}}, booktitle = {23rd International Conference on Database Theory (ICDT 2020)}, pages = {25:1--25:18}, series = {Leibniz International Proceedings in Informatics (LIPIcs)}, ISBN = {978-3-95977-139-9}, ISSN = {1868-8969}, year = {2020}, volume = {155}, editor = {Lutz, Carsten and Jung, Jean Christoph}, publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik}, address = {Dagstuhl, Germany}, URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.ICDT.2020.25}, URN = {urn:nbn:de:0030-drops-119495}, doi = {10.4230/LIPIcs.ICDT.2020.25}, annote = {Keywords: Natural Joins, Conjunctive Queries, MPC Algorithms, Parallel Computing} }
Feedback for Dagstuhl Publishing