,
Reinhard Pichler
,
Alexander Selzer
Creative Commons Attribution 4.0 International license
Aggregate queries often require computing large intermediate joins despite producing only small outputs. We identify broad classes of acyclic aggregate queries that can be evaluated without materialising any join results, using a bottom-up, semi-join–based propagation of cardinalities and partial aggregates. An implementation in Spark SQL shows that this approach is widely applicable and yields substantial performance gains on standard benchmarks.
% ICDT 2026 short paper (LIPIcs vol. 365, article 24). Title is entered in Title Case
% without whole-title brace protection so the bibliography style controls casing.
@inproceedings{lanzinger_et_al:LIPIcs.ICDT.2026.24,
  author    = {Lanzinger, Matthias and Pichler, Reinhard and Selzer, Alexander},
  title     = {Database Theory in Action: Evaluation of Aggregate Queries Without Materialisation},
  booktitle = {29th International Conference on Database Theory (ICDT 2026)},
  pages     = {24:1--24:5},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  isbn      = {978-3-95977-413-0},
  issn      = {1868-8969},
  year      = {2026},
  volume    = {365},
  editor    = {ten Cate, Balder and Funk, Maurice},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.ICDT.2026.24},
  urn       = {urn:nbn:de:0030-drops-256380},
  doi       = {10.4230/LIPIcs.ICDT.2026.24},
  annote    = {Keywords: Join Processing, Aggregate Queries, Acyclic Conjunctive Queries},
}
archived version