Creative Commons Attribution 4.0 International license
We study the problem of learning junta distributions on {±1}ⁿ, where a distribution is a k-junta if its probability mass function depends on a subset of at most k variables. We make two main contributions:
- We show that learning k-junta distributions is computationally equivalent to learning k-parity functions with noise (LPN), a landmark problem in computational learning theory.
- We design an algorithm for learning junta distributions whose statistical complexity is optimal, up to polylogarithmic factors. Computationally, our algorithm matches the complexity of previous (non-sample-optimal) algorithms. Combined, our two contributions imply that our algorithm cannot be significantly improved, statistically or computationally, barring a breakthrough for LPN.
@inproceedings{beretta:LIPIcs.APPROX/RANDOM.2025.31,
  author    = {Beretta, Lorenzo},
  title     = {New Statistical and Computational Results for Learning Junta Distributions},
  booktitle = {Approximation, Randomization, and Combinatorial Optimization. Algorithms and Techniques (APPROX/RANDOM 2025)},
  pages     = {31:1--31:23},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  isbn      = {978-3-95977-397-3},
  issn      = {1868-8969},
  year      = {2025},
  volume    = {353},
  editor    = {Ene, Alina and Chattopadhyay, Eshan},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.APPROX/RANDOM.2025.31},
  urn       = {urn:nbn:de:0030-drops-243978},
  doi       = {10.4230/LIPIcs.APPROX/RANDOM.2025.31},
  annote    = {Keywords: Junta Distributions, Learning Parities with Noise}
}