We describe a novel way to represent the probability distribution of a random binary string as a mixture having a maximally weighted component associated with independent (though not necessarily identically distributed) Bernoulli characters. We refer to this as the latent independent weight of the probabilistic source producing the string, and derive a combinatorial algorithm to compute it. The decomposition we propose may serve as an alternative to the Boolean paradigm of hypothesis testing, or to assess the fraction of uncorrupted samples originating from a source with independent marginal distributions. In this sense, the latent independent weight quantifies the maximal amount of independence contained within a probabilistic source, which, properly speaking, may not have independent marginal distributions.
% Conference paper in the AofA 2020 proceedings (LIPIcs volume 159).
% The title is stored in Title Case without whole-title braces so the
% bibliography style controls casing; only acronyms are brace-protected.
@inproceedings{pearson_et_al:LIPIcs.AofA.2020.23,
  author    = {Pearson, Antony and Lladser, Manuel E.},
  title     = {Hidden Independence in Unstructured Probabilistic Models},
  booktitle = {31st International Conference on Probabilistic, Combinatorial and Asymptotic Methods for the Analysis of Algorithms ({AofA} 2020)},
  editor    = {Drmota, Michael and Heuberger, Clemens},
  series    = {Leibniz International Proceedings in Informatics ({LIPIcs})},
  volume    = {159},
  pages     = {23:1--23:13},
  year      = {2020},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  isbn      = {978-3-95977-147-4},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.AofA.2020.23},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.AofA.2020.23},
  urn       = {urn:nbn:de:0030-drops-120538},
  annote    = {Keywords: Bayesian networks, contamination, latent weights, mixture models, independence, symbolic data},
}
Feedback for Dagstuhl Publishing