Probabilistic programming languages are used for developing statistical models, and they typically consist of two components: a specification of a stochastic process (the prior), and a specification of observations that restrict the probability space to a conditional subspace (the posterior). Use cases of such formalisms include the development of algorithms in machine learning and artificial intelligence. We propose and investigate an extension of Datalog for specifying statistical models, and establish a declarative probabilistic-programming paradigm over databases. Our proposed extension provides convenient mechanisms to include common numerical probability functions; in particular, conclusions of rules may contain values drawn from such functions. The semantics of a program is a probability distribution over the possible outcomes of the input database with respect to the program. Observations are naturally incorporated by means of integrity constraints over the extensional and intensional relations. The resulting semantics is robust under different chases and invariant to rewritings that preserve logical equivalence.
% Conference paper in the ICDT 2016 proceedings (LIPIcs volume 48), pages 7:1--7:19.
% Only case-sensitive words are brace-protected, so a style may recase the rest.
@inproceedings{barany_et_al:LIPIcs.ICDT.2016.7,
  author    = {B{\'a}r{\'a}ny, Vince and ten Cate, Balder and Kimelfeld, Benny and Olteanu, Dan and Vagena, Zografoula},
  title     = {Declarative Probabilistic Programming with {Datalog}},
  booktitle = {19th International Conference on Database Theory ({ICDT} 2016)},
  editor    = {Martens, Wim and Zeume, Thomas},
  series    = {Leibniz International Proceedings in Informatics ({LIPIcs})},
  volume    = {48},
  pages     = {7:1--7:19},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  year      = {2016},
  isbn      = {978-3-95977-002-6},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.ICDT.2016.7},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.ICDT.2016.7},
  urn       = {urn:nbn:de:0030-drops-57761},
  annote    = {Keywords: Chase, Datalog, probability measure space, probabilistic programming},
}
Feedback for Dagstuhl Publishing