% Yeager, Moss, Norrish, Thomas: "Mechanizing Soundness of Off-Policy
% Evaluation", ITP 2022 (LIPIcs vol. 237, article 32).
% The title is deliberately NOT double-braced so the bibliography style
% controls its casing; it contains no acronyms or proper nouns that need
% brace protection. Acronyms in booktitle/series are braced individually.
% urn and annote are not standard fields; classic styles ignore them.
@InProceedings{yeager_et_al:LIPIcs.ITP.2022.32,
  author    = {Yeager, Jared and Moss, J. Eliot B. and Norrish, Michael and Thomas, Philip S.},
  title     = {Mechanizing Soundness of Off-Policy Evaluation},
  booktitle = {13th International Conference on Interactive Theorem Proving ({ITP} 2022)},
  editor    = {Andronick, June and de Moura, Leonardo},
  series    = {Leibniz International Proceedings in Informatics ({LIPIcs})},
  volume    = {237},
  pages     = {32:1--32:20},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  year      = {2022},
  isbn      = {978-3-95977-252-5},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.ITP.2022.32},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.ITP.2022.32},
  urn       = {urn:nbn:de:0030-drops-167413},
  annote    = {Keywords: Formal Methods, HOL4, Reinforcement Learning, Off-Policy Evaluation, Concentration Inequality, Hoeffding},
}