% Conference paper from ITP 2022, published as article 32 of LIPIcs volume 237.
% Only acronyms are brace-protected; the title is left unbraced so the
% bibliography style controls its casing.
@inproceedings{yeager_et_al:LIPIcs.ITP.2022.32,
  author    = {Yeager, Jared and Moss, J. Eliot B. and Norrish, Michael and Thomas, Philip S.},
  title     = {Mechanizing Soundness of Off-Policy Evaluation},
  booktitle = {13th International Conference on Interactive Theorem Proving ({ITP} 2022)},
  editor    = {Andronick, June and de Moura, Leonardo},
  series    = {Leibniz International Proceedings in Informatics ({LIPIcs})},
  volume    = {237},
  pages     = {32:1--32:20},
  year      = {2022},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  isbn      = {978-3-95977-252-5},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.ITP.2022.32},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.ITP.2022.32},
  urn       = {urn:nbn:de:0030-drops-167413},
  annote    = {Keywords: Formal Methods, HOL4, Reinforcement Learning, Off-Policy Evaluation, Concentration Inequality, Hoeffding},
}