Published in: LIPIcs, Volume 237, 13th International Conference on Interactive Theorem Proving (ITP 2022)
Jared Yeager, J. Eliot B. Moss, Michael Norrish, and Philip S. Thomas. Mechanizing Soundness of Off-Policy Evaluation. In 13th International Conference on Interactive Theorem Proving (ITP 2022). Leibniz International Proceedings in Informatics (LIPIcs), Volume 237, pp. 32:1-32:20, Schloss Dagstuhl – Leibniz-Zentrum für Informatik (2022)
@InProceedings{yeager_et_al:LIPIcs.ITP.2022.32,
author = {Yeager, Jared and Moss, J. Eliot B. and Norrish, Michael and Thomas, Philip S.},
title = {{Mechanizing Soundness of Off-Policy Evaluation}},
booktitle = {13th International Conference on Interactive Theorem Proving (ITP 2022)},
pages = {32:1--32:20},
series = {Leibniz International Proceedings in Informatics (LIPIcs)},
ISBN = {978-3-95977-252-5},
ISSN = {1868-8969},
year = {2022},
volume = {237},
editor = {Andronick, June and de Moura, Leonardo},
publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
address = {Dagstuhl, Germany},
URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.ITP.2022.32},
URN = {urn:nbn:de:0030-drops-167413},
doi = {10.4230/LIPIcs.ITP.2022.32},
annote = {Keywords: Formal Methods, HOL4, Reinforcement Learning, Off-Policy Evaluation, Concentration Inequality, Hoeffding}
}