% CONCUR 2018 paper published in LIPIcs volume 118 (article 8, pages 8:1--8:18).
% Accented letters use the brace-group form ({\'y}, {\'e}, {\c{c}}) so classic
% BibTeX treats each as a single letter; only the acronym in the title is
% brace-protected, leaving the rest of the title's casing to the style.
@inproceedings{kretinsky_et_al:LIPIcs.CONCUR.2018.8,
  author    = {Kret{\'\i}nsk{\'y}, Jan and P{\'e}rez, Guillermo A. and Raskin, Jean-Fran{\c{c}}ois},
  title     = {Learning-Based Mean-Payoff Optimization in an Unknown {MDP} under Omega-Regular Constraints},
  booktitle = {29th International Conference on Concurrency Theory (CONCUR 2018)},
  editor    = {Schewe, Sven and Zhang, Lijun},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  volume    = {118},
  pages     = {8:1--8:18},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  year      = {2018},
  isbn      = {978-3-95977-087-3},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.CONCUR.2018.8},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.CONCUR.2018.8},
  urn       = {urn:nbn:de:0030-drops-95468},
  annote    = {Keywords: Markov decision processes, Reinforcement learning, Beyond worst case},
}