We study countably infinite Markov decision processes with Büchi objectives, which ask to visit a given subset F of states infinitely often. A question left open by T.P. Hill in 1979 [Theodore Preston Hill, 1979] is whether there always exist epsilon-optimal Markov strategies, i.e., strategies that base decisions only on the current state and the number of steps taken so far. We provide a negative answer to this question by constructing a non-trivial counterexample. On the other hand, we show that Markov strategies with only 1 bit of extra memory are sufficient.
% ICALP 2019 paper, LIPIcs volume 132. Only the proper noun and the acronym in
% the title are brace-protected, so bibliography styles can still recase the rest.
@inproceedings{kiefer_et_al:LIPIcs.ICALP.2019.119,
  author    = {Kiefer, Stefan and Mayr, Richard and Shirmohammadi, Mahsa and Totzke, Patrick},
  title     = {{B{\"u}chi} Objectives in Countable {MDPs}},
  booktitle = {46th International Colloquium on Automata, Languages, and Programming (ICALP 2019)},
  editor    = {Baier, Christel and Chatzigiannakis, Ioannis and Flocchini, Paola and Leonardi, Stefano},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  volume    = {132},
  pages     = {119:1--119:14},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  year      = {2019},
  isbn      = {978-3-95977-109-2},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.ICALP.2019.119},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.ICALP.2019.119},
  urn       = {urn:nbn:de:0030-drops-106959},
  annote    = {Keywords: Markov decision processes},
}
Feedback for Dagstuhl Publishing