We consider multiple-environment Markov decision processes (MEMDP), which consist of a finite set of MDPs over the same state space, representing different scenarios of transition structure and probability. The value of a strategy is the probability to satisfy the objective, here a parity objective, in the worst-case scenario, and the value of an MEMDP is the supremum of the values achievable by a strategy. We show that deciding whether the value is 1 is a PSPACE-complete problem, and even in P when the number of environments is fixed, along with new insights into the almost-sure winning problem, which is to decide if there exists a strategy with value 1. Pure strategies are sufficient for these problems, whereas randomization is necessary in general when the value is smaller than 1. We present an algorithm to approximate the value, running in double exponential space. Our results are in contrast to the related model of partially-observable MDPs where all these problems are known to be undecidable.
@inproceedings{chatterjee_et_al:LIPIcs.ICALP.2025.150,
  author    = {Chatterjee, Krishnendu and Doyen, Laurent and Raskin, Jean-Fran{\c{c}}ois and Sankur, Ocan},
  title     = {The Value Problem for Multiple-Environment {MDPs} with Parity Objective},
  booktitle = {52nd International Colloquium on Automata, Languages, and Programming ({ICALP} 2025)},
  pages     = {150:1--150:17},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  isbn      = {978-3-95977-372-0},
  issn      = {1868-8969},
  year      = {2025},
  volume    = {334},
  editor    = {Censor-Hillel, Keren and Grandoni, Fabrizio and Ouaknine, Jo{\"e}l and Puppis, Gabriele},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.ICALP.2025.150},
  urn       = {urn:nbn:de:0030-drops-235272},
  doi       = {10.4230/LIPIcs.ICALP.2025.150},
  annote    = {Keywords: Markov decision processes, imperfect information, randomized strategies, limit-sure winning},
}
Feedback for Dagstuhl Publishing