The stochastic shortest path problem (SSPP) asks to resolve the non-deterministic choices in a Markov decision process (MDP) such that the expected accumulated weight before reaching a target state is maximized. This paper addresses the optimization of the variance-penalized expectation (VPE) of the accumulated weight, which is a variant of the SSPP in which a multiple of the variance of accumulated weights is incurred as a penalty. It is shown that the optimal VPE in MDPs with non-negative weights as well as an optimal deterministic finite-memory scheduler can be computed in exponential space. The threshold problem whether the maximal VPE exceeds a given rational is shown to be EXPTIME-hard and to lie in NEXPTIME. Furthermore, a result of interest in its own right obtained on the way is that a variance-minimal scheduler among all expectation-optimal schedulers can be computed in polynomial time.
@InProceedings{piribauer_et_al:LIPIcs.ICALP.2022.129, author = {Piribauer, Jakob and Sankur, Ocan and Baier, Christel}, title = {{The Variance-Penalized Stochastic Shortest Path Problem}}, booktitle = {49th International Colloquium on Automata, Languages, and Programming (ICALP 2022)}, pages = {129:1--129:19}, series = {Leibniz International Proceedings in Informatics (LIPIcs)}, ISBN = {978-3-95977-235-8}, ISSN = {1868-8969}, year = {2022}, volume = {229}, editor = {Boja\'{n}czyk, Miko{\l}aj and Merelli, Emanuela and Woodruff, David P.}, publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik}, address = {Dagstuhl, Germany}, URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.ICALP.2022.129}, URN = {urn:nbn:de:0030-drops-164705}, doi = {10.4230/LIPIcs.ICALP.2022.129}, annote = {Keywords: Markov decision process, variance, stochastic shortest path problem} }
Feedback for Dagstuhl Publishing