,
Ocan Sankur
,
Christel Baier
Creative Commons Attribution 4.0 International license
The stochastic shortest path problem (SSPP) asks to resolve the non-deterministic choices in a Markov decision process (MDP) such that the expected accumulated weight before reaching a target state is maximized. This paper addresses the optimization of the variance-penalized expectation (VPE) of the accumulated weight, which is a variant of the SSPP in which a multiple of the variance of accumulated weights is incurred as a penalty. It is shown that the optimal VPE in MDPs with non-negative weights as well as an optimal deterministic finite-memory scheduler can be computed in exponential space. The threshold problem whether the maximal VPE exceeds a given rational is shown to be EXPTIME-hard and to lie in NEXPTIME. Furthermore, a result of interest in its own right obtained on the way is that a variance-minimal scheduler among all expectation-optimal schedulers can be computed in polynomial time.
@InProceedings{piribauer_et_al:LIPIcs.ICALP.2022.129,
author = {Piribauer, Jakob and Sankur, Ocan and Baier, Christel},
title = {{The Variance-Penalized Stochastic Shortest Path Problem}},
booktitle = {49th International Colloquium on Automata, Languages, and Programming (ICALP 2022)},
pages = {129:1--129:19},
series = {Leibniz International Proceedings in Informatics (LIPIcs)},
ISBN = {978-3-95977-235-8},
ISSN = {1868-8969},
year = {2022},
volume = {229},
editor = {Boja\'{n}czyk, Miko{\l}aj and Merelli, Emanuela and Woodruff, David P.},
publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
address = {Dagstuhl, Germany},
URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.ICALP.2022.129},
URN = {urn:nbn:de:0030-drops-164705},
doi = {10.4230/LIPIcs.ICALP.2022.129},
annote = {Keywords: Markov decision process, variance, stochastic shortest path problem}
}