This work provides a simplified proof of the statistical minimax optimality of (iterate-averaged) stochastic gradient descent (SGD) for the special case of least squares. The result is obtained by analyzing SGD as a stochastic process and by sharply characterizing the stationary covariance matrix of this process. The finite-rate optimality characterization captures constant factors and addresses model mis-specification.
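As a rough illustration of the setting (not the authors' code), the Python sketch below runs constant-step-size SGD with iterate averaging on a synthetic least-squares problem; the dimension, step size, noise level, and data distribution are assumptions made here purely for illustration.

import numpy as np

# Minimal sketch of iterate-averaged SGD for least squares on synthetic data.
# All constants below are illustrative choices, not the paper's settings.
rng = np.random.default_rng(0)
d, n_steps, step_size, noise_std = 10, 20000, 0.05, 0.5

w_star = rng.normal(size=d)          # true parameter (well-specified model)
w = np.zeros(d)                      # current SGD iterate
w_bar = np.zeros(d)                  # running average of the iterates

for t in range(1, n_steps + 1):
    x = rng.normal(size=d)                       # fresh covariate x_t
    y = x @ w_star + noise_std * rng.normal()    # noisy response y_t
    grad = (x @ w - y) * x                       # stochastic gradient of 0.5*(x'w - y)^2
    w = w - step_size * grad                     # constant-step-size SGD update
    w_bar += (w - w_bar) / t                     # online average of the iterates

# Excess risk of the averaged iterate, assuming identity covariance E[xx'] = I
excess_risk = 0.5 * np.sum((w_bar - w_star) ** 2)
print(f"excess risk of averaged iterate: {excess_risk:.4f}")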
@InProceedings{jain_et_al:LIPIcs.FSTTCS.2017.2,
  author =    {Jain, Prateek and Kakade, Sham M. and Kidambi, Rahul and Netrapalli, Praneeth and Pillutla, Venkata Krishna and Sidford, Aaron},
  title =     {{A Markov Chain Theory Approach to Characterizing the Minimax Optimality of Stochastic Gradient Descent (for Least Squares)}},
  booktitle = {37th IARCS Annual Conference on Foundations of Software Technology and Theoretical Computer Science (FSTTCS 2017)},
  pages =     {2:1--2:10},
  series =    {Leibniz International Proceedings in Informatics (LIPIcs)},
  ISBN =      {978-3-95977-055-2},
  ISSN =      {1868-8969},
  year =      {2018},
  volume =    {93},
  editor =    {Lokam, Satya and Ramanujam, R.},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address =   {Dagstuhl, Germany},
  URL =       {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.FSTTCS.2017.2},
  URN =       {urn:nbn:de:0030-drops-83941},
  doi =       {10.4230/LIPIcs.FSTTCS.2017.2},
  annote =    {Keywords: Stochastic Gradient Descent, Minimax Optimality, Least Squares Regression}
}