Creative Commons Attribution 3.0 Unported license
Machine learning has been widely adopted in diverse science and engineering domains, aided by reusable libraries and quick development patterns. The TensorFlow library is probably the best-known representative of this trend and most users employ the Python API to its powerful back-end. TensorFlow programs are susceptible to several systematic errors, especially in the dynamic typing setting of Python. We present Pythia, a static analysis that tracks the shapes of tensors across Python library calls and warns of several possible mismatches. The key technical aspects are a close modeling of library semantics with respect to tensor shape, and an identification of violations and error-prone patterns. Pythia is powerful enough to statically detect (with 84.62% precision) 11 of the 14 shape-related TensorFlow bugs in the recent Zhang et al. empirical study -- an independent slice of real-world bugs.
% ECOOP 2020 paper, published in LIPIcs volume 166 (article 15).
% The title is stored in Title Case with only the proper noun "TensorFlow"
% brace-protected, so the bibliography style decides the final casing.
@inproceedings{lagouvardos_et_al:LIPIcs.ECOOP.2020.15,
  author    = {Lagouvardos, Sifis and Dolby, Julian and Grech, Neville and Antoniadis, Anastasios and Smaragdakis, Yannis},
  title     = {Static Analysis of Shape in {TensorFlow} Programs},
  booktitle = {34th European Conference on Object-Oriented Programming (ECOOP 2020)},
  editor    = {Hirschfeld, Robert and Pape, Tobias},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  volume    = {166},
  pages     = {15:1--15:29},
  year      = {2020},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  isbn      = {978-3-95977-154-2},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.ECOOP.2020.15},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.ECOOP.2020.15},
  urn       = {urn:nbn:de:0030-drops-131726},
  annote    = {Keywords: Python, TensorFlow, static analysis, Doop, Wala},
}