Creative Commons Attribution 4.0 International license
We identify a fundamental incompatibility between the goals of accuracy, trust, and human-level reasoning in artificial intelligence (AI) systems, for strict mathematical definitions of these notions. We define accuracy of a system as the property that it never makes any false claims when it has the ability to abstain from making a prediction on any input, and trust as the assumption that the system is accurate. We define human-level reasoning as the property of an AI system always matching or exceeding human capability. Our core finding is that — for our formal definitions of these notions — an accurate and trusted AI system cannot be a human-level reasoning system: for such an accurate, trusted system there are task instances which are easily and provably solvable by a human but not by the system. Our proofs draw parallels to Gödel’s incompleteness theorems and Turing’s proof of the undecidability of the halting problem, and can be regarded as interpretations of Gödel’s and Turing’s results. Key to our proof is the formalization of the notion of trust, which allows us to separate the intrinsic property of a system (being accurate) from its epistemic status (being trusted).
% FORC 2026 paper (LIPIcs vol. 368, pages 11:1--11:21). The title is
% single-braced so that bibliography styles can apply their own casing; it
% contains no acronym or proper noun that needs brace protection.
@inproceedings{panigrahy_et_al:LIPIcs.FORC.2026.11,
  author    = {Panigrahy, Rina and Sharan, Vatsal},
  title     = {Limitations on Accurate, Trusted, Human-Level Reasoning},
  booktitle = {7th Symposium on Foundations of Responsible Computing (FORC 2026)},
  editor    = {Lin, Huijia (Rachel)},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  volume    = {368},
  pages     = {11:1--11:21},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  year      = {2026},
  isbn      = {978-3-95977-419-2},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.FORC.2026.11},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.FORC.2026.11},
  urn       = {urn:nbn:de:0030-drops-259840},
  annote    = {Keywords: Accuracy, Safety, Trust, Complexity-theoretic limitations}
}