This report summarizes the outcomes of Dagstuhl Seminar 25142, which convened leading researchers and practitioners to address the pressing challenges in evaluating explainable artificial intelligence (XAI). The seminar focused on developing reusable experimental designs and robust evaluation frameworks that balance technical rigor with human-centered considerations. Key themes included the need for standardized metrics, the contextual relevance of evaluation criteria, and the integration of human understanding, trust, and reliance into assessment methodologies. Through a series of talks, collaborative discussions, and case studies across domains such as healthcare, hiring, and decision support, the seminar identified critical gaps in current XAI evaluation practices and proposed actionable strategies to bridge them. The report presents a refined taxonomy of evaluation criteria, practical guidance for experimental design, and a roadmap for future interdisciplinary collaboration in responsible and transparent AI development.
@Article{stumpf_et_al:DagRep.15.3.201,
  author    = {Stumpf, Simone and Teso, Stefano and Daly, Elizabeth M.},
  title     = {{Explainability in Focus: Advancing Evaluation through Reusable Experiment Design (Dagstuhl Seminar 25142)}},
  pages     = {201--224},
  journal   = {Dagstuhl Reports},
  ISSN      = {2192-5283},
  year      = {2025},
  volume    = {15},
  number    = {3},
  editor    = {Stumpf, Simone and Teso, Stefano and Daly, Elizabeth M.},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  URL       = {https://drops.dagstuhl.de/entities/document/10.4230/DagRep.15.3.201},
  URN       = {urn:nbn:de:0030-drops-248935},
  doi       = {10.4230/DagRep.15.3.201},
  annote    = {Keywords: Explainability, Mental Models, interactive machine learning, Experiment Design, Human-centered AI, Dagstuhl Seminar}
}