This paper studies grading algorithms for randomized exams. In a randomized exam, each student is asked a small number of random questions from a large question bank. The predominant grading rule is simple averaging, i.e., calculating grades by averaging scores on the questions each student is asked, which is fair ex-ante, over the randomized questions, but not fair ex-post, on the realized questions. The fair grading problem is to estimate the average grade of each student on the full question bank. The maximum-likelihood estimator for the Bradley-Terry-Luce model on the bipartite student-question graph is shown to be consistent with high probability when the number of questions asked to each student is at least the cubed-logarithm of the number of students. In an empirical study on exam data and in simulations, our algorithm based on the maximum-likelihood estimator significantly outperforms simple averaging in prediction accuracy and ex-post fairness even with a small class and exam size.
% Conference paper in the FORC 2023 proceedings (LIPIcs volume 256).
% The title is not wrapped in an extra brace pair, so the bibliography style
% controls its casing; it has no acronyms or proper nouns that need protection.
@inproceedings{chen_et_al:LIPIcs.FORC.2023.7,
  author    = {Chen, Jiale and Hartline, Jason and Zoeter, Onno},
  title     = {Fair Grading Algorithms for Randomized Exams},
  booktitle = {4th Symposium on Foundations of Responsible Computing ({FORC} 2023)},
  editor    = {Talwar, Kunal},
  series    = {Leibniz International Proceedings in Informatics ({LIPIcs})},
  volume    = {256},
  pages     = {7:1--7:22},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  year      = {2023},
  isbn      = {978-3-95977-272-3},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.FORC.2023.7},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.FORC.2023.7},
  urn       = {urn:nbn:de:0030-drops-179282},
  annote    = {Keywords: Ex-ante and Ex-post Fairness, Item Response Theory, Algorithmic Fairness in Education},
}
Feedback for Dagstuhl Publishing