We evaluate the folk wisdom that algorithmic decision rules trained on data produced by biased human decision-makers necessarily reflect this bias. We consider a setting where training labels are generated only if a biased decision-maker takes a particular action, so "biased" training data arise through discriminatory selection into the dataset. In our baseline model, the more biased the decision-maker is against a group, the more the algorithmic decision rule favors that group. We refer to this phenomenon as bias reversal. We then clarify the conditions that give rise to bias reversal: whether a prediction algorithm reverses or inherits bias depends critically on how the decision-maker affects the training data and on the label used in training. We illustrate our main theoretical results in a simulation study applied to the New York City Stop, Question and Frisk dataset.
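
To make the selective-labels mechanism concrete, the following is a minimal simulation sketch (not the authors' code) under stylized assumptions: the decision-maker observes a private signal v that the algorithm never sees, applies a lower stop threshold to group B (bias against B), and the outcome label is observed only for stopped individuals. All parameter values are hypothetical.

# Minimal sketch of bias reversal under selective labels (hypothetical parameters).
import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
n = 200_000

# Group indicator: 0 = A, 1 = B; both groups have the same true risk distribution.
g = rng.integers(0, 2, size=n)
x = rng.normal(size=n)          # covariate recorded in the training data
v = rng.normal(size=n)          # private signal seen only by the decision-maker

# True risk and realized outcome (e.g., contraband found if stopped).
p = 1 / (1 + np.exp(-(x + v - 1.0)))
y = rng.binomial(1, p)

# Biased decision-maker: stops anyone whose perceived risk x + v exceeds a
# group-specific threshold, with a lower threshold for group B.
threshold = np.where(g == 1, 0.5, 1.5)
stopped = (x + v) > threshold

# Selective labels: the outcome is observed only for stopped individuals.
X_train = np.column_stack([x[stopped], g[stopped]])
y_train = y[stopped]

clf = LogisticRegression().fit(X_train, y_train)
print("coefficient on group B indicator:", clf.coef_[0][1])
# The coefficient is negative: at the same x, the stopped members of group B
# include more marginal (low-v) individuals, so the trained rule predicts
# lower risk for group B -- the group the decision-maker was biased against.

In this stylized setting, making the decision-maker more biased against group B (lowering its threshold further) makes the group-B coefficient more negative, i.e., the learned rule favors group B more, which is the bias-reversal phenomenon described in the abstract.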
@InProceedings{rambachan_et_al:LIPIcs.FORC.2020.6,
author = {Rambachan, Ashesh and Roth, Jonathan},
title = {{Bias In, Bias Out? Evaluating the Folk Wisdom}},
booktitle = {1st Symposium on Foundations of Responsible Computing (FORC 2020)},
pages = {6:1--6:15},
series = {Leibniz International Proceedings in Informatics (LIPIcs)},
ISBN = {978-3-95977-142-9},
ISSN = {1868-8969},
year = {2020},
volume = {156},
editor = {Roth, Aaron},
publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
address = {Dagstuhl, Germany},
URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.FORC.2020.6},
URN = {urn:nbn:de:0030-drops-120225},
doi = {10.4230/LIPIcs.FORC.2020.6},
annote = {Keywords: fairness, selective labels, discrimination, training data}
}