We consider the problem of principal component analysis (PCA) in the presence of outliers. Given a d × n matrix A and parameters k and m, the goal is to remove a set of at most m columns of A (the outliers) so as to minimize the rank-k approximation error of the remaining matrix (the inliers). While much of the work on this problem has focused on recovery of the rank-k subspace under assumptions on the inliers and outliers, we focus on the approximation problem. Our main result shows that sampling-based methods developed in the outlier-free case give non-trivial guarantees even in the presence of outliers. Using this insight, we develop a simple algorithm that has bi-criteria guarantees. Further, unlike similar formulations for clustering, we show that bi-criteria guarantees are unavoidable for the problem, under appropriate complexity assumptions.
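To make the objective concrete, the following Python sketch evaluates the quantity being minimized: the best rank-k approximation error of A after a candidate set of at most m columns is removed. The helper names (rank_k_error, objective, naive_outlier_removal) are illustrative, and the simple "drop the m columns with largest residual" heuristic shown here is only a baseline for intuition, not the sampling-based bi-criteria algorithm of the paper.

import numpy as np

def rank_k_error(A, k):
    # Squared Frobenius error of the best rank-k approximation of A,
    # i.e. the sum of squared singular values beyond the k-th.
    s = np.linalg.svd(A, compute_uv=False)
    return float(np.sum(s[k:] ** 2))

def objective(A, k, outliers):
    # Rank-k approximation error of the inlier columns
    # (the columns of A whose indices are not in `outliers`).
    inliers = np.delete(A, list(outliers), axis=1)
    return rank_k_error(inliers, k)

def naive_outlier_removal(A, k, m):
    # Illustrative heuristic (not the paper's algorithm): fit a rank-k
    # subspace to all columns, then discard the m columns with the
    # largest residuals with respect to that subspace.
    U, _, _ = np.linalg.svd(A, full_matrices=False)
    Uk = U[:, :k]
    residuals = np.linalg.norm(A - Uk @ (Uk.T @ A), axis=0)
    return set(np.argsort(residuals)[-m:].tolist())

For example, objective(A, k, naive_outlier_removal(A, k, m)) scores this baseline against the same criterion that a bi-criteria algorithm is measured by, where such an algorithm may remove somewhat more than m columns and/or use a somewhat larger rank.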
@InProceedings{bhaskara_et_al:LIPIcs.APPROX-RANDOM.2018.4,
  author =    {Bhaskara, Aditya and Kumar, Srivatsan},
  title =     {{Low Rank Approximation in the Presence of Outliers}},
  booktitle = {Approximation, Randomization, and Combinatorial Optimization. Algorithms and Techniques (APPROX/RANDOM 2018)},
  pages =     {4:1--4:16},
  series =    {Leibniz International Proceedings in Informatics (LIPIcs)},
  ISBN =      {978-3-95977-085-9},
  ISSN =      {1868-8969},
  year =      {2018},
  volume =    {116},
  editor =    {Blais, Eric and Jansen, Klaus and Rolim, Jos\'{e} D. P. and Steurer, David},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address =   {Dagstuhl, Germany},
  URL =       {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.APPROX-RANDOM.2018.4},
  URN =       {urn:nbn:de:0030-drops-94087},
  doi =       {10.4230/LIPIcs.APPROX-RANDOM.2018.4},
  annote =    {Keywords: Low rank approximation, PCA, Robustness to outliers}
}