We take a first step towards a rigorous asymptotic analysis of graph-based methods for finding (approximate) nearest neighbors in high-dimensional spaces, by analyzing the complexity of randomized greedy walks on the approximate nearest neighbor graph. For random data sets of size $n = 2^{o(d)}$ on the $d$-dimensional Euclidean unit sphere, using near neighbor graphs we can provably solve the approximate nearest neighbor problem with approximation factor $c > 1$ in query time $n^{\rho_q + o(1)}$ and space $n^{1 + \rho_s + o(1)}$, for arbitrary $\rho_q, \rho_s \geq 0$ satisfying $(2c^2 - 1) \rho_q + 2 c^2 (c^2 - 1) \sqrt{\rho_s (1 - \rho_s)} \geq c^4$. Graph-based near neighbor searching is especially competitive with hash-based methods for small $c$ and near-linear memory, and in this regime the asymptotic scaling of a greedy graph-based search matches optimal hash-based trade-offs of Andoni-Laarhoven-Razenshteyn-Waingarten [Andoni et al., 2017]. We further study how the trade-offs scale when the data set is of size $n = 2^{\Theta(d)}$, and analyze asymptotic complexities when applying these results to lattice sieving.
% Laarhoven, SoCG 2018 (LIPIcs vol. 99, article 57). Title is single-braced so
% the bibliography style controls casing; acronyms are brace-protected.
@inproceedings{laarhoven:LIPIcs.SoCG.2018.57,
  author    = {Laarhoven, Thijs},
  title     = {Graph-Based Time-Space Trade-Offs for Approximate Near Neighbors},
  booktitle = {34th International Symposium on Computational Geometry ({SoCG} 2018)},
  editor    = {Speckmann, Bettina and T{\'o}th, Csaba D.},
  series    = {Leibniz International Proceedings in Informatics ({LIPIcs})},
  volume    = {99},
  pages     = {57:1--57:14},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  year      = {2018},
  isbn      = {978-3-95977-066-8},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.SoCG.2018.57},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.SoCG.2018.57},
  urn       = {urn:nbn:de:0030-drops-87700},
  annote    = {Keywords: approximate nearest neighbor problem, near neighbor graphs, locality-sensitive hashing, locality-sensitive filters, similarity search},
}