We revisit the fundamental problem of dictionary look-up with mismatches. Given a set (dictionary) of d strings of length m and an integer k, we must preprocess it into a data structure to answer the following queries: Given a query string Q of length m, find all strings in the dictionary that are at Hamming distance at most k from Q. Chan and Lewenstein (CPM 2015) showed a data structure for k = 1 with optimal query time O(m/w + occ), where w is the size of a machine word and occ is the size of the output. The data structure occupies O(w d log^{1+epsilon} d) extra bits of space (beyond the entropy-bounded space required to store the dictionary strings). In this work we give a solution with similar bounds for a much wider range of values of k. Namely, we give a data structure that has O(m/w + log^k d + occ) query time and uses O(w d log^k d) extra bits of space.
@inproceedings{gawrychowski_et_al:LIPIcs.MFCS.2018.66,
  author    = {Gawrychowski, Pawel and Landau, Gad M. and Starikovskaya, Tatiana},
  title     = {Fast Entropy-Bounded String Dictionary Look-Up with Mismatches},
  booktitle = {43rd International Symposium on Mathematical Foundations of Computer Science (MFCS 2018)},
  editor    = {Potapov, Igor and Spirakis, Paul and Worrell, James},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  volume    = {117},
  pages     = {66:1--66:15},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  year      = {2018},
  isbn      = {978-3-95977-086-6},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.MFCS.2018.66},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.MFCS.2018.66},
  urn       = {urn:nbn:de:0030-drops-96486},
  annote    = {Keywords: Dictionary look-up, Hamming distance, compact data structures},
}
Feedback for Dagstuhl Publishing