In this paper we describe compressed indexes that support pattern matching queries for strings with wildcards. For a constant size alphabet our data structure uses O(n.log^e(n)) bits for any e>0 and reports all occ occurrences of a wildcard string in O(m+s^g.M(n)+occ) time, where M(n)=o(log(log(log(n)))), s is the alphabet size, m is the number of alphabet symbols and g is the number of wildcard symbols in the query string. We also present an O(n)-bit index with O((m+s^g+occ).log^e(n)) query time and an O(n{log(log(n))}^2)-bit index with O((m+s^g+occ).log(log(n))) query time. These are the first non-trivial data structures for this problem that need o(n.log(n)) bits of space.
@InProceedings{lewenstein_et_al:LIPIcs.STACS.2014.506, author = {Lewenstein, Moshe and Nekrich, Yakov and Vitter, Jeffrey Scott}, title = {{Space-Efficient String Indexing for Wildcard Pattern Matching}}, booktitle = {31st International Symposium on Theoretical Aspects of Computer Science (STACS 2014)}, pages = {506--517}, series = {Leibniz International Proceedings in Informatics (LIPIcs)}, ISBN = {978-3-939897-65-1}, ISSN = {1868-8969}, year = {2014}, volume = {25}, editor = {Mayr, Ernst W. and Portier, Natacha}, publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik}, address = {Dagstuhl, Germany}, URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.STACS.2014.506}, URN = {urn:nbn:de:0030-drops-44838}, doi = {10.4230/LIPIcs.STACS.2014.506}, annote = {Keywords: compressed data structures, compressed indexes, pattern matching} }
Feedback for Dagstuhl Publishing