The non-overlapping indexing problem is defined as follows: pre-process a given text T[1,n] of length n into a data structure such that whenever a pattern P[1,p] comes as an input, we can efficiently report the largest set of non-overlapping occurrences of P in T. The best known solution is by Cohen and Porat [ISAAC, 2009]. Their index size is O(n) words and query time is optimal O(p+nocc), where nocc is the output size. We study this problem in the cache-oblivious model and present a new data structure of size O(n log n) words. It can answer queries in optimal O(p/B+log_B n+nocc/B) I/Os, where B is the block size.
% Conference paper published in the CPM 2018 proceedings (LIPIcs volume 105).
% The title carries a single pair of braces so the bibliography style decides casing.
% Accented letters are written as brace groups so BibTeX treats each as one letter.
@inproceedings{hooshmand_et_al:LIPIcs.CPM.2018.8,
  author    = {Hooshmand, Sahar and Abedin, Paniz and K{\"u}lekci, M. Oguzhan and Thankachan, Sharma V.},
  title     = {Non-Overlapping Indexing - Cache Obliviously},
  booktitle = {29th Annual Symposium on Combinatorial Pattern Matching (CPM 2018)},
  editor    = {Navarro, Gonzalo and Sankoff, David and Zhu, Binhai},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  volume    = {105},
  pages     = {8:1--8:9},
  year      = {2018},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  isbn      = {978-3-95977-074-3},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.CPM.2018.8},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.CPM.2018.8},
  urn       = {urn:nbn:de:0030-drops-87009},
  annote    = {Keywords: Suffix Trees, Cache Oblivious, Data Structure, String Algorithms},
}
Feedback for Dagstuhl Publishing