We revisit the k-mismatch problem in the streaming model on a pattern of length m and a streaming text of length n, both over a size-σ alphabet. The current state-of-the-art algorithm for the streaming k-mismatch problem, by Clifford et al. [SODA 2019], uses Õ(k) space and Õ(√k) worst-case time per character. The space complexity is known to be (unconditionally) optimal, and the worst-case time per character matches a conditional lower bound. However, there is a gap between the total time cost of the algorithm, which is Õ(n√k), and the fastest known offline algorithm, which costs Õ(n + min(nk/√m, σn)) time. Moreover, it is not known whether improvements over the Õ(n√k) total time are possible when using more than O(k) space. We address these gaps by designing a randomized streaming algorithm for the k-mismatch problem that, given an integer parameter k≤s≤m, uses Õ(s) space and costs Õ(n+min(nk²/m, nk/√s, σnm/s)) total time. For s=m, the total runtime becomes Õ(n + min(nk/√m, σn)), which matches the time cost of the fastest offline algorithm. Moreover, the worst-case time cost per character is still Õ(√k).
@InProceedings{golan_et_al:LIPIcs.CPM.2020.15, author = {Golan, Shay and Kociumaka, Tomasz and Kopelowitz, Tsvi and Porat, Ely}, title = {{The Streaming k-Mismatch Problem: Tradeoffs Between Space and Total Time}}, booktitle = {31st Annual Symposium on Combinatorial Pattern Matching (CPM 2020)}, pages = {15:1--15:15}, series = {Leibniz International Proceedings in Informatics (LIPIcs)}, ISBN = {978-3-95977-149-8}, ISSN = {1868-8969}, year = {2020}, volume = {161}, editor = {G{\o}rtz, Inge Li and Weimann, Oren}, publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik}, address = {Dagstuhl, Germany}, URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.CPM.2020.15}, URN = {urn:nbn:de:0030-drops-121406}, doi = {10.4230/LIPIcs.CPM.2020.15}, annote = {Keywords: Streaming pattern matching, Hamming distance, k-mismatch} }
Feedback for Dagstuhl Publishing