The k-mappability problem has two integers parameters m and k. For every subword of size m in a text S, we wish to report the number of indices in S in which the word occurs with at most k mismatches. The problem was lately tackled by Alzamel et al. [Mai Alzamel et al., 2018]. For a text with constant alphabet Σ and k ∈ O(1), they present an algorithm with linear space and O(nlog^{k+1}n) time. For the case in which k = 1 and a constant size alphabet, a faster algorithm with linear space and O(nlog(n)log log(n)) time was presented in [Mai Alzamel et al., 2020]. In this work, we enhance the techniques of [Mai Alzamel et al., 2020] to obtain an algorithm with linear space and O(n log(n)) time for k = 1. Our algorithm removes the constraint of the alphabet being of constant size. We also present linear algorithms for the case of k = 1, |Σ| ∈ O(1) and m = Ω(√n).
@InProceedings{amir_et_al:LIPIcs.CPM.2021.5, author = {Amir, Amihood and Boneh, Itai and Kondratovsky, Eitan}, title = {{The k-Mappability Problem Revisited}}, booktitle = {32nd Annual Symposium on Combinatorial Pattern Matching (CPM 2021)}, pages = {5:1--5:20}, series = {Leibniz International Proceedings in Informatics (LIPIcs)}, ISBN = {978-3-95977-186-3}, ISSN = {1868-8969}, year = {2021}, volume = {191}, editor = {Gawrychowski, Pawe{\l} and Starikovskaya, Tatiana}, publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik}, address = {Dagstuhl, Germany}, URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.CPM.2021.5}, URN = {urn:nbn:de:0030-drops-139566}, doi = {10.4230/LIPIcs.CPM.2021.5}, annote = {Keywords: Pattern Matching, Hamming Distance, Suffix Tree, Suffix Array} }
Feedback for Dagstuhl Publishing