Chaining algorithms aim to form a semi-global alignment of two sequences based on a set of anchoring local alignments as input. Depending on the optimization criteria and the exact definition of a chain, there are several O(n log n) time algorithms to solve this problem optimally, where n is the number of input anchors. In this paper, we focus on a formulation allowing the anchors to overlap in a chain. This formulation was studied by Shibuya and Kurochkin (WABI 2003), but their algorithm comes with no proof of correctness. We revisit and modify their algorithm to consider a strict definition of precedence relation on anchors, adding the required derivation to convince on the correctness of the resulting algorithm that runs in O(n log² n) time on anchors formed by exact matches. With the more relaxed definition of precedence relation considered by Shibuya and Kurochkin or when anchors are non-nested such as matches of uniform length (k-mers), the algorithm takes O(n log n) time. We also establish a connection between chaining with overlaps and the widely studied longest common subsequence problem.
@InProceedings{makinen_et_al:LIPIcs.CPM.2020.25, author = {M\"{a}kinen, Veli and Sahlin, Kristoffer}, title = {{Chaining with Overlaps Revisited}}, booktitle = {31st Annual Symposium on Combinatorial Pattern Matching (CPM 2020)}, pages = {25:1--25:12}, series = {Leibniz International Proceedings in Informatics (LIPIcs)}, ISBN = {978-3-95977-149-8}, ISSN = {1868-8969}, year = {2020}, volume = {161}, editor = {G{\o}rtz, Inge Li and Weimann, Oren}, publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik}, address = {Dagstuhl, Germany}, URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.CPM.2020.25}, URN = {urn:nbn:de:0030-drops-121500}, doi = {10.4230/LIPIcs.CPM.2020.25}, annote = {Keywords: Sparse Dynamic Programming, Chaining, Maximal Exact Matches, Longest Common Subsequence} }
Feedback for Dagstuhl Publishing