Given a threshold L and a set R = {R_1, ..., R_m} of m strings (haplotype sequences), each having length n, the minimum segmentation problem for founder reconstruction is to partition [1,n] into set P of disjoint segments such that each segment [a,b] in P has length at least L and the number d(a,b)=|{R_i[a,b] : 1 <= i <= m}| of distinct substrings at segment [a,b] is minimized over [a,b] in P. The distinct substrings in the segments represent founder blocks that can be concatenated to form max{d(a,b) : [a,b] in P} founder sequences representing the original R such that crossovers happen only at segment boundaries. We give an optimal O(mn) time algorithm to solve the problem, improving over earlier O(mn^2). This improvement enables to exploit the algorithm on a pan-genomic setting of input strings being aligned haplotype sequences of complete human chromosomes, with a goal of finding a representative set of references that can be indexed for read alignment and variant calling. We implemented the new algorithm and give some experimental evidence on the practicality of the approach on this pan-genomic setting.
@InProceedings{norri_et_al:LIPIcs.WABI.2018.15, author = {Norri, Tuukka and Cazaux, Bastien and Kosolobov, Dmitry and M\"{a}kinen, Veli}, title = {{Minimum Segmentation for Pan-genomic Founder Reconstruction in Linear Time}}, booktitle = {18th International Workshop on Algorithms in Bioinformatics (WABI 2018)}, pages = {15:1--15:15}, series = {Leibniz International Proceedings in Informatics (LIPIcs)}, ISBN = {978-3-95977-082-8}, ISSN = {1868-8969}, year = {2018}, volume = {113}, editor = {Parida, Laxmi and Ukkonen, Esko}, publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik}, address = {Dagstuhl, Germany}, URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.WABI.2018.15}, URN = {urn:nbn:de:0030-drops-93175}, doi = {10.4230/LIPIcs.WABI.2018.15}, annote = {Keywords: Pan-genome indexing, founder reconstruction, dynamic programming, positional Burrows-Wheeler transform, range minimum query} }
Feedback for Dagstuhl Publishing