We present Kohdista, which is an index-based algorithm for finding pairwise alignments between single molecule maps (Rmaps). The novelty of our approach is the formulation of the alignment problem as automaton path matching, and the application of modern index-based data structures. In particular, we combine the use of the Generalized Compressed Suffix Array (GCSA) index with the wavelet tree in order to build Kohdista. We validate Kohdista on simulated E. coli data, showing that the approach successfully finds alignments between Rmaps simulated from overlapping genomic regions. Lastly, we demonstrate that Kohdista is the only method that is capable of finding a significant number of high-quality pairwise Rmap alignments for large eukaryotic organisms in reasonable time. Kohdista is available at https://github.com/mmuggli/KOHDISTA/.
% Conference paper in the WABI 2018 proceedings (LIPIcs volume 113).
% Only the term "Rmap" and venue acronyms are brace-protected, so the
% bibliography style remains free to apply its own title casing.
% "pages" holds the LIPIcs article-number form (article 12, pages 1--16).
@inproceedings{muggli_et_al:LIPIcs.WABI.2018.12,
  author    = {Muggli, Martin D. and Puglisi, Simon J. and Boucher, Christina},
  title     = {A Succinct Solution to {Rmap} Alignment},
  booktitle = {18th International Workshop on Algorithms in Bioinformatics ({WABI} 2018)},
  editor    = {Parida, Laxmi and Ukkonen, Esko},
  series    = {Leibniz International Proceedings in Informatics ({LIPIcs})},
  volume    = {113},
  pages     = {12:1--12:16},
  year      = {2018},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  isbn      = {978-3-95977-082-8},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.WABI.2018.12},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.WABI.2018.12},
  urn       = {urn:nbn:de:0030-drops-93143},
  annote    = {Keywords: Optical mapping, index based data structures, FM-index, graph algorithms},
}
Feedback for Dagstuhl Publishing