Creative Commons Attribution 3.0 Unported license
Transcriptomic structural variants (TSVs) - large-scale transcriptome sequence change due to structural variation - are common, especially in cancer. Detecting TSVs is a challenging computational problem. Sample heterogeneity (including differences between alleles in diploid organisms) is a critical confounding factor when identifying TSVs. To improve TSV detection in heterogeneous RNA-seq samples, we introduce the Multiple Compatible Arrangement Problem (MCAP), which seeks k genome rearrangements to maximize the number of reads that are concordant with at least one rearrangement. This directly models the situation of a heterogeneous or diploid sample. We prove that MCAP is NP-hard and provide a 1/4-approximation algorithm for k=1 and a 3/4-approximation algorithm for the diploid case (k=2) assuming an oracle for k=1. Combining these, we obtain a 3/16-approximation algorithm for MCAP when k=2 (without an oracle). We also present an integer linear programming formulation for general k. We characterize the graph structures that require k>1 to satisfy all edges and show such structures are prevalent in cancer samples. We evaluate our algorithms on 381 TCGA samples and 2 cancer cell lines and show improved performance compared to the state-of-the-art TSV-calling tool, SQUID.
@InProceedings{qiu_et_al:LIPIcs.WABI.2019.18,
author = {Qiu, Yutong and Ma, Cong and Xie, Han and Kingsford, Carl},
title = {{Detecting Transcriptomic Structural Variants in Heterogeneous Contexts via the Multiple Compatible Arrangements Problem}},
booktitle = {19th International Workshop on Algorithms in Bioinformatics (WABI 2019)},
pages = {18:1--18:5},
series = {Leibniz International Proceedings in Informatics (LIPIcs)},
ISBN = {978-3-95977-123-8},
ISSN = {1868-8969},
year = {2019},
volume = {143},
editor = {Huber, Katharina T. and Gusfield, Dan},
publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
address = {Dagstuhl, Germany},
URL = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.WABI.2019.18},
URN = {urn:nbn:de:0030-drops-110483},
doi = {10.4230/LIPIcs.WABI.2019.18},
annote = {Keywords: transcriptomic structural variation, integer linear programming, heterogeneity}
}