We consider the reconstruction of a phylogeny from multiple genes under the multispecies coalescent. We establish a connection with the sparse signal detection problem, where one seeks to distinguish between a distribution and a mixture of the distribution and a sparse signal. Using this connection, we derive an information-theoretic trade-off between the number of genes needed for an accurate reconstruction and the sequence length of the genes.
% Entry for the APPROX/RANDOM 2015 paper by Mossel and Roch (LIPIcs volume 40).
% The title is not wrapped in an extra brace pair, so the bibliography style decides its casing.
@InProceedings{mossel_et_al:LIPIcs.APPROX-RANDOM.2015.931,
  author    = {Mossel, Elchanan and Roch, Sebastien},
  title     = {Distance-based Species Tree Estimation: Information-Theoretic Trade-off between Number of Loci and Sequence Length under the Coalescent},
  booktitle = {Approximation, Randomization, and Combinatorial Optimization. Algorithms and Techniques ({APPROX/RANDOM} 2015)},
  editor    = {Garg, Naveen and Jansen, Klaus and Rao, Anup and Rolim, Jos{\'e} D. P.},
  series    = {Leibniz International Proceedings in Informatics (LIPIcs)},
  volume    = {40},
  pages     = {931--942},
  year      = {2015},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  isbn      = {978-3-939897-89-7},
  issn      = {1868-8969},
  doi       = {10.4230/LIPIcs.APPROX-RANDOM.2015.931},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.APPROX-RANDOM.2015.931},
  urn       = {urn:nbn:de:0030-drops-53455},
  annote    = {Keywords: phylogenetic reconstruction, multispecies coalescent, sequence length requirement.},
}
Feedback for Dagstuhl Publishing