Snakemake is a novel workflow engine with a simple Python-derived workflow definition language and an optimizing execution environment. It is the first system that supports multiple named wildcards (or variables) in input and output filenames of each rule definition. It also allows writing human-readable workflows that document themselves. We have found Snakemake especially useful for building high-throughput sequencing data analysis pipelines and present examples from this area. Snakemake exemplifies a generic way to implement a domain-specific language in Python, without writing a full parser or introducing syntactical overhead by overloading language features.
@comment{
  Conference paper in the GCB 2012 proceedings (OASIcs volume 26).
  Accented letters in author/editor names are written as brace-wrapped
  accent groups so that classic BibTeX treats each as a single letter when
  sorting and building labels. Only the proper nouns in the title are
  brace-protected, leaving the remaining casing to the bibliography style.
}
@inproceedings{koster_et_al:OASIcs.GCB.2012.49,
  author    = {K{\"o}ster, Johannes and Rahmann, Sven},
  title     = {Building and Documenting Workflows with {Python}-Based {Snakemake}},
  editor    = {B{\"o}cker, Sebastian and Hufsky, Franziska and Scheubert, Kerstin and Schleicher, Jana and Schuster, Stefan},
  booktitle = {German Conference on Bioinformatics 2012},
  series    = {Open Access Series in Informatics (OASIcs)},
  volume    = {26},
  pages     = {49--56},
  year      = {2012},
  publisher = {Schloss Dagstuhl -- Leibniz-Zentrum f{\"u}r Informatik},
  address   = {Dagstuhl, Germany},
  isbn      = {978-3-939897-44-6},
  issn      = {2190-6807},
  doi       = {10.4230/OASIcs.GCB.2012.49},
  url       = {https://drops.dagstuhl.de/entities/document/10.4230/OASIcs.GCB.2012.49},
  urn       = {urn:nbn:de:0030-drops-37179},
  annote    = {Keywords: workflow engine, dependency graph, knapsack problem, Python, high-throughput sequencing, next-generation sequencing},
}
Feedback for Dagstuhl Publishing