Snakefile "ymp/rules/centrifuge.rules"
# Declare the environment named "centrifuge"; the `conda:` directive of the
# centrifuge rule in this file refers to it by that name.
# NOTE(review): presumably installs the `centrifuge` package from the
# bioconda channel -- confirm against the definition of Env.
Env(
    name="centrifuge",
    base="bioconda",
    packages="centrifuge",
)
with Stage("profile_centrifuge") as S:
    # The documentation text is kept flush-left so that the string handed to
    # S.doc() is exactly the one in the original source.
    S.doc("""
Classify reads using centrifuge
""")

    # Suffixes appended to "<dbbase>." to build the `db` input file list of
    # the rule below.
    EXTS = ["1.cf", "2.cf", "3.cf"]

    rule centrifuge:
        # Runs the `centrifuge` command line tool on the two read files of a
        # sample, writing a hits table, a report table and a log file.
        message:
            "Centrifuge: Classifying reads in {wildcards.sample}"
        conda:
            "centrifuge"
        threads:
            16
        resources:
            mem = "32g",
        params:
            # `dbbase` is the index prefix passed to `-x`; it is built from
            # `dbname`, so changing `dbname` selects a different index.
            dbbase = "{:dir.references:}/centrifuge/{params.dbname}",
            dbname = "p+h+v"
        input:
            # One entry per suffix in EXTS, each prefixed with the index base.
            db = ["{params.dbbase}." + suffix for suffix in EXTS],
            # The shell command uses reads[0] and reads[1], so this pattern
            # must yield two files.
            # NOTE(review): presumably {:pairnames:} expands to the two mate
            # names of a read pair -- confirm against the YMP documentation.
            reads = "{:prev:}/{sample}.{:pairnames:}.fq.gz"
        output:
            hits = "{:this:}/{sample}.hits.tsv",
            report = "{:this:}/{sample}.report.tsv",
        log:
            "{:this:}/{sample}.log",
        shell:
            # Adjacent string literals are concatenated into one command line.
            "centrifuge -x {params.dbbase}"
            " -1 {input.reads[0]} -2 {input.reads[1]}"
            " --report-file {output.report} -S {output.hits}"
            " --threads {threads}"
            " --time"  # print timings
            " --mm"  # use mmap so that multiple instances share idx
            " >{log} 2>&1"  # stdout and stderr both go to the log file