Snakefile "ymp/rules/trinity.rules"
# Software environment named "trinity": installs the `trinity` package on top
# of the "bioconda" base. Rules select it via `conda: "trinity"`.
Env(name="trinity", base="bioconda", packages="trinity")
# Stage "assemble_trinity": the rules that follow belong to this stage.
with Stage("assemble_trinity") as S:
rule trinity:
    # Assemble the paired read files of one target with Trinity.
    #
    # Trinity runs inside a per-target working directory (`params.outdir`).
    # Once it finishes, the assembly is gzip-compressed into `output.fa`, the
    # timing report and the gene/transcript map are moved next to it, and the
    # working directory is deleted.
    message:
        "Trinity: assembling {wildcards.target}"
    input:
        r1 = "{:prev:}/{:target:}.{:pairnames[0]:}.fq.gz",
        r2 = "{:prev:}/{:target:}.{:pairnames[1]:}.fq.gz"
    output:
        fa = "{:this:}/{target}.fasta.gz",
        tm = "{:this:}/{target}.timing",
        mp = "{:this:}/{target}.fasta.gene_trans_map"
    log:
        "{:this:}/{target}.log"
    threads:
        32
    conda:
        "trinity"
    params:
        # Trinity options
        lib_type = "FR",
        min_contig_length = 200,
        # Working directory handed to `--output`; removed by the last shell
        # command.
        outdir = "{:this:}/trinity_{target}",
        # Memory limit. Only `mem_gb` is referenced by the shell command.
        # NOTE(review): `mem` is not used in this rule's shell command;
        # presumably it is read elsewhere (e.g. job submission) -- confirm.
        mem = icfg.mem("32g"),
        mem_gb = icfg.mem("32g", unit="g"),
        # Comma-separated read file lists. A single path is wrapped in a list
        # first so that join() does not iterate over its characters.
        r1 = lambda wc, input: ",".join(
            input.r1 if not isinstance(input.r1, str) else [input.r1]
        ),
        r2 = lambda wc, input: ",".join(
            input.r2 if not isinstance(input.r2, str) else [input.r2]
        ),
        # NOTE(review): `reads` is not referenced by the shell command below,
        # which spells out --left/--right itself -- confirm whether it is
        # still needed.
        reads = "--left {params.r1} --right {params.r2}"
    # Steps: run Trinity (stdout+stderr go to the log), compress the assembly,
    # move the timing file and the gene/transcript map, drop the working dir.
    shell:
        "Trinity --seqType fq --SS_lib_type {params.lib_type}"
        " --max_memory {params.mem_gb}G"
        " --left {params.r1}"
        " --right {params.r2}"
        " --CPU {threads} --min_contig_length {params.min_contig_length}"
        " --output {params.outdir} >{log} 2>&1;"
        "gzip -c {params.outdir}/Trinity.fasta > {output.fa};"
        "mv {params.outdir}/Trinity.timing {output.tm};"
        "mv {params.outdir}/Trinity.fasta.gene_trans_map {output.mp};"
        "rm -rf {params.outdir}"
rule trinity_stats:
    # Run TrinityStats.pl on a target's gzipped assembly and store whatever it
    # prints on stdout in `<target>.trinity-stats`.
    input:
        "{:this:}/{target}.fasta.gz"
    output:
        "{:this:}/{target}.trinity-stats"
    conda:
        "trinity"
    message:
        "Trinity: collecting assembly stats"
    shell:
        "TrinityStats.pl {input} > {output}"
rule trinity_all:
    # Aggregation rule: takes the gzipped assemblies matched by the
    # `{:targets:}` placeholder as input and touches a stamp file as output.
    input:
        "{:this:}/{:targets:}.fasta.gz"
    output:
        touch("{:this:}/all_targets.stamp")
    message:
        "Trinity: finished"