Snakefile "ymp/rules/trinity.rules"
from ymp.util import check_input, filter_input
Env(name="trinity", base="bioconda", packages="trinity")
with Stage("assemble_trinity") as S:
rule trinity:
message:
"Trinity: assembling {wildcards.target}"
input:
r1 = "{:prev:}/{:target:}.{:pairnames[0]:}.fq.gz",
r2 = "{:prev:}/{:target:}.{:pairnames[1]:}.fq.gz"
output:
fa = "{:this:}/{target}.fasta.gz",
tm = temp("{:this:}/{target}.timing"),
mp = "{:this:}/{target}.fasta.gene_trans_map"
log:
"{:this:}/{target}.log"
params:
r1 = filter_input("r1", also=["r2"], minsize=1, join=","),
r2 = filter_input("r2", also=["r1"], minsize=1, join=","),
# Allow failure if we have <50 sequences
sufficient_input = check_input(["r1", "r2"], minlines=50*4*2),
min_contig_length = 200,
lib_type = "FR",
# outdir must contain word "trinity"
outdir = "{:this:}/{target}.trinity.tmp"
resources:
mem = "32g",
threads:
24
conda:
"trinity"
shell: """
make_empty_result() {{
echo "YMP: skipping assembly - $*" >> {log}
mkdir -p {params.outdir}
touch {params.outdir}/Trinity.fasta
touch {params.outdir}/Trinity.timing
touch {params.outdir}/Trinity.fasta.gene_trans_map
}}
if [ -z "{params.r1}" ]; then
make_empty_result "all input empty"
else
ARGS=
ARGS="$ARGS --seqType fq"
ARGS="$ARGS --SS_lib_type {params.lib_type}"
ARGS="$ARGS --max_memory {resources.mem_gb}G"
ARGS="$ARGS --left {params.r1} --right {params.r2}"
ARGS="$ARGS --CPU {threads}"
ARGS="$ARGS --min_contig_length {params.min_contig_length}"
ARGS="$ARGS --output {params.outdir}"
echo "YMP: Running Trinity $ARGS" >> {log}
if ! Trinity $ARGS >>{log} 2>&1; then
if [ "{params.sufficient_input}" = "False" ]; then
make_empty_result "Assuming failure due to low read count"
else
exit 1
fi
fi
fi
gzip -c {params.outdir}/Trinity.fasta > {output.fa}
mv {params.outdir}/Trinity.timing {output.tm}
mv {params.outdir}/Trinity.fasta.gene_trans_map {output.mp}
rm -rf {params.outdir}
"""
rule trinity_stats:
message:
"Trinity: collecting assembly stats"
input:
"{:this:}/{target}.fasta.gz"
output:
"{:this:}/{target}.trinity-stats"
conda:
"trinity"
shell:
"TrinityStats.pl {input} > {output}"