Snakefile "ymp/rules/spades.rules"
Env(name="spades", base="bioconda", packages=["spades", "pigz"])
with Stage("assemble_metaspades") as S:
S.doc("""Assemble reads using metaspades
>>> ymp make toy.assemble_metaspades
>>> ymp make toy.group_ALL.assemble_metaspades
>>> ymp make toy.group_Subject.assemble_metaspades
""")
localrules: metaspades_input_yaml
rule metaspades_input_yaml:
"""
Prepares a dataset config for spades. Spades commandline is limited to
at most 9 pairs of fq files, so to allow arbitrary numbers we need to
use the dataset config option.
Preparing in a separate rule so that the main metaspades rule can use
the ``shell:`` rule and not ``run:``, which would preclude it from
using conda environments.
"""
message:
"Preparing Config YAML for MetaSpades: {output}"
input:
r1 = "{:prev:}/{:target:}.{:pairnames[0]:}.fq.gz",
r2 = "{:prev:}/{:target:}.{:pairnames[1]:}.fq.gz"
output:
yaml = "{:this:}/{target}.yaml"
run:
import yaml
from ymp.util import filter_out_empty
r1, r2 = filter_out_empty(input.r1, input.r2)
with open(output.yaml, "w") as cfg:
cfg.write(yaml.safe_dump([{
"left reads": ["../"+r for r in r1],
"right reads": ["../"+r for r in r2],
"type": "paired-end",
"orientation": "fr"
}]))
rule metaspades:
"""
Runs MetaSpades. Supports reads.by_COLUMN.sp/complete as target for
by group co-assembly.
"""
message:
"(Co-)Assembling {wildcards.target} with MetaSpades"
input:
conf = "{:this:}/{target}.yaml",
r1 = "{:prev:}/{:target:}.{:pairnames[0]:}.fq.gz",
r2 = "{:prev:}/{:target:}.{:pairnames[1]:}.fq.gz"
output:
fasta = "{:this:}/{target}.fasta.gz",
fastg = "{:this:}/{target}.fastg.gz"
log:
console = "{:this:}/{target}.log",
spades = "{:this:}/{target}.spades.log.gz"
benchmark:
"benchmarks/metaspades/{:this:}/{target}.txt"
params:
workdir = "{:this:}/tmp.{target}/",
tmpdir = "{:dir.tmp:}",
mem = icfg.mem("1000g")
conda:
"spades"
threads:
24
shell: """
CONTINUE=""
if [ -e "{params.workdir}" ]; then
if [ -e "{params.workdir}/params.txt" ]; then
CONTINUE=y
else
rm -rf "{params.workdir}"
fi
fi
if [ -n "$CONTINUE" ]; then
metaspades.py \
-o {params.workdir} \
--threads {threads} \
--tmp-dir {params.tmpdir} \
--memory $(({params.mem}/1024)) \
--restart-from last \
>{log.console} 2>&1
else
metaspades.py \
-o {params.workdir} \
--threads {threads} \
--tmp-dir {params.tmpdir} \
--dataset {input.conf} \
--memory $(({params.mem}/1024)) \
>{log.console} 2>&1
fi
pigz -p {threads} -9 -c {params.workdir}/scaffolds.fasta > {output.fasta}
pigz -p {threads} -9 -c {params.workdir}/assembly_graph.fastg > {output.fastg}
cat {params.workdir}/{{params.txt,spades.log}} | pigz -p {threads} -9 > {log.spades}
rm -rf {params.workdir}
"""
rule metaspades_all:
message:
"Completed Metaspades assemblies"
input:
"{:this:}/{:targets:}.fasta.gz"
output:
"{:this:}/all_targets.stamp"
shell:
"touch {output}"