Snakefile "ymp/rules/humann2.rules"
Env(name="humann2", base="bioconda", packages=[
"humann2",
"tbb=2018" # needed by bowtie2; older versions log an error on macos 10.12 throwing humann2 off
])
with stage.references:
localrules: human_db_download
rule human_db_download:
"""
Download :ref:`HUMAnN2 <bioconda:humann2>` reference databases
"""
output:
"{:dir.references:}/humann2/{database}.{build}"
log:
"{output}.log"
wildcard_constraints:
database = "[^.]*",
build = "[^.]*"
threads:
1
conda:
"humann2"
shell: """
humann2_databases --download {wildcards.database} {wildcards.build} {output} >{log} 2>&1
"""
with Stage("humann2") as S:
S.doc("""
Compute functional profiles using HUMAnN2
""")
rule humann2:
"""
Runs HUMAnN2 with separately processed Metaphlan2 output.
.. note:: HUMAnN2 has no special support for paired end reads. As per manual,
we just feed it the concatenated forward and reverse reads.
"""
input:
reads = "{:prev:}/{sample}.{:pairnames:}.fq.gz",
chocophlan = "{:dir.references:}/humann2/chocophlan.{params.chocophlan}",
uniref = "{:dir.references:}/humann2/uniref.{params.uniref}",
metaphlan = "{:prev:}/{sample}_profile.txt"
output:
genefam = "{:this:}/{sample}_genefamilies.tsv",
pathabu = "{:this:}/{sample}_pathabundance.tsv",
pathcov = "{:this:}/{sample}_pathcoverage.tsv",
tempdir = "{:this:}/{sample}_humann2_temp"
params:
chocophlan = "full",
uniref = "uniref90_diamond",
outpath = "{:this:}",
resources:
mem = "128g",
log:
stderr = "{:this:}/{sample}.log",
trace = "{:this:}/{sample}.trace",
threads: 16
conda: "humann2"
shell:
"mkdir -p {output.tempdir};"
"cat {input.reads} > {output.tempdir}/temp.fq.gz;"
"humann2"
" --input-format fastq.gz"
" --input {output.tempdir}/temp.fq.gz"
" --taxonomic-profile {input.metaphlan}"
" --output-basename {wildcards.sample}"
" --output {params.outpath}"
" --nucleotide-database {input.chocophlan}/chocophlan"
" --protein-database {input.uniref}/uniref"
" --threads {threads}"
" --o-log {log.trace}"
" >{log.stderr} 2>&1"
";"
"rm {output.tempdir}/temp.fq.gz;"
localrules: humann2_renorm_table
rule humann2_renorm_table:
"""
Renormalizes humann2 output tables
"""
message:
"HUMANn2: renormalizing table {input} to {wildcards.unit}/{wildcards.mode}"
input:
"{:this:}/{sample}_{type}.tsv"
wildcard_constraints:
type = "(genefamilies|pathabundance|pathcoverage)",
unit = "(cpm|relab)",
mode = "(community|levelwise)"
output:
"{:this:}/{sample}_{type}_{mode}_{unit}_single.tsv"
log:
"{:this:}/{sample}_{type}_{mode}_{unit}_single.log"
params:
special = "y"
conda:
"humann2"
shell:
"humann2_renorm_table"
" --input {input}"
" --output {output}"
" --units {wildcards.unit}"
" --mode {wildcards.mode}"
" --special {params.special}"
" --update-snames"
" >{log} 2>&1"
localrules: humann2_join_tables
rule humann2_join_tables:
"""
Joins HUMAnN2 per sample output tables
"""
message:
"HUMANn2: joining tables into {output}"
input:
"{:this:}/{:runs:}_{type}_{mode}_{unit}_single.tsv"
wildcard_constraints:
type = "(genefamilies|pathabundance|pathcoverage)",
unit = "(cpm|relab)",
mode = "(community|levelwise)"
params:
basedir = "{:this:}",
pattern = "{type}_{mode}_{unit}_single.tsv"
output:
"{:this:}/{type}_{mode}_{unit}.tsv"
log:
"{output}.log"
conda:
"humann2"
shell:
"humann2_join_tables"
" --input {params.basedir}"
" --file_name {params.pattern}"
" --output {output}"
" >{log} 2>&1"
localrules: humann2_all
rule humann2_all:
message:
"HUMANn2: Finished processing {params.basedir}"
params:
basedir = "{:this:}"
input:
expand("{{:this:}}/{type}_{mode}_{unit}.tsv", \
type=("genefamilies","pathabundance","pathcoverage"), \
mode=("community", "levelwise"), \
unit=("cpm", "relab"))
output:
touch("{:this:}/all_targets.stamp")