Snakefile "ymp/rules/metabat2.rules"
Env(name="metabat2", base="bioconda", packages=[
"metabat2",
"pigz"
])
with Stage("bin_metabat2") as S:
S.doc("""
Bin metagenome assembly into MAGs
>>> ymp make mock.assemble_megahit.map_bbmap.sort_bam.bin_metabat2
>>> ymp make mock.group_ALL.assemble_megahit.map_bbmap.sort_bam.group_ALL.bin_metabat2
""")
rule metabat2_depth:
"""
Generates a depth file from BAM
"""
message:
"Summarizing contig depth"
input:
bam = "{:prev:}/{:target:}.sorted.bam"
output:
depth = "{:this:}/{target}.depth.txt",
paired = "{:this:}/{target}.paired.txt"
log:
"{:this:}/{target}.depth.log"
threads:
1
params:
min_contig_length = 1000,
min_contig_depth = 1
conda:
"metabat2"
shell:
"jgi_summarize_bam_contig_depths"
" --outputDepth {output.depth}"
" --pairedContigs {output.paired}"
" --minContigLength {params.min_contig_length}"
" --minContigDepth {params.min_contig_depth}"
" {input.bam}"
" > {log} 2>&1"
checkpoint metabat2_bin:
"""
Bin metagenome with MetaBat2
"""
message:
"Binning {wildcards.target} with MetaBat2"
input:
depth = "{:this:}/{target}.depth.txt",
fasta = "{:prev:}/{:target:}.fasta.gz"
output:
bins = "{:this:}/{target}.bins",
fasta = "{:this:}/{target}.{:bin:}.fasta.gz"
log:
"{:this:}/{target}.metabat.log"
threads:
32
params:
prefix = "{:this:}/{target}",
min_contig_len = 2500, # decrease if input quality very high
max_p = 95, # decrease if input quality very low
max_edges = 200, # decrease if input quality very low,
# increase if completeness low
min_s = 60, # increase if input quality very low
min_cls_size = 200000, # minimum bp per bin
seed = "123456"
conda:
"metabat2"
shell:
"exec >{log} 2>&1;"
"metabat2"
" --inFile {input.fasta}"
" --abdFile {input.depth}"
" --outFile {params.prefix}"
" --minContig {params.min_contig_len}"
" --maxP {params.max_p}"
" --minS {params.min_s}"
" --maxEdges {params.max_edges}"
" --minClsSize {params.min_cls_size}"
" --numThreads {threads}"
" --seed {params.seed}"
" --saveCls"
";"
"mv {params.prefix} {params.prefix}.metabat.txt;"
""
"touch {output.bins};"
"if compgen -G '{params.prefix}*.fa' >/dev/null; then"
" echo '{params.prefix}'*.fa | xargs -P4 pigz;"
" for bin in {params.prefix}.*.fa.gz; do"
" base=${{bin#{params.prefix}.}};"
" binid=bin_${{base%.fa.gz}};"
" mv $bin {params.prefix}__$binid.fasta.gz;"
" echo $binid >> {output.bins};"
" done;"
"fi"