Snakefile "ymp/rules/metabat2.rules"
# Software environment named "metabat2", based on "bioconda"; the rules
# below select it through their `conda:` directive.
Env(
    name="metabat2",
    base="bioconda",
    packages=[
        "metabat2",
        # Explicit version pin, because pinning in bioconda is broken.
        "libdeflate==1.0",
    ],
)
with Stage("bin_metabat2") as S:
    S.doc("""
    Bin metagenome assembly into MAGs
    """)

    # Step 1: derive per-contig depth information from the sorted BAM file
    # provided by an upstream stage.
    rule metabat2_depth:
        """
        Generates a depth file from BAM
        """
        message:
            "Summarizing contig depth"
        input:
            bam = "{:prev:}/{:target:}.sorted.bam"
        output:
            depth = "{:this:}/{target}.depth.txt",
            paired = "{:this:}/{target}.paired.txt"
        log:
            "{:this:}/{target}.depth.log"
        threads:
            1
        params:
            # Passed to --minContigLength / --minContigDepth below.
            min_contig_length = 1000,
            min_contig_depth = 1
        conda:
            "metabat2"
        shell:
            # stdout and stderr of the tool both end up in the log file.
            "jgi_summarize_bam_contig_depths"
            " --outputDepth {output.depth}"
            " --pairedContigs {output.paired}"
            " --minContigLength {params.min_contig_length}"
            " --minContigDepth {params.min_contig_depth}"
            " {input.bam}"
            " > {log} 2>&1"

    # Step 2: run MetaBat2 on the assembly, using the depth file written by
    # metabat2_depth as abundance information.
    rule metabat2_bin:
        """
        Bin metagenome with MetaBat2
        """
        message:
            "Binning {wildcards.target} with MetaBat2"
        input:
            depth = "{:this:}/{target}.depth.txt",
            # Upstream file: uses the same "{:prev:}/{:target:}" form as the
            # BAM input of metabat2_depth, so that both upstream inputs are
            # looked up consistently.
            fasta = "{:prev:}/{:target:}.fasta.gz"
        output:
            fasta_dir = directory("{:this:}/{target}/")
        log:
            "{:this:}/{target}.metabat.log"
        threads:
            32
        params:
            min_contig_len = 2500,  # decrease if input quality very high
            max_p = 95,             # decrease if input quality very low
            max_edges = 200,        # decrease if input quality very low,
                                    # increase if completeness low
            min_s = 60,             # increase if input quality very low
            min_cls_size = 200000,  # minimum bp per bin
            seed = "123456"         # fixed value handed to --seed
        conda:
            "metabat2"
        shell:
            # stdout and stderr of the tool both end up in the log file.
            "metabat2"
            " --inFile {input.fasta}"
            " --abdFile {input.depth}"
            " --outFile {output.fasta_dir}"
            " --minContig {params.min_contig_len}"
            " --maxP {params.max_p}"
            " --minS {params.min_s}"
            " --maxEdges {params.max_edges}"
            " --minClsSize {params.min_cls_size}"
            " --numThreads {threads}"
            " --seed {params.seed}"
            " > {log} 2>&1"

    # Aggregation rule: requests the "{:this:}/{:targets:}/" output
    # directories and touches a stamp file once they are available.
    # NOTE(review): "{:targets:}" presumably expands to every target of this
    # stage -- confirm against the YMP stage expansion documentation.
    rule metabat2_all:
        message:
            "Completed Metabat binning"
        input:
            "{:this:}/{:targets:}/"
        output:
            touch("{:this:}/all_targets.stamp")