Snakefile "ymp/rules/bmtagger.rules"
"""
BMTagger Rules
"""
Env(name="bmtagger", base="bioconda", packages=[
"bmtagger",
"blast >=2.7.1" # make sure we don't get 2.2
])
with Stage("index_bmtagger") as S:
rule bmtagger_bitmask:
message:
"BMTagger: bmtool indexing {input}"
input:
"{:prev:}/{:target:}.fasta.gz"
output:
"{:this:}/{target}.bitmask"
log:
"{:this:}/{target}.bitmask.log"
threads:
1
params:
tmp = "{:this:}/{target}.tmp.fa",
wordsize = 18, # 18->8g, 17->2g, 16->512MB
resources:
mem = "16g",
conda:
"bmtagger"
shell: """
gunzip -c {input} > {params.tmp}
bmtool \
--fasta-file={params.tmp} \
--output-file={output} \
--word-size={params.wordsize} \
> {log} 2>&1
rm {params.tmp}
"""
# --compress fails with small references (segfault in bmfilter)
rule bmtagger_index:
message:
"BMTagger: srprism indexing {input}"
input:
"{:prev:}/{:target:}.fasta.gz"
output:
"{:this:}/{target}.srprism"
log:
"{:this:}/{target}.srprism.log"
threads: 1
resources:
mem = "16g",
conda:
"bmtagger"
shell: """
srprism mkindex \
--input {input} \
--output {output} \
--memory $(({resources.mem_mb} / 16 * 15)) \
> {log} 2>&1
touch {output}
"""
with Stage("filter_bmtagger", "remove_bmtagger") as S:
S.doc("""
Filter(-out) contaminant reads using BMTagger
>>> ymp make toy.ref_phiX.index_bmtagger.remove_bmtagger
>>> ymp make toy.ref_phiX.index_bmtagger.remove_bmtagger.assemble_megahit
>>> ymp make toy.ref_phiX.index_bmtagger.filter_bmtagger
>>> ymp make mpic.ref_phiX.index_bmtagger.remove_bmtagger
""")
rule bmtagger_find:
"Match paired end reads against reference"
message:
"BMTagger: matching reads from {input.fq} to {input.srprism}"
input:
fq = "{:prev:}/{:target:}.{:pairnames:}.fq.gz",
bitmask = "{:prev:}/{:target:}.bitmask",
srprism = "{:prev:}/{:target:}.srprism",
tmpdir = ancient("{:dir.tmp:}")
output:
matches = temp("{:this:}/{target}.txt"),
matchgz = "{:this:}/{target}.txt.gz"
log:
"{:this:}/{target}.txt.log"
threads:
1
params:
matearg = "-2 <(gunzip -c {input.fq[1]})"
resources:
mem = "16g",
conda:
"bmtagger"
shell: """
bmtagger.sh \
-b {input.bitmask} \
-x {input.srprism} \
-q 1 \
-1 <(gunzip -c {input.fq[0]}) \
{params.matearg} \
-T {input.tmpdir} \
-o {output.matches} \
> {log} 2>&1
gzip {output.matches} -c > {output.matchgz}
"""
rule bmtagger_find_se: # ymp: extends bmtagger_find
"Match single end reads against reference"
input:
fq = ["{:prev:}/{:target:}.{:pairnames[0]:}.fq.gz"]
params:
matearg = ""
rule bmtagger_filter:
"Filter reads using reference"
message:
"BMTagger: filtering {input.fq}"
input:
fq = "{:prev:}/{:target:}.{:pairnames[0]:}.fq.gz",
matches = "{:this:}/{target}.txt"
output:
"{:this:}/{target}.{:pairnames[0]:}.fq.gz"
log:
"{:this:}/{target}.{:pairnames[0]:}.fq.log"
threads:
8
params:
mate = 1,
action = "-keep",
resources:
mem = "8g",
conda:
"bmtagger"
shell: """
extract_fullseq \
{input.matches} \
{params.action} \
-fastq \
-mate{params.mate} <(gunzip -c {input.fq}) | gzip -c > {output} 2>{log}
"""
rule bmtagger_filter_revread: # ymp: extends bmtagger_filter
input:
fq = "{:prev:}/{:target:}.{:pairnames[1]:}.fq.gz"
output:
"{:this:}/{target}.{:pairnames[1]:}.fq.gz"
log:
"{:this:}/{target}.{:pairnames[1]:}.fq.log"
params:
mate = 2,
ext = "{:pairnames[1]:}"
rule bmtagger_filter_out: # ymp: extends bmtagger_filter
"Filter-out reads using reference"
message:
"BMTagger: filtering out {input.fq}"
output:
"{:that:}/{target}.{pairsuff}.fq.gz"
log:
"{:that:}/{target}.{pairsuff}.fq.log"
params:
action = "-remove"
rule bmtagger_filter_all:
message:
"BMTagge: done"
output:
touch("{:this:}/all_targets.stamp")
input:
"{:this:}/{:targets:}.{:pairnames:}.fq.gz"
rule bmtagger_remove_all:
message:
"BMTagge: done"
output:
touch("{:that:}/all_targets.stamp")
input:
"{:that:}/{:targets:}.{:pairnames:}.fq.gz"