Snakefile "ymp/rules/otu_picking.rules"
from os import path
rule pick_open_otus:
    """Pick open reference OTUs from forward reads with QIIME.

    Runs ``split_libraries_fastq.py`` over all forward-read FASTQ files
    (declared as not barcoded), then ``pick_open_reference_otus.py``
    against the GreenGenes representative set matching the requested
    similarity (94, 97 or 99). The resulting OTU table, tree and
    representative sequences are moved to the output directory and the
    QIIME log files are concatenated into the rule log.
    """
    message:
        "Pick open reference {params.similarity}% OTUs on forward reads"
    input:
        seqs = "{dir}/{:fwd_fq_names:}.fq.gz",
        gg = "{:dir.references:}/greengenes/rep_set/{similarity}_otus.fasta"
    output:
        otu_table = "{dir}.o{similarity}/otu_table.biom",
        tree = "{dir}.o{similarity}/rep_set.tre",
        rep_set = "{dir}.o{similarity}/rep_set.fa"
    wildcard_constraints:
        similarity = "(94|97|99)",
    conda:
        "qiime.yml"
    params:
        # Comma-separated list of all input FASTQ paths (value of -i).
        seqs = lambda wc, input: ','.join(input.seqs),
        # One sample ID per input file: the file name with its last three
        # dot-separated fields removed.
        seq_ids = lambda wc, input: ','.join(['.'.join(i.split('/')[-1].split('.')[:-3]) for i in input.seqs]),
        # Falls back to 97 for an empty wildcard; the constraint above only
        # admits 94, 97 and 99, so the fallback is purely defensive here.
        similarity = lambda wc: wc.similarity if wc.similarity else 97
    log:
        "{dir}.o{similarity}/log.txt"
    threads:
        8
    shadow: "shallow"
    # NOTE: the shell block is a regular (non-raw) Python string, so Python
    # turns "\t" into a literal tab and joins lines ending in a backslash
    # before bash ever sees the script. Path substitutions are double-quoted
    # so that paths containing spaces survive word splitting.
    shell: """
    if [ ! -e "{input.gg}" ]; then
        echo "Failed to find GreenGenes reference file {input.gg}." >&2
        exit 1
    fi
    split_libraries_fastq.py \
        -i "{params.seqs}" \
        -o slout \
        --barcode_type 'not-barcoded' \
        --sample_ids "{params.seq_ids}" \
        -n 0 -q 3 -r 3 -p .75 --phred_offset 33
    echo "pick_otus:similarity\t.{params.similarity}" > qiime_params.txt
    pick_open_reference_otus.py \
        -i slout/seqs.fna \
        -o qiime_otus \
        -r "{input.gg}" \
        -a -O {threads} \
        -p qiime_params.txt
    mv qiime_otus/otu_table_mc2_w_tax_no_pynast_failures.biom "{output.otu_table}"
    mv qiime_otus/rep_set.tre "{output.tree}"
    mv qiime_otus/rep_set.fna "{output.rep_set}"
    cat slout/split_library_log.txt qiime_otus/log_*.txt > "{log}"
    """
rule pick_closed_otus:
    """Pick closed reference OTUs from forward reads with QIIME.

    Runs ``split_libraries_fastq.py`` over all forward-read FASTQ files
    (declared as not barcoded), then ``pick_closed_reference_otus.py``
    against the GreenGenes representative set for the requested
    similarity. The ``similarity`` wildcard is two digits or empty; an
    empty wildcard selects 97 for both the QIIME parameter and the
    reference FASTA. The OTU table is moved to the output directory and
    the QIIME log files are concatenated into the rule log.
    """
    message:
        "Pick closed reference {params.similarity}% OTUs on forward reads"
    input:
        seqs = "{dir}/{:fwd_fq_names:}.fq.gz",
        gg = "{:dir.references:}/greengenes/"
    wildcard_constraints:
        similarity = "([0-9][0-9])?"
    output:
        otu_table = "{dir}.c{similarity}/otu_table.biom",
    conda:
        "qiime.yml"
    params:
        # Comma-separated list of all input FASTQ paths (value of -i).
        seqs = lambda wc, input: ','.join(input.seqs),
        # One sample ID per input file: the file name up to its first dot.
        seq_ids = lambda wc, input: ','.join([i.split('/')[-1].split('.')[0] for i in input.seqs]),
        # The wildcard may be empty (see constraint above); default to 97.
        similarity = lambda wc: wc.similarity if wc.similarity else 97,
        # Must apply the same 97 fallback as `similarity`; formatting the raw
        # wildcard would yield "_otus.fasta" when the wildcard is empty.
        ref_fasta = lambda wc, input: path.join(input.gg, "rep_set", "{}_otus.fasta".format(wc.similarity if wc.similarity else 97))
    log:
        # Lives next to the closed-reference output (".c"), not in the
        # open-reference (".o") directory.
        "{dir}.c{similarity}/log.txt"
    threads:
        8
    shadow: "shallow"
    # NOTE: the shell block is a regular (non-raw) Python string, so Python
    # turns "\t" into a literal tab and joins lines ending in a backslash
    # before bash ever sees the script. Path substitutions are double-quoted
    # so that paths containing spaces survive word splitting.
    shell: """
    if [ ! -e "{params.ref_fasta}" ]; then
        echo "Failed to find GreenGenes reference file {params.ref_fasta}." >&2
        exit 1
    fi
    split_libraries_fastq.py \
        -i "{params.seqs}" \
        -o slout \
        --barcode_type 'not-barcoded' \
        --sample_ids "{params.seq_ids}" \
        -n 0 -q 3 -r 3 -p .75 --phred_offset 33
    echo "pick_otus:similarity\t.{params.similarity}" > qiime_params.txt
    pick_closed_reference_otus.py \
        -i slout/seqs.fna \
        -o qiime_otus \
        -r "{params.ref_fasta}" \
        -a -O {threads} \
        -p qiime_params.txt
    mv qiime_otus/otu_table.biom "{output.otu_table}"
    cat slout/split_library_log.txt qiime_otus/log_*.txt > "{log}"
    """
rule rarefy_table:
    """Rarefy a BIOM table to a fixed depth.

    Runs QIIME's ``single_rarefaction.py`` on the input table. The depth
    passed via ``-d`` is the numeric ``rar_level`` wildcard taken from the
    requested file name (``<otu_table>.d<rar_level>.biom``).
    """
    input:
        biom = "{dir}/{otu_table}.biom"
    output:
        biom = "{dir}/{otu_table}.d{rar_level}.biom"
    wildcard_constraints:
        # Digits only, so the ".d<N>" suffix is always a number.
        rar_level = r"\d+"
    conda:
        # Same environment reference as the other QIIME rules in this file.
        "qiime.yml"
    params:
        rar_level = "{rar_level}"
    shell: """
    single_rarefaction.py -i "{input.biom}" -d {params.rar_level} -o "{output.biom}"
    """
rule convert_to_closed_ref:
    """Convert open reference otu table to closed reference.

    Runs ``filter_otus_from_otu_table.py`` with the GreenGenes 99% rep set
    FASTA as the ID list and ``--negate_ids_to_exclude``, i.e. the IDs in
    that FASTA are the ones retained rather than the ones removed.
    """
    message:
        "Convert open reference otu picked table {input.biom} to closed reference"
    input:
        biom = "{dir}/{table}.biom",
        gg = "{:dir.references:}/greengenes/"
    output:
        biom = "{dir}/{table}.closed.biom"
    conda:
        "qiime.yml"
    params:
        # path.join yields the same path whether or not the GreenGenes
        # directory input keeps its trailing slash.
        ref_fasta = lambda wc, input: path.join(input.gg, "rep_set", "99_otus.fasta")
    shell: """
    filter_otus_from_otu_table.py \
        -i "{input.biom}" -o "{output.biom}" \
        --negate_ids_to_exclude -e "{params.ref_fasta}"
    """