Snakefile "ymp/rules/hisat2.rules"
Env(name="hisat2", base="bioconda", packages=["hisat2=2.1.0", "samtools"])
HT2IDX_SUFFIXES = ["{}.ht2".format(n+1) for n in range(8)]
with Stage("map_hisat2") as S:
S.doc("""
Map reads using Hisat2
""")
rule hisat2_map:
"""
For hisat we always assume a pre-build index as providing SNPs and haplotypes
etc is beyond this pipelines scope.
"""
message: "{:name:}: {output.bam}"
input:
pairs = "{:prev:}/{:target:}.{: pairnames :}.fq.gz",
index = expand("{{:prev:}}/{{:target:}}.{ext}", ext=HT2IDX_SUFFIXES)
output:
bam = temp( "{:this:}/{target}.bam"),
stats = "{:this:}/{target}.stats"
log:
"{:this:}/{target}.log"
benchmark:
"benchmarks/{:name:}/{:this:}/{target}.txt"
resources:
mem = "32g",
threads:
16
conda:
"hisat2"
shell:
"INDEX={input.index[0]} ;"
"INDEX=\"${{INDEX%.1.ht2}}\" ;"
"hisat2"
" -1 {input.pairs[0]}"
" -2 {input.pairs[1]}"
" -x \"$INDEX\""
# print time for each stage
" --time"
# write summary in new format
" --new-summary"
" --summary-file {output.stats}"
" -p {threads} "
" 2>{log}"
" | samtools view -b -o {output.bam} -"