Snakefile "ymp/rules/hisat2.rules"

Env(name="hisat2", base="bioconda", packages=["hisat2=2.1.0", "samtools"])

HT2IDX_SUFFIXES = ["{}.ht2".format(n+1) for n in range(8)]

with Stage("map_hisat2") as S:
    S.doc("""
    Map reads using Hisat2
    """)
    rule hisat2_map:
        """
        For hisat we always assume a pre-build index as providing SNPs and haplotypes
        etc is beyond this pipelines scope.
        """
        message: "{:name:}: {output.bam}"
        input:
            pairs =        "{:prev:}/{:target:}.{: pairnames :}.fq.gz",
            index = expand("{{:prev:}}/{{:target:}}.{ext}", ext=HT2IDX_SUFFIXES)
        output:
            bam   = temp(  "{:this:}/{target}.bam"),
            stats =        "{:this:}/{target}.stats"
        log:
                           "{:this:}/{target}.log"
        benchmark:
            "benchmarks/{:name:}/{:this:}/{target}.txt"
        resources:
            mem = "32g",
        threads:
            16
        conda:
            "hisat2"
        shell:
            "INDEX={input.index[0]}  ;"
            "INDEX=\"${{INDEX%.1.ht2}}\"  ;"
            "hisat2"
            " -1 {input.pairs[0]}"
            " -2 {input.pairs[1]}"
            " -x \"$INDEX\""
            # print time for each stage
            " --time"
            # write summary in new format
            " --new-summary"
            " --summary-file {output.stats}"
            " -p {threads} "
            " 2>{log}"
            " | samtools view -b -o {output.bam} -"