Snakefile "ymp/rules/sambamba.rules"

# Declares the environment named "sambamba" (base "bioconda", package
# "sambamba"). The rules in this file refer to it by name through their
# `conda:` directive.
Env(name="sambamba", base="bioconda", packages="sambamba")


# Stage "sort_bam": sorts a BAM file with `sambamba sort`.
#
# Input:  {target}.bam under the `{:prev:}` location
# Output: {target}.sorted.bam and {target}.sorted.bam.bai under `{:this:}`
# (`{:prev:}` / `{:this:}` are YMP placeholders; presumably they expand to the
# previous and current stage directories -- confirm against YMP docs.)
with Stage("sort_bam") as S:
    rule sambamba_sort:
        message:
            "{:name:}: (Sambamba) {output.bam}"
        input:
            bam = "{:prev:}/{target}.bam",
        # NOTE(review): the command below has no explicit indexing step, so the
        # declared `bai` output relies on `sambamba sort` writing
        # "<out>.bai" by itself -- confirm for the sambamba version in use.
        output:
            bam = "{:this:}/{target}.sorted.bam",
            bai = "{:this:}/{target}.sorted.bam.bai",
        log:
            "{:this:}/{target}.sorted.bam.log"
        benchmark:
            "benchmarks/{:name:}/{:this:}/{target}.txt"
        # BAM compression level, passed as --compression-level.
        params:
            compress = 6,
        # NOTE(review): only `mem` is declared here, but the command reads
        # `resources.mem_mb`; presumably YMP derives mem_mb from `mem` --
        # TODO confirm.
        resources:
            mem = "32g",
        # Passed to sambamba as --nthreads.
        threads:
            8
        conda:
            "sambamba"
        # Runs a single `sambamba sort`; stdout and stderr both go to {log}.
        shell:
            "sambamba sort"
            " --memory-limit={resources.mem_mb}MB"
            " --compression-level={params.compress}"
            " --nthreads={threads}"
            " --out={output.bam}"
            " {input.bam}"
            " >{log} 2>&1"


# Stage "markdup_sambamba": runs `sambamba markdup` on a sorted BAM file and
# then indexes the result with `sambamba index`.
#
# Input:  {target}.sorted.bam under the `{:prev:}` location
# Output: {target}.sorted.bam and {target}.sorted.bai under `{:this:}`
with Stage("markdup_sambamba") as S:
    # Registers a flag-type stage parameter (key "RM") that the rule reads as
    # `params.remove_dups`, with value "--remove-duplicates".
    # Presumably it expands to an empty string when the flag is not set, so
    # duplicates are then only marked -- confirm against YMP's Stage.add_param.
    S.add_param("RM", typ="flag", name = "remove_dups", value="--remove-duplicates")
    rule sambamba_markdup:
        message:
            "{:name:}: {params.remove_dups} {output.bam}"
        input:
            bam = "{:prev:}/{target}.sorted.bam",
        # NOTE(review): the index is named "<target>.sorted.bai" here, while the
        # sort_bam stage in this file names it "<target>.sorted.bam.bai".
        # Verify which name downstream consumers expect before changing either.
        output:
            bam = "{:this:}/{target}.sorted.bam",
            bai = "{:this:}/{target}.sorted.bai",
        log:
            "{:this:}/{target}.sorted.bam.log"
        benchmark:
            "benchmarks/{:name:}/{:this:}/{target}.txt"
        # BAM compression level, passed as --compression-level.
        params:
            compress = 6,
        # `mem` is not referenced by the shell command below.
        resources:
            mem = "32g",
        # Passed as --nthreads to both sambamba invocations.
        threads:
            8
        conda:
            "sambamba"
        # The adjacent string literals are concatenated into one command line:
        #   1. `exec >{log} 2>&1;` sends stdout/stderr of everything that
        #      follows to the log file.
        #   2. `sambamba markdup` reads {input.bam} and writes {output.bam}.
        #   3. `sambamba index` is given {output.bam} and {output.bai}.
        shell:
            "exec >{log} 2>&1;"
            "sambamba markdup"
            " --compression-level={params.compress}"
            " --nthreads={threads}"
            " {params.remove_dups}"
            " {input.bam}"
            " {output.bam};"
            ""
            "sambamba index"
            " --nthreads={threads}"
            "  {output.bam} {output.bai};"