Snakefile "ymp/rules/convert.rules"

"Generic file conversion rules"



# FIXME: how can I make these localrules?
# One anonymous rule is registered for every combination of an input suffix
# (fa, fna, fasta) and an output suffix (faa, fastp). Each rule hands the
# opened input and output files to ymp.nuc2aa.fasta_dna2aa.
for fasta in ("fa", "fna", "fasta"):
    for fastp in ("faa", "fastp"):
        rule:
            message: "FASTA->FASTP {output}"
            input: "{path}." + fasta
            output: "{path}." + fastp
            priority: -50
            run:
                from ymp.nuc2aa import fasta_dna2aa
                # Both sides are plain (uncompressed) text-mode handles.
                with open(input[0], "r") as nuc_handle:
                    with open(output[0], "w") as prot_handle:
                        fasta_dna2aa(nuc_handle, prot_handle)

rule faa_fastp:
    """
    Provide ``{path}.fastp`` as a relative symlink pointing at ``{path}.faa``
    """
    message: "LN_S {output} -> {input}"
    input:
        "{path}.faa"
    output:
        "{path}.fastp"
    run:
        import os
        # os.path.dirname() yields "" for a file located directly in the
        # working directory, and os.path.relpath() raises ValueError when
        # given an empty path. Fall back to "." so such files can be linked.
        source_dir = os.path.dirname(input[0]) or os.curdir
        link_dir = os.path.dirname(output[0]) or os.curdir
        # The link target is expressed relative to the directory holding the
        # link, so the link stays valid if the whole tree is moved.
        target = os.path.join(
            os.path.relpath(source_dir, link_dir),
            os.path.basename(input[0])
        )
        os.symlink(target, output[0])

rule fasta_to_fastp_gz:
    """
    Run ``fasta_dna2aa`` on a gzip-compressed FASTA file, gzip-compressing
    the result

    NOTE(review): the output suffix ends in ``_disabled``; presumably this
    keeps the rule from ever matching a real target -- confirm before
    relying on it.
    """
    message: "FASTA->FASTP {input}"
    input: "{path}.fasta.gz"
    output: "{path}.fastp.gz_disabled"
    run:
        import gzip
        from ymp.nuc2aa import fasta_dna2aa
        # gzip.open() with modes "r"/"w" yields binary handles.
        # NOTE(review): the uncompressed FASTA->FASTP rules pass text-mode
        # handles; confirm fasta_dna2aa accepts bytes as well.
        with gzip.open(input[0], "r") as packed_in:
            with gzip.open(output[0], "w") as packed_out:
                fasta_dna2aa(packed_in, packed_out)


rule gunzip:
    """
    Generic temporary gunzip

    Decompresses ``{path}.gz`` with ``unpigz``, keeping the compressed file.
    The ``path`` wildcard may not itself end in ``.gz``.

    Use ``ruleorder: gunzip > myrule`` to prefer gunzipping
    over re-running a rule. E.g.

    >>> ruleorder: gunzip > myrule
    >>> rule myrule:
    >>>   output: temp("some.txt"), "some.txt.gz"

    NOTE(review): the declared output is ``{path}_disabled``, which is
    presumably not the file ``unpigz`` writes; this looks like a deliberate
    way of switching the rule off -- confirm.
    """
    message:
        "Uncompressing {input}"
    input:
        "{path}.gz"
    output:
        temp("{path}_disabled")
    wildcard_constraints:
        path = r".*(?<!\.gz)"
    threads:
        1
    params:
        mem = icfg.mem("1G")
    conda:
        "pigz.yml"
    shell: """
    unpigz \
      --keep \
      --force \
      --processes {threads} \
      {input}
    """


localrules: mkdir
rule mkdir:
    """
    Auto-create directories listed in ymp config.

    The output wildcard ``x`` is constrained to an alternation of the
    entries obtained by iterating ``icfg.dir``, so the rule only matches
    those names. The entries are inserted into the pattern unescaped.

    Use these as input:
    >>> input: tmpdir = ancient(icfg.dir.tmp)
    """
    message:
        "Creating directory '{output}'"
    output:
        directory("{{x,({names})}}".format(names="|".join(icfg.dir)))
    shell:
        "mkdir -p {output}"


rule fq2fa:
    """
    Convert an uncompressed fastq file into a gzip-compressed fasta file

    The ``sed`` program handles the first two lines of every group of four:
    the first is printed with its leading ``@`` replaced by ``>`` (only if
    that substitution applies), the second is printed unchanged. The
    remaining two lines of each group are dropped. The ``first~step``
    address form is a GNU sed extension.
    """
    message: "Converting {input} to fasta"
    input: seqs="{path}.fq"
    output: seqs="{path}.fa.gz"
    conda: "fa2fq.yml"
    shell: """
    sed -n '1~4 s/^@/>/p; 2~4p' {input.seqs} | gzip > {output.seqs}
    """
# Both rules can claim targets ending in .fa.gz-derived names; prefer fq2fa.
ruleorder: fq2fa > gunzip