Snakefile "ymp/rules/unicycler.rules"

from ymp.util import filter_input

Env(name="unicycler", base="bioconda", packages=["unicycler", "pigz"])

with Stage("assemble_unicycler") as S:
    S.doc("""Assemble reads using unicycler

    >>> ymp make toy.assemble_unicycler
    """)

    rule unicycler:
        """
        Runs unicycler
        """
        message:
            "Assembling {wildcards.target} with Unicycler"
        input:
            # FIXME: unicycler does not support multiple input fq
            #        => grouping / co assembly does not work
            #        might be possible to concatenate, not ideal though
            r1 = "{:prev:}/{:target:}.{:pairnames[0]:}.fq.gz",
            r2 = "{:prev:}/{:target:}.{:pairnames[1]:}.fq.gz"
        output:
            fasta = "{:this:}/{target}.fasta.gz"
        log:
            "{:this:}/{target}.log"
        conda:
            "unicycler"
        threads:
            24
        params:
            r1 = filter_input("r1", also=["r2"], minsize=1),
            r2 = filter_input("r2", also=["r1"], minsize=1),
            # Allow failure if we have <50 sequences
            sufficient_input = check_input(["r1", "r2"], minlines=50*4*2),
            # Exclude contigs from the FASTA file which are shorter than this length
            min_fasta_length = 200,
            # Maximum k size for spades as fraction read length
            max_kmer_frac = 0.7,
            # Level of file retention (default: 1)
            # 0 = only keep final files: assembly (FASTA, GFA and log),
            # 1 = also save graphs at main checkpoints,
            # 2 = also keep SAM (enables fast rerun in different mode),
            # 3 = keep all temp files and save all graphs (for debugging)
            keep = 1,
            # Bridging mode (default: normal)
            # conservative = smaller contigs, lowest misassembly rate
            # normal = moderate contig size and misassembly rate
            # bold = longest contigs, higher misassembly rate
            mode = "normal",
            workdir = "{:this:}/{target}.tmp/",
            verbosity = 1,
        shell: """
        short1=({params.r1})
        short2=({params.r2})
        if [ ${{#short1[*]}} -gt 1 ]; then
          # Unicycler does not support multiple FQ files as input
          # See https://github.com/rrwick/Unicycler/issues/66
          # Create tmpdir, set delete trap on exit, concatenate
          # zip files, and pass those to unicycler instead.
          tmpdir=$(mktemp -d)
          trap 'rm -rf $tmpdir' EXIT
          short1=$tmpdir/R1.fq.gz
          short2=$tmpdir/R2.fq.gz
          echo "YMP: concatenating {params.r1} into $short1" >> {log}
          cat {params.r1} > $short1 &
          echo "YMP: concatenating {params.r2} into $short2" >> {log}
          cat {params.r2} > $short2 &
          wait
        fi

        make_empty_result() {{
           echo "YMP: skippping assembly - $*" >> {log}
           mkdir -p {params.workdir}
           touch {params.workdir}assembly.fasta
        }}
        
        if [ -z "{params.r1}" ]; then
           make_empty_result "all input empty"
        else
           ARGS=""
           ARGS="$ARGS --short1 $short1"
           ARGS="$ARGS --short2 $short2"
           ARGS="$ARGS -o {params.workdir}"
           ARGS="$ARGS --min_fasta_length {params.min_fasta_length}"
           ARGS="$ARGS --keep {params.keep}"
           ARGS="$ARGS --mode {params.mode}"
           ARGS="$ARGS --threads {threads}"
           ARGS="$ARGS --verbosity {params.verbosity}"
           ARGS="$ARGS --max_kmer_frac {params.max_kmer_frac}"
           echo "YMP: Running Unicycler $ARGS" >> {log}
           if ! unicycler $ARGS >>{log} 2>&1; then
             if [ "{params.sufficient_input}" = "False" ]; then
                make_empty_result "Assuming failure due to low read count"
             else
                exit 1
             fi
           fi
        fi
        pigz -p {threads} {params.workdir}assembly.fasta
        mv {params.workdir}assembly.fasta.gz {output.fasta}
        rm -rf {params.workdir}
        """