Snakefile "ymp/rules/test.rules"

"""
Verification rules
"""

with Stage("check") as S:
    S.doc("""
    Verify file availability

    This stage provides rules for checking the file availability at a given point
    in the stage stack.

    Mainly useful for testing and debugging.
    """)
    rule check_fasta:
        """Verify availability of FastA type reference

        Opens the gzip-compressed reference and inspects only its first two
        lines: the first line must start with '>', and the second line may
        contain only A, C, G, T, N (any case) and whitespace. An Exception
        is raised if either check fails; the output is merely a temporary
        flag file declared via ``touch``.
        """
        message: "Verifying {input}"
        input:
            "{:reference:}.fasta.gz"
        output:
            temp(touch("{:this:}/ref_fasta"))
        run:
            import gzip, re
            # Text mode with ASCII decoding: non-ASCII bytes in the first
            # two lines surface as a UnicodeDecodeError.
            with gzip.open(input[0], "rt", encoding="ascii") as ref:
                header = ref.readline()
                # startswith() / header[:1] instead of header[0]: an empty
                # file yields "" from readline(), and indexing it would
                # raise an uninformative IndexError.
                if not header.startswith('>'):
                    raise Exception(
                        "FastA {} does not start with '>'"
                        " but '{}'".format(input, header[:1]))
                data = ref.readline()
                # Only the first sequence line is validated (sanity check,
                # not a full parse). A missing second line ("" at EOF)
                # fails the match as well because of the '+' quantifier.
                if not re.fullmatch(r"[\sACGTN]+", data, re.IGNORECASE):
                    raise Exception(
                        "FastA {} contains characters other than ACGTN"
                        " and whitespace".format(input))

    rule check_fastp:
        """Verify availability of FastP type reference

        Opens the gzip-compressed reference and inspects only its first two
        lines: the first line must start with '>', and the second line may
        contain only the amino acid letters ACDEFGHIKLMNPQRSTVWY (any
        case), '*' and whitespace. An Exception is raised if either check
        fails; the output is merely a temporary flag file declared via
        ``touch``.
        """
        message: "Verifying {input}"
        input:
            "{:reference:}.fastp.gz"
        output:
            temp(touch("{:this:}/ref_fastp"))
        run:
            import gzip, re
            # Text mode with ASCII decoding: non-ASCII bytes in the first
            # two lines surface as a UnicodeDecodeError.
            with gzip.open(input[0], "rt", encoding="ascii") as ref:
                header = ref.readline()
                # startswith() / header[:1] instead of header[0]: an empty
                # file yields "" from readline(), and indexing it would
                # raise an uninformative IndexError.
                if not header.startswith('>'):
                    raise Exception(
                        "FastP {} does not start with '>'"
                        " but '{}'".format(input, header[:1]))
                data = ref.readline()
                # Only the first sequence line is validated (sanity check,
                # not a full parse). A missing second line ("" at EOF)
                # fails the match as well because of the '+' quantifier.
                if not re.fullmatch(r"[\s*ACDEFGHIKLMNPQRSTVWY]+", data, re.IGNORECASE):
                    # Message names the protein alphabet actually checked
                    # above (it previously said "ACGTN", copied from the
                    # nucleotide rule).
                    raise Exception(
                        "FastP {} contains characters other than amino acid"
                        " codes ACDEFGHIKLMNPQRSTVWY, '*'"
                        " and whitespace".format(input))