Snakefile "ymp/rules/test.rules"
"""
Verification rules
"""
with Stage("check") as S:
    S.doc("""
Verify file availability
This stage provides rules for checking the file availability at a given point
in the stage stack.
Mainly useful for testing and debugging.
""")

    # Both rules below follow the same scheme: open the gzip-compressed
    # reference as ASCII text, inspect only the first two lines (the header
    # and the first data line) and raise if either looks wrong. On success
    # the only product is an empty, temporary flag file created via
    # temp(touch(...)).
    # NOTE(review): "{:reference:}" and "{:this:}" are presumably expanded by
    # the enclosing Stage machinery -- confirm against the Stage docs.

    rule check_fasta:
        """Verify availability of FastA type reference"""
        message: "Verifying {input}"
        input:
            "{:reference:}.fasta.gz"
        output:
            temp(touch("{:this:}/ref_fasta"))
        run:
            import gzip, re
            with gzip.open(input[0], "rt", encoding="ascii") as ref:
                header = ref.readline()
                # startswith() instead of header[0]: an empty file yields ""
                # and must produce the descriptive error below rather than
                # an IndexError.
                if not header.startswith('>'):
                    raise Exception(
                        "FastA {} does not start with '>'"
                        " but '{}'".format(input, header[:1]))
                data = ref.readline()
                # Case-insensitive check of the first data line only.
                if not re.fullmatch(r"[\sACGTN]+", data, re.IGNORECASE):
                    raise Exception(
                        "FastA {} contains characters other than ACGTN "
                        " and whitespace".format(input))

    rule check_fastp:
        """Verify availability of FastP type reference"""
        message: "Verifying {input}"
        input:
            "{:reference:}.fastp.gz"
        output:
            temp(touch("{:this:}/ref_fastp"))
        run:
            import gzip, re
            with gzip.open(input[0], "rt", encoding="ascii") as ref:
                header = ref.readline()
                # startswith() instead of header[0]: an empty file yields ""
                # and must produce the descriptive error below rather than
                # an IndexError.
                if not header.startswith('>'):
                    raise Exception(
                        "FastP {} does not start with '>'"
                        " but '{}'".format(input, header[:1]))
                data = ref.readline()
                # Case-insensitive check of the first data line only; the
                # accepted set is the amino acid letters listed in the
                # pattern plus '*' and whitespace.
                if not re.fullmatch(r"[\s*ACDEFGHIKLMNPQRSTVWY]+", data, re.IGNORECASE):
                    # The message names what this pattern actually tests
                    # (it previously repeated the nucleotide wording).
                    raise Exception(
                        "FastP {} contains characters other than amino acid "
                        "codes, '*' and whitespace".format(input))