Source code for ymp.nuc2aa

#!/usr/bin/env python3

import click


# Amino-acid letter for each of the 64 codons, indexed by the codon read as a
# three-digit base-4 number whose digits follow the order given in NU
# (T=0, C=1, A=2, G=3); '*' marks a stop codon. The layout corresponds to the
# standard genetic code (NCBI translation table 1). Each row below holds the
# 16 codons sharing one first base.
AA = (
    'FFLLSSSSYY**CC*W'  # T..
    'LLLLPPPPHHQQRRRR'  # C..
    'IIIMTTTTNNKKSSRR'  # A..
    'VVVVAAAADDEEGGGG'  # G..
)
# Nucleotide alphabet; the position of a letter is its base-4 digit value.
NU = 'TCAG'
# Lookup from nucleotide letter to its digit value (position within NU).
B2N = {nuc: digit for digit, nuc in enumerate(NU)}


def nuc2num(seq):
    """Return the integer encoded by nucleotide sequence *seq*.

    *seq* is read as a number in base ``len(NU)`` with the first character
    as the most significant digit; each character's digit value is taken
    from ``B2N``. An empty sequence yields ``0``.

    Raises:
        KeyError: if *seq* contains a character that is not a key of ``B2N``.
    """
    radix = len(NU)
    total = 0
    # Walk from the least significant (last) character upwards.
    for exponent, nuc in enumerate(reversed(seq)):
        total += radix ** exponent * B2N[nuc]
    return total
def nuc2aa(seq):
    """Translate nucleotide sequence *seq* into an amino-acid string.

    *seq* is consumed three characters at a time; each triplet is converted
    to a table index with ``nuc2num`` and looked up in ``AA``. Trailing
    characters that do not form a complete triplet are ignored.

    Raises:
        KeyError: propagated from ``nuc2num`` for characters it cannot map.
    """
    # Passing the *same* iterator three times makes zip pull consecutive
    # items, i.e. yields non-overlapping triplets and drops any remainder.
    bases = iter(seq)
    codons = zip(bases, bases, bases)
    return ''.join(AA[nuc2num(codon)] for codon in codons)
@click.command()
@click.argument('input', type=click.File('r'))
@click.argument('output', type=click.File('w'))
def click_fasta_dna2aa(input, output):
    """Translate nucleotide FASTA file INPUT into amino acid FASTA file OUTPUT.

    INPUT files whose name ends in ".gz" are read as gzip-compressed text.
    """
    # NOTE: the parameter name `input` shadows the builtin, but it must match
    # the click argument name declared above, so it is kept as is.
    if input.name.endswith(".gz"):
        import gzip
        # Re-open the file by name through gzip; the context manager makes
        # sure the decompressing handle is closed once translation is done.
        with gzip.open(input.name, "rt") as gz_input:
            fasta_dna2aa(gz_input, output)
    else:
        fasta_dna2aa(input, output)
def fasta_dna2aa(inf, outf):
    """Translate a nucleotide FASTA stream into an amino-acid FASTA stream.

    Args:
        inf: iterable of text lines. Lines starting with ``>`` are record
            headers; all other lines are stripped and concatenated to form
            the record's sequence. Lines before the first header are dropped.
        outf: object with a ``write(str)`` method receiving the output.

    For each record the header line is written unchanged, followed by the
    translation from ``nuc2aa`` wrapped at 60 characters per line. A record
    with an empty translation produces a single empty line after its header.

    Raises:
        KeyError: propagated from ``nuc2aa`` when a sequence contains a
            character it cannot translate.
    """
    line_width = 60

    def write_aa(header, seq):
        """Write one record: *header* verbatim, then the wrapped translation."""
        outf.write(header)
        aa = nuc2aa(seq)
        # Records flagged with a GTG start get 'M' as their first residue.
        # Guard on `aa` so an empty translation is not turned into "M".
        # NOTE(review): "start_type=" looks like a Prodigal header field;
        # only GTG is handled here -- confirm whether other alternative
        # start types (e.g. TTG) should be rewritten as well.
        if aa and "start_type=GTG" in header:
            aa = 'M' + aa[1:]
        wrapped = '\n'.join(
            aa[pos:pos + line_width] for pos in range(0, len(aa), line_width)
        )
        outf.write(wrapped + '\n')

    header = None
    chunks = []  # sequence pieces of the current record, joined on flush
    for line in inf:
        # startswith() is safe for empty strings, unlike line[0].
        if line.startswith('>'):
            if header is not None:
                write_aa(header, ''.join(chunks))
            header = line
            chunks = []
        else:
            chunks.append(line.strip())
    # Flush the final record, if any header was seen at all.
    if header is not None:
        write_aa(header, ''.join(chunks))
if __name__ == "__main__":
    # Run the command-line entry point when executed as a script. Click
    # fills in the function's parameters from the command line, so the
    # call below intentionally passes no arguments.
    # pylint does not get click decorators, disable warning:
    # pylint: disable=no-value-for-parameter
    click_fasta_dna2aa()