Source code for ymp.gff
"""
Implements simple reader and writer for GFF (general feature format) files.
Unfinished:
- only supports one version, GFF 3.2.3.
- no escaping (percent-encoded characters are neither decoded when reading
  nor encoded when writing)
"""
from collections import namedtuple
# Column names of a GFF feature line, listed in file order. The same names
# are used as the field names of the ``Feature`` record defined below.
_FIELDS = [
    'seqid',       # identifier of the sequence the feature lies on
    'source',      # the tool that produced the feature
    'type',        # kind of feature: CDS, rRNA, ...
    'start',       # start coordinate
    'end',         # end coordinate
    'score',       # score column
    'strand',      # '+' or '-'
    'phase',       # phase column
    'attributes',  # the attributes column (last column)
]

# Record type holding one feature line, with one field per column.
Feature = namedtuple(typename="Feature", field_names=_FIELDS)
# Attribute tags that get a field in the ``Attributes`` record below.
_ATTRIBUTES = [
    # unique ID per GFF; use the same ID on multiple lines for
    # discontinuous features
    'ID',
    # display name for the user
    'Name',
    # secondary name, e.g. an accession
    'Alias',
    # indicates a "part of" relationship
    'Parent',
    # alignment target, given as 'target_id start end [strand]'
    'Target',
    # alignment in CIGAR format
    'Gap',
    # temporal relationship
    'Derives_From',
    # free text
    'Note',
    # database cross reference
    'Dbxref',
    'Ontology_term',
    # whether the feature is circular
    'Is_circular',
    # The following lower-case tags are noted here but are NOT fields of
    # the record:
    #   note, product, partial, start_type, rbs_motif, rbs_spacer,
    #   gc_cont, conf, score, <x>score
]

# Record type with one field per tag listed in ``_ATTRIBUTES``.
Attributes = namedtuple(typename="Attributes", field_names=_ATTRIBUTES)
class reader(object):
    """Iterate over the feature lines of a GFF file.

    Wraps an iterable of lines (typically an open file, text or binary)
    and yields one ``Feature`` per feature line.

    Args:
      fileobj: Iterable yielding lines as ``str`` or ``bytes``. ``bytes``
        lines are decoded as ASCII.

    Yields:
      ``Feature`` tuples in which ``start`` and ``end`` are ``int`` and
      the last column is a ``dict`` mapping attribute tags to their (raw,
      undecoded) string values. All other columns are left as strings.

    Raises:
      ValueError: if ``start``/``end`` are not integers or an attribute
        item has no ``=``.
      IndexError, TypeError: if a feature line has too few columns.
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj

    def __iter__(self):
        for line in self.fileobj:
            if isinstance(line, bytes):
                line = line.decode('ascii')
            # "##FASTA" ends the annotation section; everything after it
            # is sequence data, not feature lines.
            if line.startswith("##FASTA"):
                return
            # Directives ("##...") and comments ("#...") carry no features.
            if line.startswith("#"):
                continue
            line = line.strip()
            # Blank lines are tolerated instead of raising IndexError.
            if not line:
                continue
            f = line.split('\t')
            f[3] = int(f[3])
            f[4] = int(f[4])
            f[-1] = self._parse_attributes(f[-1])
            yield Feature(*f)

    @staticmethod
    def _parse_attributes(text):
        """Parse a ``tag=value;tag=value`` attributes column into a dict."""
        text = text.strip(';')
        # "." is the GFF placeholder for an empty column.
        if text in ('', '.'):
            return {}
        attributes = {}
        for item in text.split(';'):
            if not item:
                # Tolerate doubled separators such as "a=1;;b=2".
                continue
            # Split on the first "=" only, so that values may themselves
            # contain "=" characters.
            tag, value = item.split('=', 1)
            attributes[tag] = value
        return attributes
class writer(object):
    """Write features to a file object in GFF format.

    Creating the writer immediately writes the ``##gff-version 3`` header
    line to ``fileobj``.

    Args:
      fileobj: Object with a ``write(str)`` method (e.g. a file opened in
        text mode).
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj
        self.fileobj.write("##gff-version 3\n")

    def write(self, feature):
        """Write a single feature as one tab-separated line.

        Args:
          feature: Iterable of column values, e.g. a ``Feature`` tuple.
            A ``dict`` column (the attributes as produced by ``reader``)
            is written as ``tag=value`` pairs joined by ``;``; every other
            column is converted with ``str()``. No escaping is applied.
        """
        columns = [self._format_column(value) for value in feature]
        self.fileobj.write("\t".join(columns) + "\n")

    @staticmethod
    def _format_column(value):
        """Return the textual form of one column value."""
        if isinstance(value, dict):
            pairs = ["{}={}".format(tag, val) for tag, val in value.items()]
            # "." is the GFF placeholder for an empty column.
            return ";".join(pairs) or "."
        return str(value)