# Source code for ymp.map2otu
import sys, csv, re
from argparse import ArgumentParser
import fileinput
class emirge_info:
    """Parsed EMIRGE-style sequence header.

    A header has the form ``<sample>-<id>|<acc>;(<key>=<value>)*[;]``,
    for example::

        C5-14863|JF198678.1.1366;Prior=0.000387;Length=1357;NormPrior=0.000383;size=383

    Attributes set by the constructor:

    * ``sid``    -- the ``<sample>-<id>`` part before the ``|``
    * ``acc``    -- the text between the ``|`` and the first ``;``
    * ``sample`` -- ``sid`` without its trailing ``-<digits>``
    * ``id``     -- the trailing digits of ``sid`` (kept as a string)

    In addition, every well-formed ``key=value`` field becomes an
    attribute named ``key``; the value is stored as a ``float`` when it
    parses as one and as the raw string otherwise.
    """

    def __init__(self, line):
        """Parse *line* into attributes.

        Fields that are not exactly ``key=value`` (empty fields produced
        by a trailing ``;``, fields without ``=``, fields with more than
        one ``=``) are skipped.

        Raises:
            ValueError: if the first field does not contain exactly one
                ``|`` or ``sid`` does not end in ``-<digits>``.
        """
        field_list = line.split(";")
        self.sid, self.acc = field_list[0].split("|")
        # re.split with one capture group yields [sample, id, ""] when the
        # pattern matches at the end of sid.
        self.sample, self.id, _ = re.split(r'-([0-9]*)$', self.sid)
        for kv in field_list[1:]:
            pair = kv.split("=")
            if len(pair) != 2:
                # Malformed field: ignore it explicitly instead of hiding
                # every possible error behind a bare ``except``.
                continue
            key, value = pair
            try:
                parsed = float(value)
            except ValueError:
                parsed = value
            self.__dict__[key] = parsed

    def __str__(self):
        """Return a short summary; requires a numeric ``Prior`` attribute."""
        return "sample=%s id=%s acc=%s prior=%f" % (self.sample, self.id, self.acc, self.Prior)
class MapfileParser(object):
    """Accumulate per-sample ``NormPrior`` sums from tab-separated map files.

    Attributes:
        samples: set of all sample names seen in accepted rows.
        by_sample_sums: sample name -> sum of ``NormPrior`` over the
            accepted rows of that sample.
        by_centroid_sums: centroid ``sid`` -> row dict holding the key
            ``"centroid"`` plus one ``sample -> summed NormPrior`` entry
            per sample mapped to that centroid.
        minid: rows whose third column is below this value are skipped.
    """

    def __init__(self, minid=0):
        """Create an empty parser that drops rows with identity < *minid*."""
        self.samples = set()
        self.by_centroid_sums = {}
        self.by_sample_sums = {}
        # Store the caller's threshold; hard-coding 0 here would make the
        # filter in read() a no-op.
        self.minid = minid

    def read(self, mapfiles):
        """Read one or more map files and add their rows to the sums.

        Each non-blank line must contain at least three tab-separated
        columns: a source header, a centroid header (both parsed with
        ``emirge_info``) and a number that is compared against
        ``self.minid``.

        Args:
            mapfiles: file name(s) handed to ``fileinput.input``.
        """
        mapfile = fileinput.input(mapfiles)
        try:
            for line in mapfile:
                if not line.strip():
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                items = line.split("\t")
                src = emirge_info(items[0])
                centroid = emirge_info(items[1])
                identity = float(items[2])
                if identity < self.minid:
                    continue

                sample = src.sample
                weight = src.NormPrior
                self.samples.add(sample)
                self.by_sample_sums[sample] = (
                    self.by_sample_sums.get(sample, 0.0) + weight
                )
                # One row per centroid; the "centroid" key feeds the
                # first CSV column in write().
                row = self.by_centroid_sums.setdefault(
                    centroid.sid, {"centroid": centroid.sid}
                )
                row[sample] = row.get(sample, 0.0) + weight
        finally:
            mapfile.close()

    def write(self, outfile):
        """Write the per-centroid sums as CSV.

        The header is ``centroid`` followed by the sorted sample names;
        rows are emitted in sorted centroid order. Samples missing from a
        row are left empty by ``csv.DictWriter``.

        Args:
            outfile: path of the file to write, or ``"-"`` for stdout.
        """
        to_stdout = outfile == "-"
        handle = sys.stdout if to_stdout else open(outfile, "w")
        try:
            writer = csv.DictWriter(
                handle, fieldnames=["centroid"] + sorted(self.samples)
            )
            writer.writeheader()
            for key in sorted(self.by_centroid_sums):
                writer.writerow(self.by_centroid_sums[key])
        finally:
            # Never close the process-wide stdout.
            if not to_stdout:
                handle.close()
def main():
    """Command-line entry point.

    Parses ``-o/--output`` (default ``"-"``), ``--id`` (float, default
    0.0) and any number of positional ``input`` files (default ``"-"``),
    then builds a ``MapfileParser`` with the ``--id`` value, reads the
    inputs and writes the result to the chosen output.
    """
    cli = ArgumentParser()
    cli.add_argument("-o", "--output", default="-", metavar="file")
    cli.add_argument("--id", type=float, default=0.0)
    cli.add_argument("input", nargs="*", default="-")
    options = cli.parse_args()

    otu_table = MapfileParser(options.id)
    otu_table.read(options.input)
    otu_table.write(options.output)


# Run the converter only when executed as a script, not on import.
if __name__ == '__main__':
    main()