Source code for ymp.string

import re

from itertools import product
from string import Formatter
from typing import List, Dict, Tuple, Union, Any, Set

import snakemake.utils


class FormattingError(AttributeError):
    """AttributeError subclass that remembers the field name involved.

    Args:
        message: Text passed on to :class:`AttributeError`.
        fieldname: Name of the field; stored on the instance as ``attr``.
    """

    def __init__(self, message: str, fieldname: str) -> None:
        # Keep the field name available to handlers via ``exc.attr``.
        self.attr = fieldname
        super().__init__(message)
class GetNameFormatter(Formatter):
    """Formatter that can list the field names used in a pattern."""

    def get_names(self, pattern: str):
        """Yield the field name of every replacement field in *pattern*.

        The names are taken from the second element of each tuple
        produced by :meth:`parse`; entries without a field (``None``)
        are skipped. Names are yielded in order of appearance and may
        repeat.
        """
        yield from (
            parsed[1]
            for parsed in self.parse(pattern)
            if parsed[1] is not None
        )
class OverrideJoinFormatter(Formatter):
    """Formatter with overridable join method

    The default formatter joins all arguments with ``"".join(args)``.
    This class re-implements :meth:`_vformat` so that the final
    concatenation goes through :meth:`join`, which derived classes can
    override.
    """

    def _vformat(self, format_string: str, args: List, kwargs: Dict,
                 used_args, recursion_depth: int,
                 auto_arg_index: int = 0
                 ) -> Tuple[Union[List[str], str], int]:
        """Expand *format_string* and combine the pieces via :meth:`join`.

        Returns a tuple of the joined result and the updated automatic
        argument index.

        Raises:
            ValueError: if the recursion depth is exhausted or automatic
                and manual field numbering are mixed.
        """
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')

        # The same message is used for both directions of the
        # automatic/manual numbering conflict.
        numbering_conflict = ('cannot switch from manual field '
                              'specification to automatic field '
                              'numbering')

        pieces = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):
            # Literal text is copied through unchanged.
            if literal_text:
                pieces.append(literal_text)

            # No replacement field in this chunk: nothing more to do.
            if field_name is None:
                continue

            if field_name == '':
                # Automatic numbering ("{}"); ``False`` marks that a
                # manually numbered field was seen earlier.
                if auto_arg_index is False:
                    raise ValueError(numbering_conflict)
                field_name = str(auto_arg_index)
                auto_arg_index += 1
            elif field_name.isdigit():
                # Manual numbering ("{0}"); a non-zero index means
                # automatic numbering was already used.
                if auto_arg_index:
                    raise ValueError(numbering_conflict)
                # Disable automatic numbering from here on.
                auto_arg_index = False

            # Look up the referenced object and record which argument
            # it came from.
            obj, arg_used = self.get_field(field_name, args, kwargs)
            used_args.add(arg_used)

            # Apply the conversion (e.g. ``!r``), if any.
            obj = self.convert_field(obj, conversion)

            # The format spec may itself contain replacement fields.
            format_spec, auto_arg_index = self._vformat(
                format_spec, args, kwargs, used_args,
                recursion_depth - 1,
                auto_arg_index=auto_arg_index)

            pieces.append(self.format_field(obj, format_spec))

        return self.join(pieces), auto_arg_index

    def join(self, args: List[str]) -> Union[List[str], str]:
        """Combine the expanded pieces of the template into the output.

        This implementation is equivalent to ``''.join(args)``. Derived
        classes may override it to combine the pieces differently, e.g.
        to build a list of strings, one per element of the cross product
        of the expanded variables.
        """
        return ''.join(args)
class ProductFormatter(OverrideJoinFormatter):
    """
    String Formatter that creates a list of strings each expanded using
    one point in the cartesian product of all replacement values.

    The result is a list only if the product contains more than one
    combination. With exactly one combination the single string is
    returned, and with none (some value was an empty iterable) the
    empty string is returned.

    >>> ProductFormatter().format("{A} and {B}", A=[1,2], B=[3,4])
    ['1 and 3', '1 and 4', '2 and 3', '2 and 4']
    """

    def join(self, args: List[Any]) -> Union[List[str], str]:
        """Combine the expanded pieces into their cartesian product.

        Args:
            args: Pieces of the template. Strings count as a single
                alternative; any other item is iterated to obtain its
                alternatives.

        Returns:
            A list of strings if there is more than one combination,
            the single string if there is exactly one, ``''`` if there
            are none.
        """
        # Expand everything that isn't a string to a list of options.
        columns = [[item] if isinstance(item, str) else list(item)
                   for item in args]
        # One output string per point in the cartesian product.
        res = [''.join(combo) for combo in product(*columns)]
        if len(res) > 1:
            return res
        return res[0] if res else ''

    def format_field(self, value, format_spec: str):
        """Format *value*, or each of its items if it is a non-string iterable.

        For iterables a generator of formatted items is returned, which
        :meth:`join` expands. The format spec is applied to every item;
        previously it was silently dropped for iterable values.
        """
        if hasattr(value, '__iter__') and not isinstance(value, str):
            return (format(item, format_spec) for item in value)
        return format(value, format_spec)
class RegexFormatter(Formatter):
    """
    String Formatter accepting a regular expression defining the format
    of the expanded tags.

    The expression must define a named group ``name``; its match is used
    as the field name.

    Args:
        regex: Pattern string (compiled with :func:`re.compile`) or an
            already compiled pattern object, which is used as is.
    """

    def __init__(self, regex: Union[str, Any]) -> None:
        super().__init__()
        self.regex = re.compile(regex) if isinstance(regex, str) else regex

    def parse(self, format_string: str):
        """
        Parse format_string into tuples.

        Each yielded tuple contains:

        literal_text:
            text to copy verbatim
        field_name:
            name of the field following the literal text, or ``None``
        format_spec:
            always ``''`` for a field, ``None`` for the trailing text
        conversion:
            always ``None``

        Nothing is yielded if *format_string* is ``None``.
        """
        if format_string is None:
            return

        start = 0
        for match in self.regex.finditer(format_string):
            yield (format_string[start:match.start()],  # literal text
                   match.group('name'),                 # field name
                   '',                                  # format spec
                   None)                                # conversion
            start = match.end()

        # Whatever follows the last tag (possibly empty) is literal text.
        yield (format_string[start:], None, None, None)

    def get_names(self, format_string: str) -> Set[str]:
        """Get the set of field names in format_string.

        Returns an empty set if *format_string* is ``None``, matching
        :meth:`parse`, which also accepts ``None``.
        """
        if format_string is None:
            return set()
        return {match.group('name')
                for match in self.regex.finditer(format_string)}
class QuotedElementFormatter(snakemake.utils.SequenceFormatter):
    """SequenceFormatter whose ``element_formatter`` is a QuotedFormatter.

    All constructor arguments are forwarded unchanged to
    :class:`snakemake.utils.SequenceFormatter`.
    """

    def __init__(self, *args, **kwargs) -> None:
        super(QuotedElementFormatter, self).__init__(*args, **kwargs)
        # Replace whatever the base class configured with a quoting
        # formatter (presumably used for each sequence element --
        # see snakemake.utils for the exact semantics).
        quoting = snakemake.utils.QuotedFormatter()
        self.element_formatter = quoting
class PartialFormatter(Formatter):
    """
    Formats what it can and leaves the remainder untouched
    """

    def get_field(self, field_name, args, kwargs):
        """Resolve *field_name*, falling back to a placeholder.

        If the regular lookup raises ``KeyError``, ``IndexError`` or
        ``TypeError``, or yields an object whose type is named
        ``"function"``, the field is not expanded. Instead a placeholder
        is returned, built by formatting ``self.spec`` (default
        ``"{{{}}}"``, i.e. the field name wrapped in braces) with the
        field name, together with ``None`` as the used argument.
        """
        try:
            found = super().get_field(field_name, args, kwargs)
            # Objects whose type is called "function" are treated like
            # missing values.
            usable = type(found[0]).__name__ != "function"
        except (KeyError, IndexError, TypeError):
            usable = False

        if usable:
            return found

        template = getattr(self, "spec", "{{{}}}")
        return template.format(field_name), None
def make_formatter(product=None, regex=None, partial=None, quoted=None):
    """Build and instantiate a formatter class combining selected features.

    Every argument that is not ``None`` adds the matching class to the
    bases of a newly created type, in this order: ``ProductFormatter``,
    ``RegexFormatter``, ``PartialFormatter``, ``QuotedElementFormatter``.
    ``GetNameFormatter`` is always appended last. The new class is named
    by concatenating ``Product``, ``Regex``, ``Partial``,
    ``QuotedElement`` and ``Formatter`` for the selected parts.

    Arguments that are not ``int`` instances (``bool`` counts as ``int``)
    are additionally passed to the constructor as keyword arguments,
    keyed by the lower-cased part name (e.g. ``regex=...``).

    Note that any value other than ``None`` enables a feature --
    including ``False`` and ``0``.

    Returns:
        An instance of the newly created formatter class.
    """
    # (argument, mixin class, name fragment); the literal 1 keeps
    # GetNameFormatter always enabled without a constructor argument.
    candidates = (
        (product, ProductFormatter, 'Product'),
        (regex, RegexFormatter, 'Regex'),
        (partial, PartialFormatter, 'Partial'),
        (quoted, QuotedElementFormatter, 'QuotedElement'),
        (1, GetNameFormatter, 'Formatter'),
    )
    selected = [entry for entry in candidates if entry[0] is not None]

    bases = tuple(cls for _, cls, _ in selected)
    class_name = "".join(name for _, _, name in selected)
    init_kwargs = {
        name.lower(): arg
        for arg, _, name in selected
        if not isinstance(arg, int)
    }
    return type(class_name, bases, {})(**init_kwargs)