diff --git a/poc/__init__.py b/poc/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/poc/api.py b/poc/api.py
new file mode 100644
index 000000000..c2f0b307a
--- /dev/null
+++ b/poc/api.py
@@ -0,0 +1,13 @@
+
+
+from poc.expressions import (  # noqa: F401
+    join_,
+    metadata,
+    trait,
+    list_items,
+    dict_items,
+    set_items,
+    recursive,
+)
+
+from poc.parsing import parse  # noqa: F401
diff --git a/poc/expressions.py b/poc/expressions.py
new file mode 100644
index 000000000..01c9371c1
--- /dev/null
+++ b/poc/expressions.py
@@ -0,0 +1,627 @@
+
+import copy
+from functools import reduce, update_wrapper
+from poc.observe import (
+    _is_not_none,
+    ListenerPath,
+    MetadataListener,
+    NamedTraitListener,
+    _FilteredTraitListener,
+    ListItemListener,
+    DictItemListener,
+    SetItemListener,
+)
+
+
+def observe(object, expression, handler):
+    # ``observe`` replaces ``on_trait_change``.
+    # ``expression`` replaces ``name`` in ``on_trait_change``.
+    # This is an example implementation to demonstrate
+    # how the Expression is used. This will fail because
+    # ``_observe`` is not implemented here.
+    for path in expression.as_paths():
+        _observe(object=object, path=path, handler=handler)  # noqa: F821
+
+
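+# A minimal usage sketch (illustrative only): assuming ``person`` is a
+# HasTraits-like object with a ``child`` trait whose value in turn has
+# an ``age`` trait, the intended API shape is:
+#
+#     expression = trait("child").trait("age")
+#     observe(object=person, expression=expression, handler=handler)
+#
+# Since ``_observe`` is not implemented in this proof of concept, the
+# call above only demonstrates how the pieces are meant to fit together.
+
+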
+class Expression:
+    """ A user-facing object for constructing the data structures
+    required by ``observe`` (a future replacement for
+    ``on_trait_change``).
+
+    ``Expression.as_paths()`` creates a new list of ``ListenerPath``
+    objects for ``observe`` to operate on.
+
+    While the constructor is public-facing, users will likely use one
+    of the module-level convenience functions for creating an instance
+    of ``Expression``. Methods on ``Expression`` allow users to extend
+    the ``ListenerPath``.
+    """
+    def __init__(self):
+        # ``_levels`` is a list of tuples (branched nodes, cycled nodes).
+        # The last item is the most nested level.
+        # When paths are constructed from this expression, one starts
+        # from the end of this list and works towards the top, then
+        # continues to the prior expressions.
+        self._levels = []
+
+        # Represents prior expressions to be combined in series (JOIN)
+        # or in parallel (OR)
+        self._prior_expression = None
+
+    def __eq__(self, other):
+        if type(other) is not type(self):
+            return False
+        return self.as_paths() == other.as_paths()
+
+    def __or__(self, expression):
+        """ Create a new expression that matches this expression OR
+        the given expression.
+
+        e.g. ``trait("age") | trait("number")`` will match either trait `age`
+        or trait `number` on an object.
+
+        Parameters
+        ----------
+        expression : Expression
+
+        Returns
+        -------
+        new_expression : Expression
+        """
+        new = Expression()
+        new._prior_expression = _ParallelExpression([self, expression])
+        return new
+
+    def then(self, expression):
+        """ Create a new expression by extending this expression with
+        the given expression.
+
+        e.g. ``trait("child").then(trait("age") | trait("number"))``
+        matches ``child.age`` or ``child.number`` on an object.
+
+        Parameters
+        ----------
+        expression : Expression
+
+        Returns
+        -------
+        new_expression : Expression
+        """
+
+        if self._prior_expression is None and not self._levels:
+            # this expression is empty...
+            new = expression.copy()
+        else:
+            new = Expression()
+            new._prior_expression = _SeriesExpression([self, expression])
+        return new
+
+    def _root_nodes(self):
+        """ Return the root branched nodes of this expression. They may come
+        from the prior expressions if defined.
+
+        This is for supporting recursions back to the root nodes.
+
+        Returns
+        -------
+        bnodes : set(BaseListener)
+            Nodes for branches.
+        cnodes : set(BaseListener)
+            Nodes for cycles.
+
+        Raises
+        ------
+        ValueError
+            If no root nodes are found.
+        """
+        if not self._levels and self._prior_expression is None:
+            raise ValueError("No root nodes")
+
+        if self._prior_expression is not None:
+            return self._prior_expression._root_nodes()
+
+        for bnodes, cnodes in self._levels:
+            if bnodes:
+                return (bnodes, cnodes)
+
+        raise ValueError("No root nodes")
+
+    def recursive(self, expression):
+        """ Create a new expression by adding a recursive path to
+        this expression.
+
+        e.g. ``trait("root").recursive(trait("left") | trait("right")).trait("value")``
+        will match ``root.left.value``, ``root.left.left.value``,
+        ``root.left.right.left.value`` and so on.
+
+        Parameters
+        ----------
+        expression : Expression
+
+        Returns
+        -------
+        new_expression : Expression
+        """
+        new = self.then(expression)
+        bnodes, cnodes = expression._root_nodes()
+        if cnodes:
+            raise RuntimeError("Cannot recurse on a recursion.")
+        return new._new_with_cycles(bnodes)
+
+    def as_paths(self):
+        """ Return all the ``ListenerPath`` objects for the observer.
+        """
+        paths, _ = _create_paths(self)
+        return set(paths)
+
+    def trait(self, name, notify=True, optional=False):
+        """ Create a new expression that matches the current
+        expression and then a trait with the exact name given.
+
+        e.g. ``trait("child").trait("age")`` matches ``child.age``
+        on an object, and is equivalent to
+        ``trait("child").then(trait("age"))``
+
+        Parameters
+        ----------
+        name : str
+            Name of the trait to match.
+        notify : boolean, optional
+            Whether to notify for changes.
+        optional : boolean, optional
+            Whether this trait is optional on an object.
+
+        Returns
+        -------
+        new_expression : Expression
+        """
+        return self._new_with_branches(
+            nodes=[NamedTraitListener(name, notify, optional)],
+        )
+
+    def list_items(self, notify=True, optional=False):
+        """ Create a new expression for observing items inside a list.
+
+        e.g. ``trait("containers").list_items()`` for observing mutations
+        to a list named ``containers``.
+
+        e.g. ``trait("containers").list_items().trait("value")`` for observing
+        the trait ``value`` on any items in the list ``containers``.
+
+        Parameters
+        ----------
+        notify : boolean, optional
+            Whether to notify for changes.
+        optional : boolean, optional
+            Whether to ignore this if the upstream object is not a list.
+
+        Returns
+        -------
+        new_expression : Expression
+        """
+        return self._new_with_branches(
+            nodes=[ListItemListener(notify=notify, optional=optional)],
+        )
+
+    def dict_items(self, notify=True, optional=False):
+        """ Create a new expression for observing items inside a dict.
+
+        An expression with ``dict_items`` cannot be further extended
+        as it is ambiguous which of the keys and values are being
+        observed.
+
+        Parameters
+        ----------
+        notify : boolean, optional
+            Whether to notify for changes.
+        optional : boolean, optional
+            Whether to ignore this if the upstream object is not a dict.
+
+        Returns
+        -------
+        new_expression : Expression
+        """
+        # Should be similar to list_items but for dict
+        return self._new_with_branches(
+            nodes=[DictItemListener(notify=notify, optional=optional)],
+        )
+
+    def set_items(self, notify=True, optional=False):
+        """ Create a new expression for observing items inside a set.
+
+        Parameters
+        ----------
+        notify : boolean, optional
+            Whether to notify for changes.
+        optional : boolean, optional
+            Whether to ignore this if the upstream object is not a set.
+
+        Returns
+        -------
+        new_expression : Expression
+        """
+        # Should be similar to list_items but for set
+        return self._new_with_branches(
+            nodes=[SetItemListener(notify=notify, optional=optional)],
+        )
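+
+    # Note (illustrative): the container methods compose like ``trait``.
+    # e.g. ``trait("containers").list_items().trait("value")`` observes the
+    # ``value`` trait on every item of the list ``containers``, whereas a
+    # ``dict_items`` expression is terminal, because extending it would be
+    # ambiguous between keys and values.
+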
+    def filter_(self, filter, notify=True):
+        """ Create a new expression that matches traits using the
+        given filter after the current expression returns a match.
+
+        Parameters
+        ----------
+        filter : callable(str, TraitType) -> boolean
+            Return True if a trait is to be observed.
+            Note that if this expression is used for removing
+            observers, the given filter must compare equally to the
+            filter used for putting up the observer in the first place.
+        notify : boolean, optional
+            Whether to notify for changes.
+
+        Returns
+        -------
+        new_expression : Expression
+        """
+        return self._new_with_branches(
+            nodes=[_FilteredTraitListener(notify=notify, filter=filter)],
+        )
+
+    def anytrait(self, notify=True):
+        """ Create a new expression that matches any trait after
+        the current expression returns a match.
+
+        e.g. ``trait("child").anytrait()`` will match any trait on
+        the trait ``child`` on a given object, such as ``child.age``,
+        ``child.name``, ``child.mother`` and so on.
+
+        Equivalent to ``filter_(filter=anytrait_filter)`` where
+        ``anytrait_filter`` always returns True.
+
+        Parameters
+        ----------
+        notify : boolean, optional
+            Whether to notify for changes.
+
+        Returns
+        -------
+        new_expression : Expression
+        """
+        return self.filter_(filter=_anytrait_filter, notify=notify)
+
+    def metadata(self, metadata_name, value=_is_not_none, notify=True):
+        """ Return a new expression that matches traits based on
+        metadata filters, after the current expression matches.
+
+        e.g. ``metadata("age", value=lambda value: value is not None)``
+        matches traits with ``age`` metadata whose values are not None.
+
+        Parameters
+        ----------
+        metadata_name : str
+            Name of the metadata to filter traits with.
+        value : callable(value) -> boolean
+            Return True if a trait is to be observed.
+            ``value`` is the value of the metadata, if defined on a trait.
+        notify : boolean, optional
+            Whether to notify for changes.
+
+        Returns
+        -------
+        new_expression : Expression
+
+        See also
+        --------
+        HasTraits.traits
+        """
+        # Something that makes use of
+        # HasTraits.traits(**{metadata_name: filter})
+        return self._new_with_branches(
+            nodes=[
+                MetadataListener(
+                    metadata_name=metadata_name, value=value, notify=notify),
+            ]
+        )
+
+    def _new_with_branches(self, nodes):
+        expression = self.copy()
+        expression._levels.append((set(nodes), set()))
+        return expression
+
+    def _new_with_cycles(self, nodes):
+        expression = self.copy()
+        expression._levels.append((set(), set(nodes)))
+        return expression
+
+    def copy(self):
+        """ Return a shallow copy of this expression."""
+        expression = Expression()
+        expression._levels = self._levels.copy()
+        if self._prior_expression is not None:
+            expression._prior_expression = self._prior_expression.copy()
+        return expression
+
+    def info(self):
+        """ Return a list of user-friendly texts containing descriptive
+        information about this expression.
+        """
+        infos = []
+        for path in self.as_paths():
+            infos.append(" ---- Path ---- ")
+            infos.extend(path.info())
+        return infos
+
+    def print(self):
+        """ Print the descriptive information for this expression.
+        """
+        print(*self.info(), sep="\n")
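+
+
+# Introspection sketch (illustrative): ``info()`` and ``print()`` are
+# debugging aids for inspecting what an expression will listen to, e.g.
+#
+#     trait("child").metadata("updated").print()
+#
+# prints one " ---- Path ---- " header per resulting ``ListenerPath``,
+# followed by that path's own description lines.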
+
+
+def _anytrait_filter(name, trait):
+    """ Filter for matching any traits."""
+    return True
+
+
+def _create_paths(expression, paths=None, id_to_path=None, last_cnodes=None):
+    """ Create ListenerPaths from a given expression.
+
+    Parameters
+    ----------
+    expression : Expression
+    paths : collection of ListenerPath
+        Leaf paths to be added.
+        Needed when this function is called recursively.
+    id_to_path : dict(int, ListenerPath)
+        Mapping from nodes' ids to ListenerPath.
+        Needed for maintaining object identity while handling cycles
+        when this function is called recursively.
+    last_cnodes : collection of BaseListener
+        Nodes to be added as cycles.
+        Needed when this function is called recursively.
+
+    Returns
+    -------
+    paths : list of ListenerPath
+        New paths
+    last_cnodes : list of BaseListener
+        Cycles to be propagated upstream, if any. Used when this
+        function is called multiple times for joining expressions.
+    """
+    if paths is None:
+        paths = []
+
+    if id_to_path is None:
+        id_to_path = {}
+
+    def make_path(node):
+        if id(node) in id_to_path:
+            return id_to_path[id(node)]
+        path = ListenerPath(node=node)
+        id_to_path[id(node)] = path
+        return path
+
+    if last_cnodes is None:
+        last_cnodes = []
+    else:
+        last_cnodes = list(last_cnodes)
+
+    for bnodes, cnodes in expression._levels[::-1]:
+        if bnodes and cnodes:
+            raise RuntimeError("Expected either branches or cycles, not both.")
+
+        if cnodes:
+            last_cnodes.extend(cnodes)
+            continue
+
+        cnodes = set(last_cnodes)
+        last_cnodes.clear()
+
+        cpaths = [make_path(node) for node in cnodes]
+        bpaths = [make_path(node) for node in bnodes]
+        for path in bpaths:
+            path.branches = path.branches.union(paths)
+            path.cycles = path.cycles.union(cpaths)
+
+        paths = bpaths
+
+    if expression._prior_expression is not None:
+        paths, last_cnodes = expression._prior_expression._create_paths(
+            paths=paths,
+            id_to_path=id_to_path,
+            last_cnodes=last_cnodes,
+        )
+    return paths, last_cnodes
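+
+
+# Worked sketch (illustrative): for ``trait("a").trait("b")``, ``_levels``
+# holds two branch levels. Iterating in reverse, the "b" node becomes a
+# leaf ListenerPath first; the "a" node then becomes a ListenerPath whose
+# ``branches`` contain the "b" path, and ``as_paths`` returns the "a" path.
+# Cycle-only levels are buffered in ``last_cnodes`` until the next branch
+# level absorbs them as ``cycles``; this is how ``recursive`` loops paths
+# back to their root nodes.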
+
+
+class _SeriesExpression:
+    """ Container of Expression for joining expressions in series.
+    Used internally in this module.
+
+    Parameters
+    ----------
+    expressions : list of Expression
+        List of Expression to be combined in series.
+    """
+
+    def __init__(self, expressions):
+        self.expressions = expressions.copy()
+
+    def copy(self):
+        return _SeriesExpression(self.expressions)
+
+    def _root_nodes(self):
+        """ Return the root nodes of this expression.
+
+        Returns
+        -------
+        bnodes : set(BaseListener)
+            Nodes for branches.
+        cnodes : set(BaseListener)
+            Nodes for cycles.
+
+        Raises
+        ------
+        ValueError
+            If no root nodes are found.
+        """
+        for expr in self.expressions:
+            try:
+                return expr._root_nodes()
+            except ValueError:
+                continue
+        else:
+            # Reached only when no sub-expression provided root nodes.
+            raise ValueError("No root nodes found.")
+
+    def _create_paths(self, paths, id_to_path, last_cnodes):
+        """
+        Create new ListenerPath(s) from the joined expressions.
+
+        Parameters
+        ----------
+        paths : collection of ListenerPath
+            Leaf paths to be added.
+            Needed when this function is called recursively.
+        id_to_path : dict(int, ListenerPath)
+            Mapping from nodes' ids to ListenerPath.
+            Needed for maintaining object identity while handling cycles
+            when this function is called recursively.
+        last_cnodes : collection of BaseListener
+            Nodes to be added as cycles.
+            Needed when this function is called recursively.
+
+        Returns
+        -------
+        paths : list of ListenerPath
+            New paths
+        last_cnodes : list of BaseListener
+            Cycles to be propagated upstream, if any. Used when this
+            function is called multiple times for joining expressions.
+        """
+        for expr in self.expressions[::-1]:
+            paths, last_cnodes = _create_paths(
+                expr,
+                paths=paths,
+                id_to_path=id_to_path,
+                last_cnodes=last_cnodes,
+            )
+        return paths, last_cnodes
+
+
+class _ParallelExpression:
+    """ Container of Expression for joining expressions in parallel.
+    Used internally in this module.
+
+    Parameters
+    ----------
+    expressions : list of Expression
+        List of Expression to be combined in parallel.
+    """
+
+    def __init__(self, expressions):
+        self.expressions = expressions.copy()
+
+    def copy(self):
+        return _ParallelExpression(self.expressions)
+
+    def _root_nodes(self):
+        """ Return the root branched nodes of this expression.
+
+        Returns
+        -------
+        bnodes : set(BaseListener)
+            Nodes for branches.
+        cnodes : set(BaseListener)
+            Nodes for cycles.
+
+        Raises
+        ------
+        ValueError
+            If no root nodes are found.
+        """
+        bnodes = set()
+        cnodes = set()
+        for expr in self.expressions:
+            bs, cs = expr._root_nodes()
+            bnodes |= bs
+            cnodes |= cs
+
+        if not bnodes:
+            raise ValueError("No root nodes")
+
+        return (bnodes, cnodes)
+
+    def _create_paths(self, paths, id_to_path, last_cnodes):
+        """
+        Create new ListenerPath(s) from the joined expressions.
+
+        Parameters
+        ----------
+        paths : collection of ListenerPath
+            Leaf paths to be added.
+            Needed when this function is called recursively.
+        id_to_path : dict(int, ListenerPath)
+            Mapping from nodes' ids to ListenerPath.
+            Needed for maintaining object identity while handling cycles
+            when this function is called recursively.
+        last_cnodes : collection of BaseListener
+            Nodes to be added as cycles.
+            Needed when this function is called recursively.
+
+        Returns
+        -------
+        paths : list of ListenerPath
+            New paths
+        last_cnodes : list of BaseListener
+            Cycles to be propagated upstream, if any. Used when this
+            function is called multiple times for joining expressions.
+        """
+        new_paths = []
+        for expr in self.expressions:
+            or_paths, cnodes = _create_paths(
+                expr,
+                paths=paths,
+                id_to_path=id_to_path,
+                last_cnodes=last_cnodes,
+            )
+            if cnodes:
+                raise ValueError(
+                    "Cycles cannot be propagated further upstream with OR operation."
+                )
+            new_paths.extend(or_paths)
+        return new_paths, []
+
+
+# Define top-level functions
+
+def _as_top_level(func):
+
+    def new_func(*args, **kwargs):
+        return func(Expression(), *args, **kwargs)
+
+    # Recreate the docstring with the appropriate arguments
+    update_wrapper(
+        new_func,
+        getattr(Expression(), func.__name__)
+    )
+    new_func.__module__ = __name__
+    return new_func
+
+
+recursive = _as_top_level(Expression.recursive)
+
+trait = _as_top_level(Expression.trait)
+
+list_items = _as_top_level(Expression.list_items)
+
+dict_items = _as_top_level(Expression.dict_items)
+
+set_items = _as_top_level(Expression.set_items)
+
+metadata = _as_top_level(Expression.metadata)
+
+
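+# Equivalence sketch (illustrative): ``join_(trait("a"), trait("b"))``
+# reduces to ``trait("a").then(trait("b"))`` and therefore matches ``a.b``.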
+def join_(*expressions):
+    """ Convenience function for joining multiple expressions
+    using ``Expression.then``.
+    """
+    return reduce(lambda e1, e2: e1.then(e2), expressions)
\ No newline at end of file
diff --git a/poc/generated_parser.py b/poc/generated_parser.py
new file mode 100644
index 000000000..0cfa73aaf
--- /dev/null
+++ b/poc/generated_parser.py
@@ -0,0 +1,2107 @@
+# The file was automatically generated by Lark v0.8.5
+#
+#
+# Lark Stand-alone Generator Tool
+# ----------------------------------
+# Generates a stand-alone LALR(1) parser with a standard lexer
+#
+# Git: https://github.com/erezsh/lark
+# Author: Erez Shinan (erezshin@gmail.com)
+#
+#
+# >>> LICENSE
+#
+# This tool and its generated code use a separate license from Lark,
+# and are subject to the terms of the Mozilla Public License, v. 2.0.
+# If a copy of the MPL was not distributed with this
+# file, You can obtain one at https://mozilla.org/MPL/2.0/.
+#
+# If you wish to purchase a commercial license for this tool and its
+# generated code, you may contact me via email or otherwise.
+#
+# If MPL2 is incompatible with your free or open-source project,
+# contact me and we'll work it out.
+#
+#
+
+import os
+from io import open
+
+class LarkError(Exception):
+    pass
+
+class GrammarError(LarkError):
+    pass
+
+class ParseError(LarkError):
+    pass
+
+class LexError(LarkError):
+    pass
+
+class UnexpectedEOF(ParseError):
+    def __init__(self, expected):
+        self.expected = expected
+
+        message = ("Unexpected end-of-input. Expected one of: \n\t* %s\n" % '\n\t* '.join(x.name for x in self.expected))
+        super(UnexpectedEOF, self).__init__(message)
+
+
+class UnexpectedInput(LarkError):
+    pos_in_stream = None
+
+    def get_context(self, text, span=40):
+        pos = self.pos_in_stream
+        start = max(pos - span, 0)
+        end = pos + span
+        before = text[start:pos].rsplit('\n', 1)[-1]
+        after = text[pos:end].split('\n', 1)[0]
+        return before + after + '\n' + ' ' * len(before) + '^\n'
+
+    def match_examples(self, parse_fn, examples):
+        """ Given a parser instance and a dictionary mapping some label with
+            some malformed syntax examples, it'll return the label for the
+            example that best matches the current error.
+ """ + assert self.state is not None, "Not supported for this exception" + + candidate = None + for label, example in examples.items(): + assert not isinstance(example, STRING_TYPE) + + for malformed in example: + try: + parse_fn(malformed) + except UnexpectedInput as ut: + if ut.state == self.state: + try: + if ut.token == self.token: # Try exact match first + return label + except AttributeError: + pass + if not candidate: + candidate = label + + return candidate + + +class UnexpectedCharacters(LexError, UnexpectedInput): + def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None): + message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos], line, column) + + self.line = line + self.column = column + self.allowed = allowed + self.considered_tokens = considered_tokens + self.pos_in_stream = lex_pos + self.state = state + + message += '\n\n' + self.get_context(seq) + if allowed: + message += '\nExpecting: %s\n' % allowed + if token_history: + message += '\nPrevious tokens: %s\n' % ', '.join(repr(t) for t in token_history) + + super(UnexpectedCharacters, self).__init__(message) + + + +class UnexpectedToken(ParseError, UnexpectedInput): + def __init__(self, token, expected, considered_rules=None, state=None): + self.token = token + self.expected = expected # XXX str shouldn't necessary + self.line = getattr(token, 'line', '?') + self.column = getattr(token, 'column', '?') + self.considered_rules = considered_rules + self.state = state + self.pos_in_stream = getattr(token, 'pos_in_stream', None) + + message = ("Unexpected token %r at line %s, column %s.\n" + "Expected one of: \n\t* %s\n" + % (token, self.line, self.column, '\n\t* '.join(self.expected))) + + super(UnexpectedToken, self).__init__(message) + +class VisitError(LarkError): + """VisitError is raised when visitors are interrupted by an exception + + It provides the following attributes for inspection: + - obj: the tree node or token it was processing when the exception was raised + - orig_exc: the exception that cause it to fail + """ + def __init__(self, rule, obj, orig_exc): + self.obj = obj + self.orig_exc = orig_exc + + message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc) + super(VisitError, self).__init__(message) + +def classify(seq, key=None, value=None): + d = {} + for item in seq: + k = key(item) if (key is not None) else item + v = value(item) if (value is not None) else item + if k in d: + d[k].append(v) + else: + d[k] = [v] + return d + + +def _deserialize(data, namespace, memo): + if isinstance(data, dict): + if '__type__' in data: # Object + class_ = namespace[data['__type__']] + return class_.deserialize(data, memo) + elif '@' in data: + return memo[data['@']] + return {key:_deserialize(value, namespace, memo) for key, value in data.items()} + elif isinstance(data, list): + return [_deserialize(value, namespace, memo) for value in data] + return data + + +class Serialize(object): + def memo_serialize(self, types_to_memoize): + memo = SerializeMemoizer(types_to_memoize) + return self.serialize(memo), memo.serialize() + + def serialize(self, memo=None): + if memo and memo.in_types(self): + return {'@': memo.memoized.get(self)} + + fields = getattr(self, '__serialize_fields__') + res = {f: _serialize(getattr(self, f), memo) for f in fields} + res['__type__'] = type(self).__name__ + postprocess = getattr(self, '_serialize', None) + if postprocess: + postprocess(res, memo) + return res + + @classmethod + def deserialize(cls, 
data, memo): + namespace = getattr(cls, '__serialize_namespace__', {}) + namespace = {c.__name__:c for c in namespace} + + fields = getattr(cls, '__serialize_fields__') + + if '@' in data: + return memo[data['@']] + + inst = cls.__new__(cls) + for f in fields: + try: + setattr(inst, f, _deserialize(data[f], namespace, memo)) + except KeyError as e: + raise KeyError("Cannot find key for class", cls, e) + postprocess = getattr(inst, '_deserialize', None) + if postprocess: + postprocess() + return inst + + +class SerializeMemoizer(Serialize): + __serialize_fields__ = 'memoized', + + def __init__(self, types_to_memoize): + self.types_to_memoize = tuple(types_to_memoize) + self.memoized = Enumerator() + + def in_types(self, value): + return isinstance(value, self.types_to_memoize) + + def serialize(self): + return _serialize(self.memoized.reversed(), None) + + @classmethod + def deserialize(cls, data, namespace, memo): + return _deserialize(data, namespace, memo) + + + +try: + STRING_TYPE = basestring +except NameError: # Python 3 + STRING_TYPE = str + + +import types +from functools import wraps, partial +from contextlib import contextmanager + +Str = type(u'') +try: + classtype = types.ClassType # Python2 +except AttributeError: + classtype = type # Python3 + +def smart_decorator(f, create_decorator): + if isinstance(f, types.FunctionType): + return wraps(f)(create_decorator(f, True)) + + elif isinstance(f, (classtype, type, types.BuiltinFunctionType)): + return wraps(f)(create_decorator(f, False)) + + elif isinstance(f, types.MethodType): + return wraps(f)(create_decorator(f.__func__, True)) + + elif isinstance(f, partial): + # wraps does not work for partials in 2.7: https://bugs.python.org/issue3445 + return wraps(f.func)(create_decorator(lambda *args, **kw: f(*args[1:], **kw), True)) + + else: + return create_decorator(f.__func__.__call__, True) + +import sys, re +Py36 = (sys.version_info[:2] >= (3, 6)) + +import sre_parse +import sre_constants +def get_regexp_width(regexp): + try: + return [int(x) for x in sre_parse.parse(regexp).getwidth()] + except sre_constants.error: + raise ValueError(regexp) + + +class Meta: + def __init__(self): + self.empty = True + +class Tree(object): + def __init__(self, data, children, meta=None): + self.data = data + self.children = children + self._meta = meta + + @property + def meta(self): + if self._meta is None: + self._meta = Meta() + return self._meta + + def __repr__(self): + return 'Tree(%s, %s)' % (self.data, self.children) + + def _pretty_label(self): + return self.data + + def _pretty(self, level, indent_str): + if len(self.children) == 1 and not isinstance(self.children[0], Tree): + return [ indent_str*level, self._pretty_label(), '\t', '%s' % (self.children[0],), '\n'] + + l = [ indent_str*level, self._pretty_label(), '\n' ] + for n in self.children: + if isinstance(n, Tree): + l += n._pretty(level+1, indent_str) + else: + l += [ indent_str*(level+1), '%s' % (n,), '\n' ] + + return l + + def pretty(self, indent_str=' '): + return ''.join(self._pretty(0, indent_str)) + + def __eq__(self, other): + try: + return self.data == other.data and self.children == other.children + except AttributeError: + return False + + def __ne__(self, other): + return not (self == other) + + def __hash__(self): + return hash((self.data, tuple(self.children))) + + def iter_subtrees(self): + # TODO: Re-write as a more efficient version + + visited = set() + q = [self] + + l = [] + while q: + subtree = q.pop() + l.append( subtree ) + if id(subtree) in visited: + 
continue # already been here from another branch + visited.add(id(subtree)) + q += [c for c in subtree.children if isinstance(c, Tree)] + + seen = set() + for x in reversed(l): + if id(x) not in seen: + yield x + seen.add(id(x)) + + def find_pred(self, pred): + "Find all nodes where pred(tree) == True" + return filter(pred, self.iter_subtrees()) + + def find_data(self, data): + "Find all nodes where tree.data == data" + return self.find_pred(lambda t: t.data == data) + + +from inspect import getmembers, getmro + +class Discard(Exception): + pass + +# Transformers + +class _Decoratable: + @classmethod + def _apply_decorator(cls, decorator, **kwargs): + mro = getmro(cls) + assert mro[0] is cls + libmembers = {name for _cls in mro[1:] for name, _ in getmembers(_cls)} + for name, value in getmembers(cls): + + # Make sure the function isn't inherited (unless it's overwritten) + if name.startswith('_') or (name in libmembers and name not in cls.__dict__): + continue + if not callable(cls.__dict__[name]): + continue + + # Skip if v_args already applied (at the function level) + if hasattr(cls.__dict__[name], 'vargs_applied'): + continue + + static = isinstance(cls.__dict__[name], (staticmethod, classmethod)) + setattr(cls, name, decorator(value, static=static, **kwargs)) + return cls + + def __class_getitem__(cls, _): + return cls + + +class Transformer(_Decoratable): + """Visits the tree recursively, starting with the leaves and finally the root (bottom-up) + + Calls its methods (provided by user via inheritance) according to tree.data + The returned value replaces the old one in the structure. + + Can be used to implement map or reduce. + """ + __visit_tokens__ = True # For backwards compatibility + + def __init__(self, visit_tokens=True): + self.__visit_tokens__ = visit_tokens + + def _call_userfunc(self, tree, new_children=None): + # Assumes tree is already transformed + children = new_children if new_children is not None else tree.children + try: + f = getattr(self, tree.data) + except AttributeError: + return self.__default__(tree.data, children, tree.meta) + else: + try: + wrapper = getattr(f, 'visit_wrapper', None) + if wrapper is not None: + return f.visit_wrapper(f, tree.data, children, tree.meta) + else: + return f(children) + except (GrammarError, Discard): + raise + except Exception as e: + raise VisitError(tree.data, tree, e) + + def _call_userfunc_token(self, token): + try: + f = getattr(self, token.type) + except AttributeError: + return self.__default_token__(token) + else: + try: + return f(token) + except (GrammarError, Discard): + raise + except Exception as e: + raise VisitError(token.type, token, e) + + + def _transform_children(self, children): + for c in children: + try: + if isinstance(c, Tree): + yield self._transform_tree(c) + elif self.__visit_tokens__ and isinstance(c, Token): + yield self._call_userfunc_token(c) + else: + yield c + except Discard: + pass + + def _transform_tree(self, tree): + children = list(self._transform_children(tree.children)) + return self._call_userfunc(tree, children) + + def transform(self, tree): + return self._transform_tree(tree) + + def __mul__(self, other): + return TransformerChain(self, other) + + def __default__(self, data, children, meta): + "Default operation on tree (for override)" + return Tree(data, children, meta) + + def __default_token__(self, token): + "Default operation on token (for override)" + return token + + + +class InlineTransformer(Transformer): # XXX Deprecated + def _call_userfunc(self, tree, new_children=None): + 
# Assumes tree is already transformed + children = new_children if new_children is not None else tree.children + try: + f = getattr(self, tree.data) + except AttributeError: + return self.__default__(tree.data, children, tree.meta) + else: + return f(*children) + + +class TransformerChain(object): + def __init__(self, *transformers): + self.transformers = transformers + + def transform(self, tree): + for t in self.transformers: + tree = t.transform(tree) + return tree + + def __mul__(self, other): + return TransformerChain(*self.transformers + (other,)) + + +class Transformer_InPlace(Transformer): + "Non-recursive. Changes the tree in-place instead of returning new instances" + def _transform_tree(self, tree): # Cancel recursion + return self._call_userfunc(tree) + + def transform(self, tree): + for subtree in tree.iter_subtrees(): + subtree.children = list(self._transform_children(subtree.children)) + + return self._transform_tree(tree) + + +class Transformer_InPlaceRecursive(Transformer): + "Recursive. Changes the tree in-place instead of returning new instances" + def _transform_tree(self, tree): + tree.children = list(self._transform_children(tree.children)) + return self._call_userfunc(tree) + + + +# Visitors + +class VisitorBase: + def _call_userfunc(self, tree): + return getattr(self, tree.data, self.__default__)(tree) + + def __default__(self, tree): + "Default operation on tree (for override)" + return tree + + def __class_getitem__(cls, _): + return cls + + +class Visitor(VisitorBase): + """Bottom-up visitor, non-recursive + + Visits the tree, starting with the leaves and finally the root (bottom-up) + Calls its methods (provided by user via inheritance) according to tree.data + """ + + def visit(self, tree): + for subtree in tree.iter_subtrees(): + self._call_userfunc(subtree) + return tree + + def visit_topdown(self,tree): + for subtree in tree.iter_subtrees_topdown(): + self._call_userfunc(subtree) + return tree + +class Visitor_Recursive(VisitorBase): + """Bottom-up visitor, recursive + + Visits the tree, starting with the leaves and finally the root (bottom-up) + Calls its methods (provided by user via inheritance) according to tree.data + """ + + def visit(self, tree): + for child in tree.children: + if isinstance(child, Tree): + self.visit(child) + + self._call_userfunc(tree) + return tree + + def visit_topdown(self,tree): + self._call_userfunc(tree) + + for child in tree.children: + if isinstance(child, Tree): + self.visit_topdown(child) + + return tree + + + +def visit_children_decor(func): + "See Interpreter" + @wraps(func) + def inner(cls, tree): + values = cls.visit_children(tree) + return func(cls, values) + return inner + + +class Interpreter(_Decoratable): + """Top-down visitor, recursive + + Visits the tree, starting with the root and finally the leaves (top-down) + Calls its methods (provided by user via inheritance) according to tree.data + + Unlike Transformer and Visitor, the Interpreter doesn't automatically visit its sub-branches. 
+ The user has to explicitly call visit_children, or use the @visit_children_decor + """ + + def visit(self, tree): + f = getattr(self, tree.data) + wrapper = getattr(f, 'visit_wrapper', None) + if wrapper is not None: + return f.visit_wrapper(f, tree.data, tree.children, tree.meta) + else: + return f(tree) + + def visit_children(self, tree): + return [self.visit(child) if isinstance(child, Tree) else child + for child in tree.children] + + def __getattr__(self, name): + return self.__default__ + + def __default__(self, tree): + return self.visit_children(tree) + + + + +# Decorators + +def _apply_decorator(obj, decorator, **kwargs): + try: + _apply = obj._apply_decorator + except AttributeError: + return decorator(obj, **kwargs) + else: + return _apply(decorator, **kwargs) + + + +def _inline_args__func(func): + @wraps(func) + def create_decorator(_f, with_self): + if with_self: + def f(self, children): + return _f(self, *children) + else: + def f(self, children): + return _f(*children) + return f + + return smart_decorator(func, create_decorator) + + +def inline_args(obj): # XXX Deprecated + return _apply_decorator(obj, _inline_args__func) + + + +def _visitor_args_func_dec(func, visit_wrapper=None, static=False): + def create_decorator(_f, with_self): + if with_self: + def f(self, *args, **kwargs): + return _f(self, *args, **kwargs) + else: + def f(self, *args, **kwargs): + return _f(*args, **kwargs) + return f + + if static: + f = wraps(func)(create_decorator(func, False)) + else: + f = smart_decorator(func, create_decorator) + f.vargs_applied = True + f.visit_wrapper = visit_wrapper + return f + + +def _vargs_inline(f, data, children, meta): + return f(*children) +def _vargs_meta_inline(f, data, children, meta): + return f(meta, *children) +def _vargs_meta(f, data, children, meta): + return f(children, meta) # TODO swap these for consistency? Backwards incompatible! 
+def _vargs_tree(f, data, children, meta): + return f(Tree(data, children, meta)) + +def v_args(inline=False, meta=False, tree=False, wrapper=None): + "A convenience decorator factory, for modifying the behavior of user-supplied visitor methods" + if tree and (meta or inline): + raise ValueError("Visitor functions cannot combine 'tree' with 'meta' or 'inline'.") + + func = None + if meta: + if inline: + func = _vargs_meta_inline + else: + func = _vargs_meta + elif inline: + func = _vargs_inline + elif tree: + func = _vargs_tree + + if wrapper is not None: + if func is not None: + raise ValueError("Cannot use 'wrapper' along with 'tree', 'meta' or 'inline'.") + func = wrapper + + def _visitor_args_dec(obj): + return _apply_decorator(obj, _visitor_args_func_dec, visit_wrapper=func) + return _visitor_args_dec + + + +class Indenter: + def __init__(self): + self.paren_level = None + self.indent_level = None + assert self.tab_len > 0 + + def handle_NL(self, token): + if self.paren_level > 0: + return + + yield token + + indent_str = token.rsplit('\n', 1)[1] # Tabs and spaces + indent = indent_str.count(' ') + indent_str.count('\t') * self.tab_len + + if indent > self.indent_level[-1]: + self.indent_level.append(indent) + yield Token.new_borrow_pos(self.INDENT_type, indent_str, token) + else: + while indent < self.indent_level[-1]: + self.indent_level.pop() + yield Token.new_borrow_pos(self.DEDENT_type, indent_str, token) + + assert indent == self.indent_level[-1], '%s != %s' % (indent, self.indent_level[-1]) + + def _process(self, stream): + for token in stream: + if token.type == self.NL_type: + for t in self.handle_NL(token): + yield t + else: + yield token + + if token.type in self.OPEN_PAREN_types: + self.paren_level += 1 + elif token.type in self.CLOSE_PAREN_types: + self.paren_level -= 1 + assert self.paren_level >= 0 + + while len(self.indent_level) > 1: + self.indent_level.pop() + yield Token(self.DEDENT_type, '') + + assert self.indent_level == [0], self.indent_level + + def process(self, stream): + self.paren_level = 0 + self.indent_level = [0] + return self._process(stream) + + # XXX Hack for ContextualLexer. Maybe there's a more elegant solution? 
+ @property + def always_accept(self): + return (self.NL_type,) + + + +class Symbol(Serialize): + __slots__ = ('name',) + + is_term = NotImplemented + + def __init__(self, name): + self.name = name + + def __eq__(self, other): + assert isinstance(other, Symbol), other + return self.is_term == other.is_term and self.name == other.name + + def __ne__(self, other): + return not (self == other) + + def __hash__(self): + return hash(self.name) + + def __repr__(self): + return '%s(%r)' % (type(self).__name__, self.name) + + fullrepr = property(__repr__) + + +class Terminal(Symbol): + __serialize_fields__ = 'name', 'filter_out' + + is_term = True + + def __init__(self, name, filter_out=False): + self.name = name + self.filter_out = filter_out + + @property + def fullrepr(self): + return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out) + + + +class NonTerminal(Symbol): + __serialize_fields__ = 'name', + + is_term = False + + + +class RuleOptions(Serialize): + __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'empty_indices' + + def __init__(self, keep_all_tokens=False, expand1=False, priority=None, empty_indices=()): + self.keep_all_tokens = keep_all_tokens + self.expand1 = expand1 + self.priority = priority + self.empty_indices = empty_indices + + def __repr__(self): + return 'RuleOptions(%r, %r, %r)' % ( + self.keep_all_tokens, + self.expand1, + self.priority, + ) + + +class Rule(Serialize): + """ + origin : a symbol + expansion : a list of symbols + order : index of this expansion amongst all rules of the same name + """ + __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash') + + __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options' + __serialize_namespace__ = Terminal, NonTerminal, RuleOptions + + def __init__(self, origin, expansion, order=0, alias=None, options=None): + self.origin = origin + self.expansion = expansion + self.alias = alias + self.order = order + self.options = options or RuleOptions() + self._hash = hash((self.origin, tuple(self.expansion))) + + def _deserialize(self): + self._hash = hash((self.origin, tuple(self.expansion))) + + def __str__(self): + return '<%s : %s>' % (self.origin.name, ' '.join(x.name for x in self.expansion)) + + def __repr__(self): + return 'Rule(%r, %r, %r, %r)' % (self.origin, self.expansion, self.alias, self.options) + + def __hash__(self): + return self._hash + + def __eq__(self, other): + if not isinstance(other, Rule): + return False + return self.origin == other.origin and self.expansion == other.expansion + + + + + +class Pattern(Serialize): + + def __init__(self, value, flags=()): + self.value = value + self.flags = frozenset(flags) + + def __repr__(self): + return repr(self.to_regexp()) + + # Pattern Hashing assumes all subclasses have a different priority! 
+ def __hash__(self): + return hash((type(self), self.value, self.flags)) + def __eq__(self, other): + return type(self) == type(other) and self.value == other.value and self.flags == other.flags + + def to_regexp(self): + raise NotImplementedError() + + if Py36: + # Python 3.6 changed syntax for flags in regular expression + def _get_flags(self, value): + for f in self.flags: + value = ('(?%s:%s)' % (f, value)) + return value + + else: + def _get_flags(self, value): + for f in self.flags: + value = ('(?%s)' % f) + value + return value + + +class PatternStr(Pattern): + __serialize_fields__ = 'value', 'flags' + + type = "str" + + def to_regexp(self): + return self._get_flags(re.escape(self.value)) + + @property + def min_width(self): + return len(self.value) + max_width = min_width + +class PatternRE(Pattern): + __serialize_fields__ = 'value', 'flags', '_width' + + type = "re" + + def to_regexp(self): + return self._get_flags(self.value) + + _width = None + def _get_width(self): + if self._width is None: + self._width = get_regexp_width(self.to_regexp()) + return self._width + + @property + def min_width(self): + return self._get_width()[0] + @property + def max_width(self): + return self._get_width()[1] + + +class TerminalDef(Serialize): + __serialize_fields__ = 'name', 'pattern', 'priority' + __serialize_namespace__ = PatternStr, PatternRE + + def __init__(self, name, pattern, priority=1): + assert isinstance(pattern, Pattern), pattern + self.name = name + self.pattern = pattern + self.priority = priority + + def __repr__(self): + return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern) + + + +class Token(Str): + __slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos') + + def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None, end_line=None, end_column=None, end_pos=None): + try: + self = super(Token, cls).__new__(cls, value) + except UnicodeDecodeError: + value = value.decode('latin1') + self = super(Token, cls).__new__(cls, value) + + self.type = type_ + self.pos_in_stream = pos_in_stream + self.value = value + self.line = line + self.column = column + self.end_line = end_line + self.end_column = end_column + self.end_pos = end_pos + return self + + def update(self, type_=None, value=None): + return Token.new_borrow_pos( + type_ if type_ is not None else self.type, + value if value is not None else self.value, + self + ) + + @classmethod + def new_borrow_pos(cls, type_, value, borrow_t): + return cls(type_, value, borrow_t.pos_in_stream, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos) + + def __reduce__(self): + return (self.__class__, (self.type, self.value, self.pos_in_stream, self.line, self.column, )) + + def __repr__(self): + return 'Token(%s, %r)' % (self.type, self.value) + + def __deepcopy__(self, memo): + return Token(self.type, self.value, self.pos_in_stream, self.line, self.column) + + def __eq__(self, other): + if isinstance(other, Token) and self.type != other.type: + return False + + return Str.__eq__(self, other) + + __hash__ = Str.__hash__ + + +class LineCounter: + def __init__(self): + self.newline_char = '\n' + self.char_pos = 0 + self.line = 1 + self.column = 1 + self.line_start_pos = 0 + + def feed(self, token, test_newline=True): + """Consume a token and calculate the new line & column. + + As an optional optimization, set test_newline=False is token doesn't contain a newline. 
+ """ + if test_newline: + newlines = token.count(self.newline_char) + if newlines: + self.line += newlines + self.line_start_pos = self.char_pos + token.rindex(self.newline_char) + 1 + + self.char_pos += len(token) + self.column = self.char_pos - self.line_start_pos + 1 + +class _Lex: + "Built to serve both Lexer and ContextualLexer" + def __init__(self, lexer, state=None): + self.lexer = lexer + self.state = state + + def lex(self, stream, newline_types, ignore_types): + newline_types = frozenset(newline_types) + ignore_types = frozenset(ignore_types) + line_ctr = LineCounter() + last_token = None + + while line_ctr.char_pos < len(stream): + lexer = self.lexer + res = lexer.match(stream, line_ctr.char_pos) + if not res: + allowed = {v for m, tfi in lexer.mres for v in tfi.values()} - ignore_types + if not allowed: + allowed = {""} + raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, allowed=allowed, state=self.state, token_history=last_token and [last_token]) + + value, type_ = res + + if type_ not in ignore_types: + t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column) + line_ctr.feed(value, type_ in newline_types) + t.end_line = line_ctr.line + t.end_column = line_ctr.column + t.end_pos = line_ctr.char_pos + if t.type in lexer.callback: + t = lexer.callback[t.type](t) + if not isinstance(t, Token): + raise ValueError("Callbacks must return a token (returned %r)" % t) + yield t + last_token = t + else: + if type_ in lexer.callback: + t2 = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column) + lexer.callback[type_](t2) + line_ctr.feed(value, type_ in newline_types) + + + + +class UnlessCallback: + def __init__(self, mres): + self.mres = mres + + def __call__(self, t): + for mre, type_from_index in self.mres: + m = mre.match(t.value) + if m: + t.type = type_from_index[m.lastindex] + break + return t + +class CallChain: + def __init__(self, callback1, callback2, cond): + self.callback1 = callback1 + self.callback2 = callback2 + self.cond = cond + + def __call__(self, t): + t2 = self.callback1(t) + return self.callback2(t) if self.cond(t2) else t2 + + + + + +def _create_unless(terminals, g_regex_flags): + tokens_by_type = classify(terminals, lambda t: type(t.pattern)) + assert len(tokens_by_type) <= 2, tokens_by_type.keys() + embedded_strs = set() + callback = {} + for retok in tokens_by_type.get(PatternRE, []): + unless = [] # {} + for strtok in tokens_by_type.get(PatternStr, []): + if strtok.priority > retok.priority: + continue + s = strtok.pattern.value + m = re.match(retok.pattern.to_regexp(), s, g_regex_flags) + if m and m.group(0) == s: + unless.append(strtok) + if strtok.pattern.flags <= retok.pattern.flags: + embedded_strs.add(strtok) + if unless: + callback[retok.name] = UnlessCallback(build_mres(unless, g_regex_flags, match_whole=True)) + + terminals = [t for t in terminals if t not in embedded_strs] + return terminals, callback + + +def _build_mres(terminals, max_size, g_regex_flags, match_whole): + # Python sets an unreasonable group limit (currently 100) in its re module + # Worse, the only way to know we reached it is by catching an AssertionError! + # This function recursively tries less and less groups until it's successful. + postfix = '$' if match_whole else '' + mres = [] + while terminals: + try: + mre = re.compile(u'|'.join(u'(?P<%s>%s)'%(t.name, t.pattern.to_regexp()+postfix) for t in terminals[:max_size]), g_regex_flags) + except AssertionError: # Yes, this is what Python provides us.. 
:/ + return _build_mres(terminals, max_size//2, g_regex_flags, match_whole) + + # terms_from_name = {t.name: t for t in terminals[:max_size]} + mres.append((mre, {i:n for n,i in mre.groupindex.items()} )) + terminals = terminals[max_size:] + return mres + +def build_mres(terminals, g_regex_flags, match_whole=False): + return _build_mres(terminals, len(terminals), g_regex_flags, match_whole) + +def _regexp_has_newline(r): + r"""Expressions that may indicate newlines in a regexp: + - newlines (\n) + - escaped newline (\\n) + - anything but ([^...]) + - any-char (.) when the flag (?s) exists + - spaces (\s) + """ + return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r) + +class Lexer(object): + """Lexer interface + + Method Signatures: + lex(self, stream) -> Iterator[Token] + """ + lex = NotImplemented + + +class TraditionalLexer(Lexer): + + def __init__(self, terminals, ignore=(), user_callbacks={}, g_regex_flags=0): + assert all(isinstance(t, TerminalDef) for t in terminals), terminals + + terminals = list(terminals) + + # Sanitization + for t in terminals: + try: + re.compile(t.pattern.to_regexp(), g_regex_flags) + except re.error: + raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern)) + + if t.pattern.min_width == 0: + raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern)) + + assert set(ignore) <= {t.name for t in terminals} + + # Init + self.newline_types = [t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())] + self.ignore_types = list(ignore) + + terminals.sort(key=lambda x:(-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name)) + self.terminals = terminals + self.user_callbacks = user_callbacks + self.build(g_regex_flags) + + def build(self, g_regex_flags=0): + terminals, self.callback = _create_unless(self.terminals, g_regex_flags) + assert all(self.callback.values()) + + for type_, f in self.user_callbacks.items(): + if type_ in self.callback: + # Already a callback there, probably UnlessCallback + self.callback[type_] = CallChain(self.callback[type_], f, lambda t: t.type == type_) + else: + self.callback[type_] = f + + self.mres = build_mres(terminals, g_regex_flags) + + def match(self, stream, pos): + for mre, type_from_index in self.mres: + m = mre.match(stream, pos) + if m: + return m.group(0), type_from_index[m.lastindex] + + def lex(self, stream): + return _Lex(self).lex(stream, self.newline_types, self.ignore_types) + + + + +class ContextualLexer(Lexer): + + def __init__(self, terminals, states, ignore=(), always_accept=(), user_callbacks={}, g_regex_flags=0): + tokens_by_name = {} + for t in terminals: + assert t.name not in tokens_by_name, t + tokens_by_name[t.name] = t + + lexer_by_tokens = {} + self.lexers = {} + for state, accepts in states.items(): + key = frozenset(accepts) + try: + lexer = lexer_by_tokens[key] + except KeyError: + accepts = set(accepts) | set(ignore) | set(always_accept) + state_tokens = [tokens_by_name[n] for n in accepts if n and n in tokens_by_name] + lexer = TraditionalLexer(state_tokens, ignore=ignore, user_callbacks=user_callbacks, g_regex_flags=g_regex_flags) + lexer_by_tokens[key] = lexer + + self.lexers[state] = lexer + + self.root_lexer = TraditionalLexer(terminals, ignore=ignore, user_callbacks=user_callbacks, g_regex_flags=g_regex_flags) + + def lex(self, stream, get_parser_state): + parser_state = get_parser_state() + l = _Lex(self.lexers[parser_state], parser_state) + try: + for x in l.lex(stream, 
self.root_lexer.newline_types, self.root_lexer.ignore_types): + yield x + parser_state = get_parser_state() + l.lexer = self.lexers[parser_state] + l.state = parser_state # For debug only, no need to worry about multithreading + except UnexpectedCharacters as e: + # In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined, + # but not in the current context. + # This tests the input against the global context, to provide a nicer error. + root_match = self.root_lexer.match(stream, e.pos_in_stream) + if not root_match: + raise + + value, type_ = root_match + t = Token(type_, value, e.pos_in_stream, e.line, e.column) + raise UnexpectedToken(t, e.allowed, state=e.state) + + + +class LexerConf(Serialize): + __serialize_fields__ = 'tokens', 'ignore', 'g_regex_flags' + __serialize_namespace__ = TerminalDef, + + def __init__(self, tokens, ignore=(), postlex=None, callbacks=None, g_regex_flags=0): + self.tokens = tokens + self.ignore = ignore + self.postlex = postlex + self.callbacks = callbacks or {} + self.g_regex_flags = g_regex_flags + + def _deserialize(self): + self.callbacks = {} # TODO + + +from functools import partial, wraps +from itertools import repeat, product + + +class ExpandSingleChild: + def __init__(self, node_builder): + self.node_builder = node_builder + + def __call__(self, children): + if len(children) == 1: + return children[0] + else: + return self.node_builder(children) + +class PropagatePositions: + def __init__(self, node_builder): + self.node_builder = node_builder + + def __call__(self, children): + res = self.node_builder(children) + + if isinstance(res, Tree): + for c in children: + if isinstance(c, Tree) and not c.meta.empty: + res.meta.line = c.meta.line + res.meta.column = c.meta.column + res.meta.start_pos = c.meta.start_pos + res.meta.empty = False + break + elif isinstance(c, Token): + res.meta.line = c.line + res.meta.column = c.column + res.meta.start_pos = c.pos_in_stream + res.meta.empty = False + break + + for c in reversed(children): + if isinstance(c, Tree) and not c.meta.empty: + res.meta.end_line = c.meta.end_line + res.meta.end_column = c.meta.end_column + res.meta.end_pos = c.meta.end_pos + res.meta.empty = False + break + elif isinstance(c, Token): + res.meta.end_line = c.end_line + res.meta.end_column = c.end_column + res.meta.end_pos = c.end_pos + res.meta.empty = False + break + + return res + + +class ChildFilter: + def __init__(self, to_include, append_none, node_builder): + self.node_builder = node_builder + self.to_include = to_include + self.append_none = append_none + + def __call__(self, children): + filtered = [] + + for i, to_expand, add_none in self.to_include: + if add_none: + filtered += [None] * add_none + if to_expand: + filtered += children[i].children + else: + filtered.append(children[i]) + + if self.append_none: + filtered += [None] * self.append_none + + return self.node_builder(filtered) + +class ChildFilterLALR(ChildFilter): + "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)" + + def __call__(self, children): + filtered = [] + for i, to_expand, add_none in self.to_include: + if add_none: + filtered += [None] * add_none + if to_expand: + if filtered: + filtered += children[i].children + else: # Optimize for left-recursion + filtered = children[i].children + else: + filtered.append(children[i]) + + if self.append_none: + filtered += [None] * self.append_none + + return self.node_builder(filtered) + +class ChildFilterLALR_NoPlaceholders(ChildFilter): + 
"Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)" + def __init__(self, to_include, node_builder): + self.node_builder = node_builder + self.to_include = to_include + + def __call__(self, children): + filtered = [] + for i, to_expand in self.to_include: + if to_expand: + if filtered: + filtered += children[i].children + else: # Optimize for left-recursion + filtered = children[i].children + else: + filtered.append(children[i]) + return self.node_builder(filtered) + +def _should_expand(sym): + return not sym.is_term and sym.name.startswith('_') + +def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices): + # Prepare empty_indices as: How many Nones to insert at each index? + if _empty_indices: + assert _empty_indices.count(False) == len(expansion) + s = ''.join(str(int(b)) for b in _empty_indices) + empty_indices = [len(ones) for ones in s.split('0')] + assert len(empty_indices) == len(expansion)+1, (empty_indices, len(expansion)) + else: + empty_indices = [0] * (len(expansion)+1) + + to_include = [] + nones_to_add = 0 + for i, sym in enumerate(expansion): + nones_to_add += empty_indices[i] + if keep_all_tokens or not (sym.is_term and sym.filter_out): + to_include.append((i, _should_expand(sym), nones_to_add)) + nones_to_add = 0 + + nones_to_add += empty_indices[len(expansion)] + + if _empty_indices or len(to_include) < len(expansion) or any(to_expand for i, to_expand,_ in to_include): + if _empty_indices or ambiguous: + return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include, nones_to_add) + else: + # LALR without placeholders + return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include]) + +class AmbiguousExpander: + """Deal with the case where we're expanding children ('_rule') into a parent but the children + are ambiguous. i.e. (parent->_ambig->_expand_this_rule). In this case, make the parent itself + ambiguous with as many copies as their are ambiguous children, and then copy the ambiguous children + into the right parents in the right places, essentially shifting the ambiguiuty up the tree.""" + def __init__(self, to_expand, tree_class, node_builder): + self.node_builder = node_builder + self.tree_class = tree_class + self.to_expand = to_expand + + def __call__(self, children): + def _is_ambig_tree(child): + return hasattr(child, 'data') and child.data == '_ambig' + + #### When we're repeatedly expanding ambiguities we can end up with nested ambiguities. + # All children of an _ambig node should be a derivation of that ambig node, hence + # it is safe to assume that if we see an _ambig node nested within an ambig node + # it is safe to simply expand it into the parent _ambig node as an alternative derivation. 
+ ambiguous = [] + for i, child in enumerate(children): + if _is_ambig_tree(child): + if i in self.to_expand: + ambiguous.append(i) + + to_expand = [j for j, grandchild in enumerate(child.children) if _is_ambig_tree(grandchild)] + child.expand_kids_by_index(*to_expand) + + if not ambiguous: + return self.node_builder(children) + + expand = [ iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children) ] + return self.tree_class('_ambig', [self.node_builder(list(f[0])) for f in product(zip(*expand))]) + +def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens): + to_expand = [i for i, sym in enumerate(expansion) + if keep_all_tokens or ((not (sym.is_term and sym.filter_out)) and _should_expand(sym))] + if to_expand: + return partial(AmbiguousExpander, to_expand, tree_class) + +def ptb_inline_args(func): + @wraps(func) + def f(children): + return func(*children) + return f + +def inplace_transformer(func): + @wraps(func) + def f(children): + # function name in a Transformer is a rule name. + tree = Tree(func.__name__, children) + return func(tree) + return f + +def apply_visit_wrapper(func, name, wrapper): + if wrapper is _vargs_meta or wrapper is _vargs_meta_inline: + raise NotImplementedError("Meta args not supported for internal transformer") + @wraps(func) + def f(children): + return wrapper(func, name, children, None) + return f + + +class ParseTreeBuilder: + def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False, maybe_placeholders=False): + self.tree_class = tree_class + self.propagate_positions = propagate_positions + self.always_keep_all_tokens = keep_all_tokens + self.ambiguous = ambiguous + self.maybe_placeholders = maybe_placeholders + + self.rule_builders = list(self._init_builders(rules)) + + def _init_builders(self, rules): + for rule in rules: + options = rule.options + keep_all_tokens = self.always_keep_all_tokens or options.keep_all_tokens + expand_single_child = options.expand1 + + wrapper_chain = list(filter(None, [ + (expand_single_child and not rule.alias) and ExpandSingleChild, + maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None), + self.propagate_positions and PropagatePositions, + self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens), + ])) + + yield rule, wrapper_chain + + + def create_callback(self, transformer=None): + callbacks = {} + + for rule, wrapper_chain in self.rule_builders: + + user_callback_name = rule.alias or rule.origin.name + try: + f = getattr(transformer, user_callback_name) + # XXX InlineTransformer is deprecated! 
+ wrapper = getattr(f, 'visit_wrapper', None) + if wrapper is not None: + f = apply_visit_wrapper(f, user_callback_name, wrapper) + else: + if isinstance(transformer, InlineTransformer): + f = ptb_inline_args(f) + elif isinstance(transformer, Transformer_InPlace): + f = inplace_transformer(f) + except AttributeError: + f = partial(self.tree_class, user_callback_name) + + for w in wrapper_chain: + f = w(f) + + if rule in callbacks: + raise GrammarError("Rule '%s' already exists" % (rule,)) + + callbacks[rule] = f + + return callbacks + + +class LALR_Parser(object): + def __init__(self, parser_conf, debug=False): + assert all(r.options.priority is None for r in parser_conf.rules), "LALR doesn't yet support prioritization" + analysis = LALR_Analyzer(parser_conf, debug=debug) + analysis.compute_lalr() + callbacks = parser_conf.callbacks + + self._parse_table = analysis.parse_table + self.parser_conf = parser_conf + self.parser = _Parser(analysis.parse_table, callbacks) + + @classmethod + def deserialize(cls, data, memo, callbacks): + inst = cls.__new__(cls) + inst._parse_table = IntParseTable.deserialize(data, memo) + inst.parser = _Parser(inst._parse_table, callbacks) + return inst + + def serialize(self, memo): + return self._parse_table.serialize(memo) + + def parse(self, *args): + return self.parser.parse(*args) + + +class _Parser: + def __init__(self, parse_table, callbacks): + self.states = parse_table.states + self.start_states = parse_table.start_states + self.end_states = parse_table.end_states + self.callbacks = callbacks + + def parse(self, seq, start, set_state=None): + token = None + stream = iter(seq) + states = self.states + + start_state = self.start_states[start] + end_state = self.end_states[start] + + state_stack = [start_state] + value_stack = [] + + if set_state: set_state(start_state) + + def get_action(token): + state = state_stack[-1] + try: + return states[state][token.type] + except KeyError: + expected = [s for s in states[state].keys() if s.isupper()] + raise UnexpectedToken(token, expected, state=state) + + def reduce(rule): + size = len(rule.expansion) + if size: + s = value_stack[-size:] + del state_stack[-size:] + del value_stack[-size:] + else: + s = [] + + value = self.callbacks[rule](s) + + _action, new_state = states[state_stack[-1]][rule.origin.name] + assert _action is Shift + state_stack.append(new_state) + value_stack.append(value) + + # Main LALR-parser loop + for token in stream: + while True: + action, arg = get_action(token) + assert arg != end_state + + if action is Shift: + state_stack.append(arg) + value_stack.append(token) + if set_state: set_state(arg) + break # next token + else: + reduce(arg) + + token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1) + while True: + _action, arg = get_action(token) + assert(_action is Reduce) + reduce(arg) + if state_stack[-1] == end_state: + return value_stack[-1] + + + +class Action: + def __init__(self, name): + self.name = name + def __str__(self): + return self.name + def __repr__(self): + return str(self) + +Shift = Action('Shift') +Reduce = Action('Reduce') + + +class ParseTable: + def __init__(self, states, start_states, end_states): + self.states = states + self.start_states = start_states + self.end_states = end_states + + def serialize(self, memo): + tokens = Enumerator() + rules = Enumerator() + + states = { + state: {tokens.get(token): ((1, arg.serialize(memo)) if action is Reduce else (0, arg)) + for token, (action, arg) in actions.items()} + for state, actions 
in self.states.items() + } + + return { + 'tokens': tokens.reversed(), + 'states': states, + 'start_states': self.start_states, + 'end_states': self.end_states, + } + + @classmethod + def deserialize(cls, data, memo): + tokens = data['tokens'] + states = { + state: {tokens[token]: ((Reduce, Rule.deserialize(arg, memo)) if action==1 else (Shift, arg)) + for token, (action, arg) in actions.items()} + for state, actions in data['states'].items() + } + return cls(states, data['start_states'], data['end_states']) + + +class IntParseTable(ParseTable): + + @classmethod + def from_ParseTable(cls, parse_table): + enum = list(parse_table.states) + state_to_idx = {s:i for i,s in enumerate(enum)} + int_states = {} + + for s, la in parse_table.states.items(): + la = {k:(v[0], state_to_idx[v[1]]) if v[0] is Shift else v + for k,v in la.items()} + int_states[ state_to_idx[s] ] = la + + + start_states = {start:state_to_idx[s] for start, s in parse_table.start_states.items()} + end_states = {start:state_to_idx[s] for start, s in parse_table.end_states.items()} + return cls(int_states, start_states, end_states) + + + +def get_frontend(parser, lexer): + if parser=='lalr': + if lexer is None: + raise ValueError('The LALR parser requires use of a lexer') + elif lexer == 'standard': + return LALR_TraditionalLexer + elif lexer == 'contextual': + return LALR_ContextualLexer + elif issubclass(lexer, Lexer): + return partial(LALR_CustomLexer, lexer) + else: + raise ValueError('Unknown lexer: %s' % lexer) + elif parser=='earley': + if lexer=='standard': + return Earley + elif lexer=='dynamic': + return XEarley + elif lexer=='dynamic_complete': + return XEarley_CompleteLex + elif lexer=='contextual': + raise ValueError('The Earley parser does not support the contextual parser') + else: + raise ValueError('Unknown lexer: %s' % lexer) + elif parser == 'cyk': + if lexer == 'standard': + return CYK + else: + raise ValueError('CYK parser requires using standard parser.') + else: + raise ValueError('Unknown parser: %s' % parser) + + +class _ParserFrontend(Serialize): + def _parse(self, input, start, *args): + if start is None: + start = self.start + if len(start) > 1: + raise ValueError("Lark initialized with more than 1 possible start rule. 
Must specify which start rule to parse", start) + start ,= start + return self.parser.parse(input, start, *args) + + +class WithLexer(_ParserFrontend): + lexer = None + parser = None + lexer_conf = None + start = None + + __serialize_fields__ = 'parser', 'lexer_conf', 'start' + __serialize_namespace__ = LexerConf, + + def __init__(self, lexer_conf, parser_conf, options=None): + self.lexer_conf = lexer_conf + self.start = parser_conf.start + self.postlex = lexer_conf.postlex + + @classmethod + def deserialize(cls, data, memo, callbacks, postlex): + inst = super(WithLexer, cls).deserialize(data, memo) + inst.postlex = postlex + inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks) + inst.init_lexer() + return inst + + def _serialize(self, data, memo): + data['parser'] = data['parser'].serialize(memo) + + def lex(self, *args): + stream = self.lexer.lex(*args) + return self.postlex.process(stream) if self.postlex else stream + + def parse(self, text, start=None): + token_stream = self.lex(text) + return self._parse(token_stream, start) + + def init_traditional_lexer(self): + self.lexer = TraditionalLexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore, user_callbacks=self.lexer_conf.callbacks, g_regex_flags=self.lexer_conf.g_regex_flags) + +class LALR_WithLexer(WithLexer): + def __init__(self, lexer_conf, parser_conf, options=None): + debug = options.debug if options else False + self.parser = LALR_Parser(parser_conf, debug=debug) + WithLexer.__init__(self, lexer_conf, parser_conf, options) + + self.init_lexer() + + def init_lexer(self): + raise NotImplementedError() + +class LALR_TraditionalLexer(LALR_WithLexer): + def init_lexer(self): + self.init_traditional_lexer() + +class LALR_ContextualLexer(LALR_WithLexer): + def init_lexer(self): + states = {idx:list(t.keys()) for idx, t in self.parser._parse_table.states.items()} + always_accept = self.postlex.always_accept if self.postlex else () + self.lexer = ContextualLexer(self.lexer_conf.tokens, states, + ignore=self.lexer_conf.ignore, + always_accept=always_accept, + user_callbacks=self.lexer_conf.callbacks, + g_regex_flags=self.lexer_conf.g_regex_flags) + + + def parse(self, text, start=None): + parser_state = [None] + def set_parser_state(s): + parser_state[0] = s + + token_stream = self.lex(text, lambda: parser_state[0]) + return self._parse(token_stream, start, set_parser_state) + + +class LarkOptions(Serialize): + """Specifies the options for Lark + + """ + OPTIONS_DOC = """ +# General + + start - The start symbol. Either a string, or a list of strings for + multiple possible starts (Default: "start") + debug - Display debug information, such as warnings (default: False) + transformer - Applies the transformer to every parse tree (equivlent to + applying it after the parse, but faster) + propagate_positions - Propagates (line, column, end_line, end_column) + attributes into all tree branches. + maybe_placeholders - When True, the `[]` operator returns `None` when not matched. + When `False`, `[]` behaves like the `?` operator, + and returns no value at all. + (default=`False`. Recommended to set to `True`) + cache_grammar - Cache the Lark grammar (Default: False) + g_regex_flags - Flags that are applied to all terminals + (both regex and strings) + keep_all_tokens - Prevent the tree builder from automagically + removing "punctuation" tokens (default: False) + +# Algorithm + + parser - Decides which parser engine to use + Accepts "earley" or "lalr". 
(Default: "earley") + (there is also a "cyk" option for legacy) + + lexer - Decides whether or not to use a lexer stage + "auto" (default): Choose for me based on the parser + "standard": Use a standard lexer + "contextual": Stronger lexer (only works with parser="lalr") + "dynamic": Flexible and powerful (only with parser="earley") + "dynamic_complete": Same as dynamic, but tries *every* variation + of tokenizing possible. + + ambiguity - Decides how to handle ambiguity in the parse. + Only relevant if parser="earley" + "resolve": The parser will automatically choose the simplest + derivation (it chooses consistently: greedy for + tokens, non-greedy for rules) + "explicit": The parser will return all derivations wrapped + in "_ambig" tree nodes (i.e. a forest). + +# Domain Specific + + postlex - Lexer post-processing (Default: None) Only works with the + standard and contextual lexers. + priority - How priorities should be evaluated - auto, none, normal, + invert (Default: auto) + lexer_callbacks - Dictionary of callbacks for the lexer. May alter + tokens during lexing. Use with caution. + edit_terminals - A callback + """ + if __doc__: + __doc__ += OPTIONS_DOC + + _defaults = { + 'debug': False, + 'keep_all_tokens': False, + 'tree_class': None, + 'cache_grammar': False, + 'postlex': None, + 'parser': 'earley', + 'lexer': 'auto', + 'transformer': None, + 'start': 'start', + 'priority': 'auto', + 'ambiguity': 'auto', + 'propagate_positions': False, + 'lexer_callbacks': {}, + 'maybe_placeholders': False, + 'edit_terminals': None, + 'g_regex_flags': 0, + } + + def __init__(self, options_dict): + o = dict(options_dict) + + options = {} + for name, default in self._defaults.items(): + if name in o: + value = o.pop(name) + if isinstance(default, bool): + value = bool(value) + else: + value = default + + options[name] = value + + if isinstance(options['start'], STRING_TYPE): + options['start'] = [options['start']] + + self.__dict__['options'] = options + + assert self.parser in ('earley', 'lalr', 'cyk', None) + + if self.parser == 'earley' and self.transformer: + raise ValueError('Cannot specify an embedded transformer when using the Earley algorithm.' + 'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)') + + if o: + raise ValueError("Unknown options: %s" % o.keys()) + + def __getattr__(self, name): + try: + return self.options[name] + except KeyError as e: + raise AttributeError(e) + + def __setattr__(self, name, value): + assert name in self.options + self.options[name] = value + + def serialize(self, memo): + return self.options + + @classmethod + def deserialize(cls, data, memo): + return cls(data) + + +class Lark(Serialize): + def __init__(self, grammar, **options): + """ + grammar : a string or file-object containing the grammar spec (using Lark's ebnf syntax) + options : a dictionary controlling various aspects of Lark. 
+ """ + self.options = LarkOptions(options) + + # Some, but not all file-like objects have a 'name' attribute + try: + self.source = grammar.name + except AttributeError: + self.source = '' + + # Drain file-like objects to get their contents + try: + read = grammar.read + except AttributeError: + pass + else: + grammar = read() + + assert isinstance(grammar, STRING_TYPE) + + if self.options.cache_grammar: + raise NotImplementedError("Not available yet") + + if self.options.lexer == 'auto': + if self.options.parser == 'lalr': + self.options.lexer = 'contextual' + elif self.options.parser == 'earley': + self.options.lexer = 'dynamic' + elif self.options.parser == 'cyk': + self.options.lexer = 'standard' + else: + assert False, self.options.parser + lexer = self.options.lexer + assert lexer in ('standard', 'contextual', 'dynamic', 'dynamic_complete') or issubclass(lexer, Lexer) + + if self.options.ambiguity == 'auto': + if self.options.parser == 'earley': + self.options.ambiguity = 'resolve' + else: + disambig_parsers = ['earley', 'cyk'] + assert self.options.parser in disambig_parsers, ( + 'Only %s supports disambiguation right now') % ', '.join(disambig_parsers) + + if self.options.priority == 'auto': + if self.options.parser in ('earley', 'cyk', ): + self.options.priority = 'normal' + elif self.options.parser in ('lalr', ): + self.options.priority = None + elif self.options.priority in ('invert', 'normal'): + assert self.options.parser in ('earley', 'cyk'), "priorities are not supported for LALR at this time" + + assert self.options.priority in ('auto', None, 'normal', 'invert'), 'invalid priority option specified: {}. options are auto, none, normal, invert.'.format(self.options.priority) + assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"' + assert self.options.ambiguity in ('resolve', 'explicit', 'auto', ) + + # Parse the grammar file and compose the grammars (TODO) + self.grammar = load_grammar(grammar, self.source) + + # Compile the EBNF grammar into BNF + self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start) + + if self.options.edit_terminals: + for t in self.terminals: + self.options.edit_terminals(t) + + self._terminals_dict = {t.name:t for t in self.terminals} + + # If the user asked to invert the priorities, negate them all here. + # This replaces the old 'resolve__antiscore_sum' option. + if self.options.priority == 'invert': + for rule in self.rules: + if rule.options.priority is not None: + rule.options.priority = -rule.options.priority + # Else, if the user asked to disable priorities, strip them from the + # rules. This allows the Earley parsers to skip an extra forest walk + # for improved performance, if you don't need them (or didn't specify any). + elif self.options.priority == None: + for rule in self.rules: + if rule.options.priority is not None: + rule.options.priority = None + + # TODO Deprecate lexer_callbacks? 
+ lexer_callbacks = dict(self.options.lexer_callbacks) + if self.options.transformer: + t = self.options.transformer + for term in self.terminals: + if hasattr(t, term.name): + lexer_callbacks[term.name] = getattr(t, term.name) + + self.lexer_conf = LexerConf(self.terminals, self.ignore_tokens, self.options.postlex, lexer_callbacks, self.options.g_regex_flags) + + if self.options.parser: + self.parser = self._build_parser() + elif lexer: + self.lexer = self._build_lexer() + + if __init__.__doc__: + __init__.__doc__ += "\nOptions:\n" + LarkOptions.OPTIONS_DOC + + __serialize_fields__ = 'parser', 'rules', 'options' + + def _build_lexer(self): + return TraditionalLexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore, user_callbacks=self.lexer_conf.callbacks, g_regex_flags=self.lexer_conf.g_regex_flags) + + def _prepare_callbacks(self): + self.parser_class = get_frontend(self.options.parser, self.options.lexer) + self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class or Tree, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr' and self.options.ambiguity=='explicit', self.options.maybe_placeholders) + self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer) + + def _build_parser(self): + self._prepare_callbacks() + parser_conf = ParserConf(self.rules, self._callbacks, self.options.start) + return self.parser_class(self.lexer_conf, parser_conf, options=self.options) + + @classmethod + def deserialize(cls, data, namespace, memo, transformer=None, postlex=None): + if memo: + memo = SerializeMemoizer.deserialize(memo, namespace, {}) + inst = cls.__new__(cls) + options = dict(data['options']) + if transformer is not None: + options['transformer'] = transformer + if postlex is not None: + options['postlex'] = postlex + inst.options = LarkOptions.deserialize(options, memo) + inst.rules = [Rule.deserialize(r, memo) for r in data['rules']] + inst.source = '' + inst._prepare_callbacks() + inst.parser = inst.parser_class.deserialize(data['parser'], memo, inst._callbacks, inst.options.postlex) + return inst + + def save(self, f): + data, m = self.memo_serialize([TerminalDef, Rule]) + pickle.dump({'data': data, 'memo': m}, f) + + @classmethod + def load(cls, f): + d = pickle.load(f) + namespace = {'Rule': Rule, 'TerminalDef': TerminalDef} + memo = d['memo'] + return Lark.deserialize(d['data'], namespace, memo) + + + @classmethod + def open(cls, grammar_filename, rel_to=None, **options): + """Create an instance of Lark with the grammar given by its filename + + If rel_to is provided, the function will find the grammar filename in relation to it. + + Example: + + >>> Lark.open("grammar_file.lark", rel_to=__file__, parser="lalr") + Lark(...) + + """ + if rel_to: + basepath = os.path.dirname(rel_to) + grammar_filename = os.path.join(basepath, grammar_filename) + with open(grammar_filename, encoding='utf8') as f: + return cls(f, **options) + + def __repr__(self): + return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source, self.options.parser, self.options.lexer) + + + def lex(self, text): + "Only lex (and postlex) the text, without parsing it. 
Only relevant when lexer='standard'" + if not hasattr(self, 'lexer'): + self.lexer = self._build_lexer() + stream = self.lexer.lex(text) + if self.options.postlex: + return self.options.postlex.process(stream) + return stream + + def get_terminal(self, name): + "Get information about a terminal" + return self._terminals_dict[name] + + def parse(self, text, start=None): + """Parse the given text, according to the options provided. + + The 'start' parameter is required if Lark was given multiple possible start symbols (using the start option). + + Returns a tree, unless specified otherwise. + """ + return self.parser.parse(text, start=start) + + +DATA = ( +{'parser': {'parser': {'tokens': {0: 'LSQB', 1: 'NAME', 2: 'PLUS', 3: 'ITEMS', 4: '__series_star_0', 5: 'recursed_element', 6: 'items', 7: 'series', 8: 'quiet_element', 9: 'metadata', 10: 'grouped', 11: 'last_element', 12: 'element', 13: 'notified_element', 14: 'trait', 15: 'COMMA', 16: 'RSQB', 17: '$END', 18: 'STAR', 19: 'COLON', 20: 'DOT', 21: '_dot', 22: '_colon', 23: 'start', 24: 'parallel', 25: 'expr', 26: '__parallel_star_1', 27: '_asterisk'}, 'states': {0: {0: (1, {'@': 10}), 1: (1, {'@': 10}), 2: (1, {'@': 10}), 3: (1, {'@': 10})}, 1: {0: (1, {'@': 11}), 1: (1, {'@': 11}), 2: (1, {'@': 11}), 3: (1, {'@': 11})}, 2: {4: (0, 10), 5: (0, 12), 6: (0, 3), 7: (0, 26), 8: (0, 15), 9: (0, 8), 10: (0, 17), 1: (0, 23), 2: (0, 24), 11: (0, 16), 12: (0, 33), 13: (0, 20), 14: (0, 25), 0: (0, 35), 3: (0, 11)}, 3: {15: (1, {'@': 12}), 16: (1, {'@': 12}), 17: (1, {'@': 12}), 18: (1, {'@': 12}), 19: (1, {'@': 12}), 20: (1, {'@': 12})}, 4: {}, 5: {15: (1, {'@': 13}), 16: (1, {'@': 13}), 20: (1, {'@': 13}), 17: (1, {'@': 13}), 19: (1, {'@': 13})}, 6: {16: (1, {'@': 14}), 17: (1, {'@': 14}), 15: (1, {'@': 14})}, 7: {15: (1, {'@': 15}), 16: (1, {'@': 15}), 17: (1, {'@': 15}), 18: (1, {'@': 15}), 19: (1, {'@': 15}), 20: (1, {'@': 15})}, 8: {15: (1, {'@': 16}), 16: (1, {'@': 16}), 17: (1, {'@': 16}), 18: (1, {'@': 16}), 19: (1, {'@': 16}), 20: (1, {'@': 16})}, 9: {16: (0, 30)}, 10: {5: (0, 12), 6: (0, 3), 9: (0, 8), 10: (0, 17), 1: (0, 23), 2: (0, 24), 11: (0, 6), 13: (0, 18), 12: (0, 33), 8: (0, 21), 14: (0, 25), 0: (0, 35), 3: (0, 11)}, 11: {15: (1, {'@': 17}), 16: (1, {'@': 17}), 17: (1, {'@': 17}), 18: (1, {'@': 17}), 19: (1, {'@': 17}), 20: (1, {'@': 17})}, 12: {21: (0, 36), 20: (0, 34), 22: (0, 0), 19: (0, 37), 16: (1, {'@': 18}), 17: (1, {'@': 18}), 15: (1, {'@': 18})}, 13: {17: (1, {'@': 19})}, 14: {15: (1, {'@': 20}), 16: (1, {'@': 20}), 17: (1, {'@': 20}), 19: (1, {'@': 20}), 20: (1, {'@': 20})}, 15: {0: (1, {'@': 21}), 1: (1, {'@': 21}), 2: (1, {'@': 21}), 3: (1, {'@': 21})}, 16: {16: (1, {'@': 22}), 17: (1, {'@': 22}), 15: (1, {'@': 22})}, 17: {15: (1, {'@': 23}), 16: (1, {'@': 23}), 17: (1, {'@': 23}), 18: (1, {'@': 23}), 19: (1, {'@': 23}), 20: (1, {'@': 23})}, 18: {0: (1, {'@': 24}), 1: (1, {'@': 24}), 2: (1, {'@': 24}), 3: (1, {'@': 24})}, 19: {0: (1, {'@': 25}), 1: (1, {'@': 25}), 2: (1, {'@': 25}), 3: (1, {'@': 25})}, 20: {0: (1, {'@': 26}), 1: (1, {'@': 26}), 2: (1, {'@': 26}), 3: (1, {'@': 26})}, 21: {0: (1, {'@': 27}), 1: (1, {'@': 27}), 2: (1, {'@': 27}), 3: (1, {'@': 27})}, 22: {4: (0, 10), 5: (0, 12), 6: (0, 3), 8: (0, 15), 23: (0, 4), 9: (0, 8), 10: (0, 17), 1: (0, 23), 2: (0, 24), 3: (0, 11), 24: (0, 27), 11: (0, 16), 7: (0, 29), 12: (0, 33), 0: (0, 35), 13: (0, 20), 14: (0, 25), 25: (0, 13)}, 23: {15: (1, {'@': 28}), 16: (1, {'@': 28}), 17: (1, {'@': 28}), 18: (1, {'@': 28}), 19: (1, {'@': 28}), 20: (1, {'@': 28})}, 24: {1: (0, 
7)}, 25: {15: (1, {'@': 29}), 16: (1, {'@': 29}), 17: (1, {'@': 29}), 18: (1, {'@': 29}), 19: (1, {'@': 29}), 20: (1, {'@': 29})}, 26: {16: (1, {'@': 30}), 17: (1, {'@': 30}), 15: (1, {'@': 30})}, 27: {17: (1, {'@': 31})}, 28: {4: (0, 10), 5: (0, 12), 6: (0, 3), 8: (0, 15), 9: (0, 8), 7: (0, 32), 10: (0, 17), 1: (0, 23), 2: (0, 24), 11: (0, 16), 12: (0, 33), 13: (0, 20), 14: (0, 25), 0: (0, 35), 3: (0, 11)}, 29: {15: (0, 2), 26: (0, 31), 17: (1, {'@': 32}), 16: (1, {'@': 32})}, 30: {15: (1, {'@': 33}), 16: (1, {'@': 33}), 17: (1, {'@': 33}), 18: (1, {'@': 33}), 19: (1, {'@': 33}), 20: (1, {'@': 33})}, 31: {15: (0, 28), 17: (1, {'@': 34}), 16: (1, {'@': 34})}, 32: {16: (1, {'@': 35}), 17: (1, {'@': 35}), 15: (1, {'@': 35})}, 33: {21: (0, 19), 20: (0, 34), 27: (0, 14), 19: (0, 37), 18: (0, 5), 22: (0, 1), 16: (1, {'@': 36}), 17: (1, {'@': 36}), 15: (1, {'@': 36})}, 34: {0: (1, {'@': 37}), 3: (1, {'@': 37}), 1: (1, {'@': 37}), 2: (1, {'@': 37})}, 35: {4: (0, 10), 5: (0, 12), 6: (0, 3), 24: (0, 9), 8: (0, 15), 9: (0, 8), 10: (0, 17), 1: (0, 23), 2: (0, 24), 11: (0, 16), 7: (0, 29), 12: (0, 33), 13: (0, 20), 14: (0, 25), 0: (0, 35), 3: (0, 11)}, 36: {0: (1, {'@': 38}), 1: (1, {'@': 38}), 2: (1, {'@': 38}), 3: (1, {'@': 38})}, 37: {0: (1, {'@': 39}), 3: (1, {'@': 39}), 1: (1, {'@': 39}), 2: (1, {'@': 39})}}, 'start_states': {'start': 22}, 'end_states': {'start': 4}}, 'lexer_conf': {'tokens': [{'@': 0}, {'@': 1}, {'@': 2}, {'@': 3}, {'@': 4}, {'@': 5}, {'@': 6}, {'@': 7}, {'@': 8}, {'@': 9}], 'ignore': ['WS'], 'g_regex_flags': 0, '__type__': 'LexerConf'}, 'start': ['start'], '__type__': 'LALR_ContextualLexer'}, 'rules': [{'@': 19}, {'@': 37}, {'@': 39}, {'@': 13}, {'@': 33}, {'@': 28}, {'@': 29}, {'@': 23}, {'@': 16}, {'@': 12}, {'@': 15}, {'@': 17}, {'@': 25}, {'@': 38}, {'@': 11}, {'@': 10}, {'@': 36}, {'@': 18}, {'@': 20}, {'@': 14}, {'@': 22}, {'@': 34}, {'@': 32}, {'@': 31}, {'@': 26}, {'@': 21}, {'@': 24}, {'@': 27}, {'@': 30}, {'@': 35}], 'options': {'debug': False, 'keep_all_tokens': False, 'tree_class': None, 'cache_grammar': False, 'postlex': None, 'parser': 'lalr', 'lexer': 'contextual', 'transformer': None, 'start': ['start'], 'priority': None, 'ambiguity': 'auto', 'propagate_positions': False, 'lexer_callbacks': {}, 'maybe_placeholders': False, 'edit_terminals': None, 'g_regex_flags': 0}, '__type__': 'Lark'} +) +MEMO = ( +{0: {'name': 'NAME', 'pattern': {'value': '[a-zA-Z_]\\w*', 'flags': [], '_width': [1, 4294967295], '__type__': 'PatternRE'}, 'priority': 1, '__type__': 'TerminalDef'}, 1: {'name': 'WS', 'pattern': {'value': '(?:[ \t\x0c\r\n])+', 'flags': [], '_width': [1, 4294967295], '__type__': 'PatternRE'}, 'priority': 1, '__type__': 'TerminalDef'}, 2: {'name': 'DOT', 'pattern': {'value': '.', 'flags': [], '__type__': 'PatternStr'}, 'priority': 1, '__type__': 'TerminalDef'}, 3: {'name': 'COLON', 'pattern': {'value': ':', 'flags': [], '__type__': 'PatternStr'}, 'priority': 1, '__type__': 'TerminalDef'}, 4: {'name': 'STAR', 'pattern': {'value': '*', 'flags': [], '__type__': 'PatternStr'}, 'priority': 1, '__type__': 'TerminalDef'}, 5: {'name': 'LSQB', 'pattern': {'value': '[', 'flags': [], '__type__': 'PatternStr'}, 'priority': 1, '__type__': 'TerminalDef'}, 6: {'name': 'RSQB', 'pattern': {'value': ']', 'flags': [], '__type__': 'PatternStr'}, 'priority': 1, '__type__': 'TerminalDef'}, 7: {'name': 'PLUS', 'pattern': {'value': '+', 'flags': [], '__type__': 'PatternStr'}, 'priority': 1, '__type__': 'TerminalDef'}, 8: {'name': 'ITEMS', 'pattern': {'value': 'items', 'flags': [], 
'__type__': 'PatternStr'}, 'priority': 1, '__type__': 'TerminalDef'}, 9: {'name': 'COMMA', 'pattern': {'value': ',', 'flags': [], '__type__': 'PatternStr'}, 'priority': 1, '__type__': 'TerminalDef'}, 10: {'origin': {'name': 'quiet_element', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'recursed_element', '__type__': 'NonTerminal'}, {'name': '_colon', '__type__': 'NonTerminal'}], 'order': 1, 'alias': 'quiet', 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 11: {'origin': {'name': 'quiet_element', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'element', '__type__': 'NonTerminal'}, {'name': '_colon', '__type__': 'NonTerminal'}], 'order': 0, 'alias': 'quiet', 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 12: {'origin': {'name': 'element', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'items', '__type__': 'NonTerminal'}], 'order': 3, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 13: {'origin': {'name': '_asterisk', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'STAR', 'filter_out': True, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 14: {'origin': {'name': 'series', '__type__': 'NonTerminal'}, 'expansion': [{'name': '__series_star_0', '__type__': 'NonTerminal'}, {'name': 'last_element', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 15: {'origin': {'name': 'metadata', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'PLUS', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'NAME', 'filter_out': False, '__type__': 'Terminal'}], 'order': 0, 'alias': 'metadata', 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 16: {'origin': {'name': 'element', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'metadata', '__type__': 'NonTerminal'}], 'order': 2, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 17: {'origin': {'name': 'items', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'ITEMS', 'filter_out': True, '__type__': 'Terminal'}], 'order': 0, 'alias': 'items', 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 18: {'origin': {'name': 'last_element', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'recursed_element', '__type__': 'NonTerminal'}], 'order': 1, 'alias': 'last', 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 19: {'origin': {'name': 'start', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'expr', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 20: {'origin': {'name': 'recursed_element', '__type__': 'NonTerminal'}, 
'expansion': [{'name': 'element', '__type__': 'NonTerminal'}, {'name': '_asterisk', '__type__': 'NonTerminal'}], 'order': 0, 'alias': 'recursed', 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 21: {'origin': {'name': '__series_star_0', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'quiet_element', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 22: {'origin': {'name': 'series', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'last_element', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 23: {'origin': {'name': 'element', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'grouped', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 24: {'origin': {'name': '__series_star_0', '__type__': 'NonTerminal'}, 'expansion': [{'name': '__series_star_0', '__type__': 'NonTerminal'}, {'name': 'notified_element', '__type__': 'NonTerminal'}], 'order': 2, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 25: {'origin': {'name': 'notified_element', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'element', '__type__': 'NonTerminal'}, {'name': '_dot', '__type__': 'NonTerminal'}], 'order': 0, 'alias': 'notify', 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 26: {'origin': {'name': '__series_star_0', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'notified_element', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 27: {'origin': {'name': '__series_star_0', '__type__': 'NonTerminal'}, 'expansion': [{'name': '__series_star_0', '__type__': 'NonTerminal'}, {'name': 'quiet_element', '__type__': 'NonTerminal'}], 'order': 3, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 28: {'origin': {'name': 'trait', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'NAME', 'filter_out': False, '__type__': 'Terminal'}], 'order': 0, 'alias': 'trait', 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 29: {'origin': {'name': 'element', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'trait', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 30: {'origin': {'name': '__parallel_star_1', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'COMMA', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'series', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'empty_indices': (), '__type__': 
'RuleOptions'}, '__type__': 'Rule'}, 31: {'origin': {'name': 'expr', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'parallel', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 32: {'origin': {'name': 'parallel', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'series', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 33: {'origin': {'name': 'grouped', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'LSQB', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'parallel', '__type__': 'NonTerminal'}, {'name': 'RSQB', 'filter_out': True, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 34: {'origin': {'name': 'parallel', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'series', '__type__': 'NonTerminal'}, {'name': '__parallel_star_1', '__type__': 'NonTerminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': True, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 35: {'origin': {'name': '__parallel_star_1', '__type__': 'NonTerminal'}, 'expansion': [{'name': '__parallel_star_1', '__type__': 'NonTerminal'}, {'name': 'COMMA', 'filter_out': True, '__type__': 'Terminal'}, {'name': 'series', '__type__': 'NonTerminal'}], 'order': 1, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 36: {'origin': {'name': 'last_element', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'element', '__type__': 'NonTerminal'}], 'order': 0, 'alias': 'last', 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 37: {'origin': {'name': '_dot', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'DOT', 'filter_out': True, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 38: {'origin': {'name': 'notified_element', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'recursed_element', '__type__': 'NonTerminal'}, {'name': '_dot', '__type__': 'NonTerminal'}], 'order': 1, 'alias': 'notify', 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}, 39: {'origin': {'name': '_colon', '__type__': 'NonTerminal'}, 'expansion': [{'name': 'COLON', 'filter_out': True, '__type__': 'Terminal'}], 'order': 0, 'alias': None, 'options': {'keep_all_tokens': False, 'expand1': False, 'priority': None, 'empty_indices': (), '__type__': 'RuleOptions'}, '__type__': 'Rule'}} +) +Shift = 0 +Reduce = 1 +def Lark_StandAlone(transformer=None, postlex=None): + namespace = {'Rule': Rule, 'TerminalDef': TerminalDef} + return Lark.deserialize(DATA, namespace, MEMO, transformer=transformer, postlex=postlex) diff --git a/poc/grammar.lark b/poc/grammar.lark new file mode 100644 index 000000000..1862800bc --- /dev/null +++ b/poc/grammar.lark @@ -0,0 +1,53 @@ +?start: expr + +_dot: "." 
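+// Rules prefixed with an underscore (_dot, _colon, _asterisk) are
+// inlined by the tree builder (see _should_expand in the generated
+// parser), so this punctuation never appears in the parse tree.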
+_colon: ":" +_asterisk: "*" + +// Grouped elements, e.g. "[a.b]", "[a,b]" +?grouped: "[" parallel "]" + +// Simple name as trait name, e.g. "a" +trait: NAME -> trait + +// Element that can be followed by a ".", ":", "*" +?element: trait | grouped | metadata | items + +// Syntax to indicate name is being used for matching an existing +// metadata name rather than a trait name. +metadata: "+" NAME -> metadata + +// Keyword for handing items (in list, or dict, or set, +// or a trait named "items") +items: "items" -> items + +// Element marked with notify flag set to True +// e.g. "a." +notified_element: (element | recursed_element) _dot -> notify + +// Element marked with notify flag set to False +// e.g. "a:", "[a.b]:" +quiet_element: (element | recursed_element) _colon -> quiet + +// Element whose notify flag depends on the last flag used +// e.g. The "b" and "c" elements will have notify flag set +// to false in "[b,c]:d" +last_element: (element | recursed_element) -> last + +// Element to be recursed. e.g. "a*", "[a.b]*" +recursed_element: element _asterisk -> recursed + +// Rule for joining elements in series +// e.g. "a.b.c" +?series: (notified_element | quiet_element)* last_element + +// Rule for joining elements in parallel +// e.g. "a,b,c" +?parallel: series ("," series)* + +?expr: parallel + +NAME: /[a-zA-Z_]\w*/ + +%import common.WS +%ignore WS diff --git a/poc/observe.py b/poc/observe.py new file mode 100644 index 000000000..8b6471264 --- /dev/null +++ b/poc/observe.py @@ -0,0 +1,314 @@ + + +def _is_not_none(value): + return value is not None + + +class BaseListener: + + def __eq__(self, other): + """ Return true if a given instance is equivalent to this + one. Needed for comparing paths and cleaning up listeners. + """ + raise NotImplementedError() + + @property + def notify(self): + """ Whether to call notifiers for changes on this item.""" + return self._notify + + @notify.setter + def notify(self, value): + self._notify = value + + +class _FilteredTraitListener(BaseListener): + + def __init__(self, notify, filter): + """ + Parameters + ---------- + notify : boolean + Whether to notify for changes. + filter : callable(name, trait) -> boolean + Callable that receives a named trait and returns + a boolean as for whether the trait is being + listened to. + It is the developers' responsibility to ensure two + equivalent ``filter`` compare equal. + i.e. this class should not be exposed to the users. + """ + self.filter = filter + self.notify = notify + + def __repr__(self): + return "".format( + self.notify, self.filter, + ) + + def __hash__(self): + return hash((type(self), self.notify, self.filter)) + + def __eq__(self, other): + if other is self: + return True + if type(other) is not type(self): + return False + return ( + (self.filter, self.notify) + == (other.filter, other.notify) + ) + + + +class MetadataListener(BaseListener): + """ A listener for traits with a given metadata criterion. + """ + + def __init__(self, metadata_name, value, notify): + """ + + Parameters + ---------- + metadata_name : str + Name of the metadata + value : callable(value) -> boolean + A callable that receives the value of the named metadata from a + trait and returns true if the metadata value is accepted. + notify : boolean + Whether to notify for changes. 
+ """ + self.metadata_name = metadata_name + self.value = value + self.notify = notify + + def __repr__(self): + return "".format( + self.metadata_name, self.value, self.notify, + ) + + def __hash__(self): + return hash(( + self.metadata_name, + self.value, + self.notify, + )) + + def __eq__(self, other): + return ( + type(self) is type(other) + and self.metadata_name == other.metadata_name + ) + + +class NamedTraitListener(BaseListener): + + def __init__( + self, name, notify, optional, comparison_mode=None): + """ + Parameters + ---------- + name : str + Name of the trait to listen to. + notify : boolean + Whether to notify for changes. + optional : boolean + Whether the trait is optional. If false and the trait + is not found, an exception will be raised. + comparison_mode : ComparisonMode or None + Whether to modify the default comparison behaviour. + """ + self.name = name + self.notify = notify + self.optional = optional + self.comparison_mode = comparison_mode + + def __repr__(self): + return "".format( + self.name, self.notify, + ) + + def __hash__(self): + return hash(( + type(self), + self.name, + self.notify, + self.optional, + self.comparison_mode, + )) + + def __eq__(self, other): + if other is self: + return True + if type(other) is not type(self): + return False + return ( + (self.name, self.notify, self.optional, self.comparison_mode) + == (other.name, other.notify, other.optional, + other.comparison_mode) + ) + + +class ListItemListener(BaseListener): + + def __init__(self, notify, optional): + self.notify = notify + self.optional = optional + + def __repr__(self): + return "".format( + self.notify, self.optional, + ) + + def __hash__(self): + return hash((type(self), self.notify, self.optional)) + + def __eq__(self, other): + return ( + type(self) is type(other) + and self.notify == other.notify + and self.optional == other.optional + ) + + +class DictItemListener(BaseListener): + + def __init__(self, notify, optional): + self.notify = notify + self.optional = optional + + def __repr__(self): + return "".format( + self.notify, self.optional, + ) + + def __hash__(self): + return hash((type(self), self.notify, self.optional)) + + def __eq__(self, other): + return ( + type(self) is type(other) + and self.notify == other.notify + and self.optional == other.optional + ) + + +class SetItemListener(BaseListener): + + def __init__(self, notify, optional): + self.notify = notify + self.optional = optional + + def __repr__(self): + return "".format( + self.notify, self.optional, + ) + + def __hash__(self): + return hash((type(self), self.notify, self.optional)) + + def __eq__(self, other): + return ( + type(self) is type(other) + and self.notify == other.notify + and self.optional == other.optional + ) + + +class DictValueListener(BaseListener): + # TODO: Define this class taking into account + # what is going to happen in TraitDictObject + + def __init__(self, notify): + self.notify = notify + + +class ListenerPath: + """ Data structure for representing the path(s) to observe + traits. + + For equality check, handling of cycles relies on the creator + of these ListenerPath to have maintained the cycles separately. + Only nodes are used when cycles are compared for equality. + """ + + def __init__(self, node, branches=(), cycles=()): + """ + + Parameters + ---------- + node : BaseListener + The current node. + branches : iterable of ListenerPath + Paths as branches. + cycles : iterable of ListenerPath + Paths as cycles. 
+
+
+class ListenerPath:
+    """ Data structure for representing the path(s) to observe
+    traits.
+
+    For equality check, handling of cycles relies on the creator
+    of these ListenerPath to have maintained the cycles separately.
+    Only nodes are used when cycles are compared for equality.
+    """
+
+    def __init__(self, node, branches=(), cycles=()):
+        """
+        Parameters
+        ----------
+        node : BaseListener
+            The current node.
+        branches : iterable of ListenerPath
+            Paths as branches.
+        cycles : iterable of ListenerPath
+            Paths as cycles. They are assumed to have been referenced
+            elsewhere inside a larger ListenerPath that includes this
+            path.
+        """
+        self.node = node
+        self.branches = set(branches)
+        self.cycles = set(cycles)
+
+    @property
+    def nexts(self):
+        """ Next set of ListenerPath.
+        """
+        return self.branches | self.cycles
+
+    def __new__(cls, *args, **kwargs):
+        # Required for supporting cycles.
+        instance = super().__new__(cls)
+        instance.node = None
+        instance.cycles = set()
+        instance.branches = set()
+        return instance
+
+    def __repr__(self):
+        return "<ListenerPath(node={!r}, nr_branches={}, nr_cycles={})>".format(
+            self.node, len(self.branches), len(self.cycles)
+        )
+
+    def __hash__(self):
+        return hash(
+            (
+                type(self),
+                self.node,
+                frozenset(p.node for p in self.cycles),
+                frozenset(self.branches),
+            )
+        )
+
+    def __eq__(self, other):
+        """ Return true if a given ListenerPath is equivalent to this one.
+        """
+        if other is self:
+            return True
+        if type(other) is not type(self):
+            return False
+
+        self_loop_nodes = set(p.node for p in self.cycles)
+        other_loop_nodes = set(p.node for p in other.cycles)
+        return (
+            self.node == other.node
+            # Rehash as the branches may have been modified afterwards
+            and set(iter(self.branches)) == set(iter(other.branches))
+            and self_loop_nodes == other_loop_nodes
+        )
+
+    def info(self, indent=0):
+        """ Return a list of user-friendly texts containing descriptive
+        information about this path.
+
+        Returns
+        -------
+        lines : list of str
+        """
+        infos = []
+        infos.append(" " * indent + "Node: {!r}".format(self.node))
+        for path in self.branches:
+            infos.extend(path.info(indent=indent + 4))
+
+        for path in self.cycles:
+            infos.append(" " * (indent + 4) + "Loop to {!r}".format(path.node))
+        return infos
diff --git a/poc/parsing.py b/poc/parsing.py
new file mode 100644
index 000000000..5fa867f6a
--- /dev/null
+++ b/poc/parsing.py
@@ -0,0 +1,263 @@
+from functools import reduce
+import operator
+
+import poc.expressions as _expr_module
+from poc.generated_parser import Lark_StandAlone as _Lark_StandAlone
+
+parser = _Lark_StandAlone()
+
+#: Token annotation for a name (a trait name, or a metadata name, etc.)
+_NAME_TOKEN = "NAME"
+
+
+def handle_series(trees, default_notifies):
+    """ Handle expressions joined in series using "." or ":" connectors.
+
+    Parameters
+    ----------
+    trees : list of lark.tree.Tree
+        The children tree for the "series" rule.
+        It should contain one or more items.
+
+    Returns
+    -------
+    expression : Expression
+    """
+    expressions = (
+        handle_tree(tree, default_notifies=default_notifies)
+        for tree in trees
+    )
+    return _expr_module.join_(*expressions)
+
+
+def handle_parallel(trees, default_notifies):
+    """ Handle expressions joined in parallel using "," connectors.
+
+    Parameters
+    ----------
+    trees : list of lark.tree.Tree
+        The children tree for the "parallel" rule.
+        It should contain one or more items.
+
+    Returns
+    -------
+    expression : Expression
+    """
+    expressions = (
+        handle_tree(tree, default_notifies=default_notifies) for tree in trees
+    )
+    return reduce(operator.or_, expressions)
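Taken together, these handlers aim to make the mini-language equivalent to hand-built expressions. A sketch of the intended equivalences (the cases in poc/tests/test_parsing.py below are authoritative):

    from poc import expressions, parsing

    assert parsing.parse("a") == expressions.trait("a")
    assert parsing.parse("a.b") == expressions.trait("a").trait("b")
    assert parsing.parse("a,b") == (
        expressions.trait("a") | expressions.trait("b")
    )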
+
+
+def handle_notify(trees, default_notifies):
+    """ Handle trees wrapped with the notify flag set to True,
+    indicated by the "." suffix on an element.
+
+    Parameters
+    ----------
+    trees : list of lark.tree.Tree
+        The children tree for the "notify" rule.
+        It contains only one item.
+
+    Returns
+    -------
+    expression : Expression
+    """
+    default_notifies.append(True)
+    # Expect a single child as element
+    tree, = trees
+    expression = handle_tree(tree, default_notifies=default_notifies)
+    notify = default_notifies.pop()
+    if notify is not True:
+        raise RuntimeError("Default notify flag unexpectedly changed.")
+    return expression
+
+
+def handle_quiet(trees, default_notifies):
+    """ Handle trees wrapped with the notify flag set to False,
+    indicated by the ":" suffix on an element.
+
+    Parameters
+    ----------
+    trees : list of lark.tree.Tree
+        The children tree for the "quiet" rule.
+        It contains only one item.
+
+    Returns
+    -------
+    expression : Expression
+    """
+    #: TODO: Refactor this, which is basically identical to handle_notify
+    #: apart from the flag.
+    default_notifies.append(False)
+    # Expect a single child as element
+    tree, = trees
+    expression = handle_tree(tree, default_notifies=default_notifies)
+    notify = default_notifies.pop()
+    if notify is not False:
+        raise RuntimeError("Default notify flag unexpectedly changed.")
+    return expression
+
+
+def handle_last(trees, default_notifies):
+    """ Handle trees when the notify flag is not immediately specified
+    as a suffix. The notify flag currently at the top of the stack is
+    used.
+
+    e.g. in "a.[b,c]:.d", the elements "b" and "c" receive a notify
+    flag set to false from the ":" that follows the parallel group.
+
+    Parameters
+    ----------
+    trees : list of lark.tree.Tree
+        The children tree for the "last" rule.
+        It contains only one item.
+
+    Returns
+    -------
+    expression : Expression
+    """
+    tree, = trees
+    return handle_tree(tree, default_notifies=default_notifies)
+
+
+def handle_trait(trees, default_notifies):
+    """ Handle an element for a named trait.
+
+    Parameters
+    ----------
+    trees : list of lark.tree.Tree
+        The children tree for the "trait" rule.
+        It contains only one item.
+
+    Returns
+    -------
+    expression : Expression
+    """
+    token, = trees
+    # sanity check
+    if token.type != _NAME_TOKEN:
+        raise ValueError("Unexpected token: {!r}".format(token))
+    name = token.value
+    notify = default_notifies[-1]
+    return _expr_module.trait(name, notify=notify)
+
+
+def handle_metadata(trees, default_notifies):
+    """ Handle an element for filtering existing metadata.
+
+    Parameters
+    ----------
+    trees : list of lark.tree.Tree
+        The children tree for the "metadata" rule.
+        It contains only one item.
+
+    Returns
+    -------
+    expression : Expression
+    """
+    token, = trees
+    # sanity check
+    if token.type != _NAME_TOKEN:
+        raise ValueError("Unexpected token: {!r}".format(token))
+    metadata_name = token.value
+    notify = default_notifies[-1]
+    return _expr_module.metadata(metadata_name, notify=notify)
+
+
+def handle_recursed(trees, default_notifies):
+    """ Handle trees to be wrapped with recursion.
+
+    Parameters
+    ----------
+    trees : list of lark.tree.Tree
+        The children tree for the "recursed" rule.
+        There should be only one item.
+
+    Returns
+    -------
+    expression : Expression
+    """
+    tree, = trees
+    return _expr_module.recursive(
+        handle_tree(tree, default_notifies=default_notifies)
+    )
+
+
+def handle_items(trees, default_notifies):
+    """ Handle the keyword "items".
+
+    Parameters
+    ----------
+    trees : list of lark.tree.Tree
+        The children tree for the "items" rule.
+        It should be empty.
+ + Returns + ------- + expression : Expression + """ + if trees: + # Nothing should be wrapped in items + raise ValueError("Unexpected tree: {!r}".format(trees)) + notify = default_notifies[-1] + return reduce( + operator.or_, + ( + _expr_module.trait("items", notify=notify, optional=True), + _expr_module.list_items(notify=notify, optional=True), + _expr_module.dict_items(notify=notify, optional=True), + _expr_module.set_items(notify=notify, optional=True), + ) + ) + + +def handle_tree(tree, default_notifies=None): + """ Handle a tree using the specified rule. + + Parameters + ---------- + tree : lark.tree.Tree + Tree to be converted to an Expression. + default_notifies : list of boolean + The notify flag stack. + The last item is the current notify flag. + See handlers for "notify" and "quiet", which + push and pop a notify flag to this stack. + + Returns + ------- + expression: Expression + """ + if default_notifies is None: + default_notifies = [True] + + # All handlers must be callable + # with the signature (list of Tree, default_notifies) + handlers = { + "series": handle_series, + "parallel": handle_parallel, + "recursed": handle_recursed, + "notify": handle_notify, + "quiet": handle_quiet, + "last": handle_last, + "trait": handle_trait, + "metadata": handle_metadata, + "items": handle_items, + } + return handlers[tree.data](tree.children, default_notifies=default_notifies) + + +def parse(text): + """ Top-level function for parsing user's text to an Expression. + + Parameters + ---------- + text : str + Text to be parsed. + + Returns + ------- + expression : Expression + """ + tree = parser.parse(text) + return handle_tree(tree) diff --git a/poc/tests/__init__.py b/poc/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/poc/tests/test_expressions.py b/poc/tests/test_expressions.py new file mode 100644 index 000000000..4916ba255 --- /dev/null +++ b/poc/tests/test_expressions.py @@ -0,0 +1,668 @@ +import copy +import pickle +import unittest + +from poc.expressions import trait, recursive, metadata +from poc.observe import ( + _is_not_none, + NamedTraitListener, ListenerPath, MetadataListener, +) + + +class TestBasicExpression(unittest.TestCase): + + def test_basic_t(self): + expression = trait("name") + actual, = expression.as_paths() + self.assertEqual( + actual, + ListenerPath( + node=NamedTraitListener( + name="name", + notify=True, + optional=False, + ), + ), + ) + + def test_basic_path(self): + expression = trait("name").trait("attr") + actual, = expression.as_paths() + self.assertEqual( + actual, + ListenerPath( + node=NamedTraitListener( + name="name", + notify=True, + optional=False, + ), + branches=set([ + ListenerPath( + node=NamedTraitListener( + name="attr", + notify=True, + optional=False, + ), + ) + ]), + ), + ) + + +class TestOrExpression(unittest.TestCase): + + def test_or_expression(self): + expression = trait("name") | trait("attr") + expected = [ + ListenerPath( + node=NamedTraitListener( + name="name", + notify=True, + optional=False, + ), + ), + ListenerPath( + node=NamedTraitListener( + name="attr", + notify=True, + optional=False, + ), + ), + ] + self.assertCountEqual( + expression.as_paths(), + expected, + ) + + def test_or_with_same_nodes(self): + e1 = trait("name") | trait("name") + e2 = trait("name") + self.assertEqual(len(e1.as_paths()), 1) + self.assertEqual(len(e2.as_paths()), 1) + self.assertEqual(e1.as_paths(), e2.as_paths()) + + def test_or_with_same_paths(self): + e1 = trait("name") | trait("age") + e2 = trait("age") | 
trait("name") + e3 = e1 | e2 + self.assertEqual(len(e3.as_paths()), 2) + + def test_or_then_extend(self): + e1 = (trait("a").trait("b") | trait("c")).trait("d") + + a_path, c_path = e1.as_paths() + if a_path.node.name == "c": + c_path, a_path = a_path, c_path + + self.assertEqual( + [p.node.name for p in a_path.nexts], + ["b"] + ) + b_path, = a_path.nexts + self.assertEqual( + [p.node.name for p in b_path.nexts], + ["d"] + ) + self.assertEqual( + [p.node.name for p in c_path.nexts], + ["d"] + ) + + def test_then(self): + expression = trait("child").then(trait("age") | trait("name")) + actual, = expression.as_paths() + self.assertEqual( + actual, + ListenerPath( + node=NamedTraitListener( + name="child", + notify=True, + optional=False, + ), + branches=set([ + ListenerPath( + node=NamedTraitListener( + name="age", + notify=True, + optional=False, + ), + ), + ListenerPath( + node=NamedTraitListener( + name="name", + notify=True, + optional=False, + ), + ), + ]) + ) + ) + + +class TestMetadata(unittest.TestCase): + + def test_metadata_on_its_own(self): + expression = metadata("updated") + actual, = expression.as_paths() + expected = ListenerPath( + node=MetadataListener( + metadata_name="updated", + value=_is_not_none, + notify=True, + ) + ) + self.assertEqual(actual, expected) + + +class TestRecursion(unittest.TestCase): + + def test_recursion(self): + expression = trait("root").recursive(trait("left") | trait("right")) + actual, = expression.as_paths() + self.assertEqual(actual.node.name, "root") + + self.assertEqual( + set(p.node.name for p in actual.nexts), + set(["left", "right"]) + ) + nexts = list(actual.nexts) + self.assertCountEqual(nexts[0].nexts, set(actual.nexts)) + self.assertCountEqual(nexts[1].nexts, set(actual.nexts)) + + def test_recursion_with_equals(self): + # Same test but use equality check + expression = trait("root").recursive(trait("left") | trait("right")) + actual, = expression.as_paths() + + expected = ListenerPath( + node=NamedTraitListener( + name="root", + notify=True, + optional=False, + ), + ) + left = ListenerPath( + node=NamedTraitListener( + name="left", + notify=True, + optional=False, + ), + ) + right = ListenerPath( + node=NamedTraitListener( + name="right", + notify=True, + optional=False, + ) + ) + expected.branches.update([left, right]) + left.cycles.update([left, right]) + right.cycles.update([left, right]) + + self.assertEqual(actual, expected) + + def test_recursion_then_extend(self): + expression = ( + trait("root").recursive( + trait("left") | trait("right")).trait("value") + ) + actual, = expression.as_paths() + + # First level, root only + self.assertEqual(actual.node.name, "root") + + # Second level, left or right + self.assertCountEqual( + [p.node.name for p in actual.nexts], + ["left", "right"], + ) + + # Third level, left or right or value + for path in actual.nexts: + self.assertCountEqual( + [p.node.name for p in path.nexts], + ["left", "right", "value"], + ) + + def test_recursion_then_extend_with_equals(self): + # Same test but use equality check + expression = trait("root").recursive( + trait("left") | trait("right")).trait("value") + actual, = expression.as_paths() + + expected = ListenerPath( + node=NamedTraitListener( + name="root", + notify=True, + optional=False, + ), + ) + left = ListenerPath( + node=NamedTraitListener( + name="left", + notify=True, + optional=False, + ), + ) + right = ListenerPath( + node=NamedTraitListener( + name="right", + notify=True, + optional=False, + ) + ) + value = ListenerPath( + 
node=NamedTraitListener( + name="value", + notify=True, + optional=False, + ) + ) + expected.branches.update([left, right]) + left.cycles.update([left, right]) + left.branches.add(value) + right.cycles.update([left, right]) + right.branches.add(value) + + self.assertEqual(actual, expected) + + def test_recursion_different_order(self): + expression1 = trait("root").recursive(trait("right") | trait("left")).trait("value") + expression2 = trait("root").recursive(trait("left") | trait("right")).trait("value") + self.assertEqual( + expression1.as_paths(), + expression2.as_paths(), + ) + + def test_recursion_not_equal(self): + expression1 = trait("root").recursive(trait("left") | trait("right")).trait("value") + expression2 = trait("root").recursive(trait("prev") | trait("right")).trait("value") + self.assertNotEqual( + expression1.as_paths(), + expression2.as_paths(), + ) + + def test_recursion_branch_not_equal(self): + expression1 = trait("root").recursive(trait("left") | trait("right")).trait("value") + expression2 = trait("root").recursive(trait("left") | trait("right")).trait("age") + self.assertNotEqual( + expression1.as_paths(), + expression2.as_paths(), + ) + + def test_recursion_multi_level(self): + left_then_right = trait("left").trait("right") + expression = ( + trait("root").recursive(left_then_right) + ) + actual, = expression.as_paths() + + # First level, root only + self.assertEqual(actual.node.name, "root") + + # Second level, left + path, = actual.nexts + self.assertEqual(path.node.name, "left") + + # Third level, right + path, = path.nexts + self.assertEqual(path.node.name, "right") + + # Fourth level, back to left again + path, = path.nexts + left_path = path + self.assertEqual(path.node.name, "left") + + # And so on + path, = path.nexts + self.assertEqual(path.node.name, "right") + path, = path.nexts + self.assertIs(path, left_path) + + # The original left_or_right should not be + # mutated + new_left_then_right = trait("left").trait("right") + self.assertEqual( + left_then_right, + new_left_then_right, + ) + + def test_recursion_multi_level_with_equals(self): + # Same test but use equality check + left_then_right = trait("left").trait("right") + expression = ( + trait("root").recursive(left_then_right) + ) + actual, = expression.as_paths() + + expected = ListenerPath( + node=NamedTraitListener( + name="root", + notify=True, + optional=False, + ) + ) + left_path = ListenerPath( + node=NamedTraitListener( + name="left", + notify=True, + optional=False, + ), + ) + right_path = ListenerPath( + node=NamedTraitListener( + name="right", + notify=True, + optional=False, + ) + ) + right_path.cycles.add(left_path) + left_path.branches.add(right_path) + expected.branches.add(left_path) + + self.assertEqual(actual, expected) + + def test_recursion_extended_twice(self): + # This would match + # root.left.right.value + # root.left.right.left.right.value + # root.left.right.left.right.left.right.value + expression = trait("root").recursive( + trait("left").trait("right")).trait("value") + + path, = expression.as_paths() + + # first is root + self.assertEqual(path.node.name, "root") + + # second is left + self.assertEqual([p.node.name for p in path.nexts], ["left"]) + path, = path.nexts + + # then it matches right + self.assertEqual([p.node.name for p in path.nexts], ["right"]) + path, = path.nexts + + # then it might match left again, or value + names = [p.node.name for p in path.nexts] + self.assertCountEqual(names, ["left", "value"]) + + # if it matches left, then it has to match right 
+        for left_path in path.nexts:
+            if left_path.node.name == "left":
+                right_path, = left_path.nexts
+                self.assertEqual(right_path.node.name, "right")
+                break
+        else:
+            self.fail("No left node found.")
+
+    def test_recursion_extended_twice_with_equals(self):
+        # Same test, but use equality check
+        # This would match
+        # root.left.right.value
+        # root.left.right.left.right.value
+        # root.left.right.left.right.left.right.value
+        expression = trait("root").recursive(
+            trait("left").trait("right")).trait("value")
+
+        expected = ListenerPath(
+            node=NamedTraitListener(
+                name="root",
+                notify=True,
+                optional=False,
+            ),
+        )
+        left = ListenerPath(
+            node=NamedTraitListener(
+                name="left",
+                notify=True,
+                optional=False,
+            ),
+        )
+        right = ListenerPath(
+            node=NamedTraitListener(
+                name="right",
+                notify=True,
+                optional=False,
+            )
+        )
+        value = ListenerPath(
+            node=NamedTraitListener(
+                name="value",
+                notify=True,
+                optional=False,
+            )
+        )
+        right.branches.add(value)
+        right.cycles.add(left)
+        left.branches.add(right)
+        expected.branches.add(left)
+
+        actual, = expression.as_paths()
+        self.assertEqual(actual, expected)
+
+    def test_recursive_from_empty(self):
+        expression = recursive(trait("name"))
+        expected = ListenerPath(
+            node=NamedTraitListener(
+                name="name",
+                notify=True,
+                optional=False,
+            ),
+        )
+        expected.cycles.add(expected)
+
+        actual, = expression.as_paths()
+        self.assertEqual(actual, expected)
+
+    def test_recursive_different_paths(self):
+        expression1 = trait("root").recursive(trait("one").trait("two"))
+        expression2 = trait("root").recursive(trait("one").trait("three"))
+        self.assertNotEqual(
+            expression1.as_paths(),
+            expression2.as_paths()
+        )
+
+    def test_deepcopy_strange_behaviour(self):
+        e1 = recursive(trait("value") | trait("name"))
+        e2 = trait("name")
+        e3 = e1 | e2
+
+        self.assertCountEqual(
+            set(e1.as_paths()) | set(e2.as_paths()),
+            e3.as_paths(),
+        )
+
+    def test_recurse_extend_then_recurse(self):
+        recurse_c = recursive(trait("c"))
+        e = recursive(recurse_c.trait("d"))
+
+        path, = e.as_paths()
+
+        # First is just "c"
+        self.assertEqual(path.node.name, "c")
+
+        # Then it matches either "c" or "d"
+        self.assertCountEqual(
+            [p.node.name for p in path.nexts],
+            ["c", "d"]
+        )
+
+        # d is the branch...
+        d_path, = path.branches
+
+        # it should then go back to c
+        self.assertEqual([p.node.name for p in d_path.nexts], ["c"])
+
+    def test_extend_recurse_recurse(self):
+        e = recursive(trait("b", False).recursive(trait("c")))
+
+        path, = e.as_paths()
+
+        # First it is just b
+        self.assertEqual(path.node.name, "b")
+
+        # then it matches "c"
+        self.assertCountEqual([p.node.name for p in path.nexts], ["c"])
+
+        c_path, = path.nexts
+
+        # then it loops to "c" or "b"
+        self.assertCountEqual([p.node.name for p in c_path.nexts], ["c", "b"])
+
+        # With equality check
+        expected = ListenerPath(
+            node=NamedTraitListener(name="b", notify=False, optional=False),
+        )
+        expected_c = ListenerPath(
+            node=NamedTraitListener(name="c", notify=True, optional=False),
+        )
+        expected_c.cycles.update([expected, expected_c])
+        expected.branches.add(expected_c)
+        self.assertEqual(path, expected)
+
+
+class TestPicklingSupport(unittest.TestCase):
+    """ Integration test with the ListenerPath for pickling support.
+
+    When HasTraits methods are decorated with ``observe``, we may choose
+    to hash the created ``ListenerPath`` low level objects, instead of
+    hashing the user-facing ``Expression``, which aims to provide a
+    user-friendly interface.
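+
+    Note that a recursive path refers back to itself, for example
+    (cf. ``test_recursive_from_empty`` above)::
+
+        path, = recursive(trait("a")).as_paths()
+        assert path in path.cycles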
+
+    With ``ListenerPath`` being a persisted internal state of a HasTraits
+    instance, it needs to support deepcopy and pickling. Recursion
+    (self-referencing paths) makes both challenging.
+    """
+
+    def test_basic_path_pickle(self):
+        expression = trait("a")
+        path, = expression.as_paths()
+        serialized = pickle.dumps(path)
+        deserialized = pickle.loads(serialized)
+        self.assertEqual(deserialized, path)
+
+    def test_recursed_path_pickle(self):
+        expression = recursive(trait("a"))
+        path, = expression.as_paths()
+        serialized = pickle.dumps(path)
+        deserialized = pickle.loads(serialized)
+        self.assertEqual(deserialized, path)
+
+
+class TestDeepCopySupport(unittest.TestCase):
+    """ Integration test with the ListenerPath to test deepcopy support.
+
+    When HasTraits methods are decorated with ``observe``, we may choose
+    to hash the created ``ListenerPath`` low level objects, instead of
+    hashing the user-facing ``Expression``, which aims to provide a
+    user-friendly interface.
+
+    With ``ListenerPath`` being a persisted internal state of a HasTraits
+    instance, it needs to support deepcopy and pickling. Recursion
+    (self-referencing paths) makes both challenging.
+    """
+
+    def test_basic_path_deep_copy(self):
+        expression = trait("a")
+        path, = expression.as_paths()
+        copied = copy.deepcopy(path)
+        self.assertEqual(copied, path)
+
+    def test_recursed_path_deep_copy(self):
+        expression = recursive(trait("a"))
+        path, = expression.as_paths()
+        copied = copy.deepcopy(path)
+        self.assertEqual(copied, path)
+
+
+class TestPathEquality(unittest.TestCase):
+    """ For sanity checks."""
+
+    def test_recursion_paths(self):
+        path1 = create_path()
+        path2 = create_path()
+        self.assertEqual(path1, path2)
+
+    def test_recursion_paths2(self):
+        path1 = create_path2()
+        path2 = create_path2()
+        self.assertEqual(path1, path2)
+
+
+def create_path():
+    path1 = ListenerPath(
+        node=NamedTraitListener(
+            name="root",
+            notify=True,
+            optional=False,
+        ),
+    )
+    left = ListenerPath(
+        node=NamedTraitListener(
+            name="left",
+            notify=True,
+            optional=False,
+        ),
+    )
+    right = ListenerPath(
+        node=NamedTraitListener(
+            name="right",
+            notify=True,
+            optional=False,
+        )
+    )
+    value = ListenerPath(
+        node=NamedTraitListener(
+            name="value",
+            notify=True,
+            optional=False,
+        )
+    )
+    left.cycles.update([left, right])
+    left.branches.add(value)
+    right.cycles.update([left, right])
+    right.branches.add(value)
+    path1.branches.update([left, right])
+    return path1
+
+
+def create_path2():
+    expected = ListenerPath(
+        node=NamedTraitListener(
+            name="root",
+            notify=True,
+            optional=False,
+        )
+    )
+    left_path = ListenerPath(
+        node=NamedTraitListener(
+            name="left",
+            notify=True,
+            optional=False,
+        ),
+    )
+    right_path = ListenerPath(
+        node=NamedTraitListener(
+            name="right",
+            notify=True,
+            optional=False,
+        )
+    )
+    left_path.branches.add(right_path)
+    right_path.cycles.add(left_path)
+    expected.branches.add(left_path)
+    return expected
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/poc/tests/test_parsing.py b/poc/tests/test_parsing.py
new file mode 100644
index 000000000..c620f95f2
--- /dev/null
+++ b/poc/tests/test_parsing.py
@@ -0,0 +1,140 @@
+import unittest
+
+from poc.generated_parser import (
+    UnexpectedCharacters,
+    UnexpectedToken,
+)
+
+from poc import parsing
+from poc import expressions
+
+
+class TestParsing(unittest.TestCase):
+
+    def test_simple(self):
+        actual = parsing.parse("a")
+        expected = expressions.trait("a")
+        self.assertEqual(actual, expected)
+
+    def test_join(self):
+        actual = parsing.parse("a.b.c")
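+        # "." joins traits in series with notifications enabled; compare
+        # test_join_with_colon, where ":" sets notify=False on the
+        # preceding traits.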
+        expected = expressions.trait("a").trait("b").trait("c")
+        self.assertEqual(actual, expected)
+
+    def test_join_with_colon(self):
+        actual = parsing.parse("a:b:c")
+        expected = expressions.trait("a", False).trait("b", False).trait("c")
+        self.assertEqual(actual, expected)
+
+    def test_or_with_commas(self):
+        actual = parsing.parse("a,b,c")
+        expected = (
+            expressions.trait("a")
+            | expressions.trait("b")
+            | expressions.trait("c")
+        )
+        self.assertEqual(actual, expected)
+
+    def test_or_with_join_nested(self):
+        actual = parsing.parse("a.b.c,d.e")
+        expected = (
+            expressions.trait("a").trait("b").trait("c")
+            | expressions.trait("d").trait("e")
+        )
+        self.assertEqual(actual, expected)
+
+    def test_items_support(self):
+        actual = parsing.parse("a.items:b")
+
+        items_attr_or_items = (
+            expressions.trait("items", notify=False, optional=True)
+            | expressions.list_items(notify=False, optional=True)
+            | expressions.dict_items(notify=False, optional=True)
+            | expressions.set_items(notify=False, optional=True)
+        )
+        expected = (
+            expressions.trait("a").then(
+                items_attr_or_items).trait(
+                "b")
+        )
+        self.assertEqual(actual, expected)
+
+    def test_grouped_or(self):
+        actual = parsing.parse("root.[left,right]")
+        expected = (
+            expressions.trait("root").then(
+                expressions.trait("left") | expressions.trait("right"))
+        )
+        self.assertEqual(actual, expected)
+
+    def test_grouped_or_extended(self):
+        actual = parsing.parse("root.[left,right].value")
+        expected = (
+            expressions.trait("root").then(
+                expressions.trait("left") | expressions.trait("right")).trait(
+                "value")
+        )
+        self.assertEqual(actual, expected)
+
+    def test_unparsable_content(self):
+        with self.assertRaises(UnexpectedCharacters):
+            parsing.parse("a.b.c^abc")
+
+    def test_error_empty_string(self):
+        with self.assertRaises(UnexpectedToken):
+            parsing.parse("")
+
+    def test_error_unconnected_expressions(self):
+        with self.assertRaises(UnexpectedToken):
+            parsing.parse("[a.b]c")
+
+    def test_recursion_support(self):
+        actual = parsing.parse("root.[left,right]*.value")
+        expected = (
+            expressions.trait("root").recursive(
+                expressions.trait("left") | expressions.trait("right")).trait(
+                "value")
+        )
+        self.assertEqual(actual, expected)
+
+    def test_recurse_twice(self):
+        actual = parsing.parse("[b:c*]*")
+        expected = expressions.recursive(
+            expressions.trait("b", False).recursive(expressions.trait("c"))
+        )
+        self.assertEqual(actual, expected)
+
+    def test_group_and_join(self):
+        actual = parsing.parse("[a:b,c].d")
+        expected = (
+            expressions.trait("a", False).trait("b")
+            | expressions.trait("c")
+        ).trait("d")
+        self.assertEqual(actual, expected)
+
+    def test_group_followed_by_colon(self):
+        actual = parsing.parse("[a:b,c]:d")
+        expected = (
+            expressions.trait("a", False).trait("b", False)
+            | expressions.trait("c", False)
+        ).trait("d")
+        self.assertEqual(actual, expected)
+
+    def test_join_double_recursion_modify_last(self):
+        actual = parsing.parse("a.[b:c*]*.d")
+        expected = (
+            expressions.trait("a").recursive(
+                expressions.trait("b", False).recursive(
+                    expressions.trait("c"))).trait("d")
+        )
+        self.assertEqual(actual, expected)
+
+    def test_multi_branch_then_or_modify_last(self):
+        actual = parsing.parse("root.[a.b.c.d,value]:g")
+        expected = (
+            expressions.trait("root").then(
+                expressions.trait("a").trait("b").trait("c").trait("d", False)
+                | expressions.trait("value", False)
+            ).trait("g")
+        )
+        self.assertEqual(actual, expected)
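+
+
+# Illustrative note (mirrors test_recursion_support above): every string in
+# the mini-language has a fluent-API equivalent, e.g.
+#
+#     parsing.parse("root.[left,right]*.value") == (
+#         expressions.trait("root").recursive(
+#             expressions.trait("left") | expressions.trait("right")
+#         ).trait("value")
+#     )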