Skip to content

Commit

Permalink
Replace redbaron with parso for Python 3.7 support #96
Browse files Browse the repository at this point in the history
* Static loader improvements

* Add parser using parso

* Run tests with Python 3.7 on travis

* Set parso as default parser

Users can override with GETGAUGE_USE_0_3_3_PARSER environment variable
  • Loading branch information
surajbarkale authored and shubhamsc committed Aug 6, 2018
1 parent d4a0ad4 commit 98b16ae
Show file tree
Hide file tree
Showing 19 changed files with 1,375 additions and 246 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@ docs/build/html
.vscode/
venv/
setup.py
build
build
.tox/
14 changes: 9 additions & 5 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,15 @@ addons:
packages:
- oracle-java8-installer

python:
- "2.7"
- "3.4"
- "3.5"
- "3.6"
matrix:
include:
- python: 2.7
- python: 3.4
- python: 3.5
- python: 3.6
- python: 3.7
dist: xenial
sudo: true # Only required till travis-ci/travis-ci/issues/9815 is fixed

env:
global:
Expand Down
15 changes: 14 additions & 1 deletion docs/source/faq.rst
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,17 @@ By default the language runner uses ``python`` command to run specs. To change t
ImportError: No module named step_impl.<file_name>
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This error happens on older versions of Python(2.7, 3.2). Create ``step_impl/__init__.py`` to fix this.
This error happens on older versions of Python(2.7, 3.2). Create ``step_impl/__init__.py`` to fix this.


Steps not found or refactor failure after upgrading from 0.3.3
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

We have replaced the internal Python parsing engine after version 0.3.3 was released. This change was necessary to support Python 3 syntax. We have tried our best to make sure there is no impact on users. However, you may have found a bug in our new parser.

To revert to the old parser implementation, add ``GETGAUGE_USE_0_3_3_PARSER`` property to the ``python.properties`` file in the ``<PROJECT_DIR>/env/default`` directory.

::

    GETGAUGE_USE_0_3_3_PARSER = true

If this fixes your issue (or if you are still encountering it), please create an issue in our GitHub project. This property, along with the old parser, will be removed in a future release.
77 changes: 77 additions & 0 deletions getgauge/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import os
import six
from abc import ABCMeta, abstractmethod
from getgauge.parser_parso import ParsoPythonFile
from getgauge.parser_redbaron import RedbaronPythonFile


class PythonFile(object):
    """Facade that forwards parsing to the currently selected parser backend."""

    # Backend class used by parse(); assigned by select_python_parser().
    Class = None

    # Values of GETGAUGE_USE_0_3_3_PARSER that must NOT enable the old parser.
    _DISABLED_FLAG_VALUES = ('', '0', 'false', 'no', 'off')

    @staticmethod
    def parse(file_path, content=None):
        """
        Create a PythonFileABC object with specified file_path and content. If content is None
        then, it is loaded from the file_path. Otherwise, file_path is only used for
        reporting errors.
        """
        return PythonFile.Class.parse(file_path, content)

    @staticmethod
    def _flag_enabled(value):
        """
        Return True when the raw environment value switches the old parser on.

        An unset variable and the explicit negatives in _DISABLED_FLAG_VALUES
        (compared case-insensitively, ignoring surrounding whitespace) count as off,
        so `GETGAUGE_USE_0_3_3_PARSER = false` no longer selects the old parser.
        """
        if value is None:
            return False
        return value.strip().lower() not in PythonFile._DISABLED_FLAG_VALUES

    @staticmethod
    def select_python_parser(parser=None):
        """
        Select default parser for loading and refactoring steps. Passing `redbaron` as argument
        will select the old parsing engine from v0.3.3.

        Replacing the redbaron parser was necessary to support Python 3 syntax. We have tried our
        best to make sure there is no impact on users. However, there may be regressions with
        the new parser backend.

        To revert to the old parser implementation, add `GETGAUGE_USE_0_3_3_PARSER=true` property
        to the `python.properties` file in the `<PROJECT_DIR>/env/default` directory.
        This property along with the redbaron parser will be removed in future releases.
        """
        use_old_parser = (
            parser == 'redbaron'
            or PythonFile._flag_enabled(os.environ.get('GETGAUGE_USE_0_3_3_PARSER'))
        )
        PythonFile.Class = RedbaronPythonFile if use_old_parser else ParsoPythonFile


# Select the default implementation at import time: PythonFile.Class starts as
# None, so PythonFile.parse() is unusable until a backend has been chosen.
PythonFile.select_python_parser()


class PythonFileABC(six.with_metaclass(ABCMeta)):
    """Interface that every parser backend for step implementation files provides."""

    @staticmethod
    def parse(file_path, content=None):
        """
        Build a parsed-file object for file_path.

        When content is None the source is read from file_path; when content is
        given, file_path is used only for error reporting.
        """
        raise NotImplementedError()

    @abstractmethod
    def iter_steps(self):
        """Yield the steps declared in the parsed file."""
        raise NotImplementedError()

    @abstractmethod
    def refactor_step(self, old_text, new_text, move_param_from_idx):
        """
        Rename the step whose text is old_text to new_text and rearrange the
        parameters of its implementing function.

        move_param_from_idx has one entry per parameter of the refactored
        function; each entry gives the position that parameter had in the old
        parameter list.
        """
        raise NotImplementedError()

    @abstractmethod
    def get_code(self):
        """Return the source code for the current state of the tree."""
        raise NotImplementedError()


# Register both implementations as virtual subclasses of the ABC, so that
# isinstance()/issubclass() checks against PythonFileABC succeed for them.
# Note: register() does not verify that the abstract methods are implemented.
PythonFileABC.register(ParsoPythonFile)
PythonFileABC.register(RedbaronPythonFile)
154 changes: 154 additions & 0 deletions getgauge/parser_parso.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
import ast
import six
import parso
import logging


# Load the grammar once at import time and reuse it for every parse() call.
# This also prevents problems with pyfakefs during testing for Python 3.7.
# NOTE(review): no version is passed to load_grammar(), so presumably the grammar
# of the running interpreter is used -- confirm against the parso documentation.
_parser = parso.load_grammar()


class ParsoPythonFile(object):
    """
    Step implementation file backed by a parso syntax tree.

    Offers step discovery (iter_steps) and in-place step refactoring
    (refactor_step) on top of the tree produced by parse().
    """

    @staticmethod
    def parse(file_path, content=None):
        """
        Create a ParsoPythonFile object with specified file_path and content. If content is None
        then, it is loaded from the file_path. Otherwise, file_path is only used for
        reporting errors.

        Returns None, after logging an error, when the content has a syntax error.
        """
        try:
            # Parso reads files in binary mode and converts to unicode using python_bytes_to_unicode()
            # function. As a result, we no longer have information about original file encoding and
            # output of module.get_content() can not be converted back to bytes. For now we can make a
            # compromise by reading the file ourselves and passing content to parse() function.
            if content is None:
                with open(file_path) as f:
                    content = f.read()
            py_tree = _parser.parse(content, path=file_path, error_recovery=False)
            return ParsoPythonFile(file_path, py_tree)
        except parso.parser.ParserSyntaxError as ex:
            logging.error("Failed to parse %s:%d '%s'", file_path, ex.error_leaf.line, ex.error_leaf.get_code())
            return None

    def __init__(self, file_path, py_tree):
        """Store the file path (used in error messages) and the parsed tree."""
        self.file_path = file_path
        self.py_tree = py_tree

    def _span_from_pos(self, start_pos, end_pos):
        """Convert two (line, column) positions into a span dictionary."""
        return {
            'start': start_pos[0],
            'startChar': start_pos[1],
            'end': end_pos[0],
            'endChar': end_pos[1],
        }

    def _iter_step_func_decorators(self):
        """
        Find top level functions with step decorator in parsed file.

        Yields (function node, decorator node) pairs, at most one per function.
        """
        for func in self.py_tree.iter_funcdefs():
            for decorator in func.get_decorators():
                # children[1] is the decorator name. For dotted decorators such as
                # `@module.step` it is a node without a `value` attribute, so use
                # getattr() instead of failing with AttributeError.
                if getattr(decorator.children[1], 'value', None) == 'step':
                    yield func, decorator
                    break

    def _step_decorator_args(self, decorator):
        """
        Get the arguments passed to step decorators converted to python objects.

        Returns the step text (a string) or the list of step aliases. Returns None,
        after logging an error, when the decorator does not have exactly one
        argument or the argument is not a string/list literal.
        """
        # Children are: '@', name, '(', <argument>, ')', newline.
        args = decorator.children[3:-2]
        if len(args) != 1:
            logging.error("Decorator step accepts only one argument - %s:%d",
                          self.file_path, decorator.start_pos[0])
            return None
        step = None
        try:
            # get_code() includes the whitespace prefix of the argument, which
            # literal_eval may reject as unexpected indentation; strip it first.
            step = ast.literal_eval(args[0].get_code().strip())
        except (ValueError, SyntaxError):
            pass
        if isinstance(step, six.string_types + (list,)):
            return step
        logging.error("Decorator step accepts either a string or a list of strings - %s:%d",
                      self.file_path, decorator.start_pos[0])
        return None

    def iter_steps(self):
        """
        Iterate over steps in the parsed file.

        Yields (step, function name, span) tuples, where span covers the code from
        the start of the decorator to the end of the function.
        """
        for func, decorator in self._iter_step_func_decorators():
            step = self._step_decorator_args(decorator)
            if step:
                span = self._span_from_pos(decorator.start_pos, func.end_pos)
                yield step, func.name.value, span

    def _find_step_node(self, step_text):
        """
        Find the ast node which contains the text.

        Returns (step node, function node), or (None, None) when no step matches.
        """
        for func, decorator in self._iter_step_func_decorators():
            step = self._step_decorator_args(decorator)
            if step is None:
                # Invalid decorator (e.g. bare `@step`): there may be no argument
                # node at children[3], so do not touch it.
                continue
            arg_node = decorator.children[3]
            if step == step_text:
                return arg_node, func
            if isinstance(step, list) and step_text in step:
                idx = step.index(step_text)
                items = arg_node.children[1]
                if getattr(items, 'type', None) == 'testlist_comp':
                    # Several entries: elements and commas alternate.
                    return items.children[idx * 2], func
                # Single entry list: the element sits directly between the brackets.
                return items, func
        return None, None

    def _refactor_step_text(self, step, old_text, new_text):
        """
        Replace old_text with new_text inside the step node.

        Returns (span of the node before the change, new node source).
        """
        step_span = self._span_from_pos(step.start_pos, step.end_pos)
        step.value = step.value.replace(old_text, new_text)
        return step_span, step.value

    def _create_param_node(self, parent, name, prefix, is_last):
        """
        Create a parameter node named `name`, positioned after the last node in `parent`.

        `parent` is the list of nodes built so far, `prefix` is the whitespace placed
        before the name. A trailing comma is added unless `is_last` is true.
        """
        start_pos = parent[-1].end_pos[0], parent[-1].end_pos[1] + len(prefix)
        children = [parso.python.tree.Name(name, start_pos, prefix)]
        if not is_last:
            children.append(parso.python.tree.Operator(',', children[-1].end_pos))
        return parso.python.tree.Param(children, parent)

    def _move_param_nodes(self, param_nodes, move_param_from_idx):
        """
        Build the parameter node list described by move_param_from_idx.

        Each entry is the index of an existing parameter to reuse; a negative entry
        adds a new parameter named `arg<position>`. The original list object is
        returned unchanged when no reordering is needed.
        """
        # Param nodes include opening and closing braces
        num_params = len(param_nodes) - 2
        # If the move list is exactly same as current params
        # list then no need to create a new list.
        if list(range(num_params)) == move_param_from_idx:
            return param_nodes
        # Get the prefix from second parameter to use with new parameters
        prefix = param_nodes[2].name.prefix if num_params > 1 else ' '
        new_param_nodes = [parso.python.tree.Operator('(', param_nodes[0].start_pos)]
        last_idx = len(move_param_from_idx) - 1
        for i, move_from in enumerate(move_param_from_idx):
            if move_from < 0:
                name = 'arg{}'.format(i)
            else:
                # +1 skips the opening brace at param_nodes[0]
                name = param_nodes[move_from + 1].name.value
            param = self._create_param_node(
                new_param_nodes,
                name,
                '' if i == 0 else prefix,
                i >= last_idx
            )
            new_param_nodes.append(param)
        new_param_nodes.append(parso.python.tree.Operator(')', new_param_nodes[-1].end_pos))
        # Change the parent to actual function
        for node in new_param_nodes:
            node.parent = param_nodes[0].parent
        return new_param_nodes

    def refactor_step(self, old_text, new_text, move_param_from_idx):
        """
        Find the step with old_text and change it to new_text. The step function
        parameters are also changed according to move_param_from_idx. Each entry in
        this list should specify the parameter position in the old parameter list
        (a negative entry adds a new parameter).

        Returns a list of (span, replacement text) diffs; empty when no step
        with old_text exists.
        """
        diffs = []
        step, func = self._find_step_node(old_text)
        if step is None:
            return diffs
        step_diff = self._refactor_step_text(step, old_text, new_text)
        diffs.append(step_diff)
        params_list_node = func.children[2]
        moved_params = self._move_param_nodes(params_list_node.children, move_param_from_idx)
        if params_list_node.children is not moved_params:
            # Record original parameter list span excluding braces
            params_span = self._span_from_pos(
                params_list_node.children[0].end_pos,
                params_list_node.children[-1].start_pos)
            params_list_node.children = moved_params
            # Get code for moved parameters excluding braces
            param_code = ''.join(p.get_code() for p in moved_params[1:-1])
            diffs.append((params_span, param_code))
        return diffs

    def get_code(self):
        """Returns current content of the tree."""
        return self.py_tree.get_code()
Loading

0 comments on commit 98b16ae

Please sign in to comment.