Remove duplicate token matcher Python code #205

Merged 4 commits on Jan 4, 2024
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -7,6 +7,9 @@ This project adheres to [Semantic Versioning](http://semver.org).
This document is formatted according to the principles of [Keep A CHANGELOG](http://keepachangelog.com).

## [Unreleased]
### Changed
- [Python] Removed duplicate code in markdown token matcher ([#205](https://github.com/cucumber/gherkin/pull/205))

### Added
- (i18n) Added Vietnamese translation of "Rule"

1 change: 1 addition & 0 deletions python/.gitignore
@@ -5,6 +5,7 @@ acceptance/
*.pyc
dist/
build/
venv/
.tox/
.coverage
.coverage.*
1 change: 1 addition & 0 deletions python/MANIFEST
@@ -18,6 +18,7 @@ gherkin/parser.py
gherkin/token.py
gherkin/token_formatter_builder.py
gherkin/token_matcher.py
gherkin/token_matcher_markdown.py
gherkin/token_scanner.py
gherkin/pickles/__init__.py
gherkin/pickles/compiler.py
6 changes: 5 additions & 1 deletion python/gherkin/token_matcher.py
@@ -112,11 +112,15 @@ def match_DocStringSeparator(self, token):
# close
return self._match_DocStringSeparator(token, self._active_doc_string_separator, False)

@staticmethod
def _default_docstring_content_type():
return None

def _match_DocStringSeparator(self, token, separator, is_open):
if not token.line.startswith(separator):
return False

content_type = None
content_type = self._default_docstring_content_type()
if is_open:
content_type = token.line.get_rest_trimmed(len(separator))
self._active_doc_string_separator = separator
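The hunk above introduces a small hook in the base `TokenMatcher`: `_default_docstring_content_type()` supplies the default doc string content type, and `_match_DocStringSeparator` now starts from that value instead of a hard-coded `None`. The Markdown matcher only overrides the hook (returning `''`), which is what allows its duplicated copy of `_match_DocStringSeparator` to be deleted in the next file. A minimal, self-contained sketch of the pattern, with simplified class and method names rather than the library's real API:

```python
# Minimal sketch of the template-method hook used in this PR.
# Names are simplified for illustration; the real classes are
# gherkin.token_matcher.TokenMatcher and
# gherkin.token_matcher_markdown.GherkinInMarkdownTokenMatcher.

class BaseDocStringMatcher:
    @staticmethod
    def _default_docstring_content_type():
        # Plain Gherkin represents an absent content type as None.
        return None

    def match_separator(self, line, separator, is_open):
        if not line.startswith(separator):
            return False
        # Start from the subclass-specific default instead of a hard-coded value.
        content_type = self._default_docstring_content_type()
        if is_open:
            content_type = line[len(separator):].strip()
        return ("DocStringSeparator", content_type)


class MarkdownDocStringMatcher(BaseDocStringMatcher):
    @staticmethod
    def _default_docstring_content_type():
        # Gherkin-in-Markdown keeps its historical default of '' rather than None.
        return ''


if __name__ == "__main__":
    print(BaseDocStringMatcher().match_separator('"""', '"""', is_open=False))
    # -> ('DocStringSeparator', None)
    print(MarkdownDocStringMatcher().match_separator("```", "```", is_open=False))
    # -> ('DocStringSeparator', '')
```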
94 changes: 8 additions & 86 deletions python/gherkin/token_matcher_markdown.py
@@ -1,24 +1,16 @@
import re
from collections import defaultdict
from .dialect import Dialect
from .errors import NoSuchLanguageException
from .token_matcher import TokenMatcher

KEYWORD_PREFIX_BULLET = '^(\\s*[*+-]\\s*)'
KEYWORD_PREFIX_HEADER = '^(#{1,6}\\s)'

class GherkinInMarkdownTokenMatcher(object):
LANGUAGE_RE = re.compile(r"^\s*#\s*language\s*:\s*([a-zA-Z\-_]+)\s*$")
class GherkinInMarkdownTokenMatcher(TokenMatcher):

def __init__(self, dialect_name='en'):
self._default_dialect_name = dialect_name
self._change_dialect(dialect_name)
self.reset()
super(GherkinInMarkdownTokenMatcher, self).__init__(dialect_name)

def reset(self):
if self.dialect_name != self._default_dialect_name:
self._change_dialect(self._default_dialect_name)
self._indent_to_remove = 0
self._active_doc_string_separator = None
super(GherkinInMarkdownTokenMatcher, self).reset()
self.matched_feature_line=False

def match_FeatureLine(self, token):
@@ -52,7 +44,7 @@ def match_ExamplesLine(self, token):
return self._match_title_line(KEYWORD_PREFIX_HEADER, self.dialect.examples_keywords, ':', token, 'ExamplesLine')

def match_TableRow(self, token):
# Gherkin tables must be indented 2-5 spaces in order to be distinguidedn from non-Gherkin tables
# Gherkin tables must be indented 2-5 spaces to be distinguishable from non-Gherkin tables

if re.match('^\\s\\s\\s?\\s?\\s?\\|',token.line.get_line_text(0)):
table_cells = token.line.table_cells
@@ -119,7 +111,6 @@ def match_Language(self, token):
return False

def match_TagLine(self, token):

tags = []
matching_tags = re.finditer('`(@[^`]+)`', token.line.get_line_text())
idx=0
@@ -144,88 +135,19 @@ def match_DocStringSeparator(self, token):
# close
return self._match_DocStringSeparator(token, self._active_doc_string_separator, False)

def _match_DocStringSeparator(self, token, separator, is_open):
if not token.line.startswith(separator):
return False

content_type = ''
if is_open:
content_type = token.line.get_rest_trimmed(len(separator))
self._active_doc_string_separator = separator
self._indent_to_remove = token.line.indent
else:
self._active_doc_string_separator = None
self._indent_to_remove = 0

# TODO: Use the separator as keyword. That's needed for pretty printing.
self._set_token_matched(token, 'DocStringSeparator', content_type, separator)
return True

def match_Other(self, token):
# take the entire line, except removing DocString indents
text = token.line.get_line_text(self._indent_to_remove)
self._set_token_matched(token, 'Other', self._unescaped_docstring(text), indent=0)
return True

def match_EOF(self, token):
if not token.eof():
return False

self._set_token_matched(token, 'EOF')
return True
@staticmethod
def _default_docstring_content_type():
return ''

def _match_title_line(self, prefix, keywords, keywordSuffix, token, token_type):

keywords_or_list="|".join(map(lambda x: re.escape(x), keywords))
match = re.search(u'{}({}){}(.*)'.format(prefix, keywords_or_list, keywordSuffix), token.line.get_line_text())
indent = token.line.indent
result = False

if(match):
matchedKeyword = match.group(2)
indent += len(match.group(1))
self._set_token_matched(token, token_type, match.group(3).strip(), matchedKeyword, indent=indent)
return True
return False

def _set_token_matched(self, token, matched_type, text=None,
keyword=None, keyword_type=None, indent=None, items=None):
if items is None:
items = []
token.matched_type = matched_type
# text == '' should not result in None
token.matched_text = text.rstrip('\r\n') if text is not None else None
token.matched_keyword = keyword
token.matched_keyword_type = keyword_type
if indent is not None:
token.matched_indent = indent
else:
token.matched_indent = token.line.indent if token.line else 0
token.matched_items = items
token.location['column'] = token.matched_indent + 1
token.matched_gherkin_dialect = self.dialect_name

def _change_dialect(self, dialect_name, location=None):
dialect = Dialect.for_name(dialect_name)
if not dialect:
raise NoSuchLanguageException(dialect_name, location)

self.dialect_name = dialect_name
self.dialect = dialect
self.keyword_types = defaultdict(list)
for keyword in self.dialect.given_keywords:
self.keyword_types[keyword].append('Context')
for keyword in self.dialect.when_keywords:
self.keyword_types[keyword].append('Action')
for keyword in self.dialect.then_keywords:
self.keyword_types[keyword].append('Outcome')
for keyword in self.dialect.and_keywords + self.dialect.but_keywords:
self.keyword_types[keyword].append('Conjunction')

def _unescaped_docstring(self, text):
if self._active_doc_string_separator == '"""':
return text.replace('\\"\\"\\"', '"""')
elif self._active_doc_string_separator == '```':
return text.replace('\\`\\`\\`', '```')
else:
return text
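
With the duplication gone, `GherkinInMarkdownTokenMatcher` keeps only its Markdown-specific matching rules and inherits everything else from `TokenMatcher`. A hypothetical usage sketch follows; the exact `Parser.parse` signature (a source string plus a matcher) is an assumption here and may differ between gherkin versions:

```python
# Hypothetical usage sketch (not part of this PR). It assumes Parser.parse
# accepts a feature source plus a token matcher; check the signature in your
# gherkin version before relying on it.
from gherkin.parser import Parser
from gherkin.token_matcher_markdown import GherkinInMarkdownTokenMatcher

markdown_feature = (
    "# Feature: Minimal\n"
    "## Scenario: a scenario\n"
    "- Given a step\n"
)

parser = Parser()
document = parser.parse(markdown_feature, GherkinInMarkdownTokenMatcher('en'))
print(document['feature']['name'])  # expected to print the feature name, "Minimal"
```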