Replace tokenization with tokenize_rt (#77)
* Replace tokenization with tokenize_rt

The standard library module `tokenize` does not round-trip: untokenizing its
output does not reproduce the original source exactly, so we had to implement
our own tokenization layer on top of it. tokenize_rt solves exactly this
problem, so let's adopt it instead (see the round-trip sketch after this
message).

* Drop version pin

* Fix pip upgrade on Windows by invoking pip via `python -m pip`
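
As an illustration of the round-trip property this commit relies on, here is a minimal sketch (not part of the commit itself): tokenize_rt guarantees that rendering its tokens back to text reproduces the source exactly, whitespace included, which the standard library's tokenize/untokenize pair does not promise.

```python
import tokenize_rt

# Sketch only: tokenize_rt's round-trip guarantee.
src = "async def f(x):\n    return  x   # uneven spacing survives\n"
tokens = tokenize_rt.src_to_tokens(src)
assert tokenize_rt.tokens_to_src(tokens) == src  # byte-for-byte identical
```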
pquentin authored Jul 22, 2022
1 parent 0d21b04 commit 1b6fd0c
Showing 3 changed files with 30 additions and 48 deletions.
2 changes: 1 addition & 1 deletion ci.sh
```diff
@@ -4,7 +4,7 @@ set -ex

 BLACK_VERSION=22.6.0

-pip install -U pip setuptools wheel
+python -m pip install -U pip setuptools wheel

 python setup.py sdist --formats=zip
 pip install dist/*.zip
```
2 changes: 1 addition & 1 deletion setup.py
```diff
@@ -17,7 +17,7 @@
     include_package_data=True,
     packages=find_packages("src"),
     package_dir={"": "src"},
-    install_requires=[],
+    install_requires=["tokenize_rt"],
     keywords=["async"],
     python_requires=">=3.7",
     classifiers=[
```
74 changes: 28 additions & 46 deletions src/unasync/__init__.py
```diff
@@ -6,6 +6,7 @@
 import sys
 import tokenize as std_tokenize

+import tokenize_rt
 from setuptools.command import build_py as orig

 from ._version import __version__  # NOQA
```
```diff
@@ -65,35 +66,41 @@ def _match(self, filepath):
     def _unasync_file(self, filepath):
         with open(filepath, "rb") as f:
             encoding, _ = std_tokenize.detect_encoding(f.readline)
-            f.seek(0)
-            tokens = _tokenize(f)
+
+        with open(filepath, "rt", encoding=encoding) as f:
+            tokens = tokenize_rt.src_to_tokens(f.read())
             tokens = self._unasync_tokens(tokens)
-            result = _untokenize(tokens)
+            result = tokenize_rt.tokens_to_src(tokens)
             outfilepath = filepath.replace(self.fromdir, self.todir)
             os.makedirs(os.path.dirname(outfilepath), exist_ok=True)
             with open(outfilepath, "wb") as f:
                 f.write(result.encode(encoding))

     def _unasync_tokens(self, tokens):
         # TODO __await__, ...?
-        used_space = None
-        for space, toknum, tokval in tokens:
-            if tokval in ["async", "await"]:
-                # When removing async or await, we want to use the whitespace that
-                # was before async/await before the next token so that
-                # `print(await stuff)` becomes `print(stuff)` and not
-                # `print( stuff)`
-                used_space = space
+        skip_next = False
+        for i, token in enumerate(tokens):
+            if skip_next:
+                skip_next = False
+                continue
+
+            if token.src in ["async", "await"]:
+                # When removing async or await, we want to skip the following whitespace
+                # so that `print(await stuff)` becomes `print(stuff)` and not `print( stuff)`
+                skip_next = True
             else:
-                if toknum == std_tokenize.NAME:
-                    tokval = self._unasync_name(tokval)
-                elif toknum == std_tokenize.STRING:
-                    left_quote, name, right_quote = tokval[0], tokval[1:-1], tokval[-1]
-                    tokval = left_quote + self._unasync_name(name) + right_quote
-                if used_space is None:
-                    used_space = space
-                yield (used_space, tokval)
-                used_space = None
+                if token.name == "NAME":
+                    token = token._replace(src=self._unasync_name(token.src))
+                elif token.name == "STRING":
+                    left_quote, name, right_quote = (
+                        token.src[0],
+                        token.src[1:-1],
+                        token.src[-1],
+                    )
+                    token = token._replace(
+                        src=left_quote + self._unasync_name(name) + right_quote
+                    )
+
+                yield token

     def _unasync_name(self, name):
         if name in self.token_replacements:
```
```diff
@@ -122,31 +129,6 @@ def unasync_files(fpath_list, rules):
 Token = collections.namedtuple("Token", ["type", "string", "start", "end", "line"])


-def _tokenize(f):
-    last_end = (1, 0)
-    for tok in std_tokenize.tokenize(f.readline):
-        if tok.type == std_tokenize.ENCODING:
-            continue
-
-        if last_end[0] < tok.start[0]:
-            yield ("", std_tokenize.STRING, " \\\n")
-            last_end = (tok.start[0], 0)
-
-        space = ""
-        if tok.start > last_end:
-            assert tok.start[0] == last_end[0]
-            space = " " * (tok.start[1] - last_end[1])
-        yield (space, tok.type, tok.string)
-
-        last_end = tok.end
-        if tok.type in [std_tokenize.NEWLINE, std_tokenize.NL]:
-            last_end = (tok.end[0] + 1, 0)
-
-
-def _untokenize(tokens):
-    return "".join(space + tokval for space, tokval in tokens)
-
-
 _DEFAULT_RULE = Rule(fromdir="/_async/", todir="/_sync/")

```

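For context, here is a minimal self-contained sketch (hypothetical names; not the project's code) of the technique the new `_unasync_tokens` uses: walk the tokenize_rt token stream, rewrite NAME tokens through a replacement table, and drop `async`/`await` together with the single whitespace token that follows each.

```python
import tokenize_rt

# Hypothetical replacement table standing in for a Rule's token_replacements.
_REPLACEMENTS = {"AsyncClient": "SyncClient", "aclose": "close"}


def unasync_source(src):
    """Drop async/await and rewrite names while preserving formatting."""
    result = []
    skip_next = False
    for token in tokenize_rt.src_to_tokens(src):
        if skip_next:  # whitespace right after a removed async/await
            skip_next = False
            continue
        if token.src in ("async", "await"):
            # Skip the following whitespace token too, so that
            # `print(await stuff)` becomes `print(stuff)`, not `print( stuff)`.
            skip_next = True
            continue
        if token.name == "NAME":
            token = token._replace(src=_REPLACEMENTS.get(token.src, token.src))
        result.append(token)
    return tokenize_rt.tokens_to_src(result)


print(unasync_source("async def fetch(c: AsyncClient):\n    await c.aclose()\n"))
# def fetch(c: SyncClient):
#     c.close()
```

Unlike the removed `_tokenize`/`_untokenize` pair, no whitespace bookkeeping is needed: tokenize_rt materializes inter-token whitespace as UNIMPORTANT_WS tokens, which can be dropped or kept like any other token.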