Skip to content

Commit

Permalink
refactor!: compile each language individually
Browse files Browse the repository at this point in the history
  • Loading branch information
ObserverOfTime committed May 18, 2024
1 parent ad3519c commit 3cc7a64
Show file tree
Hide file tree
Showing 17 changed files with 212 additions and 165 deletions.
2 changes: 1 addition & 1 deletion .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ indent_size = 3
[*.md]
indent_size = 2

[*.yml]
[*.{yml,json}]
indent_size = 2
8 changes: 3 additions & 5 deletions .github/CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@

## Adding a new language

- Add the language repo to _README.rst_ (along with its license) and
_tree_sitter_languages/repos/repos.txt_.
- Add the language name to _tests/test_tree_sitter_languages.py_ (sorted).
- Add `TS_LANGUAGE_INIT(name)` and `TS_LANGUAGE_METHOD(name),` to
_tree_sitter_languages/languages.c_ (sorted).
- Add the language data to _languages.json_.
- Add the language repo and license to _README.rst_.
- Add the language name to _tests/test_tree_sitter_languages.py_.
- Submit a pull request.
8 changes: 0 additions & 8 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,6 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Clone repos
working-directory: tree_sitter_languages/repos
shell: bash
run: ./clone.sh
- name: Set up Python
uses: actions/setup-python@v5
with:
Expand All @@ -41,10 +37,6 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Clone repos
working-directory: tree_sitter_languages/repos
shell: bash
run: ./clone.sh
- name: Set up Python
uses: actions/setup-python@v5
with:
Expand Down
14 changes: 9 additions & 5 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,14 @@ on:
push:
branches: ["*"]
paths:
- setup.py
- languages.json
- tests/
- tree_sitter_languages/
pull_request:
paths:
- setup.py
- languages.json
- tests/
- tree_sitter_languages/

Expand All @@ -19,16 +23,16 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Clone repos
working-directory: tree_sitter_languages/repos
shell: bash
run: ./clone.sh
- name: Set up Python
uses: actions/setup-python@v5
with:
cache: pip
python-version: "3.9"
- name: Set up tree-sitter CLI
uses: tree-sitter/setup-action/cli/@v1
- name: Install package
run: pip install -v -e .
env:
TS_REGENERATE: "1"
- name: Run tests
run: python -munittest discover tests
run: python -munittest discover -v tests
5 changes: 2 additions & 3 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
include tree_sitter_languages/languages.h
include languages.json

prune tree_sitter_languages/repos
recursive-include tree_sitter_languages/repos src/**/*.[ch]
prune vendor
3 changes: 3 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ Install
pip install tree-sitter-languages
**Note:** building from source requires ``git`` and a C compiler.

Usage
=====

Expand Down
50 changes: 50 additions & 0 deletions languages.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
{
"dot": {
"repo": "https://github.com/rydesun/tree-sitter-dot"
},
"elisp": {
"repo": "https://github.com/Wilfred/tree-sitter-elisp"
},
"elm": {
"repo": "https://github.com/elm-tooling/tree-sitter-elm"
},
"fixed_form_fortran": {
"repo": "https://github.com/ZedThree/tree-sitter-fixed-form-fortran",
"branch": "f77"
},
"fortran": {
"repo": "https://github.com/stadelmanma/tree-sitter-fortran"
},
"gomod": {
"repo": "https://github.com/camdencheek/tree-sitter-go-mod"
},
"hack": {
"repo": "https://github.com/slackhq/tree-sitter-hack"
},
"hcl": {
"repo": "https://github.com/tree-sitter-grammars/tree-sitter-hcl",
"languages": {
"hcl": "",
"terraform": "dialects/terraform"
}
},
"kotlin": {
"repo": "https://github.com/fwcd/tree-sitter-kotlin"
},
"make": {
"repo": "https://github.com/tree-sitter-grammars/tree-sitter-make"
},
"objc": {
"repo": "https://github.com/tree-sitter-grammars/tree-sitter-objc"
},
"rst": {
"repo": "https://github.com/stsewd/tree-sitter-rst"
},
"scala": {
"repo": "https://github.com/tree-sitter/tree-sitter-scala"
},
"sql": {
"repo": "https://github.com/derekstride/tree-sitter-sql",
"branch": "gh-pages"
}
}
123 changes: 94 additions & 29 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,90 @@
from glob import glob
from json import loads
from pathlib import Path
from platform import system
from os import chdir, environ, getcwd
from typing import TypedDict

from setuptools import Extension, setup # type: ignore
from wheel.bdist_wheel import bdist_wheel # type: ignore
from setuptools import Extension, setup
from setuptools.command.build_ext import build_ext
from wheel.bdist_wheel import bdist_wheel

# Shape of one entry in languages.json.
#
# The optional-field types are written as string forward references on
# purpose: with the functional TypedDict syntax the values are evaluated at
# runtime, and ``str | None`` raises TypeError on Python < 3.10 (the test
# workflow runs this file on Python 3.9). Strings are only compiled, never
# evaluated, so they are safe on every supported interpreter while type
# checkers still read them as ``X | None``.
Language = TypedDict("Language", {
    "repo": str,
    "branch": "str | None",
    "languages": "dict[str, str] | None",
})

languages: dict[str, Language] = \
loads(Path(__file__).with_name("languages.json").read_text())

extensions: list[Extension] = []

common_source = Path(__file__).parent / "tree_sitter_languages" / "language.c"

# Register one extension module per grammar. An entry in languages.json either
# describes a single grammar named after its key, or lists several grammars
# under "languages" (name -> subdirectory inside the repo).
for lang, data in languages.items():
    for name in data.get("languages", {lang: ""}):
        extensions.append(
            Extension(
                name=f"tree_sitter_languages._language.{name}",
                sources=[str(common_source)],
                # Not a real include path yet: this slot carries the
                # languages.json key so BuildExt.build_extension can pop it
                # and replace it with the grammar's src directory.
                include_dirs=[lang],
                define_macros=[
                    ("PY_SSIZE_T_CLEAN", None),
                    ("TREE_SITTER_HIDE_SYMBOLS", None),
                    # language.c refuses to compile without this macro.
                    ("TS_LANGUAGE_NAME", name),
                ],
                extra_compile_args=[
                    "/std:c11",
                    "/wd4244",
                ] if system() == "Windows" else [
                    "-std=c11",
                    "-fvisibility=hidden",
                    "-Wno-cast-function-type",
                    "-Wno-unused-but-set-variable",
                    "-Werror=implicit-function-declaration",
                ],
                # A grammar that fails to build is skipped instead of
                # aborting the whole build.
                optional=True,
                py_limited_api=True,
            )
        )


class BuildExt(build_ext):
    """``build_ext`` command that fetches a grammar's sources before compiling.

    Each extension is expected to arrive with ``include_dirs`` holding exactly
    one item: the key of its entry in ``languages.json``.
    """

    def build_extension(self, ext: Extension):
        """Clone or update the grammar repository for ``ext`` and build it.

        The repository is checked out under ``vendor/<key>`` in the current
        working directory (sparse and shallow, ``src`` directories only). If
        the ``TS_REGENERATE`` environment variable is set, the parser is
        regenerated with the ``tree-sitter`` CLI first. All ``*.c`` files in
        the grammar's ``src`` directory are then added to the extension's
        sources and the directory becomes its only include path.
        """
        # Take the languages.json key back out of the include_dirs slot.
        repo_key = ext.include_dirs.pop()
        lang = languages[repo_key]
        cwd = getcwd()
        repo_dir = Path(cwd) / "vendor" / repo_key
        # spawn() takes a list of strings, so pass git a plain relative path
        # rather than a Path object.
        repo_arg = str(repo_dir.relative_to(cwd))

        if not repo_dir.is_dir():
            clone = ["git", "clone", "-q", "--depth=1", "--sparse"]
            if branch := lang.get("branch"):
                clone.append(f"--branch={branch}")
            clone.extend([lang["repo"], repo_arg])
            self.spawn(clone)
            self.spawn([
                "git", "-C", repo_arg,
                "sparse-checkout", "set", "--no-cone", "/**/src/**"
            ])
        else:
            self.spawn([
                "git", "-C", repo_arg,
                "pull", "-q", "--depth=1"
            ])

        # The last component of the dotted extension name is the grammar name;
        # multi-grammar repos map it to a subdirectory, others use the root.
        grammar = ext.name.split(".")[-1]
        grammar_dir = repo_dir / lang.get("languages", {grammar: ""})[grammar]
        src = grammar_dir / "src"
        if "TS_REGENERATE" in environ:
            chdir(grammar_dir)
            try:
                self.spawn([
                    "tree-sitter", "generate",
                    "--no-bindings", "src/grammar.json"
                ])
            finally:
                # Always restore the working directory: a failed generate
                # must not leave later extensions building from inside
                # this grammar's checkout.
                chdir(cwd)
        ext.sources.extend(map(str, src.glob("*.c")))
        ext.include_dirs = [str(src)]

        return super().build_extension(ext)


class BdistWheel(bdist_wheel):
Expand All @@ -13,32 +95,15 @@ def get_tag(self):
return python, abi, platform


sources = glob('tree_sitter_languages/repos/**/src/*.c', recursive=True)
sources.append("tree_sitter_languages/languages.c")

setup(
packages=["tree_sitter_languages"],
include_package_data=False,
ext_modules=[
Extension(
name="tree_sitter_languages.languages",
sources=sources,
define_macros=[
("PY_SSIZE_T_CLEAN", None),
("TREE_SITTER_HIDE_SYMBOLS", None),
],
extra_compile_args=[
"-std=c11",
"-fvisibility=hidden",
"-Wno-cast-function-type",
"-Wno-unused-but-set-variable",
"-Werror=implicit-function-declaration",
] if system() != "Windows" else [
"/std:c11",
"/wd4244",
],
py_limited_api=True
)
packages=[
"tree_sitter_languages",
"tree_sitter_languages._language",
],
cmdclass={"bdist_wheel": BdistWheel},
include_package_data=False,
ext_modules=extensions,
cmdclass={
"build_ext": BuildExt,
"bdist_wheel": BdistWheel,
},
)
4 changes: 2 additions & 2 deletions tests/test_tree_sitter_languages.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,5 @@ def test_get_language(self):
self.assertIsInstance(get_language(language), Language)

def test_invalid_name(self):
self.assertRaises(AttributeError, get_language, "invalid")
self.assertRaises(AttributeError, get_parser, "invalid")
self.assertRaises(LookupError, get_language, "invalid")
self.assertRaises(LookupError, get_parser, "invalid")
13 changes: 8 additions & 5 deletions tree_sitter_languages/__init__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
"""Tree-sitter languages"""

from tree_sitter import Language, Parser
from importlib import import_module as _import

from . import languages
from tree_sitter import Language, Parser


def get_language(name: str) -> Language:
    """Get the language with the given name.

    Imports the ``_language.<name>`` submodule of this package and wraps the
    value returned by its ``language()`` function in a ``Language``.

    Raises:
        LookupError: if that submodule cannot be found.
    """
    try:
        module = _import(f"._language.{name}", __package__)
    except ModuleNotFoundError as err:
        # Chain explicitly so the traceback shows the failed import as the
        # cause rather than as an error raised while handling another one.
        raise LookupError(f"Language not found: {name}") from err
    return Language(module.language())


def get_parser(language: str) -> Parser:
Expand Down
1 change: 1 addition & 0 deletions tree_sitter_languages/_language/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

39 changes: 39 additions & 0 deletions tree_sitter_languages/language.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#include <Python.h>

typedef struct TSLanguage TSLanguage;

#ifndef TS_LANGUAGE_NAME
#error TS_LANGUAGE_NAME must be defined
#endif

/*
 * Two-level helpers: the outer macro (str / cat) lets its arguments be
 * macro-expanded first, so that TS_LANGUAGE_NAME is replaced by the actual
 * language name before the inner macro applies # (stringify) or ## (paste).
 */
#define _str(s) #s
#define str(s) _str(s)
#define _cat(a, b) a##b
#define cat(a, b) _cat(a, b)

/* Names derived from TS_LANGUAGE_NAME, e.g. for "elm":
 * tree_sitter_elm, elm_language and PyInit_elm. */
#define TS_LANGUAGE_FUNC cat(tree_sitter_, TS_LANGUAGE_NAME)
#define TS_LANGUAGE_METHOD cat(TS_LANGUAGE_NAME, _language)
#define TS_LANGUAGE_MODULE cat(PyInit_, TS_LANGUAGE_NAME)

TSLanguage *TS_LANGUAGE_FUNC(void);

/*
 * Python-callable `language()`: calls the grammar's tree_sitter_<name>()
 * function and returns the resulting pointer as a Python int.
 * Both arguments are unused.
 */
static PyObject *TS_LANGUAGE_METHOD(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args)) {
    TSLanguage *language = TS_LANGUAGE_FUNC();
    return PyLong_FromVoidPtr(language);
}

/* Method table for the module: a single no-argument function, "language". */
static PyMethodDef methods[] = {
{"language", TS_LANGUAGE_METHOD, METH_NOARGS, NULL},
{NULL, NULL, 0, NULL} /* sentinel marking the end of the table */
};

/*
 * Module definition. The module is named after TS_LANGUAGE_NAME (stringified),
 * has no docstring, and exposes only the functions listed in `methods`.
 */
static struct PyModuleDef module = {
.m_base = PyModuleDef_HEAD_INIT,
.m_name = str(TS_LANGUAGE_NAME),
.m_doc = NULL,
.m_size = -1, /* no per-module state is allocated */
.m_methods = methods
};

PyMODINIT_FUNC TS_LANGUAGE_MODULE(void) {
return PyModule_Create(&module);
}
Loading

0 comments on commit 3cc7a64

Please sign in to comment.