Skip to content

Commit

Permalink
community[patch]: Switch lang parsers to tree_sitter_language_pack
Browse files Browse the repository at this point in the history
tree-sitter-languages appears dormant, while tree-sitter-language-pack
supports tree-sitter 0.22 and many more languages. It should be pretty
much a drop-in replacement.

See:
grantjenks/py-tree-sitter-languages#64
https://pypi.org/project/tree-sitter-language-pack
  • Loading branch information
serpent213 committed Oct 6, 2024
1 parent 7a07196 commit f6b1ba4
Show file tree
Hide file tree
Showing 32 changed files with 36 additions and 36 deletions.
4 changes: 2 additions & 2 deletions docs/docs/integrations/document_loaders/source_code.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
"- Scala (*)\n",
"- TypeScript (*)\n",
"\n",
"Items marked with (*) require the packages `tree_sitter` and `tree_sitter_languages`.\n",
"Items marked with (*) require the packages `tree_sitter` and `tree_sitter_language_pack`.\n",
"It is straightforward to add support for additional languages using `tree_sitter`,\n",
"although this currently requires modifying LangChain.\n",
"\n",
Expand All @@ -48,7 +48,7 @@
"metadata": {},
"outputs": [],
"source": [
"%pip install -qU esprima esprima tree_sitter tree_sitter_languages"
"%pip install -qU esprima esprima tree_sitter tree_sitter_language_pack"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion libs/community/extended_testing_deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ tidb-vector>=0.0.3,<1.0.0
timescale-vector==0.0.1
tqdm>=4.48.0
tree-sitter>=0.20.2,<0.21
tree-sitter-languages>=1.8.0,<2
tree-sitter-language-pack>=0.2.0,<0.3
upstash-redis>=1.1.0,<2
upstash-ratelimit>=1.1.0,<2
vdms>=0.0.20
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class CSegmenter(TreeSitterSegmenter):
"""Code segmenter for C."""

def get_language(self) -> "Language":
from tree_sitter_languages import get_language
from tree_sitter_language_pack import get_language

return get_language("c")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class CPPSegmenter(TreeSitterSegmenter):
"""Code segmenter for C++."""

def get_language(self) -> "Language":
from tree_sitter_languages import get_language
from tree_sitter_language_pack import get_language

return get_language("cpp")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class CSharpSegmenter(TreeSitterSegmenter):
"""Code segmenter for C#."""

def get_language(self) -> "Language":
from tree_sitter_languages import get_language
from tree_sitter_language_pack import get_language

return get_language("c_sharp")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class ElixirSegmenter(TreeSitterSegmenter):
"""Code segmenter for Elixir."""

def get_language(self) -> "Language":
from tree_sitter_languages import get_language
from tree_sitter_language_pack import get_language

return get_language("elixir")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class GoSegmenter(TreeSitterSegmenter):
"""Code segmenter for Go."""

def get_language(self) -> "Language":
from tree_sitter_languages import get_language
from tree_sitter_language_pack import get_language

return get_language("go")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class JavaSegmenter(TreeSitterSegmenter):
"""Code segmenter for Java."""

def get_language(self) -> "Language":
from tree_sitter_languages import get_language
from tree_sitter_language_pack import get_language

return get_language("java")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class KotlinSegmenter(TreeSitterSegmenter):
"""Code segmenter for Kotlin."""

def get_language(self) -> "Language":
from tree_sitter_languages import get_language
from tree_sitter_language_pack import get_language

return get_language("kotlin")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ class LanguageParser(BaseBlobParser):
- TypeScript: "ts" (*)
Items marked with (*) require the packages `tree_sitter` and
`tree_sitter_languages`. It is straightforward to add support for additional
`tree_sitter_language_pack`. It is straightforward to add support for additional
languages using `tree_sitter`, although this currently requires modifying LangChain.
The language used for parsing can be configured, along with the minimum number of
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class LuaSegmenter(TreeSitterSegmenter):
"""Code segmenter for Lua."""

def get_language(self) -> "Language":
from tree_sitter_languages import get_language
from tree_sitter_language_pack import get_language

return get_language("lua")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class PerlSegmenter(TreeSitterSegmenter):
"""Code segmenter for Perl."""

def get_language(self) -> "Language":
from tree_sitter_languages import get_language
from tree_sitter_language_pack import get_language

return get_language("perl")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class PHPSegmenter(TreeSitterSegmenter):
"""Code segmenter for PHP."""

def get_language(self) -> "Language":
from tree_sitter_languages import get_language
from tree_sitter_language_pack import get_language

return get_language("php")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class RubySegmenter(TreeSitterSegmenter):
"""Code segmenter for Ruby."""

def get_language(self) -> "Language":
from tree_sitter_languages import get_language
from tree_sitter_language_pack import get_language

return get_language("ruby")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class RustSegmenter(TreeSitterSegmenter):
"""Code segmenter for Rust."""

def get_language(self) -> "Language":
from tree_sitter_languages import get_language
from tree_sitter_language_pack import get_language

return get_language("rust")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class ScalaSegmenter(TreeSitterSegmenter):
"""Code segmenter for Scala."""

def get_language(self) -> "Language":
from tree_sitter_languages import get_language
from tree_sitter_language_pack import get_language

return get_language("scala")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ def __init__(self, code: str):

try:
import tree_sitter # noqa: F401
import tree_sitter_languages # noqa: F401
import tree_sitter_language_pack # noqa: F401
except ImportError:
raise ImportError(
"Could not import tree_sitter/tree_sitter_languages Python packages. "
"Please install them with "
"`pip install tree-sitter tree-sitter-languages`."
"Could not import tree_sitter/tree_sitter_language_pack Python "
"packages. Please install them with "
"`pip install tree-sitter tree-sitter-language-pack`."
)

def is_valid(self) -> bool:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class TypeScriptSegmenter(TreeSitterSegmenter):
"""Code segmenter for TypeScript."""

def get_language(self) -> "Language":
from tree_sitter_languages import get_language
from tree_sitter_language_pack import get_language

return get_language("typescript")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from langchain_community.document_loaders.parsers.language.c import CSegmenter


@pytest.mark.requires("tree_sitter", "tree_sitter_languages")
@pytest.mark.requires("tree_sitter", "tree_sitter_language_pack")
class TestCSegmenter(unittest.TestCase):
def setUp(self) -> None:
self.example_code = """int main() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from langchain_community.document_loaders.parsers.language.cpp import CPPSegmenter


@pytest.mark.requires("tree_sitter", "tree_sitter_languages")
@pytest.mark.requires("tree_sitter", "tree_sitter_language_pack")
class TestCPPSegmenter(unittest.TestCase):
def setUp(self) -> None:
self.example_code = """int foo() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from langchain_community.document_loaders.parsers.language.csharp import CSharpSegmenter


@pytest.mark.requires("tree_sitter", "tree_sitter_languages")
@pytest.mark.requires("tree_sitter", "tree_sitter_language_pack")
class TestCSharpSegmenter(unittest.TestCase):
def setUp(self) -> None:
self.example_code = """namespace World
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from langchain_community.document_loaders.parsers.language.elixir import ElixirSegmenter


@pytest.mark.requires("tree_sitter", "tree_sitter_languages")
@pytest.mark.requires("tree_sitter", "tree_sitter_language_pack")
class TestElixirSegmenter(unittest.TestCase):
def setUp(self) -> None:
self.example_code = """@doc "some comment"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from langchain_community.document_loaders.parsers.language.go import GoSegmenter


@pytest.mark.requires("tree_sitter", "tree_sitter_languages")
@pytest.mark.requires("tree_sitter", "tree_sitter_language_pack")
class TestGoSegmenter(unittest.TestCase):
def setUp(self) -> None:
self.example_code = """func foo(a int) int {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from langchain_community.document_loaders.parsers.language.java import JavaSegmenter


@pytest.mark.requires("tree_sitter", "tree_sitter_languages")
@pytest.mark.requires("tree_sitter", "tree_sitter_language_pack")
class TestJavaSegmenter(unittest.TestCase):
def setUp(self) -> None:
self.example_code = """class Hello
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from langchain_community.document_loaders.parsers.language.kotlin import KotlinSegmenter


@pytest.mark.requires("tree_sitter", "tree_sitter_languages")
@pytest.mark.requires("tree_sitter", "tree_sitter_language_pack")
class TestKotlinSegmenter(unittest.TestCase):
def setUp(self) -> None:
self.example_code = """fun foo(a: Int): Int {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from langchain_community.document_loaders.parsers.language.lua import LuaSegmenter


@pytest.mark.requires("tree_sitter", "tree_sitter_languages")
@pytest.mark.requires("tree_sitter", "tree_sitter_language_pack")
class TestLuaSegmenter(unittest.TestCase):
def setUp(self) -> None:
self.example_code = """function F()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from langchain_community.document_loaders.parsers.language.perl import PerlSegmenter


@pytest.mark.requires("tree_sitter", "tree_sitter_languages")
@pytest.mark.requires("tree_sitter", "tree_sitter_language_pack")
class TestPerlSegmenter(unittest.TestCase):
def setUp(self) -> None:
self.example_code = """sub Hello {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from langchain_community.document_loaders.parsers.language.php import PHPSegmenter


@pytest.mark.requires("tree_sitter", "tree_sitter_languages")
@pytest.mark.requires("tree_sitter", "tree_sitter_language_pack")
class TestPHPSegmenter(unittest.TestCase):
def setUp(self) -> None:
self.example_code = """<?php
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from langchain_community.document_loaders.parsers.language.ruby import RubySegmenter


@pytest.mark.requires("tree_sitter", "tree_sitter_languages")
@pytest.mark.requires("tree_sitter", "tree_sitter_language_pack")
class TestRubySegmenter(unittest.TestCase):
def setUp(self) -> None:
self.example_code = """def foo
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from langchain_community.document_loaders.parsers.language.rust import RustSegmenter


@pytest.mark.requires("tree_sitter", "tree_sitter_languages")
@pytest.mark.requires("tree_sitter", "tree_sitter_language_pack")
class TestRustSegmenter(unittest.TestCase):
def setUp(self) -> None:
self.example_code = """fn foo() -> i32 {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from langchain_community.document_loaders.parsers.language.scala import ScalaSegmenter


@pytest.mark.requires("tree_sitter", "tree_sitter_languages")
@pytest.mark.requires("tree_sitter", "tree_sitter_language_pack")
class TestScalaSegmenter(unittest.TestCase):
def setUp(self) -> None:
self.example_code = """def foo() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
)


@pytest.mark.requires("tree_sitter", "tree_sitter_languages")
@pytest.mark.requires("tree_sitter", "tree_sitter_language_pack")
class TestTypeScriptSegmenter(unittest.TestCase):
def setUp(self) -> None:
self.example_code = """function foo(): number
Expand Down

0 comments on commit f6b1ba4

Please sign in to comment.