Skip to content

Downstream changes from main to c-cpp branch #80

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 29, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cldk/analysis/python/__init__.py
Original file line number Diff line number Diff line change
@@ -18,6 +18,6 @@
Python package
"""

from .python import PythonAnalysis
from .python_analysis import PythonAnalysis

__all__ = ["PythonAnalysis"]
Original file line number Diff line number Diff line change
@@ -18,17 +18,17 @@
Python module
"""

from abc import ABC
from pathlib import Path
from typing import Dict, List
from pandas import DataFrame
from typing import List

from cldk.analysis import SymbolTable
from cldk.analysis.python.treesitter import PythonSitter
from cldk.models.python.models import PyMethod, PyImport, PyModule, PyClass


class PythonAnalysis(SymbolTable):
"""Python Analysis Class"""

def __init__(
self,
analysis_backend: str,
@@ -48,13 +48,13 @@ def __init__(

# Initialize the analysis backend. Only the tree-sitter backend is supported
# for Python; every other backend name (including "codeql" and
# "codeanalyzer") is rejected with a message that names the backend.
if analysis_backend.lower() == "treesitter":
    self.analysis_backend: PythonSitter = PythonSitter()
else:
    # The f-prefix is required here: without it the literal text
    # "{analysis_backend}" is shown instead of the requested backend name.
    raise NotImplementedError(f"Support for {analysis_backend} has not been implemented yet.")

def get_methods(self) -> List[PyMethod]:
"""
@@ -89,14 +89,14 @@ def get_method_details(self, method_signature: str) -> PyMethod:

def is_parsable(self, source_code: str) -> bool:
    """
    Check if the code is parsable

    Args:
        source_code: source code

    Returns:
        True if the code is parsable, False otherwise
    """
    # Call the method on a real PythonSitter instance; invoking the unbound
    # method with this object as the receiver hands it the wrong type.
    return PythonSitter().is_parsable(source_code)

def get_raw_ast(self, source_code: str) -> str:
"""
@@ -107,9 +107,9 @@ def get_raw_ast(self, source_code: str) -> str:
Returns:
Tree: the raw AST
"""
return PythonSitter.get_raw_ast(self, source_code)
return PythonSitter().get_raw_ast(source_code)

def get_imports(self) -> List[PyImport]:
def get_imports(self) -> List[PyImport]:
"""
Given an application or a source code, get all the imports
"""
@@ -119,7 +119,7 @@ def get_variables(self, **kwargs):
"""
Given an application or a source code, get all the variables
"""
raise NotImplementedError(f"Support for this functionality has not been implemented yet.")
raise NotImplementedError("Support for this functionality has not been implemented yet.")

def get_classes(self) -> List[PyClass]:
"""
@@ -131,34 +131,34 @@ def get_classes_by_criteria(self, **kwargs):
"""
Given an application or a source code, get all the classes given the inclusion and exclusion criteria
"""
raise NotImplementedError(f"Support for this functionality has not been implemented yet.")
raise NotImplementedError("Support for this functionality has not been implemented yet.")

def get_sub_classes(self, **kwargs):
    """
    Given an application or a source code, get all the sub-classes

    Raises:
        NotImplementedError: always; this functionality is not implemented yet.
    """
    raise NotImplementedError("Support for this functionality has not been implemented yet.")

def get_nested_classes(self, **kwargs):
    """
    Given an application or a source code, get all the nested classes

    Raises:
        NotImplementedError: always; this functionality is not implemented yet.
    """
    raise NotImplementedError("Support for this functionality has not been implemented yet.")

def get_constructors(self, **kwargs):
    """
    Given an application or a source code, get all the constructors

    Raises:
        NotImplementedError: always; this functionality is not implemented yet.
    """
    raise NotImplementedError("Support for this functionality has not been implemented yet.")

def get_methods_in_class(self, **kwargs):
    """
    Given an application or a source code, get all the methods within the given class

    Raises:
        NotImplementedError: always; this functionality is not implemented yet.
    """
    raise NotImplementedError("Support for this functionality has not been implemented yet.")

def get_fields(self, **kwargs):
    """
    Given an application or a source code, get all the fields

    Raises:
        NotImplementedError: always; this functionality is not implemented yet.
    """
    raise NotImplementedError("Support for this functionality has not been implemented yet.")
Empty file.
283 changes: 283 additions & 0 deletions tests/analysis/python/test_python_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,283 @@
################################################################################
# Copyright IBM Corporation 2025
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

"""
Python Tests
"""
import os
from typing import List
from tree_sitter import Tree
import pytest

from cldk.analysis.python import PythonAnalysis
from cldk.utils.analysis_engine import AnalysisEngine
from cldk.models.python.models import PyClass, PyImport, PyMethod, PyModule

# Sample source analysed by the tests in this file. The assertions below expect
# it to yield 3 imports, 1 function and 1 class ("Calculator") with 7 methods.
# NOTE(review): `def env(env_var: str): -> str` is not valid Python (the colon
# precedes the return annotation) — confirm whether this is intentional.
PYTHON_CODE = """
import os
from typing import List
from math import *
def env(env_var: str): -> str
return os.getenv(env_var)
class Calculator():
'''Calculator Class'''
def __init__(self):
self._total = 0
@property
def total(self):
return self._total
@total.setter
def total(self, value):
self._total = value
def add(self, a, b):
total += a + b
return a + b
def subtract(self, a, b):
total += a - b
return a - b
def multiply(self, a, b):
total += (a * b)
return a * b
def divide(self, a, b):
total += (a / b)
return a / b
"""


def test_not_implemented():
    """It should raise NotImplementedError for every unsupported backend"""
    # CodeQL, CodeAnalyzer and an unknown backend name, checked in that order
    unsupported_backends = (AnalysisEngine.CODEQL, AnalysisEngine.CODEANALYZER, "unknown")
    for backend in unsupported_backends:
        with pytest.raises(NotImplementedError) as except_info:
            _ = PythonAnalysis(
                analysis_backend=backend,
                eager_analysis=True,
                project_dir=None,
                source_code=PYTHON_CODE,
                analysis_backend_path=None,
                analysis_json_path=None,
            )
        assert except_info.type == NotImplementedError


def test_get_methods():
    """It should return every method found in the sample source"""
    analyzer = PythonAnalysis(
        analysis_backend=AnalysisEngine.TREESITTER,
        eager_analysis=True,
        project_dir=None,
        source_code=PYTHON_CODE,
        analysis_backend_path=None,
        analysis_json_path=None,
    )

    methods = analyzer.get_methods()
    assert methods is not None
    assert isinstance(methods, list)
    assert len(methods) == 7
    assert all(isinstance(item, PyMethod) for item in methods)


def test_get_functions():
    """It should return every module-level function in the sample source"""
    analyzer = PythonAnalysis(
        analysis_backend=AnalysisEngine.TREESITTER,
        eager_analysis=True,
        project_dir=None,
        source_code=PYTHON_CODE,
        analysis_backend_path=None,
        analysis_json_path=None,
    )

    functions = analyzer.get_functions()
    assert functions is not None
    assert isinstance(functions, list)
    assert len(functions) == 1
    assert all(isinstance(item, PyMethod) for item in functions)


def test_get_all_modules(tmp_path):
    """It should return one module per Python file in the project directory"""
    analyzer = PythonAnalysis(
        analysis_backend=AnalysisEngine.TREESITTER,
        eager_analysis=True,
        project_dir=tmp_path,
        source_code=None,
        analysis_backend_path=None,
        analysis_json_path=None,
    )

    # Populate the project directory with two small modules
    fixtures = (
        ("hello.py", 'print("Hello, world!")'),
        ("bye.py", 'print("Goodbye, world!")'),
    )
    for file_name, content in fixtures:
        (tmp_path / file_name).write_text(content, encoding="utf-8")

    modules = analyzer.get_modules()
    assert modules is not None
    assert isinstance(modules, list)
    assert len(modules) == 2
    assert all(isinstance(item, PyModule) for item in modules)


def test_get_method_details():
    """It should return the details of the method matching a signature"""
    analyzer = PythonAnalysis(
        analysis_backend=AnalysisEngine.TREESITTER,
        eager_analysis=True,
        project_dir=None,
        source_code=PYTHON_CODE,
        analysis_backend_path=None,
        analysis_json_path=None,
    )

    signature = "add(self, a, b)"
    details = analyzer.get_method_details(signature)
    assert details is not None
    assert isinstance(details, PyMethod)
    assert details.full_signature == signature


def test_is_parsable():
    """It should tell parsable code apart from unparsable code"""
    analyzer = PythonAnalysis(
        analysis_backend=AnalysisEngine.TREESITTER,
        eager_analysis=True,
        project_dir=None,
        source_code=PYTHON_CODE,
        analysis_backend_path=None,
        analysis_json_path=None,
    )

    # (snippet, expected verdict) pairs: a complete one-liner, then one whose
    # conditional expression is left unfinished
    cases = (
        ("def is_parsable(self, code: str) -> bool: return True", True),
        ("def is_not_parsable(self, code: str) -> bool: return True if True else", False),
    )
    for code, expected in cases:
        assert analyzer.is_parsable(code) is expected


def test_get_raw_ast():
    """It should return the raw tree-sitter AST of the given source"""
    analyzer = PythonAnalysis(
        analysis_backend=AnalysisEngine.TREESITTER,
        eager_analysis=True,
        project_dir=None,
        source_code=PYTHON_CODE,
        analysis_backend_path=None,
        analysis_json_path=None,
    )

    tree = analyzer.get_raw_ast(PYTHON_CODE)
    assert tree is not None
    assert isinstance(tree, Tree)
    assert tree.root_node is not None


def test_get_imports():
    """It should return every import statement in the sample source"""
    analyzer = PythonAnalysis(
        analysis_backend=AnalysisEngine.TREESITTER,
        eager_analysis=True,
        project_dir=None,
        source_code=PYTHON_CODE,
        analysis_backend_path=None,
        analysis_json_path=None,
    )

    imports = analyzer.get_imports()
    assert imports is not None
    assert isinstance(imports, list)
    assert len(imports) == 3
    assert all(isinstance(item, PyImport) for item in imports)


def test_get_variables():
    """It should raise NotImplementedError when variables are requested"""
    analyzer = PythonAnalysis(
        analysis_backend=AnalysisEngine.TREESITTER,
        eager_analysis=True,
        project_dir=None,
        source_code=PYTHON_CODE,
        analysis_backend_path=None,
        analysis_json_path=None,
    )

    with pytest.raises(NotImplementedError) as except_info:
        analyzer.get_variables()
    assert except_info.type == NotImplementedError


def test_get_classes():
    """It should return the single Calculator class with its seven methods"""
    analyzer = PythonAnalysis(
        analysis_backend=AnalysisEngine.TREESITTER,
        eager_analysis=True,
        project_dir=None,
        source_code=PYTHON_CODE,
        analysis_backend_path=None,
        analysis_json_path=None,
    )

    classes = analyzer.get_classes()
    assert classes is not None
    assert isinstance(classes, list)
    assert len(classes) == 1

    calculator = classes[0]
    assert isinstance(calculator, PyClass)
    assert calculator.class_name == "Calculator"
    assert len(calculator.methods) == 7


def test_get_classes_by_criteria():
    """It should raise NotImplementedError when classes are filtered by criteria"""
    analyzer = PythonAnalysis(
        analysis_backend=AnalysisEngine.TREESITTER,
        eager_analysis=True,
        project_dir=None,
        source_code=PYTHON_CODE,
        analysis_backend_path=None,
        analysis_json_path=None,
    )

    with pytest.raises(NotImplementedError) as except_info:
        analyzer.get_classes_by_criteria()
    assert except_info.type == NotImplementedError


def test_get_sub_classes():
    """It should raise NotImplementedError when sub-classes are requested"""
    analyzer = PythonAnalysis(
        analysis_backend=AnalysisEngine.TREESITTER,
        eager_analysis=True,
        project_dir=None,
        source_code=PYTHON_CODE,
        analysis_backend_path=None,
        analysis_json_path=None,
    )

    with pytest.raises(NotImplementedError) as except_info:
        analyzer.get_sub_classes()
    assert except_info.type == NotImplementedError


def test_get_nested_classes():
    """It should raise NotImplementedError when nested classes are requested"""
    analyzer = PythonAnalysis(
        analysis_backend=AnalysisEngine.TREESITTER,
        eager_analysis=True,
        project_dir=None,
        source_code=PYTHON_CODE,
        analysis_backend_path=None,
        analysis_json_path=None,
    )

    with pytest.raises(NotImplementedError) as except_info:
        analyzer.get_nested_classes()
    assert except_info.type == NotImplementedError


def test_get_constructors():
    """It should raise NotImplementedError when constructors are requested"""
    analyzer = PythonAnalysis(
        analysis_backend=AnalysisEngine.TREESITTER,
        eager_analysis=True,
        project_dir=None,
        source_code=PYTHON_CODE,
        analysis_backend_path=None,
        analysis_json_path=None,
    )

    with pytest.raises(NotImplementedError) as except_info:
        analyzer.get_constructors()
    assert except_info.type == NotImplementedError


def test_get_methods_in_class():
    """It should raise NotImplementedError when a class's methods are requested"""
    analyzer = PythonAnalysis(
        analysis_backend=AnalysisEngine.TREESITTER,
        eager_analysis=True,
        project_dir=None,
        source_code=PYTHON_CODE,
        analysis_backend_path=None,
        analysis_json_path=None,
    )

    with pytest.raises(NotImplementedError) as except_info:
        analyzer.get_methods_in_class()
    assert except_info.type == NotImplementedError


def test_get_fields():
    """It should raise NotImplementedError when fields are requested"""
    analyzer = PythonAnalysis(
        analysis_backend=AnalysisEngine.TREESITTER,
        eager_analysis=True,
        project_dir=None,
        source_code=PYTHON_CODE,
        analysis_backend_path=None,
        analysis_json_path=None,
    )

    with pytest.raises(NotImplementedError) as except_info:
        analyzer.get_fields()
    assert except_info.type == NotImplementedError
212 changes: 212 additions & 0 deletions tests/analysis/python/test_python_sitter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
################################################################################
# Copyright IBM Corporation 2025
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

"""
Python Tests
"""
import os
from unittest.mock import patch
from typing import List
from tree_sitter import Tree

from cldk.analysis.python.treesitter import PythonSitter
from cldk.models.python.models import PyClass, PyImport, PyMethod, PyModule

# Sample source parsed by the tests in this file. The assertions below expect
# it to yield 3 imports, 1 function and 1 class ("Calculator") with 7 methods.
# NOTE(review): `def env(env_var: str): -> str` is not valid Python (the colon
# precedes the return annotation) — confirm whether this is intentional.
PYTHON_CODE = """
import os
from typing import List
from math import *
def env(env_var: str): -> str
return os.getenv(env_var)
class Calculator():
'''Calculator Class'''
def __init__(self):
self._total = 0
@property
def total(self):
return self._total
@total.setter
def total(self, value):
self._total = value
def add(self, a, b):
total += a + b
return a + b
def subtract(self, a, b):
total += a - b
return a - b
def multiply(self, a, b):
total += (a * b)
return a * b
def divide(self, a, b):
total += (a / b)
return a / b
"""


def test_is_parsable():
    """It should tell parsable code apart from unparsable code"""
    sitter = PythonSitter()

    valid_code = "def is_parsable(self, code: str) -> bool: return True"
    assert sitter.is_parsable(valid_code) is True

    broken_code = "def is_not_parsable(self, code: str) -> bool: return True if True else"
    assert sitter.is_parsable(broken_code) is False

    # Otherwise-valid code must be reported as unparsable when the parser
    # hands back no tree at all
    with patch("cldk.analysis.python.treesitter.python_sitter.Parser.parse", return_value=None):
        assert sitter.is_parsable(valid_code) is False

    # TODO: cover the exception path. Patching
    # "cldk.analysis.python.treesitter.python_sitter.Node.children" with
    # side_effect=RecursionError() was tried here and did not work; the cause
    # is still unknown (presumably Node attributes cannot be patched — confirm).


def test_get_raw_ast():
    """It should return the raw tree-sitter AST of the given source"""
    sitter = PythonSitter()

    tree = sitter.get_raw_ast(PYTHON_CODE)
    assert tree is not None
    assert isinstance(tree, Tree)
    assert tree.root_node is not None


def test_get_all_methods():
    """It should return every method found in the sample source"""
    sitter = PythonSitter()

    methods = sitter.get_all_methods(PYTHON_CODE)
    assert methods is not None
    assert isinstance(methods, list)
    assert len(methods) == 7
    assert all(isinstance(item, PyMethod) for item in methods)


def test_get_all_functions():
    """It should return every module-level function in the sample source"""
    sitter = PythonSitter()

    functions = sitter.get_all_functions(PYTHON_CODE)
    assert functions is not None
    assert isinstance(functions, list)
    assert len(functions) == 1
    assert all(isinstance(item, PyMethod) for item in functions)


def test_get_method_details():
    """It should return the details of the method matching a signature"""
    sitter = PythonSitter()
    signature = "add(self, a, b)"

    details = sitter.get_method_details(PYTHON_CODE, signature)
    assert details is not None
    assert isinstance(details, PyMethod)
    assert details.full_signature == signature

    # With no methods to search, the lookup must come back empty-handed
    with patch(
        "cldk.analysis.python.treesitter.python_sitter.PythonSitter.get_all_methods",
        return_value=[],
    ):
        details = sitter.get_method_details(PYTHON_CODE, signature)
        assert details is None


def test_get_all_imports():
    """It should return every import statement as source text"""
    sitter = PythonSitter()

    imports = sitter.get_all_imports(PYTHON_CODE)
    assert imports is not None
    assert isinstance(imports, list)
    assert len(imports) == 3
    for statement in ("import os", "from typing import List", "from math import *"):
        assert statement in imports


def test_get_module_details():
    """It should summarise the functions, classes and imports of a module"""
    sitter = PythonSitter()

    module = sitter.get_module_details(PYTHON_CODE)
    assert module is not None
    assert isinstance(module, PyModule)
    assert len(module.functions) == 1
    assert len(module.classes) == 1
    assert len(module.imports) == 3


def test_get_all_import_details():
    """It should return a PyImport record for every import statement"""
    sitter = PythonSitter()

    import_records = sitter.get_all_imports_details(PYTHON_CODE)
    assert import_records is not None
    assert isinstance(import_records, list)
    assert len(import_records) == 3
    assert all(isinstance(item, PyImport) for item in import_records)


def test_get_all_classes():
    """It should return the single Calculator class with its seven methods"""
    sitter = PythonSitter()

    classes = sitter.get_all_classes(PYTHON_CODE)
    assert classes is not None
    assert isinstance(classes, list)
    assert len(classes) == 1

    calculator = classes[0]
    assert isinstance(calculator, PyClass)
    assert calculator.class_name == "Calculator"
    assert len(calculator.methods) == 7


def test_get_all_modules(tmp_path):
    """It should return one module per Python file in the given directory"""
    sitter = PythonSitter()

    # Populate the temporary directory with two small modules
    fixtures = (
        ("hello.py", 'print("Hello, world!")'),
        ("bye.py", 'print("Goodbye, world!")'),
    )
    for file_name, content in fixtures:
        (tmp_path / file_name).write_text(content, encoding="utf-8")

    modules = sitter.get_all_modules(tmp_path)
    assert modules is not None
    assert isinstance(modules, list)
    assert len(modules) == 2
    assert all(isinstance(item, PyModule) for item in modules)