Skip to content

Address Feature Request Issue 114: Update APIs to match codeanalyzer 2.3.0 #115

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion cldk/analysis/java/codeanalyzer/bin/.gitignore

This file was deleted.

19 changes: 0 additions & 19 deletions cldk/analysis/java/codeanalyzer/bin/__init__.py

This file was deleted.

116 changes: 97 additions & 19 deletions cldk/analysis/java/codeanalyzer/codeanalyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from cldk.analysis.commons.treesitter import TreesitterJava
from cldk.models.java import JGraphEdges
from cldk.models.java.enums import CRUDOperationType
from cldk.models.java.models import JApplication, JCRUDOperation, JCallable, JField, JMethodDetail, JType, JCompilationUnit, JGraphEdgesST
from cldk.models.java.models import JApplication, JCRUDOperation, JCallable, JCallableParameter, JComment, JField, JMethodDetail, JType, JCompilationUnit, JGraphEdgesST
from cldk.utils.exceptions.exceptions import CodeanalyzerExecutionException

logger = logging.getLogger(__name__)
Expand All @@ -47,7 +47,6 @@ class JCodeanalyzer:
analysis_json_path (str or Path, optional): The path to save the intermediate code analysis outputs.
If None, the analysis will be read from the pipe.
analysis_level (str): The level of analysis ('symbol_table' or 'call_graph').
use_graalvm_binary (bool): If True, the GraalVM binary will be used instead of the codeanalyzer jar.
eager_analysis (bool): If True, the analysis will be performed every time the object is created.

Methods:
Expand Down Expand Up @@ -92,15 +91,13 @@ def __init__(
analysis_backend_path: Union[str, Path, None],
analysis_json_path: Union[str, Path, None],
analysis_level: str,
use_graalvm_binary: bool,
eager_analysis: bool,
target_files: List[str] | None,
) -> None:
self.project_dir = project_dir
self.source_code = source_code
self.analysis_backend_path = analysis_backend_path
self.analysis_json_path = analysis_json_path
self.use_graalvm_binary = use_graalvm_binary
self.eager_analysis = eager_analysis
self.analysis_level = analysis_level
self.target_files = target_files
Expand Down Expand Up @@ -128,27 +125,22 @@ def _get_codeanalyzer_exec(self) -> List[str]:
List[str]: The executable command for codeanalyzer.

Notes:
- If the use_graalvm_binary flag is set, the codeanalyzer binary from GraalVM will be used.
- If the analysis_backend_path is provided, the codeanalyzer jar from that path will be used.
- If not provided, the default codeanalyzer jar packaged under cldk/analysis/java/codeanalyzer/jar will be used.
"""

if self.use_graalvm_binary:
with resources.as_file(resources.files("cldk.analysis.java.codeanalyzer.bin") / "codeanalyzer") as codeanalyzer_bin_path:
codeanalyzer_exec = shlex.split(codeanalyzer_bin_path.__str__())
if self.analysis_backend_path:
analysis_backend_path = Path(self.analysis_backend_path)
logger.info(f"Using codeanalyzer jar from {analysis_backend_path}")
codeanalyzer_jar_file = next(analysis_backend_path.rglob("codeanalyzer-*.jar"), None)
if codeanalyzer_jar_file is None:
raise CodeanalyzerExecutionException("Codeanalyzer jar not found in the provided path.")
codeanalyzer_exec = shlex.split(f"java -jar {codeanalyzer_jar_file}")
else:
if self.analysis_backend_path:
analysis_backend_path = Path(self.analysis_backend_path)
logger.info(f"Using codeanalyzer jar from {analysis_backend_path}")
codeanalyzer_jar_file = next(analysis_backend_path.rglob("codeanalyzer-*.jar"), None)
if codeanalyzer_jar_file is None:
raise CodeanalyzerExecutionException("Codeanalyzer jar not found in the provided path.")
# Since the path to codeanalyzer.jar was not provided, we will use the default jar from the cldk/analysis/java/codeanalyzer/jar folder
with resources.as_file(resources.files("cldk.analysis.java.codeanalyzer.jar")) as codeanalyzer_jar_path:
codeanalyzer_jar_file = next(codeanalyzer_jar_path.rglob("codeanalyzer-*.jar"), None)
codeanalyzer_exec = shlex.split(f"java -jar {codeanalyzer_jar_file}")
else:
# Since the path to codeanalyzer.jar was not provided, we will use the default jar from the cldk/analysis/java/codeanalyzer/jar folder
with resources.as_file(resources.files("cldk.analysis.java.codeanalyzer.jar")) as codeanalyzer_jar_path:
codeanalyzer_jar_file = next(codeanalyzer_jar_path.rglob("codeanalyzer-*.jar"), None)
codeanalyzer_exec = shlex.split(f"java -jar {codeanalyzer_jar_file}")
return codeanalyzer_exec

@staticmethod
Expand Down Expand Up @@ -497,6 +489,29 @@ def get_method(self, qualified_class_name, method_signature) -> JCallable:
if cd == method_signature:
return ci.callable_declarations[cd]

def get_method_parameters(self, qualified_class_name, method_signature) -> List[JCallableParameter]:
    """Return the parameters of a method given the qualified class name and method signature.

    Args:
        qualified_class_name (str): The qualified name of the class.
        method_signature (str): The signature of the method.

    Returns:
        List[JCallableParameter]: The ``parameters`` attribute of the callable that
        ``get_method`` returns for the given class and signature.
    """
    # NOTE(review): get_method appears to fall through (returning None) when no
    # signature matches, in which case the attribute access below raises
    # AttributeError -- confirm whether callers rely on that.
    method = self.get_method(qualified_class_name, method_signature)
    return method.parameters

def get_parameters_from_callable(self, callable: JCallable) -> List[JCallableParameter]:
    """Return the parameters of the given callable.

    Args:
        callable (JCallable): The callable object.

    Returns:
        List[JCallableParameter]: The ``parameters`` attribute of ``callable``.
    """
    # The parameter name shadows the ``callable`` builtin; it is kept as-is because
    # renaming it would break callers that pass it by keyword.
    return callable.parameters

def get_java_file(self, qualified_class_name) -> str:
"""Should return java file name given the qualified class name.

Expand Down Expand Up @@ -1006,3 +1021,66 @@ def get_all_delete_operations(self) -> List[Dict[str, Union[JType, JCallable, Li
}
)
return crud_delete_operations

# Some APIs to process comments
def get_comments_in_a_method(self, qualified_class_name: str, method_signature: str) -> List[JComment]:
    """Get all comments in a method.

    Args:
        qualified_class_name (str): Qualified name of the class.
        method_signature (str): Signature of the method.

    Returns:
        List[JComment]: The ``comments`` attribute of the callable that ``get_method``
        returns for the given class and signature.
    """
    # Local renamed from ``callable`` so the builtin is not shadowed.
    # NOTE(review): presumably get_method yields None for an unknown signature, which
    # would surface here as AttributeError -- confirm.
    method = self.get_method(qualified_class_name, method_signature)
    return method.comments

def get_comments_in_a_class(self, qualified_class_name: str) -> List[JComment]:
    """Get all comments in a class.

    Args:
        qualified_class_name (str): Qualified name of the class.

    Returns:
        List[JComment]: The ``comments`` attribute of the type that ``get_class``
        returns for the given qualified name.
    """
    java_type = self.get_class(qualified_class_name)
    return java_type.comments

def get_comment_in_file(self, file_path: str) -> List[JComment]:
    """Get all comments in a file.

    Args:
        file_path (str): Path to the file, used as the key into the symbol table.

    Returns:
        List[JComment]: The ``comments`` attribute of the compilation unit stored
        under ``file_path`` in the symbol table.

    Raises:
        CodeanalyzerExecutionException: If ``file_path`` has no entry in the symbol table.
    """
    compilation_unit = self.get_symbol_table().get(file_path)
    if compilation_unit is None:
        raise CodeanalyzerExecutionException(f"File {file_path} not found in the symbol table.")
    return compilation_unit.comments

def get_all_comments(self) -> Dict[str, List[JComment]]:
    """Get all comments in the Java application.

    Returns:
        Dict[str, List[JComment]]: Mapping from each file path in the symbol table to
        the comments that ``get_comment_in_file`` returns for that path.
    """
    # Only the keys are needed here; the per-file lookup is delegated to
    # get_comment_in_file so both entry points share one code path.
    return {file_path: self.get_comment_in_file(file_path) for file_path in self.get_symbol_table()}

def get_all_docstrings(self) -> List[Tuple[str, JComment]]:
    """Get all docstrings in the Java application.

    Returns:
        List[Tuple[str, JComment]]: One ``(file_path, comment)`` pair for every comment
        returned by ``get_all_comments`` whose ``is_javadoc`` attribute is truthy, in
        the iteration order of ``get_all_comments``.
    """
    docstrings = []
    for file_path, file_comments in self.get_all_comments().items():
        # extend() avoids building a throwaway list per file before concatenating.
        docstrings.extend((file_path, comment) for comment in file_comments if comment.is_javadoc)
    return docstrings
Binary file not shown.
72 changes: 67 additions & 5 deletions cldk/analysis/java/java_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from cldk.analysis.commons.treesitter import TreesitterJava
from cldk.models.java import JCallable
from cldk.models.java import JApplication
from cldk.models.java.models import JCRUDOperation, JCompilationUnit, JMethodDetail, JType, JField
from cldk.models.java.models import JCRUDOperation, JCallableParameter, JComment, JCompilationUnit, JMethodDetail, JType, JField
from cldk.analysis.java.codeanalyzer import JCodeanalyzer


Expand All @@ -41,7 +41,6 @@ def __init__(
analysis_json_path: str | Path | None,
analysis_level: str,
target_files: List[str] | None,
use_graalvm_binary: bool,
eager_analysis: bool,
) -> None:
"""Initialization method for Java Analysis backend.
Expand All @@ -52,7 +51,6 @@ def __init__(
analysis_backend_path (str | None): The path to the analysis_backend, defaults to None and in the case of codeql, it is assumed that the cli is installed and available in the PATH. In the case of codeanalyzer the codeanalyzer.jar is downloaded from the latest release.
analysis_json_path (str | Path | None): The path to save the analysis database (analysis.json) to, defaults to None. If None, the analysis database is not persisted.
analysis_level (str): Analysis level (symbol-table, call-graph)
use_graalvm_binary (bool): A flag indicating whether to use the GraalVM binary for SDG analysis, defaults to False. If False, the default Java binary is used and one needs to have Java 17 or higher installed.
eager_analysis (bool): A flag indicating whether to perform eager analysis, defaults to False. If True, the analysis is performed eagerly. That is, the analysis.json file is created during analysis every time even if it already exists.

Raises:
Expand All @@ -69,7 +67,6 @@ def __init__(
self.analysis_json_path = analysis_json_path
self.analysis_backend_path = analysis_backend_path
self.eager_analysis = eager_analysis
self.use_graalvm_binary = use_graalvm_binary
self.target_files = target_files
self.treesitter_java: TreesitterJava = TreesitterJava()
# Initialize the analysis analysis_backend
Expand All @@ -79,7 +76,6 @@ def __init__(
eager_analysis=self.eager_analysis,
analysis_level=self.analysis_level,
analysis_json_path=self.analysis_json_path,
use_graalvm_binary=self.use_graalvm_binary,
analysis_backend_path=self.analysis_backend_path,
target_files=self.target_files,
)
Expand Down Expand Up @@ -327,6 +323,21 @@ def get_method(self, qualified_class_name: str, qualified_method_name: str) -> J
"""
return self.backend.get_method(qualified_class_name, qualified_method_name)

def get_method_parameters(self, qualified_class_name: str, qualified_method_name: str) -> List[JCallableParameter]:
    """Return the parameters of a method given the qualified class name and method name.

    Args:
        qualified_class_name (str): The qualified name of the class.
        qualified_method_name (str): The method identifier; it is forwarded to the
            backend, which treats it as the method signature.

    Returns:
        List[JCallableParameter]: Whatever the backend's ``get_method_parameters``
        returns for the given class and method.
    """
    return self.backend.get_method_parameters(qualified_class_name, qualified_method_name)

def get_java_file(self, qualified_class_name: str) -> str:
"""Should return a class given qualified class name.

Expand Down Expand Up @@ -606,3 +617,54 @@ def get_all_delete_operations(self) -> List[Dict[str, Union[JType, JCallable, Li
List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]: A list of all delete operations in the source code.
"""
return self.backend.get_all_delete_operations()

# Some APIs to process comments
def get_comments_in_a_method(self, qualified_class_name: str, method_signature: str) -> List[JComment]:
    """Get all comments in a method.

    Args:
        qualified_class_name (str): Qualified name of the class.
        method_signature (str): Signature of the method.

    Returns:
        List[JComment]: Whatever the backend's ``get_comments_in_a_method`` returns
        for the given class and signature.
    """
    return self.backend.get_comments_in_a_method(qualified_class_name, method_signature)

def get_comments_in_a_class(self, qualified_class_name: str) -> List[JComment]:
    """Get all comments in a class.

    Args:
        qualified_class_name (str): Qualified name of the class.

    Returns:
        List[JComment]: Whatever the backend's ``get_comments_in_a_class`` returns
        for the given class.
    """
    return self.backend.get_comments_in_a_class(qualified_class_name)

def get_comment_in_file(self, file_path: str) -> List[JComment]:
    """Get all comments in a file.

    Args:
        file_path (str): Path to the file.

    Returns:
        List[JComment]: Whatever the backend's ``get_comment_in_file`` returns for
        the given path.
    """
    return self.backend.get_comment_in_file(file_path)

def get_all_comments(self) -> Dict[str, List[JComment]]:
    """Get all comments in the Java application.

    Returns:
        Dict[str, List[JComment]]: Whatever the backend's ``get_all_comments``
        returns; per the annotation, a mapping from file path to that file's comments.
    """
    return self.backend.get_all_comments()

def get_all_docstrings(self) -> List[tuple[str, JComment]]:
    """Get all docstrings in the Java application.

    Returns:
        List[tuple[str, JComment]]: Whatever the backend's ``get_all_docstrings``
        returns.
    """
    # NOTE(review): the JCodeanalyzer backend annotates this as a list of
    # (file_path, comment) tuples, not a dict; the annotation here follows that --
    # confirm no caller depends on the previously advertised Dict shape.
    return self.backend.get_all_docstrings()
2 changes: 0 additions & 2 deletions cldk/analysis/python/python_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,12 @@ def __init__(
source_code: str | None,
analysis_backend_path: str | None,
analysis_json_path: str | Path | None,
use_graalvm_binary: bool = None,
) -> None:
self.project_dir = project_dir
self.source_code = source_code
self.analysis_json_path = analysis_json_path
self.analysis_backend_path = analysis_backend_path
self.eager_analysis = eager_analysis
self.use_graalvm_binary = use_graalvm_binary
self.analysis_backend: TreesitterPython = TreesitterPython()

def get_methods(self) -> List[PyMethod]:
Expand Down
2 changes: 0 additions & 2 deletions cldk/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ def analysis(
target_files: List[str] | None = None,
analysis_backend_path: str | None = None,
analysis_json_path: str | Path = None,
use_graalvm_binary: bool = False,
) -> JavaAnalysis:
"""
Initialize the preprocessor based on the specified language.
Expand Down Expand Up @@ -116,7 +115,6 @@ def analysis(
analysis_level=analysis_level,
analysis_backend_path=analysis_backend_path,
analysis_json_path=analysis_json_path,
use_graalvm_binary=use_graalvm_binary,
target_files=target_files,
eager_analysis=eager,
)
Expand Down
Loading