diff --git a/Dockerfile b/Dockerfile index e7ab747..75679e9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,21 +1,13 @@ FROM python:3.8.16-slim -#USER admin -#install git first -#WORKDIR /server +RUN apt-get update && apt-get install git -y + RUN python -m pip install --upgrade pip COPY requirements.txt . RUN pip install -r requirements.txt -COPY . . +COPY . ./app +WORKDIR app RUN pip install -e . -#CMD [ "python", "-m" , "flask", "run", "--host=0.0.0.0"] - -RUN #cd /server -#WORKDIR /server - - -#CMD [ "flask", "--debug", "run", "--host=0.0.0.0"] -#CMD [ "python", "app.py"] - +CMD [ "flask", "--debug", "run", "--host=0.0.0.0"] diff --git a/app.py b/app.py index d01ad87..8f2613b 100644 --- a/app.py +++ b/app.py @@ -1,81 +1,136 @@ -import abc -import os -import sys - -from flask import Flask, render_template, send_file, request, url_for, redirect +import pandas as pd +from flask import Flask, redirect, render_template, request, send_file, url_for, escape from pystruct.objects.full_report import FullReport -from pystruct.objects.grab_code import grab_code -import pystruct +from pystruct.plat.dataset_controller import DatasetController +from pystruct.utils.object_utils import get_all_concrete_object_classes +from pystruct.utils.object_utils import get_object_class_from_class_name + -from pystruct.objects.imports_data_objects import * -from pystruct.objects.metric_tables import * -from pystruct.objects.metric_obj import * -from pystruct.objects.uml_graph_obj import * +dataset_controller = DatasetController.get_instance() +dataset_name = 'No project selected' if dataset_controller.current_dataset is None else dataset_controller.current_dataset.dataset_name app = Flask(__name__) app.config['SECRET_KEY'] = 'test_key' - - -def all_subclasses(cls): - return set(cls.__subclasses__()).union( - [s for c in cls.__subclasses__() for s in all_subclasses(c)]) +debug_flag = True +app.logger.info(f"{debug_flag=}") +app.logger.info(f"{dataset_controller.current_dataset=}") 
@app.route('/') def main(): + if dataset_controller.current_dataset is None: + return redirect(url_for('project')) + + global dataset_name + dataset_name = dataset_controller.current_dataset.name + table_of_content_dict = {k: v.__name__ for k, v in FullReport.content_dict.items()} - debug_flag = app.debug - # all_objects = sorted([_cls.__name__ for _cls in all_subclasses(AbstractObject) - # if (issubclass(_cls, HTMLObject) and not isinstance(_cls, abc.ABC))]) - all_objects = sorted([_cls.__name__ for _cls in all_subclasses(AbstractObject) - if (not isinstance(_cls, abc.ABC))]) - return render_template('index.html', **locals()) + return render_template('index.html', **locals(), **globals()) -@app.route('/obj/') -def obj(obj_class): - cls = getattr(sys.modules[__name__], obj_class) - html_object = cls().data() - return render_template('objects.html', **locals()) +@app.route('/obj/') +def obj(obj_class_name): + print("debug->", dataset_controller.current_dataset.name) + cls = get_object_class_from_class_name(obj_class_name.replace(' ', '')) + html_object = cls().to_html() + return render_template('objects.html', **locals(), **globals()) -@app.route('/build_obj/') -def build_obj(obj_class): - cls = getattr(sys.modules[__name__], obj_class) +@app.route('/build_obj/') +def build_obj(obj_class_name): + cls = get_object_class_from_class_name(obj_class_name.replace(' ', '')) cls().delete() - html_object = cls().build() - return render_template('objects.html', **locals()) + html_object = cls().to_html() + return render_template('objects.html', **locals(), **globals()) -@app.route('/download/') -def download_obj(obj_class): - getattr(sys.modules[__name__], obj_class.split('.')[0])().data() # pre load the obj - filepath = os.path.join(app.root_path, 'report_files/objs/', obj_class) +@app.route('/download/') +def download_obj(obj_class_name): + cls = get_object_class_from_class_name(obj_class_name.replace(' ', '')) + cls().data() + filepath = 
dataset_controller.current_dataset.objects_directory / "html" / f"{obj_class_name}.html" app.logger.info(f"{filepath}") return send_file(filepath, as_attachment=True) -@app.route('/load/', methods=['GET', 'POST']) -def load_project(): - app.logger.info(f"LOAD_PROJECT> {request.method}") - error_message = '' - source = None - if request.method == 'POST': - if "filepath_input" in request.form: - source = request.form['filepath_input'] - elif "giturl_input" in request.form: - source = request.form['giturl_input'] +@app.route('/project/', methods=['GET']) +def project(): + existing_dataset_names = [dataset.name for dataset in dataset_controller.all_datasets] + return render_template('project_menu.html', **locals(), **globals()) + + +@app.route('/project/open/', methods=['POST']) +def open_project(): + dataset_name = request.form['load_project'] + dataset_controller.open(dataset_name) + return redirect(url_for('main')) + + +@app.route('/project/new/source/', methods=['POST']) +def new_source_project(): + source = request.form['filepath_input'] + dataset_controller.new(dir_path=source) + return redirect(url_for('main')) + +@app.route('/project/new/git/', methods=['POST']) +def new_git_project(): + git_url = request.form['git_url'] + source_directory = request.form['source_directory'] + branch = request.form.get('branch', 'master') + dataset_controller.new(git_url=git_url, + code_dir=source_directory, + branch=branch + ) + return redirect(url_for('main')) + + +@app.route('/project/delete/', methods=['POST']) +def delete_project(): + dataset_name = request.form['delete_project'] + dataset_controller.delete(dataset_name) + return redirect(url_for('main')) + + +@app.route('/debug') +def debug(): + existing_objs = sorted([obj.stem for obj in dataset_controller.current_dataset.objects_directory.rglob('*') if obj.is_file()]) + all_objects = sorted([_cls.name() for _cls in get_all_concrete_object_classes()]) + return render_template('debug.html', **locals(), **globals()) + + 
+@app.route('/debug/test_all_objects') +def test_all_objects(): + all_objects = [_cls for _cls in get_all_concrete_object_classes()] + + results = [] + for obj_class in all_objects: try: - grab_code(source) - return redirect(url_for('main')) + _cls = get_object_class_from_class_name(obj_class.name().replace(' ', '')) + _cls().delete() + result = _cls().build() except Exception as e: - app.logger.warning(e) - error_message = str(e) + build_result = f"{e.__class__.__name__}: {str(e)}" + else: + build_result = f"Success (len={len(result)})" - return render_template('load_project.html', **locals()) + obj_res = { + 'object': f"{obj_class.name()}", + 'build result': build_result + } + results.append(obj_res) + + results_df = pd.DataFrame(results).sort_values('build result', ascending=True) + objects_table = results_df.to_html(escape=False) + return render_template('test_all_objects.html', **locals(), **globals()) if __name__ == "__main__": app.run(host='0.0.0.0', debug=True) + +# TODO UMLClassRelationGraphHTMLObj fix and make it multitab +# TODO fix delete dataset functionality (permission denied on windows) +# TODO tidy up the logs +# TODO file explorer for load dataset +# TODO make sure it runs from docker also diff --git a/docker-compose.yml b/docker-compose.yml index 9e75b25..473b8b3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,7 +5,8 @@ services: ports: - "8080:8080" app: - build: . 
+ build: + dockerfile: ./Dockerfile ports: - "5000:5000" depends_on: diff --git a/pystruct/metrics/import_metrics.py b/pystruct/metrics/import_metrics.py index 2b271b0..1952a9b 100644 --- a/pystruct/metrics/import_metrics.py +++ b/pystruct/metrics/import_metrics.py @@ -18,7 +18,7 @@ def enrich_import_raw_df(df): df['is_external'] = ~df['is_internal'] & ~df['is_no_imports'] # checks if it is a python built-in module - df['is_builtin'] = ~df['is_internal'] & ~df['is_no_imports'] + df['is_builtin'] = df['imports'].apply(is_python_builtin_package) # finds the module import path for in-project imports df['import_module'] = df['imports'].apply(lambda x: element_with_longest_match(x, df['module'].unique())) diff --git a/pystruct/metrics/metrics_core.py b/pystruct/metrics/metrics_core.py index 1a472c6..22fbe16 100644 --- a/pystruct/metrics/metrics_core.py +++ b/pystruct/metrics/metrics_core.py @@ -2,7 +2,7 @@ import pandas as pd -from pystruct.objects.data_objects import DataframeObject +from pystruct.objects.data_objects import DataframeObjectABC from pystruct.objects.python_object import PObject from pystruct.utils.logs import log_yellow from pystruct.visitors.visitor import TreeNodeVisitor @@ -49,11 +49,11 @@ def visit_class_method(self, node): def results(self): data_array = list(self._results.items()) - return pd.DataFrame.from_records(data_array, columns=['item', self._metric.name]) + return pd.DataFrame.from_records(data_array, columns=['item', self._metric.metric_name]) class Metric(abc.ABC): - name = 'no-name' + metric_name = None def calculate(self, p_obj, **kwargs): pass @@ -74,9 +74,9 @@ def calculate_class_method(self, p_obj, **kwargs): pass -class MetricObject(DataframeObject, Metric): +class MetricObject(DataframeObjectABC, Metric): def __init__(self): - super().__init__(read_csv_kwargs={'index_col': None}, to_csv_kwargs={'index': False}) + super().__init__() def build(self): pobj = PObject().python_source_object() diff --git 
a/pystruct/objects/data_objects.py b/pystruct/objects/data_objects.py index fc42986..e15a8cf 100644 --- a/pystruct/objects/data_objects.py +++ b/pystruct/objects/data_objects.py @@ -1,97 +1,167 @@ import abc import pandas as pd +from json2html import json2html -from pystruct.utils.data_mixins import HTMLMixin -from pystruct.utils.file_strategies import DataframeFile, HTMLFile from pystruct.utils import logs - - -class SingletonClass(object): - def __new__(cls, *args, **kwargs): - if not hasattr(cls, 'instance'): - cls.instance = super(SingletonClass, cls).__new__(cls, *args, **kwargs) - else: - logs.log_general(f"SingletonClass: Object {cls.__name__} is already initialized.") - return cls.instance - - -class AbstractObject(SingletonClass, abc.ABC, HTMLMixin): - def __init__(self, file_strategy=None): - self._file_strategy = file_strategy +from pystruct.utils.file_adapters import DataframeFile, HTMLFile, JsonFile, TextFile +from pystruct.utils.mixins import NameMixin, HTMLMixin +from pystruct.utils.string_utils import split_camel_case_string +from pystruct.utils.python_utils import Singleton + + +class AbstractObject(abc.ABC, Singleton, HTMLMixin, NameMixin): + """ + AbstractObject manages the lifecycle of objects and the data they contain. It + is responsible for building, caching, and saving the data, as well as loading + and returning the data if it already exists. The main responsibility of this class + is to provide an easy-to-extend interface and handle all data management operations + efficiently and invisibly from the user. + + To use this class, you need to provide an implementation of the build method responsible + for creating the data. If you provide a file adapter, AbstractObject will use it to store + data after creation and to load it if there is such a file. To obtain the data, you only + need to call the data method. 
+ + HOW TO EXTEND: + > If extended by another abstract class, for example another type of object such as JSON, + it is encouraged to provide additional methods for obtaining data, for example a json() + method for a JSON object to provide more specific information about what the data is and + potentially any needed validation and logging. + + > If extended by another abstract class, for example another category of object like a + multi-tab HTML element, where build instructions are extended, it is recommended to add + an abstract `build_[object-category]` (for example `build_multitabs_page`) which will be + called by `build`. + + If extended by a concrete class, for example an actual final object, just implement the + corresponding `build*` method, and use the corresponding `data` method for the output. + + `build` calls `build_[type]`, `data` calls `[data_type]` + + IMPLEMENTATIONS + * abc.ABC: AbstractObject is an abstract class and cannot be used as is. The `build` method must + be implemented when extended. + * Singleton: Ensures that if two objects get instantiated at different times in the same + execution, the data will be cached the second time, saving an extra loading operation. + * HTMLMixin: Adds the to_html implementation to ensure that every implementation will have + an HTML representation. + * NameMixin: Provides class_name and prettified_class_name methods that + make it easier to access the class name and a prettified version of it. 
+ """ + + def __init__(self, file_adapter=None): + self._file_adapter = file_adapter self._data = None def data(self): - logs.log_obj_stage(f"{self.__class__.__name__} data.") - # try: + logs.log_obj_stage(f"{self.name()} data.") return self._prepare_data() - # except Exception as e: - # log_red(str(e)) - # return f"{self.__class__.__name__}: {e}" def _prepare_data(self): if self._data is not None: # if self._data breaks in Dataframes return self._data - if self._file_strategy: - self._data = self._file_strategy.load() + if self._file_adapter: + self._data = self._file_adapter.load() if self._data is None: - logs.log_general(f"{self.__class__.__name__} object is building.") + logs.log_general(f"{self.name()} object is building.") self._data = self.build() - logs.log_general(f"{self.__class__.__name__} object finished.") + logs.log_general(f"{self.name()} object finished.") - if self._file_strategy and self._data is not None: # if self._data breaks in Dataframes - self._file_strategy.save(self._data) + if self._file_adapter and self._data is not None: # if self._data breaks in Dataframes + self._file_adapter.save(self._data) return self._data def delete(self): - if self._file_strategy: - self._file_strategy.delete_file() + self._data = None + if self._file_adapter: + self._file_adapter.delete_file() @abc.abstractmethod def build(self): return None def to_html(self): - # TODO add a HTML mixin with a to_html method here, so all objects can have a default or a customisable html representation - return 'NOT_IMPLEMENTED' + return f"to_html:{self.data()}" + + @classmethod + def name(cls): + return split_camel_case_string(cls.__name__) + + +class TextObjectABC(AbstractObject, abc.ABC): + def __init__(self, file_ext): + super().__init__(TextFile(self, file_ext=file_ext)) + + def text(self): + data = self.data() + if not isinstance(data, str): + raise TypeError( + f"Wrong return type: build method of TextObject objects must return string. 
got {type(data)}") + return data + + def to_html(self): + return f"

Text:

{self.text()}" -class DataframeObject(AbstractObject, abc.ABC): - def __init__(self, read_csv_kwargs=None, to_csv_kwargs=None): + +class DataframeObjectABC(AbstractObject, abc.ABC): + def __init__(self, read_csv_kwargs=None, to_csv_kwargs=None): # TODO default values for DF objects + read_csv_kwargs = {'index_col': None} if read_csv_kwargs is None else read_csv_kwargs + to_csv_kwargs = {'index': False} if to_csv_kwargs is None else to_csv_kwargs super().__init__(DataframeFile(self, save_kwargs=to_csv_kwargs, load_kwargs=read_csv_kwargs)) - @property def dataframe(self): - build_res = self.build() + build_res = self.data() if not isinstance(build_res, pd.DataFrame) and not isinstance(build_res, pd.Series): - raise TypeError(f"Wrong return type: build method of DataframeObject objects must return pandas.DataFrame or pandas.Series. got {type(build_res)}") + raise TypeError( + f"Wrong return type: build method of DataframeObject objects must return pandas.DataFrame or pandas.Series. got {type(build_res)}") + return build_res + + def to_html(self): + return f"to_html:{self.dataframe().to_html()}" + + +class JSONObjectABC(AbstractObject, abc.ABC): + def __init__(self): + super().__init__(JsonFile(self)) + + def json(self): + build_res = self.data() + if not isinstance(build_res, list) and not isinstance(build_res, dict): + raise TypeError( + f"Wrong return type: build method of JSONObject objects must return a JSON object (list of dicts or dict). got {type(build_res)}") return build_res + def to_html(self): + return f"to_html:{json2html.convert(json=self.json())}" -class HTMLObject(AbstractObject, abc.ABC): + +class HTMLObjectABC(AbstractObject, abc.ABC): def __init__(self): super().__init__(HTMLFile(self)) - @property def html(self): - build_res = self.build() + build_res = self.data() if not isinstance(build_res, str): - raise TypeError(f"Wrong return type: build method of HTMLObject objects must return string. 
got {type(build_res)}") + raise TypeError( + f"Wrong return type: build method of HTMLObject objects must return string. got {type(build_res)}") return build_res + def to_html(self): + return self.html() + -class HTMLTableObject(HTMLObject, abc.ABC): +class HTMLTableObjectABC(HTMLObjectABC, abc.ABC): def __init__(self): super().__init__() def title(self): - def _space_before_upper_case(s): - return ''.join([(f" {c}" if c.isupper() else c) for c in s]) - return _space_before_upper_case(self.__class__.__name__) + return self.name() @abc.abstractmethod def build_dataframe(self): @@ -103,8 +173,40 @@ def build(self): raise TypeError( f"Wrong return type: build_dataframe method of HTMLTableObject objects must return pandas.DataFrame. got {type(build_res)}") - # html_table_str = SimpleHTMLTable(build_res).html - # return html_table_str title = self.title() if self.title() else '' table_html = build_res.to_html(index=False, justify='center') return f"

{title}

{table_html}
" + + +class PlantUMLDocumentObjABC(JSONObjectABC, abc.ABC): + @staticmethod + def _validate_doc(doc): + if not isinstance(doc, str): + raise TypeError(f"PlantUML documents must be of type 'str'. got {type(doc)}") + if not doc.startswith('@startuml'): + raise TypeError(f"PlantUML documents must start with ''. got {doc[:min(len(doc), 9)]}") + if not doc.strip().endswith('@enduml'): + raise TypeError(f"PlantUML documents must end with ''. got {doc[-min(len(doc), 7):]}") + + def documents(self): + docs = self.json() + + if isinstance(docs, str): + docs = [docs] + + doc_values = docs.values() if isinstance(docs, dict) else docs + for doc in doc_values: + self._validate_doc(doc) + + return docs + + +# TODO Report objects +# TODO plantUMLDOc objects +# TODO HTMLTableObject objects can be DataframeObjects (to_html will do the job) +# TODO similar for umlgrapphs +# TODO organise objects methods so: +# * abstract objects define a build_[object] method calling build or build_[object] for super class +# * abstract objects define a valudate_[object] method +# * make sure subclasses use the child concrete classes' methods + diff --git a/pystruct/objects/dependencies.py b/pystruct/objects/dependencies.py index bee39dc..7743c24 100644 --- a/pystruct/objects/dependencies.py +++ b/pystruct/objects/dependencies.py @@ -1,12 +1,25 @@ import pandas as pd -from pystruct.objects.data_objects import DataframeObject +from pystruct.objects.data_objects import DataframeObjectABC from pystruct.objects.imports_data_objects import ImportsEnrichedDataframe -class PackageAndModulesMapping(DataframeObject): +def _unique_sorted_string_agg(items): + filtered_items = [item for item in items if isinstance(item, str)] + return ','.join(sorted(set(filtered_items ))) + + +def _produce_unique_and_nunique_from_df(imports_df, new_column_name, groupby_column, agg_column): + agg_rows = imports_df.groupby(groupby_column).agg({agg_column: [_unique_sorted_string_agg, pd.Series.nunique]}) + agg_rows.columns = 
[f'{new_column_name}s', f'number_of_{new_column_name}s'] + agg_rows[f'number_of_{new_column_name}s'].fillna(0, inplace=True) + # agg_rows[f'number_of_{new_column_name}s'] = agg_rows[f'number_of_{new_column_name}s'].astype(int) + return agg_rows + + +class PackageAndModulesMapping(DataframeObjectABC): def __init__(self): - super().__init__(read_csv_kwargs={'index_col': None, 'header': 0}, to_csv_kwargs={'index': False}) + super().__init__() def build(self): df = ImportsEnrichedDataframe().data() @@ -18,15 +31,7 @@ def build(self): return df_res -def _produce_unique_and_nunique_from_df(imports_df, new_column_name, groupby_column, agg_column): - agg_rows = imports_df.groupby(groupby_column).agg({agg_column: [pd.Series.unique, pd.Series.nunique]}) - agg_rows.columns = [f'{new_column_name}s', f'number_of_{new_column_name}s'] - agg_rows[f'number_of_{new_column_name}s'].fillna(0, inplace=True) - # agg_rows[f'number_of_{new_column_name}s'] = agg_rows[f'number_of_{new_column_name}s'].astype(int) - return agg_rows - - -class PackageDependencyStatsDataframe(DataframeObject): +class PackageDependencyStatsDataframe(DataframeObjectABC): def __init__(self): super().__init__(read_csv_kwargs={'index_col': None, 'header': 0}, to_csv_kwargs={'index': False}) df = ImportsEnrichedDataframe().data() @@ -90,7 +95,7 @@ def produce_imported_from_packages(self): return res -class ModuleDependencyStatsDataframe(DataframeObject): +class ModuleDependencyStatsDataframe(DataframeObjectABC): def __init__(self): super().__init__(read_csv_kwargs={'index_col': None, 'header': 0}, to_csv_kwargs={'index': False}) df = ImportsEnrichedDataframe().data() diff --git a/pystruct/objects/full_report.py b/pystruct/objects/full_report.py index 10ee0e9..33367a7 100644 --- a/pystruct/objects/full_report.py +++ b/pystruct/objects/full_report.py @@ -1,16 +1,16 @@ from pystruct.html_utils.html_pages import TabsHTML -from pystruct.objects.data_objects import HTMLObject +from pystruct.objects.data_objects import 
HTMLObjectABC from pystruct.objects.imports_data_objects import ImportsStatsHTML from pystruct.objects.metric_tables import AllMetricsTable, AllMetricsStatsHTML -from pystruct.objects.uml_graph_obj import UMLClassDiagramObj, UMLClassRelationDiagramObj, \ +from pystruct.objects.uml_graph_obj import UMLClassGraphHTMLObj, UMLClassRelationGraphHTMLObj, \ DependencyReportObj -class FullReport(HTMLObject): +class FullReport(HTMLObjectABC): content_dict = { "General info": AllMetricsStatsHTML, - "UML Class diagram": UMLClassDiagramObj, - "UML Relation diagram": UMLClassRelationDiagramObj, + "UML Class diagram": UMLClassGraphHTMLObj, + "UML Relation diagram": UMLClassRelationGraphHTMLObj, "Imports table": ImportsStatsHTML, "Dependencies": DependencyReportObj, # "In project Import graphs": InProjectImportModuleGraphObj, diff --git a/pystruct/objects/grab_code.py b/pystruct/objects/grab_code.py index a49db7e..1174bc2 100644 --- a/pystruct/objects/grab_code.py +++ b/pystruct/objects/grab_code.py @@ -1,14 +1,13 @@ import os.path -import tempfile from functools import lru_cache -# from git import Repo +from git import Repo from pystruct.configs import PATH_CODE_COPY_DIR, PATH_FILES_DIR, PATH_GIT_COPY_DIR from pystruct.utils import path_utils from pystruct.utils.logs import log_yellow, log_cyan, log_red, log_pink -from pystruct.utils.python_file_utils import find_source_dirs from pystruct.utils.path_utils import delete_dir_if_exists +from pystruct.utils.python_file_utils import find_source_dirs def is_git_url(url): diff --git a/pystruct/objects/imports_data_objects.py b/pystruct/objects/imports_data_objects.py index ace9e6d..84f0799 100644 --- a/pystruct/objects/imports_data_objects.py +++ b/pystruct/objects/imports_data_objects.py @@ -1,17 +1,14 @@ import pandas as pd -from pystruct.html_utils.html_pages import HTMLPage +from pystruct.html_utils.html_pages import HTMLPage, TabsHTML from pystruct.metrics.import_metrics import enrich_import_raw_df -from 
pystruct.objects.data_objects import DataframeObject, HTMLTableObject, HTMLObject +from pystruct.objects.data_objects import DataframeObjectABC, HTMLTableObjectABC, HTMLObjectABC from pystruct.objects.metric_obj import IsScriptFile from pystruct.objects.python_object import PObject from pystruct.reports.import_graph import CollectImportsVisitor -class ImportsRawDataframe(DataframeObject): - def __init__(self): - super().__init__(read_csv_kwargs={'index_col': None}, to_csv_kwargs={'index': False}) - +class ImportsRawDataframe(DataframeObjectABC): def build(self): pobj = PObject().python_source_object() imports_col = CollectImportsVisitor() @@ -19,10 +16,7 @@ def build(self): return imports_col.result() -class ImportsEnrichedDataframe(DataframeObject): - def __init__(self): - super().__init__(read_csv_kwargs={'index_col': None}, to_csv_kwargs={'index': False}) - +class ImportsEnrichedDataframe(DataframeObjectABC): def build(self): df = ImportsRawDataframe().data() @@ -35,7 +29,7 @@ def build(self): return df_enriched -class MostImportedPackages(HTMLTableObject): +class MostImportedPackages(HTMLTableObjectABC): def build_dataframe(self): df = ImportsEnrichedDataframe().data() value_counts = df['import_root'].value_counts() @@ -46,7 +40,7 @@ def build_dataframe(self): return res_df -class MostImportedProjectModules(HTMLTableObject): +class MostImportedProjectModules(HTMLTableObjectABC): def build_dataframe(self): df = ImportsEnrichedDataframe().data() value_counts = df['import_module'].value_counts() @@ -57,7 +51,7 @@ def build_dataframe(self): return res_df -class MostImportedProjectPackages(HTMLTableObject): +class MostImportedProjectPackages(HTMLTableObjectABC): def build_dataframe(self): df = ImportsEnrichedDataframe().data() value_counts = df[df['is_internal']]['import_package'].value_counts() @@ -68,24 +62,21 @@ def build_dataframe(self): return res_df -class UnusedModules(HTMLTableObject): +class UnusedModules(HTMLTableObjectABC): def build_dataframe(self): df = 
ImportsEnrichedDataframe().data() df = df[df['module_name'] != '__init__'] return df[df['unused_module']][['module', 'is_script_file']].drop_duplicates() -class InvalidImports(HTMLTableObject): +class InvalidImports(HTMLTableObjectABC): def build_dataframe(self): df = ImportsEnrichedDataframe().data() filtered_df = df[df['invalid_import']] return filtered_df[['module', 'imports']] -class InProjectImportModuleGraphDataframe(DataframeObject): - def __init__(self): - super().__init__(read_csv_kwargs={'index_col': None}, to_csv_kwargs={'index': False}) - +class InProjectImportModuleGraphDataframe(DataframeObjectABC): def build(self): df = ImportsEnrichedDataframe().data() @@ -94,10 +85,7 @@ def build(self): return df_graph -class PackagesImportModuleGraphDataframe(DataframeObject): - def __init__(self): - super().__init__(read_csv_kwargs={'index_col': None}, to_csv_kwargs={'index': False}) - +class PackagesImportModuleGraphDataframe(DataframeObjectABC): def build(self): df = ImportsEnrichedDataframe().data() @@ -106,14 +94,14 @@ def build(self): return df_graph -class ImportsStatsHTML(HTMLObject): +class ImportsStatsHTML(HTMLObjectABC): def build(self): - page = HTMLPage() - page.add_element(MostImportedPackages().data()) - page.add_element(MostImportedProjectModules().data()) - page.add_element(MostImportedProjectPackages().data()) - page.add_element(UnusedModules().data()) - page.add_element(InvalidImports().data()) + page = TabsHTML() + page.add_tab(MostImportedPackages.name(), MostImportedPackages().data()) + page.add_tab(MostImportedProjectModules.name(), MostImportedProjectModules().data()) + page.add_tab(MostImportedProjectPackages.name(), MostImportedProjectPackages().data()) + page.add_tab(UnusedModules.name(), UnusedModules().data()) + page.add_tab(InvalidImports.name(), InvalidImports().data()) return page.html() diff --git a/pystruct/objects/metric_obj.py b/pystruct/objects/metric_obj.py index 789c25c..76e87ee 100644 --- a/pystruct/objects/metric_obj.py +++ 
b/pystruct/objects/metric_obj.py @@ -7,12 +7,12 @@ plt.style.use('bmh') from pystruct.metrics.metrics_core import MetricObject -from pystruct.objects.data_objects import DataframeObject, HTMLTableObject +from pystruct.objects.data_objects import DataframeObjectABC, HTMLTableObjectABC from pystruct.objects.metric_stats import ValueCountMetricObj, MatplotlibGraphMetricObj class TypeMetricObj(MetricObject): - name = 'type' + metric_name = 'type' def calculate(self, p_obj, **kwargs): return p_obj.type @@ -20,11 +20,12 @@ def calculate(self, p_obj, **kwargs): class TypeMetricValueCountsTable(ValueCountMetricObj): def get_series(self): - return TypeMetricObj().data()['type'] + obj = TypeMetricObj() + return obj.data()[obj.metric_name] class NumberOfCodeLinesMetricObj(MetricObject): - name = 'number_of_lines' + metric_name = 'number_of_lines' def _calc(self, p_obj): return len(p_obj.code_lines) @@ -42,16 +43,15 @@ def calculate_class_method(self, function_obj, **kwargs): return self._calc(function_obj) -class GeneralItemMetricObj(DataframeObject): +class GeneralItemMetricObj(DataframeObjectABC): def build(self): df_lines = NumberOfCodeLinesMetricObj().data() df_types = TypeMetricObj().data() df = df_types.merge(df_lines, on='item', how='left') - - df_agg_stats = df.groupby('type').agg({ + df_agg_stats = df.groupby(TypeMetricObj.metric_name).agg({ 'item': 'count', - 'number_of_lines': ['sum', 'min', 'mean', 'max'] + NumberOfCodeLinesMetricObj.metric_name: ['sum', 'min', 'mean', 'max'] }).sort_values(by=[('item', 'count')]).reset_index() df_agg_stats.columns = df_agg_stats.columns.droplevel(0) @@ -72,7 +72,7 @@ def build(self): return df_agg_stats -class GeneralItemMetricHTMLTable(HTMLTableObject): +class GeneralItemMetricHTMLTable(HTMLTableObjectABC): def build_dataframe(self): return GeneralItemMetricObj().data() @@ -89,14 +89,14 @@ def build_plot(self): plt.subplot(1, 4, i+1) plt.xlabel(type) if i == 0: - plt.ylabel('number_of_lines') + 
plt.ylabel(NumberOfCodeLinesMetricObj.metric_name) # plt.boxplot(df[df['type'] == type]['number_of_lines']) - plot_hist_and_quartiles(df[df['type'] == type]['number_of_lines']) + plot_hist_and_quartiles(df[df[TypeMetricObj.metric_name] == type][NumberOfCodeLinesMetricObj.metric_name]) plt.tight_layout() class NumberOfArgsInFunctionsMetricObj(MetricObject): - name = 'number_of_args_in_functions' + metric_name = 'number_of_args_in_functions' @staticmethod def _fetch_args(p_obj): @@ -127,14 +127,14 @@ def build_plot(self): plt.subplot(1, 2, i + 1) plt.xlabel(type) if i == 0: - plt.ylabel('number_of_args_in_functions') + plt.ylabel(NumberOfArgsInFunctionsMetricObj.metric_name) # plt.boxplot(df[df['type'] == type]['number_of_args_in_functions']) - plot_hist_and_quartiles(df[df['type'] == type]['number_of_args_in_functions']) + plot_hist_and_quartiles(df[df[TypeMetricObj.metric_name] == type][NumberOfArgsInFunctionsMetricObj.metric_name]) plt.tight_layout() class IsScriptFile(MetricObject): - name = 'is_script_file' + metric_name = 'is_script_file' def calculate_module(self, module_obj, **kwargs): all_code = module_obj.code.replace('\n', '').replace(' ', '').replace('"', "'") diff --git a/pystruct/objects/metric_stats.py b/pystruct/objects/metric_stats.py index 5fb16a7..9d04dd3 100644 --- a/pystruct/objects/metric_stats.py +++ b/pystruct/objects/metric_stats.py @@ -6,10 +6,10 @@ import pandas as pd from pystruct.html_utils.html_pages import ImageHTML -from pystruct.objects.data_objects import HTMLTableObject, HTMLObject +from pystruct.objects.data_objects import HTMLTableObjectABC, HTMLObjectABC -class ValueCountMetricObj(HTMLTableObject, abc.ABC): +class ValueCountMetricObj(HTMLTableObjectABC, abc.ABC): @abc.abstractmethod def get_series(self): pass @@ -20,7 +20,7 @@ def build_dataframe(self): return res -class MatplotlibGraphMetricObj(HTMLObject, abc.ABC): +class MatplotlibGraphMetricObj(HTMLObjectABC, abc.ABC): @abc.abstractmethod def build_plot(self): pass diff --git 
a/pystruct/objects/metric_tables.py b/pystruct/objects/metric_tables.py index b7bcffd..7bb5937 100644 --- a/pystruct/objects/metric_tables.py +++ b/pystruct/objects/metric_tables.py @@ -7,13 +7,10 @@ from pystruct.html_utils.html_pages import HTMLPage from pystruct.metrics.metric_sets import ALL_METRICS -from pystruct.objects.data_objects import HTMLTableObject, DataframeObject, HTMLObject +from pystruct.objects.data_objects import HTMLTableObjectABC, DataframeObjectABC, HTMLObjectABC -class AllMetricsDataframe(DataframeObject): - def __init__(self): - super().__init__(read_csv_kwargs={'index_col': None}, to_csv_kwargs={'index': False}) - +class AllMetricsDataframe(DataframeObjectABC): def build(self): df = ALL_METRICS[0]().data() @@ -23,12 +20,12 @@ def build(self): return df -class AllMetricsTable(HTMLTableObject): +class AllMetricsTable(HTMLTableObjectABC): def build_dataframe(self): return AllMetricsDataframe().data() -class AllMetricsStatsHTML(HTMLObject): +class AllMetricsStatsHTML(HTMLObjectABC): def build(self): page = HTMLPage() page.add_element(GeneralItemMetricHTMLTable().data()) diff --git a/pystruct/objects/python_object.py b/pystruct/objects/python_object.py index 67437ff..d91b3d3 100644 --- a/pystruct/objects/python_object.py +++ b/pystruct/objects/python_object.py @@ -1,20 +1,18 @@ -import os - -from pystruct.configs import PATH_CODE_COPY_DIR -from pystruct.objects.data_objects import AbstractObject +from pystruct.objects.data_objects import JSONObjectABC +from pystruct.plat.dataset_controller import DatasetController from pystruct.python.python_source_obj import PythonSourceObj -from pystruct.utils.file_strategies import JsonFile -class PObject(AbstractObject): +class PObject(JSONObjectABC): def __init__(self): - super().__init__(JsonFile(self)) + super().__init__() self._pobj = None def build(self): - srcs = os.listdir(PATH_CODE_COPY_DIR) - src_path = os.path.join(PATH_CODE_COPY_DIR, srcs[0]) - self._pobj = 
PythonSourceObj.from_project_source(src_path) + # srcs = os.listdir(PATH_CODE_COPY_DIR) + # src_path = os.path.join(PATH_CODE_COPY_DIR, srcs[0]) + code_dir = DatasetController.get_instance().current_dataset.code_directory + self._pobj = PythonSourceObj.from_project_source(code_dir) return self._pobj.to_dict() def python_source_object(self): diff --git a/pystruct/objects/uml_graph_obj.py b/pystruct/objects/uml_graph_obj.py index 97d0c41..0700d0a 100644 --- a/pystruct/objects/uml_graph_obj.py +++ b/pystruct/objects/uml_graph_obj.py @@ -1,9 +1,12 @@ +import abc + import markdown import pandas as pd +from pystruct.objects.data_objects import PlantUMLDocumentObjABC from pystruct.html_utils.html_pages import HTMLPage, TabsHTML from pystruct.metrics.import_metrics import breakdown_import_path -from pystruct.objects.data_objects import AbstractObject, HTMLObject, DataframeObject +from pystruct.objects.data_objects import AbstractObject, HTMLObjectABC, DataframeObjectABC from pystruct.objects.dependencies import PackageDependencyStatsDataframe, ModuleDependencyStatsDataframe, \ PackageAndModulesMapping from pystruct.objects.imports_data_objects import PackagesImportModuleGraphDataframe, ImportsEnrichedDataframe @@ -11,16 +14,14 @@ from pystruct.reports.uml_class import UMLClassBuilder, UMLClassRelationBuilder, ObjectRelationGraphBuilder, \ PlantUMLPackagesAndModulesBuilder from pystruct.utils.color_utils import getDistinctColors -from pystruct.utils.data_mixins import JSONableMixin -from pystruct.utils.file_strategies import HTMLFile +from pystruct.utils.file_adapters import HTMLFile from pystruct.utils.graph_structures import Graph +from pystruct.utils.mixins import JSONableMixin from pystruct.utils.plantuml_utils import PlantUMLService +from pystruct.utils.string_utils import single_to_multiline_string -class PackageColorMappingDataframe(DataframeObject, JSONableMixin): - def __init__(self): - super().__init__(read_csv_kwargs={'index_col': None, 'header':0}, 
to_csv_kwargs={'index': False}) - +class PackageColorMappingDataframe(DataframeObjectABC, JSONableMixin): def build(self): df = ImportsEnrichedDataframe().data() all_packages = sorted(list(set(df['package'].unique()).union(df[df['is_internal']]['import_package'].dropna().unique()))) @@ -45,16 +46,18 @@ def to_json(self): return self.data().set_index('package').to_dict(orient='index') -class PlantUMLDiagramObj(AbstractObject): +class PlantUMLGraphSingleHTMLPageObj(AbstractObject, abc.ABC): def __init__(self, multithread=False): super().__init__(HTMLFile(self)) - self._plant_uml = PlantUMLService(multithread) + self._plant_uml = PlantUMLService.get_instance() def build(self): - docs = self.plantuml_docs() + docs = self.build_plantuml_docs() if isinstance(docs, str): docs = [docs] + if isinstance(docs, dict): + docs = list(docs.values()) plantuml_diagram_html_images = self._plant_uml.convert_multiple_docs_to_html_images(docs) @@ -63,15 +66,39 @@ def build(self): return html_page.html() - def plantuml_docs(self): + @abc.abstractmethod + def build_plantuml_docs(self): pass -class UMLClassDiagramObj(PlantUMLDiagramObj): - def __init__(self): - super().__init__(multithread=True) +class PlantUMLGraphMultiTabHTMLPageObj(AbstractObject, abc.ABC): + def __init__(self, multithread=False): + super().__init__(HTMLFile(self)) + self._plant_uml = PlantUMLService.get_instance() + + def build(self): + docs = self.build_plantuml_docs() + + if isinstance(docs, str): + docs = [docs] + if isinstance(docs, list): + docs = {f"document_{i}": doc for i, doc in enumerate(docs)} + + plantuml_diagram_html_images = {title: self._plant_uml.convert_doc_to_html_image(doc) + for title, doc in docs.items()} + + html_page = TabsHTML() + [html_page.add_tab(title, html_image) for title, html_image in plantuml_diagram_html_images.items()] + + return html_page.html() + + @abc.abstractmethod + def build_plantuml_docs(self): + pass + - def plantuml_docs(self): +class 
UMLClassDocumentObj(PlantUMLDocumentObjABC): + def build(self): pobj = PObject().python_source_object() uml_builder = UMLClassBuilder() @@ -82,8 +109,13 @@ def plantuml_docs(self): return plantuml_doc_strings -class UMLClassRelationDiagramObj(PlantUMLDiagramObj): - def plantuml_docs(self): +class UMLClassGraphHTMLObj(PlantUMLGraphMultiTabHTMLPageObj): + def build_plantuml_docs(self): + return UMLClassDocumentObj().documents() + + +class UMLClassRelationDocumentObj(PlantUMLDocumentObjABC): + def build(self): pobj = PObject().python_source_object() uml_builder = UMLClassRelationBuilder() @@ -91,7 +123,7 @@ def plantuml_docs(self): plantuml_doc_strings = uml_builder.result() - plantuml_doc_strings = UMLClassRelationDiagramObj.split_documents(plantuml_doc_strings) + plantuml_doc_strings = UMLClassRelationDocumentObj.split_documents(plantuml_doc_strings) return plantuml_doc_strings @staticmethod @@ -112,8 +144,13 @@ def split_documents(graph_doc): return subgraph_docs -class PackageRelationsGraphObj(PlantUMLDiagramObj): - def plantuml_docs(self): +class UMLClassRelationGraphHTMLObj(PlantUMLGraphSingleHTMLPageObj): + def build_plantuml_docs(self): + return UMLClassRelationDocumentObj().documents() + + +class PackageRelationsDocumentObj(PlantUMLDocumentObjABC): + def build(self): df_pkgs = self.produce_data() plantuml_doc = PlantUMLPackagesAndModulesBuilder(direction='top to bottom direction') @@ -126,20 +163,24 @@ def plantuml_docs(self): total_internal_imports = int(row['total_imports']) if row['total_imports'] else 0 total_unique_internal_imports = int(row['number_of_internal_packages']) if row['number_of_internal_packages'] else 0 - internal_imports = row['internal_packages'] if row['internal_packages'] else '' + internal_imports = self._present_packages(row['internal_packages'], remove_package_root=True) if row['internal_packages'] else '' total_external_imports = int(row['number_of_external_packages']) if row['number_of_external_packages'] else 0 - external_imports 
= row['external_packages'] if row['external_packages'] else '' - python_builtins = int(row['number_of_builtin_packages']) if row['number_of_builtin_packages'] else 0 + external_imports = self._present_packages(row['external_packages']) if row['external_packages'] else '' + total_python_builtins = int(row['number_of_builtin_packages']) if row['number_of_builtin_packages'] else 0 + python_builtins = self._present_packages(row['builtin_packages']) if row['builtin_packages'] else '' total_imported = int(row['total_imported']) if row['total_imported'] else 0 total_unique_imported = int(row['times_been_imported_from_packages']) if row['times_been_imported_from_packages'] else 0 - imported_from = row['imported_from_packages'] if row['imported_from_packages'] else '' - plantuml_doc.add_note( # TODO keep only package name on internals, not full package path - f"internal imports: {total_internal_imports}\n" - f"> ({total_unique_internal_imports} unique {self._keep_package_names(internal_imports)})\n" - f"other imports : {total_external_imports}\n" - f"> {external_imports} ({python_builtins} python builtins*)\n" - f"imported by : {total_imported}\n" - f"> ({total_unique_imported} unique {self._keep_package_names(imported_from)}" + imported_from = self._present_packages(row['imported_from_packages'], remove_package_root=True) if row['imported_from_packages'] else '' + plantuml_doc.add_note( + f"IMPORTS\n" + f"Internal: {total_internal_imports} ({total_unique_internal_imports} unique)\n" + f"> {internal_imports})\n" + f"Python built-in: {total_python_builtins}\n" + f"> {python_builtins}\n" + f"Other: {total_external_imports}\n" + f"> {external_imports}\n" + f"Imported by: {total_imported} ({total_unique_imported} unique)\n" + f"> {imported_from}" ) plantuml_doc.end_container() @@ -153,7 +194,7 @@ def plantuml_docs(self): plantuml_doc.add_relation(package, '<|-[thickness=2]-', import_package, arrow_color, f":{num}") plantuml_doc_string = plantuml_doc.finish_and_return() - return 
plantuml_doc_string + return [plantuml_doc_string] def produce_data(self): df = ImportsEnrichedDataframe().data() @@ -167,7 +208,7 @@ def produce_data(self): times_imported_dict = {k: v['size'] for k, v in df_agg.groupby('import_package').agg({'size': 'sum'}).to_dict(orient='index').items()} - df_pkgs = df_stats.merge(df[['package', 'package_name']].drop_duplicates(), on='package', how='left').\ + df_pkgs = df_stats.merge(df[['package']].drop_duplicates(), on='package', how='left').\ merge(df_pkgs_colors, on='package', how='left').\ merge(df_agg, on='package', how='left') df_pkgs['total_imports'] = df_pkgs['package'].map(total_imports_dict) @@ -176,17 +217,30 @@ def produce_data(self): df_pkgs = df_pkgs.fillna(value=False) return df_pkgs + @staticmethod + def _present_packages(packages_str, remove_package_root=False): + packages = packages_str.split(',') + if remove_package_root: + packages = PackageRelationsDocumentObj._keep_package_names(packages) + packages_sorted = sorted(packages) + result_str = single_to_multiline_string(packages_sorted) + print(result_str) + return result_str + @staticmethod def _keep_package_names(packages): - if isinstance(packages, str): # TODO load dataframe types correctly - packages = packages[1:-1].replace("'", '').replace(' ', ',').split(',') transform_func = lambda x: breakdown_import_path(x)[-1] packages_names = [transform_func(package) for package in packages] return packages_names -class PackageAndModuleRelationsGraphObj(PlantUMLDiagramObj): - def plantuml_docs(self): +class PackageRelationsGraphHTMLObj(PlantUMLGraphSingleHTMLPageObj): + def build_plantuml_docs(self): + return PackageRelationsDocumentObj().documents() + + +class PackageAndModuleRelationsDocumentObj(PlantUMLDocumentObjABC): + def build(self): df_pkgs, df_rels = self.produce_data() plantuml_doc = PlantUMLPackagesAndModulesBuilder() @@ -202,7 +256,7 @@ def plantuml_docs(self): plantuml_doc.add_relation(module, '<|-[thickness=3]-', import_module, arrow_color) 
plantuml_doc_string = plantuml_doc.finish_and_return() - return plantuml_doc_string + return [plantuml_doc_string] def produce_data(self): df_pkgs_colors = PackageColorMappingDataframe().data() @@ -222,21 +276,28 @@ def produce_data(self): return df_agg, df_relations -class ModuleRelationGraphObj(PlantUMLDiagramObj): - def plantuml_docs(self): + +class PackageAndModuleRelationsGraphHTMLObj(PlantUMLGraphSingleHTMLPageObj): + def build_plantuml_docs(self): + return PackageAndModuleRelationsDocumentObj().documents() + + +class ModuleRelationDocuemntObj(PlantUMLDocumentObjABC): + def build(self): df = ImportsEnrichedDataframe().data() self._df = df[df['is_internal']] modules, import_modules = self._df['module'].tolist(), self._df['import_module'].tolist() subgraphs = Graph(list(zip(modules, import_modules))).subgraphs() - docs = [self.produce_doc_for_modules(subgraph.nodes) for subgraph in subgraphs] + docs = {f"Grpah {i+1} (size={subgraph.size()})": self.produce_doc_for_modules(subgraph.nodes) + for i, subgraph in enumerate(subgraphs)} return docs def produce_doc_for_modules(self, modules): df = self._df[self._df['module'].isin(modules) | self._df['import_module'].isin(modules)] plantuml_doc = PlantUMLPackagesAndModulesBuilder(direction='top to bottom direction', separator='set separator none') - # TODO DictObjects for PackageColorMappingDataframe package_colors = {k: v['color'] for k, v in PackageColorMappingDataframe().data().set_index('package').to_dict(orient='index').items()} + # package_colors = PackageColorMappingDataframe().to_json() df_mods = pd.DataFrame(df[['module', 'package']].values.tolist()+df[df['is_internal']][['import_module', 'import_package']].values.tolist(), columns=['module', 'package']).drop_duplicates() @@ -253,20 +314,30 @@ def produce_doc_for_modules(self, modules): return plantuml_doc_string -class PackagesImportModuleGraphObj(PlantUMLDiagramObj): - def plantuml_docs(self): +class ModuleRelationGraphObj(PlantUMLGraphMultiTabHTMLPageObj): + 
def build_plantuml_docs(self): + return ModuleRelationDocuemntObj().documents() + + +class PackagesImportModuleDocumentObj(PlantUMLDocumentObjABC): + def build(self): df = PackagesImportModuleGraphDataframe().data() - plantuml_doc_strings = [] + plantuml_doc_strings = {} for package in df['import_root'].unique(): sub_df = df[df['import_root'] == package] module, import_root = sub_df['module'].tolist(), sub_df['import_root'].tolist() doc = ObjectRelationGraphBuilder(module, import_root).result() - plantuml_doc_strings.append(doc) + plantuml_doc_strings[package] = doc return plantuml_doc_strings -class DependencyAnalysisObj(HTMLObject): +class PackagesImportModuleGraphHTMLObj(PlantUMLGraphMultiTabHTMLPageObj): + def build_plantuml_docs(self): + return PackagesImportModuleDocumentObj().documents() + + +class DependencyAnalysisObj(HTMLObjectABC): def build(self): df = ImportsEnrichedDataframe().data() df = df[(~df['is_no_imports']) & (~df['is_init_file'])] @@ -284,13 +355,13 @@ def build(self): return markdown_to_html+PackageDependencyStatsDataframe().data().to_html()+"
"+ModuleDependencyStatsDataframe().data().to_html() -class DependencyReportObj(HTMLObject): +class DependencyReportObj(HTMLObjectABC): content_dict = { 'Analysis': DependencyAnalysisObj, - "Package Relations": PackageRelationsGraphObj, - "Package-Module Relations": PackageAndModuleRelationsGraphObj, + "Package Relations": PackageRelationsGraphHTMLObj, + "Package-Module Relations": PackageAndModuleRelationsGraphHTMLObj, "Module relations": ModuleRelationGraphObj, - "Commercial packages": PackagesImportModuleGraphObj, + "External package dependencies": PackagesImportModuleGraphHTMLObj, } def build(self): @@ -305,9 +376,9 @@ def build(self): # ModuleDependencyStatsDataframe().data() # PackageDependencyStatsDataframe().data() # DependencyAnalysisObj().data() - # UMLClassDiagramObj().data() + UMLClassGraphHTMLObj().data() # UMLClassRelationDiagramObj().data() - ModuleRelationGraphObj().data() - PackageAndModuleRelationsGraphObj().data() - PackageRelationsGraphObj().data() + # ModuleRelationGraphObj().data() + # PackageAndModuleRelationsGraphObj().data() + # PackageRelationsGraphObj().data() # PackagesImportModuleGraphObj().data() diff --git a/pystruct/plat/__init__.py b/pystruct/plat/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pystruct/plat/dataset_controller.py b/pystruct/plat/dataset_controller.py new file mode 100644 index 0000000..e73a2d6 --- /dev/null +++ b/pystruct/plat/dataset_controller.py @@ -0,0 +1,89 @@ +import pathlib + +from pystruct.configs import PATH_ROOT +from pystruct.utils.logs import log_disk_ops +from pystruct.utils.python_utils import Singleton +from pystruct.utils.storage import DatasetsDirectory + + +def get_project_root_path(): + return pathlib.Path(PATH_ROOT) + + +class DatasetController: + __instance = None + + @classmethod + def get_instance(cls): # TODO make singleton a class + if cls.__instance is None: + cls.__instance = DatasetController() + log_disk_ops(f"DatasetController: Instance is created.") + return 
cls.__instance + + def __init__(self): + log_disk_ops(f"DatasetController: Initialising...") + root = pathlib.Path(get_project_root_path()) + datasets_dir = root / 'datasets' + + self._datasets = DatasetsDirectory(datasets_dir) + self._current_dataset = None + + self._reset_current_dataset() + + def _reset_current_dataset(self): + datasets = self._datasets.get_datasets() + if len(datasets) > 0: + self._current_dataset = datasets[0] + else: + self._current_dataset = None + log_disk_ops(f"DatasetController: Reset dataset {self._current_dataset}.") + + @property + def all_datasets(self): + return self._datasets.get_datasets() + + @property + def current_dataset(self): + if self._current_dataset is not None and not self._current_dataset.exists(): + log_disk_ops(f"DatasetController: Current dataset needs to reset dataset {self._current_dataset}: exists={self._current_dataset.exists()}.") + self._reset_current_dataset() + return self._current_dataset + + def new(self, dir_path=None, git_url=None, code_dir=None, branch='master'): + if dir_path is not None: + dataset_name = pathlib.Path(dir_path).name + dataset = self._datasets.new_dataset(dataset_name) + dataset.add_python_files_from_path(dir_path) + self._current_dataset = dataset + log_disk_ops(f"DatasetController: New dataset {self._current_dataset} from path.") + return dataset + elif git_url is not None: + dataset_name = git_url.split('/')[-1].split('.')[0] + dataset_name += f" ({branch})" if branch != 'master' else '' + dataset = self._datasets.new_dataset(dataset_name) + dataset.add_python_files_from_git(git_url, code_dir=code_dir, branch=branch) + self._current_dataset = dataset + log_disk_ops(f"DatasetController: New dataset {self._current_dataset} from Git repo.") + else: + raise ValueError("At least one of dir_path or git_url parameters has to be populated.") + + def open(self, dataset_name): + log_disk_ops(f"DatasetController: Attempting to open dataset {dataset_name}.") + for dataset in 
self._datasets.get_datasets(): + if dataset.name == dataset_name: + self._current_dataset = dataset + log_disk_ops(f"DatasetController: Opened dataset {self._current_dataset}.") + break + else: + log_disk_ops(f"DatasetController: Failed to open dataset {dataset_name} (Not found in {self._datasets.path}).") + return self._current_dataset + + def delete(self, dataset_name): + for dataset in self._datasets.get_datasets(): + if dataset.name == dataset_name: + self._datasets.delete_dataset(dataset_name) + log_disk_ops(f"DatasetController: Deleted dataset {dataset}.") + self._reset_current_dataset() + else: + log_disk_ops(f"DatasetController: Failed to delete dataset {dataset_name} (Not found).") + return self._current_dataset \ No newline at end of file diff --git a/pystruct/reports/uml_class.py b/pystruct/reports/uml_class.py index 4d7a360..ee09c08 100644 --- a/pystruct/reports/uml_class.py +++ b/pystruct/reports/uml_class.py @@ -1,4 +1,5 @@ import ast +from collections import OrderedDict from functools import cached_property, lru_cache import pandas as pd @@ -95,7 +96,7 @@ def _end_brackets(self): def _start_uml(self): self._add_line('@startuml') self._add_line('left to right direction') - # self._add_line('set separator .') + self._add_line('set separator none') # self._add_line('scale max 1024 width') def _end_uml(self): @@ -160,7 +161,7 @@ def finish_and_return(self): class PlantUMLPackagesAndModulesBuilder: - def __init__(self, direction='left to right direction', separator='set separator .'): + def __init__(self, direction='left to right direction', separator='set separator none'): self._res_string = "" self._direction = direction self._separator = separator @@ -233,13 +234,13 @@ def all_in_one_doc(self): return uml_doc.finish_and_return() def one_doc_per_package(self): - uml_docs = [] + uml_docs = {} for package_name, package_dict in self._packages.items(): uml_doc = PlantUMLDocument() uml_doc.add_package(package_name, package_dict) - 
uml_docs.append(uml_doc.finish_and_return()) + uml_docs[package_name] = uml_doc.finish_and_return() - return uml_docs + return OrderedDict(uml_docs) def result(self): if self._seperate_packages: diff --git a/pystruct/utils/ast_utils.py b/pystruct/utils/ast_utils.py index 7f0a1e0..38e769e 100644 --- a/pystruct/utils/ast_utils.py +++ b/pystruct/utils/ast_utils.py @@ -46,7 +46,7 @@ def separate_statement(code, ast_var): return res_code_str, remaining_code_lines_str -def get_first_ast_of_type(code, list_of_types):# todo maybe avoid calling analyse_ast +def get_first_ast_of_type(code, list_of_types): asts = analyse_ast(code) for _ast in asts: for _type in list_of_types: diff --git a/pystruct/utils/data_mixins.py b/pystruct/utils/data_mixins.py deleted file mode 100644 index 1ef5500..0000000 --- a/pystruct/utils/data_mixins.py +++ /dev/null @@ -1,13 +0,0 @@ -import abc - - -class JSONableMixin(abc.ABC): - @abc.abstractmethod - def to_json(self): - pass - - -class HTMLMixin(abc.ABC): - @abc.abstractmethod - def to_html(self): - pass \ No newline at end of file diff --git a/pystruct/utils/file_strategies.py b/pystruct/utils/file_adapters.py similarity index 79% rename from pystruct/utils/file_strategies.py rename to pystruct/utils/file_adapters.py index ee48579..6697f8b 100644 --- a/pystruct/utils/file_strategies.py +++ b/pystruct/utils/file_adapters.py @@ -4,15 +4,15 @@ import pandas as pd -from pystruct.configs import PATH_FILES_DIR +from pystruct.plat.dataset_controller import DatasetController from pystruct.utils import logs -class AbstractFileStrategy(abc.ABC): +class AbstractFileAdapter(abc.ABC): def __init__(self, obj, file_ext, load_kwargs=None, save_kwargs=None): self._obj = obj - self._root_dir = PATH_FILES_DIR+"/objs/" # root_dir self._file_ext = file_ext + self._root_dir = DatasetController.get_instance().current_dataset.objects_directory / self._file_ext self._cached_data = None self._load_kwargs, self._save_kwargs = load_kwargs, save_kwargs @@ -58,7 +58,7 @@ 
def save_to_file(self, data, filepath, **kwargs): pass -class JsonFile(AbstractFileStrategy): +class JsonFile(AbstractFileAdapter): def __init__(self, obj, load_kwargs=None, save_kwargs=None): super().__init__(obj, file_ext='json', load_kwargs=load_kwargs, save_kwargs=save_kwargs) @@ -72,7 +72,21 @@ def save_to_file(self, data, filepath, **kwargs): json.dump(data, f, indent=4) -class HTMLFile(AbstractFileStrategy): +class TextFile(AbstractFileAdapter): + def __init__(self, obj, file_ext='txt', load_kwargs=None, save_kwargs=None): + super().__init__(obj, file_ext=file_ext, load_kwargs=load_kwargs, save_kwargs=save_kwargs) + + def load_from_file(self, filepath, **kwargs): + with open(filepath, 'r') as f: + return f.read() + + def save_to_file(self, data, filepath, **kwargs): + os.makedirs(os.path.dirname(filepath), exist_ok=True) + with open(filepath, 'w') as f: + f.write(data) + + +class HTMLFile(AbstractFileAdapter): def __init__(self, obj, load_kwargs=None, save_kwargs=None): super().__init__(obj, file_ext='html', load_kwargs=load_kwargs, save_kwargs=save_kwargs) @@ -86,7 +100,7 @@ def save_to_file(self, data, filepath, **kwargs): f.write(data) -class DataframeFile(AbstractFileStrategy): +class DataframeFile(AbstractFileAdapter): def __init__(self, obj, load_kwargs=None, save_kwargs=None): super().__init__(obj, file_ext='csv', load_kwargs=load_kwargs, save_kwargs=save_kwargs) diff --git a/pystruct/utils/graph_structures.py b/pystruct/utils/graph_structures.py index 9b77ef8..143eff8 100644 --- a/pystruct/utils/graph_structures.py +++ b/pystruct/utils/graph_structures.py @@ -35,3 +35,5 @@ def subgraphs(self): res_graph_objs.sort(key=lambda g: len(g.nodes), reverse=True) return res_graph_objs + def size(self): + return len(self.nodes) \ No newline at end of file diff --git a/pystruct/utils/logs.py b/pystruct/utils/logs.py index 4216d18..09baa19 100644 --- a/pystruct/utils/logs.py +++ b/pystruct/utils/logs.py @@ -81,7 +81,7 @@ def log_obj_stage(*args, **kwargs): def 
log_plantuml(*args, **kwargs): - log_pink(*args, **kwargs) + log_pink("PLANTUML:", *args, **kwargs) def timing_log(_func): diff --git a/pystruct/utils/mixins.py b/pystruct/utils/mixins.py new file mode 100644 index 0000000..c3ddb7c --- /dev/null +++ b/pystruct/utils/mixins.py @@ -0,0 +1,18 @@ +import abc + + +class JSONableMixin(abc.ABC): + @abc.abstractmethod + def to_json(self): + pass + + +class HTMLMixin: + def to_html(self): + return f"

to_html:'{self}'

" + + +class NameMixin: + @classmethod + def name(cls): + return cls.__name__ diff --git a/pystruct/utils/object_utils.py b/pystruct/utils/object_utils.py new file mode 100644 index 0000000..f7990d9 --- /dev/null +++ b/pystruct/utils/object_utils.py @@ -0,0 +1,24 @@ +import abc +import sys +import inspect + +from pystruct.objects.data_objects import AbstractObject +from pystruct.objects.imports_data_objects import * +from pystruct.objects.metric_tables import * +from pystruct.objects.metric_obj import * +from pystruct.objects.uml_graph_obj import * +from pystruct.objects.dependencies import * +from pystruct.objects.full_report import * +from pystruct.utils.python_utils import subclasses_of_class + + +def get_object_class_from_class_name(class_name): + return getattr(sys.modules[__name__], class_name) + + +def get_all_object_classes(): + return subclasses_of_class(AbstractObject) + + +def get_all_concrete_object_classes(): + return [_cls for _cls in get_all_object_classes() if not inspect.isabstract(_cls)] diff --git a/pystruct/utils/plantuml_utils.py b/pystruct/utils/plantuml_utils.py index 0a27163..7fea3e0 100644 --- a/pystruct/utils/plantuml_utils.py +++ b/pystruct/utils/plantuml_utils.py @@ -7,6 +7,7 @@ from pystruct.utils.logs import log_plantuml PLANTUML_LOCAL_SERVER_URL = 'http://localhost:8080/img/' +PLANTUML_DOCKER_SERVER_URL = 'http://plantuml:8080/img/' PLANTUML_WEB_SERVER_URL = 'http://www.plantuml.com/plantuml/img/' @@ -27,43 +28,55 @@ def result(self): class PlantUMLService: - @staticmethod - def _check_local_plantuml_server(): - try: - pl = plantuml.PlantUML(PLANTUML_LOCAL_SERVER_URL) - pl.processes("""@startuml\nBob -> Alice : hello\n@enduml""") - except ConnectionRefusedError as ce: - return False - return True + __instance = None + + @classmethod + def get_instance(cls): + if cls.__instance is None: + cls.__instance = PlantUMLService() + log_plantuml(f"PlantUMLService: Instance is created.") + return cls.__instance def __init__(self, 
multithreading=False): self._multithreading_flag = multithreading self._plant_uml_server = None + self.reset_plant_uml_server() - @property - def plantuml_server(self): - if self._plant_uml_server is None: - self.set_plant_uml_server() - return self._plant_uml_server - - def set_plant_uml_server(self): - if self._plant_uml_server is not None: - return - - if PlantUMLService._check_local_plantuml_server(): - log_plantuml(f"(LOCALHOST) Plant UML running locally: {PLANTUML_LOCAL_SERVER_URL}") - self._plant_uml_server = plantuml.PlantUML(PLANTUML_LOCAL_SERVER_URL) - else: - log_plantuml(f"(WEB) Plant UML running on web: {PLANTUML_WEB_SERVER_URL}") - self._plant_uml_server = plantuml.PlantUML(PLANTUML_WEB_SERVER_URL) + def reset_plant_uml_server(self): + def init_plantuml(url): + log_plantuml(f"Attempting to connect to a local Plant UML server: {url}") + plant_uml_server = plantuml.PlantUML(url) + log_plantuml(f"Sending test message...") + res = plant_uml_server.processes(plantuml_text="""@startuml\nBob -> Alice : hello\n@enduml""") + log_plantuml(f"Response length {len(res)}") + return plant_uml_server - def convert_doc_to_html_image(self, doc): - log_plantuml(f"Processing plantUML document (size={len(doc)})..") - raw_image_data = self.plantuml_server.processes(plantuml_text=doc) - image_html = ImageHTML(raw_image_data) - log_plantuml(f"PlantUML document (size={len(doc)}) is done.") + try: + self._plant_uml_server = init_plantuml(PLANTUML_LOCAL_SERVER_URL) + log_plantuml(f"(LOCALHOST) Plant UML is running locally: {PLANTUML_LOCAL_SERVER_URL}") + except (plantuml.PlantUMLConnectionError, ConnectionRefusedError, OSError) as local_connection_error: + try: + self._plant_uml_server = init_plantuml(PLANTUML_DOCKER_SERVER_URL) + log_plantuml(f"(DOCKER) Plant UML is running on web: {PLANTUML_DOCKER_SERVER_URL}") + except (plantuml.PlantUMLConnectionError, ConnectionRefusedError) as docker_connection_error: + self._plant_uml_server = init_plantuml(PLANTUML_WEB_SERVER_URL) + 
log_plantuml(f"(WEB) Plant UML is running on web: {PLANTUML_WEB_SERVER_URL}") - return image_html + def convert_doc_to_html_image(self, doc, error_message=''): + try: + log_plantuml(f"Processing plantUML document (size={len(doc)})..") + raw_image_data = self._plant_uml_server.processes(plantuml_text=doc) + image_html = ImageHTML(raw_image_data) + log_plantuml(f"PlantUML document (size={len(doc)}) is done.") + return image_html + # except plantuml.PlantUMLHTTPError as http_error: + except Exception as e: + if error_message is None: + raise + else: + log_plantuml(f"WARNING: PlantUML document (size={len(doc)}) failed.") + error_message_uml = f"""{error_message} Error: {e}""" + return error_message_uml def convert_multiple_docs_to_html_images(self, docs): if self._multithreading_flag: @@ -71,9 +84,12 @@ def convert_multiple_docs_to_html_images(self, docs): raise NotImplementedError # return self._send_multiple_requests(docs) doesn't work right now except Exception as exception: - log_plantuml(f"Processing {len(docs)} documents with multithreading failed with {exception=}. Switching to linear.") + log_plantuml( + f"Processing {len(docs)} documents with multithreading failed with {exception=}. 
Switching to linear.") self._multithreading_flag = False - return [self.convert_doc_to_html_image(doc) for doc in docs] + + html_images = [self.convert_doc_to_html_image(doc) for doc in docs] + return html_images def _send_multiple_requests(self, docs): threads = [] @@ -90,11 +106,7 @@ def _send_multiple_requests(self, docs): if __name__ == "__main__": - serv = PlantUMLService() + serv = PlantUMLService.get_instance() docs = ["""@startuml\nBob -> Alice : hello\n@enduml""", """@startuml\nBob -> Alice : hello\n@enduml"""] res = serv.convert_multiple_docs_to_html_images(docs) print(res) - - - - diff --git a/pystruct/utils/python_utils.py b/pystruct/utils/python_utils.py index c8e0f90..bd7e7e4 100644 --- a/pystruct/utils/python_utils.py +++ b/pystruct/utils/python_utils.py @@ -1,10 +1,14 @@ import re +import typing from urllib.error import URLError from urllib.request import urlopen import pkgutil from functools import lru_cache import logging +from pystruct.utils import logs + + @lru_cache def all_python_builtin_packages(): return sorted([mod.name for mod in list(pkgutil.iter_modules())]) @@ -12,7 +16,6 @@ def all_python_builtin_packages(): @lru_cache def is_python_builtin_package(pkg_name): - # TODO save a version of fetch_python_builtin_packages_from_python_docs to a file """ If the machine is connected to the internet it will try to fetch the python built-in packages from the original Python docs site. 
Otherwise, it will fetch them from pkgutil @@ -38,3 +41,36 @@ def fetch_python_builtin_packages_from_python_docs(): packages = [pkg.split(r'library/')[1].split(r'.html')[0] for pkg in re.findall(' 0: + self._code_dir = self._project_dir / int_dir_names.pop() + log_disk_ops(f"Dataset: Found code directory {self._code_dir}") + + @property + def name(self): + return self._dataset_name + + @property + def code_directory(self): + return self._code_dir + + @property + def git_directory(self): + return self._git_dir + + @property + def objects_directory(self): + return self._objs_dir + + def add_python_files_from_path(self, python_source): + log_disk_ops(f"Dataset: Fetching python files from {str(python_source)} ...") + + python_source_path = pathlib.Path(python_source) + + self._code_dir = pathlib.Path(self._project_dir, python_source_path.name) + if self._code_dir.exists(): + shutil.rmtree(self._code_dir) + self._code_dir.mkdir() + + files_to_fetch = list(python_source_path.rglob('*.py')) + for from_filepath in files_to_fetch: + self._copy_code_file(from_filepath, python_source_path) + log_disk_ops(f"Dataset: Fetched {len(files_to_fetch)} python files from {str(python_source)} to {str(self._code_dir)}") + + def _copy_code_file(self, from_filepath, source_filepath): + relative_filepath = from_filepath.relative_to(source_filepath) + to_filepath = self.code_directory / relative_filepath + for parent_path in list(to_filepath.parents)[::-1]: + parent_path.mkdir(exist_ok=True) + to_filepath.write_text(from_filepath.read_text()) + log_disk_ops(f"Dataset: Copied from {str(from_filepath)} to {str(to_filepath)}") + + def add_python_files_from_git(self, git_url, code_dir=None, branch='master'): + download_dir = self._project_dir / 'git' + if download_dir.exists(): + shutil.rmtree(download_dir) + + if code_dir is None: + code_dir = self._find_code_dir(git_url) + + log_disk_ops(f"Dataset: Accessing git repository {str(download_dir)}...") + Repo.clone_from(git_url, download_dir, 
branch=branch) + log_disk_ops(f"Dataset: Downloaded git repository {str(download_dir)}.") + self.add_python_files_from_path(download_dir / code_dir) + + def _find_code_dir(self, dir_path): + init_files = dir_path.rglob('*/__init__.py') + if len(init_files) > 0: + code_dir = init_files[0].parent + log_disk_ops(f"Dataset: Discovered python package {code_dir}") + return code_dir + else: + python_files = dir_path.rglob('*.py') + if len(python_files) > 0: + code_dir = python_files[0].parent + log_disk_ops(f"Dataset: Discovered directory containing python files {code_dir}") + return code_dir + else: + raise FileNotFoundError(f"Dataset: No python files found in {dir_path}") + + +class FileDirectory(Directory): # TODO redundant. maybe delete + def __init__(self, obj_dir): + super().__init__(obj_dir) + self.path.mkdir(exist_ok=True) + + def read_file(self, _dir, file): + file_path = self.path / _dir / file + return StringIO(file_path.read_text()) + + def write_file(self, content, _dir, file): + file_path = self.path / _dir / file + file_path.write_text(content) + + def delete_file(self, _dir, file): + file_path = self.path / _dir / file + file_path.unlink() + + +class DatasetsDirectory(Directory): + def __init__(self, dir_path): + super().__init__(dir_path) + log_disk_ops(f"DatasetsDirectory: Datasets directory {str(self.path)}") + + def get_datasets(self): + all_dataset_paths = [path for path in self.path.iterdir() if path.is_dir()] + datasets = [Dataset(path) for path in all_dataset_paths] + return datasets + + def new_dataset(self, dataset_name): + dataset_path = self.path / dataset_name + + if dataset_path.exists(): + self.delete_dataset(dataset_name) + + dataset = Dataset(dataset_path) + return dataset + + def delete_dataset(self, dataset_name): + dataset_path = self.path / dataset_name + log_disk_ops(f"DatasetsDirectory: Deleting {str(dataset_path)}") + shutil.rmtree(dataset_path) + + + + + diff --git a/pystruct/utils/string_utils.py b/pystruct/utils/string_utils.py 
new file mode 100644 index 0000000..b860c84 --- /dev/null +++ b/pystruct/utils/string_utils.py @@ -0,0 +1,80 @@ +import re + + +def split_camel_case_string(camel_case_string): + """ChatGPT + Here's how the function works: + * The function uses the re.findall() method to find all matches of the regular expression pattern '[A-Z][a-z0-9]*|[A-Z]+' in the input class_name. This pattern matches either an uppercase letter followed by any number of lowercase letters or digits ([A-Z][a-z0-9]*), or one or more consecutive uppercase letters ([A-Z]+). The re.findall() method returns a list of all matches. + * The function then joins the list of matches with a space separator using the join() method and returns the result. + + Here's an example usage of the function: + _prettify_classname('MyClassName') -> 'My Class Name' + _prettify_classname('HTTPResponse') -> 'HTTP Response' + _prettify_classname('DB2Connection') -> 'DB2 Connection' + _prettify_classname('MyXMLParserClass') -> 'My XML Parser Class' + _prettify_classname('HTML') -> 'HTML' + + """ + # words = re.findall('[A-Z][a-z0-9]*|[A-Z]+', class_name) # chatGPT + words = re.findall('.+?(?:(?<=[a-z0-9])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|$)', camel_case_string) + return ' '.join(words) + + +def single_to_multiline_string(strings, max_length=30, seperator=', '): + """ ChatGPT + Takes in a list of strings and concatenates them into a multiline string with a maximum line length + specified by `max_length`. The `seperator` parameter is used to separate the individual strings in the + concatenated string. + + Args: + strings (list): A list of strings to concatenate into a multiline string. + max_length (int): The maximum length of each line in the multiline string. Defaults to 30. + seperator (str): The string to use to separate each string in the concatenated string. Defaults to ", ". + + Returns: + str: The concatenated multiline string. 
+ + Example: + >>> strings = ["This", " is a long string", "This is another long string", "Yet another long string"] + >>> single_to_multiline_string(strings, max_length=20, seperator='; ') + 'This is a long string; \n> This is another long string; \n> Yet another long string' + + def test_create_multiline_string(): + # Test case 1: Strings fit within maximum line length + strings = ["This is a long string", "This is another long string", "Yet another long string"] + expected_output = "This is a long string, This is another long string, Yet another long string" + assert _create_multiline_string(strings, max_length=30, seperator=', ') == expected_output + + # Test case 2: Strings need to be split into multiple lines + strings = ["This is a long string", "This is another long string", "Yet another long string"] + expected_output = "This is a long string,\n> This is another long string,\n> Yet another long string" + assert _create_multiline_string(strings, max_length=20, seperator=', ') == expected_output + + # Test case 3: Empty list of strings + strings = [] + expected_output = "" + assert _create_multiline_string(strings, max_length=30, seperator=', ') == expected_output + + # Test case 4: Single string that is longer than max line length + strings = ["This is a very long string that exceeds the maximum line length"] + expected_output = "This is a very long string that exceeds the maximum line length" + assert _create_multiline_string(strings, max_length=20, seperator=', ') == expected_output + + # Test case 5: Custom separator + strings = ["This is a long string", "This is another long string", "Yet another long string"] + expected_output = "This is a long string; \n> This is another long string; \n> Yet another long string" + assert _create_multiline_string(strings, max_length=20, seperator='; ') == expected_output + """ + output = '' + current_line_length = 0 + + for s in strings: + if current_line_length + len(s) <= max_length: + output += (seperator if 
len(output) > 0 else '')+s + current_line_length += len(s) + else: + output += ',\n> ' + s + current_line_length = len(s) + + return output + diff --git a/requirements.txt b/requirements.txt index 624a548..15c1d80 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,5 @@ GitPython Flask==2.2.2 matplotlib networkx -markdown \ No newline at end of file +markdown +json2html \ No newline at end of file diff --git a/static/styles.css b/static/styles.css index 0cb55a9..4585f24 100644 --- a/static/styles.css +++ b/static/styles.css @@ -58,3 +58,36 @@ th { background-color: #04AA6D; color: white; } + +table { + border-collapse: collapse; + width: 100%; + max-width: 100%; + margin-bottom: 1rem; + background-color: #343a40; + color: #fff; + font-size: 0.9rem; + line-height: 1.4; + font-weight: normal; + text-align: left; +} + +table th, table td { + padding: 0.5rem; + vertical-align: middle; + border: 1px solid #adb5bd; +} + +table th { + background-color: #212529; + font-weight: bold; +} + +table tbody tr:nth-of-type(even) { + background-color: #212529; +} + +table thead th { + vertical-align: bottom; + border-bottom: 2px solid #adb5bd; +} \ No newline at end of file diff --git a/templates/debug.html b/templates/debug.html new file mode 100644 index 0000000..97efed4 --- /dev/null +++ b/templates/debug.html @@ -0,0 +1,17 @@ +{% extends "layout.html" %} + +{% block content %} +

Debug page

+
Test all objects (will attempt to build everything)
+ +

Existing objects

+{% for _cls in existing_objs %} + {{_cls}}
+{% endfor %} + +

All objects

+{% for _cls in all_objects %} + {{_cls}}
+{% endfor %} + +{% endblock content %} \ No newline at end of file diff --git a/templates/index.html b/templates/index.html index 773cb24..d92d812 100644 --- a/templates/index.html +++ b/templates/index.html @@ -4,17 +4,10 @@

Home

Reports:
    - {% for title, obj_class in table_of_content_dict.items() %} -
  • {{title}}
  • + {% for title, obj_class_name in table_of_content_dict.items() %} +
  • {{title}}
  • {% endfor %}
-

Download full report

-

Load another project

-{% if debug_flag %} -

Debug is enabled

- {% for _cls in all_objects %} - {{_cls}}
- {% endfor %} - -{% endif %} +

Download full report

+

Load another project

{% endblock content %} \ No newline at end of file diff --git a/templates/layout.html b/templates/layout.html index 854aedb..d7432a2 100644 --- a/templates/layout.html +++ b/templates/layout.html @@ -6,7 +6,12 @@ +

Current project: {{ dataset_name }}

Home +{% if debug_flag %} + Debug +{% endif %} + {% block content %}

Welcome to Python source metric!

{% endblock content %} diff --git a/templates/load_project.html b/templates/load_project.html deleted file mode 100644 index 81f1cad..0000000 --- a/templates/load_project.html +++ /dev/null @@ -1,23 +0,0 @@ -{% extends "layout.html" %} - -{% block content %} -

Load another project

- -
-

Browse local files:

- - -
- -
- - -
-

Github project:

- - -
-
- - -{% endblock content %} diff --git a/templates/objects.html b/templates/objects.html index 11d2b79..a21d8d1 100644 --- a/templates/objects.html +++ b/templates/objects.html @@ -1,6 +1,6 @@ {% extends "layout.html" %} {% block content %} -

Download page

+

Download page

{{html_object|safe}} {% endblock content %} \ No newline at end of file diff --git a/templates/project_menu.html b/templates/project_menu.html new file mode 100644 index 0000000..2d7ee30 --- /dev/null +++ b/templates/project_menu.html @@ -0,0 +1,78 @@ +{% extends "layout.html" %} + +{% block content %} +

Open project

+
+ + + + +
+ + +

Load another project

+
+

Browse local files and directories:

+ + + + + +
+ + + + + +
+

Github project:

+ +
+ + +
+ + +
+ + +
+ +

Delete project

+
+ + + + +
+ +{% endblock content %} diff --git a/templates/test_all_objects.html b/templates/test_all_objects.html new file mode 100644 index 0000000..d9fbd3b --- /dev/null +++ b/templates/test_all_objects.html @@ -0,0 +1,6 @@ +{% extends "layout.html" %} + +{% block content %} +

Rebuilt all objects

+{{objects_table | safe}} +{% endblock content %} \ No newline at end of file diff --git a/tests/objects/test_PackageDependencyStatsDataframe.py b/tests/objects/test_PackageDependencyStatsDataframe.py index e1a538d..6872e58 100644 --- a/tests/objects/test_PackageDependencyStatsDataframe.py +++ b/tests/objects/test_PackageDependencyStatsDataframe.py @@ -17,7 +17,7 @@ def test_external_packages(self, mock_imports): }) expected_df = pd.DataFrame({ 'package': ['a1', 'a1.b1', 'a2'], - 'external_packages': [['ext_1', 'ext_2'], ['ext_1'], ['ext_1', 'ext_2', 'ext_3']], + 'external_packages': ['ext_1,ext_2', 'ext_1', 'ext_1,ext_2,ext_3'], 'number_of_external_packages': [2, 1, 3], }).set_index('package') @@ -35,7 +35,7 @@ def test_python_builtin_packages(self, mock_imports): }) expected_df = pd.DataFrame({ 'package': ['a2', 'a3'], - 'builtin_packages': [['builtin_1', 'builtin_2'], ['builtin_1']], + 'builtin_packages': ['builtin_1,builtin_2', 'builtin_1'], 'number_of_builtin_packages': [2, 1], }).set_index('package') obj = PackageDependencyStatsDataframe() @@ -52,7 +52,7 @@ def test_internal_packages(self, mock_imports): }) expected_df = pd.DataFrame({ 'package': ['a2', 'a3'], - 'internal_packages': [['int_1', 'int_2'], ['int_2']], + 'internal_packages': ['int_1,int_2', 'int_2'], 'number_of_internal_packages': [2, 1], }).set_index('package') obj = PackageDependencyStatsDataframe() @@ -69,7 +69,7 @@ def test_internal_modules(self, mock_imports): }) expected_df = pd.DataFrame({ 'package': ['a2', 'a3'], - 'internal_modules': [['int_1', 'int_2'], ['int_2']], + 'internal_modules': ['int_1,int_2', 'int_2'], 'number_of_internal_modules': [2, 1], }).set_index('package') obj = PackageDependencyStatsDataframe() @@ -101,7 +101,7 @@ def test_imported_from_packages(self, mock_imports): }) expected_df = pd.DataFrame({ 'package': ['int_1', 'int_2'], - 'imported_from_packages': [['a2'], ['a2', 'a3']], + 'imported_from_packages': ['a2', 'a2,a3'], 'times_been_imported_from_packages': [1, 2], 
}).set_index('package') obj = PackageDependencyStatsDataframe() diff --git a/tests/objects/test_data_objects.py b/tests/objects/test_data_objects.py index d787035..91a5b28 100644 --- a/tests/objects/test_data_objects.py +++ b/tests/objects/test_data_objects.py @@ -1,61 +1,206 @@ +import os.path import unittest -from unittest.mock import patch, MagicMock +from unittest.mock import MagicMock -from pystruct.objects.data_objects import AbstractObject +import pandas as pd + +from pystruct.objects import data_objects as do class TestAbstractObject(unittest.TestCase): - @patch.multiple(AbstractObject, __abstractmethods__=set()) - @patch.object(AbstractObject, 'build', return_value='build_ret') - def test_data_without_file_strategy(self, mock_build): - obj = AbstractObject() - self.assertEqual(obj._prepare_data(), 'build_ret') - mock_build.assert_called_once() - - @patch.multiple(AbstractObject, __abstractmethods__=set()) - @patch.object(AbstractObject, 'build', return_value='build_ret') - def test_data_build_save_cache(self, mock_build): - file_strategy = MagicMock() - file_strategy.load = MagicMock(return_value=None) - file_strategy.save = MagicMock() - - obj = AbstractObject(file_strategy) + def test_data_without_file_adapter(self): + class TestObject(do.AbstractObject): + def __init__(self): + super().__init__() + + def build(self): + return 'build_result' + + obj = TestObject() + self.assertEqual(obj.data(), 'build_result') + self.assertEqual(obj.data(), 'build_result') + + def test_data_build_cache_and_save(self): + class TestObject(do.AbstractObject): + def __init__(self, file_adapter): + super().__init__(file_adapter) + + def build(self): + return 'build_result' + + file_adapter = MagicMock() + file_adapter.load = MagicMock(return_value=None) + file_adapter.save = MagicMock() + + obj = TestObject(file_adapter) self.assertIsNone(obj._data) - self.assertEqual(obj._prepare_data(), 'build_ret') - file_strategy.load.assert_called_once() - 
file_strategy.save.assert_called_once_with(obj._data) - mock_build.assert_called_once() + self.assertEqual(obj.data(), 'build_result') + self.assertIsNotNone(obj._data) + file_adapter.load.assert_called_once() + file_adapter.save.assert_called_once_with(obj._data) - self.assertEqual(obj._prepare_data(), 'build_ret') + self.assertEqual(obj.data(), 'build_result') self.assertIsNotNone(obj._data) - file_strategy.load.assert_called_once() - file_strategy.save.assert_called_once_with(obj._data) - mock_build.assert_called_once() - - @patch.multiple(AbstractObject, __abstractmethods__=set()) - @patch.object(AbstractObject, 'build', return_value='build_ret') - def test_data_save_cache(self, mock_build): - file_strategy = MagicMock() - file_strategy.load = MagicMock(return_value='load_ret') - file_strategy.save = MagicMock() - - obj = AbstractObject(file_strategy) + file_adapter.load.assert_called_once() + file_adapter.save.assert_called_once_with(obj._data) + + def test_data_load_and_cache(self): + class TestObject(do.AbstractObject): + def __init__(self, file_adapter): + super().__init__(file_adapter) + + def build(self): + return 'build_result' + + file_adapter = MagicMock() + file_adapter.load = MagicMock(return_value='load_result') + file_adapter.save = MagicMock() + + obj = TestObject(file_adapter) self.assertIsNone(obj._data) - self.assertEqual(obj._prepare_data(), 'load_ret') - file_strategy.load.assert_called_once() - file_strategy.save.assert_not_called() - mock_build.assert_not_called() + self.assertEqual(obj.data(), 'load_result') + file_adapter.load.assert_called_once() + file_adapter.save.assert_not_called() - self.assertEqual(obj._prepare_data(), 'load_ret') + self.assertEqual(obj.data(), 'load_result') self.assertIsNotNone(obj._data) - file_strategy.load.assert_called_once() - file_strategy.save.assert_not_called() - mock_build.assert_not_called() - - @patch.multiple(AbstractObject, __abstractmethods__=set()) - @patch.object(AbstractObject, '_prepare_data', 
side_effect=Exception) - def test_data(self, *args): - self.assertIsNotNone(AbstractObject().data()) + file_adapter.load.assert_called_once() + file_adapter.save.assert_not_called() + + def test_data_load_delete_and_build(self): + class TestObject(do.AbstractObject): + def __init__(self, file_adapter): + super().__init__(file_adapter) + + def build(self): + return 'build_result' + + file_adapter = MagicMock() + file_adapter.load = MagicMock(return_value='load_result') + file_adapter.save = MagicMock() + file_adapter.delete_file = MagicMock() + + obj = TestObject(file_adapter) + self.assertIsNone(obj._data) + self.assertEqual(obj.data(), 'load_result') + self.assertIsNotNone(obj._data) + file_adapter.load.assert_called_once() + file_adapter.save.assert_not_called() + + self.assertIsNotNone(obj._data) + obj.delete() + file_adapter.load = MagicMock(return_value=None) + self.assertIsNone(obj._data) + file_adapter.delete_file.assert_called_once() + + obj = TestObject(file_adapter) + self.assertIsNone(obj._data) + self.assertEqual(obj.data(), 'build_result') + self.assertIsNotNone(obj._data) + file_adapter.load.assert_called_once() + file_adapter.save.assert_called_once() + + +class TestTextObjectABC(unittest.TestCase): + def test_lifecycle(self): + build_result = "example_text" + + class ExampleTextObject(do.TextObjectABC): + def __init__(self): + super().__init__('ext') + + def build(self): + return build_result + + obj = ExampleTextObject() + self.assertEqual(obj.text(), build_result) + self.assertTrue(os.path.exists(obj._file_adapter.filepath)) + + del obj + obj = ExampleTextObject() + self.assertEqual(obj.text(), build_result) + + obj.delete() + self.assertFalse(os.path.exists(obj._file_adapter.filepath)) + + +class TestHTMLObjectABC(unittest.TestCase): + def test_lifecycle(self): + build_result = "example_html" + + class ExampleHTMLObject(do.HTMLObjectABC): + def __init__(self): + super().__init__() + + def build(self): + return build_result + + obj = 
ExampleHTMLObject() + self.assertEqual(obj.html(), build_result) + self.assertTrue(os.path.exists(obj._file_adapter.filepath)) + + del obj + obj = ExampleHTMLObject() + self.assertEqual(obj.html(), build_result) + + obj.delete() + self.assertFalse(os.path.exists(obj._file_adapter.filepath)) + + +class TestDataframeObjectABC(unittest.TestCase): + def test_lifecycle(self): + build_result = pd.DataFrame({ + 'col1': [1, 2, 3], + 'col2': ['a', 'b', 'c'], + 'col3': [True, False, None] + }) + + class ExampleDataframeObject(do.DataframeObjectABC): + def __init__(self): + super().__init__() + + def build(self): + return build_result + + obj = ExampleDataframeObject() + + test_result = obj.dataframe() + pd.testing.assert_frame_equal(test_result, build_result) + self.assertTrue(os.path.exists(obj._file_adapter.filepath)) + + del obj + obj = ExampleDataframeObject() + pd.testing.assert_frame_equal(obj.dataframe(), build_result) + + obj.delete() + self.assertFalse(os.path.exists(obj._file_adapter.filepath)) + + +class TestJSONObjectABC(unittest.TestCase): + def test_lifecycle(self): + build_result = [ + {'a': 0, 'b': 1}, + {'c': 2} + ] + + class ExampleJSONObject(do.JSONObjectABC): + def __init__(self): + super().__init__() + + def build(self): + return build_result + + obj = ExampleJSONObject() + test_result = obj.json() + self.assertEqual(test_result, build_result) + self.assertTrue(os.path.exists(obj._file_adapter.filepath)) + + del obj + obj = ExampleJSONObject() + self.assertEqual(obj.json(), build_result) + + obj.delete() + self.assertFalse(os.path.exists(obj._file_adapter.filepath)) if __name__ == '__main__': diff --git a/tests/utils/test_file_strategies.py b/tests/utils/test_file_strategies.py index b5c474a..5416fa0 100644 --- a/tests/utils/test_file_strategies.py +++ b/tests/utils/test_file_strategies.py @@ -1,14 +1,14 @@ import unittest from unittest.mock import patch -import pystruct.utils.file_strategies as fs +import pystruct.utils.file_adapters as fs class 
class TestObjectUtils(unittest.TestCase):
    """Tests for the class-lookup helpers in ``pystruct.utils.object_utils``."""

    def test_get_object_class_from_class_name(self):
        """A class name resolves to the matching class object."""
        self.assertEqual(utils.get_object_class_from_class_name('AbstractObject'), do.AbstractObject)

    def test_get_all_object_classes(self):
        """The full listing is strictly larger than the concrete-only listing."""
        self.assertGreater(len(utils.get_all_object_classes()),
                           len(utils.get_all_concrete_object_classes()))

    def test_get_all_concrete_object_classes(self):
        """The concrete listing is non-empty and contains none of the abstract bases."""
        concrete_objs = utils.get_all_concrete_object_classes()
        self.assertGreater(len(concrete_objs), 0)

        abstract_bases = (
            do.AbstractObject,
            do.DataframeObjectABC,
            do.HTMLObjectABC,
            do.HTMLTableObjectABC,
        )
        # The listing is expected to hold class objects, not instances, so an
        # assertNotIsInstance(entry, base) check could never fail. Compare each
        # entry to the abstract bases by identity instead.
        for obj_class in concrete_objs:
            for abstract_base in abstract_bases:
                with self.subTest(obj_class=obj_class, abstract_base=abstract_base):
                    self.assertIsNot(obj_class, abstract_base)
@patch('pystruct.utils.plantuml_utils.plantuml.PlantUML')
def test_reset_plant_uml_server_local_and_web_connection_error(self, mock_plantuml):
    """Constructing the service raises when every PlantUML server is unreachable.

    ``mock_plantuml`` replaces ``plantuml.PlantUML`` and raises
    ``PlantUMLConnectionError`` on every call, whatever URL is passed.
    """
    mock_plantuml.side_effect = plantuml_utils.plantuml.PlantUMLConnectionError()

    # Only the constructor call belongs inside the context manager: statements
    # placed after a raising call in the same ``with`` body are never executed.
    with self.assertRaises(plantuml_utils.plantuml.PlantUMLConnectionError):
        PlantUMLService()

    # assert_called_with only inspects the most recent call, so it cannot confirm
    # two different URLs; assert_any_call checks the whole call history.
    mock_plantuml.assert_any_call(PLANTUML_LOCAL_SERVER_URL)
    mock_plantuml.assert_any_call(PLANTUML_WEB_SERVER_URL)
    # No instance exists once the constructor has raised, so there is no
    # ``_plant_uml_server`` attribute to inspect here.
class TestMultiSingleton(unittest.TestCase):
    """Per-key instance sharing of ``python_utils.MultiSingleton``."""

    def test_single_instance_creation(self):
        # Two constructions with the same key must compare equal.
        first = python_utils.MultiSingleton('key1')
        second = python_utils.MultiSingleton('key1')
        self.assertEqual(first, second)

    def test_multiple_instance_creation(self):
        # Constructions with different keys must not compare equal.
        for_key1 = python_utils.MultiSingleton('key1')
        for_key2 = python_utils.MultiSingleton('key2')
        self.assertNotEqual(for_key1, for_key2)

    def test_existing_instance_retrieval(self):
        # Build two objects per key (key1 first, then key2), then check that
        # same-key pairs match and cross-key objects differ.
        pairs = {
            key: (python_utils.MultiSingleton(key), python_utils.MultiSingleton(key))
            for key in ('key1', 'key2')
        }
        key1_a, key1_b = pairs['key1']
        key2_a, key2_b = pairs['key2']
        self.assertEqual(key1_a, key1_b)
        self.assertEqual(key2_a, key2_b)
        self.assertNotEqual(key1_a, key2_a)
class TestDirectory(unittest.TestCase):
    """``storage.Directory.path`` for string and ``pathlib.Path`` arguments."""

    def test_directory(self):
        expected_path = pathlib.Path('test_dir')

        # Built from a plain string.
        from_string = storage.Directory('test_dir')
        self.assertEqual(from_string.path, expected_path)

        # Built from an existing Path object.
        from_path = storage.Directory(pathlib.Path('test_dir'))
        self.assertEqual(from_path.path, expected_path)
class TestSingleToMultilineString(unittest.TestCase):
    """Table-driven checks for ``su.single_to_multiline_string``."""

    def test_single_to_multiline_string(self):
        three_strings = ("This is a long string", "This is another long string", "Yet another long string")
        wrapped_with_semicolons = "This is a long string;\n> This is another long string;\n> Yet another long string"
        oversized = "This is a very long string that exceeds the maximum line length"

        # Each row: (strings, max_length, seperator, expected_output).
        # Rows run in order and every row gets its own fresh list.
        cases = [
            # 1: result stays on a single line with max_length=30
            (list(three_strings), 30, ', ',
             "This is a long string, This is another long string, Yet another long string"),
            # 2: result is broken into several lines with max_length=20
            (list(three_strings), 20, '; ', wrapped_with_semicolons),
            # 3: empty input gives an empty string
            ([], 30, ', ', ""),
            # 4: a lone string longer than max_length is returned unchanged
            ([oversized], 20, ', ', oversized),
            # 5: custom separator (same inputs and expectation as row 2)
            (list(three_strings), 20, '; ', wrapped_with_semicolons),
        ]

        for strings, max_length, seperator, expected_output in cases:
            actual = su.single_to_multiline_string(strings, max_length=max_length, seperator=seperator)
            self.assertEqual(actual, expected_output)