From 0f4903b5625b02878e20ee62098a433e6f4a0ea8 Mon Sep 17 00:00:00 2001 From: allburov Date: Thu, 14 Jul 2022 20:52:13 +0700 Subject: [PATCH] Refactor delete_empty_folder using treelib and Tree approach --- .github/workflows/release.yml | 2 +- README.md | 38 ++++- artifactory_cleanup/__init__.py | 1 + artifactory_cleanup/rules/base.py | 7 +- artifactory_cleanup/rules/delete.py | 22 +-- artifactory_cleanup/rules/utils.py | 234 ++++++++++++++++++---------- pytest.ini | 2 + setup.py | 1 + tests/artifacts_list.json | 20 ++- tests/test_rules_delete.py | 18 ++- 10 files changed, 231 insertions(+), 114 deletions(-) create mode 100644 pytest.ini diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 116456e..42d24d7 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -29,7 +29,7 @@ jobs: - name: Test with pytest run: | - python3 -m pytest -s --color=yes -vv tests + python3 -m pytest -s --color=yes -vv tests artifactory_cleanup - name: Build package run: python -m build diff --git a/README.md b/README.md index 1853fdb..f24d382 100644 --- a/README.md +++ b/README.md @@ -12,8 +12,9 @@ - [Usage](#usage) - [Commands](#commands) - [Available Rules](#available-rules) - - [Artifact cleanup policies](#artifact-cleanup-policies) + - [ArtifactNode cleanup policies](#artifact-cleanup-policies) - [Docker Container Usage](#docker-container-usage) +- [FAQ](#faq) - [Release](#release) @@ -41,7 +42,7 @@ You should take the following steps: 2. Сreate a python file, for example, `reponame.py` with the following contents: ```python from artifactory_cleanup import rules -from artifactory_cleanup.rules import CleanupPolicy +from artifactory_cleanup import CleanupPolicy RULES = [ @@ -83,7 +84,7 @@ artifactory-cleanup --destroy --user user --password password --artifactory-serv All rules are imported from the `rules` module. See also [List of available cleaning rules](docs/RULES) -## Artifact cleanup policies ## +## ArtifactNode cleanup policies ## To add a cleaning policy you need: @@ -92,7 +93,7 @@ To add a cleaning policy you need: ```python from artifactory_cleanup import rules -from artifactory_cleanup.rules import CleanupPolicy +from artifactory_cleanup import CleanupPolicy RULES = [ @@ -146,6 +147,35 @@ To build the container image locally run the following command in the folder of ```bash docker build . --tag artifactory-cleanup ``` +# FAQ + +## How to clean up Conan repository? +The idea came from https://github.com/devopshq/artifactory-cleanup/issues/47 + +```python +from artifactory_cleanup import rules +from artifactory_cleanup import CleanupPolicy +RULES = [ + # ------ ALL REPOS -------- + CleanupPolicy( + 'Delete files older than 60 days', + rules.repo('conan-testing'), + rules.delete_not_used_since(days=60), + # Make sure to keep conan metadata. See also + # https://github.com/devopshq/artifactory-cleanup/issues/47 + rules.exclude_filename(['.timestamp', 'index.json']), + ), + CleanupPolicy( + 'Delete empty folders', + rules.repo('conan-testing'), + rules.delete_empty_folder(), + # Exclude metadata files + # If a folder only contains these files, consider it as empty + rules.exclude_filename(['.timestamp', 'index.json']), + ), +] +``` + # Release diff --git a/artifactory_cleanup/__init__.py b/artifactory_cleanup/__init__.py index 4e1f885..529d950 100644 --- a/artifactory_cleanup/__init__.py +++ b/artifactory_cleanup/__init__.py @@ -1 +1,2 @@ from artifactory_cleanup.artifactorycleanup import ArtifactoryCleanup # noqa +from artifactory_cleanup.rules.base import CleanupPolicy # noqa diff --git a/artifactory_cleanup/rules/base.py b/artifactory_cleanup/rules/base.py index 2120dd5..9a3bf6d 100644 --- a/artifactory_cleanup/rules/base.py +++ b/artifactory_cleanup/rules/base.py @@ -200,7 +200,12 @@ def filter(self, artifacts): return artifacts def delete(self, artifact, destroy): - artifact_path = quote("{repo}/{path}/{name}".format(**artifact)) + if artifact["path"] == ".": + artifact_path = "{repo}/{name}".format(**artifact) + else: + artifact_path = "{repo}/{path}/{name}".format(**artifact) + + artifact_path = quote(artifact) if destroy: print("DESTROY MODE - delete {}".format(artifact_path)) delete_url = "{}/{}".format(self.artifactory_url, artifact_path) diff --git a/artifactory_cleanup/rules/delete.py b/artifactory_cleanup/rules/delete.py index 1edd305..590ccb9 100644 --- a/artifactory_cleanup/rules/delete.py +++ b/artifactory_cleanup/rules/delete.py @@ -1,10 +1,7 @@ from datetime import timedelta +from artifactory_cleanup.rules import utils from artifactory_cleanup.rules.base import Rule -from artifactory_cleanup.rules.utils import ( - artifacts_list_to_tree, - folder_artifacts_without_children, -) class delete_older_than(Rule): @@ -89,10 +86,10 @@ def _aql_add_filter(self, aql_query_list): class delete_empty_folder(Rule): """ - Clean up empty folders in local repositories. A special rule that runs separately on all repositories. + Remove empty folders. - Refers to the plugin - https://github.com/jfrog/artifactory-user-plugins/tree/master/cleanup/deleteEmptyDirs + If you just want to clean up empty folders - Artifactory must do it automatically. + We use the rule to help with some specific cases - look at README.md "FAQ: How to clean up Conan repository" """ def _aql_add_filter(self, aql_query_list): @@ -101,10 +98,7 @@ def _aql_add_filter(self, aql_query_list): aql_query_list.append(all_files_dict) return aql_query_list - def _filter_result(self, result_artifact): - - artifact_tree = artifacts_list_to_tree(result_artifact) - - # Now we have a dict with all folders and files - # An empty folder is represented by not having any children - return list(folder_artifacts_without_children(artifact_tree)) + def _filter_result(self, result_artifacts): + repositories = utils.build_repositories(result_artifacts) + folders = utils.get_empty_folders(repositories) + return folders diff --git a/artifactory_cleanup/rules/utils.py b/artifactory_cleanup/rules/utils.py index fa4d4d2..2cfd5bb 100644 --- a/artifactory_cleanup/rules/utils.py +++ b/artifactory_cleanup/rules/utils.py @@ -1,102 +1,164 @@ -from collections import defaultdict, deque -from typing import Dict, List +from collections import defaultdict +from typing import Dict, List, Tuple, Optional +from treelib import Node, Tree -def artifacts_list_to_tree(list_of_artifacts: List): + +def is_repository(data): + return data["path"] == "." and data["name"] == "." + + +def get_fullpath(repo, path, name, **kwargs): + """ + Get path from raw Artifactory's data + """ + # root - repo + if name == ".": + return repo + # folder under root + if path == ".": + return f"{repo}/{name}" + return f"{repo}/{path}/{name}" + + +def split_fullpath(fullpath: str) -> Tuple[str, Optional[str]]: + """ + Split path into (name, parent) + >>> split_fullpath("repo/folder/filename.py") + ('filename.py', 'repo/folder') + + >>> split_fullpath("repo") + ('repo', None) + """ + parts = fullpath.rsplit("/", maxsplit=1) + if len(parts) == 1: + return parts[0], None + return parts[1], parts[0] + + +def parse_fullpath(fullpath: str) -> Tuple[str, str, str]: """ - Convert a list of artifacts to a dict representing the directory tree. - Each entry name corresponds to the folder or file name. And has two subnodes 'children' and - 'data'. 'children' is recursively again the list of files/folder within that folder. - 'data' contains the artifact data returned by artifactory. + Parse full path to (repo, path, name) + >>> parse_fullpath("repo/path/name.py") + ('repo', 'path', 'name.py') - Major idea based on https://stackoverflow.com/a/58917078 + >>> parse_fullpath("repo/path") + ('repo', '.', 'path') + + >>> parse_fullpath("repo") + ('repo', '.', '.') """ + if "/" not in fullpath: + return fullpath, ".", "." + name, repo_path = split_fullpath(fullpath) + + if "/" not in repo_path: + return repo_path, ".", name + + repo, path = repo_path.split("/", maxsplit=1) + return repo, path, name + - def nested_dict(): +class ArtifactNode(Node): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.files = 0 + + def is_file(self): + if not self.data: + return False + return self.data["type"] == "file" + + def get_data(self) -> Dict: + if self.data: + return self.data + + repo, path, name = parse_fullpath(self.identifier) + data = dict(repo=repo, path=path, name=name) + if is_repository(data): + raise ValueError("Can not remove repository root") + return data + + +class RepositoryTree(Tree): + def parse_artifact(self, data): """ - Creates a default dictionary where each value is another default dictionary. + Parse Artifactory's raw data and add artifact to the tree """ - return defaultdict(nested_dict) + fullpath = get_fullpath(**data) + self.add_artifact(fullpath, data) + + def add_artifact(self, fullpath, data): + existed = self.get_node(fullpath) + if existed and existed.data is None: + # We met it before, but with no data + existed.data = data + return existed - def default_to_regular(d): + name, parent = split_fullpath(fullpath) + self.upsert_path(parent) + artifact = ArtifactNode(tag=name, identifier=fullpath, data=data) + self.add_node(node=artifact, parent=parent) + return artifact + + def upsert_path(self, fullpath): """ - Converts defaultdicts of defaultdicts to dict of dicts. + Create path to the folder if not exist """ - if isinstance(d, defaultdict): - d = {k: default_to_regular(v) for k, v in d.items()} - return d - - new_path_dict = nested_dict() - for artifact in list_of_artifacts: - parts = artifact["path"].split("/") - if parts: - marcher = new_path_dict - for key in parts: - # We need the repo for the root level folders. They are not in the - # artifacts list - marcher[key]["data"] = {"repo": artifact["repo"]} - marcher = marcher[key]["children"] - marcher[artifact["name"]]["data"] = artifact - artifact_tree = default_to_regular(new_path_dict) - # Artifactory also returns the directory itself. We need to remove it from the list - # since that tree branch has no children assigned - if "." in artifact_tree: - del artifact_tree["."] - return artifact_tree - - -def folder_artifacts_without_children(artifacts_tree: Dict, path=""): - """ - Takes the artifacts tree and returns the list of artifacts which are folders - and do not have any children. + if not fullpath: + return - If folder1 has only folder2 as a child, and folder2 is empty, the list only contains - folder1. I.e., empty folders are also recursively propagated back. + exists = self.contains(fullpath) + if exists: + return - The input tree will be modified and empty folders will be deleted from the tree. + self.add_artifact(fullpath, data=None) - """ + def count_files(self, nid=None) -> int: + """Count files inside the directory. DFS traversing""" + nid = nid or self.root + node: ArtifactNode = self.get_node(nid) + if node.is_file(): + node.files = 1 + return node.files - # use a deque instead of a list. it's faster to add elements there - empty_folder_artifacts = deque() + children: List[ArtifactNode] = self.children(nid) + for child in children: + self.count_files(child.identifier) + files = sum(child.files for child in children) + node.files = files + return node.files - def _add_to_del_list(name: str): - """ - Add element with name to empty folder list and remove it from the tree - """ - empty_folder_artifacts.append(artifacts_tree[name]["data"]) - # Also delete the item from the children list to recursively delete folders - # upwards - del artifacts_tree[name] - - # Use list(item.keys()) here so that we can delete items while iterating over the - # dict. - for artifact_name in list(artifacts_tree.keys()): - tree_entry = artifacts_tree[artifact_name] - if "type" in tree_entry["data"] and tree_entry["data"]["type"] == "file": - continue - if not "path" in tree_entry["data"]: - # Set the path and name for root folders which were not explicitly in the - # artifacts list - tree_entry["data"]["path"] = path - tree_entry["data"]["name"] = artifact_name - if not "children" in tree_entry or len(tree_entry["children"]) == 0: - # This an empty folder - _add_to_del_list(artifact_name) - else: - artifacts = folder_artifacts_without_children( - tree_entry["children"], - path=path + "/" + artifact_name if len(path) > 0 else artifact_name, - ) - # Additional check needed here because the recursive call may - # delete additional children. - # And here we want to check again if all children would be deleted. - # Then also delete this. - if len(tree_entry["children"]) == 0: - # just delete the whole folder since all children are empty - _add_to_del_list(artifact_name) - else: - # add all empty folder children to the list - empty_folder_artifacts.extend(artifacts) - - return empty_folder_artifacts + def get_highest_empty_folders(self, nid=None) -> List[ArtifactNode]: + """Get the highest empty folders for the repository. DFS traversing""" + nid = nid or self.root + node: ArtifactNode = self.get_node(nid) + if not node.is_root() and node.files == 0: + # Empty folder that contains only empty folders + if all(child.files == 0 for child in self.children(nid)): + return [node] + + folders = [] + for child in self.children(nid): + _folder = self.get_highest_empty_folders(nid=child.identifier) + folders.extend(_folder) + return folders + + +def build_repositories(artifacts: List[Dict]) -> List[RepositoryTree]: + """Build tree-like repository objects from raw Artifactory data""" + repositories = defaultdict(RepositoryTree) + for data in artifacts: + repo = repositories[data["repo"]] + repo.parse_artifact(data) + return list(repositories.values()) + + +def get_empty_folders(repositories: List[RepositoryTree]) -> List[Dict]: + folders = [] + for repo in repositories: + repo.count_files() + for repo in repositories: + _folders = repo.get_highest_empty_folders() + folders.extend(_folders) + return [folder.get_data() for folder in folders] diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..df3eb51 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +addopts = --doctest-modules diff --git a/setup.py b/setup.py index cd3fa08..f646cff 100644 --- a/setup.py +++ b/setup.py @@ -33,6 +33,7 @@ "plumbum", "dohq-artifactory", "teamcity-messages", + "treelib", ], classifiers=[ "Development Status :: 5 - Production/Stable", diff --git a/tests/artifacts_list.json b/tests/artifacts_list.json index 25fb20f..72e547c 100644 --- a/tests/artifacts_list.json +++ b/tests/artifacts_list.json @@ -33,19 +33,19 @@ }, { "repo": "test-repo", - "path": "user1", - "name": "package1", + "path": "user1/package1", + "name": "0.1.2", "type": "folder", "size": 0, - "depth": 2 + "depth": 3 }, { "repo": "test-repo", - "path": "user1/package1", - "name": "0.1.2", + "path": "user1", + "name": "package1", "type": "folder", "size": 0, - "depth": 3 + "depth": 2 }, { "repo": "test-repo", @@ -390,5 +390,13 @@ "type": "folder", "size": 0, "depth": 8 + }, + { + "repo": "test2-repo", + "path": "emptyfolder1", + "name": "emptyfolder2", + "type": "folder", + "size": 0, + "depth": 1 } ] diff --git a/tests/test_rules_delete.py b/tests/test_rules_delete.py index 2b55cfa..2665063 100644 --- a/tests/test_rules_delete.py +++ b/tests/test_rules_delete.py @@ -1,10 +1,15 @@ import json +import os from artifactory_cleanup.rules import delete_empty_folder def load_artifacts(): """ + + test2-repo + - emptyfolder + test-repo - . - user1 @@ -55,7 +60,8 @@ def load_artifacts(): - ba38761e49a42a9a44c8be1e30df4886 - user3 """ - with open("artifacts_list.json", "r") as fp: + test_dir = os.path.dirname(__file__) + with open(f"{test_dir}/artifacts_list.json", "r") as fp: artifacts_list = json.load(fp) return artifacts_list @@ -87,9 +93,17 @@ def test_delete_empty_folder(): }, # Longer folder structure where all subfolders are empty { + "depth": 1, + "size": 0, "repo": "test-repo", - "path": "", + "path": ".", "name": "user3", + "type": "folder", + }, + { + "name": "emptyfolder1", + "path": ".", + "repo": "test2-repo", }, ] assert list(artifacts_to_remove) == expected_empty_folders