Skip to content

Commit

Permalink
Refactor delete_empty_folder using treelib and Tree approach
Browse files Browse the repository at this point in the history
  • Loading branch information
allburov committed Jul 14, 2022
1 parent dc1446f commit 0f4903b
Show file tree
Hide file tree
Showing 10 changed files with 231 additions and 114 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
- name: Test with pytest
run: |
python3 -m pytest -s --color=yes -vv tests
python3 -m pytest -s --color=yes -vv tests artifactory_cleanup
- name: Build package
run: python -m build
Expand Down
38 changes: 34 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@
- [Usage](#usage)
- [Commands](#commands)
- [Available Rules](#available-rules)
- [Artifact cleanup policies](#artifact-cleanup-policies)
- [Artifact cleanup policies](#artifact-cleanup-policies)
- [Docker Container Usage](#docker-container-usage)
- [FAQ](#faq)
- [Release](#release)

<!-- tocstop -->
Expand Down Expand Up @@ -41,7 +42,7 @@ You should take the following steps:
2. Create a Python file, for example, `reponame.py` with the following contents:
```python
from artifactory_cleanup import rules
from artifactory_cleanup.rules import CleanupPolicy
from artifactory_cleanup import CleanupPolicy

RULES = [

Expand Down Expand Up @@ -83,7 +84,7 @@ artifactory-cleanup --destroy --user user --password password --artifactory-serv
All rules are imported from the `rules` module.
See also [List of available cleaning rules](docs/RULES)

## Artifact cleanup policies ##
## Artifact cleanup policies ##

To add a cleaning policy you need:

Expand All @@ -92,7 +93,7 @@ To add a cleaning policy you need:

```python
from artifactory_cleanup import rules
from artifactory_cleanup.rules import CleanupPolicy
from artifactory_cleanup import CleanupPolicy

RULES = [

Expand Down Expand Up @@ -146,6 +147,35 @@ To build the container image locally run the following command in the folder of
```bash
docker build . --tag artifactory-cleanup
```
# FAQ

## How to clean up Conan repository?
The idea came from https://github.com/devopshq/artifactory-cleanup/issues/47

```python
from artifactory_cleanup import rules
from artifactory_cleanup import CleanupPolicy
RULES = [
# ------ ALL REPOS --------
CleanupPolicy(
'Delete files older than 60 days',
rules.repo('conan-testing'),
rules.delete_not_used_since(days=60),
# Make sure to keep conan metadata. See also
# https://github.com/devopshq/artifactory-cleanup/issues/47
rules.exclude_filename(['.timestamp', 'index.json']),
),
CleanupPolicy(
'Delete empty folders',
rules.repo('conan-testing'),
rules.delete_empty_folder(),
# Exclude metadata files
# If a folder only contains these files, consider it as empty
rules.exclude_filename(['.timestamp', 'index.json']),
),
]
```


# Release

Expand Down
1 change: 1 addition & 0 deletions artifactory_cleanup/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from artifactory_cleanup.artifactorycleanup import ArtifactoryCleanup # noqa
from artifactory_cleanup.rules.base import CleanupPolicy # noqa
7 changes: 6 additions & 1 deletion artifactory_cleanup/rules/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,12 @@ def filter(self, artifacts):
return artifacts

def delete(self, artifact, destroy):
# Entries that live directly under the repository root carry the "." path
# placeholder; it must not become a segment of the URL.
if artifact["path"] == ".":
    artifact_path = "{repo}/{name}".format(**artifact)
else:
    artifact_path = "{repo}/{path}/{name}".format(**artifact)

# Percent-encode the assembled path string. Passing the artifact dict itself
# to quote() raises TypeError and would discard the path computed above.
artifact_path = quote(artifact_path)
if destroy:
print("DESTROY MODE - delete {}".format(artifact_path))
delete_url = "{}/{}".format(self.artifactory_url, artifact_path)
Expand Down
22 changes: 8 additions & 14 deletions artifactory_cleanup/rules/delete.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
from datetime import timedelta

from artifactory_cleanup.rules import utils
from artifactory_cleanup.rules.base import Rule
from artifactory_cleanup.rules.utils import (
artifacts_list_to_tree,
folder_artifacts_without_children,
)


class delete_older_than(Rule):
Expand Down Expand Up @@ -89,10 +86,10 @@ def _aql_add_filter(self, aql_query_list):

class delete_empty_folder(Rule):
"""
Clean up empty folders in local repositories. A special rule that runs separately on all repositories.
Remove empty folders.
Refers to the plugin
https://github.com/jfrog/artifactory-user-plugins/tree/master/cleanup/deleteEmptyDirs
If you just want to clean up empty folders - Artifactory must do it automatically.
We use the rule to help with some specific cases - look at README.md "FAQ: How to clean up Conan repository"
"""

def _aql_add_filter(self, aql_query_list):
Expand All @@ -101,10 +98,7 @@ def _aql_add_filter(self, aql_query_list):
aql_query_list.append(all_files_dict)
return aql_query_list

def _filter_result(self, result_artifact):

artifact_tree = artifacts_list_to_tree(result_artifact)

# Now we have a dict with all folders and files
# An empty folder is represented by not having any children
return list(folder_artifacts_without_children(artifact_tree))
def _filter_result(self, result_artifacts):
    """Reduce the raw query result to the empty folders it contains.

    The artifacts are first grouped into per-repository trees; the empty
    folders found in those trees are returned as a list.
    """
    return utils.get_empty_folders(utils.build_repositories(result_artifacts))
234 changes: 148 additions & 86 deletions artifactory_cleanup/rules/utils.py
Original file line number Diff line number Diff line change
@@ -1,102 +1,164 @@
from collections import defaultdict, deque
from typing import Dict, List
from collections import defaultdict
from typing import Dict, List, Tuple, Optional

from treelib import Node, Tree

def artifacts_list_to_tree(list_of_artifacts: List):

def is_repository(data):
    """Tell whether a raw record describes the repository root itself.

    The root is the record whose ``path`` and ``name`` are both ``"."``.
    ``name`` is only looked up when ``path`` already matches.
    """
    if data["path"] != ".":
        return False
    return data["name"] == "."


def get_fullpath(repo, path, name, **kwargs):
    """
    Build the slash-separated full path from raw Artifactory's data.

    ``name == "."`` stands for the repository root, so only ``repo`` is
    returned. ``path == "."`` stands for an entry directly under the root, so
    the placeholder is left out. Any other keys of the record are accepted via
    ``**kwargs`` and ignored.
    """
    if name == ".":
        return repo
    prefix = repo if path == "." else f"{repo}/{path}"
    return f"{prefix}/{name}"


def split_fullpath(fullpath: str) -> Tuple[str, Optional[str]]:
    """
    Cut a full path at its last "/" and return (name, parent).

    The parent is None when the path has no "/" at all.

    >>> split_fullpath("repo/folder/filename.py")
    ('filename.py', 'repo/folder')
    >>> split_fullpath("repo")
    ('repo', None)
    """
    parent, separator, name = fullpath.rpartition("/")
    if not separator:
        # No slash found: the whole string is the name and there is no parent.
        return name, None
    return name, parent


def parse_fullpath(fullpath: str) -> Tuple[str, str, str]:
    """
    Parse full path to (repo, path, name)

    The "." placeholder is used for the parts a short path does not have: a
    bare repository yields "." for both path and name, and an entry directly
    under the repository yields "." for the path.

    >>> parse_fullpath("repo/path/name.py")
    ('repo', 'path', 'name.py')
    >>> parse_fullpath("repo/a/b/name.py")
    ('repo', 'a/b', 'name.py')
    >>> parse_fullpath("repo/path")
    ('repo', '.', 'path')
    >>> parse_fullpath("repo")
    ('repo', '.', '.')
    """
    if "/" not in fullpath:
        # Only the repository itself
        return fullpath, ".", "."
    name, repo_path = split_fullpath(fullpath)

    if "/" not in repo_path:
        # Entry directly under the repository root
        return repo_path, ".", name

    # Everything between the repository and the name is the path
    repo, path = repo_path.split("/", maxsplit=1)
    return repo, path, name


def nested_dict():
class ArtifactNode(Node):
    """
    Tree node for a single Artifactory entry.

    ``data`` holds the raw Artifactory record, or nothing for a node that has
    no record attached. ``files`` is a per-node file counter that starts at 0.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.files = 0

    def is_file(self):
        """Return True only if the node has data and its ``type`` is "file"."""
        return self.data["type"] == "file" if self.data else False

    def get_data(self) -> Dict:
        """
        Return the raw record of the node.

        When the node carries no data, a minimal record (repo, path, name) is
        rebuilt from the node identifier instead.

        Raises:
            ValueError: if the rebuilt record points at the repository root.
        """
        if not self.data:
            repo, path, name = parse_fullpath(self.identifier)
            rebuilt = {"repo": repo, "path": path, "name": name}
            if is_repository(rebuilt):
                raise ValueError("Can not remove repository root")
            return rebuilt
        return self.data


class RepositoryTree(Tree):
    """
    Tree of the artifacts of a repository, with node identifiers being the
    slash-separated full paths ("repo/folder/file").
    """

    def parse_artifact(self, data):
        """
        Parse Artifactory's raw data and add artifact to the tree
        """
        fullpath = get_fullpath(**data)
        self.add_artifact(fullpath, data)

    def add_artifact(self, fullpath, data):
        """
        Add a node for ``fullpath`` and return it.

        A node that already exists without data gets ``data`` attached and is
        returned as is. Otherwise the missing parent folders are created first
        and a new ArtifactNode is inserted under its parent.
        """
        existed = self.get_node(fullpath)
        if existed is not None and existed.data is None:
            # We met it before, but with no data
            existed.data = data
            return existed

        name, parent = split_fullpath(fullpath)
        self.upsert_path(parent)
        artifact = ArtifactNode(tag=name, identifier=fullpath, data=data)
        self.add_node(node=artifact, parent=parent)
        return artifact

    def upsert_path(self, fullpath):
        """
        Create path to the folder if not exist
        """
        # An empty/None path means we are above the top of the tree
        if not fullpath:
            return

        if self.contains(fullpath):
            return

        # Folder nodes created here carry no data until the real record arrives
        self.add_artifact(fullpath, data=None)

    def count_files(self, nid=None) -> int:
        """
        Count files inside the directory. DFS traversing

        Stores the count in ``files`` of every visited node and returns the
        count for ``nid`` (the tree root when omitted). An empty tree has 0.
        """
        nid = nid or self.root
        node: ArtifactNode = self.get_node(nid)
        if node is None:
            # Empty tree (no root) or unknown node: nothing to count
            return 0

        if node.is_file():
            node.files = 1
            return node.files

        children: List[ArtifactNode] = self.children(nid)
        node.files = sum(self.count_files(child.identifier) for child in children)
        return node.files

    def get_highest_empty_folders(self, nid=None) -> List[ArtifactNode]:
        """
        Get the highest empty folders for the repository. DFS traversing

        Relies on the ``files`` counters, so ``count_files`` has to run first.
        The root node itself is never returned.
        """
        nid = nid or self.root
        node: ArtifactNode = self.get_node(nid)
        if node is None:
            # Empty tree (no root) or unknown node: no folders at all
            return []

        children: List[ArtifactNode] = self.children(nid)
        if not node.is_root() and node.files == 0:
            # Empty folder that contains only empty folders
            if all(child.files == 0 for child in children):
                return [node]

        folders = []
        for child in children:
            folders.extend(self.get_highest_empty_folders(nid=child.identifier))
        return folders


def build_repositories(artifacts: List[Dict]) -> List[RepositoryTree]:
    """
    Group raw Artifactory records into one RepositoryTree per ``repo`` value.

    Trees are returned in the order their repository first appears in
    ``artifacts``.
    """
    trees = {}
    for record in artifacts:
        repo_name = record["repo"]
        if repo_name not in trees:
            trees[repo_name] = RepositoryTree()
        trees[repo_name].parse_artifact(record)
    return list(trees.values())


def get_empty_folders(repositories: List[RepositoryTree]) -> List[Dict]:
    """
    Return the data of the highest empty folders of all given repositories.

    File counters are refreshed on every repository first, then the empty
    folder nodes are collected and converted to their data dicts.
    """
    for tree in repositories:
        tree.count_files()

    empty_nodes = [
        node for tree in repositories for node in tree.get_highest_empty_folders()
    ]
    return [node.get_data() for node in empty_nodes]
2 changes: 2 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[pytest]
addopts = --doctest-modules
Loading

0 comments on commit 0f4903b

Please sign in to comment.