Skip to content

Commit

Permalink
scrape_role_files: add support for nested role directories
Browse files Browse the repository at this point in the history
Changing the way how roles are scraped. Previously directories present in
default roles directory were treated as roles (dir name is role name).

Now scraper enters each directory looking for one of 8 specified subdirs.
When at least one such subdir is found, its parent is treated as role, else
subdir is added to search list (this is equivalent of traversing repository)

Adjusting tests to utilize nested role directories
  • Loading branch information
Krzysztof Swietlicki authored and Krzysztof Swietlicki committed Jan 10, 2024
1 parent bb31afe commit 8f86c5f
Show file tree
Hide file tree
Showing 5 changed files with 162 additions and 36 deletions.
8 changes: 8 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,14 @@ def repo_data():
},
"changelog_file": None,
},
"foobar/baz": {
"last_changed": "2018-09-17 15:15:15",
"readme_file": {
"path": "roles/foobar/baz/README.rst",
"content": raw_file("repo_files/roles/foobar/baz/README.rst"),
},
"changelog_file": None,
},
}

return repo, tenants, job_files, role_files
Expand Down
53 changes: 39 additions & 14 deletions tests/scraper/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ class MockContents:
DIR = "dir"

def __init__(self, path, type):
self.name = path.split("/")[-1]
self.path = path
self.type = type

Expand Down Expand Up @@ -101,9 +102,10 @@ class MockGitHubRepository(GitHubRepository):
"zuul.d/jobs.yaml": MOCKED_JOB_CONTENT,
"roles": {"docker-run": MockContents("roles/docker-run", MockContents.DIR)},
"roles/docker-run": {
"tasks": MockContents("roles/docker-run/tasks", MockContents.DIR),
"README.rst": MockContents(
"roles/docker-run/README.rst", MockContents.FILE
)
),
},
"roles/docker-run/README.rst": MOCKED_ROLE_DESCRIPTION,
"roles/ignored": MockContents("not a valid role", MockContents.FILE),
Expand All @@ -117,25 +119,40 @@ class MockGitHubRepository(GitHubRepository):
"bar": MockContents("roles/bar", MockContents.DIR),
"foobar": MockContents("roles/foobar", MockContents.DIR),
"empty-dir": MockContents("roles/empty-dir", MockContents.DIR),
"foobaz": MockContents("roles/foobaz", MockContents.DIR),
},
"roles/foo": {
"README.md": MockContents("roles/foo/README.md", MockContents.FILE)
"defaults": MockContents("roles/foo/defaults", MockContents.DIR),
"README.md": MockContents("roles/foo/README.md", MockContents.FILE),
},
"roles/bar": {
"README.txt": MockContents("roles/bar/README.txt", MockContents.FILE)
"library": MockContents("roles/bar/library", MockContents.DIR),
"README.txt": MockContents("roles/bar/README.txt", MockContents.FILE),
},
"roles/foobar": {
"README": MockContents("roles/foobar/README", MockContents.FILE)
"handlers": MockContents("roles/foobar/handlers", MockContents.DIR),
"README": MockContents("roles/foobar/README", MockContents.FILE),
},
"roles/empty-dir": {
"meta": MockContents("roles/empty-dir/meta", MockContents.DIR),
"README.whatever": MockContents(
"roles/empty-dir/README.whatever", MockContents.FILE
)
),
},
"roles/foobaz": {
"baz": MockContents("roles/foobaz/baz", MockContents.DIR),
},
"roles/foobaz/baz": {
"vars": MockContents("roles/foobaz/baz/vars", MockContents.DIR),
"README.rst": MockContents(
"roles/foobaz/baz/README.rst", MockContents.FILE
),
},
"roles/foo/README.md": "# Just some Markdown",
"roles/bar/README.txt": "Just some simple text\nwith a line break",
"roles/foobar/README": "Simple text in a file without extension",
"roles/empty-dir/REAMDE.whatever": "This file won't be checked out",
"roles/empty-dir/README.whatever": "This file won't be checked out",
"roles/foobaz/baz/README.rst": "Simple readme file from nested role",
},
"orga1/repo3": {
REPO_ROOT: {
Expand Down Expand Up @@ -215,19 +232,24 @@ def test_scrape():
"orga1/repo2": (
{},
{
"foo": {
"bar": {
"last_changed": "2018-09-17 15:15:15",
"readme_file": {
"path": "roles/foo/README.md",
"content": "# Just some Markdown",
"path": "roles/bar/README.txt",
"content": "Just some simple text\nwith a line break",
},
"changelog_file": None,
},
"bar": {
"empty-dir": {
"last_changed": "2018-09-17 15:15:15",
"readme_file": None,
"changelog_file": None,
},
"foo": {
"last_changed": "2018-09-17 15:15:15",
"readme_file": {
"path": "roles/bar/README.txt",
"content": "Just some simple text\nwith a line break",
"path": "roles/foo/README.md",
"content": "# Just some Markdown",
},
"changelog_file": None,
},
Expand All @@ -239,9 +261,12 @@ def test_scrape():
},
"changelog_file": None,
},
"empty-dir": {
"foobaz/baz": {
"last_changed": "2018-09-17 15:15:15",
"readme_file": None,
"readme_file": {
"path": "roles/foobaz/baz/README.rst",
"content": "Simple readme file from nested role",
},
"changelog_file": None,
},
},
Expand Down
35 changes: 35 additions & 0 deletions tests/scraper/test_repo_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def test_parse(repo_data):
job_5 = jobs[4]
role_1 = [r for r in roles if r["role_name"] == "foo"][0]
role_2 = [r for r in roles if r["role_name"] == "bar"][0]
role_3 = [r for r in roles if r["role_name"] == "foobar/baz"][0]

expected_job_1 = {
"job_name": "my-cool-new-job",
Expand Down Expand Up @@ -215,6 +216,39 @@ def test_parse(repo_data):
"last_updated": "2018-09-17 15:15:15",
}

expected_role_3 = {
"role_name": "foobar/baz",
"repo": "my/project",
"tenants": ["foo", "bar"],
"description": (
"Yet another simple description\n\n"
"**Role variables**\n\n"
".. zuul:rolevar:: my_mandatory_variable\n\n"
" This variable is mandatory.\n"
),
"description_html": (
"<p>Yet another simple description</p>\n"
"<p><strong>Role variables</strong></p>\n"
'<dl class="zuul rolevar">\n'
'<dt class="sig sig-object zuul" id="rolevar-my_mandatory_variable">\n'
'<span class="sig-name descname">'
'<span class="pre">my_mandatory_variable</span>'
"</span>"
'<a class="headerlink" href="#rolevar-my_mandatory_variable" '
'title="Permalink to this definition">'
"¶</a><br /></dt>\n"
"<dd><p>This variable is mandatory.</p>\n"
"</dd></dl>\n"
"\n"
),
"url": "https://github/my/project/tree/master/roles/foobar/baz",
"private": False,
"platforms": [],
"reusable": False,
"scrape_time": scrape_time,
"last_updated": "2018-09-17 15:15:15",
}

# NOTE (fschmidt): Without the skip_empty flag, empty (= None) keys will
# be stripped from the resulting dict.
assert job_1.to_dict(skip_empty=False) == expected_job_1
Expand All @@ -224,6 +258,7 @@ def test_parse(repo_data):
assert job_5.to_dict(skip_empty=False) == expected_job_5
assert role_1.to_dict(skip_empty=False) == expected_role_1
assert role_2.to_dict(skip_empty=False) == expected_role_2
assert role_3.to_dict(skip_empty=False) == expected_role_3


def test_parse_reusable_repo(repo_data):
Expand Down
7 changes: 7 additions & 0 deletions tests/testdata/repo_files/roles/foobar/baz/README.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Yet another simple description

**Role variables**

.. zuul:rolevar:: my_mandatory_variable
This variable is mandatory.
95 changes: 73 additions & 22 deletions zubbi/scraper/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

import logging
import os

from zubbi.scraper.exceptions import CheckoutError

Expand All @@ -29,6 +30,19 @@

ROLES_DIRECTORY = "roles"

# Role needs to contains at least one of those directories to be considered as role
# https://docs.ansible.com/ansible/latest/playbook_guide/playbooks_reuse_roles.html#role-directory-structure
ROLE_MANDATORY_DIRS = [
"tasks",
"handlers",
"library",
"defaults",
"vars",
"files",
"templates",
"meta",
]

REPO_ROOT = "/"


Expand Down Expand Up @@ -146,39 +160,76 @@ def get_file_info(self, path):

def scrape_role_files(self):
role_files = {}
# Roles might be grouped in some parent directory, so we need to
# recursively check nested directories till we find role directory.
# We are only interested in the role name (=> directory name)
# and the README and CHANGELOG files. Thus, we don't need to
# iterate recursively over the roles directory as those files
# should be on the top-level per role.

# Listing repo files is based on the example in pyGithub doc:
# https://pygithub.readthedocs.io/en/v2.1.1/examples/Repository.html#get-all-of-the-contents-of-the-repository-recursively
try:
roles = self.repo.directory_contents(ROLES_DIRECTORY)
for role_name, remote_file in roles.items():
# Ansible requires the role to be defined in a certain directory structure.
# Thus, we are only interested in directories. Files can be omitted
dir_contents = [
item
for item in self.repo.directory_contents(ROLES_DIRECTORY).values()
if item.type == "dir"
]
while dir_contents:
try:
if remote_file.type != "dir":
# Ansible requires the role to be defined in a
# certain directory structure. Thus, we can
# ignore it in case it's not a directory.
continue
last_changed = self.repo.last_changed(remote_file.path)
existing_files = self.repo.directory_contents(remote_file.path)
# Skip empty directories or files
if not existing_files:
continue
readme_file = self.find_matching_file(README_FILES, existing_files)
changelog_file = self.find_matching_file(
CHANGELOG_FILES, existing_files
)
role_files[role_name] = {
"last_changed": last_changed,
"readme_file": readme_file,
"changelog_file": changelog_file,
}
dir = dir_contents.pop(0)

# When directory is one of role mandatory directories we assume that
# its parent is the role directory (then information is extracted
# and all child directories are removed from search list).
# Otherwise, we append the subdirs of this dir to search list (this
# implements the recursive search)
if dir.name not in ROLE_MANDATORY_DIRS:
dir_contents.extend(
[
item
for item in self.repo.directory_contents(
dir.path
).values()
if item.type == "dir"
]
)
else:
# get parent path
role_path = os.path.split(dir.path)[0]
# remove all role subdirs from the search list
dir_contents = [
item
for item in dir_contents
if not item.path.startswith(f"{role_path}/")
]

# we want to get the git change history
# and search for README/CHANGELOG files
last_changed = self.repo.last_changed(role_path)
existing_files = self.repo.directory_contents(role_path)
readme_file = self.find_matching_file(
README_FILES, existing_files
)
changelog_file = self.find_matching_file(
CHANGELOG_FILES, existing_files
)
# we treat role directory path (without ROLES_DIRECTORY dir name
# and trailing slash prefix) as role name
role_files[role_path.replace(f"{ROLES_DIRECTORY}/", "", 1)] = {
"last_changed": last_changed,
"readme_file": readme_file,
"changelog_file": changelog_file,
}
except CheckoutError as e:
LOGGER.exception(e)
except CheckoutError as e:
LOGGER.debug(e)

return role_files
# sort keys (role names) alphabetically
return {key: value for key, value in sorted(role_files.items())}

def find_matching_file(self, file_filter, existing_files):
for filename, file_content in existing_files.items():
Expand Down

0 comments on commit 8f86c5f

Please sign in to comment.