Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tenant_parser.py: add support for extra-config-paths #104

Merged
merged 1 commit into from
Dec 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,11 @@ def repo_data():
# and the other one is used for the parser.
repo = DummyRepo("my/project")

tenants = {"jobs": ["foo"], "roles": ["foo", "bar"]}
tenants = {
"jobs": ["foo"],
"roles": ["foo", "bar"],
"extra_config_paths": {"zuul-extra.d": ["bar"]},
}

job_files = {
"zuul.d/jobs.yaml": {
Expand All @@ -91,6 +95,10 @@ def repo_data():
"content": raw_file("repo_files/zuul.d/jobs-parse-error.yaml"),
"blame": [],
},
"zuul-extra.d/extra-jobs.yaml": {
"content": raw_file("repo_files/zuul-extra.d/extra-jobs.yaml"),
"blame": [],
},
}

role_files = {
Expand Down
82 changes: 81 additions & 1 deletion tests/scraper/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,33 @@
run: playbooks/non-existing-playbook.yaml
"""

MOCKED_JOB_CONTENT_2 = """
- job:
name: even-cooler-new-job
parent: super-base-job
description: |
This is another job for testing purposes.
run: playbooks/non-existing-super-playbook.yaml
"""

MOCKED_PROJECT_CONTENT = """
- job:
name: super-duper-new-job
parent: lame-base-job
description: |
This is yet another job for testing purposes.
run: playbooks/non-existing-hyper-playbook.yaml

- project:
name: my-simple-project
check:
jobs:
- noop
gate:
jobs:
- super-duper-new-job
"""

MOCKED_ROLE_DESCRIPTION = """
Role description containing some reStructuredText expressions.

Expand Down Expand Up @@ -110,6 +137,19 @@ class MockGitHubRepository(GitHubRepository):
"roles/foobar/README": "Simple text in a file without extension",
"roles/empty-dir/REAMDE.whatever": "This file won't be checked out",
},
"orga1/repo3": {
REPO_ROOT: {
"project-extra.yaml": MockContents(
"project-extra.yaml", MockContents.FILE
),
"zuul-extra.d": MockContents("zuul-extra.d", MockContents.DIR),
},
"project-extra.yaml": MOCKED_PROJECT_CONTENT,
"zuul-extra.d": {
"jobs.yaml": MockContents("zuul-extra.d/jobs.yaml", MockContents.FILE)
},
"zuul-extra.d/jobs.yaml": MOCKED_JOB_CONTENT_2,
},
# Empty repositories
"orga2/repo1": {},
"orga2/repo3": {},
Expand Down Expand Up @@ -206,6 +246,39 @@ def test_scrape():
},
},
),
"orga1/repo3": (
{
"project-extra.yaml": {
"last_changed": "2018-09-17 15:15:15",
"blame": [],
"content": "\n- job:\n"
" name: super-duper-new-job\n"
" parent: lame-base-job\n"
" description: |\n"
" This is yet another job for testing purposes.\n"
" run: playbooks/non-existing-hyper-playbook.yaml\n"
"\n- project:\n"
" name: my-simple-project\n"
" check:\n"
" jobs:\n"
" - noop\n"
" gate:\n"
" jobs:\n"
" - super-duper-new-job\n",
},
"zuul-extra.d/jobs.yaml": {
"last_changed": "2018-09-17 15:15:15",
"blame": [],
"content": "\n- job:\n"
" name: even-cooler-new-job\n"
" parent: super-base-job\n"
" description: |\n"
" This is another job for testing purposes.\n"
" run: playbooks/non-existing-super-playbook.yaml\n",
},
},
{},
),
"orga2/repo1": ({}, {}),
"orga2/repo3": ({}, {}),
}
Expand All @@ -221,7 +294,14 @@ def test_scrape():

for repo, tenants in repo_map.items():
gh_repo = MockGitHubRepository(repo)
job_files, role_files = Scraper(gh_repo).scrape()
extra_config_paths = tenants["tenants"].get("extra_config_paths", {})
if repo == "orga1/repo2":
assert len(extra_config_paths) == 1
elif repo == "orga1/repo3":
assert len(extra_config_paths) == 2
else:
assert len(extra_config_paths) == 0
job_files, role_files = Scraper(gh_repo, extra_config_paths).scrape()
assert (job_files, role_files) == expected[repo]


Expand Down
33 changes: 31 additions & 2 deletions tests/scraper/test_repo_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,12 @@ def test_parse(repo_data):
repo, tenants, job_files, role_files = repo_data

jobs, roles = RepoParser(
repo, tenants, job_files, role_files, scrape_time, is_reusable_repo=False
repo,
tenants,
job_files,
role_files,
scrape_time,
is_reusable_repo=False,
).parse()

# We assume that we can access the resulting jobs and roles dictionary
Expand All @@ -39,6 +44,7 @@ def test_parse(repo_data):
job_2 = jobs[1]
job_3 = jobs[2]
job_4 = jobs[3]
job_5 = jobs[4]
role_1 = [r for r in roles if r["role_name"] == "foo"][0]
role_2 = [r for r in roles if r["role_name"] == "bar"][0]

Expand Down Expand Up @@ -109,6 +115,23 @@ def test_parse(repo_data):
"last_updated": None,
}

expected_job_5 = {
"job_name": "awesome-job",
"repo": "my/project",
"tenants": ["bar"],
"description": "Job in custom directory, without a playbook or parent.\n",
"description_html": "<p>Job in custom directory, without a playbook or parent.</p>\n",
"parent": "base",
"url": "https://github/zuul-extra.d/extra-jobs.yaml",
"private": False,
"platforms": [],
"reusable": False,
"line_start": 1,
"line_end": 4,
"scrape_time": scrape_time,
"last_updated": None,
}

expected_role_1 = {
"role_name": "foo",
"repo": "my/project",
Expand Down Expand Up @@ -198,6 +221,7 @@ def test_parse(repo_data):
assert job_2.to_dict(skip_empty=False) == expected_job_2
assert job_3.to_dict(skip_empty=False) == expected_job_3
assert job_4.to_dict(skip_empty=False) == expected_job_4
assert job_5.to_dict(skip_empty=False) == expected_job_5
assert role_1.to_dict(skip_empty=False) == expected_role_1
assert role_2.to_dict(skip_empty=False) == expected_role_2

Expand All @@ -208,7 +232,12 @@ def test_parse_reusable_repo(repo_data):
repo, tenants, job_files, role_files = repo_data

jobs, roles = RepoParser(
repo, tenants, job_files, role_files, scrape_time, is_reusable_repo=True
repo,
tenants,
job_files,
role_files,
scrape_time,
is_reusable_repo=True,
).parse()

# We assume that we can access the resulting jobs and roles dictionary
Expand Down
4 changes: 4 additions & 0 deletions tests/testdata/repo_files/zuul-extra.d/extra-jobs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
- job:
name: awesome-job
description: |
Job in custom directory, without a playbook or parent.
9 changes: 9 additions & 0 deletions tests/testdata/test.foo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,18 @@
- orga1/repo1:
exclude: [pipeline, project]
- orga1/repo2
- orga1/repo3:
exclude:
- project
- pipeline
extra-config-paths:
- project-extra.yaml
- zuul-extra.d/
- orga2/repo1
untrusted-projects:
- orga2/repo1: {shadow: orga1/repo2}
- orga1/repo2:
exclude: [project]
extra-config-paths:
- zuul-extra.d/
- orga2/repo3
14 changes: 11 additions & 3 deletions zubbi/scraper/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,14 +556,22 @@ def _scrape_repo_map(


def scrape_repo(repo, tenants, reusable_repos, scrape_time):
    """Scrape job and role files from a repository and parse them.

    :param repo: repository object providing file access (exposes
        ``repo_name`` and is accepted by ``Scraper``/``RepoParser``).
    :param tenants: tenant mapping for this repo as built by the tenant
        parser; expected keys are ``"jobs"``, ``"roles"`` and optionally
        ``"extra_config_paths"``.
    :param reusable_repos: collection of repo names whose jobs/roles are
        flagged as reusable.
    :param scrape_time: timestamp stored on the parsed jobs/roles.
    """
    # BUG FIX (review): the tenant parser stores this mapping under the
    # key "extra_config_paths" (underscores, see TenantParser._update_repo_map),
    # not the hyphenated YAML key "extra-config-paths". With the hyphenated
    # key the lookup always fell back to {} and extra config paths were
    # silently ignored during scraping.
    job_files, role_files = Scraper(
        repo,
        tenants.get("extra_config_paths", {}),
    ).scrape()

    is_reusable_repo = repo.repo_name in reusable_repos

    jobs = []
    roles = []
    try:
        jobs, roles = RepoParser(
            repo,
            tenants,
            job_files,
            role_files,
            scrape_time,
            is_reusable_repo,
        ).parse()
    except Exception:
        # Best-effort: a broken repo must not abort the whole scrape run.
        LOGGER.exception("Unable to parse job or role definitions in repo '%s'", repo)
Expand Down
19 changes: 17 additions & 2 deletions zubbi/scraper/repo_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,13 @@

class RepoParser:
def __init__(
self, repo, tenants, job_files, role_files, scrape_time, is_reusable_repo
self,
repo,
tenants,
job_files,
role_files,
scrape_time,
is_reusable_repo,
):
self.repo = repo
self.tenants = tenants
Expand Down Expand Up @@ -64,6 +70,15 @@ def parse_job_files(self):
# LOGGER.debug(json.dumps(repo_jobs, indent=4))
return repo_jobs

def _get_job_tenants(self, file_path):
extra_config_paths = self.tenants.get("extra_config_paths", {})
tenants = self.tenants["jobs"]
for extra_config_path in extra_config_paths.keys():
if file_path.startswith(extra_config_path):
tenants = extra_config_paths[extra_config_path]
break
return tenants

def parse_job_definitions(self, file_path, job_info):
try:
jobs_yaml = yaml.load(job_info["content"], Loader=ZuulSafeLoader)
Expand All @@ -83,7 +98,7 @@ def parse_job_definitions(self, file_path, job_info):
job = ZuulJob(meta={"id": uuid})
job.job_name = job_name
job.repo = self.repo.name
job.tenants = self.tenants["jobs"]
job.tenants = self._get_job_tenants(file_path)
job.private = self.repo.private
job.scrape_time = self.scrape_time
job.line_start = job_def["__line_start__"]
Expand Down
7 changes: 5 additions & 2 deletions zubbi/scraper/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,11 @@


class Scraper:
def __init__(self, repo):
def __init__(self, repo, extra_config_paths=None):
self.repo = repo
self.extra_config_paths = (
list(extra_config_paths.keys()) if extra_config_paths else []
)

def scrape(self):
LOGGER.info("Scraping '%s'", self.repo.name)
Expand All @@ -55,7 +58,7 @@ def scrape_job_files(self):

job_files = self.iterate_directory(
REPO_ROOT,
whitelist=ZUUL_DIRECTORIES + ZUUL_FILES,
whitelist=ZUUL_DIRECTORIES + ZUUL_FILES + self.extra_config_paths,
# NOTE (felix): As we provide this directly to the
# str.endswith() method, the argument must be a str or a
# tuple of strings, otherwise the following exception is
Expand Down
20 changes: 17 additions & 3 deletions zubbi/scraper/tenant_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import logging
import os
from collections import defaultdict

import yaml

Expand Down Expand Up @@ -62,7 +63,7 @@ def parse(self):
self.tenants.append(tenant_name)

def _update_repo_map(self, project, connection_name, tenant):
project_name, exclude = self._extract_project(project)
project_name, exclude, extra_config_paths = self._extract_project(project)

# Map the current tenant to the current repository
repo_tenant_entry = self.repo_map.setdefault(
Expand All @@ -75,14 +76,27 @@ def _update_repo_map(self, project, connection_name, tenant):
repo_tenant_entry["tenants"]["jobs"].append(tenant)
repo_tenant_entry["tenants"]["roles"].append(tenant)

if extra_config_paths:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I understand this correctly, this whole block is about creating a default empty list and adding the tenant to it. In that case we could simplify this to:

if extra_config_paths:
    repo_tenant_extra_config_paths = repo_tenant_entry["tenants"].setdefault("extra_config_paths", defaultdict(lambda: []))
    repo_tenant_extra_config_paths[extra_config_path].append(tenant)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes.
Nice optimization :)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

applied

Copy link
Author

@swiekr swiekr Dec 7, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have tested the behavior using following test:

from pprint import pprint as pprint
from collections import defaultdict

repo_map = {}
connection_name = "github"

# project_name, tenant , extra_config_paths
test_cases = [
    ("org1/repo1", "foo", []),
    ("org1/repo2", "foo", ["zuul-extra.d"]),
    ("org1/repo2", "bar", ["zuul-extra.d", ".zuul-extra.yaml"]),
    ("org1/repo2", "baz", [".zuul-extra.yaml"]),
    ("org1/repo2", "bud", []),
]

for case in test_cases:
    project_name = case[0]
    tenant = case[1]
    extra_config_paths = case[2]

    repo_tenant_entry = repo_map.setdefault(
        project_name,
        {"tenants": {"jobs": [], "roles": []}, "connection_name": connection_name},
    )

    repo_tenant_entry["tenants"]["jobs"].append(tenant)
    repo_tenant_entry["tenants"]["roles"].append(tenant)

    if extra_config_paths:
        repo_tenant_extra_config_paths = repo_tenant_entry["tenants"].setdefault("extra_config_paths", defaultdict(lambda: []))
        for extra_config_path in extra_config_paths:
            repo_tenant_extra_config_paths[extra_config_path].append(tenant)

    pprint(repo_map)
    print("------------------------------------------------")

for item in repo_map["org1/repo2"]["tenants"]["extra_config_paths"].items():
    print(item)

results are OK (repoMap structure is as we would expect it: no 'extra_config_paths' dict in repos without it defined):

[...]
{'org1/repo1': {'connection_name': 'github',
                'tenants': {'jobs': ['foo'], 'roles': ['foo']}},
 'org1/repo2': {'connection_name': 'github',
                'tenants': {'extra_config_paths': defaultdict(<function <lambda> at 0x7fd5836901f0>,
                                                              {'.zuul-extra.yaml': ['bar',
                                                                                    'baz'],
                                                               'zuul-extra.d': ['foo',
                                                                                'bar']}),
                            'jobs': ['foo', 'bar', 'baz', 'bud'],
                            'roles': ['foo', 'bar', 'baz', 'bud']}}}
------------------------------------------------
('zuul-extra.d', ['foo', 'bar'])
('.zuul-extra.yaml', ['bar', 'baz'])

repo_tenant_extra_config_paths = repo_tenant_entry["tenants"].setdefault(
"extra_config_paths", defaultdict(lambda: [])
)
for extra_config_path in extra_config_paths:
repo_tenant_extra_config_paths[extra_config_path].append(tenant)

def _extract_project(self, project):
project_name = project
exclude = []
extra_config_paths = []
if type(project) is dict:
# Get the first key of the dict containing the project name.
project_name = list(project.keys())[0]
exclude = project.get("exclude", [])
return project_name, exclude
exclude = project[project_name].get("exclude", [])
felixedel marked this conversation as resolved.
Show resolved Hide resolved
# NOTE (swietlicki): directories in extra-config-path section contain
# trailing slash, while inside the Scraper.iterate_directory() the comparison
# is done against dir names without trailing slash
for item in project[project_name].get("extra-config-paths", []):
extra_config_paths.append(item[:-1] if item.endswith("/") else item)
return project_name, exclude, extra_config_paths

def _load_tenant_sources_from_file(self, sources_file):
LOGGER.info("Parsing tenant sources file '%s'", sources_file)
Expand Down