Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add metadata to json decode errors #2860

Merged
merged 3 commits into from
Jul 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 35 additions & 37 deletions augur/tasks/git/dependency_tasks/core.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from datetime import datetime

Check warning on line 1 in augur/tasks/git/dependency_tasks/core.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 C0114: Missing module docstring (missing-module-docstring) Raw Output: augur/tasks/git/dependency_tasks/core.py:1:0: C0114: Missing module docstring (missing-module-docstring)
import os
from augur.application.db.models import *

Check warning on line 3 in augur/tasks/git/dependency_tasks/core.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0401: Wildcard import augur.application.db.models (wildcard-import) Raw Output: augur/tasks/git/dependency_tasks/core.py:3:0: W0401: Wildcard import augur.application.db.models (wildcard-import)
from augur.application.db.lib import bulk_insert_dicts, get_repo_by_repo_git, get_value, get_session
from augur.tasks.github.util.github_api_key_handler import GithubApiKeyHandler
from augur.tasks.git.dependency_tasks.dependency_util import dependency_calculator as dep_calc
from augur.tasks.util.worker_util import parse_json_from_subprocess_call
from augur.tasks.git.util.facade_worker.facade_worker.utilitymethods import get_absolute_repo_path
from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth
from augur.tasks.util.metadata_exception import MetadataException


def generate_deps_data(logger, repo_git):
Expand Down Expand Up @@ -49,7 +50,7 @@

logger.info(f"Inserted {len(deps)} dependencies for repo {repo_id}")

"""

Check warning on line 53 in augur/tasks/git/dependency_tasks/core.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0105: String statement has no effect (pointless-string-statement) Raw Output: augur/tasks/git/dependency_tasks/core.py:53:0: W0105: String statement has no effect (pointless-string-statement)
def deps_model(session, repo_id,repo_git,repo_path,repo_name):
# Data collection and storage method

Expand Down Expand Up @@ -82,7 +83,7 @@

#setting the environmental variable which is required by scorecard

with get_session() as session:

Check warning on line 86 in augur/tasks/git/dependency_tasks/core.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0612: Unused variable 'session' (unused-variable) Raw Output: augur/tasks/git/dependency_tasks/core.py:86:26: W0612: Unused variable 'session' (unused-variable)
#key_handler = GithubRandomKeyAuth(logger)
key_handler = GithubApiKeyHandler(logger)
os.environ['GITHUB_AUTH_TOKEN'] = key_handler.get_random_key()
Expand All @@ -94,50 +95,47 @@

try:
required_output = parse_json_from_subprocess_call(logger,['./scorecard', command, '--format=json'],cwd=path_to_scorecard)
except Exception as e:
logger.error(f"Could not parse required output! Error: {e}")
raise e

# end

logger.info('adding to database...')
logger.debug(f"output: {required_output}")
logger.info('adding to database...')
logger.debug(f"output: {required_output}")

if not required_output['checks']:
logger.info('No scorecard checks found!')
return

#Store the overall score first
to_insert = []
overall_deps_scorecard = {
'repo_id': repo_id,
'name': 'OSSF_SCORECARD_AGGREGATE_SCORE',
'scorecard_check_details': required_output['repo'],
'score': required_output['score'],
'tool_source': 'scorecard_model',
'tool_version': '0.43.9',
'data_source': 'Git',
'data_collection_date': datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ')
}
to_insert.append(overall_deps_scorecard)
# bulk_insert_dicts(overall_deps_scorecard, RepoDepsScorecard, ["repo_id","name"])

#Store misc data from scorecard in json field.
for check in required_output['checks']:
repo_deps_scorecard = {
if not required_output['checks']:
logger.info('No scorecard checks found!')
return

#Store the overall score first
to_insert = []
overall_deps_scorecard = {
'repo_id': repo_id,
'name': check['name'],
'scorecard_check_details': check,
'score': check['score'],
'name': 'OSSF_SCORECARD_AGGREGATE_SCORE',
'scorecard_check_details': required_output['repo'],
'score': required_output['score'],
'tool_source': 'scorecard_model',
'tool_version': '0.43.9',
'data_source': 'Git',
'data_collection_date': datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ')
}
to_insert.append(repo_deps_scorecard)

bulk_insert_dicts(logger, to_insert, RepoDepsScorecard, ["repo_id","name"])

logger.info(f"Done generating scorecard for repo {repo_id} from path {path}")
to_insert.append(overall_deps_scorecard)
# bulk_insert_dicts(overall_deps_scorecard, RepoDepsScorecard, ["repo_id","name"])

#Store misc data from scorecard in json field.
for check in required_output['checks']:
repo_deps_scorecard = {
'repo_id': repo_id,
'name': check['name'],
'scorecard_check_details': check,
'score': check['score'],
'tool_source': 'scorecard_model',
'tool_version': '0.43.9',
'data_source': 'Git',
'data_collection_date': datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ')
}
to_insert.append(repo_deps_scorecard)

bulk_insert_dicts(logger, to_insert, RepoDepsScorecard, ["repo_id","name"])

logger.info(f"Done generating scorecard for repo {repo_id} from path {path}")

except Exception as e:

raise MetadataException(e, f"required_output: {required_output}")
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pylint] reported by reviewdog 🐶
W0707: Consider explicitly re-raising using 'raise MetadataException(e, f'required_output: {required_output}') from e' (raise-missing-from)

6 changes: 6 additions & 0 deletions augur/tasks/util/metadata_exception.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
class MetadataException(Exception):
    """Exception wrapper that carries extra debugging context.

    Wraps an already-raised exception together with a free-form piece of
    metadata (for example the raw subprocess output that failed to parse),
    so the context is part of the error message wherever it surfaces.

    Attributes:
        original_exception: The exception that was originally raised.
        additional_metadata: Extra context to report alongside it.
    """

    def __init__(self, original_exception, additional_metadata):
        """Store both pieces and build the combined error message.

        Args:
            original_exception: The exception being wrapped.
            additional_metadata: Context rendered into the message.
        """
        self.original_exception = original_exception
        self.additional_metadata = additional_metadata

        super().__init__(
            f"{self.original_exception} | Additional metadata: {self.additional_metadata}"
        )

    def __reduce__(self):
        """Rebuild from the two constructor arguments when pickled/copied.

        ``Exception.__reduce__`` replays ``self.args``, which holds only the
        single formatted message; that does not match the two-argument
        ``__init__`` and makes unpickling (and ``copy.copy``) raise
        ``TypeError``. Replaying the real arguments keeps the exception
        transferable between processes.
        """
        return (type(self), (self.original_exception, self.additional_metadata))
5 changes: 4 additions & 1 deletion augur/tasks/util/worker_util.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#SPDX-License-Identifier: MIT

Check warning on line 1 in augur/tasks/util/worker_util.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 C0114: Missing module docstring (missing-module-docstring) Raw Output: augur/tasks/util/worker_util.py:1:0: C0114: Missing module docstring (missing-module-docstring)
import json
#import gunicorn.app.base
import numpy as np
Expand All @@ -11,6 +11,9 @@
import json
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pylint] reported by reviewdog 🐶
W0404: Reimport 'json' (imported line 2) (reimported)

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pylint] reported by reviewdog 🐶
C0412: Imports from package json are not grouped (ungrouped-imports)

import subprocess

from augur.tasks.util.metadata_exception import MetadataException


def create_grouped_task_load(*args,processes=8,dataList=[],task=None):
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pylint] reported by reviewdog 🐶
W0102: Dangerous default value [] as argument (dangerous-default-value)


if not dataList or not task:
Expand Down Expand Up @@ -70,7 +73,7 @@
# if a KeyError does not occur then a dict with those values has already been processed
# if a KeyError occurs a dict with those values has not been found yet
try:
unique_values[key]

Check warning on line 76 in augur/tasks/util/worker_util.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0104: Statement seems to have no effect (pointless-statement) Raw Output: augur/tasks/util/worker_util.py:76:12: W0104: Statement seems to have no effect (pointless-statement)
continue
except KeyError:
unique_values[key] = 1
Expand Down Expand Up @@ -108,7 +111,7 @@

def calculate_date_weight_from_timestamps(added,last_collection,domain_start_days=30):
#Get the time since last collection as well as when the repo was added.
if last_collection is None:

Check warning on line 114 in augur/tasks/util/worker_util.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 R1705: Unnecessary "else" after "return", remove the "else" and de-indent the code inside it (no-else-return) Raw Output: augur/tasks/util/worker_util.py:114:4: R1705: Unnecessary "else" after "return", remove the "else" and de-indent the code inside it (no-else-return)
delta = datetime.now() - added
return date_weight_factor(delta.days)
else:
Expand All @@ -117,7 +120,7 @@
factor = date_weight_factor(delta.days,domain_shift=domain_start_days)

#If the repo is older than thirty days, start to decrease its weight.
if delta.days >= domain_start_days:

Check warning on line 123 in augur/tasks/util/worker_util.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 R1705: Unnecessary "else" after "return", remove the "else" and de-indent the code inside it (no-else-return) Raw Output: augur/tasks/util/worker_util.py:123:8: R1705: Unnecessary "else" after "return", remove the "else" and de-indent the code inside it (no-else-return)
return factor
else:
#Else increase its weight
Expand All @@ -126,9 +129,9 @@
def parse_json_from_subprocess_call(logger, subprocess_arr, cwd=None):
logger.info(f"running subprocess {subprocess_arr[0]}")
if cwd:
p = subprocess.run(subprocess_arr,cwd=cwd,capture_output=True, text=True, timeout=None)

Check warning on line 132 in augur/tasks/util/worker_util.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W1510: 'subprocess.run' used without explicitly defining the value for 'check'. (subprocess-run-check) Raw Output: augur/tasks/util/worker_util.py:132:12: W1510: 'subprocess.run' used without explicitly defining the value for 'check'. (subprocess-run-check)
else:
p = subprocess.run(subprocess_arr,capture_output=True, text=True, timeout=None)

Check warning on line 134 in augur/tasks/util/worker_util.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W1510: 'subprocess.run' used without explicitly defining the value for 'check'. (subprocess-run-check) Raw Output: augur/tasks/util/worker_util.py:134:12: W1510: 'subprocess.run' used without explicitly defining the value for 'check'. (subprocess-run-check)

logger.info('subprocess completed... ')

Expand All @@ -141,7 +144,7 @@
required_output = {}
except json.decoder.JSONDecodeError as e:
logger.error(f"Could not parse required output! \n output: {output} \n Error: {e}")
raise e
raise MetadataException(e, f"output : {output}")
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pylint] reported by reviewdog 🐶
W0707: Consider explicitly re-raising using 'raise MetadataException(e, f'output : {output}') from e' (raise-missing-from)


return required_output

Expand Down
Loading