
Release v0.80.0 - PR #2972

Merged: 13 commits, Jan 21, 2025
9 changes: 7 additions & 2 deletions augur/application/db/data_parse.py
@@ -791,6 +791,11 @@ def extract_needed_pr_data_from_gitlab_merge_request(pr, repo_id, tool_source, t
Returns:
Parsed pr dict
"""
pr_closed_datetime = pr['closed_at']
pr_merged_datetime = pr['merged_at']

if not pr_closed_datetime:
pr_closed_datetime = pr_merged_datetime

pr_dict = {
'repo_id': repo_id,
@@ -810,8 +815,8 @@ def extract_needed_pr_data_from_gitlab_merge_request(pr, repo_id, tool_source, t
'pr_body': pr['description'],
'pr_created_at': pr['created_at'],
'pr_updated_at': pr['updated_at'],
'pr_closed_at': pr['closed_at'],
'pr_merged_at': pr['merged_at'],
'pr_closed_at': pr_closed_datetime,
'pr_merged_at': pr_merged_datetime,
'pr_merge_commit_sha': pr['merge_commit_sha'],
'pr_teams': None,
'pr_milestone': pr['milestone'].get('title') if pr['milestone'] else None,
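The change above backfills pr_closed_at when GitLab reports a merge request that was merged without an explicit close event (merged_at set, closed_at null). A minimal sketch of that fallback, using a hypothetical trimmed-down payload rather than the project's test code:

```python
# Illustrative sketch only: mirrors the fallback added above so pr_closed_at
# is populated consistently for merged GitLab merge requests.
def resolve_closed_at(mr: dict):
    closed_at = mr['closed_at']
    merged_at = mr['merged_at']
    # If GitLab did not record an explicit close time, fall back to the merge time.
    if not closed_at:
        closed_at = merged_at
    return closed_at

# Hypothetical trimmed-down API payload for a merged MR with no close event.
mr = {'closed_at': None, 'merged_at': '2025-01-15T12:00:00Z'}
assert resolve_closed_at(mr) == '2025-01-15T12:00:00Z'
```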
augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py
@@ -57,7 +57,7 @@
def map_dependencies(info):
if type(info) is dict:

if "version" in info:

⚠️ pylint (reviewdog) at pypi_parser.py:60:8: R1705: Unnecessary "elif" after "return", remove the leading "el" from "elif" (no-else-return)
return info['version']
elif 'git' in info:
return info['git']+'#'+info['ref']
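For context, R1705 (no-else-return) flags a return followed by elif/else. A minimal before/after sketch of the pattern pylint is pointing at, with simplified names rather than the file's exact code:

```python
# Simplified illustration of the R1705 pattern and the refactor pylint suggests.
def pick_requirement(info: dict):
    if "version" in info:
        return info["version"]
    elif "git" in info:  # R1705: the "elif" after "return" is unnecessary
        return info["git"] + "#" + info["ref"]
    return None

def pick_requirement_fixed(info: dict):
    if "version" in info:
        return info["version"]
    if "git" in info:    # same behavior, without the elif after return
        return info["git"] + "#" + info["ref"]
    return None

assert pick_requirement({"git": "url", "ref": "abc"}) == pick_requirement_fixed({"git": "url", "ref": "abc"})
```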
@@ -140,12 +140,12 @@
group = 'runtime'
for package in manifest['package']:
req = None
if package['category'] == 'main':
if package.get('category') == 'main':
group = 'runtime'
if package['category'] == 'dev':
if package.get('category') == 'dev':
group = 'develop'
if 'version' in package:
req = package['version']
req = package.get('version')
elif 'git' in package:
req = package['git']+'#'+package['ref']
Dict = {'name': package['name'], 'requirement': req, 'type': group, 'package': 'PYPI'}
@@ -163,7 +163,7 @@
#dependencies = contents['dependencies']
dependencies = contents.get('dependencies', [])

if not dependencies:

⚠️ pylint (reviewdog) at pypi_parser.py:166:4: R1705: Unnecessary "else" after "return", remove the "else" and de-indent the code inside it (no-else-return)
print("No dependencies found.")
return []
else:
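The switch from package['category'] and package['version'] to .get() means lock-file entries that omit those keys no longer raise KeyError. A minimal sketch with a hypothetical Pipfile.lock-style entry:

```python
# Hypothetical lock-file entry that has neither "category" nor "version".
package = {"name": "example-lib", "git": "https://example.com/repo.git", "ref": "abc123"}

# Bracket access raises KeyError on missing keys; .get() returns None instead.
category = package.get("category")   # None, so neither group branch fires
req = package.get("version")         # None, letting the git fallback apply
if req is None and "git" in package:
    req = package["git"] + "#" + package["ref"]

print(category, req)  # None https://example.com/repo.git#abc123
```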
56 changes: 29 additions & 27 deletions augur/tasks/git/dependency_libyear_tasks/libyear_util/util.py
@@ -32,54 +32,56 @@ def get_parsed_deps(path,logger):

deps_file = None
dependency_list = list()

for f in file_list:
deps_file = find(f, path)
if not deps_file:

if not deps_file or not f:
continue
file_handle= open(deps_file)

if f == 'Requirement.txt':
dependency_list = parse_requirement_txt(file_handle)
short_file_name = os.path.split(deps_file)[-1]

if short_file_name == 'Requirement.txt':
dependency_list.extend(parse_requirement_txt(file_handle))

elif f == 'requirements.txt':
dependency_list = parse_requirement_txt(file_handle)
if short_file_name == 'requirements.txt':
dependency_list.extend(parse_requirement_txt(file_handle))

elif f == 'setup.py':
dependency_list = parse_setup_py(file_handle)
if short_file_name == 'setup.py':
dependency_list.extend(parse_setup_py(file_handle))

elif f == 'Pipfile':
dependency_list = parse_pipfile(file_handle)
if short_file_name == 'Pipfile':
dependency_list.extend(parse_pipfile(file_handle))

elif f == 'Pipfile.lock':
dependency_list = parse_pipfile_lock(file_handle)
if short_file_name == 'Pipfile.lock':
dependency_list.extend(parse_pipfile_lock(file_handle))

elif f == 'pyproject.toml':
dependency_list = parse_poetry(file_handle)
if short_file_name == 'pyproject.toml':
dependency_list.extend(parse_poetry(file_handle))

elif f == 'poetry.lock':
dependency_list = parse_poetry_lock(file_handle)
if short_file_name == 'poetry.lock':
dependency_list.extend(parse_poetry_lock(file_handle))

elif f == 'environment.yml':
dependency_list = parse_conda(file_handle)
if short_file_name == 'environment.yml':
dependency_list.extend(parse_conda(file_handle))

elif f == 'environment.yaml':
dependency_list = parse_conda(file_handle)
if short_file_name == 'environment.yaml':
dependency_list.extend(parse_conda(file_handle))

elif f == 'environment.yml.lock':
dependency_list = parse_conda(file_handle)
if f == 'environment.yml.lock':
dependency_list.extend(parse_conda(file_handle))

elif f == 'environment.yaml.lock':
dependency_list = parse_conda(file_handle)
if short_file_name == 'environment.yaml.lock':
dependency_list.extend(parse_conda(file_handle))

elif f == 'package.json':
if short_file_name == 'package.json':
try:
dependency_list = parse_package_json(file_handle)
dependency_list.extend(parse_package_json(file_handle))
except KeyError as e:
logger.error(f"package.json for repo at path {path} is missing required key: {e}\n Skipping file...")


return dependency_list
return dependency_list


def get_libyear(current_version, current_release_date, latest_version, latest_release_date):
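Two behavior changes run through get_parsed_deps: each manifest's parse results are now accumulated with list.extend() rather than overwriting dependency_list, and the comparison is made against the file's basename (os.path.split(deps_file)[-1]) instead of the search pattern. A minimal sketch of that accumulation pattern, with hypothetical parser stand-ins in place of the real libyear_util parsers:

```python
import os

# Hypothetical stand-ins for the real libyear_util parsers.
def parse_requirement_txt(fh):
    return [{"name": "requests", "package": "PYPI"}]

def parse_setup_py(fh):
    return [{"name": "sqlalchemy", "package": "PYPI"}]

PARSERS = {
    "requirements.txt": parse_requirement_txt,
    "setup.py": parse_setup_py,
}

def collect_deps(dep_file_paths):
    dependency_list = []
    for deps_file in dep_file_paths:
        short_file_name = os.path.split(deps_file)[-1]  # match on the basename
        parser = PARSERS.get(short_file_name)
        if parser is None:
            continue
        with open(deps_file) as fh:
            # extend() accumulates across files; plain assignment would keep
            # only the last manifest parsed, which is what the PR fixes.
            dependency_list.extend(parser(fh))
    return dependency_list
```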
41 changes: 30 additions & 11 deletions augur/tasks/github/messages.py
@@ -1,5 +1,5 @@
import logging

from datetime import timedelta, timezone

from augur.tasks.init.celery_app import celery_app as celery
from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask
@@ -10,12 +10,13 @@
from augur.tasks.github.util.util import get_owner_repo
from augur.application.db.models import PullRequest, Message, Issue, PullRequestMessageRef, IssueMessageRef, Contributor, Repo, CollectionStatus
from augur.application.db import get_engine, get_session
from augur.application.db.lib import get_core_data_last_collected
from sqlalchemy.sql import text

platform_id = 1

@celery.task(base=AugurCoreRepoCollectionTask)
def collect_github_messages(repo_git: str) -> None:
def collect_github_messages(repo_git: str, full_collection: bool) -> None:

logger = logging.getLogger(collect_github_messages.__name__)

@@ -29,9 +30,15 @@ def collect_github_messages(repo_git: str) -> None:
owner, repo = get_owner_repo(repo_git)
task_name = f"{owner}/{repo}: Message Task"

if full_collection:
core_data_last_collected = None
else:
# subtract 2 days to ensure all data is collected
core_data_last_collected = (get_core_data_last_collected(repo_id) - timedelta(days=2)).replace(tzinfo=timezone.utc)


if is_repo_small(repo_id):
message_data = fast_retrieve_all_pr_and_issue_messages(repo_git, logger, manifest.key_auth, task_name)
message_data = fast_retrieve_all_pr_and_issue_messages(repo_git, logger, manifest.key_auth, task_name, core_data_last_collected)

if message_data:
process_messages(message_data, task_name, repo_id, logger, augur_db)
@@ -40,7 +47,7 @@ def collect_github_messages(repo_git: str) -> None:
logger.info(f"{owner}/{repo} has no messages")

else:
process_large_issue_and_pr_message_collection(repo_id, repo_git, logger, manifest.key_auth, task_name, augur_db)
process_large_issue_and_pr_message_collection(repo_id, repo_git, logger, manifest.key_auth, task_name, augur_db, core_data_last_collected)


def is_repo_small(repo_id):
@@ -51,13 +58,16 @@ def is_repo_small(repo_id):

return result != None

def fast_retrieve_all_pr_and_issue_messages(repo_git: str, logger, key_auth, task_name) -> None:
def fast_retrieve_all_pr_and_issue_messages(repo_git: str, logger, key_auth, task_name, since) -> None:

owner, repo = get_owner_repo(repo_git)

# url to get issue and pull request comments
url = f"https://api.github.com/repos/{owner}/{repo}/issues/comments"

if since:
url += f"?since={since.isoformat()}"

# define logger for task
logger.info(f"Collecting github comments for {owner}/{repo}")

@@ -70,7 +80,7 @@ def fast_retrieve_all_pr_and_issue_messages(repo_git: str, logger, key_auth, tas
return list(github_data_access.paginate_resource(url))


def process_large_issue_and_pr_message_collection(repo_id, repo_git: str, logger, key_auth, task_name, augur_db) -> None:
def process_large_issue_and_pr_message_collection(repo_id, repo_git: str, logger, key_auth, task_name, augur_db, since) -> None:

owner, repo = get_owner_repo(repo_git)

@@ -81,11 +91,20 @@ def process_large_issue_and_pr_message_collection(repo_id, repo_git: str, logger

with engine.connect() as connection:

query = text(f"""
(select pr_comments_url from pull_requests WHERE repo_id={repo_id} order by pr_created_at desc)
UNION
(select comments_url as comment_url from issues WHERE repo_id={repo_id} order by created_at desc);
""")
if since:
query = text(f"""
(select pr_comments_url from pull_requests WHERE repo_id={repo_id} AND pr_updated_at > timestamptz(timestamp '{since}') order by pr_created_at desc)
UNION
(select comments_url as comment_url from issues WHERE repo_id={repo_id} AND updated_at > timestamptz(timestamp '{since}') order by created_at desc);
""")
else:

query = text(f"""
(select pr_comments_url from pull_requests WHERE repo_id={repo_id} order by pr_created_at desc)
UNION
(select comments_url as comment_url from issues WHERE repo_id={repo_id} order by created_at desc);
""")


result = connection.execute(query).fetchall()
comment_urls = [x[0] for x in result]
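The new since argument limits collection to recently updated data: for small repos it is appended to GitHub's issue-comments listing (which accepts a since query parameter), and for large repos it filters the pull_requests and issues tables by their updated timestamps. A minimal sketch of the small-repo URL construction, using illustrative values in place of the real repo and lookup helper:

```python
from datetime import datetime, timedelta, timezone

# Illustrative stand-in for get_core_data_last_collected(repo_id).
core_data_last_collected = datetime(2025, 1, 10, tzinfo=timezone.utc)

# As in the PR, subtract 2 days to leave a safety margin against missed updates.
since = (core_data_last_collected - timedelta(days=2)).replace(tzinfo=timezone.utc)

owner, repo = "chaoss", "augur"  # illustrative values
url = f"https://api.github.com/repos/{owner}/{repo}/issues/comments"
if since:
    url += f"?since={since.isoformat()}"

print(url)  # .../issues/comments?since=2025-01-08T00:00:00+00:00
```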
2 changes: 1 addition & 1 deletion augur/tasks/start_tasks.py
Review comment (Contributor):
The reviewdog warning is due to a Celery quirk with injecting self externally.
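A minimal sketch of that quirk, assuming the helper below is registered as a bound Celery task (an assumption drawn from the comment above, not the project's actual registration):

```python
from celery import Celery

app = Celery("sketch")  # hypothetical app, for illustration only

@app.task(bind=True)
def issue_pr_task_update_weight_util(self, issue_and_pr_nums, repo_git=None, session=None):
    # With bind=True, Celery's task wrapper supplies `self` when the task is
    # invoked, so callers never pass it explicitly.
    return issue_and_pr_nums

# A call site like the one flagged in start_tasks.py passes only
# issue_and_pr_nums, repo_git, and session. Pylint inspects the raw function
# signature, does not know the decorator injects `self`, and therefore reports
# E1120 (no value for a required argument).
```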

@@ -74,7 +74,7 @@
#Define secondary group that can't run until after primary jobs have finished.
secondary_repo_jobs = group(
collect_events.si(repo_git),#*create_grouped_task_load(dataList=first_pass, task=collect_events).tasks,
collect_github_messages.si(repo_git), #*create_grouped_task_load(dataList=first_pass,task=collect_github_messages).tasks,
collect_github_messages.si(repo_git, full_collection), #*create_grouped_task_load(dataList=first_pass,task=collect_github_messages).tasks,
collect_github_repo_clones_data.si(repo_git),
)

@@ -295,7 +295,7 @@
status = repo.collection_status[0]
raw_count = status.issue_pr_sum

issue_pr_task_update_weight_util([int(raw_count)],repo_git=repo_git,session=session)

⚠️ pylint (reviewdog) at start_tasks.py:298:12: E1120: No value for argument 'issue_and_pr_nums' in function call (no-value-for-parameter)

facade_not_pending = CollectionStatus.facade_status != CollectionState.PENDING.value
facade_not_failed = CollectionStatus.facade_status != CollectionState.FAILED_CLONE.value
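The single change in the secondary group threads full_collection down to collect_github_messages via the immutable signature (.si), which freezes the arguments when the group is built. A minimal sketch of that wiring, with a hypothetical task body and illustrative values:

```python
from celery import Celery, group

app = Celery("sketch")  # hypothetical app, for illustration only

@app.task
def collect_github_messages(repo_git: str, full_collection: bool) -> None:
    # In the PR, full_collection=False switches the task to incremental mode
    # (only data updated since the last core collection, minus a 2-day margin).
    mode = "full" if full_collection else "incremental"
    print(f"collecting messages for {repo_git} ({mode})")

repo_git = "https://github.com/chaoss/augur"  # illustrative value
full_collection = False

# .si() builds an immutable signature: the arguments are fixed now, and no
# parent-task result is prepended when the group runs.
secondary_repo_jobs = group(
    collect_github_messages.si(repo_git, full_collection),
)
```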