diff --git a/README.md b/README.md
index 3f83946f5d..02ec125fb6 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Augur NEW Release v0.63.3
+# Augur NEW Release v0.70.0
 
 Augur is primarily a data engineering tool that makes it possible for data scientists to gather open source software community data. Less data carpentry for everyone else!
 The primary way of looking at Augur data is through [8Knot](https://github.com/oss-aspen/8knot) ... A public instance of 8Knot is available at https://metrix.chaoss.io ... That is tied to a public instance of Augur at https://ai.chaoss.io
@@ -10,7 +10,7 @@ The primary way of looking at Augur data is through [8Knot](https://github.com/o
 ## NEW RELEASE ALERT!
 ### [If you want to jump right in, updated docker build/compose and bare metal installation instructions are available here](docs/new-install.md)
 
-Augur is now releasing a dramatically improved new version to the main branch. It is also available here: https://github.com/chaoss/augur/releases/tag/v0.63.3
+Augur is now releasing a dramatically improved new version to the main branch. It is also available here: https://github.com/chaoss/augur/releases/tag/v0.70.0
 
 - The `main` branch is a stable version of our new architecture, which features:
   - Dramatic improvement in the speed of large scale data collection (100,000+ repos). All data is obtained for 100k+ repos within 2 weeks.
diff --git a/augur/application/cli/backend.py b/augur/application/cli/backend.py
index a0480adab4..d7a8ad745d 100644
--- a/augur/application/cli/backend.py
+++ b/augur/application/cli/backend.py
@@ -172,7 +172,7 @@ def determine_worker_processes(ratio,maximum):
         sleep_time += 6
 
     #20% of estimate, Maximum value of 25
-    secondary_num_processes = determine_worker_processes(.25, 25)
+    secondary_num_processes = determine_worker_processes(.25, 45)
     logger.info(f"Starting secondary worker processes with concurrency={secondary_num_processes}")
     secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={secondary_num_processes} -n secondary:{uuid.uuid4().hex}@%h -Q secondary"
     process_list.append(subprocess.Popen(secondary_worker.split(" ")))
diff --git a/augur/application/cli/collection.py b/augur/application/cli/collection.py
index 63c433a79e..7d65cad978 100644
--- a/augur/application/cli/collection.py
+++ b/augur/application/cli/collection.py
@@ -132,7 +132,7 @@ def determine_worker_processes(ratio,maximum):
         sleep_time += 6
 
     #20% of estimate, Maximum value of 25
-    secondary_num_processes = determine_worker_processes(.25, 25)
+    secondary_num_processes = determine_worker_processes(.25, 45)
    logger.info(f"Starting secondary worker processes with concurrency={secondary_num_processes}")
     secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={secondary_num_processes} -n secondary:{uuid.uuid4().hex}@%h -Q secondary"
     process_list.append(subprocess.Popen(secondary_worker.split(" ")))
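
The two CLI hunks above (and the hard-coded worker command in `augur/application/cli/tasks.py` below) raise the secondary queue's worker cap from 25 to 45. `determine_worker_processes` itself is not part of this patch; the following is only a minimal sketch of the ratio-plus-cap behavior its call sites imply, with the internals assumed rather than taken from Augur:

```python
import os

def determine_worker_processes(ratio: float, maximum: int) -> int:
    """Illustrative stand-in: derive a worker count as a fraction of an
    estimated process budget, clamped to `maximum`. The real helper's
    estimate may differ (e.g. task-count based rather than CPU based)."""
    estimated_budget = (os.cpu_count() or 1) * 4  # assumed budget heuristic
    return max(1, min(maximum, round(estimated_budget * ratio)))

# With the new call sites, the secondary queue may now scale up to 45 processes:
secondary_num_processes = determine_worker_processes(.25, 45)
print(secondary_num_processes)
```
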
diff --git a/augur/application/cli/tasks.py b/augur/application/cli/tasks.py
index b4bec994eb..c64dce5b88 100644
--- a/augur/application/cli/tasks.py
+++ b/augur/application/cli/tasks.py
@@ -37,7 +37,7 @@ def start():
 
     scheduling_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=1 -n scheduling:{uuid.uuid4().hex}@%h -Q scheduling"
     core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=45 -n core:{uuid.uuid4().hex}@%h"
-    secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=25 -n secondary:{uuid.uuid4().hex}@%h -Q secondary"
+    secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=45 -n secondary:{uuid.uuid4().hex}@%h -Q secondary"
 
     scheduling_worker_process = subprocess.Popen(scheduling_worker.split(" "))
     core_worker_process = subprocess.Popen(core_worker.split(" "))
diff --git a/augur/tasks/git/dependency_tasks/core.py b/augur/tasks/git/dependency_tasks/core.py
index 296e69075e..e4c6273479 100644
--- a/augur/tasks/git/dependency_tasks/core.py
+++ b/augur/tasks/git/dependency_tasks/core.py
@@ -72,7 +72,11 @@ def generate_scorecard(session,repo_id,path):
     key_handler = GithubApiKeyHandler(session, session.logger)
     os.environ['GITHUB_AUTH_TOKEN'] = key_handler.get_random_key()
 
-    required_output = parse_json_from_subprocess_call(session.logger,['./scorecard', command, '--format=json'],cwd=path_to_scorecard)
+    try:
+        required_output = parse_json_from_subprocess_call(session.logger,['./scorecard', command, '--format=json'],cwd=path_to_scorecard)
+    except Exception as e:
+        session.logger.error(f"Could not parse required output! Error: {e}")
+        raise e
 
     session.logger.info('adding to database...')
     session.logger.debug(f"output: {required_output}")
diff --git a/augur/tasks/git/util/facade_worker/facade_worker/rebuildcache.py b/augur/tasks/git/util/facade_worker/facade_worker/rebuildcache.py
index 5668739767..e4697dbc19 100644
--- a/augur/tasks/git/util/facade_worker/facade_worker/rebuildcache.py
+++ b/augur/tasks/git/util/facade_worker/facade_worker/rebuildcache.py
@@ -396,7 +396,8 @@ def rebuild_unknown_affiliation_and_web_caches(session):
 #    ("DELETE c.* FROM dm_repo_group_weekly c "
 #        "JOIN repo_groups p ON c.repo_group_id = p.repo_group_id WHERE "
 #        "p.rg_recache=TRUE")
-    session.execute_sql(clear_dm_repo_group_weekly)
+
+#    session.execute_sql(clear_dm_repo_group_weekly)
 
     clear_dm_repo_group_monthly = s.sql.text("""
         DELETE
@@ -410,7 +411,8 @@ def rebuild_unknown_affiliation_and_web_caches(session):
 #    ("DELETE c.* FROM dm_repo_group_monthly c "
 #        "JOIN repo_groups p ON c.repo_group_id = p.repo_group_id WHERE "
 #        "p.rg_recache=TRUE")
-    session.execute_sql(clear_dm_repo_group_monthly)
+
+#    session.execute_sql(clear_dm_repo_group_monthly)
 
     clear_dm_repo_group_annual = s.sql.text("""
         DELETE
@@ -424,7 +426,7 @@ def rebuild_unknown_affiliation_and_web_caches(session):
 #    ("DELETE c.* FROM dm_repo_group_annual c "
 #        "JOIN repo_groups p ON c.repo_group_id = p.repo_group_id WHERE "
 #        "p.rg_recache=TRUE")
-    session.execute_sql(clear_dm_repo_group_annual)
+#    session.execute_sql(clear_dm_repo_group_annual)
 
     clear_dm_repo_weekly = s.sql.text("""
         DELETE
@@ -441,7 +443,7 @@ def rebuild_unknown_affiliation_and_web_caches(session):
 #        "JOIN repo r ON c.repo_id = r.repo_id "
 #        "JOIN repo_groups p ON r.repo_group_id = p.repo_group_id WHERE "
 #        "p.rg_recache=TRUE")
-    session.execute_sql(clear_dm_repo_weekly)
+#    session.execute_sql(clear_dm_repo_weekly)
 
     clear_dm_repo_monthly = s.sql.text("""
         DELETE
@@ -458,7 +460,7 @@ def rebuild_unknown_affiliation_and_web_caches(session):
 #        "JOIN repo r ON c.repo_id = r.repo_id "
 #        "JOIN repo_groups p ON r.repo_group_id = p.repo_group_id WHERE "
 #        "p.rg_recache=TRUE")
-    session.execute_sql(clear_dm_repo_monthly)
+#    session.execute_sql(clear_dm_repo_monthly)
 
     clear_dm_repo_annual = s.sql.text("""
         DELETE
@@ -475,7 +477,7 @@ def rebuild_unknown_affiliation_and_web_caches(session):
 #        "JOIN repo r ON c.repo_id = r.repo_id "
 #        "JOIN repo_groups p ON r.repo_group_id = p.repo_group_id WHERE "
"p.rg_recache=TRUE") - session.execute_sql(clear_dm_repo_annual) +# session.execute_sql(clear_dm_repo_annual) clear_unknown_cache = s.sql.text(""" DELETE @@ -573,7 +575,7 @@ def rebuild_unknown_affiliation_and_web_caches(session): "r.repo_group_id, info.a, info.b, info.c") ).bindparams(tool_source=session.tool_source,tool_version=session.tool_version,data_source=session.data_source) - session.execute_sql(cache_projects_by_week) +# session.execute_sql(cache_projects_by_week) cache_projects_by_month = s.sql.text( ("INSERT INTO dm_repo_group_monthly (repo_group_id, email, affiliation, month, year, added, removed, whitespace, files, patches, tool_source, tool_version, data_source) " @@ -609,7 +611,7 @@ def rebuild_unknown_affiliation_and_web_caches(session): "r.repo_group_id, info.a, info.b, info.c" )).bindparams(tool_source=session.tool_source,tool_version=session.tool_version,data_source=session.data_source) - session.execute_sql(cache_projects_by_month) +# session.execute_sql(cache_projects_by_month) cache_projects_by_year = s.sql.text(( "INSERT INTO dm_repo_group_annual (repo_group_id, email, affiliation, year, added, removed, whitespace, files, patches, tool_source, tool_version, data_source) " @@ -649,7 +651,7 @@ def rebuild_unknown_affiliation_and_web_caches(session): - session.execute_sql(cache_projects_by_year) + # session.execute_sql(cache_projects_by_year) # Start caching by repo session.log_activity('Verbose','Caching repos') @@ -689,7 +691,7 @@ def rebuild_unknown_affiliation_and_web_caches(session): "a.repo_id, info.a, info.b, info.c" )).bindparams(tool_source=session.tool_source,tool_version=session.tool_version,data_source=session.data_source) - session.execute_sql(cache_repos_by_week) +# session.execute_sql(cache_repos_by_week) cache_repos_by_month = s.sql.text(( "INSERT INTO dm_repo_monthly (repo_id, email, affiliation, month, year, added, removed, whitespace, files, patches, tool_source, tool_version, data_source)" @@ -725,7 +727,7 @@ def rebuild_unknown_affiliation_and_web_caches(session): "a.repo_id, info.a, info.b, info.c" )).bindparams(tool_source=session.tool_source,tool_version=session.tool_version,data_source=session.data_source) - session.execute_sql(cache_repos_by_month) +# session.execute_sql(cache_repos_by_month) cache_repos_by_year = s.sql.text(( "INSERT INTO dm_repo_annual (repo_id, email, affiliation, year, added, removed, whitespace, files, patches, tool_source, tool_version, data_source)" @@ -759,7 +761,7 @@ def rebuild_unknown_affiliation_and_web_caches(session): "a.repo_id, info.a, info.b, info.c" )).bindparams(tool_source=session.tool_source,tool_version=session.tool_version,data_source=session.data_source) - session.execute_sql(cache_repos_by_year) +# session.execute_sql(cache_repos_by_year) # Reset cache flags diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 69e40f6818..73ea1b025a 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -12,6 +12,8 @@ from augur.application.db.util import execute_session_query from ..messages.tasks import process_github_comment_contributors +from typing import Generator, List, Dict + platform_id = 1 @@ -29,20 +31,32 @@ def collect_pull_requests(repo_git: str) -> int: Repo.repo_git == repo_git).one().repo_id owner, repo = get_owner_repo(repo_git) - pr_data = retrieve_all_pr_data(repo_git, logger, manifest.key_auth) - if pr_data: - process_pull_requests(pr_data, f"{owner}/{repo}: Pr task", repo_id, logger, augur_db) + 
@@ -29,20 +31,32 @@ def collect_pull_requests(repo_git: str) -> int:
             Repo.repo_git == repo_git).one().repo_id
 
         owner, repo = get_owner_repo(repo_git)
 
-        pr_data = retrieve_all_pr_data(repo_git, logger, manifest.key_auth)
-
-        if pr_data:
-            process_pull_requests(pr_data, f"{owner}/{repo}: Pr task", repo_id, logger, augur_db)
+        total_count = 0
+        all_data = []
+        for page in retrieve_all_pr_data(repo_git, logger, manifest.key_auth):
+            all_data += page
+
+            if len(all_data) >= 1000:
+                process_pull_requests(all_data, f"{owner}/{repo}: Pr task", repo_id, logger, augur_db)
+                total_count += len(all_data)
+                all_data.clear()
+
+        if len(all_data):
+            process_pull_requests(all_data, f"{owner}/{repo}: Pr task", repo_id, logger, augur_db)
+            total_count += len(all_data)
 
-            return len(pr_data)
+        if total_count > 0:
+            return total_count
         else:
             logger.info(f"{owner}/{repo} has no pull requests")
             return 0
 
 
+
 # TODO: Rename pull_request_reviewers table to pull_request_requested_reviewers
 # TODO: Fix column names in pull request labels table
-def retrieve_all_pr_data(repo_git: str, logger, key_auth) -> None:
+def retrieve_all_pr_data(repo_git: str, logger, key_auth): #-> Generator[List[Dict]]:
 
     owner, repo = get_owner_repo(repo_git)
 
@@ -52,24 +66,21 @@ def retrieve_all_pr_data(repo_git: str, logger, key_auth) -> None:
 
     # returns an iterable of all prs at this url (this essentially means you can treat the prs variable as a list of the prs)
     prs = GithubPaginator(url, key_auth, logger)
 
-    all_data = []
     num_pages = prs.get_num_pages()
     for page_data, page in prs.iter_pages():
 
         if page_data is None:
-            return all_data
+            return
 
         if len(page_data) == 0:
             logger.debug(
                 f"{owner}/{repo} Prs Page {page} contains no data...returning")
             logger.info(f"{owner}/{repo} Prs Page {page} of {num_pages}")
-            return all_data
+            return
 
         logger.info(f"{owner}/{repo} Prs Page {page} of {num_pages}")
-
-        all_data += page_data
-
-    return all_data
+
+        yield page_data
 
 
 def process_pull_requests(pull_requests, task_name, repo_id, logger, augur_db):
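
`collect_pull_requests` now consumes `retrieve_all_pr_data` as a generator and flushes to the database every 1,000 records instead of accumulating every pull request in memory. A self-contained sketch of that batching pattern; the page generator and the sink below are stand-ins for the real `retrieve_all_pr_data` and `process_pull_requests`:

```python
from typing import Dict, Generator, List

def fetch_pages() -> Generator[List[Dict], None, None]:
    """Stand-in for retrieve_all_pr_data: yields one list of records per API page."""
    for page in range(5):
        yield [{"page": page, "number": n} for n in range(300)]

def flush(batch: List[Dict]) -> None:
    """Stand-in for process_pull_requests: persist one batch of records."""
    print(f"flushing {len(batch)} records")

def collect(batch_size: int = 1000) -> int:
    total_count = 0
    all_data: List[Dict] = []
    for page in fetch_pages():
        all_data += page
        if len(all_data) >= batch_size:   # flush early so memory stays bounded
            flush(all_data)
            total_count += len(all_data)
            all_data.clear()
    if all_data:                          # flush whatever is left over
        flush(all_data)
        total_count += len(all_data)
    return total_count

print(collect())  # 1500
```
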
core data obtained") + else: + self.logger.info(f"Helen, the ghost in our machine, did not get a numerical result for core data (value): {data} \n Zero value assigned.") + coreData['totalCount'] = 0 + else: + self.logger.error("Core data is None, cannot proceed with operations on it, but assigning a value of Zero to ensure continued collection.") + yield None + return except KeyError as e: self.logger.error("Could not extract paginate result because there was no data returned") - self.logger.error( - ''.join(traceback.format_exception(None, e, e.__traceback__))) - - self.logger.info(f"Graphql paramters: {params}") - return + self.logger.error(''.join(traceback.format_exception(None, e, e.__traceback__))) if int(coreData['totalCount']) == 0: diff --git a/augur/tasks/util/collection_util.py b/augur/tasks/util/collection_util.py index 3561b19b40..9776258626 100644 --- a/augur/tasks/util/collection_util.py +++ b/augur/tasks/util/collection_util.py @@ -132,58 +132,76 @@ def __init__(self,name,phases,max_repo = 10,days_until_collect_again = 1, gitlab def get_active_repo_count(self,session): return len(session.query(CollectionStatus).filter(getattr(CollectionStatus,f"{self.name}_status" ) == CollectionState.COLLECTING.value).all()) - #Get repo urls based on passed in info. + def get_valid_repos(self,session): - #getattr(CollectionStatus,f"{hook}_status" ) represents the status of the given hook - #Get the count of repos that are currently running this collection hook - #status_column = f"{hook}_status" + active_repo_count = self.get_active_repo_count(session) + limit = self.max_repo-active_repo_count - #Will always disallow errored repos and repos that are already collecting + if limit <= 0: + return - #The maximum amount of repos to schedule is affected by the existing repos running tasks - limit = self.max_repo-active_repo_count + collection_list = get_newly_added_repos(session, limit, hook=self.name) + self.repo_list.extend(collection_list) + limit -= len(collection_list) - #Extract the user id from the randomized list and split into four chunks - split_user_list = split_random_users_list(session,f"{self.name}_status",self.new_status) + #Now start recollecting other repos if there is space to do so. + if limit <= 0: + return - session.logger.info(f"User_list: {split_user_list}") + collection_list = get_repos_for_recollection(session, limit, hook=self.name, days_until_collect_again=self.days_until_collect_again) - #Iterate through each fourth of the users fetched - for quarter_list in split_user_list: - if limit <= 0: - return + self.repo_list.extend(collection_list) - collection_list = get_valid_repos_for_users(session,limit,tuple(quarter_list),hook=self.name, days_to_wait_until_next_collection=self.days_until_collect_again) - self.repo_list.extend(collection_list) - #Update limit with amount of repos started - limit -= len(collection_list) +def get_newly_added_repos(session, limit, hook): - #Now start old repos if there is space to do so. 
diff --git a/augur/tasks/util/collection_util.py b/augur/tasks/util/collection_util.py
index 3561b19b40..9776258626 100644
--- a/augur/tasks/util/collection_util.py
+++ b/augur/tasks/util/collection_util.py
@@ -132,58 +132,76 @@ def __init__(self,name,phases,max_repo = 10,days_until_collect_again = 1, gitlab
     def get_active_repo_count(self,session):
         return len(session.query(CollectionStatus).filter(getattr(CollectionStatus,f"{self.name}_status" ) == CollectionState.COLLECTING.value).all())
 
-    #Get repo urls based on passed in info.
+
     def get_valid_repos(self,session):
-        #getattr(CollectionStatus,f"{hook}_status" ) represents the status of the given hook
-        #Get the count of repos that are currently running this collection hook
-        #status_column = f"{hook}_status"
+
         active_repo_count = self.get_active_repo_count(session)
+        limit = self.max_repo-active_repo_count
 
-        #Will always disallow errored repos and repos that are already collecting
+        if limit <= 0:
+            return
 
-        #The maximum amount of repos to schedule is affected by the existing repos running tasks
-        limit = self.max_repo-active_repo_count
+        collection_list = get_newly_added_repos(session, limit, hook=self.name)
+        self.repo_list.extend(collection_list)
+        limit -= len(collection_list)
 
-        #Extract the user id from the randomized list and split into four chunks
-        split_user_list = split_random_users_list(session,f"{self.name}_status",self.new_status)
+        #Now start recollecting other repos if there is space to do so.
+        if limit <= 0:
+            return
 
-        session.logger.info(f"User_list: {split_user_list}")
+        collection_list = get_repos_for_recollection(session, limit, hook=self.name, days_until_collect_again=self.days_until_collect_again)
 
-        #Iterate through each fourth of the users fetched
-        for quarter_list in split_user_list:
-            if limit <= 0:
-                return
+        self.repo_list.extend(collection_list)
 
-            collection_list = get_valid_repos_for_users(session,limit,tuple(quarter_list),hook=self.name, days_to_wait_until_next_collection=self.days_until_collect_again)
-            self.repo_list.extend(collection_list)
-            #Update limit with amount of repos started
-            limit -= len(collection_list)
+
+def get_newly_added_repos(session, limit, hook):
 
-        #Now start old repos if there is space to do so.
-        if limit <= 0:
-            return
+    condition_string = ""
+    if hook in ["core", "secondary", "ml"]:
+        condition_string += f"""{hook}_status='{str(CollectionState.PENDING.value)}'"""
+
+    elif hook == "facade":
+        condition_string += f"""facade_status='{str(CollectionState.UPDATE.value)}'"""
+
+    if hook == "secondary":
+        condition_string += f""" and core_status='{str(CollectionState.SUCCESS.value)}'"""
+
+    repo_query = s.sql.text(f"""
+        select repo_git
+        from augur_operations.collection_status x, augur_data.repo y
+        where x.repo_id=y.repo_id
+        and {condition_string}
+        order by repo_added
+        limit :limit_num
+    """).bindparams(limit_num=limit)
+
+    valid_repos = session.execute_sql(repo_query).fetchall()
+    valid_repo_git_list = [repo[0] for repo in valid_repos]
 
-        user_list = get_list_of_all_users(session)
-        random.shuffle(user_list)
+    return valid_repo_git_list
 
-        #Extract the user id from the randomized list and split into four chunks
-        split_user_list = split_list_into_chunks([row[0] for row in user_list], 4)
+def get_repos_for_recollection(session, limit, hook, days_until_collect_again):
 
-        for quarter_list in split_user_list:
+    if hook in ["core", "secondary", "ml"]:
+        condition_string = f"""{hook}_status='{str(CollectionState.SUCCESS.value)}'"""
 
-            #Break out if limit has been reached
-            if limit <= 0:
-                return
+    elif hook == "facade":
+        condition_string = f"""facade_status='{str(CollectionState.SUCCESS.value)}'"""
 
-            #only start repos older than the specified amount of days
-            #Query a set of valid repositories sorted by weight, also making sure that the repos aren't new or errored
-            #Order by the relevant weight for the collection hook
-            collection_list = get_valid_repos_for_users(session,limit,tuple(quarter_list),allow_old_repos=True,hook=self.name, days_to_wait_until_next_collection=self.days_until_collect_again)
+    repo_query = s.sql.text(f"""
+        select repo_git
+        from augur_operations.collection_status x, repo y
+        where x.repo_id = y.repo_id
+        and {condition_string}
+        and {hook}_data_last_collected <= NOW() - INTERVAL '{days_until_collect_again} DAYS'
+        order by {hook}_data_last_collected
+        limit :limit_num
+    """).bindparams(limit_num=limit)
 
-            self.repo_list.extend(collection_list)
-            limit -= len(collection_list)
+    valid_repos = session.execute_sql(repo_query).fetchall()
+    valid_repo_git_list = [repo[0] for repo in valid_repos]
+
+    return valid_repo_git_list
 
 
 def get_enabled_phase_names_from_config():
@@ -610,80 +628,3 @@ def send_messages(self):
 
             #yield the value of the task_id to the calling method so that the proper collectionStatus field can be updated
             yield repo_git, task_id, col_hook.name
-
-#def start_block_of_repos(logger,session,repo_git_identifiers,phases,repos_type,hook="core"):
-#
-#    logger.info(f"Starting collection on {len(repo_git_identifiers)} {repos_type} {hook} repos")
-#    if len(repo_git_identifiers) == 0:
-#        return 0
-#
-#    logger.info(f"Collection starting for {hook}: {tuple(repo_git_identifiers)}")
-#
-#    routine = AugurTaskRoutine(session,repos=repo_git_identifiers,collection_phases=phases,collection_hook=hook)
-#
-#    routine.start_data_collection()
-#
-#    return len(repo_git_identifiers)
-
-def get_valid_repos_for_users(session,limit,users,allow_old_repos = False,hook="core",days_to_wait_until_next_collection = 1):
-
-    condition_string = "1"
-
-    if hook == "core":
-        condition_string = get_required_conditions_for_core_repos(allow_collected_before=allow_old_repos,days_until_collect_again= days_to_wait_until_next_collection)
-    elif hook == "secondary":
-        condition_string = get_required_conditions_for_secondary_repos(allow_collected_before=allow_old_repos,days_until_collect_again = days_to_wait_until_next_collection)
-    elif hook == "facade":
-        condition_string = get_required_conditions_for_facade_repos(allow_collected_before=allow_old_repos,days_until_collect_again = days_to_wait_until_next_collection)
-    elif hook == "ml":
-        condition_string = get_required_conditions_for_ml_repos(allow_collected_before=allow_old_repos,days_until_collect_again = days_to_wait_until_next_collection)
-
-    #Query a set of valid repositories sorted by weight, also making sure that the repos are new
-    #Order by the relevant weight for the collection hook
-    repo_query = s.sql.text(f"""
-        SELECT DISTINCT repo.repo_id, repo.repo_git, collection_status.{hook}_weight
-        FROM augur_operations.user_groups
-        JOIN augur_operations.user_repos ON augur_operations.user_groups.group_id = augur_operations.user_repos.group_id
-        JOIN augur_data.repo ON augur_operations.user_repos.repo_id = augur_data.repo.repo_id
-        JOIN augur_operations.collection_status ON augur_operations.user_repos.repo_id = augur_operations.collection_status.repo_id
-        WHERE user_id IN :list_of_user_ids AND {condition_string}
-        ORDER BY augur_operations.collection_status.{hook}_weight
-        LIMIT :limit_num
-    """).bindparams(list_of_user_ids=users,limit_num=limit)
-
-    #Get a list of valid repo ids, limit set to 2 times the usual
-    valid_repos = session.execute_sql(repo_query).fetchall()
-    valid_repo_git_list = [repo[1] for repo in valid_repos]
-
-    session.logger.info(f"valid repo git list: {tuple(valid_repo_git_list)}")
-
-    #start repos for new primary collection hook
-    #collection_size = start_block_of_repos(
-    #    session.logger, session,
-    #    valid_repo_git_list,
-    #    phases, repos_type=repos_type, hook=hook
-    #)
-
-    return valid_repo_git_list
-
-def split_random_users_list(session,status_col, status_new):
-    #Split all users that have new repos into four lists and randomize order
-    query = s.sql.text(f"""
-        SELECT
-            user_id
-        FROM augur_operations.user_groups
-        JOIN augur_operations.user_repos ON augur_operations.user_groups.group_id = augur_operations.user_repos.group_id
-        JOIN augur_data.repo ON augur_operations.user_repos.repo_id = augur_data.repo.repo_id
-        JOIN augur_operations.collection_status ON augur_operations.user_repos.repo_id = augur_operations.collection_status.repo_id
-        WHERE {status_col}='{str(status_new)}'
-        GROUP BY user_id
-    """)
-
-    user_list = session.execute_sql(query).fetchall()
-    random.shuffle(user_list)
-
-    #Extract the user id from the randomized list and split into four chunks
-    split_user_list = split_list_into_chunks([row[0] for row in user_list], 4)
-
-    return split_user_list
-
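
`get_valid_repos` now fills each hook's budget in two passes driven by `collection_status` alone: first repos that have never been collected (`PENDING`, or `UPDATE` for facade), then repos whose last successful run is older than `days_until_collect_again`, replacing the old user-list shuffling. A rough sketch of how the two new helpers compose, assuming the surrounding module context from the hunk above (`session`, `get_newly_added_repos`, `get_repos_for_recollection`):

```python
def schedule_repos(session, hook: str, max_repo: int, active_repo_count: int,
                   days_until_collect_again: int = 1) -> list:
    """Mirror of the new get_valid_repos flow: newly added repos first,
    then recollection of previously successful repos, up to the budget."""
    repo_list = []
    limit = max_repo - active_repo_count
    if limit <= 0:
        return repo_list

    newly_added = get_newly_added_repos(session, limit, hook=hook)
    repo_list.extend(newly_added)
    limit -= len(newly_added)

    if limit > 0:
        repo_list.extend(get_repos_for_recollection(
            session, limit, hook=hook,
            days_until_collect_again=days_until_collect_again))
    return repo_list
```
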
diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile
index 1ec7871c9b..6e158d199b 100644
--- a/docker/backend/Dockerfile
+++ b/docker/backend/Dockerfile
@@ -2,7 +2,7 @@
 FROM python:3.10-bookworm
 
 LABEL maintainer="outdoors@acm.org"
-LABEL version="0.63.3"
+LABEL version="0.70.0"
 
 ENV DEBIAN_FRONTEND=noninteractive
diff --git a/docker/database/Dockerfile b/docker/database/Dockerfile
index effe34d3b2..1421e1f76c 100644
--- a/docker/database/Dockerfile
+++ b/docker/database/Dockerfile
@@ -2,7 +2,7 @@
 FROM postgres:14
 
 LABEL maintainer="outdoors@acm.org"
-LABEL version="0.63.3"
+LABEL version="0.70.0"
 
 ENV POSTGRES_DB "test"
 ENV POSTGRES_USER "augur"
diff --git a/docker/rabbitmq/Dockerfile b/docker/rabbitmq/Dockerfile
index 80542c788b..079c73dc99 100644
--- a/docker/rabbitmq/Dockerfile
+++ b/docker/rabbitmq/Dockerfile
@@ -1,7 +1,7 @@
 FROM rabbitmq:3.12-management-alpine
 
 LABEL maintainer="574/augur@simplelogin.com"
-LABEL version="0.63.3"
+LABEL version="0.70.0"
 
 ARG RABBIT_MQ_DEFAULT_USER
 ARG RABBIT_MQ_DEFAULT_PASSWORD
diff --git a/metadata.py b/metadata.py
index 841521b515..b914869d58 100644
--- a/metadata.py
+++ b/metadata.py
@@ -5,8 +5,8 @@
 
 __short_description__ = "Python 3 package for free/libre and open-source software community metrics, models & data collection"
 
-__version__ = "0.63.3"
-__release__ = "v0.63.3 (Supply Chain Gang)"
+__version__ = "0.70.0"
+__release__ = "v0.70.0 (Windows 95 Man!)"
 
 __license__ = "MIT"
 __copyright__ = "University of Missouri, University of Nebraska-Omaha, CHAOSS, Brian Warner & Augurlabs 2024"