Skip to content

Commit

Permalink
Merge pull request #47 from LLNL/python-cleanup
Browse files (browse the repository at this point in the history)
Python cleanup
  • Loading branch information
IanLee1521 authored Dec 6, 2019
2 parents: 66745e3 + 9102c9c; commit: 84545d6
Show file tree
Hide file tree
Showing 25 changed files with 1,720 additions and 1,101 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ dist/
venv/
*.pyc
llnl_scraper.egg-info/
.vscode/
6 changes: 3 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ language: python
sudo: false

python:
- '3.5'
- '3.6'
# - '3.7' -- Failing in Travis CI
- '3.7'
- '3.8'

install:
- pip install flake8 bandit
- pip install flake8 bandit black
- pip install .

script:
Expand Down
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
test:
bandit -r scraper/
flake8 --ignore=E501 scraper/
flake8 --ignore=E231,E501,W503 scraper/
black --check scraper/

release: test
python setup.py sdist bdist_wheel
Expand Down
1 change: 1 addition & 0 deletions requirements/dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@ twine

# Testing tools
bandit
black
flake8
safety
2 changes: 1 addition & 1 deletion scraper/bitbucket/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def connect(url, username, password):

bb_session = stashy.connect(url, username, password)

logger.info('Connected to: %s as %s', url, username)
logger.info("Connected to: %s as %s", url, username)

return bb_session

Expand Down
151 changes: 78 additions & 73 deletions scraper/code_gov/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,114 +17,119 @@ def process_config(config):
Returns a Code.gov Metadata file
"""

agency = config.get('agency', 'UNKNOWN')
logger.debug('Agency: %s', agency)
agency = config.get("agency", "UNKNOWN")
logger.debug("Agency: %s", agency)

method = config.get('method', 'other')
logger.debug('Inventory Method: %s', method)
method = config.get("method", "other")
logger.debug("Inventory Method: %s", method)

compute_labor_hours = config.get('compute_labor_hours', True)
compute_labor_hours = config.get("compute_labor_hours", True)

if config.get('contact_email', None) is None:
if config.get("contact_email", None) is None:
# A default contact email is required to handle the (frequent) case
# where a project / repository has no available contact email.
logger.warning('Config file should contain a "contact_email"')

logger.debug('Creating inventory from config: %s', config)
logger.debug("Creating inventory from config: %s", config)
code_gov_metadata = Metadata(agency, method)

# Parse config for GitHub repositories
github_instances = config.get('GitHub', [])
if config.get('github_gov_orgs', False):
github_instances.append({
'url': 'https://github.com',
'orgs': gov_orgs(),
})
github_instances = config.get("GitHub", [])
if config.get("github_gov_orgs", False):
github_instances.append({"url": "https://github.com", "orgs": gov_orgs()})
for instance in github_instances:
url = instance.get('url', 'https://github.com')
orgs = instance.get('orgs', [])
repos = instance.get('repos', [])
public_only = instance.get('public_only', True)
excluded = instance.get('exclude', [])
token = instance.get('token', None)
url = instance.get("url", "https://github.com")
orgs = instance.get("orgs", [])
repos = instance.get("repos", [])
public_only = instance.get("public_only", True)
excluded = instance.get("exclude", [])
token = instance.get("token", None)

gh_session = github.connect(url, token)

for repo in github.query_repos(gh_session, orgs, repos, public_only):
if repo.owner.login in excluded or repo.full_name in excluded:
logger.info('Excluding: %s', repo.full_name)
logger.info("Excluding: %s", repo.full_name)
continue

code_gov_project = Project.from_github3(repo, labor_hours=compute_labor_hours)
code_gov_metadata['releases'].append(code_gov_project)
code_gov_project = Project.from_github3(
repo, labor_hours=compute_labor_hours
)
code_gov_metadata["releases"].append(code_gov_project)

# Parse config for GitLab repositories
gitlab_instances = config.get('GitLab', [])
gitlab_instances = config.get("GitLab", [])
for instance in gitlab_instances:
url = instance.get('url')
url = instance.get("url")
# orgs = instance.get('orgs', [])
repos = instance.get('repos', [])
repos = instance.get("repos", [])
# public_only = instance.get('public_only', True)
excluded = instance.get('exclude', [])
token = instance.get('token', None)
fetch_languages = instance.get('fetch_languages', False)
excluded = instance.get("exclude", [])
token = instance.get("token", None)
fetch_languages = instance.get("fetch_languages", False)

gl_session = gitlab.connect(url, token)

for repo in gitlab.query_repos(gl_session, repos):
namespace = repo.namespace['path']
namespace = repo.namespace["path"]
path_with_namespace = repo.path_with_namespace
if namespace in excluded or path_with_namespace in excluded:
logger.info('Excluding: %s', repo.path_with_namespace)
logger.info("Excluding: %s", repo.path_with_namespace)
continue

code_gov_project = Project.from_gitlab(repo, labor_hours=compute_labor_hours, fetch_languages=fetch_languages)
code_gov_metadata['releases'].append(code_gov_project)
code_gov_project = Project.from_gitlab(
repo, labor_hours=compute_labor_hours, fetch_languages=fetch_languages
)
code_gov_metadata["releases"].append(code_gov_project)

# Parse config for Bitbucket repositories
bitbucket_instances = config.get('Bitbucket', [])
bitbucket_instances = config.get("Bitbucket", [])
for instance in bitbucket_instances:
url = instance.get('url')
url = instance.get("url")
# orgs = instance.get('orgs', None)
# public_only = instance.get('public_only', True)
# token = instance.get('token', None)
username = instance.get('username')
password = instance.get('password')
excluded = instance.get('exclude', [])
username = instance.get("username")
password = instance.get("password")
excluded = instance.get("exclude", [])

bb_session = bitbucket.connect(url, username, password)

for repo in bitbucket.all_repos(bb_session):
project = repo['project']['key']
project_repo = '%s/%s' % (project, repo['slug'])
project = repo["project"]["key"]
project_repo = "%s/%s" % (project, repo["slug"])
if project in excluded or project_repo in excluded:
logger.info('Excluding: %s', project_repo)
logger.info("Excluding: %s", project_repo)
continue

code_gov_project = Project.from_stashy(repo, labor_hours=compute_labor_hours)
code_gov_metadata['releases'].append(code_gov_project)
code_gov_project = Project.from_stashy(
repo, labor_hours=compute_labor_hours
)
code_gov_metadata["releases"].append(code_gov_project)

# Parse config for TFS repositories
tfs_instances = config.get('TFS', [])
tfs_instances = config.get("TFS", [])
for instance in tfs_instances:
url = instance.get('url')
token = instance.get('token', None)
url = instance.get("url")
token = instance.get("token", None)

projects = tfs.get_projects_metadata(url, token)
for project in projects:
code_gov_project = Project.from_tfs(project, labor_hours=compute_labor_hours)
code_gov_metadata['releases'].append(code_gov_project)
code_gov_project = Project.from_tfs(
project, labor_hours=compute_labor_hours
)
code_gov_metadata["releases"].append(code_gov_project)

# Handle parsing of DOE CODE records

doecode_config = config.get('DOE CODE', {})
doecode_json = doecode_config.get('json', None)
doecode_url = doecode_config.get('url', None)
doecode_key = doecode_config.get('api_key', None)
doecode_config = config.get("DOE CODE", {})
doecode_json = doecode_config.get("json", None)
doecode_url = doecode_config.get("url", None)
doecode_key = doecode_config.get("api_key", None)

for record in doecode.process(doecode_json, doecode_url, doecode_key):
code_gov_project = Project.from_doecode(record)
code_gov_metadata['releases'].append(code_gov_project)
code_gov_metadata["releases"].append(code_gov_project)

return code_gov_metadata

Expand All @@ -134,40 +139,40 @@ def force_attributes(metadata, config):
Forces certain fields in the Code.gov Metadata json
"""

organization = config.get('organization', '')
logger.debug('Organization: %s', organization)
organization = config.get("organization", "")
logger.debug("Organization: %s", organization)

contact_email = config.get('contact_email')
logger.debug('Contact Email: %s', contact_email)
contact_email = config.get("contact_email")
logger.debug("Contact Email: %s", contact_email)

permissions = config.get('permissions', {})
default_usage = permissions.get('usageType', '')
default_exemption_text = permissions.get('exemptionText', '')
logger.debug('Default usageType: %s', default_usage)
logger.debug('Default exemptionText: %s', default_exemption_text)
permissions = config.get("permissions", {})
default_usage = permissions.get("usageType", "")
default_exemption_text = permissions.get("exemptionText", "")
logger.debug("Default usageType: %s", default_usage)
logger.debug("Default exemptionText: %s", default_exemption_text)

# Force certain fields
if organization:
logger.debug('Forcing Organization to: %s', organization)
logger.debug("Forcing Organization to: %s", organization)

if contact_email:
logger.debug('Forcing Contact Email to: %s', contact_email)
logger.debug("Forcing Contact Email to: %s", contact_email)

for release in metadata['releases']:
for release in metadata["releases"]:
if organization:
release['organization'] = organization
release["organization"] = organization

if contact_email:
release['contact']['email'] = contact_email
release["contact"]["email"] = contact_email

if 'licenses' not in release['permissions']:
release['permissions']['licenses'] = None
if "licenses" not in release["permissions"]:
release["permissions"]["licenses"] = None

if 'description' not in release:
release['description'] = 'No description available...'
if "description" not in release:
release["description"] = "No description available..."

if 'usageType' not in release['permissions']:
release['permissions']['usageType'] = default_usage
release['permissions']['exemptionText'] = default_exemption_text
if "usageType" not in release["permissions"]:
release["permissions"]["usageType"] = default_usage
release["permissions"]["exemptionText"] = default_exemption_text

return metadata
Loading

0 comments on commit 84545d6

Please sign in to comment.