Skip to content

Commit

Permalink
skip .git and refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
Remi-Gau committed Jul 9, 2024
1 parent 218a233 commit 7d8b303
Showing 1 changed file with 90 additions and 50 deletions.
140 changes: 90 additions & 50 deletions reproschema/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,75 +5,102 @@
from .jsonldutils import load_file, validate_data
from .utils import lgr, start_server, stop_server

# Directory names that are never descended into during validation.
DIR_TO_SKIP = [".git", "__pycache__", "env", "venv"]

# File names that are never validated, whatever their extension.
FILES_TO_SKIP = [".DS_Store", ".gitignore", ".flake8", ".autorc", "LICENSE"]

# File suffixes that are validated. They are compared against
# ``pathlib.Path.suffix``, which always includes the leading dot (or is the
# empty string for extension-less files), so every non-empty entry must start
# with ".".
SUPPORTED_EXTENSIONS = [
    ".jsonld",
    ".json",
    ".js",
    "",
]

def validate_dir(directory, started=False, http_kwargs={}):

def validate_dir(
directory: str,
started: bool = False,
http_kwargs: None | dict[str, int] = None,
stop=None,
):
"""Validate a directory containing JSONLD documents against the ReproSchema pydantic model.
Recursively goes through the directory tree and validates files with the allowed extensions.
Parameters
----------
directory: str
Path to directory to walk for validation
started : bool
Whether an http server exists or not
http_kwargs : dict
http_kwargs : dict or None
Keyword arguments for the http server. Valid keywords are: port, path
and tmpdir
stop: None or function
Function to use to stop the HTTP server
Returns
-------
conforms: bool
Whether the document is conformant with the shape. Raises an exception
if any document is non-conformant.
"""
if http_kwargs is None:
http_kwargs = {}

if not os.path.isdir(directory):
if stop is not None:
stop_server(stop)
raise Exception(f"{directory} is not a directory")
print(f"Validating directory {directory}")
stop = None
if not started:
stop, port = start_server(**http_kwargs)
http_kwargs["port"] = port
else:
if "port" not in http_kwargs:
raise KeyError("HTTP server started, but port key is missing")

for root, _, files in os.walk(directory):
for name in files:
full_file_name = os.path.join(root, name)

if Path(full_file_name).suffix not in [
".jsonld",
"json",
"js",
"",
]:
lgr.info(f"Skipping file {full_file_name}")
continue

lgr.debug(f"Validating file {full_file_name}")
try:
data = load_file(
full_file_name, started=True, http_kwargs=http_kwargs
)
if len(data) == 0:
raise ValueError("Empty data graph")
print(f"Validating {full_file_name}")
conforms, vtext = validate_data(data)
except (ValueError, json.JSONDecodeError):

if Path(directory).name in DIR_TO_SKIP:
lgr.info(f"Skipping directory {directory}")
return True

lgr.debug(f"Validating directory {directory}")

files_to_validate = [
str(x)
for x in Path(directory).iterdir()
if x.is_file()
and x.name not in FILES_TO_SKIP
and x.suffix in SUPPORTED_EXTENSIONS
]

for name in files_to_validate:
lgr.debug(f"Validating file {name}")

try:
data = load_file(name, started=started, http_kwargs=http_kwargs)
if len(data) == 0:
if stop is not None:
stop_server(stop)
raise
else:
if not conforms:
lgr.critical(
f"File {full_file_name} has validation errors."
)
if stop is not None:
stop_server(stop)
raise ValueError(vtext)
if not started:
stop_server(stop)
return True
raise ValueError(f"Empty data graph in file {name}")
conforms, vtext = validate_data(data)
except (ValueError, json.JSONDecodeError):
if stop is not None:
stop_server(stop)
raise
else:
if not conforms:
lgr.critical(f"File {name} has validation errors.")
stop_server(stop)
raise ValueError(vtext)

dirs_to_validate = [
str(x)
for x in Path(directory).iterdir()
if x.is_dir() and x.name not in DIR_TO_SKIP
]

for dir in dirs_to_validate:
conforms, stop = validate_dir(
dir, started=started, http_kwargs=http_kwargs, stop=stop
)

return True, stop


def validate(path):
Expand All @@ -92,16 +119,29 @@ def validate(path):
"""
if os.path.isdir(path):
conforms = validate_dir(path)

stop, port = start_server()
http_kwargs = {"port": port}
started = True

conforms, _ = validate_dir(
path, started=started, http_kwargs=http_kwargs, stop=stop
)

stop_server(stop)

else:
# Skip validation for .DS_Store files
if Path(path).name == ".DS_Store":
lgr.info(f"{path} is a .DS_Store file and is skipped.")

if Path(path).name in FILES_TO_SKIP:
lgr.info(f"Skipping file {path}")
return True

data = load_file(path, started=False)
conforms, vtext = validate_data(data)
if not conforms:
lgr.critical(f"File {path} has validation errors.")
raise ValueError(vtext)

lgr.info(f"{path} conforms.")

return conforms

0 comments on commit 7d8b303

Please sign in to comment.