Skip to content

Commit

Permalink
Add biom validator
Browse files Browse the repository at this point in the history
  • Loading branch information
chrisvanrun committed Nov 22, 2024
1 parent c7b4245 commit c559999
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 2 deletions.
3 changes: 3 additions & 0 deletions app/grandchallenge/components/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
validate_docker_image,
)
from grandchallenge.components.validators import (
validate_biom_format,
validate_newick_tree_format,
validate_no_slash_at_ends,
validate_safe_path,
Expand Down Expand Up @@ -1490,6 +1491,8 @@ def validate_user_upload(self, user_upload):
self.interface.validate_against_schema(value=value)
elif self.interface.kind == InterfaceKindChoices.NEWICK:
validate_newick_tree_format(tree=user_upload.read_object())
elif self.interface.kind == InterfaceKindChoices.BIOM:
validate_biom_format(user_upload=user_upload)

self._user_upload_validated = True

Expand Down
45 changes: 43 additions & 2 deletions app/grandchallenge/components/validators.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
from functools import wraps
from io import BytesIO

import biom
import h5py
from billiard.exceptions import SoftTimeLimitExceeded, TimeLimitExceeded
from Bio.Phylo import NewickIO
from django.core.exceptions import SuspiciousFileOperation, ValidationError
Expand All @@ -24,10 +29,22 @@ def validate_no_slash_at_ends(value):
raise ValidationError("Path must not begin or end with '/'")


def _handle_validation_resource_errors(func):
@wraps(func)
def wrapper(*args, **kwargs):
try:
func(*args, **kwargs)
except (MemoryError, SoftTimeLimitExceeded, TimeLimitExceeded):
raise ValidationError("The file is too large")

return wrapper


def _newick_parser(tree):
    """Build a NewickIO parser from the Newick text in *tree*."""
    parser = NewickIO.Parser.from_string(tree)
    return parser


@_handle_validation_resource_errors
def validate_newick_tree_format(tree):
"""Validates a Newick tree by passing it through a parser"""
parser = _newick_parser(tree)
Expand All @@ -37,10 +54,34 @@ def validate_newick_tree_format(tree):
try:
for _ in parser.parse():
has_tree = True
except (MemoryError, SoftTimeLimitExceeded, TimeLimitExceeded):
raise ValidationError("The file is too large")
except NewickIO.NewickError as e:
raise ValidationError(f"Invalid Newick tree format: {e}")

if not has_tree:
raise ValidationError("No Newick tree found")


@_handle_validation_resource_errors
def validate_biom_format(*, user_upload):
    """Validate an uploaded BIOM file by passing its content through a parser.

    Args:
        user_upload: object providing ``download_fileobj(fileobj)``, which
            is used to copy the uploaded content into an in-memory buffer.

    Raises:
        ValidationError: if the content cannot be opened as an HDF5 file,
            if it cannot be parsed as a BIOM table, or if parsing fails
            with a resource error (reported as "The file is too large").
    """
    with BytesIO() as fileobj:
        # Get the object into memory
        user_upload.download_fileobj(fileobj)
        fileobj.seek(0)

        # Attempt to wrap it in a hdf5 handler
        try:
            hdf5_file = h5py.File(fileobj, "r")
        except OSError as error:
            raise ValidationError(
                "Only BIOM in valid HDF5 binary file format are supported"
            ) from error

        # Resource errors such as MemoryError are Exception subclasses, so
        # they are converted to a ValidationError before the broad handler
        # below can misreport them as a format problem.
        parse_table = _handle_validation_resource_errors(
            biom.Table.from_hdf5
        )

        # Close the HDF5 handle on every path, before the buffer goes away.
        with hdf5_file:
            # Attempt to parse it as a BIOM table
            try:
                parse_table(hdf5_file)
            except ValidationError:
                raise
            except Exception as error:
                raise ValidationError(
                    "Does not appear to be a BIOM-format file"
                ) from error

0 comments on commit c559999

Please sign in to comment.