Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validation of output tarballs #189

Merged
merged 13 commits into from
Dec 13, 2021
1 change: 0 additions & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,3 @@ m2r
sphinx_rtd_theme
docutils<0.18
mistune<2.0.0

2 changes: 1 addition & 1 deletion examples/Getting_started.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@
"outputs": [],
"source": [
"outdir = \"example_output\"\n",
"submission.create_files(outdir)"
"submission.create_files(outdir,remove_old=True)"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion examples/combine_limits.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@
"table.add_variable(obs)\n",
"table.add_variable(exp)\n",
"submission.add_table(table)\n",
"submission.create_files(\"example_output\")"
"submission.create_files(\"example_output\",remove_old=True)"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion examples/correlation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@
"# Create the submission object and write output\n",
"sub = Submission()\n",
"sub.add_table(table)\n",
"sub.create_files(\"./output/\")\n",
"sub.create_files(\"./output/\",remove_old=True)\n",
"\n",
"!ls -l submission.tar.gz"
]
Expand Down
2 changes: 1 addition & 1 deletion examples/read_c_file.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@
"table.add_variable(obs)\n",
"table.add_variable(exp)\n",
"submission.add_table(table)\n",
"submission.create_files(\"example_output\")"
"submission.create_files(\"example_output\",remove_old=True)"
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions examples/reading_histograms.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@
"\n",
"submission.add_table(table)\n",
"\n",
"submission.create_files(\"example_output\")"
"submission.create_files(\"example_output\",remove_old=True)"
]
},
{
Expand Down Expand Up @@ -311,7 +311,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
"version": "3.8.10"
}
},
"nbformat": 4,
Expand Down
22 changes: 19 additions & 3 deletions hepdata_lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@
import numpy as np
import yaml
from future.utils import raise_from

# try to use LibYAML bindings if possible
try:
from yaml import CLoader as Loader, CSafeDumper as Dumper
except ImportError:
from yaml import Loader, SafeDumper as Dumper
from yaml.representer import SafeRepresenter

from hepdata_validator.full_submission_validator import FullSubmissionValidator
from hepdata_lib import helpers
from hepdata_lib.root_utils import RootFileReader

Expand Down Expand Up @@ -538,13 +538,21 @@ def files_to_copy_nested(self):
files = files + table.files_to_copy
return files

def create_files(self, outdir="."):
def create_files(self, outdir=".", validate=True, remove_old=True):
"""
Create the output files.

Implicitly triggers file creation for all tables that have been added to the submission,
all variables associated to the tables and all uncertainties associated to the variables.

If `validate` is True, the hepdata-validator package will be used to validate the
output tar ball.

If `remove_old` is True, the output directory will be deleted before recreation.
"""
if remove_old and os.path.exists(outdir):
shutil.rmtree(outdir)

if not os.path.exists(outdir):
os.makedirs(outdir)

Expand Down Expand Up @@ -580,13 +588,21 @@ def create_files(self, outdir="."):
files_to_add.extend(
[os.path.join(outdir, os.path.basename(x)) for x in self.files_to_copy_nested()]
)
with tarfile.open("submission.tar.gz", "w:gz") as tar:
tarfile_path = "submission.tar.gz"
with tarfile.open(tarfile_path, "w:gz") as tar:
for filepath in files_to_add:
tar.add(
filepath,
arcname=os.path.basename(filepath)
)

if validate:
full_submission_validator = FullSubmissionValidator()
is_archive_valid = full_submission_validator.validate(archive=tarfile_path)
if not is_archive_valid:
for filename in full_submission_validator.get_messages():
full_submission_validator.print_errors(filename)
assert is_archive_valid, "The tar ball is not valid"

class Uncertainty(object):
"""
Expand Down
5 changes: 5 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
numpy
PyYAML>4.*
future
six
hepdata-validator>=0.3.2
6 changes: 2 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
except ImportError:
print("ROOT is required by this library.")

DEPS = ['numpy', 'PyYAML>4.*', 'future', 'six']
# Read the runtime dependencies from requirements.txt so that they are
# declared in a single place. Each line is stripped of surrounding whitespace
# (readlines() would keep the trailing newline), and blank or comment lines
# are skipped so they never end up as bogus entries in install_requires.
with open("requirements.txt", "r") as requirements_file:
    DEPS = [
        line.strip()
        for line in requirements_file
        if line.strip() and not line.strip().startswith("#")
    ]

HERE = path.abspath(path.dirname(__file__))

Expand All @@ -27,8 +28,6 @@
classifiers=[
'Development Status :: 4 - Beta',
'License :: OSI Approved :: MIT License',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
Expand All @@ -39,7 +38,6 @@
install_requires=DEPS,
setup_requires=['pytest-runner', 'pytest-cov'],
tests_require=['pytest', 'papermill', 'six',
'pylint==1.9.5; python_version<"3"',
'pylint==2.9.6; python_version>="3"',
],
project_urls={
Expand Down
14 changes: 8 additions & 6 deletions tests/test_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,20 @@
# -*- coding:utf-8 -*-
"""Test Output."""
from __future__ import print_function
import os
from collections import defaultdict
from unittest import TestCase
import shutil
import os
import yaml
from test_utilities import tmp_directory_name
from hepdata_lib import Submission, Table, Variable

class TestOutput(TestCase):
"""Test output"""

def test_yaml_output(self):
"""Test yaml dump"""
tmp_dir = tmp_directory_name()

# Create test dictionary
testlist = [("x", 1.2), ("x", 2.2), ("y", 0.12), ("y", 0.22)]
Expand All @@ -23,7 +25,6 @@ def test_yaml_output(self):

# Create test submission
test_submission = Submission()
testdir = "./output"
test_table = Table("TestTable")
x_variable = Variable("X", is_independent=True, is_binned=False)
x_variable.values = testdict['x']
Expand All @@ -32,11 +33,12 @@ def test_yaml_output(self):
test_table.add_variable(x_variable)
test_table.add_variable(y_variable)
test_submission.add_table(test_table)
test_submission.create_files(testdir)
test_submission.create_files(tmp_dir)

# Test read yaml file
table_file = os.path.join(tmp_dir, "testtable.yaml")
try:
with open("output/testtable.yaml", 'r') as testfile:
with open(table_file, 'r') as testfile:
testyaml = yaml.safe_load(testfile)
except yaml.YAMLError as exc:
print(exc)
Expand All @@ -45,10 +47,10 @@ def test_yaml_output(self):
testtxt = ("dependent_variables:\n- header:\n name: Y\n values:\n" +
" - value: 0.12\n - value: 0.22\nindependent_variables:\n" +
"- header:\n name: X\n values:\n - value: 1.2\n - value: 2.2\n")
with open("output/testtable.yaml", 'r') as testfile:
with open(table_file, 'r') as testfile:
testyaml = testfile.read()

self.assertEqual(str(testyaml), testtxt)
self.addCleanup(os.remove, "submission.tar.gz")
self.addCleanup(shutil.rmtree, testdir)
self.addCleanup(shutil.rmtree, tmp_dir)
self.doCleanups()
39 changes: 35 additions & 4 deletions tests/test_submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from builtins import bytes
from unittest import TestCase
import tarfile
from test_utilities import tmp_directory_name
from hepdata_lib import Submission, Table, Variable, Uncertainty

class TestSubmission(TestCase):
Expand Down Expand Up @@ -71,15 +72,45 @@ def test_additional_resource_size(self):
def test_create_files(self):
    """Test create_files() for Submission.

    Writes a submission containing a single empty table into a fresh
    temporary directory and checks that create_files() runs without raising.
    """
    # Use a unique, not-yet-existing directory so that repeated or parallel
    # test runs do not interfere with each other.
    testdir = tmp_directory_name()
    test_submission = Submission()

    # create_files() writes the tarball into the current working directory,
    # so it has to be cleaned up separately from the output directory.
    self.addCleanup(os.remove, "submission.tar.gz")
    self.addCleanup(shutil.rmtree, testdir)

    tab = Table("test")
    test_submission.add_table(tab)
    test_submission.create_files(testdir)

    # Run the registered cleanups right away instead of waiting for teardown.
    self.doCleanups()

def test_create_files_with_removal(self):
    """Test the removal of old files in create_files().

    A stray file is placed in the output directory. It must survive a call
    with ``remove_old=False`` and be gone after a call with
    ``remove_old=True``.
    """
    testdir = tmp_directory_name()
    tarball = "submission.tar.gz"

    def remove_tarball():
        """Delete the tarball left in the working directory, if present."""
        if os.path.exists(tarball):
            os.remove(tarball)

    # create_files() writes the tarball into the current working directory;
    # without this cleanup the test would leave it behind.
    self.addCleanup(remove_tarball)

    # Step 1: Create test directory containing a random file
    os.makedirs(testdir)
    self.addCleanup(shutil.rmtree, testdir)
    testfile = os.path.join(testdir, "test.txt")
    with open(testfile, "w") as handle:
        handle.write("test")
    self.assertTrue(os.path.isfile(testfile))

    # Step 2: Create submission and write output to test directory
    # without removing files that are already there
    test_submission = Submission()
    tab = Table("test")
    test_submission.add_table(tab)
    test_submission.create_files(testdir, remove_old=False)

    # Test file should still exist
    self.assertTrue(os.path.isfile(testfile))

    # Step 3: Recreate submission files with removal
    test_submission.create_files(testdir, remove_old=True)

    # Test file should no longer exist
    self.assertFalse(os.path.isfile(testfile))




def test_read_abstract(self):
"""Test read_abstract function."""
some_string = string.ascii_lowercase
Expand Down
7 changes: 4 additions & 3 deletions tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import shutil
from unittest import TestCase

from test_utilities import tmp_directory_name
from hepdata_lib import Table, Variable, Uncertainty

class TestTable(TestCase):
Expand Down Expand Up @@ -61,7 +62,7 @@ def test_write_yaml(self):
test_table = Table("Some Table")
test_variable = Variable("Some Variable")
test_table.add_variable(test_variable)
testdir = "test_output"
testdir = tmp_directory_name()
self.addCleanup(shutil.rmtree, testdir)
try:
test_table.write_yaml(testdir)
Expand Down Expand Up @@ -110,7 +111,7 @@ def test_write_images(self):

# This should work fine
test_table.add_image(some_pdf)
testdir = "test_output"
testdir = tmp_directory_name()
self.addCleanup(shutil.rmtree, testdir)
try:
test_table.write_images(testdir)
Expand Down Expand Up @@ -182,7 +183,7 @@ def test_copy_files(self):
"""Test the copy_files function."""
test_table = Table("Some Table")
some_pdf = "%s/minimal.pdf" % os.path.dirname(__file__)
testdir = "test_output"
testdir = tmp_directory_name()
self.addCleanup(shutil.rmtree, testdir)
os.makedirs(testdir)

Expand Down
11 changes: 11 additions & 0 deletions tests/test_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,17 @@ def get_random_id(length=12):
return "".join(random.sample(string.ascii_uppercase+string.digits, length))


def tmp_directory_name():
    """
    Generate a random directory name for testing.

    The name is located inside the platform's temporary directory, as
    reported by ``tempfile.gettempdir()``, rather than a hard-coded ``/tmp``,
    so the helper also works on systems without ``/tmp`` or with a custom
    ``TMPDIR``.

    The returned path did not exist when it was checked. The directory
    itself is not created here.
    """
    # Imported locally so this helper does not rely on the module's import block.
    import tempfile

    base_dir = tempfile.gettempdir()
    # Draw new random names until one is found that is not taken yet.
    while True:
        tmp_name = os.path.join(base_dir, "hepdata_lib_test_" + get_random_id())
        if not os.path.exists(tmp_name):
            return tmp_name

def remove_if_exist(path_to_file):
"""Remove file if it exists."""
if os.path.exists(path_to_file):
Expand Down