Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement new output format #77

Merged
merged 9 commits into from
Oct 25, 2021
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions benchmarks/runners/sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ def generate_operators():

def doit(self):
theory_updates = {
**nnpdf_base_theory,
"PTO": 2,
**ffns3,
"PTO": 0,
# "ModEv": "EXA",
# "XIR": 0.5,
# "fact_to_ren_scale_ratio": 2.0,
Expand Down
142 changes: 114 additions & 28 deletions src/eko/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@
"""
This file contains the output management
"""
import io
import logging
import pathlib
import shutil
import tarfile
import tempfile
import warnings

import lz4.frame
Expand Down Expand Up @@ -268,7 +273,7 @@ def to_evol(self, source=True, target=False):
if target:
self["targetpids"] = br.evol_basis_pids

def get_raw(self, binarize=True):
def get_raw(self, binarize=True, skip_q2_grid=False):
"""
Serialize result as dict/YAML.

Expand All @@ -295,26 +300,30 @@ def get_raw(self, binarize=True):
"q2_ref",
]:
out[f] = self[f]
# list() works for both np.array and list
out["inputpids"] = list(self["inputpids"])
out["targetpids"] = list(self["targetpids"])
# make raw lists
# TODO: is interpolation_xgrid really needed in the output?
for k in ["interpolation_xgrid", "targetgrid", "inputgrid"]:
out[k] = self[k].tolist()
# make operators raw
for q2, op in self["Q2grid"].items():
out["Q2grid"][q2] = dict()
for k, v in op.items():
if k == "alphas":
out["Q2grid"][q2][k] = float(v)
continue
if binarize:
out["Q2grid"][q2][k] = lz4.frame.compress(v.tobytes())
else:
out["Q2grid"][q2][k] = v.tolist()
if not skip_q2_grid:
for q2, op in self["Q2grid"].items():
out["Q2grid"][q2] = dict()
for k, v in op.items():
if k == "alphas":
out["Q2grid"][q2][k] = float(v)
continue
if binarize:
out["Q2grid"][q2][k] = lz4.frame.compress(v.tobytes())
else:
out["Q2grid"][q2][k] = v.tolist()
else:
out["Q2grid"] = self["Q2grid"]
return out

def dump_yaml(self, stream=None, binarize=True):
def dump_yaml(self, stream=None, binarize=True, skip_q2_grid=False):
"""
Serialize result as YAML.

Expand All @@ -332,10 +341,10 @@ def dump_yaml(self, stream=None, binarize=True):
Null, if written successfully to stream
"""
# TODO explicitly silence yaml
out = self.get_raw(binarize)
out = self.get_raw(binarize, skip_q2_grid=skip_q2_grid)
return yaml.dump(out, stream)

def dump_yaml_to_file(self, filename, binarize=True):
def dump_yaml_to_file(self, filename, binarize=True, skip_q2_grid=False):
"""
Writes YAML representation to a file.

Expand All @@ -352,11 +361,50 @@ def dump_yaml_to_file(self, filename, binarize=True):
result of dump(output, stream), i.e. Null if written successfully
"""
with open(filename, "w") as f:
ret = self.dump_yaml(f, binarize)
ret = self.dump_yaml(f, binarize, skip_q2_grid=skip_q2_grid)
return ret

def dump_tar(self, tarname):
    """
    Write the representation into a tar archive containing:

    - metadata (in YAML)
    - operators (in numpy ``.npy`` format, lz4-compressed)

    For every kind of entry found in the elements of ``Q2grid`` the
    per-Q2 values are stacked along a new leading axis (following the
    iteration order of ``Q2grid``) and stored in a separate
    ``<kind>.npy.lz4`` file. The metadata file only keeps the list of
    ``Q2grid`` keys. Everything is placed in a directory named after the
    stem of ``tarname`` inside the archive.

    Parameters
    ----------
        tarname : str
            target file name, has to end in ``.tar``

    Raises
    ------
        ValueError
            if ``tarname`` does not carry the ``.tar`` suffix
    """
    tarpath = pathlib.Path(tarname)
    if tarpath.suffix != ".tar":
        raise ValueError(f"'{tarname}' is not a valid tar filename, wrong suffix")

    q2grid = self["Q2grid"]
    # The kinds are read off the first grid element; an empty grid has no
    # first element, so fall back to "no kinds" instead of leaking a bare
    # StopIteration to the caller.
    kinds = list(next(iter(q2grid.values()), {}))

    with tempfile.TemporaryDirectory() as tmpdir:
        workdir = pathlib.Path(tmpdir)

        # metadata: a copy of everything but the operators, with the grid
        # reduced to its keys
        cls = self.__class__
        metadata = cls(**{str(k): v for k, v in self.items() if k != "Q2grid"})
        metadata["Q2grid"] = list(q2grid.keys())

        yamlname = workdir / "metadata.yaml"
        metadata.dump_yaml_to_file(yamlname, skip_q2_grid=True)

        for kind in kinds:
            operator = np.stack([op[kind] for op in q2grid.values()])
            stream = io.BytesIO()
            np.save(stream, operator)
            # Build the name by appending: ``with_suffix`` would replace
            # anything after a dot already present in ``kind``.
            with lz4.frame.open(workdir / f"{kind}.npy.lz4", "wb") as fo:
                fo.write(stream.getvalue())

        with tarfile.open(tarpath, "w") as tar:
            tar.add(workdir, arcname=tarpath.stem)

@classmethod
def load_yaml(cls, stream):
def load_yaml(cls, stream, skip_q2_grid=False):
"""
Load YAML representation from stream

Expand All @@ -379,20 +427,21 @@ def load_yaml(cls, stream):
for k in ["interpolation_xgrid", "inputgrid", "targetgrid"]:
obj[k] = np.array(obj[k])
# make operators numpy
for op in obj["Q2grid"].values():
for k, v in op.items():
if k == "alphas":
v = float(v)
elif isinstance(v, list):
v = np.array(v)
elif isinstance(v, bytes):
v = np.frombuffer(lz4.frame.decompress(v))
v = v.reshape(len_tpids, len_tgrid, len_ipids, len_igrid)
op[k] = v
if not skip_q2_grid:
for op in obj["Q2grid"].values():
for k, v in op.items():
if k == "alphas":
v = float(v)
elif isinstance(v, list):
v = np.array(v)
elif isinstance(v, bytes):
v = np.frombuffer(lz4.frame.decompress(v))
v = v.reshape(len_tpids, len_tgrid, len_ipids, len_igrid)
op[k] = v
return cls(obj)

@classmethod
def load_yaml_from_file(cls, filename):
def load_yaml_from_file(cls, filename, skip_q2_grid=False):
"""
Load YAML representation from file

Expand All @@ -408,5 +457,42 @@ def load_yaml_from_file(cls, filename):
"""
obj = None
with open(filename) as o:
obj = Output.load_yaml(o)
obj = Output.load_yaml(o, skip_q2_grid)
return obj

@classmethod
def load_tar(cls, tarname):
    """
    Load the representation from a tar archive containing:

    - metadata (in YAML)
    - operators (in numpy ``.npy`` format, lz4-compressed)

    The archive is unpacked into a temporary directory. The metadata is
    read with ``load_yaml_from_file(..., skip_q2_grid=True)``, every
    ``*.npy.lz4`` file is decompressed and loaded, and the leading axis
    of each array is paired with the ``Q2grid`` entries of the metadata
    to rebuild ``Q2grid`` as a mapping ``q2 -> {kind: slice}``.

    Parameters
    ----------
        tarname : str
            source file name, has to end in ``.tar``

    Returns
    -------
        obj : output
            the object returned by ``load_yaml_from_file`` with its
            ``Q2grid`` entry replaced by the reconstructed operator grid

    Raises
    ------
        ValueError
            if ``tarname`` does not carry the ``.tar`` suffix
    """
    tarpath = pathlib.Path(tarname)
    if tarpath.suffix != ".tar":
        raise ValueError(f"'{tarname}' is not a valid tar filename, wrong suffix")

    with tempfile.TemporaryDirectory() as tmpdir:
        workdir = pathlib.Path(tmpdir)

        # NOTE(review): ``extractall`` trusts the member paths stored in the
        # archive; only use this on archives from a trusted source.
        with tarfile.open(tarpath, "r") as tar:
            tar.extractall(workdir)

        # The content lives in a directory named after the archive stem at
        # dump time. If the archive was renamed afterwards that name no
        # longer matches, so fall back to the only extracted directory.
        innerdir = workdir / tarpath.stem
        if not innerdir.is_dir():
            subdirs = [p for p in workdir.iterdir() if p.is_dir()]
            if len(subdirs) == 1:
                innerdir = subdirs[0]

        yamlname = innerdir / "metadata.yaml"
        metadata = cls.load_yaml_from_file(yamlname, skip_q2_grid=True)

        grids = {}
        for fp in innerdir.glob("*.npy.lz4"):
            with lz4.frame.open(fp, "rb") as fd:
                stream = io.BytesIO(fd.read())
            # drop both ``.lz4`` and ``.npy`` to recover the kind name
            grids[pathlib.Path(fp.stem).stem] = np.load(stream)

        # pair the i-th slice of every array with the i-th Q2 entry
        q2grid = metadata["Q2grid"]
        operator_grid = {}
        for q2, slices in zip(q2grid, zip(*grids.values())):
            operator_grid[q2] = dict(zip(grids.keys(), slices))
        metadata["Q2grid"] = operator_grid

    return metadata