From ff99a1d79b73ce40e1d0885fd55acf105caa5750 Mon Sep 17 00:00:00 2001 From: sfisher Date: Mon, 20 Sep 2021 13:41:41 +0200 Subject: [PATCH 01/18] a basic datacollection simulator --- conf/simulate_example.yml | 69 +++++ setup.cfg | 1 + src/ispyb/cli/simulate.py | 73 +++++ src/ispyb/simulation/__init__.py | 0 src/ispyb/simulation/base.py | 100 +++++++ src/ispyb/simulation/datacollection.py | 365 +++++++++++++++++++++++++ src/ispyb/simulation/sqla_helpers.py | 13 + 7 files changed, 621 insertions(+) create mode 100644 conf/simulate_example.yml create mode 100644 src/ispyb/cli/simulate.py create mode 100644 src/ispyb/simulation/__init__.py create mode 100644 src/ispyb/simulation/base.py create mode 100644 src/ispyb/simulation/datacollection.py create mode 100644 src/ispyb/simulation/sqla_helpers.py diff --git a/conf/simulate_example.yml b/conf/simulate_example.yml new file mode 100644 index 00000000..0a9194a4 --- /dev/null +++ b/conf/simulate_example.yml @@ -0,0 +1,69 @@ +# Whether to link or copy data +copy_method: copy + +# Map each beamline to a session +sessions: + bl: blc00001-1 + +# Where to copy raw data from +raw_data: /data/ispyb-test + +# Where to write simulated data to, can use {beamline} placeholders +data_dir: /data/tests/{beamline}/simulation + +ispyb_url: https://ispyb.diamond.ac.uk + +# Proteins +components: + Component1: + acronym: Component1 + sequence: SiSP + + Component2: + acronym: Component2 + +# Samples +samples: + Sample1: + name: Sample1 + component: Component1 + + Sample2: + name: Sample2 + component: Component2 + +# Experiments +experiments: + Energy scan: + - data: energy_scan/energyscan1.h5 + sample: Sample2 + xtalsnapshotfullpath1: energy_scan/snapshot.png + numberofimages: 4001 + exposuretime: 1 + #energy: 8.8143 + wavelength: 1.4065 + imagecontainersubpath: 1.1/measurement + + XRF map: + - data: xrf_map/xrfmap1.h5 + #energy: 2.4817 + wavelength: 4.9959 + sample: Sample1 + xtalsnapshotfullpath1: xrf_map/snapshot.png + exposuretime: 0.03 
+ numberofimages: 1600 + grid: + steps_x: 40 + steps_y: 40 + dx_mm: 0.001 + dy_mm: 0.001 + pixelspermicronx: -0.44994 + pixelspermicrony: -0.46537 + snapshot_offsetxpixel: 682.16 + snapshot_offsetypixel: 554 + subsample: + x: 9038007 + y: 24467003 + x2: 9078007 + y2: 24507003 + type: roi diff --git a/setup.cfg b/setup.cfg index e9686db8..5ceef8c0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -47,6 +47,7 @@ scripts = console_scripts = ispyb.job = ispyb.cli.job:main ispyb.last_data_collections_on = ispyb.cli.last_data_collections_on:main + ispyb.simulate = ispyb.cli.simulate:run libtbx.dispatcher.script = ispyb.job = ispyb.job ispyb.last_data_collections_on = ispyb.last_data_collections_on diff --git a/src/ispyb/cli/simulate.py b/src/ispyb/cli/simulate.py new file mode 100644 index 00000000..dcbe0546 --- /dev/null +++ b/src/ispyb/cli/simulate.py @@ -0,0 +1,73 @@ +import argparse +import logging + +from ispyb.simulation.datacollection import SimulateDataCollection + +try: + import zocalo + import zocalo.configuration +except ModuleNotFoundError: + zocalo = None + +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + + +def run(): + try: + sdc = SimulateDataCollection() + except AttributeError as e: + print(f"Simluation Error: {str(e)}") + exit() + + parser = argparse.ArgumentParser(description="ISPyB/Zocalo simulation service") + parser.add_argument( + "beamline", + help=f"Beamline to run simulation against. 
Available beamlines: {sdc.beamlines}", + ) + + parser.add_argument( + "experiment", + help=f"Experiment type to simluate, Available types: {sdc.experiment_types}", + ) + + parser.add_argument( + "--number", + default=0, + type=int, + dest="experiment_number", + help="Experiment simulation number to run", + ) + parser.add_argument( + "--delay", + default=5, + type=int, + dest="delay", + help="Delay between mimas start and end events", + ) + parser.add_argument( + "--debug", + action="store_true", + help="Enable debug output", + ) + + if zocalo: + zc = zocalo.configuration.from_file() + zc.activate() + zc.add_command_line_options(parser) + + args = parser.parse_args() + + root = logging.getLogger() + root.setLevel(level=logging.DEBUG if args.debug else logging.INFO) + + try: + sdc.do_run( + args.beamline, args.experiment, args.experiment_number, delay=args.delay + ) + except Exception as e: + if args.debug: + logger.exception("Simluation Error") + print(e) + else: + print(f"Simluation Error: {str(e)}") diff --git a/src/ispyb/simulation/__init__.py b/src/ispyb/simulation/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/ispyb/simulation/base.py b/src/ispyb/simulation/base.py new file mode 100644 index 00000000..da3c0d75 --- /dev/null +++ b/src/ispyb/simulation/base.py @@ -0,0 +1,100 @@ +import configparser +import os +from abc import ABC, abstractmethod +import logging + +import sqlalchemy +from sqlalchemy.orm import sessionmaker +import ispyb.sqlalchemy as isa +import yaml + +from workflows.transport.stomp_transport import StompTransport + +try: + import zocalo + import zocalo.configuration +except ModuleNotFoundError: + zocalo = None + +logger = logging.getLogger(__name__) + + +def load_config(): + try: + config_yml = os.environ["ISPYB_SIMULATE_CONFIG"] + except KeyError: + raise AttributeError( + "ISPYB_SIMULATE_CONFIG environment variable is not defined" + ) + + if not os.path.exists(config_yml): + raise AttributeError(f"Cannot find config 
file: {config_yml}") + + config = {} + with open(config_yml, "r") as stream: + config = yaml.safe_load(stream) + + return config + + +class Simulation(ABC): + def __init__(self): + self._config = load_config() + + if zocalo: + zc = zocalo.configuration.from_file() + zc.activate() + self.stomp = StompTransport() + + @property + def config(self): + return self._config + + @property + def session(self): + config = configparser.RawConfigParser(allow_no_value=True) + config.read(os.environ["ISPYB_CREDENTIALS"]) + url = isa.url(credentials=dict(config.items("ispyb_sqlalchemy"))) + return sessionmaker( + bind=sqlalchemy.create_engine(url, connect_args={"use_pure": True}) + ) + + @property + def beamlines(self): + return ", ".join(self.config["sessions"].keys()) + + @property + def experiment_types(self): + return ", ".join(self.config["experiments"].keys()) + + def send_message(self, message, headers={}): + if zocalo: + try: + self.stomp.connect() + self.stomp.send("processing_recipe", message, headers=headers) + except Exception: + logger.warning("Cant connect to workflow transport") + + else: + logger.warning("Zocalo not available, not sending message") + + def send_start(self, dcid, recipe="mimas"): + message = { + "recipes": [recipe], + "parameters": {"ispyb_dcid": dcid, "event": "start"}, + } + self.send_message(message) + + def send_end(self, dcid, recipe="mimas"): + message = { + "recipes": [recipe], + "parameters": {"ispyb_dcid": dcid, "event": "end"}, + } + self.send_message(message) + + def do_run(self, *args, **kwargs): + self.run(*args, **kwargs) + + @abstractmethod + def run(self, *args, **kwargs): + pass diff --git a/src/ispyb/simulation/datacollection.py b/src/ispyb/simulation/datacollection.py new file mode 100644 index 00000000..73113463 --- /dev/null +++ b/src/ispyb/simulation/datacollection.py @@ -0,0 +1,365 @@ +import os +import shutil +import logging +import time +from datetime import datetime, timedelta + +import ispyb.sqlalchemy as isa + +from 
ispyb.simulation.base import Simulation +from ispyb.simulation.sqla_helpers import session, proposal + + +logger = logging.getLogger(__name__) + + +class SimulateDataCollection(Simulation): + def _get_container_position(self, ses, blsession, proposalid, beamline): + shipment_name = "Simulation_Shipment" + shipment = ( + ses.query(isa.Shipping) + .filter(isa.Shipping.proposalId == proposalid) + .filter(isa.Shipping.shippingName == shipment_name) + .first() + ) + + if not shipment: + logger.debug("Creating shipment") + shipment = isa.Shipping( + shippingName=shipment_name, + proposalId=proposalid, + creationDate=datetime.now(), + ) + + ses.add(shipment) + ses.commit() + + dewar_name = "Simulation_Dewar" + dewar = ( + ses.query(isa.Dewar.dewarId) + .filter(isa.Dewar.shippingId == shipment.shippingId) + .filter(isa.Dewar.code == dewar_name) + .first() + ) + + if not dewar: + logger.debug("Creating dewar") + dewar = isa.Dewar( + shippingId=shipment.shippingId, + code=dewar_name, + dewarStatus="processing", + ) + ses.add(dewar) + ses.commit() + + container_name = "Simulation_Container" + container = ( + ses.query(isa.Container.containerId) + .filter(isa.Container.dewarId == dewar.dewarId) + .filter(isa.Container.code == container_name) + .first() + ) + + if not container: + logger.debug("Creating container") + container = isa.Container( + dewarId=dewar.dewarId, + code=container_name, + containerType="Box", + capacity=25, + bltimeStamp=datetime.now(), + containerStatus="at facility", + # beamlineLocation=beamline, + # sampleChangerLocation=1, + ) + ses.add(container) + ses.commit() + + containerhistory = isa.ContainerHistory( + containerId=container.containerId, + status="at facility", + location=1, + beamlineName=beamline, + ) + + ses.add(containerhistory) + ses.commit() + + samples = ( + ses.query(isa.BLSample) + .filter(isa.BLSample.containerId == container.containerId) + .all() + ) + max_loc = 0 + for s in samples: + if int(s.location) > max_loc: + max_loc = 
int(s.location) + + return container.containerId, max_loc + 1 + + def run(self, beamline, experiment_type, experiment_no=0, delay=0): + blses = self.config["sessions"][beamline] + + if experiment_type not in self.config["experiments"]: + raise KeyError(f"No such experiment type {experiment_type}") + + if experiment_no > len(self.config["experiments"][experiment_type]): + raise KeyError( + f"Invalid experiment number {experiment_no}, {len(self.config['experiments'][experiment_type])} exps available" + ) + + exp = self.config["experiments"][experiment_type][experiment_no] + data = os.path.join(self.config["raw_data"], exp["data"]) + + if not os.path.exists(data): + raise AttributeError(f"Raw data file: {data} does not exist") + + if not exp.get("sample"): + raise KeyError( + f"No sample specified for experiment {experiment_type}:{experiment_no}" + ) + + if exp["sample"] not in self.config["samples"]: + raise KeyError( + f"Experiment sample {exp['sample']} is not defined in `samples`" + ) + + sample = self.config["samples"][exp["sample"]] + + with self.session() as ses: + prop, blsession = ( + ses.query(proposal, isa.BLSession) + .join(isa.Proposal) + .filter(session == blses) + .first() + ) + + blsample = ( + ses.query(isa.BLSample) + .filter(isa.BLSample.name == sample["name"]) + .first() + ) + + if not blsample: + for k in ["component", "name"]: + if not sample.get(k): + raise KeyError(f"No {k} specified for sample {exp['sample']}") + + if sample["component"] not in self.config["components"]: + raise KeyError( + f"Sample component {sample['component']} is not defined in `components`" + ) + + comp = self.config["components"][sample["component"]] + for k in ["acronym"]: + if not comp.get(k): + raise KeyError( + f"No {k} specified for component {sample['component']}" + ) + + component = ( + ses.query(isa.Protein) + .filter(isa.Protein.acronym == comp["acronym"]) + .first() + ) + + if not component: + logger.info(f"Creating component {comp['acronym']}") + component = 
isa.Protein( + proposalId=blsession.proposalId, + acronym=comp.get("acronym"), + name=comp.get("name", comp.get("acronym")), + sequence=comp.get("sequence"), + density=comp.get("density"), + molecularMass=comp.get("molecularmass"), + description="Simulated component", + ) + ses.add(component) + ses.commit() + + crystal = isa.Crystal(proteinId=component.proteinId) + ses.add(crystal) + ses.commit() + + logger.info(f"Creating sample {sample['name']}") + containerid, position = self._get_container_position( + ses, blses, blsession.proposalId, beamline + ) + blsample = isa.BLSample( + name=sample["name"], + crystalId=crystal.crystalId, + location=position, + containerId=containerid, + ) + ses.add(blsample) + ses.commit() + + subsampleid = None + if exp.get("subsample"): + logger.info("Creating subsample") + sub = exp["subsample"] + + pos1id = None + if sub.get("x") and sub.get("y"): + pos1 = isa.Position( + posX=sub["x"], + posY=sub["y"], + ) + ses.add(pos1) + ses.commit() + + pos1id = pos1.positionId + + pos2id = None + if sub.get("x2") and sub.get("y2"): + pos2 = isa.Position( + posX=sub["x2"], + posY=sub["y2"], + ) + ses.add(pos2) + ses.commit() + + pos2id = pos2.positionId + + subsample = isa.BLSubSample( + positionId=pos1id, + position2Id=pos2id, + type=sub.get("type"), + blSampleId=blsample.blSampleId, + comments="Simulated sample", + ) + ses.add(subsample) + ses.commit() + + subsampleid = subsample.blSubSampleId + + logger.debug("Creating datacollection group") + dcg = isa.DataCollectionGroup( + sessionId=blsession.sessionId, + experimentType=experiment_type, + blSampleId=blsample.blSampleId, + ) + ses.add(dcg) + ses.commit() + + logger.debug("Creating datacollection") + dc = isa.DataCollection( + BLSAMPLEID=blsample.blSampleId, + blSubSampleId=subsampleid, + dataCollectionGroupId=dcg.dataCollectionGroupId, + fileTemplate=os.path.basename(exp["data"]), + imageContainerSubPath=exp.get( + "imagecontainersubpath", "1.1/measurement" + ), + 
numberOfImages=exp.get("numberofimages"), + wavelength=exp.get("wavelength"), + exposureTime=exp.get("exposuretime"), + runStatus="Successful", + comments="Simulated datacollection", + startTime=datetime.now(), + endTime=datetime.now() + timedelta(minutes=5), + ) + ses.add(dc) + ses.commit() + + if exp.get("grid"): + logger.debug("Creating gridinfo") + grid = isa.GridInfo( + dataCollectionId=dc.dataCollectionId, + steps_x=exp["grid"]["steps_x"], + steps_y=exp["grid"]["steps_y"], + snapshot_offsetXPixel=exp["grid"]["snapshot_offsetxpixel"], + snapshot_offsetYPixel=exp["grid"]["snapshot_offsetypixel"], + dx_mm=exp["grid"]["dx_mm"], + dy_mm=exp["grid"]["dy_mm"], + pixelsPerMicronX=exp["grid"]["pixelspermicronx"], + pixelsPerMicronY=exp["grid"]["pixelspermicrony"], + ) + ses.add(grid) + ses.commit() + + logger.info(f"Created datacollection: {dc.dataCollectionId}") + logger.info( + f"{self.config['ispyb_url']}/visit/{blses}/id/{dc.dataCollectionId}" + ) + + logger.info("Triggering mimas start") + self.send_start(dc.dataCollectionId) + + # Create the dataset dir + data_dir = os.path.join( + self.config["data_dir"].format(beamline=beamline), + prop, + exp["sample"], + f"{exp['sample']}_{dc.dataCollectionId}", + ) + + dc.imageDirectory = data_dir + ses.commit() + + if os.path.exists(data_dir): + logger.warning(f"Data directory already exists: {data_dir}") + + os.makedirs(data_dir) + if not os.path.exists(data_dir): + raise AttributeError( + f"Could not create output data directory: {data_dir}" + ) + + # Link data files / snapshots + link = self.config.get("copy_method", "copy") == "link" + if link: + logger.debug("Linking data") + os.link(data, os.path.join(data_dir, os.path.basename(data))) + else: + logger.debug("Copying data") + shutil.copy(data, os.path.join(data_dir, os.path.basename(data))) + + snapshot_path = os.path.join( + self.config["raw_data"], exp.get("xtalsnapshotfullpath1") + ) + if snapshot_path: + if os.path.exists(snapshot_path): + snapshot = 
os.path.join(data_dir, os.path.basename(snapshot_path)) + if link: + logger.debug("Linking snapshot") + os.link(snapshot_path, snapshot) + else: + logger.debug("Copying snapshot") + shutil.copy(snapshot_path, snapshot) + + snap, snap_extension = os.path.splitext(snapshot_path) + thumb = f"{snap}t{snap_extension}" + if os.path.exists(thumb): + if link: + logger.debug("Linking thumbnail") + os.link( + thumb, + os.path.join( + data_dir, + f"{os.path.basename(snap)}t{snap_extension}", + ), + ) + else: + logger.debug("Copying thumbnail") + shutil.copy( + thumb, + os.path.join( + data_dir, + f"{os.path.basename(snap)}t{snap_extension}", + ), + ) + else: + logger.warning(f"Snapshot thumbnail does not exist {thumb}") + + dc.xtalSnapshotFullPath1 = snapshot + else: + logger.warning(f"Snapshot file does not exist {snapshot_path}") + + logger.info(f"Finshed copying data to: {data_dir}") + + if delay: + time.sleep(delay) + + logger.info("Triggering mimas end") + self.send_end(dc.dataCollectionId) diff --git a/src/ispyb/simulation/sqla_helpers.py b/src/ispyb/simulation/sqla_helpers.py new file mode 100644 index 00000000..116a3b96 --- /dev/null +++ b/src/ispyb/simulation/sqla_helpers.py @@ -0,0 +1,13 @@ +from sqlalchemy import func +import ispyb.sqlalchemy as isa + +session = func.concat( + isa.Proposal.proposalCode, + isa.Proposal.proposalNumber, + "-", + isa.BLSession.visit_number, +).label("session") + +proposal = func.concat(isa.Proposal.proposalCode, isa.Proposal.proposalNumber).label( + "proposal" +) From 0082f62d7234eaef42817b2789083c477f7af557 Mon Sep 17 00:00:00 2001 From: sfisher Date: Fri, 1 Oct 2021 15:22:09 +0200 Subject: [PATCH 02/18] send before / after events using plugins, add some doc --- src/ispyb/cli/simulate.py | 2 +- src/ispyb/simulation/README.md | 64 ++++++++++++++++++++++++++ src/ispyb/simulation/base.py | 52 +++++++-------------- src/ispyb/simulation/datacollection.py | 8 ++-- 4 files changed, 85 insertions(+), 41 deletions(-) create mode 100644 
src/ispyb/simulation/README.md diff --git a/src/ispyb/cli/simulate.py b/src/ispyb/cli/simulate.py index dcbe0546..04152a46 100644 --- a/src/ispyb/cli/simulate.py +++ b/src/ispyb/cli/simulate.py @@ -20,7 +20,7 @@ def run(): print(f"Simluation Error: {str(e)}") exit() - parser = argparse.ArgumentParser(description="ISPyB/Zocalo simulation service") + parser = argparse.ArgumentParser(description="ISPyB simulation service") parser.add_argument( "beamline", help=f"Beamline to run simulation against. Available beamlines: {sdc.beamlines}", diff --git a/src/ispyb/simulation/README.md b/src/ispyb/simulation/README.md new file mode 100644 index 00000000..c20ab6cf --- /dev/null +++ b/src/ispyb/simulation/README.md @@ -0,0 +1,64 @@ +# ISPyB simulation + +Simulate data collection and trigger automatic data processing against real data: + +```bash +isypb.simulate +isypb.simulate bm23 'Energy scan' +``` + +This will link some real raw data into a new location in the session along with snapshots if available, create a datacollection in the ispyb database and trigger a mimas start event to determine the processing execution plan. + +The simulator will create hierarchically a component (`Protein`), related `BLSample` (with intermediate `Crystal`), and potentially a `SubSample`, contained within a `Container`, `Dewar`, and `Shipment` belonging to the specified `Proposal` if they do not already exist with the defined name. Then the simulator creates a `DataCollection` and `DataCollectionGroup`, linked to the relevant `BLSample` and `BLSession`. If grid info information is specified it will also create an entry in `GridInfo` + +## Configuration + +An example configuration is available in `conf/simulate.yml` + +Each entry in `experiments` relates to a `DataCollectionGroup.experimentType` entry so must match one of the available types in the database. See https://github.com/DiamondLightSource/ispyb-database/blob/master/schemas/ispyb/tables.sql#L1930 for a full list. 
This is a list and so allows multiple entries of the same type to be specified and executed separately using the `--number` flag. + +## Available parameters per table + +### Protein + +* acronym +* name +* sequence +* density +* molecularMass +* description + +### BLSample + +* name + +### BLSubSample + +* x +* y +* x2 +* y2 +* type + +### DataCollection + +* imageContainerSubPath +* numberOfImages +* wavelength +* exposureTime + +### GridInfo + +* steps_x +* steps_y +* snapshot_offsetXPixel +* snapshot_offsetYPixel +* dx_mm +* dy_mm +* pixelsPerMicronX +* pixelsPerMicronY + + +## Zocalo + +If zocalo is installed the simulator will also send a message to zocalo before the data is copied, and send another message after the data copy is finished by default triggering the `mimas` recipe. diff --git a/src/ispyb/simulation/base.py b/src/ispyb/simulation/base.py index da3c0d75..92dc4b53 100644 --- a/src/ispyb/simulation/base.py +++ b/src/ispyb/simulation/base.py @@ -2,19 +2,13 @@ import os from abc import ABC, abstractmethod import logging +import pkg_resources import sqlalchemy from sqlalchemy.orm import sessionmaker import ispyb.sqlalchemy as isa import yaml -from workflows.transport.stomp_transport import StompTransport - -try: - import zocalo - import zocalo.configuration -except ModuleNotFoundError: - zocalo = None logger = logging.getLogger(__name__) @@ -41,11 +35,6 @@ class Simulation(ABC): def __init__(self): self._config = load_config() - if zocalo: - zc = zocalo.configuration.from_file() - zc.activate() - self.stomp = StompTransport() - @property def config(self): return self._config @@ -67,30 +56,21 @@ def beamlines(self): def experiment_types(self): return ", ".join(self.config["experiments"].keys()) - def send_message(self, message, headers={}): - if zocalo: - try: - self.stomp.connect() - self.stomp.send("processing_recipe", message, headers=headers) - except Exception: - logger.warning("Cant connect to workflow transport") - - else: - 
logger.warning("Zocalo not available, not sending message") - - def send_start(self, dcid, recipe="mimas"): - message = { - "recipes": [recipe], - "parameters": {"ispyb_dcid": dcid, "event": "start"}, - } - self.send_message(message) - - def send_end(self, dcid, recipe="mimas"): - message = { - "recipes": [recipe], - "parameters": {"ispyb_dcid": dcid, "event": "end"}, - } - self.send_message(message) + def before_start(self, dcid): + for entry in pkg_resources.iter_entry_points( + "ispyb.simulator.before_datacollection" + ): + fn = entry.load() + logger.info(f"Executing before start plugin `{entry.name}`") + fn(dcid) + + def after_end(self, dcid): + for entry in pkg_resources.iter_entry_points( + "ispyb.simulator.after_datacollection" + ): + fn = entry.load() + logger.info(f"Executing after end plugin `{entry.name}`") + fn(dcid) def do_run(self, *args, **kwargs): self.run(*args, **kwargs) diff --git a/src/ispyb/simulation/datacollection.py b/src/ispyb/simulation/datacollection.py index 73113463..1e646e01 100644 --- a/src/ispyb/simulation/datacollection.py +++ b/src/ispyb/simulation/datacollection.py @@ -283,8 +283,8 @@ def run(self, beamline, experiment_type, experiment_no=0, delay=0): f"{self.config['ispyb_url']}/visit/{blses}/id/{dc.dataCollectionId}" ) - logger.info("Triggering mimas start") - self.send_start(dc.dataCollectionId) + logger.info("Triggering before start plugins") + self.before_start(dc.dataCollectionId) # Create the dataset dir data_dir = os.path.join( @@ -361,5 +361,5 @@ def run(self, beamline, experiment_type, experiment_no=0, delay=0): if delay: time.sleep(delay) - logger.info("Triggering mimas end") - self.send_end(dc.dataCollectionId) + logger.info("Triggering after end plugins") + self.after_end(dc.dataCollectionId) From fb837391df0b4357fec80032e99b1e6c775f047e Mon Sep 17 00:00:00 2001 From: sfisher Date: Fri, 1 Oct 2021 15:25:59 +0200 Subject: [PATCH 03/18] update history and readme --- HISTORY.rst | 1 + src/ispyb/simulation/README.md | 2 
+- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/HISTORY.rst b/HISTORY.rst index f2f54b70..2d283836 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -4,6 +4,7 @@ History Unreleased / master ------------------- +* Add a basic data collection simulator 6.9.0 (2021-09-16) ------------------ diff --git a/src/ispyb/simulation/README.md b/src/ispyb/simulation/README.md index c20ab6cf..30aa9917 100644 --- a/src/ispyb/simulation/README.md +++ b/src/ispyb/simulation/README.md @@ -7,7 +7,7 @@ isypb.simulate isypb.simulate bm23 'Energy scan' ``` -This will link some real raw data into a new location in the session along with snapshots if available, create a datacollection in the ispyb database and trigger a mimas start event to determine the processing execution plan. +This will link some real raw data into a new location in the session along with snapshots if available, create a datacollection in the ispyb database. It can trigger events before and after the data is copied using the `ispyb.simulator.before_datacollection` and `ispyb.simulator.after_datacollection` entry points. These are passing just the `DataCollection.dataCollectionId`. The simulator will create hierarchically a component (`Protein`), related `BLSample` (with intermediate `Crystal`), and potentially a `SubSample`, contained within a `Container`, `Dewar`, and `Shipment` belonging to the specified `Proposal` if they do not already exist with the defined name. Then the simulator creates a `DataCollection` and `DataCollectionGroup`, linked to the relevant `BLSample` and `BLSession`. 
If grid info information is specified it will also create an entry in `GridInfo` From 2bab7d26bd8ba012fb1a3d380943deeffd2bff32 Mon Sep 17 00:00:00 2001 From: Stuart Fisher Date: Tue, 5 Oct 2021 13:22:46 +0200 Subject: [PATCH 04/18] md -> rst --- docs/index.rst | 1 + .../simulation/README.md => docs/simulate.rst | 42 ++++++++++++------- 2 files changed, 28 insertions(+), 15 deletions(-) rename src/ispyb/simulation/README.md => docs/simulate.rst (82%) diff --git a/docs/index.rst b/docs/index.rst index b177697d..7cb1f061 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -7,6 +7,7 @@ Welcome to the ISPyB API documentation! installation usage + simulate api contributing authors diff --git a/src/ispyb/simulation/README.md b/docs/simulate.rst similarity index 82% rename from src/ispyb/simulation/README.md rename to docs/simulate.rst index 30aa9917..10833e54 100644 --- a/src/ispyb/simulation/README.md +++ b/docs/simulate.rst @@ -1,25 +1,32 @@ -# ISPyB simulation +======== +Simulate +======== -Simulate data collection and trigger automatic data processing against real data: +Simulate data collection and trigger automatic data processing against real data:: + + isypb.simulate + isypb.simulate bm23 'Energy scan' -```bash -isypb.simulate -isypb.simulate bm23 'Energy scan' -``` This will link some real raw data into a new location in the session along with snapshots if available, create a datacollection in the ispyb database. It can trigger events before and after the data is copied using the `ispyb.simulator.before_datacollection` and `ispyb.simulator.after_datacollection` entry points. These are passing just the `DataCollection.dataCollectionId`. The simulator will create hierarchically a component (`Protein`), related `BLSample` (with intermediate `Crystal`), and potentially a `SubSample`, contained within a `Container`, `Dewar`, and `Shipment` belonging to the specified `Proposal` if they do not already exist with the defined name. 
Then the simulator creates a `DataCollection` and `DataCollectionGroup`, linked to the relevant `BLSample` and `BLSession`. If grid info information is specified it will also create an entry in `GridInfo` -## Configuration +*************** +Configuration +*************** An example configuration is available in `conf/simulate.yml` Each entry in `experiments` relates to a `DataCollectionGroup.experimentType` entry so must match one of the available types in the database. See https://github.com/DiamondLightSource/ispyb-database/blob/master/schemas/ispyb/tables.sql#L1930 for a full list. This is a list and so allows multiple entries of the same type to be specified and executed separately using the `--number` flag. -## Available parameters per table -### Protein +*************** +Available parameters per table +*************** + +Protein +------------- * acronym * name @@ -28,11 +35,13 @@ Each entry in `experiments` relates to a `DataCollectionGroup.experimentType` en * molecularMass * description -### BLSample +BLSample +------------- * name -### BLSubSample +BLSubSample +------------- * x * y @@ -40,14 +49,16 @@ Each entry in `experiments` relates to a `DataCollectionGroup.experimentType` en * y2 * type -### DataCollection +DataCollection +------------- * imageContainerSubPath * numberOfImages * wavelength * exposureTime -### GridInfo +GridInfo +------------- * steps_x * steps_y @@ -58,7 +69,8 @@ Each entry in `experiments` relates to a `DataCollectionGroup.experimentType` en * pixelsPerMicronX * pixelsPerMicronY - -## Zocalo +*************** +Zocalo +*************** If zocalo is installed the simulator will also send a message to zocalo before the data is copied, and send another message after the data copy is finished by default triggering the `mimas` recipe. 
From f0b52e3cc6c528014fb0da0a1f3fbef5f729e28a Mon Sep 17 00:00:00 2001 From: Stuart Fisher Date: Wed, 6 Oct 2021 11:30:40 +0200 Subject: [PATCH 05/18] add info about env var for config --- docs/simulate.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/simulate.rst b/docs/simulate.rst index 10833e54..96969775 100644 --- a/docs/simulate.rst +++ b/docs/simulate.rst @@ -16,7 +16,7 @@ The simulator will create hierarchically a component (`Protein`), related `BLSam Configuration *************** -An example configuration is available in `conf/simulate.yml` +The configuration file location is defined via the `ISPYB_SIMULATE_CONFIG` environment variable. An example configuration is available in `conf/simulate.yml` Each entry in `experiments` relates to a `DataCollectionGroup.experimentType` entry so must match one of the available types in the database. See https://github.com/DiamondLightSource/ispyb-database/blob/master/schemas/ispyb/tables.sql#L1930 for a full list. This is a list and so allows multiple entries of the same type to be specified and executed separately using the `--number` flag. 
From 7d2bfe44a27bf9ccb42def9f2dabd82eef26cbf5 Mon Sep 17 00:00:00 2001 From: Stuart Fisher Date: Thu, 7 Oct 2021 16:32:30 +0200 Subject: [PATCH 06/18] fix snapshot copy/linking --- src/ispyb/simulation/datacollection.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/ispyb/simulation/datacollection.py b/src/ispyb/simulation/datacollection.py index 1e646e01..388e1938 100644 --- a/src/ispyb/simulation/datacollection.py +++ b/src/ispyb/simulation/datacollection.py @@ -322,17 +322,17 @@ def run(self, beamline, experiment_type, experiment_no=0, delay=0): if os.path.exists(snapshot_path): snapshot = os.path.join(data_dir, os.path.basename(snapshot_path)) if link: - logger.debug("Linking snapshot") + logger.debug("Linking snapshot from '%s' to '%s'", snapshot_path, snapshot) os.link(snapshot_path, snapshot) else: - logger.debug("Copying snapshot") + logger.debug("Copying snapshot from '%s' to '%s'", snapshot_path, snapshot) shutil.copy(snapshot_path, snapshot) snap, snap_extension = os.path.splitext(snapshot_path) thumb = f"{snap}t{snap_extension}" if os.path.exists(thumb): if link: - logger.debug("Linking thumbnail") + logger.debug("Linking thumbnail from '%s'", thumb) os.link( thumb, os.path.join( @@ -341,7 +341,7 @@ def run(self, beamline, experiment_type, experiment_no=0, delay=0): ), ) else: - logger.debug("Copying thumbnail") + logger.debug("Copying thumbnail from '%s'", thumb) shutil.copy( thumb, os.path.join( @@ -353,6 +353,7 @@ def run(self, beamline, experiment_type, experiment_no=0, delay=0): logger.warning(f"Snapshot thumbnail does not exist {thumb}") dc.xtalSnapshotFullPath1 = snapshot + ses.commit() else: logger.warning(f"Snapshot file does not exist {snapshot_path}") From cc57ee8c57636beab4021d8d161c725e5abfc9d3 Mon Sep 17 00:00:00 2001 From: Stuart Fisher Date: Tue, 12 Oct 2021 15:16:51 +0200 Subject: [PATCH 07/18] expand example conf details, use ISPyB column casing to avoid further confusion --- conf/simulate_example.yml | 
58 ++++++++++++++++---------- src/ispyb/simulation/datacollection.py | 20 +++++---- 2 files changed, 47 insertions(+), 31 deletions(-) diff --git a/conf/simulate_example.yml b/conf/simulate_example.yml index 0a9194a4..41ea2c40 100644 --- a/conf/simulate_example.yml +++ b/conf/simulate_example.yml @@ -13,54 +13,68 @@ data_dir: /data/tests/{beamline}/simulation ispyb_url: https://ispyb.diamond.ac.uk -# Proteins +# Define Components (Proteins) components: - Component1: + # an internal references for the component + comp1: + # columns to populate for this component acronym: Component1 sequence: SiSP - Component2: + comp2: acronym: Component2 -# Samples +# Define BLSamples samples: - Sample1: + # an internal reference for this sample + samp1: + # columns to populate for this sample name: Sample1 - component: Component1 + # which component this sample is an instance of (one of the keys in components above) + component: comp1 - Sample2: + samp2: name: Sample2 - component: Component2 + component: comp2 -# Experiments +# Define Experiments (DataCollections) experiments: Energy scan: + # data will be split into its respective imageDirectory and fileTemplate columns - data: energy_scan/energyscan1.h5 - sample: Sample2 - xtalsnapshotfullpath1: energy_scan/snapshot.png - numberofimages: 4001 - exposuretime: 1 + # which sample to link this data collection to (one of the keys in samples above) + sample: samp1 + + # columns to populate + xtalSnapshotFullPath1: energy_scan/snapshot.png + numberOfImages: 4001 + exposureTime: 1 #energy: 8.8143 wavelength: 1.4065 - imagecontainersubpath: 1.1/measurement + imageContainerSubPath: 1.1/measurement XRF map: - data: xrf_map/xrfmap1.h5 - #energy: 2.4817 - wavelength: 4.9959 - sample: Sample1 + sample: samp1 + xtalsnapshotfullpath1: xrf_map/snapshot.png - exposuretime: 0.03 numberofimages: 1600 + exposuretime: 0.03 + #energy: 2.4817 + wavelength: 4.9959 + + # additionally populate GridInfo grid: steps_x: 40 steps_y: 40 dx_mm: 0.001 dy_mm: 
0.001 - pixelspermicronx: -0.44994 - pixelspermicrony: -0.46537 - snapshot_offsetxpixel: 682.16 - snapshot_offsetypixel: 554 + pixelsPerMicronX: -0.44994 + pixelsPerMicrony: -0.46537 + snapshot_offsetXPixel: 682.16 + snapshot_offsetYPixel: 554 + + # additionally populate BlSubSample subsample: x: 9038007 y: 24467003 diff --git a/src/ispyb/simulation/datacollection.py b/src/ispyb/simulation/datacollection.py index 388e1938..2d7b22cb 100644 --- a/src/ispyb/simulation/datacollection.py +++ b/src/ispyb/simulation/datacollection.py @@ -171,7 +171,7 @@ def run(self, beamline, experiment_type, experiment_no=0, delay=0): name=comp.get("name", comp.get("acronym")), sequence=comp.get("sequence"), density=comp.get("density"), - molecularMass=comp.get("molecularmass"), + molecularMass=comp.get("molecularMass"), description="Simulated component", ) ses.add(component) @@ -244,16 +244,18 @@ def run(self, beamline, experiment_type, experiment_no=0, delay=0): logger.debug("Creating datacollection") dc = isa.DataCollection( + # TODO: Remove - legacy column BLSAMPLEID=blsample.blSampleId, blSubSampleId=subsampleid, dataCollectionGroupId=dcg.dataCollectionGroupId, fileTemplate=os.path.basename(exp["data"]), + imageDirectory=os.path.dirname(exp["data"]), imageContainerSubPath=exp.get( - "imagecontainersubpath", "1.1/measurement" + "imageContainerSubPath", "1.1/measurement" ), - numberOfImages=exp.get("numberofimages"), + numberOfImages=exp.get("numberOfImages"), wavelength=exp.get("wavelength"), - exposureTime=exp.get("exposuretime"), + exposureTime=exp.get("exposureTime"), runStatus="Successful", comments="Simulated datacollection", startTime=datetime.now(), @@ -268,12 +270,12 @@ def run(self, beamline, experiment_type, experiment_no=0, delay=0): dataCollectionId=dc.dataCollectionId, steps_x=exp["grid"]["steps_x"], steps_y=exp["grid"]["steps_y"], - snapshot_offsetXPixel=exp["grid"]["snapshot_offsetxpixel"], - snapshot_offsetYPixel=exp["grid"]["snapshot_offsetypixel"], + 
snapshot_offsetXPixel=exp["grid"]["snapshot_offsetXPixel"], + snapshot_offsetYPixel=exp["grid"]["snapshot_offsetYPixel"], dx_mm=exp["grid"]["dx_mm"], dy_mm=exp["grid"]["dy_mm"], - pixelsPerMicronX=exp["grid"]["pixelspermicronx"], - pixelsPerMicronY=exp["grid"]["pixelspermicrony"], + pixelsPerMicronX=exp["grid"]["pixelsPerMicronX"], + pixelsPerMicronY=exp["grid"]["pixelsPerMicronY"], ) ses.add(grid) ses.commit() @@ -316,7 +318,7 @@ def run(self, beamline, experiment_type, experiment_no=0, delay=0): shutil.copy(data, os.path.join(data_dir, os.path.basename(data))) snapshot_path = os.path.join( - self.config["raw_data"], exp.get("xtalsnapshotfullpath1") + self.config["raw_data"], exp.get("xtalSnapshotFullPath1") ) if snapshot_path: if os.path.exists(snapshot_path): From 776ec65701f216afad7405fca913089f9eca27b4 Mon Sep 17 00:00:00 2001 From: Stuart Fisher Date: Tue, 12 Oct 2021 15:27:32 +0200 Subject: [PATCH 08/18] couple more casing --- conf/simulate_example.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/conf/simulate_example.yml b/conf/simulate_example.yml index 41ea2c40..da7723fd 100644 --- a/conf/simulate_example.yml +++ b/conf/simulate_example.yml @@ -8,7 +8,7 @@ sessions: # Where to copy raw data from raw_data: /data/ispyb-test -# Where to write simulated data to, can use {beamline} placeholders +# Where to write simulated data to, can use {beamline} placeholder data_dir: /data/tests/{beamline}/simulation ispyb_url: https://ispyb.diamond.ac.uk @@ -57,9 +57,9 @@ experiments: - data: xrf_map/xrfmap1.h5 sample: samp1 - xtalsnapshotfullpath1: xrf_map/snapshot.png - numberofimages: 1600 - exposuretime: 0.03 + xtalSnapshotFullPath1: xrf_map/snapshot.png + numberOfImages: 1600 + exposureTime: 0.03 #energy: 2.4817 wavelength: 4.9959 From 94eac9ad5e97111bd23cff41d4dfda9eafaedece Mon Sep 17 00:00:00 2001 From: Stuart Fisher Date: Tue, 12 Oct 2021 15:45:38 +0200 Subject: [PATCH 09/18] doc update --- docs/simulate.rst | 44 
++++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/docs/simulate.rst b/docs/simulate.rst index 96969775..eceb0706 100644 --- a/docs/simulate.rst +++ b/docs/simulate.rst @@ -1,14 +1,18 @@ -======== -Simulate -======== +============== +ispyb.simulate +============== -Simulate data collection and trigger automatic data processing against real data:: +`ispyb.simulate` creates a new DataCollection row in the ISPyB database from a simple yaml definition. It creates a data collection, related sample information, and associated shipping entities. It then copies some raw data and associated snapshots (and thumbnails). - isypb.simulate - isypb.simulate bm23 'Energy scan' +Simulate a data collection:: + ispyb.simulate + ispyb.simulate bm23 'Energy scan' + +If multiple experiments of the same type are specified one can be chosen with the `--number` flag:: + + ispyb.simulate --number 2 bm23 'Energy scan' -This will link some real raw data into a new location in the session along with snapshots if available, create a datacollection in the ispyb database. It can trigger events before and after the data is copied using the `ispyb.simulator.before_datacollection` and `ispyb.simulator.after_datacollection` entry points. These are passing just the `DataCollection.dataCollectionId`. The simulator will create hierarchically a component (`Protein`), related `BLSample` (with intermediate `Crystal`), and potentially a `SubSample`, contained within a `Container`, `Dewar`, and `Shipment` belonging to the specified `Proposal` if they do not already exist with the defined name. Then the simulator creates a `DataCollection` and `DataCollectionGroup`, linked to the relevant `BLSample` and `BLSession`. 
If grid info information is specified it will also create an entry in `GridInfo` @@ -16,17 +20,21 @@ The simulator will create hierarchically a component (`Protein`), related `BLSam Configuration *************** -The configuration file location is defined via the `ISPYB_SIMULATE_CONFIG` environment variable. An example configuration is available in `conf/simulate.yml` +The configuration file location is defined via the `ISPYB_SIMULATE_CONFIG` environment variable. An example configuration is available in `conf/simulate.yml`_. The structure and requirements of this file are documented in the example. -Each entry in `experiments` relates to a `DataCollectionGroup.experimentType` entry so must match one of the available types in the database. See https://github.com/DiamondLightSource/ispyb-database/blob/master/schemas/ispyb/tables.sql#L1930 for a full list. This is a list and so allows multiple entries of the same type to be specified and executed separately using the `--number` flag. +Each entry in `experiments` relates to a `DataCollectionGroup.experimentType` entry so must match one of the available types in the database. See `experimentTypes`_ for a full list. This is a list and so allows multiple entries of the same type to be specified and executed separately using the `--number` flag. +.. _conf/simulate.yml: https://github.com/DiamondLightSource/ispyb-api/blob/master/conf/simulate_example.yml +.. _experimentTypes: https://github.com/DiamondLightSource/ispyb-database/blob/master/schemas/ispyb/tables.sql#L1930 -*************** -Available parameters per table -*************** +*************************** +Available columns per table +*************************** -Protein -------------- +The ISPyB tables are large, so this simulator exposes only a subset of their columns: those most pertinent to creating usable data collections and associated entries. The available columns are listed below for each table.
+ +Component (Protein) +------------------- * acronym * name @@ -50,7 +58,7 @@ BLSubSample * type DataCollection -------------- +-------------- * imageContainerSubPath * numberOfImages @@ -70,7 +78,11 @@ GridInfo * pixelsPerMicronY *************** -Zocalo +Plugins *************** +The simulator can trigger events before and after the data is copied using the `ispyb.simulator.before_datacollection` and `ispyb.simulator.after_datacollection` entry points. These are passed just the new `DataCollection.dataCollectionId`. + +Zocalo +------------- If zocalo is installed the simulator will also send a message to zocalo before the data is copied, and send another message after the data copy is finished by default triggering the `mimas` recipe. From bb28539ccb42c1320532dd5f0b843e8bc5860f60 Mon Sep 17 00:00:00 2001 From: Stuart Fisher Date: Tue, 12 Oct 2021 16:29:24 +0200 Subject: [PATCH 10/18] various recommendations, flatten config --- conf/simulate_example.yml | 77 ++++++++++--------- docs/simulate.rst | 3 +- src/ispyb/cli/simulate.py | 24 +++--- src/ispyb/simulation/base.py | 11 +-- src/ispyb/simulation/datacollection.py | 100 ++++++++++++------------- 5 files changed, 101 insertions(+), 114 deletions(-) diff --git a/conf/simulate_example.yml b/conf/simulate_example.yml index da7723fd..77f5b00d 100644 --- a/conf/simulate_example.yml +++ b/conf/simulate_example.yml @@ -39,45 +39,50 @@ samples: # Define Experiments (DataCollections) experiments: - Energy scan: - # data will be split into its respective imageDirectory and fileTemplate columns - - data: energy_scan/energyscan1.h5 - # which sample to link this data collection to (one of the keys in samples above) - sample: samp1 + # a shortname for this experiment (available via cli) + energy_scan1: + # the experimentType, must map to a valid type in DataCollectionGroup.experimentType + experimentType: Energy scan + # data will be split into its respective imageDirectory and fileTemplate columns + data: 
energy_scan/energyscan1.h5 + # which sample to link this data collection to (one of the keys in samples above) + sample: samp1 - # columns to populate - xtalSnapshotFullPath1: energy_scan/snapshot.png - numberOfImages: 4001 - exposureTime: 1 - #energy: 8.8143 - wavelength: 1.4065 - imageContainerSubPath: 1.1/measurement + # columns to populate + # thumbnails should have a trailing t, i.e. energy_scan/snapshott.png + xtalSnapshotFullPath1: energy_scan/snapshot.png + numberOfImages: 4001 + exposureTime: 1 + #energy: 8.8143 + wavelength: 1.4065 + imageContainerSubPath: 1.1/measurement - XRF map: - - data: xrf_map/xrfmap1.h5 - sample: samp1 + xrf_map1: + experimentType: XRF map + data: xrf_map/xrfmap1.h5 + sample: samp1 - xtalSnapshotFullPath1: xrf_map/snapshot.png - numberOfImages: 1600 - exposureTime: 0.03 - #energy: 2.4817 - wavelength: 4.9959 + xtalSnapshotFullPath1: xrf_map/snapshot.png + numberOfImages: 1600 + exposureTime: 0.03 + #energy: 2.4817 + wavelength: 4.9959 - # additionally populate GridInfo - grid: - steps_x: 40 - steps_y: 40 - dx_mm: 0.001 - dy_mm: 0.001 - pixelsPerMicronX: -0.44994 - pixelsPerMicrony: -0.46537 - snapshot_offsetXPixel: 682.16 - snapshot_offsetYPixel: 554 + # additionally populate GridInfo + grid: + steps_x: 40 + steps_y: 40 + dx_mm: 0.001 + dy_mm: 0.001 + pixelsPerMicronX: -0.44994 + pixelsPerMicronY: -0.46537 + snapshot_offsetXPixel: 682.16 + snapshot_offsetYPixel: 554 - # additionally populate BlSubSample - subsample: - x: 9038007 - y: 24467003 - x2: 9078007 - y2: 24507003 + # additionally populate BlSubSample + subsample: + x: 9038007 + y: 24467003 + x2: 9078007 + y2: 24507003 type: roi diff --git a/docs/simulate.rst b/docs/simulate.rst index eceb0706..4ad4c436 100644 --- a/docs/simulate.rst +++ b/docs/simulate.rst @@ -22,7 +22,7 @@ Configuration The configuration file location is defined via the `ISPYB_SIMULATE_CONFIG` environment variable. An example configuration is available in `conf/simulate.yml`_.
The structure and requirements of this file are documented in the example. -Each entry in `experiments` relates to a `DataCollectionGroup.experimentType` entry so must match one of the available types in the database. See `experimentTypes`_ for a full list. This is a list and so allows multiple entries of the same type to be specified and executed separately using the `--number` flag. +Each entry in `experiments` represents a different data collection. The `experimentType` column relates to a `DataCollectionGroup.experimentType` entry so must match one of the available types in the database. See `experimentTypes`_ for a full list. .. _conf/simulate.yml: https://github.com/DiamondLightSource/ispyb-api/blob/master/conf/simulate_example.yml .. _experimentTypes: https://github.com/DiamondLightSource/ispyb-database/blob/master/schemas/ispyb/tables.sql#L1930 @@ -64,6 +64,7 @@ DataCollection * numberOfImages * wavelength * exposureTime +* xtalSnapshotFullPath1-4 GridInfo ------------- diff --git a/src/ispyb/cli/simulate.py b/src/ispyb/cli/simulate.py index 04152a46..458ec97a 100644 --- a/src/ispyb/cli/simulate.py +++ b/src/ispyb/cli/simulate.py @@ -17,27 +17,21 @@ def run(): try: sdc = SimulateDataCollection() except AttributeError as e: - print(f"Simluation Error: {str(e)}") - exit() + exit(f"Simulation Error: {e}") - parser = argparse.ArgumentParser(description="ISPyB simulation service") + parser = argparse.ArgumentParser(description="ISPyB simulation tool") parser.add_argument( "beamline", - help=f"Beamline to run simulation against. 
Available beamlines: {sdc.beamlines}", + help=f"Beamline to run simulation against", + choices=sdc.beamlines ) parser.add_argument( "experiment", - help=f"Experiment type to simluate, Available types: {sdc.experiment_types}", + help=f"Experiment type to simluate", + choices=sdc.experiments ) - parser.add_argument( - "--number", - default=0, - type=int, - dest="experiment_number", - help="Experiment simulation number to run", - ) parser.add_argument( "--delay", default=5, @@ -63,11 +57,11 @@ def run(): try: sdc.do_run( - args.beamline, args.experiment, args.experiment_number, delay=args.delay + args.beamline, args.experiment, delay=args.delay ) except Exception as e: if args.debug: - logger.exception("Simluation Error") + logger.exception("Simulation Error") print(e) else: - print(f"Simluation Error: {str(e)}") + print(f"Simulation Error: {str(e)}") diff --git a/src/ispyb/simulation/base.py b/src/ispyb/simulation/base.py index 92dc4b53..6ddde8ce 100644 --- a/src/ispyb/simulation/base.py +++ b/src/ispyb/simulation/base.py @@ -24,11 +24,8 @@ def load_config(): if not os.path.exists(config_yml): raise AttributeError(f"Cannot find config file: {config_yml}") - config = {} with open(config_yml, "r") as stream: - config = yaml.safe_load(stream) - - return config + return yaml.safe_load(stream) class Simulation(ABC): @@ -50,11 +47,7 @@ def session(self): @property def beamlines(self): - return ", ".join(self.config["sessions"].keys()) - - @property - def experiment_types(self): - return ", ".join(self.config["experiments"].keys()) + return list(self.config["sessions"].keys()) def before_start(self, dcid): for entry in pkg_resources.iter_entry_points( diff --git a/src/ispyb/simulation/datacollection.py b/src/ispyb/simulation/datacollection.py index 2d7b22cb..0040d941 100644 --- a/src/ispyb/simulation/datacollection.py +++ b/src/ispyb/simulation/datacollection.py @@ -14,6 +14,10 @@ class SimulateDataCollection(Simulation): + @property + def experiments(self): + return 
list(self.config["experiments"].keys()) + def _get_container_position(self, ses, blsession, proposalid, beamline): shipment_name = "Simulation_Shipment" shipment = ( @@ -97,27 +101,23 @@ def _get_container_position(self, ses, blsession, proposalid, beamline): return container.containerId, max_loc + 1 - def run(self, beamline, experiment_type, experiment_no=0, delay=0): + def run(self, beamline, experiment, delay=0): blses = self.config["sessions"][beamline] - if experiment_type not in self.config["experiments"]: - raise KeyError(f"No such experiment type {experiment_type}") - - if experiment_no > len(self.config["experiments"][experiment_type]): - raise KeyError( - f"Invalid experiment number {experiment_no}, {len(self.config['experiments'][experiment_type])} exps available" - ) + if experiment not in self.config["experiments"]: + raise KeyError(f"No such experiment type {experiment}") - exp = self.config["experiments"][experiment_type][experiment_no] + exp = self.config["experiments"][experiment] data = os.path.join(self.config["raw_data"], exp["data"]) if not os.path.exists(data): raise AttributeError(f"Raw data file: {data} does not exist") + if not exp.get("experimentType"): + raise AttributeError(f"No experiment type defined for experiment {experiment}") + if not exp.get("sample"): - raise KeyError( - f"No sample specified for experiment {experiment_type}:{experiment_no}" - ) + raise KeyError(f"No sample specified for experiment {experiment}") if exp["sample"] not in self.config["samples"]: raise KeyError( @@ -236,8 +236,10 @@ def run(self, beamline, experiment_type, experiment_no=0, delay=0): logger.debug("Creating datacollection group") dcg = isa.DataCollectionGroup( sessionId=blsession.sessionId, - experimentType=experiment_type, + experimentType=exp["experimentType"], blSampleId=blsample.blSampleId, + startTime=datetime.now(), + endTime=datetime.now() + timedelta(minutes=5), ) ses.add(dcg) ses.commit() @@ -249,7 +251,6 @@ def run(self, beamline, 
experiment_type, experiment_no=0, delay=0): blSubSampleId=subsampleid, dataCollectionGroupId=dcg.dataCollectionGroupId, fileTemplate=os.path.basename(exp["data"]), - imageDirectory=os.path.dirname(exp["data"]), imageContainerSubPath=exp.get( "imageContainerSubPath", "1.1/measurement" ), @@ -309,33 +310,35 @@ def run(self, beamline, experiment_type, experiment_no=0, delay=0): ) # Link data files / snapshots - link = self.config.get("copy_method", "copy") == "link" - if link: - logger.debug("Linking data") - os.link(data, os.path.join(data_dir, os.path.basename(data))) + if self.config.get("copy_method", "copy") == "link": + action, link_or_copy = "Linking", os.link else: - logger.debug("Copying data") - shutil.copy(data, os.path.join(data_dir, os.path.basename(data))) + action, link_or_copy = "Coyping", shutil.copy - snapshot_path = os.path.join( - self.config["raw_data"], exp.get("xtalSnapshotFullPath1") - ) - if snapshot_path: - if os.path.exists(snapshot_path): - snapshot = os.path.join(data_dir, os.path.basename(snapshot_path)) - if link: - logger.debug("Linking snapshot from '%s' to '%s'", snapshot_path, snapshot) - os.link(snapshot_path, snapshot) - else: - logger.debug("Copying snapshot from '%s' to '%s'", snapshot_path, snapshot) - shutil.copy(snapshot_path, snapshot) - - snap, snap_extension = os.path.splitext(snapshot_path) - thumb = f"{snap}t{snap_extension}" - if os.path.exists(thumb): - if link: - logger.debug("Linking thumbnail from '%s'", thumb) - os.link( + logger.debug(f"{action} data") + link_or_copy(data, os.path.join(data_dir, os.path.basename(data))) + + for snapshot in [f"xtalSnapshotFullPath{i}" for i in range(4)]: + if exp.get(snapshot): + snapshot_path = os.path.join( + self.config["raw_data"], exp.get(snapshot) + ) + if os.path.exists(snapshot_path): + new_snapshot = os.path.join( + data_dir, os.path.basename(snapshot_path) + ) + logger.debug( + f"{action} snapshot from '%s' to '%s'", + snapshot_path, + new_snapshot, + ) + 
link_or_copy(snapshot_path, new_snapshot) + + snap, snap_extension = os.path.splitext(snapshot_path) + thumb = f"{snap}t{snap_extension}" + if os.path.exists(thumb): + logger.debug(f"{action} thumbnail from '%s'", thumb) + link_or_copy( thumb, os.path.join( data_dir, @@ -343,21 +346,12 @@ def run(self, beamline, experiment_type, experiment_no=0, delay=0): ), ) else: - logger.debug("Copying thumbnail from '%s'", thumb) - shutil.copy( - thumb, - os.path.join( - data_dir, - f"{os.path.basename(snap)}t{snap_extension}", - ), - ) - else: - logger.warning(f"Snapshot thumbnail does not exist {thumb}") + logger.warning(f"Snapshot thumbnail does not exist {thumb}") - dc.xtalSnapshotFullPath1 = snapshot - ses.commit() - else: - logger.warning(f"Snapshot file does not exist {snapshot_path}") + setattr(dc, snapshot, new_snapshot) + ses.commit() + else: + logger.warning(f"Snapshot file does not exist {snapshot_path}") logger.info(f"Finshed copying data to: {data_dir}") From 9cd12d6be69de417a113dd420056fb9e55eecf72 Mon Sep 17 00:00:00 2001 From: Stuart Fisher Date: Tue, 12 Oct 2021 16:33:41 +0200 Subject: [PATCH 11/18] rename --- src/ispyb/simulation/datacollection.py | 2 +- src/ispyb/simulation/{sqla_helpers.py => sqlalchemy_helpers.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename src/ispyb/simulation/{sqla_helpers.py => sqlalchemy_helpers.py} (100%) diff --git a/src/ispyb/simulation/datacollection.py b/src/ispyb/simulation/datacollection.py index 0040d941..ffd476b6 100644 --- a/src/ispyb/simulation/datacollection.py +++ b/src/ispyb/simulation/datacollection.py @@ -7,7 +7,7 @@ import ispyb.sqlalchemy as isa from ispyb.simulation.base import Simulation -from ispyb.simulation.sqla_helpers import session, proposal +from ispyb.simulation.sqlalchemy_helpers import session, proposal logger = logging.getLogger(__name__) diff --git a/src/ispyb/simulation/sqla_helpers.py b/src/ispyb/simulation/sqlalchemy_helpers.py similarity index 100% rename from 
src/ispyb/simulation/sqla_helpers.py rename to src/ispyb/simulation/sqlalchemy_helpers.py From 12c7747198b7287161fac6466070f1cee6a3f749 Mon Sep 17 00:00:00 2001 From: Stuart Fisher Date: Tue, 12 Oct 2021 16:33:50 +0200 Subject: [PATCH 12/18] import --- src/ispyb/simulation/sqlalchemy_helpers.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/ispyb/simulation/sqlalchemy_helpers.py b/src/ispyb/simulation/sqlalchemy_helpers.py index 116a3b96..0e9bf081 100644 --- a/src/ispyb/simulation/sqlalchemy_helpers.py +++ b/src/ispyb/simulation/sqlalchemy_helpers.py @@ -1,13 +1,13 @@ -from sqlalchemy import func -import ispyb.sqlalchemy as isa +import sqlalchemy +import ispyb.sqlalchemy -session = func.concat( - isa.Proposal.proposalCode, - isa.Proposal.proposalNumber, +session = sqlalchemy.func.concat( + ispyb.sqlalchemy.Proposal.proposalCode, + ispyb.sqlalchemy.Proposal.proposalNumber, "-", - isa.BLSession.visit_number, + ispyb.sqlalchemy.BLSession.visit_number, ).label("session") -proposal = func.concat(isa.Proposal.proposalCode, isa.Proposal.proposalNumber).label( - "proposal" -) +proposal = sqlalchemy.func.concat( + ispyb.sqlalchemy.Proposal.proposalCode, ispyb.sqlalchemy.Proposal.proposalNumber +).label("proposal") From 68d656f51948bba666143f9904de0da7941be1a9 Mon Sep 17 00:00:00 2001 From: Stuart Fisher Date: Tue, 12 Oct 2021 16:37:14 +0200 Subject: [PATCH 13/18] outdated --- docs/simulate.rst | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/simulate.rst b/docs/simulate.rst index 4ad4c436..ea6bd60b 100644 --- a/docs/simulate.rst +++ b/docs/simulate.rst @@ -9,10 +9,6 @@ Simulate a data collection:: ispyb.simulate ispyb.simulate bm23 'Energy scan' -If multiple experiments of the same type are specified one can be chosen with the `--number` flag:: - - ispyb.simulate --number 2 bm23 'Energy scan' - The simulator will create hierarchically a component (`Protein`), related `BLSample` (with intermediate `Crystal`), and potentially 
a `SubSample`, contained within a `Container`, `Dewar`, and `Shipment` belonging to the specified `Proposal` if they do not already exist with the defined name. Then the simulator creates a `DataCollection` and `DataCollectionGroup`, linked to the relevant `BLSample` and `BLSession`. If grid info information is specified it will also create an entry in `GridInfo` From a495af37654429d55fef83b0d68642c5cc01874c Mon Sep 17 00:00:00 2001 From: Stuart Fisher Date: Tue, 12 Oct 2021 16:37:47 +0200 Subject: [PATCH 14/18] update doc --- docs/simulate.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/simulate.rst b/docs/simulate.rst index ea6bd60b..3380dea3 100644 --- a/docs/simulate.rst +++ b/docs/simulate.rst @@ -6,8 +6,8 @@ ispyb.simulate Simulate a data collection:: - ispyb.simulate - ispyb.simulate bm23 'Energy scan' + ispyb.simulate + ispyb.simulate bm23 energy_scan1 The simulator will create hierarchically a component (`Protein`), related `BLSample` (with intermediate `Crystal`), and potentially a `SubSample`, contained within a `Container`, `Dewar`, and `Shipment` belonging to the specified `Proposal` if they do not already exist with the defined name. Then the simulator creates a `DataCollection` and `DataCollectionGroup`, linked to the relevant `BLSample` and `BLSession`. 
If grid info information is specified it will also create an entry in `GridInfo` From b767a8a270e0125c8e5338bf587714acca89354c Mon Sep 17 00:00:00 2001 From: Stuart Fisher Date: Tue, 12 Oct 2021 16:39:21 +0200 Subject: [PATCH 15/18] update arg doc --- src/ispyb/cli/simulate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ispyb/cli/simulate.py b/src/ispyb/cli/simulate.py index 458ec97a..d657c65a 100644 --- a/src/ispyb/cli/simulate.py +++ b/src/ispyb/cli/simulate.py @@ -28,7 +28,7 @@ def run(): parser.add_argument( "experiment", - help=f"Experiment type to simluate", + help=f"Experiment to simluate", choices=sdc.experiments ) From 322fe16551e24ec5bce1e2d3e2279f37c12c5c05 Mon Sep 17 00:00:00 2001 From: Stuart Fisher Date: Tue, 12 Oct 2021 16:39:41 +0200 Subject: [PATCH 16/18] arg doc --- src/ispyb/cli/simulate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ispyb/cli/simulate.py b/src/ispyb/cli/simulate.py index d657c65a..ed78c1fe 100644 --- a/src/ispyb/cli/simulate.py +++ b/src/ispyb/cli/simulate.py @@ -37,7 +37,7 @@ def run(): default=5, type=int, dest="delay", - help="Delay between mimas start and end events", + help="Delay between plugin start and end events", ) parser.add_argument( "--debug", From 96c5bb6f7adc7f52394d34d40b2f8b89e96520b8 Mon Sep 17 00:00:00 2001 From: Stuart Fisher Date: Tue, 12 Oct 2021 17:00:03 +0200 Subject: [PATCH 17/18] load config in cli side --- src/ispyb/cli/simulate.py | 23 ++++++++++++----------- src/ispyb/simulation/base.py | 15 ++++----------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/src/ispyb/cli/simulate.py b/src/ispyb/cli/simulate.py index ed78c1fe..d31d0291 100644 --- a/src/ispyb/cli/simulate.py +++ b/src/ispyb/cli/simulate.py @@ -1,5 +1,6 @@ import argparse import logging +import os from ispyb.simulation.datacollection import SimulateDataCollection @@ -14,22 +15,24 @@ def run(): + config_yml = os.getenv("ISPYB_SIMULATE_CONFIG") + if not config_yml: + raise 
RuntimeError( + "`ISPYB_SIMULATE_CONFIG` environment variable is not defined" + ) + try: - sdc = SimulateDataCollection() + sdc = SimulateDataCollection(config_yml) except AttributeError as e: exit(f"Simulation Error: {e}") parser = argparse.ArgumentParser(description="ISPyB simulation tool") parser.add_argument( - "beamline", - help=f"Beamline to run simulation against", - choices=sdc.beamlines + "beamline", help=f"Beamline to run simulation against", choices=sdc.beamlines ) parser.add_argument( - "experiment", - help=f"Experiment to simluate", - choices=sdc.experiments + "experiment", help=f"Experiment to simluate", choices=sdc.experiments ) parser.add_argument( @@ -56,12 +59,10 @@ def run(): root.setLevel(level=logging.DEBUG if args.debug else logging.INFO) try: - sdc.do_run( - args.beamline, args.experiment, delay=args.delay - ) + sdc.do_run(args.beamline, args.experiment, delay=args.delay) except Exception as e: if args.debug: logger.exception("Simulation Error") print(e) else: - print(f"Simulation Error: {str(e)}") + print(f"Simulation Error: {e}") diff --git a/src/ispyb/simulation/base.py b/src/ispyb/simulation/base.py index 6ddde8ce..37bf5776 100644 --- a/src/ispyb/simulation/base.py +++ b/src/ispyb/simulation/base.py @@ -13,24 +13,17 @@ logger = logging.getLogger(__name__) -def load_config(): - try: - config_yml = os.environ["ISPYB_SIMULATE_CONFIG"] - except KeyError: - raise AttributeError( - "ISPYB_SIMULATE_CONFIG environment variable is not defined" - ) - +def load_config(config_yml): if not os.path.exists(config_yml): - raise AttributeError(f"Cannot find config file: {config_yml}") + raise RuntimeError(f"Cannot find config file: {config_yml}") with open(config_yml, "r") as stream: return yaml.safe_load(stream) class Simulation(ABC): - def __init__(self): - self._config = load_config() + def __init__(self, config_yml): + self._config = load_config(config_yml) @property def config(self): From 7e0434d1bdcdad268e95e42821e3c13606e3faec Mon Sep 17 00:00:00 
2001 From: Stuart Fisher Date: Tue, 12 Oct 2021 17:01:54 +0200 Subject: [PATCH 18/18] typo --- conf/simulate_example.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/simulate_example.yml b/conf/simulate_example.yml index 77f5b00d..c9d7fd83 100644 --- a/conf/simulate_example.yml +++ b/conf/simulate_example.yml @@ -15,7 +15,7 @@ ispyb_url: https://ispyb.diamond.ac.uk # Define Components (Proteins) components: - # an internal references for the component + # an internal reference for the component comp1: # columns to populate for this component acronym: Component1