diff --git a/qiskit_experiments/database_service/device_component.py b/qiskit_experiments/database_service/device_component.py
index 2d0bbdca59..a85baff4c9 100644
--- a/qiskit_experiments/database_service/device_component.py
+++ b/qiskit_experiments/database_service/device_component.py
@@ -81,9 +81,10 @@ def to_component(string: str) -> DeviceComponent:
     Raises:
         ValueError: If input string is not a valid device component.
     """
+    if isinstance(string, DeviceComponent):
+        return string
     if string.startswith("Q"):
         return Qubit(int(string[1:]))
-    elif string.startswith("R"):
+    if string.startswith("R"):
         return Resonator(int(string[1:]))
-    else:
-        return UnknownComponent(string)
+    return UnknownComponent(string)
diff --git a/qiskit_experiments/database_service/utils.py b/qiskit_experiments/database_service/utils.py
index 2a388a6acb..81740546ca 100644
--- a/qiskit_experiments/database_service/utils.py
+++ b/qiskit_experiments/database_service/utils.py
@@ -19,12 +19,14 @@
 from abc import ABC, abstractmethod
 from collections import OrderedDict
 from datetime import datetime, timezone
-from typing import Callable, Tuple, Dict, Any, Union, Type, Optional
+from typing import Callable, Tuple, List, Dict, Any, Union, Type, Optional
 import json

+import pandas as pd
 import dateutil.parser
 import pkg_resources
 from dateutil import tz
+
 from qiskit.version import __version__ as terra_version

 from qiskit_ibm_experiment import (
@@ -276,3 +278,199 @@ def append(self, value):
         """Append to the list."""
         with self._lock:
             self._container.append(value)
+
+
+class ThreadSafeDataFrame(ThreadSafeContainer):
+    """Thread safe data frame.
+
+    This class wraps a pandas dataframe with predefined column labels,
+    which are specified by the class method `_default_columns`.
+    Subclasses can override this method to provide default labels specific
+    to their data structure.
+
+    This object is expected to be used internally in the ExperimentData.
+    """
+
+    def __init__(self, init_values=None):
+        """ThreadSafeContainer constructor."""
+        self._columns = self._default_columns()
+        self._extra = []
+        super().__init__(init_values)
+
+    @classmethod
+    def _default_columns(cls) -> List[str]:
+        return []
+
+    def _init_container(self, init_values: Optional[Union[Dict, pd.DataFrame]] = None):
+        """Initialize the container."""
+        if init_values is None:
+            return pd.DataFrame(columns=self.get_columns())
+        if isinstance(init_values, pd.DataFrame):
+            input_columns = list(init_values.columns)
+            if input_columns != self.get_columns():
+                raise ValueError(
+                    f"Input data frame contains unexpected columns {input_columns}. "
+                    f"{self.__class__.__name__} defines {self.get_columns()} as default columns."
+                )
+            return init_values
+        if isinstance(init_values, dict):
+            return pd.DataFrame.from_dict(
+                data=init_values,
+                orient="index",
+                columns=self.get_columns(),
+            )
+        raise TypeError(f"Initial value of {type(init_values)} is not a valid data type.")
+
+    def get_columns(self) -> List[str]:
+        """Return current column names.
+
+        Returns:
+            List of column names.
+        """
+        with self._lock:
+            return self._columns.copy()
+
+    def add_columns(self, *new_columns: str, default_value: Any = None):
+        """Add new columns to the table.
+
+        This operation mutates the current container.
+
+        Args:
+            new_columns: Names of the columns to add.
+            default_value: Default value to fill the added columns with.
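+
+        As a minimal sketch (the column name ``"new_column"`` is hypothetical):
+
+        .. code-block:: python
+
+            table = ThreadSafeDataFrame()
+            table.add_columns("new_column", default_value=0.0)
+            assert "new_column" in table.get_columns()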
+ """ + with self._lock: + # Order sensitive + new_columns = [c for c in new_columns if c not in self.get_columns()] + if len(new_columns) == 0: + return + + # Update columns + for new_column in new_columns: + self._container.insert(len(self._container.columns), new_column, default_value) + self._columns.extend(new_columns) + self._extra.extend(new_columns) + + def clear(self): + """Remove all elements from this container.""" + with self._lock: + self._container = self._init_container() + self._columns = self._default_columns() + self._extra = [] + + def container( + self, + collapse_extra: bool = True, + ) -> pd.DataFrame: + """Return bare pandas dataframe. + + Args: + collapse_extra: Set True to show only default columns. + + Returns: + Bare pandas dataframe. This object is no longer thread safe. + """ + with self._lock: + container = self._container.copy() + + if collapse_extra: + return container[self._default_columns()] + return container + + def drop_entry( + self, + index: str, + ): + """Drop entry from the dataframe. + + Args: + index: Name of entry to drop. + + Raises: + ValueError: When index is not in this table. + """ + with self._lock: + if index not in self._container.index: + raise ValueError(f"Table index {index} doesn't exist in this table.") + self._container.drop(index, inplace=True) + + def get_entry( + self, + index: str, + ) -> pd.Series: + """Get entry from the dataframe. + + Args: + index: Name of entry to acquire. + + Returns: + Pandas Series of acquired entry. This doesn't mutate the table. + + Raises: + ValueError: When index is not in this table. + """ + with self._lock: + if index not in self._container.index: + raise ValueError(f"Table index {index} doesn't exist in this table.") + + return self._container.loc[index] + + def add_entry( + self, + index: str, + **kwargs, + ) -> pd.Series: + """Add new entry to the dataframe. + + Args: + index: Name of this entry. Must be unique in this table. + kwargs: Description of new entry to register. + + Returns: + Pandas Series of added entry. This doesn't mutate the table. + + Raises: + ValueError: When index is not unique in this table. + """ + with self._lock: + if index in self._container.index: + raise ValueError(f"Table index {index} already exists in the table.") + + if kwargs.keys() - set(self.get_columns()): + self.add_columns(*kwargs.keys()) + + template = dict.fromkeys(self.get_columns()) + template.update(kwargs) + + if not isinstance(index, str): + index = str(index) + self._container.loc[index] = list(template.values()) + + return self._container.iloc[-1] + + def _repr_html_(self) -> Union[str, None]: + """Return HTML representation of this dataframe.""" + with self._lock: + # Remove underscored columns. + return self._container._repr_html_() + + def __json_encode__(self) -> Dict[str, Any]: + with self._lock: + return { + "class": "ThreadSafeDataFrame", + "data": self._container.to_dict(orient="index"), + "columns": self._columns, + "extra": self._extra, + } + + @classmethod + def __json_decode__(cls, value: Dict[str, Any]) -> "ThreadSafeDataFrame": + if not value.get("class", None) == "ThreadSafeDataFrame": + raise ValueError("JSON decoded value for ThreadSafeDataFrame is not valid class type.") + + instance = object.__new__(cls) + # Need to update self._columns first to set extra columns in the dataframe container. 
+        instance._columns = value.get("columns", cls._default_columns())
+        instance._extra = value.get("extra", [])
+        instance._lock = threading.RLock()
+        instance._container = instance._init_container(init_values=value.get("data", {}))
+        return instance
diff --git a/qiskit_experiments/framework/__init__.py b/qiskit_experiments/framework/__init__.py
index f76c4cad55..c9a480e09e 100644
--- a/qiskit_experiments/framework/__init__.py
+++ b/qiskit_experiments/framework/__init__.py
@@ -86,6 +86,7 @@
     AnalysisStatus
     AnalysisResult
     AnalysisResultData
+    AnalysisResultTable
     ExperimentConfig
     AnalysisConfig
     ExperimentEncoder
@@ -137,6 +138,7 @@
 from .backend_timing import BackendTiming
 from .configs import ExperimentConfig, AnalysisConfig
 from .analysis_result_data import AnalysisResultData
+from .analysis_result_table import AnalysisResultTable
 from .experiment_data import ExperimentData
 from .composite import (
     ParallelExperiment,
diff --git a/qiskit_experiments/framework/analysis_result_data.py b/qiskit_experiments/framework/analysis_result_data.py
index b4d6f6aac6..e957bea336 100644
--- a/qiskit_experiments/framework/analysis_result_data.py
+++ b/qiskit_experiments/framework/analysis_result_data.py
@@ -16,6 +16,9 @@
 import logging
 from typing import Optional, Dict, Any, List

+from qiskit_experiments.database_service.device_component import DeviceComponent
+
+
 LOG = logging.getLogger(__name__)


@@ -23,14 +26,70 @@ class AnalysisResultData:
     """Dataclass for experiment analysis results"""

-    # TODO: move stderr and unit into custom value class
     name: str
     value: Any
+    experiment: Optional[str] = None
     chisq: Optional[float] = None
     quality: Optional[str] = None
+    experiment_id: Optional[str] = None
+    result_id: Optional[str] = None
+    tags: List = dataclasses.field(default_factory=list)
+    backend: Optional[str] = None
+    run_time: Optional[str] = None
+    created_time: Optional[str] = None
     extra: Dict[str, Any] = dataclasses.field(default_factory=dict, hash=False, compare=False)
     device_components: List = dataclasses.field(default_factory=list)

+    @classmethod
+    def from_table_element(
+        cls,
+        name: str,
+        value: Any,
+        experiment: Optional[str] = None,
+        components: Optional[List[DeviceComponent]] = None,
+        quality: Optional[str] = None,
+        experiment_id: Optional[str] = None,
+        result_id: Optional[str] = None,
+        tags: Optional[List[str]] = None,
+        backend: Optional[str] = None,
+        run_time: Optional[str] = None,
+        created_time: Optional[str] = None,
+        **extra,
+    ):
+        """A factory method that creates AnalysisResultData from a single element
+        of AnalysisResultTable.
+
+        Args:
+            name: Name of this entry.
+            value: Result value.
+            experiment: Type of experiment.
+            components: Device components that the experiment was run on.
+            quality: Quality of this result.
+            experiment_id: ID of the associated experiment.
+            result_id: Unique ID of this data entry in the storage.
+            tags: List of tags.
+            backend: Name of the device that the experiment was run on.
+            run_time: The time when the experiment was run.
+            created_time: The time when this value was computed.
+            **extra: Extra information.
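+
+        Returns:
+            An ``AnalysisResultData`` instance populated with the given table element.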
+ """ + chisq = extra.pop("chisq", None) + + return AnalysisResultData( + name=name, + value=value, + experiment=experiment, + chisq=chisq, + quality=quality, + experiment_id=experiment_id, + result_id=result_id, + tags=tags, + backend=backend, + run_time=run_time, + created_time=created_time, + device_components=components, + extra=extra, + ) + def __str__(self): out = f"{self.name}:" out += f"\n- value:{self.value}" @@ -47,3 +106,35 @@ def __str__(self): def __iter__(self): """Return iterator of data fields (attr, value)""" return iter((field.name, getattr(self, field.name)) for field in dataclasses.fields(self)) + + +def as_table_element( + result_data: AnalysisResultData, +) -> Dict[str, Any]: + """Python dataclass as_dict-like function to return + canonical data for analysis AnalysisResultTable. + + Args: + result_data: AnalysisResultData dataclass to format. + + Returns: + Formatted data representation in dictionary format. + """ + out = { + "name": result_data.name, + "experiment": result_data.experiment, + "components": result_data.device_components, + "value": result_data.value, + "quality": result_data.quality, + "experiment_id": result_data.experiment_id, + "result_id": result_data.result_id, + "tags": result_data.tags, + "backend": result_data.backend, + "run_time": result_data.run_time, + "created_time": result_data.created_time, + } + if result_data.chisq is not None: + out["chisq"] = result_data.chisq + out.update(result_data.extra) + + return out diff --git a/qiskit_experiments/framework/analysis_result_table.py b/qiskit_experiments/framework/analysis_result_table.py new file mode 100644 index 0000000000..053655a2a7 --- /dev/null +++ b/qiskit_experiments/framework/analysis_result_table.py @@ -0,0 +1,176 @@ +# This code is part of Qiskit. +# +# (C) Copyright IBM 2023. +# +# This code is licensed under the Apache License, Version 2.0. You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +# +# Any modifications or derivative works of this code must retain this +# copyright notice, and modified files need to carry a notice indicating +# that they have been altered from the originals. + +"""Table representation of analysis results.""" + +import logging +import re +import uuid +import warnings +from typing import List, Union, Optional + +import pandas as pd + +from qiskit_experiments.database_service.utils import ThreadSafeDataFrame + +LOG = logging.getLogger(__name__) + + +class AnalysisResultTable(ThreadSafeDataFrame): + """Table form container of analysis results. + + This table is a dataframe wrapper with the thread-safe mechanism with predefined columns. + This object is attached to the :class:`.ExperimentData` container to store + analysis results. Each table row contains series of metadata in addition to the + result value itself. + + User can rely on the dataframe filtering mechanism to analyze large scale experiment + results, e.g. massive parallel experiment and batch experiment outcomes, efficiently. + See `pandas dataframe documentation `_ + for more details. 
+ """ + + VALID_ID_REGEX = re.compile(r"\A(?P\w{8})-\w{4}-\w{4}-\w{4}-\w{12}\Z") + + @classmethod + def _default_columns(cls) -> List[str]: + return [ + "name", + "experiment", + "components", + "value", + "quality", + "experiment_id", + "result_id", + "tags", + "backend", + "run_time", + "created_time", + ] + + def result_ids(self) -> List[str]: + """Return all result IDs in this table.""" + with self._lock: + return self._container["result_id"].to_list() + + def filter_columns(self, columns: Union[str, List[str]]) -> List[str]: + """Filter columns names available in this table. + + Args: + columns: Specifying a set of columns to return. You can pass a list of each + column name to return, otherwise builtin column groups are available. + + * "all": Return all columns, including metadata to communicate + with experiment service, such as entry IDs. + * "default": Return columns including analysis result with supplementary + information about experiment. + * "minimal": Return only analysis subroutine returns. + + Raises: + ValueError: When column is given in string which doesn't match with any builtin group. + """ + with self._lock: + if columns == "all": + return self._columns + if columns == "default": + return [ + "name", + "experiment", + "components", + "value", + "quality", + "backend", + "run_time", + ] + self._extra + if columns == "minimal": + return [ + "name", + "components", + "value", + "quality", + ] + self._extra + if not isinstance(columns, str): + out = [] + for column in columns: + if column in self._columns: + out.append(column) + else: + warnings.warn( + f"Specified column name {column} does not exist in this table.", + UserWarning, + ) + return out + raise ValueError( + f"Column group {columns} is not valid name. Use either 'all', 'default', 'minimal'." + ) + + # pylint: disable=arguments-renamed + def add_entry( + self, + result_id: Optional[str] = None, + **kwargs, + ) -> pd.Series: + """Add new entry to the table. + + Args: + result_id: Result ID. Automatically generated when not provided. + This must be valid hexadecimal UUID string. + kwargs: Description of new entry to register. + + Returns: + Pandas Series of added entry. This doesn't mutate the table. + + Raises: + ValueError: When the truncated result id causes a collision in the table. + """ + if not result_id: + result_id = self._unique_table_index() + + matched = self.VALID_ID_REGEX.match(result_id) + if matched is None: + warnings.warn( + f"The result ID {result_id} is not a valid result ID string. " + "This entry might fail in saving with the experiment service.", + UserWarning, + ) + short_id = result_id[:8] + else: + # Short unique index is generated from result id. + # Showing full result id unnecessary occupies horizontal space of the html table. + # This mechanism is similar with the github commit hash. + short_id = matched.group("short_id") + + with self._lock: + if short_id in self._container.index: + raise ValueError( + f"The short ID of the result_id '{short_id}' already exists in the " + "experiment data. Please use another ID to avoid index collision." + ) + + return super().add_entry( + index=short_id, + result_id=result_id, + **kwargs, + ) + + def _unique_table_index(self): + """Generate unique UUID which is unique in the table with first 8 characters.""" + with self._lock: + n = 0 + while n < 1000: + tmp_id = str(uuid.uuid4()) + if tmp_id[:8] not in self._container.index: + return tmp_id + raise RuntimeError( + "Unique result_id string cannot be prepared for this table within 1000 trials. 
" + "Reduce number of entries, or manually provide a unique result_id." + ) diff --git a/qiskit_experiments/framework/base_analysis.py b/qiskit_experiments/framework/base_analysis.py index 53101a0856..7122b6f548 100644 --- a/qiskit_experiments/framework/base_analysis.py +++ b/qiskit_experiments/framework/base_analysis.py @@ -15,15 +15,17 @@ from abc import ABC, abstractmethod import copy from collections import OrderedDict +from datetime import datetime from typing import List, Tuple, Union, Dict +from dateutil import tz + from qiskit_experiments.database_service.device_component import Qubit from qiskit_experiments.framework import Options from qiskit_experiments.framework.store_init_args import StoreInitArgs -from qiskit_experiments.framework.experiment_data import ExperimentData +from qiskit_experiments.framework.experiment_data import ExperimentData, FigureData from qiskit_experiments.framework.configs import AnalysisConfig -from qiskit_experiments.framework.analysis_result_data import AnalysisResultData -from qiskit_experiments.framework.analysis_result import AnalysisResult +from qiskit_experiments.framework.analysis_result_data import AnalysisResultData, as_table_element class BaseAnalysis(ABC, StoreInitArgs): @@ -153,8 +155,6 @@ def run( if not replace_results and _requires_copy(experiment_data): experiment_data = experiment_data.copy() - experiment_components = self._get_experiment_components(experiment_data) - # Set Analysis options if not options: analysis = self @@ -162,23 +162,52 @@ def run( analysis = self.copy() analysis.set_options(**options) - def run_analysis(expdata): + def run_analysis(expdata: ExperimentData): # Clearing previous analysis data experiment_data._clear_results() - # making new analysis + + # Making new analysis results, figures = analysis._run_analysis(expdata) - # Add components - analysis_results = [ - analysis._format_analysis_result( - result, expdata.experiment_id, experiment_components - ) - for result in results - ] - # Update experiment data with analysis results - if analysis_results: - expdata.add_analysis_results(analysis_results) + + if results: + for result in results: + # Populate missing data fields + if not result.experiment_id: + result.experiment_id = expdata.experiment_id + if not result.experiment: + result.experiment = expdata.experiment_type + if not result.device_components: + result.device_components = self._get_experiment_components(expdata) + if not result.backend: + result.backend = expdata.backend_name + if not result.created_time: + result.created_time = datetime.now(tz.tzlocal()) + if not result.run_time: + result.run_time = expdata.running_time + + # To canonical kwargs to add to the analysis table. + table_format = as_table_element(result) + + # Remove result_id to make sure the id is unique in the scope of the container. + # This will let the container generate a unique id. 
+ del table_format["result_id"] + + expdata.add_analysis_results(**table_format) + if figures: - expdata.add_figures(figures, figure_names=self.options.figure_names) + figure_to_add = [] + for figure in figures: + if not isinstance(figure, FigureData): + qubits_repr = "_".join( + map(str, expdata.metadata.get("device_components", [])[:5]) + ) + short_id = expdata.experiment_id[:8] + figure = FigureData( + figure=figure, + name=f"{expdata.experiment_type}_{qubits_repr}_{short_id}.svg", + ) + figure_to_add.append(figure) + expdata.add_figures(figure_to_add, figure_names=self.options.figure_names) experiment_data.add_analysis_callback(run_analysis) @@ -195,30 +224,6 @@ def _get_experiment_components(self, experiment_data: ExperimentData): return experiment_components - def _format_analysis_result(self, data, experiment_id, experiment_components=None): - """Format run analysis result to DbAnalysisResult""" - device_components = [] - if data.device_components: - device_components = data.device_components - elif experiment_components: - device_components = experiment_components - - if isinstance(data, AnalysisResult): - # Update device components and experiment id - data.device_components = device_components - data.experiment_id = experiment_id - return data - - return AnalysisResult( - name=data.name, - value=data.value, - device_components=device_components, - experiment_id=experiment_id, - chisq=data.chisq, - quality=data.quality, - extra=data.extra, - ) - @abstractmethod def _run_analysis( self, diff --git a/qiskit_experiments/framework/composite/composite_analysis.py b/qiskit_experiments/framework/composite/composite_analysis.py index 3e030ea993..85e8baf0a0 100644 --- a/qiskit_experiments/framework/composite/composite_analysis.py +++ b/qiskit_experiments/framework/composite/composite_analysis.py @@ -140,15 +140,18 @@ def _run_analysis(self, experiment_data: ExperimentData): # Optionally flatten results from all component experiments # for adding to the main experiment data container if self._flatten_results: - return self._combine_results(component_expdata) - + analysis_results, figures = self._combine_results(component_expdata) + for res in analysis_results: + # Override experiment ID because entries are flattened + res.experiment_id = experiment_data.experiment_id + return analysis_results, figures return [], [] def _component_experiment_data(self, experiment_data: ExperimentData) -> List[ExperimentData]: """Return a list of marginalized experiment data for component experiments. Args: - experiment_data: a composite experiment experiment data container. + experiment_data: a composite experiment data container. Returns: The list of analysis-ready marginalized experiment data for each @@ -340,7 +343,8 @@ def _set_flatten_results(self): analysis._set_flatten_results() def _combine_results( - self, component_experiment_data: List[ExperimentData] + self, + component_experiment_data: List[ExperimentData], ) -> Tuple[List[AnalysisResultData], List["matplotlib.figure.Figure"]]: """Combine analysis results from component experiment data. 
@@ -355,15 +359,22 @@ def _combine_results(
         """
         analysis_results = []
         figures = []
-        for i, sub_expdata in enumerate(component_experiment_data):
+        for sub_expdata in component_experiment_data:
             figures += sub_expdata._figures.values()
-            for result in sub_expdata.analysis_results():
-                # Add metadata to distinguish the component experiment
-                # the result was generated from
-                result.extra["component_experiment"] = {
-                    "experiment_type": sub_expdata.experiment_type,
-                    "component_index": i,
-                }
-                analysis_results.append(result)
+
+            # Convert dataframe Series back into AnalysisResultData.
+            # This is due to the limitation that _run_analysis must return
+            # List[AnalysisResultData], and some composite analyses such as TphiAnalysis
+            # override this method to return extra quantities computed from the
+            # sub analysis results. This produces unnecessary data conversion.
+            # The _run_analysis mechanism just complicates the entire logic.
+            # Since it's impossible to deprecate the usage of this protected method,
+            # we should implement a new CompositeAnalysis class with much more efficient
+            # internal logic. Note that the child data structure is no longer necessary
+            # because the dataframe offers more efficient data filtering mechanisms.
+            analysis_table = sub_expdata.analysis_results(columns="all", dataframe=True)
+            for _, series in analysis_table.iterrows():
+                data = AnalysisResultData.from_table_element(**series.to_dict())
+                analysis_results.append(data)

         return analysis_results, figures
diff --git a/qiskit_experiments/framework/experiment_data.py b/qiskit_experiments/framework/experiment_data.py
index bbd11cd6c7..5e12163fc9 100644
--- a/qiskit_experiments/framework/experiment_data.py
+++ b/qiskit_experiments/framework/experiment_data.py
@@ -21,7 +21,7 @@
 from datetime import datetime, timezone
 from concurrent import futures
 from threading import Event
-from functools import wraps
+from functools import wraps, singledispatch
 from collections import deque
 import contextlib
 import copy
@@ -33,6 +33,7 @@
 import json
 import traceback
 import numpy as np
+import pandas as pd
 from dateutil import tz
 from matplotlib import pyplot
 from matplotlib.figure import Figure as MatplotlibFigure
@@ -40,9 +41,14 @@
 from qiskit.providers.jobstatus import JobStatus, JOB_FINAL_STATES
 from qiskit.exceptions import QiskitError
 from qiskit.providers import Job, Backend, Provider
+from qiskit.utils.deprecation import deprecate_arg

-from qiskit_ibm_experiment import IBMExperimentService
-from qiskit_ibm_experiment import ExperimentData as ExperimentDataclass
+from qiskit_ibm_experiment import (
+    IBMExperimentService,
+    ExperimentData as ExperimentDataclass,
+    AnalysisResultData as AnalysisResultDataclass,
+    ResultQuality,
+)
 from qiskit_experiments.framework.json import ExperimentEncoder, ExperimentDecoder
 from qiskit_experiments.database_service.utils import (
     qiskit_version,
@@ -50,7 +56,10 @@
     ThreadSafeOrderedDict,
     ThreadSafeList,
 )
+from qiskit_experiments.database_service.device_component import to_component, DeviceComponent
 from qiskit_experiments.framework.analysis_result import AnalysisResult
+from qiskit_experiments.framework.analysis_result_data import AnalysisResultData
+from qiskit_experiments.framework.analysis_result_table import AnalysisResultTable
 from qiskit_experiments.framework import BackendData
 from qiskit_experiments.database_service.exceptions import (
     ExperimentDataError,
@@ -132,7 +141,8 @@ def __init__(self, figure, name=None, metadata=None):

         Args:
             figure: the raw figure itself.
Can be SVG or matplotlib.Figure. name: Optional, the name of the figure. - metadata: Optional, any metadata to be stored with the figure.""" + metadata: Optional, any metadata to be stored with the figure. + """ self.figure = figure self._name = name self.metadata = metadata or {} @@ -186,6 +196,9 @@ def _repr_svg_(self): return None +_FigureT = Union[str, bytes, MatplotlibFigure, FigureData] + + class ExperimentData: """Experiment data container class. @@ -326,6 +339,7 @@ def __init__( # job handling related self._jobs = ThreadSafeOrderedDict(job_ids) self._job_futures = ThreadSafeOrderedDict() + self._running_time = None self._analysis_callbacks = ThreadSafeOrderedDict() self._analysis_futures = ThreadSafeOrderedDict() # Set 2 workers for analysis executor so there can be 1 actively running @@ -338,7 +352,7 @@ def __init__( # data storage self._result_data = ThreadSafeList() self._figures = ThreadSafeOrderedDict(self._db_data.figure_names) - self._analysis_results = ThreadSafeOrderedDict() + self._analysis_results = AnalysisResultTable() self._deleted_figures = deque() self._deleted_analysis_results = deque() @@ -407,7 +421,7 @@ def creation_datetime(self) -> datetime: in the local timezone. """ - return utc_to_local(self._db_data.creation_datetime) + return self._db_data.creation_datetime @property def start_datetime(self) -> datetime: @@ -417,11 +431,11 @@ def start_datetime(self) -> datetime: The timestamp when this experiment began running in the local timezone. """ - return utc_to_local(self._db_data.start_datetime) + return self._db_data.start_datetime @start_datetime.setter def start_datetime(self, new_start_datetime: datetime) -> None: - self._db_data.start_datetime = local_to_utc(new_start_datetime) + self._db_data.start_datetime = new_start_datetime @property def updated_datetime(self) -> datetime: @@ -432,11 +446,22 @@ def updated_datetime(self) -> datetime: in the local timezone. """ - return utc_to_local(self._db_data.updated_datetime) + return self._db_data.updated_datetime + + @property + def running_time(self) -> datetime: + """Return the running time of this experiment data. + + The running time is the time the latest successful job started running on + the remote quantum machine. This can change as more jobs finish. + + """ + return self._running_time @property def end_datetime(self) -> datetime: """Return the end datetime of this experiment data. + The end datetime is the time the latest job data was added without errors; this can change as more jobs finish. @@ -445,11 +470,11 @@ def end_datetime(self) -> datetime: in the local timezone. 
""" - return utc_to_local(self._db_data.end_datetime) + return self._db_data.end_datetime @end_datetime.setter def end_datetime(self, new_end_datetime: datetime) -> None: - self._db_data.end_datetime = local_to_utc(new_end_datetime) + self._db_data.end_datetime = new_end_datetime @property def hub(self) -> str: @@ -660,9 +685,8 @@ def hgp(self, new_hgp: str) -> None: def _clear_results(self): """Delete all currently stored analysis results and figures""" # Schedule existing analysis results for deletion next save call - for key in self._analysis_results.keys(): - self._deleted_analysis_results.append(key) - self._analysis_results = ThreadSafeOrderedDict() + self._deleted_analysis_results.extend(list(self._analysis_results.result_ids())) + self._analysis_results.clear() # Schedule existing figures for deletion next save call for key in self._figures.keys(): self._deleted_figures.append(key) @@ -727,10 +751,6 @@ def auto_save(self, save_val: bool) -> None: if save_val is True: self.save(save_children=False) self._auto_save = save_val - for res in self._analysis_results.values(): - # Setting private variable directly to avoid duplicate save. This - # can be removed when we start tracking changes. - res._auto_save = save_val for data in self.child_data(): data.auto_save = save_val @@ -890,6 +910,10 @@ def _add_job_data( jid = job.job_id() try: job_result = job.result() + try: + self._running_time = job.time_per_step().get("running", None) + except AttributeError: + pass self._add_result_data(job_result, jid) LOG.debug("Job data added [Job ID: %s]", jid) # sets the endtime to be the time the last successful job was added @@ -1110,9 +1134,9 @@ def data( @do_auto_save def add_figures( self, - figures: Union[str, bytes, pyplot.Figure, list], - figure_names: Optional[Union[str, list]] = None, - overwrite: Optional[bool] = False, + figures: Union[_FigureT, List[_FigureT]], + figure_names: Optional[Union[str, List[str]]] = None, + overwrite: bool = False, save_figure: Optional[bool] = None, ) -> Union[str, List[str]]: """Add the experiment figure. @@ -1302,28 +1326,106 @@ def figure( return num_bytes return figure_data + @deprecate_arg( + name="results", + since="0.6", + additional_msg="Use keyword arguments rather than creating an AnalysisResult object.", + package_name="qiskit-experiments", + pending=True, + ) @do_auto_save def add_analysis_results( self, - results: Union[AnalysisResult, List[AnalysisResult]], + results: Optional[Union[AnalysisResult, List[AnalysisResult]]] = None, + *, + name: Optional[str] = None, + value: Optional[Any] = None, + quality: Optional[str] = None, + components: Optional[List[DeviceComponent]] = None, + experiment: Optional[str] = None, + experiment_id: Optional[str] = None, + result_id: Optional[str] = None, + tags: Optional[List[str]] = None, + backend: Optional[str] = None, + run_time: Optional[datetime] = None, + created_time: Optional[datetime] = None, + **extra_values, ) -> None: """Save the analysis result. Args: results: Analysis results to be saved. + name: Name of the result entry. + value: Analyzed quantity. + quality: Quality of the data. + components: Associated device components. + experiment: String identifier of the associated experiment. + experiment_id: ID of the associated experiment. + result_id: ID of this analysis entry. If not set a random UUID is generated. + tags: List of arbitrary tags. + backend: Name of associated backend. + run_time: The date time when the experiment started to run on the device. 
+ created_time: The date time when this analysis is performed. + extra_values: Arbitrary keyword arguments for supplementary information. + New dataframe columns are created in the analysis result table with added keys. """ - if not isinstance(results, list): - results = [results] - - for result in results: - self._analysis_results[result.result_id] = result - - with contextlib.suppress(ExperimentDataError): - result.service = self.service - result.auto_save = self.auto_save - - if self.auto_save and self._service: - result.save() + if results is not None: + # TODO deprecate this path + if not isinstance(results, list): + results = [results] + for result in results: + extra_values = result.extra.copy() + if result.chisq is not None: + # Move chisq to extra. + # This is not global outcome, e.g. QPT doesn't provide chisq. + extra_values["chisq"] = result.chisq + experiment = extra_values.pop("experiment", self.experiment_type) + backend = extra_values.pop("backend", self.backend_name) + run_time = extra_values.pop("run_time", self.running_time) + created_time = extra_values.pop("created_time", None) + self._analysis_results.add_entry( + name=result.name, + value=result.value, + quality=result.quality, + components=result.device_components, + experiment=experiment, + experiment_id=result.experiment_id, + result_id=result.result_id, + tags=result.tags, + backend=backend, + run_time=run_time, + created_time=created_time, + **extra_values, + ) + if self.auto_save: + result.save() + else: + experiment = experiment or self.experiment_type + experiment_id = experiment_id or self.experiment_id + tags = tags or [] + backend = backend or self.backend_name + + series = self._analysis_results.add_entry( + result_id=result_id, + name=name, + value=value, + quality=quality, + components=components, + experiment=experiment, + experiment_id=experiment_id, + tags=tags or [], + backend=backend, + run_time=run_time, # TODO add job RUNNING time + created_time=created_time, + **extra_values, + ) + if self.auto_save: + service_result = _series_to_service_result( + series=series, + service=self._service, + auto_save=False, + ) + service_result.save() @do_auto_save def delete_analysis_result( @@ -1339,24 +1441,29 @@ def delete_analysis_result( Analysis result ID. Raises: - ExperimentEntryNotFound: If analysis result not found. + ExperimentEntryNotFound: If analysis result not found or multiple entries are found. """ + # Retrieve from DB if needed. + to_delete = self.analysis_results( + index=result_key, + block=False, + columns="all", + dataframe=True, + ) + if not isinstance(to_delete, pd.Series): + raise ExperimentEntryNotFound( + f"Multiple entries are found with result_key = {result_key}. " + "Try another key that can uniquely determine entry to delete." + ) - if isinstance(result_key, int): - result_key = self._analysis_results.keys()[result_key] - else: - # Retrieve from DB if needed. 
- result_key = self.analysis_results(result_key, block=False).result_id - - del self._analysis_results[result_key] - self._deleted_analysis_results.append(result_key) - + self._analysis_results.drop_entry(str(to_delete.name)) if self._service and self.auto_save: with service_exception_to_warning(): - self.service.delete_analysis_result(result_id=result_key) - self._deleted_analysis_results.remove(result_key) + self.service.delete_analysis_result(result_id=to_delete.result_id) + else: + self._deleted_analysis_results.append(to_delete.result_id) - return result_key + return to_delete.result_id def _retrieve_analysis_results(self, refresh: bool = False): """Retrieve service analysis results. @@ -1366,24 +1473,48 @@ def _retrieve_analysis_results(self, refresh: bool = False): an experiment service is available. """ # Get job results if missing experiment data. - if self.service and (not self._analysis_results or refresh): + if self.service and (len(self._analysis_results) == 0 or refresh): retrieved_results = self.service.analysis_results( experiment_id=self.experiment_id, limit=None, json_decoder=self._json_decoder ) for result in retrieved_results: - result_id = result.result_id - - self._analysis_results[result_id] = AnalysisResult(service=self.service) - self._analysis_results[result_id].set_data(result) - self._analysis_results[result_id]._created_in_db = True + # Canonicalize IBM specific data structure. + # TODO define proper data schema on frontend and delegate this to service. + cano_quality = AnalysisResult.RESULT_QUALITY_TO_TEXT.get(result.quality, "unknown") + cano_components = [to_component(c) for c in result.device_components] + extra = result.result_data["_extra"] + if result.chisq is not None: + extra["chisq"] = result.chisq + self._analysis_results.add_entry( + name=result.result_type, + value=result.result_data["_value"], + quality=cano_quality, + components=cano_components, + experiment_id=result.experiment_id, + result_id=result.result_id, + tags=result.tags, + backend=result.backend_name, + created_time=result.creation_datetime, + **extra, + ) + @deprecate_arg( + name="dataframe", + deprecation_description="Setting ``dataframe`` to False in analysis_results", + since="0.6", + package_name="qiskit-experiments", + pending=True, + predicate=lambda dataframe: not dataframe, + ) def analysis_results( self, index: Optional[Union[int, slice, str]] = None, refresh: bool = False, block: bool = True, timeout: Optional[float] = None, - ) -> Union[AnalysisResult, List[AnalysisResult]]: + columns: Union[str, List[str]] = "default", + dataframe: bool = False, + ) -> Union[AnalysisResult, List[AnalysisResult], pd.DataFrame, pd.Series]: """Return analysis results associated with this experiment. Args: @@ -1394,16 +1525,26 @@ def analysis_results( * int: Specific index of the analysis results. * slice: A list slice of indexes. * str: ID or name of the analysis result. + refresh: Retrieve the latest analysis results from the server, if an experiment service is available. block: If True block for any analysis callbacks to finish running. timeout: max time in seconds to wait for analysis callbacks to finish running. + columns: Specifying a set of columns to return. You can pass a list of each + column name to return, otherwise builtin column groups are available. + + * "all": Return all columns, including metadata to communicate + with experiment service, such as entry IDs. + * "default": Return columns including analysis result with supplementary + information about experiment. 
+ * "minimal": Return only analysis subroutine returns. + + dataframe: Set True to return analysis results in the dataframe format. Returns: Analysis results for this experiment. Raises: - TypeError: If the input `index` has an invalid type. ExperimentEntryNotFound: If the entry cannot be found. """ if block: @@ -1411,42 +1552,41 @@ def analysis_results( self._analysis_futures.values(), name="analysis", timeout=timeout ) self._retrieve_analysis_results(refresh=refresh) - if index is None: - return self._analysis_results.values() - - def _make_not_found_message(index: Union[int, slice, str]) -> str: - """Helper to make error message for index not found""" - msg = [f"Analysis result {index} not found."] - errors = self.errors() - if errors: - msg.append(f"Errors: {errors}") - return "\n".join(msg) - - if isinstance(index, int): - if index >= len(self._analysis_results.values()): - raise ExperimentEntryNotFound(_make_not_found_message(index)) - return self._analysis_results.values()[index] - if isinstance(index, slice): - results = self._analysis_results.values()[index] - if not results: - raise ExperimentEntryNotFound(_make_not_found_message(index)) - return results - if isinstance(index, str): - # Check by result ID - if index in self._analysis_results: - return self._analysis_results[index] - # Check by name - filtered = [ - result for result in self._analysis_results.values() if result.name == index - ] - if not filtered: - raise ExperimentEntryNotFound(_make_not_found_message(index)) - if len(filtered) == 1: - return filtered[0] - else: - return filtered - raise TypeError(f"Invalid index type {type(index)}.") + out = self._analysis_results.container(collapse_extra=False) + + if index is not None: + out = _filter_analysis_results(index, out) + if out is None: + msg = [f"Analysis result {index} not found."] + errors = self.errors() + if errors: + msg.append(f"Errors: {errors}") + raise ExperimentEntryNotFound("\n".join(msg)) + + if dataframe: + valid_columns = self._analysis_results.filter_columns(columns) + out = out[valid_columns] + if len(out) == 1 and index is not None: + # For backward compatibility. + # One can directly access attributes with Series. e.g. out.value + return out.iloc[0] + return out + + # Convert back into List[AnalysisResult] which is payload for IBM experiment service. + # This will be removed in future version. + service_results = [] + for _, series in out.iterrows(): + service_results.append( + _series_to_service_result( + series=series, + service=self._service, + auto_save=self._auto_save, + ) + ) + if len(service_results) == 1 and index is not None: + return service_results[0] + return service_results # Save and load from the database @@ -1575,8 +1715,15 @@ def save( return analysis_results_to_create = [] - for result in self._analysis_results.values(): - analysis_results_to_create.append(result._db_data) + for _, series in self._analysis_results.container(collapse_extra=False).iterrows(): + # TODO We should support saving entire dataframe + # Calling API per entry takes huge amount of time. 
+ legacy_result = _series_to_service_result( + series=series, + service=self._service, + auto_save=False, + ) + analysis_results_to_create.append(legacy_result._db_data) try: self.service.create_analysis_results( data=analysis_results_to_create, @@ -1584,8 +1731,6 @@ def save( json_encoder=self._json_encoder, max_workers=max_workers, ) - for result in self._analysis_results.values(): - result._created_in_db = True except Exception as ex: # pylint: disable=broad-except # Don't automatically fail the experiment just because its data cannot be saved. LOG.error("Unable to save the experiment data: %s", traceback.format_exc()) @@ -2181,9 +2326,7 @@ def copy(self, copy_results: bool = True) -> "ExperimentData": # Copy results and figures. # This requires analysis callbacks to finish self._wait_for_futures(self._analysis_futures.values(), name="analysis") - with self._analysis_results.lock: - new_instance._analysis_results = ThreadSafeOrderedDict() - new_instance.add_analysis_results([result.copy() for result in self.analysis_results()]) + new_instance._analysis_results = self._analysis_results.copy_object() with self._figures.lock: new_instance._figures = ThreadSafeOrderedDict() new_instance.add_figures(self._figures.values()) @@ -2215,8 +2358,6 @@ def _set_service(self, service: IBMExperimentService, replace: bool = None) -> N if self._service and not replace: raise ExperimentDataError("An experiment service is already being used.") self._service = service - for result in self._analysis_results.values(): - result.service = service with contextlib.suppress(Exception): self.auto_save = self._service.options.get("auto_save", False) for data in self.child_data(): @@ -2321,6 +2462,7 @@ def __json_encode__(self): "_jobs": self._safe_serialize_jobs(), # Handle non-serializable objects "_experiment": self._experiment, "_child_data": self._child_data, + "_running_time": self._running_time, } # the attribute self._service in charge of the connection and communication with the # experiment db. It doesn't have meaning in the json format so there is no need to serialize @@ -2485,3 +2627,143 @@ def __getstate__(self): def __json_encode__(self): return self.__getstate__() + + +def _series_to_service_result( + series: pd.Series, + service: IBMExperimentService, + auto_save: bool, + source: Optional[Dict[str, Any]] = None, +) -> AnalysisResult: + """Helper function to convert dataframe to AnalysisResult payload for IBM experiment service. + + .. note:: + + Now :class:`.AnalysisResult` is only used to save data in the experiment service. + All local operations must be done with :class:`.AnalysisResultTable` dataframe. + ExperimentData._analysis_results are totally decoupled from + the model of IBM experiment service until this function is implicitly called. + + Args: + series: Pandas dataframe Series (a row of dataframe). + service: Experiment service. + auto_save: Do auto save when entry value changes. + + Returns: + Legacy AnalysisResult payload. + """ + # TODO This must be done on experiment service rather than by client. + qe_result = AnalysisResultData.from_table_element(**series.replace({np.nan: None}).to_dict()) + + result_data = AnalysisResult.format_result_data( + value=qe_result.value, + extra=qe_result.extra, + chisq=qe_result.chisq, + source=source, + ) + + # Overwrite formatted result data dictionary with original objects. + # The format_result_data method implicitly deep copies input value and extra field, + # but it means the dictionary stores input objects with different object id. 
+ # This affects computation of error propagation with ufloats, because it + # recognizes the value correlation with object id. + # See test.curve_analysis.test_baseclass.TestCurveAnalysis.test_end_to_end_compute_new_entry. + result_data["_value"] = qe_result.value + result_data["_extra"] = qe_result.extra + + # IBM Experiment Service doesn't have data field for experiment and run time. + # These are added to extra field so that these data can be saved. + result_data["_extra"]["experiment"] = qe_result.experiment + result_data["_extra"]["run_time"] = qe_result.run_time + + try: + quality = ResultQuality(str(qe_result.quality).upper()) + except ValueError: + quality = "unknown" + + experiment_service_payload = AnalysisResultDataclass( + result_id=qe_result.result_id, + experiment_id=qe_result.experiment_id, + result_type=qe_result.name, + result_data=result_data, + device_components=list(map(to_component, qe_result.device_components)), + quality=quality, + tags=qe_result.tags, + backend_name=qe_result.backend, + creation_datetime=qe_result.created_time, + chisq=qe_result.chisq, + ) + + service_result = AnalysisResult() + service_result.set_data(experiment_service_payload) + + with contextlib.suppress(ExperimentDataError): + service_result.service = service + service_result.auto_save = auto_save + + return service_result + + +def _filter_analysis_results( + search_key: Union[int, slice, str], + data: pd.DataFrame, +) -> pd.DataFrame: + """Helper function to search result data for given key. + + Args: + search_key: Key to search for. + data: Full result dataframe. + + Returns: + Truncated dataframe. + """ + out = _search_data(search_key, data) + if isinstance(out, pd.Series): + return pd.DataFrame([out]) + return out + + +@singledispatch +def _search_data(search_key, data): + if search_key is None: + return data + raise TypeError( + f"Invalid search key {search_key}. " f"This must be either int, slice or str type." + ) + + +@_search_data.register +def _search_with_int( + search_key: int, + data: pd.DataFrame, +): + if search_key >= len(data): + return None + return data.iloc[search_key] + + +@_search_data.register +def _search_with_slice( + search_key: slice, + data: pd.DataFrame, +): + out = data[search_key] + if len(out) == 0: + return None + return out + + +@_search_data.register +def _search_with_str( + search_key: str, + data: pd.DataFrame, +): + if search_key in data.index: + # This key is table entry hash + return data.loc[search_key] + + # This key is name of entry + out = data[data["name"] == search_key] + if len(out) == 0: + return None + return out diff --git a/releasenotes/notes/add-dataframe-analysis-results-ec8863e826a70621.yaml b/releasenotes/notes/add-dataframe-analysis-results-ec8863e826a70621.yaml new file mode 100644 index 0000000000..bd349b8b2a --- /dev/null +++ b/releasenotes/notes/add-dataframe-analysis-results-ec8863e826a70621.yaml @@ -0,0 +1,33 @@ +--- +features: + - | + :class:`.ExperimentData` has been upgraded to store analysis result data in + a table format with the new inline container :class:`.AnalysisResultTable`. + In this release, the :meth:`.ExperimentData.analysis_results` method still returns + a conventional list of :class:`.AnalysisResult` for backward compatibility, + however, when you call the method with new argument ``dataframe=True`` it returns + analysis results all in one piece with the table format. For example, + + .. 
code-block:: python

        exp = StandardRB((0,), lengths, backend)
        experiment_data = exp.run().block_for_results()

        experiment_data.analysis_results(dataframe=True, columns="default")

    Information contained in the returned table can be filtered with the ``columns``
    argument, which may take either ``all``, ``default``, ``minimal``, or a list of
    column names.
    Returning a list of :class:`.AnalysisResult` will be deprecated in a future release
    along with the ``dataframe`` option.

    Related to this update, the :meth:`.ExperimentData.add_analysis_results` method now
    takes keyword arguments keyed on the table column names, in addition to the
    ``results`` argument, which is either an :class:`.AnalysisResult` or a list of them.
    This allows users and developers to bypass creating an :class:`.AnalysisResult`
    instance when registering a new entry in the :class:`.ExperimentData` instance.

    Note that the conventional :class:`.AnalysisResult` is originally a payload object
    for saving an analysis result in a remote database, as it implements a REST API
    for the IBM Experiment Service, which is not necessary at all in
    the context of experiment data analysis.
    In a future release, :class:`.AnalysisResult` will be hidden from Qiskit Experiments users.
diff --git a/test/database_service/test_db_experiment_data.py b/test/database_service/test_db_experiment_data.py
index 508e73ac72..6d6713248e 100644
--- a/test/database_service/test_db_experiment_data.py
+++ b/test/database_service/test_db_experiment_data.py
@@ -36,7 +36,6 @@
 from qiskit_experiments.framework import ExperimentData
 from qiskit_experiments.framework import AnalysisResult
 from qiskit_experiments.framework import BackendData
-from qiskit_experiments.framework.experiment_data import local_to_utc
 from qiskit_experiments.database_service.exceptions import (
     ExperimentDataError,
     ExperimentEntryNotFound,
@@ -148,7 +147,9 @@ def _callback(_exp_data):
                 [dat["counts"] for dat in _exp_data.data()], a_job.result().get_counts()
             )
             exp_data.add_figures(str.encode("hello world"))
-            exp_data.add_analysis_results(mock.MagicMock())
+            res = mock.MagicMock()
+            res.result_id = str(uuid.uuid4())
+            exp_data.add_analysis_results(res)
             nonlocal called_back
             called_back = True

@@ -450,28 +451,47 @@ def test_add_get_analysis_result(self):
         """Test adding and getting analysis results."""
         exp_data = ExperimentData(experiment_type="qiskit_test")
         results = []
-        for idx in range(5):
+        result_ids = list(map(str, range(5)))
+        for idx in result_ids:
             res = mock.MagicMock()
             res.result_id = idx
             results.append(res)
-            exp_data.add_analysis_results(res)
+            with self.assertWarns(UserWarning):
+                # This is an invalid result ID string and causes a warning
+                exp_data.add_analysis_results(res)

-        self.assertEqual(results, exp_data.analysis_results())
-        self.assertEqual(results[1], exp_data.analysis_results(1))
-        self.assertEqual(results[2:4], exp_data.analysis_results(slice(2, 4)))
-        self.assertEqual(results[4], exp_data.analysis_results(results[4].result_id))
+        # We cannot compare results with exp_data.analysis_results() directly.
+        # That would be too hacky since it tries to compare MagicMock with AnalysisResult.
+        self.assertEqual(
+            [res.result_id for res in exp_data.analysis_results()],
+            result_ids,
+        )
+        self.assertEqual(
+            exp_data.analysis_results(1).result_id,
+            result_ids[1],
+        )
+        self.assertEqual(
+            [res.result_id for res in exp_data.analysis_results(slice(2, 4))],
+            result_ids[2:4],
+        )

     def test_add_get_analysis_results(self):
         """Test adding and getting a list of analysis results."""
         exp_data = ExperimentData(experiment_type="qiskit_test")
         results = []
-        for idx in range(5):
+        result_ids = list(map(str, range(5)))
+        for idx in result_ids:
             res = mock.MagicMock()
             res.result_id = idx
             results.append(res)
-        exp_data.add_analysis_results(results)
+        with self.assertWarns(UserWarning):
+            # These are invalid result ID strings and cause a warning
+            exp_data.add_analysis_results(results)
+        get_result_ids = [res.result_id for res in exp_data.analysis_results()]

-        self.assertEqual(results, exp_data.analysis_results())
+        # We cannot compare results with exp_data.analysis_results() directly.
+        # That would be too hacky since it tries to compare MagicMock with AnalysisResult.
+        self.assertEqual(get_result_ids, result_ids)

     def test_delete_analysis_result(self):
         """Test deleting analysis result."""
@@ -480,7 +500,9 @@ def test_delete_analysis_result(self):
         for idx in range(3):
             res = mock.MagicMock()
             res.result_id = id_template.format(idx)
-            exp_data.add_analysis_results(res)
+            with self.assertWarns(UserWarning):
+                # This is an invalid result ID string and causes a warning
+                exp_data.add_analysis_results(res)

         subtests = [(0, id_template.format(0)), (id_template.format(2), id_template.format(2))]
         for del_key, res_id in subtests:
@@ -504,6 +526,7 @@ def test_save(self):
         service = mock.create_autospec(IBMExperimentService, instance=True)
         exp_data.add_figures(str.encode("hello world"))
         analysis_result = mock.MagicMock()
+        analysis_result.result_id = str(uuid.uuid4())
         exp_data.add_analysis_results(analysis_result)
         exp_data.service = service
         exp_data.save()
@@ -516,7 +539,9 @@ def test_save_delete(self):
         exp_data = ExperimentData(backend=self.backend, experiment_type="qiskit_test")
         service = mock.create_autospec(IBMExperimentService, instance=True)
         exp_data.add_figures(str.encode("hello world"))
-        exp_data.add_analysis_results(mock.MagicMock())
+        res = mock.MagicMock()
+        res.result_id = str(uuid.uuid4())
+        exp_data.add_analysis_results(res)
         exp_data.delete_analysis_result(0)
         exp_data.delete_figure(0)
         exp_data.service = service
@@ -545,6 +570,7 @@ def test_auto_save(self):
         )
         exp_data.auto_save = True
         mock_result = mock.MagicMock()
+        mock_result.result_id = str(uuid.uuid4())

         subtests = [
             # update function, update parameters, service called
@@ -1006,6 +1032,7 @@ def test_copy_metadata(self):
         exp_data = ExperimentData(experiment_type="qiskit_test")
         exp_data.add_data(self._get_job_result(1))
         result = mock.MagicMock()
+        result.result_id = str(uuid.uuid4())
         exp_data.add_analysis_results(result)
         copied = exp_data.copy(copy_results=False)
         self.assertEqual(exp_data.data(), copied.data())
@@ -1081,16 +1108,16 @@ def test_getters(self):
         data = ExperimentData()
         test_time = datetime.now()
         data._db_data.creation_datetime = test_time
-        self.assertEqual(data.creation_datetime, local_to_utc(test_time))
+        self.assertEqual(data.creation_datetime, test_time)

         test_time = test_time + timedelta(hours=1)
         data._db_data.start_datetime = test_time
-        self.assertEqual(data.start_datetime, local_to_utc(test_time))
+        self.assertEqual(data.start_datetime, test_time)

         test_time = test_time + timedelta(hours=1)
         data._db_data.end_datetime = test_time
-        self.assertEqual(data.end_datetime, local_to_utc(test_time))
+        self.assertEqual(data.end_datetime, test_time)

         test_time = test_time + timedelta(hours=1)
         data._db_data.updated_datetime = test_time
-        self.assertEqual(data.updated_datetime, local_to_utc(test_time))
+        self.assertEqual(data.updated_datetime, test_time)

         data._db_data.hub = "hub_name"
         data._db_data.group = "group_name"
diff --git a/test/extended_equality.py b/test/extended_equality.py
index 3cfbfba3ad..751763b8ee 100644
--- a/test/extended_equality.py
+++ b/test/extended_equality.py
@@ -21,12 +21,14 @@
 from typing import Any, List, Union

 import numpy as np
+import pandas as pd
 import uncertainties
 from lmfit import Model
 from multimethod import multimethod
 from qiskit_experiments.curve_analysis.curve_data import CurveFitResult
 from qiskit_experiments.data_processing import DataAction, DataProcessor
 from qiskit_experiments.database_service.utils import (
+    ThreadSafeDataFrame,
     ThreadSafeList,
     ThreadSafeOrderedDict,
 )
@@ -272,6 +274,24 @@ def _check_configurable_classes(
     return is_equivalent(data1.config(), data2.config(), **kwargs)


+@_is_equivalent_dispatcher.register
+def _check_dataframes(
+    data1: Union[pd.DataFrame, ThreadSafeDataFrame],
+    data2: Union[pd.DataFrame, ThreadSafeDataFrame],
+    **kwargs,
+):
+    """Check equality of data frames, which may involve Qiskit Experiments class values."""
+    if isinstance(data1, ThreadSafeDataFrame):
+        data1 = data1.container(collapse_extra=False)
+    if isinstance(data2, ThreadSafeDataFrame):
+        data2 = data2.container(collapse_extra=False)
+    return is_equivalent(
+        data1.to_dict(orient="index"),
+        data2.to_dict(orient="index"),
+        **kwargs,
+    )
+
+
 @_is_equivalent_dispatcher.register
 def _check_experiment_data(
     data1: ExperimentData,
diff --git a/test/framework/test_data_table.py b/test/framework/test_data_table.py
new file mode 100644
index 0000000000..3afead20f3
--- /dev/null
+++ b/test/framework/test_data_table.py
@@ -0,0 +1,215 @@
+# This code is part of Qiskit.
+#
+# (C) Copyright IBM 2023.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+ +"""Test case for data table.""" + +from test.base import QiskitExperimentsTestCase + +import uuid +import numpy as np +import pandas as pd + +from qiskit_experiments.database_service.utils import ThreadSafeDataFrame +from qiskit_experiments.framework.analysis_result_table import AnalysisResultTable + + +def _callable_thread_local_add_entry(args, thread_table): + """A test callable that is called from multi-thread.""" + index, kwargs = args + thread_table.add_entry(index, **kwargs) + + +class TestBaseTable(QiskitExperimentsTestCase): + """Test case for data frame base class.""" + + class TestTable(ThreadSafeDataFrame): + """A table class under test with test columns.""" + + @classmethod + def _default_columns(cls): + return ["value1", "value2", "value3"] + + def test_initializing_with_dict(self): + """Test initializing table with dictionary. Columns are filled with default.""" + table = TestBaseTable.TestTable( + { + "x": [1.0, 2.0, 3.0], + "y": [4.0, 5.0, 6.0], + } + ) + self.assertListEqual(table.get_columns(), ["value1", "value2", "value3"]) + + def test_raises_initializing_with_wrong_table(self): + """Test table cannot be initialized with non-default columns.""" + wrong_table = pd.DataFrame.from_dict( + data={"x": [1.0, 2.0], "y": [3.0, 4.0], "z": [5.0, 6.0]}, + orient="index", + columns=["wrong", "columns"], + ) + with self.assertRaises(ValueError): + # columns doesn't match with default_columns + TestBaseTable.TestTable(wrong_table) + + def test_get_entry(self): + """Test getting an entry from the table.""" + table = TestBaseTable.TestTable({"x": [1.0, 2.0, 3.0]}) + self.assertListEqual(table.get_entry("x").to_list(), [1.0, 2.0, 3.0]) + + def test_add_entry(self): + """Test adding data with default keys to table.""" + table = TestBaseTable.TestTable() + table.add_entry(index="x", value1=0.0, value2=1.0, value3=2.0) + + self.assertListEqual(table.get_entry("x").to_list(), [0.0, 1.0, 2.0]) + + def test_add_entry_with_missing_key(self): + """Test adding entry with partly specified keys.""" + table = TestBaseTable.TestTable() + table.add_entry(index="x", value1=0.0, value3=2.0) + + # NaN value cannot be compared with assert + np.testing.assert_equal(table.get_entry("x").to_list(), [0.0, float("nan"), 2.0]) + + def test_add_entry_with_new_key(self): + """Test adding data with new keys to table.""" + table = TestBaseTable.TestTable() + table.add_entry(index="x", value1=0.0, value2=1.0, value3=2.0, extra=3.0) + + self.assertListEqual(table.get_columns(), ["value1", "value2", "value3", "extra"]) + self.assertListEqual(table.get_entry("x").to_list(), [0.0, 1.0, 2.0, 3.0]) + + def test_add_entry_with_multiple_new_keys(self): + """Test new keys are added to column and the key order is preserved.""" + table = TestBaseTable.TestTable() + table.add_entry(index="x", phi=0.1, lamb=0.2, theta=0.3) + + self.assertListEqual( + table.get_columns(), + ["value1", "value2", "value3", "phi", "lamb", "theta"], + ) + + def test_add_entry_with_new_key_with_existing_entry(self): + """Test adding new key will expand existing entry.""" + table = TestBaseTable.TestTable() + table.add_entry(index="x", value1=0.0, value2=1.0, value3=2.0) + table.add_entry(index="y", value1=0.0, value2=1.0, value3=2.0, extra=3.0) + + self.assertListEqual(table.get_columns(), ["value1", "value2", "value3", "extra"]) + self.assertListEqual(table.get_entry("y").to_list(), [0.0, 1.0, 2.0, 3.0]) + + # NaN value cannot be compared with assert + np.testing.assert_equal(table.get_entry("x").to_list(), [0.0, 1.0, 2.0, float("nan")]) + + def 
+
+
+class TestAnalysisTable(QiskitExperimentsTestCase):
+    """Test case for extra functionality of the analysis table."""
+
+    def test_add_entry_with_result_id(self):
+        """Test adding an entry with a result_id.
+        The index is created by truncating the long ID string."""
+        table = AnalysisResultTable()
+        table.add_entry(result_id="9a0bdec8-c010-4ef7-bb7d-b84939717a6b", value=0.123)
+        self.assertEqual(table.get_entry("9a0bdec8").value, 0.123)
+
+    def test_extra_column_name_is_always_returned(self):
+        """Test that extra column names are always returned in filtered column names."""
+        table = AnalysisResultTable()
+        table.add_entry(extra=0.123)
+
+        minimal_columns = table.filter_columns("minimal")
+        self.assertTrue("extra" in minimal_columns)
+
+        default_columns = table.filter_columns("default")
+        self.assertTrue("extra" in default_columns)
+
+        all_columns = table.filter_columns("all")
+        self.assertTrue("extra" in all_columns)
+
+    def test_listing_result_id(self):
+        """Test returning the result IDs of all stored entries."""
+        table = AnalysisResultTable()
+
+        ref_ids = [str(uuid.uuid4()) for _ in range(10)]
+        for ref_id in ref_ids:
+            table.add_entry(result_id=ref_id, value=0)
+
+        self.assertListEqual(table.result_ids(), ref_ids)
+
+    def test_no_overlap_result_id(self):
+        """Test that unique result IDs are generated automatically for many entries."""
+        table = AnalysisResultTable()
+
+        for i in range(100):
+            table.add_entry(value=i)
+
+        self.assertEqual(len(table), 100)
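Taken together, the `TestAnalysisTable` cases pin down the indexing contract: a full UUID passed as `result_id` is truncated to its first eight characters for the row index, and entries added without one receive generated, collision-free IDs. A hedged usage sketch, assuming `qiskit_experiments` with this PR applied is importable:

```python
import uuid

from qiskit_experiments.framework.analysis_result_table import AnalysisResultTable

table = AnalysisResultTable()

# A full UUID is truncated to an eight-character row index.
full_id = str(uuid.uuid4())
table.add_entry(result_id=full_id, value=0.123)
entry = table.get_entry(full_id[:8])

# Entries without an explicit result_id get generated, unique IDs.
table.add_entry(value=0.456)
assert len(table.result_ids()) == 2
```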
diff --git a/test/framework/test_framework.py b/test/framework/test_framework.py
index 265630371e..4c122f05ec 100644
--- a/test/framework/test_framework.py
+++ b/test/framework/test_framework.py
@@ -125,8 +125,8 @@ def test_analysis_replace_results_true(self):
         expdata2 = analysis.run(expdata1, replace_results=True, seed=12345)
         self.assertExperimentDone(expdata2)

-        self.assertEqual(expdata1, expdata2)
-        self.assertEqual(expdata1.analysis_results(), expdata2.analysis_results())
+        self.assertEqualExtended(expdata1, expdata2)
+        self.assertEqualExtended(expdata1.analysis_results(), expdata2.analysis_results())
         self.assertEqual(result_ids, list(expdata2._deleted_analysis_results))

     def test_analysis_replace_results_false(self):
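This last hunk switches to `assertEqualExtended` because analysis results are now held in a dataframe-backed store, where plain `==` is not meaningful; the extended assertion routes through the `multimethod` dispatcher extended earlier in this diff. A simplified sketch of that dispatch idea (names and structure are ours, not the project's API):

```python
from multimethod import multimethod


@multimethod
def is_equiv(data1: object, data2: object) -> bool:
    """Fallback comparison: plain equality."""
    return data1 == data2


@is_equiv.register
def _list_equiv(data1: list, data2: list) -> bool:
    """Recurse element-wise so registered overloads apply inside lists."""
    if len(data1) != len(data2):
        return False
    return all(is_equiv(e1, e2) for e1, e2 in zip(data1, data2))


# Nested containers are compared element by element through the dispatcher.
assert is_equiv([1, [2, 3]], [1, [2, 3]])
```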