Always store all columns in load_smry and load_smry_meta to avoid duplicates
equinor · Nov 28, 2020 · e58cc70 · e58cc70
1 parent 3e2180c
commit e58cc70
Show file tree

Hide file tree

Showing 4 changed files with 44 additions and 19 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,7 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Unreleased
 ### Fixed
-- [#505](https://github.com/equinor/webviz-subsurface/pull/505) - Fixed recent performance regression issue for loading of UNSMRY data. Loading times when multiple plugins are using the same data is now significantly reduced.
+- [#505](https://github.com/equinor/webviz-subsurface/pull/505) - Fixed recent performance regression issue for loading of UNSMRY data. Loading times when multiple plugins are using the same data are now significantly reduced. Note that all UNSMRY vectors are now stored in portable apps, independent of the choice of column_keys in individual plugins.
 
 ## [0.1.5] - 2020-11-26
 ### Added

diff --git a/webviz_subsurface/_models/ensemble_model.py b/webviz_subsurface/_models/ensemble_model.py
@@ -1,5 +1,8 @@
-from typing import Union, Optional, List
+import fnmatch
 import pathlib
+import re
+from typing import Union, Optional, List
+
 
 import pandas as pd
 from fmu.ensemble import ScratchEnsemble
@@ -56,23 +59,29 @@ def load_smry(
         self.webvizstore.append(
             (
                 self._load_smry,
-                [{"self": self, "time_index": time_index, "column_keys": column_keys}],
+                [{"self": self, "time_index": time_index, "column_keys": None}],
             )
         )
-        return self._load_smry(time_index=time_index, column_keys=column_keys)
+
+        if column_keys is None:
+            return self._load_smry(time_index=time_index)
+        df = self._load_smry(time_index=time_index)
+        return df[
+            df.columns[_match_column_keys(df_index=df.columns, column_keys=column_keys)]
+        ]
 
     def load_smry_meta(
         self,
         column_keys: Optional[list] = None,
     ) -> pd.DataFrame:
-        """Finds metadata for the summary vectors in the ensemble set.
-        Note that we assume the same units for all ensembles.
-        (meaning that we update/overwrite when checking the next ensemble)
-        """
+        """Finds metadata for the summary vectors in the ensemble."""
         self.webvizstore.append(
-            (self._load_smry_meta, [{"self": self, "column_keys": column_keys}])
+            (self._load_smry_meta, [{"self": self, "column_keys": None}])
         )
-        return self._load_smry_meta(column_keys=column_keys)
+        if column_keys is None:
+            return self._load_smry_meta()
+        df = self._load_smry_meta()
+        return df[_match_column_keys(df_index=df.index, column_keys=column_keys)]
 
     def load_csv(self, csv_file: pathlib.Path) -> pd.DataFrame:
         self.webvizstore.append(
@@ -95,7 +104,6 @@ def _load_smry(
         time_index: Optional[Union[list, str]] = None,
         column_keys: Optional[list] = None,
     ) -> pd.DataFrame:
-
         return self.load_ensemble().get_smry(
             time_index=time_index, column_keys=column_keys
         )
@@ -106,10 +114,7 @@ def _load_smry_meta(
         self,
         column_keys: Optional[list] = None,
     ) -> pd.DataFrame:
-        """Finds metadata for the summary vectors in the ensemble set.
-        Note that we assume the same units for all ensembles.
-        (meaning that we update/overwrite when checking the next ensemble)
-        """
+        """Finds metadata for the summary vectors in the ensemble."""
         return pd.DataFrame(
             self.load_ensemble().get_smry_meta(column_keys=column_keys)
         ).T
@@ -118,3 +123,17 @@ def _load_smry_meta(
     @webvizstore
     def _load_csv(self, csv_file: pathlib.Path) -> pd.DataFrame:
         return self.load_ensemble().load_csv(str(csv_file))
+
+
+def _match_column_keys(
+    df_index: pd.core.indexes.base.Index, column_keys: List[str]
+) -> pd.core.indexes.base.Index:
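+    """Matches the fnmatch-style patterns in column_keys against the labels in
+    df_index, always adding 'DATE' and 'REAL' to the requested patterns. Returns an
+    index of booleans, one per label in df_index, that is True where the label matches.
+    """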
+    all_columns_keys = ["DATE", "REAL"]
+    all_columns_keys.extend(column_keys)
+    regex = re.compile(
+        "|".join([fnmatch.translate(column_key) for column_key in all_columns_keys])
+    )
+    return df_index.map(lambda column: bool(regex.fullmatch(column)))
diff --git a/webviz_subsurface/_models/ensemble_set_model.py b/webviz_subsurface/_models/ensemble_set_model.py
@@ -8,7 +8,7 @@
 
 class EnsembleSetModel:
     """Class to load and manipulate ensemble sets from given paths to
-    ensembles on scratch"""
+    ensembles on disk"""
 
     def __init__(
         self,
@@ -66,7 +66,7 @@ def load_smry_meta(
         (meaning that we update/overwrite when checking the next ensemble)
         """
 
-        smry_meta = {}
+        smry_meta: dict = {}
         for ensemble in self.ensembles:
             smry_meta.update(
                 ensemble.load_smry_meta(column_keys=column_keys).T.to_dict()

diff --git a/webviz_subsurface/plugins/_reservoir_simulation_timeseries_regional.py b/webviz_subsurface/plugins/_reservoir_simulation_timeseries_regional.py
@@ -159,9 +159,15 @@ def __init__(
                 ]
             )
         else:
-            self.smry_init_prod = self.emodel.load_smry(
+            total_smry = self.emodel.load_smry(
+                time_index=self.time_index,
                 column_keys=["F[OWG]PT"],
-                time_index="first",
+            )
+            self.smry_init_prod = pd.concat(
+                [
+                    df[df["DATE"] == min(df["DATE"])]
+                    for _, df in total_smry.groupby("ENSEMBLE")
+                ]
             )
         self.rec_ensembles = set(self.smry["ENSEMBLE"].unique())
         for col in self.smry_init_prod.columns: