Skip to content

Commit

Permalink
Always store all columns in load_smry and load_smry_meta to avoid dup…
Browse files Browse the repository at this point in the history
…licates
  • Loading branch information
asnyv committed Nov 28, 2020
1 parent 3e2180c commit e58cc70
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 19 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased
### Fixed
- [#505](https://github.com/equinor/webviz-subsurface/pull/505) - Fixed recent performance regression issue for loading of UNSMRY data. Loading times when multiple plugins are using the same data is now significantly reduced.
- [#505](https://github.com/equinor/webviz-subsurface/pull/505) - Fixed recent performance regression issue for loading of UNSMRY data. Loading times when multiple plugins are using the same data are now significantly reduced. Note that all UNSMRY vectors are now stored in portable apps, regardless of the `column_keys` chosen in individual plugins.

## [0.1.5] - 2020-11-26
### Added
Expand Down
47 changes: 33 additions & 14 deletions webviz_subsurface/_models/ensemble_model.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from typing import Union, Optional, List
import fnmatch
import pathlib
import re
from typing import Union, Optional, List


import pandas as pd
from fmu.ensemble import ScratchEnsemble
Expand Down Expand Up @@ -56,23 +59,29 @@ def load_smry(
self.webvizstore.append(
(
self._load_smry,
[{"self": self, "time_index": time_index, "column_keys": column_keys}],
[{"self": self, "time_index": time_index, "column_keys": None}],
)
)
return self._load_smry(time_index=time_index, column_keys=column_keys)

if column_keys is None:
return self._load_smry(time_index=time_index)
df = self._load_smry(time_index=time_index)
return df[
df.columns[_match_column_keys(df_index=df.columns, column_keys=column_keys)]
]

def load_smry_meta(
self,
column_keys: Optional[list] = None,
) -> pd.DataFrame:
"""Finds metadata for the summary vectors in the ensemble set.
Note that we assume the same units for all ensembles.
(meaning that we update/overwrite when checking the next ensemble)
"""
"""Finds metadata for the summary vectors in the ensemble."""
self.webvizstore.append(
(self._load_smry_meta, [{"self": self, "column_keys": column_keys}])
(self._load_smry_meta, [{"self": self, "column_keys": None}])
)
return self._load_smry_meta(column_keys=column_keys)
if column_keys is None:
return self._load_smry_meta()
df = self._load_smry_meta()
return df[_match_column_keys(df_index=df.index, column_keys=column_keys)]

def load_csv(self, csv_file: pathlib.Path) -> pd.DataFrame:
self.webvizstore.append(
Expand All @@ -95,7 +104,6 @@ def _load_smry(
time_index: Optional[Union[list, str]] = None,
column_keys: Optional[list] = None,
) -> pd.DataFrame:

return self.load_ensemble().get_smry(
time_index=time_index, column_keys=column_keys
)
Expand All @@ -106,10 +114,7 @@ def _load_smry_meta(
self,
column_keys: Optional[list] = None,
) -> pd.DataFrame:
"""Finds metadata for the summary vectors in the ensemble set.
Note that we assume the same units for all ensembles.
(meaning that we update/overwrite when checking the next ensemble)
"""
"""Finds metadata for the summary vectors in the ensemble."""
return pd.DataFrame(
self.load_ensemble().get_smry_meta(column_keys=column_keys)
).T
Expand All @@ -118,3 +123,17 @@ def _load_smry_meta(
@webvizstore
def _load_csv(self, csv_file: pathlib.Path) -> pd.DataFrame:
return self.load_ensemble().load_csv(str(csv_file))


def _match_column_keys(
    df_index: pd.core.indexes.base.Index, column_keys: List[str]
) -> pd.core.indexes.base.Index:
    """Flag which labels in df_index are selected by the patterns in column_keys.

    The literal names 'DATE' and 'REAL' are always placed in front of the
    requested patterns, so labels with those names are flagged regardless of
    what column_keys contains. Patterns use shell-style wildcards (converted
    with fnmatch.translate) and must match a label in its entirety.

    Returns the result of mapping df_index to one boolean per label, in the
    same order as df_index.
    """
    patterns = ["DATE", "REAL", *column_keys]
    # One alternation regex is built up front so every label is tested with a
    # single compiled pattern instead of once per requested key.
    matcher = re.compile(
        "|".join(fnmatch.translate(pattern) for pattern in patterns)
    )

    def _is_requested(label: str) -> bool:
        return matcher.fullmatch(label) is not None

    return df_index.map(_is_requested)
4 changes: 2 additions & 2 deletions webviz_subsurface/_models/ensemble_set_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

class EnsembleSetModel:
"""Class to load and manipulate ensemble sets from given paths to
ensembles on scratch"""
ensembles on disk"""

def __init__(
self,
Expand Down Expand Up @@ -66,7 +66,7 @@ def load_smry_meta(
(meaning that we update/overwrite when checking the next ensemble)
"""

smry_meta = {}
smry_meta: dict = {}
for ensemble in self.ensembles:
smry_meta.update(
ensemble.load_smry_meta(column_keys=column_keys).T.to_dict()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,15 @@ def __init__(
]
)
else:
self.smry_init_prod = self.emodel.load_smry(
total_smry = self.emodel.load_smry(
time_index=self.time_index,
column_keys=["F[OWG]PT"],
time_index="first",
)
self.smry_init_prod = pd.concat(
[
df[df["DATE"] == min(df["DATE"])]
for _, df in total_smry.groupby("ENSEMBLE")
]
)
self.rec_ensembles = set(self.smry["ENSEMBLE"].unique())
for col in self.smry_init_prod.columns:
Expand Down

0 comments on commit e58cc70

Please sign in to comment.