Merge with main

equinor · Jul 8, 2024 · 657d6e9 · 657d6e9
2 parents 6d827e6 + 2312c6a
commit 657d6e9
Show file tree

Hide file tree

Showing 17 changed files with 138 additions and 165 deletions.
diff --git a/.github/workflows/master-ci.yml b/.github/workflows/master-ci.yml
@@ -13,6 +13,8 @@ on:
 jobs:
   push-images:
     runs-on: ubuntu-latest
+    permissions:
+      packages: write
     env:
       IMAGE_DESCRIPTION: Building thousands of models with timeseries data to monitor systems
       IMAGE_LICENSE: AGPL-3.0
@@ -30,21 +32,13 @@ jobs:
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v1
 
-      - name: Login to production CR
-        uses: docker/login-action@v1
-        if: ${{ steps.prep.outputs.login_prod_cr  }}
-        with:
-          registry: ${{ env.DOCKER_PROD_REGISTRY }}
-          username: ${{ secrets.DOCKER_PROD_USERNAME }}
-          password: ${{ secrets.DOCKER_PROD_PASSWORD }}
-
       - name: Login to CR
         uses: docker/login-action@v1
         if: ${{ steps.prep.outputs.login_cr }}
         with:
           registry: ${{ env.DOCKER_REGISTRY }}
-          username: ${{ secrets.DOCKER_GITHUB_USER }}
-          password: ${{ secrets.DOCKER_GITHUB_PASSWORD }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
 
       - name: Build base image
         uses: docker/build-push-action@v2
@@ -55,10 +49,15 @@ jobs:
           load: true
 
       - name: Run Trivy vulnerability scanner
-        env:
-          IMAGE: ${{ steps.prep.outputs.base_image }}
-        run: |
-          bash scripts/trivy_scan.sh "$IMAGE"
+        uses: aquasecurity/trivy-action@0.20.0
+        with:
+          image-ref: ${{ steps.prep.outputs.base_image }}
+          format: 'table'
+          exit-code: '10'
+          ignore-unfixed: true
+          hide-progress: true
+          severity: 'HIGH,CRITICAL'
+          timeout: 5m
 
       - name: Push gordo-base
         uses: docker/build-push-action@v2

diff --git a/Dockerfile b/Dockerfile
@@ -18,8 +18,7 @@ RUN rm -rf /code/dist \
 # Extract a few big dependencies which docker will cache even when other dependencies change
 RUN cat /code/requirements/full_requirements.txt | grep tensorflow== > /code/prereq.txt \
     && cat /code/requirements/full_requirements.txt | grep pyarrow== >> /code/prereq.txt \
-    && cat /code/requirements/full_requirements.txt | grep scipy== >> /code/prereq.txt \
-    && cat /code/requirements/full_requirements.txt | grep catboost== >> /code/prereq.txt
+    && cat /code/requirements/full_requirements.txt | grep scipy== >> /code/prereq.txt
 
 FROM python:3.10-slim-bookworm
 
@@ -48,7 +47,7 @@ RUN pip install gordo-packed.tar.gz[full]
 
 # Install GordoDeploy dependencies
 ARG HTTPS_PROXY
-ARG KUBECTL_VERSION="v1.22.4"
+ARG KUBECTL_VERSION="v1.30.2"
 
 #download & install kubectl
 RUN curl -sSL -o /usr/local/bin/kubectl https://storage.googleapis.com/kubernetes-release/release/$KUBECTL_VERSION/bin/linux/amd64/kubectl &&\
@@ -71,11 +70,11 @@ ADD build.sh ${HOME}/build.sh
 RUN cp ${HOME}/build.sh /usr/bin/build \
     && chmod a+x /usr/bin/build
 
-# Run things from gordo's home to have write access when needed (e.g. Catboost tmp files)
+# Run things from gordo's home to have write access when needed
 WORKDIR ${HOME}
 
 #download & install argo
-ENV ARGO_VERSIONS="[{\"number\":3,\"version\":\"3.4.7\"}]"
+ENV ARGO_VERSIONS="[{\"number\":3,\"version\":\"3.5.8\"}]"
 COPY scripts/download_argo.py ./download_argo.py
 RUN python3 ./download_argo.py -o /usr/local/bin
 

diff --git a/docs/conf.py b/docs/conf.py
@@ -135,8 +135,6 @@ def linkcode_resolve(domain, info):
 
 autodoc_typehints_description_target = "documented"
 
-autodoc_mock_imports = ["tensorflow"]
-
 # Document both class doc (default) and documentation in __init__
 autoclass_content = "both"
 

diff --git a/gordo/builder/build_model.py b/gordo/builder/build_model.py
@@ -556,15 +556,16 @@ def _extract_metadata_from_model(
         # which also had a GordoBase as a parameter/attribute, but will satisfy BaseEstimators
         # which can take a GordoBase model as a parameter, which will then have metadata to get
         for key, val in model.__dict__.items():
-            if key.endswith(
-                "_"
-            ):  # keras3 clones the regressor into regressor_ and never updates original regressor
-                if isinstance(val, Pipeline):
-                    metadata.update(
-                        ModelBuilder._extract_metadata_from_model(val.steps[-1][1])
-                    )
-                elif isinstance(val, GordoBase) or isinstance(val, BaseEstimator):
-                    metadata.update(ModelBuilder._extract_metadata_from_model(val))
+            # Keras 3 clones the regressor into `regressor_` and never updates the original
+            # `regressor`, yet both remain in the attributes, so the original is skipped here
+            if key == "regressor":
+                continue
+            if isinstance(val, Pipeline):
+                metadata.update(
+                    ModelBuilder._extract_metadata_from_model(val.steps[-1][1])
+                )
+            elif isinstance(val, GordoBase) or isinstance(val, BaseEstimator):
+                metadata.update(ModelBuilder._extract_metadata_from_model(val))
         return metadata
 
     @property

diff --git a/gordo/machine/model/factories/feedforward_autoencoder.py b/gordo/machine/model/factories/feedforward_autoencoder.py
@@ -49,7 +49,7 @@ def feedforward_model(
         If str then the name of the optimizer must be provided (e.g. "Adam").
         The arguments of the optimizer can be supplied in optimizer_kwargs.
         If a Keras optimizer, pass an instance of the respective
-        class (e.x. Adam(lr=0.01,beta_1=0.9, beta_2=0.999)).  If no arguments are
+        class (e.g. Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999)).  If no arguments are
         provided Keras default values will be set.
     optimizer_kwargs
         The arguments for the chosen optimizer. If not provided Keras'
@@ -133,7 +133,7 @@ def feedforward_symmetric(
         If str then the name of the optimizer must be provided (e.g. "Adam").
         The arguments of the optimizer can be supplied in optimizer_kwargs.
         If a Keras optimizer, pass an instance of the respective
-        class (e.x. ``Adam(lr=0.01,beta_1=0.9, beta_2=0.999)``).  If no arguments are
+        class (e.g. ``Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999)``).  If no arguments are
         provided Keras default values will be set.
     optimizer_kwargs
         The arguments for the chosen optimizer. If not provided Keras'
@@ -194,7 +194,7 @@ def feedforward_hourglass(
         If str then the name of the optimizer must be provided (e.g. "Adam").
         The arguments of the optimizer can be supplied in optimizer_kwargs.
         If a Keras optimizer, pass an instance of the respective
-        class (e.x. Adam(lr=0.01,beta_1=0.9, beta_2=0.999)).  If no arguments are
+        class (e.g. Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999)).  If no arguments are
         provided Keras default values will be set.
     optimizer_kwargs
         The arguments for the chosen optimizer. If not provided Keras'

diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py
@@ -80,7 +80,18 @@ def __init__(
         self.kwargs: Dict[str, Any] = kwargs
         self._history = None
 
-        KerasRegressor.__init__(self, batch_size=kwargs.get("batch_size"))
+        # The new keras wrapper expects most of these kwargs to be set as model attributes and uses them as
+        # defaults in some places. Kwargs passed to the respective fit, predict and compile methods always take
+        # precedence, so forwarding them here only serves to satisfy the wrapper's expectations.
+        _expected_kwargs = {
+            *KerasRegressor._fit_kwargs,
+            *KerasRegressor._predict_kwargs,
+            *KerasRegressor._compile_kwargs,
+        }
+        KerasRegressor.__init__(
+            self,
+            **{key: value for key, value in kwargs.items() if key in _expected_kwargs},
+        )
 
     @staticmethod
     def parse_module_path(module_path) -> Tuple[Optional[str], str]:
@@ -302,9 +313,12 @@ def get_params(self, **params):
             Parameters used in this estimator
         """
         params = super().get_params(**params)
-        params.pop("model", None)
         params.update({"kind": self.kind})
         params.update(self.kwargs)
+        if self.kwargs.get("callbacks") is not None and any(
+            isinstance(callback, dict) for callback in self.kwargs["callbacks"]
+        ):
+            params["callbacks"] = serializer.build_callbacks(self.kwargs["callbacks"])
         return params
 
     def _prepare_model(self):
@@ -405,12 +419,9 @@ class KerasRawModelRegressor(KerasAutoEncoder):
     ...       layers:
     ...         - tensorflow.keras.layers.Dense:
     ...             units: 4
-    ...             input_shape:
-    ...               - 4
+    ...             input_shape: [4]
     ...         - tensorflow.keras.layers.Dense:
     ...             units: 1
-    ...             input_shape:
-    ...               - 1
     ... '''
     >>> config = yaml.safe_load(config_str)
     >>> model = KerasRawModelRegressor(kind=config)
@@ -420,8 +431,7 @@ class KerasRawModelRegressor(KerasAutoEncoder):
     KerasRawModelRegressor(kind: {'compile': {'loss': 'mse', 'optimizer': 'adam'},
      'spec': {'tensorflow.keras.models.Sequential': {'layers': [{'tensorflow.keras.layers.Dense': {'input_shape': [4],
                                                                                                    'units': 4}},
-                                                                {'tensorflow.keras.layers.Dense': {'input_shape': [1],
-                                                                                                   'units': 1}}]}}})
+                                                                {'tensorflow.keras.layers.Dense': {'units': 1}}]}}})
     >>> out = model.predict(X)
     """
 

diff --git a/gordo/serializer/__init__.py b/gordo/serializer/__init__.py
@@ -1,4 +1,8 @@
-from .from_definition import from_definition, load_params_from_definition
+from .from_definition import (
+    from_definition,
+    load_params_from_definition,
+    build_callbacks,
+)
 from .into_definition import into_definition, load_definition_from_params
 from .serializer import (
     dump,

diff --git a/gordo/serializer/from_definition.py b/gordo/serializer/from_definition.py
@@ -5,6 +5,8 @@
 import typing  # noqa
 
 from typing import Union, Dict, Any, Iterable
+
+from keras.src.callbacks import Callback
 from sklearn.pipeline import Pipeline, FeatureUnion
 from sklearn.base import BaseEstimator
 from tensorflow.keras import Sequential
@@ -174,9 +176,9 @@ def _build_step(
         import_str = list(step.keys())[0]
 
         try:
-            StepClass: Union[None, FeatureUnion, Pipeline, BaseEstimator] = (
-                import_location(import_str)
-            )
+            StepClass: Union[
+                None, FeatureUnion, Pipeline, BaseEstimator
+            ] = import_location(import_str)
         except (ImportError, ValueError):
             StepClass = None
 
@@ -248,28 +250,6 @@ def _build_step(
         )
 
 
-def _build_callbacks(definitions: list):
-    """
-    Parameters
-    ----------
-    definitions
-        List of callbacks definitions
-
-    Examples
-    --------
-    >>> callbacks=_build_callbacks([{'tensorflow.keras.callbacks.EarlyStopping': {'monitor': 'val_loss,', 'patience': 10}}])
-    >>> type(callbacks[0])
-    <class 'keras.src.callbacks.early_stopping.EarlyStopping'>
-
-    Returns
-    -------
-    """
-    callbacks = []
-    for callback in definitions:
-        callbacks.append(_build_step(callback))
-    return callbacks
-
-
 def _load_param_classes(params: dict):
     """
     Inspect the params' values and determine if any can be loaded as a class.
@@ -350,7 +330,7 @@ def _load_param_classes(params: dict):
                     kwargs = _load_param_classes(sub_params)
                     params[key] = create_instance(Model, **kwargs)  # type: ignore
         elif key == "callbacks" and isinstance(value, list):
-            params[key] = _build_callbacks(value)
+            params[key] = build_callbacks(value)
     return params
 
 
@@ -367,3 +347,27 @@ def load_params_from_definition(definition: dict) -> dict:
             "Expected definition to be a dict," f"found: {type(definition)}"
         )
     return _load_param_classes(definition)
+
+
+def build_callbacks(definitions: list):
+    """
+    Parameters
+    ----------
+    definitions
+        List of callbacks definitions
+
+    Examples
+    --------
+    >>> callbacks=build_callbacks([{'tensorflow.keras.callbacks.EarlyStopping': {'monitor': 'val_loss', 'patience': 10}}])
+    >>> type(callbacks[0])
+    <class 'keras.src.callbacks.early_stopping.EarlyStopping'>
+
+    Returns
+    -------
+    """
+    callbacks = []
+    for callback in definitions:
+        callbacks.append(
+            callback if isinstance(callback, Callback) else _build_step(callback)
+        )
+    return callbacks
diff --git a/pytest.ini b/pytest.ini
@@ -11,8 +11,8 @@ addopts =
     --doctest-glob='*.md'
     --doctest-glob='*.rst'
     --junitxml=junit/junit.xml
-    --cov-report=xml
-    --cov=gordo
+;    --cov-report=xml
+;    --cov=gordo
 flakes-ignore =
     __init__.py UnusedImport
     test_*.py UnusedImport