scaleoutsystems · ahellander · May 30, 2023 · May 23, 2022 · May 24, 2022 · May 24, 2022
diff --git a/.ci/tests/examples/inference_test.py b/.ci/tests/examples/inference_test.py
@@ -0,0 +1,35 @@
+import sys
+from time import sleep
+
+import pymongo
+
+N_CLIENTS = 2
+RETRIES = 18
+SLEEP = 10
+
+
+def _eprint(*args, **kwargs):
+    print(*args, file=sys.stderr, **kwargs)  # same as print(), but always writes to stderr
+
+
+def _wait_n_rounds(collection):
+    n = 0
+    for _ in range(RETRIES):
+        query = {'type': 'INFERENCE'}
+        n = collection.count_documents(query)
+        if n == N_CLIENTS:
+            return n
+        _eprint(f'Succeded cleints {n}. Sleeping for {SLEEP}.')
+        sleep(SLEEP)
+    _eprint(f'Succeded clients: {n}. Giving up.')
+    return n
+
+
+if __name__ == '__main__':
+    # Connect to mongo
+    client = pymongo.MongoClient("mongodb://fedn_admin:password@localhost:6534")
+
+    # Wait for successful rounds
+    succeded = _wait_n_rounds(client['fedn-test-network']['control']['status'])
+    assert(succeded == N_CLIENTS)  # check that all rounds succeeded
+    _eprint(f'Succeded inference clients: {succeded}. Test passed.')
diff --git a/.ci/tests/examples/run_inference.sh b/.ci/tests/examples/run_inference.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+set -e
+
+# Parse example name
+if [ "$#" -lt 1 ]; then
+    >&2 echo "Wrong number of arguments (usage: run_infrence.sh <example-name>)"
+    exit 1
+fi
+example="$1"
+
+>&2 echo "Run inference"
+pushd "examples/$example"
+curl -k -X POST https://localhost:8090/infer
+
+>&2 echo "Checking inference success"
+".$example/bin/python" ../../.ci/tests/examples/inference_test.py
+
+>&2 echo "Test completed successfully"
+popd
diff --git a/.ci/tests/examples/wait_for.py b/.ci/tests/examples/wait_for.py
@@ -29,7 +29,7 @@ def _retry(try_func, **func_args):
 def _test_rounds(n_rounds):
     client = pymongo.MongoClient(
         "mongodb://fedn_admin:password@localhost:6534")
-    collection = client['fedn-test-network']['control']['round']
+    collection = client['fedn-network']['control']['rounds']
     query = {'reducer.status': 'Success'}
     n = collection.count_documents(query)
     client.close()

diff --git a/.github/workflows/integration-tests.yaml b/.github/workflows/integration-tests.yaml
@@ -15,13 +15,12 @@ jobs:
     strategy:
       matrix:
         to_test:
-          - "mnist-keras keras"
-          - "mnist-pytorch pytorch"
+          - "mnist-keras kerashelper"
+          - "mnist-pytorch pytorchhelper"
         python_version: ["3.8", "3.9","3.10"]
         os:
           - ubuntu-20.04
           - ubuntu-22.04
-          - macos-11
     runs-on: ${{ matrix.os }}
     steps:
       - name: checkout
@@ -38,7 +37,10 @@ jobs:
 
       - name: run ${{ matrix.to_test }}
         run: .ci/tests/examples/run.sh ${{ matrix.to_test }}
-        if: ${{ matrix.os != 'macos-11' }} # skip Docker part for MacOS
+
+      - name: run ${{ matrix.to_test }} inference
+        run: .ci/tests/examples/run_inference.sh ${{ matrix.to_test }}
+        if: ${{ matrix.os != 'macos-11' && matrix.to_test == 'mnist-keras kerashelper' }} # example available for Keras; must equal the matrix entry verbatim
 
       - name: print logs
         if: failure()

diff --git a/LICENSE b/LICENSE
@@ -199,3 +199,4 @@
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
+
diff --git a/config/settings-client.yaml.template b/config/settings-client.yaml.template
@@ -1,3 +1,3 @@
-network_id: fedn-test-network
+network_id: fedn-network
 discover_host: reducer
 discover_port: 8090
diff --git a/config/settings-combiner.yaml.template b/config/settings-combiner.yaml.template
@@ -1,10 +1,10 @@
-network_id: fedn-test-network
-controller:
-    discover_host: reducer
-    discover_port: 8090
+network_id: fedn-network
+discover_host: reducer
+discover_port: 8090
+
+name: combiner
+host: combiner
+port: 12080
+max_clients: 30
+
 
-combiner:
-    name: combiner
-    host: combiner
-    port: 12080
-    max_clients: 30
diff --git a/config/settings-reducer.yaml.template b/config/settings-reducer.yaml.template
@@ -1,9 +1,4 @@
-network_id: fedn-test-network
-token: fedn_token
-
-control:
-  state: idle
-  helper: keras
+network_id: fedn-network
 
 statestore:
   type: MongoDB

diff --git a/docker-compose.yaml b/docker-compose.yaml
@@ -93,7 +93,7 @@ services:
       - ${HOST_REPO_DIR:-.}/fedn:/app/fedn
     entrypoint: [ "sh", "-c" ]
     command:
-      - "/venv/bin/pip install --no-cache-dir -e /app/fedn && /venv/bin/fedn run combiner -in config/settings-combiner.yaml"
+      - "/venv/bin/pip install --no-cache-dir -e /app/fedn && /venv/bin/fedn run combiner --init config/settings-combiner.yaml"
     ports:
       - 12080:12080
 
@@ -110,6 +110,6 @@ services:
       - ${HOST_REPO_DIR:-.}/fedn:/app/fedn
     entrypoint: [ "sh", "-c" ]
     command:
-      - "/venv/bin/pip install --no-cache-dir -e /app/fedn && /venv/bin/fedn run client -in config/settings-client.yaml"
+      - "/venv/bin/pip install --no-cache-dir -e /app/fedn && /venv/bin/fedn run client --init config/settings-client.yaml"
     deploy:
       replicas: 0
diff --git a/examples/mnist-keras/README.md b/examples/mnist-keras/README.md
@@ -66,3 +66,22 @@ Finally, you can start the experiment from the "control" tab of the UI.
 
 ## Clean up
 You can clean up by running `docker-compose down`.
+
+## Connecting to a distributed deployment
+To start and remotely connect a client with the required dependencies for this example, start by downloading the `client.yaml` file. You can either navigate the reducer UI or run the following command.
+
+```bash
+curl -k https://<reducer-fqdn>:<reducer-port>/config/download > client.yaml
+```
+> **Note** make sure to replace `<reducer-fqdn>` and `<reducer-port>` with appropriate values.
+
+Now you are ready to start the client via Docker by running the following command.
+
+```bash
+docker run -d \
+  -v $PWD/client.yaml:/app/client.yaml \
+  -v $PWD/data:/var/data \
+  -e ENTRYPOINT_OPTS=--data_path=/var/data/mnist.npz \
+  ghcr.io/scaleoutsystems/fedn/fedn:develop-mnist-keras run client -in client.yaml
+```
+> **Note** If the reducer and combiner host names, as specified in the configuration files, are not resolvable in the client host network, you need to use the Docker option `--add-host` to make them resolvable. Please refer to the Docker documentation for more detail.
diff --git a/examples/mnist-keras/bin/init_venv.sh b/examples/mnist-keras/bin/init_venv.sh
@@ -2,7 +2,7 @@
 set -e
 
 # Init venv
-python -m venv .mnist-keras
+python3 -m venv .mnist-keras
 
 # Pip deps
 .mnist-keras/bin/pip install --upgrade pip

diff --git a/examples/mnist-keras/client/entrypoint b/examples/mnist-keras/client/entrypoint
@@ -7,8 +7,9 @@ import fire
 import numpy as np
 import tensorflow as tf
 
-from fedn.utils.kerashelper import KerasHelper
+from fedn.utils.helpers import get_helper, save_metadata, save_metrics
 
+HELPER_MODULE = 'kerashelper'
 NUM_CLASSES = 10
 
 
@@ -17,7 +18,6 @@ def _get_data_path():
     client = docker.from_env()
     container = client.containers.get(os.environ['HOSTNAME'])
     number = container.name[-1]
-
     # Return data path
     return f"/var/data/clients/{number}/mnist.npz"
 
@@ -64,8 +64,8 @@ def _load_data(data_path, is_train=True):
 
 def init_seed(out_path='seed.npz'):
     weights = _compile_model().get_weights()
-    helper = KerasHelper()
-    helper.save_model(weights, out_path)
+    helper = get_helper(HELPER_MODULE)
+    helper.save(weights, out_path)
 
 
 def train(in_model_path, out_model_path, data_path=None, batch_size=32, epochs=1):
@@ -74,16 +74,26 @@ def train(in_model_path, out_model_path, data_path=None, batch_size=32, epochs=1
 
     # Load model
     model = _compile_model()
-    helper = KerasHelper()
-    weights = helper.load_model(in_model_path)
+    helper = get_helper(HELPER_MODULE)
+    weights = helper.load(in_model_path)
     model.set_weights(weights)
 
     # Train
     model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs)
 
     # Save
     weights = model.get_weights()
-    helper.save_model(weights, out_model_path)
+    helper.save(weights, out_model_path)
+
+    # Metadata needed for aggregation server side
+    metadata = {
+        'num_examples': len(x_train),
+        'batch_size': batch_size,
+        'epochs': epochs,
+    }
+
+    # Save JSON metadata file
+    save_metadata(metadata, out_model_path)
 
 
 def validate(in_model_path, out_json_path, data_path=None):
@@ -93,8 +103,8 @@ def validate(in_model_path, out_json_path, data_path=None):
 
     # Load model
     model = _compile_model()
-    helper = KerasHelper()
-    weights = helper.load_model(in_model_path)
+    helper = get_helper(HELPER_MODULE)
+    weights = helper.load(in_model_path)
     model.set_weights(weights)
 
     # Evaluate
@@ -111,15 +121,34 @@ def validate(in_model_path, out_json_path, data_path=None):
         "test_accuracy": model_score_test[1],
     }
 
+    # Save JSON
+    save_metrics(report, out_json_path)
+
+
+def infer(in_model_path, out_json_path, data_path=None):
+    # Using test data for inference but another dataset could be loaded
+    x_test, _ = _load_data(data_path, is_train=False)
+
+    # Load model
+    model = _compile_model()
+    helper = get_helper(HELPER_MODULE)
+    weights = helper.load(in_model_path)
+    model.set_weights(weights)
+
+    # Infer
+    y_pred = model.predict(x_test)
+    y_pred = np.argmax(y_pred, axis=1)
+
     # Save JSON
     with open(out_json_path, "w") as fh:
-        fh.write(json.dumps(report))
+        fh.write(json.dumps({'predictions': y_pred.tolist()}))
 
 
 if __name__ == '__main__':
     fire.Fire({
         'init_seed': init_seed,
         'train': train,
         'validate': validate,
+        'infer': infer,
         '_get_data_path': _get_data_path,  # for testing
     })
diff --git a/examples/mnist-keras/client/fedn.yaml b/examples/mnist-keras/client/fedn.yaml
@@ -2,4 +2,6 @@ entry_points:
   train:
     command: /venv/bin/python entrypoint train $ENTRYPOINT_OPTS
   validate:
-    command: /venv/bin/python entrypoint validate $ENTRYPOINT_OPTS
+    command: /venv/bin/python entrypoint validate $ENTRYPOINT_OPTS
+  infer:
+    command: /venv/bin/python entrypoint infer $ENTRYPOINT_OPTS
diff --git a/examples/mnist-pytorch/client/entrypoint b/examples/mnist-pytorch/client/entrypoint
@@ -1,15 +1,15 @@
 #!./.mnist-pytorch/bin/python
 import collections
-import json
 import math
 import os
 
 import docker
 import fire
 import torch
 
-from fedn.utils.pytorchhelper import PytorchHelper
+from fedn.utils.helpers import get_helper, save_metadata, save_metrics
 
+HELPER_MODULE = 'pytorchhelper'
 NUM_CLASSES = 10
 
 
@@ -69,13 +69,13 @@ def _save_model(model, out_path):
     weights_np = collections.OrderedDict()
     for w in weights:
         weights_np[w] = weights[w].cpu().detach().numpy()
-    helper = PytorchHelper()
-    helper.save_model(weights, out_path)
+    helper = get_helper(HELPER_MODULE)
+    helper.save(weights, out_path)
 
 
 def _load_model(model_path):
-    helper = PytorchHelper()
-    weights_np = helper.load_model(model_path)
+    helper = get_helper(HELPER_MODULE)
+    weights_np = helper.load(model_path)
     weights = collections.OrderedDict()
     for w in weights_np:
         weights[w] = torch.tensor(weights_np[w])
@@ -118,7 +118,18 @@ def train(in_model_path, out_model_path, data_path=None, batch_size=32, epochs=1
                 print(
                     f"Epoch {e}/{epochs-1} | Batch: {b}/{n_batches-1} | Loss: {loss.item()}")
 
-    # Save
+    # Metadata needed for aggregation server side
+    metadata = {
+        'num_examples': len(x_train),
+        'batch_size': batch_size,
+        'epochs': epochs,
+        'lr': lr
+    }
+
+    # Save JSON metadata file
+    save_metadata(metadata, out_model_path)
+
+    # Save model update
     _save_model(model, out_model_path)
 
 
@@ -151,14 +162,13 @@ def validate(in_model_path, out_json_path, data_path=None):
     }
 
     # Save JSON
-    with open(out_json_path, "w") as fh:
-        fh.write(json.dumps(report))
+    save_metrics(report, out_json_path)
 
 
 if __name__ == '__main__':
     fire.Fire({
         'init_seed': init_seed,
         'train': train,
         'validate': validate,
-        '_get_data_path': _get_data_path,  # for testing
+        # '_get_data_path': _get_data_path,  # for testing
     })
diff --git a/examples/mnist-pytorch/requirements.txt b/examples/mnist-pytorch/requirements.txt
@@ -1,4 +1,4 @@
 torch==1.13.1
 torchvision==0.14.1
 fire==0.3.1
-docker==6.1.1
+docker==6.1.1
diff --git a/fedn/README.md b/fedn/README.md
@@ -1 +1 @@
-# FEDn SDk #
+FEDn