
Workflow communication APIs and Simplified ML Algorithms #2250

Closed
wants to merge 42 commits
Changes from 40 commits
0029993
controller workflow APIs and simplified FedAvg and Fed Kaplan-Meier e…
chesterxgchen Dec 29, 2023
fce9c61
update
chesterxgchen Dec 29, 2023
e47ff4a
update
chesterxgchen Dec 29, 2023
660bdcf
update
chesterxgchen Dec 29, 2023
8733f60
Add Fed Cyclic example
chesterxgchen Dec 29, 2023
f1ad53d
Add Fed Cyclic example
chesterxgchen Dec 29, 2023
b1fa51b
addres PR comments
chesterxgchen Dec 29, 2023
d9ad8b2
1. Remove Base Class ErrorHandleController, instead move the function…
chesterxgchen Dec 31, 2023
4322602
add header
chesterxgchen Dec 31, 2023
3cab712
add header
chesterxgchen Dec 31, 2023
6221dff
code style format
chesterxgchen Dec 31, 2023
f535c62
make better user experience
chesterxgchen Dec 31, 2023
c6e19c7
code format and import
chesterxgchen Dec 31, 2023
a3fb099
remove comment
chesterxgchen Dec 31, 2023
6b57dc9
remove used method
chesterxgchen Dec 31, 2023
eb2272f
1. add intime aggregate version of fedavg
chesterxgchen Jan 2, 2024
373ce19
update README.md
chesterxgchen Jan 2, 2024
6ac0888
add ask all clients to end run when server in exception
chesterxgchen Jan 3, 2024
c5994c3
rebase and remove extra command
chesterxgchen Jan 5, 2024
4b84310
wip
chesterxgchen Jan 12, 2024
4f3abe5
remove WF dependency
chesterxgchen Jan 13, 2024
e635ea4
1. remove ctrl_msg_Queue, use controller directly.
chesterxgchen Jan 13, 2024
c9ee619
update README.md and cleanup
chesterxgchen Jan 13, 2024
76c3c43
change comm_msg_pull_interval to result_pull_interval
chesterxgchen Jan 13, 2024
a52d27a
1. fix message_bus
chesterxgchen Jan 13, 2024
8343392
design change, broken commit
chesterxgchen Jan 18, 2024
d914592
everything works now
chesterxgchen Jan 20, 2024
30552e4
everything works now
chesterxgchen Jan 20, 2024
c297073
merge with new data bus changes. The code is broken now.
chesterxgchen Jan 28, 2024
b4da21f
fix the lock issue.
chesterxgchen Jan 28, 2024
ae657f6
define strategy.py in case it is needed.
chesterxgchen Jan 28, 2024
f0ae6e3
define strategy.py in case it is needed.
chesterxgchen Jan 28, 2024
207c13a
make sure the publish in parallel instead of sequential
chesterxgchen Jan 29, 2024
a274a21
ADD CODE TO ADDRESS THE NEW DESIGN CHANGES.
chesterxgchen Jan 30, 2024
7c4f62e
format code
chesterxgchen Jan 30, 2024
5fdd1c0
fix the issue with return result
chesterxgchen Jan 30, 2024
5d09732
cleanup, fix original controller parsing
SYangster Feb 6, 2024
8f35c4a
Merge branch 'main' into wl_controller
SYangster Feb 13, 2024
5b8420c
fix format
SYangster Feb 14, 2024
a5f63ad
databus updates
SYangster Feb 14, 2024
da30df2
add docstrings, address comments
SYangster Feb 22, 2024
d37cd23
fix communicator pairing, remove temp example
SYangster Feb 23, 2024
172 changes: 172 additions & 0 deletions examples/hello-world/hello-fedavg/README.md
@@ -0,0 +1,172 @@
# FedAvg: simplified

This example illustrates how to use the new Workflow Communication API to construct a workflow: no need to write a controller.

## FLARE Workflow Communicator API

The FLARE Workflow Communicator API has only a small set of methods:

```

from abc import ABC, abstractmethod
from typing import Dict, List, Optional, Tuple

from nvflare.app_common.abstract.fl_model import FLModel


class WFCommAPISpec(ABC):
@abstractmethod
def broadcast_and_wait(self, msg_payload: Dict):
pass

@abstractmethod
def send_and_wait(self, msg_payload: Dict):
pass

@abstractmethod
def relay_and_wait(self, msg_payload: Dict):
pass

@abstractmethod
def broadcast(self, msg_payload: Dict):
pass

@abstractmethod
def send(self, msg_payload: Dict):
pass

@abstractmethod
def relay(self, msg_payload: Dict):
pass

@abstractmethod
def get_site_names(self) -> List[str]:
pass

@abstractmethod
def wait_all(self, min_responses: int, resp_max_wait_time: Optional[float]) -> Dict[str, Dict[str, FLModel]]:
pass

@abstractmethod
def wait_one(self, resp_max_wait_time: Optional[float] = None) -> Tuple[str, str, FLModel]:
pass

```


## Writing a new Workflow

With this new API, writing a new workflow is straightforward:

* Workflow (Server)

```
from nvflare.app_common.workflows import wf_comm as flare

class FedAvg:
def __init__(
self,
min_clients: int,
num_rounds: int,
output_path: str,
start_round: int = 1,
stop_cond: str = None,
model_selection_rule: str = None,
):
super(FedAvg, self).__init__()

<skip init code>

self.flare_comm = flare.get_wf_comm_api()

def run(self):
self.logger.info("start Fed Avg Workflow\n \n")

start = self.start_round
end = self.start_round + self.num_rounds

model = self.init_model()
for current_round in range(start, end):

self.logger.info(f"Round {current_round}/{self.num_rounds} started. {start=}, {end=}")
self.current_round = current_round

sag_results = self.scatter_and_gather(model, current_round)

aggr_result = self.aggr_fn(sag_results)

self.logger.info(f"aggregate metrics = {aggr_result.metrics}")

model = update_model(model, aggr_result)

self.select_best_model(model)

self.save_model(self.best_model, self.output_path)

self.logger.info("end Fed Avg Workflow\n \n")


```
Scatter and Gather (SAG):

SAG simply asks the WFController to broadcast the model to all clients and wait for their results:

```
def scatter_and_gather(self, model: FLModel, current_round):
msg_payload = {"min_responses": self.min_clients,
"current_round": current_round,
"num_round": self.num_rounds,
"start_round": self.start_round,
"data": model}

# (2) broadcast and wait
results = self.flare_comm.broadcast_and_wait(msg_payload)
return results
```
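For illustration, the aggregation step performed by `aggr_fn` can be thought of as a weighted average of the client results. The sketch below is self-contained and hypothetical: the `weighted_average` helper and the flat dict-of-lists result shape are assumptions for illustration, not part of the FLARE API.

```
def weighted_average(site_params, site_weights):
    """Average per-site parameter dicts, weighted by sample counts.

    site_params: {site_name: {param_name: list of floats}}
    site_weights: {site_name: number of training examples}
    """
    total = sum(site_weights.values())
    avg = {}
    for site, params in site_params.items():
        w = site_weights[site] / total
        for name, values in params.items():
            acc = avg.setdefault(name, [0.0] * len(values))
            for i, v in enumerate(values):
                acc[i] += w * v
    return avg


params = {"site-1": {"fc.w": [1.0, 2.0]}, "site-2": {"fc.w": [3.0, 4.0]}}
counts = {"site-1": 1, "site-2": 3}
result = weighted_average(params, counts)
print(result)  # {'fc.w': [2.5, 3.5]}
```

Real FedAvg aggregation operates on `FLModel` parameter tensors, but the arithmetic is the same.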

## Configurations

### client-side configuration

This is the same as the FLARE Client API configuration.

### server-side configuration

The server-side controller is simple: all we need is to use the WFController with the newly defined workflow class.


```
{
# version of the configuration
format_version = 2
task_data_filters =[]
task_result_filters = []

workflows = [
{
id = "fed_avg"
path = "nvflare.app_opt.pt.wf_controller.PTWFController"
args {
comm_msg_pull_interval = 5
task_name = "train"
wf_class_path = "fedavg_pt.PTFedAvg",
wf_args {
min_clients = 2
num_rounds = 10
output_path = "/tmp/nvflare/fedavg/mode.pth"
stop_cond = "accuracy >= 55"
model_selection_rule = "accuracy >="
}
}
}
]

components = []

}

```
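The `stop_cond` string above (e.g. `"accuracy >= 55"`) pairs a metric name with a comparison operator and a threshold. A minimal sketch of how such a condition could be parsed and evaluated; the `parse_stop_cond` helper is a hypothetical illustration, not the FLARE implementation:

```
import operator

# supported comparison operators, assuming space-separated conditions
OPS = {">=": operator.ge, "<=": operator.le, ">": operator.gt,
       "<": operator.lt, "=": operator.eq}


def parse_stop_cond(cond: str):
    """Split 'metric op target' into (metric_key, compare_fn, target)."""
    key, op_str, target = cond.split()
    return key, OPS[op_str], float(target)


key, compare, target = parse_stop_cond("accuracy >= 55")
print(key, compare(56.0, target))  # accuracy True
```

Each round, the workflow can apply `compare(metrics[key], target)` to decide whether to stop early.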


## Run the job

Assuming the current working directory is the ```hello-fedavg``` directory:

```
nvflare simulator -n 2 -t 2 jobs/fedavg -w /tmp/fedavg
```
@@ -0,0 +1,77 @@
{
format_version = 2
app_script = "train.py"
app_config = ""
executors = [
{
tasks = [
"train"
]
executor {
path = "nvflare.app_opt.pt.client_api_launcher_executor.PTClientAPILauncherExecutor"
args {
launcher_id = "launcher"
pipe_id = "pipe"
heartbeat_timeout = 60
params_exchange_format = "pytorch"
params_transfer_type = "DIFF"
train_with_evaluation = true
}
}
}
]
task_data_filters = []
task_result_filters = []
components = [
{
id = "launcher"
path = "nvflare.app_common.launchers.subprocess_launcher.SubprocessLauncher"
args {
script = "python3 custom/{app_script} {app_config} "
launch_once = true
}
}
{
id = "pipe"
path = "nvflare.fuel.utils.pipe.cell_pipe.CellPipe"
args {
mode = "PASSIVE"
site_name = "{SITE_NAME}"
token = "{JOB_ID}"
root_url = "{ROOT_URL}"
secure_mode = "{SECURE_MODE}"
workspace_dir = "{WORKSPACE}"
}
}
{
id = "metrics_pipe"
path = "nvflare.fuel.utils.pipe.cell_pipe.CellPipe"
args {
mode = "PASSIVE"
site_name = "{SITE_NAME}"
token = "{JOB_ID}"
root_url = "{ROOT_URL}"
secure_mode = "{SECURE_MODE}"
workspace_dir = "{WORKSPACE}"
}
}
{
id = "metric_relay"
path = "nvflare.app_common.widgets.metric_relay.MetricRelay"
args {
pipe_id = "metrics_pipe"
event_type = "fed.analytix_log_stats"
read_interval = 0.1
}
}
{
id = "config_preparer"
path = "nvflare.app_common.widgets.external_configurator.ExternalConfigurator"
args {
component_ids = [
"metric_relay"
]
}
}
]
}
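Placeholders such as `{app_script}` and `{SITE_NAME}` in the launcher script are substituted before the subprocess is started. Conceptually, the command line is built like this (a simplified sketch of the substitution, not the actual FLARE code):

```
# launcher script template as it appears in the config above
template = "python3 custom/{app_script} {app_config} "

# app_script / app_config come from the config; an empty app_config
# simply leaves no extra arguments
cmd = template.format(app_script="train.py", app_config="")
print(cmd.strip())  # python3 custom/train.py
```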
@@ -0,0 +1,22 @@
{
# version of the configuration
format_version = 2
task_data_filters =[]
task_result_filters = []

workflows = [
{
id = "fed_avg"
path = "nvflare.app_common.workflows.fed_avg_pt.PTFedAvg"
args {
min_clients = 2
num_rounds = 2
output_path = "/tmp/nvflare/fedavg/mode.pth"
# stop_cond = "accuracy >= 55"
}
}
]

components = [
]
}
37 changes: 37 additions & 0 deletions examples/hello-world/hello-fedavg/jobs/fedavg/app/custom/net.py
@@ -0,0 +1,37 @@
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(3, 6, 5)
self.pool = nn.MaxPool2d(2, 2)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(16 * 5 * 5, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 10)

def forward(self, x):
x = self.pool(F.relu(self.conv1(x)))
x = self.pool(F.relu(self.conv2(x)))
x = torch.flatten(x, 1) # flatten all dimensions except batch
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
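The `16 * 5 * 5` input size of `fc1` follows from the CIFAR-10 input shape (3x32x32): each 5x5 valid convolution shrinks the spatial size by 4, and each 2x2 max pool halves it. A quick check in plain Python (helper names are just for this sketch):

```
def conv_out(size, kernel):
    # valid convolution, stride 1, no padding
    return size - kernel + 1


def pool_out(size, window):
    # non-overlapping pooling
    return size // window


s = 32                              # CIFAR-10 images are 32x32
s = pool_out(conv_out(s, 5), 2)     # conv1 -> 28, pool -> 14
s = pool_out(conv_out(s, 5), 2)     # conv2 -> 10, pool -> 5
print(16 * s * s)  # 400, matching nn.Linear(16 * 5 * 5, 120)
```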