From da3f8e0620d9a132985e1353ac40931075d8b2e4 Mon Sep 17 00:00:00 2001
From: YuanTingHsieh
Date: Thu, 20 Jul 2023 11:41:28 -0700
Subject: [PATCH] Add lightning api

---
 .../jobs/decorator/app/custom/__init__.py     |  13 ---
 .../interface/app/config/config_exchange.json |   6 --
 .../app/config/config_fed_client.json         |  28 -----
 .../app/config/config_fed_server.json         |  49 ---------
 .../jobs/interface/app/custom/cifar10.py      | 102 ------------------
 .../ml-to-fl/jobs/interface/app/custom/net.py |  37 -------
 .../ml-to-fl/jobs/interface/meta.json         |  10 --
 nvflare/app_common/utils/fl_model_utils.py    |   4 -
 nvflare/client/cache.py                       |  76 -------------
 9 files changed, 325 deletions(-)
 delete mode 100644 examples/advanced/ml-to-fl/jobs/decorator/app/custom/__init__.py
 delete mode 100644 examples/advanced/ml-to-fl/jobs/interface/app/config/config_exchange.json
 delete mode 100644 examples/advanced/ml-to-fl/jobs/interface/app/config/config_fed_client.json
 delete mode 100644 examples/advanced/ml-to-fl/jobs/interface/app/config/config_fed_server.json
 delete mode 100644 examples/advanced/ml-to-fl/jobs/interface/app/custom/cifar10.py
 delete mode 100644 examples/advanced/ml-to-fl/jobs/interface/app/custom/net.py
 delete mode 100644 examples/advanced/ml-to-fl/jobs/interface/meta.json
 delete mode 100644 nvflare/client/cache.py

diff --git a/examples/advanced/ml-to-fl/jobs/decorator/app/custom/__init__.py b/examples/advanced/ml-to-fl/jobs/decorator/app/custom/__init__.py
deleted file mode 100644
index 4fc50543f1..0000000000
--- a/examples/advanced/ml-to-fl/jobs/decorator/app/custom/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/examples/advanced/ml-to-fl/jobs/interface/app/config/config_exchange.json b/examples/advanced/ml-to-fl/jobs/interface/app/config/config_exchange.json
deleted file mode 100644
index 756b617ec2..0000000000
--- a/examples/advanced/ml-to-fl/jobs/interface/app/config/config_exchange.json
+++ /dev/null
@@ -1,6 +0,0 @@
-{
-  "exchange_path": "./",
-  "exchange_format": "pytorch",
-  "params_type": "DIFF",
-  "params_diff_func": "numerical_params_diff"
-}
\ No newline at end of file
diff --git a/examples/advanced/ml-to-fl/jobs/interface/app/config/config_fed_client.json b/examples/advanced/ml-to-fl/jobs/interface/app/config/config_fed_client.json
deleted file mode 100644
index 5a09f80ffb..0000000000
--- a/examples/advanced/ml-to-fl/jobs/interface/app/config/config_fed_client.json
+++ /dev/null
@@ -1,28 +0,0 @@
-{
-  "format_version": 2,
-
-  "executors": [
-    {
-      "tasks": ["train"],
-      "executor": {
-        "name": "PTFilePipeLauncherExecutor",
-        "args": {
-          "launcher_id": "launcher"
-        }
-      }
-    }
-  ],
-  "task_result_filters": [
-  ],
-  "task_data_filters": [
-  ],
-  "components": [
-    {
-      "id": "launcher",
-      "name": "SubprocessLauncher",
-      "args": {
-        "script": "python custom/cifar10.py --epochs 1"
-      }
-    }
-  ]
-}
diff --git a/examples/advanced/ml-to-fl/jobs/interface/app/config/config_fed_server.json b/examples/advanced/ml-to-fl/jobs/interface/app/config/config_fed_server.json
deleted file mode 100644
index a643eb922e..0000000000
--- a/examples/advanced/ml-to-fl/jobs/interface/app/config/config_fed_server.json
+++ /dev/null
@@ -1,49 +0,0 @@
-{
-  "format_version": 2,
-
-  "server": {
-    "heart_beat_timeout": 600
-  },
-  "task_data_filters": [],
-  "task_result_filters": [],
-  "components": [
-    {
-      "id": "persistor",
-      "name": "PTFileModelPersistor",
-      "args": {
-        "model": {
-          "path": "net.Net"
-        }
-      }
-    },
-    {
-      "id": "shareable_generator",
-      "path": "nvflare.app_common.shareablegenerators.full_model_shareable_generator.FullModelShareableGenerator",
-      "args": {}
-    },
-    {
-      "id": "aggregator",
-      "path": "nvflare.app_common.aggregators.intime_accumulate_model_aggregator.InTimeAccumulateWeightedAggregator",
-      "args": {
-        "expected_data_kind": "WEIGHT_DIFF"
-      }
-    }
-  ],
-  "workflows": [
-    {
-      "id": "scatter_and_gather",
-      "name": "ScatterAndGather",
-      "args": {
-        "min_clients" : 2,
-        "num_rounds" : 2,
-        "start_round": 0,
-        "wait_time_after_min_received": 0,
-        "aggregator_id": "aggregator",
-        "persistor_id": "persistor",
-        "shareable_generator_id": "shareable_generator",
-        "train_task_name": "train",
-        "train_timeout": 0
-      }
-    }
-  ]
-}
diff --git a/examples/advanced/ml-to-fl/jobs/interface/app/custom/cifar10.py b/examples/advanced/ml-to-fl/jobs/interface/app/custom/cifar10.py
deleted file mode 100644
index 3d91a653b1..0000000000
--- a/examples/advanced/ml-to-fl/jobs/interface/app/custom/cifar10.py
+++ /dev/null
@@ -1,102 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import torch
-import torch.nn as nn
-import torch.optim as optim
-import torchvision
-import torchvision.transforms as transforms
-from net import Net
-
-import nvflare.client as flare
-
-DATASET_PATH = "/tmp/nvflare/data"
-device = "cuda:0"
-
-transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
-batch_size = 4
-
-trainset = torchvision.datasets.CIFAR10(root=DATASET_PATH, train=True, download=True, transform=transform)
-trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)
-
-testset = torchvision.datasets.CIFAR10(root=DATASET_PATH, train=False, download=True, transform=transform)
-testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)
-
-
-net = Net()
-
-# initializes NVFlare interface
-flare.init(config="config/config_exchange.json")
-input_model, input_meta = flare.receive_model()
-
-# get model from NVFlare
-net.load_state_dict(input_model)
-
-criterion = nn.CrossEntropyLoss()
-optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
-
-
-net.to(device)
-for epoch in range(2):  # loop over the dataset multiple times
-
-    running_loss = 0.0
-    for i, data in enumerate(trainloader, 0):
-        # get the inputs; data is a list of [inputs, labels]
-        inputs, labels = data[0].to(device), data[1].to(device)
-
-        # zero the parameter gradients
-        optimizer.zero_grad()
-
-        # forward + backward + optimize
-        outputs = net(inputs)
-        loss = criterion(outputs, labels)
-        loss.backward()
-        optimizer.step()
-
-        # print statistics
-        running_loss += loss.item()
-        if i % 2000 == 1999:  # print every 2000 mini-batches
-            print(f"[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}")
-            running_loss = 0.0
-
-print("Finished Training")
-
-
-PATH = "./cifar_net.pth"
-torch.save(net.state_dict(), PATH)
-
-
-net = Net()
-net.load_state_dict(input_model)
-net.to(device)
-
-
-correct = 0
-total = 0
-# since we're not training, we don't need to calculate the gradients for our outputs
-with torch.no_grad():
-    for data in testloader:
-        images, labels = data[0].to(device), data[1].to(device)
-        # calculate outputs by running images through the network
-        outputs = net(images)
-        # the class with the highest energy is what we choose as prediction
-        _, predicted = torch.max(outputs.data, 1)
-        total += labels.size(0)
-        correct += (predicted == labels).sum().item()
-
-print(f"Accuracy of the network on the 10000 test images: {100 * correct // total} %")
-
-net.load_state_dict(torch.load(PATH))
-flare.submit_metrics({"accuracy": 100 * correct // total})
-flare.submit_model(net.cpu().state_dict())
diff --git a/examples/advanced/ml-to-fl/jobs/interface/app/custom/net.py b/examples/advanced/ml-to-fl/jobs/interface/app/custom/net.py
deleted file mode 100644
index 031f84f432..0000000000
--- a/examples/advanced/ml-to-fl/jobs/interface/app/custom/net.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-
-class Net(nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.conv1 = nn.Conv2d(3, 6, 5)
-        self.pool = nn.MaxPool2d(2, 2)
-        self.conv2 = nn.Conv2d(6, 16, 5)
-        self.fc1 = nn.Linear(16 * 5 * 5, 120)
-        self.fc2 = nn.Linear(120, 84)
-        self.fc3 = nn.Linear(84, 10)
-
-    def forward(self, x):
-        x = self.pool(F.relu(self.conv1(x)))
-        x = self.pool(F.relu(self.conv2(x)))
-        x = torch.flatten(x, 1)  # flatten all dimensions except batch
-        x = F.relu(self.fc1(x))
-        x = F.relu(self.fc2(x))
-        x = self.fc3(x)
-        return x
diff --git a/examples/advanced/ml-to-fl/jobs/interface/meta.json b/examples/advanced/ml-to-fl/jobs/interface/meta.json
deleted file mode 100644
index 01f4992dbe..0000000000
--- a/examples/advanced/ml-to-fl/jobs/interface/meta.json
+++ /dev/null
@@ -1,10 +0,0 @@
-{
-  "name": "subprocess with file pipe",
-  "resource_spec": {},
-  "min_clients" : 2,
-  "deploy_map": {
-    "app": [
-      "@ALL"
-    ]
-  }
-}
diff --git a/nvflare/app_common/utils/fl_model_utils.py b/nvflare/app_common/utils/fl_model_utils.py
index 42fdd53a62..f583b5fedf 100644
--- a/nvflare/app_common/utils/fl_model_utils.py
+++ b/nvflare/app_common/utils/fl_model_utils.py
@@ -133,10 +133,6 @@ def from_shareable(
             kwargs[FLModelConst.META][MetaKey.JOB_ID] = fl_ctx.get_job_id()
             kwargs[FLModelConst.META][MetaKey.SITE_NAME] = fl_ctx.get_identity_name()
 
-        if fl_ctx is not None:
-            kwargs[FLModelConst.META][MetaKey.JOB_ID] = fl_ctx.get_job_id()
-            kwargs[FLModelConst.META][MetaKey.SITE_NAME] = fl_ctx.get_identity_name()
-
         result = FLModel(**kwargs)
         return result
 
diff --git a/nvflare/client/cache.py b/nvflare/client/cache.py
deleted file mode 100644
index 3162b5c298..0000000000
--- a/nvflare/client/cache.py
+++ /dev/null
@@ -1,76 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import Optional
-
-from nvflare.app_common.abstract.fl_model import FLModel, ParamsType
-from nvflare.app_common.model_exchange.model_exchanger import ModelExchanger
-
-from .config import ClientConfig
-from .utils import copy_fl_model_attributes, get_meta_from_fl_model, numerical_params_diff, set_fl_model_with_meta
-
-IN_ATTRS = ("optimizer_params", "current_round")
-SYS_ATTRS = ("job_id", "site_name", "total_rounds")
-
-DIFF_MAP = {"numerical_params_diff": numerical_params_diff}
-
-
-class Cache:
-    """This class is used to remember attributes that need to share for a user code.
-
-    For example, after "global_evaluate" we should remember the "metrics" value.
-    And set that into the model that we want to submit after "train".
-
-    For each user file:
-        - we only need 1 model exchanger.
-        - we only need to pull global model once
-
-    """
-
-    def __init__(self, model_exchanger: ModelExchanger, config: ClientConfig):
-        self.model_exchanger = model_exchanger
-        self.input_model: Optional[FLModel] = None
-        self.meta = None
-        self.sys_meta = None
-
-        self.config = config
-        self.initial_metrics = None  # get from evaluate on "global model"
-        self._get_model()
-
-    def _get_model(self):
-        self.input_model: FLModel = self.model_exchanger.receive_model()
-        self.meta = get_meta_from_fl_model(self.input_model, IN_ATTRS)
-        self.sys_meta = get_meta_from_fl_model(self.input_model, SYS_ATTRS)
-
-    def construct_fl_model(self, params):
-        fl_model = FLModel(params_type=ParamsType.FULL, params=params)
-        if self.initial_metrics is not None:
-            fl_model.metrics = self.initial_metrics
-
-        # model difference
-        params_diff_func_name = self.config.get_params_diff_func()
-        if params_diff_func_name is not None:
-            if params_diff_func_name not in DIFF_MAP:
-                raise RuntimeError(f"params_diff_func {params_diff_func_name} is not pre-defined.")
-            params_diff_func = DIFF_MAP[params_diff_func_name]
-            fl_model.params = params_diff_func(self.input_model.params, fl_model.params)
-            fl_model.params_type = ParamsType.DIFF
-
-        set_fl_model_with_meta(fl_model, self.meta, IN_ATTRS)
-        copy_fl_model_attributes(self.input_model, fl_model)
-        fl_model.meta = self.meta
-        return fl_model
-
-    def __str__(self):
-        return f"Cache(model_exchanger: {self.model_exchanger}, initial_metrics: {self.initial_metrics})"