From da3f8e0620d9a132985e1353ac40931075d8b2e4 Mon Sep 17 00:00:00 2001
From: YuanTingHsieh
Date: Thu, 20 Jul 2023 11:41:28 -0700
Subject: [PATCH] Add lightning api

---
 .../jobs/decorator/app/custom/__init__.py     |  13 ---
 .../interface/app/config/config_exchange.json |   6 --
 .../app/config/config_fed_client.json         |  28 -----
 .../app/config/config_fed_server.json         |  49 ---------
 .../jobs/interface/app/custom/cifar10.py      | 102 ------------------
 .../ml-to-fl/jobs/interface/app/custom/net.py |  37 -------
 .../ml-to-fl/jobs/interface/meta.json         |  10 --
 nvflare/app_common/utils/fl_model_utils.py    |   4 -
 nvflare/client/cache.py                       |  76 -------------
 9 files changed, 325 deletions(-)
 delete mode 100644 examples/advanced/ml-to-fl/jobs/decorator/app/custom/__init__.py
 delete mode 100644 examples/advanced/ml-to-fl/jobs/interface/app/config/config_exchange.json
 delete mode 100644 examples/advanced/ml-to-fl/jobs/interface/app/config/config_fed_client.json
 delete mode 100644 examples/advanced/ml-to-fl/jobs/interface/app/config/config_fed_server.json
 delete mode 100644 examples/advanced/ml-to-fl/jobs/interface/app/custom/cifar10.py
 delete mode 100644 examples/advanced/ml-to-fl/jobs/interface/app/custom/net.py
 delete mode 100644 examples/advanced/ml-to-fl/jobs/interface/meta.json
 delete mode 100644 nvflare/client/cache.py

diff --git a/examples/advanced/ml-to-fl/jobs/decorator/app/custom/__init__.py b/examples/advanced/ml-to-fl/jobs/decorator/app/custom/__init__.py
deleted file mode 100644
index 4fc50543f1..0000000000
--- a/examples/advanced/ml-to-fl/jobs/decorator/app/custom/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/examples/advanced/ml-to-fl/jobs/interface/app/config/config_exchange.json b/examples/advanced/ml-to-fl/jobs/interface/app/config/config_exchange.json
deleted file mode 100644
index 756b617ec2..0000000000
--- a/examples/advanced/ml-to-fl/jobs/interface/app/config/config_exchange.json
+++ /dev/null
@@ -1,6 +0,0 @@
-{
-  "exchange_path": "./",
-  "exchange_format": "pytorch",
-  "params_type": "DIFF",
-  "params_diff_func": "numerical_params_diff"
-}
\ No newline at end of file
diff --git a/examples/advanced/ml-to-fl/jobs/interface/app/config/config_fed_client.json b/examples/advanced/ml-to-fl/jobs/interface/app/config/config_fed_client.json
deleted file mode 100644
index 5a09f80ffb..0000000000
--- a/examples/advanced/ml-to-fl/jobs/interface/app/config/config_fed_client.json
+++ /dev/null
@@ -1,28 +0,0 @@
-{
-  "format_version": 2,
-
-  "executors": [
-    {
-      "tasks": ["train"],
-      "executor": {
-        "name": "PTFilePipeLauncherExecutor",
-        "args": {
-          "launcher_id": "launcher"
-        }
-      }
-    }
-  ],
-  "task_result_filters": [
-  ],
-  "task_data_filters": [
-  ],
-  "components": [
-    {
-      "id": "launcher",
-      "name": "SubprocessLauncher",
-      "args": {
-        "script": "python custom/cifar10.py --epochs 1"
-      }
-    }
-  ]
-}
diff --git a/examples/advanced/ml-to-fl/jobs/interface/app/config/config_fed_server.json b/examples/advanced/ml-to-fl/jobs/interface/app/config/config_fed_server.json
deleted file mode 100644
index a643eb922e..0000000000
--- a/examples/advanced/ml-to-fl/jobs/interface/app/config/config_fed_server.json
+++ /dev/null
@@ -1,49 +0,0 @@
-{
-  "format_version": 2,
-
-  "server": {
-    "heart_beat_timeout": 600
-  },
-  "task_data_filters": [],
-  "task_result_filters": [],
-  "components": [
-    {
-      "id": "persistor",
-      "name": "PTFileModelPersistor",
-      "args": {
-        "model": {
-          "path": "net.Net"
-        }
-      }
-    },
-    {
-      "id": "shareable_generator",
-      "path": "nvflare.app_common.shareablegenerators.full_model_shareable_generator.FullModelShareableGenerator",
-      "args": {}
-    },
-    {
-      "id": "aggregator",
-      "path": "nvflare.app_common.aggregators.intime_accumulate_model_aggregator.InTimeAccumulateWeightedAggregator",
-      "args": {
-        "expected_data_kind": "WEIGHT_DIFF"
-      }
-    }
-  ],
-  "workflows": [
-    {
-      "id": "scatter_and_gather",
-      "name": "ScatterAndGather",
-      "args": {
-        "min_clients" : 2,
-        "num_rounds" : 2,
-        "start_round": 0,
-        "wait_time_after_min_received": 0,
-        "aggregator_id": "aggregator",
-        "persistor_id": "persistor",
-        "shareable_generator_id": "shareable_generator",
-        "train_task_name": "train",
-        "train_timeout": 0
-      }
-    }
-  ]
-}
diff --git a/examples/advanced/ml-to-fl/jobs/interface/app/custom/cifar10.py b/examples/advanced/ml-to-fl/jobs/interface/app/custom/cifar10.py
deleted file mode 100644
index 3d91a653b1..0000000000
--- a/examples/advanced/ml-to-fl/jobs/interface/app/custom/cifar10.py
+++ /dev/null
@@ -1,102 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import torch
-import torch.nn as nn
-import torch.optim as optim
-import torchvision
-import torchvision.transforms as transforms
-from net import Net
-
-import nvflare.client as flare
-
-DATASET_PATH = "/tmp/nvflare/data"
-device = "cuda:0"
-
-transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
-batch_size = 4
-
-trainset = torchvision.datasets.CIFAR10(root=DATASET_PATH, train=True, download=True, transform=transform)
-trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)
-
-testset = torchvision.datasets.CIFAR10(root=DATASET_PATH, train=False, download=True, transform=transform)
-testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)
-
-
-net = Net()
-
-# initializes NVFlare interface
-flare.init(config="config/config_exchange.json")
-input_model, input_meta = flare.receive_model()
-
-# get model from NVFlare
-net.load_state_dict(input_model)
-
-criterion = nn.CrossEntropyLoss()
-optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
-
-
-net.to(device)
-for epoch in range(2):  # loop over the dataset multiple times
-
-    running_loss = 0.0
-    for i, data in enumerate(trainloader, 0):
-        # get the inputs; data is a list of [inputs, labels]
-        inputs, labels = data[0].to(device), data[1].to(device)
-
-        # zero the parameter gradients
-        optimizer.zero_grad()
-
-        # forward + backward + optimize
-        outputs = net(inputs)
-        loss = criterion(outputs, labels)
-        loss.backward()
-        optimizer.step()
-
-        # print statistics
-        running_loss += loss.item()
-        if i % 2000 == 1999:  # print every 2000 mini-batches
-            print(f"[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}")
-            running_loss = 0.0
-
-print("Finished Training")
-
-
-PATH = "./cifar_net.pth"
-torch.save(net.state_dict(), PATH)
-
-
-net = Net()
-net.load_state_dict(input_model)
-net.to(device)
-
-
-correct = 0
-total = 0
-# since we're not training, we don't need to calculate the gradients for our outputs
-with torch.no_grad():
-    for data in testloader:
-        images, labels = data[0].to(device), data[1].to(device)
-        # calculate outputs by running images through the network
-        outputs = net(images)
-        # the class with the highest energy is what we choose as prediction
-        _, predicted = torch.max(outputs.data, 1)
-        total += labels.size(0)
-        correct += (predicted == labels).sum().item()
-
-print(f"Accuracy of the network on the 10000 test images: {100 * correct // total} %")
-
-net.load_state_dict(torch.load(PATH))
-flare.submit_metrics({"accuracy": 100 * correct // total})
-flare.submit_model(net.cpu().state_dict())
diff --git a/examples/advanced/ml-to-fl/jobs/interface/app/custom/net.py b/examples/advanced/ml-to-fl/jobs/interface/app/custom/net.py
deleted file mode 100644
index 031f84f432..0000000000
--- a/examples/advanced/ml-to-fl/jobs/interface/app/custom/net.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-
-class Net(nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.conv1 = nn.Conv2d(3, 6, 5)
-        self.pool = nn.MaxPool2d(2, 2)
-        self.conv2 = nn.Conv2d(6, 16, 5)
-        self.fc1 = nn.Linear(16 * 5 * 5, 120)
-        self.fc2 = nn.Linear(120, 84)
-        self.fc3 = nn.Linear(84, 10)
-
-    def forward(self, x):
-        x = self.pool(F.relu(self.conv1(x)))
-        x = self.pool(F.relu(self.conv2(x)))
-        x = torch.flatten(x, 1)  # flatten all dimensions except batch
-        x = F.relu(self.fc1(x))
-        x = F.relu(self.fc2(x))
-        x = self.fc3(x)
-        return x
diff --git a/examples/advanced/ml-to-fl/jobs/interface/meta.json b/examples/advanced/ml-to-fl/jobs/interface/meta.json
deleted file mode 100644
index 01f4992dbe..0000000000
--- a/examples/advanced/ml-to-fl/jobs/interface/meta.json
+++ /dev/null
@@ -1,10 +0,0 @@
-{
-  "name": "subprocess with file pipe",
-  "resource_spec": {},
-  "min_clients" : 2,
-  "deploy_map": {
-    "app": [
-      "@ALL"
-    ]
-  }
-}
diff --git a/nvflare/app_common/utils/fl_model_utils.py b/nvflare/app_common/utils/fl_model_utils.py
index 42fdd53a62..f583b5fedf 100644
--- a/nvflare/app_common/utils/fl_model_utils.py
+++ b/nvflare/app_common/utils/fl_model_utils.py
@@ -133,10 +133,6 @@ def from_shareable(
             kwargs[FLModelConst.META][MetaKey.JOB_ID] = fl_ctx.get_job_id()
             kwargs[FLModelConst.META][MetaKey.SITE_NAME] = fl_ctx.get_identity_name()
 
-        if fl_ctx is not None:
-            kwargs[FLModelConst.META][MetaKey.JOB_ID] = fl_ctx.get_job_id()
-            kwargs[FLModelConst.META][MetaKey.SITE_NAME] = fl_ctx.get_identity_name()
-
         result = FLModel(**kwargs)
         return result
 
diff --git a/nvflare/client/cache.py b/nvflare/client/cache.py
deleted file mode 100644
index 3162b5c298..0000000000
--- a/nvflare/client/cache.py
+++ /dev/null
@@ -1,76 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import Optional
-
-from nvflare.app_common.abstract.fl_model import FLModel, ParamsType
-from nvflare.app_common.model_exchange.model_exchanger import ModelExchanger
-
-from .config import ClientConfig
-from .utils import copy_fl_model_attributes, get_meta_from_fl_model, numerical_params_diff, set_fl_model_with_meta
-
-IN_ATTRS = ("optimizer_params", "current_round")
-SYS_ATTRS = ("job_id", "site_name", "total_rounds")
-
-DIFF_MAP = {"numerical_params_diff": numerical_params_diff}
-
-
-class Cache:
-    """This class is used to remember attributes that need to share for a user code.
-
-    For example, after "global_evaluate" we should remember the "metrics" value.
-    And set that into the model that we want to submit after "train".
-
-    For each user file:
-        - we only need 1 model exchanger.
-        - we only need to pull global model once
-
-    """
-
-    def __init__(self, model_exchanger: ModelExchanger, config: ClientConfig):
-        self.model_exchanger = model_exchanger
-        self.input_model: Optional[FLModel] = None
-        self.meta = None
-        self.sys_meta = None
-
-        self.config = config
-        self.initial_metrics = None  # get from evaluate on "global model"
-        self._get_model()
-
-    def _get_model(self):
-        self.input_model: FLModel = self.model_exchanger.receive_model()
-        self.meta = get_meta_from_fl_model(self.input_model, IN_ATTRS)
-        self.sys_meta = get_meta_from_fl_model(self.input_model, SYS_ATTRS)
-
-    def construct_fl_model(self, params):
-        fl_model = FLModel(params_type=ParamsType.FULL, params=params)
-        if self.initial_metrics is not None:
-            fl_model.metrics = self.initial_metrics
-
-        # model difference
-        params_diff_func_name = self.config.get_params_diff_func()
-        if params_diff_func_name is not None:
-            if params_diff_func_name not in DIFF_MAP:
-                raise RuntimeError(f"params_diff_func {params_diff_func_name} is not pre-defined.")
-            params_diff_func = DIFF_MAP[params_diff_func_name]
-            fl_model.params = params_diff_func(self.input_model.params, fl_model.params)
-            fl_model.params_type = ParamsType.DIFF
-
-        set_fl_model_with_meta(fl_model, self.meta, IN_ATTRS)
-        copy_fl_model_attributes(self.input_model, fl_model)
-        fl_model.meta = self.meta
-        return fl_model
-
-    def __str__(self):
-        return f"Cache(model_exchanger: {self.model_exchanger}, initial_metrics: {self.initial_metrics})"