Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CC block byoc jobs #2403

Merged
merged 53 commits into from
Apr 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
14d6b69
WIP: tdx_cc integration.
yhwen Feb 21, 2024
bc6b21e
Fixed token_file read.
yhwen Feb 21, 2024
9f6b715
WIP: added info for CC add client tokens.
yhwen Feb 21, 2024
dd281f5
Fixed an error when client does not have CC token reported.
yhwen Feb 21, 2024
4d87722
Added handling for a client that does not have CC_INFO.
yhwen Feb 21, 2024
8830069
Added CLIENT_QUIT event for CCManager to remove client token.
yhwen Feb 22, 2024
9fbdde3
Added _add_client_token client token logging info.
yhwen Feb 22, 2024
dd9fe74
Added peer_ctx for client quit.
yhwen Feb 22, 2024
2f44cef
set_peer_context for client quit.
yhwen Feb 22, 2024
0f87eb3
Changed the AUTHORIZATION_REASON set_prop sticky to False.
yhwen Feb 26, 2024
6ec9ae6
WIP: TokenPundit interface change.
yhwen Feb 26, 2024
044201a
WIP: added cc_authorizer_ids config.
yhwen Feb 26, 2024
3c593c0
Added cc_issuer_id for CCManager.
yhwen Feb 27, 2024
7f74d68
Renamed the TokenPundit to CCAuthorizer.
yhwen Feb 28, 2024
ff55554
Added CC token adding through client heartbeat.
yhwen Feb 29, 2024
2c5f3bd
Added function to stop current running job if CC verify fail.
yhwen Feb 29, 2024
2f383d6
If CC failed to get a token, don't allow the system to start.
yhwen Feb 29, 2024
ea5ae61
Added exceptions None check.
yhwen Feb 29, 2024
13e0b6b
Address the client side CC check before job scheduled.
yhwen Mar 1, 2024
68d8d91
fixed the PEER_FL_CONTEXT error.
yhwen Mar 1, 2024
b9942e3
Added CCManager support to have multiple cc_issuers.
yhwen Mar 2, 2024
91e3f40
optimized CCManager.
yhwen Mar 4, 2024
6206452
updated the _verify_participants() logic.
yhwen Mar 4, 2024
51226d6
set up the proper fl_ctx for admin send_requests().
yhwen Mar 4, 2024
ed770ac
Add proper fl_ctx.
yhwen Mar 4, 2024
f76fa1e
Refactor the CCManager.
yhwen Mar 5, 2024
74a6059
Refactor the CCManager and TDX_authorizer.
yhwen Mar 5, 2024
313ed21
Added TOKEN_EXPIRATION for each cc_issuer in CCManager.
yhwen Mar 6, 2024
40d70c6
Fixed CC TOKEN_EXPIRATION error.
yhwen Mar 6, 2024
0c3c188
refactor the CCManager _prepare_cc_info()
yhwen Mar 6, 2024
affda4b
Refactor.
yhwen Mar 6, 2024
2dc7df3
refactor the cc tokens periodic verification.
yhwen Mar 7, 2024
6934884
added critical_level for CCManager.
yhwen Mar 7, 2024
199eb1e
codestyle fix.
yhwen Mar 8, 2024
0936733
Removed unused import.
yhwen Mar 8, 2024
0b28480
Removed unused import.
yhwen Mar 8, 2024
000735c
Fixed the unit test.
yhwen Mar 8, 2024
95edf8e
Added CCManager unit tests.
yhwen Mar 12, 2024
ecaa7c6
Added CCTokenGenerateError and CCTokenVerifyError. Updated CCAuthoriz…
yhwen Mar 12, 2024
2b05931
Merge branch 'main' into tdx_cc
yhwen Mar 13, 2024
af39000
WIP: CC block byoc job.
yhwen Mar 13, 2024
b81a76f
block BYOC job for CC.
yhwen Mar 13, 2024
60a5c9d
Addressed some PR reviews.
yhwen Mar 14, 2024
9244fbf
Added exception catch for TDXAuthorizer.
yhwen Mar 15, 2024
8079a5b
merged in the CCManager changes.
yhwen Mar 15, 2024
4380590
codestyle fix.
yhwen Mar 15, 2024
dbc87d1
renamed some events.
yhwen Mar 18, 2024
78b52e6
renamed event names.
yhwen Mar 19, 2024
108a1ec
renamed event names.
yhwen Mar 19, 2024
17a282f
merged from tdx_cc branch.
yhwen Mar 19, 2024
39f3318
Merge branch 'main' into cc_block_byoc
yhwen Mar 19, 2024
3838e2e
Merge branch 'main' into cc_block_byoc
YuanTingHsieh Apr 2, 2024
5cd9bef
Merge branch 'main' into cc_block_byoc
chesterxgchen Apr 2, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions nvflare/apis/event_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class EventType(object):
# event types for job scheduling - server side
BEFORE_CHECK_CLIENT_RESOURCES = "_before_check_client_resources"
AFTER_CHECK_CLIENT_RESOURCES = "_after_check_client_resources"
SUBMIT_JOB = "_submit_job"
DEPLOY_JOB_TO_SERVER = "_deploy_job_to_server"
DEPLOY_JOB_TO_CLIENT = "_deploy_job_to_client"

Expand Down
8 changes: 8 additions & 0 deletions nvflare/app_opt/confidential_computing/cc_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import time
from typing import Dict, List

from nvflare.apis.app_validation import AppValidationKey
from nvflare.apis.event_type import EventType
from nvflare.apis.fl_component import FLComponent
from nvflare.apis.fl_constant import FLContextKey, RunProcessKey
Expand Down Expand Up @@ -146,6 +147,13 @@ def handle_event(self, event_type: str, fl_ctx: FLContext):
):
threading.Thread(target=self._shutdown_system, args=[reason, fl_ctx]).start()
break
elif event_type == EventType.SUBMIT_JOB:
job_meta = fl_ctx.get_prop(FLContextKey.JOB_META, {})
byoc = job_meta.get(AppValidationKey.BYOC, False)
if byoc:
yhwen marked this conversation as resolved.
Show resolved Hide resolved
fl_ctx.set_prop(
key=FLContextKey.JOB_BLOCK_REASON, value="BYOC job not allowed for CC", sticky=False, private=True
)

def _setup_cc_authorizers(self, fl_ctx):
engine = fl_ctx.get_engine()
Expand Down
14 changes: 13 additions & 1 deletion nvflare/private/fed/server/job_cmds.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@

import nvflare.fuel.hci.file_transfer_defs as ftd
from nvflare.apis.client import Client
from nvflare.apis.fl_constant import AdminCommandNames, RunProcessKey
from nvflare.apis.event_type import EventType
from nvflare.apis.fl_constant import AdminCommandNames, FLContextKey, RunProcessKey
from nvflare.apis.job_def import Job, JobMetaKey, is_valid_job_id
from nvflare.apis.job_def_manager_spec import JobDefManagerSpec, RunStatus
from nvflare.apis.storage import DATA, JOB_ZIP, META, META_JSON, WORKSPACE, WORKSPACE_ZIP
Expand Down Expand Up @@ -541,6 +542,17 @@ def submit_job(self, conn: Connection, args: List[str]):
f"job_def_manager in engine is not of type JobDefManagerSpec, but got {type(job_def_manager)}"
)

fl_ctx.set_prop(FLContextKey.JOB_META, meta, private=True, sticky=False)
engine.fire_event(EventType.SUBMIT_JOB, fl_ctx)
block_reason = fl_ctx.get_prop(FLContextKey.JOB_BLOCK_REASON)
yhwen marked this conversation as resolved.
Show resolved Hide resolved
if block_reason:
# submitted job blocked
self.logger.error(f"submitted job is blocked: {block_reason}")
conn.append_error(
block_reason, meta=make_meta(MetaStatusValue.INVALID_JOB_DEFINITION, block_reason)
)
return

# set submitter info
meta[JobMetaKey.SUBMITTER_NAME.value] = conn.get_prop(ConnProps.USER_NAME, "")
meta[JobMetaKey.SUBMITTER_ORG.value] = conn.get_prop(ConnProps.USER_ORG, "")
Expand Down
22 changes: 17 additions & 5 deletions nvflare/private/fed/server/job_meta_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,17 @@
from typing import Optional, Set, Tuple
from zipfile import ZipFile

from nvflare.apis.app_validation import AppValidationKey
from nvflare.apis.fl_constant import JobConstants
from nvflare.apis.job_def import ALL_SITES, SERVER_SITE_NAME, JobMetaKey
from nvflare.apis.job_meta_validator_spec import JobMetaValidatorSpec
from nvflare.fuel.utils.config import ConfigFormat
from nvflare.fuel.utils.config_factory import ConfigFactory
from nvflare.security.logging import secure_format_exception

CONFIG_FOLDER = "/config/"
CUSTOM_FOLDER = "/custom/"

MAX_CLIENTS = 1000000

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -120,27 +124,35 @@ def _validate_app(self, job_name: str, meta: dict, zip_file: ZipFile) -> None:

deploy_map = meta.get(JobMetaKey.DEPLOY_MAP.value)

has_byoc = False
for app, deployments in deploy_map.items():

zip_folder = job_name + "/" + app + "/config/"
if not self._entry_exists(zip_file, zip_folder):
logger.debug(f"zip folder {zip_folder} missing. Files in the zip:")
config_folder = job_name + "/" + app + CONFIG_FOLDER
if not self._entry_exists(zip_file, config_folder):
logger.debug(f"zip folder {config_folder} missing. Files in the zip:")
for x in zip_file.namelist():
logger.debug(f" {x}")
raise ValueError(f"App '{app}' in deploy_map doesn't exist for job {job_name}")

all_sites = ALL_SITES.casefold() in (site.casefold() for site in deployments)

if (all_sites or SERVER_SITE_NAME in deployments) and not self._config_exists(
zip_file, zip_folder, JobConstants.SERVER_JOB_CONFIG
zip_file, config_folder, JobConstants.SERVER_JOB_CONFIG
):
raise ValueError(f"App '{app}' will be deployed to server but server config is missing")

if (all_sites or [site for site in deployments if site != SERVER_SITE_NAME]) and not self._config_exists(
zip_file, zip_folder, JobConstants.CLIENT_JOB_CONFIG
zip_file, config_folder, JobConstants.CLIENT_JOB_CONFIG
):
raise ValueError(f"App '{app}' will be deployed to client but client config is missing")

custom_folder = job_name + "/" + app + CUSTOM_FOLDER
if self._entry_exists(zip_file, custom_folder):
has_byoc = True

if has_byoc:
meta[AppValidationKey.BYOC] = True

@staticmethod
def _convert_value_to_int(v) -> int:
if isinstance(v, int):
Expand Down
Loading