Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions smdebug/core/actions/__init__.py

This file was deleted.

10 changes: 0 additions & 10 deletions smdebug/core/actions/action_base.py

This file was deleted.

24 changes: 0 additions & 24 deletions smdebug/core/actions/terminate_smjob.py

This file was deleted.

42 changes: 0 additions & 42 deletions smdebug/core/sagemaker_utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
# Standard Library
import os

# Third Party
import boto3

# First Party
from smdebug.core.config_constants import DEFAULT_SAGEMAKER_OUTDIR


def is_sagemaker_job():
"""
Expand All @@ -17,39 +11,3 @@ def is_sagemaker_job():
:return: True or False
"""
return "TRAINING_JOB_NAME" in os.environ


def get_sagemaker_out_dir():
    """
    Return the default output directory used for SageMaker jobs.

    :return: the value of ``DEFAULT_SAGEMAKER_OUTDIR``
    """
    return DEFAULT_SAGEMAKER_OUTDIR


class SageMakerUtils:
    """Static helpers around the SageMaker training-job API (via boto3)."""

    # Statuses that is_sagemaker_job_finished reports as "finished".
    _FINISHED_STATUSES = ("Completed", "Failed", "Stopped")

    @staticmethod
    def _is_finished_status(status):
        """
        Tell whether a ``TrainingJobStatus`` value means the job is finished.

        :param status: status string taken from ``describe_training_job``
        :return: True for "Completed", "Failed" or "Stopped"; False for
            anything else (including "InProgress", "Stopping" and any
            unrecognised value)
        """
        return status in SageMakerUtils._FINISHED_STATUSES

    @staticmethod
    def is_sagemaker_job_finished(jobname, returnMock=None):
        """
        Check whether the named training job has finished.

        :param jobname: name of the SageMaker training job
        :param returnMock: when not None, returned as-is without calling
            SageMaker at all
        :return: ``returnMock`` if given, otherwise True or False
        """
        if returnMock is not None:
            return returnMock
        client = boto3.client("sagemaker")
        response = client.describe_training_job(TrainingJobName=jobname)
        # Always yield a bool: an unrecognised status used to fall through
        # both branches and return None.
        return SageMakerUtils._is_finished_status(response["TrainingJobStatus"])

    @staticmethod
    def terminate_sagemaker_job(jobname):
        """
        Ask SageMaker to stop the named training job.

        Any exception raised by the stop request is printed, not propagated.

        :param jobname: name of the SageMaker training job
        """
        client = boto3.client("sagemaker")
        try:
            client.stop_training_job(TrainingJobName=jobname)
        except Exception as e:
            # Best effort: report the failure and carry on.
            print(e)

    @staticmethod
    def add_tags(sm_job_name, tags):
        """
        Attach tags to the named training job.

        Any exception raised while resolving the ARN or tagging is printed,
        not propagated.

        :param sm_job_name: name of the SageMaker training job
        :param tags: passed through unchanged as the ``Tags`` argument of
            the boto3 ``add_tags`` call
        """
        client = boto3.client("sagemaker")
        try:
            # Look the ARN up rather than assembling it from a hard-coded
            # region and account id, which only matched one account.
            description = client.describe_training_job(TrainingJobName=sm_job_name)
            client.add_tags(ResourceArn=description["TrainingJobArn"], Tags=tags)
        except Exception as e:
            # Best effort: report the failure and carry on.
            print(e)
10 changes: 0 additions & 10 deletions smdebug/rules/rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ def __init__(self, base_trial, other_trials=None):

self.req_tensors = RequiredTensors(self.base_trial, self.other_trials)

self.actions = None
self.logger = get_logger()
self.rule_name = self.__class__.__name__

Expand Down Expand Up @@ -56,13 +55,4 @@ def invoke(self, step):
val = self.invoke_at_step(step)

if val:
self.run_actions()
raise RuleEvaluationConditionMet(self.rule_name, step)

def register_action(self, actions):
    """
    Store the actions associated with this rule.

    :param actions: value assigned to ``self.actions``; replaces whatever
        was registered before
    """
    self.actions = actions

def run_actions(self):
    """
    Run every registered action, passing this rule's class name.

    Does nothing when ``self.actions`` is None.
    """
    if self.actions is None:
        return
    # The class name is the same for every action; compute it once.
    rule_name = self.__class__.__name__
    for action in self.actions:
        action.run(rule_name=rule_name)
46 changes: 0 additions & 46 deletions smdebug/trials/trial_catalog.py

This file was deleted.

6 changes: 0 additions & 6 deletions tests/analysis/trials/test_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,11 @@

# First Party
from smdebug.trials import LocalTrial
from smdebug.trials.trial_catalog import LocalTrialCatalog


def check_local(localdir, trial_name, num_steps, num_tensors):
    """
    Check a local trial through both the catalog and ``check_trial``.

    Asserts that the catalog built on ``localdir`` lists ``trial_name`` as a
    candidate, that a trial added to the catalog compares equal to the one
    read back, and then hands the trial to ``check_trial``.

    :param localdir: directory passed to ``LocalTrialCatalog``
    :param trial_name: trial name, also used as the sub-directory of
        ``localdir`` that the trial is loaded from
    :param num_steps: forwarded to ``check_trial``
    :param num_tensors: forwarded to ``check_trial``
    """
    tc = LocalTrialCatalog(localdir=localdir)
    assert trial_name in tc.list_candidates()
    path = os.path.join(localdir, trial_name)
    trial_obj = LocalTrial(name=trial_name, dirname=path)
    # Round-trip through the catalog: what is stored must be what comes back.
    tc.add_trial(trial_name, trial_obj)
    trial_obj2 = tc.get_trial(trial_name)
    assert trial_obj == trial_obj2
    check_trial(trial_obj, num_tensors=num_tensors, num_steps=num_steps)


Expand Down