Skip to content

Commit

Permalink
Fix example for ENAS
Browse files (browse the repository at this point in the history)
  • Loading branch information
tenzen-y committed Nov 13, 2021
1 parent 67d3e50 commit 6fa3915
Show file tree
Hide file tree
Showing 6 changed files with 14 additions and 15 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
FROM ibmcom/tensorflow-ppc64le:2.2.0-py3
RUN pip install rfc3339 grpcio googleapis-common-protos
ADD . /usr/src/app/github.com/kubeflow/katib
WORKDIR /usr/src/app/github.com/kubeflow/katib/cmd/metricscollector/v1beta1/tfevent-metricscollector/
RUN pip install --no-cache-dir -r requirements.txt
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ ENV TARGET_DIR /opt/enas-cnn-cifar10
ADD examples/v1beta1/trial-images/enas-cnn-cifar10 ${TARGET_DIR}
WORKDIR ${TARGET_DIR}

RUN pip3 install --no-cache-dir -r requirements.txt
ENV PYTHONPATH ${TARGET_DIR}

RUN chgrp -R 0 ${TARGET_DIR} \
Expand Down
20 changes: 9 additions & 11 deletions examples/v1beta1/trial-images/enas-cnn-cifar10/RunTrial.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
import keras
import numpy as np
from tensorflow import keras
from keras.datasets import cifar10
from ModelConstructor import ModelConstructor
from tensorflow.keras.utils import to_categorical
from tensorflow.python.keras.utils.multi_gpu_utils import multi_gpu_model
from keras.preprocessing.image import ImageDataGenerator
import argparse
import time

if __name__ == "__main__":
parser = argparse.ArgumentParser(description='TrainingContainer')
Expand Down Expand Up @@ -46,7 +44,7 @@

test_model.summary()
test_model.compile(loss=keras.losses.categorical_crossentropy,
optimizer=keras.optimizers.Adam(lr=1e-3, decay=1e-4),
optimizer=keras.optimizers.Adam(learning_rate=1e-3, decay=1e-4),
metrics=['accuracy'])

(x_train, y_train), (x_test, y_test) = cifar10.load_data()
Expand All @@ -67,12 +65,12 @@

print(">>> Data Loaded. Training starts.")
for e in range(num_epochs):
print("\nTotal Epoch {}/{}".format(e+1, num_epochs))
history = test_model.fit_generator(generator=aug_data_flow,
steps_per_epoch=int(len(x_train)/128)+1,
epochs=1, verbose=1,
validation_data=(x_test, y_test))
print("Training-Accuracy={}".format(history.history['acc'][-1]))
print("\nTotal Epoch {}/{}".format(e + 1, num_epochs))
history = test_model.fit(aug_data_flow,
steps_per_epoch=int(len(x_train) / 128) + 1,
epochs=1, verbose=1,
validation_data=(x_test, y_test))
print("Training-Accuracy={}".format(history.history['accuracy'][-1]))
print("Training-Loss={}".format(history.history['loss'][-1]))
print("Validation-Accuracy={}".format(history.history['val_acc'][-1]))
print("Validation-Accuracy={}".format(history.history['val_accuracy'][-1]))
print("Validation-Loss={}".format(history.history['val_loss'][-1]))
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
scipy>=1.7.2
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ If you want to read more about this example, visit the official
GitHub repository.

Katib uses this training container in some Experiments, for instance in the
[TF Event Metrics Collector](../../metrics-collector/tfevent-metrics-collector.yaml#L55-L64).
[TF Event Metrics Collector](../../metrics-collector/tfevent-metrics-collector.yaml#L42-L49).
4 changes: 2 additions & 2 deletions test/e2e/v1beta1/argo_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
"earlystopping-medianstop": "cmd/earlystopping/medianstop/v1beta1/Dockerfile",
"trial-mxnet-mnist": "examples/v1beta1/trial-images/mxnet-mnist/Dockerfile",
"trial-pytorch-mnist": "examples/v1beta1/trial-images/pytorch-mnist/Dockerfile",
# "trial-tf-mnist-with-summaries": "examples/v1beta1/trial-images/tf-mnist-with-summaries/Dockerfile",
"trial-tf-mnist-with-summaries": "examples/v1beta1/trial-images/tf-mnist-with-summaries/Dockerfile",
"trial-enas-cnn-cifar10-gpu": "examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.gpu",
"trial-enas-cnn-cifar10-cpu": "examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.cpu",
"trial-darts-cnn-cifar10": "examples/v1beta1/trial-images/darts-cnn-cifar10/Dockerfile",
Expand All @@ -80,7 +80,7 @@
"pytorchjob": "examples/v1beta1/kubeflow-training-operator/pytorchjob-mnist.yaml",
"tfjob": "examples/v1beta1/kubeflow-training-operator/tfjob-mnist-with-summaries.yaml",
"file-metricscollector": "examples/v1beta1/metrics-collector/file-metrics-collector.yaml",
# "tfevent-metricscollector": "examples/v1beta1/metrics-collector/tfevent-metrics-collector.yaml",
"tfevent-metricscollector": "examples/v1beta1/metrics-collector/tfevent-metrics-collector.yaml",
"never-resume": "examples/v1beta1/resume-experiment/never-resume.yaml",
"from-volume-resume": "examples/v1beta1/resume-experiment/from-volume-resume.yaml",
"median-stop": "examples/v1beta1/early-stopping/median-stop.yaml"
Expand Down

0 comments on commit 6fa3915

Please sign in to comment.