Enhance pre-commit hooks with flake8 and black (#2407)
* Add black formatter and flake8 linter to pre-commit

Also adds the flake8 config file

Signed-off-by: Ignas Baranauskas <ibaranau@redhat.com>

* Fixes black formatting

Signed-off-by: Ignas Baranauskas <ibaranau@redhat.com>

* Fixes flake8 linting errors

Signed-off-by: Ignas Baranauskas <ibaranau@redhat.com>

---------

Signed-off-by: Ignas Baranauskas <ibaranau@redhat.com>
Ygnas authored Aug 16, 2024
1 parent 8eb0e86 commit e9e6e0c
Showing 21 changed files with 821 additions and 493 deletions.
4 changes: 4 additions & 0 deletions .flake8
@@ -0,0 +1,4 @@
[flake8]
max-line-length = 100
# E203 is ignored to avoid conflicts with Black's formatting, as it's not PEP 8 compliant
extend-ignore = W503, E203
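
These two codes are the usual friction points between flake8 and Black. A minimal, illustrative sketch (not part of this commit, names made up) of Black-style formatting that flake8 would otherwise flag:

# Illustrative only: code in the style Black produces, which flake8 would
# flag without "extend-ignore = W503, E203".
v_grads = list(range(10))
v_alphas = [0.1, 0.2, 0.3]
dws = v_grads[len(v_alphas) :]  # E203: whitespace before ":" when a slice bound is an expression
total = (
    sum(v_grads)
    + sum(dws)  # W503: line break before a binary operator
)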
10 changes: 10 additions & 0 deletions .pre-commit-config.yaml
@@ -11,6 +11,16 @@ repos:
- id: isort
name: isort
entry: isort --profile google
- repo: https://github.com/psf/black
rev: 24.2.0
hooks:
- id: black
files: (sdk|examples)/.*
- repo: https://github.com/pycqa/flake8
rev: 7.1.1
hooks:
- id: flake8
files: (sdk|examples)/.*
exclude: |
(?x)^(
.*zz_generated.deepcopy.*|
88 changes: 33 additions & 55 deletions examples/v1beta1/kubeflow-pipelines/mpi-job-horovod.py
@@ -21,10 +21,12 @@

# This Experiment is similar to this:
# https://github.com/kubeflow/katib/blob/master/examples/v1beta1/kubeflow-training-operator/mpijob-horovod.yaml
# Check the training container source code here: https://github.com/kubeflow/mpi-operator/tree/master/examples/horovod.
# Check the training container source code here:
# https://github.com/kubeflow/mpi-operator/tree/master/examples/horovod.

# Note: To run this example, your Kubernetes cluster should run MPIJob operator.
# Follow this guide to install MPIJob on your cluster: https://www.kubeflow.org/docs/components/training/mpi/
# Follow this guide to install MPIJob on your cluster:
# https://www.kubeflow.org/docs/components/training/mpi/

import kfp
from kfp import components
@@ -42,13 +44,12 @@

@dsl.pipeline(
name="Launch Katib MPIJob Experiment",
description="An example to launch Katib Experiment with MPIJob"
description="An example to launch Katib Experiment with MPIJob",
)
def horovod_mnist_hpo(
experiment_name: str = "mpi-horovod-mnist",
experiment_namespace: str = "kubeflow-user-example-com",
):

# Trial count specification.
max_trial_count = 6
max_failed_trial_count = 3
@@ -64,12 +65,7 @@ def horovod_mnist_hpo(
# Algorithm specification.
algorithm = V1beta1AlgorithmSpec(
algorithm_name="bayesianoptimization",
algorithm_settings=[
V1beta1AlgorithmSetting(
name="random_state",
value="10"
)
]
algorithm_settings=[V1beta1AlgorithmSetting(name="random_state", value="10")],
)

# Experiment search space.
@@ -78,19 +74,12 @@ def horovod_mnist_hpo(
V1beta1ParameterSpec(
name="lr",
parameter_type="double",
feasible_space=V1beta1FeasibleSpace(
min="0.001",
max="0.003"
),
feasible_space=V1beta1FeasibleSpace(min="0.001", max="0.003"),
),
V1beta1ParameterSpec(
name="num-steps",
parameter_type="int",
feasible_space=V1beta1FeasibleSpace(
min="50",
max="150",
step="10"
),
feasible_space=V1beta1FeasibleSpace(min="50", max="150", step="10"),
),
]

@@ -106,18 +95,14 @@ def horovod_mnist_hpo(
"replicas": 1,
"template": {
"metadata": {
"annotations": {
"sidecar.istio.io/inject": "false"
}
"annotations": {"sidecar.istio.io/inject": "false"}
},
"spec": {
"containers": [
{
"image": "docker.io/kubeflow/mpi-horovod-mnist",
"name": "mpi-launcher",
"command": [
"mpirun"
],
"command": ["mpirun"],
"args": [
"-np",
"2",
@@ -141,68 +126,58 @@ def horovod_mnist_hpo(
"--lr",
"${trialParameters.learningRate}",
"--num-steps",
"${trialParameters.numberSteps}"
"${trialParameters.numberSteps}",
],
"resources": {
"limits": {
"cpu": "500m",
"memory": "2Gi"
}
}
"limits": {"cpu": "500m", "memory": "2Gi"}
},
}
]
}
}
},
},
},
"Worker": {
"replicas": 2,
"template": {
"metadata": {
"annotations": {
"sidecar.istio.io/inject": "false"
}
"annotations": {"sidecar.istio.io/inject": "false"}
},
"spec": {
"containers": [
{
"image": "docker.io/kubeflow/mpi-horovod-mnist",
"name": "mpi-worker",
"resources": {
"limits": {
"cpu": "500m",
"memory": "4Gi"
}
}
"limits": {"cpu": "500m", "memory": "4Gi"}
},
}
]
}
}
}
}
}
},
},
},
},
},
}

# Configure parameters for the Trial template.
trial_template = V1beta1TrialTemplate(
primary_pod_labels={
"mpi-job-role": "launcher"
},
primary_pod_labels={"mpi-job-role": "launcher"},
primary_container_name="mpi-launcher",
success_condition='status.conditions.#(type=="Succeeded")#|#(status=="True")#',
failure_condition='status.conditions.#(type=="Failed")#|#(status=="True")#',
trial_parameters=[
V1beta1TrialParameterSpec(
name="learningRate",
description="Learning rate for the training model",
reference="lr"
reference="lr",
),
V1beta1TrialParameterSpec(
name="numberSteps",
description="Number of training steps",
reference="num-steps"
reference="num-steps",
),
],
trial_spec=trial_spec
trial_spec=trial_spec,
)

# Create Experiment specification.
@@ -213,13 +188,15 @@ def horovod_mnist_hpo(
objective=objective,
algorithm=algorithm,
parameters=parameters,
trial_template=trial_template
trial_template=trial_template,
)

# Get the Katib launcher.
# Load component from the URL or from the file.
katib_experiment_launcher_op = components.load_component_from_url(
"https://raw.githubusercontent.com/kubeflow/pipelines/master/components/kubeflow/katib-launcher/component.yaml")
"https://raw.githubusercontent.com/kubeflow/pipelines/master/"
"components/kubeflow/katib-launcher/component.yaml"
)
# katib_experiment_launcher_op = components.load_component_from_file(
# "../../../components/kubeflow/katib-launcher/component.yaml"
# )
@@ -231,7 +208,8 @@ def horovod_mnist_hpo(
experiment_name=experiment_name,
experiment_namespace=experiment_namespace,
experiment_spec=ApiClient().sanitize_for_serialization(experiment_spec),
experiment_timeout_minutes=60)
experiment_timeout_minutes=60,
)

# Output container to print the results.
dsl.ContainerOp(
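
The diff cuts off before the end of this file, so the compile/submit step is not shown. As a sketch only (assuming the kfp v1 SDK imported above; the actual file may do this differently), the reformatted pipeline function could be compiled or submitted like this:

import kfp

# Sketch: assumes the horovod_mnist_hpo pipeline function defined above.
if __name__ == "__main__":
    # Compile the pipeline into a package that can be uploaded to Kubeflow Pipelines...
    kfp.compiler.Compiler().compile(horovod_mnist_hpo, "horovod-mnist-hpo.yaml")
    # ...or submit it directly to an in-cluster KFP instance:
    # kfp.Client().create_run_from_pipeline_func(horovod_mnist_hpo, arguments={})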
44 changes: 26 additions & 18 deletions examples/v1beta1/trial-images/darts-cnn-cifar10/architect.py
@@ -17,9 +17,8 @@
import torch


class Architect():
"""" Architect controls architecture of cell by computing gradients of alphas
"""
class Architect:
""" " Architect controls architecture of cell by computing gradients of alphas"""

def __init__(self, model, w_momentum, w_weight_decay, device):
self.model = model
@@ -48,25 +47,32 @@ def virtual_step(self, train_x, train_y, xi, w_optim):

# Compute gradient
gradients = torch.autograd.grad(loss, self.model.getWeights())

# Do virtual step (Update gradient)
# Below operations do not need gradient tracking
with torch.no_grad():
# dict key is not the value, but the pointer. So original network weight have to
# be iterated also.
for w, vw, g in zip(self.model.getWeights(), self.v_model.getWeights(), gradients):
m = w_optim.state[w].get("momentum_buffer", 0.) * self.w_momentum
if(self.device == 'cuda'):
vw.copy_(w - torch.cuda.FloatTensor(xi) * (m + g + self.w_weight_decay * w))
elif(self.device == 'cpu'):
vw.copy_(w - torch.FloatTensor(xi) * (m + g + self.w_weight_decay * w))
for w, vw, g in zip(
self.model.getWeights(), self.v_model.getWeights(), gradients
):
m = w_optim.state[w].get("momentum_buffer", 0.0) * self.w_momentum
if self.device == "cuda":
vw.copy_(
w
- torch.cuda.FloatTensor(xi) * (m + g + self.w_weight_decay * w)
)
elif self.device == "cpu":
vw.copy_(
w - torch.FloatTensor(xi) * (m + g + self.w_weight_decay * w)
)

# Sync alphas
for a, va in zip(self.model.getAlphas(), self.v_model.getAlphas()):
va.copy_(a)

def unrolled_backward(self, train_x, train_y, valid_x, valid_y, xi, w_optim):
""" Compute unrolled loss and backward its gradients
"""Compute unrolled loss and backward its gradients
Args:
xi: learning rate for virtual gradient step (same as model lr)
w_optim: weights optimizer - for virtual step
@@ -77,23 +83,23 @@ def unrolled_backward(self, train_x, train_y, valid_x, valid_y, xi, w_optim):
# Calculate unrolled loss
# Loss for validation with w'. L_valid(w')
loss = self.v_model.loss(valid_x, valid_y)

# Calculate gradient
v_alphas = tuple(self.v_model.getAlphas())
v_weights = tuple(self.v_model.getWeights())
v_grads = torch.autograd.grad(loss, v_alphas + v_weights)

dalpha = v_grads[:len(v_alphas)]
dws = v_grads[len(v_alphas):]
dalpha = v_grads[: len(v_alphas)]
dws = v_grads[len(v_alphas) :]

hessian = self.compute_hessian(dws, train_x, train_y)

# Update final gradient = dalpha - xi * hessian
with torch.no_grad():
for alpha, da, h in zip(self.model.getAlphas(), dalpha, hessian):
if(self.device == 'cuda'):
if self.device == "cuda":
alpha.grad = da - torch.cuda.FloatTensor(xi) * h
elif(self.device == 'cpu'):
elif self.device == "cpu":
alpha.grad = da - torch.cpu.FloatTensor(xi) * h

def compute_hessian(self, dws, train_x, train_y):
@@ -121,7 +127,7 @@ def compute_hessian(self, dws, train_x, train_y):
with torch.no_grad():
for p, dw in zip(self.model.getWeights(), dws):
# TODO (andreyvelich): Do we need this * 2.0 ?
p -= 2. * eps * dw
p -= 2.0 * eps * dw

loss = self.model.loss(train_x, train_y)
# dalpha { L_train(w-, alpha) }
@@ -132,5 +138,7 @@ def compute_hessian(self, dws, train_x, train_y):
for p, dw in zip(self.model.getWeights(), dws):
p += eps * dw

hessian = [(p-n) / (2. * eps) for p, n in zip(dalpha_positive, dalpha_negative)]
hessian = [
(p - n) / (2.0 * eps) for p, n in zip(dalpha_positive, dalpha_negative)
]
return hessian
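
For reference, the logic Black reformats here (unchanged in behaviour) is DARTS' second-order approximation. A sketch of the relations, assuming the standard DARTS formulation, with virtual weights w' and dw = the gradient of the validation loss with respect to w':

\nabla^2_{\alpha, w} L_{train}(w, \alpha)\, dw \approx \frac{\nabla_\alpha L_{train}(w + \epsilon\, dw, \alpha) - \nabla_\alpha L_{train}(w - \epsilon\, dw, \alpha)}{2\epsilon}

\nabla_\alpha \mathcal{L} \approx \nabla_\alpha L_{val}(w', \alpha) - \xi \cdot \mathrm{hessian}

compute_hessian implements the first relation (the p ± eps*dw perturbations and the (p - n) / (2 * eps) difference), and unrolled_backward applies the second when it sets alpha.grad = da - xi * h.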