Enhance pre-commit hooks with flake8 and black #2407

Merged: 3 commits, merged Aug 16, 2024
Changes from all commits
4 changes: 4 additions & 0 deletions .flake8
@@ -0,0 +1,4 @@
[flake8]
max-line-length = 100
# E203 is ignored to avoid conflicts with Black's formatting, as it's not PEP 8 compliant
extend-ignore = W503, E203

Member:
Suggested change (replace the first line with the second):
extend-ignore = W503, E203
extend-ignore = W503

Instead of ignoring this error, could you fix those errors?

Contributor (author):

Sure, I will. There is also a small mistake in my flake8 fix (wrong indentation); I will fix that too.

Contributor (author):

Black adds whitespace before ":" in slices, and flake8 complains about it.

Recommended configs for Black: compatible configuration files can be found here.

From the Black docs: "This behaviour may raise E203 whitespace before ':' warnings in style guide enforcement tools like Flake8. Since E203 is not PEP 8 compliant, you should tell Flake8 to ignore these warnings." More here.

WDYT?

Member:

Oh, that makes sense.
Could you add a comment here on why we must ignore E203?
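
For illustration, a minimal, hypothetical sketch of the conflict discussed above (the variable names echo the architect.py diff below, but the values are made up):

# Minimal sketch of the Black vs. flake8 E203 conflict (hypothetical values).
v_alphas = (0.1, 0.2)
v_grads = [1.0, 2.0, 3.0, 4.0]

# When a slice bound is a non-trivial expression, Black puts a space on both
# sides of the colon, treating ":" like a low-priority binary operator:
dws = v_grads[len(v_alphas) :]  # flake8 (pycodestyle) reports E203 "whitespace before ':'" here
dalpha = v_grads[: len(v_alphas)]  # no E203 here: the space is after the colon

print(dalpha, dws)  # [1.0, 2.0] [3.0, 4.0]

With E203 in extend-ignore, flake8 accepts the slice exactly as Black formats it; without it, the two tools disagree on the same line.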

10 changes: 10 additions & 0 deletions .pre-commit-config.yaml
@@ -11,6 +11,16 @@ repos:
- id: isort
name: isort
entry: isort --profile google
- repo: https://github.com/psf/black
rev: 24.2.0
hooks:
- id: black
files: (sdk|examples)/.*
- repo: https://github.com/pycqa/flake8
rev: 7.1.1
hooks:
- id: flake8
files: (sdk|examples)/.*
Comment on lines +16 to +23

Member:

Good point.

exclude: |
(?x)^(
.*zz_generated.deepcopy.*|
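
For reference, a rough sketch of how an exclude pattern like the one above behaves: pre-commit treats it as a Python regular expression and tests it against each candidate file path. Only the visible prefix of the pattern is reproduced here (the full pattern is truncated in this view), and the example paths are made up.

import re

# (?x) enables verbose mode, so the newlines and indentation from the YAML
# block scalar are ignored when the pattern is compiled.
exclude = re.compile(
    r"""(?x)^(
        .*zz_generated.deepcopy.*
    )"""
)

print(bool(exclude.search("pkg/apis/controller/v1beta1/zz_generated.deepcopy.go")))  # True  -> file skipped
print(bool(exclude.search("examples/v1beta1/kubeflow-pipelines/mpi-job-horovod.py")))  # False -> file checked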
88 changes: 33 additions & 55 deletions examples/v1beta1/kubeflow-pipelines/mpi-job-horovod.py
@@ -21,10 +21,12 @@

# This Experiment is similar to this:
# https://github.com/kubeflow/katib/blob/master/examples/v1beta1/kubeflow-training-operator/mpijob-horovod.yaml
# Check the training container source code here: https://github.com/kubeflow/mpi-operator/tree/master/examples/horovod.
# Check the training container source code here:
# https://github.com/kubeflow/mpi-operator/tree/master/examples/horovod.

# Note: To run this example, your Kubernetes cluster should run MPIJob operator.
# Follow this guide to install MPIJob on your cluster: https://www.kubeflow.org/docs/components/training/mpi/
# Follow this guide to install MPIJob on your cluster:
# https://www.kubeflow.org/docs/components/training/mpi/

import kfp
from kfp import components
@@ -42,13 +44,12 @@

@dsl.pipeline(
name="Launch Katib MPIJob Experiment",
description="An example to launch Katib Experiment with MPIJob"
description="An example to launch Katib Experiment with MPIJob",
)
def horovod_mnist_hpo(
experiment_name: str = "mpi-horovod-mnist",
experiment_namespace: str = "kubeflow-user-example-com",
):

# Trial count specification.
max_trial_count = 6
max_failed_trial_count = 3
@@ -64,12 +65,7 @@ def horovod_mnist_hpo(
# Algorithm specification.
algorithm = V1beta1AlgorithmSpec(
algorithm_name="bayesianoptimization",
algorithm_settings=[
V1beta1AlgorithmSetting(
name="random_state",
value="10"
)
]
algorithm_settings=[V1beta1AlgorithmSetting(name="random_state", value="10")],
)

# Experiment search space.
@@ -78,19 +74,12 @@
V1beta1ParameterSpec(
name="lr",
parameter_type="double",
feasible_space=V1beta1FeasibleSpace(
min="0.001",
max="0.003"
),
feasible_space=V1beta1FeasibleSpace(min="0.001", max="0.003"),
),
V1beta1ParameterSpec(
name="num-steps",
parameter_type="int",
feasible_space=V1beta1FeasibleSpace(
min="50",
max="150",
step="10"
),
feasible_space=V1beta1FeasibleSpace(min="50", max="150", step="10"),
),
]

@@ -106,18 +95,14 @@ def horovod_mnist_hpo(
"replicas": 1,
"template": {
"metadata": {
"annotations": {
"sidecar.istio.io/inject": "false"
}
"annotations": {"sidecar.istio.io/inject": "false"}
},
"spec": {
"containers": [
{
"image": "docker.io/kubeflow/mpi-horovod-mnist",
"name": "mpi-launcher",
"command": [
"mpirun"
],
"command": ["mpirun"],
"args": [
"-np",
"2",
@@ -141,68 +126,58 @@
"--lr",
"${trialParameters.learningRate}",
"--num-steps",
"${trialParameters.numberSteps}"
"${trialParameters.numberSteps}",
],
"resources": {
"limits": {
"cpu": "500m",
"memory": "2Gi"
}
}
"limits": {"cpu": "500m", "memory": "2Gi"}
},
}
]
}
}
},
},
},
"Worker": {
"replicas": 2,
"template": {
"metadata": {
"annotations": {
"sidecar.istio.io/inject": "false"
}
"annotations": {"sidecar.istio.io/inject": "false"}
},
"spec": {
"containers": [
{
"image": "docker.io/kubeflow/mpi-horovod-mnist",
"name": "mpi-worker",
"resources": {
"limits": {
"cpu": "500m",
"memory": "4Gi"
}
}
"limits": {"cpu": "500m", "memory": "4Gi"}
},
}
]
}
}
}
}
}
},
},
},
},
},
}

# Configure parameters for the Trial template.
trial_template = V1beta1TrialTemplate(
primary_pod_labels={
"mpi-job-role": "launcher"
},
primary_pod_labels={"mpi-job-role": "launcher"},
primary_container_name="mpi-launcher",
success_condition='status.conditions.#(type=="Succeeded")#|#(status=="True")#',
failure_condition='status.conditions.#(type=="Failed")#|#(status=="True")#',
trial_parameters=[
V1beta1TrialParameterSpec(
name="learningRate",
description="Learning rate for the training model",
reference="lr"
reference="lr",
),
V1beta1TrialParameterSpec(
name="numberSteps",
description="Number of training steps",
reference="num-steps"
reference="num-steps",
),
],
trial_spec=trial_spec
trial_spec=trial_spec,
)

# Create Experiment specification.
@@ -213,13 +188,15 @@ def horovod_mnist_hpo(
objective=objective,
algorithm=algorithm,
parameters=parameters,
trial_template=trial_template
trial_template=trial_template,
)

# Get the Katib launcher.
# Load component from the URL or from the file.
katib_experiment_launcher_op = components.load_component_from_url(
"https://raw.githubusercontent.com/kubeflow/pipelines/master/components/kubeflow/katib-launcher/component.yaml")
"https://raw.githubusercontent.com/kubeflow/pipelines/master/"
"components/kubeflow/katib-launcher/component.yaml"
)
# katib_experiment_launcher_op = components.load_component_from_file(
# "../../../components/kubeflow/katib-launcher/component.yaml"
# )
@@ -231,7 +208,8 @@ def horovod_mnist_hpo(
experiment_name=experiment_name,
experiment_namespace=experiment_namespace,
experiment_spec=ApiClient().sanitize_for_serialization(experiment_spec),
experiment_timeout_minutes=60)
experiment_timeout_minutes=60,
)

# Output container to print the results.
dsl.ContainerOp(
44 changes: 26 additions & 18 deletions examples/v1beta1/trial-images/darts-cnn-cifar10/architect.py
@@ -17,9 +17,8 @@
import torch


class Architect():
"""" Architect controls architecture of cell by computing gradients of alphas
"""
class Architect:
""" " Architect controls architecture of cell by computing gradients of alphas"""

def __init__(self, model, w_momentum, w_weight_decay, device):
self.model = model
@@ -48,25 +47,32 @@ def virtual_step(self, train_x, train_y, xi, w_optim):

# Compute gradient
gradients = torch.autograd.grad(loss, self.model.getWeights())

# Do virtual step (Update gradient)
# Below operations do not need gradient tracking
with torch.no_grad():
# dict key is not the value, but the pointer. So original network weight have to
# be iterated also.
for w, vw, g in zip(self.model.getWeights(), self.v_model.getWeights(), gradients):
m = w_optim.state[w].get("momentum_buffer", 0.) * self.w_momentum
if(self.device == 'cuda'):
vw.copy_(w - torch.cuda.FloatTensor(xi) * (m + g + self.w_weight_decay * w))
elif(self.device == 'cpu'):
vw.copy_(w - torch.FloatTensor(xi) * (m + g + self.w_weight_decay * w))
for w, vw, g in zip(
self.model.getWeights(), self.v_model.getWeights(), gradients
):
m = w_optim.state[w].get("momentum_buffer", 0.0) * self.w_momentum
if self.device == "cuda":
vw.copy_(
w
- torch.cuda.FloatTensor(xi) * (m + g + self.w_weight_decay * w)
)
elif self.device == "cpu":
vw.copy_(
w - torch.FloatTensor(xi) * (m + g + self.w_weight_decay * w)
)

# Sync alphas
for a, va in zip(self.model.getAlphas(), self.v_model.getAlphas()):
va.copy_(a)

def unrolled_backward(self, train_x, train_y, valid_x, valid_y, xi, w_optim):
""" Compute unrolled loss and backward its gradients
"""Compute unrolled loss and backward its gradients
Args:
xi: learning rate for virtual gradient step (same as model lr)
w_optim: weights optimizer - for virtual step
@@ -77,23 +83,23 @@ def unrolled_backward(self, train_x, train_y, valid_x, valid_y, xi, w_optim):
# Calculate unrolled loss
# Loss for validation with w'. L_valid(w')
loss = self.v_model.loss(valid_x, valid_y)

# Calculate gradient
v_alphas = tuple(self.v_model.getAlphas())
v_weights = tuple(self.v_model.getWeights())
v_grads = torch.autograd.grad(loss, v_alphas + v_weights)

dalpha = v_grads[:len(v_alphas)]
dws = v_grads[len(v_alphas):]
dalpha = v_grads[: len(v_alphas)]
dws = v_grads[len(v_alphas) :]

hessian = self.compute_hessian(dws, train_x, train_y)

# Update final gradient = dalpha - xi * hessian
with torch.no_grad():
for alpha, da, h in zip(self.model.getAlphas(), dalpha, hessian):
if(self.device == 'cuda'):
if self.device == "cuda":
alpha.grad = da - torch.cuda.FloatTensor(xi) * h
elif(self.device == 'cpu'):
elif self.device == "cpu":
alpha.grad = da - torch.cpu.FloatTensor(xi) * h

def compute_hessian(self, dws, train_x, train_y):
@@ -121,7 +127,7 @@ def compute_hessian(self, dws, train_x, train_y):
with torch.no_grad():
for p, dw in zip(self.model.getWeights(), dws):
# TODO (andreyvelich): Do we need this * 2.0 ?
p -= 2. * eps * dw
p -= 2.0 * eps * dw

loss = self.model.loss(train_x, train_y)
# dalpha { L_train(w-, alpha) }
@@ -132,5 +138,7 @@ def compute_hessian(self, dws, train_x, train_y):
for p, dw in zip(self.model.getWeights(), dws):
p += eps * dw

hessian = [(p-n) / (2. * eps) for p, n in zip(dalpha_positive, dalpha_negative)]
hessian = [
(p - n) / (2.0 * eps) for p, n in zip(dalpha_positive, dalpha_negative)
]
return hessian