Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tron topology_spread_constraints support to PaaSTA #3983

Merged
merged 6 commits into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 41 additions & 2 deletions paasta_tools/tron_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@
from paasta_tools import spark_tools

from paasta_tools.kubernetes_tools import (
NodeSelectorConfig,
allowlist_denylist_to_requirements,
contains_zone_label,
get_service_account_name,
limit_size_with_hash,
raw_selectors_to_requirements,
Expand Down Expand Up @@ -248,6 +250,7 @@ class TronActionConfigDict(InstanceConfigDict, total=False):
# maneuvering to unify
command: str
service_account_name: str
node_selectors: Dict[str, NodeSelectorConfig]

# the values for this dict can be anything since it's whatever
# spark accepts
Expand Down Expand Up @@ -594,18 +597,35 @@ def get_node_selectors(self) -> Dict[str, str]:
def get_node_affinities(self) -> Optional[List[Dict[str, Union[str, List[str]]]]]:
"""Converts deploy_whitelist and deploy_blacklist in node affinities.

note: At the time of writing, `kubectl describe` does not show affinities,
NOTE: At the time of writing, `kubectl describe` does not show affinities,
only selectors. To see affinities, use `kubectl get pod -o json` instead.

WARNING: At the time of writing, we only used requiredDuringSchedulingIgnoredDuringExecution node affinities in Tron as we currently have
no use case for preferredDuringSchedulingIgnoredDuringExecution node affinities.
"""
requirements = allowlist_denylist_to_requirements(
allowlist=self.get_deploy_whitelist(),
denylist=self.get_deploy_blacklist(),
)
node_selectors = self.config_dict.get("node_selectors", {})
requirements.extend(
raw_selectors_to_requirements(
raw_selectors=self.config_dict.get("node_selectors", {}), # type: ignore
raw_selectors=self.config_dict.get("node_selectors", {}),
jfongatyelp marked this conversation as resolved.
Show resolved Hide resolved
)
)

# PAASTA-18198: To improve AZ balance with Karpenter, we temporarily allow specifying zone affinities per pool
pool_node_affinities = load_system_paasta_config().get_pool_node_affinities()
if pool_node_affinities and self.get_pool() in pool_node_affinities:
current_pool_node_affinities = pool_node_affinities[self.get_pool()]
Copy link
Contributor

@EmanElsaban EmanElsaban Oct 28, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what does this dictionary/map returns pool_node_affinities[self.get_pool()]?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it the config to set the pool node affinities?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is a puppet-defined dict (see paasta_tools::public_config::pool_node_affinities internally) that defines a default set of affinities for certain pools (speaking of which, we should probably add an entry for the tron pool :p):

  default:
    topology.kubernetes.io/zone:
      - us-west-2a
      - us-west-2b

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ahh i see it returns the azs affinities:

paasta_tools::public_config::pool_node_affinities:
  # pnw-prod default should only run on us-west-2a and us-west-2b (unless there's a deploy_whitelist override)
  default:
    topology.kubernetes.io/zone:
      - us-west-2a
      - us-west-2b

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ahh sorry just saw your answer, yea makes sense

# If the service already has a node selector for a zone, we don't want to override it
if current_pool_node_affinities and not contains_zone_label(node_selectors):
requirements.extend(
raw_selectors_to_requirements(
raw_selectors=current_pool_node_affinities,
)
)

if not requirements:
return None

Expand Down Expand Up @@ -984,6 +1004,25 @@ def format_tron_action_dict(action_config: TronActionConfig):
result["node_selectors"] = action_config.get_node_selectors()
result["node_affinities"] = action_config.get_node_affinities()

# XXX: this is currently hardcoded since we should only really need TSC for zone-aware scheduling
result["topology_spread_constraints"] = [
{
# try to evenly spread pods across specified topology
"max_skew": 1,
# narrow down what pods to consider when spreading
"label_selector": {
# only consider pods that are managed by tron
"app.kubernetes.io/managed-by": "tron",
# and in the same pool
"paasta.yelp.com/pool": action_config.get_pool(),
},
# now, spread across AZs
"topology_key": "topology.kubernetes.io/zone",
# but if not possible, schedule even with a zonal imbalance
"when_unsatisfiable": "ScheduleAnyway",
},
]

# XXX: once we're off mesos we can make get_cap_* return just the cap names as a list
result["cap_add"] = [cap["value"] for cap in action_config.get_cap_add()]
result["cap_drop"] = [cap["value"] for cap in action_config.get_cap_drop()]
Expand Down
57 changes: 56 additions & 1 deletion tests/test_tron_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -997,6 +997,17 @@ def test_format_tron_action_dict_paasta(self):
"mem": 1200,
"disk": 42,
"env": mock.ANY,
"topology_spread_constraints": [
{
"label_selector": {
"app.kubernetes.io/managed-by": "tron",
"paasta.yelp.com/pool": "special_pool",
},
"max_skew": 1,
"topology_key": "topology.kubernetes.io/zone",
"when_unsatisfiable": "ScheduleAnyway",
},
],
"secret_volumes": [
{
"secret_volume_name": "tron-secret-my--service-secret1",
Expand Down Expand Up @@ -1347,6 +1358,17 @@ def test_format_tron_action_dict_spark(
"KUBECONFIG": "/etc/kubernetes/spark.conf",
"AWS_DEFAULT_REGION": "us-west-2",
},
"topology_spread_constraints": [
{
"label_selector": {
"app.kubernetes.io/managed-by": "tron",
"paasta.yelp.com/pool": "stable",
},
"max_skew": 1,
"topology_key": "topology.kubernetes.io/zone",
"when_unsatisfiable": "ScheduleAnyway",
},
],
"node_selectors": {"yelp.com/pool": "stable"},
"cap_add": [],
"cap_drop": [
Expand Down Expand Up @@ -1474,6 +1496,17 @@ def test_format_tron_action_dict_paasta_k8s_service_account(self):
},
"node_selectors": {"yelp.com/pool": "default"},
"env": mock.ANY,
"topology_spread_constraints": [
{
"label_selector": {
"app.kubernetes.io/managed-by": "tron",
"paasta.yelp.com/pool": "default",
},
"max_skew": 1,
"topology_key": "topology.kubernetes.io/zone",
"when_unsatisfiable": "ScheduleAnyway",
},
],
"secret_env": {},
"field_selector_env": {"PAASTA_POD_IP": {"field_path": "status.podIP"}},
"secret_volumes": [],
Expand Down Expand Up @@ -1621,6 +1654,17 @@ def test_format_tron_action_dict_paasta_k8s(
}
],
"env": mock.ANY,
"topology_spread_constraints": [
{
"label_selector": {
"app.kubernetes.io/managed-by": "tron",
"paasta.yelp.com/pool": "special_pool",
},
"max_skew": 1,
"topology_key": "topology.kubernetes.io/zone",
"when_unsatisfiable": "ScheduleAnyway",
},
],
"secret_env": {
"SOME_SECRET": {
"secret_name": "tron-secret-my--service-secret--name",
Expand Down Expand Up @@ -1719,6 +1763,17 @@ def test_format_tron_action_dict_paasta_no_branch_dict(self):
"mem": 1200,
"disk": 42,
"env": mock.ANY,
"topology_spread_constraints": [
{
"label_selector": {
"app.kubernetes.io/managed-by": "tron",
"paasta.yelp.com/pool": "special_pool",
},
"max_skew": 1,
"topology_key": "topology.kubernetes.io/zone",
"when_unsatisfiable": "ScheduleAnyway",
},
],
"secret_volumes": [
{
"secret_volume_name": "tron-secret-my--service-secret1",
Expand Down Expand Up @@ -1896,7 +1951,7 @@ def test_create_complete_config_e2e(self, tmpdir):
# that are not static, this will cause continuous reconfiguration, which
# will add significant load to the Tron API, which happened in DAR-1461.
# but if this is intended, just change the hash.
assert hasher.hexdigest() == "0585c2049596190f5510f8ce4fc3a9ac"
assert hasher.hexdigest() == "31634ab048abe9b40b71851797d48e4d"

def test_override_default_pool_override(self, tmpdir):
soa_dir = tmpdir.mkdir("test_create_complete_config_soa")
Expand Down
Loading