@@ -769,13 +769,14 @@ static int generate_sched_domains(cpumask_var_t **domains,
 	int ndoms = 0;		/* number of sched domains in result */
 	int nslot;		/* next empty doms[] struct cpumask slot */
 	struct cgroup_subsys_state *pos_css;
+	bool root_load_balance = is_sched_load_balance(&top_cpuset);
 
 	doms = NULL;
 	dattr = NULL;
 	csa = NULL;
 
 	/* Special case for the 99% of systems with one, full, sched domain */
-	if (is_sched_load_balance(&top_cpuset)) {
+	if (root_load_balance && !top_cpuset.nr_subparts_cpus) {
 		ndoms = 1;
 		doms = alloc_sched_domains(ndoms);
 		if (!doms)
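Note: the single-domain fast path now also requires that no CPUs have been handed off to child partitions. top_cpuset.nr_subparts_cpus counts the CPUs owned by sub-partitions; when it is non-zero the system needs more than one sched domain even though the root itself still load balances, so the function must fall through to the full scan below.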
@@ -798,6 +799,8 @@ static int generate_sched_domains(cpumask_var_t **domains,
 	csn = 0;
 
 	rcu_read_lock();
+	if (root_load_balance)
+		csa[csn++] = &top_cpuset;
 	cpuset_for_each_descendant_pre(cp, pos_css, &top_cpuset) {
 		if (cp == &top_cpuset)
 			continue;
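Note: before this change a load-balancing root always took the fast path above, so the descendant scan never saw it. Now that the scan can run while the root still load balances (over the CPUs left outside any partition), the top cpuset must be seeded into the candidate array csa[] itself; the loop body continues to skip it.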
@@ -808,18 +811,26 @@ static int generate_sched_domains(cpumask_var_t **domains,
 		 * parent's cpus, so just skip them, and then we call
 		 * update_domain_attr_tree() to calc relax_domain_level of
 		 * the corresponding sched domain.
+		 *
+		 * If root is load-balancing, we can skip @cp if it
+		 * is a subset of the root's effective_cpus.
 		 */
 		if (!cpumask_empty(cp->cpus_allowed) &&
 		    !(is_sched_load_balance(cp) &&
 		      cpumask_intersects(cp->cpus_allowed,
 					 housekeeping_cpumask(HK_FLAG_DOMAIN))))
 			continue;
 
+		if (root_load_balance &&
+		    cpumask_subset(cp->cpus_allowed, top_cpuset.effective_cpus))
+			continue;
+
 		if (is_sched_load_balance(cp))
 			csa[csn++] = cp;
 
-		/* skip @cp's subtree */
-		pos_css = css_rightmost_descendant(pos_css);
+		/* skip @cp's subtree if not a partition root */
+		if (!is_partition_root(cp))
+			pos_css = css_rightmost_descendant(pos_css);
 	}
 	rcu_read_unlock();
 
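Note: two changes interact in this hunk. A load-balancing descendant whose cpus_allowed are fully covered by the root's effective_cpus contributes nothing beyond the root's own domain and can be skipped. And the walk now descends into partition roots instead of unconditionally pruning the subtree, since a partition's children may define further domains. A standalone sketch of that pruning rule follows (ordinary userspace C, not kernel code; the node type and field names are hypothetical stand-ins for the kernel's css/cpuset structures):

#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

struct node {
	const char *name;
	bool partition_root;
	struct node *child[2];
};

static void walk(struct node *n)
{
	if (!n)
		return;
	printf("considered for a sched domain: %s\n", n->name);
	/* Before the patch the walk always pruned here; now only
	 * non-partition-roots have their subtree skipped. */
	if (!n->partition_root)
		return;
	walk(n->child[0]);
	walk(n->child[1]);
}

int main(void)
{
	struct node grandchild = { "A/child", false, { NULL, NULL } };
	struct node a = { "A (partition root)", true, { &grandchild, NULL } };
	struct node b = { "B", false, { NULL, NULL } };
	struct node root = { "root", true, { &a, &b } };

	walk(&root);	/* visits root, A, A/child and B; prunes below B */
	return 0;
}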
@@ -947,7 +958,12 @@ static void rebuild_sched_domains_locked(void)
 	 * passing doms with offlined cpu to partition_sched_domains().
 	 * Anyways, hotplug work item will rebuild sched domains.
 	 */
-	if (!cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask))
+	if (!top_cpuset.nr_subparts_cpus &&
+	    !cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask))
+		goto out;
+
+	if (top_cpuset.nr_subparts_cpus &&
+	    !cpumask_subset(top_cpuset.effective_cpus, cpu_active_mask))
 		goto out;
 
 	/* Generate domain masks and attrs */
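Note: once a partition owns some of the root's CPUs, top_cpuset.effective_cpus no longer covers every active CPU, so demanding equality with cpu_active_mask would wrongly abort every rebuild. The check is therefore split: exact equality without sub-partitions, a subset test with them. A minimal userspace sketch of the two predicates, modelling cpumasks as plain bitmasks (mask_equal()/mask_subset() are hypothetical stand-ins for the kernel's cpumask_equal()/cpumask_subset()):

#include <stdio.h>
#include <stdbool.h>

static bool mask_equal(unsigned long a, unsigned long b)
{
	return a == b;
}

static bool mask_subset(unsigned long sub, unsigned long super)
{
	return (sub & ~super) == 0;
}

int main(void)
{
	unsigned long cpu_active_mask = 0xff;	/* CPUs 0-7 online */
	unsigned long root_effective = 0x0f;	/* CPUs 4-7 moved to a child partition */
	unsigned int nr_subparts_cpus = 4;

	/* Old check: bail out whenever the masks differ at all. */
	if (!mask_equal(root_effective, cpu_active_mask))
		printf("old check: rebuild skipped even though no CPU is offline\n");

	/* New check: with sub-partitions present, a subset suffices. */
	if (nr_subparts_cpus &&
	    mask_subset(root_effective, cpu_active_mask))
		printf("new check: rebuild proceeds\n");
	return 0;
}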
@@ -1367,11 +1383,15 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
 		update_tasks_cpumask(cp);
 
 		/*
-		 * If the effective cpumask of any non-empty cpuset is changed,
-		 * we need to rebuild sched domains.
+		 * On legacy hierarchy, if the effective cpumask of any non-
+		 * empty cpuset is changed, we need to rebuild sched domains.
+		 * On default hierarchy, the cpuset needs to be a partition
+		 * root as well.
 		 */
 		if (!cpumask_empty(cp->cpus_allowed) &&
-		    is_sched_load_balance(cp))
+		    is_sched_load_balance(cp) &&
+		    (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) ||
+		     is_partition_root(cp)))
 			need_rebuild_sched_domains = true;
 
 		rcu_read_lock();
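Note: on the default (v2) hierarchy only partition roots produce sched domains, so an effective_cpus change in an ordinary v2 cpuset cannot affect the domain layout and no longer forces a rebuild. cgroup_subsys_on_dfl(cpuset_cgrp_subsys) distinguishes the two hierarchies; legacy (v1) behaviour is unchanged.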