@@ -65,7 +65,13 @@ static const char * const perr_strings[] = {
};
/*
- * Exclusive CPUs distributed out to sub-partitions of top_cpuset
+ * For local partitions, update to subpartitions_cpus & isolated_cpus is done
+ * in update_parent_effective_cpumask(). For remote partitions, it is done in
+ * the remote_partition_*() and remote_cpus_update() helpers.
+ */
+/*
+ * Exclusive CPUs distributed out to local or remote sub-partitions of
+ * top_cpuset
*/
static cpumask_var_t subpartitions_cpus;
@@ -1089,9 +1095,14 @@ void cpuset_reset_sched_domains(void)
*
* Iterate through each task of @cs updating its cpus_allowed to the
* effective cpuset's. As this function is called with cpuset_mutex held,
- * cpuset membership stays stable. For top_cpuset, task_cpu_possible_mask()
- * is used instead of effective_cpus to make sure all offline CPUs are also
- * included as hotplug code won't update cpumasks for tasks in top_cpuset.
+ * cpuset membership stays stable.
+ *
+ * For top_cpuset, task_cpu_possible_mask() is used instead of effective_cpus
+ * to make sure all offline CPUs are also included as hotplug code won't
+ * update cpumasks for tasks in top_cpuset.
+ *
+ * As task_cpu_possible_mask() can be task dependent in arm64, we have to
+ * do cpu masking per task instead of doing it once for all.
*/
void cpuset_update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus)
{
@@ -1151,7 +1162,7 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs,
*
* Return: 0 if successful, an error code otherwise
*/
-static int update_partition_exclusive(struct cpuset *cs, int new_prs)
+static int update_partition_exclusive_flag(struct cpuset *cs, int new_prs)
{
bool exclusive = (new_prs > PRS_MEMBER);
@@ -1234,12 +1245,12 @@ static void reset_partition_data(struct cpuset *cs)
}
/*
- * partition_xcpus_newstate - Exclusive CPUs state change
+ * isolated_cpus_update - Update the isolated_cpus mask
* @old_prs: old partition_root_state
* @new_prs: new partition_root_state
* @xcpus: exclusive CPUs with state change
*/
-static void partition_xcpus_newstate(int old_prs, int new_prs, struct cpumask *xcpus)
+static void isolated_cpus_update(int old_prs, int new_prs, struct cpumask *xcpus)
{
WARN_ON_ONCE(old_prs == new_prs);
if (new_prs == PRS_ISOLATED)
@@ -1273,8 +1284,8 @@ static bool partition_xcpus_add(int new_prs, struct cpuset *parent,
isolcpus_updated = (new_prs != parent->partition_root_state);
if (isolcpus_updated)
- partition_xcpus_newstate(parent->partition_root_state, new_prs,
- xcpus);
+ isolated_cpus_update(parent->partition_root_state, new_prs,
+ xcpus);
cpumask_andnot(parent->effective_cpus, parent->effective_cpus, xcpus);
return isolcpus_updated;
@@ -1304,8 +1315,8 @@ static bool partition_xcpus_del(int old_prs, struct cpuset *parent,
isolcpus_updated = (old_prs != parent->partition_root_state);
if (isolcpus_updated)
- partition_xcpus_newstate(old_prs, parent->partition_root_state,
- xcpus);
+ isolated_cpus_update(old_prs, parent->partition_root_state,
+ xcpus);
cpumask_and(xcpus, xcpus, cpu_active_mask);
cpumask_or(parent->effective_cpus, parent->effective_cpus, xcpus);
@@ -1634,8 +1645,8 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
int old_prs, new_prs;
int part_error = PERR_NONE; /* Partition error? */
int subparts_delta = 0;
- struct cpumask *xcpus; /* cs effective_xcpus */
int isolcpus_updated = 0;
+ struct cpumask *xcpus = user_xcpus(cs);
bool nocpu;
lockdep_assert_held(&cpuset_mutex);
@@ -1647,7 +1658,6 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
*/
adding = deleting = false;
old_prs = new_prs = cs->partition_root_state;
- xcpus = user_xcpus(cs);
if (cmd == partcmd_invalidate) {
if (is_prs_invalid(old_prs))
@@ -1861,7 +1871,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
* CPU lists in cs haven't been updated yet. So defer it to later.
*/
if ((old_prs != new_prs) && (cmd != partcmd_update)) {
- int err = update_partition_exclusive(cs, new_prs);
+ int err = update_partition_exclusive_flag(cs, new_prs);
if (err)
return err;
@@ -1899,7 +1909,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
update_unbound_workqueue_cpumask(isolcpus_updated);
if ((old_prs != new_prs) && (cmd == partcmd_update))
- update_partition_exclusive(cs, new_prs);
+ update_partition_exclusive_flag(cs, new_prs);
if (adding || deleting) {
cpuset_update_tasks_cpumask(parent, tmp->addmask);
@@ -2829,7 +2839,7 @@ static int update_prstate(struct cpuset *cs, int new_prs)
int err = PERR_NONE, old_prs = cs->partition_root_state;
struct cpuset *parent = parent_cs(cs);
struct tmpmasks tmpmask;
- bool new_xcpus_state = false;
+ bool isolcpus_updated = false;
if (old_prs == new_prs)
return 0;
@@ -2843,7 +2853,7 @@ static int update_prstate(struct cpuset *cs, int new_prs)
if (alloc_cpumasks(NULL, &tmpmask))
return -ENOMEM;
- err = update_partition_exclusive(cs, new_prs);
+ err = update_partition_exclusive_flag(cs, new_prs);
if (err)
goto out;
@@ -2884,8 +2894,9 @@ static int update_prstate(struct cpuset *cs, int new_prs)
} else if (old_prs && new_prs) {
/*
* A change in load balance state only, no change in cpumasks.
+ * Need to update isolated_cpus.
*/
- new_xcpus_state = true;
+ isolcpus_updated = true;
} else {
/*
* Switching back to member is always allowed even if it
@@ -2909,7 +2920,7 @@ static int update_prstate(struct cpuset *cs, int new_prs)
*/
if (err) {
new_prs = -new_prs;
- update_partition_exclusive(cs, new_prs);
+ update_partition_exclusive_flag(cs, new_prs);
}
spin_lock_irq(&callback_lock);
@@ -2917,14 +2928,18 @@ static int update_prstate(struct cpuset *cs, int new_prs)
WRITE_ONCE(cs->prs_err, err);
if (!is_partition_valid(cs))
reset_partition_data(cs);
- else if (new_xcpus_state)
- partition_xcpus_newstate(old_prs, new_prs, cs->effective_xcpus);
+ else if (isolcpus_updated)
+ isolated_cpus_update(old_prs, new_prs, cs->effective_xcpus);
spin_unlock_irq(&callback_lock);
- update_unbound_workqueue_cpumask(new_xcpus_state);
+ update_unbound_workqueue_cpumask(isolcpus_updated);
- /* Force update if switching back to member */
+ /* Force update if switching back to member & update effective_xcpus */
update_cpumasks_hier(cs, &tmpmask, !new_prs);
+ /* A newly created partition must have effective_xcpus set */
+ WARN_ON_ONCE(!old_prs && (new_prs > 0)
+ && cpumask_empty(cs->effective_xcpus));
+
/* Update sched domains and load balance flag */
update_partition_sd_lb(cs, old_prs);
Rename partition_xcpus_newstate() to isolated_cpus_update(), update_partition_exclusive() to update_partition_exclusive_flag() and the new_xcpus_state variable to isolcpus_updated to make their meanings more explicit. Also add some comments to further clarify the code. No functional change is expected. Signed-off-by: Waiman Long <longman@redhat.com> --- kernel/cgroup/cpuset.c | 61 ++++++++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 23 deletions(-)