diff mbox series

[RFC,4/8] cgroup/cpuset: Better tracking of addition/deletion of isolated CPUs

Message ID 20240117163511.88173-5-longman@redhat.com (mailing list archive)
State New, archived
Headers show
Series cgroup/cpuset: Support RCU_NOCB on isolated partitions | expand

Commit Message

Waiman Long Jan. 17, 2024, 4:35 p.m. UTC
Updating the workqueue unbound cpumask to exclude the isolated CPUs in
cpuset only requires the aggregated isolated_cpus cpumask.  Other types
of CPU isolation, like the RCU no-callback CPU mode, may need to know
exactly which isolated CPUs are being added or deleted. To enable these
types of CPU isolation at run time, we need to provide better tracking
of the addition and deletion of isolated CPUs.

This patch adds a new isolated_cpus_modifiers enum type for tracking
the addition and deletion of isolated CPUs, and renames
update_unbound_workqueue_cpumask() to update_isolation_cpumasks()
to accommodate additional CPU isolation modes in the future.

There is no functional change.

Signed-off-by: Waiman Long <longman@redhat.com>
---
 kernel/cgroup/cpuset.c | 113 +++++++++++++++++++++++++----------------
 1 file changed, 69 insertions(+), 44 deletions(-)
diff mbox series

Patch

diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index dfbb16aca9f4..0479af76a5dc 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -206,6 +206,13 @@  struct cpuset {
  */
 static cpumask_var_t	subpartitions_cpus;
 
+/* Kind of change made to isolated_cpus; NONE must be 0 (used as a boolean) */
+enum isolated_cpus_modifiers {
+	ISOL_CPUS_NONE = 0,	/* isolated_cpus not changed */
+	ISOL_CPUS_ADD,		/* CPUs added to isolated_cpus */
+	ISOL_CPUS_DELETE,	/* CPUs removed from isolated_cpus */
+};
+
 /*
  * Exclusive CPUs in isolated partitions
  */
@@ -1446,14 +1453,14 @@  static void partition_xcpus_newstate(int old_prs, int new_prs, struct cpumask *x
  * @new_prs: new partition_root_state
  * @parent: parent cpuset
  * @xcpus: exclusive CPUs to be added
- * Return: true if isolated_cpus modified, false otherwise
+ * Return: isolated_cpus modifier
  *
  * Remote partition if parent == NULL
  */
-static bool partition_xcpus_add(int new_prs, struct cpuset *parent,
-				struct cpumask *xcpus)
+static int partition_xcpus_add(int new_prs, struct cpuset *parent,
+			       struct cpumask *xcpus)
 {
-	bool isolcpus_updated;
+	int icpus_mod = ISOL_CPUS_NONE;	/* returned if prs is unchanged */
 
 	WARN_ON_ONCE(new_prs < 0);
 	lockdep_assert_held(&callback_lock);
@@ -1464,13 +1471,14 @@  static bool partition_xcpus_add(int new_prs, struct cpuset *parent,
 	if (parent == &top_cpuset)
 		cpumask_or(subpartitions_cpus, subpartitions_cpus, xcpus);
 
-	isolcpus_updated = (new_prs != parent->partition_root_state);
-	if (isolcpus_updated)
+	if (new_prs != parent->partition_root_state) {
 		partition_xcpus_newstate(parent->partition_root_state, new_prs,
 					 xcpus);
-
+		icpus_mod = (new_prs == PRS_ISOLATED)
+			    ? ISOL_CPUS_ADD : ISOL_CPUS_DELETE;
+	}
 	cpumask_andnot(parent->effective_cpus, parent->effective_cpus, xcpus);
-	return isolcpus_updated;
+	return icpus_mod;
 }
 
 /*
@@ -1478,14 +1486,14 @@  static bool partition_xcpus_add(int new_prs, struct cpuset *parent,
  * @old_prs: old partition_root_state
  * @parent: parent cpuset
  * @xcpus: exclusive CPUs to be removed
- * Return: true if isolated_cpus modified, false otherwise
+ * Return: isolated_cpus modifier
  *
  * Remote partition if parent == NULL
  */
-static bool partition_xcpus_del(int old_prs, struct cpuset *parent,
+static int partition_xcpus_del(int old_prs, struct cpuset *parent,
 				struct cpumask *xcpus)
 {
-	bool isolcpus_updated;
+	int icpus_mod = ISOL_CPUS_NONE;	/* returned if prs is unchanged */
 
 	WARN_ON_ONCE(old_prs < 0);
 	lockdep_assert_held(&callback_lock);
@@ -1495,27 +1503,40 @@  static bool partition_xcpus_del(int old_prs, struct cpuset *parent,
 	if (parent == &top_cpuset)
 		cpumask_andnot(subpartitions_cpus, subpartitions_cpus, xcpus);
 
-	isolcpus_updated = (old_prs != parent->partition_root_state);
-	if (isolcpus_updated)
+	if (old_prs != parent->partition_root_state) {
 		partition_xcpus_newstate(old_prs, parent->partition_root_state,
 					 xcpus);
-
+		icpus_mod = (old_prs == PRS_ISOLATED)
+			    ? ISOL_CPUS_DELETE : ISOL_CPUS_ADD;
+	}
 	cpumask_and(xcpus, xcpus, cpu_active_mask);
 	cpumask_or(parent->effective_cpus, parent->effective_cpus, xcpus);
-	return isolcpus_updated;
+	return icpus_mod;
 }
 
-static void update_unbound_workqueue_cpumask(bool isolcpus_updated)
+/**
+ * update_isolation_cpumasks - Apply a change in isolated_cpus to other cpumasks
+ * @mask: cpumask of the CPUs added or removed (not used by this function yet)
+ * @modifier: enum isolated_cpus_modifiers; ISOL_CPUS_NONE makes this a no-op
+ * Return: 0 if successful, error code otherwise
+ *
+ * Only the workqueue unbound cpumask is updated for now, and it is always
+ * given the whole isolated_cpus rather than @mask. Repeated calls with the
+ * same isolated_cpus will not cause further action other than a wasted mutex
+ * lock/unlock.
+ */
+static int update_isolation_cpumasks(struct cpumask *mask, int modifier)
 {
-	int ret;
+	int err;
 
 	lockdep_assert_cpus_held();
 
-	if (!isolcpus_updated)
-		return;
+	if (!modifier)
+		return 0;	/* No change in isolated CPUs */
 
-	ret = workqueue_unbound_exclude_cpumask(isolated_cpus);
-	WARN_ON_ONCE(ret < 0);
+	err = workqueue_unbound_exclude_cpumask(isolated_cpus);
+	WARN_ON_ONCE(err);
+	return err;
 }
 
 /**
@@ -1577,7 +1598,7 @@  static inline bool is_local_partition(struct cpuset *cs)
 static int remote_partition_enable(struct cpuset *cs, int new_prs,
 				   struct tmpmasks *tmp)
 {
-	bool isolcpus_updated;
+	int icpus_mod;
 
 	/*
 	 * The user must have sysadmin privilege.
@@ -1600,7 +1621,7 @@  static int remote_partition_enable(struct cpuset *cs, int new_prs,
 		return 0;
 
 	spin_lock_irq(&callback_lock);
-	isolcpus_updated = partition_xcpus_add(new_prs, NULL, tmp->new_cpus);
+	icpus_mod = partition_xcpus_add(new_prs, NULL, tmp->new_cpus);
 	list_add(&cs->remote_sibling, &remote_children);
 	if (cs->use_parent_ecpus) {
 		struct cpuset *parent = parent_cs(cs);
@@ -1609,7 +1630,7 @@  static int remote_partition_enable(struct cpuset *cs, int new_prs,
 		parent->child_ecpus_count--;
 	}
 	spin_unlock_irq(&callback_lock);
-	update_unbound_workqueue_cpumask(isolcpus_updated);
+	update_isolation_cpumasks(tmp->new_cpus, icpus_mod);
 
 	/*
 	 * Proprogate changes in top_cpuset's effective_cpus down the hierarchy.
@@ -1630,7 +1651,7 @@  static int remote_partition_enable(struct cpuset *cs, int new_prs,
  */
 static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp)
 {
-	bool isolcpus_updated;
+	int icpus_mod;
 
 	compute_effective_exclusive_cpumask(cs, tmp->new_cpus);
 	WARN_ON_ONCE(!is_remote_partition(cs));
@@ -1638,14 +1659,14 @@  static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp)
 
 	spin_lock_irq(&callback_lock);
 	list_del_init(&cs->remote_sibling);
-	isolcpus_updated = partition_xcpus_del(cs->partition_root_state,
-					       NULL, tmp->new_cpus);
+	icpus_mod = partition_xcpus_del(cs->partition_root_state, NULL,
+					tmp->new_cpus);
 	cs->partition_root_state = -cs->partition_root_state;
 	if (!cs->prs_err)
 		cs->prs_err = PERR_INVCPUS;
 	reset_partition_data(cs);
 	spin_unlock_irq(&callback_lock);
-	update_unbound_workqueue_cpumask(isolcpus_updated);
+	update_isolation_cpumasks(tmp->new_cpus, icpus_mod);
 
 	/*
 	 * Proprogate changes in top_cpuset's effective_cpus down the hierarchy.
@@ -1668,7 +1689,8 @@  static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask,
 {
 	bool adding, deleting;
 	int prs = cs->partition_root_state;
-	int isolcpus_updated = 0;
+	int icpus_add_mod = ISOL_CPUS_NONE;
+	int icpus_del_mod = ISOL_CPUS_NONE;
 
 	if (WARN_ON_ONCE(!is_remote_partition(cs)))
 		return;
@@ -1693,12 +1715,12 @@  static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask,
 
 	spin_lock_irq(&callback_lock);
 	if (adding)
-		isolcpus_updated += partition_xcpus_add(prs, NULL, tmp->addmask);
+		icpus_add_mod = partition_xcpus_add(prs, NULL, tmp->addmask);
 	if (deleting)
-		isolcpus_updated += partition_xcpus_del(prs, NULL, tmp->delmask);
+		icpus_del_mod = partition_xcpus_del(prs, NULL, tmp->delmask);
 	spin_unlock_irq(&callback_lock);
-	update_unbound_workqueue_cpumask(isolcpus_updated);
-
+	update_isolation_cpumasks(tmp->addmask, icpus_add_mod);
+	update_isolation_cpumasks(tmp->delmask, icpus_del_mod);
 	/*
 	 * Proprogate changes in top_cpuset's effective_cpus down the hierarchy.
 	 */
@@ -1819,7 +1841,8 @@  static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
 	int part_error = PERR_NONE;	/* Partition error? */
 	int subparts_delta = 0;
 	struct cpumask *xcpus;		/* cs effective_xcpus */
-	int isolcpus_updated = 0;
+	int icpus_add_mod = ISOL_CPUS_NONE;
+	int icpus_del_mod = ISOL_CPUS_NONE;
 	bool nocpu;
 
 	lockdep_assert_held(&cpuset_mutex);
@@ -2052,22 +2075,23 @@  static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
 			cs->nr_subparts = 0;
 	}
 	/*
-	 * Adding to parent's effective_cpus means deletion CPUs from cs
+	 * Adding to parent's effective_cpus means deleting CPUs from cs
 	 * and vice versa.
 	 */
 	if (adding)
-		isolcpus_updated += partition_xcpus_del(old_prs, parent,
-							tmp->addmask);
+		icpus_add_mod = partition_xcpus_del(old_prs, parent,
+						    tmp->addmask);
 	if (deleting)
-		isolcpus_updated += partition_xcpus_add(new_prs, parent,
-							tmp->delmask);
+		icpus_del_mod = partition_xcpus_add(new_prs, parent,
+						    tmp->delmask);
 
 	if (is_partition_valid(parent)) {
 		parent->nr_subparts += subparts_delta;
 		WARN_ON_ONCE(parent->nr_subparts < 0);
 	}
 	spin_unlock_irq(&callback_lock);
-	update_unbound_workqueue_cpumask(isolcpus_updated);
+	update_isolation_cpumasks(tmp->addmask, icpus_add_mod);
+	update_isolation_cpumasks(tmp->delmask, icpus_del_mod);
 
 	if ((old_prs != new_prs) && (cmd == partcmd_update))
 		update_partition_exclusive(cs, new_prs);
@@ -3044,7 +3068,7 @@  static int update_prstate(struct cpuset *cs, int new_prs)
 	int err = PERR_NONE, old_prs = cs->partition_root_state;
 	struct cpuset *parent = parent_cs(cs);
 	struct tmpmasks tmpmask;
-	bool new_xcpus_state = false;
+	int icpus_mod = ISOL_CPUS_NONE;
 
 	if (old_prs == new_prs)
 		return 0;
@@ -3096,7 +3120,8 @@  static int update_prstate(struct cpuset *cs, int new_prs)
 		/*
 		 * A change in load balance state only, no change in cpumasks.
 		 */
-		new_xcpus_state = true;
+		icpus_mod = (new_prs == PRS_ISOLATED)
+			    ? ISOL_CPUS_ADD : ISOL_CPUS_DELETE;
 	} else {
 		/*
 		 * Switching back to member is always allowed even if it
@@ -3128,10 +3153,10 @@  static int update_prstate(struct cpuset *cs, int new_prs)
 	WRITE_ONCE(cs->prs_err, err);
 	if (!is_partition_valid(cs))
 		reset_partition_data(cs);
-	else if (new_xcpus_state)
+	else if (icpus_mod)
 		partition_xcpus_newstate(old_prs, new_prs, cs->effective_xcpus);
 	spin_unlock_irq(&callback_lock);
-	update_unbound_workqueue_cpumask(new_xcpus_state);
+	update_isolation_cpumasks(cs->effective_xcpus, icpus_mod);
 
 	/* Force update if switching back to member */
 	update_cpumasks_hier(cs, &tmpmask, !new_prs ? HIER_CHECKALL : 0);