Message ID | 20210730112443.23245-9-will@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Series | Add support for 32-bit tasks on asymmetric AArch32 systems |
On Fri, Jul 30, 2021 at 12:24:35PM +0100, Will Deacon wrote:
> @@ -2783,20 +2778,173 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
>
> 	__do_set_cpus_allowed(p, new_mask, flags);
>
> -	return affine_move_task(rq, p, &rf, dest_cpu, flags);
> +	if (flags & SCA_USER)
> +		release_user_cpus_ptr(p);
> +
> +	return affine_move_task(rq, p, rf, dest_cpu, flags);
>
>  out:
> -	task_rq_unlock(rq, p, &rf);
> +	task_rq_unlock(rq, p, rf);
>
> 	return ret;
>  }
>
> +void relax_compatible_cpus_allowed_ptr(struct task_struct *p)
> +{
> +	unsigned long flags;
> +	struct cpumask *mask = p->user_cpus_ptr;
> +
> +	/*
> +	 * Try to restore the old affinity mask. If this fails, then
> +	 * we free the mask explicitly to avoid it being inherited across
> +	 * a subsequent fork().
> +	 */
> +	if (!mask || !__sched_setaffinity(p, mask))
> +		return;
> +
> +	raw_spin_lock_irqsave(&p->pi_lock, flags);
> +	release_user_cpus_ptr(p);
> +	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
> +}

Both these are a problem on RT.

The easiest recourse is simply never freeing the CPU mask (except on
exit). The alternative is something like the below I suppose..

I'm leaning towards the former option, wdyt?

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2733,6 +2733,7 @@ static int __set_cpus_allowed_ptr_locked
 	const struct cpumask *cpu_allowed_mask = task_cpu_possible_mask(p);
 	const struct cpumask *cpu_valid_mask = cpu_active_mask;
 	bool kthread = p->flags & PF_KTHREAD;
+	struct cpumask *user_mask = NULL;
 	unsigned int dest_cpu;
 	int ret = 0;
 
@@ -2792,9 +2793,13 @@ static int __set_cpus_allowed_ptr_locked
 	__do_set_cpus_allowed(p, new_mask, flags);
 
 	if (flags & SCA_USER)
-		release_user_cpus_ptr(p);
+		swap(user_mask, p->user_cpus_ptr);
+
+	ret = affine_move_task(rq, p, rf, dest_cpu, flags);
+
+	kfree(user_mask);
 
-	return affine_move_task(rq, p, rf, dest_cpu, flags);
+	return ret;
 
 out:
 	task_rq_unlock(rq, p, rf);
@@ -2954,8 +2959,10 @@ void relax_compatible_cpus_allowed_ptr(s
 		return;
 
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
-	release_user_cpus_ptr(p);
+	p->user_cpus_ptr = NULL;
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+
+	kfree(mask);
 }
 
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
On Fri, Jul 30, 2021 at 12:24:35PM +0100, Will Deacon wrote:
> +	struct rq_flags rf;
> +	struct rq *rq;
> +	int err;
> +	struct cpumask *user_mask = NULL;

> +	cpumask_var_t new_mask;
> +	const struct cpumask *override_mask = task_cpu_possible_mask(p);

> +	unsigned long flags;
> +	struct cpumask *mask = p->user_cpus_ptr;

I've fixed all that up to be proper reverse x-mas trees; similar for
other patches.
Hi Peter,

On Tue, Aug 17, 2021 at 05:10:53PM +0200, Peter Zijlstra wrote:
> On Fri, Jul 30, 2021 at 12:24:35PM +0100, Will Deacon wrote:
> > @@ -2783,20 +2778,173 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
> >
> > 	__do_set_cpus_allowed(p, new_mask, flags);
> >
> > -	return affine_move_task(rq, p, &rf, dest_cpu, flags);
> > +	if (flags & SCA_USER)
> > +		release_user_cpus_ptr(p);
> > +
> > +	return affine_move_task(rq, p, rf, dest_cpu, flags);
> >
> >  out:
> > -	task_rq_unlock(rq, p, &rf);
> > +	task_rq_unlock(rq, p, rf);
> >
> > 	return ret;
> >  }
> >
> > +void relax_compatible_cpus_allowed_ptr(struct task_struct *p)
> > +{
> > +	unsigned long flags;
> > +	struct cpumask *mask = p->user_cpus_ptr;
> > +
> > +	/*
> > +	 * Try to restore the old affinity mask. If this fails, then
> > +	 * we free the mask explicitly to avoid it being inherited across
> > +	 * a subsequent fork().
> > +	 */
> > +	if (!mask || !__sched_setaffinity(p, mask))
> > +		return;
> > +
> > +	raw_spin_lock_irqsave(&p->pi_lock, flags);
> > +	release_user_cpus_ptr(p);
> > +	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
> > +}
>
> Both these are a problem on RT.

Ah, sorry. I didn't realise you couldn't _free_ with a raw lock held in RT.
Is there somewhere I can read up on that?

> The easiest recourse is simply never freeing the CPU mask (except on
> exit). The alternative is something like the below I suppose..
>
> I'm leaning towards the former option, wdyt?

Deferring the freeing until exit feels a little fiddly, as we still want
to clear ->user_cpus_ptr on affinity changes when SCA_USER is set, so
we'd have to keep track of the mask somewhere and reuse it instead of
allocating a new one if we need it later on. Do-able, but feels a bit
nasty, particularly across fork().

As for your other suggestion:

> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -2733,6 +2733,7 @@ static int __set_cpus_allowed_ptr_locked
>  	const struct cpumask *cpu_allowed_mask = task_cpu_possible_mask(p);
>  	const struct cpumask *cpu_valid_mask = cpu_active_mask;
>  	bool kthread = p->flags & PF_KTHREAD;
> +	struct cpumask *user_mask = NULL;
>  	unsigned int dest_cpu;
>  	int ret = 0;
>
> @@ -2792,9 +2793,13 @@ static int __set_cpus_allowed_ptr_locked
>  	__do_set_cpus_allowed(p, new_mask, flags);
>
>  	if (flags & SCA_USER)
> -		release_user_cpus_ptr(p);
> +		swap(user_mask, p->user_cpus_ptr);
> +
> +	ret = affine_move_task(rq, p, rf, dest_cpu, flags);
> +
> +	kfree(user_mask);
>
> -	return affine_move_task(rq, p, rf, dest_cpu, flags);
> +	return ret;
>
>  out:
>  	task_rq_unlock(rq, p, rf);
> @@ -2954,8 +2959,10 @@ void relax_compatible_cpus_allowed_ptr(s
>  		return;
>
>  	raw_spin_lock_irqsave(&p->pi_lock, flags);
> -	release_user_cpus_ptr(p);
> +	p->user_cpus_ptr = NULL;
>  	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
> +
> +	kfree(mask);

I think the idea looks good, but perhaps we could wrap things up a bit:

	/* Comment about why this is useful with RT */
	static cpumask_t *clear_user_cpus_ptr(struct task_struct *p)
	{
		struct cpumask *user_mask = NULL;

		swap(user_mask, p->user_cpus_ptr);
		return user_mask;
	}

	void release_user_cpus_ptr(struct task_struct *p)
	{
		kfree(clear_user_cpus_ptr(p));
	}

Then just use clear_user_cpus_ptr() in sched/core.c where we know what
we're doing (well, at least one of us does!).

Will
On Tue, Aug 17, 2021 at 05:41:42PM +0200, Peter Zijlstra wrote:
> On Fri, Jul 30, 2021 at 12:24:35PM +0100, Will Deacon wrote:
> > +	struct rq_flags rf;
> > +	struct rq *rq;
> > +	int err;
> > +	struct cpumask *user_mask = NULL;
>
> > +	cpumask_var_t new_mask;
> > +	const struct cpumask *override_mask = task_cpu_possible_mask(p);
>
> > +	unsigned long flags;
> > +	struct cpumask *mask = p->user_cpus_ptr;
>
> I've fixed all that up to be proper reverse x-mas trees; similar for
> other patches.

Thanks.

Will
On Wed, Aug 18, 2021 at 11:42:28AM +0100, Will Deacon wrote:
> As for your other suggestion:
>
> > --- a/kernel/sched/core.c
> > +++ b/kernel/sched/core.c
> > @@ -2733,6 +2733,7 @@ static int __set_cpus_allowed_ptr_locked
> >  	const struct cpumask *cpu_allowed_mask = task_cpu_possible_mask(p);
> >  	const struct cpumask *cpu_valid_mask = cpu_active_mask;
> >  	bool kthread = p->flags & PF_KTHREAD;
> > +	struct cpumask *user_mask = NULL;
> >  	unsigned int dest_cpu;
> >  	int ret = 0;
> >
> > @@ -2792,9 +2793,13 @@ static int __set_cpus_allowed_ptr_locked
> >  	__do_set_cpus_allowed(p, new_mask, flags);
> >
> >  	if (flags & SCA_USER)
> > -		release_user_cpus_ptr(p);
> > +		swap(user_mask, p->user_cpus_ptr);
> > +
> > +	ret = affine_move_task(rq, p, rf, dest_cpu, flags);
> > +
> > +	kfree(user_mask);
> >
> > -	return affine_move_task(rq, p, rf, dest_cpu, flags);
> > +	return ret;
> >
> >  out:
> >  	task_rq_unlock(rq, p, rf);
> > @@ -2954,8 +2959,10 @@ void relax_compatible_cpus_allowed_ptr(s
> >  		return;
> >
> >  	raw_spin_lock_irqsave(&p->pi_lock, flags);
> > -	release_user_cpus_ptr(p);
> > +	p->user_cpus_ptr = NULL;
> >  	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
> > +
> > +	kfree(mask);
>
> I think the idea looks good, but perhaps we could wrap things up a bit:
>
> 	/* Comment about why this is useful with RT */
> 	static cpumask_t *clear_user_cpus_ptr(struct task_struct *p)
> 	{
> 		struct cpumask *user_mask = NULL;
>
> 		swap(user_mask, p->user_cpus_ptr);
> 		return user_mask;
> 	}
>
> 	void release_user_cpus_ptr(struct task_struct *p)
> 	{
> 		kfree(clear_user_cpus_ptr(p));
> 	}
>
> Then just use clear_user_cpus_ptr() in sched/core.c where we know what
> we're doing (well, at least one of us does!).

OK, I'll go make it like that.
On Wed, Aug 18, 2021 at 11:42:28AM +0100, Will Deacon wrote:
> Ah, sorry. I didn't realise you couldn't _free_ with a raw lock held in RT.
> Is there somewhere I can read up on that?

It's because the allocators use spinlock_t, which cannot nest inside
raw_spinlock_t.
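To make the constraint concrete: on PREEMPT_RT, spinlock_t is a sleeping
lock, so anything that takes one (including the slab allocator on the
kfree() path) must not run inside a raw_spinlock_t critical section. A
minimal sketch of the pattern the thread converges on, detaching the
pointer under the raw lock and freeing it only after the lock is dropped,
might look like the snippet below; the struct and function names are
invented purely for illustration and are not kernel API:

	/*
	 * Illustrative sketch only: detach under the raw lock, free outside it.
	 * kfree(NULL) is a no-op, so the unconditional free is fine.
	 */
	struct example {
		raw_spinlock_t lock;
		struct cpumask *mask;
	};

	static void example_clear_and_free(struct example *e)
	{
		struct cpumask *tmp = NULL;
		unsigned long flags;

		raw_spin_lock_irqsave(&e->lock, flags);
		swap(tmp, e->mask);		/* pointer games only under the raw lock */
		raw_spin_unlock_irqrestore(&e->lock, flags);

		kfree(tmp);			/* safe: no raw lock held here */
	}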
On Wed, Aug 18, 2021 at 12:56:41PM +0200, Peter Zijlstra wrote:
> On Wed, Aug 18, 2021 at 11:42:28AM +0100, Will Deacon wrote:
>
> > I think the idea looks good, but perhaps we could wrap things up a bit:
> >
> > 	/* Comment about why this is useful with RT */
> > 	static cpumask_t *clear_user_cpus_ptr(struct task_struct *p)
> > 	{
> > 		struct cpumask *user_mask = NULL;
> >
> > 		swap(user_mask, p->user_cpus_ptr);
> > 		return user_mask;
> > 	}
> >
> > 	void release_user_cpus_ptr(struct task_struct *p)
> > 	{
> > 		kfree(clear_user_cpus_ptr(p));
> > 	}
> >
> > Then just use clear_user_cpus_ptr() in sched/core.c where we know what
> > we're doing (well, at least one of us does!).
>
> OK, I'll go make it like that.

Something like so then?

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2497,10 +2497,18 @@ int dup_user_cpus_ptr(struct task_struct
 	return 0;
 }
 
+static inline struct cpumask *clear_user_cpus_ptr(struct task_struct *p)
+{
+	struct cpumask *user_mask = NULL;
+
+	swap(p->user_cpus_ptr, user_mask);
+
+	return user_mask;
+}
+
 void release_user_cpus_ptr(struct task_struct *p)
 {
-	kfree(p->user_cpus_ptr);
-	p->user_cpus_ptr = NULL;
+	kfree(clear_user_cpus_ptr(p));
 }
 
 /*
@@ -2733,6 +2741,7 @@ static int __set_cpus_allowed_ptr_locked
 	const struct cpumask *cpu_allowed_mask = task_cpu_possible_mask(p);
 	const struct cpumask *cpu_valid_mask = cpu_active_mask;
 	bool kthread = p->flags & PF_KTHREAD;
+	struct cpumask *user_mask = NULL;
 	unsigned int dest_cpu;
 	int ret = 0;
 
@@ -2792,9 +2801,13 @@ static int __set_cpus_allowed_ptr_locked
 	__do_set_cpus_allowed(p, new_mask, flags);
 
 	if (flags & SCA_USER)
-		release_user_cpus_ptr(p);
+		user_mask = clear_user_cpus_ptr(p);
 
-	return affine_move_task(rq, p, rf, dest_cpu, flags);
+	ret = affine_move_task(rq, p, rf, dest_cpu, flags);
+
+	kfree(user_mask);
+
+	return ret;
 
 out:
 	task_rq_unlock(rq, p, rf);
@@ -2941,20 +2954,22 @@ __sched_setaffinity(struct task_struct *
  */
 void relax_compatible_cpus_allowed_ptr(struct task_struct *p)
 {
+	struct cpumask *user_mask = p->user_cpus_ptr;
 	unsigned long flags;
-	struct cpumask *mask = p->user_cpus_ptr;
 
 	/*
 	 * Try to restore the old affinity mask. If this fails, then
 	 * we free the mask explicitly to avoid it being inherited across
 	 * a subsequent fork().
 	 */
-	if (!mask || !__sched_setaffinity(p, mask))
+	if (!user_mask || !__sched_setaffinity(p, user_mask))
 		return;
 
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
-	release_user_cpus_ptr(p);
+	user_mask = clear_user_cpus_ptr(p);
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+
+	kfree(user_mask);
 }
 
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
On Wed, Aug 18, 2021 at 01:53:28PM +0200, Peter Zijlstra wrote:
> On Wed, Aug 18, 2021 at 12:56:41PM +0200, Peter Zijlstra wrote:
> > On Wed, Aug 18, 2021 at 11:42:28AM +0100, Will Deacon wrote:
> >
> > > I think the idea looks good, but perhaps we could wrap things up a bit:
> > >
> > > 	/* Comment about why this is useful with RT */
> > > 	static cpumask_t *clear_user_cpus_ptr(struct task_struct *p)
> > > 	{
> > > 		struct cpumask *user_mask = NULL;
> > >
> > > 		swap(user_mask, p->user_cpus_ptr);
> > > 		return user_mask;
> > > 	}
> > >
> > > 	void release_user_cpus_ptr(struct task_struct *p)
> > > 	{
> > > 		kfree(clear_user_cpus_ptr(p));
> > > 	}
> > >
> > > Then just use clear_user_cpus_ptr() in sched/core.c where we know what
> > > we're doing (well, at least one of us does!).
> >
> > OK, I'll go make it like that.
>
> Something like so then?

Looks good to me, thanks!

Will
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 91dab7a62aa1..2ebe3d6f8f0c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1708,6 +1708,8 @@ extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new
 extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);
 extern int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node);
 extern void release_user_cpus_ptr(struct task_struct *p);
+extern void force_compatible_cpus_allowed_ptr(struct task_struct *p);
+extern void relax_compatible_cpus_allowed_ptr(struct task_struct *p);
 #else
 static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d4219d366103..aec75ec1d257 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2707,27 +2707,22 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
 }
 
 /*
- * Change a given task's CPU affinity. Migrate the thread to a
- * proper CPU and schedule it away if the CPU it's executing on
- * is removed from the allowed bitmask.
- *
- * NOTE: the caller must have a valid reference to the task, the
- * task must not exit() & deallocate itself prematurely. The
- * call is not atomic; no spinlocks may be held.
+ * Called with both p->pi_lock and rq->lock held; drops both before returning.
  */
-static int __set_cpus_allowed_ptr(struct task_struct *p,
-				  const struct cpumask *new_mask,
-				  u32 flags)
+static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
+					 const struct cpumask *new_mask,
+					 u32 flags,
+					 struct rq *rq,
+					 struct rq_flags *rf)
+	__releases(rq->lock)
+	__releases(p->pi_lock)
 {
 	const struct cpumask *cpu_valid_mask = cpu_active_mask;
 	const struct cpumask *cpu_allowed_mask = task_cpu_possible_mask(p);
 	unsigned int dest_cpu;
-	struct rq_flags rf;
-	struct rq *rq;
 	int ret = 0;
 	bool kthread = p->flags & PF_KTHREAD;
 
-	rq = task_rq_lock(p, &rf);
 	update_rq_clock(rq);
 
 	if (kthread || is_migration_disabled(p)) {
@@ -2783,20 +2778,173 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 
 	__do_set_cpus_allowed(p, new_mask, flags);
 
-	return affine_move_task(rq, p, &rf, dest_cpu, flags);
+	if (flags & SCA_USER)
+		release_user_cpus_ptr(p);
+
+	return affine_move_task(rq, p, rf, dest_cpu, flags);
 
 out:
-	task_rq_unlock(rq, p, &rf);
+	task_rq_unlock(rq, p, rf);
 
 	return ret;
 }
 
+/*
+ * Change a given task's CPU affinity. Migrate the thread to a
+ * proper CPU and schedule it away if the CPU it's executing on
+ * is removed from the allowed bitmask.
+ *
+ * NOTE: the caller must have a valid reference to the task, the
+ * task must not exit() & deallocate itself prematurely. The
+ * call is not atomic; no spinlocks may be held.
+ */
+static int __set_cpus_allowed_ptr(struct task_struct *p,
+				  const struct cpumask *new_mask, u32 flags)
+{
+	struct rq_flags rf;
+	struct rq *rq;
+
+	rq = task_rq_lock(p, &rf);
+	return __set_cpus_allowed_ptr_locked(p, new_mask, flags, rq, &rf);
+}
+
 int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 {
 	return __set_cpus_allowed_ptr(p, new_mask, 0);
 }
 EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
 
+/*
+ * Change a given task's CPU affinity to the intersection of its current
+ * affinity mask and @subset_mask, writing the resulting mask to @new_mask
+ * and pointing @p->user_cpus_ptr to a copy of the old mask.
+ * If the resulting mask is empty, leave the affinity unchanged and return
+ * -EINVAL.
+ */
+static int restrict_cpus_allowed_ptr(struct task_struct *p,
+				     struct cpumask *new_mask,
+				     const struct cpumask *subset_mask)
+{
+	struct rq_flags rf;
+	struct rq *rq;
+	int err;
+	struct cpumask *user_mask = NULL;
+
+	if (!p->user_cpus_ptr) {
+		user_mask = kmalloc(cpumask_size(), GFP_KERNEL);
+
+		if (!user_mask)
+			return -ENOMEM;
+	}
+
+	rq = task_rq_lock(p, &rf);
+
+	/*
+	 * Forcefully restricting the affinity of a deadline task is
+	 * likely to cause problems, so fail and noisily override the
+	 * mask entirely.
+	 */
+	if (task_has_dl_policy(p) && dl_bandwidth_enabled()) {
+		err = -EPERM;
+		goto err_unlock;
+	}
+
+	if (!cpumask_and(new_mask, &p->cpus_mask, subset_mask)) {
+		err = -EINVAL;
+		goto err_unlock;
+	}
+
+	/*
+	 * We're about to butcher the task affinity, so keep track of what
+	 * the user asked for in case we're able to restore it later on.
+	 */
+	if (user_mask) {
+		cpumask_copy(user_mask, p->cpus_ptr);
+		p->user_cpus_ptr = user_mask;
+	}
+
+	return __set_cpus_allowed_ptr_locked(p, new_mask, 0, rq, &rf);
+
+err_unlock:
+	task_rq_unlock(rq, p, &rf);
+	kfree(user_mask);
+	return err;
+}
+
+/*
+ * Restrict the CPU affinity of task @p so that it is a subset of
+ * task_cpu_possible_mask() and point @p->user_cpu_ptr to a copy of the
+ * old affinity mask. If the resulting mask is empty, we warn and walk
+ * up the cpuset hierarchy until we find a suitable mask.
+ */
+void force_compatible_cpus_allowed_ptr(struct task_struct *p)
+{
+	cpumask_var_t new_mask;
+	const struct cpumask *override_mask = task_cpu_possible_mask(p);
+
+	alloc_cpumask_var(&new_mask, GFP_KERNEL);
+
+	/*
+	 * __migrate_task() can fail silently in the face of concurrent
+	 * offlining of the chosen destination CPU, so take the hotplug
+	 * lock to ensure that the migration succeeds.
+	 */
+	cpus_read_lock();
+	if (!cpumask_available(new_mask))
+		goto out_set_mask;
+
+	if (!restrict_cpus_allowed_ptr(p, new_mask, override_mask))
+		goto out_free_mask;
+
+	/*
+	 * We failed to find a valid subset of the affinity mask for the
+	 * task, so override it based on its cpuset hierarchy.
+	 */
+	cpuset_cpus_allowed(p, new_mask);
+	override_mask = new_mask;
+
+out_set_mask:
+	if (printk_ratelimit()) {
+		printk_deferred("Overriding affinity for process %d (%s) to CPUs %*pbl\n",
+				task_pid_nr(p), p->comm,
+				cpumask_pr_args(override_mask));
+	}
+
+	WARN_ON(set_cpus_allowed_ptr(p, override_mask));
+out_free_mask:
+	cpus_read_unlock();
+	free_cpumask_var(new_mask);
+}
+
+static int
+__sched_setaffinity(struct task_struct *p, const struct cpumask *mask);
+
+/*
+ * Restore the affinity of a task @p which was previously restricted by a
+ * call to force_compatible_cpus_allowed_ptr(). This will clear (and free)
+ * @p->user_cpus_ptr.
+ *
+ * It is the caller's responsibility to serialise this with any calls to
+ * force_compatible_cpus_allowed_ptr(@p).
+ */
+void relax_compatible_cpus_allowed_ptr(struct task_struct *p)
+{
+	unsigned long flags;
+	struct cpumask *mask = p->user_cpus_ptr;
+
+	/*
+	 * Try to restore the old affinity mask. If this fails, then
+	 * we free the mask explicitly to avoid it being inherited across
+	 * a subsequent fork().
+	 */
+	if (!mask || !__sched_setaffinity(p, mask))
+		return;
+
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
+	release_user_cpus_ptr(p);
+	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+}
+
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
 #ifdef CONFIG_SCHED_DEBUG
@@ -7613,7 +7761,7 @@ __sched_setaffinity(struct task_struct *p, const struct cpumask *mask)
 	}
 #endif
 again:
-	retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK);
+	retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK | SCA_USER);
 
 	if (retval)
 		goto out_free_new_mask;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 14a41a243f7b..e88c2d399f0d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2234,6 +2234,7 @@ extern struct task_struct *pick_next_task_idle(struct rq *rq);
 #define SCA_CHECK		0x01
 #define SCA_MIGRATE_DISABLE	0x02
 #define SCA_MIGRATE_ENABLE	0x04
+#define SCA_USER		0x08
 
 #ifdef CONFIG_SMP
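As a usage sketch (not part of the patch): an architecture with
asymmetric 32-bit support would be expected to call the new pair roughly
as below when a task enters or leaves 32-bit execution. The helper and
its name are hypothetical, invented purely to illustrate the calling
pattern:

	/*
	 * Hypothetical caller, for illustration only: clamp the task to the
	 * CPUs it can physically run on while it executes 32-bit code, and
	 * restore the user-requested mask afterwards.
	 */
	static void adjust_compat_affinity(struct task_struct *p, bool is_compat)
	{
		if (is_compat) {
			/* Saves the old mask in p->user_cpus_ptr. */
			force_compatible_cpus_allowed_ptr(p);
		} else {
			/* Tries to restore it; clears p->user_cpus_ptr either way. */
			relax_compatible_cpus_allowed_ptr(p);
		}
	}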