@@ -1410,6 +1410,7 @@ struct task_struct {
#endif
#ifdef CONFIG_NO_HZ_FULL
+ struct callback_head nohz_full_work;
atomic_t tick_dep_mask;
#endif
@@ -77,4 +77,21 @@ static inline bool cpu_is_isolated(int cpu)
cpuset_cpu_is_isolated(cpu);
}
+#if defined(CONFIG_NO_HZ_FULL)
+extern int __isolated_task_work_queue(void);
+
+/*
+ * isolated_task_work_queue - queue the isolated task work on the current task
+ *
+ * Only meaningful on CPUs that are isolated from kernel noise. The CPU is
+ * sampled with raw_smp_processor_id(): this relies on the assumption that
+ * isolated tasks are pinned to a single nohz_full CPU.
+ *
+ * Return: -ENOTSUPP when the current CPU is a housekeeping CPU (i.e. not
+ * isolated for HK_TYPE_KERNEL_NOISE), otherwise whatever
+ * __isolated_task_work_queue() returns.
+ */
+static inline int isolated_task_work_queue(void)
+{
+	/* Housekeeping CPUs are not isolated: there is nothing to defer. */
+	if (housekeeping_cpu(raw_smp_processor_id(), HK_TYPE_KERNEL_NOISE))
+		return -ENOTSUPP;
+
+	return __isolated_task_work_queue();
+}
+
+extern void isolated_task_work_init(struct task_struct *tsk);
+#else
+/* !CONFIG_NO_HZ_FULL: queueing is never supported and init is a no-op. */
+static inline int isolated_task_work_queue(void)
+{
+	return -ENOTSUPP;
+}
+
+static inline void isolated_task_work_init(struct task_struct *tsk)
+{
+}
+#endif /* CONFIG_NO_HZ_FULL */
+
#endif /* _LINUX_SCHED_ISOLATION_H */
@@ -4525,6 +4525,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
p->migration_pending = NULL;
#endif
init_sched_mm_cid(p);
+ isolated_task_work_init(p);
}
DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
@@ -249,3 +249,34 @@ static int __init housekeeping_isolcpus_setup(char *str)
return housekeeping_setup(str, flags);
}
__setup("isolcpus=", housekeeping_isolcpus_setup);
+
+#if defined(CONFIG_NO_HZ_FULL)
+/*
+ * Task work callback installed on tsk->nohz_full_work by
+ * isolated_task_work_init() and queued by __isolated_task_work_queue().
+ *
+ * The body is intentionally empty here: this only provides the
+ * infrastructure, no deferred work is hooked in yet.
+ */
+static void isolated_task_work(struct callback_head *head)
+{
+}
+
+/*
+ * __isolated_task_work_queue - queue current->nohz_full_work with TWA_RESUME
+ *
+ * Callers are expected to go through isolated_task_work_queue(), which
+ * filters on the current CPU first.
+ *
+ * Return:
+ *   -EINVAL  if current is a kernel thread (PF_KTHREAD): nothing is queued,
+ *            so success must not be reported;
+ *   0        if the work was already queued, or was queued successfully;
+ *   otherwise the error returned by task_work_add().
+ */
+int __isolated_task_work_queue(void)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	if (current->flags & PF_KTHREAD)
+		return -EINVAL;
+
+	/*
+	 * Check-then-add is done with local interrupts disabled.
+	 * NOTE(review): presumably so that an interrupt on this CPU cannot
+	 * queue the same work between the check and the add - confirm.
+	 */
+	local_irq_save(flags);
+	if (!task_work_queued(&current->nohz_full_work))
+		ret = task_work_add(current, &current->nohz_full_work, TWA_RESUME);
+	local_irq_restore(flags);
+
+	return ret;
+}
+
+/*
+ * isolated_task_work_init - set up the isolated task work of a task
+ * @tsk: task whose nohz_full_work callback head gets initialized
+ *
+ * Binds tsk->nohz_full_work to isolated_task_work().
+ */
+void isolated_task_work_init(struct task_struct *tsk)
+{
+	struct callback_head *work = &tsk->nohz_full_work;
+
+	init_task_work(work, isolated_task_work);
+}
+#endif /* CONFIG_NO_HZ_FULL */
@@ -60,6 +60,7 @@
#include <linux/stop_machine.h>
#include <linux/syscalls_api.h>
#include <linux/syscalls.h>
+#include <linux/task_work.h>
#include <linux/tick.h>
#include <linux/topology.h>
#include <linux/types.h>
Some asynchronous kernel work may be pending upon resume to userspace and execute later on. On isolated workloads this becomes problematic once the process is done with preparatory work involving syscalls and wants to run in userspace without being interrupted.

Provide an infrastructure to queue work to be executed from the current isolated task context right before resuming to userspace. This goes with the assumption that isolated tasks are pinned to a single nohz_full CPU.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
---
 include/linux/sched.h           |  1 +
 include/linux/sched/isolation.h | 17 +++++++++++++++++
 kernel/sched/core.c             |  1 +
 kernel/sched/isolation.c        | 31 +++++++++++++++++++++++++++++++
 kernel/sched/sched.h            |  1 +
 5 files changed, 51 insertions(+)