@@ -106,6 +106,7 @@ enum scx_kf_mask {
__SCX_KF_RQ_LOCKED = SCX_KF_DISPATCH |
SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST,
+ __SCX_KF_TERMINAL = SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST,
};
/*
@@ -120,6 +121,7 @@ struct sched_ext_entity {
s32 sticky_cpu;
s32 holding_cpu;
u32 kf_mask; /* see scx_kf_mask above */
+ struct task_struct *kf_tasks[2]; /* see SCX_CALL_OP_TASK() */
atomic_long_t ops_state;
struct list_head runnable_node; /* rq->scx.runnable_list */
@@ -749,6 +749,47 @@ do { \
__ret; \
})
+/*
+ * Some kfuncs are allowed only on the tasks that are subjects of the
+ * in-progress scx_ops operation for, e.g., locking guarantees. To enforce such
+ * restrictions, the following SCX_CALL_OP_*() variants should be used when
+ * invoking scx_ops operations that take task arguments. These can only be used
+ * for non-nesting operations due to the way the tasks are tracked.
+ *
+ * kfuncs which can only operate on such tasks can in turn use
+ * scx_kf_allowed_on_arg_tasks() to test whether the invocation is allowed on
+ * the specific task.
+ */
+#define SCX_CALL_OP_TASK(mask, op, task, args...) \
+do { \
+ BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \
+ current->scx.kf_tasks[0] = task; \
+ SCX_CALL_OP(mask, op, task, ##args); \
+ current->scx.kf_tasks[0] = NULL; \
+} while (0)
+
+#define SCX_CALL_OP_TASK_RET(mask, op, task, args...) \
+({ \
+ __typeof__(scx_ops.op(task, ##args)) __ret; \
+ BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \
+ current->scx.kf_tasks[0] = task; \
+ __ret = SCX_CALL_OP_RET(mask, op, task, ##args); \
+ current->scx.kf_tasks[0] = NULL; \
+ __ret; \
+})
+
+#define SCX_CALL_OP_2TASKS_RET(mask, op, task0, task1, args...) \
+({ \
+ __typeof__(scx_ops.op(task0, task1, ##args)) __ret; \
+ BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \
+ current->scx.kf_tasks[0] = task0; \
+ current->scx.kf_tasks[1] = task1; \
+ __ret = SCX_CALL_OP_RET(mask, op, task0, task1, ##args); \
+ current->scx.kf_tasks[0] = NULL; \
+ current->scx.kf_tasks[1] = NULL; \
+ __ret; \
+})
+
/* @mask is constant, always inline to cull unnecessary branches */
static __always_inline bool scx_kf_allowed(u32 mask)
{
@@ -778,6 +819,22 @@ static __always_inline bool scx_kf_allowed(u32 mask)
return true;
}
+/* see SCX_CALL_OP_TASK() */
+static __always_inline bool scx_kf_allowed_on_arg_tasks(u32 mask,
+ struct task_struct *p)
+{
+ if (!scx_kf_allowed(mask))
+ return false;
+
+ if (unlikely((p != current->scx.kf_tasks[0] &&
+ p != current->scx.kf_tasks[1]))) {
+ scx_ops_error("called on a task not being operated on");
+ return false;
+ }
+
+ return true;
+}
+
/*
* SCX task iterator.
@@ -1271,7 +1328,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
WARN_ON_ONCE(*ddsp_taskp);
*ddsp_taskp = p;
- SCX_CALL_OP(SCX_KF_ENQUEUE, enqueue, p, enq_flags);
+ SCX_CALL_OP_TASK(SCX_KF_ENQUEUE, enqueue, p, enq_flags);
*ddsp_taskp = NULL;
if (p->scx.ddsp_dsq_id != SCX_DSQ_INVALID)
@@ -1356,7 +1413,7 @@ static void enqueue_task_scx(struct rq *rq, struct task_struct *p, int enq_flags
add_nr_running(rq, 1);
if (SCX_HAS_OP(runnable))
- SCX_CALL_OP(SCX_KF_REST, runnable, p, enq_flags);
+ SCX_CALL_OP_TASK(SCX_KF_REST, runnable, p, enq_flags);
do_enqueue_task(rq, p, enq_flags, sticky_cpu);
}
@@ -1382,7 +1439,7 @@ static void ops_dequeue(struct task_struct *p, u64 deq_flags)
BUG();
case SCX_OPSS_QUEUED:
if (SCX_HAS_OP(dequeue))
- SCX_CALL_OP(SCX_KF_REST, dequeue, p, deq_flags);
+ SCX_CALL_OP_TASK(SCX_KF_REST, dequeue, p, deq_flags);
if (atomic_long_try_cmpxchg(&p->scx.ops_state, &opss,
SCX_OPSS_NONE))
@@ -1433,11 +1490,11 @@ static void dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags
*/
if (SCX_HAS_OP(stopping) && task_current(rq, p)) {
update_curr_scx(rq);
- SCX_CALL_OP(SCX_KF_REST, stopping, p, false);
+ SCX_CALL_OP_TASK(SCX_KF_REST, stopping, p, false);
}
if (SCX_HAS_OP(quiescent))
- SCX_CALL_OP(SCX_KF_REST, quiescent, p, deq_flags);
+ SCX_CALL_OP_TASK(SCX_KF_REST, quiescent, p, deq_flags);
if (deq_flags & SCX_DEQ_SLEEP)
p->scx.flags |= SCX_TASK_DEQD_FOR_SLEEP;
@@ -1456,7 +1513,7 @@ static void yield_task_scx(struct rq *rq)
struct task_struct *p = rq->curr;
if (SCX_HAS_OP(yield))
- SCX_CALL_OP_RET(SCX_KF_REST, yield, p, NULL);
+ SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, p, NULL);
else
p->scx.slice = 0;
}
@@ -1466,7 +1523,7 @@ static bool yield_to_task_scx(struct rq *rq, struct task_struct *to)
struct task_struct *from = rq->curr;
if (SCX_HAS_OP(yield))
- return SCX_CALL_OP_RET(SCX_KF_REST, yield, from, to);
+ return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, from, to);
else
return false;
}
@@ -2024,7 +2081,7 @@ static void set_next_task_scx(struct rq *rq, struct task_struct *p, bool first)
/* see dequeue_task_scx() on why we skip when !QUEUED */
if (SCX_HAS_OP(running) && (p->scx.flags & SCX_TASK_QUEUED))
- SCX_CALL_OP(SCX_KF_REST, running, p);
+ SCX_CALL_OP_TASK(SCX_KF_REST, running, p);
clr_task_runnable(p, true);
@@ -2088,7 +2145,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p)
/* see dequeue_task_scx() on why we skip when !QUEUED */
if (SCX_HAS_OP(stopping) && (p->scx.flags & SCX_TASK_QUEUED))
- SCX_CALL_OP(SCX_KF_REST, stopping, p, true);
+ SCX_CALL_OP_TASK(SCX_KF_REST, stopping, p, true);
/*
* If we're being called from put_prev_task_balance(), balance_scx() may
@@ -2310,8 +2367,8 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag
WARN_ON_ONCE(*ddsp_taskp);
*ddsp_taskp = p;
- cpu = SCX_CALL_OP_RET(SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU,
- select_cpu, p, prev_cpu, wake_flags);
+ cpu = SCX_CALL_OP_TASK_RET(SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU,
+ select_cpu, p, prev_cpu, wake_flags);
*ddsp_taskp = NULL;
if (ops_cpu_valid(cpu, "from ops.select_cpu()"))
return cpu;
@@ -2344,8 +2401,8 @@ static void set_cpus_allowed_scx(struct task_struct *p,
* designation pointless. Cast it away when calling the operation.
*/
if (SCX_HAS_OP(set_cpumask))
- SCX_CALL_OP(SCX_KF_REST, set_cpumask, p,
- (struct cpumask *)p->cpus_ptr);
+ SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, p,
+ (struct cpumask *)p->cpus_ptr);
}
static void reset_idle_masks(void)
@@ -2580,7 +2637,7 @@ static void scx_ops_enable_task(struct task_struct *p)
*/
set_task_scx_weight(p);
if (SCX_HAS_OP(enable))
- SCX_CALL_OP(SCX_KF_REST, enable, p);
+ SCX_CALL_OP_TASK(SCX_KF_REST, enable, p);
scx_set_task_state(p, SCX_TASK_ENABLED);
if (SCX_HAS_OP(set_weight))
@@ -2734,7 +2791,7 @@ static void reweight_task_scx(struct rq *rq, struct task_struct *p, int newprio)
set_task_scx_weight(p);
if (SCX_HAS_OP(set_weight))
- SCX_CALL_OP(SCX_KF_REST, set_weight, p, p->scx.weight);
+ SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, p, p->scx.weight);
}
static void prio_changed_scx(struct rq *rq, struct task_struct *p, int oldprio)
@@ -2750,8 +2807,8 @@ static void switching_to_scx(struct rq *rq, struct task_struct *p)
* different scheduler class. Keep the BPF scheduler up-to-date.
*/
if (SCX_HAS_OP(set_cpumask))
- SCX_CALL_OP(SCX_KF_REST, set_cpumask, p,
- (struct cpumask *)p->cpus_ptr);
+ SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, p,
+ (struct cpumask *)p->cpus_ptr);
}
static void switched_from_scx(struct rq *rq, struct task_struct *p)