Message-ID:
<AM0PR03MB5076E7272ECC6208135874C299CD2@AM0PR03MB5076.eurprd03.prod.outlook.com>
Date: Thu, 27 Feb 2025 16:49:44 +0000
From: Juntong Deng <juntong.deng@...look.com>
To: Andrea Righi <arighi@...dia.com>
Cc: ast@...nel.org, daniel@...earbox.net, john.fastabend@...il.com,
andrii@...nel.org, martin.lau@...ux.dev, eddyz87@...il.com, song@...nel.org,
yonghong.song@...ux.dev, kpsingh@...nel.org, sdf@...ichev.me,
haoluo@...gle.com, jolsa@...nel.org, memxor@...il.com, tj@...nel.org,
void@...ifault.com, changwoo@...lia.com, bpf@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH sched_ext/for-6.15 v3 4/5] sched_ext: Removed mask-based
runtime restrictions on calling kfuncs in different contexts

On 2025/2/27 16:24, Andrea Righi wrote:
> On Wed, Feb 26, 2025 at 07:28:19PM +0000, Juntong Deng wrote:
>> Currently, kfunc filters already support filtering based on struct_ops
>> context information.
>>
>> The BPF verifier can check context-sensitive kfuncs before the SCX
>> program is run, avoiding runtime overhead.
>>
>> Therefore we no longer need mask-based runtime restrictions.
>>
>> This patch removes the mask-based runtime restrictions.
>
> You may have missed scx_prio_less(), that is still using SCX_KF_REST:
>
> kernel/sched/ext.c: In function ‘scx_prio_less’:
> kernel/sched/ext.c:1171:27: error: ‘struct sched_ext_ops’ has no member named ‘SCX_KF_REST’
> 1171 | __typeof__(scx_ops.op(task0, task1, ##args)) __ret; \
> | ^
>
> I think you just need to add:
>
> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> index 4a4713c3af67b..51c13b8c27743 100644
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -3302,7 +3302,7 @@ bool scx_prio_less(const struct task_struct *a, const struct task_struct *b,
> * verifier.
> */
> if (SCX_HAS_OP(core_sched_before) && !scx_rq_bypassing(task_rq(a)))
> - return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, core_sched_before,
> + return SCX_CALL_OP_2TASKS_RET(core_sched_before,
> (struct task_struct *)a,
> (struct task_struct *)b);
> else
>
> -Andrea
>
Thanks for pointing this out.

I made the mistake of not enabling CONFIG_SCHED_CORE, which is why I did
not notice this issue.

If you find any other issues, please let me know. I will fix them in the
next version.
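
For reference, the load-time check that replaces the mask goes through the
kfunc filter attached to the BTF id set. A rough sketch of that shape (a
simplified illustration, not the exact code from the earlier patches in this
series; the set/filter names and the helper that derives the attached
struct_ops member from the prog are placeholders):

/*
 * Sketch: restrict the dispatch-only kfuncs to ops.dispatch().
 * scx_member_off_of_prog() is a placeholder for however the attached
 * struct_ops member offset is obtained from the program.
 */
static int scx_kfunc_filter_dispatch(const struct bpf_prog *prog, u32 kfunc_id)
{
	u32 moff = scx_member_off_of_prog(prog);

	if (moff == offsetof(struct sched_ext_ops, dispatch))
		return 0;	/* allowed */
	return -EACCES;		/* kfunc is treated as unavailable to this prog */
}

static const struct btf_kfunc_id_set scx_kfunc_set_dispatch_only = {
	.owner	= THIS_MODULE,
	.set	= &scx_kfunc_ids_dispatch,
	.filter	= scx_kfunc_filter_dispatch,
};

/* registered once during init (fragment) */
ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
				&scx_kfunc_set_dispatch_only);

With that in place, calling e.g. scx_bpf_dispatch_cancel() from ops.enqueue()
is rejected when the program is loaded, rather than failing at runtime.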
>>
>> Signed-off-by: Juntong Deng <juntong.deng@...look.com>
>> ---
>> include/linux/sched/ext.h | 24 ----
>> kernel/sched/ext.c | 227 ++++++++------------------------------
>> kernel/sched/ext_idle.c | 5 +-
>> 3 files changed, 50 insertions(+), 206 deletions(-)
>>
>> diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
>> index f7545430a548..9980d6b55c84 100644
>> --- a/include/linux/sched/ext.h
>> +++ b/include/linux/sched/ext.h
>> @@ -96,29 +96,6 @@ enum scx_ent_dsq_flags {
>> SCX_TASK_DSQ_ON_PRIQ = 1 << 0, /* task is queued on the priority queue of a dsq */
>> };
>>
>> -/*
>> - * Mask bits for scx_entity.kf_mask. Not all kfuncs can be called from
>> - * everywhere and the following bits track which kfunc sets are currently
>> - * allowed for %current. This simple per-task tracking works because SCX ops
>> - * nest in a limited way. BPF will likely implement a way to allow and disallow
>> - * kfuncs depending on the calling context which will replace this manual
>> - * mechanism. See scx_kf_allow().
>> - */
>> -enum scx_kf_mask {
>> - SCX_KF_UNLOCKED = 0, /* sleepable and not rq locked */
>> - /* ENQUEUE and DISPATCH may be nested inside CPU_RELEASE */
>> - SCX_KF_CPU_RELEASE = 1 << 0, /* ops.cpu_release() */
>> - /* ops.dequeue (in REST) may be nested inside DISPATCH */
>> - SCX_KF_DISPATCH = 1 << 1, /* ops.dispatch() */
>> - SCX_KF_ENQUEUE = 1 << 2, /* ops.enqueue() and ops.select_cpu() */
>> - SCX_KF_SELECT_CPU = 1 << 3, /* ops.select_cpu() */
>> - SCX_KF_REST = 1 << 4, /* other rq-locked operations */
>> -
>> - __SCX_KF_RQ_LOCKED = SCX_KF_CPU_RELEASE | SCX_KF_DISPATCH |
>> - SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST,
>> - __SCX_KF_TERMINAL = SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST,
>> -};
>> -
>> enum scx_dsq_lnode_flags {
>> SCX_DSQ_LNODE_ITER_CURSOR = 1 << 0,
>>
>> @@ -147,7 +124,6 @@ struct sched_ext_entity {
>> s32 sticky_cpu;
>> s32 holding_cpu;
>> s32 selected_cpu;
>> - u32 kf_mask; /* see scx_kf_mask above */
>> struct task_struct *kf_tasks[2]; /* see SCX_CALL_OP_TASK() */
>> atomic_long_t ops_state;
>>
>> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
>> index c337f6206ae5..7dc5f11be66b 100644
>> --- a/kernel/sched/ext.c
>> +++ b/kernel/sched/ext.c
>> @@ -1115,19 +1115,6 @@ static long jiffies_delta_msecs(unsigned long at, unsigned long now)
>> return -(long)jiffies_to_msecs(now - at);
>> }
>>
>> -/* if the highest set bit is N, return a mask with bits [N+1, 31] set */
>> -static u32 higher_bits(u32 flags)
>> -{
>> - return ~((1 << fls(flags)) - 1);
>> -}
>> -
>> -/* return the mask with only the highest bit set */
>> -static u32 highest_bit(u32 flags)
>> -{
>> - int bit = fls(flags);
>> - return ((u64)1 << bit) >> 1;
>> -}
>> -
>> static bool u32_before(u32 a, u32 b)
>> {
>> return (s32)(a - b) < 0;
>> @@ -1143,51 +1130,12 @@ static struct scx_dispatch_q *find_user_dsq(u64 dsq_id)
>> return rhashtable_lookup_fast(&dsq_hash, &dsq_id, dsq_hash_params);
>> }
>>
>> -/*
>> - * scx_kf_mask enforcement. Some kfuncs can only be called from specific SCX
>> - * ops. When invoking SCX ops, SCX_CALL_OP[_RET]() should be used to indicate
>> - * the allowed kfuncs and those kfuncs should use scx_kf_allowed() to check
>> - * whether it's running from an allowed context.
>> - *
>> - * @mask is constant, always inline to cull the mask calculations.
>> - */
>> -static __always_inline void scx_kf_allow(u32 mask)
>> -{
>> - /* nesting is allowed only in increasing scx_kf_mask order */
>> - WARN_ONCE((mask | higher_bits(mask)) & current->scx.kf_mask,
>> - "invalid nesting current->scx.kf_mask=0x%x mask=0x%x\n",
>> - current->scx.kf_mask, mask);
>> - current->scx.kf_mask |= mask;
>> - barrier();
>> -}
>> -
>> -static void scx_kf_disallow(u32 mask)
>> -{
>> - barrier();
>> - current->scx.kf_mask &= ~mask;
>> -}
>> -
>> -#define SCX_CALL_OP(mask, op, args...) \
>> -do { \
>> - if (mask) { \
>> - scx_kf_allow(mask); \
>> - scx_ops.op(args); \
>> - scx_kf_disallow(mask); \
>> - } else { \
>> - scx_ops.op(args); \
>> - } \
>> -} while (0)
>> +#define SCX_CALL_OP(op, args...) scx_ops.op(args)
>>
>> -#define SCX_CALL_OP_RET(mask, op, args...) \
>> +#define SCX_CALL_OP_RET(op, args...) \
>> ({ \
>> __typeof__(scx_ops.op(args)) __ret; \
>> - if (mask) { \
>> - scx_kf_allow(mask); \
>> - __ret = scx_ops.op(args); \
>> - scx_kf_disallow(mask); \
>> - } else { \
>> - __ret = scx_ops.op(args); \
>> - } \
>> + __ret = scx_ops.op(args); \
>> __ret; \
>> })
>>
>> @@ -1202,74 +1150,36 @@ do { \
>> * scx_kf_allowed_on_arg_tasks() to test whether the invocation is allowed on
>> * the specific task.
>> */
>> -#define SCX_CALL_OP_TASK(mask, op, task, args...) \
>> +#define SCX_CALL_OP_TASK(op, task, args...) \
>> do { \
>> - BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \
>> current->scx.kf_tasks[0] = task; \
>> - SCX_CALL_OP(mask, op, task, ##args); \
>> + SCX_CALL_OP(op, task, ##args); \
>> current->scx.kf_tasks[0] = NULL; \
>> } while (0)
>>
>> -#define SCX_CALL_OP_TASK_RET(mask, op, task, args...) \
>> +#define SCX_CALL_OP_TASK_RET(op, task, args...) \
>> ({ \
>> __typeof__(scx_ops.op(task, ##args)) __ret; \
>> - BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \
>> current->scx.kf_tasks[0] = task; \
>> - __ret = SCX_CALL_OP_RET(mask, op, task, ##args); \
>> + __ret = SCX_CALL_OP_RET(op, task, ##args); \
>> current->scx.kf_tasks[0] = NULL; \
>> __ret; \
>> })
>>
>> -#define SCX_CALL_OP_2TASKS_RET(mask, op, task0, task1, args...) \
>> +#define SCX_CALL_OP_2TASKS_RET(op, task0, task1, args...) \
>> ({ \
>> __typeof__(scx_ops.op(task0, task1, ##args)) __ret; \
>> - BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \
>> current->scx.kf_tasks[0] = task0; \
>> current->scx.kf_tasks[1] = task1; \
>> - __ret = SCX_CALL_OP_RET(mask, op, task0, task1, ##args); \
>> + __ret = SCX_CALL_OP_RET(op, task0, task1, ##args); \
>> current->scx.kf_tasks[0] = NULL; \
>> current->scx.kf_tasks[1] = NULL; \
>> __ret; \
>> })
>>
>> -/* @mask is constant, always inline to cull unnecessary branches */
>> -static __always_inline bool scx_kf_allowed(u32 mask)
>> -{
>> - if (unlikely(!(current->scx.kf_mask & mask))) {
>> - scx_ops_error("kfunc with mask 0x%x called from an operation only allowing 0x%x",
>> - mask, current->scx.kf_mask);
>> - return false;
>> - }
>> -
>> - /*
>> - * Enforce nesting boundaries. e.g. A kfunc which can be called from
>> - * DISPATCH must not be called if we're running DEQUEUE which is nested
>> - * inside ops.dispatch(). We don't need to check boundaries for any
>> - * blocking kfuncs as the verifier ensures they're only called from
>> - * sleepable progs.
>> - */
>> - if (unlikely(highest_bit(mask) == SCX_KF_CPU_RELEASE &&
>> - (current->scx.kf_mask & higher_bits(SCX_KF_CPU_RELEASE)))) {
>> - scx_ops_error("cpu_release kfunc called from a nested operation");
>> - return false;
>> - }
>> -
>> - if (unlikely(highest_bit(mask) == SCX_KF_DISPATCH &&
>> - (current->scx.kf_mask & higher_bits(SCX_KF_DISPATCH)))) {
>> - scx_ops_error("dispatch kfunc called from a nested operation");
>> - return false;
>> - }
>> -
>> - return true;
>> -}
>> -
>> /* see SCX_CALL_OP_TASK() */
>> -static __always_inline bool scx_kf_allowed_on_arg_tasks(u32 mask,
>> - struct task_struct *p)
>> +static __always_inline bool scx_kf_allowed_on_arg_tasks(struct task_struct *p)
>> {
>> - if (!scx_kf_allowed(mask))
>> - return false;
>> -
>> if (unlikely((p != current->scx.kf_tasks[0] &&
>> p != current->scx.kf_tasks[1]))) {
>> scx_ops_error("called on a task not being operated on");
>> @@ -1279,11 +1189,6 @@ static __always_inline bool scx_kf_allowed_on_arg_tasks(u32 mask,
>> return true;
>> }
>>
>> -static bool scx_kf_allowed_if_unlocked(void)
>> -{
>> - return !current->scx.kf_mask;
>> -}
>> -
>> /**
>> * nldsq_next_task - Iterate to the next task in a non-local DSQ
>> * @dsq: user dsq being iterated
>> @@ -2219,7 +2124,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
>> WARN_ON_ONCE(*ddsp_taskp);
>> *ddsp_taskp = p;
>>
>> - SCX_CALL_OP_TASK(SCX_KF_ENQUEUE, enqueue, p, enq_flags);
>> + SCX_CALL_OP_TASK(enqueue, p, enq_flags);
>>
>> *ddsp_taskp = NULL;
>> if (p->scx.ddsp_dsq_id != SCX_DSQ_INVALID)
>> @@ -2316,7 +2221,7 @@ static void enqueue_task_scx(struct rq *rq, struct task_struct *p, int enq_flags
>> add_nr_running(rq, 1);
>>
>> if (SCX_HAS_OP(runnable) && !task_on_rq_migrating(p))
>> - SCX_CALL_OP_TASK(SCX_KF_REST, runnable, p, enq_flags);
>> + SCX_CALL_OP_TASK(runnable, p, enq_flags);
>>
>> if (enq_flags & SCX_ENQ_WAKEUP)
>> touch_core_sched(rq, p);
>> @@ -2351,7 +2256,7 @@ static void ops_dequeue(struct task_struct *p, u64 deq_flags)
>> BUG();
>> case SCX_OPSS_QUEUED:
>> if (SCX_HAS_OP(dequeue))
>> - SCX_CALL_OP_TASK(SCX_KF_REST, dequeue, p, deq_flags);
>> + SCX_CALL_OP_TASK(dequeue, p, deq_flags);
>>
>> if (atomic_long_try_cmpxchg(&p->scx.ops_state, &opss,
>> SCX_OPSS_NONE))
>> @@ -2400,11 +2305,11 @@ static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags
>> */
>> if (SCX_HAS_OP(stopping) && task_current(rq, p)) {
>> update_curr_scx(rq);
>> - SCX_CALL_OP_TASK(SCX_KF_REST, stopping, p, false);
>> + SCX_CALL_OP_TASK(stopping, p, false);
>> }
>>
>> if (SCX_HAS_OP(quiescent) && !task_on_rq_migrating(p))
>> - SCX_CALL_OP_TASK(SCX_KF_REST, quiescent, p, deq_flags);
>> + SCX_CALL_OP_TASK(quiescent, p, deq_flags);
>>
>> if (deq_flags & SCX_DEQ_SLEEP)
>> p->scx.flags |= SCX_TASK_DEQD_FOR_SLEEP;
>> @@ -2424,7 +2329,7 @@ static void yield_task_scx(struct rq *rq)
>> struct task_struct *p = rq->curr;
>>
>> if (SCX_HAS_OP(yield))
>> - SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, p, NULL);
>> + SCX_CALL_OP_2TASKS_RET(yield, p, NULL);
>> else
>> p->scx.slice = 0;
>> }
>> @@ -2434,7 +2339,7 @@ static bool yield_to_task_scx(struct rq *rq, struct task_struct *to)
>> struct task_struct *from = rq->curr;
>>
>> if (SCX_HAS_OP(yield))
>> - return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, from, to);
>> + return SCX_CALL_OP_2TASKS_RET(yield, from, to);
>> else
>> return false;
>> }
>> @@ -2992,7 +2897,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
>> * emitted in switch_class().
>> */
>> if (SCX_HAS_OP(cpu_acquire))
>> - SCX_CALL_OP(SCX_KF_REST, cpu_acquire, cpu_of(rq), NULL);
>> + SCX_CALL_OP(cpu_acquire, cpu_of(rq), NULL);
>> rq->scx.cpu_released = false;
>> }
>>
>> @@ -3037,8 +2942,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
>> do {
>> dspc->nr_tasks = 0;
>>
>> - SCX_CALL_OP(SCX_KF_DISPATCH, dispatch, cpu_of(rq),
>> - prev_on_scx ? prev : NULL);
>> + SCX_CALL_OP(dispatch, cpu_of(rq), prev_on_scx ? prev : NULL);
>>
>> flush_dispatch_buf(rq);
>>
>> @@ -3159,7 +3063,7 @@ static void set_next_task_scx(struct rq *rq, struct task_struct *p, bool first)
>>
>> /* see dequeue_task_scx() on why we skip when !QUEUED */
>> if (SCX_HAS_OP(running) && (p->scx.flags & SCX_TASK_QUEUED))
>> - SCX_CALL_OP_TASK(SCX_KF_REST, running, p);
>> + SCX_CALL_OP_TASK(running, p);
>>
>> clr_task_runnable(p, true);
>>
>> @@ -3240,8 +3144,7 @@ static void switch_class(struct rq *rq, struct task_struct *next)
>> .task = next,
>> };
>>
>> - SCX_CALL_OP(SCX_KF_CPU_RELEASE,
>> - cpu_release, cpu_of(rq), &args);
>> + SCX_CALL_OP(cpu_release, cpu_of(rq), &args);
>> }
>> rq->scx.cpu_released = true;
>> }
>> @@ -3254,7 +3157,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p,
>>
>> /* see dequeue_task_scx() on why we skip when !QUEUED */
>> if (SCX_HAS_OP(stopping) && (p->scx.flags & SCX_TASK_QUEUED))
>> - SCX_CALL_OP_TASK(SCX_KF_REST, stopping, p, true);
>> + SCX_CALL_OP_TASK(stopping, p, true);
>>
>> if (p->scx.flags & SCX_TASK_QUEUED) {
>> set_task_runnable(rq, p);
>> @@ -3428,8 +3331,7 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag
>> WARN_ON_ONCE(*ddsp_taskp);
>> *ddsp_taskp = p;
>>
>> - cpu = SCX_CALL_OP_TASK_RET(SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU,
>> - select_cpu, p, prev_cpu, wake_flags);
>> + cpu = SCX_CALL_OP_TASK_RET(select_cpu, p, prev_cpu, wake_flags);
>> p->scx.selected_cpu = cpu;
>> *ddsp_taskp = NULL;
>> if (ops_cpu_valid(cpu, "from ops.select_cpu()"))
>> @@ -3473,8 +3375,7 @@ static void set_cpus_allowed_scx(struct task_struct *p,
>> * designation pointless. Cast it away when calling the operation.
>> */
>> if (SCX_HAS_OP(set_cpumask))
>> - SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, p,
>> - (struct cpumask *)p->cpus_ptr);
>> + SCX_CALL_OP_TASK(set_cpumask, p, (struct cpumask *)p->cpus_ptr);
>> }
>>
>> static void handle_hotplug(struct rq *rq, bool online)
>> @@ -3487,9 +3388,9 @@ static void handle_hotplug(struct rq *rq, bool online)
>> scx_idle_update_selcpu_topology(&scx_ops);
>>
>> if (online && SCX_HAS_OP(cpu_online))
>> - SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_online, cpu);
>> + SCX_CALL_OP(cpu_online, cpu);
>> else if (!online && SCX_HAS_OP(cpu_offline))
>> - SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_offline, cpu);
>> + SCX_CALL_OP(cpu_offline, cpu);
>> else
>> scx_ops_exit(SCX_ECODE_ACT_RESTART | SCX_ECODE_RSN_HOTPLUG,
>> "cpu %d going %s, exiting scheduler", cpu,
>> @@ -3593,7 +3494,7 @@ static void task_tick_scx(struct rq *rq, struct task_struct *curr, int queued)
>> curr->scx.slice = 0;
>> touch_core_sched(rq, curr);
>> } else if (SCX_HAS_OP(tick)) {
>> - SCX_CALL_OP(SCX_KF_REST, tick, curr);
>> + SCX_CALL_OP(tick, curr);
>> }
>>
>> if (!curr->scx.slice)
>> @@ -3670,7 +3571,7 @@ static int scx_ops_init_task(struct task_struct *p, struct task_group *tg, bool
>> .fork = fork,
>> };
>>
>> - ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init_task, p, &args);
>> + ret = SCX_CALL_OP_RET(init_task, p, &args);
>> if (unlikely(ret)) {
>> ret = ops_sanitize_err("init_task", ret);
>> return ret;
>> @@ -3727,11 +3628,11 @@ static void scx_ops_enable_task(struct task_struct *p)
>> p->scx.weight = sched_weight_to_cgroup(weight);
>>
>> if (SCX_HAS_OP(enable))
>> - SCX_CALL_OP_TASK(SCX_KF_REST, enable, p);
>> + SCX_CALL_OP_TASK(enable, p);
>> scx_set_task_state(p, SCX_TASK_ENABLED);
>>
>> if (SCX_HAS_OP(set_weight))
>> - SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, p, p->scx.weight);
>> + SCX_CALL_OP_TASK(set_weight, p, p->scx.weight);
>> }
>>
>> static void scx_ops_disable_task(struct task_struct *p)
>> @@ -3740,7 +3641,7 @@ static void scx_ops_disable_task(struct task_struct *p)
>> WARN_ON_ONCE(scx_get_task_state(p) != SCX_TASK_ENABLED);
>>
>> if (SCX_HAS_OP(disable))
>> - SCX_CALL_OP(SCX_KF_REST, disable, p);
>> + SCX_CALL_OP(disable, p);
>> scx_set_task_state(p, SCX_TASK_READY);
>> }
>>
>> @@ -3769,7 +3670,7 @@ static void scx_ops_exit_task(struct task_struct *p)
>> }
>>
>> if (SCX_HAS_OP(exit_task))
>> - SCX_CALL_OP(SCX_KF_REST, exit_task, p, &args);
>> + SCX_CALL_OP(exit_task, p, &args);
>> scx_set_task_state(p, SCX_TASK_NONE);
>> }
>>
>> @@ -3878,7 +3779,7 @@ static void reweight_task_scx(struct rq *rq, struct task_struct *p,
>>
>> p->scx.weight = sched_weight_to_cgroup(scale_load_down(lw->weight));
>> if (SCX_HAS_OP(set_weight))
>> - SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, p, p->scx.weight);
>> + SCX_CALL_OP_TASK(set_weight, p, p->scx.weight);
>> }
>>
>> static void prio_changed_scx(struct rq *rq, struct task_struct *p, int oldprio)
>> @@ -3894,8 +3795,7 @@ static void switching_to_scx(struct rq *rq, struct task_struct *p)
>> * different scheduler class. Keep the BPF scheduler up-to-date.
>> */
>> if (SCX_HAS_OP(set_cpumask))
>> - SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, p,
>> - (struct cpumask *)p->cpus_ptr);
>> + SCX_CALL_OP_TASK(set_cpumask, p, (struct cpumask *)p->cpus_ptr);
>> }
>>
>> static void switched_from_scx(struct rq *rq, struct task_struct *p)
>> @@ -3987,8 +3887,7 @@ int scx_tg_online(struct task_group *tg)
>> struct scx_cgroup_init_args args =
>> { .weight = tg->scx_weight };
>>
>> - ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init,
>> - tg->css.cgroup, &args);
>> + ret = SCX_CALL_OP_RET(cgroup_init, tg->css.cgroup, &args);
>> if (ret)
>> ret = ops_sanitize_err("cgroup_init", ret);
>> }
>> @@ -4009,7 +3908,7 @@ void scx_tg_offline(struct task_group *tg)
>> percpu_down_read(&scx_cgroup_rwsem);
>>
>> if (SCX_HAS_OP(cgroup_exit) && (tg->scx_flags & SCX_TG_INITED))
>> - SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_exit, tg->css.cgroup);
>> + SCX_CALL_OP(cgroup_exit, tg->css.cgroup);
>> tg->scx_flags &= ~(SCX_TG_ONLINE | SCX_TG_INITED);
>>
>> percpu_up_read(&scx_cgroup_rwsem);
>> @@ -4042,8 +3941,7 @@ int scx_cgroup_can_attach(struct cgroup_taskset *tset)
>> continue;
>>
>> if (SCX_HAS_OP(cgroup_prep_move)) {
>> - ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_prep_move,
>> - p, from, css->cgroup);
>> + ret = SCX_CALL_OP_RET(cgroup_prep_move, p, from, css->cgroup);
>> if (ret)
>> goto err;
>> }
>> @@ -4056,8 +3954,7 @@ int scx_cgroup_can_attach(struct cgroup_taskset *tset)
>> err:
>> cgroup_taskset_for_each(p, css, tset) {
>> if (SCX_HAS_OP(cgroup_cancel_move) && p->scx.cgrp_moving_from)
>> - SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_cancel_move, p,
>> - p->scx.cgrp_moving_from, css->cgroup);
>> + SCX_CALL_OP(cgroup_cancel_move, p, p->scx.cgrp_moving_from, css->cgroup);
>> p->scx.cgrp_moving_from = NULL;
>> }
>>
>> @@ -4075,8 +3972,7 @@ void scx_cgroup_move_task(struct task_struct *p)
>> * cgrp_moving_from set.
>> */
>> if (SCX_HAS_OP(cgroup_move) && !WARN_ON_ONCE(!p->scx.cgrp_moving_from))
>> - SCX_CALL_OP_TASK(SCX_KF_UNLOCKED, cgroup_move, p,
>> - p->scx.cgrp_moving_from, tg_cgrp(task_group(p)));
>> + SCX_CALL_OP_TASK(cgroup_move, p, p->scx.cgrp_moving_from, tg_cgrp(task_group(p)));
>> p->scx.cgrp_moving_from = NULL;
>> }
>>
>> @@ -4095,8 +3991,7 @@ void scx_cgroup_cancel_attach(struct cgroup_taskset *tset)
>>
>> cgroup_taskset_for_each(p, css, tset) {
>> if (SCX_HAS_OP(cgroup_cancel_move) && p->scx.cgrp_moving_from)
>> - SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_cancel_move, p,
>> - p->scx.cgrp_moving_from, css->cgroup);
>> + SCX_CALL_OP(cgroup_cancel_move, p, p->scx.cgrp_moving_from, css->cgroup);
>> p->scx.cgrp_moving_from = NULL;
>> }
>> out_unlock:
>> @@ -4109,8 +4004,7 @@ void scx_group_set_weight(struct task_group *tg, unsigned long weight)
>>
>> if (scx_cgroup_enabled && tg->scx_weight != weight) {
>> if (SCX_HAS_OP(cgroup_set_weight))
>> - SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_set_weight,
>> - tg_cgrp(tg), weight);
>> + SCX_CALL_OP(cgroup_set_weight, tg_cgrp(tg), weight);
>> tg->scx_weight = weight;
>> }
>>
>> @@ -4300,7 +4194,7 @@ static void scx_cgroup_exit(void)
>> continue;
>> rcu_read_unlock();
>>
>> - SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_exit, css->cgroup);
>> + SCX_CALL_OP(cgroup_exit, css->cgroup);
>>
>> rcu_read_lock();
>> css_put(css);
>> @@ -4343,8 +4237,7 @@ static int scx_cgroup_init(void)
>> continue;
>> rcu_read_unlock();
>>
>> - ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init,
>> - css->cgroup, &args);
>> + ret = SCX_CALL_OP_RET(cgroup_init, css->cgroup, &args);
>> if (ret) {
>> css_put(css);
>> scx_ops_error("ops.cgroup_init() failed (%d)", ret);
>> @@ -4840,7 +4733,7 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
>> }
>>
>> if (scx_ops.exit)
>> - SCX_CALL_OP(SCX_KF_UNLOCKED, exit, ei);
>> + SCX_CALL_OP(exit, ei);
>>
>> cancel_delayed_work_sync(&scx_watchdog_work);
>>
>> @@ -5047,7 +4940,7 @@ static void scx_dump_task(struct seq_buf *s, struct scx_dump_ctx *dctx,
>>
>> if (SCX_HAS_OP(dump_task)) {
>> ops_dump_init(s, " ");
>> - SCX_CALL_OP(SCX_KF_REST, dump_task, dctx, p);
>> + SCX_CALL_OP(dump_task, dctx, p);
>> ops_dump_exit();
>> }
>>
>> @@ -5094,7 +4987,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
>>
>> if (SCX_HAS_OP(dump)) {
>> ops_dump_init(&s, "");
>> - SCX_CALL_OP(SCX_KF_UNLOCKED, dump, &dctx);
>> + SCX_CALL_OP(dump, &dctx);
>> ops_dump_exit();
>> }
>>
>> @@ -5151,7 +5044,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
>> used = seq_buf_used(&ns);
>> if (SCX_HAS_OP(dump_cpu)) {
>> ops_dump_init(&ns, " ");
>> - SCX_CALL_OP(SCX_KF_REST, dump_cpu, &dctx, cpu, idle);
>> + SCX_CALL_OP(dump_cpu, &dctx, cpu, idle);
>> ops_dump_exit();
>> }
>>
>> @@ -5405,7 +5298,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
>> cpus_read_lock();
>>
>> if (scx_ops.init) {
>> - ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init);
>> + ret = SCX_CALL_OP_RET(init);
>> if (ret) {
>> ret = ops_sanitize_err("init", ret);
>> cpus_read_unlock();
>> @@ -6146,9 +6039,6 @@ void __init init_sched_ext_class(void)
>> */
>> static bool scx_dsq_insert_preamble(struct task_struct *p, u64 enq_flags)
>> {
>> - if (!scx_kf_allowed(SCX_KF_ENQUEUE | SCX_KF_DISPATCH))
>> - return false;
>> -
>> lockdep_assert_irqs_disabled();
>>
>> if (unlikely(!p)) {
>> @@ -6310,9 +6200,6 @@ static bool scx_dsq_move(struct bpf_iter_scx_dsq_kern *kit,
>> bool in_balance;
>> unsigned long flags;
>>
>> - if (!scx_kf_allowed_if_unlocked() && !scx_kf_allowed(SCX_KF_DISPATCH))
>> - return false;
>> -
>> /*
>> * Can be called from either ops.dispatch() locking this_rq() or any
>> * context where no rq lock is held. If latter, lock @p's task_rq which
>> @@ -6395,9 +6282,6 @@ __bpf_kfunc_start_defs();
>> */
>> __bpf_kfunc u32 scx_bpf_dispatch_nr_slots(void)
>> {
>> - if (!scx_kf_allowed(SCX_KF_DISPATCH))
>> - return 0;
>> -
>> return scx_dsp_max_batch - __this_cpu_read(scx_dsp_ctx->cursor);
>> }
>>
>> @@ -6411,9 +6295,6 @@ __bpf_kfunc void scx_bpf_dispatch_cancel(void)
>> {
>> struct scx_dsp_ctx *dspc = this_cpu_ptr(scx_dsp_ctx);
>>
>> - if (!scx_kf_allowed(SCX_KF_DISPATCH))
>> - return;
>> -
>> if (dspc->cursor > 0)
>> dspc->cursor--;
>> else
>> @@ -6439,9 +6320,6 @@ __bpf_kfunc bool scx_bpf_dsq_move_to_local(u64 dsq_id)
>> struct scx_dsp_ctx *dspc = this_cpu_ptr(scx_dsp_ctx);
>> struct scx_dispatch_q *dsq;
>>
>> - if (!scx_kf_allowed(SCX_KF_DISPATCH))
>> - return false;
>> -
>> flush_dispatch_buf(dspc->rq);
>>
>> dsq = find_user_dsq(dsq_id);
>> @@ -6632,9 +6510,6 @@ __bpf_kfunc u32 scx_bpf_reenqueue_local(void)
>> struct rq *rq;
>> struct task_struct *p, *n;
>>
>> - if (!scx_kf_allowed(SCX_KF_CPU_RELEASE))
>> - return 0;
>> -
>> rq = cpu_rq(smp_processor_id());
>> lockdep_assert_rq_held(rq);
>>
>> @@ -7239,7 +7114,7 @@ __bpf_kfunc struct cgroup *scx_bpf_task_cgroup(struct task_struct *p)
>> struct task_group *tg = p->sched_task_group;
>> struct cgroup *cgrp = &cgrp_dfl_root.cgrp;
>>
>> - if (!scx_kf_allowed_on_arg_tasks(__SCX_KF_RQ_LOCKED, p))
>> + if (!scx_kf_allowed_on_arg_tasks(p))
>> goto out;
>>
>> cgrp = tg_cgrp(tg);
>> @@ -7479,10 +7354,6 @@ static int __init scx_init(void)
>> *
>> * Some kfuncs are context-sensitive and can only be called from
>> * specific SCX ops. They are grouped into BTF sets accordingly.
>> - * Unfortunately, BPF currently doesn't have a way of enforcing such
>> - * restrictions. Eventually, the verifier should be able to enforce
>> - * them. For now, register them the same and make each kfunc explicitly
>> - * check using scx_kf_allowed().
>> */
>> if ((ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS,
>> &scx_kfunc_set_ops_context_sensitive)) ||
>> diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
>> index efb6077810d8..e241935021eb 100644
>> --- a/kernel/sched/ext_idle.c
>> +++ b/kernel/sched/ext_idle.c
>> @@ -658,7 +658,7 @@ void __scx_update_idle(struct rq *rq, bool idle, bool do_notify)
>> * managed by put_prev_task_idle()/set_next_task_idle().
>> */
>> if (SCX_HAS_OP(update_idle) && do_notify && !scx_rq_bypassing(rq))
>> - SCX_CALL_OP(SCX_KF_REST, update_idle, cpu_of(rq), idle);
>> + SCX_CALL_OP(update_idle, cpu_of(rq), idle);
>>
>> /*
>> * Update the idle masks:
>> @@ -803,9 +803,6 @@ __bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
>> if (!check_builtin_idle_enabled())
>> goto prev_cpu;
>>
>> - if (!scx_kf_allowed(SCX_KF_SELECT_CPU))
>> - goto prev_cpu;
>> -
>> #ifdef CONFIG_SMP
>> return scx_select_cpu_dfl(p, prev_cpu, wake_flags, is_idle);
>> #endif
>> --
>> 2.39.5
>>