Message-ID: <20250108085026.GC23315@noisy.programming.kicks-ass.net>
Date: Wed, 8 Jan 2025 09:50:26 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: Changwoo Min <multics69@...il.com>
Cc: tj@...nel.org, void@...ifault.com, arighi@...dia.com, mingo@...hat.com,
changwoo@...lia.com, kernel-dev@...lia.com,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v7 2/6] sched_ext: Implement scx_bpf_now()
> +__bpf_kfunc u64 scx_bpf_now(void)
> +{
> + struct rq *rq;
> + u64 clock;
> +
> + preempt_disable();
> +
> + rq = this_rq();
> + if (READ_ONCE(rq->scx.flags) & SCX_RQ_CLK_VALID) {
> + /*
> + * If the rq clock is valid, use the cached rq clock.
> + *
> + * Note that scx_bpf_now() is re-entrant between a process
> + * context and an interrupt context (e.g., timer interrupt).
> + * However, we don't need to consider the race between them
> + * because such race is not observable from a caller.
> + */
> + clock = READ_ONCE(rq->scx.clock);
> + } else {
> + /*
> + * Otherwise, return a fresh rq clock.
> + *
> + * The rq clock is updated outside of the rq lock.
> + * In this case, keep the updated rq clock invalid so the next
> + * kfunc call outside the rq lock gets a fresh rq clock.
> + */
> + clock = sched_clock_cpu(cpu_of(rq));
> + }
> +
> + preempt_enable();
> +
> + return clock;
> +}
> +static inline void scx_rq_clock_update(struct rq *rq, u64 clock)
> +{
> + if (!scx_enabled())
> + return;
> + WRITE_ONCE(rq->scx.clock, clock);
> + WRITE_ONCE(rq->scx.flags, rq->scx.flags | SCX_RQ_CLK_VALID);
> +}

AFAICT it is possible for this to be used like:

  CPU0                          CPU1

  lock(rq1->lock);
  ...
  scx_rq_clock_update(...);     scx_bpf_now();
  ...
  unlock(rq1->lock);

Which then enables the following ordering problem:

  CPU0                                  CPU1

  WRITE_ONCE(rq->scx.clock, clock);     if (rq->scx.flags & VALID)
  WRITE_ONCE(rq->scx.flags, VALID);             return rq->scx.clock;

Where it then becomes possible to observe VALID before clock is written.
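
(As an aside, the same publication pattern can be sketched in a small
userspace C11/pthreads program -- the names below are made up for
illustration, this is not the kernel code -- to show why the
release/acquire pairing matters: with plain accesses nothing orders the
flag store against the clock store, so a reader can see the flag set
while still reading a stale clock.)

	/* userspace sketch only; stands in for rq->scx.{clock,flags} */
	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdint.h>
	#include <stdio.h>

	#define CLK_VALID	0x1u

	static _Atomic uint64_t clk;		/* ~ rq->scx.clock */
	static _Atomic unsigned int flags;	/* ~ rq->scx.flags */

	/* writer: publish the clock, then mark it valid (store-release) */
	static void *update(void *arg)
	{
		atomic_store_explicit(&clk, 42, memory_order_relaxed);
		atomic_store_explicit(&flags, CLK_VALID, memory_order_release);
		return arg;
	}

	/* reader: only trust clk after observing CLK_VALID (load-acquire) */
	static void *now(void *arg)
	{
		if (atomic_load_explicit(&flags, memory_order_acquire) & CLK_VALID)
			printf("clock = %llu\n", (unsigned long long)
			       atomic_load_explicit(&clk, memory_order_relaxed));
		else
			printf("not valid; would fall back to a fresh clock\n");
		return arg;
	}

	int main(void)
	{
		pthread_t w, r;

		pthread_create(&w, NULL, update, NULL);
		pthread_create(&r, NULL, now, NULL);
		pthread_join(w, NULL);
		pthread_join(r, NULL);
		return 0;
	}
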
That is, I rather think you need:
> +static inline void scx_rq_clock_update(struct rq *rq, u64 clock)
> +{
> + if (!scx_enabled())
> + return;
> + WRITE_ONCE(rq->scx.clock, clock);
> + smp_store_release(&rq->scx.flags, rq->scx.flags | SCX_RQ_CLK_VALID);
> +}
and:
if (smp_load_acquire(&rq->scx.flags) & SCX_RQ_CLK_VALID) {
> + /*
> + * If the rq clock is valid, use the cached rq clock.
> + *
> + * Note that scx_bpf_now() is re-entrant between a process
> + * context and an interrupt context (e.g., timer interrupt).
> + * However, we don't need to consider the race between them
> + * because such race is not observable from a caller.
> + */
> + clock = READ_ONCE(rq->scx.clock);
Such that if you observe VALID, you must then also observe the clock.
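
That is, with the suggested acquire spliced into your kfunc, the reader
side would end up looking something like (untested):

	__bpf_kfunc u64 scx_bpf_now(void)
	{
		struct rq *rq;
		u64 clock;

		preempt_disable();

		rq = this_rq();
		/* pairs with the smp_store_release() in scx_rq_clock_update() */
		if (smp_load_acquire(&rq->scx.flags) & SCX_RQ_CLK_VALID) {
			clock = READ_ONCE(rq->scx.clock);
		} else {
			clock = sched_clock_cpu(cpu_of(rq));
		}

		preempt_enable();

		return clock;
	}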