[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230621142020.GG2053369@hirez.programming.kicks-ass.net>
Date: Wed, 21 Jun 2023 16:20:20 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: David Vernet <void@...ifault.com>
Cc: linux-kernel@...r.kernel.org, mingo@...hat.com,
juri.lelli@...hat.com, vincent.guittot@...aro.org,
rostedt@...dmis.org, dietmar.eggemann@....com, bsegall@...gle.com,
mgorman@...e.de, bristot@...hat.com, vschneid@...hat.com,
joshdon@...gle.com, roman.gushchin@...ux.dev, tj@...nel.org,
kernel-team@...a.com
Subject: Re: [RFC PATCH 3/3] sched: Implement shared wakequeue in CFS
On Tue, Jun 13, 2023 at 12:20:04AM -0500, David Vernet wrote:
> +struct swqueue {
> + struct list_head list;
> + spinlock_t lock;
> +} ____cacheline_aligned;
I'm thinking you can shard this just fine; it means pop() needs to
iterate all shards, but that shouldn't be a problem, and it would still
only need to take a single lock.
I'm thinking 4 or 8 shards should be plenty, even for Intel LLC.
> #ifdef CONFIG_SMP
> +static struct task_struct *swqueue_pull_task(struct swqueue *swqueue)
> +{
> + unsigned long flags;
> +
> + struct task_struct *p;
> +
> + spin_lock_irqsave(&swqueue->lock, flags);
> + p = list_first_entry_or_null(&swqueue->list, struct task_struct,
> + swqueue_node);
> + if (p)
> + list_del_init(&p->swqueue_node);
> + spin_unlock_irqrestore(&swqueue->lock, flags);
> +
> + return p;
> +}
Would this not normally be called pop() or somesuch?
> +static void swqueue_enqueue(struct rq *rq, struct task_struct *p, int enq_flags)
> +{
> + unsigned long flags;
> + struct swqueue *swqueue;
> + bool task_migrated = enq_flags & ENQUEUE_MIGRATED;
> + bool task_wakeup = enq_flags & ENQUEUE_WAKEUP;
> +
> + /*
> + * Only enqueue the task in the shared wakequeue if:
> + *
> + * - SWQUEUE is enabled
> + * - The task is on the wakeup path
> + * - The task wasn't purposefully migrated to the current rq by
> + * select_task_rq()
> + * - The task isn't pinned to a specific CPU
> + */
> + if (!task_wakeup || task_migrated || p->nr_cpus_allowed == 1)
> + return;
Elsewhere you mentioned heuristics; this smells like them. This and the
is_cpu_allowed() thing make you lose plenty of opportunities.
> + swqueue = rq_swqueue(rq);
> + spin_lock_irqsave(&swqueue->lock, flags);
> + list_add_tail(&p->swqueue_node, &swqueue->list);
> + spin_unlock_irqrestore(&swqueue->lock, flags);
> +}
> +
> static int swqueue_pick_next_task(struct rq *rq, struct rq_flags *rf)
> {
> - return 0;
> + struct swqueue *swqueue;
> + struct task_struct *p = NULL;
> + struct rq *src_rq;
> + struct rq_flags src_rf;
> + int ret;
> +
> + swqueue = rq_swqueue(rq);
> + if (!list_empty(&swqueue->list))
> + p = swqueue_pull_task(swqueue);
> +
> + if (!p)
> + return 0;
At this point you can do the is_cpu_allowed() check and avoid the whole
lock dance if the task is not allowed on this CPU.
> +
> + rq_unpin_lock(rq, rf);
> + raw_spin_rq_unlock(rq);
> +
> + src_rq = task_rq_lock(p, &src_rf);
> +
> + if (task_on_rq_queued(p) && !task_on_cpu(rq, p))
> + src_rq = migrate_task_to(src_rq, &src_rf, p, cpu_of(rq));
And then this becomes move_queued_task().
> + if (src_rq->cpu != rq->cpu)
> + ret = 1;
> + else
> + ret = -1;
> +
> + task_rq_unlock(src_rq, p, &src_rf);
> +
> + raw_spin_rq_lock(rq);
> + rq_repin_lock(rq, rf);
> +
> + return ret;
> }
>
> static void swqueue_remove_task(struct task_struct *p)
> +{
> + unsigned long flags;
> + struct swqueue *swqueue;
> +
> + if (!list_empty(&p->swqueue_node)) {
> + swqueue = rq_swqueue(task_rq(p));
> + spin_lock_irqsave(&swqueue->lock, flags);
> + list_del_init(&p->swqueue_node);
> + spin_unlock_irqrestore(&swqueue->lock, flags);
> + }
> +}
dequeue()
Powered by blists - more mailing lists