Message-ID: <f181285e-4167-4581-a712-8e444a0ab2bb@suse.cz>
Date: Wed, 14 May 2025 16:01:17 +0200
From: Vlastimil Babka <vbabka@...e.cz>
To: Suren Baghdasaryan <surenb@...gle.com>
Cc: "Liam R. Howlett" <Liam.Howlett@...cle.com>,
Christoph Lameter <cl@...ux.com>, David Rientjes <rientjes@...gle.com>,
Roman Gushchin <roman.gushchin@...ux.dev>, Harry Yoo <harry.yoo@...cle.com>,
Uladzislau Rezki <urezki@...il.com>, linux-mm@...ck.org,
linux-kernel@...r.kernel.org, rcu@...r.kernel.org,
maple-tree@...ts.infradead.org
Subject: Re: [PATCH v4 2/9] slab: add sheaf support for batching kfree_rcu()
operations
On 5/6/25 23:34, Suren Baghdasaryan wrote:
> On Fri, Apr 25, 2025 at 1:27 AM Vlastimil Babka <vbabka@...e.cz> wrote:
>> @@ -2631,6 +2637,24 @@ static void sheaf_flush_unused(struct kmem_cache *s, struct slab_sheaf *sheaf)
>> sheaf->size = 0;
>> }
>>
>> +static void __rcu_free_sheaf_prepare(struct kmem_cache *s,
>> + struct slab_sheaf *sheaf);
>
> I think you could safely move __rcu_free_sheaf_prepare() here and
> avoid the above forward declaration.
Right, done.
>> @@ -5304,6 +5340,140 @@ bool free_to_pcs(struct kmem_cache *s, void *object)
>> return true;
>> }
>>
>> +static void __rcu_free_sheaf_prepare(struct kmem_cache *s,
>> + struct slab_sheaf *sheaf)
>
> This function seems to be an almost exact copy of free_to_pcs_bulk()
> from your previous patch. Maybe they can be consolidated?
True, I've extracted it to __kmem_cache_free_bulk_prepare().
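Something along these lines (just a sketch to illustrate the extraction;
the helper name is the one above, but the exact signature, e.g. whether
it takes a size_t count like the other bulk paths, is my shorthand here,
not the actual v5 code):

	static void __kmem_cache_free_bulk_prepare(struct kmem_cache *s,
						   void **p, unsigned int *size)
	{
		bool init = slab_want_init_on_free(s);
		unsigned int i = 0;

		while (i < *size) {
			struct slab *slab = virt_to_slab(p[i]);

			memcg_slab_free_hook(s, slab, p + i, 1);
			alloc_tagging_slab_free_hook(s, slab, p + i, 1);

			/*
			 * The free hook rejected the object (e.g. kasan
			 * quarantine took it) - swap in the last object
			 * and retry at the same index.
			 */
			if (unlikely(!slab_free_hook(s, p[i], init, true))) {
				p[i] = p[--(*size)];
				continue;
			}

			i++;
		}
	}

	static void __rcu_free_sheaf_prepare(struct kmem_cache *s,
					     struct slab_sheaf *sheaf)
	{
		__kmem_cache_free_bulk_prepare(s, &sheaf->objects[0],
					       &sheaf->size);
	}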
>> +{
>> + bool init = slab_want_init_on_free(s);
>> + void **p = &sheaf->objects[0];
>> + unsigned int i = 0;
>> +
>> + while (i < sheaf->size) {
>> + struct slab *slab = virt_to_slab(p[i]);
>> +
>> + memcg_slab_free_hook(s, slab, p + i, 1);
>> + alloc_tagging_slab_free_hook(s, slab, p + i, 1);
>> +
>> + if (unlikely(!slab_free_hook(s, p[i], init, true))) {
>> + p[i] = p[--sheaf->size];
>> + continue;
>> + }
>> +
>> + i++;
>> + }
>> +}
>> +
>> +static void rcu_free_sheaf(struct rcu_head *head)
>> +{
>> + struct slab_sheaf *sheaf;
>> + struct node_barn *barn;
>> + struct kmem_cache *s;
>> +
>> + sheaf = container_of(head, struct slab_sheaf, rcu_head);
>> +
>> + s = sheaf->cache;
>> +
>> + /*
>> + * This may reduce the number of objects that the sheaf is no longer
>> + * technically full, but it's easier to treat it that way (unless it's
>
> I don't understand the sentence above. Could you please clarify and
> maybe reword it?
Is this clearer?
/*
* This may remove some objects due to slab_free_hook() returning false,
* so that the sheaf might no longer be completely full. But it's easier
* to handle it as full (unless it became completely empty), as the code
 * handles it fine. The only downside is that the sheaf will serve fewer
* allocations when reused. It only happens due to debugging, which is a
* performance hit anyway.
*/
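The caller rcu_free_sheaf() then handles the two outcomes roughly like
this (a sketch only; barn_put_full_sheaf() is assumed as the counterpart
of barn_put_empty_sheaf(), and the barn lookup plus the fallback when
the barn is already full are my guesses, not the exact v5 code):

	__rcu_free_sheaf_prepare(s, sheaf);

	barn = get_node(s, numa_mem_id())->barn;

	/* the hooks may have emptied the sheaf completely */
	if (unlikely(!sheaf->size)) {
		barn_put_empty_sheaf(barn, sheaf);
		return;
	}

	/* treat it as full even if some objects were removed above */
	barn_put_full_sheaf(barn, sheaf);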
>> +
>> + if (!local_trylock(&s->cpu_sheaves->lock))
>
> Aren't you leaking `empty` sheaf on this failure?
Right! Fixed, thanks.
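Something like this on the failure path (a sketch of one way to plug
the leak, assuming the barn pointer was saved before the lock was
dropped; the actual fix may look different):

	if (!local_trylock(&s->cpu_sheaves->lock)) {
		/* don't leak the freshly obtained empty sheaf */
		barn_put_empty_sheaf(barn, empty);
		goto fail;
	}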
>> + goto fail;
>> +
>> + pcs = this_cpu_ptr(s->cpu_sheaves);
>> +
>> + if (unlikely(pcs->rcu_free))
>> + barn_put_empty_sheaf(pcs->barn, empty);
>> + else
>> + pcs->rcu_free = empty;
>> + }
>> +
>> +do_free:
>> +
>> + rcu_sheaf = pcs->rcu_free;
>> +
>> + rcu_sheaf->objects[rcu_sheaf->size++] = obj;
>> +
>> + if (likely(rcu_sheaf->size < s->sheaf_capacity))
>> + rcu_sheaf = NULL;
>> + else
>> + pcs->rcu_free = NULL;
>> +
>> + local_unlock(&s->cpu_sheaves->lock);
>> +
>> + if (rcu_sheaf)
>> + call_rcu(&rcu_sheaf->rcu_head, rcu_free_sheaf);
>> +
>> + stat(s, FREE_RCU_SHEAF);
>> + return true;
>> +
>> +fail:
>> + stat(s, FREE_RCU_SHEAF_FAIL);
>> + return false;
>> +}
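For context, the tail quoted above is __kfree_rcu_sheaf() returning
false so the caller can fall back to the regular kvfree_rcu() batching.
A sketch of what that caller side could look like (the wrapper name
kfree_rcu_sheaf() and its placement in kvfree_call_rcu() are my
assumptions, not quoted from the series):

	/* resolve the cache from the object and try the sheaf path */
	static bool kfree_rcu_sheaf(void *obj)
	{
		struct folio *folio;
		struct kmem_cache *s;

		if (is_vmalloc_addr(obj))
			return false;

		folio = virt_to_folio(obj);
		if (unlikely(!folio_test_slab(folio)))
			return false;

		s = folio_slab(folio)->slab_cache;
		if (s->cpu_sheaves)
			return __kfree_rcu_sheaf(s, obj);

		return false;
	}

	/* in kvfree_call_rcu(), before the existing batching: */
	if (kfree_rcu_sheaf(ptr))
		return;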
>> +
>> /*
>> * Bulk free objects to the percpu sheaves.
>> * Unlike free_to_pcs() this includes the calls to all necessary hooks
>> @@ -6802,6 +6972,11 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
>> struct kmem_cache_node *n;
>>
>> flush_all_cpus_locked(s);
>> +
>> + /* we might have rcu sheaves in flight */
>> + if (s->cpu_sheaves)
>> + rcu_barrier();
>> +
>> /* Attempt to free all objects */
>> for_each_kmem_cache_node(s, node, n) {
>> if (n->barn)
>> @@ -8214,6 +8389,8 @@ STAT_ATTR(ALLOC_PCS, alloc_cpu_sheaf);
>> STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
>> STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
>> STAT_ATTR(FREE_PCS, free_cpu_sheaf);
>> +STAT_ATTR(FREE_RCU_SHEAF, free_rcu_sheaf);
>> +STAT_ATTR(FREE_RCU_SHEAF_FAIL, free_rcu_sheaf_fail);
>> STAT_ATTR(FREE_FASTPATH, free_fastpath);
>> STAT_ATTR(FREE_SLOWPATH, free_slowpath);
>> STAT_ATTR(FREE_FROZEN, free_frozen);
>> @@ -8312,6 +8489,8 @@ static struct attribute *slab_attrs[] = {
>> &alloc_fastpath_attr.attr,
>> &alloc_slowpath_attr.attr,
>> &free_cpu_sheaf_attr.attr,
>> + &free_rcu_sheaf_attr.attr,
>> + &free_rcu_sheaf_fail_attr.attr,
>> &free_fastpath_attr.attr,
>> &free_slowpath_attr.attr,
>> &free_frozen_attr.attr,
>>
>> --
>> 2.49.0
>>