[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <673dba4d-7d45-4e2e-8d2f-b969be0732c1@linux.alibaba.com>
Date: Mon, 12 Aug 2024 16:00:02 +0800
From: Tianchen Ding <dtcccc@...ux.alibaba.com>
To: Marco Elver <elver@...gle.com>
Cc: linux-kernel@...r.kernel.org, Alexander Potapenko <glider@...gle.com>,
Dmitry Vyukov <dvyukov@...gle.com>, Andrew Morton
<akpm@...ux-foundation.org>, kasan-dev@...glegroups.com, linux-mm@...ck.org
Subject: Re: [PATCH] kfence: Save freeing stack trace at calling time instead
of freeing time
On 2024/8/12 15:49, Marco Elver wrote:
> On Mon, 12 Aug 2024 at 09:00, Tianchen Ding <dtcccc@...ux.alibaba.com> wrote:
>>
>> For kmem_cache with SLAB_TYPESAFE_BY_RCU, the freeing trace stack at
>> calling kmem_cache_free() is more useful. While the following stack is
>> meaningless and provides no help:
>> freed by task 46 on cpu 0 at 656.840729s:
>> rcu_do_batch+0x1ab/0x540
>> nocb_cb_wait+0x8f/0x260
>> rcu_nocb_cb_kthread+0x25/0x80
>> kthread+0xd2/0x100
>> ret_from_fork+0x34/0x50
>> ret_from_fork_asm+0x1a/0x30
>>
>> Signed-off-by: Tianchen Ding <dtcccc@...ux.alibaba.com>
>> ---
>> I'm not sure whether we should keep KFENCE_OBJECT_FREED info remained
>> (maybe the exact free time can be helpful?). But add a new kfence_track
>> will cost more memory, so I prefer to reuse free_track and drop the info
>> when when KFENCE_OBJECT_RCU_FREEING -> KFENCE_OBJECT_FREED.
>
> I think the current version is fine. In the SLAB_TYPESAFE_BY_RCU cases
> it would always print the stack trace of RCU internals, so it's never
> really useful (as you say above).
>
> Have you encountered a bug where you were debugging a UAF like this?
Yes. We are debugging a UAF about struct anon_vma in an old kernel. (finally we
found this may be related to commit 2555283eb40d)
struct anon_vma has SLAB_TYPESAFE_BY_RCU, so we found the freeing stack is useless.
> If not, what prompted you to send this patch?
>
> Did you run the KFENCE test suite?
Yes. All passed.
>
>> ---
>> mm/kfence/core.c | 35 ++++++++++++++++++++++++++---------
>> mm/kfence/kfence.h | 1 +
>> mm/kfence/report.c | 7 ++++---
>> 3 files changed, 31 insertions(+), 12 deletions(-)
>>
>> diff --git a/mm/kfence/core.c b/mm/kfence/core.c
>> index c5cb54fc696d..89469d4f2d95 100644
>> --- a/mm/kfence/core.c
>> +++ b/mm/kfence/core.c
>> @@ -269,6 +269,13 @@ static inline unsigned long metadata_to_pageaddr(const struct kfence_metadata *m
>> return pageaddr;
>> }
>>
>> +static bool kfence_obj_inuse(const struct kfence_metadata *meta)
>
> Other tiny helpers add "inline" so that the compiler is more likely to
> inline this. In optimized kernels it should do so by default, but with
> some heavily instrumented kernels we need to lower the inlining
> threshold - adding "inline" does that.
>
> Also, note we have KFENCE_OBJECT_UNUSED state, so the
> kfence_obj_inuse() helper name would suggest to me that it's all other
> states.
>
> If the object is being freed with RCU, it is still technically
> allocated and _usable_ until the next RCU grace period. So maybe
> kfence_obj_allocated() is a more accurate name?
>
>> +{
>> + enum kfence_object_state state = READ_ONCE(meta->state);
>> +
>> + return state == KFENCE_OBJECT_ALLOCATED || state == KFENCE_OBJECT_RCU_FREEING;
>> +}
>> +
>> /*
>> * Update the object's metadata state, including updating the alloc/free stacks
>> * depending on the state transition.
>> @@ -278,10 +285,14 @@ metadata_update_state(struct kfence_metadata *meta, enum kfence_object_state nex
>> unsigned long *stack_entries, size_t num_stack_entries)
>> {
>> struct kfence_track *track =
>> - next == KFENCE_OBJECT_FREED ? &meta->free_track : &meta->alloc_track;
>> + next == KFENCE_OBJECT_ALLOCATED ? &meta->alloc_track : &meta->free_track;
>>
>> lockdep_assert_held(&meta->lock);
>>
>> + /* Stack has been saved when calling rcu, skip. */
>> + if (READ_ONCE(meta->state) == KFENCE_OBJECT_RCU_FREEING)
>> + goto out;
>> +
>> if (stack_entries) {
>> memcpy(track->stack_entries, stack_entries,
>> num_stack_entries * sizeof(stack_entries[0]));
>> @@ -297,6 +308,7 @@ metadata_update_state(struct kfence_metadata *meta, enum kfence_object_state nex
>> track->cpu = raw_smp_processor_id();
>> track->ts_nsec = local_clock(); /* Same source as printk timestamps. */
>>
>> +out:
>> /*
>> * Pairs with READ_ONCE() in
>> * kfence_shutdown_cache(),
>> @@ -502,7 +514,7 @@ static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool z
>>
>> raw_spin_lock_irqsave(&meta->lock, flags);
>>
>> - if (meta->state != KFENCE_OBJECT_ALLOCATED || meta->addr != (unsigned long)addr) {
>> + if (!kfence_obj_inuse(meta) || meta->addr != (unsigned long)addr) {
>> /* Invalid or double-free, bail out. */
>> atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);
>> kfence_report_error((unsigned long)addr, false, NULL, meta,
>> @@ -780,7 +792,7 @@ static void kfence_check_all_canary(void)
>> for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
>> struct kfence_metadata *meta = &kfence_metadata[i];
>>
>> - if (meta->state == KFENCE_OBJECT_ALLOCATED)
>> + if (kfence_obj_inuse(meta))
>> check_canary(meta);
>> }
>> }
>> @@ -1006,12 +1018,11 @@ void kfence_shutdown_cache(struct kmem_cache *s)
>> * the lock will not help, as different critical section
>> * serialization will have the same outcome.
>> */
>> - if (READ_ONCE(meta->cache) != s ||
>> - READ_ONCE(meta->state) != KFENCE_OBJECT_ALLOCATED)
>> + if (READ_ONCE(meta->cache) != s || !kfence_obj_inuse(meta))
>> continue;
>>
>> raw_spin_lock_irqsave(&meta->lock, flags);
>> - in_use = meta->cache == s && meta->state == KFENCE_OBJECT_ALLOCATED;
>> + in_use = meta->cache == s && kfence_obj_inuse(meta);
>> raw_spin_unlock_irqrestore(&meta->lock, flags);
>>
>> if (in_use) {
>> @@ -1145,6 +1156,7 @@ void *kfence_object_start(const void *addr)
>> void __kfence_free(void *addr)
>> {
>> struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr);
>> + unsigned long flags;
>
> This flags variable does not need to be scoped for the whole function.
> It can just be scoped within the if-branch where it's needed (at least
> I don't see other places besides there where it's used).
>
>> #ifdef CONFIG_MEMCG
>> KFENCE_WARN_ON(meta->obj_exts.objcg);
>> @@ -1154,9 +1166,14 @@ void __kfence_free(void *addr)
>> * the object, as the object page may be recycled for other-typed
>> * objects once it has been freed. meta->cache may be NULL if the cache
>> * was destroyed.
>> + * Save the stack trace here. It is more useful.
>
> "It is more useful." adds no value to the comment.
>
> I would say something like: "Save the stack trace here so that reports
> show where the user freed the object."
>
>> */
>> - if (unlikely(meta->cache && (meta->cache->flags & SLAB_TYPESAFE_BY_RCU)))
>> + if (unlikely(meta->cache && (meta->cache->flags & SLAB_TYPESAFE_BY_RCU))) {
>> + raw_spin_lock_irqsave(&meta->lock, flags);
>> + metadata_update_state(meta, KFENCE_OBJECT_RCU_FREEING, NULL, 0);
>> + raw_spin_unlock_irqrestore(&meta->lock, flags);
>> call_rcu(&meta->rcu_head, rcu_guarded_free);
>> + }
>
> Wrong if-else style. Turn the whole thing into
>
> if (...) {
> ...
> } else {
> kfence_guarded_free(...);
> }
>
> So it looks balanced.
>
>> else
>> kfence_guarded_free(addr, meta, false);
>> }
>> @@ -1182,14 +1199,14 @@ bool kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs
>> int distance = 0;
>>
>> meta = addr_to_metadata(addr - PAGE_SIZE);
>> - if (meta && READ_ONCE(meta->state) == KFENCE_OBJECT_ALLOCATED) {
>> + if (meta && kfence_obj_inuse(meta)) {
>> to_report = meta;
>> /* Data race ok; distance calculation approximate. */
>> distance = addr - data_race(meta->addr + meta->size);
>> }
>>
>> meta = addr_to_metadata(addr + PAGE_SIZE);
>> - if (meta && READ_ONCE(meta->state) == KFENCE_OBJECT_ALLOCATED) {
>> + if (meta && kfence_obj_inuse(meta)) {
>> /* Data race ok; distance calculation approximate. */
>> if (!to_report || distance > data_race(meta->addr) - addr)
>> to_report = meta;
>> diff --git a/mm/kfence/kfence.h b/mm/kfence/kfence.h
>> index db87a05047bd..dfba5ea06b01 100644
>> --- a/mm/kfence/kfence.h
>> +++ b/mm/kfence/kfence.h
>> @@ -38,6 +38,7 @@
>> enum kfence_object_state {
>> KFENCE_OBJECT_UNUSED, /* Object is unused. */
>> KFENCE_OBJECT_ALLOCATED, /* Object is currently allocated. */
>> + KFENCE_OBJECT_RCU_FREEING, /* Object was allocated, and then being freed by rcu. */
>> KFENCE_OBJECT_FREED, /* Object was allocated, and then freed. */
>> };
>>
>> diff --git a/mm/kfence/report.c b/mm/kfence/report.c
>> index 73a6fe42845a..451991a3a8f2 100644
>> --- a/mm/kfence/report.c
>> +++ b/mm/kfence/report.c
>> @@ -114,7 +114,8 @@ static void kfence_print_stack(struct seq_file *seq, const struct kfence_metadat
>>
>> /* Timestamp matches printk timestamp format. */
>> seq_con_printf(seq, "%s by task %d on cpu %d at %lu.%06lus (%lu.%06lus ago):\n",
>> - show_alloc ? "allocated" : "freed", track->pid,
>> + show_alloc ? "allocated" : meta->state == KFENCE_OBJECT_RCU_FREEING ?
>> + "rcu freeing" : "freed", track->pid,
>> track->cpu, (unsigned long)ts_sec, rem_nsec / 1000,
>> (unsigned long)interval_nsec, rem_interval_nsec / 1000);
>>
>> @@ -149,7 +150,7 @@ void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *met
>>
>> kfence_print_stack(seq, meta, true);
>>
>> - if (meta->state == KFENCE_OBJECT_FREED) {
>> + if (meta->state == KFENCE_OBJECT_FREED || meta->state == KFENCE_OBJECT_RCU_FREEING) {
>> seq_con_printf(seq, "\n");
>> kfence_print_stack(seq, meta, false);
>> }
>> @@ -318,7 +319,7 @@ bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *sla
>> kpp->kp_slab_cache = meta->cache;
>> kpp->kp_objp = (void *)meta->addr;
>> kfence_to_kp_stack(&meta->alloc_track, kpp->kp_stack);
>> - if (meta->state == KFENCE_OBJECT_FREED)
>> + if (meta->state == KFENCE_OBJECT_FREED || meta->state == KFENCE_OBJECT_RCU_FREEING)
>> kfence_to_kp_stack(&meta->free_track, kpp->kp_free_stack);
>> /* get_stack_skipnr() ensures the first entry is outside allocator. */
>> kpp->kp_ret = kpp->kp_stack[0];
>> --
>> 2.39.3
>>
>> --
>> You received this message because you are subscribed to the Google Groups "kasan-dev" group.
>> To unsubscribe from this group and stop receiving emails from it, send an email to kasan-dev+unsubscribe@...glegroups.com.
>> To view this discussion on the web visit https://groups.google.com/d/msgid/kasan-dev/20240812065947.6104-1-dtcccc%40linux.alibaba.com.
Thanks for your comments. I'll fix them and send v2 later.
Powered by blists - more mailing lists