lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <ed82e6b9-3d9f-259a-82bc-cc51f9131f29@bytedance.com>
Date:   Wed, 12 Jul 2023 16:37:10 +0800
From:   Peng Zhang <zhangpeng.00@...edance.com>
To:     Peng Zhang <zhangpeng.00@...edance.com>
Cc:     glider@...gle.com, dvyukov@...gle.com, akpm@...ux-foundation.org,
        kasan-dev@...glegroups.com, linux-mm@...ck.org,
        linux-kernel@...r.kernel.org, muchun.song@...ux.dev,
        Marco Elver <elver@...gle.com>
Subject: Re: [PATCH] mm: kfence: allocate kfence_metadata at runtime



在 2023/7/12 16:28, Peng Zhang 写道:
> 
> 
> 在 2023/7/10 18:19, Marco Elver 写道:
>> On Mon, 10 Jul 2023 at 05:27, 'Peng Zhang' via kasan-dev
>> <kasan-dev@...glegroups.com> wrote:
>>>
>>> kfence_metadata is currently a static array. For the purpose of
>>> allocating scalable __kfence_pool, we first change it to runtime
>>> allocation of metadata. Since the size of an object of kfence_metadata
>>> is 1160 bytes, we can save at least 72 pages (with default 256 objects)
>>> without enabling kfence.
>>>
>>> Below are the numbers obtained in qemu (with default 256 objects).
>>> before: Memory: 8134692K/8388080K available (3668K bss)
>>> after: Memory: 8136740K/8388080K available (1620K bss)
>>> More than expected, it saves 2MB memory.
>>>
>>> Signed-off-by: Peng Zhang <zhangpeng.00@...edance.com>
>>
>> Seems like a reasonable optimization, but see comments below.
>>
>> Also with this patch applied on top of v6.5-rc1, KFENCE just doesn't
>> init at all anymore (early init). Please fix.
> I'm very sorry — I made a slight modification before sending the
> patch without testing it, which caused it to not work properly.
> I fixed some of the issues you mentioned in v2[1].
> 
> [1] 
> https://lore.kernel.org/lkml/20230712081616.45177-1-zhangpeng.00@bytedance.com/
> 
>>
>>> ---
>>>   mm/kfence/core.c   | 102 ++++++++++++++++++++++++++++++++-------------
>>>   mm/kfence/kfence.h |   5 ++-
>>>   2 files changed, 78 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/mm/kfence/core.c b/mm/kfence/core.c
>>> index dad3c0eb70a0..b9fec1c46e3d 100644
>>> --- a/mm/kfence/core.c
>>> +++ b/mm/kfence/core.c
>>> @@ -116,7 +116,7 @@ EXPORT_SYMBOL(__kfence_pool); /* Export for test 
>>> modules. */
>>>    * backing pages (in __kfence_pool).
>>>    */
>>>   static_assert(CONFIG_KFENCE_NUM_OBJECTS > 0);
>>> -struct kfence_metadata kfence_metadata[CONFIG_KFENCE_NUM_OBJECTS];
>>> +struct kfence_metadata *kfence_metadata;
>>>
>>>   /* Freelist with available objects. */
>>>   static struct list_head kfence_freelist = 
>>> LIST_HEAD_INIT(kfence_freelist);
>>> @@ -643,13 +643,56 @@ static unsigned long kfence_init_pool(void)
>>>          return addr;
>>>   }
>>>
>>> +static int kfence_alloc_metadata(void)
>>> +{
>>> +       unsigned long nr_pages = KFENCE_METADATA_SIZE / PAGE_SIZE;
>>> +
>>> +#ifdef CONFIG_CONTIG_ALLOC
>>> +       struct page *pages;
>>> +
>>> +       pages = alloc_contig_pages(nr_pages, GFP_KERNEL, 
>>> first_online_node,
>>> +                                  NULL);
>>> +       if (pages)
>>> +               kfence_metadata = page_to_virt(pages);
>>> +#else
>>> +       if (nr_pages > MAX_ORDER_NR_PAGES) {
>>> +               pr_warn("KFENCE_NUM_OBJECTS too large for buddy 
>>> allocator\n");
>>
>> Does this mean that KFENCE won't work at all if we can't allocate the
>> metadata? I.e. it won't work either in early nor late init modes?
>>
>> I know we already have this limitation for _late init_ of the KFENCE 
>> pool.
>>
>> So I have one major question: when doing _early init_, what is the
>> maximum size of the KFENCE pool (#objects) with this change?
> It will be limited to 2^10/sizeof(struct kfence_metadata) by buddy
                 Sorry,  2^10*PAGE_SIZE/sizeof(struct kfence_metadata)
> system, so I used memblock to allocate kfence_metadata in v2.
>>
>>> +               return -EINVAL;
>>> +       }
>>> +       kfence_metadata = alloc_pages_exact(KFENCE_METADATA_SIZE,
>>> +                                           GFP_KERNEL);
>>> +#endif
>>> +
>>> +       if (!kfence_metadata)
>>> +               return -ENOMEM;
>>> +
>>> +       memset(kfence_metadata, 0, KFENCE_METADATA_SIZE);
>>
>> memzero_explicit, or pass __GFP_ZERO to alloc_pages?
> Unfortunately, __GFP_ZERO does not work successfully in
> alloc_contig_pages(), so I used memzero_explicit() in v2.
> Even though I don't know if memzero_explicit() is necessary
> (it just adds a barrier).
>>
>>> +       return 0;
>>> +}
>>> +
>>> +static void kfence_free_metadata(void)
>>> +{
>>> +       if (WARN_ON(!kfence_metadata))
>>> +               return;
>>> +#ifdef CONFIG_CONTIG_ALLOC
>>> +       free_contig_range(page_to_pfn(virt_to_page((void 
>>> *)kfence_metadata)),
>>> +                         KFENCE_METADATA_SIZE / PAGE_SIZE);
>>> +#else
>>> +       free_pages_exact((void *)kfence_metadata, KFENCE_METADATA_SIZE);
>>> +#endif
>>> +       kfence_metadata = NULL;
>>> +}
>>> +
>>>   static bool __init kfence_init_pool_early(void)
>>>   {
>>> -       unsigned long addr;
>>> +       unsigned long addr = (unsigned long)__kfence_pool;
>>>
>>>          if (!__kfence_pool)
>>>                  return false;
>>>
>>> +       if (!kfence_alloc_metadata())
>>> +               goto free_pool;
>>> +
>>>          addr = kfence_init_pool();
>>>
>>>          if (!addr) {
>>> @@ -663,6 +706,7 @@ static bool __init kfence_init_pool_early(void)
>>>                  return true;
>>>          }
>>>
>>> +       kfence_free_metadata();
>>>          /*
>>>           * Only release unprotected pages, and do not try to go back 
>>> and change
>>>           * page attributes due to risk of failing to do so as well. 
>>> If changing
>>> @@ -670,31 +714,12 @@ static bool __init kfence_init_pool_early(void)
>>>           * fails for the first page, and therefore expect 
>>> addr==__kfence_pool in
>>>           * most failure cases.
>>>           */
>>> +free_pool:
>>>          memblock_free_late(__pa(addr), KFENCE_POOL_SIZE - (addr - 
>>> (unsigned long)__kfence_pool));
>>>          __kfence_pool = NULL;
>>>          return false;
>>>   }
>>>
>>> -static bool kfence_init_pool_late(void)
>>> -{
>>> -       unsigned long addr, free_size;
>>> -
>>> -       addr = kfence_init_pool();
>>> -
>>> -       if (!addr)
>>> -               return true;
>>> -
>>> -       /* Same as above. */
>>> -       free_size = KFENCE_POOL_SIZE - (addr - (unsigned 
>>> long)__kfence_pool);
>>> -#ifdef CONFIG_CONTIG_ALLOC
>>> -       free_contig_range(page_to_pfn(virt_to_page((void *)addr)), 
>>> free_size / PAGE_SIZE);
>>> -#else
>>> -       free_pages_exact((void *)addr, free_size);
>>> -#endif
>>> -       __kfence_pool = NULL;
>>> -       return false;
>>> -}
>>> -
>>>   /* === DebugFS Interface 
>>> ==================================================== */
>>>
>>>   static int stats_show(struct seq_file *seq, void *v)
>>> @@ -896,6 +921,10 @@ void __init kfence_init(void)
>>>   static int kfence_init_late(void)
>>>   {
>>>          const unsigned long nr_pages = KFENCE_POOL_SIZE / PAGE_SIZE;
>>> +       unsigned long addr = (unsigned long)__kfence_pool;
>>> +       unsigned long free_size = KFENCE_POOL_SIZE;
>>> +       int ret;
>>> +
>>>   #ifdef CONFIG_CONTIG_ALLOC
>>>          struct page *pages;
>>>
>>> @@ -913,15 +942,29 @@ static int kfence_init_late(void)
>>>                  return -ENOMEM;
>>>   #endif
>>>
>>> -       if (!kfence_init_pool_late()) {
>>> -               pr_err("%s failed\n", __func__);
>>> -               return -EBUSY;
>>> +       ret = kfence_alloc_metadata();
>>> +       if (!ret)
>>> +               goto free_pool;
>>> +
>>> +       addr = kfence_init_pool();
>>> +       if (!addr) {
>>> +               kfence_init_enable();
>>> +               kfence_debugfs_init();
>>> +               return 0;
>>>          }
>>>
>>> -       kfence_init_enable();
>>> -       kfence_debugfs_init();
>>> +       pr_err("%s failed\n", __func__);
>>> +       kfence_free_metadata();
>>> +       free_size = KFENCE_POOL_SIZE - (addr - (unsigned 
>>> long)__kfence_pool);
>>> +       ret = -EBUSY;
>>>
>>> -       return 0;
>>> +free_pool:
>>> +#ifdef CONFIG_CONTIG_ALLOC
>>> +       free_contig_range(page_to_pfn(virt_to_page((void *)addr)), 
>>> free_size / PAGE_SIZE);
>>> +#else
>>> +       free_pages_exact((void *)addr, free_size);
>>> +#endif
>>
>> You moved this from kfence_init_pool_late - that did "__kfence_pool =
>> NULL" which is missing now.
> Thanks for spotting this, I added it in v2.
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ