linux-kernel - Re: [RESEND PATCH V3] NUMA:Improve the efficiency of calculating pages loss

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CACZJ9cU5g5wD=qEg7tbr-Gk4EADDORSG-=U1_c7nq=fO9XhJ0Q@mail.gmail.com>
Date:   Tue, 22 Aug 2023 19:49:05 +0800
From:   Liam Ni <zhiguangni01@...il.com>
To:     Mike Rapoport <rppt@...nel.org>
Cc:     linux-mm@...ck.org, linux-kernel@...r.kernel.org,
        loongarch@...ts.linux.dev, zhoubinbin@...ngson.cn,
        chenfeiyang@...ngson.cn, jiaxun.yang@...goat.com,
        Andrew Morton <akpm@...ux-foundation.org>,
        "H. Peter Anvin" <hpa@...or.com>, x86@...nel.org,
        Borislav Petkov <bp@...en8.de>, Ingo Molnar <mingo@...hat.com>,
        Thomas Gleixner <tglx@...utronix.de>, peterz@...radead.org,
        luto@...nel.org, Dave Hansen <dave.hansen@...ux.intel.com>,
        kernel@...0n.name, chenhuacai@...nel.org
Subject: Re: [RESEND PATCH V3] NUMA:Improve the efficiency of calculating
 pages loss

On Tue, 15 Aug 2023 at 00:00, Mike Rapoport <rppt@...nel.org> wrote:
>
> On Fri, Aug 04, 2023 at 11:32:51PM +0800, Liam Ni wrote:
> > Optimize the way of calculating missing pages.
> >
> > In the previous implementation, we calculate missing pages as follows:
> > 1. Calculate numaram by traversing all the numa_meminfo blocks and, for
> > each of them, traversing all the regions in memblock.memory to prepare
> > for counting missing pages.
> >
> > 2. Traverse all the regions in memblock.memory again to get e820ram.
> >
> > 3. The number of missing pages is (e820ram - numaram).
> >
> > But it's enough to count the memory in ‘memblock.memory’ that doesn't have
> > a node assigned.
> >
> > V2:https://lore.kernel.org/all/20230619075315.49114-1-zhiguangni01@gmail.com/
> > V1:https://lore.kernel.org/all/20230615142016.419570-1-zhiguangni01@gmail.com/
> >
> > Signed-off-by: Liam Ni <zhiguangni01@...il.com>
> > ---
> >  arch/loongarch/kernel/numa.c | 23 ++++++++---------------
> >  arch/x86/mm/numa.c           | 26 +++++++-------------------
> >  include/linux/mm.h           |  1 +
> >  mm/mm_init.c                 | 20 ++++++++++++++++++++
> >  4 files changed, 36 insertions(+), 34 deletions(-)
> >
> > diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c
> > index 708665895b47..0239891e4d19 100644
> > --- a/arch/loongarch/kernel/numa.c
> > +++ b/arch/loongarch/kernel/numa.c
> > @@ -262,25 +262,18 @@ static void __init node_mem_init(unsigned int node)
> >   * Sanity check to catch more bad NUMA configurations (they are amazingly
> >   * common).  Make sure the nodes cover all memory.
> >   */
> > -static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
> > +static bool __init memblock_validate_numa_coverage(const u64 limit)
>
> There is no need to have arch specific memblock_validate_numa_coverage().
> You can add this function to memblock and call it from NUMA initialization
> instead of numa_meminfo_cover_memory().

Do you mean that the implementation of numa_meminfo_cover_memory() should be removed entirely?

>
> The memblock_validate_numa_coverage() will count all the pages without node
> ID set and compare to the threshold provided by the architectures.
>
> >  {
> > -       int i;
> > -       u64 numaram, biosram;
> > +       u64 lo_pg;
> >
> > -       numaram = 0;
> > -       for (i = 0; i < mi->nr_blks; i++) {
> > -               u64 s = mi->blk[i].start >> PAGE_SHIFT;
> > -               u64 e = mi->blk[i].end >> PAGE_SHIFT;
> > +       lo_pg = max_pfn - calculate_without_node_pages_in_range();
> >
> > -               numaram += e - s;
> > -               numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
> > -               if ((s64)numaram < 0)
> > -                       numaram = 0;
> > +       /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
> > +       if (lo_pg >= limit) {
> > +               pr_err("NUMA: We lost 1m size page.\n");
> > +               return false;
> >         }
> > -       max_pfn = max_low_pfn;
> > -       biosram = max_pfn - absent_pages_in_range(0, max_pfn);
> >
> > -       BUG_ON((s64)(biosram - numaram) >= (1 << (20 - PAGE_SHIFT)));
> >         return true;
> >  }
> >
> > @@ -428,7 +421,7 @@ int __init init_numa_memory(void)
> >                 return -EINVAL;
> >
> >         init_node_memblock();
> > -       if (numa_meminfo_cover_memory(&numa_meminfo) == false)
> > +       if (memblock_validate_numa_coverage(SZ_1M) == false)
> >                 return -EINVAL;
> >
> >         for_each_node_mask(node, node_possible_map) {
> > diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
> > index 2aadb2019b4f..14feec144675 100644
> > --- a/arch/x86/mm/numa.c
> > +++ b/arch/x86/mm/numa.c
> > @@ -451,30 +451,18 @@ EXPORT_SYMBOL(__node_distance);
> >   * Sanity check to catch more bad NUMA configurations (they are amazingly
> >   * common).  Make sure the nodes cover all memory.
> >   */
> > -static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
> > +static bool __init memblock_validate_numa_coverage(const u64 limit)
> >  {
> > -       u64 numaram, e820ram;
> > -       int i;
> > +       u64 lo_pg;
> >
> > -       numaram = 0;
> > -       for (i = 0; i < mi->nr_blks; i++) {
> > -               u64 s = mi->blk[i].start >> PAGE_SHIFT;
> > -               u64 e = mi->blk[i].end >> PAGE_SHIFT;
> > -               numaram += e - s;
> > -               numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
> > -               if ((s64)numaram < 0)
> > -                       numaram = 0;
> > -       }
> > -
> > -       e820ram = max_pfn - absent_pages_in_range(0, max_pfn);
> > +       lo_pg = max_pfn - calculate_without_node_pages_in_range();
> >
> >         /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
> > -       if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
> > -               printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n",
> > -                      (numaram << PAGE_SHIFT) >> 20,
> > -                      (e820ram << PAGE_SHIFT) >> 20);
> > +       if (lo_pg >= limit) {
> > +               pr_err("NUMA: We lost 1m size page.\n");
> >                 return false;
> >         }
> > +
> >         return true;
> >  }
> >
> > @@ -583,7 +571,7 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
> >                         return -EINVAL;
> >                 }
> >         }
> > -       if (!numa_meminfo_cover_memory(mi))
> > +       if (!memblock_validate_numa_coverage(SZ_1M))
> >                 return -EINVAL;
> >
> >         /* Finally register nodes. */
> > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > index 0daef3f2f029..b32457ad1ae3 100644
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h
> > @@ -3043,6 +3043,7 @@ unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn,
> >                                                 unsigned long end_pfn);
> >  extern unsigned long absent_pages_in_range(unsigned long start_pfn,
> >                                                 unsigned long end_pfn);
> > +extern unsigned long calculate_without_node_pages_in_range(void);
> >  extern void get_pfn_range_for_nid(unsigned int nid,
> >                         unsigned long *start_pfn, unsigned long *end_pfn);
> >
> > diff --git a/mm/mm_init.c b/mm/mm_init.c
> > index 3ddd18a89b66..13a4883787e3 100644
> > --- a/mm/mm_init.c
> > +++ b/mm/mm_init.c
> > @@ -1132,6 +1132,26 @@ static void __init adjust_zone_range_for_zone_movable(int nid,
> >         }
> >  }
> >
> > +/**
> > + * @start_pfn: The start PFN to start searching for holes
> > + * @end_pfn: The end PFN to stop searching for holes
> > + *
> > + * Return: Return the number of page frames without node assigned within a range.
> > + */
> > +unsigned long __init calculate_without_node_pages_in_range(void)
> > +{
> > +       unsigned long num_pages;
> > +       unsigned long start_pfn, end_pfn;
> > +       int nid, i;
> > +
> > +       for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
> > +               if (nid == NUMA_NO_NODE)
> > +                       num_pages += end_pfn - start_pfn;
> > +       }
> > +
> > +       return num_pages;
> > +}
> > +
> >  /*
> >   * Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
> >   * then all holes in the requested range will be accounted for.
> > --
> > 2.25.1
>
> --
> Sincerely yours,
> Mike.