lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <8C523F05-00B4-40F5-BB07-EE4AACA86E26@nvidia.com>
Date:   Mon, 6 Apr 2020 09:17:38 -0400
From:   Zi Yan <ziy@...dia.com>
To:     "Kirill A. Shutemov" <kirill@...temov.name>
CC:     <akpm@...ux-foundation.org>,
        Andrea Arcangeli <aarcange@...hat.com>,
        "Yang Shi" <yang.shi@...ux.alibaba.com>, <linux-mm@...ck.org>,
        <linux-kernel@...r.kernel.org>,
        "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
Subject: Re: [PATCHv2 8/8] khugepaged: Introduce 'max_ptes_shared' tunable

On 3 Apr 2020, at 7:29, Kirill A. Shutemov wrote:

> External email: Use caution opening links or attachments
>
>
> ``max_ptes_shared`` speicies how many pages can be shared across multiple

s/speicies/specifies

> processes. Exeeding the number woul block the collapse::

s/Exeeding/Exceeding

s/woul/would

>
>         /sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_shared
>
> A higher value may increase memory footprint for some workloads.
>
> By default, at least half of the pages must not be shared.
>
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@...ux.intel.com>
> ---
>  Documentation/admin-guide/mm/transhuge.rst |  7 ++
>  include/trace/events/huge_memory.h         |  3 +-
>  mm/khugepaged.c                            | 52 ++++++++++++--
>  tools/testing/selftests/vm/khugepaged.c    | 83 ++++++++++++++++++++++
>  4 files changed, 140 insertions(+), 5 deletions(-)
>
> diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
> index bd5714547cee..d16e4f2bb70f 100644
> --- a/Documentation/admin-guide/mm/transhuge.rst
> +++ b/Documentation/admin-guide/mm/transhuge.rst
> @@ -220,6 +220,13 @@ memory. A lower value can prevent THPs from being
>  collapsed, resulting fewer pages being collapsed into
>  THPs, and lower memory access performance.
>
> +``max_ptes_shared`` speicies how many pages can be shared across multiple
> +processes. Exeeding the number woul block the collapse::

s/speicies/specifies

s/Exeeding/Exceeding

s/woul/would
> +
> +       /sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_shared
> +
> +A higher value may increase memory footprint for some workloads.
> +
>  Boot parameter
>  ==============
>
> diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h
> index d82a0f4e824d..53532f5925c3 100644
> --- a/include/trace/events/huge_memory.h
> +++ b/include/trace/events/huge_memory.h
> @@ -12,6 +12,8 @@
>         EM( SCAN_SUCCEED,               "succeeded")                    \
>         EM( SCAN_PMD_NULL,              "pmd_null")                     \
>         EM( SCAN_EXCEED_NONE_PTE,       "exceed_none_pte")              \
> +       EM( SCAN_EXCEED_SWAP_PTE,       "exceed_swap_pte")              \
> +       EM( SCAN_EXCEED_SHARED_PTE,     "exceed_shared_pte")            \
>         EM( SCAN_PTE_NON_PRESENT,       "pte_non_present")              \
>         EM( SCAN_PAGE_RO,               "no_writable_page")             \
>         EM( SCAN_LACK_REFERENCED_PAGE,  "lack_referenced_page")         \
> @@ -30,7 +32,6 @@
>         EM( SCAN_DEL_PAGE_LRU,          "could_not_delete_page_from_lru")\
>         EM( SCAN_ALLOC_HUGE_PAGE_FAIL,  "alloc_huge_page_failed")       \
>         EM( SCAN_CGROUP_CHARGE_FAIL,    "ccgroup_charge_failed")        \
> -       EM( SCAN_EXCEED_SWAP_PTE,       "exceed_swap_pte")              \
>         EM( SCAN_TRUNCATED,             "truncated")                    \
>         EMe(SCAN_PAGE_HAS_PRIVATE,      "page_has_private")             \
>
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 49e56e4e30d1..bfb6155f1d69 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -28,6 +28,8 @@ enum scan_result {
>         SCAN_SUCCEED,
>         SCAN_PMD_NULL,
>         SCAN_EXCEED_NONE_PTE,
> +       SCAN_EXCEED_SWAP_PTE,
> +       SCAN_EXCEED_SHARED_PTE,
>         SCAN_PTE_NON_PRESENT,
>         SCAN_PAGE_RO,
>         SCAN_LACK_REFERENCED_PAGE,
> @@ -46,7 +48,6 @@ enum scan_result {
>         SCAN_DEL_PAGE_LRU,
>         SCAN_ALLOC_HUGE_PAGE_FAIL,
>         SCAN_CGROUP_CHARGE_FAIL,
> -       SCAN_EXCEED_SWAP_PTE,
>         SCAN_TRUNCATED,
>         SCAN_PAGE_HAS_PRIVATE,
>  };
> @@ -71,6 +72,7 @@ static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait);
>   */
>  static unsigned int khugepaged_max_ptes_none __read_mostly;
>  static unsigned int khugepaged_max_ptes_swap __read_mostly;
> +static unsigned int khugepaged_max_ptes_shared __read_mostly;
>
>  #define MM_SLOTS_HASH_BITS 10
>  static __read_mostly DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
> @@ -290,15 +292,43 @@ static struct kobj_attribute khugepaged_max_ptes_swap_attr =
>         __ATTR(max_ptes_swap, 0644, khugepaged_max_ptes_swap_show,
>                khugepaged_max_ptes_swap_store);
>
> +static ssize_t khugepaged_max_ptes_shared_show(struct kobject *kobj,
> +                                            struct kobj_attribute *attr,
> +                                            char *buf)
> +{
> +       return sprintf(buf, "%u\n", khugepaged_max_ptes_shared);
> +}
> +
> +static ssize_t khugepaged_max_ptes_shared_store(struct kobject *kobj,
> +                                             struct kobj_attribute *attr,
> +                                             const char *buf, size_t count)
> +{
> +       int err;
> +       unsigned long max_ptes_shared;
> +
> +       err  = kstrtoul(buf, 10, &max_ptes_shared);
> +       if (err || max_ptes_shared > HPAGE_PMD_NR-1)
> +               return -EINVAL;
> +
> +       khugepaged_max_ptes_shared = max_ptes_shared;
> +
> +       return count;
> +}
> +
> +static struct kobj_attribute khugepaged_max_ptes_shared_attr =
> +       __ATTR(max_ptes_shared, 0644, khugepaged_max_ptes_shared_show,
> +              khugepaged_max_ptes_shared_store);
> +
>  static struct attribute *khugepaged_attr[] = {
>         &khugepaged_defrag_attr.attr,
>         &khugepaged_max_ptes_none_attr.attr,
> +       &khugepaged_max_ptes_swap_attr.attr,
> +       &khugepaged_max_ptes_shared_attr.attr,
>         &pages_to_scan_attr.attr,
>         &pages_collapsed_attr.attr,
>         &full_scans_attr.attr,
>         &scan_sleep_millisecs_attr.attr,
>         &alloc_sleep_millisecs_attr.attr,
> -       &khugepaged_max_ptes_swap_attr.attr,
>         NULL,
>  };
>
> @@ -360,6 +390,7 @@ int __init khugepaged_init(void)
>         khugepaged_pages_to_scan = HPAGE_PMD_NR * 8;
>         khugepaged_max_ptes_none = HPAGE_PMD_NR - 1;
>         khugepaged_max_ptes_swap = HPAGE_PMD_NR / 8;
> +       khugepaged_max_ptes_shared = HPAGE_PMD_NR / 2;
>
>         return 0;
>  }
> @@ -549,7 +580,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>  {
>         struct page *page = NULL;
>         pte_t *_pte;
> -       int none_or_zero = 0, result = 0, referenced = 0;
> +       int none_or_zero = 0, shared = 0, result = 0, referenced = 0;
>         bool writable = false;
>
>         for (_pte = pte; _pte < pte+HPAGE_PMD_NR;
> @@ -577,6 +608,12 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>
>                 VM_BUG_ON_PAGE(!PageAnon(page), page);
>
> +               if (page_mapcount(page) > 1 &&
> +                               ++shared > khugepaged_max_ptes_shared) {
> +                       result = SCAN_EXCEED_SHARED_PTE;
> +                       goto out;
> +               }
> +
>                 if (PageCompound(page)) {
>                         struct page *p;
>                         page = compound_head(page);
> @@ -1168,7 +1205,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
>  {
>         pmd_t *pmd;
>         pte_t *pte, *_pte;
> -       int ret = 0, none_or_zero = 0, result = 0, referenced = 0;
> +       int ret = 0, result = 0, referenced = 0;
> +       int none_or_zero = 0, shared = 0;
>         struct page *page = NULL;
>         unsigned long _address;
>         spinlock_t *ptl;
> @@ -1218,6 +1256,12 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
>                         goto out_unmap;
>                 }
>
> +               if (page_mapcount(page) > 1 &&
> +                               ++shared > khugepaged_max_ptes_shared) {
> +                       result = SCAN_EXCEED_SHARED_PTE;
> +                       goto out_unmap;
> +               }
> +
>                 page = compound_head(page);
>
>                 /*
> diff --git a/tools/testing/selftests/vm/khugepaged.c b/tools/testing/selftests/vm/khugepaged.c
> index 3eeff4a0fbc9..9ae119234a39 100644
> --- a/tools/testing/selftests/vm/khugepaged.c
> +++ b/tools/testing/selftests/vm/khugepaged.c
> @@ -77,6 +77,7 @@ struct khugepaged_settings {
>         unsigned int scan_sleep_millisecs;
>         unsigned int max_ptes_none;
>         unsigned int max_ptes_swap;
> +       unsigned int max_ptes_shared;
>         unsigned long pages_to_scan;
>  };
>
> @@ -276,6 +277,7 @@ static void write_settings(struct settings *settings)
>                         khugepaged->scan_sleep_millisecs);
>         write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none);
>         write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap);
> +       write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared);
>         write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan);
>  }
>
> @@ -312,6 +314,7 @@ static void save_settings(void)
>                         read_num("khugepaged/scan_sleep_millisecs"),
>                 .max_ptes_none = read_num("khugepaged/max_ptes_none"),
>                 .max_ptes_swap = read_num("khugepaged/max_ptes_swap"),
> +               .max_ptes_shared = read_num("khugepaged/max_ptes_shared"),
>                 .pages_to_scan = read_num("khugepaged/pages_to_scan"),
>         };
>         success("OK");
> @@ -843,12 +846,90 @@ static void collapse_fork_compound(void)
>                         fail("Fail");
>                 fill_memory(p, 0, page_size);
>
> +               write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1);
>                 if (wait_for_scan("Collapse PTE table full of compound pages in child", p))
>                         fail("Timeout");
>                 else if (check_huge(p))
>                         success("OK");
>                 else
>                         fail("Fail");
> +               write_num("khugepaged/max_ptes_shared",
> +                               default_settings.khugepaged.max_ptes_shared);
> +
> +               validate_memory(p, 0, hpage_pmd_size);
> +               munmap(p, hpage_pmd_size);
> +               exit(exit_status);
> +       }
> +
> +       wait(&wstatus);
> +       exit_status += WEXITSTATUS(wstatus);
> +
> +       printf("Check if parent still has huge page...");
> +       if (check_huge(p))
> +               success("OK");
> +       else
> +               fail("Fail");
> +       validate_memory(p, 0, hpage_pmd_size);
> +       munmap(p, hpage_pmd_size);
> +}
> +
> +static void collapse_max_ptes_shared()
> +{
> +       int max_ptes_shared = read_num("khugepaged/max_ptes_shared");
> +       int wstatus;
> +       void *p;
> +
> +       p = alloc_mapping();
> +
> +       printf("Allocate huge page...");
> +       madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
> +       fill_memory(p, 0, hpage_pmd_size);
> +       if (check_huge(p))
> +               success("OK");
> +       else
> +               fail("Fail");
> +
> +       printf("Share huge page over fork()...");
> +       if (!fork()) {
> +               /* Do not touch settings on child exit */
> +               skip_settings_restore = true;
> +               exit_status = 0;
> +
> +               if (check_huge(p))
> +                       success("OK");
> +               else
> +                       fail("Fail");
> +
> +               printf("Trigger CoW in %d of %d...",
> +                               hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr);
> +               fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size);
> +               if (!check_huge(p))
> +                       success("OK");
> +               else
> +                       fail("Fail");
> +
> +               if (wait_for_scan("Do not collapse with max_ptes_shared exeeded", p))
> +                       fail("Timeout");
> +               else if (!check_huge(p))
> +                       success("OK");
> +               else
> +                       fail("Fail");
> +
> +               printf("Trigger CoW in %d of %d...",
> +                               hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr);
> +               fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared) * page_size);
> +               if (!check_huge(p))
> +                       success("OK");
> +               else
> +                       fail("Fail");
> +
> +
> +               if (wait_for_scan("Collapse with max_ptes_shared PTEs shared", p))
> +                       fail("Timeout");
> +               else if (check_huge(p))
> +                       success("OK");
> +               else
> +                       fail("Fail");
>
>                 validate_memory(p, 0, hpage_pmd_size);
>                 munmap(p, hpage_pmd_size);
> @@ -877,6 +958,7 @@ int main(void)
>
>         default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1;
>         default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8;
> +       default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2;
>         default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8;
>
>         save_settings();
> @@ -894,6 +976,7 @@ int main(void)
>         collapse_compound_extreme();
>         collapse_fork();
>         collapse_fork_compound();
> +       collapse_max_ptes_shared();
>
>         restore_settings(0);
>  }
> --
> 2.26.0


—
Best Regards,
Yan Zi

Download attachment "signature.asc" of type "application/pgp-signature" (855 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ