lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <bc6cdb11-41fc-486b-9c39-17254f00d751@redhat.com>
Date: Tue, 5 Aug 2025 12:55:20 +0200
From: David Hildenbrand <david@...hat.com>
To: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
Cc: "Pankaj Raghav (Samsung)" <kernel@...kajraghav.com>,
 Suren Baghdasaryan <surenb@...gle.com>, Ryan Roberts <ryan.roberts@....com>,
 Baolin Wang <baolin.wang@...ux.alibaba.com>, Borislav Petkov <bp@...en8.de>,
 Ingo Molnar <mingo@...hat.com>, "H . Peter Anvin" <hpa@...or.com>,
 Vlastimil Babka <vbabka@...e.cz>, Zi Yan <ziy@...dia.com>,
 Mike Rapoport <rppt@...nel.org>, Dave Hansen <dave.hansen@...ux.intel.com>,
 Michal Hocko <mhocko@...e.com>, Andrew Morton <akpm@...ux-foundation.org>,
 Thomas Gleixner <tglx@...utronix.de>, Nico Pache <npache@...hat.com>,
 Dev Jain <dev.jain@....com>, "Liam R . Howlett" <Liam.Howlett@...cle.com>,
 Jens Axboe <axboe@...nel.dk>, linux-kernel@...r.kernel.org,
 linux-mm@...ck.org, willy@...radead.org, x86@...nel.org,
 linux-block@...r.kernel.org, Ritesh Harjani <ritesh.list@...il.com>,
 linux-fsdevel@...r.kernel.org, "Darrick J . Wong" <djwong@...nel.org>,
 mcgrof@...nel.org, gost.dev@...sung.com, hch@....de,
 Pankaj Raghav <p.raghav@...sung.com>
Subject: Re: [PATCH 3/5] mm: add static huge zero folio


>>
>>
>> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
>> index 0ce86e14ab5e1..8e2aa18873098 100644
>> --- a/arch/x86/Kconfig
>> +++ b/arch/x86/Kconfig
>> @@ -153,6 +153,7 @@ config X86
>>   	select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP	if X86_64
>>   	select ARCH_WANT_HUGETLB_VMEMMAP_PREINIT if X86_64
>>   	select ARCH_WANTS_THP_SWAP		if X86_64
>> +	select ARCH_WANTS_STATIC_HUGE_ZERO_FOLIO if X86_64
>>   	select ARCH_HAS_PARANOID_L1D_FLUSH
>>   	select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
>>   	select BUILDTIME_TABLE_SORT
>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>> index 7748489fde1b7..ccfa5c95f14b1 100644
>> --- a/include/linux/huge_mm.h
>> +++ b/include/linux/huge_mm.h
>> @@ -495,6 +495,17 @@ static inline bool is_huge_zero_pmd(pmd_t pmd)
>>   struct folio *mm_get_huge_zero_folio(struct mm_struct *mm);
>>   void mm_put_huge_zero_folio(struct mm_struct *mm);
>> +static inline struct folio *get_static_huge_zero_folio(void)
>> +{
>> +	if (!IS_ENABLED(CONFIG_STATIC_HUGE_ZERO_FOLIO))
>> +		return NULL;
>> +
>> +	if (unlikely(!huge_zero_folio))
>> +		return NULL;
>> +
>> +	return huge_zero_folio;
>> +}
>> +
>>   static inline bool thp_migration_supported(void)
>>   {
>>   	return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
>> @@ -685,6 +696,11 @@ static inline int change_huge_pud(struct mmu_gather *tlb,
>>   {
>>   	return 0;
>>   }
>> +
>> +static inline struct folio *get_static_huge_zero_folio(void)
>> +{
>> +	return NULL;
>> +}
>>   #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
>>   static inline int split_folio_to_list_to_order(struct folio *folio,
>> diff --git a/mm/Kconfig b/mm/Kconfig
>> index e443fe8cd6cf2..366a6d2d771e3 100644
>> --- a/mm/Kconfig
>> +++ b/mm/Kconfig
>> @@ -823,6 +823,27 @@ config ARCH_WANT_GENERAL_HUGETLB
>>   config ARCH_WANTS_THP_SWAP
>>   	def_bool n
>> +config ARCH_WANTS_STATIC_HUGE_ZERO_FOLIO
>> +	def_bool n
>> +
>> +config STATIC_HUGE_ZERO_FOLIO
>> +	bool "Allocate a PMD sized folio for zeroing"
>> +	depends on ARCH_WANTS_STATIC_HUGE_ZERO_FOLIO && TRANSPARENT_HUGEPAGE
>> +	help
>> +	  Without this config enabled, the huge zero folio is allocated on
>> +	  demand and freed under memory pressure once no longer in use.
>> +	  To detect remaining users reliably, references to the huge zero folio
>> +	  must be tracked precisely, so it is commonly only available for mapping
>> +	  it into user page tables.
>> +
>> +	  With this config enabled, the huge zero folio can also be used
>> +	  for other purposes that do not implement precise reference counting:
>> +	  it is allocated statically and never freed, allowing for more
>> +	  wide-spread use, for example, when performing I/O similar to the
>> +	  traditional shared zeropage.
>> +
>> +	  Not suitable for memory constrained systems.
>> +
>>   config MM_ID
>>   	def_bool n
>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>> index ff06dee213eb2..f65ba3e6f0824 100644
>> --- a/mm/huge_memory.c
>> +++ b/mm/huge_memory.c
>> @@ -866,9 +866,14 @@ static int __init thp_shrinker_init(void)
>>   	huge_zero_folio_shrinker->scan_objects = shrink_huge_zero_folio_scan;
>>   	shrinker_register(huge_zero_folio_shrinker);
>> -	deferred_split_shrinker->count_objects = deferred_split_count;
>> -	deferred_split_shrinker->scan_objects = deferred_split_scan;
>> -	shrinker_register(deferred_split_shrinker);
>> +	if (IS_ENABLED(CONFIG_STATIC_HUGE_ZERO_FOLIO)) {
>> +		if (!get_huge_zero_folio())
>> +			pr_warn("Allocating static huge zero folio failed\n");
>> +	} else {
>> +		deferred_split_shrinker->count_objects = deferred_split_count;
>> +		deferred_split_shrinker->scan_objects = deferred_split_scan;
>> +		shrinker_register(deferred_split_shrinker);
>> +	}
>>   	return 0;
>>   }
>> --
>> 2.50.1
>>
>>
>> Now, one thing I do not like is that we have "ARCH_WANTS_STATIC_HUGE_ZERO_FOLIO" but
>> then have a user-selectable option.
>>
>> Should we just get rid of ARCH_WANTS_STATIC_HUGE_ZERO_FOLIO?
> 
> Yeah, though I guess we probably need to make it need CONFIG_MMU if so?
> Probably don't want to provide it if it might somehow break things?

It would still depend on THP, and THP is not available on !MMU. So that should just work.

We could go one step further and special-case
CONFIG_STATIC_HUGE_ZERO_FOLIO in mm_get_huge_zero_folio() +
mm_put_huge_zero_folio().

Something like

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9c38a95e9f091..9b87884e5f299 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -248,6 +248,9 @@ static void put_huge_zero_page(void)

  struct folio *mm_get_huge_zero_folio(struct mm_struct *mm)
  {
+       if (IS_ENABLED(CONFIG_STATIC_HUGE_ZERO_FOLIO))
+               return huge_zero_folio;
+
         if (test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags))
                 return READ_ONCE(huge_zero_folio);

@@ -262,6 +265,9 @@ struct folio *mm_get_huge_zero_folio(struct mm_struct *mm)

  void mm_put_huge_zero_folio(struct mm_struct *mm)
  {
+       if (IS_ENABLED(CONFIG_STATIC_HUGE_ZERO_FOLIO))
+               return;
+
         if (test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags))
                 put_huge_zero_page();
  }


-- 
Cheers,

David / dhildenb


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ