Message-ID: <555fca66-81b6-3406-eac1-140c00669477@arm.com>
Date: Thu, 26 Jan 2023 19:15:26 +0000
From: Robin Murphy <robin.murphy@....com>
To: Georgi Djakov <quic_c_gdjako@...cinc.com>, catalin.marinas@....com,
	will@...nel.org
Cc: dave.hansen@...ux.intel.com, luto@...nel.org, peterz@...radead.org,
	tglx@...utronix.de, mingo@...hat.com, bp@...en8.de, hpa@...or.com,
	hch@....de, m.szyprowski@...sung.com, linux-arm-kernel@...ts.infradead.org,
	iommu@...ts.linux.dev, linux-kernel@...r.kernel.org, djakov@...nel.org
Subject: Re: [RFC] mm: Allow ZONE_DMA32 to be disabled via kernel command line

On 2023-01-26 16:43, Georgi Djakov wrote:
> From: Chris Goldsworthy <quic_cgoldswo@...cinc.com>
>
> It's useful to have an option to disable ZONE_DMA32 during boot, as
> CONFIG_ZONE_DMA32 is enabled by default (on multiplatform kernels, for
> example). There are platforms that do not use this zone, and in some
> high-memory-pressure scenarios disabling it would help by easing the
> load on kswapd (leaving file-backed memory intact / unreclaimed). When
> ZONE_DMA32 is enabled on these platforms, kswapd is woken up more
> easily and drains the file cache, which leads to some performance
> issues.
>
> Signed-off-by: Chris Goldsworthy <quic_cgoldswo@...cinc.com>
> [georgi: updated commit text]
> Signed-off-by: Georgi Djakov <quic_c_gdjako@...cinc.com>
> ---
> The main question here is whether we can have a kernel command line
> option to disable CONFIG_ZONE_DMA32 during boot (at least on arm64).
> I can imagine this also being useful for Linux distros.

FWIW I'd say that "disabled" and "left empty then awkwardly tiptoed
around in a few places" are very different notions... However, I'm just
going to take a step back and read the commit message a few more
times... Given what it claims, I can't help but ask why we wouldn't
want a parameter to control kswapd's behaviour and address that issue
directly, rather than a massive hammer that breaks everyone allocating
explicitly or implicitly with __GFP_DMA32 (especially on systems where
it doesn't normally matter because all memory is below 4GB anyway),
just to achieve one rather niche side effect?

Thanks,
Robin.
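To make the breakage Robin describes concrete: any caller that explicitly asks for memory below 4GB via __GFP_DMA32 would now be allocating from an empty zone. A minimal sketch of such a caller (a hypothetical test module, not part of the patch; it assumes a kernel with the patch applied and booted with disable_dma32=on):

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/module.h>

static int __init dma32_demo_init(void)
{
	/*
	 * Explicitly request a page from ZONE_DMA32. With disable_dma32=on
	 * the zone is empty, so the request can only fall back to ZONE_DMA
	 * (if configured) and may fail even while ZONE_NORMAL still has
	 * plenty of free memory, which is exactly the case Robin raises.
	 */
	struct page *page = alloc_pages(GFP_KERNEL | __GFP_DMA32, 0);

	if (!page)
		return -ENOMEM;

	__free_pages(page, 0);
	return 0;
}
module_init(dma32_demo_init);

MODULE_LICENSE("GPL");

On systems where all memory already sits below 4GB, such callers normally never notice ZONE_DMA32 at all, which is why emptying the zone globally is a much bigger hammer than the commit message suggests.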
>  .../admin-guide/kernel-parameters.txt |  5 +++++
>  arch/arm64/mm/init.c                  | 20 ++++++++++++++++-
>  arch/x86/mm/init.c                    | 20 ++++++++++++++++-
>  include/linux/dma-direct.h            | 22 +++++++++++++++++++
>  kernel/dma/direct.c                   |  5 +++--
>  kernel/dma/pool.c                     |  8 +++----
>  6 files changed, 72 insertions(+), 8 deletions(-)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index cb12df402161..854ff65ac6b0 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -1070,6 +1070,11 @@
>                         Disable Dynamic DMA Window support. Use this
>                         to workaround buggy firmware.
>
> +       disable_dma32=  [KNL]
> +                       Dynamically disable ZONE_DMA32 on kernels compiled with
> +                       CONFIG_ZONE_DMA32=y.
> +
> +
>        disable_ipv6=   [IPV6]
>                        See Documentation/networking/ipv6.rst.
>
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index 58a0bb2c17f1..1a56098c0e19 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -118,6 +118,12 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
>         return 0;
>  }
>
> +/*
> + * Provide a run-time means of disabling ZONE_DMA32 if it is enabled via
> + * CONFIG_ZONE_DMA32.
> + */
> +static bool disable_dma32 __ro_after_init;
> +
>  /*
>   * reserve_crashkernel() - reserves memory for crash kernel
>   *
> @@ -244,7 +250,7 @@ static void __init zone_sizes_init(void)
>         max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit);
>  #endif
>  #ifdef CONFIG_ZONE_DMA32
> -       max_zone_pfns[ZONE_DMA32] = PFN_DOWN(dma32_phys_limit);
> +       max_zone_pfns[ZONE_DMA32] = disable_dma32 ? 0 : PFN_DOWN(dma32_phys_limit);
>         if (!arm64_dma_phys_limit)
>                 arm64_dma_phys_limit = dma32_phys_limit;
>  #endif
> @@ -253,6 +259,18 @@ static void __init zone_sizes_init(void)
>         free_area_init(max_zone_pfns);
>  }
>
> +static int __init early_disable_dma32(char *buf)
> +{
> +       if (!buf)
> +               return -EINVAL;
> +
> +       if (!strcmp(buf, "on"))
> +               disable_dma32 = true;
> +
> +       return 0;
> +}
> +early_param("disable_dma32", early_disable_dma32);
> +
>  int pfn_is_map_memory(unsigned long pfn)
>  {
>         phys_addr_t addr = PFN_PHYS(pfn);
> diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
> index cb258f58fdc8..b8af7e2f21f5 100644
> --- a/arch/x86/mm/init.c
> +++ b/arch/x86/mm/init.c
> @@ -112,6 +112,12 @@ static unsigned long min_pfn_mapped;
>
>  static bool __initdata can_use_brk_pgt = true;
>
> +/*
> + * Provide a run-time means of disabling ZONE_DMA32 if it is enabled via
> + * CONFIG_ZONE_DMA32.
> + */
> +static bool disable_dma32 __ro_after_init;
> +
>  /*
>   * Pages returned are already directly mapped.
>   *
> @@ -1032,7 +1038,7 @@ void __init zone_sizes_init(void)
>         max_zone_pfns[ZONE_DMA] = min(MAX_DMA_PFN, max_low_pfn);
>  #endif
>  #ifdef CONFIG_ZONE_DMA32
> -       max_zone_pfns[ZONE_DMA32] = min(MAX_DMA32_PFN, max_low_pfn);
> +       max_zone_pfns[ZONE_DMA32] = disable_dma32 ? 0 : min(MAX_DMA32_PFN, max_low_pfn);
>  #endif
>         max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
>  #ifdef CONFIG_HIGHMEM
> @@ -1042,6 +1048,18 @@
>         free_area_init(max_zone_pfns);
>  }
>
> +static int __init early_disable_dma32(char *buf)
> +{
> +       if (!buf)
> +               return -EINVAL;
> +
> +       if (!strcmp(buf, "on"))
> +               disable_dma32 = true;
> +
> +       return 0;
> +}
> +early_param("disable_dma32", early_disable_dma32);
> +
>  __visible DEFINE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate) = {
>         .loaded_mm = &init_mm,
>         .next_asid = 1,
> diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
> index 18aade195884..ed69618cf1fc 100644
> --- a/include/linux/dma-direct.h
> +++ b/include/linux/dma-direct.h
> @@ -24,6 +24,28 @@ struct bus_dma_region {
>         u64 offset;
>  };
>
> +static inline bool zone_dma32_is_empty(int node)
> +{
> +#ifdef CONFIG_ZONE_DMA32
> +       pg_data_t *pgdat = NODE_DATA(node);
> +
> +       return zone_is_empty(&pgdat->node_zones[ZONE_DMA32]);
> +#else
> +       return true;
> +#endif
> +}
> +
> +static inline bool zone_dma32_are_empty(void)
> +{
> +       int node;
> +
> +       for_each_node(node)
> +               if (!zone_dma32_is_empty(node))
> +                       return false;
> +
> +       return true;
> +}
> +
>  static inline dma_addr_t translate_phys_to_dma(struct device *dev,
>                 phys_addr_t paddr)
>  {
> diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> index 63859a101ed8..754210c65658 100644
> --- a/kernel/dma/direct.c
> +++ b/kernel/dma/direct.c
> @@ -60,7 +60,7 @@ static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
>         *phys_limit = dma_to_phys(dev, dma_limit);
>         if (*phys_limit <= DMA_BIT_MASK(zone_dma_bits))
>                 return GFP_DMA;
> -       if (*phys_limit <= DMA_BIT_MASK(32))
> +       if (*phys_limit <= DMA_BIT_MASK(32) && !zone_dma32_is_empty(dev_to_node(dev)))
>                 return GFP_DMA32;
>         return 0;
>  }
> @@ -145,7 +145,8 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
>
>         if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
>             phys_limit < DMA_BIT_MASK(64) &&
> -           !(gfp & (GFP_DMA32 | GFP_DMA))) {
> +           !(gfp & (GFP_DMA32 | GFP_DMA)) &&
> +           !zone_dma32_is_empty(node)) {
>                 gfp |= GFP_DMA32;
>                 goto again;
>         }
> diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
> index 4d40dcce7604..8e79903fbda8 100644
> --- a/kernel/dma/pool.c
> +++ b/kernel/dma/pool.c
> @@ -71,7 +71,7 @@ static bool cma_in_zone(gfp_t gfp)
>         end = cma_get_base(cma) + size - 1;
>         if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA))
>                 return end <= DMA_BIT_MASK(zone_dma_bits);
> -       if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32))
> +       if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32) && !zone_dma32_are_empty())
>                 return end <= DMA_BIT_MASK(32);
>         return true;
>  }
> @@ -153,7 +153,7 @@ static void atomic_pool_work_fn(struct work_struct *work)
>         if (IS_ENABLED(CONFIG_ZONE_DMA))
>                 atomic_pool_resize(atomic_pool_dma,
>                                    GFP_KERNEL | GFP_DMA);
> -       if (IS_ENABLED(CONFIG_ZONE_DMA32))
> +       if (IS_ENABLED(CONFIG_ZONE_DMA32) && !zone_dma32_are_empty())
>                 atomic_pool_resize(atomic_pool_dma32,
>                                    GFP_KERNEL | GFP_DMA32);
>         atomic_pool_resize(atomic_pool_kernel, GFP_KERNEL);
> @@ -209,7 +209,7 @@ static int __init dma_atomic_pool_init(void)
>                 if (!atomic_pool_dma)
>                         ret = -ENOMEM;
>         }
> -       if (IS_ENABLED(CONFIG_ZONE_DMA32)) {
> +       if (IS_ENABLED(CONFIG_ZONE_DMA32) && !zone_dma32_are_empty()) {
>                 atomic_pool_dma32 = __dma_atomic_pool_init(atomic_pool_size,
>                                 GFP_KERNEL | GFP_DMA32);
>                 if (!atomic_pool_dma32)
> @@ -224,7 +224,7 @@ postcore_initcall(dma_atomic_pool_init);
>  static inline struct gen_pool *dma_guess_pool(struct gen_pool *prev, gfp_t gfp)
>  {
>         if (prev == NULL) {
> -               if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32))
> +               if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32) && !zone_dma32_are_empty())
>                         return atomic_pool_dma32;
>                 if (atomic_pool_dma && (gfp & GFP_DMA))
>                         return atomic_pool_dma;
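As a quick sanity check of the runtime behaviour, the effect of booting with disable_dma32=on should be observable in /proc/zoneinfo (ZONE_DMA32 reporting zero spanned pages on every node), or via the patch's new helper. A hypothetical debug snippet (module name and message are illustrative; zone_dma32_are_empty() is the helper the patch adds to include/linux/dma-direct.h):

#include <linux/dma-direct.h>
#include <linux/module.h>
#include <linux/printk.h>

static int __init dma32_state_init(void)
{
	/* Expected to print 1 after booting with disable_dma32=on. */
	pr_info("ZONE_DMA32 empty on all nodes: %d\n", zone_dma32_are_empty());
	return 0;
}
module_init(dma32_state_init);

MODULE_LICENSE("GPL");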