lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20220105104526.GA3015@willie-the-truck>
Date:   Wed, 5 Jan 2022 10:45:26 +0000
From:   Will Deacon <will@...nel.org>
To:     Yu Zhao <yuzhao@...gle.com>
Cc:     Andrew Morton <akpm@...ux-foundation.org>,
        Linus Torvalds <torvalds@...ux-foundation.org>,
        Andi Kleen <ak@...ux.intel.com>,
        Catalin Marinas <catalin.marinas@....com>,
        Dave Hansen <dave.hansen@...ux.intel.com>,
        Hillf Danton <hdanton@...a.com>, Jens Axboe <axboe@...nel.dk>,
        Jesse Barnes <jsbarnes@...gle.com>,
        Johannes Weiner <hannes@...xchg.org>,
        Jonathan Corbet <corbet@....net>,
        Matthew Wilcox <willy@...radead.org>,
        Mel Gorman <mgorman@...e.de>,
        Michael Larabel <Michael@...haellarabel.com>,
        Michal Hocko <mhocko@...nel.org>,
        Rik van Riel <riel@...riel.com>,
        Vlastimil Babka <vbabka@...e.cz>,
        Ying Huang <ying.huang@...el.com>,
        linux-arm-kernel@...ts.infradead.org, linux-doc@...r.kernel.org,
        linux-kernel@...r.kernel.org, linux-mm@...ck.org,
        page-reclaim@...gle.com, x86@...nel.org,
        Konstantin Kharlamov <Hi-Angel@...dex.ru>
Subject: Re: [PATCH v6 1/9] mm: x86, arm64: add arch_has_hw_pte_young()

On Tue, Jan 04, 2022 at 01:22:20PM -0700, Yu Zhao wrote:
> Some architectures automatically set the accessed bit in PTEs, e.g.,
> x86 and arm64 v8.2. On architectures that don't have this capability,
> clearing the accessed bit in a PTE usually triggers a page fault
> following the TLB miss of this PTE.
> 
> Being aware of this capability can help make better decisions, e.g.,
> whether to spread the work out over a period of time to avoid bursty
> page faults when trying to clear the accessed bit in a large number of
> PTEs.
> 
> Signed-off-by: Yu Zhao <yuzhao@...gle.com>
> Tested-by: Konstantin Kharlamov <Hi-Angel@...dex.ru>
> ---
>  arch/arm64/include/asm/cpufeature.h |  5 +++++
>  arch/arm64/include/asm/pgtable.h    | 13 ++++++++-----
>  arch/arm64/kernel/cpufeature.c      | 19 +++++++++++++++++++
>  arch/arm64/tools/cpucaps            |  1 +
>  arch/x86/include/asm/pgtable.h      |  6 +++---
>  include/linux/pgtable.h             | 13 +++++++++++++
>  mm/memory.c                         | 14 +-------------
>  7 files changed, 50 insertions(+), 21 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
> index ef6be92b1921..99518b4b2a9e 100644
> --- a/arch/arm64/include/asm/cpufeature.h
> +++ b/arch/arm64/include/asm/cpufeature.h
> @@ -779,6 +779,11 @@ static inline bool system_supports_tlb_range(void)
>  		cpus_have_const_cap(ARM64_HAS_TLB_RANGE);
>  }
>  
> +static inline bool system_has_hw_af(void)
> +{
> +	return IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && cpus_have_const_cap(ARM64_HW_AF);
> +}
> +
>  extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
>  
>  static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index c4ba047a82d2..e736f47436c7 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -999,13 +999,16 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
>   * page after fork() + CoW for pfn mappings. We don't always have a
>   * hardware-managed access flag on arm64.
>   */
> -static inline bool arch_faults_on_old_pte(void)
> +static inline bool arch_has_hw_pte_young(bool local)
>  {
> -	WARN_ON(preemptible());
> +	if (local) {
> +		WARN_ON(preemptible());
> +		return cpu_has_hw_af();
> +	}
>  
> -	return !cpu_has_hw_af();
> +	return system_has_hw_af();
>  }
> -#define arch_faults_on_old_pte		arch_faults_on_old_pte
> +#define arch_has_hw_pte_young		arch_has_hw_pte_young
>  
>  /*
>   * Experimentally, it's cheap to set the access flag in hardware and we
> @@ -1013,7 +1016,7 @@ static inline bool arch_faults_on_old_pte(void)
>   */
>  static inline bool arch_wants_old_prefaulted_pte(void)
>  {
> -	return !arch_faults_on_old_pte();
> +	return arch_has_hw_pte_young(true);
>  }
>  #define arch_wants_old_prefaulted_pte	arch_wants_old_prefaulted_pte
>  
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index 6f3e677d88f1..5bb553ee2c0e 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -2171,6 +2171,25 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
>  		.matches = has_hw_dbm,
>  		.cpu_enable = cpu_enable_hw_dbm,
>  	},
> +	{
> +		/*
> +		 * __cpu_setup always enables this capability. But if the boot
> +		 * CPU has it and a late CPU doesn't, the absent
> +		 * ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU will prevent this late CPU
> +		 * from going online. There is neither known hardware does that
> +		 * nor obvious reasons to design hardware works that way, hence
> +		 * no point leaving the door open here. If the need arises, a
> +		 * new weak system feature flag should do the trick.
> +		 */
> +		.desc = "Hardware update of the Access flag",
> +		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
> +		.capability = ARM64_HW_AF,
> +		.sys_reg = SYS_ID_AA64MMFR1_EL1,
> +		.sign = FTR_UNSIGNED,
> +		.field_pos = ID_AA64MMFR1_HADBS_SHIFT,
> +		.min_field_value = 1,
> +		.matches = has_cpuid_feature,
> +	},
>  #endif
>  	{
>  		.desc = "CRC32 instructions",
> diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
> index 870c39537dd0..56e4ef5d95fa 100644
> --- a/arch/arm64/tools/cpucaps
> +++ b/arch/arm64/tools/cpucaps
> @@ -36,6 +36,7 @@ HAS_STAGE2_FWB
>  HAS_SYSREG_GIC_CPUIF
>  HAS_TLB_RANGE
>  HAS_VIRT_HOST_EXTN
> +HW_AF
>  HW_DBM
>  KVM_PROTECTED_MODE
>  MISMATCHED_CACHE_TYPE

As discussed in the previous threads, we really don't need the complexity
of the additional cap for the arm64 part. Please can you just use the
existing code instead? It's both simpler and, as you say, it's equivalent
for existing hardware.

That way, this patch just ends up being a renaming exercise and we're all
good.

Thanks,

Will

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ