lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CALs-HssKav=+P_w5VuoZZU3xkut6FxMUpdpd5HSa5iPMRDu=5A@mail.gmail.com>
Date: Wed, 14 Feb 2024 09:24:16 -0500
From: Evan Green <evan@...osinc.com>
To: Charlie Jenkins <charlie@...osinc.com>
Cc: Paul Walmsley <paul.walmsley@...ive.com>, Palmer Dabbelt <palmer@...belt.com>, 
	Albert Ou <aou@...s.berkeley.edu>, Jisheng Zhang <jszhang@...nel.org>, 
	Clément Léger <cleger@...osinc.com>, 
	Eric Biggers <ebiggers@...nel.org>, Elliot Berman <quic_eberman@...cinc.com>, 
	linux-riscv@...ts.infradead.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v3 2/2] riscv: Disable misaligned access probe when CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS

On Fri, Feb 2, 2024 at 6:54 PM Charlie Jenkins <charlie@...osinc.com> wrote:
>
> When CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is selected, the cpus can be
> set to have fast misaligned access without needing to probe.
>
> To avoid some ifdefs, move the misaligned access probing code into its
> own file and make CONFIG_RISCV_MISALIGNED depend on
> !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
>
> Signed-off-by: Charlie Jenkins <charlie@...osinc.com>

One nit below, but feel free to add my tag on the next spin:

Reviewed-by: Evan Green <evan@...osinc.com>

> ---
>  arch/riscv/Kconfig                          |   1 +
>  arch/riscv/include/asm/cpufeature.h         |   7 +
>  arch/riscv/kernel/Makefile                  |   3 +
>  arch/riscv/kernel/cpufeature.c              | 255 --------------------------
>  arch/riscv/kernel/misaligned_access_speed.c | 265 ++++++++++++++++++++++++++++
>  arch/riscv/kernel/sys_hwprobe.c             |   4 +
>  6 files changed, 280 insertions(+), 255 deletions(-)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index bffbd869a068..3223d2d08f74 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -690,6 +690,7 @@ config THREAD_SIZE_ORDER
>  config RISCV_MISALIGNED
>         bool "Support misaligned load/store traps for kernel and userspace"
>         select SYSCTL_ARCH_UNALIGN_ALLOW
> +       depends on !HAVE_EFFICIENT_UNALIGNED_ACCESS
>         default y
>         help
>           Say Y here if you want the kernel to embed support for misaligned
> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
> index eb3ac304fc42..44734e5169b1 100644
> --- a/arch/riscv/include/asm/cpufeature.h
> +++ b/arch/riscv/include/asm/cpufeature.h
> @@ -51,6 +51,12 @@ static inline bool check_unaligned_access_emulated(int cpu)
>  static inline void unaligned_emulation_finish(void) {}
>  #endif
>
> +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> +static __always_inline bool has_fast_misaligned_accesses(void)
> +{
> +       return true;
> +}
> +#else
>  DECLARE_PER_CPU(long, misaligned_access_speed);
>
>  DECLARE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
> @@ -59,6 +65,7 @@ static __always_inline bool has_fast_misaligned_accesses(void)
>  {
>         return static_branch_likely(&fast_misaligned_access_speed_key);
>  }
> +#endif
>
>  unsigned long riscv_get_elf_hwcap(void);
>
> diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
> index f71910718053..ffba5ecf12c2 100644
> --- a/arch/riscv/kernel/Makefile
> +++ b/arch/riscv/kernel/Makefile
> @@ -62,6 +62,9 @@ obj-y += tests/
>  obj-$(CONFIG_MMU) += vdso.o vdso/
>
>  obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o
> +ifneq ($(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS), y)
> +obj-y  += misaligned_access_speed.o

copy-unaligned.o can also go in here (those are the assembly copy
routines used only by the probing code).

> +endif
>  obj-$(CONFIG_FPU)              += fpu.o
>  obj-$(CONFIG_RISCV_ISA_V)      += vector.o
>  obj-$(CONFIG_RISCV_ISA_V)      += kernel_mode_vector.o
> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index 89920f84d0a3..319670af5704 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -11,7 +11,6 @@
>  #include <linux/cpu.h>
>  #include <linux/cpuhotplug.h>
>  #include <linux/ctype.h>
> -#include <linux/jump_label.h>
>  #include <linux/log2.h>
>  #include <linux/memory.h>
>  #include <linux/module.h>
> @@ -21,20 +20,12 @@
>  #include <asm/cacheflush.h>
>  #include <asm/cpufeature.h>
>  #include <asm/hwcap.h>
> -#include <asm/hwprobe.h>
>  #include <asm/patch.h>
>  #include <asm/processor.h>
>  #include <asm/vector.h>
>
> -#include "copy-unaligned.h"
> -
>  #define NUM_ALPHA_EXTS ('z' - 'a' + 1)
>
> -#define MISALIGNED_ACCESS_JIFFIES_LG2 1
> -#define MISALIGNED_BUFFER_SIZE 0x4000
> -#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE)
> -#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
> -
>  unsigned long elf_hwcap __read_mostly;
>
>  /* Host ISA bitmap */
> @@ -43,11 +34,6 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
>  /* Per-cpu ISA extensions. */
>  struct riscv_isainfo hart_isa[NR_CPUS];
>
> -/* Performance information */
> -DEFINE_PER_CPU(long, misaligned_access_speed);
> -
> -static cpumask_t fast_misaligned_access;
> -
>  /**
>   * riscv_isa_extension_base() - Get base extension word
>   *
> @@ -706,247 +692,6 @@ unsigned long riscv_get_elf_hwcap(void)
>         return hwcap;
>  }
>
> -static int check_unaligned_access(void *param)
> -{
> -       int cpu = smp_processor_id();
> -       u64 start_cycles, end_cycles;
> -       u64 word_cycles;
> -       u64 byte_cycles;
> -       int ratio;
> -       unsigned long start_jiffies, now;
> -       struct page *page = param;
> -       void *dst;
> -       void *src;
> -       long speed = RISCV_HWPROBE_MISALIGNED_SLOW;
> -
> -       if (check_unaligned_access_emulated(cpu))
> -               return 0;
> -
> -       /* Make an unaligned destination buffer. */
> -       dst = (void *)((unsigned long)page_address(page) | 0x1);
> -       /* Unalign src as well, but differently (off by 1 + 2 = 3). */
> -       src = dst + (MISALIGNED_BUFFER_SIZE / 2);
> -       src += 2;
> -       word_cycles = -1ULL;
> -       /* Do a warmup. */
> -       __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
> -       preempt_disable();
> -       start_jiffies = jiffies;
> -       while ((now = jiffies) == start_jiffies)
> -               cpu_relax();
> -
> -       /*
> -        * For a fixed amount of time, repeatedly try the function, and take
> -        * the best time in cycles as the measurement.
> -        */
> -       while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
> -               start_cycles = get_cycles64();
> -               /* Ensure the CSR read can't reorder WRT to the copy. */
> -               mb();
> -               __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
> -               /* Ensure the copy ends before the end time is snapped. */
> -               mb();
> -               end_cycles = get_cycles64();
> -               if ((end_cycles - start_cycles) < word_cycles)
> -                       word_cycles = end_cycles - start_cycles;
> -       }
> -
> -       byte_cycles = -1ULL;
> -       __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
> -       start_jiffies = jiffies;
> -       while ((now = jiffies) == start_jiffies)
> -               cpu_relax();
> -
> -       while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
> -               start_cycles = get_cycles64();
> -               mb();
> -               __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
> -               mb();
> -               end_cycles = get_cycles64();
> -               if ((end_cycles - start_cycles) < byte_cycles)
> -                       byte_cycles = end_cycles - start_cycles;
> -       }
> -
> -       preempt_enable();
> -
> -       /* Don't divide by zero. */
> -       if (!word_cycles || !byte_cycles) {
> -               pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n",
> -                       cpu);
> -
> -               return 0;
> -       }
> -
> -       if (word_cycles < byte_cycles)
> -               speed = RISCV_HWPROBE_MISALIGNED_FAST;
> -
> -       ratio = div_u64((byte_cycles * 100), word_cycles);
> -       pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n",
> -               cpu,
> -               ratio / 100,
> -               ratio % 100,
> -               (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");
> -
> -       per_cpu(misaligned_access_speed, cpu) = speed;
> -
> -       /*
> -        * Set the value of fast_misaligned_access of a CPU. These operations
> -        * are atomic to avoid race conditions.
> -        */
> -       if (speed == RISCV_HWPROBE_MISALIGNED_FAST)
> -               cpumask_set_cpu(cpu, &fast_misaligned_access);
> -       else
> -               cpumask_clear_cpu(cpu, &fast_misaligned_access);
> -
> -       return 0;
> -}
> -
> -static void check_unaligned_access_nonboot_cpu(void *param)
> -{
> -       unsigned int cpu = smp_processor_id();
> -       struct page **pages = param;
> -
> -       if (smp_processor_id() != 0)
> -               check_unaligned_access(pages[cpu]);
> -}
> -
> -DEFINE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
> -
> -static void modify_unaligned_access_branches(cpumask_t *mask, int weight)
> -{
> -       if (cpumask_weight(mask) == weight)
> -               static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key);
> -       else
> -               static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key);
> -}
> -
> -static void set_unaligned_access_static_branches_except_cpu(int cpu)
> -{
> -       /*
> -        * Same as set_unaligned_access_static_branches, except excludes the
> -        * given CPU from the result. When a CPU is hotplugged into an offline
> -        * state, this function is called before the CPU is set to offline in
> -        * the cpumask, and thus the CPU needs to be explicitly excluded.
> -        */
> -
> -       cpumask_t fast_except_me;
> -
> -       cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask);
> -       cpumask_clear_cpu(cpu, &fast_except_me);
> -
> -       modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1);
> -}
> -
> -static void set_unaligned_access_static_branches(void)
> -{
> -       /*
> -        * This will be called after check_unaligned_access_all_cpus so the
> -        * result of unaligned access speed for all CPUs will be available.
> -        *
> -        * To avoid the number of online cpus changing between reading
> -        * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be
> -        * held before calling this function.
> -        */
> -
> -       cpumask_t fast_and_online;
> -
> -       cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask);
> -
> -       modify_unaligned_access_branches(&fast_and_online, num_online_cpus());
> -}
> -
> -static int lock_and_set_unaligned_access_static_branch(void)
> -{
> -       cpus_read_lock();
> -       set_unaligned_access_static_branches();
> -       cpus_read_unlock();
> -
> -       return 0;
> -}
> -
> -arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
> -
> -static int riscv_online_cpu(unsigned int cpu)
> -{
> -       static struct page *buf;
> -
> -       /* We are already set since the last check */
> -       if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
> -               goto exit;
> -
> -       buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
> -       if (!buf) {
> -               pr_warn("Allocation failure, not measuring misaligned performance\n");
> -               return -ENOMEM;
> -       }
> -
> -       check_unaligned_access(buf);
> -       __free_pages(buf, MISALIGNED_BUFFER_ORDER);
> -
> -exit:
> -       set_unaligned_access_static_branches();
> -
> -       return 0;
> -}
> -
> -static int riscv_offline_cpu(unsigned int cpu)
> -{
> -       set_unaligned_access_static_branches_except_cpu(cpu);
> -
> -       return 0;
> -}
> -
> -/* Measure unaligned access on all CPUs present at boot in parallel. */
> -static int check_unaligned_access_all_cpus(void)
> -{
> -       unsigned int cpu;
> -       unsigned int cpu_count = num_possible_cpus();
> -       struct page **bufs = kzalloc(cpu_count * sizeof(struct page *),
> -                                    GFP_KERNEL);
> -
> -       if (!bufs) {
> -               pr_warn("Allocation failure, not measuring misaligned performance\n");
> -               return 0;
> -       }
> -
> -       /*
> -        * Allocate separate buffers for each CPU so there's no fighting over
> -        * cache lines.
> -        */
> -       for_each_cpu(cpu, cpu_online_mask) {
> -               bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
> -               if (!bufs[cpu]) {
> -                       pr_warn("Allocation failure, not measuring misaligned performance\n");
> -                       goto out;
> -               }
> -       }
> -
> -       /* Check everybody except 0, who stays behind to tend jiffies. */
> -       on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1);
> -
> -       /* Check core 0. */
> -       smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
> -
> -       /*
> -        * Setup hotplug callbacks for any new CPUs that come online or go
> -        * offline.
> -        */
> -       cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
> -                                 riscv_online_cpu, riscv_offline_cpu);
> -
> -out:
> -       unaligned_emulation_finish();
> -       for_each_cpu(cpu, cpu_online_mask) {
> -               if (bufs[cpu])
> -                       __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER);
> -       }
> -
> -       kfree(bufs);
> -       return 0;
> -}
> -
> -arch_initcall(check_unaligned_access_all_cpus);
> -
>  void riscv_user_isa_enable(void)
>  {
>         if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ))
> diff --git a/arch/riscv/kernel/misaligned_access_speed.c b/arch/riscv/kernel/misaligned_access_speed.c
> new file mode 100644
> index 000000000000..b725c07dd1af
> --- /dev/null
> +++ b/arch/riscv/kernel/misaligned_access_speed.c
> @@ -0,0 +1,265 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright 2024 Rivos Inc.
> + */
> +
> +#include <linux/cpu.h>
> +#include <linux/cpumask.h>
> +#include <linux/jump_label.h>
> +#include <linux/mm.h>
> +#include <linux/smp.h>
> +#include <linux/types.h>
> +#include <asm/cpufeature.h>
> +#include <asm/hwprobe.h>
> +
> +#include "copy-unaligned.h"
> +
> +#define MISALIGNED_ACCESS_JIFFIES_LG2 1
> +#define MISALIGNED_BUFFER_SIZE 0x4000
> +#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE)
> +#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
> +
> +DEFINE_PER_CPU(long, misaligned_access_speed);
> +
> +static cpumask_t fast_misaligned_access;
> +
> +static int check_unaligned_access(void *param)
> +{
> +       int cpu = smp_processor_id();
> +       u64 start_cycles, end_cycles;
> +       u64 word_cycles;
> +       u64 byte_cycles;
> +       int ratio;
> +       unsigned long start_jiffies, now;
> +       struct page *page = param;
> +       void *dst;
> +       void *src;
> +       long speed = RISCV_HWPROBE_MISALIGNED_SLOW;
> +
> +       if (check_unaligned_access_emulated(cpu))
> +               return 0;
> +
> +       /* Make an unaligned destination buffer. */
> +       dst = (void *)((unsigned long)page_address(page) | 0x1);
> +       /* Unalign src as well, but differently (off by 1 + 2 = 3). */
> +       src = dst + (MISALIGNED_BUFFER_SIZE / 2);
> +       src += 2;
> +       word_cycles = -1ULL;
> +       /* Do a warmup. */
> +       __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
> +       preempt_disable();
> +       start_jiffies = jiffies;
> +       while ((now = jiffies) == start_jiffies)
> +               cpu_relax();
> +
> +       /*
> +        * For a fixed amount of time, repeatedly try the function, and take
> +        * the best time in cycles as the measurement.
> +        */
> +       while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
> +               start_cycles = get_cycles64();
> +               /* Ensure the CSR read can't reorder WRT to the copy. */
> +               mb();
> +               __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
> +               /* Ensure the copy ends before the end time is snapped. */
> +               mb();
> +               end_cycles = get_cycles64();
> +               if ((end_cycles - start_cycles) < word_cycles)
> +                       word_cycles = end_cycles - start_cycles;
> +       }
> +
> +       byte_cycles = -1ULL;
> +       __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
> +       start_jiffies = jiffies;
> +       while ((now = jiffies) == start_jiffies)
> +               cpu_relax();
> +
> +       while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
> +               start_cycles = get_cycles64();
> +               mb();
> +               __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
> +               mb();
> +               end_cycles = get_cycles64();
> +               if ((end_cycles - start_cycles) < byte_cycles)
> +                       byte_cycles = end_cycles - start_cycles;
> +       }
> +
> +       preempt_enable();
> +
> +       /* Don't divide by zero. */
> +       if (!word_cycles || !byte_cycles) {
> +               pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n",
> +                       cpu);
> +
> +               return 0;
> +       }
> +
> +       if (word_cycles < byte_cycles)
> +               speed = RISCV_HWPROBE_MISALIGNED_FAST;
> +
> +       ratio = div_u64((byte_cycles * 100), word_cycles);
> +       pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n",
> +               cpu,
> +               ratio / 100,
> +               ratio % 100,
> +               (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");
> +
> +       per_cpu(misaligned_access_speed, cpu) = speed;
> +
> +       /*
> +        * Set the value of fast_misaligned_access of a CPU. These operations
> +        * are atomic to avoid race conditions.
> +        */
> +       if (speed == RISCV_HWPROBE_MISALIGNED_FAST)
> +               cpumask_set_cpu(cpu, &fast_misaligned_access);
> +       else
> +               cpumask_clear_cpu(cpu, &fast_misaligned_access);
> +
> +       return 0;
> +}
> +
> +static void check_unaligned_access_nonboot_cpu(void *param)
> +{
> +       unsigned int cpu = smp_processor_id();
> +       struct page **pages = param;
> +
> +       if (smp_processor_id() != 0)
> +               check_unaligned_access(pages[cpu]);
> +}
> +
> +DEFINE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
> +
> +static void modify_unaligned_access_branches(cpumask_t *mask, int weight)
> +{
> +       if (cpumask_weight(mask) == weight)
> +               static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key);
> +       else
> +               static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key);
> +}
> +
> +static void set_unaligned_access_static_branches_except_cpu(int cpu)
> +{
> +       /*
> +        * Same as set_unaligned_access_static_branches, except excludes the
> +        * given CPU from the result. When a CPU is hotplugged into an offline
> +        * state, this function is called before the CPU is set to offline in
> +        * the cpumask, and thus the CPU needs to be explicitly excluded.
> +        */
> +
> +       cpumask_t fast_except_me;
> +
> +       cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask);
> +       cpumask_clear_cpu(cpu, &fast_except_me);
> +
> +       modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1);
> +}
> +
> +static void set_unaligned_access_static_branches(void)
> +{
> +       /*
> +        * This will be called after check_unaligned_access_all_cpus so the
> +        * result of unaligned access speed for all CPUs will be available.
> +        *
> +        * To avoid the number of online cpus changing between reading
> +        * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be
> +        * held before calling this function.
> +        */
> +
> +       cpumask_t fast_and_online;
> +
> +       cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask);
> +
> +       modify_unaligned_access_branches(&fast_and_online, num_online_cpus());
> +}
> +
> +static int lock_and_set_unaligned_access_static_branch(void)
> +{
> +       cpus_read_lock();
> +       set_unaligned_access_static_branches();
> +       cpus_read_unlock();
> +
> +       return 0;
> +}
> +
> +arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
> +
> +static int riscv_online_cpu(unsigned int cpu)
> +{
> +       static struct page *buf;
> +
> +       /* We are already set since the last check */
> +       if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
> +               goto exit;
> +
> +       buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
> +       if (!buf) {
> +               pr_warn("Allocation failure, not measuring misaligned performance\n");
> +               return -ENOMEM;
> +       }
> +
> +       check_unaligned_access(buf);
> +       __free_pages(buf, MISALIGNED_BUFFER_ORDER);
> +
> +exit:
> +       set_unaligned_access_static_branches();
> +
> +       return 0;
> +}
> +
> +static int riscv_offline_cpu(unsigned int cpu)
> +{
> +       set_unaligned_access_static_branches_except_cpu(cpu);
> +
> +       return 0;
> +}
> +
> +/* Measure unaligned access on all CPUs present at boot in parallel. */
> +static int check_unaligned_access_all_cpus(void)
> +{
> +       unsigned int cpu;
> +       unsigned int cpu_count = num_possible_cpus();
> +       struct page **bufs = kzalloc(cpu_count * sizeof(struct page *),
> +                                    GFP_KERNEL);
> +
> +       if (!bufs) {
> +               pr_warn("Allocation failure, not measuring misaligned performance\n");
> +               return 0;
> +       }
> +
> +       /*
> +        * Allocate separate buffers for each CPU so there's no fighting over
> +        * cache lines.
> +        */
> +       for_each_cpu(cpu, cpu_online_mask) {
> +               bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
> +               if (!bufs[cpu]) {
> +                       pr_warn("Allocation failure, not measuring misaligned performance\n");
> +                       goto out;
> +               }
> +       }
> +
> +       /* Check everybody except 0, who stays behind to tend jiffies. */
> +       on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1);
> +
> +       /* Check core 0. */
> +       smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
> +
> +       /*
> +        * Setup hotplug callbacks for any new CPUs that come online or go
> +        * offline.
> +        */
> +       cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
> +                                 riscv_online_cpu, riscv_offline_cpu);
> +
> +out:
> +       unaligned_emulation_finish();
> +       for_each_cpu(cpu, cpu_online_mask) {
> +               if (bufs[cpu])
> +                       __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER);
> +       }
> +
> +       kfree(bufs);
> +       return 0;
> +}
> +
> +arch_initcall(check_unaligned_access_all_cpus);
> diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
> index a7c56b41efd2..d9bd24776a3e 100644
> --- a/arch/riscv/kernel/sys_hwprobe.c
> +++ b/arch/riscv/kernel/sys_hwprobe.c
> @@ -149,6 +149,9 @@ static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext)
>
>  static u64 hwprobe_misaligned(const struct cpumask *cpus)
>  {
> +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> +       return RISCV_HWPROBE_MISALIGNED_FAST;
> +#else

Ok, two nits: this is a little clunky with the ifdef inside the
function body. The pattern I see more often is for the #ifdef and
#else branches to each contain a separate function definition. That
would work nicely here.

>         int cpu;
>         u64 perf = -1ULL;
>
> @@ -168,6 +171,7 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus)
>                 return RISCV_HWPROBE_MISALIGNED_UNKNOWN;
>
>         return perf;
> +#endif
>  }
>
>  static void hwprobe_one_pair(struct riscv_hwprobe *pair,
>
> --
> 2.43.0
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ