Message-ID: <85202bc5-4b50-458c-8ef8-be4d22e9d939@intel.com>
Date: Thu, 22 Jan 2026 14:06:03 -0800
From: "Chen, Zide" <zide.chen@...el.com>
To: "Mi, Dapeng" <dapeng1.mi@...ux.intel.com>,
Chun-Tse Shao <ctshao@...gle.com>, Ian Rogers <irogers@...gle.com>
Cc: Thomas Falcon <thomas.falcon@...el.com>, linux-kernel@...r.kernel.org,
peterz@...radead.org, mingo@...hat.com, acme@...nel.org,
namhyung@...nel.org, mark.rutland@....com,
alexander.shishkin@...ux.intel.com, jolsa@...nel.org,
adrian.hunter@...el.com, james.clark@...aro.org, ravi.bangoria@....com,
linux-perf-users@...r.kernel.org
Subject: Re: [PATCH] perf pmu intel: Adjust cpumasks for sub-NUMA clusters on
	Emeraldrapids

On 1/18/2026 4:51 PM, Mi, Dapeng wrote:
>
> On 1/14/2026 2:06 AM, Chun-Tse Shao wrote:
>> Ping.
>>
>> Thanks for your comment, Ian. To the Intel team, can we get confirmation
>> of the GNR SNC2 configuration?
>>
>> -CT
>>
>> On Thu, Jan 8, 2026 at 11:19 AM Ian Rogers <irogers@...gle.com> wrote:
>>> On Thu, Jan 8, 2026 at 10:45 AM Chun-Tse Shao <ctshao@...gle.com> wrote:
>>>> Similar to GNR [1], Emeraldrapids supports sub-NUMA clusters as well.
>>>> Adjust the cpumasks using the same logic as for GNR in [1].
>>>>
>>>> Tested on Emeraldrapids with SNC2 enabled:
>>>> $ perf stat --per-node -e 'UNC_CHA_CLOCKTICKS,UNC_M_CLOCKTICKS' -a -- sleep 1
>>>>
>>>> Performance counter stats for 'system wide':
>>>>
>>>> N0 30 72125876670 UNC_CHA_CLOCKTICKS
>>>> N0 4 8815163648 UNC_M_CLOCKTICKS
>>>> N1 30 72124958844 UNC_CHA_CLOCKTICKS
>>>> N1 4 8815014974 UNC_M_CLOCKTICKS
>>>> N2 30 72121049022 UNC_CHA_CLOCKTICKS
>>>> N2 4 8814592626 UNC_M_CLOCKTICKS
>>>> N3 30 72117133854 UNC_CHA_CLOCKTICKS
>>>> N3 4 8814012840 UNC_M_CLOCKTICKS
>>>>
>>>> 1.001574118 seconds time elapsed
>>>>
>>>> [1] lore.kernel.org/20250515181417.491401-1-irogers@...gle.com
>>>>
>>>> Signed-off-by: Chun-Tse Shao <ctshao@...gle.com>
>>>> ---
>>>> tools/perf/arch/x86/util/pmu.c | 45 +++++++++++++++++++++-------------
>>>> 1 file changed, 28 insertions(+), 17 deletions(-)
>>>>
>>>> diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
>>>> index a3f96221758d..fad68a0f7b5d 100644
>>>> --- a/tools/perf/arch/x86/util/pmu.c
>>>> +++ b/tools/perf/arch/x86/util/pmu.c
>>>> @@ -22,20 +22,29 @@
>>>> #include "util/env.h"
>>>> #include "util/header.h"
>>>>
>>>> -static bool x86__is_intel_graniterapids(void)
>>>> +static bool x86__is_snc_supported(void)
>>>> {
>>>> - static bool checked_if_graniterapids;
>>>> - static bool is_graniterapids;
>>>> + static bool checked_if_snc_supported;
>>>> + static bool is_supported;
>>>>
>>>> - if (!checked_if_graniterapids) {
>>>> - const char *graniterapids_cpuid = "GenuineIntel-6-A[DE]";
>>>> + if (!checked_if_snc_supported) {
>>>> +
>>>> + /* Emeraldrapids and Graniterapids support SNC configuration. */
>>>> + static const char *const supported_cpuids[] = {
>>>> + "GenuineIntel-6-CF", /* Emeraldrapids */
>>>> + "GenuineIntel-6-A[DE]", /* Graniterapids */
>>>> + };
>>>> char *cpuid = get_cpuid_str((struct perf_cpu){0});
>>>>
>>>> - is_graniterapids = cpuid && strcmp_cpuid_str(graniterapids_cpuid, cpuid) == 0;
>>>> + for (size_t i = 0; i < ARRAY_SIZE(supported_cpuids); i++) {
>>>> + is_supported = cpuid && strcmp_cpuid_str(supported_cpuids[i], cpuid) == 0;
>>>> + if (is_supported)
>>>> + break;
>>>> + }
>>>> free(cpuid);
>>>> - checked_if_graniterapids = true;
>>>> + checked_if_snc_supported = true;
>>>> }
>>>> - return is_graniterapids;
>>>> + return is_supported;
>>>> }
>>>>
>>>> static struct perf_cpu_map *read_sysfs_cpu_map(const char *sysfs_path)
>>>> @@ -64,6 +73,7 @@ static int snc_nodes_per_l3_cache(void)
>>>> read_sysfs_cpu_map("devices/system/cpu/cpu0/cache/index3/shared_cpu_list");
>>>>
>>>> snc_nodes = perf_cpu_map__nr(cache_cpus) / perf_cpu_map__nr(node_cpus);
>>>> +
>>>> perf_cpu_map__put(cache_cpus);
>>>> perf_cpu_map__put(node_cpus);
>>>> checked_snc = true;
>>>> @@ -137,8 +147,8 @@ static int uncore_imc_snc(struct perf_pmu *pmu)
>>>> // Compute the IMC SNC using lookup tables.
>>>> unsigned int imc_num;
>>>> int snc_nodes = snc_nodes_per_l3_cache();
>>>> - const u8 snc2_map[] = {1, 1, 0, 0, 1, 1, 0, 0};
>>>> - const u8 snc3_map[] = {1, 1, 0, 0, 2, 2, 1, 1, 0, 0, 2, 2};
>>>> + const u8 snc2_map[] = {0, 0, 1, 1};
>>> Does this alter the behavior on GNR? ie 1,1,0,0 vs 0,0,1,1.
It appears to break GNR SNC2: the --per-node test still works, but the
affinity test below fails. Testing on EMR shows that EMR does follow the
new lookup table. Should we use a model-specific lookup table here?
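For reference, the two tables in question, read as "SNC node index per IMC
box" (the values are taken from the pre-patch code and from this patch; the
names and comments below are only my reading of them):

/*
 * Pre-patch GNR SNC2 table: IMC boxes 0,1,4,5 -> SNC node 1 of the
 * socket, boxes 2,3,6,7 -> SNC node 0.
 */
const u8 gnr_snc2_map[] = {1, 1, 0, 0, 1, 1, 0, 0};

/*
 * Table this patch proposes, indexed modulo its length, which inverts
 * that assignment on an eight-IMC GNR socket.
 */
const u8 emr_snc2_map[] = {0, 0, 1, 1};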
# Running a memory-heavy workload pinned to CPU0.
$ taskset -c 0 stress-ng --vm 1 --vm-bytes 2G --vm-method all \
    --timeout 30s
# Profiling UNC_M_PRE_COUNT.ALL on all IMC boxes.
$ perf stat \
-e uncore_imc_0/event=0x03,umask=0xFF/ \
-e uncore_imc_1/event=0x03,umask=0xFF/ \
-e uncore_imc_2/event=0x03,umask=0xFF/ \
-e uncore_imc_3/event=0x03,umask=0xFF/ \
-e uncore_imc_4/event=0x03,umask=0xFF/ \
-e uncore_imc_5/event=0x03,umask=0xFF/ \
-e uncore_imc_6/event=0x03,umask=0xFF/ \
-e uncore_imc_7/event=0x03,umask=0xFF/ \
-a -I 1000
The counts show that the uncore_imc_[2|3|6|7] boxes are the ones local to
CPU0: they see the stress-ng traffic, while boxes 0/1/4/5 see only
background traffic.
5.013638757 1,635,470 uncore_imc_0/event=0x03,umask=0xFF/
5.013638757 1,638,157 uncore_imc_1/event=0x03,umask=0xFF/
5.013638757 27,093,922 uncore_imc_2/event=0x03,umask=0xFF/
5.013638757 27,025,980 uncore_imc_3/event=0x03,umask=0xFF/
5.013638757 1,616,974 uncore_imc_4/event=0x03,umask=0xFF/
5.013638757 1,627,251 uncore_imc_5/event=0x03,umask=0xFF/
5.013638757 26,854,588 uncore_imc_6/event=0x03,umask=0xFF/
5.013638757 26,974,506 uncore_imc_7/event=0x03,umask=0xFF/
Repeating the test pinned to CPUs from the other NUMA nodes confirms that
the original GNR SNC2 lookup table is the correct one:
NUMA node CPU(s)                        local uncore_imc boxes
NUMA node0 CPU(s): 0-42,344-386 2 3 6 7
NUMA node1 CPU(s): 43-85,387-429 0 1 4 5
NUMA node2 CPU(s): 86-128,430-472 2 3 6 7
NUMA node3 CPU(s): 129-171,473-515 0 1 4 5
NUMA node4 CPU(s): 172-214,516-558 2 3 6 7
NUMA node5 CPU(s): 215-257,559-601 0 1 4 5
NUMA node6 CPU(s): 258-300,602-644 2 3 6 7
NUMA node7 CPU(s): 301-343,645-687 0 1 4 5
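If a model-specific table is the way to go, a rough sketch of what the
selection could look like, reusing the two tables named above. The
snc2_imc_map() helper below is hypothetical (nothing in the patch defines
it), and x86__is_intel_graniterapids() is the pre-patch helper that this
patch renames:

static const u8 *snc2_imc_map(size_t *len)
{
	/*
	 * Hypothetical: pick the SNC2 IMC map per model instead of
	 * sharing one table between GNR and EMR.
	 */
	if (x86__is_intel_graniterapids()) {
		*len = ARRAY_SIZE(gnr_snc2_map);
		return gnr_snc2_map;
	}
	/* Otherwise EMR; uncore_imc_snc() would index this modulo *len. */
	*len = ARRAY_SIZE(emr_snc2_map);
	return emr_snc2_map;
}

uncore_imc_snc() would then keep the existing GNR behavior unchanged while
still handling EMR with the new values.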
>>> Thanks,
>>> Ian
>>>
>>>> + const u8 snc3_map[] = {1, 1, 0, 0, 2, 2};
>>>> const u8 *snc_map;
>>>> size_t snc_map_len;
>>>>
>>>> @@ -161,11 +171,11 @@ static int uncore_imc_snc(struct perf_pmu *pmu)
>>>> pr_warning("Unexpected: unable to compute IMC number '%s'\n", pmu->name);
>>>> return 0;
>>>> }
>>>> - if (imc_num >= snc_map_len) {
>>>> + if (imc_num >= snc_map_len * perf_cpu_map__nr(pmu->cpus)) {
>>>> pr_warning("Unexpected IMC %d for SNC%d mapping\n", imc_num, snc_nodes);
>>>> return 0;
>>>> }
>>>> - return snc_map[imc_num];
>>>> + return snc_map[imc_num % snc_map_len];
>>>> }
>>>>
>>>> static int uncore_cha_imc_compute_cpu_adjust(int pmu_snc)
>>>> @@ -205,7 +215,7 @@ static int uncore_cha_imc_compute_cpu_adjust(int pmu_snc)
>>>> return cpu_adjust[pmu_snc];
>>>> }
>>>>
>>>> -static void gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool cha)
>>>> +static void uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool cha)
>>>> {
>>>> // With sub-NUMA clustering (SNC) there is a NUMA node per SNC in the
>>>> // topology. For example, a two socket graniterapids machine may be set
>>>> @@ -304,11 +314,12 @@ void perf_pmu__arch_init(struct perf_pmu *pmu)
>>>> pmu->mem_events = perf_mem_events_intel_aux;
>>>> else
>>>> pmu->mem_events = perf_mem_events_intel;
>>>> - } else if (x86__is_intel_graniterapids()) {
>>>> + } else if (x86__is_snc_supported()) {
>>>> if (starts_with(pmu->name, "uncore_cha_"))
>>>> - gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/true);
>>>> - else if (starts_with(pmu->name, "uncore_imc_"))
>>>> - gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/false);
>>>> + uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/true);
>>>> + else if (starts_with(pmu->name, "uncore_imc_") &&
>>>> + !starts_with(pmu->name, "uncore_imc_free_running"))
>>>> + uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/false);
>>>> }
>>>> }
>>>> }
>>>> --
>>>> 2.52.0.457.g6b5491de43-goog
>>>>