[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <a67f01b7-de0a-4ada-b3b0-59b31e330e8c@intel.com>
Date: Tue, 5 Nov 2024 09:08:44 -0800
From: Reinette Chatre <reinette.chatre@...el.com>
To: Maciej Wieczor-Retman <maciej.wieczor-retman@...el.com>,
<fenghua.yu@...el.com>, <shuah@...nel.org>
CC: <linux-kernel@...r.kernel.org>, <linux-kselftest@...r.kernel.org>,
<ilpo.jarvinen@...ux.intel.com>, <tony.luck@...el.com>
Subject: Re: [PATCH v5 1/2] selftests/resctrl: Adjust effective L3 cache size
with SNC enabled
Hi Maciej,
On 10/29/24 6:00 AM, Maciej Wieczor-Retman wrote:
> Sub-NUMA Cluster divides CPUs sharing an L3 cache into separate NUMA
> nodes. Systems may support splitting into either two, three or four
> nodes.
>
> When SNC mode is enabled the effective amount of L3 cache available
> for allocation is divided by the number of nodes per L3.
>
> Detect which SNC mode is active by comparing the number of CPUs
> that share a cache with CPU0, with the number of CPUs on node0.
>
> Co-developed-by: Tony Luck <tony.luck@...el.com>
> Signed-off-by: Tony Luck <tony.luck@...el.com>
> Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman@...el.com>
> ---
...
> diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
> index 2dda56084588..851b37c9c38a 100644
> --- a/tools/testing/selftests/resctrl/resctrl.h
> +++ b/tools/testing/selftests/resctrl/resctrl.h
> @@ -11,6 +11,7 @@
> #include <signal.h>
> #include <dirent.h>
> #include <stdbool.h>
> +#include <ctype.h>
> #include <sys/stat.h>
> #include <sys/ioctl.h>
> #include <sys/mount.h>
> @@ -43,6 +44,8 @@
>
> #define DEFAULT_SPAN (250 * MB)
>
> +#define MAX_SNC 4
> +
> /*
> * user_params: User supplied parameters
> * @cpu: CPU number to which the benchmark will be bound to
> @@ -120,6 +123,7 @@ extern volatile int *value_sink;
>
> extern char llc_occup_path[1024];
>
> +int snc_nodes_per_l3_cache(void);
> int get_vendor(void);
> bool check_resctrlfs_support(void);
> int filter_dmesg(void);
> diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
> index ecbb7605a981..4b84d6199a36 100644
> --- a/tools/testing/selftests/resctrl/resctrl_tests.c
> +++ b/tools/testing/selftests/resctrl/resctrl_tests.c
> @@ -118,11 +118,17 @@ static bool test_vendor_specific_check(const struct resctrl_test *test)
>
> static void run_single_test(const struct resctrl_test *test, const struct user_params *uparams)
> {
> - int ret;
> + int ret, snc_mode;
>
> if (test->disabled)
> return;
>
> + snc_mode = snc_nodes_per_l3_cache();
> + if (snc_mode > 1)
> + ksft_print_msg("SNC-%d mode discovered.\n", snc_mode);
> + else if (snc_unreliable)
> + ksft_print_msg("SNC detection unreliable due to offline CPUs. Test results may not be accurate if SNC enabled.\n");
As I understand none of the tests can be considered reliable if SNC detection is unreliable
so skipping the test can be done here in a central spot without duplicating it in each test.
> +
> if (!test_vendor_specific_check(test)) {
> ksft_test_result_skip("Hardware does not support %s\n", test->name);
> return;
> diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
> index 250c320349a7..d6384f404d95 100644
> --- a/tools/testing/selftests/resctrl/resctrlfs.c
> +++ b/tools/testing/selftests/resctrl/resctrlfs.c
> @@ -13,6 +13,8 @@
>
> #include "resctrl.h"
>
> +int snc_unreliable;
> +
> static int find_resctrl_mount(char *buffer)
> {
> FILE *mounts;
> @@ -156,6 +158,93 @@ int get_domain_id(const char *resource, int cpu_no, int *domain_id)
> return 0;
> }
>
> +/*
> + * Count number of CPUs in a /sys bitmap
> + */
> +static unsigned int count_sys_bitmap_bits(char *name)
> +{
> + FILE *fp = fopen(name, "r");
> + int count = 0, c;
> +
> + if (!fp)
> + return 0;
> +
> + while ((c = fgetc(fp)) != EOF) {
> + if (!isxdigit(c))
> + continue;
> + switch (c) {
> + case 'f':
> + count++;
> + case '7': case 'b': case 'd': case 'e':
> + count++;
> + case '3': case '5': case '6': case '9': case 'a': case 'c':
> + count++;
> + case '1': case '2': case '4': case '8':
> + count++;
> + }
> + }
> + fclose(fp);
> +
> + return count;
> +}
> +
> +static bool cpus_offline_empty(void)
> +{
> + char offline_cpus_str[64];
> + FILE *fp;
> +
> + fp = fopen("/sys/devices/system/cpu/offline", "r");
> + if (fscanf(fp, "%s", offline_cpus_str) < 0) {
> + if (!errno) {
> + fclose(fp);
> + return 1;
> + }
> + ksft_perror("Could not read /sys/devices/system/cpu/offline");
> + }
> +
> + fclose(fp);
> +
> + return 0;
> +}
> +
> +/*
> + * Detect SNC by comparing #CPUs in node0 with #CPUs sharing LLC with CPU0.
> + * If some CPUs are offline the numbers may not be exact multiples of each
> + * other. Any offline CPUs on node0 will be also gone from shared_cpu_map of
> + * CPU0 but offline CPUs from other nodes will only make the cache_cpus value
> + * lower. Still try to get the ratio right by preventing the second possibility.
Similar to v4 this "try to get the ratio right" is unnecessary because of the explicit
check for offline CPUs.
> + */
> +int snc_nodes_per_l3_cache(void)
> +{
> + int node_cpus, cache_cpus, i;
> + static int snc_mode;
> +
> + if (!snc_mode) {
> + snc_mode = 1;
> + if (!cpus_offline_empty()) {
> + ksft_print_msg("Runtime SNC detection unreliable due to offline CPUs.\n");
> + ksft_print_msg("Setting SNC mode to disabled.\n");
> + snc_unreliable = 1;
> + return snc_mode;
> + }
> + node_cpus = count_sys_bitmap_bits("/sys/devices/system/node/node0/cpumap");
> + cache_cpus = count_sys_bitmap_bits("/sys/devices/system/cpu/cpu0/cache/index3/shared_cpu_map");
> +
> + if (!node_cpus || !cache_cpus) {
> + ksft_print_msg("Could not determine Sub-NUMA Cluster mode.\n");
> + return 1;
Unclear why this is hardcoded "1". If this is an error then it should be negative so that
caller can consume as error. If it is intended to represent SNC mode then it should be
"snc_mode".
> + }
> + for (i = 1; i <= MAX_SNC ; i++) {
> + if (i * node_cpus >= cache_cpus) {
> + snc_mode = i;
> + break;
As I understand this complication is no longer needed because of earlier cpus_offline_empty()
check.
> + }
> + }
> + }
> +
> + return snc_mode;
> +}
> +
> /*
> * get_cache_size - Get cache size for a specified CPU
> * @cpu_no: CPU number
> @@ -211,6 +300,17 @@ int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size
> break;
> }
>
> + /*
> + * The amount of cache represented by each bit in the masks
> + * in the schemata file is reduced by a factor equal to SNC
> + * nodes per L3 cache.
> + * E.g. on a SNC-2 system with a 100MB L3 cache a test that
> + * allocates memory from its local SNC node (default behavior
> + * without using libnuma) will only see 50 MB llc_occupancy
> + * with a fully populated L3 mask in the schemata file.
> + */
> + if (cache_num == 3)
> + *cache_size /= snc_nodes_per_l3_cache();
get_cache_size() is also used by L3 CAT test. L3 CAT test is thus also impacted
if SNC cannot be detected reliably and should be skipped.
> return 0;
> }
>
> @@ -869,3 +969,35 @@ unsigned int count_bits(unsigned long n)
>
> return count;
> }
> +
> +/**
> + * snc_kernel_support - Check for existence of mon_sub_L3_00 file that indicates
> + * SNC resctrl support on the kernel side.
> + *
> + * Return: 0 if not supported, 1 if SNC is disabled or SNC discovery is
> + * unreliable or SNC is both enabled and supported.
> + */
> +int snc_kernel_support(void)
> +{
> + char node_path[PATH_MAX];
> + struct stat statbuf;
> + int ret;
> +
> + ret = snc_nodes_per_l3_cache();
> + /*
> + * If SNC is disabled then its kernel support isn't important. If SNC
> + * got disabled because the discovery process was unreliable the
> + * snc_unreliable variable was set. It can be used to verify the SNC
> + * discovery reliability elsewhere in the selftest.
> + */
> + if (ret == 1)
> + return ret;
> +
> + snprintf(node_path, sizeof(node_path), "%s/%s/%s", RESCTRL_PATH, "mon_data",
> + "mon_L3_00/mon_sub_L3_00");
> +
> + if (!stat(node_path, &statbuf))
> + return 1;
> +
> + return 0;
> +}
Reinette
Powered by blists - more mailing lists