[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <b05d487e-bb8b-56df-8304-6e91323e9d83@intel.com>
Date: Thu, 6 Jan 2022 16:15:16 -0800
From: Reinette Chatre <reinette.chatre@...el.com>
To: Shaopeng Tan <tan.shaopeng@...fujitsu.com>,
Fenghua Yu <fenghua.yu@...el.com>,
Shuah Khan <shuah@...nel.org>
CC: <linux-kernel@...r.kernel.org>, <linux-kselftest@...r.kernel.org>
Subject: Re: [PATCH v2] selftests/resctrl: Print a message if the result of
MBM&CMT tests is failed when Intel Sub-NUMA is enabled
Hi Shaopeng Tan,
On 12/13/2021 2:03 AM, Shaopeng Tan wrote:
> If the MBM and CMT tests fail when Intel Sub-NUMA Clustering is
> enabled, print a possible cause of the failure, because
> when the Intel Sub-NUMA Clustering (SNC) feature is enabled,
> the CMT and MBM counters may not be accurate.
>
> Signed-off-by: Shaopeng Tan <tan.shaopeng@...fujitsu.com>
> ---
> Hello,
>
> According to the Intel RDT reference Manual,
> when the sub-numa clustering feature is enabled,
> the CMT and MBM counters may not be accurate.
> When running CMT tests and MBM tests on 2nd Generation
> Intel Xeon Scalable Processor, the result may be "not ok".
> If result of MBM&CMT tests is failed when Intel Sub-NUMA is enabled,
> fix it to print a possible cause of failure,
> instead of SKIP these tests in v1.
>
> Thanks,
>
> tools/testing/selftests/resctrl/Makefile | 1 +
> tools/testing/selftests/resctrl/cmt_test.c | 5 ++-
> tools/testing/selftests/resctrl/mbm_test.c | 5 ++-
> tools/testing/selftests/resctrl/resctrl.h | 2 ++
> .../testing/selftests/resctrl/resctrl_tests.c | 36 +++++++++++++++++++
> tools/testing/selftests/resctrl/resctrlfs.c | 26 ++++++++++++++
> 6 files changed, 73 insertions(+), 2 deletions(-)
>
> diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile
> index adfd92145e12..6d257f40e6ac 100644
> --- a/tools/testing/selftests/resctrl/Makefile
> +++ b/tools/testing/selftests/resctrl/Makefile
> @@ -1,6 +1,7 @@
> #SPDX-License-Identifier: GPL-2.0
>
> CFLAGS += -g -Wall -O2 -D_FORTIFY_SOURCE=2
> +LDLIBS += -lnuma
>
> TEST_GEN_PROGS := resctrl_tests
> EXTRA_SOURCES := $(wildcard *.c)
> diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c
> index 8968e36db99d..c5a49444c5a0 100644
> --- a/tools/testing/selftests/resctrl/cmt_test.c
> +++ b/tools/testing/selftests/resctrl/cmt_test.c
> @@ -136,8 +136,11 @@ int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd)
> return ret;
>
> ret = check_results(&param, n);
> - if (ret)
> + if (ret) {
> + if (sub_numa_cluster_enable)
> + ksft_print_msg("Sub-NUMA Clustering(SNC) feature is enabled, the CMT counters may not be accurate.\n");
> return ret;
> + }
>
> cmt_test_cleanup();
>
> diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c
> index 8392e5c55ed0..7dc1bdf2d0b8 100644
> --- a/tools/testing/selftests/resctrl/mbm_test.c
> +++ b/tools/testing/selftests/resctrl/mbm_test.c
> @@ -136,8 +136,11 @@ int mbm_bw_change(int span, int cpu_no, char *bw_report, char **benchmark_cmd)
> return ret;
>
> ret = check_results(span);
> - if (ret)
> + if (ret) {
> + if (sub_numa_cluster_enable)
> + ksft_print_msg("Sub-NUMA Clustering(SNC) feature is enabled, the MBM counters may not be accurate.\n");
> return ret;
> + }
>
> mbm_test_cleanup();
>
> diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
> index 1ad10c47e31d..4b8ad4fbd016 100644
> --- a/tools/testing/selftests/resctrl/resctrl.h
> +++ b/tools/testing/selftests/resctrl/resctrl.h
> @@ -76,6 +76,7 @@ extern pid_t bm_pid, ppid;
>
> extern char llc_occup_path[1024];
> extern bool is_amd;
> +extern bool sub_numa_cluster_enable;
>
> bool check_resctrlfs_support(void);
> int filter_dmesg(void);
> @@ -85,6 +86,7 @@ int umount_resctrlfs(void);
> int validate_bw_report_request(char *bw_report);
> bool validate_resctrl_feature_request(const char *resctrl_val);
> char *fgrep(FILE *inf, const char *str);
> +char *fgrep_last_match_line(FILE *inf, const char *str);
> int taskset_benchmark(pid_t bm_pid, int cpu_no);
> void run_benchmark(int signum, siginfo_t *info, void *ucontext);
> int write_schemata(char *ctrlgrp, char *schemata, int cpu_no,
> diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
> index 3be0895c492b..bbab4a7f37ed 100644
> --- a/tools/testing/selftests/resctrl/resctrl_tests.c
> +++ b/tools/testing/selftests/resctrl/resctrl_tests.c
> @@ -8,12 +8,15 @@
> * Sai Praneeth Prakhya <sai.praneeth.prakhya@...el.com>,
> * Fenghua Yu <fenghua.yu@...el.com>
> */
> +#include <numa.h>
> +#include <string.h>
> #include "resctrl.h"
>
> #define BENCHMARK_ARGS 64
> #define BENCHMARK_ARG_SIZE 64
>
> bool is_amd;
> +bool sub_numa_cluster_enable;
>
> void detect_amd(void)
> {
> @@ -34,6 +37,35 @@ void detect_amd(void)
> fclose(inf);
> }
>
> +void check_sub_numa_cluster(void)
> +{
> + FILE *inf = fopen("/proc/cpuinfo", "r");
> + char *res, *s;
> + int socket_num = 0;
> + int numa_nodes = 0;
> +
> + if (!inf)
> + return;
> +
> + res = fgrep_last_match_line(inf, "physical id");
> +
> + if (res) {
> + s = strpbrk(res, "1234567890");
> + socket_num = atoi(s) + 1;
> + free(res);
> + }
> + fclose(inf);
> +
> + numa_nodes = numa_max_node() + 1;
> +
> + /*
> + * when the Sub-NUMA Clustering(SNC) feature is enabled,
> + * the number of numa nodes is twice the number of sockets.
> + */
> + if (numa_nodes == (2 * socket_num))
> + sub_numa_cluster_enable = true;
> +}
Unfortunately there does not seem to be an architectural way to detect if
SNC has been enabled and the above test is fragile wrt the assumptions
about the topology of the system. What we need is a reliable and
future-proof test but I do not know what that should be.
Reinette
Powered by blists - more mailing lists