Resctrl selftest prints a message on test failure that Sub-NUMA Clustering (SNC) could be enabled and points the user to check their BIOS settings. No actual check is performed before printing that message, so it is not very accurate in pinpointing a problem.
Figuring out if SNC is enabled is only one part of the problem; the other is whether the kernel supports it. As there is no easy interface that simply states SNC support in the kernel, one can find that information by comparing L3 cache sizes from different sources. The cache size reported by /sys/devices/system/node/node0/cpu0/cache/index3/size will always show the full cache size even if it's split by enabled SNC. On the other hand, /sys/fs/resctrl/size has information about L3 size which, with kernel support, is adjusted for enabled SNC.
Add a function to find a cache size from /sys/fs/resctrl/size since finding that information from the other source is already implemented.
Add a function that compares the two cache sizes and use it to make the SNC support message more meaningful.
Add the SNC support message just after MBA's check_results() since MBA shares code with MBM and also can suffer from enabled SNC if there is no support in the kernel.
Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com> --- tools/testing/selftests/resctrl/cat_test.c | 2 +- tools/testing/selftests/resctrl/cmt_test.c | 6 +- tools/testing/selftests/resctrl/mba_test.c | 2 + tools/testing/selftests/resctrl/mbm_test.c | 4 +- tools/testing/selftests/resctrl/resctrl.h | 5 +- tools/testing/selftests/resctrl/resctrlfs.c | 69 ++++++++++++++++++++- 6 files changed, 79 insertions(+), 9 deletions(-)
diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c index 4cb991be8e31..1cdaadf35f03 100644 --- a/tools/testing/selftests/resctrl/cat_test.c +++ b/tools/testing/selftests/resctrl/cat_test.c @@ -253,7 +253,7 @@ static int cat_run_test(const struct resctrl_test *test, const struct user_param return ret;
/* Get L3/L2 cache size */ - ret = get_cache_size(uparams->cpu, test->resource, &cache_total_size); + ret = get_sys_cache_size(uparams->cpu, test->resource, &cache_total_size); if (ret) return ret; ksft_print_msg("Cache size :%lu\n", cache_total_size); diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c index a81f91222a89..b7cada602484 100644 --- a/tools/testing/selftests/resctrl/cmt_test.c +++ b/tools/testing/selftests/resctrl/cmt_test.c @@ -112,7 +112,7 @@ static int cmt_run_test(const struct resctrl_test *test, const struct user_param if (ret) return ret;
- ret = get_cache_size(uparams->cpu, "L3", &cache_total_size); + ret = get_sys_cache_size(uparams->cpu, "L3", &cache_total_size); if (ret) return ret; ksft_print_msg("Cache size :%lu\n", cache_total_size); @@ -157,8 +157,8 @@ static int cmt_run_test(const struct resctrl_test *test, const struct user_param goto out;
ret = check_results(&param, span, n); - if (ret && (get_vendor() == ARCH_INTEL)) - ksft_print_msg("Intel CMT may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n"); + if (ret && (get_vendor() == ARCH_INTEL) && snc_ways() > 1 && !snc_kernel_support()) + ksft_print_msg("Kernel doesn't support Sub-NUMA Clustering but it is enabled. Check BIOS configuration.\n");
out: cmt_test_cleanup(); diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c index fc31a61dab0c..89fe3ecbf497 100644 --- a/tools/testing/selftests/resctrl/mba_test.c +++ b/tools/testing/selftests/resctrl/mba_test.c @@ -160,6 +160,8 @@ static int mba_run_test(const struct resctrl_test *test, const struct user_param goto out;
ret = check_results(); + if (ret && (get_vendor() == ARCH_INTEL) && snc_ways() > 1 && !snc_kernel_support()) + ksft_print_msg("Kernel doesn't support Sub-NUMA Clustering but it is enabled. Check BIOS configuration.\n");
out: mba_test_cleanup(); diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c index d67ffa3ec63a..e12b4b06f6d5 100644 --- a/tools/testing/selftests/resctrl/mbm_test.c +++ b/tools/testing/selftests/resctrl/mbm_test.c @@ -129,8 +129,8 @@ static int mbm_run_test(const struct resctrl_test *test, const struct user_param goto out;
ret = check_results(DEFAULT_SPAN); - if (ret && (get_vendor() == ARCH_INTEL)) - ksft_print_msg("Intel MBM may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n"); + if (ret && (get_vendor() == ARCH_INTEL) && snc_ways() > 1 && !snc_kernel_support()) + ksft_print_msg("Kernel doesn't support Sub-NUMA Clustering but it is enabled. Check BIOS configuration.\n");
out: mbm_test_cleanup(); diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h index 178fb2eab13a..038e1269a3fc 100644 --- a/tools/testing/selftests/resctrl/resctrl.h +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -28,6 +28,7 @@ #define RESCTRL_PATH "/sys/fs/resctrl" #define PHYS_ID_PATH "/sys/devices/system/cpu/cpu" #define INFO_PATH "/sys/fs/resctrl/info" +#define SIZE_PATH "/sys/fs/resctrl/size"
/* * CPU vendor IDs @@ -168,14 +169,16 @@ unsigned long create_bit_mask(unsigned int start, unsigned int len); unsigned int count_contiguous_bits(unsigned long val, unsigned int *start); int get_full_cbm(const char *cache_type, unsigned long *mask); int get_mask_no_shareable(const char *cache_type, unsigned long *mask); -int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size); int resource_info_unsigned_get(const char *resource, const char *filename, unsigned int *val); +int get_sys_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size); +int get_resctrl_cache_size(const char *cache_type, unsigned long *cache_size); void ctrlc_handler(int signum, siginfo_t *info, void *ptr); int signal_handler_register(void); void signal_handler_unregister(void); void cat_test_cleanup(void); unsigned int count_bits(unsigned long n); void cmt_test_cleanup(void); +int snc_kernel_support(void);
void perf_event_attr_initialize(struct perf_event_attr *pea, __u64 config); void perf_event_initialize_read_format(struct perf_event_read *pe_read); diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c index e4d3624a8817..dbd10cb7abf5 100644 --- a/tools/testing/selftests/resctrl/resctrlfs.c +++ b/tools/testing/selftests/resctrl/resctrlfs.c @@ -214,14 +214,14 @@ int snc_ways(void) }
/* - * get_cache_size - Get cache size for a specified CPU + * get_sys_cache_size - Get cache size for a specified CPU * @cpu_no: CPU number * @cache_type: Cache level L2/L3 * @cache_size: pointer to cache_size * * Return: = 0 on success, < 0 on failure. */ -int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size) +int get_sys_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size) { char cache_path[1024], cache_str[64]; int length, i, cache_num; @@ -273,6 +273,44 @@ int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size return 0; }
+/* + * get_resctrl_cache_size - Get cache size as reported by resctrl + * @cache_type: Cache level L2/L3 + * @cache_size: pointer to cache_size; written only when a matching entry is successfully parsed + * + * Reads SIZE_PATH line by line, looks for the cache_type name followed by a colon, and parses the number that follows the 0= entry on that line. + * + * NOTE(review): the early return on sscanf() failure skips fclose(fp); 0 is also returned when no line matches, leaving the value behind cache_size untouched; strcpy() into the 16-byte cache_prefix is not bounds-checked. Confirm whether these are intended. + * + * Return: = 0 on success, < 0 on failure. + */ +int get_resctrl_cache_size(const char *cache_type, unsigned long *cache_size) +{ + char line[256], cache_prefix[16], *stripped_line, *token; + size_t len; + FILE *fp; + + strcpy(cache_prefix, cache_type); + strncat(cache_prefix, ":", 1); + + fp = fopen(SIZE_PATH, "r"); + if (!fp) { + ksft_print_msg("Failed to open %s : '%s'\n", + SIZE_PATH, strerror(errno)); + return -1; + } + + while (fgets(line, sizeof(line), fp)) { + stripped_line = strstr(line, cache_prefix); + + if (stripped_line) { + len = strlen(cache_prefix); + stripped_line += len; + token = strtok(stripped_line, ";"); + if (sscanf(token, "0=%lu", cache_size) <= 0) + return -1; + } + } + fclose(fp); + return 0; +} + #define CORE_SIBLINGS_PATH "/sys/bus/cpu/devices/cpu"
/* @@ -935,3 +973,30 @@ unsigned int count_bits(unsigned long n)
return count; } + +/** + * snc_kernel_support - Compare system reported cache size and resctrl + * reported cache size to get an idea if SNC is supported on the kernel side. + * If SNC is enabled and the kernel does support it the values should be equal. + * If the kernel doesn't support SNC the values are expected to differ. + * + * The comparison uses the L3 size obtained for CPU 0 and the L3 size parsed from the resctrl size file. + * + * NOTE(review): a negative (failure) return is non-zero, so a caller testing !snc_kernel_support() treats a failure the same as "supported" - confirm this is intended. + * + * Return: 0 if not supported, 1 if supported, < 0 on failure. + */ +int snc_kernel_support(void) +{ + unsigned long resctrl_cache_size, node_cache_size; + int ret; + + ret = get_sys_cache_size(0, "L3", &node_cache_size); + if (ret < 0) + return ret; + + ret = get_resctrl_cache_size("L3", &resctrl_cache_size); + if (ret < 0) + return ret; + + if (resctrl_cache_size == node_cache_size) + return 1; + + return 0; +}