Date:   Fri, 10 May 2019 18:37:16 +0100
From:   Andre Przywara <andre.przywara@....com>
To:     Fenghua Yu <fenghua.yu@...el.com>
Cc:     "Thomas Gleixner" <tglx@...utronix.de>,
        "Ingo Molnar" <mingo@...hat.com>, "Borislav Petkov" <bp@...en8.de>,
        "H Peter Anvin" <hpa@...or.com>, "Tony Luck" <tony.luck@...el.com>,
        "Reinette Chatre" <reinette.chatre@...el.com>,
        "Ravi V Shankar" <ravi.v.shankar@...el.com>,
        "Xiaochen Shen" <xiaochen.shen@...el.com>,
        "Arshiya Hayatkhan Pathan" <arshiya.hayatkhan.pathan@...el.com>,
        "Sai Praneeth Prakhya" <sai.praneeth.prakhya@...el.com>,
        "Babu Moger" <babu.moger@....com>,
        "linux-kernel" <linux-kernel@...r.kernel.org>,
        James Morse <James.Morse@....com>
Subject: Re: [PATCH v7 05/13] selftests/resctrl: Add built in benchmark

On Sat,  9 Feb 2019 18:50:34 -0800
Fenghua Yu <fenghua.yu@...el.com> wrote:

Hi,

> From: Sai Praneeth Prakhya <sai.praneeth.prakhya@...el.com>
> 
> Built-in benchmark fill_buf generates stressful memory bandwidth
> and cache traffic.
> 
> Later it will be used as a default benchmark by various resctrl tests
> such as MBA (Memory Bandwidth Allocation) and MBM (Memory Bandwidth
> Monitoring) tests.
> 
> Signed-off-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@...el.com>
> Signed-off-by: Arshiya Hayatkhan Pathan <arshiya.hayatkhan.pathan@...el.com>
> Signed-off-by: Fenghua Yu <fenghua.yu@...el.com>
> Signed-off-by: Babu Moger <babu.moger@....com>
> ---
>  tools/testing/selftests/resctrl/fill_buf.c | 204 +++++++++++++++++++++++++++++
>  1 file changed, 204 insertions(+)
>  create mode 100644 tools/testing/selftests/resctrl/fill_buf.c
> 
> diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c
> new file mode 100644
> index 000000000000..792a5c40a32a
> --- /dev/null
> +++ b/tools/testing/selftests/resctrl/fill_buf.c
> @@ -0,0 +1,204 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * fill_buf benchmark
> + *
> + * Copyright (C) 2018 Intel Corporation
> + *
> + * Authors:
> + *    Arshiya Hayatkhan Pathan <arshiya.hayatkhan.pathan@...el.com>
> + *    Sai Praneeth Prakhya <sai.praneeth.prakhya@...el.com>,
> + *    Fenghua Yu <fenghua.yu@...el.com>
> + */
> +#include <stdio.h>
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <sys/types.h>
> +#include <sys/wait.h>
> +#include <inttypes.h>
> +#include <malloc.h>
> +#include <string.h>
> +
> +#include "resctrl.h"
> +
> +#define CL_SIZE			(64)
> +#define PAGE_SIZE		(4 * 1024)

This is not really true everywhere: I have definitely seen systems with
cache line sizes of 32 and 128 bytes, and arm64 supports 64KB pages.
There are POSIX/glibc functions to read both values at runtime.
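Something along these lines should work (an untested sketch:
sysconf(_SC_PAGESIZE) is POSIX, while _SC_LEVEL1_DCACHE_LINESIZE is a
glibc extension that can return 0 or -1 when the kernel does not report
a line size, hence the fallbacks):

#include <unistd.h>

static size_t get_page_size(void)
{
	long sz = sysconf(_SC_PAGESIZE);	/* POSIX */

	return sz > 0 ? (size_t)sz : 4 * 1024;
}

static size_t get_cache_line_size(void)
{
	long sz = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);	/* glibc extension */

	return sz > 0 ? (size_t)sz : 64;
}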

> +#define MB			(1024 * 1024)
> +
> +static unsigned char *startptr;
> +
> +static void sb(void)
> +{
> +	asm volatile("sfence\n\t"
> +		     : : : "memory");

Please don't use inline x86 assembly without guarding it on other
architectures.
#if defined(__i386) || defined(__x86_64)
does the trick for me. But aren't there functions already defined
somewhere for those barriers?
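For instance (a sketch; __sync_synchronize() is a GCC/clang builtin
that emits a full memory barrier, so it is stronger than sfence but
builds everywhere):

static void sb(void)
{
#if defined(__i386) || defined(__x86_64)
	asm volatile("sfence\n\t"
		     : : : "memory");
#else
	/* full barrier, stronger than needed, but portable */
	__sync_synchronize();
#endif
}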

> +}
> +
> +static void ctrl_handler(int signo)
> +{
> +	free(startptr);
> +	printf("\nEnding\n");
> +	sb();
> +	exit(EXIT_SUCCESS);
> +}
> +
> +static void cl_flush(void *p)
> +{
> +	asm volatile("clflush (%0)\n\t"
> +		     : : "r"(p) : "memory");

Same comment about the use of inline assembly here; this breaks
compilation on arm/arm64.

In general I am a bit suspicious about using this instruction in a
benchmark, since we have no guarantee that it actually affects
performance: the CPU could speculatively fetch the data back in right
after this instruction. Shouldn't we iterate multiple times over the
benchmark and let the average remove the effect of hot or cold caches?
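To fix the build at least, the flush could be guarded per architecture
(an untested sketch: on arm64, "dc civac" cleans and invalidates the
line by virtual address, and should be usable from user space since
Linux sets SCTLR_EL1.UCI):

static void cl_flush(void *p)
{
#if defined(__i386) || defined(__x86_64)
	asm volatile("clflush (%0)\n\t"
		     : : "r"(p) : "memory");
#elif defined(__aarch64__)
	/* clean & invalidate to the point of coherency, by VA */
	asm volatile("dc civac, %0" : : "r"(p) : "memory");
#endif
}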

Cheers,
Andre.

> +}
> +
> +static void mem_flush(void *p, size_t s)
> +{
> +	char *cp = (char *)p;
> +	size_t i = 0;
> +
> +	s = s / CL_SIZE; /* mem size in cache lines */
> +
> +	for (i = 0; i < s; i++)
> +		cl_flush(&cp[i * CL_SIZE]);
> +
> +	sb();
> +}
> +
> +static void *malloc_and_init_memory(size_t s)
> +{
> +	uint64_t *p64;
> +	size_t s64;
> +
> +	void *p = memalign(PAGE_SIZE, s);
> +
> +	p64 = (uint64_t *)p;
> +	s64 = s / sizeof(uint64_t);
> +
> +	while (s64 > 0) {
> +		*p64 = (uint64_t)rand();
> +		p64 += (CL_SIZE / sizeof(uint64_t));
> +		s64 -= (CL_SIZE / sizeof(uint64_t));
> +	}
> +
> +	return p;
> +}
> +
> +static int fill_one_span_read(unsigned char *start_ptr, unsigned char *end_ptr)
> +{
> +	unsigned char sum, *p;
> +
> +	sum = 0;
> +	p = start_ptr;
> +	while (p < end_ptr) {
> +		sum += *p;
> +		p += (CL_SIZE / 2);
> +	}
> +
> +	return sum;
> +}
> +
> +static
> +void fill_one_span_write(unsigned char *start_ptr, unsigned char *end_ptr)
> +{
> +	unsigned char *p;
> +
> +	p = start_ptr;
> +	while (p < end_ptr) {
> +		*p = '1';
> +		p += (CL_SIZE / 2);
> +	}
> +}
> +
> +static int fill_cache_read(unsigned char *start_ptr, unsigned char *end_ptr,
> +			   char *resctrl_val)
> +{
> +	int ret = 0;
> +	FILE *fp;
> +
> +	while (1)
> +		ret = fill_one_span_read(start_ptr, end_ptr);
> +
> +	/* Consume read result so that reading memory is not optimized out. */
> +	fp = fopen("/dev/null", "w");
> +	if (!fp)
> +		perror("Unable to write to /dev/null");
> +	fprintf(fp, "Sum: %d ", ret);
> +	fclose(fp);
> +
> +	return 0;
> +}
> +
> +static int fill_cache_write(unsigned char *start_ptr, unsigned char *end_ptr,
> +			    char *resctrl_val)
> +{
> +	while (1)
> +		fill_one_span_write(start_ptr, end_ptr);
> +
> +	return 0;
> +}
> +
> +static int
> +fill_cache(unsigned long long buf_size, int malloc_and_init, int memflush,
> +	   int op, char *resctrl_val)
> +{
> +	unsigned char *start_ptr, *end_ptr;
> +	unsigned long long i;
> +	int ret;
> +
> +	if (malloc_and_init)
> +		start_ptr = malloc_and_init_memory(buf_size);
> +	else
> +		start_ptr = malloc(buf_size);
> +
> +	if (!start_ptr)
> +		return -1;
> +
> +	startptr = start_ptr;
> +	end_ptr = start_ptr + buf_size;
> +
> +	/*
> +	 * It's better to touch the memory once to avoid any compiler
> +	 * optimizations
> +	 */
> +	if (!malloc_and_init) {
> +		for (i = 0; i < buf_size; i++)
> +			*start_ptr++ = (unsigned char)rand();
> +	}
> +
> +	start_ptr = startptr;
> +
> +	/* Flush the memory before using to avoid "cache hot pages" effect */
> +	if (memflush)
> +		mem_flush(start_ptr, buf_size);
> +
> +	if (op == 0)
> +		ret = fill_cache_read(start_ptr, end_ptr, resctrl_val);
> +	else
> +		ret = fill_cache_write(start_ptr, end_ptr, resctrl_val);
> +
> +	if (ret) {
> +		printf("\n Error in fill cache read/write...\n");
> +		return -1;
> +	}
> +
> +	free(startptr);
> +
> +	return 0;
> +}
> +
> +int run_fill_buf(unsigned long span, int malloc_and_init_memory,
> +		 int memflush, int op, char *resctrl_val)
> +{
> +	unsigned long long cache_size = span;
> +	int ret;
> +
> +	/* set up ctrl-c handler */
> +	if (signal(SIGINT, ctrl_handler) == SIG_ERR)
> +		printf("Failed to catch SIGINT!\n");
> +	if (signal(SIGHUP, ctrl_handler) == SIG_ERR)
> +		printf("Failed to catch SIGHUP!\n");
> +
> +	ret = fill_cache(cache_size, malloc_and_init_memory, memflush, op,
> +			 resctrl_val);
> +	if (ret) {
> +		printf("\n Error in fill cache\n");
> +		return -1;
> +	}
> +
> +	return 0;
> +}
