lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 28 Apr 2015 17:55:11 +0200
From:	Borislav Petkov <bp@...en8.de>
To:	"H. Peter Anvin" <hpa@...or.com>
Cc:	Linus Torvalds <torvalds@...ux-foundation.org>,
	Andy Lutomirski <luto@...capital.net>,
	Andy Lutomirski <luto@...nel.org>, X86 ML <x86@...nel.org>,
	Denys Vlasenko <vda.linux@...glemail.com>,
	Brian Gerst <brgerst@...il.com>,
	Denys Vlasenko <dvlasenk@...hat.com>,
	Ingo Molnar <mingo@...nel.org>,
	Steven Rostedt <rostedt@...dmis.org>,
	Oleg Nesterov <oleg@...hat.com>,
	Frederic Weisbecker <fweisbec@...il.com>,
	Alexei Starovoitov <ast@...mgrid.com>,
	Will Drewry <wad@...omium.org>,
	Kees Cook <keescook@...omium.org>,
	Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
	Mel Gorman <mgorman@...e.com>
Subject: Re: [PATCH] x86_64, asm: Work around AMD SYSRET SS descriptor
 attribute issue

On Mon, Apr 27, 2015 at 01:14:51PM -0700, H. Peter Anvin wrote:
> I did a microbenchmark in user space... let's see if I can find it.

How about the simple one below?

Provided it is correct, it shows that the 0x66-prefixed 3-byte NOPs are
better than the 0F 1F 00 suggested by the manual (Haha!):

$ taskset -c 3 ./nops
Running 600 times, 10000000 loops per run.
nop_0x90 average: 439.805220
nop_3_byte average: 442.412915

---
/*
 * How to run (taskset pins the benchmark to a single CPU):
 *
 * taskset -c <cpunum> ./nops
 */
#include <stdio.h>
#include <sys/syscall.h>
#include <stdlib.h>
#include <unistd.h>

typedef unsigned long long u64;

/*
 * Operand helpers for instructions that hand back a 64-bit result split
 * across EAX (low 32 bits) and EDX (high 32 bits).  The "val" argument is
 * unused by every macro here.
 */
#define DECLARE_ARGS(val, low, high)    unsigned low, high
#define EAX_EDX_VAL(val, low, high)     ((low) | ((u64)(high) << 32))
#define EAX_EDX_ARGS(val, low, high)    "a" (low), "d" (high)
#define EAX_EDX_RET(val, low, high)     "=a" (low), "=d" (high)

/*
 * Execute RDTSC and return the counter as one 64-bit value.
 *
 * The always_inline attribute is spelled out directly instead of relying on
 * an __always_inline macro, which only exists if some libc/kernel header
 * happens to define it.  __asm__/__volatile__ are used because the plain
 * "asm" keyword is not available when compiling with -std=c99/-std=c11.
 *
 * No ordering is applied here; callers that need it must add their own
 * fences around the call.
 */
static inline __attribute__((__always_inline__)) u64 rdtsc(void)
{
	DECLARE_ARGS(val, low, high);

	__asm__ __volatile__("rdtsc" : EAX_EDX_RET(val, low, high));

	return EAX_EDX_VAL(val, low, high);
}

/*
 * Read the TSC with an MFENCE issued immediately before and immediately
 * after the RDTSC.
 *
 * Each fence carries a "memory" clobber so that it also acts as a compiler
 * barrier: without it the fence instruction is emitted, but the compiler
 * remains free to move memory accesses across it.  __asm__/__volatile__ are
 * used because plain "asm" is rejected under -std=c99/-std=c11.
 *
 * NOTE(review): whether MFENCE is sufficient to order RDTSC depends on the
 * CPU vendor/microarchitecture -- confirm for the machine under test.
 *
 * Returns the 64-bit counter value produced by rdtsc().
 */
static inline u64 read_tsc(void)
{
	u64 ret;

	__asm__ __volatile__("mfence" ::: "memory");
	ret = rdtsc();
	__asm__ __volatile__("mfence" ::: "memory");

	return ret;
}

/*
 * Emit 15 back-to-back copies of the 3-byte sequence 66 66 90, i.e. the
 * one-byte NOP opcode 0x90 preceded by two 0x66 prefixes (45 bytes total).
 *
 * The bytes are emitted with .byte so the assembler cannot pick a different
 * encoding.  __asm__/__volatile__ are used because plain "asm" is rejected
 * under -std=c99/-std=c11; volatile keeps the statement from being dropped
 * even though it has no outputs.
 */
static inline void nop_0x90(void)
{
	__asm__ __volatile__(
			".byte 0x66, 0x66, 0x90\n\t"
			".byte 0x66, 0x66, 0x90\n\t"
			".byte 0x66, 0x66, 0x90\n\t"
			".byte 0x66, 0x66, 0x90\n\t"
			".byte 0x66, 0x66, 0x90\n\t"

			".byte 0x66, 0x66, 0x90\n\t"
			".byte 0x66, 0x66, 0x90\n\t"
			".byte 0x66, 0x66, 0x90\n\t"
			".byte 0x66, 0x66, 0x90\n\t"
			".byte 0x66, 0x66, 0x90\n\t"

			".byte 0x66, 0x66, 0x90\n\t"
			".byte 0x66, 0x66, 0x90\n\t"
			".byte 0x66, 0x66, 0x90\n\t"
			".byte 0x66, 0x66, 0x90\n\t"
			".byte 0x66, 0x66, 0x90\n\t"
		    );
}

/*
 * Emit 15 back-to-back copies of the 3-byte sequence 0F 1F 00, the
 * two-byte-opcode NOP form with a ModRM byte (45 bytes total).
 *
 * The bytes are emitted with .byte so the assembler cannot pick a different
 * encoding.  __asm__/__volatile__ are used because plain "asm" is rejected
 * under -std=c99/-std=c11; volatile keeps the statement from being dropped
 * even though it has no outputs.
 */
static inline void nop_3_byte(void)
{
	__asm__ __volatile__(
			".byte 0x0f, 0x1f, 0x00\n\t"
			".byte 0x0f, 0x1f, 0x00\n\t"
			".byte 0x0f, 0x1f, 0x00\n\t"
			".byte 0x0f, 0x1f, 0x00\n\t"
			".byte 0x0f, 0x1f, 0x00\n\t"

			".byte 0x0f, 0x1f, 0x00\n\t"
			".byte 0x0f, 0x1f, 0x00\n\t"
			".byte 0x0f, 0x1f, 0x00\n\t"
			".byte 0x0f, 0x1f, 0x00\n\t"
			".byte 0x0f, 0x1f, 0x00\n\t"

			".byte 0x0f, 0x1f, 0x00\n\t"
			".byte 0x0f, 0x1f, 0x00\n\t"
			".byte 0x0f, 0x1f, 0x00\n\t"
			".byte 0x0f, 0x1f, 0x00\n\t"
			".byte 0x0f, 0x1f, 0x00\n\t"
		    );
}

/*
 * Benchmark driver.
 *
 * For each NOP flavour, performs TIMES runs of LOOPS timed iterations.  One
 * iteration reads the TSC, executes the NOP sequence, reads the TSC again
 * and adds the difference to a per-run cycle total.  The per-run mean
 * (total / LOOPS) is accumulated, and the mean of those per-run means is
 * printed.  Each sample covers everything between the two read_tsc() calls,
 * not just the NOPs themselves.
 *
 * The two measurement loops are intentionally written out separately so the
 * NOP sequence under test is called directly inside the timed region.
 *
 * Returns 0.
 */
int main(void)
{
	/* 64-bit counter: matches the type of LOOPS in the loop condition. */
	u64 iter;
	u64 start, end;
	u64 cycles;
	int run;
	double avg;

#define TIMES 600
#define LOOPS 10000000ULL

	/* LOOPS is unsigned long long, so it is printed with %llu. */
	printf("Running %d times, %llu loops per run.\n", TIMES, LOOPS);

	avg = 0;

	for (run = 0; run < TIMES; run++) {
		cycles = 0;

		for (iter = 0; iter < LOOPS; iter++) {
			start = read_tsc();
			nop_0x90();
			end = read_tsc();

			cycles += end - start;
		}

		/* Mean cycles per timed iteration for this run. */
		avg += (double)cycles / LOOPS;
	}

	printf("nop_0x90 average: %f\n", avg/TIMES);

	avg = 0;

	for (run = 0; run < TIMES; run++) {
		cycles = 0;

		for (iter = 0; iter < LOOPS; iter++) {
			start = read_tsc();
			nop_3_byte();
			end = read_tsc();

			cycles += end - start;
		}

		/* Mean cycles per timed iteration for this run. */
		avg += (double)cycles / LOOPS;
	}

	printf("nop_3_byte average: %f\n", avg/TIMES);

	return 0;
}

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
--
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ