lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20190116131401.GG15409@zn.tnic>
Date:   Wed, 16 Jan 2019 14:14:01 +0100
From:   Borislav Petkov <bp@...en8.de>
To:     kan.liang@...ux.intel.com
Cc:     x86@...nel.org, linux-kernel@...r.kernel.org, tglx@...utronix.de,
        peterz@...radead.org, mingo@...hat.com, ak@...ux.intel.com,
        eranian@...gle.com
Subject: Re: [PATCH V5 2/4] perf/x86/kvm: Avoid unnecessary work in guest
 filtering

On Mon, Jan 07, 2019 at 02:34:23PM -0800, kan.liang@...ux.intel.com wrote:
> From: Andi Kleen <ak@...ux.intel.com>
> 
> KVM added a workaround for PEBS events leaking into guests with
> commit 26a4f3c08de4 ("perf/x86: disable PEBS on a guest entry.")
> This uses the VT entry/exit list to add an extra disable of the
> PEBS_ENABLE MSR.
> 
> Intel also added a fix for this issue to microcode updates on
> Haswell/Broadwell/Skylake.
> 
> It turns out using the MSR entry/exit list makes VM exits
> significantly slower. The list is only needed for disabling
> PEBS, because the GLOBAL_CTRL change gets optimized by
> KVM into changing the VMCS.
> 
> Check for the microcode updates that have the microcode
> fix for leaking PEBS, and disable the extra entry/exit list
> entry for PEBS_ENABLE. In addition we always clear the
> GLOBAL_CTRL for the PEBS counter while running in the guest,
> which is enough to make them never fire at the wrong
> side of the host/guest transition.
> 
> We see significantly reduced overhead for VM exits with the

No "We" in commit messages pls.

> filtering active with the patch from 8% to 4%.
> 
> Signed-off-by: Andi Kleen <ak@...ux.intel.com>

Your SOB is missing.

> ---
> 
> Changes since V4:
> - Use new name x86_cpu_has_min_microcode_rev() and INTEL_CHECK_UCODE
> 
>  arch/x86/events/intel/core.c | 80 +++++++++++++++++++++++++++++++++++++++-----
>  arch/x86/events/perf_event.h |  3 +-
>  2 files changed, 73 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index ecc3e34..587d83e 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -18,6 +18,7 @@
>  #include <asm/hardirq.h>
>  #include <asm/intel-family.h>
>  #include <asm/apic.h>
> +#include <asm/cpu_device_id.h>
>  
>  #include "../perf_event.h"
>  
> @@ -3206,16 +3207,27 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
>  	arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
>  	arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
>  	arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
> -	/*
> -	 * If PMU counter has PEBS enabled it is not enough to disable counter
> -	 * on a guest entry since PEBS memory write can overshoot guest entry
> -	 * and corrupt guest memory. Disabling PEBS solves the problem.
> -	 */
> -	arr[1].msr = MSR_IA32_PEBS_ENABLE;
> -	arr[1].host = cpuc->pebs_enabled;
> -	arr[1].guest = 0;
> +	if (x86_pmu.flags & PMU_FL_PEBS_ALL)
> +		arr[0].guest &= ~cpuc->pebs_enabled;
> +	else
> +		arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
> +	*nr = 1;
> +
> +	if (!x86_pmu.pebs_isolated) {
> +		/*
> +		 * If PMU counter has PEBS enabled it is not enough to
> +		 * disable counter on a guest entry since PEBS memory
> +		 * write can overshoot guest entry and corrupt guest
> +		 * memory. Disabling PEBS solves the problem.
> +		 *
> +		 * Don't do this if the CPU already enforces it.
> +		 */
> +		arr[1].msr = MSR_IA32_PEBS_ENABLE;
> +		arr[1].host = cpuc->pebs_enabled;
> +		arr[1].guest = 0;
> +		*nr = 2;
> +	}
>  
> -	*nr = 2;
>  	return arr;
>  }
>  
> @@ -3733,6 +3745,45 @@ static __init void intel_clovertown_quirk(void)
>  	x86_pmu.pebs_constraints = NULL;
>  }
>  
> +static const struct x86_cpu_check isolation_ucodes[] = {
> +	INTEL_CHECK_UCODE(INTEL_FAM6_HASWELL_CORE,	 3, 0x0000001f),
> +	INTEL_CHECK_UCODE(INTEL_FAM6_HASWELL_ULT,	 1, 0x0000001e),
> +	INTEL_CHECK_UCODE(INTEL_FAM6_HASWELL_GT3E,	 1, 0x00000015),
> +	INTEL_CHECK_UCODE(INTEL_FAM6_HASWELL_X,		 2, 0x00000037),
> +	INTEL_CHECK_UCODE(INTEL_FAM6_HASWELL_X,		 4, 0x0000000a),
> +	INTEL_CHECK_UCODE(INTEL_FAM6_BROADWELL_CORE,	 4, 0x00000023),
> +	INTEL_CHECK_UCODE(INTEL_FAM6_BROADWELL_GT3E,	 1, 0x00000014),
> +	INTEL_CHECK_UCODE(INTEL_FAM6_BROADWELL_XEON_D,	 2, 0x00000010),
> +	INTEL_CHECK_UCODE(INTEL_FAM6_BROADWELL_XEON_D,	 3, 0x07000009),
> +	INTEL_CHECK_UCODE(INTEL_FAM6_BROADWELL_XEON_D,	 4, 0x0f000009),
> +	INTEL_CHECK_UCODE(INTEL_FAM6_BROADWELL_XEON_D,	 5, 0x0e000002),
> +	INTEL_CHECK_UCODE(INTEL_FAM6_BROADWELL_X,	 2, 0x0b000014),
> +	INTEL_CHECK_UCODE(INTEL_FAM6_SKYLAKE_X,		 3, 0x00000021),
> +	INTEL_CHECK_UCODE(INTEL_FAM6_SKYLAKE_X,		 4, 0x00000000),
> +	INTEL_CHECK_UCODE(INTEL_FAM6_SKYLAKE_MOBILE,	 3, 0x0000007c),
> +	INTEL_CHECK_UCODE(INTEL_FAM6_SKYLAKE_DESKTOP,	 3, 0x0000007c),
> +	INTEL_CHECK_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP,	 9, 0x0000004e),
> +	INTEL_CHECK_UCODE(INTEL_FAM6_KABYLAKE_MOBILE,	 9, 0x0000004e),
> +	INTEL_CHECK_UCODE(INTEL_FAM6_KABYLAKE_MOBILE,     10, 0x0000004e),
> +	INTEL_CHECK_UCODE(INTEL_FAM6_KABYLAKE_MOBILE,     11, 0x0000004e),
> +	INTEL_CHECK_UCODE(INTEL_FAM6_KABYLAKE_MOBILE,     12, 0x0000004e),
> +	INTEL_CHECK_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP,    10, 0x0000004e),
> +	INTEL_CHECK_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP,    11, 0x0000004e),
> +	INTEL_CHECK_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP,    12, 0x0000004e),
> +	INTEL_CHECK_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP,    13, 0x0000004e),
> +	INTEL_CHECK_UCODE(INTEL_FAM6_CANNONLAKE_MOBILE,    3, 0x00000000),

Align vertically.

> +	{}
> +};
> +
> +static void intel_check_isolation(void)
> +{
> +	if (!x86_cpu_has_min_microcode_rev(isolation_ucodes)) {
> +		x86_pmu.pebs_isolated = 0;
> +		return;
> +	}
> +	x86_pmu.pebs_isolated = 1;
> +}

Simply:

static void intel_check_isolation(void)
{
        x86_pmu.pebs_isolated = x86_cpu_has_min_microcode_rev(isolation_ucodes);
}

> +
>  static int intel_snb_pebs_broken(int cpu)
>  {
>  	u32 rev = UINT_MAX; /* default to broken for unknown models */
> @@ -3757,6 +3808,8 @@ static void intel_snb_check_microcode(void)
>  	int pebs_broken = 0;
>  	int cpu;
>  
> +	intel_check_isolation();

That looks strange:

on the one hand, this function gets assigned to x86_pmu.check_microcode
but then, on the other, it gets called in intel_snb_check_microcode()
too, where latter gets assigned to that ->check_microcode pointer too.

This needs a cleanup to introduce a single microcode callback function
in this file and that function picks apart what to do based on the
models, etc.

Also, intel_snb_pebs_broken() needs to be converted to this new stepping
checking scheme too, says peterz.

> +
>  	for_each_online_cpu(cpu) {
>  		if ((pebs_broken = intel_snb_pebs_broken(cpu)))
>  			break;
> @@ -3838,6 +3891,12 @@ static __init void intel_sandybridge_quirk(void)
>  	cpus_read_unlock();
>  }
>  
> +static __init void intel_isolation_quirk(void)
> +{
> +	x86_pmu.check_microcode = intel_check_isolation;
> +	intel_check_isolation();
> +}
> +
>  static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
>  	{ PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
>  	{ PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
> @@ -4424,6 +4483,7 @@ __init int intel_pmu_init(void)
>  	case INTEL_FAM6_HASWELL_X:
>  	case INTEL_FAM6_HASWELL_ULT:
>  	case INTEL_FAM6_HASWELL_GT3E:
> +		x86_add_quirk(intel_isolation_quirk);

And reportedly, the quirks are one-off things - not what this one
needs to do. So you need to run this unconditionally at the end of
intel_pmu_init() and get rid of all that quirks indirection.

Thx.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ