lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAFg_LQVAfS_neNsZftUMTN33-ZhzNBdnOhELCPPksdkK8peZGw@mail.gmail.com>
Date:   Tue, 7 Nov 2023 18:51:08 +0800
From:   Jinrong Liang <ljr.kernel@...il.com>
To:     Sean Christopherson <seanjc@...gle.com>
Cc:     Jim Mattson <jmattson@...gle.com>,
        Paolo Bonzini <pbonzini@...hat.com>, kvm@...r.kernel.org,
        linux-kernel@...r.kernel.org,
        Kan Liang <kan.liang@...ux.intel.com>,
        Dapeng Mi <dapeng1.mi@...ux.intel.com>,
        Like Xu <likexu@...cent.com>,
        Aaron Lewis <aaronlewis@...gle.com>,
        Jinrong Liang <cloudliang@...cent.com>
Subject: Re: [PATCH v6 09/20] KVM: selftests: Add pmu.h and lib/pmu.c for
 common PMU assets

Sean Christopherson <seanjc@...gle.com> 于2023年11月7日周二 04:40写道:
>
> On Mon, Nov 06, 2023, JinrongLiang wrote:
> > 在 2023/11/4 21:20, Jim Mattson 写道:
> > > > diff --git a/tools/testing/selftests/kvm/include/pmu.h b/tools/testing/selftests/kvm/include/pmu.h
> > > > new file mode 100644
> > > > index 000000000000..987602c62b51
> > > > --- /dev/null
> > > > +++ b/tools/testing/selftests/kvm/include/pmu.h
> > > > @@ -0,0 +1,84 @@
> > > > +/* SPDX-License-Identifier: GPL-2.0-only */
> > > > +/*
> > > > + * Copyright (C) 2023, Tencent, Inc.
> > > > + */
> > > > +#ifndef SELFTEST_KVM_PMU_H
> > > > +#define SELFTEST_KVM_PMU_H
> > > > +
> > > > +#include <stdint.h>
> > > > +
> > > > +#define X86_PMC_IDX_MAX                                64
> > > > +#define INTEL_PMC_MAX_GENERIC                          32
> > >
> > > I think this is actually 15. Note that IA32_PMC0 through IA32_PMC7
> > > have MSR indices from 0xc1 through 0xc8, and MSR 0xcf is
> > > IA32_CORE_CAPABILITIES. At the very least, we have to handle
> > > non-contiguous MSR indices if we ever go beyond IA32_PMC14.
>
> There's no reason to define this, it's not used in selftests.
>
> > > > +#define KVM_PMU_EVENT_FILTER_MAX_EVENTS                300
> > > > +
> > > > +#define GP_COUNTER_NR_OFS_BIT                          8
> > > > +#define EVENT_LENGTH_OFS_BIT                           24
> > > > +
> > > > +#define PMU_VERSION_MASK                               GENMASK_ULL(7, 0)
> > > > +#define EVENT_LENGTH_MASK                              GENMASK_ULL(31, EVENT_LENGTH_OFS_BIT)
> > > > +#define GP_COUNTER_NR_MASK                             GENMASK_ULL(15, GP_COUNTER_NR_OFS_BIT)
> > > > +#define FIXED_COUNTER_NR_MASK                          GENMASK_ULL(4, 0)
>
> These are also unneeded, they're superseded by CPUID properties.
>
> > > > +#define ARCH_PERFMON_EVENTSEL_EVENT                    GENMASK_ULL(7, 0)
> > > > +#define ARCH_PERFMON_EVENTSEL_UMASK                    GENMASK_ULL(15, 8)
> > > > +#define ARCH_PERFMON_EVENTSEL_USR                      BIT_ULL(16)
> > > > +#define ARCH_PERFMON_EVENTSEL_OS                       BIT_ULL(17)
> > > > +#define ARCH_PERFMON_EVENTSEL_EDGE                     BIT_ULL(18)
> > > > +#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL              BIT_ULL(19)
> > > > +#define ARCH_PERFMON_EVENTSEL_INT                      BIT_ULL(20)
> > > > +#define ARCH_PERFMON_EVENTSEL_ANY                      BIT_ULL(21)
> > > > +#define ARCH_PERFMON_EVENTSEL_ENABLE                   BIT_ULL(22)
> > > > +#define ARCH_PERFMON_EVENTSEL_INV                      BIT_ULL(23)
> > > > +#define ARCH_PERFMON_EVENTSEL_CMASK                    GENMASK_ULL(31, 24)
> > > > +
> > > > +#define PMC_MAX_FIXED                                  16
>
> Also unneeded.
>
> > > > +#define PMC_IDX_FIXED                                  32
>
> This one is absolutely ridiculous.  It's the shift for the enable bit in global
> control, which is super obvious from the name. /s
>
> > > > +
> > > > +/* RDPMC offset for Fixed PMCs */
> > > > +#define PMC_FIXED_RDPMC_BASE                           BIT_ULL(30)
> > > > +#define PMC_FIXED_RDPMC_METRICS                        BIT_ULL(29)
> > > > +
> > > > +#define FIXED_BITS_MASK                                0xFULL
> > > > +#define FIXED_BITS_STRIDE                              4
> > > > +#define FIXED_0_KERNEL                                 BIT_ULL(0)
> > > > +#define FIXED_0_USER                                   BIT_ULL(1)
> > > > +#define FIXED_0_ANYTHREAD                              BIT_ULL(2)
> > > > +#define FIXED_0_ENABLE_PMI                             BIT_ULL(3)
> > > > +
> > > > +#define fixed_bits_by_idx(_idx, _bits)                 \
> > > > +       ((_bits) << ((_idx) * FIXED_BITS_STRIDE))
>
> *sigh*  And now I see where the "i * 4" stuff in the new test comes from.  My
> plan is to redo the above as:
>
> /* RDPMC offset for Fixed PMCs */
> #define FIXED_PMC_RDPMC_METRICS                 BIT_ULL(29)
> #define FIXED_PMC_RDPMC_BASE                    BIT_ULL(30)
>
> #define FIXED_PMC_GLOBAL_CTRL_ENABLE(_idx)      BIT_ULL((32 + (_idx)))
>
> #define FIXED_PMC_KERNEL                        BIT_ULL(0)
> #define FIXED_PMC_USER                          BIT_ULL(1)
> #define FIXED_PMC_ANYTHREAD                     BIT_ULL(2)
> #define FIXED_PMC_ENABLE_PMI                    BIT_ULL(3)
> #define FIXED_PMC_NR_BITS                       4
> #define FIXED_PMC_CTRL(_idx, _val)              ((_val) << ((_idx) * FIXED_PMC_NR_BITS))
>
> > > > +#define AMD64_NR_COUNTERS                              4
> > > > +#define AMD64_NR_COUNTERS_CORE                         6
>
> These too can be dropped for now.
>
> > > > +#define PMU_CAP_FW_WRITES                              BIT_ULL(13)
> > > > +#define PMU_CAP_LBR_FMT                                0x3f
> > > > +
> > > > +enum intel_pmu_architectural_events {
> > > > +       /*
> > > > +        * The order of the architectural events matters as support for each
> > > > +        * event is enumerated via CPUID using the index of the event.
> > > > +        */
> > > > +       INTEL_ARCH_CPU_CYCLES,
> > > > +       INTEL_ARCH_INSTRUCTIONS_RETIRED,
> > > > +       INTEL_ARCH_REFERENCE_CYCLES,
> > > > +       INTEL_ARCH_LLC_REFERENCES,
> > > > +       INTEL_ARCH_LLC_MISSES,
> > > > +       INTEL_ARCH_BRANCHES_RETIRED,
> > > > +       INTEL_ARCH_BRANCHES_MISPREDICTED,
> > > > +       NR_INTEL_ARCH_EVENTS,
> > > > +};
> > > > +
> > > > +enum amd_pmu_k7_events {
> > > > +       AMD_ZEN_CORE_CYCLES,
> > > > +       AMD_ZEN_INSTRUCTIONS,
> > > > +       AMD_ZEN_BRANCHES,
> > > > +       AMD_ZEN_BRANCH_MISSES,
> > > > +       NR_AMD_ARCH_EVENTS,
> > > > +};
> > > > +
> > > > +extern const uint64_t intel_pmu_arch_events[];
> > > > +extern const uint64_t amd_pmu_arch_events[];
> > >
> > > AMD doesn't define *any* architectural events. Perhaps
> > > amd_pmu_zen_events[], though who knows what Zen5 and beyond will
> > > bring?
> > >
> > > > +extern const int intel_pmu_fixed_pmc_events[];
> > > > +
> > > > +#endif /* SELFTEST_KVM_PMU_H */
> > > > diff --git a/tools/testing/selftests/kvm/lib/pmu.c b/tools/testing/selftests/kvm/lib/pmu.c
> > > > new file mode 100644
> > > > index 000000000000..27a6c35f98a1
> > > > --- /dev/null
> > > > +++ b/tools/testing/selftests/kvm/lib/pmu.c
> > > > @@ -0,0 +1,28 @@
> > > > +// SPDX-License-Identifier: GPL-2.0-only
> > > > +/*
> > > > + * Copyright (C) 2023, Tencent, Inc.
> > > > + */
> > > > +
> > > > +#include <stdint.h>
> > > > +
> > > > +#include "pmu.h"
> > > > +
> > > > +/* Definitions for Architectural Performance Events */
> > > > +#define ARCH_EVENT(select, umask) (((select) & 0xff) | ((umask) & 0xff) << 8)
> > >
> > > There's nothing architectural about this. Perhaps RAW_EVENT() for
> > > consistency with perf?
>
> Works for me.
>
> > > > +const uint64_t intel_pmu_arch_events[] = {
> > > > +       [INTEL_ARCH_CPU_CYCLES]                 = ARCH_EVENT(0x3c, 0x0),
> > > > +       [INTEL_ARCH_INSTRUCTIONS_RETIRED]       = ARCH_EVENT(0xc0, 0x0),
> > > > +       [INTEL_ARCH_REFERENCE_CYCLES]           = ARCH_EVENT(0x3c, 0x1),
> > > > +       [INTEL_ARCH_LLC_REFERENCES]             = ARCH_EVENT(0x2e, 0x4f),
> > > > +       [INTEL_ARCH_LLC_MISSES]                 = ARCH_EVENT(0x2e, 0x41),
> > > > +       [INTEL_ARCH_BRANCHES_RETIRED]           = ARCH_EVENT(0xc4, 0x0),
> > > > +       [INTEL_ARCH_BRANCHES_MISPREDICTED]      = ARCH_EVENT(0xc5, 0x0),
> > >
> > > [INTEL_ARCH_TOPDOWN_SLOTS] = ARCH_EVENT(0xa4, 1),
>
> ...
>
> > > > @@ -63,7 +50,6 @@
> > > >
> > > >   #define AMD_ZEN_BR_RETIRED EVENT(0xc2, 0)
> > >
> > > Now AMD_ZEN_BRANCHES, above?
> >
> > Yes, I forgot to replace INTEL_BR_RETIRED, AMD_ZEN_BR_RETIRED and
> > INST_RETIRED in pmu_event_filter_test.c and remove their macro definitions.
>
> Having to go through an array to get a hardcoded value is silly, e.g. it makes
> it unnecessarily difficult to reference the encodings because they aren't simple
> literals.
>
> My vote is this:
>
> #define INTEL_ARCH_CPU_CYCLES                   RAW_EVENT(0x3c, 0x00)
> #define INTEL_ARCH_INSTRUCTIONS_RETIRED         RAW_EVENT(0xc0, 0x00)
> #define INTEL_ARCH_REFERENCE_CYCLES             RAW_EVENT(0x3c, 0x01)
> #define INTEL_ARCH_LLC_REFERENCES               RAW_EVENT(0x2e, 0x4f)
> #define INTEL_ARCH_LLC_MISSES                   RAW_EVENT(0x2e, 0x41)
> #define INTEL_ARCH_BRANCHES_RETIRED             RAW_EVENT(0xc4, 0x00)
> #define INTEL_ARCH_BRANCHES_MISPREDICTED        RAW_EVENT(0xc5, 0x00)
> #define INTEL_ARCH_TOPDOWN_SLOTS                RAW_EVENT(0xa4, 0x01)
>
> #define AMD_ZEN_CORE_CYCLES                     RAW_EVENT(0x76, 0x00)
> #define AMD_ZEN_INSTRUCTIONS_RETIRED            RAW_EVENT(0xc0, 0x00)
> #define AMD_ZEN_BRANCHES_RETIRED                RAW_EVENT(0xc2, 0x00)
> #define AMD_ZEN_BRANCHES_MISPREDICTED           RAW_EVENT(0xc3, 0x00)
>
> /*
>  * Note!  The order and thus the index of the architectural events matters as
>  * support for each event is enumerated via CPUID using the index of the event.
>  */
> enum intel_pmu_architectural_events {
>         INTEL_ARCH_CPU_CYCLES_INDEX,
>         INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX,
>         INTEL_ARCH_REFERENCE_CYCLES_INDEX,
>         INTEL_ARCH_LLC_REFERENCES_INDEX,
>         INTEL_ARCH_LLC_MISSES_INDEX,
>         INTEL_ARCH_BRANCHES_RETIRED_INDEX,
>         INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX,
>         INTEL_ARCH_TOPDOWN_SLOTS_INDEX,
>         NR_INTEL_ARCH_EVENTS,
> };
>
> enum amd_pmu_zen_events {
>         AMD_ZEN_CORE_CYCLES_INDEX,
>         AMD_ZEN_INSTRUCTIONS_INDEX,
>         AMD_ZEN_BRANCHES_INDEX,
>         AMD_ZEN_BRANCH_MISSES_INDEX,
>         NR_AMD_ZEN_EVENTS,
> };
>
> extern const uint64_t intel_pmu_arch_events[];
> extern const uint64_t amd_pmu_zen_events[];
>
> ...
>
>
> const uint64_t intel_pmu_arch_events[] = {
>         INTEL_ARCH_CPU_CYCLES,
>         INTEL_ARCH_INSTRUCTIONS_RETIRED,
>         INTEL_ARCH_REFERENCE_CYCLES,
>         INTEL_ARCH_LLC_REFERENCES,
>         INTEL_ARCH_LLC_MISSES,
>         INTEL_ARCH_BRANCHES_RETIRED,
>         INTEL_ARCH_BRANCHES_MISPREDICTED,
>         INTEL_ARCH_TOPDOWN_SLOTS,
> };
> kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS);
>
> const uint64_t amd_pmu_zen_events[] = {
>         AMD_ZEN_CORE_CYCLES,
>         AMD_ZEN_INSTRUCTIONS_RETIRED,
>         AMD_ZEN_BRANCHES_RETIRED,
>         AMD_ZEN_BRANCHES_MISPREDICTED,
> };
> kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS);

LGTM, thanks.

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ