lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Sun, 22 Mar 2020 17:14:47 -0700
From:   Ian Rogers <irogers@...gle.com>
To:     Peter Zijlstra <peterz@...radead.org>
Cc:     Ingo Molnar <mingo@...hat.com>,
        Arnaldo Carvalho de Melo <acme@...nel.org>,
        Mark Rutland <mark.rutland@....com>,
        Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
        Jiri Olsa <jolsa@...hat.com>,
        Namhyung Kim <namhyung@...nel.org>,
        Andi Kleen <ak@...ux.intel.com>,
        Kan Liang <kan.liang@...ux.intel.com>,
        LKML <linux-kernel@...r.kernel.org>,
        Stephane Eranian <eranian@...gle.com>
Subject: Re: [PATCH v2] perf test x86: address multiplexing in rdpmc test

On Sun, Mar 22, 2020 at 3:18 AM Peter Zijlstra <peterz@...radead.org> wrote:
>
> On Sat, Mar 21, 2020 at 10:37:10AM -0700, Ian Rogers wrote:
>
> > +static u64 mmap_read_self(void *addr, u64 *running)
> >  {
> >       struct perf_event_mmap_page *pc = addr;
> >       u32 seq, idx, time_mult = 0, time_shift = 0;
> > -     u64 count, cyc = 0, time_offset = 0, enabled, running, delta;
> > +     u64 count, cyc = 0, time_offset = 0, enabled, delta;
> >
> >       do {
> >               seq = pc->lock;
> >               barrier();
> >
> >               enabled = pc->time_enabled;
> > -             running = pc->time_running;
> > -
> > -             if (enabled != running) {
> > +             *running = pc->time_running;
> > +
> > +             if (*running == 0) {
> > +                     /*
> > +                      * Counter won't have a value as due to multiplexing the
> > +                      * event wasn't scheduled.
> > +                      */
> > +                     return 0;
> > +             }
>
> I still think adding code for an error case here is a bad idea. And only
> passing running as an argument is inconsistent.
>
> Also, then I had a look at what the compiler made of that function and
> cried.
>
> Here's something a little better. Much of it copied from linux/math64.h
> and asm/div64.h.
>
> ---
> diff --git a/tools/perf/arch/x86/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c
> index 1ea916656a2d..386a6dacb21e 100644
> --- a/tools/perf/arch/x86/tests/rdpmc.c
> +++ b/tools/perf/arch/x86/tests/rdpmc.c
> @@ -34,20 +34,98 @@ static u64 rdtsc(void)
>         return low | ((u64)high) << 32;
>  }
>
> -static u64 mmap_read_self(void *addr)
> +#ifdef __x86_64__
> +static inline u64 mul_u64_u64_div64(u64 a, u64 b, u64 c)
> +{
> +       u64 q;
> +
> +       asm ("mulq %2; divq %3" : "=a" (q)
> +                               : "a" (a), "rm" (b), "rm" (c)
> +                               : "rdx");
> +
> +       return q;
> +}
> +#define mul_u64_u64_div64 mul_u64_u64_div64
> +#endif
> +
> +#ifdef __SIZEOF_INT128__
> +
> +static inline u64 mul_u64_u32_shr(u64 a, u32 b, unsigned int shift)
> +{
> +       return (u64)(((unsigned __int128)a * b) >> shift);
> +}
> +
> +#ifndef mul_u64_u64_div64
> +static inline u64 mul_u64_u64_div64(u64 a, u64 b, u64 c)
> +{
> +       unsigned __int128 m = a;
> +       m *= b;
> +       return m / c;
> +}
> +#endif
> +
> +#else
> +
> +#ifdef __i386__
> +static inline u64 mul_u32_u32(u32 a, u32 b)
> +{
> +       u32 high, low;
> +
> +       asm ("mull %[b]" : "=a" (low), "=d" (high)
> +                        : [a] "a" (a), [b] "rm" (b) );
> +
> +       return low | ((u64)high) << 32;
> +}
> +#else
> +static inline u64 mul_u32_u32(u32 a, u32 b)
> +{
> +       return (u64)a * b;
> +}
> +#endif
> +
> +static inline u64 mul_u64_u32_shr(u64 a, u32 b, unsigned int shift)
> +{
> +       u32 ah, al;
> +       u64 ret;
> +
> +       al = a;
> +       ah = a >> 32;
> +
> +       ret = mul_u32_u32(al, mul) >> shift;
> +       if (ah)
> +               ret += mul_u32_u32(ah, mul) << (32 - shift);
> +
> +       return ret;
> +}
> +
> +#ifndef mul_u64_u64_div64
> +static inline u64 mul_u64_u64_div64(u64 a, u64 b, u64 c)
> +{
> +       u64 quot, rem;
> +
> +       quot = a / c;
> +       rem = a % c;
> +
> +       return qout * b + (rem * b) / c;
> +}
> +#endif
> +
> +#endif
> +
> +static u64 mmap_read_self(void *addr, u64 *enabled, u64 *running)
>  {
>         struct perf_event_mmap_page *pc = addr;
>         u32 seq, idx, time_mult = 0, time_shift = 0;
> -       u64 count, cyc = 0, time_offset = 0, enabled, running, delta;
> +       u64 count, cyc = 0, time_offset = 0;
>
>         do {
>                 seq = pc->lock;
>                 barrier();
>
> -               enabled = pc->time_enabled;
> -               running = pc->time_running;
> +               *enabled = pc->time_enabled;
> +               *running = pc->time_running;
>
> -               if (enabled != running) {
> +               if (*enabled != *running) {
>                         cyc = rdtsc();
>                         time_mult = pc->time_mult;
>                         time_shift = pc->time_shift;
> @@ -62,21 +140,13 @@ static u64 mmap_read_self(void *addr)
>                 barrier();
>         } while (pc->lock != seq);
>
> -       if (enabled != running) {
> -               u64 quot, rem;
> -
> -               quot = (cyc >> time_shift);
> -               rem = cyc & (((u64)1 << time_shift) - 1);
> -               delta = time_offset + quot * time_mult +
> -                       ((rem * time_mult) >> time_shift);
> -
> -               enabled += delta;
> +       if (*enabled != *running) {
> +               u64 delta = time_offset + mul_u64_u32_shr(cyc, time_mult, time_shift);
> +               *enabled += delta;
>                 if (idx)
> -                       running += delta;
> +                       *running += delta;
>
> -               quot = count / running;
> -               rem = count % running;
> -               count = quot * enabled + (rem * enabled) / running;
> +               count = mul_u64_u64_div64(count, *enabled, *running);

*running may be 0 here, because of multiplexing, and so it will yield a SIGFPE.

>         }
>
>         return count;
> @@ -130,14 +200,18 @@ static int __test__rdpmc(void)
>         }
>
>         for (n = 0; n < 6; n++) {
> -               u64 stamp, now, delta;
> +               u64 stamp, now, delta, enabled, running;
>
> -               stamp = mmap_read_self(addr);
> +               stamp = mmap_read_self(addr, &enabled, &running);
>
>                 for (i = 0; i < loops; i++)
>                         tmp++;
>
> -               now = mmap_read_self(addr);
> +               now = mmap_read_self(addr, &enabled, &running);
> +
> +               if (enabled && !running)
> +                       goto out_error;
> +
>                 loops *= 10;
>
>                 delta = now - stamp;
> @@ -155,6 +229,11 @@ static int __test__rdpmc(void)
>                 return -1;
>
>         return 0;
> +
> +out_error:
> +       close(fd);
> +       pr_err("counter never ran; you loose\n");
> +       return -1;

I'd prefer to retry in this case as I'm not running the test on an
isolated machine. Perhaps -2 (TEST_SKIP) rather than -1 (TEST_FAIL),
some kind of resource not available error would be better.

Thanks,
Ian

>  }
>
>  int test__rdpmc(struct test *test __maybe_unused, int subtest __maybe_unused)
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ