[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZPMTVNM2oBCdSYjJ@gmail.com>
Date: Sat, 2 Sep 2023 12:49:56 +0200
From: Ingo Molnar <mingo@...nel.org>
To: Yi Sun <yi.sun@...el.com>
Cc: dave.hansen@...el.com, tglx@...utronix.de,
linux-kernel@...r.kernel.org, x86@...nel.org,
sohil.mehta@...el.com, ak@...ux.intel.com,
ilpo.jarvinen@...ux.intel.com, heng.su@...el.com,
tony.luck@...el.com, dave.hansen@...ux.intel.com,
yi.sun@...el.intel.com
Subject: Re: [PATCH v6 1/3] x86/fpu: Measure the Latency of XSAVES and XRSTORS
* Yi Sun <yi.sun@...el.com> wrote:
> +#define XSTATE_XSAVE(fps, lmask, hmask, err) \
> + do { \
> + struct fpstate *f = fps; \
> + u64 tc = -1; \
> + if (xsave_tracing_enabled()) \
> + tc = trace_clock(); \
> + __XSTATE_XSAVE(&f->regs.xsave, lmask, hmask, err); \
> + if (xsave_tracing_enabled()) \
> + trace_x86_fpu_latency_xsave(f, trace_clock() - tc);\
> + } while (0)
> +
> /*
> * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact
> * XSAVE area format.
> */
> -#define XSTATE_XRESTORE(st, lmask, hmask) \
> +#define __XSTATE_XRESTORE(st, lmask, hmask) \
> asm volatile(ALTERNATIVE(XRSTOR, \
> XRSTORS, X86_FEATURE_XSAVES) \
> "\n" \
> @@ -140,6 +168,17 @@ static inline u64 xfeatures_mask_independent(void)
> : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
> : "memory")
>
> +#define XSTATE_XRESTORE(fps, lmask, hmask) \
> + do { \
> + struct fpstate *f = fps; \
> + u64 tc = -1; \
> + if (xrstor_tracing_enabled()) \
> + tc = trace_clock(); \
> + __XSTATE_XRESTORE(&f->regs.xsave, lmask, hmask); \
> + if (xrstor_tracing_enabled()) \
> + trace_x86_fpu_latency_xrstor(f, trace_clock() - tc);\
> + } while (0)
> +
> #if defined(CONFIG_X86_64) && defined(CONFIG_X86_DEBUG_FPU)
> extern void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor);
> #else
> @@ -184,7 +223,7 @@ static inline void os_xsave(struct fpstate *fpstate)
> WARN_ON_FPU(!alternatives_patched);
> xfd_validate_state(fpstate, mask, false);
>
> - XSTATE_XSAVE(&fpstate->regs.xsave, lmask, hmask, err);
> + XSTATE_XSAVE(fpstate, lmask, hmask, err);
>
> /* We should never fault when copying to a kernel buffer: */
> WARN_ON_FPU(err);
> @@ -201,7 +240,7 @@ static inline void os_xrstor(struct fpstate *fpstate, u64 mask)
> u32 hmask = mask >> 32;
>
> xfd_validate_state(fpstate, mask, true);
> - XSTATE_XRESTORE(&fpstate->regs.xsave, lmask, hmask);
> + XSTATE_XRESTORE(fpstate, lmask, hmask);
> }
Instead of adding overhead to the regular FPU context saving/restoring code
paths, could you add a separate helper function that includes the tracing
code but isn't otherwise used, and leave the regular code paths with no
tracing overhead?
This does add a bit of a long-term maintenance burden, since we'd have to
make sure that the traced functionality doesn't bitrot, but I'd say that's
preferable to adding tracing overhead.
Thanks,
Ingo
Powered by blists - more mailing lists