lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CABPqkBRpTPcJOohtujR6kk4tAHDFNCv24Ha6E0v1p5MRkPpy6A@mail.gmail.com>
Date:	Fri, 25 Jan 2013 16:30:37 +0100
From:	Stephane Eranian <eranian@...gle.com>
To:	Ingo Molnar <mingo@...nel.org>
Cc:	Michael Ellerman <michael@...erman.id.au>,
	Paul Mackerras <paulus@...ba.org>,
	Benjamin Herrenschmidt <benh@...nel.crashing.org>,
	Sukadev Bhattiprolu <sukadev@...ux.vnet.ibm.com>,
	Maynard Johnson <mpjohn@...ibm.com>,
	Anton Blanchard <anton@...ba.org>,
	LKML <linux-kernel@...r.kernel.org>,
	Peter Zijlstra <peterz@...radead.org>,
	"mingo@...e.hu" <mingo@...e.hu>,
	"ak@...ux.intel.com" <ak@...ux.intel.com>,
	Arnaldo Carvalho de Melo <acme@...hat.com>,
	Jiri Olsa <jolsa@...hat.com>,
	Namhyung Kim <namhyung.kim@....com>
Subject: Re: [PATCH v7 07/18] perf: add generic memory sampling interface

On Fri, Jan 25, 2013 at 10:01 AM, Ingo Molnar <mingo@...nel.org> wrote:
>
> * Stephane Eranian <eranian@...gle.com> wrote:
>
>> This patch adds PERF_SAMPLE_DSRC.
>>
>> PERF_SAMPLE_DSRC collects the data source, i.e., where
>> did the data associated with the sampled instruction
>> come from. Information is stored in a perf_mem_dsrc
>> structure. It contains opcode, mem level, tlb, snoop,
>> lock information, subject to availability in hardware.
>>
>> Signed-off-by: Stephane Eranian <eranian@...gle.com>
>> ---
>>  include/linux/perf_event.h      |    2 ++
>>  include/uapi/linux/perf_event.h |   68 +++++++++++++++++++++++++++++++++++++--
>>  kernel/events/core.c            |    6 ++++
>>  3 files changed, 74 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
>> index bb2429d..8fe4610 100644
>> --- a/include/linux/perf_event.h
>> +++ b/include/linux/perf_event.h
>> @@ -579,6 +579,7 @@ struct perf_sample_data {
>>               u32     reserved;
>>       }                               cpu_entry;
>>       u64                             period;
>> +     union  perf_mem_dsrc            dsrc;
>>       struct perf_callchain_entry     *callchain;
>>       struct perf_raw_record          *raw;
>>       struct perf_branch_stack        *br_stack;
>> @@ -599,6 +600,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
>>       data->regs_user.regs = NULL;
>>       data->stack_user_size = 0;
>>       data->weight = 0;
>> +     data->dsrc.val = 0;
>>  }
>>
>>  extern void perf_output_sample(struct perf_output_handle *handle,
>> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
>> index 3e6c394..3e4844c 100644
>> --- a/include/uapi/linux/perf_event.h
>> +++ b/include/uapi/linux/perf_event.h
>> @@ -133,9 +133,9 @@ enum perf_event_sample_format {
>>       PERF_SAMPLE_REGS_USER                   = 1U << 12,
>>       PERF_SAMPLE_STACK_USER                  = 1U << 13,
>>       PERF_SAMPLE_WEIGHT                      = 1U << 14,
>> +     PERF_SAMPLE_DSRC                        = 1U << 15,
>>
>> -     PERF_SAMPLE_MAX = 1U << 15,             /* non-ABI */
>> -
>> +     PERF_SAMPLE_MAX = 1U << 16,             /* non-ABI */
>>  };
>>
>>  /*
>> @@ -591,6 +591,7 @@ enum perf_event_type {
>>        *        u64                   dyn_size; } && PERF_SAMPLE_STACK_USER
>>        *
>>        *      { u64                   weight;   } && PERF_SAMPLE_WEIGHT
>> +      *      { u64                   dsrc;     } && PERF_SAMPLE_DSRC
>>        * };
>>        */
>>       PERF_RECORD_SAMPLE                      = 9,
>> @@ -616,4 +617,67 @@ enum perf_callchain_context {
>>  #define PERF_FLAG_FD_OUTPUT          (1U << 1)
>>  #define PERF_FLAG_PID_CGROUP         (1U << 2) /* pid=cgroup id, per-cpu mode only */
>>
>> +union perf_mem_dsrc {
>> +     __u64 val;
>> +     struct {
>> +             __u64   mem_op:5,       /* type of opcode */
>> +                     mem_lvl:14,     /* memory hierarchy level */
>> +                     mem_snoop:5,    /* snoop mode */
>> +                     mem_lock:2,     /* lock instr */
>> +                     mem_dtlb:7,     /* tlb access */
>> +                     mem_rsvd:31;
>> +     };
>> +};
>> +
>> +/* type of opcode (load/store/prefetch,code) */
>> +#define PERF_MEM_OP_NA               0x01 /* not available */
>> +#define PERF_MEM_OP_LOAD     0x02 /* load instruction */
>> +#define PERF_MEM_OP_STORE    0x04 /* store instruction */
>> +#define PERF_MEM_OP_PFETCH   0x08 /* prefetch */
>> +#define PERF_MEM_OP_EXEC     0x10 /* code (execution) */
>> +#define PERF_MEM_OP_SHIFT    0
>> +
>> +/* memory hierarchy (memory level, hit or miss) */
>> +#define PERF_MEM_LVL_NA              0x01  /* not available */
>> +#define PERF_MEM_LVL_HIT     0x02  /* hit level */
>> +#define PERF_MEM_LVL_MISS    0x04  /* miss level  */
>> +#define PERF_MEM_LVL_L1              0x08  /* L1 */
>> +#define PERF_MEM_LVL_LFB     0x10  /* Line Fill Buffer */
>> +#define PERF_MEM_LVL_L2              0x20  /* L2 hit */
>> +#define PERF_MEM_LVL_L3              0x40  /* L3 hit */
>> +#define PERF_MEM_LVL_LOC_RAM 0x80  /* Local DRAM */
>> +#define PERF_MEM_LVL_REM_RAM1        0x100 /* Remote DRAM (1 hop) */
>> +#define PERF_MEM_LVL_REM_RAM2        0x200 /* Remote DRAM (2 hops) */
>> +#define PERF_MEM_LVL_REM_CCE1        0x400 /* Remote Cache (1 hop) */
>> +#define PERF_MEM_LVL_REM_CCE2        0x800 /* Remote Cache (2 hops) */
>> +#define PERF_MEM_LVL_IO              0x1000 /* I/O memory */
>> +#define PERF_MEM_LVL_UNC     0x2000 /* Uncached memory */
>> +#define PERF_MEM_LVL_SHIFT   5
>> +
>> +/* snoop mode */
>> +#define PERF_MEM_SNOOP_NA    0x01 /* not available */
>> +#define PERF_MEM_SNOOP_NONE  0x02 /* no snoop */
>> +#define PERF_MEM_SNOOP_HIT   0x04 /* snoop hit */
>> +#define PERF_MEM_SNOOP_MISS  0x08 /* snoop miss */
>> +#define PERF_MEM_SNOOP_HITM  0x10 /* snoop hit modified */
>> +#define PERF_MEM_SNOOP_SHIFT 19
>> +
>> +/* locked instruction */
>> +#define PERF_MEM_LOCK_NA     0x01 /* not available */
>> +#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */
>> +#define PERF_MEM_LOCK_SHIFT  24
>> +
>> +/* TLB access */
>> +#define PERF_MEM_TLB_NA              0x01 /* not available */
>> +#define PERF_MEM_TLB_HIT     0x02 /* hit level */
>> +#define PERF_MEM_TLB_MISS    0x04 /* miss level */
>> +#define PERF_MEM_TLB_L1              0x08 /* L1 */
>> +#define PERF_MEM_TLB_L2              0x10 /* L2 */
>> +#define PERF_MEM_TLB_WK              0x20 /* Hardware Walker*/
>> +#define PERF_MEM_TLB_OS              0x40 /* OS fault handler */
>> +#define PERF_MEM_TLB_SHIFT   26
>> +
>> +#define PERF_MEM_S(a, s) \
>> +     (((u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
>> +
>
> Would be nice to get feedback from PowerPC folks to see how well
> this matches their memory profiling hw capabilities?
>
I agree. I tried to remain as generic as possible here, but I probably
don't have all the possibilities covered. I remember IBM asking me about
the categories a long time ago, but I haven't heard anything since then.

> I suspect there are a lot of differences, but one can always
> hope...
>
> If there's some hope for unification we could at least shape it
> in a way that they could pick up and extend.
>
Agreed.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ