Date:   Wed, 5 Aug 2020 14:44:54 +0200
From:   peterz@...radead.org
To:     "Jin, Yao" <yao.jin@...ux.intel.com>
Cc:     mingo@...hat.com, oleg@...hat.com, acme@...nel.org,
        jolsa@...nel.org, Linux-kernel@...r.kernel.org, ak@...ux.intel.com,
        kan.liang@...el.com, yao.jin@...el.com,
        alexander.shishkin@...ux.intel.com, mark.rutland@....com
Subject: Re: [PATCH v1 2/2] perf/core: Fake regs for leaked kernel samples

On Wed, Aug 05, 2020 at 10:15:26AM +0800, Jin, Yao wrote:
> Hi Peter,
> 
> On 8/4/2020 7:49 PM, peterz@...radead.org wrote:
> > On Fri, Jul 31, 2020 at 10:56:17AM +0800, Jin Yao wrote:
> > > @@ -6973,7 +6973,8 @@ static struct perf_callchain_entry __empty_callchain = { .nr = 0, };
> > >   struct perf_callchain_entry *
> > >   perf_callchain(struct perf_event *event, struct pt_regs *regs)
> > >   {
> > > -	bool kernel = !event->attr.exclude_callchain_kernel;
> > > +	bool kernel = !event->attr.exclude_callchain_kernel &&
> > > +		      !event->attr.exclude_kernel;
> > 
> > This seems weird; how can we get there? Also it seems to me that if you
> > have !exclude_callchain_kernel you already have permission for kernel
> > bits, so who cares.
> > 
> 
> In the perf tool, exclude_callchain_kernel is set to 1 when perf-record
> collects only user callchains, and exclude_kernel is set to 1 when events
> are configured to run in user space.
> 
> So if an event is configured to run in user space, it makes sense that we
> don't need its kernel callchains.
> 
> But it seems to me there is no logic in the perf tool which ensures
> !exclude_callchain_kernel -> !exclude_kernel.
> 
> Jiri, Arnaldo, is my understanding correct?

What the perf tool does or does not do is irrelevant. It is a valid
(albeit slightly silly) configuration to have:

	exclude_kernel && !exclude_callchain_kernel

You're now saying that when you configure things like this you're not
allowed kernel IPs; that's wrong, I think.
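
For concreteness, such an attr could look roughly like the sketch below
(illustrative only, not from the patch; the event type and sample flags
are arbitrary picks):

	struct perf_event_attr attr = {
		.type			  = PERF_TYPE_HARDWARE,
		.config			  = PERF_COUNT_HW_CPU_CYCLES,
		.sample_type		  = PERF_SAMPLE_IP | PERF_SAMPLE_CALLCHAIN,
		.exclude_kernel		  = 1,	/* count user space only */
		.exclude_callchain_kernel = 0,	/* ...but still ask for kernel frames */
	};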

Also, !exclude_callchain_kernel should require privilege, which needs
fixing; see below.

> So the new code looks like:
> 
> if (event->attr.exclude_kernel && !user_mode(regs)) {
> 	if (!(current->flags & PF_KTHREAD)) {
> 		regs_fake = task_pt_regs(current);
> 		if (!regs_fake)
> 			instruction_pointer_set(regs, -1L);
> 	} else {
> 		instruction_pointer_set(regs, -1L);
> 	}

Again:

	if (!(current->flags & PF_KTHREAD))
		regs_fake = task_pt_regs(current);

	if (!regs_fake)
		instruction_pointer_set(regs, -1L);

Is much simpler and more readable.

> > > +		if ((header->misc & PERF_RECORD_MISC_CPUMODE_MASK) ==
> > > +		     PERF_RECORD_MISC_KERNEL) {
> > > +			header->misc &= ~PERF_RECORD_MISC_CPUMODE_MASK;
> > > +			header->misc |= PERF_RECORD_MISC_USER;
> > > +		}
> > 
> > Why the conditional? At this point it had better be unconditionally
> > user, no?
> > 
> > 		header->misc &= ~PERF_RECORD_MISC_CPUMODE_MASK;
> > 		header->misc |= PERF_RECORD_MISC_USER;
> > 
> 
> #define PERF_RECORD_MISC_CPUMODE_MASK		(7 << 0)
> #define PERF_RECORD_MISC_CPUMODE_UNKNOWN	(0 << 0)
> #define PERF_RECORD_MISC_KERNEL			(1 << 0)
> #define PERF_RECORD_MISC_USER			(2 << 0)
> #define PERF_RECORD_MISC_HYPERVISOR		(3 << 0)
> #define PERF_RECORD_MISC_GUEST_KERNEL		(4 << 0)
> #define PERF_RECORD_MISC_GUEST_USER		(5 << 0)
> 
> If we unconditionally set user, it will also clobber the hypervisor,
> guest kernel and guest user modes.

At the same time, :u had better not get any of those either, which seems
to suggest we're going about this wrong.

Also, if we call this before perf_misc_flags(), which derives the cpumode
from the regs we pass it, we don't need to fix up header->misc afterwards.

How's this?

---
 kernel/events/core.c | 38 +++++++++++++++++++++++++++++++++-----
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 7c436d705fbd..3e4e328b521a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6988,23 +6988,49 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
 	return callchain ?: &__empty_callchain;
 }
 
+/*
+ * Due to interrupt latency (skid), we may enter the kernel before taking the
+ * PMI, even if the PMU is configured to only count user events. To avoid
+ * leaking kernel addresses, use task_pt_regs(), when available.
+ */
+static struct pt_regs *sanitize_sample_regs(struct perf_event *event, struct pt_regs *regs)
+{
+	struct pt_regs *sample_regs = regs;
+
+	/* nothing to sanitize unless the event is restricted to user space */
+	if (!event->attr.exclude_kernel || !event->attr.exclude_hv ||
+	    !event->attr.exclude_host   || !event->attr.exclude_guest)
+		return sample_regs;
+
+	if (user_mode(regs))
+		return sample_regs;
+
+	if (!(current->flags & PF_KTHREAD))
+		sample_regs = task_pt_regs(current);
+	else
+		instruction_pointer_set(regs, -1L);
+
+	return sample_regs;
+}
+
 void perf_prepare_sample(struct perf_event_header *header,
 			 struct perf_sample_data *data,
 			 struct perf_event *event,
 			 struct pt_regs *regs)
 {
+	struct pt_regs *sample_regs = sanitize_sample_regs(event, regs);
 	u64 sample_type = event->attr.sample_type;
 
 	header->type = PERF_RECORD_SAMPLE;
 	header->size = sizeof(*header) + event->header_size;
 
 	header->misc = 0;
-	header->misc |= perf_misc_flags(regs);
+	header->misc |= perf_misc_flags(sample_regs);
 
 	__perf_event_header__init_id(header, data, event);
 
 	if (sample_type & PERF_SAMPLE_IP)
-		data->ip = perf_instruction_pointer(regs);
+		data->ip = perf_instruction_pointer(sample_regs);
 
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		int size = 1;
@@ -7054,9 +7080,10 @@ void perf_prepare_sample(struct perf_event_header *header,
 		header->size += size;
 	}
 
-	if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER))
+	if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER)) {
 		perf_sample_regs_user(&data->regs_user, regs,
 				      &data->regs_user_copy);
+	}
 
 	if (sample_type & PERF_SAMPLE_REGS_USER) {
 		/* regs dump ABI info */
@@ -7099,7 +7126,7 @@ void perf_prepare_sample(struct perf_event_header *header,
 		/* regs dump ABI info */
 		int size = sizeof(u64);
 
-		perf_sample_regs_intr(&data->regs_intr, regs);
+		perf_sample_regs_intr(&data->regs_intr, sample_regs);
 
 		if (data->regs_intr.regs) {
 			u64 mask = event->attr.sample_regs_intr;
@@ -11609,7 +11636,8 @@ SYSCALL_DEFINE5(perf_event_open,
 	if (err)
 		return err;
 
-	if (!attr.exclude_kernel) {
+	if (!attr.exclude_kernel || !attr.exclude_callchain_kernel ||
+	    !attr.exclude_hv || !attr.exclude_host || !attr.exclude_guest) {
 		err = perf_allow_kernel(&attr);
 		if (err)
 			return err;

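For illustration, a minimal user-space sketch (not part of the patch) of
the configuration the last hunk now gates on; with a restrictive
kernel.perf_event_paranoid this open is expected to fail for unprivileged
users once !exclude_callchain_kernel goes through perf_allow_kernel():

	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <string.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		struct perf_event_attr attr;
		int fd;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_HARDWARE;
		attr.config = PERF_COUNT_HW_CPU_CYCLES;
		attr.sample_period = 100000;
		attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_CALLCHAIN;
		attr.exclude_kernel = 1;		/* user-only counting... */
		attr.exclude_callchain_kernel = 0;	/* ...but kernel callchains requested */

		/* monitor the current task on any CPU */
		fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
		if (fd < 0)
			perror("perf_event_open");
		else
			close(fd);

		return 0;
	}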