linux-kernel - Re: [PATCH 1/2] perf: Add persistent events

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-ID: <20100525073246.GA6085@liondog.tnic>
Date:	Tue, 25 May 2010 09:32:46 +0200
From:	Borislav Petkov <bp@...en8.de>
To:	Peter Zijlstra <peterz@...radead.org>
Cc:	Ingo Molnar <mingo@...e.hu>,
	Frederic Weisbecker <fweisbec@...il.com>,
	Steven Rostedt <rostedt@...dmis.org>,
	Arnaldo Carvalho de Melo <acme@...hat.com>,
	Lin Ming <ming.m.lin@...el.com>, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 1/2] perf: Add persistent events

From: Peter Zijlstra <peterz@...radead.org>
Date: Sun, May 23, 2010 at 09:23:21PM +0200

> Either we add some notifier thing, or we simply add an explicit call in
> the init sequence after the perf_event subsystem is running. I would
> suggest we start with some explicit call, and take it from there.

Ok, this couldn't be more straightforward. So I looked at the init
sequence we do when booting wrt perf/ftrace initialization:

start_kernel
...
|-> sched_init
    |-> perf_event_init
...
|-> ftrace_init
rest_init
kernel_init
|-> do_pre_smp_initcalls
|...
|-> smp_init
|-> do_basic_setup
    |-> do_initcalls

and one of the convenient places after both perf is initialized and
ftrace has enumerated the tracepoints is do_initcalls() (It cannot be an
early_initcall since at that time we're not running SMP yet and we want
the MCE event per cpu.)

So I added a core_initcall that registers the mce perf event. This makes
it more or less a persistent event without any changes to the perf_event
subsystem. I guess this should work - at least it builds here, will give
it a run later.

As a further enhancement, the init-function should read out all the
logged mce events which survived the warm reboot and those which happen
between mce init and the actual event registration so that perf can
postprocess those too at a more convenient time.

Thanks.

---
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 8a6f0af..e3370a2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -94,6 +94,7 @@ static char			*mce_helper_argv[2] = { mce_helper, NULL };
 
 static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
 static DEFINE_PER_CPU(struct mce, mces_seen);
+static DEFINE_PER_CPU(struct perf_event *, mce_event);
 static int			cpu_missing;
 
 /*
@@ -1996,6 +1997,60 @@ static void __cpuinit mce_reenable_cpu(void *h)
 	}
 }
 
+static struct perf_event_attr pattr = {
+	.type	= PERF_TYPE_TRACEPOINT,	/* .config is filled in at enable time */
+	.size	= sizeof(pattr),
+};
+
+/* Create and enable the mce tracepoint event on @cpu; 0 or -errno. */
+static int mcheck_enable_perf_event_on_cpu(int cpu)
+{
+	struct perf_event *event;
+
+	pattr.config = event_mce_record.id;
+	event = perf_event_create_kernel_counter(&pattr, cpu, -1, NULL);
+	if (IS_ERR(event))
+		return PTR_ERR(event);	/* propagate the real error code */
+
+	perf_event_enable(event);
+	per_cpu(mce_event, cpu) = event;
+
+	return 0;
+}
+
+/* Disable and release the mce perf event stored for @cpu, if any. */
+static void mcheck_disable_perf_event_on_cpu(int cpu)
+{
+	struct perf_event *ev = per_cpu(mce_event, cpu);
+
+	if (ev) {
+		perf_event_disable(ev);
+		per_cpu(mce_event, cpu) = NULL;
+		perf_event_release_kernel(ev);
+	}
+}
+
+/* Enable the mce perf event on every online cpu; 0 or the first error. */
+static int mcheck_init_perf_event(void)
+{
+	int cpu, err = 0;
+
+	get_online_cpus();
+
+	for_each_online_cpu(cpu) {
+		err = mcheck_enable_perf_event_on_cpu(cpu);
+		if (err) {
+			printk(KERN_ERR "mce: error initializing mce tracepoint"
+					" on cpu %d\n", cpu);
+			break;	/* must still reach put_online_cpus() below */
+		}
+	}
+	put_online_cpus();
+
+	return err;
+}
+core_initcall(mcheck_init_perf_event);
+
 /* Get notified when a cpu comes on/off. Be hotplug friendly. */
 static int __cpuinit
 mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
@@ -2009,6 +2064,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		mce_create_device(cpu);
 		if (threshold_cpu_callback)
 			threshold_cpu_callback(action, cpu);
+		mcheck_enable_perf_event_on_cpu(cpu);
 		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
@@ -2020,6 +2076,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	case CPU_DOWN_PREPARE_FROZEN:
 		del_timer_sync(t);
 		smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
+		mcheck_disable_perf_event_on_cpu(cpu);
 		break;
 	case CPU_DOWN_FAILED:
 	case CPU_DOWN_FAILED_FROZEN:
@@ -2029,6 +2086,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 			add_timer_on(t, cpu);
 		}
 		smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
+		mcheck_enable_perf_event_on_cpu(cpu);
 		break;
 	case CPU_POST_DEAD:
 		/* intentionally ignoring frozen here */

-- 
Regards/Gruss,
    Boris.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/