lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1365185813.25942.12.camel@hornet>
Date:	Fri, 05 Apr 2013 19:16:53 +0100
From:	Pawel Moll <pawel.moll@....com>
To:	John Stultz <john.stultz@...aro.org>
Cc:	Peter Zijlstra <peterz@...radead.org>,
	David Ahern <dsahern@...il.com>,
	Stephane Eranian <eranian@...gle.com>,
	Thomas Gleixner <tglx@...utronix.de>,
	LKML <linux-kernel@...r.kernel.org>,
	"mingo@...e.hu" <mingo@...e.hu>, Paul Mackerras <paulus@...ba.org>,
	Anton Blanchard <anton@...ba.org>,
	Will Deacon <Will.Deacon@....com>,
	"ak@...ux.intel.com" <ak@...ux.intel.com>,
	Pekka Enberg <penberg@...il.com>,
	Steven Rostedt <rostedt@...dmis.org>,
	Robert Richter <robert.richter@....com>,
	Richard Cochran <richardcochran@...il.com>
Subject: Re: [RFC] perf: need to expose sched_clock to correlate user
 samples with kernel samples

On Thu, 2013-04-04 at 17:29 +0100, Pawel Moll wrote:
> > Maybe can we extend the dynamic posix clock code to work on more then 
> > just the chardev? 
> 
> The idea I'm following now is to make the dynamic clock framework even
> more generic, so there could be a clock associated with an arbitrary
> struct file * (the perf syscall is getting one with
> anon_inode_getfile()). I don't know how to get this done yet, but I'll
> give it a try and report.

Ok, so how about the code below? Disclaimer: this is just a proposal.
I'm not sure how welcomed would be an extra field in struct file, but
this makes the clocks ultimately flexible - one can "attach" the clock
to any arbitrary struct file. Alternatively we could mark a "clocked"
file with a special flag in f_mode and have some kind of lookup.

Also, I can't stop thinking that the posix-clock.c shouldn't actually do
anything about the character device... The PTP core (as the model of
using character device seems to me just one of possible choices) could
do this on its own and have simple open/release attaching/detaching the
clock. This would remove a lot of "generic dev" code in the
posix-clock.c and all the optional cdev methods in struct posix_clock.
It's just a thought, though...

And a couple of questions to Richard... Isn't the kref_put() in
posix_clock_unregister() a bug? I'm not 100% but it looks like a simple
register->unregister sequence was making the ref count == -1, so the
delete_clock() won't be called. And was there any particular reason that
the ops in struct posix_clock are *not* a pointer? This makes static
clock declaration a bit cumbersome (I'm not a C language lawyer, but gcc
doesn't let me do simply .ops = other_static_struct_with_ops).

Regards

Pawel

8<-------------------------------------------
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2c28271..4090500 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -804,6 +804,9 @@ struct file {
 #ifdef CONFIG_DEBUG_WRITECOUNT
 	unsigned long f_mnt_write_state;
 #endif
+
+	/* for clock_gettime(FD_TO_CLOCKID(fd)) and friends */
+	struct posix_clock	*posix_clock;
 };
 
 struct file_handle {
diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h
index 34c4498..85df2c5 100644
--- a/include/linux/posix-clock.h
+++ b/include/linux/posix-clock.h
@@ -123,6 +123,10 @@ struct posix_clock {
 	void (*release)(struct posix_clock *clk);
 };
 
+void posix_clock_init(struct posix_clock *clk);
+void posix_clock_attach(struct posix_clock *clk, struct file *fp);
+void posix_clock_detach(struct file *fp);
+
 /**
  * posix_clock_register() - register a new clock
  * @clk:   Pointer to the clock. Caller must provide 'ops' and 'release'
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b0cd865..0b70ad1 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -34,6 +34,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/kernel_stat.h>
 #include <linux/perf_event.h>
+#include <linux/posix-clock.h>
 #include <linux/ftrace_event.h>
 #include <linux/hw_breakpoint.h>
 #include <linux/mm_types.h>
@@ -627,6 +628,25 @@ perf_cgroup_mark_enabled(struct perf_event *event,
 }
 #endif
 
+static int perf_posix_clock_getres(struct posix_clock *pc, struct timespec *tp)
+{
+	*tp = ns_to_timespec(TICK_NSEC);
+	return 0;
+}
+
+static int perf_posix_clock_gettime(struct posix_clock *pc, struct timespec *tp)
+{
+	*tp = ns_to_timespec(perf_clock());
+	return 0;
+}
+
+static struct posix_clock perf_posix_clock = {
+	.ops = (struct posix_clock_operations) {
+		.clock_getres = perf_posix_clock_getres,
+		.clock_gettime = perf_posix_clock_gettime,
+	},
+};
+
 void perf_pmu_disable(struct pmu *pmu)
 {
 	int *count = this_cpu_ptr(pmu->pmu_disable_count);
@@ -2992,6 +3012,7 @@ static void put_event(struct perf_event *event)
 
 static int perf_release(struct inode *inode, struct file *file)
 {
+	posix_clock_detach(file);
 	put_event(file->private_data);
 	return 0;
 }
@@ -6671,6 +6692,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	 * perf_group_detach().
 	 */
 	fdput(group);
+	posix_clock_attach(&perf_posix_clock, event_file);
 	fd_install(event_fd, event_file);
 	return event_fd;
 
@@ -7416,6 +7438,8 @@ void __init perf_event_init(void)
 	 */
 	BUILD_BUG_ON((offsetof(struct perf_event_mmap_page, data_head))
 		     != 1024);
+
+	posix_clock_init(&perf_posix_clock);
 }
 
 static int __init perf_event_sysfs_init(void)
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
index ce033c7..525fa44 100644
--- a/kernel/time/posix-clock.c
+++ b/kernel/time/posix-clock.c
@@ -25,14 +25,44 @@
 #include <linux/syscalls.h>
 #include <linux/uaccess.h>
 
-static void delete_clock(struct kref *kref);
+void posix_clock_init(struct posix_clock *clk)
+{
+	kref_init(&clk->kref);
+	init_rwsem(&clk->rwsem);
+}
+EXPORT_SYMBOL_GPL(posix_clock_init);
+
+void posix_clock_attach(struct posix_clock *clk, struct file *fp)
+{
+	kref_get(&clk->kref);
+	fp->posix_clock = clk;
+}
+EXPORT_SYMBOL_GPL(posix_clock_attach);
+
+static void delete_clock(struct kref *kref)
+{
+	struct posix_clock *clk = container_of(kref, struct posix_clock, kref);
+
+	if (clk->release)
+		clk->release(clk);
+}
+
+void posix_clock_detach(struct file *fp)
+{
+	kref_put(&fp->posix_clock->kref, delete_clock);
+	fp->posix_clock = NULL;
+}
+EXPORT_SYMBOL_GPL(posix_clock_detach);
 
 /*
  * Returns NULL if the posix_clock instance attached to 'fp' is old and stale.
  */
 static struct posix_clock *get_posix_clock(struct file *fp)
 {
-	struct posix_clock *clk = fp->private_data;
+	struct posix_clock *clk = fp->posix_clock;
+
+	if (!clk)
+		return NULL;
 
 	down_read(&clk->rwsem);
 
@@ -167,10 +197,8 @@ static int posix_clock_open(struct inode *inode, struct file *fp)
 	else
 		err = 0;
 
-	if (!err) {
-		kref_get(&clk->kref);
-		fp->private_data = clk;
-	}
+	if (!err)
+		posix_clock_attach(clk, fp);
 out:
 	up_read(&clk->rwsem);
 	return err;
@@ -178,15 +206,13 @@ out:
 
 static int posix_clock_release(struct inode *inode, struct file *fp)
 {
-	struct posix_clock *clk = fp->private_data;
+	struct posix_clock *clk = fp->posix_clock;
 	int err = 0;
 
 	if (clk->ops.release)
 		err = clk->ops.release(clk);
 
-	kref_put(&clk->kref, delete_clock);
-
-	fp->private_data = NULL;
+	posix_clock_detach(fp);
 
 	return err;
 }
@@ -210,8 +236,7 @@ int posix_clock_register(struct posix_clock *clk, dev_t devid)
 {
 	int err;
 
-	kref_init(&clk->kref);
-	init_rwsem(&clk->rwsem);
+	posix_clock_init(clk);
 
 	cdev_init(&clk->cdev, &posix_clock_file_operations);
 	clk->cdev.owner = clk->ops.owner;
@@ -221,14 +246,6 @@ int posix_clock_register(struct posix_clock *clk, dev_t devid)
 }
 EXPORT_SYMBOL_GPL(posix_clock_register);
 
-static void delete_clock(struct kref *kref)
-{
-	struct posix_clock *clk = container_of(kref, struct posix_clock, kref);
-
-	if (clk->release)
-		clk->release(clk);
-}
-
 void posix_clock_unregister(struct posix_clock *clk)
 {
 	cdev_del(&clk->cdev);
@@ -249,22 +266,19 @@ struct posix_clock_desc {
 static int get_clock_desc(const clockid_t id, struct posix_clock_desc *cd)
 {
 	struct file *fp = fget(CLOCKID_TO_FD(id));
-	int err = -EINVAL;
 
 	if (!fp)
-		return err;
-
-	if (fp->f_op->open != posix_clock_open || !fp->private_data)
-		goto out;
+		return -EINVAL;
 
 	cd->fp = fp;
 	cd->clk = get_posix_clock(fp);
 
-	err = cd->clk ? 0 : -ENODEV;
-out:
-	if (err)
+	if (!cd->clk) {
 		fput(fp);
-	return err;
+		return -ENODEV;
+	}
+
+	return 0;
 }
 
 static void put_clock_desc(struct posix_clock_desc *cd)




--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ