linux-kernel - [take37 8/10] kevent: Kevent posix timer notifications.

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <11721525053583@2ka.mipt.ru>
Date:	Thu, 22 Feb 2007 16:55:05 +0300
From:	Evgeniy Polyakov <johnpol@....mipt.ru>
To:	Evgeniy Polyakov <johnpol@....mipt.ru>
Cc:	David Miller <davem@...emloft.net>,
	Ulrich Drepper <drepper@...hat.com>,
	Andrew Morton <akpm@...l.org>,
	Evgeniy Polyakov <johnpol@....mipt.ru>,
	netdev <netdev@...r.kernel.org>,
	Zach Brown <zach.brown@...cle.com>,
	Christoph Hellwig <hch@...radead.org>,
	Chase Venters <chase.venters@...entec.com>,
	Johann Borck <johann.borck@...sedata.com>,
	linux-kernel@...r.kernel.org, Jeff Garzik <jeff@...zik.org>,
	Jamal Hadi Salim <hadi@...erus.ca>,
	Ingo Molnar <mingo@...e.hu>
Subject: [take37 8/10] kevent: Kevent posix timer notifications.


Kevent posix timer notifications.

A simple extension to POSIX timers that allows
timer-expiration notifications to be delivered
through a kevent queue.

An example application, posix_timer.c, can be found
in the archive on the project homepage.

Signed-off-by: Evgeniy Polyakov <johnpol@....mipt.ru>


diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h
index 8786e01..3768746 100644
--- a/include/asm-generic/siginfo.h
+++ b/include/asm-generic/siginfo.h
@@ -235,6 +235,7 @@ typedef struct siginfo {
 #define SIGEV_NONE	1	/* other notification: meaningless */
 #define SIGEV_THREAD	2	/* deliver via thread creation */
 #define SIGEV_THREAD_ID 4	/* deliver to thread */
+#define SIGEV_KEVENT	8	/* deliver through kevent queue */
 
 /*
  * This works because the alignment is ok on all current architectures
@@ -260,6 +261,8 @@ typedef struct sigevent {
 			void (*_function)(sigval_t);
 			void *_attribute;	/* really pthread_attr_t */
 		} _sigev_thread;
+
+		int kevent_fd;
 	} _sigev_un;
 } sigevent_t;
 
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index a7dd38f..4b9deb4 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -4,6 +4,7 @@
 #include <linux/spinlock.h>
 #include <linux/list.h>
 #include <linux/sched.h>
+#include <linux/kevent_storage.h>
 
 union cpu_time_count {
 	cputime_t cpu;
@@ -49,6 +50,9 @@ struct k_itimer {
 	sigval_t it_sigev_value;	/* value word of sigevent struct */
 	struct task_struct *it_process;	/* process to send signal to */
 	struct sigqueue *sigq;		/* signal queue entry. */
+#ifdef CONFIG_KEVENT_TIMER
+	struct kevent_storage st;
+#endif
 	union {
 		struct {
 			struct hrtimer timer;
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 44318ca..ce9e357 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -48,6 +48,8 @@
 #include <linux/wait.h>
 #include <linux/workqueue.h>
 #include <linux/module.h>
+#include <linux/kevent.h>
+#include <linux/file.h>
 
 /*
  * Management arrays for POSIX timers.	 Timers are kept in slab memory
@@ -224,6 +226,118 @@ static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp)
 	return 0;
 }
 
+#ifdef CONFIG_KEVENT_TIMER
+/*
+ * Map a kevent back to the timer it was registered for.
+ * The id union has no pointer member, but its raw field is 64 bits wide,
+ * which is enough for any known pointer size.
+ */
+static inline struct k_itimer *posix_kevent_timer(struct kevent *k)
+{
+	return (struct k_itimer *)(unsigned long)k->event.id.raw_u64;
+}
+
+/* .enqueue hook: pass @k to kevent_storage_enqueue() on its timer's storage. */
+static int posix_kevent_enqueue(struct kevent *k)
+{
+	struct k_itimer *timer = posix_kevent_timer(k);
+
+	return kevent_storage_enqueue(&timer->st, k);
+}
+
+/* .dequeue hook: remove @k from its timer's storage; always returns 0. */
+static int posix_kevent_dequeue(struct kevent *k)
+{
+	struct k_itimer *timer = posix_kevent_timer(k);
+
+	kevent_storage_dequeue(&timer->st, k);
+	return 0;
+}
+
+/* .callback hook: unconditionally returns 1. */
+static int posix_kevent_callback(struct kevent *k)
+{
+	return 1;
+}
+
+/* Register the hooks above for KEVENT_POSIX_TIMER events. */
+static int posix_kevent_init(void)
+{
+	struct kevent_callbacks cbs = {
+		.callback	= &posix_kevent_callback,
+		.enqueue	= &posix_kevent_enqueue,
+		.dequeue	= &posix_kevent_dequeue,
+		.flags		= KEVENT_CALLBACKS_KERNELONLY,
+	};
+
+	return kevent_add_callbacks(&cbs, KEVENT_POSIX_TIMER);
+}
+
+extern struct file_operations kevent_user_fops;
+
+/*
+ * Register a KEVENT_POSIX_TIMER ukevent (KEVENT_REQ_ONESHOT |
+ * KEVENT_REQ_ALWAYS_QUEUE) for @tmr with the kevent user behind @fd.
+ *
+ * Returns 0 on success, -EBADF if fget() finds no file for @fd, -EINVAL
+ * if the file is not backed by kevent_user_fops, or otherwise the error
+ * returned by kevent_user_add_ukevent().
+ */
+static int posix_kevent_init_timer(struct k_itimer *tmr, int fd)
+{
+	struct kevent_user *queue;
+	struct ukevent req;
+	struct file *filp;
+	int ret;
+
+	filp = fget(fd);
+	if (!filp)
+		return -EBADF;
+
+	if (filp->f_op != &kevent_user_fops) {
+		ret = -EINVAL;
+		goto out_put;
+	}
+	queue = filp->private_data;
+
+	memset(&req, 0, sizeof(req));
+	req.event = KEVENT_MASK_ALL;
+	req.type = KEVENT_POSIX_TIMER;
+	/* The timer address doubles as the unique event id. */
+	req.id.raw_u64 = (unsigned long)tmr;
+	req.req_flags = KEVENT_REQ_ONESHOT | KEVENT_REQ_ALWAYS_QUEUE;
+	req.ptr = tmr->it_sigev_value.sival_ptr;
+
+	ret = kevent_user_add_ukevent(&req, queue);
+
+out_put:
+	/* Drop the fget() reference on success and failure alike. */
+	fput(filp);
+	return ret;
+}
+
+/* Calls kevent_storage_fini() on the storage embedded in @tmr. */
+static void posix_kevent_fini_timer(struct k_itimer *tmr)
+{
+	kevent_storage_fini(&tmr->st);
+}
+#else
+/* Stubs for !CONFIG_KEVENT_TIMER: timer setup reports -ENOSYS, the rest are no-ops. */
+static int posix_kevent_init_timer(struct k_itimer *tmr, int fd)
+{
+	return -ENOSYS;
+}
+
+static int posix_kevent_init(void)
+{
+	return 0;
+}
+
+static void posix_kevent_fini_timer(struct k_itimer *tmr)
+{
+}
+#endif
+
 /*
  * Initialize everything, well, just everything in Posix clocks/timers ;)
  */
@@ -241,6 +337,11 @@ static __init int init_posix_timers(void)
 	register_posix_clock(CLOCK_REALTIME, &clock_realtime);
 	register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic);
 
+	if (posix_kevent_init()) {
+		printk(KERN_ERR "Failed to initialize kevent posix timers.\n");
+		BUG();
+	}
+
 	posix_timers_cache = kmem_cache_create("posix_timers_cache",
 					sizeof (struct k_itimer), 0, 0, NULL, NULL);
 	idr_init(&posix_timers_id);
@@ -343,23 +444,34 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
 
 	timr = container_of(timer, struct k_itimer, it.real.timer);
 	spin_lock_irqsave(&timr->it_lock, flags);
+
+	if (timr->it_sigev_notify == SIGEV_KEVENT) {
+		/*
+		 * SIGEV_KEVENT timers report expiry through the kevent
+		 * storage in the timer; posix_timer_event() is not called.
+		 */
+#ifdef CONFIG_KEVENT_TIMER
+		kevent_storage_ready(&timr->st, NULL, KEVENT_MASK_ALL);
+#endif
+	} else {
+		/* Bump the requeue counter exactly once per expiry. */
+		if (timr->it.real.interval.tv64 != 0)
+			si_private = ++timr->it_requeue_pending;
 
-	if (timr->it.real.interval.tv64 != 0)
-		si_private = ++timr->it_requeue_pending;
-
-	if (posix_timer_event(timr, si_private)) {
-		/*
-		 * signal was not sent because of sig_ignor
-		 * we will not get a call back to restart it AND
-		 * it should be restarted.
-		 */
-		if (timr->it.real.interval.tv64 != 0) {
-			timr->it_overrun +=
-				hrtimer_forward(timer,
-						hrtimer_cb_get_time(timer),
-						timr->it.real.interval);
-			ret = HRTIMER_RESTART;
-			++timr->it_requeue_pending;
+		if (posix_timer_event(timr, si_private)) {
+			/*
+			 * signal was not sent because of sig_ignor
+			 * we will not get a call back to restart it AND
+			 * it should be restarted.
+			 */
+			if (timr->it.real.interval.tv64 != 0) {
+				timr->it_overrun +=
+					hrtimer_forward(timer,
+							hrtimer_cb_get_time(timer),
+							timr->it.real.interval);
+				ret = HRTIMER_RESTART;
+				++timr->it_requeue_pending;
+			}
 		}
 	}
 
@@ -406,6 +516,14 @@ static struct k_itimer * alloc_posix_timer(void)
 		kmem_cache_free(posix_timers_cache, tmr);
 		tmr = NULL;
 	}
+#ifdef CONFIG_KEVENT_TIMER
+	/*
+	 * tmr was reset to NULL just above on the failure path, so only
+	 * set up the kevent storage for a timer that really exists.
+	 */
+	if (tmr)
+		kevent_storage_init(tmr, &tmr->st);
+#endif
 	return tmr;
 }
 
@@ -423,6 +536,7 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
 	if (unlikely(tmr->it_process) &&
 	    tmr->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
 		put_task_struct(tmr->it_process);
+	posix_kevent_fini_timer(tmr);
 	kmem_cache_free(posix_timers_cache, tmr);
 }
 
@@ -495,40 +609,52 @@ sys_timer_create(const clockid_t which_clock,
 		new_timer->it_sigev_signo = event.sigev_signo;
 		new_timer->it_sigev_value = event.sigev_value;
 
-		read_lock(&tasklist_lock);
-		if ((process = good_sigevent(&event))) {
-			/*
-			 * We may be setting up this process for another
-			 * thread.  It may be exiting.  To catch this
-			 * case the we check the PF_EXITING flag.  If
-			 * the flag is not set, the siglock will catch
-			 * him before it is too late (in exit_itimers).
-			 *
-			 * The exec case is a bit more invloved but easy
-			 * to code.  If the process is in our thread
-			 * group (and it must be or we would not allow
-			 * it here) and is doing an exec, it will cause
-			 * us to be killed.  In this case it will wait
-			 * for us to die which means we can finish this
-			 * linkage with our last gasp. I.e. no code :)
-			 */
+		if (event.sigev_notify == SIGEV_KEVENT) {
+			error = posix_kevent_init_timer(new_timer, event._sigev_un.kevent_fd);
+			if (error)
+				goto out;
+
+			process = current->group_leader;
 			spin_lock_irqsave(&process->sighand->siglock, flags);
-			if (!(process->flags & PF_EXITING)) {
-				new_timer->it_process = process;
-				list_add(&new_timer->list,
-					 &process->signal->posix_timers);
-				spin_unlock_irqrestore(&process->sighand->siglock, flags);
-				if (new_timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
-					get_task_struct(process);
-			} else {
-				spin_unlock_irqrestore(&process->sighand->siglock, flags);
-				process = NULL;
+			new_timer->it_process = process;
+			list_add(&new_timer->list, &process->signal->posix_timers);
+			spin_unlock_irqrestore(&process->sighand->siglock, flags);
+		} else {
+			read_lock(&tasklist_lock);
+			if ((process = good_sigevent(&event))) {
+				/*
+				 * We may be setting up this process for another
+				 * thread.  It may be exiting.  To catch this
+				 * case the we check the PF_EXITING flag.  If
+				 * the flag is not set, the siglock will catch
+				 * him before it is too late (in exit_itimers).
+				 *
+				 * The exec case is a bit more invloved but easy
+				 * to code.  If the process is in our thread
+				 * group (and it must be or we would not allow
+				 * it here) and is doing an exec, it will cause
+				 * us to be killed.  In this case it will wait
+				 * for us to die which means we can finish this
+				 * linkage with our last gasp. I.e. no code :)
+				 */
+				spin_lock_irqsave(&process->sighand->siglock, flags);
+				if (!(process->flags & PF_EXITING)) {
+					new_timer->it_process = process;
+					list_add(&new_timer->list,
+						 &process->signal->posix_timers);
+					spin_unlock_irqrestore(&process->sighand->siglock, flags);
+					if (new_timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
+						get_task_struct(process);
+				} else {
+					spin_unlock_irqrestore(&process->sighand->siglock, flags);
+					process = NULL;
+				}
+			}
+			read_unlock(&tasklist_lock);
+			if (!process) {
+				error = -EINVAL;
+				goto out;
 			}
-		}
-		read_unlock(&tasklist_lock);
-		if (!process) {
-			error = -EINVAL;
-			goto out;
 		}
 	} else {
 		new_timer->it_sigev_notify = SIGEV_SIGNAL;

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/