lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <alpine.LFD.2.02.1104271359580.3323@ionos>
Date:	Wed, 27 Apr 2011 16:02:33 +0200 (CEST)
From:	Thomas Gleixner <tglx@...utronix.de>
To:	Alexander Shishkin <virtuoso@...nd.org>
cc:	Andrew Morton <akpm@...ux-foundation.org>,
	John Stultz <johnstul@...ibm.com>,
	Chris Friesen <chris.friesen@...band.com>,
	Kay Sievers <kay.sievers@...y.org>,
	"Kirill A. Shutemov" <kirill@...temov.name>,
	LKML <linux-kernel@...r.kernel.org>,
	Peter Zijlstra <peterz@...radead.org>,
	Davide Libenzi <davidel@...ilserver.org>
Subject: Re: [RFC][PATCH 1/4] clock_rtoffset: new syscall

On Wed, 27 Apr 2011, Alexander Shishkin wrote:

> In order to keep track of system time changes, we introduce a new
> syscall which returns the offset of CLOCK_MONOTONIC clock against
> CLOCK_REALTIME. The caller is to store this value and use it in
> system calls (like clock_nanosleep or timerfd_settime) that will
> compare it against the effective offset in order to ensure that
> the caller's notion of the system time corresponds to the effective
> system time at the moment of the action being carried out. If it
> has changed, these system calls will return an error and the caller
> will have to obtain this offset again.

No, we do not expose kernel internals like this to user space. The
kernel internal representation of time is subject to change.

Also abusing the reminder argument of clock_nanosleep for handing back
the offset is a horrible hack including the non standard -ECANCELED
return value. No, we don't change posix interfaces that way.

We can add something to timerfd, but definitly not with another
syscall and by bloating hrtimer and sprinkling cancellation calls all
over the place. clock_was_set() should be enough and it already calls
into the hrtimer code, the resume path calls into it as well, so
there is no need to introduce such a mess.

The completely untested patch below should solve the same problem in a
sane way. Restricted to timerfd, but that really should be sufficient.

Thanks,

	tglx

------------->

Subject: timerfd: Allow timers to be cancelled when clock was set
From: Thomas Gleixner <tglx@...utronix.de>
Date: Wed, 27 Apr 2011 14:16:42 +0200

<Add proper changelog here>

Signed-off-by: Thomas Gleixner <tglx@...utronix.de>
---
 fs/timerfd.c              |   57 +++++++++++++++++++++++++++++++++++++++++-----
 include/linux/hrtimer.h   |    3 +-
 include/linux/time.h      |    5 ++++
 include/linux/timerfd.h   |    3 +-
 kernel/hrtimer.c          |   41 +++++++++++++++++++++++++++------
 kernel/time/timekeeping.c |   15 ++++++++++++
 6 files changed, 109 insertions(+), 15 deletions(-)

Index: linux-2.6-tip/fs/timerfd.c
===================================================================
--- linux-2.6-tip.orig/fs/timerfd.c
+++ linux-2.6-tip/fs/timerfd.c
@@ -26,10 +26,12 @@
 struct timerfd_ctx {
 	struct hrtimer tmr;
 	ktime_t tintv;
+	ktime_t moffs;
 	wait_queue_head_t wqh;
 	u64 ticks;
 	int expired;
 	int clockid;
+	bool might_cancel;
 };
 
 /*
@@ -59,24 +61,52 @@ static ktime_t timerfd_get_remaining(str
 	return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
 }
 
-static void timerfd_setup(struct timerfd_ctx *ctx, int flags,
-			  const struct itimerspec *ktmr)
+static bool timerfd_cancelled(struct timerfd_ctx *ctx)
+{
+	ktime_t moffs;
+
+	if (!ctx->might_cancel)
+		return false;
+
+	moffs = get_monotonic_offset();
+
+	if (moffs.tv64 == ctx->moffs.tv64)
+		return false;
+
+	ctx->moffs = moffs;
+	return true;
+}
+
+static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
+			 const struct itimerspec *ktmr)
 {
 	enum hrtimer_mode htmode;
 	ktime_t texp;
+	int clockid = ctx->clockid;
 
 	htmode = (flags & TFD_TIMER_ABSTIME) ?
 		HRTIMER_MODE_ABS: HRTIMER_MODE_REL;
 
+	ctx->might_cancel = false;
+	if (htmode == HRTIMER_MODE_ABS && ctx->clockid == CLOCK_REALTIME &&
+	    (flags & TFD_TIMER_CANCELON_SET)) {
+		clockid = CLOCK_REALTIME_COS;
+		ctx->might_cancel = true;
+	}
+
 	texp = timespec_to_ktime(ktmr->it_value);
 	ctx->expired = 0;
 	ctx->ticks = 0;
 	ctx->tintv = timespec_to_ktime(ktmr->it_interval);
-	hrtimer_init(&ctx->tmr, ctx->clockid, htmode);
+	hrtimer_init(&ctx->tmr, clockid, htmode);
 	hrtimer_set_expires(&ctx->tmr, texp);
 	ctx->tmr.function = timerfd_tmrproc;
-	if (texp.tv64 != 0)
+	if (texp.tv64 != 0) {
 		hrtimer_start(&ctx->tmr, texp, htmode);
+		if (timerfd_cancelled(ctx))
+			return -ECANCELED;
+	}
+	return 0;
 }
 
 static int timerfd_release(struct inode *inode, struct file *file)
@@ -118,8 +148,21 @@ static ssize_t timerfd_read(struct file 
 		res = -EAGAIN;
 	else
 		res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);
+
 	if (ctx->ticks) {
 		ticks = ctx->ticks;
+
+		/*
+		 * If clock has changed, we do not care about the
+		 * ticks and we do not rearm the timer. Userspace must
+		 * reevaluate anyway.
+		 */
+		if (timerfd_cancelled(ctx)) {
+			ticks = 0;
+			ctx->expired = 0;
+			res = -ECANCELED;
+		}
+
 		if (ctx->expired && ctx->tintv.tv64) {
 			/*
 			 * If tintv.tv64 != 0, this is a periodic timer that
@@ -183,6 +226,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clo
 	init_waitqueue_head(&ctx->wqh);
 	ctx->clockid = clockid;
 	hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
+	ctx->moffs = get_monotonic_offset();
 
 	ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
 			       O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
@@ -199,6 +243,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, uf
 	struct file *file;
 	struct timerfd_ctx *ctx;
 	struct itimerspec ktmr, kotmr;
+	int ret;
 
 	if (copy_from_user(&ktmr, utmr, sizeof(ktmr)))
 		return -EFAULT;
@@ -240,14 +285,14 @@ SYSCALL_DEFINE4(timerfd_settime, int, uf
 	/*
 	 * Re-program the timer to the new value ...
 	 */
-	timerfd_setup(ctx, flags, &ktmr);
+	ret = timerfd_setup(ctx, flags, &ktmr);
 
 	spin_unlock_irq(&ctx->wqh.lock);
 	fput(file);
 	if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr)))
 		return -EFAULT;
 
-	return 0;
+	return ret;
 }
 
 SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
Index: linux-2.6-tip/include/linux/hrtimer.h
===================================================================
--- linux-2.6-tip.orig/include/linux/hrtimer.h
+++ linux-2.6-tip/include/linux/hrtimer.h
@@ -157,6 +157,7 @@ enum  hrtimer_base_type {
 	HRTIMER_BASE_REALTIME,
 	HRTIMER_BASE_MONOTONIC,
 	HRTIMER_BASE_BOOTTIME,
+	HRTIMER_BASE_REALTIME_COS,
 	HRTIMER_MAX_CLOCK_BASES,
 };
 
@@ -319,7 +320,7 @@ static inline int hrtimer_is_hres_active
 extern ktime_t ktime_get(void);
 extern ktime_t ktime_get_real(void);
 extern ktime_t ktime_get_boottime(void);
-
+ktime_t get_monotonic_offset(void);
 
 DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
 
Index: linux-2.6-tip/include/linux/time.h
===================================================================
--- linux-2.6-tip.orig/include/linux/time.h
+++ linux-2.6-tip/include/linux/time.h
@@ -295,6 +295,11 @@ struct itimerval {
 #define CLOCK_MONOTONIC_COARSE		6
 #define CLOCK_BOOTTIME			7
 
+#ifdef __KERNEL__
+/* This clock is not exposed to user space */
+#define CLOCK_REALTIME_COS		8
+#endif
+
 /*
  * The IDs of various hardware clocks:
  */
Index: linux-2.6-tip/include/linux/timerfd.h
===================================================================
--- linux-2.6-tip.orig/include/linux/timerfd.h
+++ linux-2.6-tip/include/linux/timerfd.h
@@ -19,6 +19,7 @@
  * shared O_* flags.
  */
 #define TFD_TIMER_ABSTIME (1 << 0)
+#define TFD_TIMER_CANCELON_SET (1 << 1)
 #define TFD_CLOEXEC O_CLOEXEC
 #define TFD_NONBLOCK O_NONBLOCK
 
@@ -26,6 +27,6 @@
 /* Flags for timerfd_create.  */
 #define TFD_CREATE_FLAGS TFD_SHARED_FCNTL_FLAGS
 /* Flags for timerfd_settime.  */
-#define TFD_SETTIME_FLAGS TFD_TIMER_ABSTIME
+#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCELON_SET)
 
 #endif /* _LINUX_TIMERFD_H */
Index: linux-2.6-tip/kernel/hrtimer.c
===================================================================
--- linux-2.6-tip.orig/kernel/hrtimer.c
+++ linux-2.6-tip/kernel/hrtimer.c
@@ -78,6 +78,11 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, 
 			.get_time = &ktime_get_boottime,
 			.resolution = KTIME_LOW_RES,
 		},
+		{
+			.index = CLOCK_REALTIME_COS,
+			.get_time = &ktime_get_real,
+			.resolution = KTIME_LOW_RES,
+		},
 	}
 };
 
@@ -106,6 +111,7 @@ static void hrtimer_get_softirq_time(str
 	base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim;
 	base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono;
 	base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot;
+	base->clock_base[HRTIMER_BASE_REALTIME_COS].softirq_time = xtim;
 }
 
 /*
@@ -617,6 +623,8 @@ static int hrtimer_reprogram(struct hrti
 	return res;
 }
 
+static void
+hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base, ktime_t now);
 
 /*
  * Retrigger next event is called after clock was set
@@ -626,13 +634,12 @@ static int hrtimer_reprogram(struct hrti
 static void retrigger_next_event(void *arg)
 {
 	struct hrtimer_cpu_base *base;
-	struct timespec realtime_offset, wtm, sleep;
+	struct timespec realtime_offset, xtim, wtm, sleep;
 
 	if (!hrtimer_hres_active())
 		return;
 
-	get_xtime_and_monotonic_and_sleep_offset(&realtime_offset, &wtm,
-							&sleep);
+	get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep);
 	set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec);
 
 	base = &__get_cpu_var(hrtimer_bases);
@@ -643,6 +650,10 @@ static void retrigger_next_event(void *a
 		timespec_to_ktime(realtime_offset);
 	base->clock_base[HRTIMER_BASE_BOOTTIME].offset =
 		timespec_to_ktime(sleep);
+	base->clock_base[HRTIMER_BASE_REALTIME_COS].offset =
+		timespec_to_ktime(realtime_offset);
+
+	hrtimer_expire_cancelable(base, timespec_to_ktime(xtim));
 
 	hrtimer_force_reprogram(base, 0);
 	raw_spin_unlock(&base->lock);
@@ -715,7 +726,7 @@ static inline int hrtimer_enqueue_reprog
  */
 static int hrtimer_switch_to_hres(void)
 {
-	int cpu = smp_processor_id();
+	int i, cpu = smp_processor_id();
 	struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu);
 	unsigned long flags;
 
@@ -731,9 +742,8 @@ static int hrtimer_switch_to_hres(void)
 		return 0;
 	}
 	base->hres_active = 1;
-	base->clock_base[HRTIMER_BASE_REALTIME].resolution = KTIME_HIGH_RES;
-	base->clock_base[HRTIMER_BASE_MONOTONIC].resolution = KTIME_HIGH_RES;
-	base->clock_base[HRTIMER_BASE_BOOTTIME].resolution = KTIME_HIGH_RES;
+	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
+		base->clock_base[i].resolution = KTIME_HIGH_RES;
 
 	tick_setup_sched_timer();
 
@@ -1221,6 +1231,22 @@ static void __run_hrtimer(struct hrtimer
 	timer->state &= ~HRTIMER_STATE_CALLBACK;
 }
 
+static void
+hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base, ktime_t now)
+{
+	struct timerqueue_node *node;
+	struct hrtimer_clock_base *base;
+
+	base = cpu_base->clock_base + HRTIMER_BASE_REALTIME_COS;
+
+	while ((node = timerqueue_getnext(&base->active))) {
+			struct hrtimer *timer;
+
+			timer = container_of(node, struct hrtimer, node);
+			__run_hrtimer(timer, &now);
+	}
+}
+
 #ifdef CONFIG_HIGH_RES_TIMERS
 
 /*
@@ -1725,6 +1751,7 @@ void __init hrtimers_init(void)
 	hrtimer_clock_to_base_table[CLOCK_REALTIME] = HRTIMER_BASE_REALTIME;
 	hrtimer_clock_to_base_table[CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC;
 	hrtimer_clock_to_base_table[CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME;
+	hrtimer_clock_to_base_table[CLOCK_REALTIME_COS] = HRTIMER_BASE_REALTIME_COS;
 
 	hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
 			  (void *)(long)smp_processor_id());
Index: linux-2.6-tip/kernel/time/timekeeping.c
===================================================================
--- linux-2.6-tip.orig/kernel/time/timekeeping.c
+++ linux-2.6-tip/kernel/time/timekeeping.c
@@ -1049,6 +1049,21 @@ void get_xtime_and_monotonic_and_sleep_o
 }
 
 /**
+ * get_monotonic_offset() - get wall_to_monotonic
+ */
+ktime_t get_monotonic_offset(void)
+{
+	unsigned long seq;
+	struct timespec wtom;
+
+	do {
+		seq = read_seqbegin(&xtime_lock);
+		wtom = wall_to_monotonic;
+	} while (read_seqretry(&xtime_lock, seq));
+	return timespec_to_ktime(wtom);
+}
+
+/**
  * xtime_update() - advances the timekeeping infrastructure
  * @ticks:	number of ticks, that have elapsed since the last call.
  *
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ