linux-kernel - [PATCH 1/4] Add clock_gettime_ns syscall

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Sun, 25 Dec 2011 08:51:00 -0800
From:	Andy Lutomirski <luto@...capital.net>
To:	linux-kernel@...r.kernel.org, Kumar Sundararajan <kumar@...com>,
	john stultz <johnstul@...ibm.com>, Arun Sharma <asharma@...com>
Cc:	Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...e.hu>,
	Thomas Gleixner <tglx@...utronix.de>,
	Richard Cochran <richardcochran@...il.com>,
	Andy Lutomirski <luto@...capital.net>
Subject: [PATCH 1/4] Add clock_gettime_ns syscall

On some architectures, clock_gettime is fast enough that converting
between nanoseconds and struct timespec takes a significant amount
of time.  Introduce a new syscall that does the same thing but
returns the answer in nanoseconds.  2^64 nanoseconds since the epoch
won't wrap around until the year 2554, and by then we can use
128-bit types.

clock_gettime_ns returns an unsigned nanosecond count.  It will wrap
when the time from whatever clock is being read exceeds about 584
years.  For CLOCK_MONOTONIC, CLOCK_BOOTTIME, etc, this is unlikely
to be a problem.  For CLOCK_REALTIME, user code can either check for
wraparound or switch to 128-bit integers in a little over 500
years.

This interface intentionally does not support sub-nanosecond
precision.  For one thing, light only travels about a foot per
nanosecond, so nanoseconds are really pretty good for networking
purposes.  For another, 2^64 picoseconds (say) is a short enough
interval to be inconvenient.  If anyone needs sub-nanosecond
precision for anything other than profiling, they're welcome to
figure out an appropriate interface.  For very precise profiling,
"what time is it" is the wrong question, anyway -- modern CPUs can
reorder things across time scales much longer than a nanosecond.

Signed-off-by: Andy Lutomirski <luto@...capital.net>
---
 arch/x86/include/asm/unistd_64.h |    2 ++
 include/linux/syscalls.h         |    3 +++
 include/linux/time.h             |    5 +++++
 kernel/posix-timers.c            |   30 ++++++++++++++++++++++++++++++
 4 files changed, 40 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 2010405..3a48069 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -683,6 +683,8 @@ __SYSCALL(__NR_sendmmsg, sys_sendmmsg)
 __SYSCALL(__NR_setns, sys_setns)
 #define __NR_getcpu				309
 __SYSCALL(__NR_getcpu, sys_getcpu)
+#define __NR_clock_gettime_ns			310
+__SYSCALL(__NR_clock_gettime_ns, sys_clock_gettime_ns)
 
 #ifndef __NO_STUBS
 #define __ARCH_WANT_OLD_READDIR
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 1ff0ec2..89cb897 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -49,6 +49,7 @@ struct statfs;
 struct statfs64;
 struct __sysctl_args;
 struct sysinfo;
+struct timens;
 struct timespec;
 struct timeval;
 struct timex;
@@ -316,6 +317,8 @@ asmlinkage long sys_clock_settime(clockid_t which_clock,
 				const struct timespec __user *tp);
 asmlinkage long sys_clock_gettime(clockid_t which_clock,
 				struct timespec __user *tp);
+asmlinkage long sys_clock_gettime_ns(clockid_t which_clock,
+				struct timens __user *tp);
 asmlinkage long sys_clock_adjtime(clockid_t which_clock,
 				struct timex __user *tx);
 asmlinkage long sys_clock_getres(clockid_t which_clock,
diff --git a/include/linux/time.h b/include/linux/time.h
index b306178..d4488b1 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -27,6 +27,11 @@ struct timezone {
 	int	tz_dsttime;	/* type of dst correction */
 };
 
+struct timens {
+	__u64	ns;		/* nanoseconds since the relevant epoch */
+	__u64	padding;	/* for future expansion (UTC offset? sub-ns?) */
+};
+
 #ifdef __KERNEL__
 
 extern struct timezone sys_tz;
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 4556182..43bc842 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -980,6 +980,36 @@ SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
 	return error;
 }
 
+SYSCALL_DEFINE2(clock_gettime_ns, const clockid_t, which_clock,
+		struct timens __user *, tp)
+{
+	/*
+	 * Read the clock as a timespec and flatten it to nanoseconds.  The
+	 * syscall entry costs far more than the extra division and
+	 * multiplication; arch-specific implementations can be made faster.
+	 */
+	struct k_clock *kc = clockid_to_kclock(which_clock);
+	struct timespec kernel_timespec;
+	struct timens timens;
+	int error;
+
+	if (!kc)
+		return -EINVAL;
+
+	error = kc->clock_get(which_clock, &kernel_timespec);
+	if (error)
+		return error;
+
+	/* Widen before multiplying: tv_sec may be only 32 bits wide. */
+	timens.ns = (u64)kernel_timespec.tv_sec * NSEC_PER_SEC
+		+ kernel_timespec.tv_nsec;
+	timens.padding = 0;
+
+	/* copy_to_user() returns the number of bytes NOT copied, not an errno. */
+	if (copy_to_user(tp, &timens, sizeof(timens)))
+		return -EFAULT;
+	return 0;
+}
+
 SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
 		struct timex __user *, utx)
 {
-- 
1.7.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/