lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240914-mgtime-v8-6-5bd872330bed@kernel.org>
Date: Sat, 14 Sep 2024 13:07:19 -0400
From: Jeff Layton <jlayton@...nel.org>
To: John Stultz <jstultz@...gle.com>, Thomas Gleixner <tglx@...utronix.de>, 
 Stephen Boyd <sboyd@...nel.org>, Alexander Viro <viro@...iv.linux.org.uk>, 
 Christian Brauner <brauner@...nel.org>, Jan Kara <jack@...e.cz>, 
 Steven Rostedt <rostedt@...dmis.org>, 
 Masami Hiramatsu <mhiramat@...nel.org>, 
 Mathieu Desnoyers <mathieu.desnoyers@...icios.com>, 
 Jonathan Corbet <corbet@....net>, Chandan Babu R <chandan.babu@...cle.com>, 
 "Darrick J. Wong" <djwong@...nel.org>, Theodore Ts'o <tytso@....edu>, 
 Andreas Dilger <adilger.kernel@...ger.ca>, Chris Mason <clm@...com>, 
 Josef Bacik <josef@...icpanda.com>, David Sterba <dsterba@...e.com>, 
 Hugh Dickins <hughd@...gle.com>, Andrew Morton <akpm@...ux-foundation.org>, 
 Chuck Lever <chuck.lever@...cle.com>, 
 Vadim Fedorenko <vadim.fedorenko@...ux.dev>
Cc: Randy Dunlap <rdunlap@...radead.org>, linux-kernel@...r.kernel.org, 
 linux-fsdevel@...r.kernel.org, linux-trace-kernel@...r.kernel.org, 
 linux-doc@...r.kernel.org, linux-xfs@...r.kernel.org, 
 linux-ext4@...r.kernel.org, linux-btrfs@...r.kernel.org, 
 linux-nfs@...r.kernel.org, linux-mm@...ck.org, 
 Jeff Layton <jlayton@...nel.org>
Subject: [PATCH v8 06/11] fs: add percpu counters for significant
 multigrain timestamp events

Add new percpu counters for tracking various stats around mgtimes, and a
new debugfs file for displaying them when CONFIG_DEBUG_FS is enabled:

- number of attempted ctime updates
- number of successful i_ctime_nsec swaps
- number of fine-grained timestamp fetches
- number of coarse-grained floor swaps

Reviewed-by: Josef Bacik <josef@...icpanda.com>
Reviewed-by: Darrick J. Wong <djwong@...nel.org>
Reviewed-by: Jan Kara <jack@...e.cz>
Signed-off-by: Jeff Layton <jlayton@...nel.org>
---
 fs/inode.c                         | 76 ++++++++++++++++++++++++++++++++++++--
 include/linux/timekeeping.h        |  1 +
 kernel/time/timekeeping.c          |  3 +-
 kernel/time/timekeeping_debug.c    | 12 ++++++
 kernel/time/timekeeping_internal.h |  3 ++
 5 files changed, 90 insertions(+), 5 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index d7da9d06921f..1f0487104c71 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -21,6 +21,8 @@
 #include <linux/list_lru.h>
 #include <linux/iversion.h>
 #include <linux/rw_hint.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
 #include <trace/events/writeback.h>
 #define CREATE_TRACE_POINTS
 #include <trace/events/timestamp.h>
@@ -101,6 +103,70 @@ long get_nr_dirty_inodes(void)
 	return nr_dirty > 0 ? nr_dirty : 0;
 }
 
+#ifdef CONFIG_DEBUG_FS
+static DEFINE_PER_CPU(long, mg_ctime_updates);
+static DEFINE_PER_CPU(long, mg_fine_stamps);
+static DEFINE_PER_CPU(long, mg_ctime_swaps);
+
+static long get_mg_ctime_updates(void)
+{
+	int i;
+	long sum = 0;
+
+	for_each_possible_cpu(i)
+		sum += per_cpu(mg_ctime_updates, i);
+	return sum < 0 ? 0 : sum;
+}
+
+static long get_mg_fine_stamps(void)
+{
+	int i;
+	long sum = 0;
+
+	for_each_possible_cpu(i)
+		sum += per_cpu(mg_fine_stamps, i);
+	return sum < 0 ? 0 : sum;
+}
+
+static long get_mg_ctime_swaps(void)
+{
+	int i;
+	long sum = 0;
+
+	for_each_possible_cpu(i)
+		sum += per_cpu(mg_ctime_swaps, i);
+	return sum < 0 ? 0 : sum;
+}
+
+#define mgtime_counter_inc(__var)	this_cpu_inc(__var)
+
+static int mgts_show(struct seq_file *s, void *p)
+{
+	long ctime_updates = get_mg_ctime_updates();
+	long ctime_swaps = get_mg_ctime_swaps();
+	long fine_stamps = get_mg_fine_stamps();
+	long floor_swaps = get_mg_floor_swaps();
+
+	seq_printf(s, "%ld %ld %ld %ld\n",
+		   ctime_updates, ctime_swaps, fine_stamps, floor_swaps);
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(mgts);
+
+static int __init mg_debugfs_init(void)
+{
+	debugfs_create_file("multigrain_timestamps", S_IFREG | S_IRUGO, NULL, NULL, &mgts_fops);
+	return 0;
+}
+late_initcall(mg_debugfs_init);
+
+#else /* ! CONFIG_DEBUG_FS */
+
+#define mgtime_counter_inc(__var)	do { } while (0)
+
+#endif /* CONFIG_DEBUG_FS */
+
 /*
  * Handle nr_inode sysctl
  */
@@ -2655,10 +2721,9 @@ EXPORT_SYMBOL(timestamp_truncate);
  *
  * If it is multigrain, then we first see if the coarse-grained timestamp is
  * distinct from what we have. If so, then we'll just use that. If we have to
- * get a fine-grained timestamp, then do so, and try to swap it into the floor.
- * We accept the new floor value regardless of the outcome of the cmpxchg.
- * After that, we try to swap the new value into i_ctime_nsec. Again, we take
- * the resulting ctime, regardless of the outcome of the swap.
+ * get a fine-grained timestamp, then do so. After that, we try to swap the new
+ * value into i_ctime_nsec. We take the resulting ctime, regardless of the
+ * outcome of the swap.
  */
 struct timespec64 inode_set_ctime_current(struct inode *inode)
 {
@@ -2687,8 +2752,10 @@ struct timespec64 inode_set_ctime_current(struct inode *inode)
 		if (timespec64_compare(&now, &ctime) <= 0) {
 			ktime_get_real_ts64_mg(&now);
 			now = timestamp_truncate(now, inode);
+			mgtime_counter_inc(mg_fine_stamps);
 		}
 	}
+	mgtime_counter_inc(mg_ctime_updates);
 
 	/* No need to cmpxchg if it's exactly the same */
 	if (cns == now.tv_nsec && inode->i_ctime_sec == now.tv_sec) {
@@ -2702,6 +2769,7 @@ struct timespec64 inode_set_ctime_current(struct inode *inode)
 		/* If swap occurred, then we're (mostly) done */
 		inode->i_ctime_sec = now.tv_sec;
 		trace_ctime_ns_xchg(inode, cns, now.tv_nsec, cur);
+		mgtime_counter_inc(mg_ctime_swaps);
 	} else {
 		/*
 		 * Was the change due to someone marking the old ctime QUERIED?
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 7aa85246c183..b9c8c597a073 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -48,6 +48,7 @@ extern void ktime_get_coarse_real_ts64(struct timespec64 *ts);
 /* Multigrain timestamp interfaces */
 extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts);
 extern void ktime_get_real_ts64_mg(struct timespec64 *ts);
+extern long get_mg_floor_swaps(void);
 
 void getboottime64(struct timespec64 *ts);
 
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 16937242b904..94b0219955a2 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -2440,7 +2440,7 @@ EXPORT_SYMBOL_GPL(ktime_get_coarse_real_ts64_mg);
  * regardless of the outcome of the swap. Note that this is a filesystem
  * specific interface and should be avoided outside of that context.
  */
-void ktime_get_real_ts64_mg(struct timespec64 *ts, u64 cookie)
+void ktime_get_real_ts64_mg(struct timespec64 *ts)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 	ktime_t old = atomic64_read(&mg_floor);
@@ -2464,6 +2464,7 @@ void ktime_get_real_ts64_mg(struct timespec64 *ts, u64 cookie)
 	if (atomic64_try_cmpxchg(&mg_floor, &old, mono)) {
 		ts->tv_nsec = 0;
 		timespec64_add_ns(ts, nsecs);
+		mgtime_counter_inc(mg_floor_swaps);
 	} else {
 		/*
 		 * Something has changed mg_floor since "old" was
diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c
index b73e8850e58d..9a3792072762 100644
--- a/kernel/time/timekeeping_debug.c
+++ b/kernel/time/timekeeping_debug.c
@@ -17,6 +17,9 @@
 
 #define NUM_BINS 32
 
+/* incremented every time mg_floor is updated */
+DEFINE_PER_CPU(long, mg_floor_swaps);
+
 static unsigned int sleep_time_bin[NUM_BINS] = {0};
 
 static int tk_debug_sleep_time_show(struct seq_file *s, void *data)
@@ -53,3 +56,12 @@ void tk_debug_account_sleep_time(const struct timespec64 *t)
 			   (s64)t->tv_sec, t->tv_nsec / NSEC_PER_MSEC);
 }
 
+long get_mg_floor_swaps(void)
+{
+	int i;
+	long sum = 0;
+
+	for_each_possible_cpu(i)
+		sum += per_cpu(mg_floor_swaps, i);
+	return sum < 0 ? 0 : sum;
+}
diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h
index 4ca2787d1642..2b49332b45a5 100644
--- a/kernel/time/timekeeping_internal.h
+++ b/kernel/time/timekeeping_internal.h
@@ -11,8 +11,11 @@
  */
 #ifdef CONFIG_DEBUG_FS
 extern void tk_debug_account_sleep_time(const struct timespec64 *t);
+DECLARE_PER_CPU(long, mg_floor_swaps);
+#define mgtime_counter_inc(__var)	this_cpu_inc(__var)
 #else
 #define tk_debug_account_sleep_time(x)
+#define mgtime_counter_inc(__var)	do { } while (0)
 #endif
 
 #ifdef CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE

-- 
2.46.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ