lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <172070452878.2992819.10548676901200594081.stgit@firesoul>
Date: Thu, 11 Jul 2024 15:29:04 +0200
From: Jesper Dangaard Brouer <hawk@...nel.org>
To: tj@...nel.org, cgroups@...r.kernel.org, yosryahmed@...gle.com,
 shakeel.butt@...ux.dev
Cc: Jesper Dangaard Brouer <hawk@...nel.org>, hannes@...xchg.org,
 lizefan.x@...edance.com, longman@...hat.com, kernel-team@...udflare.com,
 linux-mm@...ck.org, linux-kernel@...r.kernel.org
Subject: [PATCH V7 2/2 RFC] cgroup/rstat: add tracepoint for ongoing flusher
 waits

I'll be using this tracepoint in production and will
report back on findings, e.g. measuring how often the
race for ongoing flusher happens. Then we can decide
whether it is worth keeping/applying this patch.

Signed-off-by: Jesper Dangaard Brouer <hawk@...nel.org>
---
 include/trace/events/cgroup.h |   49 +++++++++++++++++++++++++++++++++++++++++
 kernel/cgroup/rstat.c         |   15 ++++++++++---
 2 files changed, 61 insertions(+), 3 deletions(-)

diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h
index af2755bda6eb..c8d84e9a08dc 100644
--- a/include/trace/events/cgroup.h
+++ b/include/trace/events/cgroup.h
@@ -296,6 +296,55 @@ DEFINE_EVENT(cgroup_rstat, cgroup_rstat_cpu_unlock_fastpath,
 	TP_ARGS(cgrp, cpu, contended)
 );
 
+DECLARE_EVENT_CLASS(cgroup_ongoing,
+
+	TP_PROTO(struct cgroup *cgrp, struct cgroup *cgrp_ongoing, \
+		 long res, unsigned int race, ktime_t ts),
+
+	TP_ARGS(cgrp, cgrp_ongoing, res, race, ts),
+
+	TP_STRUCT__entry(
+		__field(	int,		root			)
+		__field(	int,		level			)
+		__field(	u64,		id			)
+		__field(	u64,		id_ongoing		)
+		__field(	ktime_t,	ts			)
+		__field(	long,		res			)
+		__field(	u64,		race			)
+	),
+
+	TP_fast_assign(
+		__entry->root = cgrp->root->hierarchy_id;
+		__entry->id = cgroup_id(cgrp);
+		__entry->level = cgrp->level;
+		__entry->id_ongoing = cgroup_id(cgrp_ongoing);
+		__entry->res = res;
+		__entry->race = race;
+		__entry->ts = ts;
+	),
+
+	TP_printk("root=%d id=%llu level=%d ongoing_flusher=%llu res=%ld race=%llu ts=%lld",
+		  __entry->root, __entry->id, __entry->level,
+		  __entry->id_ongoing, __entry->res, __entry->race, __entry->ts)
+);
+
+DEFINE_EVENT(cgroup_ongoing, cgroup_ongoing_flusher,
+
+	TP_PROTO(struct cgroup *cgrp, struct cgroup *cgrp_ongoing, \
+		 long res, unsigned int race, ktime_t ts),
+
+	TP_ARGS(cgrp, cgrp_ongoing, res, race, ts)
+);
+
+DEFINE_EVENT(cgroup_ongoing, cgroup_ongoing_flusher_wait,
+
+	TP_PROTO(struct cgroup *cgrp, struct cgroup *cgrp_ongoing, \
+		 long res, unsigned int race, ktime_t ts),
+
+	TP_ARGS(cgrp, cgrp_ongoing, res, race, ts)
+);
+
+
 #endif /* _TRACE_CGROUP_H */
 
 /* This part must be outside protection */
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index fe2a81a310bb..2d7af86e31c8 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -321,6 +321,7 @@ static inline void __cgroup_rstat_unlock(struct cgroup *cgrp, int cpu_in_loop)
 static bool cgroup_rstat_trylock_flusher(struct cgroup *cgrp)
 {
 	struct cgroup *ongoing;
+	unsigned int race = 0;
 	bool locked;
 
 	/* Check if ongoing flusher is already taking care of this, if
@@ -330,17 +331,25 @@ static bool cgroup_rstat_trylock_flusher(struct cgroup *cgrp)
 retry:
 	ongoing = READ_ONCE(cgrp_rstat_ongoing_flusher);
 	if (ongoing && cgroup_is_descendant(cgrp, ongoing)) {
-		wait_for_completion_interruptible_timeout(
+		ktime_t ts = ktime_get_mono_fast_ns();
+		long res = 0;
+
+		trace_cgroup_ongoing_flusher(cgrp, ongoing, 0, race, ts);
+
+		res = wait_for_completion_interruptible_timeout(
 			&ongoing->flush_done, MAX_WAIT);
-		/* TODO: Add tracepoint here */
+		trace_cgroup_ongoing_flusher_wait(cgrp, ongoing, res, race, ts);
+
 		return false;
 	}
 
 	locked = __cgroup_rstat_trylock(cgrp, -1);
 	if (!locked) {
 		/* Contended: Handle loosing race for ongoing flusher */
-		if (!ongoing && READ_ONCE(cgrp_rstat_ongoing_flusher))
+		if (!ongoing && READ_ONCE(cgrp_rstat_ongoing_flusher)) {
+			race++;
 			goto retry;
+		}
 
 		__cgroup_rstat_lock(cgrp, -1, false);
 	}



Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ