lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 28 Jul 2020 11:20:01 +0800
From:   Frankie Chang <Frankie.Chang@...iatek.com>
To:     Todd Kjos <tkjos@...gle.com>
CC:     Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
        Joel Fernandes <joel@...lfernandes.org>,
        Martijn Coenen <maco@...roid.com>,
        Arve Hjønnevåg <arve@...roid.com>,
        Christian Brauner <christian@...uner.io>,
        <linux-kernel@...r.kernel.org>,
        <linux-mediatek@...ts.infradead.org>, <wsd_upstream@...iatek.com>,
        Jian-Min Liu <Jian-Min.Liu@...iatek.com>,
        Frankie Chang <Frankie.Chang@...iatek.com>
Subject: [PATCH v6 3/3] binder: add transaction latency tracer

From: "Frankie.Chang" <Frankie.Chang@...iatek.com>

Record start/end timestamp for binder transaction.
When transaction is completed or transaction is free,
it would be checked if transaction latency over threshold
(default 2 sec), if yes, printing related information for tracing.

/* Implement details */
- Add latency tracer module to monitor transaction
  by attaching to new tracepoints introduced
  when transactions are allocated and freed.
  The trace_binder_txn_latency_free would not be enabled
  by default. Monitoring which transaction is too slow to
  cause some of exceptions is important. So we hook the
  tracepoint to call the monitor function.

- Since some of modules would trigger timeout NE
  if their binder transaction don't finish in time,
  such as audio timeout (5 sec), even BT command
  timeout (2 sec), etc.
  Therefore, setting the timeout threshold as default
  2 seconds could be helpful to debug.
  But this timeout threshold is configurable, to let
  all users determine the more suitable threshold.

- The reason why printing the related information to
  kernel information log but not trace buffer is that
  some abnormal transactions may be pending for a long
  time ago, they could not be recorded due to buffer
  limited.

Signed-off-by: Frankie.Chang <Frankie.Chang@...iatek.com>
---
 drivers/android/Kconfig                 |    8 +++
 drivers/android/Makefile                |    1 +
 drivers/android/binder.c                |    2 +
 drivers/android/binder_internal.h       |   13 ++++
 drivers/android/binder_latency_tracer.c |  112 +++++++++++++++++++++++++++++++
 drivers/android/binder_trace.h          |   26 ++++++-
 6 files changed, 159 insertions(+), 3 deletions(-)
 create mode 100644 drivers/android/binder_latency_tracer.c

diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig
index 6fdf2ab..91fff1e 100644
--- a/drivers/android/Kconfig
+++ b/drivers/android/Kconfig
@@ -54,6 +54,14 @@ config ANDROID_BINDER_IPC_SELFTEST
 	  exhaustively with combinations of various buffer sizes and
 	  alignments.
 
+config BINDER_TRANSACTION_LATENCY_TRACKING
+	tristate "Android Binder transaction tracking"
+	help
+	  Used for track abnormal binder transaction which is over threshold,
+	  when the transaction is done or be free, this transaction would be
+	  checked whether it executed overtime.
+	  If yes, printing out the detailed info.
+
 endif # if ANDROID
 
 endmenu
diff --git a/drivers/android/Makefile b/drivers/android/Makefile
index c9d3d0c9..c2ffdb6 100644
--- a/drivers/android/Makefile
+++ b/drivers/android/Makefile
@@ -4,3 +4,4 @@ ccflags-y += -I$(src)			# needed for trace events
 obj-$(CONFIG_ANDROID_BINDERFS)		+= binderfs.o
 obj-$(CONFIG_ANDROID_BINDER_IPC)	+= binder.o binder_alloc.o
 obj-$(CONFIG_ANDROID_BINDER_IPC_SELFTEST) += binder_alloc_selftest.o
+obj-$(CONFIG_BINDER_TRANSACTION_LATENCY_TRACKING)	+= binder_latency_tracer.o
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 1e6fc40..0956403 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2658,6 +2658,7 @@ static void binder_transaction(struct binder_proc *proc,
 		return_error_line = __LINE__;
 		goto err_alloc_t_failed;
 	}
+	trace_binder_txn_latency_alloc(t);
 	INIT_LIST_HEAD(&t->fd_fixups);
 	binder_stats_created(BINDER_STAT_TRANSACTION);
 	spin_lock_init(&t->lock);
@@ -5147,6 +5148,7 @@ static void print_binder_transaction_ilocked(struct seq_file *m,
 		   to_proc ? to_proc->pid : 0,
 		   t->to_thread ? t->to_thread->pid : 0,
 		   t->code, t->flags, t->priority, t->need_reply);
+	trace_binder_txn_latency_info(m, t);
 	spin_unlock(&t->lock);
 
 	if (proc != to_proc) {
diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
index 5b65413..596db00 100644
--- a/drivers/android/binder_internal.h
+++ b/drivers/android/binder_internal.h
@@ -13,6 +13,11 @@
 #include <linux/uidgid.h>
 #include "binder_alloc.h"
 
+#if IS_ENABLED(CONFIG_BINDER_TRANSACTION_LATENCY_TRACKING)
+#include <linux/rtc.h>
+#include <linux/time.h>
+#endif
+
 struct binder_context {
 	struct binder_node *binder_context_mgr_node;
 	struct mutex context_mgr_node_lock;
@@ -521,6 +526,14 @@ struct binder_transaction {
 	 * during thread teardown
 	 */
 	spinlock_t lock;
+	/**
+	 * @timestamp and @tv are used to record the time
+	 * that the binder transaction startup
+	 */
+#if IS_ENABLED(CONFIG_BINDER_TRANSACTION_LATENCY_TRACKING)
+	struct timespec64 timestamp;
+	struct timeval tv;
+#endif
 };
 
 /**
diff --git a/drivers/android/binder_latency_tracer.c b/drivers/android/binder_latency_tracer.c
new file mode 100644
index 0000000..b339ae1
--- /dev/null
+++ b/drivers/android/binder_latency_tracer.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 MediaTek Inc.
+ */
+
+#include <linux/module.h>
+#include <uapi/linux/android/binder.h>
+#include "binder_internal.h"
+#include "binder_trace.h"
+
+/**
+ * The reason setting the binder_txn_latency_threshold to 2 sec
+ * is that most of timeout abort is greater or equal to 2 sec.
+ * Making it configurable to let all users determine which
+ * threshold is more suitable.
+ */
+static uint32_t binder_txn_latency_threshold = 2;
+module_param_named(threshold, binder_txn_latency_threshold,
+			uint, 0644);
+
+/*
+ * probe_binder_txn_latency_free - Output info of a delay transaction
+ * @t:          pointer to the over-time transaction
+ */
+void probe_binder_txn_latency_free(void *ignore, struct binder_transaction *t)
+{
+	struct rtc_time tm;
+	struct timespec64 *startime;
+	struct timespec64 cur, sub_t;
+
+	ktime_get_ts64(&cur);
+	startime = &t->timestamp;
+	sub_t = timespec64_sub(cur, *startime);
+
+	/* if transaction time is over than binder_txn_latency_threshold (sec),
+	 * show timeout warning log.
+	 */
+	if (sub_t.tv_sec < binder_txn_latency_threshold)
+		return;
+
+	rtc_time_to_tm(t->tv.tv_sec, &tm);
+
+	pr_info_ratelimited("%d: from %d:%d to %d:%d",
+			t->debug_id,
+			t->from ? t->from->proc->pid : 0,
+			t->from ? t->from->pid : 0,
+			t->to_proc ? t->to_proc->pid : 0,
+			t->to_thread ? t->to_thread->pid : 0);
+
+	pr_info_ratelimited(" total %u.%03ld s code %u start %lu.%03ld android %d-%02d-%02d %02d:%02d:%02d.%03lu\n",
+			(unsigned int)sub_t.tv_sec,
+			(sub_t.tv_nsec / NSEC_PER_MSEC),
+			t->code,
+			(unsigned long)startime->tv_sec,
+			(startime->tv_nsec / NSEC_PER_MSEC),
+			(tm.tm_year + 1900), (tm.tm_mon + 1), tm.tm_mday,
+			tm.tm_hour, tm.tm_min, tm.tm_sec,
+			(unsigned long)(t->tv.tv_usec / USEC_PER_MSEC));
+}
+
+static void probe_binder_txn_latency_alloc(void *ignore,
+					struct binder_transaction *t)
+{
+	struct timespec64 now;
+	ktime_get_ts64(&t->timestamp);
+	ktime_get_real_ts64(&now);
+	t->tv.tv_sec = now.tv_sec;
+	t->tv.tv_sec -= (sys_tz.tz_minuteswest * 60);
+	t->tv.tv_usec = now.tv_nsec/1000;
+}
+
+static void probe_binder_txn_latency_info(void *ignore, struct seq_file *m,
+					struct binder_transaction *t)
+{
+	struct rtc_time tm;
+
+	rtc_time_to_tm(t->tv.tv_sec, &tm);
+	seq_printf(m,
+		   " start %lu.%06lu android %d-%02d-%02d %02d:%02d:%02d.%03lu",
+		   (unsigned long)t->timestamp.tv_sec,
+		   (t->timestamp.tv_nsec / NSEC_PER_USEC),
+		   (tm.tm_year + 1900), (tm.tm_mon + 1), tm.tm_mday,
+		   tm.tm_hour, tm.tm_min, tm.tm_sec,
+		   (unsigned long)(t->tv.tv_usec / USEC_PER_MSEC));
+}
+
+static int __init init_binder_latency_tracer(void)
+{
+	register_trace_binder_txn_latency_free(
+			probe_binder_txn_latency_free, NULL);
+	register_trace_binder_txn_latency_alloc(
+			probe_binder_txn_latency_alloc, NULL);
+	register_trace_binder_txn_latency_info(
+			probe_binder_txn_latency_info, NULL);
+
+	return 0;
+}
+
+static void exit_binder_latency_tracer(void)
+{
+	unregister_trace_binder_txn_latency_free(
+			probe_binder_txn_latency_free, NULL);
+	unregister_trace_binder_txn_latency_alloc(
+			probe_binder_txn_latency_alloc, NULL);
+	unregister_trace_binder_txn_latency_info(
+			probe_binder_txn_latency_info, NULL);
+}
+
+module_init(init_binder_latency_tracer);
+module_exit(exit_binder_latency_tracer);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/android/binder_trace.h b/drivers/android/binder_trace.h
index 8ac87d1..405d493 100644
--- a/drivers/android/binder_trace.h
+++ b/drivers/android/binder_trace.h
@@ -95,6 +95,17 @@
 		  __entry->thread_todo)
 );
 
+DECLARE_TRACE(binder_txn_latency_alloc,
+	TP_PROTO(struct binder_transaction *t),
+	TP_ARGS(t, e)
+);
+
+DECLARE_TRACE(binder_txn_latency_info,
+	TP_PROTO(struct seq_file *m,
+		 struct binder_transaction *t),
+	TP_ARGS(m, t)
+);
+
 TRACE_EVENT(binder_txn_latency_free,
 	TP_PROTO(struct binder_transaction *t),
 	TP_ARGS(t),
@@ -106,6 +117,8 @@
 		__field(int, to_thread)
 		__field(unsigned int, code)
 		__field(unsigned int, flags)
+		__field(unsigned long, start_sec)
+		__field(unsigned long, start_nsec)
 	),
 	TP_fast_assign(
 		__entry->debug_id = t->debug_id;
@@ -115,11 +128,18 @@
 		__entry->to_thread = t->to_thread ? t->to_thread->pid : 0;
 		__entry->code = t->code;
 		__entry->flags = t->flags;
-	),
-	TP_printk("transaction=%d from %d:%d to %d:%d flags=0x%x code=0x%x",
+#if IS_ENABLED(CONFIG_BINDER_TRANSACTION_LATENCY_TRACKING)
+		__entry->start_sec = t->timestamp.tv_sec;
+		__entry->start_nsec = t->timestamp.tv_nsec / NSEC_PER_MSEC;
+#else
+		__entry->start_sec = 0;
+		__entry->start_nsec = 0;
+#endif
+	),
+	TP_printk("transaction=%d from %d:%d to %d:%d flags=0x%x code=0x%x start %lu.%03ld",
 		  __entry->debug_id, __entry->from_proc, __entry->from_thread,
 		  __entry->to_proc, __entry->to_thread, __entry->code,
-		  __entry->flags)
+		  __entry->flags, __entry->start_sec, __entry->start_nsec)
 );
 
 TRACE_EVENT(binder_transaction,
-- 
1.7.9.5

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ