lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20200221211657.147250-1-lrizzo@google.com>
Date:   Fri, 21 Feb 2020 13:16:57 -0800
From:   Luigi Rizzo <lrizzo@...gle.com>
To:     linux-kernel@...r.kernel.org, naveen.n.rao@...ux.ibm.com,
        anil.s.keshavamurthy@...el.com, davem@...emloft.net,
        mhiramat@...nel.org, gregkh@...uxfoundation.org
Cc:     rizzo@....unipi.it, Luigi Rizzo <lrizzo@...gle.com>
Subject: [PATCH v4] kretprobe: percpu support

kretprobe uses a list protected by a single lock to allocate a
kretprobe_instance in pre_handler_kretprobe(). This works poorly with
concurrent calls.

This patch offers a simplified fix: the percpu_instance flag indicates
that kretprobe_instance's are per CPU, which makes allocation lock free.

As part of this patch, we factor out the code to allocate instances in
get_pcpu_rp_instance() and get_rp_instance().

At the moment we only allow one pending kretprobe per CPU. This can be
extended to a small constant number of entries, but finding a free entry
would either bring back the lock, or require scanning an array, which can
be expensive (especially if callers block and migrate).

Signed-off-by: Luigi Rizzo <lrizzo@...gle.com>
---
 include/linux/kprobes.h |   8 +++-
 kernel/kprobes.c        | 102 +++++++++++++++++++++++++++++++---------
 kernel/test_kprobes.c   |  20 ++++++--
 3 files changed, 101 insertions(+), 29 deletions(-)

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 04bdaf01112cb..c5894f4c5c26c 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -142,6 +142,8 @@ static inline int kprobe_ftrace(struct kprobe *p)
  * can be active concurrently.
  * nmissed - tracks the number of times the probed function's return was
  * ignored, due to maxactive being too low.
+ * percpu_instance - if set, uses one instance per cpu instead of allocating
+ * from the list protected by lock.
  *
  */
 struct kretprobe {
@@ -151,8 +153,12 @@ struct kretprobe {
 	int maxactive;
 	int nmissed;
 	size_t data_size;
-	struct hlist_head free_instances;
+	union {
+		struct kretprobe_instance __percpu *pcpu;
+		struct hlist_head free_instances;
+	};
 	raw_spinlock_t lock;
+	bool percpu_instance;
 };
 
 struct kretprobe_instance {
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 2625c241ac00f..46018a4a76178 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1184,6 +1184,10 @@ void recycle_rp_inst(struct kretprobe_instance *ri,
 	hlist_del(&ri->hlist);
 	INIT_HLIST_NODE(&ri->hlist);
 	if (likely(rp)) {
+		if (rp->percpu_instance) {
+			ri->rp = NULL;
+			return;
+		}
 		raw_spin_lock(&rp->lock);
 		hlist_add_head(&ri->hlist, &rp->free_instances);
 		raw_spin_unlock(&rp->lock);
@@ -1274,6 +1278,11 @@ static inline void free_rp_inst(struct kretprobe *rp)
 	struct kretprobe_instance *ri;
 	struct hlist_node *next;
 
+	if (rp->percpu_instance) {
+		free_percpu(rp->pcpu);
+		return;
+	}
+
 	hlist_for_each_entry_safe(ri, next, &rp->free_instances, hlist) {
 		hlist_del(&ri->hlist);
 		kfree(ri);
@@ -1851,6 +1860,46 @@ unsigned long __weak arch_deref_entry_point(void *entry)
 }
 
 #ifdef CONFIG_KRETPROBES
+struct kretprobe_instance *get_pcpu_rp_instance(struct kretprobe *rp)
+{
+	struct kretprobe_instance *ri;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	ri = this_cpu_ptr(rp->pcpu);
+	if (!ri || ri->rp) { /* already in use */
+		local_irq_restore(flags);
+		rp->nmissed++;
+		return NULL;
+	}
+	INIT_HLIST_NODE(&ri->hlist);
+	ri->rp = rp;
+	ri->task = current;
+	local_irq_restore(flags);
+	return ri;
+}
+
+struct kretprobe_instance *get_rp_instance(struct kretprobe *rp)
+{
+	struct kretprobe_instance *ri;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&rp->lock, flags);
+	if (hlist_empty(&rp->free_instances)) {
+		rp->nmissed++;
+		raw_spin_unlock_irqrestore(&rp->lock, flags);
+		return NULL;
+	}
+	ri = hlist_entry(rp->free_instances.first, struct kretprobe_instance,
+			 hlist);
+	hlist_del(&ri->hlist);
+	raw_spin_unlock_irqrestore(&rp->lock, flags);
+
+	ri->rp = rp;
+	ri->task = current;
+	return ri;
+}
+
 /*
  * This kprobe pre_handler is registered with every kretprobe. When probe
  * hits it will set up the return probe.
@@ -1873,35 +1922,32 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
 	}
 
 	/* TODO: consider to only swap the RA after the last pre_handler fired */
-	hash = hash_ptr(current, KPROBE_HASH_BITS);
-	raw_spin_lock_irqsave(&rp->lock, flags);
-	if (!hlist_empty(&rp->free_instances)) {
-		ri = hlist_entry(rp->free_instances.first,
-				struct kretprobe_instance, hlist);
-		hlist_del(&ri->hlist);
-		raw_spin_unlock_irqrestore(&rp->lock, flags);
-
-		ri->rp = rp;
-		ri->task = current;
-
-		if (rp->entry_handler && rp->entry_handler(ri, regs)) {
+	if (rp->percpu_instance) {
+		ri = get_pcpu_rp_instance(rp);
+	} else {
+		ri = get_rp_instance(rp);
+	}
+	if (!ri)
+		return 0;
+	if (rp->entry_handler && rp->entry_handler(ri, regs)) {
+		if (rp->percpu_instance) {
+			ri->rp = NULL;
+		} else {
 			raw_spin_lock_irqsave(&rp->lock, flags);
 			hlist_add_head(&ri->hlist, &rp->free_instances);
 			raw_spin_unlock_irqrestore(&rp->lock, flags);
-			return 0;
 		}
+		return 0;
+	}
+	arch_prepare_kretprobe(ri, regs);
 
-		arch_prepare_kretprobe(ri, regs);
+	/* XXX(hch): why is there no hlist_move_head? */
+	INIT_HLIST_NODE(&ri->hlist);
+	hash = hash_ptr(current, KPROBE_HASH_BITS);
+	kretprobe_table_lock(hash, &flags);
+	hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]);
+	kretprobe_table_unlock(hash, &flags);
 
-		/* XXX(hch): why is there no hlist_move_head? */
-		INIT_HLIST_NODE(&ri->hlist);
-		kretprobe_table_lock(hash, &flags);
-		hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]);
-		kretprobe_table_unlock(hash, &flags);
-	} else {
-		rp->nmissed++;
-		raw_spin_unlock_irqrestore(&rp->lock, flags);
-	}
 	return 0;
 }
 NOKPROBE_SYMBOL(pre_handler_kretprobe);
@@ -1950,6 +1996,15 @@ int register_kretprobe(struct kretprobe *rp)
 	rp->kp.post_handler = NULL;
 	rp->kp.fault_handler = NULL;
 
+	if (rp->percpu_instance) {
+		rp->pcpu = __alloc_percpu(sizeof(*rp->pcpu) + rp->data_size,
+					  __alignof__(*rp->pcpu));
+		if (rp->pcpu)
+			goto finalize;
+		free_rp_inst(rp);
+		return -ENOMEM;
+	}
+
 	/* Pre-allocate memory for max kretprobe instances */
 	if (rp->maxactive <= 0) {
 #ifdef CONFIG_PREEMPTION
@@ -1971,6 +2026,7 @@ int register_kretprobe(struct kretprobe *rp)
 		hlist_add_head(&inst->hlist, &rp->free_instances);
 	}
 
+finalize:
 	rp->nmissed = 0;
 	/* Establish function entry probe point */
 	ret = register_kprobe(&rp->kp);
diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c
index 76c997fdbc9da..4ebe48301d743 100644
--- a/kernel/test_kprobes.c
+++ b/kernel/test_kprobes.c
@@ -236,31 +236,36 @@ static struct kretprobe rp2 = {
 	.kp.symbol_name = "kprobe_target2"
 };
 
-static int test_kretprobes(void)
+static int test_kretprobes(bool percpu_instance)
 {
 	int ret;
 	struct kretprobe *rps[2] = {&rp, &rp2};
+	const char *mode = percpu_instance ? "percpu " : "normal";
 
 	/* addr and flags should be cleard for reusing kprobe. */
 	rp.kp.addr = NULL;
 	rp.kp.flags = 0;
+	rp.percpu_instance = percpu_instance;
+	rp2.kp.addr = NULL;
+	rp2.kp.flags = 0;
+	rp2.percpu_instance = percpu_instance;
 	ret = register_kretprobes(rps, 2);
 	if (ret < 0) {
-		pr_err("register_kretprobe returned %d\n", ret);
+		pr_err("register_kretprobe mode %s returned %d\n", mode, ret);
 		return ret;
 	}
 
 	krph_val = 0;
 	ret = target(rand1);
 	if (krph_val != rand1) {
-		pr_err("kretprobe handler not called\n");
+		pr_err("kretprobe handler mode %s not called\n", mode);
 		handler_errors++;
 	}
 
 	krph_val = 0;
 	ret = target2(rand1);
 	if (krph_val != rand1) {
-		pr_err("kretprobe handler2 not called\n");
+		pr_err("kretprobe handler2 mode %s not called\n", mode);
 		handler_errors++;
 	}
 	unregister_kretprobes(rps, 2);
@@ -297,7 +302,12 @@ int init_test_probes(void)
 		errors++;
 
 	num_tests++;
-	ret = test_kretprobes();
+	ret = test_kretprobes(false);
+	if (ret < 0)
+		errors++;
+
+	num_tests++;
+	ret = test_kretprobes(true);
 	if (ret < 0)
 		errors++;
 #endif /* CONFIG_KRETPROBES */
-- 
2.25.0.265.gbab2e86ba0-goog

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ