[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20221214025101.1268437-4-ming.lei@redhat.com>
Date: Wed, 14 Dec 2022 10:51:01 +0800
From: Ming Lei <ming.lei@...hat.com>
To: Jens Axboe <axboe@...nel.dk>, Tejun Heo <tj@...nel.org>
Cc: linux-block@...r.kernel.org, linux-kernel@...r.kernel.org,
Zhong Jinghua <zhongjinghua@...wei.com>,
Yu Kuai <yukuai3@...wei.com>, Dennis Zhou <dennis@...nel.org>,
Ming Lei <ming.lei@...hat.com>
Subject: [PATCH 3/3] lib/percpu-refcount: drain ->release() in perpcu_ref_exit()
The pattern of wait_event(percpu_ref_is_zero()) has been used in several
kernel components, and this way actually has the following risk:
- percpu_ref_is_zero() can be returned just between
atomic_long_sub_and_test() and ref->data->release(ref)
- given the refcount is found as zero, percpu_ref_exit() could
be called, and the host data structure is freed
- then use-after-free is triggered in ->release() when the user host
data structure is freed after percpu_ref_exit() returns
Reported-by: Zhong Jinghua <zhongjinghua@...wei.com>
Fixes: 2b0d3d3e4fcf ("percpu_ref: reduce memory footprint of percpu_ref in fast path")
Signed-off-by: Ming Lei <ming.lei@...hat.com>
---
include/linux/percpu-refcount.h | 41 ++++++++++++++++++++++-----------
lib/percpu-refcount.c | 22 ++++++++++++++++++
2 files changed, 50 insertions(+), 13 deletions(-)
diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 006c6aae261e..6ef29ebffd58 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -55,6 +55,7 @@
#include <linux/rcupdate.h>
#include <linux/types.h>
#include <linux/gfp.h>
+#include <linux/sched.h>
struct percpu_ref;
typedef void (percpu_ref_func_t)(struct percpu_ref *);
@@ -104,6 +105,7 @@ struct percpu_ref_data {
bool force_atomic:1;
bool allow_reinit:1;
bool auto_exit:1;
+ bool being_release:1;
struct rcu_head rcu;
struct percpu_ref *ref;
};
@@ -137,6 +139,7 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
void percpu_ref_resurrect(struct percpu_ref *ref);
void percpu_ref_reinit(struct percpu_ref *ref);
bool percpu_ref_is_zero(struct percpu_ref *ref);
+wait_queue_head_t *percpu_ref_get_switch_waitq(void);
/**
* percpu_ref_kill - drop the initial ref
@@ -319,6 +322,29 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
return ret;
}
+/* Internal helper, please do not call it outside */
+static inline void __percpu_ref_put_many(struct percpu_ref *ref,
+ unsigned long nr)
+{
+ struct percpu_ref_data *data = ref->data;
+ struct percpu_ref copy = *ref;
+ bool release = false;
+
+ data->being_release = 1;
+ if (unlikely(atomic_long_sub_and_test(nr, &data->count))) {
+ data->release(ref);
+ release = true;
+ }
+ data->being_release = 0;
+
+ if (release) {
+ if (data->auto_exit)
+ percpu_ref_exit(©);
+ /* re-use switch waitq for ack the release done */
+ wake_up_all(percpu_ref_get_switch_waitq());
+ }
+}
+
/**
* percpu_ref_put_many - decrement a percpu refcount
* @ref: percpu_ref to put
@@ -337,19 +363,8 @@ static inline void percpu_ref_put_many(struct percpu_ref *ref, unsigned long nr)
if (__ref_is_percpu(ref, &percpu_count))
this_cpu_sub(*percpu_count, nr);
- else {
- struct percpu_ref_data *data = ref->data;
- struct percpu_ref copy = *ref;
- bool release = false;
-
- if (unlikely(atomic_long_sub_and_test(nr, &data->count))) {
- data->release(ref);
- release = true;
- }
-
- if (release && data->auto_exit)
- percpu_ref_exit(©);
- }
+ else
+ __percpu_ref_put_many(ref, nr);
rcu_read_unlock();
}
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index c0cadf92948f..fd50eda233ed 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -140,6 +140,22 @@ void percpu_ref_exit(struct percpu_ref *ref)
if (!data)
return;
+ /*
+ * We may reach here because wait_event(percpu_ref_is_zero())
+ * returns, and ->release() may not be completed or even started
+ * ye, then use-after-free is caused, so drain ->release() here
+ */
+ if (!data->auto_exit) {
+ /*
+ * Order reading the atomic count in percpu_ref_is_zero
+ * and reading data->being_release. The counter pair is
+ * the one implied in atomic_long_sub_and_test() called
+ * from __percpu_ref_put_many().
+ */
+ smp_rmb();
+ wait_event(percpu_ref_switch_waitq, !data->being_release);
+ }
+
spin_lock_irqsave(&percpu_ref_switch_lock, flags);
ref->percpu_count_ptr |= atomic_long_read(&ref->data->count) <<
__PERCPU_REF_FLAG_BITS;
@@ -480,3 +496,9 @@ void percpu_ref_resurrect(struct percpu_ref *ref)
spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_resurrect);
+
+wait_queue_head_t *percpu_ref_get_switch_waitq()
+{
+ return &percpu_ref_switch_waitq;
+}
+EXPORT_SYMBOL_GPL(percpu_ref_get_switch_waitq);
--
2.38.1
Powered by blists - more mailing lists