Date:   Thu, 24 Feb 2022 17:06:40 +0800
From:   "Wang Jianchao (Kuaishou)" <jianchao.wan9@...il.com>
To:     Jens Axboe <axboe@...nel.dk>
Cc:     Josef Bacik <jbacik@...com>, Tejun Heo <tj@...nel.org>,
        Bart Van Assche <bvanassche@....org>,
        linux-block@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: [RFC V5 02/16] blk/rq-qos: prepare to make blk-rq-qos pluggable

This patch makes the blk-rq-qos policies pluggable as follows
(a usage sketch follows the list):
(1) Add code to maintain the rq_qos_ops. An rq-qos policy needs to
    register itself with rq_qos_register(). The original enum
    rq_qos_id will be removed in a following patch; policies will
    instead use a dynamic id maintained by rq_qos_idr.
(2) Add an .init callback to rq_qos_ops. We use it to initialize
    the resource when a policy is enabled.
(3) Add /sys/block/xxx/queue/qos.
    Writing '+name' or '-name' to this file enables or disables the
    corresponding blk-rq-qos policy.
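
As an illustration only (not part of this patch), a minimal policy
built on the above interfaces might look like the sketch below; the
"sample" name and all sample_* symbols are hypothetical:

  /*
   * Hypothetical sketch; assumes "blk-rq-qos.h" and <linux/slab.h>.
   * A real policy would embed struct rq_qos in its own per-queue data.
   */
  static int sample_init(struct request_queue *q);
  static void sample_exit(struct rq_qos *rqos);

  static struct rq_qos_ops sample_ops = {
          .name = "sample",
          .init = sample_init,
          .exit = sample_exit,
  };

  /* .init is invoked with the queue frozen and quiesced. */
  static int sample_init(struct request_queue *q)
  {
          struct rq_qos *rqos;

          rqos = kzalloc(sizeof(*rqos), GFP_KERNEL);
          if (!rqos)
                  return -ENOMEM;
          /* Link into q->rq_qos under q->rqos_lock. */
          rq_qos_activate(q, rqos, &sample_ops);
          return 0;
  }

  static void sample_exit(struct rq_qos *rqos)
  {
          /* Unlink and drain rq_qos_get() users before freeing. */
          rq_qos_deactivate(rqos);
          kfree(rqos);
  }

  /* Registration allocates a dynamic id from rq_qos_idr. */
  static int __init sample_module_init(void)
  {
          return rq_qos_register(&sample_ops);
  }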

This patch mainly prepares the helper interfaces; it makes no
functional changes. The following patches will adapt the code of
wbt, iolatency, iocost and ioprio to make them pluggable one by one,
and after that the sysfs interface /sys/block/xxx/queue/qos will be
exported, as in the example below.
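
For example, once a policy such as wbt has been adapted by the later
patches, the switch could be driven as follows (the device name and
the set of registered policy names are illustrative):

  # cat /sys/block/sda/queue/qos
  [wbt] iocost ioprio
  # echo "-wbt" > /sys/block/sda/queue/qos
  # echo "+wbt" > /sys/block/sda/queue/qos

Active policies are shown in brackets in the order in which they are
invoked; the remaining names are registered but currently disabled.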

Signed-off-by: Wang Jianchao (Kuaishou) <jianchao.wan9@...il.com>
---
 block/blk-core.c       |   1 +
 block/blk-mq-debugfs.c |  12 +-
 block/blk-rq-qos.c     | 246 ++++++++++++++++++++++++++++++++++++++++-
 block/blk-rq-qos.h     |  25 ++++-
 include/linux/blkdev.h |   1 +
 5 files changed, 280 insertions(+), 5 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index d93e3bb9a769..448d153e515b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -485,6 +485,7 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
 	mutex_init(&q->sysfs_lock);
 	mutex_init(&q->sysfs_dir_lock);
 	spin_lock_init(&q->queue_lock);
+	spin_lock_init(&q->rqos_lock);
 
 	init_waitqueue_head(&q->mq_freeze_wq);
 	mutex_init(&q->mq_freeze_lock);
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 3a790eb4995c..24d47bc90b4a 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -729,7 +729,10 @@ void blk_mq_debugfs_register(struct request_queue *q)
 
 	if (q->rq_qos) {
 		struct rq_qos *rqos = q->rq_qos;
-
+		/*
+		 * The queue has not been registered yet, so it is safe
+		 * to iterate the rqos list without the lock.
+		 */
 		while (rqos) {
 			blk_mq_debugfs_register_rqos(rqos);
 			rqos = rqos->next;
@@ -844,7 +847,12 @@ void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
 void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
 {
 	struct request_queue *q = rqos->q;
-	const char *dir_name = rq_qos_id_to_name(rqos->id);
+	const char *dir_name;
+
+	if (rqos->ops->name)
+		dir_name = rqos->ops->name;
+	else
+		dir_name = rq_qos_id_to_name(rqos->id);
 
 	if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs)
 		return;
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index e83af7bc7591..c43436c2ead1 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -2,6 +2,9 @@
 
 #include "blk-rq-qos.h"
 
+static DEFINE_IDR(rq_qos_idr);
+static DEFINE_MUTEX(rq_qos_mutex);
+
 /*
  * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded,
  * false if 'v' + 1 would be bigger than 'below'.
@@ -294,11 +297,250 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
 
 void rq_qos_exit(struct request_queue *q)
 {
-	blk_mq_debugfs_unregister_queue_rqos(q);
-
+	/*
+	 * The queue must already be unregistered here, so it is
+	 * safe to iterate the list without the lock.
+	 */
 	while (q->rq_qos) {
 		struct rq_qos *rqos = q->rq_qos;
 		q->rq_qos = rqos->next;
 		rqos->ops->exit(rqos);
 	}
+	blk_mq_debugfs_unregister_queue_rqos(q);
+}
+
+static struct rq_qos *rq_qos_by_name(struct request_queue *q,
+		const char *name)
+{
+	struct rq_qos *rqos;
+
+	for (rqos = q->rq_qos; rqos; rqos = rqos->next) {
+		if (!rqos->ops->name)
+			continue;
+
+		if (!strncmp(rqos->ops->name, name,
+					strlen(rqos->ops->name)))
+			return rqos;
+	}
+	return NULL;
+}
+
+/*
+ * These helpers keep a rqos alive across rqos switching.
+ * rq_qos_deactivate() drains all rq_qos_get() users before
+ * the rqos structure is freed.
+ */
+struct rq_qos *rq_qos_get(struct request_queue *q, int id)
+{
+	struct rq_qos *rqos;
+
+	spin_lock_irq(&q->rqos_lock);
+	for (rqos = q->rq_qos; rqos; rqos = rqos->next) {
+		if (rqos->id == id)
+			break;
+	}
+	if (rqos && rqos->dying)
+		rqos = NULL;
+	if (rqos)
+		refcount_inc(&rqos->ref);
+	spin_unlock_irq(&q->rqos_lock);
+	return rqos;
+}
+
+void rq_qos_put(struct rq_qos *rqos)
+{
+	struct request_queue *q = rqos->q;
+
+	spin_lock_irq(&q->rqos_lock);
+	refcount_dec(&rqos->ref);
+	if (rqos->dying)
+		wake_up(&rqos->waitq);
+	spin_unlock_irq(&q->rqos_lock);
+}
+
+void rq_qos_activate(struct request_queue *q,
+		struct rq_qos *rqos, const struct rq_qos_ops *ops)
+{
+	struct rq_qos *pos;
+
+	rqos->dying = false;
+	refcount_set(&rqos->ref, 1);
+	init_waitqueue_head(&rqos->waitq);
+	rqos->id = ops->id;
+	rqos->ops = ops;
+	rqos->q = q;
+	rqos->next = NULL;
+
+	spin_lock_irq(&q->rqos_lock);
+	pos = q->rq_qos;
+	if (pos) {
+		while (pos->next)
+			pos = pos->next;
+		pos->next = rqos;
+	} else {
+		q->rq_qos = rqos;
+	}
+	spin_unlock_irq(&q->rqos_lock);
+
+	if (rqos->ops->debugfs_attrs)
+		blk_mq_debugfs_register_rqos(rqos);
+}
+
+void rq_qos_deactivate(struct rq_qos *rqos)
+{
+	struct request_queue *q = rqos->q;
+	struct rq_qos **cur;
+
+	spin_lock_irq(&q->rqos_lock);
+	rqos->dying = true;
+	/*
+	 * Drain all users of rq_qos_get()/rq_qos_put().
+	 */
+	wait_event_lock_irq(rqos->waitq,
+		refcount_read(&rqos->ref) == 1, q->rqos_lock);
+	for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
+		if (*cur == rqos) {
+			*cur = rqos->next;
+			break;
+		}
+	}
+	spin_unlock_irq(&q->rqos_lock);
+	blk_mq_debugfs_unregister_rqos(rqos);
+}
+
+static struct rq_qos_ops *rq_qos_op_find(const char *name)
+{
+	struct rq_qos_ops *pos;
+	int id;
+
+	idr_for_each_entry(&rq_qos_idr, pos, id) {
+		if (!strncmp(pos->name, name, strlen(pos->name)))
+			return pos;
+	}
+
+	return NULL;
+}
+
+int rq_qos_register(struct rq_qos_ops *ops)
+{
+	int ret, start;
+
+	mutex_lock(&rq_qos_mutex);
+
+	if (rq_qos_op_find(ops->name)) {
+		ret = -EEXIST;
+		goto out;
+	}
+
+	start = RQ_QOS_IOPRIO + 1;
+	ret = idr_alloc(&rq_qos_idr, ops, start, INT_MAX, GFP_KERNEL);
+	if (ret < 0)
+		goto out;
+
+	ops->id = ret;
+	ret = 0;
+out:
+	mutex_unlock(&rq_qos_mutex);
+	return ret;
+}
+
+void rq_qos_unregister(struct rq_qos_ops *ops)
+{
+	mutex_lock(&rq_qos_mutex);
+	idr_remove(&rq_qos_idr, ops->id);
+	mutex_unlock(&rq_qos_mutex);
+}
+
+ssize_t queue_qos_show(struct request_queue *q, char *buf)
+{
+	struct rq_qos_ops *ops;
+	struct rq_qos *rqos;
+	int id, ret = 0;
+
+	mutex_lock(&rq_qos_mutex);
+	/*
+	 * Show the policies in the order in which they are invoked.
+	 * rqos_lock is not needed here as sysfs_lock protects us
+	 * from queue_qos_store().
+	 */
+	for (rqos = q->rq_qos; rqos; rqos = rqos->next) {
+		if (!rqos->ops->name)
+			continue;
+		ret += sprintf(buf + ret, "[%s] ", rqos->ops->name);
+	}
+	idr_for_each_entry(&rq_qos_idr, ops, id) {
+		if (!rq_qos_by_name(q, ops->name))
+			ret += sprintf(buf + ret, "%s ", ops->name);
+	}
+
+	if (ret > 0)
+		ret--; /* overwrite the last space */
+	ret += sprintf(buf + ret, "\n");
+	mutex_unlock(&rq_qos_mutex);
+
+	return ret;
+}
+
+ssize_t queue_qos_store(struct request_queue *q, const char *page,
+			  size_t count)
+{
+	const struct rq_qos_ops *ops;
+	struct rq_qos *rqos;
+	const char *qosname;
+	char *buf;
+	bool add;
+	int ret;
+
+	if (!blk_queue_registered(q))
+		return -ENOENT;
+
+	buf = kstrdup(page, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	buf = strim(buf);
+	if (buf[0] != '+' && buf[0] != '-') {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	add = buf[0] == '+';
+	qosname = buf + 1;
+
+	rqos = rq_qos_by_name(q, qosname);
+	if (add && rqos) {
+		ret = -EEXIST;
+		goto out;
+	}
+
+	if (!add && !rqos) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	if (add) {
+		mutex_lock(&rq_qos_mutex);
+		ops = rq_qos_op_find(qosname);
+		mutex_unlock(&rq_qos_mutex);
+		if (!ops) {
+			ret = -EINVAL;
+			goto out;
+		}
+	} else {
+		ops = rqos->ops;
+	}
+
+	blk_mq_freeze_queue(q);
+	blk_mq_quiesce_queue(q);
+	if (add) {
+		ret = ops->init(q);
+	} else {
+		ops->exit(rqos);
+		ret = 0;
+	}
+	blk_mq_unquiesce_queue(q);
+	blk_mq_unfreeze_queue(q);
+out:
+	kfree(buf);
+	return ret ? ret : count;
 }
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 3cfbc8668cba..aa6ef12637a0 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -26,9 +26,12 @@ struct rq_wait {
 };
 
 struct rq_qos {
-	struct rq_qos_ops *ops;
+	const struct rq_qos_ops *ops;
 	struct request_queue *q;
 	enum rq_qos_id id;
+	refcount_t ref;
+	wait_queue_head_t waitq;
+	bool dying;
 	struct rq_qos *next;
 #ifdef CONFIG_BLK_DEBUG_FS
 	struct dentry *debugfs_dir;
@@ -36,6 +39,8 @@ struct rq_qos {
 };
 
 struct rq_qos_ops {
+	const char *name;
+	int id;
 	void (*throttle)(struct rq_qos *, struct bio *);
 	void (*track)(struct rq_qos *, struct request *, struct bio *);
 	void (*merge)(struct rq_qos *, struct request *, struct bio *);
@@ -46,6 +51,7 @@ struct rq_qos_ops {
 	void (*cleanup)(struct rq_qos *, struct bio *);
 	void (*queue_depth_changed)(struct rq_qos *);
 	void (*exit)(struct rq_qos *);
+	int (*init)(struct request_queue *);
 	const struct blk_mq_debugfs_attr *debugfs_attrs;
 };
 
@@ -132,6 +138,17 @@ static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
 	blk_mq_debugfs_unregister_rqos(rqos);
 }
 
+int rq_qos_register(struct rq_qos_ops *ops);
+void rq_qos_unregister(struct rq_qos_ops *ops);
+void rq_qos_activate(struct request_queue *q,
+		struct rq_qos *rqos, const struct rq_qos_ops *ops);
+void rq_qos_deactivate(struct rq_qos *rqos);
+ssize_t queue_qos_show(struct request_queue *q, char *buf);
+ssize_t queue_qos_store(struct request_queue *q, const char *page,
+			  size_t count);
+struct rq_qos *rq_qos_get(struct request_queue *q, int id);
+void rq_qos_put(struct rq_qos *rqos);
+
 typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data);
 typedef void (cleanup_cb_t)(struct rq_wait *rqw, void *private_data);
 
@@ -211,8 +228,14 @@ static inline void rq_qos_merge(struct request_queue *q, struct request *rq,
 
 static inline void rq_qos_queue_depth_changed(struct request_queue *q)
 {
+	/*
+	 * This can be called from outside the block core, so protect
+	 * the rqos list with sysfs_lock against qos switching.
+	 */
+	mutex_lock(&q->sysfs_lock);
 	if (q->rq_qos)
 		__rq_qos_queue_depth_changed(q->rq_qos);
+	mutex_unlock(&q->sysfs_lock);
 }
 
 void rq_qos_exit(struct request_queue *);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f35aea98bc35..126aac824ccc 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -230,6 +230,7 @@ struct request_queue {
 	int			id;
 
 	spinlock_t		queue_lock;
+	spinlock_t		rqos_lock;
 
 	struct gendisk		*disk;
 
-- 
2.17.1
