[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <eabac8f5-108c-40dc-bd75-b5a04075a4d4@huaweicloud.com>
Date: Thu, 4 May 2023 10:17:44 +0800
From: Yu Kuai <yukuai1@...weicloud.com>
To: Yu Kuai <yukuai1@...weicloud.com>, axboe@...nel.dk
Cc: linux-block@...r.kernel.org, linux-kernel@...r.kernel.org,
yi.zhang@...wei.com, yangerkun@...wei.com,
"yukuai (C)" <yukuai3@...wei.com>
Subject: Re: [PATCH RFC -next] block: support enable/disable blk-mq debugfs
dynamically
Hi,
在 2023/04/15 16:20, Yu Kuai 写道:
> From: Yu Kuai <yukuai3@...wei.com>
>
> When a disk is created, its debugfs inodes and dentries are created
> along with it, and the memory used for debugfs can't be freed until the
> disk is removed.
>
> The number of debugfs inodes and dentries depends on the number of cpus
> and hctxs. For example, testing on a 128-core environment with default
> module parameters, each loop device costs 1679KB of memory, of which
> debugfs accounts for 336KB (20%).
>
> The memory cost of debugfs for a single disk seems small, but if a big
> machine contains thousands of disks, the cost will be xxGB. This memory
> overhead can be avoided by disabling CONFIG_BLK_DEBUG_FS.
>
> This patch adds a disk-level switch that can enable/disable debugfs
> dynamically, so that users can disable debugfs if they care about the
> memory overhead, while still being able to enable it again on demand.
>
Friendly ping ...
Thanks,
Kuai
> Signed-off-by: Yu Kuai <yukuai3@...wei.com>
> ---
> block/blk-mq-debugfs.c | 58 ++++++++++++++++++++++++++++++++++++++----
> block/blk-mq-debugfs.h | 1 +
> block/blk-sysfs.c | 41 +++++++++++++++++++++++++++++
> include/linux/blkdev.h | 2 ++
> 4 files changed, 97 insertions(+), 5 deletions(-)
>
> diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
> index 212a7f301e73..3ffc27fd4d07 100644
> --- a/block/blk-mq-debugfs.c
> +++ b/block/blk-mq-debugfs.c
> @@ -657,6 +657,9 @@ void blk_mq_debugfs_register(struct request_queue *q)
> struct blk_mq_hw_ctx *hctx;
> unsigned long i;
>
> + if (!test_bit(QUEUE_FLAG_DEBUGFS, &q->queue_flags))
> + return;
> +
> debugfs_create_files(q->debugfs_dir, q, blk_mq_debugfs_queue_attrs);
>
> /*
> @@ -685,6 +688,47 @@ void blk_mq_debugfs_register(struct request_queue *q)
> }
> }
>
> +static void debugfs_remove_files(struct dentry *parent,
> + const struct blk_mq_debugfs_attr *attr)
> +{
> + if (IS_ERR_OR_NULL(parent))
> + return;
> +
> + for (; attr->name; attr++)
> + debugfs_lookup_and_remove(attr->name, parent);
> +}
> +
> +void blk_mq_debugfs_unregister(struct request_queue *q)
> +{
> + struct blk_mq_hw_ctx *hctx;
> + unsigned long i;
> +
> + if (q->rq_qos) {
> + struct rq_qos *rqos = q->rq_qos;
> +
> + while (rqos) {
> + if (rqos->debugfs_dir)
> + blk_mq_debugfs_unregister_rqos(rqos);
> + rqos = rqos->next;
> + }
> + }
> +
> + debugfs_remove_recursive(q->rqos_debugfs_dir);
> + q->rqos_debugfs_dir = NULL;
> +
> + queue_for_each_hw_ctx(q, hctx, i) {
> + if (hctx->debugfs_dir)
> + blk_mq_debugfs_unregister_hctx(hctx);
> + if (hctx->sched_debugfs_dir)
> + blk_mq_debugfs_unregister_sched_hctx(hctx);
> + }
> +
> + if (q->sched_debugfs_dir)
> + blk_mq_debugfs_unregister_sched(q);
> +
> + debugfs_remove_files(q->debugfs_dir, blk_mq_debugfs_queue_attrs);
> +}
> +
> static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx,
> struct blk_mq_ctx *ctx)
> {
> @@ -704,7 +748,7 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q,
> char name[20];
> int i;
>
> - if (!q->debugfs_dir)
> + if (!q->debugfs_dir || !test_bit(QUEUE_FLAG_DEBUGFS, &q->queue_flags))
> return;
>
> snprintf(name, sizeof(name), "hctx%u", hctx->queue_num);
> @@ -718,7 +762,8 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q,
>
> void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx)
> {
> - if (!hctx->queue->debugfs_dir)
> + if (!hctx->queue->debugfs_dir ||
> + !test_bit(QUEUE_FLAG_DEBUGFS, &hctx->queue->queue_flags))
> return;
> debugfs_remove_recursive(hctx->debugfs_dir);
> hctx->sched_debugfs_dir = NULL;
> @@ -756,7 +801,8 @@ void blk_mq_debugfs_register_sched(struct request_queue *q)
> if (!q->debugfs_dir)
> return;
>
> - if (!e->queue_debugfs_attrs)
> + if (!e->queue_debugfs_attrs ||
> + !test_bit(QUEUE_FLAG_DEBUGFS, &q->queue_flags))
> return;
>
> q->sched_debugfs_dir = debugfs_create_dir("sched", q->debugfs_dir);
> @@ -802,7 +848,8 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
>
> lockdep_assert_held(&q->debugfs_mutex);
>
> - if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs)
> + if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs ||
> + !test_bit(QUEUE_FLAG_DEBUGFS, &q->queue_flags))
> return;
>
> if (!q->rqos_debugfs_dir)
> @@ -828,7 +875,8 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
> if (!hctx->debugfs_dir)
> return;
>
> - if (!e->hctx_debugfs_attrs)
> + if (!e->hctx_debugfs_attrs ||
> + !test_bit(QUEUE_FLAG_DEBUGFS, &q->queue_flags))
> return;
>
> hctx->sched_debugfs_dir = debugfs_create_dir("sched",
> diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h
> index 9c7d4b6117d4..c8dd03f73d8c 100644
> --- a/block/blk-mq-debugfs.h
> +++ b/block/blk-mq-debugfs.h
> @@ -21,6 +21,7 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq);
> int blk_mq_debugfs_rq_show(struct seq_file *m, void *v);
>
> void blk_mq_debugfs_register(struct request_queue *q);
> +void blk_mq_debugfs_unregister(struct request_queue *q);
> void blk_mq_debugfs_register_hctx(struct request_queue *q,
> struct blk_mq_hw_ctx *hctx);
> void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx);
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index 1a743b4f2958..450acea23f21 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -618,6 +618,44 @@ QUEUE_RW_ENTRY(queue_iostats, "iostats");
> QUEUE_RW_ENTRY(queue_random, "add_random");
> QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes");
>
> +#ifdef CONFIG_BLK_DEBUG_FS
> +static ssize_t queue_debugfs_show(struct request_queue *q, char *page)
> +{
> + return queue_var_show(test_bit(QUEUE_FLAG_DEBUGFS, &q->queue_flags),
> + page);
> +}
> +
> +static ssize_t queue_debugfs_store(struct request_queue *q, const char *page,
> + size_t count)
> +{
> + unsigned long val;
> + bool enabled;
> + ssize_t ret = queue_var_store(&val, page, count);
> +
> + if (ret < 0)
> + return ret;
> +
> + mutex_lock(&q->debugfs_mutex);
> + enabled = test_bit(QUEUE_FLAG_DEBUGFS, &q->queue_flags);
> + if (val) {
> + if (!enabled && queue_is_mq(q)) {
> + blk_queue_flag_set(QUEUE_FLAG_DEBUGFS, q);
> + blk_mq_debugfs_register(q);
> + }
> + } else {
> + if (enabled) {
> + blk_mq_debugfs_unregister(q);
> + blk_queue_flag_clear(QUEUE_FLAG_DEBUGFS, q);
> + }
> + }
> + mutex_unlock(&q->debugfs_mutex);
> +
> + return ret;
> +}
> +
> +QUEUE_RW_ENTRY(queue_debugfs, "debugfs");
> +#endif
> +
> static struct attribute *queue_attrs[] = {
> &queue_requests_entry.attr,
> &queue_ra_entry.attr,
> @@ -664,6 +702,9 @@ static struct attribute *queue_attrs[] = {
> #endif
> &queue_virt_boundary_mask_entry.attr,
> &queue_dma_alignment_entry.attr,
> +#ifdef CONFIG_BLK_DEBUG_FS
> + &queue_debugfs_entry.attr,
> +#endif
> NULL,
> };
>
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index e3242e67a8e3..be51592751b2 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -544,6 +544,7 @@ struct request_queue {
> #define QUEUE_FLAG_NONROT 6 /* non-rotational device (SSD) */
> #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */
> #define QUEUE_FLAG_IO_STAT 7 /* do disk/partitions IO accounting */
> +#define QUEUE_FLAG_DEBUGFS 8 /* supports debugfs */
> #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */
> #define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */
> #define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */
> @@ -566,6 +567,7 @@ struct request_queue {
> #define QUEUE_FLAG_SKIP_TAGSET_QUIESCE 31 /* quiesce_tagset skip the queue*/
>
> #define QUEUE_FLAG_MQ_DEFAULT ((1UL << QUEUE_FLAG_IO_STAT) | \
> + (1UL << QUEUE_FLAG_DEBUGFS) | \
> (1UL << QUEUE_FLAG_SAME_COMP) | \
> (1UL << QUEUE_FLAG_NOWAIT))
>
>
Powered by blists - more mailing lists