lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 13 Dec 2023 15:42:00 -0800
From:   Tim Chen <tim.c.chen@...ux.intel.com>
To:     Ming Lei <ming.lei@...hat.com>, Jens Axboe <axboe@...nel.dk>
Cc:     linux-block@...r.kernel.org, Tejun Heo <tj@...nel.org>,
        linux-kernel@...r.kernel.org, Juri Lelli <juri.lelli@...hat.com>,
        Andrew Theurer <atheurer@...hat.com>,
        Joe Mario <jmario@...hat.com>,
        Sebastian Jug <sejug@...hat.com>,
        Frederic Weisbecker <frederic@...nel.org>,
        Bart Van Assche <bvanassche@....org>
Subject: Re: [PATCH V3] blk-mq: don't schedule block kworker on isolated CPUs

On Wed, 2023-10-25 at 10:57 +0800, Ming Lei wrote:
> Kernel parameter of `isolcpus=` or 'nohz_full=' are used for isolating CPUs
> for specific task, and user often won't want block IO to disturb these CPUs,
Suggest breaking up this long sentence to make reading easier.

for specific tasks.  Users do not want block I/O operations to disturb these CPUs,
> also long IO latency may be caused if blk-mq kworker is scheduled on these

as long I/O latency could delay intended tasks if a blk-mq kworker is scheduled on these

> isolated CPUs.
> 
> Kernel workqueue only respects this limit for WQ_UNBOUND, for bound wq,
> the responsibility should be on wq user.
> 
> So don't not run block kworker on isolated CPUs by ruling out isolated CPUs
So don't run block kworker on isolated CPUs by removing isolated CPUs

> from hctx->cpumask. Meantime in cpuhp handler, use queue map to check if
> all CPUs in this hw queue are offline, this way can avoid any cost in fast
> IO code path.
> 
> Cc: Juri Lelli <juri.lelli@...hat.com>
> Cc: Andrew Theurer <atheurer@...hat.com>
> Cc: Joe Mario <jmario@...hat.com>
> Cc: Sebastian Jug <sejug@...hat.com>
> Cc: Frederic Weisbecker <frederic@...nel.org>
> Cc: Bart Van Assche <bvanassche@....org>
> Signed-off-by: Ming Lei <ming.lei@...hat.com>
> ---
> 
> V3:
> 	- avoid to check invalid cpu as reported by Bart
> 	- take current cpu(to be offline, not done yet) into account
> 	- simplify blk_mq_hctx_has_online_cpu()
> 
> V2:
> 	- remove module parameter, meantime use queue map to check if
> 	all cpus in one hctx are offline
> 
>  block/blk-mq.c | 51 ++++++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 41 insertions(+), 10 deletions(-)
> 
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index e2d11183f62e..4556978ce71b 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -29,6 +29,7 @@
>  #include <linux/prefetch.h>
>  #include <linux/blk-crypto.h>
>  #include <linux/part_stat.h>
> +#include <linux/sched/isolation.h>
>  
>  #include <trace/events/block.h>
>  
> @@ -2158,7 +2159,11 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
>  	bool tried = false;
>  	int next_cpu = hctx->next_cpu;
>  
> -	if (hctx->queue->nr_hw_queues == 1)
> +	/*
> +	 * In case of single queue or no allowed CPU for scheduling
> +	 * worker, don't bound our worker with any CPU
> +	 */
> +	if (hctx->queue->nr_hw_queues == 1 || next_cpu >= nr_cpu_ids)
>  		return WORK_CPU_UNBOUND;
>  
>  	if (--hctx->next_cpu_batch <= 0) {
> @@ -3459,14 +3464,30 @@ static bool blk_mq_hctx_has_requests(struct blk_mq_hw_ctx *hctx)
>  	return data.has_rq;
>  }
>  
> -static inline bool blk_mq_last_cpu_in_hctx(unsigned int cpu,
> -		struct blk_mq_hw_ctx *hctx)
> +static bool blk_mq_hctx_has_online_cpu(struct blk_mq_hw_ctx *hctx,
> +		unsigned int this_cpu)
>  {
> -	if (cpumask_first_and(hctx->cpumask, cpu_online_mask) != cpu)
> -		return false;
> -	if (cpumask_next_and(cpu, hctx->cpumask, cpu_online_mask) < nr_cpu_ids)
> -		return false;
> -	return true;
> +	enum hctx_type type = hctx->type;
> +	int cpu;
> +
> +	/*
> +	 * hctx->cpumask has rule out isolated CPUs, but userspace still
> +	 * might submit IOs on these isolated CPUs, so use queue map to
> +	 * check if all CPUs mapped to this hctx are offline
> +	 */
> +	for_each_online_cpu(cpu) {
> +		struct blk_mq_hw_ctx *h = blk_mq_map_queue_type(hctx->queue,
> +				type, cpu);
> +
> +		if (h != hctx)
> +			continue;
> +
> +		/* this current CPU isn't put offline yet */
> +		if (this_cpu != cpu)
> +			return true;
> +	}
> +
> +	return false;
>  }
>  
>  static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node)
> @@ -3474,8 +3495,7 @@ static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node)
>  	struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node,
>  			struct blk_mq_hw_ctx, cpuhp_online);
>  
> -	if (!cpumask_test_cpu(cpu, hctx->cpumask) ||
> -	    !blk_mq_last_cpu_in_hctx(cpu, hctx))
> +	if (blk_mq_hctx_has_online_cpu(hctx, cpu))
>  		return 0;
>  
>  	/*
> @@ -3883,6 +3903,8 @@ static void blk_mq_map_swqueue(struct request_queue *q)
>  	}
>  
>  	queue_for_each_hw_ctx(q, hctx, i) {
> +		int cpu;
> +
>  		/*
>  		 * If no software queues are mapped to this hardware queue,
>  		 * disable it and free the request entries.
> @@ -3909,6 +3931,15 @@ static void blk_mq_map_swqueue(struct request_queue *q)
>  		 */
>  		sbitmap_resize(&hctx->ctx_map, hctx->nr_ctx);
>  
> +		/*
> +		 * rule out isolated CPUs from hctx->cpumask for avoiding to

s/for avoiding to run/to avoid running/

> +		 * run wq worker on isolated CPU
> +		 */
> +		for_each_cpu(cpu, hctx->cpumask) {
> +			if (cpu_is_isolated(cpu))
> +				cpumask_clear_cpu(cpu, hctx->cpumask);
> +		}
> +
>  		/*
>  		 * Initialize batch roundrobin counts
>  		 */

Thanks.

Tim

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ