Message-ID: <Zpbify32lel9J-5I@slm.duckdns.org>
Date: Tue, 16 Jul 2024 11:13:35 -1000
From: Tejun Heo <tj@...nel.org>
To: "boy.wu" <boy.wu@...iatek.com>
Cc: Josef Bacik <josef@...icpanda.com>, Jens Axboe <axboe@...nel.dk>,
	Matthias Brugger <matthias.bgg@...il.com>,
	AngeloGioacchino Del Regno <angelogioacchino.delregno@...labora.com>,
	Boris Burkov <boris@....io>, cgroups@...r.kernel.org,
	linux-block@...r.kernel.org, linux-kernel@...r.kernel.org,
	linux-arm-kernel@...ts.infradead.org,
	linux-mediatek@...ts.infradead.org, iverlin.wang@...iatek.com
Subject: Re: [PATCH v3] blk-cgroup: Replace u64 sync with spinlock for iostat
 update

Hello, Boy.

So, looking at the patch, I'm not sure a per-blkg lock makes sense.

On Tue, Jul 16, 2024 at 03:52:06PM +0800, boy.wu wrote:
> @@ -995,15 +995,13 @@ static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur,
>  				struct blkg_iostat *last)
>  {
>  	struct blkg_iostat delta;
> -	unsigned long flags;
>  
>  	/* propagate percpu delta to global */
> -	flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
> +	guard(spinlock_irqsave)(&blkg->iostat.spinlock);
>  	blkg_iostat_set(&delta, cur);
>  	blkg_iostat_sub(&delta, last);
>  	blkg_iostat_add(&blkg->iostat.cur, &delta);
>  	blkg_iostat_add(last, &delta);
> -	u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
>  }

This is already called with blkg_stat_lock held.
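
For reference, the flush path already does something like the following
(paraphrasing current block/blk-cgroup.c from memory, so details may differ
in your tree):

	/* in __blkcg_rstat_flush() */
	raw_spin_lock_irqsave(&blkg_stat_lock, flags);
	...
	blkcg_iostat_update(blkg, &cur, &bisc->last);
	...
	raw_spin_unlock_irqrestore(&blkg_stat_lock, flags);

so the propagation into blkg->iostat is already serialized and a second,
per-blkg lock here doesn't buy anything.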

> @@ -1051,10 +1048,8 @@ static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu)
>  			goto propagate_up; /* propagate up to parent only */
>  
>  		/* fetch the current per-cpu values */
> -		do {
> -			seq = u64_stats_fetch_begin(&bisc->sync);
> +		scoped_guard(spinlock_irqsave, &bisc->spinlock)
>  			blkg_iostat_set(&cur, &bisc->cur);
> -		} while (u64_stats_fetch_retry(&bisc->sync, seq));

These are per-cpu stats and we should keep using u64_sync for them.
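
IOW, keep the fetch loop that the patch removes, i.e. something like:

	/* reader side of the per-cpu counters stays on u64_sync */
	do {
		seq = u64_stats_fetch_begin(&bisc->sync);
		blkg_iostat_set(&cur, &bisc->cur);
	} while (u64_stats_fetch_retry(&bisc->sync, seq));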

> @@ -1134,9 +1128,8 @@ static void blkcg_fill_root_iostats(void)
>  				cpu_dkstats->sectors[STAT_DISCARD] << 9;
>  		}
>  
> -		flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
> +		guard(spinlock_irqsave)(&blkg->iostat.spinlock);
>  		blkg_iostat_set(&blkg->iostat.cur, &tmp);
> -		u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
>  	}
>  }
...
> @@ -1157,16 +1149,14 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
>  
>  	seq_printf(s, "%s ", dname);
>  
> -	do {
> -		seq = u64_stats_fetch_begin(&bis->sync);
> -
> +	scoped_guard(spinlock_irqsave, &bis->spinlock) {
>  		rbytes = bis->cur.bytes[BLKG_IOSTAT_READ];
>  		wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE];
>  		dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD];
>  		rios = bis->cur.ios[BLKG_IOSTAT_READ];
>  		wios = bis->cur.ios[BLKG_IOSTAT_WRITE];
>  		dios = bis->cur.ios[BLKG_IOSTAT_DISCARD];
> -	} while (u64_stats_fetch_retry(&bis->sync, seq));
> +	}

The above two are the only places which could potentially benefit from a
per-blkg lock, but neither is a hot path. I'd just use blkg_stat_lock for
both.
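
e.g. something along these lines for blkcg_print_one_stat(), as an untested
sketch (assuming blkg_stat_lock remains the file-local raw spinlock):

	unsigned long flags;

	/* read the aggregated counters under blkg_stat_lock, no per-blkg lock */
	raw_spin_lock_irqsave(&blkg_stat_lock, flags);
	rbytes = bis->cur.bytes[BLKG_IOSTAT_READ];
	wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE];
	dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD];
	rios = bis->cur.ios[BLKG_IOSTAT_READ];
	wios = bis->cur.ios[BLKG_IOSTAT_WRITE];
	dios = bis->cur.ios[BLKG_IOSTAT_DISCARD];
	raw_spin_unlock_irqrestore(&blkg_stat_lock, flags);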

> @@ -2152,30 +2141,29 @@ void blk_cgroup_bio_start(struct bio *bio)
>  
>  	cpu = get_cpu();
>  	bis = per_cpu_ptr(bio->bi_blkg->iostat_cpu, cpu);
> -	flags = u64_stats_update_begin_irqsave(&bis->sync);
> -
> -	/*
> -	 * If the bio is flagged with BIO_CGROUP_ACCT it means this is a split
> -	 * bio and we would have already accounted for the size of the bio.
> -	 */
> -	if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {
> -		bio_set_flag(bio, BIO_CGROUP_ACCT);
> -		bis->cur.bytes[rwd] += bio->bi_iter.bi_size;
> -	}
> -	bis->cur.ios[rwd]++;
> +	scoped_guard(spinlock_irqsave, &bis->spinlock) {
> +		/*
> +		 * If the bio is flagged with BIO_CGROUP_ACCT it means this is a split
> +		 * bio and we would have already accounted for the size of the bio.
> +		 */
> +		if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {
> +			bio_set_flag(bio, BIO_CGROUP_ACCT);
> +			bis->cur.bytes[rwd] += bio->bi_iter.bi_size;
> +		}
> +		bis->cur.ios[rwd]++;
>  
> -	/*
> -	 * If the iostat_cpu isn't in a lockless list, put it into the
> -	 * list to indicate that a stat update is pending.
> -	 */
> -	if (!READ_ONCE(bis->lqueued)) {
> -		struct llist_head *lhead = this_cpu_ptr(blkcg->lhead);
> +		/*
> +		 * If the iostat_cpu isn't in a lockless list, put it into the
> +		 * list to indicate that a stat update is pending.
> +		 */
> +		if (!READ_ONCE(bis->lqueued)) {
> +			struct llist_head *lhead = this_cpu_ptr(blkcg->lhead);
>  
> -		llist_add(&bis->lnode, lhead);
> -		WRITE_ONCE(bis->lqueued, true);
> +			llist_add(&bis->lnode, lhead);
> +			WRITE_ONCE(bis->lqueued, true);
> +		}

These are per-cpu stat updates which should keep using u64_sync. We don't
want to incur locking overhead for stat updates in the hot issue path.
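
IOW, leave the hot-path update side as it is today, roughly:

	/* per-cpu writer keeps the cheap u64_sync sequence, no spinlock */
	flags = u64_stats_update_begin_irqsave(&bis->sync);
	if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {
		bio_set_flag(bio, BIO_CGROUP_ACCT);
		bis->cur.bytes[rwd] += bio->bi_iter.bi_size;
	}
	bis->cur.ios[rwd]++;
	...
	u64_stats_update_end_irqrestore(&bis->sync, flags);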

Thanks.

-- 
tejun
