lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <14ACCED7-D24D-4AC4-8677-F7F4630A840A@suse.de>
Date:   Thu, 1 Sep 2022 00:00:17 +0800
From:   Coly Li <colyli@...e.de>
To:     Kent Overstreet <kent.overstreet@...ux.dev>
Cc:     linux-kernel@...r.kernel.org, linux-bcache@...r.kernel.org
Subject: Re: [PATCH 2/3] bcache: Convert to lib/time_stats



> 2022年8月30日 00:53,Kent Overstreet <kent.overstreet@...ux.dev> 写道:
> 
> This patch converts bcache to the new generic time_stats code
> lib/time_stats.c. The new code is from bcachefs, and has some changes
> from the version in bcache:
> 
> - we now use ktime_get_ns(), not local_clock(). When the code was
>   originally written multi processor systems that lacked synchronized
>   TSCs were still common, and so local_clock() was much cheaper than
>   sched_clock() (though not necessarily fully accurate, due to TSC
>   drift). ktime_get_ns() should be cheap enough on all common hardware
>   now, and more standard/correct.
> 
> - time_stats are now exported in a single file in sysfs, which means we
>   can improve the statistics we keep track of without changing all
>   users. This also means we don't have to manually specify which units
>   (ms, us, ns) a given time_stats should be printed in; that's handled
>   dynamically.
> 
> - There's a lazily-allocated percpu buffer, which now needs to be freed
>   with time_stats_exit().
> 
> Signed-off-by: Kent Overstreet <kent.overstreet@...ux.dev>
> Cc: Coly Li <colyli@...e.de>

Hi Kent,

Overall I am OK with the change to bcache code. You may add
	Acked-by: Coly Li <colyli@...e.de>
in future version of this patch.

In bcache-tools, they don’t read the changed sysfs files (including bcache-status), IMHO changing the output format won’t be problem for upstream.

My only question is, how to understand the time_stats_to_text() output format,
count:          3
rate:           0/sec
frequency:      4 sec
avg duration:   4 sec
max duration:   4 sec
quantiles (ns): 0 4288669120 4288669120 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048 5360836048

Fro the above output, what are “rate”, “frequence” and “quantiles” for?

Thanks.

Coly Li

> ---
> drivers/md/bcache/Kconfig  |  1 +
> drivers/md/bcache/bcache.h |  1 +
> drivers/md/bcache/bset.c   |  8 +++---
> drivers/md/bcache/bset.h   |  1 +
> drivers/md/bcache/btree.c  | 12 ++++----
> drivers/md/bcache/super.c  |  3 ++
> drivers/md/bcache/sysfs.c  | 43 ++++++++++++++++++++--------
> drivers/md/bcache/util.c   | 30 --------------------
> drivers/md/bcache/util.h   | 57 --------------------------------------
> 9 files changed, 47 insertions(+), 109 deletions(-)
> 
> diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig
> index 529c9d04e9..8d165052e5 100644
> --- a/drivers/md/bcache/Kconfig
> +++ b/drivers/md/bcache/Kconfig
> @@ -4,6 +4,7 @@ config BCACHE
> 	tristate "Block device as cache"
> 	select BLOCK_HOLDER_DEPRECATED if SYSFS
> 	select CRC64
> +	select TIME_STATS
> 	help
> 	Allows a block device to be used as cache for other devices; uses
> 	a btree for indexing and the layout is optimized for SSDs.
> diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
> index 2acda9cea0..5100010a38 100644
> --- a/drivers/md/bcache/bcache.h
> +++ b/drivers/md/bcache/bcache.h
> @@ -185,6 +185,7 @@
> #include <linux/rbtree.h>
> #include <linux/rwsem.h>
> #include <linux/refcount.h>
> +#include <linux/time_stats.h>
> #include <linux/types.h>
> #include <linux/workqueue.h>
> #include <linux/kthread.h>
> diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
> index 94d38e8a59..727e9b7aea 100644
> --- a/drivers/md/bcache/bset.c
> +++ b/drivers/md/bcache/bset.c
> @@ -1251,7 +1251,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
> 		order = state->page_order;
> 	}
> 
> -	start_time = local_clock();
> +	start_time = ktime_get_ns();
> 
> 	btree_mergesort(b, out, iter, fixup, false);
> 	b->nsets = start;
> @@ -1286,7 +1286,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
> 	bch_bset_build_written_tree(b);
> 
> 	if (!start)
> -		bch_time_stats_update(&state->time, start_time);
> +		time_stats_update(&state->time, start_time);
> }
> 
> void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
> @@ -1322,14 +1322,14 @@ void bch_btree_sort_and_fix_extents(struct btree_keys *b,
> void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
> 			 struct bset_sort_state *state)
> {
> -	uint64_t start_time = local_clock();
> +	uint64_t start_time = ktime_get_ns();
> 	struct btree_iter iter;
> 
> 	bch_btree_iter_init(b, &iter, NULL);
> 
> 	btree_mergesort(b, new->set->data, &iter, false, true);
> 
> -	bch_time_stats_update(&state->time, start_time);
> +	time_stats_update(&state->time, start_time);
> 
> 	new->set->size = 0; // XXX: why?
> }
> diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
> index d795c84246..13e524ad77 100644
> --- a/drivers/md/bcache/bset.h
> +++ b/drivers/md/bcache/bset.h
> @@ -3,6 +3,7 @@
> #define _BCACHE_BSET_H
> 
> #include <linux/kernel.h>
> +#include <linux/time_stats.h>
> #include <linux/types.h>
> 
> #include "bcache_ondisk.h"
> diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
> index 147c493a98..abf543bc75 100644
> --- a/drivers/md/bcache/btree.c
> +++ b/drivers/md/bcache/btree.c
> @@ -242,7 +242,7 @@ static void btree_node_read_endio(struct bio *bio)
> 
> static void bch_btree_node_read(struct btree *b)
> {
> -	uint64_t start_time = local_clock();
> +	uint64_t start_time = ktime_get_ns();
> 	struct closure cl;
> 	struct bio *bio;
> 
> @@ -270,7 +270,7 @@ static void bch_btree_node_read(struct btree *b)
> 		goto err;
> 
> 	bch_btree_node_read_done(b);
> -	bch_time_stats_update(&b->c->btree_read_time, start_time);
> +	time_stats_update(&b->c->btree_read_time, start_time);
> 
> 	return;
> err:
> @@ -1789,7 +1789,7 @@ static void bch_btree_gc(struct cache_set *c)
> 	struct gc_stat stats;
> 	struct closure writes;
> 	struct btree_op op;
> -	uint64_t start_time = local_clock();
> +	uint64_t start_time = ktime_get_ns();
> 
> 	trace_bcache_gc_start(c);
> 
> @@ -1815,7 +1815,7 @@ static void bch_btree_gc(struct cache_set *c)
> 	bch_btree_gc_finish(c);
> 	wake_up_allocators(c);
> 
> -	bch_time_stats_update(&c->btree_gc_time, start_time);
> +	time_stats_update(&c->btree_gc_time, start_time);
> 
> 	stats.key_bytes *= sizeof(uint64_t);
> 	stats.data	<<= 9;
> @@ -2191,7 +2191,7 @@ static int btree_split(struct btree *b, struct btree_op *op,
> {
> 	bool split;
> 	struct btree *n1, *n2 = NULL, *n3 = NULL;
> -	uint64_t start_time = local_clock();
> +	uint64_t start_time = ktime_get_ns();
> 	struct closure cl;
> 	struct keylist parent_keys;
> 
> @@ -2297,7 +2297,7 @@ static int btree_split(struct btree *b, struct btree_op *op,
> 	btree_node_free(b);
> 	rw_unlock(true, n1);
> 
> -	bch_time_stats_update(&b->c->btree_split_time, start_time);
> +	time_stats_update(&b->c->btree_split_time, start_time);
> 
> 	return 0;
> err_free2:
> diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
> index ba3909bb6b..26c8fa93b5 100644
> --- a/drivers/md/bcache/super.c
> +++ b/drivers/md/bcache/super.c
> @@ -1691,6 +1691,9 @@ static void cache_set_free(struct closure *cl)
> 		kobject_put(&ca->kobj);
> 	}
> 
> +	time_stats_exit(&c->btree_gc_time);
> +	time_stats_exit(&c->btree_split_time);
> +	time_stats_exit(&c->sort.time);
> 
> 	if (c->moving_gc_wq)
> 		destroy_workqueue(c->moving_gc_wq);
> diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
> index c6f6770592..01eec5877c 100644
> --- a/drivers/md/bcache/sysfs.c
> +++ b/drivers/md/bcache/sysfs.c
> @@ -16,6 +16,7 @@
> #include <linux/blkdev.h>
> #include <linux/sort.h>
> #include <linux/sched/clock.h>
> +#include <linux/seq_buf.h>
> 
> extern bool bcache_is_reboot;
> 
> @@ -79,10 +80,10 @@ read_attribute(active_journal_entries);
> read_attribute(backing_dev_name);
> read_attribute(backing_dev_uuid);
> 
> -sysfs_time_stats_attribute(btree_gc,	sec, ms);
> -sysfs_time_stats_attribute(btree_split, sec, us);
> -sysfs_time_stats_attribute(btree_sort,	ms,  us);
> -sysfs_time_stats_attribute(btree_read,	ms,  us);
> +read_attribute(btree_gc_time);
> +read_attribute(btree_split_time);
> +read_attribute(btree_sort_time);
> +read_attribute(btree_read_time);
> 
> read_attribute(btree_nodes);
> read_attribute(btree_used_percent);
> @@ -731,6 +732,9 @@ static unsigned int bch_average_key_size(struct cache_set *c)
> SHOW(__bch_cache_set)
> {
> 	struct cache_set *c = container_of(kobj, struct cache_set, kobj);
> +	struct seq_buf s;
> +
> +	seq_buf_init(&s, buf, PAGE_SIZE);
> 
> 	sysfs_print(synchronous,		CACHE_SYNC(&c->cache->sb));
> 	sysfs_print(journal_delay_ms,		c->journal_delay_ms);
> @@ -743,10 +747,25 @@ SHOW(__bch_cache_set)
> 	sysfs_print(btree_cache_max_chain,	bch_cache_max_chain(c));
> 	sysfs_print(cache_available_percent,	100 - c->gc_stats.in_use);
> 
> -	sysfs_print_time_stats(&c->btree_gc_time,	btree_gc, sec, ms);
> -	sysfs_print_time_stats(&c->btree_split_time,	btree_split, sec, us);
> -	sysfs_print_time_stats(&c->sort.time,		btree_sort, ms, us);
> -	sysfs_print_time_stats(&c->btree_read_time,	btree_read, ms, us);
> +	if (attr == &sysfs_btree_gc_time) {
> +		time_stats_to_text(&s, &c->btree_gc_time);
> +		return s.len;
> +	}
> +
> +	if (attr == &sysfs_btree_split_time) {
> +		time_stats_to_text(&s, &c->btree_split_time);
> +		return s.len;
> +	}
> +
> +	if (attr == &sysfs_btree_sort_time) {
> +		time_stats_to_text(&s, &c->sort.time);
> +		return s.len;
> +	}
> +
> +	if (attr == &sysfs_btree_read_time) {
> +		time_stats_to_text(&s, &c->btree_read_time);
> +		return s.len;
> +	}
> 
> 	sysfs_print(btree_used_percent,	bch_btree_used(c));
> 	sysfs_print(btree_nodes,	c->gc_stats.nodes);
> @@ -988,10 +1007,10 @@ KTYPE(bch_cache_set);
> static struct attribute *bch_cache_set_internal_attrs[] = {
> 	&sysfs_active_journal_entries,
> 
> -	sysfs_time_stats_attribute_list(btree_gc, sec, ms)
> -	sysfs_time_stats_attribute_list(btree_split, sec, us)
> -	sysfs_time_stats_attribute_list(btree_sort, ms, us)
> -	sysfs_time_stats_attribute_list(btree_read, ms, us)
> +	&sysfs_btree_gc_time,
> +	&sysfs_btree_split_time,
> +	&sysfs_btree_sort_time,
> +	&sysfs_btree_read_time,
> 
> 	&sysfs_btree_nodes,
> 	&sysfs_btree_used_percent,
> diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
> index ae380bc399..95282bf0f9 100644
> --- a/drivers/md/bcache/util.c
> +++ b/drivers/md/bcache/util.c
> @@ -160,36 +160,6 @@ int bch_parse_uuid(const char *s, char *uuid)
> 	return i;
> }
> 
> -void bch_time_stats_update(struct time_stats *stats, uint64_t start_time)
> -{
> -	uint64_t now, duration, last;
> -
> -	spin_lock(&stats->lock);
> -
> -	now		= local_clock();
> -	duration	= time_after64(now, start_time)
> -		? now - start_time : 0;
> -	last		= time_after64(now, stats->last)
> -		? now - stats->last : 0;
> -
> -	stats->max_duration = max(stats->max_duration, duration);
> -
> -	if (stats->last) {
> -		ewma_add(stats->average_duration, duration, 8, 8);
> -
> -		if (stats->average_frequency)
> -			ewma_add(stats->average_frequency, last, 8, 8);
> -		else
> -			stats->average_frequency  = last << 8;
> -	} else {
> -		stats->average_duration  = duration << 8;
> -	}
> -
> -	stats->last = now ?: 1;
> -
> -	spin_unlock(&stats->lock);
> -}
> -
> /**
>  * bch_next_delay() - update ratelimiting statistics and calculate next delay
>  * @d: the struct bch_ratelimit to update
> diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
> index 6f3cb7c921..1e1bdbae95 100644
> --- a/drivers/md/bcache/util.h
> +++ b/drivers/md/bcache/util.h
> @@ -345,68 +345,11 @@ ssize_t bch_hprint(char *buf, int64_t v);
> bool bch_is_zero(const char *p, size_t n);
> int bch_parse_uuid(const char *s, char *uuid);
> 
> -struct time_stats {
> -	spinlock_t	lock;
> -	/*
> -	 * all fields are in nanoseconds, averages are ewmas stored left shifted
> -	 * by 8
> -	 */
> -	uint64_t	max_duration;
> -	uint64_t	average_duration;
> -	uint64_t	average_frequency;
> -	uint64_t	last;
> -};
> -
> -void bch_time_stats_update(struct time_stats *stats, uint64_t time);
> -
> static inline unsigned int local_clock_us(void)
> {
> 	return local_clock() >> 10;
> }
> 
> -#define NSEC_PER_ns			1L
> -#define NSEC_PER_us			NSEC_PER_USEC
> -#define NSEC_PER_ms			NSEC_PER_MSEC
> -#define NSEC_PER_sec			NSEC_PER_SEC
> -
> -#define __print_time_stat(stats, name, stat, units)			\
> -	sysfs_print(name ## _ ## stat ## _ ## units,			\
> -		    div_u64((stats)->stat >> 8, NSEC_PER_ ## units))
> -
> -#define sysfs_print_time_stats(stats, name,				\
> -			       frequency_units,				\
> -			       duration_units)				\
> -do {									\
> -	__print_time_stat(stats, name,					\
> -			  average_frequency,	frequency_units);	\
> -	__print_time_stat(stats, name,					\
> -			  average_duration,	duration_units);	\
> -	sysfs_print(name ## _ ##max_duration ## _ ## duration_units,	\
> -			div_u64((stats)->max_duration,			\
> -				NSEC_PER_ ## duration_units));		\
> -									\
> -	sysfs_print(name ## _last_ ## frequency_units, (stats)->last	\
> -		    ? div_s64(local_clock() - (stats)->last,		\
> -			      NSEC_PER_ ## frequency_units)		\
> -		    : -1LL);						\
> -} while (0)
> -
> -#define sysfs_time_stats_attribute(name,				\
> -				   frequency_units,			\
> -				   duration_units)			\
> -read_attribute(name ## _average_frequency_ ## frequency_units);		\
> -read_attribute(name ## _average_duration_ ## duration_units);		\
> -read_attribute(name ## _max_duration_ ## duration_units);		\
> -read_attribute(name ## _last_ ## frequency_units)
> -
> -#define sysfs_time_stats_attribute_list(name,				\
> -					frequency_units,		\
> -					duration_units)			\
> -&sysfs_ ## name ## _average_frequency_ ## frequency_units,		\
> -&sysfs_ ## name ## _average_duration_ ## duration_units,		\
> -&sysfs_ ## name ## _max_duration_ ## duration_units,			\
> -&sysfs_ ## name ## _last_ ## frequency_units,
> -
> #define ewma_add(ewma, val, weight, factor)				\
> ({									\
> 	(ewma) *= (weight) - 1;						\
> -- 
> 2.36.1
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ