lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <doivz4drfffwi63k2yeanupbjpubqha4ew7hjgyghokvgosd7v@hehsy6mkfq6h>
Date: Sun, 15 Jun 2025 13:51:33 +0800
From: Coly Li <colyli@...nel.org>
To: Kuan-Wei Chiu <visitorckw@...il.com>
Cc: akpm@...ux-foundation.org, kent.overstreet@...ux.dev, 
	robertpang@...gle.com, linux-kernel@...r.kernel.org, linux-bcache@...r.kernel.org, 
	jserv@...s.ncku.edu.tw, stable@...r.kernel.org
Subject: Re: [PATCH v2 2/3] Revert "bcache: remove heap-related macros and
 switch to generic min_heap"

On Sun, Jun 15, 2025 at 04:23:52AM +0800, Kuan-Wei Chiu wrote:
> This reverts commit 866898efbb25bb44fd42848318e46db9e785973a.
> 
> The generic bottom-up min_heap implementation causes performance
> regression in invalidate_buckets_lru(), a hot path in bcache. Before
> the cache is fully populated, new_bucket_prio() often returns zero,
> leading to many equal comparisons. In such cases, bottom-up sift_down
> performs up to 2 * log2(n) comparisons, while the original top-down
> approach completes with just O() comparisons, resulting in a measurable
> performance gap.
> 
> The performance degradation is further worsened by the non-inlined
> min_heap API functions introduced in commit 92a8b224b833
> ("lib/min_heap: introduce non-inline versions of min heap API
> functions"), adding function call overhead to this critical path.
> 
> As reported by Robert, bcache now suffers from latency spikes, with
> P100 (max) latency increasing from 600 ms to 2.4 seconds every 5
> minutes. These regressions degrade bcache's effectiveness as a
> low-latency cache layer and lead to frequent timeouts and application
> stalls in production environments.
> 
> This revert aims to restore bcache's original low-latency behavior.
> 
> Link: https://lore.kernel.org/lkml/CAJhEC05+0S69z+3+FB2Cd0hD+pCRyWTKLEOsc8BOmH73p1m+KQ@mail.gmail.com
> Fixes: 866898efbb25 ("bcache: remove heap-related macros and switch to generic min_heap")
> Fixes: 92a8b224b833 ("lib/min_heap: introduce non-inline versions of min heap API functions")
> Reported-by: Robert Pang <robertpang@...gle.com>
> Closes: https://lore.kernel.org/linux-bcache/CAJhEC06F_AtrPgw2-7CvCqZgeStgCtitbD-ryuPpXQA-JG5XXw@mail.gmail.com
> Cc: stable@...r.kernel.org
> Signed-off-by: Kuan-Wei Chiu <visitorckw@...il.com>

Acked-by: Coly Li <colyli@...nel.org>

Thanks.

> ---
>  drivers/md/bcache/alloc.c     |  64 +++++-------------
>  drivers/md/bcache/bcache.h    |   2 +-
>  drivers/md/bcache/bset.c      | 124 ++++++++++++----------------------
>  drivers/md/bcache/bset.h      |  40 ++++++-----
>  drivers/md/bcache/btree.c     |  69 ++++++++-----------
>  drivers/md/bcache/extents.c   |  53 ++++++---------
>  drivers/md/bcache/movinggc.c  |  41 +++--------
>  drivers/md/bcache/super.c     |   3 +-
>  drivers/md/bcache/sysfs.c     |   4 +-
>  drivers/md/bcache/util.h      |  67 +++++++++++++++++-
>  drivers/md/bcache/writeback.c |  13 ++--
>  11 files changed, 217 insertions(+), 263 deletions(-)
> 
> diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
> index da50f6661bae..48ce750bf70a 100644
> --- a/drivers/md/bcache/alloc.c
> +++ b/drivers/md/bcache/alloc.c
> @@ -164,68 +164,40 @@ static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
>   * prio is worth 1/8th of what INITIAL_PRIO is worth.
>   */
>  
> -static inline unsigned int new_bucket_prio(struct cache *ca, struct bucket *b)
> -{
> -	unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8;
> -
> -	return (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b);
> -}
> -
> -static inline bool new_bucket_max_cmp(const void *l, const void *r, void *args)
> -{
> -	struct bucket **lhs = (struct bucket **)l;
> -	struct bucket **rhs = (struct bucket **)r;
> -	struct cache *ca = args;
> -
> -	return new_bucket_prio(ca, *lhs) > new_bucket_prio(ca, *rhs);
> -}
> -
> -static inline bool new_bucket_min_cmp(const void *l, const void *r, void *args)
> -{
> -	struct bucket **lhs = (struct bucket **)l;
> -	struct bucket **rhs = (struct bucket **)r;
> -	struct cache *ca = args;
> -
> -	return new_bucket_prio(ca, *lhs) < new_bucket_prio(ca, *rhs);
> -}
> -
> -static inline void new_bucket_swap(void *l, void *r, void __always_unused *args)
> -{
> -	struct bucket **lhs = l, **rhs = r;
> +#define bucket_prio(b)							\
> +({									\
> +	unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8;	\
> +									\
> +	(b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b);	\
> +})
>  
> -	swap(*lhs, *rhs);
> -}
> +#define bucket_max_cmp(l, r)	(bucket_prio(l) < bucket_prio(r))
> +#define bucket_min_cmp(l, r)	(bucket_prio(l) > bucket_prio(r))
>  
>  static void invalidate_buckets_lru(struct cache *ca)
>  {
>  	struct bucket *b;
> -	const struct min_heap_callbacks bucket_max_cmp_callback = {
> -		.less = new_bucket_max_cmp,
> -		.swp = new_bucket_swap,
> -	};
> -	const struct min_heap_callbacks bucket_min_cmp_callback = {
> -		.less = new_bucket_min_cmp,
> -		.swp = new_bucket_swap,
> -	};
> +	ssize_t i;
>  
> -	ca->heap.nr = 0;
> +	ca->heap.used = 0;
>  
>  	for_each_bucket(b, ca) {
>  		if (!bch_can_invalidate_bucket(ca, b))
>  			continue;
>  
> -		if (!min_heap_full(&ca->heap))
> -			min_heap_push(&ca->heap, &b, &bucket_max_cmp_callback, ca);
> -		else if (!new_bucket_max_cmp(&b, min_heap_peek(&ca->heap), ca)) {
> +		if (!heap_full(&ca->heap))
> +			heap_add(&ca->heap, b, bucket_max_cmp);
> +		else if (bucket_max_cmp(b, heap_peek(&ca->heap))) {
>  			ca->heap.data[0] = b;
> -			min_heap_sift_down(&ca->heap, 0, &bucket_max_cmp_callback, ca);
> +			heap_sift(&ca->heap, 0, bucket_max_cmp);
>  		}
>  	}
>  
> -	min_heapify_all(&ca->heap, &bucket_min_cmp_callback, ca);
> +	for (i = ca->heap.used / 2 - 1; i >= 0; --i)
> +		heap_sift(&ca->heap, i, bucket_min_cmp);
>  
>  	while (!fifo_full(&ca->free_inc)) {
> -		if (!ca->heap.nr) {
> +		if (!heap_pop(&ca->heap, b, bucket_min_cmp)) {
>  			/*
>  			 * We don't want to be calling invalidate_buckets()
>  			 * multiple times when it can't do anything
> @@ -234,8 +206,6 @@ static void invalidate_buckets_lru(struct cache *ca)
>  			wake_up_gc(ca->set);
>  			return;
>  		}
> -		b = min_heap_peek(&ca->heap)[0];
> -		min_heap_pop(&ca->heap, &bucket_min_cmp_callback, ca);
>  
>  		bch_invalidate_one_bucket(ca, b);
>  	}
> diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
> index 785b0d9008fa..1d33e40d26ea 100644
> --- a/drivers/md/bcache/bcache.h
> +++ b/drivers/md/bcache/bcache.h
> @@ -458,7 +458,7 @@ struct cache {
>  	/* Allocation stuff: */
>  	struct bucket		*buckets;
>  
> -	DEFINE_MIN_HEAP(struct bucket *, cache_heap) heap;
> +	DECLARE_HEAP(struct bucket *, heap);
>  
>  	/*
>  	 * If nonzero, we know we aren't going to find any buckets to invalidate
> diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
> index bd97d8626887..463eb13bd0b2 100644
> --- a/drivers/md/bcache/bset.c
> +++ b/drivers/md/bcache/bset.c
> @@ -54,11 +54,9 @@ void bch_dump_bucket(struct btree_keys *b)
>  int __bch_count_data(struct btree_keys *b)
>  {
>  	unsigned int ret = 0;
> -	struct btree_iter iter;
> +	struct btree_iter_stack iter;
>  	struct bkey *k;
>  
> -	min_heap_init(&iter.heap, NULL, MAX_BSETS);
> -
>  	if (b->ops->is_extents)
>  		for_each_key(b, k, &iter)
>  			ret += KEY_SIZE(k);
> @@ -69,11 +67,9 @@ void __bch_check_keys(struct btree_keys *b, const char *fmt, ...)
>  {
>  	va_list args;
>  	struct bkey *k, *p = NULL;
> -	struct btree_iter iter;
> +	struct btree_iter_stack iter;
>  	const char *err;
>  
> -	min_heap_init(&iter.heap, NULL, MAX_BSETS);
> -
>  	for_each_key(b, k, &iter) {
>  		if (b->ops->is_extents) {
>  			err = "Keys out of order";
> @@ -114,9 +110,9 @@ void __bch_check_keys(struct btree_keys *b, const char *fmt, ...)
>  
>  static void bch_btree_iter_next_check(struct btree_iter *iter)
>  {
> -	struct bkey *k = iter->heap.data->k, *next = bkey_next(k);
> +	struct bkey *k = iter->data->k, *next = bkey_next(k);
>  
> -	if (next < iter->heap.data->end &&
> +	if (next < iter->data->end &&
>  	    bkey_cmp(k, iter->b->ops->is_extents ?
>  		     &START_KEY(next) : next) > 0) {
>  		bch_dump_bucket(iter->b);
> @@ -883,14 +879,12 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
>  	unsigned int status = BTREE_INSERT_STATUS_NO_INSERT;
>  	struct bset *i = bset_tree_last(b)->data;
>  	struct bkey *m, *prev = NULL;
> -	struct btree_iter iter;
> +	struct btree_iter_stack iter;
>  	struct bkey preceding_key_on_stack = ZERO_KEY;
>  	struct bkey *preceding_key_p = &preceding_key_on_stack;
>  
>  	BUG_ON(b->ops->is_extents && !KEY_SIZE(k));
>  
> -	min_heap_init(&iter.heap, NULL, MAX_BSETS);
> -
>  	/*
>  	 * If k has preceding key, preceding_key_p will be set to address
>  	 *  of k's preceding key; otherwise preceding_key_p will be set
> @@ -901,9 +895,9 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
>  	else
>  		preceding_key(k, &preceding_key_p);
>  
> -	m = bch_btree_iter_init(b, &iter, preceding_key_p);
> +	m = bch_btree_iter_stack_init(b, &iter, preceding_key_p);
>  
> -	if (b->ops->insert_fixup(b, k, &iter, replace_key))
> +	if (b->ops->insert_fixup(b, k, &iter.iter, replace_key))
>  		return status;
>  
>  	status = BTREE_INSERT_STATUS_INSERT;
> @@ -1083,102 +1077,79 @@ struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t,
>  
>  /* Btree iterator */
>  
> -typedef bool (new_btree_iter_cmp_fn)(const void *, const void *, void *);
> -
> -static inline bool new_btree_iter_cmp(const void *l, const void *r, void __always_unused *args)
> -{
> -	const struct btree_iter_set *_l = l;
> -	const struct btree_iter_set *_r = r;
> -
> -	return bkey_cmp(_l->k, _r->k) <= 0;
> -}
> +typedef bool (btree_iter_cmp_fn)(struct btree_iter_set,
> +				 struct btree_iter_set);
>  
> -static inline void new_btree_iter_swap(void *iter1, void *iter2, void __always_unused *args)
> +static inline bool btree_iter_cmp(struct btree_iter_set l,
> +				  struct btree_iter_set r)
>  {
> -	struct btree_iter_set *_iter1 = iter1;
> -	struct btree_iter_set *_iter2 = iter2;
> -
> -	swap(*_iter1, *_iter2);
> +	return bkey_cmp(l.k, r.k) > 0;
>  }
>  
>  static inline bool btree_iter_end(struct btree_iter *iter)
>  {
> -	return !iter->heap.nr;
> +	return !iter->used;
>  }
>  
>  void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
>  			 struct bkey *end)
>  {
> -	const struct min_heap_callbacks callbacks = {
> -		.less = new_btree_iter_cmp,
> -		.swp = new_btree_iter_swap,
> -	};
> -
>  	if (k != end)
> -		BUG_ON(!min_heap_push(&iter->heap,
> -				 &((struct btree_iter_set) { k, end }),
> -				 &callbacks,
> -				 NULL));
> +		BUG_ON(!heap_add(iter,
> +				 ((struct btree_iter_set) { k, end }),
> +				 btree_iter_cmp));
>  }
>  
> -static struct bkey *__bch_btree_iter_init(struct btree_keys *b,
> -					  struct btree_iter *iter,
> -					  struct bkey *search,
> -					  struct bset_tree *start)
> +static struct bkey *__bch_btree_iter_stack_init(struct btree_keys *b,
> +						struct btree_iter_stack *iter,
> +						struct bkey *search,
> +						struct bset_tree *start)
>  {
>  	struct bkey *ret = NULL;
>  
> -	iter->heap.size = ARRAY_SIZE(iter->heap.preallocated);
> -	iter->heap.nr = 0;
> +	iter->iter.size = ARRAY_SIZE(iter->stack_data);
> +	iter->iter.used = 0;
>  
>  #ifdef CONFIG_BCACHE_DEBUG
> -	iter->b = b;
> +	iter->iter.b = b;
>  #endif
>  
>  	for (; start <= bset_tree_last(b); start++) {
>  		ret = bch_bset_search(b, start, search);
> -		bch_btree_iter_push(iter, ret, bset_bkey_last(start->data));
> +		bch_btree_iter_push(&iter->iter, ret, bset_bkey_last(start->data));
>  	}
>  
>  	return ret;
>  }
>  
> -struct bkey *bch_btree_iter_init(struct btree_keys *b,
> -				 struct btree_iter *iter,
> +struct bkey *bch_btree_iter_stack_init(struct btree_keys *b,
> +				 struct btree_iter_stack *iter,
>  				 struct bkey *search)
>  {
> -	return __bch_btree_iter_init(b, iter, search, b->set);
> +	return __bch_btree_iter_stack_init(b, iter, search, b->set);
>  }
>  
>  static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
> -						 new_btree_iter_cmp_fn *cmp)
> +						 btree_iter_cmp_fn *cmp)
>  {
>  	struct btree_iter_set b __maybe_unused;
>  	struct bkey *ret = NULL;
> -	const struct min_heap_callbacks callbacks = {
> -		.less = cmp,
> -		.swp = new_btree_iter_swap,
> -	};
>  
>  	if (!btree_iter_end(iter)) {
>  		bch_btree_iter_next_check(iter);
>  
> -		ret = iter->heap.data->k;
> -		iter->heap.data->k = bkey_next(iter->heap.data->k);
> +		ret = iter->data->k;
> +		iter->data->k = bkey_next(iter->data->k);
>  
> -		if (iter->heap.data->k > iter->heap.data->end) {
> +		if (iter->data->k > iter->data->end) {
>  			WARN_ONCE(1, "bset was corrupt!\n");
> -			iter->heap.data->k = iter->heap.data->end;
> +			iter->data->k = iter->data->end;
>  		}
>  
> -		if (iter->heap.data->k == iter->heap.data->end) {
> -			if (iter->heap.nr) {
> -				b = min_heap_peek(&iter->heap)[0];
> -				min_heap_pop(&iter->heap, &callbacks, NULL);
> -			}
> -		}
> +		if (iter->data->k == iter->data->end)
> +			heap_pop(iter, b, cmp);
>  		else
> -			min_heap_sift_down(&iter->heap, 0, &callbacks, NULL);
> +			heap_sift(iter, 0, cmp);
>  	}
>  
>  	return ret;
> @@ -1186,7 +1157,7 @@ static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
>  
>  struct bkey *bch_btree_iter_next(struct btree_iter *iter)
>  {
> -	return __bch_btree_iter_next(iter, new_btree_iter_cmp);
> +	return __bch_btree_iter_next(iter, btree_iter_cmp);
>  
>  }
>  
> @@ -1224,18 +1195,16 @@ static void btree_mergesort(struct btree_keys *b, struct bset *out,
>  			    struct btree_iter *iter,
>  			    bool fixup, bool remove_stale)
>  {
> +	int i;
>  	struct bkey *k, *last = NULL;
>  	BKEY_PADDED(k) tmp;
>  	bool (*bad)(struct btree_keys *, const struct bkey *) = remove_stale
>  		? bch_ptr_bad
>  		: bch_ptr_invalid;
> -	const struct min_heap_callbacks callbacks = {
> -		.less = b->ops->sort_cmp,
> -		.swp = new_btree_iter_swap,
> -	};
>  
>  	/* Heapify the iterator, using our comparison function */
> -	min_heapify_all(&iter->heap, &callbacks, NULL);
> +	for (i = iter->used / 2 - 1; i >= 0; --i)
> +		heap_sift(iter, i, b->ops->sort_cmp);
>  
>  	while (!btree_iter_end(iter)) {
>  		if (b->ops->sort_fixup && fixup)
> @@ -1324,11 +1293,10 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
>  			    struct bset_sort_state *state)
>  {
>  	size_t order = b->page_order, keys = 0;
> -	struct btree_iter iter;
> +	struct btree_iter_stack iter;
>  	int oldsize = bch_count_data(b);
>  
> -	min_heap_init(&iter.heap, NULL, MAX_BSETS);
> -	__bch_btree_iter_init(b, &iter, NULL, &b->set[start]);
> +	__bch_btree_iter_stack_init(b, &iter, NULL, &b->set[start]);
>  
>  	if (start) {
>  		unsigned int i;
> @@ -1339,7 +1307,7 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
>  		order = get_order(__set_bytes(b->set->data, keys));
>  	}
>  
> -	__btree_sort(b, &iter, start, order, false, state);
> +	__btree_sort(b, &iter.iter, start, order, false, state);
>  
>  	EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize);
>  }
> @@ -1355,13 +1323,11 @@ void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
>  			 struct bset_sort_state *state)
>  {
>  	uint64_t start_time = local_clock();
> -	struct btree_iter iter;
> -
> -	min_heap_init(&iter.heap, NULL, MAX_BSETS);
> +	struct btree_iter_stack iter;
>  
> -	bch_btree_iter_init(b, &iter, NULL);
> +	bch_btree_iter_stack_init(b, &iter, NULL);
>  
> -	btree_mergesort(b, new->set->data, &iter, false, true);
> +	btree_mergesort(b, new->set->data, &iter.iter, false, true);
>  
>  	bch_time_stats_update(&state->time, start_time);
>  
> diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
> index f79441acd4c1..011f6062c4c0 100644
> --- a/drivers/md/bcache/bset.h
> +++ b/drivers/md/bcache/bset.h
> @@ -187,9 +187,8 @@ struct bset_tree {
>  };
>  
>  struct btree_keys_ops {
> -	bool		(*sort_cmp)(const void *l,
> -				    const void *r,
> -					void *args);
> +	bool		(*sort_cmp)(struct btree_iter_set l,
> +				    struct btree_iter_set r);
>  	struct bkey	*(*sort_fixup)(struct btree_iter *iter,
>  				       struct bkey *tmp);
>  	bool		(*insert_fixup)(struct btree_keys *b,
> @@ -313,17 +312,23 @@ enum {
>  	BTREE_INSERT_STATUS_FRONT_MERGE,
>  };
>  
> -struct btree_iter_set {
> -	struct bkey *k, *end;
> -};
> -
>  /* Btree key iteration */
>  
>  struct btree_iter {
> +	size_t size, used;
>  #ifdef CONFIG_BCACHE_DEBUG
>  	struct btree_keys *b;
>  #endif
> -	MIN_HEAP_PREALLOCATED(struct btree_iter_set, btree_iter_heap, MAX_BSETS) heap;
> +	struct btree_iter_set {
> +		struct bkey *k, *end;
> +	} data[];
> +};
> +
> +/* Fixed-size btree_iter that can be allocated on the stack */
> +
> +struct btree_iter_stack {
> +	struct btree_iter iter;
> +	struct btree_iter_set stack_data[MAX_BSETS];
>  };
>  
>  typedef bool (*ptr_filter_fn)(struct btree_keys *b, const struct bkey *k);
> @@ -335,9 +340,9 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
>  
>  void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
>  			 struct bkey *end);
> -struct bkey *bch_btree_iter_init(struct btree_keys *b,
> -				 struct btree_iter *iter,
> -				 struct bkey *search);
> +struct bkey *bch_btree_iter_stack_init(struct btree_keys *b,
> +				       struct btree_iter_stack *iter,
> +				       struct bkey *search);
>  
>  struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t,
>  			       const struct bkey *search);
> @@ -352,13 +357,14 @@ static inline struct bkey *bch_bset_search(struct btree_keys *b,
>  	return search ? __bch_bset_search(b, t, search) : t->data->start;
>  }
>  
> -#define for_each_key_filter(b, k, iter, filter)				\
> -	for (bch_btree_iter_init((b), (iter), NULL);			\
> -	     ((k) = bch_btree_iter_next_filter((iter), (b), filter));)
> +#define for_each_key_filter(b, k, stack_iter, filter)                      \
> +	for (bch_btree_iter_stack_init((b), (stack_iter), NULL);           \
> +	     ((k) = bch_btree_iter_next_filter(&((stack_iter)->iter), (b), \
> +					       filter));)
>  
> -#define for_each_key(b, k, iter)					\
> -	for (bch_btree_iter_init((b), (iter), NULL);			\
> -	     ((k) = bch_btree_iter_next(iter));)
> +#define for_each_key(b, k, stack_iter)                           \
> +	for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \
> +	     ((k) = bch_btree_iter_next(&((stack_iter)->iter)));)
>  
>  /* Sorting */
>  
> diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
> index 1d0100677357..210b59007d98 100644
> --- a/drivers/md/bcache/btree.c
> +++ b/drivers/md/bcache/btree.c
> @@ -148,19 +148,19 @@ void bch_btree_node_read_done(struct btree *b)
>  {
>  	const char *err = "bad btree header";
>  	struct bset *i = btree_bset_first(b);
> -	struct btree_iter iter;
> +	struct btree_iter *iter;
>  
>  	/*
>  	 * c->fill_iter can allocate an iterator with more memory space
>  	 * than static MAX_BSETS.
>  	 * See the comment arount cache_set->fill_iter.
>  	 */
> -	iter.heap.data = mempool_alloc(&b->c->fill_iter, GFP_NOIO);
> -	iter.heap.size = b->c->cache->sb.bucket_size / b->c->cache->sb.block_size;
> -	iter.heap.nr = 0;
> +	iter = mempool_alloc(&b->c->fill_iter, GFP_NOIO);
> +	iter->size = b->c->cache->sb.bucket_size / b->c->cache->sb.block_size;
> +	iter->used = 0;
>  
>  #ifdef CONFIG_BCACHE_DEBUG
> -	iter.b = &b->keys;
> +	iter->b = &b->keys;
>  #endif
>  
>  	if (!i->seq)
> @@ -198,7 +198,7 @@ void bch_btree_node_read_done(struct btree *b)
>  		if (i != b->keys.set[0].data && !i->keys)
>  			goto err;
>  
> -		bch_btree_iter_push(&iter, i->start, bset_bkey_last(i));
> +		bch_btree_iter_push(iter, i->start, bset_bkey_last(i));
>  
>  		b->written += set_blocks(i, block_bytes(b->c->cache));
>  	}
> @@ -210,7 +210,7 @@ void bch_btree_node_read_done(struct btree *b)
>  		if (i->seq == b->keys.set[0].data->seq)
>  			goto err;
>  
> -	bch_btree_sort_and_fix_extents(&b->keys, &iter, &b->c->sort);
> +	bch_btree_sort_and_fix_extents(&b->keys, iter, &b->c->sort);
>  
>  	i = b->keys.set[0].data;
>  	err = "short btree key";
> @@ -222,7 +222,7 @@ void bch_btree_node_read_done(struct btree *b)
>  		bch_bset_init_next(&b->keys, write_block(b),
>  				   bset_magic(&b->c->cache->sb));
>  out:
> -	mempool_free(iter.heap.data, &b->c->fill_iter);
> +	mempool_free(iter, &b->c->fill_iter);
>  	return;
>  err:
>  	set_btree_node_io_error(b);
> @@ -1306,11 +1306,9 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc)
>  	uint8_t stale = 0;
>  	unsigned int keys = 0, good_keys = 0;
>  	struct bkey *k;
> -	struct btree_iter iter;
> +	struct btree_iter_stack iter;
>  	struct bset_tree *t;
>  
> -	min_heap_init(&iter.heap, NULL, MAX_BSETS);
> -
>  	gc->nodes++;
>  
>  	for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) {
> @@ -1569,11 +1567,9 @@ static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op,
>  static unsigned int btree_gc_count_keys(struct btree *b)
>  {
>  	struct bkey *k;
> -	struct btree_iter iter;
> +	struct btree_iter_stack iter;
>  	unsigned int ret = 0;
>  
> -	min_heap_init(&iter.heap, NULL, MAX_BSETS);
> -
>  	for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
>  		ret += bkey_u64s(k);
>  
> @@ -1612,18 +1608,18 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
>  	int ret = 0;
>  	bool should_rewrite;
>  	struct bkey *k;
> -	struct btree_iter iter;
> +	struct btree_iter_stack iter;
>  	struct gc_merge_info r[GC_MERGE_NODES];
>  	struct gc_merge_info *i, *last = r + ARRAY_SIZE(r) - 1;
>  
> -	min_heap_init(&iter.heap, NULL, MAX_BSETS);
> -	bch_btree_iter_init(&b->keys, &iter, &b->c->gc_done);
> +	bch_btree_iter_stack_init(&b->keys, &iter, &b->c->gc_done);
>  
>  	for (i = r; i < r + ARRAY_SIZE(r); i++)
>  		i->b = ERR_PTR(-EINTR);
>  
>  	while (1) {
> -		k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad);
> +		k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
> +					       bch_ptr_bad);
>  		if (k) {
>  			r->b = bch_btree_node_get(b->c, op, k, b->level - 1,
>  						  true, b);
> @@ -1918,9 +1914,7 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
>  {
>  	int ret = 0;
>  	struct bkey *k, *p = NULL;
> -	struct btree_iter iter;
> -
> -	min_heap_init(&iter.heap, NULL, MAX_BSETS);
> +	struct btree_iter_stack iter;
>  
>  	for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid)
>  		bch_initial_mark_key(b->c, b->level, k);
> @@ -1928,10 +1922,10 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
>  	bch_initial_mark_key(b->c, b->level + 1, &b->key);
>  
>  	if (b->level) {
> -		bch_btree_iter_init(&b->keys, &iter, NULL);
> +		bch_btree_iter_stack_init(&b->keys, &iter, NULL);
>  
>  		do {
> -			k = bch_btree_iter_next_filter(&iter, &b->keys,
> +			k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
>  						       bch_ptr_bad);
>  			if (k) {
>  				btree_node_prefetch(b, k);
> @@ -1959,7 +1953,7 @@ static int bch_btree_check_thread(void *arg)
>  	struct btree_check_info *info = arg;
>  	struct btree_check_state *check_state = info->state;
>  	struct cache_set *c = check_state->c;
> -	struct btree_iter iter;
> +	struct btree_iter_stack iter;
>  	struct bkey *k, *p;
>  	int cur_idx, prev_idx, skip_nr;
>  
> @@ -1967,11 +1961,9 @@ static int bch_btree_check_thread(void *arg)
>  	cur_idx = prev_idx = 0;
>  	ret = 0;
>  
> -	min_heap_init(&iter.heap, NULL, MAX_BSETS);
> -
>  	/* root node keys are checked before thread created */
> -	bch_btree_iter_init(&c->root->keys, &iter, NULL);
> -	k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
> +	bch_btree_iter_stack_init(&c->root->keys, &iter, NULL);
> +	k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad);
>  	BUG_ON(!k);
>  
>  	p = k;
> @@ -1989,7 +1981,7 @@ static int bch_btree_check_thread(void *arg)
>  		skip_nr = cur_idx - prev_idx;
>  
>  		while (skip_nr) {
> -			k = bch_btree_iter_next_filter(&iter,
> +			k = bch_btree_iter_next_filter(&iter.iter,
>  						       &c->root->keys,
>  						       bch_ptr_bad);
>  			if (k)
> @@ -2062,11 +2054,9 @@ int bch_btree_check(struct cache_set *c)
>  	int ret = 0;
>  	int i;
>  	struct bkey *k = NULL;
> -	struct btree_iter iter;
> +	struct btree_iter_stack iter;
>  	struct btree_check_state check_state;
>  
> -	min_heap_init(&iter.heap, NULL, MAX_BSETS);
> -
>  	/* check and mark root node keys */
>  	for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid)
>  		bch_initial_mark_key(c, c->root->level, k);
> @@ -2560,12 +2550,11 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op,
>  
>  	if (b->level) {
>  		struct bkey *k;
> -		struct btree_iter iter;
> +		struct btree_iter_stack iter;
>  
> -		min_heap_init(&iter.heap, NULL, MAX_BSETS);
> -		bch_btree_iter_init(&b->keys, &iter, from);
> +		bch_btree_iter_stack_init(&b->keys, &iter, from);
>  
> -		while ((k = bch_btree_iter_next_filter(&iter, &b->keys,
> +		while ((k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
>  						       bch_ptr_bad))) {
>  			ret = bcache_btree(map_nodes_recurse, k, b,
>  				    op, from, fn, flags);
> @@ -2594,12 +2583,12 @@ int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
>  {
>  	int ret = MAP_CONTINUE;
>  	struct bkey *k;
> -	struct btree_iter iter;
> +	struct btree_iter_stack iter;
>  
> -	min_heap_init(&iter.heap, NULL, MAX_BSETS);
> -	bch_btree_iter_init(&b->keys, &iter, from);
> +	bch_btree_iter_stack_init(&b->keys, &iter, from);
>  
> -	while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) {
> +	while ((k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
> +					       bch_ptr_bad))) {
>  		ret = !b->level
>  			? fn(op, b, k)
>  			: bcache_btree(map_keys_recurse, k,
> diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
> index a7221e5dbe81..d626ffcbecb9 100644
> --- a/drivers/md/bcache/extents.c
> +++ b/drivers/md/bcache/extents.c
> @@ -33,16 +33,15 @@ static void sort_key_next(struct btree_iter *iter,
>  	i->k = bkey_next(i->k);
>  
>  	if (i->k == i->end)
> -		*i = iter->heap.data[--iter->heap.nr];
> +		*i = iter->data[--iter->used];
>  }
>  
> -static bool new_bch_key_sort_cmp(const void *l, const void *r, void *args)
> +static bool bch_key_sort_cmp(struct btree_iter_set l,
> +			     struct btree_iter_set r)
>  {
> -	struct btree_iter_set *_l = (struct btree_iter_set *)l;
> -	struct btree_iter_set *_r = (struct btree_iter_set *)r;
> -	int64_t c = bkey_cmp(_l->k, _r->k);
> +	int64_t c = bkey_cmp(l.k, r.k);
>  
> -	return !(c ? c > 0 : _l->k < _r->k);
> +	return c ? c > 0 : l.k < r.k;
>  }
>  
>  static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
> @@ -239,7 +238,7 @@ static bool bch_btree_ptr_insert_fixup(struct btree_keys *bk,
>  }
>  
>  const struct btree_keys_ops bch_btree_keys_ops = {
> -	.sort_cmp	= new_bch_key_sort_cmp,
> +	.sort_cmp	= bch_key_sort_cmp,
>  	.insert_fixup	= bch_btree_ptr_insert_fixup,
>  	.key_invalid	= bch_btree_ptr_invalid,
>  	.key_bad	= bch_btree_ptr_bad,
> @@ -256,36 +255,22 @@ const struct btree_keys_ops bch_btree_keys_ops = {
>   * Necessary for btree_sort_fixup() - if there are multiple keys that compare
>   * equal in different sets, we have to process them newest to oldest.
>   */
> -
> -static bool new_bch_extent_sort_cmp(const void *l, const void *r, void __always_unused *args)
> -{
> -	struct btree_iter_set *_l = (struct btree_iter_set *)l;
> -	struct btree_iter_set *_r = (struct btree_iter_set *)r;
> -	int64_t c = bkey_cmp(&START_KEY(_l->k), &START_KEY(_r->k));
> -
> -	return !(c ? c > 0 : _l->k < _r->k);
> -}
> -
> -static inline void new_btree_iter_swap(void *iter1, void *iter2, void __always_unused *args)
> +static bool bch_extent_sort_cmp(struct btree_iter_set l,
> +				struct btree_iter_set r)
>  {
> -	struct btree_iter_set *_iter1 = iter1;
> -	struct btree_iter_set *_iter2 = iter2;
> +	int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k));
>  
> -	swap(*_iter1, *_iter2);
> +	return c ? c > 0 : l.k < r.k;
>  }
>  
>  static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
>  					  struct bkey *tmp)
>  {
> -	const struct min_heap_callbacks callbacks = {
> -		.less = new_bch_extent_sort_cmp,
> -		.swp = new_btree_iter_swap,
> -	};
> -	while (iter->heap.nr > 1) {
> -		struct btree_iter_set *top = iter->heap.data, *i = top + 1;
> -
> -		if (iter->heap.nr > 2 &&
> -		    !new_bch_extent_sort_cmp(&i[0], &i[1], NULL))
> +	while (iter->used > 1) {
> +		struct btree_iter_set *top = iter->data, *i = top + 1;
> +
> +		if (iter->used > 2 &&
> +		    bch_extent_sort_cmp(i[0], i[1]))
>  			i++;
>  
>  		if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
> @@ -293,7 +278,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
>  
>  		if (!KEY_SIZE(i->k)) {
>  			sort_key_next(iter, i);
> -			min_heap_sift_down(&iter->heap, i - top, &callbacks, NULL);
> +			heap_sift(iter, i - top, bch_extent_sort_cmp);
>  			continue;
>  		}
>  
> @@ -303,7 +288,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
>  			else
>  				bch_cut_front(top->k, i->k);
>  
> -			min_heap_sift_down(&iter->heap, i - top, &callbacks, NULL);
> +			heap_sift(iter, i - top, bch_extent_sort_cmp);
>  		} else {
>  			/* can't happen because of comparison func */
>  			BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
> @@ -313,7 +298,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
>  
>  				bch_cut_back(&START_KEY(i->k), tmp);
>  				bch_cut_front(i->k, top->k);
> -				min_heap_sift_down(&iter->heap, 0, &callbacks, NULL);
> +				heap_sift(iter, 0, bch_extent_sort_cmp);
>  
>  				return tmp;
>  			} else {
> @@ -633,7 +618,7 @@ static bool bch_extent_merge(struct btree_keys *bk,
>  }
>  
>  const struct btree_keys_ops bch_extent_keys_ops = {
> -	.sort_cmp	= new_bch_extent_sort_cmp,
> +	.sort_cmp	= bch_extent_sort_cmp,
>  	.sort_fixup	= bch_extent_sort_fixup,
>  	.insert_fixup	= bch_extent_insert_fixup,
>  	.key_invalid	= bch_extent_invalid,
> diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
> index d6c73dd8eb2b..26a6a535ec32 100644
> --- a/drivers/md/bcache/movinggc.c
> +++ b/drivers/md/bcache/movinggc.c
> @@ -182,27 +182,16 @@ err:		if (!IS_ERR_OR_NULL(w->private))
>  	closure_sync(&cl);
>  }
>  
> -static bool new_bucket_cmp(const void *l, const void *r, void __always_unused *args)
> +static bool bucket_cmp(struct bucket *l, struct bucket *r)
>  {
> -	struct bucket **_l = (struct bucket **)l;
> -	struct bucket **_r = (struct bucket **)r;
> -
> -	return GC_SECTORS_USED(*_l) >= GC_SECTORS_USED(*_r);
> -}
> -
> -static void new_bucket_swap(void *l, void *r, void __always_unused *args)
> -{
> -	struct bucket **_l = l;
> -	struct bucket **_r = r;
> -
> -	swap(*_l, *_r);
> +	return GC_SECTORS_USED(l) < GC_SECTORS_USED(r);
>  }
>  
>  static unsigned int bucket_heap_top(struct cache *ca)
>  {
>  	struct bucket *b;
>  
> -	return (b = min_heap_peek(&ca->heap)[0]) ? GC_SECTORS_USED(b) : 0;
> +	return (b = heap_peek(&ca->heap)) ? GC_SECTORS_USED(b) : 0;
>  }
>  
>  void bch_moving_gc(struct cache_set *c)
> @@ -210,10 +199,6 @@ void bch_moving_gc(struct cache_set *c)
>  	struct cache *ca = c->cache;
>  	struct bucket *b;
>  	unsigned long sectors_to_move, reserve_sectors;
> -	const struct min_heap_callbacks callbacks = {
> -		.less = new_bucket_cmp,
> -		.swp = new_bucket_swap,
> -	};
>  
>  	if (!c->copy_gc_enabled)
>  		return;
> @@ -224,7 +209,7 @@ void bch_moving_gc(struct cache_set *c)
>  	reserve_sectors = ca->sb.bucket_size *
>  			     fifo_used(&ca->free[RESERVE_MOVINGGC]);
>  
> -	ca->heap.nr = 0;
> +	ca->heap.used = 0;
>  
>  	for_each_bucket(b, ca) {
>  		if (GC_MARK(b) == GC_MARK_METADATA ||
> @@ -233,31 +218,25 @@ void bch_moving_gc(struct cache_set *c)
>  		    atomic_read(&b->pin))
>  			continue;
>  
> -		if (!min_heap_full(&ca->heap)) {
> +		if (!heap_full(&ca->heap)) {
>  			sectors_to_move += GC_SECTORS_USED(b);
> -			min_heap_push(&ca->heap, &b, &callbacks, NULL);
> -		} else if (!new_bucket_cmp(&b, min_heap_peek(&ca->heap), ca)) {
> +			heap_add(&ca->heap, b, bucket_cmp);
> +		} else if (bucket_cmp(b, heap_peek(&ca->heap))) {
>  			sectors_to_move -= bucket_heap_top(ca);
>  			sectors_to_move += GC_SECTORS_USED(b);
>  
>  			ca->heap.data[0] = b;
> -			min_heap_sift_down(&ca->heap, 0, &callbacks, NULL);
> +			heap_sift(&ca->heap, 0, bucket_cmp);
>  		}
>  	}
>  
>  	while (sectors_to_move > reserve_sectors) {
> -		if (ca->heap.nr) {
> -			b = min_heap_peek(&ca->heap)[0];
> -			min_heap_pop(&ca->heap, &callbacks, NULL);
> -		}
> +		heap_pop(&ca->heap, b, bucket_cmp);
>  		sectors_to_move -= GC_SECTORS_USED(b);
>  	}
>  
> -	while (ca->heap.nr) {
> -		b = min_heap_peek(&ca->heap)[0];
> -		min_heap_pop(&ca->heap, &callbacks, NULL);
> +	while (heap_pop(&ca->heap, b, bucket_cmp))
>  		SET_GC_MOVE(b, 1);
> -	}
>  
>  	mutex_unlock(&c->bucket_lock);
>  
> diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
> index 1efb768b2890..2ea490b9d370 100644
> --- a/drivers/md/bcache/super.c
> +++ b/drivers/md/bcache/super.c
> @@ -1912,7 +1912,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
>  	INIT_LIST_HEAD(&c->btree_cache_freed);
>  	INIT_LIST_HEAD(&c->data_buckets);
>  
> -	iter_size = ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) *
> +	iter_size = sizeof(struct btree_iter) +
> +		    ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) *
>  			    sizeof(struct btree_iter_set);
>  
>  	c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL);
> diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
> index e8f696cb58c0..826b14cae4e5 100644
> --- a/drivers/md/bcache/sysfs.c
> +++ b/drivers/md/bcache/sysfs.c
> @@ -660,9 +660,7 @@ static unsigned int bch_root_usage(struct cache_set *c)
>  	unsigned int bytes = 0;
>  	struct bkey *k;
>  	struct btree *b;
> -	struct btree_iter iter;
> -
> -	min_heap_init(&iter.heap, NULL, MAX_BSETS);
> +	struct btree_iter_stack iter;
>  
>  	goto lock_root;
>  
> diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
> index 539454d8e2d0..f61ab1bada6c 100644
> --- a/drivers/md/bcache/util.h
> +++ b/drivers/md/bcache/util.h
> @@ -9,7 +9,6 @@
>  #include <linux/kernel.h>
>  #include <linux/sched/clock.h>
>  #include <linux/llist.h>
> -#include <linux/min_heap.h>
>  #include <linux/ratelimit.h>
>  #include <linux/vmalloc.h>
>  #include <linux/workqueue.h>
> @@ -31,10 +30,16 @@ struct closure;
>  
>  #endif
>  
> +#define DECLARE_HEAP(type, name)					\
> +	struct {							\
> +		size_t size, used;					\
> +		type *data;						\
> +	} name
> +
>  #define init_heap(heap, _size, gfp)					\
>  ({									\
>  	size_t _bytes;							\
> -	(heap)->nr = 0;						\
> +	(heap)->used = 0;						\
>  	(heap)->size = (_size);						\
>  	_bytes = (heap)->size * sizeof(*(heap)->data);			\
>  	(heap)->data = kvmalloc(_bytes, (gfp) & GFP_KERNEL);		\
> @@ -47,6 +52,64 @@ do {									\
>  	(heap)->data = NULL;						\
>  } while (0)
>  
> +#define heap_swap(h, i, j)	swap((h)->data[i], (h)->data[j])
> +
> +#define heap_sift(h, i, cmp)						\
> +do {									\
> +	size_t _r, _j = i;						\
> +									\
> +	for (; _j * 2 + 1 < (h)->used; _j = _r) {			\
> +		_r = _j * 2 + 1;					\
> +		if (_r + 1 < (h)->used &&				\
> +		    cmp((h)->data[_r], (h)->data[_r + 1]))		\
> +			_r++;						\
> +									\
> +		if (cmp((h)->data[_r], (h)->data[_j]))			\
> +			break;						\
> +		heap_swap(h, _r, _j);					\
> +	}								\
> +} while (0)
> +
> +#define heap_sift_down(h, i, cmp)					\
> +do {									\
> +	while (i) {							\
> +		size_t p = (i - 1) / 2;					\
> +		if (cmp((h)->data[i], (h)->data[p]))			\
> +			break;						\
> +		heap_swap(h, i, p);					\
> +		i = p;							\
> +	}								\
> +} while (0)
> +
> +#define heap_add(h, d, cmp)						\
> +({									\
> +	bool _r = !heap_full(h);					\
> +	if (_r) {							\
> +		size_t _i = (h)->used++;				\
> +		(h)->data[_i] = d;					\
> +									\
> +		heap_sift_down(h, _i, cmp);				\
> +		heap_sift(h, _i, cmp);					\
> +	}								\
> +	_r;								\
> +})
> +
> +#define heap_pop(h, d, cmp)						\
> +({									\
> +	bool _r = (h)->used;						\
> +	if (_r) {							\
> +		(d) = (h)->data[0];					\
> +		(h)->used--;						\
> +		heap_swap(h, 0, (h)->used);				\
> +		heap_sift(h, 0, cmp);					\
> +	}								\
> +	_r;								\
> +})
> +
> +#define heap_peek(h)	((h)->used ? (h)->data[0] : NULL)
> +
> +#define heap_full(h)	((h)->used == (h)->size)
> +
>  #define DECLARE_FIFO(type, name)					\
>  	struct {							\
>  		size_t front, back, size, mask;				\
> diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
> index 453efbbdc8ee..302e75f1fc4b 100644
> --- a/drivers/md/bcache/writeback.c
> +++ b/drivers/md/bcache/writeback.c
> @@ -908,16 +908,15 @@ static int bch_dirty_init_thread(void *arg)
>  	struct dirty_init_thrd_info *info = arg;
>  	struct bch_dirty_init_state *state = info->state;
>  	struct cache_set *c = state->c;
> -	struct btree_iter iter;
> +	struct btree_iter_stack iter;
>  	struct bkey *k, *p;
>  	int cur_idx, prev_idx, skip_nr;
>  
>  	k = p = NULL;
>  	prev_idx = 0;
>  
> -	min_heap_init(&iter.heap, NULL, MAX_BSETS);
> -	bch_btree_iter_init(&c->root->keys, &iter, NULL);
> -	k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
> +	bch_btree_iter_stack_init(&c->root->keys, &iter, NULL);
> +	k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad);
>  	BUG_ON(!k);
>  
>  	p = k;
> @@ -931,7 +930,7 @@ static int bch_dirty_init_thread(void *arg)
>  		skip_nr = cur_idx - prev_idx;
>  
>  		while (skip_nr) {
> -			k = bch_btree_iter_next_filter(&iter,
> +			k = bch_btree_iter_next_filter(&iter.iter,
>  						       &c->root->keys,
>  						       bch_ptr_bad);
>  			if (k)
> @@ -980,13 +979,11 @@ void bch_sectors_dirty_init(struct bcache_device *d)
>  	int i;
>  	struct btree *b = NULL;
>  	struct bkey *k = NULL;
> -	struct btree_iter iter;
> +	struct btree_iter_stack iter;
>  	struct sectors_dirty_init op;
>  	struct cache_set *c = d->c;
>  	struct bch_dirty_init_state state;
>  
> -	min_heap_init(&iter.heap, NULL, MAX_BSETS);
> -
>  retry_lock:
>  	b = c->root;
>  	rw_lock(0, b, b->level);
> -- 
> 2.34.1
> 

-- 
Coly Li

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ