[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20231107230822.371443-25-ankur.a.arora@oracle.com>
Date:   Tue,  7 Nov 2023 15:08:17 -0800
From:   Ankur Arora <ankur.a.arora@...cle.com>
To:     linux-kernel@...r.kernel.org
Cc:     tglx@...utronix.de, peterz@...radead.org,
        torvalds@...ux-foundation.org, paulmck@...nel.org,
        linux-mm@...ck.org, x86@...nel.org, akpm@...ux-foundation.org,
        luto@...nel.org, bp@...en8.de, dave.hansen@...ux.intel.com,
        hpa@...or.com, mingo@...hat.com, juri.lelli@...hat.com,
        vincent.guittot@...aro.org, willy@...radead.org, mgorman@...e.de,
        jon.grimm@....com, bharata@....com, raghavendra.kt@....com,
        boris.ostrovsky@...cle.com, konrad.wilk@...cle.com,
        jgross@...e.com, andrew.cooper3@...rix.com, mingo@...nel.org,
        bristot@...nel.org, mathieu.desnoyers@...icios.com,
        geert@...ux-m68k.org, glaubitz@...sik.fu-berlin.de,
        anton.ivanov@...bridgegreys.com, mattst88@...il.com,
        krypton@...ich-teichert.org, rostedt@...dmis.org,
        David.Laight@...LAB.COM, richard@....at, mjguzik@...il.com,
        Ankur Arora <ankur.a.arora@...cle.com>,
        Coly Li <colyli@...e.de>,
        Kent Overstreet <kent.overstreet@...il.com>,
        Alasdair Kergon <agk@...hat.com>,
        Mike Snitzer <snitzer@...nel.org>
Subject: [RFC PATCH 81/86] treewide: md: remove cond_resched()
There are broadly three sets of uses of cond_resched():
1.  Calls to cond_resched() out of the goodness of our heart,
    otherwise known as avoiding lockup splats.
2.  Open coded variants of cond_resched_lock() which call
    cond_resched().
3.  Retry or error handling loops, where cond_resched() is used as a
    quick alternative to spinning in a tight-loop.
When running under a full preemption model, the cond_resched() reduces
to a NOP (not even a barrier) so removing it obviously cannot matter.
But considering only voluntary preemption models (for say code that
has been mostly tested under those), for set-1 and set-2 the
scheduler can now preempt kernel tasks running beyond their time
quanta anywhere they are preemptible() [1]. Which removes any need
for these explicitly placed scheduling points.
The cond_resched() calls in set-3 are a little more difficult.
To start with, given it's NOP character under full preemption, it
never actually saved us from a tight loop.
With voluntary preemption, it's not a NOP, but it might as well be --
for most workloads the scheduler does not have an interminable supply
of runnable tasks on the runqueue.
So, cond_resched() is useful to not get softlockup splats, but not
terribly good for error handling. Ideally, these should be replaced
with some kind of timed or event wait.
For now we use cond_resched_stall(), which tries to schedule if
possible, and executes a cpu_relax() if not.
Most of the uses here are in set-1. Remove them.
[1] https://lore.kernel.org/lkml/20231107215742.363031-1-ankur.a.arora@oracle.com/
Cc: Coly Li <colyli@...e.de> 
Cc: Kent Overstreet <kent.overstreet@...il.com> 
Cc: Alasdair Kergon <agk@...hat.com> 
Cc: Mike Snitzer <snitzer@...nel.org> 
Signed-off-by: Ankur Arora <ankur.a.arora@...cle.com>
---
 drivers/md/bcache/btree.c     |  5 -----
 drivers/md/bcache/journal.c   |  2 --
 drivers/md/bcache/sysfs.c     |  1 -
 drivers/md/bcache/writeback.c |  2 --
 drivers/md/dm-bufio.c         | 14 --------------
 drivers/md/dm-cache-target.c  |  4 ----
 drivers/md/dm-crypt.c         |  3 ---
 drivers/md/dm-integrity.c     |  3 ---
 drivers/md/dm-kcopyd.c        |  2 --
 drivers/md/dm-snap.c          |  1 -
 drivers/md/dm-stats.c         |  8 --------
 drivers/md/dm-thin.c          |  2 --
 drivers/md/dm-writecache.c    | 11 -----------
 drivers/md/dm.c               |  4 ----
 drivers/md/md.c               |  1 -
 drivers/md/raid1.c            |  2 --
 drivers/md/raid10.c           |  3 ---
 drivers/md/raid5.c            |  2 --
 18 files changed, 70 deletions(-)
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index fd121a61f17c..b9389d3c39d7 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1826,7 +1826,6 @@ static void bch_btree_gc(struct cache_set *c)
 	do {
 		ret = bcache_btree_root(gc_root, c, &op, &writes, &stats);
 		closure_sync(&writes);
-		cond_resched();
 
 		if (ret == -EAGAIN)
 			schedule_timeout_interruptible(msecs_to_jiffies
@@ -1981,7 +1980,6 @@ static int bch_btree_check_thread(void *arg)
 				goto out;
 			}
 			skip_nr--;
-			cond_resched();
 		}
 
 		if (p) {
@@ -2005,7 +2003,6 @@ static int bch_btree_check_thread(void *arg)
 		}
 		p = NULL;
 		prev_idx = cur_idx;
-		cond_resched();
 	}
 
 out:
@@ -2670,8 +2667,6 @@ void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf,
 	struct bkey start = buf->last_scanned;
 	struct refill refill;
 
-	cond_resched();
-
 	bch_btree_op_init(&refill.op, -1);
 	refill.nr_found	= 0;
 	refill.buf	= buf;
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index c182c21de2e8..5e06a665d082 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -384,8 +384,6 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
 
 			BUG_ON(!bch_keylist_empty(&keylist));
 			keys++;
-
-			cond_resched();
 		}
 
 		if (i->pin)
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 0e2c1880f60b..d7e248b54abd 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -1030,7 +1030,6 @@ KTYPE(bch_cache_set_internal);
 
 static int __bch_cache_cmp(const void *l, const void *r)
 {
-	cond_resched();
 	return *((uint16_t *)r) - *((uint16_t *)l);
 }
 
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 24c049067f61..7da09bba3067 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -863,8 +863,6 @@ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b,
 					     KEY_START(k), KEY_SIZE(k));
 
 	op->count++;
-	if (!(op->count % INIT_KEYS_EACH_TIME))
-		cond_resched();
 
 	return MAP_CONTINUE;
 }
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index bc309e41d074..0b8f3341fa79 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -294,8 +294,6 @@ static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *con
 		}
 
 		h = h->next;
-
-		cond_resched();
 	}
 
 	return NULL;
@@ -762,7 +760,6 @@ static void __cache_iterate(struct dm_buffer_cache *bc, int list_mode,
 		case IT_COMPLETE:
 			return;
 		}
-		cond_resched();
 
 		le = to_le(le->list.next);
 	} while (le != first);
@@ -890,8 +887,6 @@ static void __remove_range(struct dm_buffer_cache *bc,
 	struct dm_buffer *b;
 
 	while (true) {
-		cond_resched();
-
 		b = __find_next(root, begin);
 		if (!b || (b->block >= end))
 			break;
@@ -1435,7 +1430,6 @@ static void __flush_write_list(struct list_head *write_list)
 			list_entry(write_list->next, struct dm_buffer, write_list);
 		list_del(&b->write_list);
 		submit_io(b, REQ_OP_WRITE, write_endio);
-		cond_resched();
 	}
 	blk_finish_plug(&plug);
 }
@@ -1953,8 +1947,6 @@ void dm_bufio_prefetch(struct dm_bufio_client *c,
 				submit_io(b, REQ_OP_READ, read_endio);
 			dm_bufio_release(b);
 
-			cond_resched();
-
 			if (!n_blocks)
 				goto flush_plug;
 			dm_bufio_lock(c);
@@ -2093,8 +2085,6 @@ int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
 			cache_mark(&c->cache, b, LIST_CLEAN);
 
 		cache_put_and_wake(c, b);
-
-		cond_resched();
 	}
 	lru_iter_end(&it);
 
@@ -2350,7 +2340,6 @@ static void __scan(struct dm_bufio_client *c)
 
 			atomic_long_dec(&c->need_shrink);
 			freed++;
-			cond_resched();
 		}
 	}
 }
@@ -2659,8 +2648,6 @@ static unsigned long __evict_many(struct dm_bufio_client *c,
 
 		__make_buffer_clean(b);
 		__free_buffer_wake(b);
-
-		cond_resched();
 	}
 
 	return count;
@@ -2802,7 +2789,6 @@ static void evict_old(void)
 	while (dm_bufio_current_allocated > threshold) {
 		if (!__evict_a_few(64))
 			break;
-		cond_resched();
 	}
 	mutex_unlock(&dm_bufio_clients_lock);
 }
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 911f73f7ebba..df136b29471a 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -1829,7 +1829,6 @@ static void process_deferred_bios(struct work_struct *ws)
 
 		else
 			commit_needed = process_bio(cache, bio) || commit_needed;
-		cond_resched();
 	}
 
 	if (commit_needed)
@@ -1853,7 +1852,6 @@ static void requeue_deferred_bios(struct cache *cache)
 	while ((bio = bio_list_pop(&bios))) {
 		bio->bi_status = BLK_STS_DM_REQUEUE;
 		bio_endio(bio);
-		cond_resched();
 	}
 }
 
@@ -1894,8 +1892,6 @@ static void check_migrations(struct work_struct *ws)
 		r = mg_start(cache, op, NULL);
 		if (r)
 			break;
-
-		cond_resched();
 	}
 }
 
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 5315fd261c23..70a24ade34af 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1629,8 +1629,6 @@ static blk_status_t crypt_convert(struct crypt_config *cc,
 			atomic_dec(&ctx->cc_pending);
 			ctx->cc_sector += sector_step;
 			tag_offset++;
-			if (!atomic)
-				cond_resched();
 			continue;
 		/*
 		 * There was a data integrity error.
@@ -1965,7 +1963,6 @@ static int dmcrypt_write(void *data)
 			io = crypt_io_from_node(rb_first(&write_tree));
 			rb_erase(&io->rb_node, &write_tree);
 			kcryptd_io_write(io);
-			cond_resched();
 		} while (!RB_EMPTY_ROOT(&write_tree));
 		blk_finish_plug(&plug);
 	}
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 97a8d5fc9ebb..63c88f23b585 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -2717,12 +2717,10 @@ static void integrity_recalc(struct work_struct *w)
 				       ic->sectors_per_block, BITMAP_OP_TEST_ALL_CLEAR)) {
 			logical_sector += ic->sectors_per_block;
 			n_sectors -= ic->sectors_per_block;
-			cond_resched();
 		}
 		while (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector + n_sectors - ic->sectors_per_block,
 				       ic->sectors_per_block, BITMAP_OP_TEST_ALL_CLEAR)) {
 			n_sectors -= ic->sectors_per_block;
-			cond_resched();
 		}
 		get_area_and_offset(ic, logical_sector, &area, &offset);
 	}
@@ -2782,7 +2780,6 @@ static void integrity_recalc(struct work_struct *w)
 	}
 
 advance_and_next:
-	cond_resched();
 
 	spin_lock_irq(&ic->endio_wait.lock);
 	remove_range_unlocked(ic, &range);
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index d01807c50f20..8a91e83188e7 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -512,8 +512,6 @@ static int run_complete_job(struct kcopyd_job *job)
 	if (atomic_dec_and_test(&kc->nr_jobs))
 		wake_up(&kc->destroyq);
 
-	cond_resched();
-
 	return 0;
 }
 
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index bf7a574499a3..cd8891c12cca 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1762,7 +1762,6 @@ static void copy_callback(int read_err, unsigned long write_err, void *context)
 			s->exception_complete_sequence++;
 			rb_erase(&pe->out_of_order_node, &s->out_of_order_tree);
 			complete_exception(pe);
-			cond_resched();
 		}
 	} else {
 		struct rb_node *parent = NULL;
diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c
index db2d997a6c18..d6878cb7b0ef 100644
--- a/drivers/md/dm-stats.c
+++ b/drivers/md/dm-stats.c
@@ -230,7 +230,6 @@ void dm_stats_cleanup(struct dm_stats *stats)
 				       atomic_read(&shared->in_flight[READ]),
 				       atomic_read(&shared->in_flight[WRITE]));
 			}
-			cond_resched();
 		}
 		dm_stat_free(&s->rcu_head);
 	}
@@ -336,7 +335,6 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
 	for (ni = 0; ni < n_entries; ni++) {
 		atomic_set(&s->stat_shared[ni].in_flight[READ], 0);
 		atomic_set(&s->stat_shared[ni].in_flight[WRITE], 0);
-		cond_resched();
 	}
 
 	if (s->n_histogram_entries) {
@@ -350,7 +348,6 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
 		for (ni = 0; ni < n_entries; ni++) {
 			s->stat_shared[ni].tmp.histogram = hi;
 			hi += s->n_histogram_entries + 1;
-			cond_resched();
 		}
 	}
 
@@ -372,7 +369,6 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
 			for (ni = 0; ni < n_entries; ni++) {
 				p[ni].histogram = hi;
 				hi += s->n_histogram_entries + 1;
-				cond_resched();
 			}
 		}
 	}
@@ -512,7 +508,6 @@ static int dm_stats_list(struct dm_stats *stats, const char *program,
 			}
 			DMEMIT("\n");
 		}
-		cond_resched();
 	}
 	mutex_unlock(&stats->mutex);
 
@@ -794,7 +789,6 @@ static void __dm_stat_clear(struct dm_stat *s, size_t idx_start, size_t idx_end,
 				local_irq_enable();
 			}
 		}
-		cond_resched();
 	}
 }
 
@@ -910,8 +904,6 @@ static int dm_stats_print(struct dm_stats *stats, int id,
 
 		if (unlikely(sz + 1 >= maxlen))
 			goto buffer_overflow;
-
-		cond_resched();
 	}
 
 	if (clear)
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 07c7f9795b10..52e4a7dc6923 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -2234,7 +2234,6 @@ static void process_thin_deferred_bios(struct thin_c *tc)
 			throttle_work_update(&pool->throttle);
 			dm_pool_issue_prefetches(pool->pmd);
 		}
-		cond_resched();
 	}
 	blk_finish_plug(&plug);
 }
@@ -2317,7 +2316,6 @@ static void process_thin_deferred_cells(struct thin_c *tc)
 			else
 				pool->process_cell(tc, cell);
 		}
-		cond_resched();
 	} while (!list_empty(&cells));
 }
 
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 074cb785eafc..75ecc26915a1 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -321,8 +321,6 @@ static int persistent_memory_claim(struct dm_writecache *wc)
 			while (daa-- && i < p) {
 				pages[i++] = pfn_t_to_page(pfn);
 				pfn.val++;
-				if (!(i & 15))
-					cond_resched();
 			}
 		} while (i < p);
 		wc->memory_map = vmap(pages, p, VM_MAP, PAGE_KERNEL);
@@ -819,7 +817,6 @@ static void writecache_flush(struct dm_writecache *wc)
 		if (writecache_entry_is_committed(wc, e2))
 			break;
 		e = e2;
-		cond_resched();
 	}
 	writecache_commit_flushed(wc, true);
 
@@ -848,7 +845,6 @@ static void writecache_flush(struct dm_writecache *wc)
 		if (unlikely(e->lru.prev == &wc->lru))
 			break;
 		e = container_of(e->lru.prev, struct wc_entry, lru);
-		cond_resched();
 	}
 
 	if (need_flush_after_free)
@@ -970,7 +966,6 @@ static int writecache_alloc_entries(struct dm_writecache *wc)
 
 		e->index = b;
 		e->write_in_progress = false;
-		cond_resched();
 	}
 
 	return 0;
@@ -1058,7 +1053,6 @@ static void writecache_resume(struct dm_target *ti)
 			e->original_sector = le64_to_cpu(wme.original_sector);
 			e->seq_count = le64_to_cpu(wme.seq_count);
 		}
-		cond_resched();
 	}
 #endif
 	for (b = 0; b < wc->n_blocks; b++) {
@@ -1093,7 +1087,6 @@ static void writecache_resume(struct dm_target *ti)
 				}
 			}
 		}
-		cond_resched();
 	}
 
 	if (need_flush) {
@@ -1824,7 +1817,6 @@ static void __writeback_throttle(struct dm_writecache *wc, struct writeback_list
 			wc_unlock(wc);
 		}
 	}
-	cond_resched();
 }
 
 static void __writecache_writeback_pmem(struct dm_writecache *wc, struct writeback_list *wbl)
@@ -2024,7 +2016,6 @@ static void writecache_writeback(struct work_struct *work)
 				     read_original_sector(wc, e))) {
 				BUG_ON(!f->write_in_progress);
 				list_move(&e->lru, &skipped);
-				cond_resched();
 				continue;
 			}
 		}
@@ -2079,7 +2070,6 @@ static void writecache_writeback(struct work_struct *work)
 				break;
 			}
 		}
-		cond_resched();
 	}
 
 	if (!list_empty(&skipped)) {
@@ -2168,7 +2158,6 @@ static int init_memory(struct dm_writecache *wc)
 
 	for (b = 0; b < wc->n_blocks; b++) {
 		write_original_sector_seq_count(wc, &wc->entries[b], -1, -1);
-		cond_resched();
 	}
 
 	writecache_flush_all_metadata(wc);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 64a1f306c96c..ac0aff4de190 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -996,7 +996,6 @@ static void dm_wq_requeue_work(struct work_struct *work)
 		io->next = NULL;
 		__dm_io_complete(io, false);
 		io = next;
-		cond_resched();
 	}
 }
 
@@ -1379,12 +1378,10 @@ static noinline void __set_swap_bios_limit(struct mapped_device *md, int latch)
 {
 	mutex_lock(&md->swap_bios_lock);
 	while (latch < md->swap_bios) {
-		cond_resched();
 		down(&md->swap_bios_semaphore);
 		md->swap_bios--;
 	}
 	while (latch > md->swap_bios) {
-		cond_resched();
 		up(&md->swap_bios_semaphore);
 		md->swap_bios++;
 	}
@@ -2583,7 +2580,6 @@ static void dm_wq_work(struct work_struct *work)
 			break;
 
 		submit_bio_noacct(bio);
-		cond_resched();
 	}
 }
 
diff --git a/drivers/md/md.c b/drivers/md/md.c
index a104a025084d..88e8148be28f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -9048,7 +9048,6 @@ void md_do_sync(struct md_thread *thread)
 		 * about not overloading the IO subsystem. (things like an
 		 * e2fsck being done on the RAID array should execute fast)
 		 */
-		cond_resched();
 
 		recovery_done = io_sectors - atomic_read(&mddev->recovery_active);
 		currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 2aabac773fe7..71bd8d8d1d1c 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -807,7 +807,6 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio)
 
 		raid1_submit_write(bio);
 		bio = next;
-		cond_resched();
 	}
 }
 
@@ -2613,7 +2612,6 @@ static void raid1d(struct md_thread *thread)
 		else
 			WARN_ON_ONCE(1);
 
-		cond_resched();
 		if (mddev->sb_flags & ~(1<<MD_SB_CHANGE_PENDING))
 			md_check_recovery(mddev);
 	}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 023413120851..d41f856ebcf4 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -916,7 +916,6 @@ static void flush_pending_writes(struct r10conf *conf)
 
 			raid1_submit_write(bio);
 			bio = next;
-			cond_resched();
 		}
 		blk_finish_plug(&plug);
 	} else
@@ -1132,7 +1131,6 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
 
 		raid1_submit_write(bio);
 		bio = next;
-		cond_resched();
 	}
 	kfree(plug);
 }
@@ -3167,7 +3165,6 @@ static void raid10d(struct md_thread *thread)
 		else
 			WARN_ON_ONCE(1);
 
-		cond_resched();
 		if (mddev->sb_flags & ~(1<<MD_SB_CHANGE_PENDING))
 			md_check_recovery(mddev);
 	}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 284cd71bcc68..47b995c97363 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -6727,8 +6727,6 @@ static int handle_active_stripes(struct r5conf *conf, int group,
 		handle_stripe(batch[i]);
 	log_write_stripe_run(conf);
 
-	cond_resched();
-
 	spin_lock_irq(&conf->device_lock);
 	for (i = 0; i < batch_size; i++) {
 		hash = batch[i]->hash_lock_index;
-- 
2.31.1
Powered by blists - more mailing lists
 
