lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 29 May 2014 19:52:00 -0400 (EDT)
From:	Mikulas Patocka <mpatocka@...hat.com>
To:	Jens Axboe <axboe@...nel.dk>
cc:	Kent Overstreet <kmo@...erainc.com>, linux-kernel@...r.kernel.org,
	dm-devel@...hat.com, "Alasdair G. Kergon" <agk@...hat.com>,
	Mike Snitzer <msnitzer@...hat.com>
Subject: Re: [PATCH] block: flush queued bios when the process blocks



On Tue, 27 May 2014, Jens Axboe wrote:

> On 2014-05-27 10:26, Mikulas Patocka wrote:
> > On Tue, 27 May 2014, Jens Axboe wrote:
> > 
> > > On 2014-05-27 09:23, Mikulas Patocka wrote:
> > > 
> > > > The patch adds bio list flushing to the scheduler just besides plug
> > > > flushsing.
> > > 
> > > ... which is exactly why I'm commenting. It'd be great to avoid yet one
> > > more
> > > scheduler hook for this sort of thing.
> > > 
> > > --
> > > Jens Axboe
> > 
> > One could create something like schedule notifier chain, but I'm not sure
> > if it is worth the complexity because of just two users. If more users
> > come in the future, it could be generalized.
> 
> Except such a thing already exists, there are unplug callback chains. All I'm
> asking is that you look into how feasible it would be to use something like
> that, instead of reinventing the wheel.
> 
> -- 
> Jens Axboe


You can use this patch as an example that moves current->bio_list to 
struct plug, but I don't recommend to put it in the kernel - this patch 
still has some issues (some lvm raid tests fail) - and at -rc7 stage we 
should really be fixing bugs and not rearchitecting the code.


You should commit my original patch because it is small and it generated 
no regressions for me. Think about stable kernels and enterprise 
distributions - the smaller the patch is, the easier it is to backport.


Mikulas


---
 block/blk-core.c       |   19 ++++++++++++-------
 drivers/md/dm-bufio.c  |    2 +-
 drivers/md/raid1.c     |    6 +++---
 drivers/md/raid10.c    |    6 +++---
 fs/bio.c               |   21 +++++++++------------
 include/linux/blkdev.h |    7 +++++--
 include/linux/sched.h  |    4 ----
 kernel/sched/core.c    |    7 -------
 8 files changed, 33 insertions(+), 39 deletions(-)

Index: linux-3.15-rc5/block/blk-core.c
===================================================================
--- linux-3.15-rc5.orig/block/blk-core.c	2014-05-29 23:06:29.000000000 +0200
+++ linux-3.15-rc5/block/blk-core.c	2014-05-30 00:30:41.000000000 +0200
@@ -1828,7 +1828,7 @@ end_io:
  */
 void generic_make_request(struct bio *bio)
 {
-	struct bio_list bio_list_on_stack;
+	struct blk_plug plug;
 
 	if (!generic_make_request_checks(bio))
 		return;
@@ -1858,8 +1858,8 @@ void generic_make_request(struct bio *bi
 	 * it is non-NULL, then a make_request is active, and new requests
 	 * should be added at the tail
 	 */
-	if (current->bio_list) {
-		bio_list_add(current->bio_list, bio);
+	if (current->plug) {
+		bio_list_add(&current->plug->bio_list, bio);
 		return;
 	}
 
@@ -1877,17 +1877,18 @@ void generic_make_request(struct bio *bi
 	 * of the top of the list (no pretending) and so remove it from
 	 * bio_list, and call into ->make_request() again.
 	 */
+	blk_start_plug(&plug);
+	current->plug->in_generic_make_request = 1;
 	BUG_ON(bio->bi_next);
-	bio_list_init(&bio_list_on_stack);
-	current->bio_list = &bio_list_on_stack;
 	do {
 		struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
 		q->make_request_fn(q, bio);
 
-		bio = bio_list_pop(current->bio_list);
+		bio = bio_list_pop(&current->plug->bio_list);
 	} while (bio);
-	current->bio_list = NULL; /* deactivate */
+	current->plug->in_generic_make_request = 0;
+	blk_finish_plug(&plug);
 }
 EXPORT_SYMBOL(generic_make_request);
 
@@ -2965,6 +2966,8 @@ void blk_start_plug(struct blk_plug *plu
 	INIT_LIST_HEAD(&plug->list);
 	INIT_LIST_HEAD(&plug->mq_list);
 	INIT_LIST_HEAD(&plug->cb_list);
+	bio_list_init(&plug->bio_list);
+	plug->in_generic_make_request = 0;
 
 	/*
 	 * If this is a nested plug, don't actually assign it. It will be
@@ -3060,6 +3063,8 @@ void blk_flush_plug_list(struct blk_plug
 
 	BUG_ON(plug->magic != PLUG_MAGIC);
 
+	blk_flush_bio_list(plug);
+
 	flush_plug_callbacks(plug, from_schedule);
 
 	if (!list_empty(&plug->mq_list))
Index: linux-3.15-rc5/include/linux/blkdev.h
===================================================================
--- linux-3.15-rc5.orig/include/linux/blkdev.h	2014-05-29 23:05:46.000000000 +0200
+++ linux-3.15-rc5/include/linux/blkdev.h	2014-05-30 00:30:54.000000000 +0200
@@ -1061,6 +1061,8 @@ struct blk_plug {
 	struct list_head list; /* requests */
 	struct list_head mq_list; /* blk-mq requests */
 	struct list_head cb_list; /* md requires an unplug callback */
+	struct bio_list bio_list; /* list of queued bios */
+	int in_generic_make_request;
 };
 #define BLK_MAX_REQUEST_COUNT 16
 
@@ -1100,10 +1102,11 @@ static inline bool blk_needs_flush_plug(
 	return plug &&
 		(!list_empty(&plug->list) ||
 		 !list_empty(&plug->mq_list) ||
-		 !list_empty(&plug->cb_list));
+		 !list_empty(&plug->cb_list) ||
+		 !bio_list_empty(&plug->bio_list));
 }
 
-extern void blk_flush_bio_list(void);
+extern void blk_flush_bio_list(struct blk_plug *plug);
 
 /*
  * tag stuff
Index: linux-3.15-rc5/include/linux/sched.h
===================================================================
--- linux-3.15-rc5.orig/include/linux/sched.h	2014-05-29 23:07:01.000000000 +0200
+++ linux-3.15-rc5/include/linux/sched.h	2014-05-29 23:07:08.000000000 +0200
@@ -126,7 +126,6 @@ struct sched_attr {
 struct exec_domain;
 struct futex_pi_state;
 struct robust_list_head;
-struct bio_list;
 struct fs_struct;
 struct perf_event_context;
 struct blk_plug;
@@ -1427,9 +1426,6 @@ struct task_struct {
 /* journalling filesystem info */
 	void *journal_info;
 
-/* stacked block device info */
-	struct bio_list *bio_list;
-
 #ifdef CONFIG_BLOCK
 /* stack plugging */
 	struct blk_plug *plug;
Index: linux-3.15-rc5/fs/bio.c
===================================================================
--- linux-3.15-rc5.orig/fs/bio.c	2014-05-29 23:19:04.000000000 +0200
+++ linux-3.15-rc5/fs/bio.c	2014-05-29 23:36:40.000000000 +0200
@@ -352,23 +352,20 @@ static void bio_alloc_rescue(struct work
  * However, stacking drivers should use bio_set, so this shouldn't be
  * an issue.
  */
-void blk_flush_bio_list(void)
+void blk_flush_bio_list(struct blk_plug *plug)
 {
 	struct bio *bio;
-	struct bio_list list = *current->bio_list;
-	bio_list_init(current->bio_list);
 
-	while ((bio = bio_list_pop(&list))) {
+	while ((bio = bio_list_pop(&plug->bio_list))) {
 		struct bio_set *bs = bio->bi_pool;
-		if (unlikely(!bs)) {
-			bio_list_add(current->bio_list, bio);
-		} else {
-			spin_lock(&bs->rescue_lock);
-			bio_list_add(&bs->rescue_list, bio);
-			spin_unlock(&bs->rescue_lock);
+		if (!bs)
+			bs = fs_bio_set;
 
-			queue_work(bs->rescue_workqueue, &bs->rescue_work);
-		}
+		spin_lock(&bs->rescue_lock);
+		bio_list_add(&bs->rescue_list, bio);
+		spin_unlock(&bs->rescue_lock);
+
+		queue_work(bs->rescue_workqueue, &bs->rescue_work);
 	}
 }
 
Index: linux-3.15-rc5/kernel/sched/core.c
===================================================================
--- linux-3.15-rc5.orig/kernel/sched/core.c	2014-05-29 23:17:04.000000000 +0200
+++ linux-3.15-rc5/kernel/sched/core.c	2014-05-29 23:18:28.000000000 +0200
@@ -2734,13 +2734,6 @@ static inline void sched_submit_work(str
 	if (!tsk->state || tsk_is_pi_blocked(tsk))
 		return;
 	/*
-	 * If there are bios on the bio list, flush them to the appropriate
-	 * rescue threads.
-	 */
-	if (unlikely(current->bio_list != NULL) &&
-	    !bio_list_empty(current->bio_list))
-		blk_flush_bio_list();
-	/*
 	 * If we are going to sleep and we have plugged IO queued,
 	 * make sure to submit it to avoid deadlocks.
 	 */
Index: linux-3.15-rc5/drivers/md/dm-bufio.c
===================================================================
--- linux-3.15-rc5.orig/drivers/md/dm-bufio.c	2014-05-30 00:25:55.000000000 +0200
+++ linux-3.15-rc5/drivers/md/dm-bufio.c	2014-05-30 00:31:28.000000000 +0200
@@ -169,7 +169,7 @@ static inline int dm_bufio_cache_index(s
 #define DM_BUFIO_CACHE(c)	(dm_bufio_caches[dm_bufio_cache_index(c)])
 #define DM_BUFIO_CACHE_NAME(c)	(dm_bufio_cache_names[dm_bufio_cache_index(c)])
 
-#define dm_bufio_in_request()	(!!current->bio_list)
+#define dm_bufio_in_request()	(current->plug && current->plug->in_generic_make_request)
 
 static void dm_bufio_lock(struct dm_bufio_client *c)
 {
Index: linux-3.15-rc5/drivers/md/raid1.c
===================================================================
--- linux-3.15-rc5.orig/drivers/md/raid1.c	2014-05-30 00:19:28.000000000 +0200
+++ linux-3.15-rc5/drivers/md/raid1.c	2014-05-30 00:33:11.000000000 +0200
@@ -912,8 +912,8 @@ static sector_t wait_barrier(struct r1co
 				    (!conf->barrier ||
 				    ((conf->start_next_window <
 				      conf->next_resync + RESYNC_SECTORS) &&
-				     current->bio_list &&
-				     !bio_list_empty(current->bio_list))),
+				     current->plug &&
+				     !bio_list_empty(&current->plug->bio_list))),
 				    conf->resync_lock);
 		conf->nr_waiting--;
 	}
@@ -1052,7 +1052,7 @@ static void raid1_unplug(struct blk_plug
 	struct r1conf *conf = mddev->private;
 	struct bio *bio;
 
-	if (from_schedule || current->bio_list) {
+	if (from_schedule || (current->plug && current->plug->in_generic_make_request)) {
 		spin_lock_irq(&conf->device_lock);
 		bio_list_merge(&conf->pending_bio_list, &plug->pending);
 		conf->pending_count += plug->pending_cnt;
Index: linux-3.15-rc5/drivers/md/raid10.c
===================================================================
--- linux-3.15-rc5.orig/drivers/md/raid10.c	2014-05-30 00:23:51.000000000 +0200
+++ linux-3.15-rc5/drivers/md/raid10.c	2014-05-30 00:32:50.000000000 +0200
@@ -1045,8 +1045,8 @@ static void wait_barrier(struct r10conf 
 		wait_event_lock_irq(conf->wait_barrier,
 				    !conf->barrier ||
 				    (conf->nr_pending &&
-				     current->bio_list &&
-				     !bio_list_empty(current->bio_list)),
+				     current->plug &&
+				     !bio_list_empty(&current->plug->bio_list)),
 				    conf->resync_lock);
 		conf->nr_waiting--;
 	}
@@ -1122,7 +1122,7 @@ static void raid10_unplug(struct blk_plu
 	struct r10conf *conf = mddev->private;
 	struct bio *bio;
 
-	if (from_schedule || current->bio_list) {
+	if (from_schedule || (current->plug && current->plug->in_generic_make_request)) {
 		spin_lock_irq(&conf->device_lock);
 		bio_list_merge(&conf->pending_bio_list, &plug->pending);
 		conf->pending_count += plug->pending_cnt;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ