Date:	Mon,  2 Nov 2015 12:21:39 +0800
From:	Bob Liu <bob.liu@...cle.com>
To:	xen-devel@...ts.xen.org
Cc:	linux-kernel@...r.kernel.org, roger.pau@...rix.com,
	konrad.wilk@...cle.com, felipe.franciosi@...rix.com, axboe@...com,
	avanzini.arianna@...il.com, rafal.mielniczuk@...rix.com,
	jonathan.davies@...rix.com, david.vrabel@...rix.com,
	Bob Liu <bob.liu@...cle.com>
Subject: [PATCH v4 03/10] xen/blkfront: pseudo support for multiple hardware queues/rings

Preparatory patch for multiple hardware queues (rings). The number of
rings is unconditionally set to 1 for now; support for a larger number
will be enabled in the next patch, so as to keep each individual patch
small and readable.
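
In outline, the embedded blkfront_ring_info in struct blkfront_info
becomes a dynamically allocated array plus a ring count, and code paths
that used to take &info->rinfo now index or loop over that array. A
rough sketch of the resulting shape (simplified from the diff below;
most fields omitted):

	struct blkfront_info {
		/* ... existing fields unchanged ... */
		struct blkfront_ring_info *rinfo;	/* one entry per ring */
		unsigned int nr_rings;			/* fixed at 1 in this patch */
	};

	/* Allocation in blkfront_probe(), simplified: */
	info->nr_rings = 1;
	info->rinfo = kcalloc(info->nr_rings, sizeof(*info->rinfo), GFP_KERNEL);
	if (!info->rinfo)
		return -ENOMEM;

	/* Former single-ring call sites become per-ring loops, e.g.: */
	for (i = 0; i < info->nr_rings; i++)
		kick_pending_request_queues(&info->rinfo[i]);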

Signed-off-by: Bob Liu <bob.liu@...cle.com>
---
 drivers/block/xen-blkfront.c | 328 +++++++++++++++++++++++++------------------
 1 file changed, 189 insertions(+), 139 deletions(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 2a557e4..eab78e7 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -145,6 +145,7 @@ struct blkfront_info
 	int vdevice;
 	blkif_vdev_t handle;
 	enum blkif_state connected;
+	/* Number of pages per ring buffer */
 	unsigned int nr_ring_pages;
 	struct request_queue *rq;
 	struct list_head grants;
@@ -158,7 +159,8 @@ struct blkfront_info
 	unsigned int max_indirect_segments;
 	int is_ready;
 	struct blk_mq_tag_set tag_set;
-	struct blkfront_ring_info rinfo;
+	struct blkfront_ring_info *rinfo;
+	unsigned int nr_rings;
 };
 
 static unsigned int nr_minors;
@@ -190,7 +192,7 @@ static DEFINE_SPINLOCK(minor_lock);
 	((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
 
 static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo);
-static int blkfront_gather_backend_features(struct blkfront_info *info);
+static void blkfront_gather_backend_features(struct blkfront_info *info);
 
 static int get_id_from_freelist(struct blkfront_ring_info *rinfo)
 {
@@ -443,12 +445,13 @@ static int blkif_queue_request(struct request *req, struct blkfront_ring_info *r
 		 */
 		max_grefs += INDIRECT_GREFS(req->nr_phys_segments);
 
-	/* Check if we have enough grants to allocate a requests */
-	if (info->persistent_gnts_c < max_grefs) {
+	/* Check if we have enough grants to allocate a request; we have to
+	 * reserve 'max_grefs' grants because persistent grants are shared by
+	 * all rings. */
+	if (max_grefs > 0) {
 		new_persistent_gnts = 1;
 		if (gnttab_alloc_grant_references(
-		    max_grefs - info->persistent_gnts_c,
-		    &gref_head) < 0) {
+		    max_grefs, &gref_head) < 0) {
 			gnttab_request_free_callback(
 				&rinfo->callback,
 				blkif_restart_queue_callback,
@@ -665,7 +668,7 @@ static int blk_mq_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 {
 	struct blkfront_info *info = (struct blkfront_info *)data;
 
-	hctx->driver_data = &info->rinfo;
+	hctx->driver_data = &info->rinfo[index];
 	return 0;
 }
 
@@ -924,8 +927,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 
 static void xlvbd_release_gendisk(struct blkfront_info *info)
 {
-	unsigned int minor, nr_minors;
-	struct blkfront_ring_info *rinfo = &info->rinfo;
+	unsigned int minor, nr_minors, i;
 
 	if (info->rq == NULL)
 		return;
@@ -933,11 +935,15 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)
 	/* No more blkif_request(). */
 	blk_mq_stop_hw_queues(info->rq);
 
-	/* No more gnttab callback work. */
-	gnttab_cancel_free_callback(&rinfo->callback);
+	for (i = 0; i < info->nr_rings; i++) {
+		struct blkfront_ring_info *rinfo = &info->rinfo[i];
 
-	/* Flush gnttab callback work. Must be done with no locks held. */
-	flush_work(&rinfo->work);
+		/* No more gnttab callback work. */
+		gnttab_cancel_free_callback(&rinfo->callback);
+
+		/* Flush gnttab callback work. Must be done with no locks held. */
+		flush_work(&rinfo->work);
+	}
 
 	del_gendisk(info->gd);
 
@@ -970,37 +976,11 @@ static void blkif_restart_queue(struct work_struct *work)
 	spin_unlock_irq(&rinfo->dev_info->io_lock);
 }
 
-static void blkif_free(struct blkfront_info *info, int suspend)
+static void blkif_free_ring(struct blkfront_ring_info *rinfo)
 {
 	struct grant *persistent_gnt;
-	struct grant *n;
+	struct blkfront_info *info = rinfo->dev_info;
 	int i, j, segs;
-	struct blkfront_ring_info *rinfo = &info->rinfo;
-
-	/* Prevent new requests being issued until we fix things up. */
-	spin_lock_irq(&info->io_lock);
-	info->connected = suspend ?
-		BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
-	/* No more blkif_request(). */
-	if (info->rq)
-		blk_mq_stop_hw_queues(info->rq);
-
-	/* Remove all persistent grants */
-	if (!list_empty(&info->grants)) {
-		list_for_each_entry_safe(persistent_gnt, n,
-					 &info->grants, node) {
-			list_del(&persistent_gnt->node);
-			if (persistent_gnt->gref != GRANT_INVALID_REF) {
-				gnttab_end_foreign_access(persistent_gnt->gref,
-				                          0, 0UL);
-				info->persistent_gnts_c--;
-			}
-			if (info->feature_persistent)
-				__free_page(pfn_to_page(persistent_gnt->pfn));
-			kfree(persistent_gnt);
-		}
-	}
-	BUG_ON(info->persistent_gnts_c != 0);
 
 	/*
 	 * Remove indirect pages, this only happens when using indirect
@@ -1060,7 +1040,6 @@ free_shadow:
 
 	/* No more gnttab callback work. */
 	gnttab_cancel_free_callback(&rinfo->callback);
-	spin_unlock_irq(&info->io_lock);
 
 	/* Flush gnttab callback work. Must be done with no locks held. */
 	flush_work(&rinfo->work);
@@ -1078,7 +1057,41 @@ free_shadow:
 	if (rinfo->irq)
 		unbind_from_irqhandler(rinfo->irq, rinfo);
 	rinfo->evtchn = rinfo->irq = 0;
+}
 
+static void blkif_free(struct blkfront_info *info, int suspend)
+{
+	struct grant *persistent_gnt, *n;
+	int i;
+
+	/* Prevent new requests being issued until we fix things up. */
+	spin_lock_irq(&info->io_lock);
+	info->connected = suspend ?
+		BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
+	/* No more blkif_request(). */
+	if (info->rq)
+		blk_mq_stop_hw_queues(info->rq);
+
+	/* Remove all persistent grants */
+	if (!list_empty(&info->grants)) {
+		list_for_each_entry_safe(persistent_gnt, n,
+					 &info->grants, node) {
+			list_del(&persistent_gnt->node);
+			if (persistent_gnt->gref != GRANT_INVALID_REF) {
+				gnttab_end_foreign_access(persistent_gnt->gref,
+							  0, 0UL);
+				info->persistent_gnts_c--;
+			}
+			if (info->feature_persistent)
+				__free_page(pfn_to_page(persistent_gnt->pfn));
+			kfree(persistent_gnt);
+		}
+	}
+	BUG_ON(info->persistent_gnts_c != 0);
+
+	spin_unlock_irq(&info->io_lock);
+	for (i = 0; i < info->nr_rings; i++)
+		blkif_free_ring(&info->rinfo[i]);
 }
 
 static void blkif_completion(struct blk_shadow *s, struct blkfront_ring_info *rinfo,
@@ -1334,7 +1347,7 @@ static int talk_to_blkback(struct xenbus_device *dev,
 	int err, i;
 	unsigned int max_page_order = 0;
 	unsigned int ring_page_order = 0;
-	struct blkfront_ring_info *rinfo = &info->rinfo;
+	struct blkfront_ring_info *rinfo;
 
 	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
 			   "max-ring-page-order", "%u", &max_page_order);
@@ -1345,10 +1358,13 @@ static int talk_to_blkback(struct xenbus_device *dev,
 		info->nr_ring_pages = 1 << ring_page_order;
 	}
 
-	/* Create shared ring, alloc event channel. */
-	err = setup_blkring(dev, rinfo);
-	if (err)
-		goto out;
+	for (i = 0; i < info->nr_rings; i++) {
+		rinfo = &info->rinfo[i];
+		/* Create shared ring, alloc event channel. */
+		err = setup_blkring(dev, rinfo);
+		if (err)
+			goto destroy_blkring;
+	}
 
 again:
 	err = xenbus_transaction_start(&xbt);
@@ -1357,37 +1373,43 @@ again:
 		goto destroy_blkring;
 	}
 
-	if (info->nr_ring_pages == 1) {
-		err = xenbus_printf(xbt, dev->nodename,
-				    "ring-ref", "%u", rinfo->ring_ref[0]);
-		if (err) {
-			message = "writing ring-ref";
-			goto abort_transaction;
-		}
-	} else {
-		err = xenbus_printf(xbt, dev->nodename,
-				    "ring-page-order", "%u", ring_page_order);
-		if (err) {
-			message = "writing ring-page-order";
-			goto abort_transaction;
-		}
-
-		for (i = 0; i < info->nr_ring_pages; i++) {
-			char ring_ref_name[RINGREF_NAME_LEN];
-
-			snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
-			err = xenbus_printf(xbt, dev->nodename, ring_ref_name,
-					    "%u", rinfo->ring_ref[i]);
+	if (info->nr_rings == 1) {
+		rinfo = &info->rinfo[0];
+		if (info->nr_ring_pages == 1) {
+			err = xenbus_printf(xbt, dev->nodename,
+					    "ring-ref", "%u", rinfo->ring_ref[0]);
 			if (err) {
 				message = "writing ring-ref";
 				goto abort_transaction;
 			}
+		} else {
+			err = xenbus_printf(xbt, dev->nodename,
+					    "ring-page-order", "%u", ring_page_order);
+			if (err) {
+				message = "writing ring-page-order";
+				goto abort_transaction;
+			}
+
+			for (i = 0; i < info->nr_ring_pages; i++) {
+				char ring_ref_name[RINGREF_NAME_LEN];
+
+				snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
+				err = xenbus_printf(xbt, dev->nodename, ring_ref_name,
+						    "%u", rinfo->ring_ref[i]);
+				if (err) {
+					message = "writing ring-ref";
+					goto abort_transaction;
+				}
+			}
 		}
-	}
-	err = xenbus_printf(xbt, dev->nodename,
-			    "event-channel", "%u", rinfo->evtchn);
-	if (err) {
-		message = "writing event-channel";
+		err = xenbus_printf(xbt, dev->nodename,
+				    "event-channel", "%u", rinfo->evtchn);
+		if (err) {
+			message = "writing event-channel";
+			goto abort_transaction;
+		}
+	} else {
+		err = -EINVAL; /* multiple rings not supported at this stage */
+		goto abort_transaction;
 	}
 	err = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
@@ -1410,9 +1432,14 @@ again:
 		goto destroy_blkring;
 	}
 
-	for (i = 0; i < BLK_RING_SIZE(info); i++)
-		rinfo->shadow[i].req.u.rw.id = i+1;
-	rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
+	for (i = 0; i < info->nr_rings; i++) {
+		int j;
+		rinfo = &info->rinfo[i];
+
+		for (j = 0; j < BLK_RING_SIZE(info); j++)
+			rinfo->shadow[j].req.u.rw.id = j + 1;
+		rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
+	}
 	xenbus_switch_state(dev, XenbusStateInitialised);
 
 	return 0;
@@ -1423,7 +1450,7 @@ again:
 		xenbus_dev_fatal(dev, err, "%s", message);
  destroy_blkring:
 	blkif_free(info, 0);
- out:
+
 	return err;
 }
 
@@ -1436,9 +1463,8 @@ again:
 static int blkfront_probe(struct xenbus_device *dev,
 			  const struct xenbus_device_id *id)
 {
-	int err, vdevice;
+	int err, vdevice, r_index;
 	struct blkfront_info *info;
-	struct blkfront_ring_info *rinfo;
 
 	/* FIXME: Use dynamic device id if this is not set. */
 	err = xenbus_scanf(XBT_NIL, dev->nodename,
@@ -1488,10 +1514,22 @@ static int blkfront_probe(struct xenbus_device *dev,
 		return -ENOMEM;
 	}
 
-	rinfo = &info->rinfo;
-	INIT_LIST_HEAD(&rinfo->indirect_pages);
-	rinfo->dev_info = info;
-	INIT_WORK(&rinfo->work, blkif_restart_queue);
+	info->nr_rings = 1;
+	info->rinfo = kcalloc(info->nr_rings, sizeof(*info->rinfo), GFP_KERNEL);
+	if (!info->rinfo) {
+		xenbus_dev_fatal(dev, -ENOMEM, "allocating ring_info structure");
+		kfree(info);
+		return -ENOMEM;
+	}
+
+	for (r_index = 0; r_index < info->nr_rings; r_index++) {
+		struct blkfront_ring_info *rinfo;
+
+		rinfo = &info->rinfo[r_index];
+		INIT_LIST_HEAD(&rinfo->indirect_pages);
+		rinfo->dev_info = info;
+		INIT_WORK(&rinfo->work, blkif_restart_queue);
+	}
 
 	mutex_init(&info->mutex);
 	spin_lock_init(&info->io_lock);
@@ -1523,7 +1561,7 @@ static void split_bio_end(struct bio *bio)
 
 static int blkif_recover(struct blkfront_info *info)
 {
-	int i;
+	int i, r_index;
 	struct request *req, *n;
 	struct blk_shadow *copy;
 	int rc;
@@ -1533,57 +1571,62 @@ static int blkif_recover(struct blkfront_info *info)
 	int pending, size;
 	struct split_bio *split_bio;
 	struct list_head requests;
-	struct blkfront_ring_info *rinfo = &info->rinfo;
-
-	/* Stage 1: Make a safe copy of the shadow state. */
-	copy = kmemdup(rinfo->shadow, sizeof(rinfo->shadow),
-		       GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
-	if (!copy)
-		return -ENOMEM;
-
-	/* Stage 2: Set up free list. */
-	memset(&rinfo->shadow, 0, sizeof(rinfo->shadow));
-	for (i = 0; i < BLK_RING_SIZE(info); i++)
-		rinfo->shadow[i].req.u.rw.id = i+1;
-	rinfo->shadow_free = rinfo->ring.req_prod_pvt;
-	rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
-
-	rc = blkfront_gather_backend_features(info);
-	if (rc) {
-		kfree(copy);
-		return rc;
-	}
 
+	blkfront_gather_backend_features(info);
 	segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
 	blk_queue_max_segments(info->rq, segs);
 	bio_list_init(&bio_list);
 	INIT_LIST_HEAD(&requests);
-	for (i = 0; i < BLK_RING_SIZE(info); i++) {
-		/* Not in use? */
-		if (!copy[i].request)
-			continue;
 
-		/*
-		 * Get the bios in the request so we can re-queue them.
-		 */
-		if (copy[i].request->cmd_flags &
-		    (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
+	for (r_index = 0; r_index < info->nr_rings; r_index++) {
+		struct blkfront_ring_info *rinfo;
+
+		rinfo = &info->rinfo[r_index];
+		/* Stage 1: Make a safe copy of the shadow state. */
+		copy = kmemdup(rinfo->shadow, sizeof(rinfo->shadow),
+			       GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
+		if (!copy)
+			return -ENOMEM;
+
+		/* Stage 2: Set up free list. */
+		memset(&rinfo->shadow, 0, sizeof(rinfo->shadow));
+		for (i = 0; i < BLK_RING_SIZE(info); i++)
+			rinfo->shadow[i].req.u.rw.id = i+1;
+		rinfo->shadow_free = rinfo->ring.req_prod_pvt;
+		rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
+
+		rc = blkfront_setup_indirect(rinfo);
+		if (rc) {
+			kfree(copy);
+			return rc;
+		}
+
+		for (i = 0; i < BLK_RING_SIZE(info); i++) {
+			/* Not in use? */
+			if (!copy[i].request)
+				continue;
+
 			/*
-			 * Flush operations don't contain bios, so
-			 * we need to requeue the whole request
+			 * Get the bios in the request so we can re-queue them.
 			 */
-			list_add(&copy[i].request->queuelist, &requests);
-			continue;
+			if (copy[i].request->cmd_flags &
+			    (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
+				/*
+				 * Flush operations don't contain bios, so
+				 * we need to requeue the whole request
+				 */
+				list_add(&copy[i].request->queuelist, &requests);
+				continue;
+			}
+			merge_bio.head = copy[i].request->bio;
+			merge_bio.tail = copy[i].request->biotail;
+			bio_list_merge(&bio_list, &merge_bio);
+			copy[i].request->bio = NULL;
+			blk_end_request_all(copy[i].request, 0);
 		}
-		merge_bio.head = copy[i].request->bio;
-		merge_bio.tail = copy[i].request->biotail;
-		bio_list_merge(&bio_list, &merge_bio);
-		copy[i].request->bio = NULL;
-		blk_end_request_all(copy[i].request, 0);
-	}
-
-	kfree(copy);
 
+		kfree(copy);
+	}
 	xenbus_switch_state(info->xbdev, XenbusStateConnected);
 
 	spin_lock_irq(&info->io_lock);
@@ -1591,8 +1634,13 @@ static int blkif_recover(struct blkfront_info *info)
 	/* Now safe for us to use the shared ring */
 	info->connected = BLKIF_STATE_CONNECTED;
 
-	/* Kick any other new requests queued since we resumed */
-	kick_pending_request_queues(rinfo);
+	for (r_index = 0; r_index < info->nr_rings; r_index++) {
+		struct blkfront_ring_info *rinfo;
+
+		rinfo = &info->rinfo[r_index];
+		/* Kick any other new requests queued since we resumed */
+		kick_pending_request_queues(rinfo);
+	}
 
 	list_for_each_entry_safe(req, n, &requests, queuelist) {
 		/* Requeue pending requests (flush or discard) */
@@ -1801,7 +1849,7 @@ out_of_memory:
 /*
  * Gather all backend feature-*
  */
-static int blkfront_gather_backend_features(struct blkfront_info *info)
+static void blkfront_gather_backend_features(struct blkfront_info *info)
 {
 	int err;
 	int barrier, flush, discard, persistent;
@@ -1856,8 +1904,6 @@ static int blkfront_gather_backend_features(struct blkfront_info *info)
 	else
 		info->max_indirect_segments = min(indirect_segments,
 						  xen_blkif_max_segments);
-
-	return blkfront_setup_indirect(&info->rinfo);
 }
 
 /*
@@ -1870,8 +1916,7 @@ static void blkfront_connect(struct blkfront_info *info)
 	unsigned long sector_size;
 	unsigned int physical_sector_size;
 	unsigned int binfo;
-	int err;
-	struct blkfront_ring_info *rinfo = &info->rinfo;
+	int err, i;
 
 	switch (info->connected) {
 	case BLKIF_STATE_CONNECTED:
@@ -1928,11 +1973,15 @@ static void blkfront_connect(struct blkfront_info *info)
 	if (err != 1)
 		physical_sector_size = sector_size;
 
-	err = blkfront_gather_backend_features(info);
-	if (err) {
-		xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s",
-				 info->xbdev->otherend);
-		return;
+	blkfront_gather_backend_features(info);
+	for (i = 0; i < info->nr_rings; i++) {
+		err = blkfront_setup_indirect(&info->rinfo[i]);
+		if (err) {
+			xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s",
+					 info->xbdev->otherend);
+			blkif_free(info, 0);
+			return;
+		}
 	}
 
 	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size,
@@ -1948,7 +1996,8 @@ static void blkfront_connect(struct blkfront_info *info)
 	/* Kick pending requests. */
 	spin_lock_irq(&info->io_lock);
 	info->connected = BLKIF_STATE_CONNECTED;
-	kick_pending_request_queues(rinfo);
+	for (i = 0; i < info->nr_rings; i++)
+		kick_pending_request_queues(&info->rinfo[i]);
 	spin_unlock_irq(&info->io_lock);
 
 	add_disk(info->gd);
@@ -2111,7 +2160,7 @@ static void blkif_release(struct gendisk *disk, fmode_t mode)
 		dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
 		xlvbd_release_gendisk(info);
 		xenbus_frontend_closed(info->xbdev);
- 	}
+	}
 
 	mutex_unlock(&info->mutex);
 
-- 
1.8.3.1
