lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 28 Feb 2013 11:28:49 +0100
From:	Roger Pau Monne <roger.pau@...rix.com>
To:	<linux-kernel@...r.kernel.org>, <xen-devel@...ts.xen.org>
CC:	Roger Pau Monne <roger.pau@...rix.com>,
	Konrad Rzeszutek Wilk <konrad.wilk@...cle.com>
Subject: [PATCH RFC 06/12] xen-blkback: implement LRU mechanism for persistent grants

This mechanism allows blkback to change the number of grants
persistently mapped at run time.

The algorithm uses a simple LRU mechanism that removes (if needed) the
persistent grants that have not been used since the last LRU run, or
if all grants have been used it removes the first grants in the list
(that are not in use).

The algorithm has several parameters that can be tuned by the user
from sysfs:

 * max_persistent_grants: maximum number of grants that will be
   persistently mapped.
 * lru_interval: minimum interval (in ms) at which the LRU should be
   run
 * lru_num_clean: number of persistent grants to remove when executing
   the LRU.

Signed-off-by: Roger Pau Monné <roger.pau@...rix.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@...cle.com>
Cc: xen-devel@...ts.xen.org
---
 drivers/block/xen-blkback/blkback.c |  207 +++++++++++++++++++++++++++--------
 drivers/block/xen-blkback/common.h  |    4 +
 drivers/block/xen-blkback/xenbus.c  |    1 +
 3 files changed, 166 insertions(+), 46 deletions(-)

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 415a0c7..c14b736 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -63,6 +63,44 @@ static int xen_blkif_reqs = 64;
 module_param_named(reqs, xen_blkif_reqs, int, 0);
 MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
 
+/*
+ * Maximum number of grants to map persistently in blkback. For maximum
+ * performance this should be the total numbers of grants that can be used
+ * to fill the ring, but since this might become too high, specially with
+ * the use of indirect descriptors, we set it to a value that provides good
+ * performance without using too much memory.
+ *
+ * When the list of persistent grants is full we clean it using a LRU
+ * algorithm.
+ */
+
+static int xen_blkif_max_pgrants = 352;
+module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644);
+MODULE_PARM_DESC(max_persistent_grants,
+                 "Maximum number of grants to map persistently");
+
+/*
+ * The LRU mechanism to clean the lists of persistent grants needs to
+ * be executed periodically. The time interval between consecutive executions
+ * of the purge mechanism is set in ms.
+ */
+
+static int xen_blkif_lru_interval = 100;
+module_param_named(lru_interval, xen_blkif_lru_interval, int, 0644);
+MODULE_PARM_DESC(lru_interval,
+"Execution interval (in ms) of the LRU mechanism to clean the list of persistent grants");
+
+/*
+ * When the persistent grants list is full we will remove unused grants
+ * from the list. The number of grants to be removed at each LRU execution
+ * can be set dynamically.
+ */
+
+static int xen_blkif_lru_num_clean = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+module_param_named(lru_num_clean, xen_blkif_lru_num_clean, int, 0644);
+MODULE_PARM_DESC(lru_num_clean,
+"Number of persistent grants to unmap when the list is full");
+
 /* Run-time switchable: /sys/module/blkback/parameters/ */
 static unsigned int log_stats;
 module_param(log_stats, int, 0644);
@@ -81,7 +119,7 @@ struct pending_req {
 	unsigned short		operation;
 	int			status;
 	struct list_head	free_list;
-	DECLARE_BITMAP(unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+	struct persistent_gnt	*persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 };
 
 #define BLKBACK_INVALID_HANDLE (~0)
@@ -102,36 +140,6 @@ struct xen_blkbk {
 static struct xen_blkbk *blkbk;
 
 /*
- * Maximum number of grant pages that can be mapped in blkback.
- * BLKIF_MAX_SEGMENTS_PER_REQUEST * RING_SIZE is the maximum number of
- * pages that blkback will persistently map.
- * Currently, this is:
- * RING_SIZE = 32 (for all known ring types)
- * BLKIF_MAX_SEGMENTS_PER_REQUEST = 11
- * sizeof(struct persistent_gnt) = 48
- * So the maximum memory used to store the grants is:
- * 32 * 11 * 48 = 16896 bytes
- */
-static inline unsigned int max_mapped_grant_pages(enum blkif_protocol protocol)
-{
-	switch (protocol) {
-	case BLKIF_PROTOCOL_NATIVE:
-		return __CONST_RING_SIZE(blkif, PAGE_SIZE) *
-			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
-	case BLKIF_PROTOCOL_X86_32:
-		return __CONST_RING_SIZE(blkif_x86_32, PAGE_SIZE) *
-			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
-	case BLKIF_PROTOCOL_X86_64:
-		return __CONST_RING_SIZE(blkif_x86_64, PAGE_SIZE) *
-			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
-	default:
-		BUG();
-	}
-	return 0;
-}
-
-
-/*
  * Little helpful macro to figure out the index and virtual address of the
  * pending_pages[..]. For each 'pending_req' we have have up to
  * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through
@@ -251,6 +259,76 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num)
 	BUG_ON(num != 0);
 }
 
+static int purge_persistent_gnt(struct rb_root *root, int num)
+{
+	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct persistent_gnt *persistent_gnt;
+	struct rb_node *n;
+	int ret, segs_to_unmap = 0;
+	int requested_num = num;
+	int preserve_used = 1;
+
+	pr_debug("Requested the purge of %d persistent grants\n", num);
+
+purge_list:
+	foreach_grant_safe(persistent_gnt, n, root, node) {
+		BUG_ON(persistent_gnt->handle ==
+			BLKBACK_INVALID_HANDLE);
+
+		if (persistent_gnt->flags & PERSISTENT_GNT_ACTIVE)
+			continue;
+		if (preserve_used &&
+		    (persistent_gnt->flags & PERSISTENT_GNT_USED))
+			continue;
+
+		gnttab_set_unmap_op(&unmap[segs_to_unmap],
+			(unsigned long) pfn_to_kaddr(page_to_pfn(
+				persistent_gnt->page)),
+			GNTMAP_host_map,
+			persistent_gnt->handle);
+
+		pages[segs_to_unmap] = persistent_gnt->page;
+
+		if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+			ret = gnttab_unmap_refs(unmap, NULL, pages,
+				segs_to_unmap);
+			BUG_ON(ret);
+			free_xenballooned_pages(segs_to_unmap, pages);
+			segs_to_unmap = 0;
+		}
+
+		rb_erase(&persistent_gnt->node, root);
+		kfree(persistent_gnt);
+		if (--num == 0)
+			goto finished;
+	}
+	/*
+	 * If we get here it means we also need to start cleaning
+	 * grants that were used since last purge in order to cope
+	 * with the requested num
+	 */
+	if (preserve_used) {
+		pr_debug("Still missing %d purged frames\n", num);
+		preserve_used = 0;
+		goto purge_list;
+	}
+finished:
+	if (segs_to_unmap > 0) {
+		ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap);
+		BUG_ON(ret);
+		free_xenballooned_pages(segs_to_unmap, pages);
+	}
+	/* Finally remove the "used" flag from all the persistent grants */
+	foreach_grant_safe(persistent_gnt, n, root, node) {
+		BUG_ON(persistent_gnt->handle ==
+			BLKBACK_INVALID_HANDLE);
+		persistent_gnt->flags &= ~PERSISTENT_GNT_USED;
+	}
+	pr_debug("Purged %d/%d\n", (requested_num - num), requested_num);
+	return (requested_num - num);
+}
+
 /*
  * Retrieve from the 'pending_reqs' a free pending_req structure to be used.
  */
@@ -397,6 +475,8 @@ int xen_blkif_schedule(void *arg)
 {
 	struct xen_blkif *blkif = arg;
 	struct xen_vbd *vbd = &blkif->vbd;
+	int rq_purge, purged;
+	unsigned long timeout;
 
 	xen_blkif_get(blkif);
 
@@ -406,13 +486,21 @@ int xen_blkif_schedule(void *arg)
 		if (unlikely(vbd->size != vbd_sz(vbd)))
 			xen_vbd_resize(blkif);
 
-		wait_event_interruptible(
+		timeout = msecs_to_jiffies(xen_blkif_lru_interval);
+
+		timeout = wait_event_interruptible_timeout(
 			blkif->wq,
-			blkif->waiting_reqs || kthread_should_stop());
-		wait_event_interruptible(
+			blkif->waiting_reqs || kthread_should_stop(),
+			timeout);
+		if (timeout == 0)
+			goto purge_gnt_list;
+		timeout = wait_event_interruptible_timeout(
 			blkbk->pending_free_wq,
 			!list_empty(&blkbk->pending_free) ||
-			kthread_should_stop());
+			kthread_should_stop(),
+			timeout);
+		if (timeout == 0)
+			goto purge_gnt_list;
 
 		blkif->waiting_reqs = 0;
 		smp_mb(); /* clear flag *before* checking for work */
@@ -420,6 +508,32 @@ int xen_blkif_schedule(void *arg)
 		if (do_block_io_op(blkif))
 			blkif->waiting_reqs = 1;
 
+purge_gnt_list:
+		if (blkif->vbd.feature_gnt_persistent &&
+		    time_after(jiffies, blkif->next_lru)) {
+			/* Clean the list of persistent grants */
+			if (blkif->persistent_gnt_c > xen_blkif_max_pgrants ||
+			    (blkif->persistent_gnt_c == xen_blkif_max_pgrants &&
+			     blkif->vbd.overflow_max_grants)) {
+				rq_purge = blkif->persistent_gnt_c -
+				           xen_blkif_max_pgrants +
+				           xen_blkif_lru_num_clean;
+				rq_purge = rq_purge > blkif->persistent_gnt_c ?
+				           blkif->persistent_gnt_c : rq_purge;
+				purged = purge_persistent_gnt(
+					  &blkif->persistent_gnts, rq_purge);
+				if (purged != rq_purge)
+					pr_debug(DRV_PFX " unable to meet persistent grants purge requirements for device %#x, domain %u, requested %d done %d\n",
+					         blkif->domid,
+					         blkif->vbd.handle,
+					         rq_purge, purged);
+				blkif->persistent_gnt_c -= purged;
+				blkif->vbd.overflow_max_grants = 0;
+			}
+			blkif->next_lru = jiffies +
+			        msecs_to_jiffies(xen_blkif_lru_interval);
+		}
+
 		if (log_stats && time_after(jiffies, blkif->st_print))
 			print_stats(blkif);
 	}
@@ -453,13 +567,18 @@ static void xen_blkbk_unmap(struct pending_req *req)
 {
 	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct persistent_gnt *persistent_gnt;
 	unsigned int i, invcount = 0;
 	grant_handle_t handle;
 	int ret;
 
 	for (i = 0; i < req->nr_pages; i++) {
-		if (!test_bit(i, req->unmap_seg))
+		if (req->persistent_gnts[i] != NULL) {
+			persistent_gnt = req->persistent_gnts[i];
+			persistent_gnt->flags |= PERSISTENT_GNT_USED;
+			persistent_gnt->flags &= ~PERSISTENT_GNT_ACTIVE;
 			continue;
+		}
 		handle = pending_handle(req, i);
 		if (handle == BLKBACK_INVALID_HANDLE)
 			continue;
@@ -480,8 +599,8 @@ static int xen_blkbk_map(struct blkif_request *req,
 			 struct page *pages[])
 {
 	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-	struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	struct page *pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct persistent_gnt **persistent_gnts = pending_req->persistent_gnts;
 	struct persistent_gnt *persistent_gnt = NULL;
 	struct xen_blkif *blkif = pending_req->blkif;
 	phys_addr_t addr = 0;
@@ -494,9 +613,6 @@ static int xen_blkbk_map(struct blkif_request *req,
 
 	use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);
 
-	BUG_ON(blkif->persistent_gnt_c >
-		   max_mapped_grant_pages(pending_req->blkif->blk_protocol));
-
 	/*
 	 * Fill out preq.nr_sects with proper amount of sectors, and setup
 	 * assign map[..] with the PFN of the page in our domain with the
@@ -516,9 +632,9 @@ static int xen_blkbk_map(struct blkif_request *req,
 			 * the grant is already mapped
 			 */
 			new_map = false;
+			persistent_gnt->flags |= PERSISTENT_GNT_ACTIVE;
 		} else if (use_persistent_gnts &&
-			   blkif->persistent_gnt_c <
-			   max_mapped_grant_pages(blkif->blk_protocol)) {
+			   blkif->persistent_gnt_c < xen_blkif_max_pgrants) {
 			/*
 			 * We are using persistent grants, the grant is
 			 * not mapped but we have room for it
@@ -536,6 +652,7 @@ static int xen_blkbk_map(struct blkif_request *req,
 			}
 			persistent_gnt->gnt = req->u.rw.seg[i].gref;
 			persistent_gnt->handle = BLKBACK_INVALID_HANDLE;
+			persistent_gnt->flags = PERSISTENT_GNT_ACTIVE;
 
 			pages_to_gnt[segs_to_map] =
 				persistent_gnt->page;
@@ -547,7 +664,7 @@ static int xen_blkbk_map(struct blkif_request *req,
 			blkif->persistent_gnt_c++;
 			pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
 				 persistent_gnt->gnt, blkif->persistent_gnt_c,
-				 max_mapped_grant_pages(blkif->blk_protocol));
+				 xen_blkif_max_pgrants);
 		} else {
 			/*
 			 * We are either using persistent grants and
@@ -557,7 +674,7 @@ static int xen_blkbk_map(struct blkif_request *req,
 			if (use_persistent_gnts &&
 				!blkif->vbd.overflow_max_grants) {
 				blkif->vbd.overflow_max_grants = 1;
-				pr_alert(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n",
+				pr_debug(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n",
 					 blkif->domid, blkif->vbd.handle);
 			}
 			new_map = true;
@@ -595,7 +712,6 @@ static int xen_blkbk_map(struct blkif_request *req,
 	 * so that when we access vaddr(pending_req,i) it has the contents of
 	 * the page from the other domain.
 	 */
-	bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
 	for (i = 0, j = 0; i < nseg; i++) {
 		if (!persistent_gnts[i] ||
 		    persistent_gnts[i]->handle == BLKBACK_INVALID_HANDLE) {
@@ -634,7 +750,6 @@ static int xen_blkbk_map(struct blkif_request *req,
 				(req->u.rw.seg[i].first_sect << 9);
 		} else {
 			pending_handle(pending_req, i) = map[j].handle;
-			bitmap_set(pending_req->unmap_seg, i, 1);
 
 			if (ret) {
 				j++;
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index f338f8a..bd44d75 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -167,11 +167,14 @@ struct xen_vbd {
 
 struct backend_info;
 
+#define PERSISTENT_GNT_ACTIVE	0x1
+#define PERSISTENT_GNT_USED		0x2
 
 struct persistent_gnt {
 	struct page *page;
 	grant_ref_t gnt;
 	grant_handle_t handle;
+	uint8_t flags;
 	struct rb_node node;
 };
 
@@ -204,6 +207,7 @@ struct xen_blkif {
 	/* tree to store persistent grants */
 	struct rb_root		persistent_gnts;
 	unsigned int		persistent_gnt_c;
+	unsigned long		next_lru;
 
 	/* statistics */
 	unsigned long		st_print;
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 5e237f6..abb399a 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -116,6 +116,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	init_completion(&blkif->drain_complete);
 	atomic_set(&blkif->drain, 0);
 	blkif->st_print = jiffies;
+	blkif->next_lru = jiffies;
 	init_waitqueue_head(&blkif->waiting_to_free);
 	blkif->persistent_gnts.rb_node = NULL;
 
-- 
1.7.7.5 (Apple Git-26)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ