lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20171220172205.26464-18-m@bjorling.me>
Date:   Wed, 20 Dec 2017 18:21:57 +0100
From:   Matias Bjørling <m@...rling.me>
To:     linux-block@...r.kernel.org, linux-kernel@...r.kernel.org
Cc:     Javier González <javier@...xlabs.com>,
        Hans Holmberg <hans.holmberg@...xlabs.com>,
        Matias Bjørling <m@...rling.me>
Subject: [PATCH 17/25] lightnvm: pblk: use exact free block counter in RL

From: Javier González <javier@...xlabs.com>

Until now, pblk's rate-limiter has used a heuristic to reserve space for
GC I/O given that the over-provision area was fixed.

In preparation for allowing to define the over-provision area on target
creation, define a dedicated free_block counter in the rate-limiter to
track the number of blocks being used for user data.

Signed-off-by: Javier González <javier@...xlabs.com>
Signed-off-by: Hans Holmberg <hans.holmberg@...xlabs.com>
Signed-off-by: Matias Bjørling <m@...rling.me>
---
 drivers/lightnvm/pblk-core.c     | 19 +++++---------
 drivers/lightnvm/pblk-init.c     | 18 +++++++++++---
 drivers/lightnvm/pblk-recovery.c |  4 +--
 drivers/lightnvm/pblk-rl.c       | 54 +++++++++++++++++++++++++++-------------
 drivers/lightnvm/pblk-sysfs.c    |  9 ++++---
 drivers/lightnvm/pblk.h          | 15 ++++++-----
 6 files changed, 73 insertions(+), 46 deletions(-)

diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 9ebc60c..22cdafd 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -1145,7 +1145,7 @@ int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line)
 	}
 	spin_unlock(&l_mg->free_lock);
 
-	pblk_rl_free_lines_dec(&pblk->rl, line);
+	pblk_rl_free_lines_dec(&pblk->rl, line, true);
 
 	if (!pblk_line_init_bb(pblk, line, 0)) {
 		list_add(&line->list, &l_mg->free_list);
@@ -1233,7 +1233,7 @@ static struct pblk_line *pblk_line_retry(struct pblk *pblk,
 	l_mg->data_line = retry_line;
 	spin_unlock(&l_mg->free_lock);
 
-	pblk_rl_free_lines_dec(&pblk->rl, retry_line);
+	pblk_rl_free_lines_dec(&pblk->rl, line, false);
 
 	if (pblk_line_erase(pblk, retry_line))
 		goto retry;
@@ -1252,7 +1252,6 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
 {
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_line *line;
-	int is_next = 0;
 
 	spin_lock(&l_mg->free_lock);
 	line = pblk_line_get(pblk);
@@ -1280,7 +1279,6 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
 	} else {
 		l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
 		l_mg->data_next->type = PBLK_LINETYPE_DATA;
-		is_next = 1;
 	}
 	spin_unlock(&l_mg->free_lock);
 
@@ -1290,10 +1288,6 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
 			return NULL;
 	}
 
-	pblk_rl_free_lines_dec(&pblk->rl, line);
-	if (is_next)
-		pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
-
 retry_setup:
 	if (!pblk_line_init_metadata(pblk, line, NULL)) {
 		line = pblk_line_retry(pblk, line);
@@ -1311,6 +1305,8 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
 		goto retry_setup;
 	}
 
+	pblk_rl_free_lines_dec(&pblk->rl, line, true);
+
 	return line;
 }
 
@@ -1395,7 +1391,6 @@ struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_line *cur, *new = NULL;
 	unsigned int left_seblks;
-	int is_next = 0;
 
 	cur = l_mg->data_line;
 	new = l_mg->data_next;
@@ -1444,6 +1439,8 @@ struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
 		goto retry_setup;
 	}
 
+	pblk_rl_free_lines_dec(&pblk->rl, new, true);
+
 	/* Allocate next line for preparation */
 	spin_lock(&l_mg->free_lock);
 	l_mg->data_next = pblk_line_get(pblk);
@@ -1457,13 +1454,9 @@ struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
 	} else {
 		l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
 		l_mg->data_next->type = PBLK_LINETYPE_DATA;
-		is_next = 1;
 	}
 	spin_unlock(&l_mg->free_lock);
 
-	if (is_next)
-		pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
-
 out:
 	return new;
 }
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index 2af1b0a..e939f0a 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -579,22 +579,34 @@ static unsigned int calc_emeta_len(struct pblk *pblk)
 static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line_meta *lm = &pblk->lm;
 	struct nvm_geo *geo = &dev->geo;
 	sector_t provisioned;
+	int sec_meta, blk_meta;
 
-	pblk->over_pct = 20;
+	pblk->op = 20;
 
 	provisioned = nr_free_blks;
-	provisioned *= (100 - pblk->over_pct);
+	provisioned *= (100 - pblk->op);
 	sector_div(provisioned, 100);
 
+	pblk->op_blks = nr_free_blks - provisioned;
+
 	/* Internally pblk manages all free blocks, but all calculations based
 	 * on user capacity consider only provisioned blocks
 	 */
 	pblk->rl.total_blocks = nr_free_blks;
 	pblk->rl.nr_secs = nr_free_blks * geo->sec_per_chk;
-	pblk->capacity = provisioned * geo->sec_per_chk;
+
+	/* Consider sectors used for metadata */
+	sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
+	blk_meta = DIV_ROUND_UP(sec_meta, geo->sec_per_chk);
+
+	pblk->capacity = (provisioned - blk_meta) * geo->sec_per_chk;
+
 	atomic_set(&pblk->rl.free_blocks, nr_free_blks);
+	atomic_set(&pblk->rl.free_user_blocks, nr_free_blks);
 }
 
 static int pblk_lines_alloc_metadata(struct pblk *pblk)
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index 39a2e19..fd38036 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -989,10 +989,8 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
 	}
 	spin_unlock(&l_mg->free_lock);
 
-	if (is_next) {
+	if (is_next)
 		pblk_line_erase(pblk, l_mg->data_next);
-		pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
-	}
 
 out:
 	if (found_lines != recovered_lines)
diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c
index abae31f..861572c 100644
--- a/drivers/lightnvm/pblk-rl.c
+++ b/drivers/lightnvm/pblk-rl.c
@@ -89,17 +89,15 @@ unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl)
 	return atomic_read(&rl->free_blocks);
 }
 
-/*
- * We check for (i) the number of free blocks in the current LUN and (ii) the
- * total number of free blocks in the pblk instance. This is to even out the
- * number of free blocks on each LUN when GC kicks in.
- *
- * Only the total number of free blocks is used to configure the rate limiter.
- */
-void pblk_rl_update_rates(struct pblk_rl *rl)
+unsigned long pblk_rl_nr_user_free_blks(struct pblk_rl *rl)
+{
+	return atomic_read(&rl->free_user_blocks);
+}
+
+static void __pblk_rl_update_rates(struct pblk_rl *rl,
+				   unsigned long free_blocks)
 {
 	struct pblk *pblk = container_of(rl, struct pblk, rl);
-	unsigned long free_blocks = pblk_rl_nr_free_blks(rl);
 	int max = rl->rb_budget;
 
 	if (free_blocks >= rl->high) {
@@ -132,20 +130,37 @@ void pblk_rl_update_rates(struct pblk_rl *rl)
 		pblk_gc_should_stop(pblk);
 }
 
+void pblk_rl_update_rates(struct pblk_rl *rl)
+{
+	__pblk_rl_update_rates(rl, pblk_rl_nr_user_free_blks(rl));
+}
+
 void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
 {
 	int blk_in_line = atomic_read(&line->blk_in_line);
+	int free_blocks;
 
 	atomic_add(blk_in_line, &rl->free_blocks);
-	pblk_rl_update_rates(rl);
+	free_blocks = atomic_add_return(blk_in_line, &rl->free_user_blocks);
+
+	__pblk_rl_update_rates(rl, free_blocks);
 }
 
-void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
+void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line,
+			    bool used)
 {
 	int blk_in_line = atomic_read(&line->blk_in_line);
+	int free_blocks;
 
 	atomic_sub(blk_in_line, &rl->free_blocks);
-	pblk_rl_update_rates(rl);
+
+	if (used)
+		free_blocks = atomic_sub_return(blk_in_line,
+							&rl->free_user_blocks);
+	else
+		free_blocks = atomic_read(&rl->free_user_blocks);
+
+	__pblk_rl_update_rates(rl, free_blocks);
 }
 
 int pblk_rl_high_thrs(struct pblk_rl *rl)
@@ -174,17 +189,22 @@ void pblk_rl_free(struct pblk_rl *rl)
 void pblk_rl_init(struct pblk_rl *rl, int budget)
 {
 	struct pblk *pblk = container_of(rl, struct pblk, rl);
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_line_meta *lm = &pblk->lm;
 	int min_blocks = lm->blk_per_line * PBLK_GC_RSV_LINE;
+	int sec_meta, blk_meta;
+
 	unsigned int rb_windows;
 
-	rl->high = rl->total_blocks / PBLK_USER_HIGH_THRS;
+	/* Consider sectors used for metadata */
+	sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
+	blk_meta = DIV_ROUND_UP(sec_meta, geo->sec_per_chk);
+
+	rl->high = pblk->op_blks - blk_meta - lm->blk_per_line;
 	rl->high_pw = get_count_order(rl->high);
 
-	rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
-	if (rl->low < min_blocks)
-		rl->low = min_blocks;
-
 	rl->rsv_blocks = min_blocks;
 
 	/* This will always be a power-of-2 */
diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c
index 5cee2ac..620bab8 100644
--- a/drivers/lightnvm/pblk-sysfs.c
+++ b/drivers/lightnvm/pblk-sysfs.c
@@ -49,11 +49,12 @@ static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page)
 
 static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page)
 {
-	int free_blocks, total_blocks;
+	int free_blocks, free_user_blocks, total_blocks;
 	int rb_user_max, rb_user_cnt;
 	int rb_gc_max, rb_gc_cnt, rb_budget, rb_state;
 
-	free_blocks = atomic_read(&pblk->rl.free_blocks);
+	free_blocks = pblk_rl_nr_free_blks(&pblk->rl);
+	free_user_blocks = pblk_rl_nr_user_free_blks(&pblk->rl);
 	rb_user_max = pblk->rl.rb_user_max;
 	rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt);
 	rb_gc_max = pblk->rl.rb_gc_max;
@@ -64,16 +65,16 @@ static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page)
 	total_blocks = pblk->rl.total_blocks;
 
 	return snprintf(page, PAGE_SIZE,
-		"u:%u/%u,gc:%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n",
+		"u:%u/%u,gc:%u/%u(%u)(stop:<%u,full:>%u,free:%d/%d/%d)-%d\n",
 				rb_user_cnt,
 				rb_user_max,
 				rb_gc_cnt,
 				rb_gc_max,
 				rb_state,
 				rb_budget,
-				pblk->rl.low,
 				pblk->rl.high,
 				free_blocks,
+				free_user_blocks,
 				total_blocks,
 				READ_ONCE(pblk->rl.rb_user_active));
 }
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 6f5fbd1..9a21d36 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -252,9 +252,6 @@ struct pblk_rl {
 	unsigned int high;	/* Upper threshold for rate limiter (free run -
 				 * user I/O rate limiter
 				 */
-	unsigned int low;	/* Lower threshold for rate limiter (user I/O
-				 * rate limiter - stall)
-				 */
 	unsigned int high_pw;	/* High rounded up as a power of 2 */
 
 #define PBLK_USER_HIGH_THRS 8	/* Begin write limit at 12% available blks */
@@ -288,7 +285,9 @@ struct pblk_rl {
 
 	unsigned long long nr_secs;
 	unsigned long total_blocks;
-	atomic_t free_blocks;
+
+	atomic_t free_blocks;		/* Total number of free blocks (+ OP) */
+	atomic_t free_user_blocks;	/* Number of user free blocks (no OP) */
 };
 
 #define PBLK_LINE_EMPTY (~0U)
@@ -579,7 +578,9 @@ struct pblk {
 			    */
 
 	sector_t capacity; /* Device capacity when bad blocks are subtracted */
-	int over_pct;      /* Percentage of device used for over-provisioning */
+
+	int op;      /* Percentage of device used for over-provisioning */
+	int op_blks; /* Number of blocks used for over-provisioning */
 
 	/* pblk provisioning values. Used by rate limiter */
 	struct pblk_rl rl;
@@ -839,6 +840,7 @@ void pblk_rl_free(struct pblk_rl *rl);
 void pblk_rl_update_rates(struct pblk_rl *rl);
 int pblk_rl_high_thrs(struct pblk_rl *rl);
 unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl);
+unsigned long pblk_rl_nr_user_free_blks(struct pblk_rl *rl);
 int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries);
 void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries);
 void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries);
@@ -847,7 +849,8 @@ void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries);
 void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc);
 int pblk_rl_max_io(struct pblk_rl *rl);
 void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line);
-void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line);
+void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line,
+			    bool used);
 int pblk_rl_is_limit(struct pblk_rl *rl);
 
 /*
-- 
2.9.3

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ