[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20171013124647.32668-37-m@bjorling.me>
Date: Fri, 13 Oct 2017 14:46:25 +0200
From: Matias Bjørling <m@...rling.me>
To: axboe@...com
Cc: linux-block@...r.kernel.org, linux-kernel@...r.kernel.org,
Javier González <javier@...xlabs.com>,
Matias Bjørling <m@...rling.me>
Subject: [GIT PULL 36/58] lightnvm: pblk: remove I/O dependency on write path
From: Javier González <javier@...xlabs.com>
pblk schedules user I/O, metadata I/O and erases on the write path in
order to minimize collisions at the media level. Until now, there has
been a dependency between user and metadata I/Os that could lead to a
deadlock as both take the per-LUN semaphore to schedule submission.
This patch removes this dependency and guarantees forward progress at a
per I/O granularity.
Signed-off-by: Javier González <javier@...xlabs.com>
Signed-off-by: Matias Bjørling <m@...rling.me>
---
drivers/lightnvm/pblk-write.c | 145 +++++++++++++++++++-----------------------
1 file changed, 65 insertions(+), 80 deletions(-)
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index f2e846f..6c1cafa 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -220,15 +220,16 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
}
static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
- struct pblk_c_ctx *c_ctx, struct ppa_addr *erase_ppa)
+ struct ppa_addr *erase_ppa)
{
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line *e_line = pblk_line_get_erase(pblk);
+ struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
unsigned int valid = c_ctx->nr_valid;
unsigned int padded = c_ctx->nr_padded;
unsigned int nr_secs = valid + padded;
unsigned long *lun_bitmap;
- int ret = 0;
+ int ret;
lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
if (!lun_bitmap)
@@ -294,55 +295,6 @@ static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
return secs_to_sync;
}
-static inline int pblk_valid_meta_ppa(struct pblk *pblk,
- struct pblk_line *meta_line,
- struct ppa_addr *ppa_list, int nr_ppas)
-{
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
- struct pblk_line *data_line;
- struct ppa_addr ppa, ppa_opt;
- u64 paddr;
- int i;
-
- data_line = &pblk->lines[pblk_dev_ppa_to_line(ppa_list[0])];
- paddr = pblk_lookup_page(pblk, meta_line);
- ppa = addr_to_gen_ppa(pblk, paddr, 0);
-
- if (test_bit(pblk_ppa_to_pos(geo, ppa), data_line->blk_bitmap))
- return 1;
-
- /* Schedule a metadata I/O that is half the distance from the data I/O
- * with regards to the number of LUNs forming the pblk instance. This
- * balances LUN conflicts across every I/O.
- *
- * When the LUN configuration changes (e.g., due to GC), this distance
- * can align, which would result on a LUN deadlock. In this case, modify
- * the distance to not be optimal, but allow metadata I/Os to succeed.
- */
- ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0);
- if (unlikely(ppa_opt.ppa == ppa.ppa)) {
- data_line->meta_distance--;
- return 0;
- }
-
- for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
- if (ppa_list[i].g.ch == ppa_opt.g.ch &&
- ppa_list[i].g.lun == ppa_opt.g.lun)
- return 1;
-
- if (test_bit(pblk_ppa_to_pos(geo, ppa_opt), data_line->blk_bitmap)) {
- for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
- if (ppa_list[i].g.ch == ppa.g.ch &&
- ppa_list[i].g.lun == ppa.g.lun)
- return 0;
-
- return 1;
- }
-
- return 0;
-}
-
int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
{
struct nvm_tgt_dev *dev = pblk->dev;
@@ -421,8 +373,44 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
return ret;
}
-static int pblk_sched_meta_io(struct pblk *pblk, struct ppa_addr *prev_list,
- int prev_n)
+static inline bool pblk_valid_meta_ppa(struct pblk *pblk,
+ struct pblk_line *meta_line,
+ struct nvm_rq *data_rqd)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct nvm_geo *geo = &dev->geo;
+ struct pblk_c_ctx *data_c_ctx = nvm_rq_to_pdu(data_rqd);
+ struct pblk_line *data_line = pblk_line_get_data(pblk);
+ struct ppa_addr ppa, ppa_opt;
+ u64 paddr;
+ int pos_opt;
+
+ /* Schedule a metadata I/O that is half the distance from the data I/O
+ * with regards to the number of LUNs forming the pblk instance. This
+ * balances LUN conflicts across every I/O.
+ *
+ * When the LUN configuration changes (e.g., due to GC), this distance
+ * can align, which would result on metadata and data I/Os colliding. In
+ * this case, modify the distance to not be optimal, but move the
+ * optimal in the right direction.
+ */
+ paddr = pblk_lookup_page(pblk, meta_line);
+ ppa = addr_to_gen_ppa(pblk, paddr, 0);
+ ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0);
+ pos_opt = pblk_ppa_to_pos(geo, ppa_opt);
+
+ if (test_bit(pos_opt, data_c_ctx->lun_bitmap) ||
+ test_bit(pos_opt, data_line->blk_bitmap))
+ return true;
+
+ if (unlikely(pblk_ppa_comp(ppa_opt, ppa)))
+ data_line->meta_distance--;
+
+ return false;
+}
+
+static struct pblk_line *pblk_should_submit_meta_io(struct pblk *pblk,
+ struct nvm_rq *data_rqd)
{
struct pblk_line_meta *lm = &pblk->lm;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
@@ -432,57 +420,45 @@ static int pblk_sched_meta_io(struct pblk *pblk, struct ppa_addr *prev_list,
retry:
if (list_empty(&l_mg->emeta_list)) {
spin_unlock(&l_mg->close_lock);
- return 0;
+ return NULL;
}
meta_line = list_first_entry(&l_mg->emeta_list, struct pblk_line, list);
if (meta_line->emeta->mem >= lm->emeta_len[0])
goto retry;
spin_unlock(&l_mg->close_lock);
- if (!pblk_valid_meta_ppa(pblk, meta_line, prev_list, prev_n))
- return 0;
+ if (!pblk_valid_meta_ppa(pblk, meta_line, data_rqd))
+ return NULL;
- return pblk_submit_meta_io(pblk, meta_line);
+ return meta_line;
}
static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
{
- struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
struct ppa_addr erase_ppa;
+ struct pblk_line *meta_line;
int err;
ppa_set_empty(&erase_ppa);
/* Assign lbas to ppas and populate request structure */
- err = pblk_setup_w_rq(pblk, rqd, c_ctx, &erase_ppa);
+ err = pblk_setup_w_rq(pblk, rqd, &erase_ppa);
if (err) {
pr_err("pblk: could not setup write request: %d\n", err);
return NVM_IO_ERR;
}
- if (likely(ppa_empty(erase_ppa))) {
- /* Submit metadata write for previous data line */
- err = pblk_sched_meta_io(pblk, rqd->ppa_list, rqd->nr_ppas);
- if (err) {
- pr_err("pblk: metadata I/O submission failed: %d", err);
- return NVM_IO_ERR;
- }
+ meta_line = pblk_should_submit_meta_io(pblk, rqd);
- /* Submit data write for current data line */
- err = pblk_submit_io(pblk, rqd);
- if (err) {
- pr_err("pblk: data I/O submission failed: %d\n", err);
- return NVM_IO_ERR;
- }
- } else {
- /* Submit data write for current data line */
- err = pblk_submit_io(pblk, rqd);
- if (err) {
- pr_err("pblk: data I/O submission failed: %d\n", err);
- return NVM_IO_ERR;
- }
+ /* Submit data write for current data line */
+ err = pblk_submit_io(pblk, rqd);
+ if (err) {
+ pr_err("pblk: data I/O submission failed: %d\n", err);
+ return NVM_IO_ERR;
+ }
- /* Submit available erase for next data line */
+ if (!ppa_empty(erase_ppa)) {
+ /* Submit erase for next data line */
if (pblk_blk_erase_async(pblk, erase_ppa)) {
struct pblk_line *e_line = pblk_line_get_erase(pblk);
struct nvm_tgt_dev *dev = pblk->dev;
@@ -495,6 +471,15 @@ static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
}
}
+ if (meta_line) {
+ /* Submit metadata write for previous data line */
+ err = pblk_submit_meta_io(pblk, meta_line);
+ if (err) {
+ pr_err("pblk: metadata I/O submission failed: %d", err);
+ return NVM_IO_ERR;
+ }
+ }
+
return NVM_IO_OK;
}
--
2.9.3
Powered by blists - more mailing lists