lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1412783736-18115-6-git-send-email-m@bjorling.me>
Date:	Wed,  8 Oct 2014 17:55:36 +0200
From:	Matias Bjørling <m@...rling.me>
To:	thornber@...hat.com, snitzer@...hat.com, hch@...radead.org,
	hayakawa@...inux.co.jp, axboe@...com, andy@...off.com,
	dm-devel@...hat.com, linux-fsdevel@...r.kernel.org,
	linux-kernel@...r.kernel.org, bvanassche@....org,
	linux-nvme@...ts.infradead.org
Cc:	jmad@....dk, Matias Bjørling <m@...rling.me>
Subject: [PATCH 5/5] lightnvm: null_blk integration

Allows the null_blk driver to hook into LightNVM for performance
evaluation. The number of channels exposed to LightNVM can be configured
through the lightnvm_num_channels module parameter.

Contributions in this patch from:

  Jesper Madsen <jmad@....dk>

Signed-off-by: Matias Bjørling <m@...rling.me>
---
 Documentation/block/null_blk.txt |   9 +++
 drivers/block/null_blk.c         | 149 +++++++++++++++++++++++++++++++++++----
 2 files changed, 144 insertions(+), 14 deletions(-)

diff --git a/Documentation/block/null_blk.txt b/Documentation/block/null_blk.txt
index b2830b4..639d378 100644
--- a/Documentation/block/null_blk.txt
+++ b/Documentation/block/null_blk.txt
@@ -14,6 +14,9 @@ The following instances are possible:
   Multi-queue block-layer
     - Request-based.
     - Configurable submission queues per device.
+  LightNVM compatible
+    - Request-based.
+    - Same configuration as the multi-queue block layer.
   No block-layer (Known as bio-based)
     - Bio-based. IO requests are submitted directly to the device driver.
     - Directly accepts bio data structure and returns them.
@@ -28,6 +31,7 @@ queue_mode=[0-2]: Default: 2-Multi-queue
   0: Bio-based.
   1: Single-queue.
   2: Multi-queue.
+  4: LightNVM device with multi-queue.
 
 home_node=[0--nr_nodes]: Default: NUMA_NO_NODE
   Selects what CPU node the data structures are allocated from.
@@ -70,3 +74,8 @@ use_per_node_hctx=[0/1]: Default: 0
      parameter.
   1: The multi-queue block layer is instantiated with a hardware dispatch
      queue for each CPU node in the system.
+
+IV: LightNVM specific parameters
+
+lightnvm_num_channels=[x]: Default: 1
+  Number of LightNVM channels that are exposed to the LightNVM driver.
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 00d469c..2f679b1 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -7,6 +7,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/blk-mq.h>
+#include <linux/lightnvm.h>
 #include <linux/hrtimer.h>
 
 struct nullb_cmd {
@@ -25,6 +26,7 @@ struct nullb_queue {
 	unsigned int queue_depth;
 
 	struct nullb_cmd *cmds;
+	struct nullb *nb;
 };
 
 struct nullb {
@@ -33,6 +35,7 @@ struct nullb {
 	struct request_queue *q;
 	struct gendisk *disk;
 	struct blk_mq_tag_set tag_set;
+	struct nvm_dev *nvm_dev;
 	struct hrtimer timer;
 	unsigned int queue_depth;
 	spinlock_t lock;
@@ -67,6 +70,7 @@ enum {
 	NULL_Q_BIO		= 0,
 	NULL_Q_RQ		= 1,
 	NULL_Q_MQ		= 2,
+	NULL_Q_LIGHTNVM		= 4,
 };
 
 static int submit_queues;
@@ -79,7 +83,7 @@ MODULE_PARM_DESC(home_node, "Home node for the device");
 
 static int queue_mode = NULL_Q_MQ;
 module_param(queue_mode, int, S_IRUGO);
-MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");
+MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue,4=lightnvm)");
 
 static int gb = 250;
 module_param(gb, int, S_IRUGO);
@@ -109,6 +113,10 @@ static bool use_per_node_hctx = false;
 module_param(use_per_node_hctx, bool, S_IRUGO);
 MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
 
+static int lightnvm_num_channels = 1;	/* reported as nvm_id->nchannels by null_nvm_id(); not range-checked in this patch */
+module_param(lightnvm_num_channels, int, S_IRUGO);
+MODULE_PARM_DESC(lightnvm_num_channels, "Number of channels to be exposed to LightNVM. Default: 1");
+
 static void put_tag(struct nullb_queue *nq, unsigned int tag)
 {
 	clear_bit_unlock(tag, nq->tag_map);
@@ -179,6 +187,9 @@ static void end_cmd(struct nullb_cmd *cmd)
 	case NULL_Q_MQ:
 		blk_mq_end_io(cmd->rq, 0);
 		return;
+	case NULL_Q_LIGHTNVM:
+		nvm_end_io(cmd->nq->nb->nvm_dev, cmd->rq, 0);
+		return;
 	case NULL_Q_RQ:
 		INIT_LIST_HEAD(&cmd->rq->queuelist);
 		blk_end_request_all(cmd->rq, 0);
@@ -227,7 +238,7 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd)
 
 static void null_softirq_done_fn(struct request *rq)
 {
-	if (queue_mode == NULL_Q_MQ)
+	if (queue_mode & (NULL_Q_MQ|NULL_Q_LIGHTNVM))
 		end_cmd(blk_mq_rq_to_pdu(rq));
 	else
 		end_cmd(rq->special);
@@ -239,6 +250,7 @@ static inline void null_handle_cmd(struct nullb_cmd *cmd)
 	switch (irqmode) {
 	case NULL_IRQ_SOFTIRQ:
 		switch (queue_mode)  {
+		case NULL_Q_LIGHTNVM:
 		case NULL_Q_MQ:
 			blk_mq_complete_request(cmd->rq);
 			break;
@@ -313,14 +325,67 @@ static void null_request_fn(struct request_queue *q)
 	}
 }
 
+static int null_nvm_id(struct nvm_dev *dev, struct nvm_id *nvm_id)
+{	/* LightNVM .identify callback: fills in the device-wide identity; dev is unused. */
+	nvm_id->ver_id = 0x1;
+	nvm_id->nvm_type = NVM_NVMT_BLK;
+	nvm_id->nchannels = lightnvm_num_channels;	/* module parameter, default 1 */
+	return 0;	/* never fails */
+}
+
+static int null_nvm_id_chnl(struct nvm_dev *dev, int chnl_num,
+							struct nvm_id_chnl *ic)
+{	/* LightNVM .identify_channel callback; dev and chnl_num are unused, so every channel reports the same values. */
+	sector_t size = 1024ULL * 1024 * 1024 * gb;	/* widen first: gb * 1024 * 1024 overflows int for gb >= 2048 */
+
+	sector_div(size, bs);	/* in-place divide: size becomes the capacity in bs-sized units */
+	ic->queue_size = hw_queue_depth;
+	ic->gran_read = bs;
+	ic->gran_write = bs;
+	ic->gran_erase = bs * 256;
+	ic->oob_size = 0;
+	ic->t_r = ic->t_sqr = 25000; /* 25us */
+	ic->t_w = ic->t_sqw = 500000; /* 500us */
+	ic->t_e = 1500000; /* 1500us (1.5ms) */
+	ic->io_sched = NVM_IOSCHED_CHANNEL;
+	ic->laddr_begin = 0;
+	ic->laddr_end = size / 8;	/* NOTE(review): rationale for the divisor 8 is not visible here - confirm */
+
+	return 0;
+}
+
+static int null_nvm_get_features(struct nvm_dev *dev,
+						struct nvm_get_features *gf)
+{	/* LightNVM .get_features callback: sets the NVM_RSP_L2P, NVM_RSP_P2L and NVM_RSP_GC bits in rsp[0]. */
+	gf->rsp[0] = (1 << NVM_RSP_L2P);	/* plain assignment: discards whatever rsp[0] held before */
+	gf->rsp[0] |= (1 << NVM_RSP_P2L);
+	gf->rsp[0] |= (1 << NVM_RSP_GC);
+	return 0;	/* never fails; dev is unused */
+}
+
+static int null_nvm_set_rsp(struct nvm_dev *dev, u8 rsp, u8 val)
+{	/* LightNVM .set_responsibility callback: returns the same status for every call. */
+	return NVM_RID_NOT_CHANGEABLE | NVM_DNR;	/* dev, rsp and val are all ignored */
+}
+
 static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
 {
 	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
+	struct nullb_queue *nq = hctx->driver_data;
+	struct nvm_dev *nvm_dev = nq->nb->nvm_dev;	/* NULL unless the device was set up in LightNVM mode */
+	int ret = BLK_MQ_RQ_QUEUE_OK;
+
+	if (nvm_dev) {	/* give LightNVM first look at the request */
+		ret = nvm_queue_rq(nvm_dev, rq);
+		if (ret)	/* non-zero status: do not complete the request here */
+			goto out;
+	}
 
 	cmd->rq = rq;
-	cmd->nq = hctx->driver_data;
+	cmd->nq = nq;
 
 	null_handle_cmd(cmd);
+out:
-	return BLK_MQ_RQ_QUEUE_OK;
+	return ret;	/* was hard-coded OK, which hid nvm_queue_rq() failures; assumes it returns BLK_MQ_RQ_QUEUE_* codes - TODO confirm */
 }
 
@@ -331,6 +396,7 @@ static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
 
 	init_waitqueue_head(&nq->wait);
 	nq->queue_depth = nullb->queue_depth;
+	nq->nb = nullb;
 }
 
 static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
@@ -346,6 +412,13 @@ static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 	return 0;
 }
 
+static struct lightnvm_dev_ops null_nvm_dev_ops = {	/* installed as nvm_dev->ops in null_add_dev() */
+	.identify		= null_nvm_id,
+	.identify_channel	= null_nvm_id_chnl,
+	.get_features		= null_nvm_get_features,
+	.set_responsibility	= null_nvm_set_rsp,
+};
+
 static struct blk_mq_ops null_mq_ops = {
 	.queue_rq       = null_queue_rq,
 	.map_queue      = blk_mq_map_queue,
@@ -359,8 +432,11 @@ static void null_del_dev(struct nullb *nullb)
 
 	del_gendisk(nullb->disk);
 	blk_cleanup_queue(nullb->q);
-	if (queue_mode == NULL_Q_MQ)
+	if (queue_mode & (NULL_Q_MQ|NULL_Q_LIGHTNVM)) {
+		if (queue_mode == NULL_Q_LIGHTNVM)
+			nvm_remove_sysfs(nullb->disk->private_data);
 		blk_mq_free_tag_set(&nullb->tag_set);
+	}
 	put_disk(nullb->disk);
 	kfree(nullb);
 }
@@ -374,10 +450,26 @@ static void null_release(struct gendisk *disk, fmode_t mode)
 {
 }
 
+static int null_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
+							unsigned long arg)
+{	/* Block-device ioctl hook: offers the command to LightNVM, otherwise answers -ENOTTY. */
+	struct nullb *nullb = bdev->bd_disk->private_data;
+	int ret;
+
+	if (nullb->nvm_dev) {	/* only set when the device was created in LightNVM mode */
+		ret = nvm_ioctl(nullb->nvm_dev, mode, cmd, arg);
+		if (ret != -ENOTTY)	/* LightNVM produced a result (success or error) */
+			return ret;
+	}
+
+	return -ENOTTY;	/* no other ioctls are implemented */
+}
+
 static const struct block_device_operations null_fops = {
 	.owner =	THIS_MODULE,
 	.open =		null_open,
 	.release =	null_release,
+	.ioctl =	null_ioctl,	/* forwards to nvm_ioctl() when nullb->nvm_dev is set */
 };
 
 static int setup_commands(struct nullb_queue *nq)
@@ -461,6 +553,7 @@ static int null_add_dev(void)
 {
 	struct gendisk *disk;
 	struct nullb *nullb;
+	struct nvm_dev *nvm_dev = NULL;
 	sector_t size;
 	int rv;
 
@@ -472,14 +565,14 @@ static int null_add_dev(void)
 
 	spin_lock_init(&nullb->lock);
 
-	if (queue_mode == NULL_Q_MQ && use_per_node_hctx)
+	if ((queue_mode & (NULL_Q_MQ|NULL_Q_LIGHTNVM)) && use_per_node_hctx)
 		submit_queues = nr_online_nodes;
 
 	rv = setup_queues(nullb);
 	if (rv)
 		goto out_free_nullb;
 
-	if (queue_mode == NULL_Q_MQ) {
+	if (queue_mode & (NULL_Q_MQ|NULL_Q_LIGHTNVM)) {
 		nullb->tag_set.ops = &null_mq_ops;
 		nullb->tag_set.nr_hw_queues = submit_queues;
 		nullb->tag_set.queue_depth = hw_queue_depth;
@@ -497,6 +590,18 @@ static int null_add_dev(void)
 			rv = -ENOMEM;
 			goto out_cleanup_tags;
 		}
+
+		if (queue_mode == NULL_Q_LIGHTNVM) {
+			nvm_dev = nvm_alloc();
+			if (!nvm_dev) {
+				rv = -ENOMEM;	/* rv was left untouched here, so the failure carried no error code */
+				goto out_cleanup_blk_queue;	/* nullb->q already exists in this branch and must be cleaned up too */
+			}
+			nvm_dev->ops = &null_nvm_dev_ops;
+			nvm_dev->driver_data = nullb;
+			nvm_dev->drv_cmd_size = nullb->tag_set.cmd_size;	/* remember the driver's own per-request size */
+			nullb->tag_set.cmd_size += nvm_cmd_size();	/* NOTE(review): tag-set setup is outside this hunk - confirm the larger size still takes effect */
+		}
 	} else if (queue_mode == NULL_Q_BIO) {
 		nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
 		if (!nullb->q) {
@@ -517,6 +622,7 @@ static int null_add_dev(void)
 	}
 
 	nullb->q->queuedata = nullb;
+
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
 
 	disk = nullb->disk = alloc_disk_node(1, home_node);
@@ -525,11 +631,6 @@ static int null_add_dev(void)
 		goto out_cleanup_blk_queue;
 	}
 
-	mutex_lock(&lock);
-	list_add_tail(&nullb->list, &nullb_list);
-	nullb->index = nullb_indexes++;
-	mutex_unlock(&lock);
-
 	blk_queue_logical_block_size(nullb->q, bs);
 	blk_queue_physical_block_size(nullb->q, bs);
 
@@ -540,17 +641,37 @@ static int null_add_dev(void)
 	disk->flags |= GENHD_FL_EXT_DEVT;
 	disk->major		= null_major;
-	disk->first_minor	= nullb->index;
-	disk->fops		= &null_fops;
 	disk->private_data	= nullb;
+	disk->fops		= &null_fops;
 	disk->queue		= nullb->q;
+
+	if (nvm_dev) {
+		nvm_dev->q = nullb->q;
+		nvm_dev->disk = disk;
+
+		rv = nvm_init(disk, nvm_dev);	/* keep the status so the error path has a code; assumes 0 or negative errno - TODO confirm */
+		if (rv)
+			goto out_cleanup_nvm;
+		nullb->nvm_dev = nvm_dev;	/* from here on null_queue_rq()/null_ioctl() route through LightNVM */
+	}
+
+	mutex_lock(&lock);	/* registration happens only after nvm_init() succeeded */
+	list_add_tail(&nullb->list, &nullb_list);
+	nullb->index = nullb_indexes++;
+	mutex_unlock(&lock);
+
+	disk->first_minor	= nullb->index;	/* must follow the index assignment above; it used to read the index before it was set */
 	sprintf(disk->disk_name, "nullb%d", nullb->index);
 	add_disk(disk);
+	nvm_add_sysfs(nvm_dev);	/* NOTE(review): nvm_dev is NULL outside LightNVM mode - confirm nvm_add_sysfs() accepts NULL */
 	return 0;
 
+out_cleanup_nvm:
+	put_disk(disk);
 out_cleanup_blk_queue:
 	blk_cleanup_queue(nullb->q);
 out_cleanup_tags:
-	if (queue_mode == NULL_Q_MQ)
+	nvm_free(nvm_dev);
+	if (queue_mode & (NULL_Q_MQ|NULL_Q_LIGHTNVM))
 		blk_mq_free_tag_set(&nullb->tag_set);
 out_cleanup_queues:
 	cleanup_queues(nullb);
@@ -570,7 +691,7 @@ static int __init null_init(void)
 		bs = PAGE_SIZE;
 	}
 
-	if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
+	if (queue_mode & (NULL_Q_MQ|NULL_Q_LIGHTNVM) && use_per_node_hctx) {
 		if (submit_queues < nr_online_nodes) {
 			pr_warn("null_blk: submit_queues param is set to %u.",
 							nr_online_nodes);
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ