[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-Id: <1442851550-25249-5-git-send-email-m@bjorling.me>
Date: Mon, 21 Sep 2015 18:05:49 +0200
From: Matias Bjørling <m@...rling.me>
To: hch@...radead.org, axboe@...com, linux-fsdevel@...r.kernel.org,
linux-kernel@...r.kernel.org, linux-nvme@...ts.infradead.org
Cc: jg@...htnvm.io, Stephen.Bates@...s.com, keith.busch@...el.com,
Matias Bjørling <m@...rling.me>
Subject: [PATCH v8 4/5] null_nvm: LightNVM test driver
This driver implements the I/O flow for a LightNVM device driver. It
does no data transfers. It can be used to test setup/teardown of devices and
to evaluate the performance of block managers and targets.
The framework of the driver is derived from the null_blk module.
Signed-off-by: Matias Bjørling <m@...rling.me>
---
drivers/lightnvm/Kconfig | 6 +
drivers/lightnvm/Makefile | 1 +
drivers/lightnvm/null_nvm.c | 468 ++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 475 insertions(+)
create mode 100644 drivers/lightnvm/null_nvm.c
diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig
index 21a271e..d34ba6a 100644
--- a/drivers/lightnvm/Kconfig
+++ b/drivers/lightnvm/Kconfig
@@ -39,4 +39,10 @@ config NVM_RRPC
host. The target is implemented using a linear mapping table and
cost-based garbage collection. It is optimized for 4K IO sizes.
+config NVM_NULL_NVM
+ tristate "Null test LightNVM driver"
+ ---help---
+ The null test driver can be used to evaluate targets without an
+ underlying device.
+
endif # NVM
diff --git a/drivers/lightnvm/Makefile b/drivers/lightnvm/Makefile
index 1b7c00b..145ee5d 100644
--- a/drivers/lightnvm/Makefile
+++ b/drivers/lightnvm/Makefile
@@ -5,3 +5,4 @@
obj-$(CONFIG_NVM) := core.o
obj-$(CONFIG_NVM_BM_HB) += bm_hb.o
obj-$(CONFIG_NVM_RRPC) += rrpc.o
+obj-$(CONFIG_NVM_NULL_NVM) += null_nvm.o
diff --git a/drivers/lightnvm/null_nvm.c b/drivers/lightnvm/null_nvm.c
new file mode 100644
index 0000000..6fb80dd
--- /dev/null
+++ b/drivers/lightnvm/null_nvm.c
@@ -0,0 +1,468 @@
+/*
+ * derived from Jens Axboe's block/null_blk.c
+ */
+
+#include <linux/module.h>
+
+#include <linux/moduleparam.h>
+#include <linux/sched.h>
+#include <linux/blkdev.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/blk-mq.h>
+#include <linux/hrtimer.h>
+#include <linux/lightnvm.h>
+
+/*
+ * Slab cache used by null_create_dma_pool() to back its mempool. This is a
+ * single file-scope pointer that null_create_dma_pool() assigns.
+ */
+static struct kmem_cache *ppa_cache;
+/*
+ * Per-request driver data. null_queue_rq() obtains it with
+ * blk_mq_rq_to_pdu(), and its size is handed to blk-mq as tag_set.cmd_size.
+ */
+struct nulln_cmd {
+ /* Links the command onto a per-CPU completion_queue (timer mode). */
+ struct llist_node ll_list;
+ /* Request this command belongs to; set in null_queue_rq(). */
+ struct request *rq;
+};
+
+/* State for one registered null LightNVM device. */
+struct nulln {
+ /* Entry in nulln_list. */
+ struct list_head list;
+ /* Device number; used to build disk_name ("nulln<index>"). */
+ unsigned int index;
+ /* blk-mq request queue created from tag_set. */
+ struct request_queue *q;
+ /* blk-mq tag set describing queue count, depth and per-request size. */
+ struct blk_mq_tag_set tag_set;
+ /* NOTE(review): not referenced by any function visible in this file. */
+ struct hrtimer timer;
+ /* Name passed to nvm_register() and nvm_unregister(). */
+ char disk_name[DISK_NAME_LEN];
+};
+
+/* All devices created by null_add_dev(). */
+static LIST_HEAD(nulln_list);
+/* Held while nulln_list and nulln_indexes are modified; set up in null_init(). */
+static struct mutex nulln_lock;
+/* Next device index to hand out. */
+static int nulln_indexes;
+
+/* Per-CPU batch of commands waiting for delayed (timer mode) completion. */
+struct completion_queue {
+ /* Commands queued by null_cmd_end_timer(). */
+ struct llist_head list;
+ /* Runs null_cmd_timer_expired() to complete the queued commands. */
+ struct hrtimer timer;
+};
+
+/*
+ * These are per-cpu for now; they will need to be configured by the
+ * complete_queues parameter and appropriately mapped.
+ */
+static DEFINE_PER_CPU(struct completion_queue, completion_queues);
+
+/*
+ * Completion modes selectable through the irqmode module parameter.
+ * null_handle_cmd() completes NONE and SOFTIRQ through
+ * blk_mq_complete_request() and TIMER through the per-CPU hrtimer.
+ */
+enum {
+ NULL_IRQ_NONE = 0,
+ NULL_IRQ_SOFTIRQ = 1,
+ NULL_IRQ_TIMER = 2,
+};
+
+/*
+ * Number of blk-mq hardware queues per device. null_init() adjusts it
+ * (a value of 0 is replaced by 1) before any device is created.
+ */
+static int submit_queues;
+module_param(submit_queues, int, S_IRUGO);
+MODULE_PARM_DESC(submit_queues, "Number of submission queues");
+
+/* NUMA node used for the device allocation and for the tag set. */
+static int home_node = NUMA_NO_NODE;
+module_param(home_node, int, S_IRUGO);
+MODULE_PARM_DESC(home_node, "Home node for the device");
+
+/*
+ * Parse @str as a base-10 integer and store it in *@val when it lies in
+ * the inclusive range [@min, @max].
+ *
+ * Returns 0 on success, or -EINVAL if the string does not parse or the value
+ * is out of range; *@val is left untouched in that case.
+ */
+static int null_param_store_val(const char *str, int *val, int min, int max)
+{
+	int parsed;
+
+	/* Short-circuit: @parsed is only inspected once parsing has succeeded. */
+	if (kstrtoint(str, 10, &parsed) || parsed < min || parsed > max)
+		return -EINVAL;
+
+	*val = parsed;
+	return 0;
+}
+
+/* Advertised capacity in GB; null_id() derives the per-channel range from it. */
+static int gb = 250;
+module_param(gb, int, S_IRUGO);
+MODULE_PARM_DESC(gb, "Size in GB");
+
+/*
+ * Block size in bytes. Used as the queue's logical and physical block size
+ * and as the read/write granularity reported by null_id(); null_init()
+ * limits it to PAGE_SIZE.
+ */
+static int bs = 4096;
+module_param(bs, int, S_IRUGO);
+MODULE_PARM_DESC(bs, "Block size (in bytes)");
+
+/* Number of devices null_init() creates at module load. */
+static int nr_devices = 1;
+module_param(nr_devices, int, S_IRUGO);
+MODULE_PARM_DESC(nr_devices, "Number of devices to register");
+
+/* Completion mode (one of NULL_IRQ_*); validated by null_set_irqmode(). */
+static int irqmode = NULL_IRQ_SOFTIRQ;
+
+/*
+ * Setter for the irqmode module parameter: accepts only values between
+ * NULL_IRQ_NONE and NULL_IRQ_TIMER (inclusive). @kp is not used.
+ *
+ * Returns 0 on success or -EINVAL for an unparsable or out-of-range value.
+ */
+static int null_set_irqmode(const char *str, const struct kernel_param *kp)
+{
+	const int lowest = NULL_IRQ_NONE;
+	const int highest = NULL_IRQ_TIMER;
+
+	return null_param_store_val(str, &irqmode, lowest, highest);
+}
+
+/* irqmode uses a custom setter for range checking and the stock int getter. */
+static const struct kernel_param_ops null_irqmode_param_ops = {
+ .set = null_set_irqmode,
+ .get = param_get_int,
+};
+
+device_param_cb(irqmode, &null_irqmode_param_ops, &irqmode, S_IRUGO);
+MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");
+
+/* Delay used when arming the completion hrtimer in null_cmd_end_timer(). */
+static int completion_nsec = 10000;
+module_param(completion_nsec, int, S_IRUGO);
+MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");
+
+/* Tag set queue depth; also reported as each channel's queue_size. */
+static int hw_queue_depth = 64;
+module_param(hw_queue_depth, int, S_IRUGO);
+MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");
+
+/* When set, submit_queues is forced to the number of online NUMA nodes. */
+static bool use_per_node_hctx;
+module_param(use_per_node_hctx, bool, S_IRUGO);
+MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
+
+/* Number of channels null_id() reports; the address space is split evenly. */
+static int num_channels = 1;
+module_param(num_channels, int, S_IRUGO);
+MODULE_PARM_DESC(num_channels, "Number of channels to be exposed. Default: 1");
+
+/*
+ * hrtimer callback: complete every command queued on this CPU's
+ * completion queue.
+ *
+ * The pending list is detached in one step with llist_del_all() and then
+ * reversed before being walked. The outer loop repeats until no further
+ * commands were queued while the previous batch was being completed.
+ *
+ * Always returns HRTIMER_NORESTART; null_cmd_end_timer() arms the timer
+ * again when llist_add() reports that it queued onto an empty list.
+ */
+static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
+{
+	struct completion_queue *cq;
+	struct llist_node *entry;
+	struct nulln_cmd *cmd;
+
+	cq = &per_cpu(completion_queues, smp_processor_id());
+
+	while ((entry = llist_del_all(&cq->list)) != NULL) {
+		entry = llist_reverse_order(entry);
+		do {
+			cmd = container_of(entry, struct nulln_cmd, ll_list);
+			/* Step to the next node before the current one is released. */
+			entry = entry->next;
+			/*
+			 * cmd is the request's PDU (see null_queue_rq()), so once
+			 * the request has been ended neither cmd nor cmd->rq may
+			 * be touched. The legacy "restart a stopped queue" code
+			 * that used to follow did exactly that, and could never
+			 * run anyway because every queue of this driver is
+			 * created with blk_mq_init_queue().
+			 */
+			blk_mq_end_request(cmd->rq, 0);
+		} while (entry);
+	}
+
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * Queue @cmd for delayed completion on the current CPU.
+ *
+ * The command is pushed onto this CPU's completion queue. If llist_add()
+ * reports true, the CPU's hrtimer is started with a relative, pinned
+ * timeout of completion_nsec nanoseconds. get_cpu()/put_cpu() bracket the
+ * access to the per-CPU queue.
+ */
+static void null_cmd_end_timer(struct nulln_cmd *cmd)
+{
+	int cpu = get_cpu();
+	struct completion_queue *cq = &per_cpu(completion_queues, cpu);
+	bool arm_timer;
+
+	cmd->ll_list.next = NULL;
+	arm_timer = llist_add(&cmd->ll_list, &cq->list);
+	if (arm_timer)
+		hrtimer_start(&cq->timer, ktime_set(0, completion_nsec),
+			      HRTIMER_MODE_REL_PINNED);
+
+	put_cpu();
+}
+
+/*
+ * blk-mq ->complete handler (installed in null_mq_ops): finish @rq with
+ * a zero (success) status.
+ */
+static void null_softirq_done_fn(struct request *rq)
+{
+	const int error = 0;
+
+	blk_mq_end_request(rq, error);
+}
+
+/*
+ * Complete @cmd according to the irqmode module parameter: timer mode defers
+ * the completion to the per-CPU hrtimer, while the "none" and "softirq" modes
+ * both go through blk_mq_complete_request(). Any other value does nothing.
+ */
+static inline void null_handle_cmd(struct nulln_cmd *cmd)
+{
+	const int mode = irqmode;
+
+	if (mode == NULL_IRQ_TIMER) {
+		null_cmd_end_timer(cmd);
+		return;
+	}
+
+	if (mode == NULL_IRQ_SOFTIRQ || mode == NULL_IRQ_NONE)
+		blk_mq_complete_request(cmd->rq);
+}
+
+/*
+ * Fill in the identify data for the emulated device.
+ *
+ * @q:  request queue of the device (unused)
+ * @id: identify structure to populate; id->chnls is allocated here
+ *
+ * The capacity given by the gb parameter is converted to blocks of bs bytes
+ * and split evenly across num_channels channels, each of which receives a
+ * contiguous [laddr_begin, laddr_end] range.
+ *
+ * Returns 0 on success, -EINVAL if gb, bs or num_channels is not positive,
+ * or -ENOMEM if the channel array cannot be allocated.
+ * NOTE(review): id->chnls is presumably released by the caller - confirm.
+ */
+static int null_id(struct request_queue *q, struct nvm_id *id)
+{
+	sector_t size;
+	unsigned long per_chnl_size;
+	struct nvm_id_chnl *chnl;
+	int i;
+
+	/* Module parameters are unchecked ints; avoid division by zero below. */
+	if (gb <= 0 || bs <= 0 || num_channels <= 0)
+		return -EINVAL;
+
+	/*
+	 * Widen before multiplying: "gb * 1024 * 1024" would be evaluated in
+	 * int and overflow for gb >= 2048.
+	 */
+	size = gb * 1024ULL * 1024 * 1024;
+	per_chnl_size = size / bs / num_channels;
+
+	id->ver_id = 0x1;
+	id->nvm_type = NVM_NVMT_BLK;
+	id->nchannels = num_channels;
+
+	id->chnls = kmalloc_array(id->nchannels, sizeof(struct nvm_id_chnl),
+					GFP_KERNEL);
+	if (!id->chnls)
+		return -ENOMEM;
+
+	for (i = 0; i < id->nchannels; i++) {
+		chnl = &id->chnls[i];
+		chnl->queue_size = hw_queue_depth;
+		chnl->gran_read = bs;
+		chnl->gran_write = bs;
+		chnl->gran_erase = bs * 256;
+		chnl->oob_size = 0;
+		chnl->t_r = chnl->t_sqr = 25000; /* 25us */
+		chnl->t_w = chnl->t_sqw = 500000; /* 500us */
+		chnl->t_e = 1500000; /* 1500us */
+		chnl->io_sched = NVM_IOSCHED_CHANNEL;
+		chnl->laddr_begin = per_chnl_size * i;
+		chnl->laddr_end = per_chnl_size * (i + 1) - 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Report the features of the emulated device: the response field is set to
+ * NVM_RSP_L2P and no extensions are advertised. @q is unused.
+ *
+ * Always returns 0.
+ */
+static int null_get_features(struct request_queue *q,
+					struct nvm_get_features *gf)
+{
+	gf->ext = 0;
+	gf->rsp = NVM_RSP_L2P;
+
+	return 0;
+}
+
+/*
+ * Completion callback installed by null_submit_io().
+ *
+ * Calls the end_io hook of the target type that owns the nvm_rq stored in
+ * rq->end_io_data, passing @error through, then drops the request with
+ * blk_put_request().
+ */
+static void null_end_io(struct request *rq, int error)
+{
+	struct nvm_rq *rqd = rq->end_io_data;
+
+	rqd->ins->tt->end_io(rqd, error);
+	blk_put_request(rq);
+}
+
+/*
+ * Submit a LightNVM request by wrapping its bio in a driver-private blk-mq
+ * request and queueing it without waiting for completion.
+ *
+ * @q:   request queue of the device
+ * @rqd: LightNVM request; rqd->bio supplies direction, sector, priority and
+ *       length, and rqd itself is stored as the request's end_io_data
+ *
+ * Returns 0 once the request has been queued, or the negative errno reported
+ * by blk_mq_alloc_request(). Completion is delivered through null_end_io().
+ */
+static int null_submit_io(struct request_queue *q, struct nvm_rq *rqd)
+{
+	struct request *rq;
+	struct bio *bio = rqd->bio;
+
+	rq = blk_mq_alloc_request(q, bio_rw(bio), GFP_KERNEL, 0);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);	/* keep the real cause, not a blanket -ENOMEM */
+
+	rq->cmd_type = REQ_TYPE_DRV_PRIV;
+	rq->__sector = bio->bi_iter.bi_sector;
+	rq->ioprio = bio_prio(bio);
+
+	if (bio_has_data(bio))
+		rq->nr_phys_segments = bio_phys_segments(q, bio);
+
+	rq->__data_len = bio->bi_iter.bi_size;
+	rq->bio = rq->biotail = bio;
+
+	rq->end_io_data = rqd;
+
+	blk_execute_rq_nowait(q, NULL, rq, 0, null_end_io);
+
+	return 0;
+}
+
+/*
+ * Create the "DMA" pool for a device: a mempool of PAGE_SIZE objects backed
+ * by a dedicated slab cache named @name. @q is unused.
+ *
+ * The mempool keeps a minimum of 64 elements in reserve. On success the slab
+ * cache is also recorded in ppa_cache.
+ *
+ * Returns the mempool as an opaque pointer, or NULL on failure. When the
+ * mempool cannot be created, the slab cache created for it is destroyed
+ * again so that it does not leak.
+ */
+static void *null_create_dma_pool(struct request_queue *q, char *name)
+{
+	struct kmem_cache *cache;
+	mempool_t *virtmem_pool;
+
+	cache = kmem_cache_create(name, PAGE_SIZE, 0, 0, NULL);
+	if (!cache) {
+		pr_err("null_nvm: Unable to create kmem cache\n");
+		return NULL;
+	}
+
+	virtmem_pool = mempool_create_slab_pool(64, cache);
+	if (!virtmem_pool) {
+		pr_err("null_nvm: Unable to create virtual memory pool\n");
+		kmem_cache_destroy(cache);
+		return NULL;
+	}
+
+	ppa_cache = cache;
+
+	return virtmem_pool;
+}
+
+/*
+ * Destroy a pool created by null_create_dma_pool().
+ *
+ * @pool: mempool returned by null_create_dma_pool(); NULL is ignored
+ *
+ * The slab cache backing the pool is destroyed as well. It is taken from the
+ * mempool's pool_data (where mempool_create_slab_pool() stores it) rather
+ * than from ppa_cache, because ppa_cache only remembers the most recently
+ * created cache.
+ */
+static void null_destroy_dma_pool(void *pool)
+{
+	mempool_t *virtmem_pool = pool;
+	struct kmem_cache *cache;
+
+	if (!virtmem_pool)
+		return;
+
+	/* Read the cache pointer before the mempool itself is freed. */
+	cache = virtmem_pool->pool_data;
+
+	mempool_destroy(virtmem_pool);
+	kmem_cache_destroy(cache);
+
+	/* Do not leave the file-scope pointer dangling. */
+	if (ppa_cache == cache)
+		ppa_cache = NULL;
+}
+
+/*
+ * Allocate one element from @pool (a mempool created by
+ * null_create_dma_pool()) using @mem_flags. @q is unused and *@dma_handler
+ * is not written.
+ *
+ * Returns whatever mempool_alloc() returns.
+ */
+static void *null_dev_dma_alloc(struct request_queue *q, void *pool,
+				gfp_t mem_flags, dma_addr_t *dma_handler)
+{
+	mempool_t *virtmem_pool = pool;
+
+	return mempool_alloc(virtmem_pool, mem_flags);
+}
+
+/*
+ * Return @entry to @pool (a mempool created by null_create_dma_pool()).
+ * @dma_handler is unused.
+ */
+static void null_dev_dma_free(void *pool, void *entry, dma_addr_t dma_handler)
+{
+	mempool_t *virtmem_pool = pool;
+
+	mempool_free(entry, virtmem_pool);
+}
+
+/* LightNVM device operations passed to nvm_register() in null_add_dev(). */
+static struct nvm_dev_ops nulln_dev_ops = {
+ .identify = null_id,
+
+ .get_features = null_get_features,
+
+ .submit_io = null_submit_io,
+
+ /* The "DMA" pool is a plain mempool; no device memory is involved. */
+ .create_dma_pool = null_create_dma_pool,
+ .destroy_dma_pool = null_destroy_dma_pool,
+ .dev_dma_alloc = null_dev_dma_alloc,
+ .dev_dma_free = null_dev_dma_free,
+
+ /* Emulate nvme protocol */
+ .max_phys_sect = 64,
+};
+
+/*
+ * blk-mq ->queue_rq handler.
+ *
+ * Binds the request to its per-request nulln_cmd (the request PDU), marks
+ * the request as started and completes it via null_handle_cmd(). @hctx is
+ * unused.
+ *
+ * Always returns BLK_MQ_RQ_QUEUE_OK.
+ */
+static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
+			 const struct blk_mq_queue_data *bd)
+{
+	struct request *rq = bd->rq;
+	struct nulln_cmd *cmd = blk_mq_rq_to_pdu(rq);
+
+	cmd->rq = rq;
+	blk_mq_start_request(rq);
+	null_handle_cmd(cmd);
+
+	return BLK_MQ_RQ_QUEUE_OK;
+}
+
+/* blk-mq operations installed in every device's tag set by null_add_dev(). */
+static struct blk_mq_ops null_mq_ops = {
+ .queue_rq = null_queue_rq,
+ .map_queue = blk_mq_map_queue,
+ .complete = null_softirq_done_fn,
+};
+
+/*
+ * Tear down one device: unlink it from nulln_list, unregister it from
+ * LightNVM by name, then release its request queue, its tag set and finally
+ * the nulln structure itself. @nulln must not be used afterwards.
+ *
+ * null_exit() calls this with nulln_lock held.
+ */
+static void null_del_dev(struct nulln *nulln)
+{
+ list_del_init(&nulln->list);
+
+ nvm_unregister(nulln->disk_name);
+
+ /* The queue is cleaned up before the tag set it was created from. */
+ blk_cleanup_queue(nulln->q);
+ blk_mq_free_tag_set(&nulln->tag_set);
+ kfree(nulln);
+}
+
+/*
+ * Create and register one null LightNVM device.
+ *
+ * Allocates the device on home_node, sets up its blk-mq tag set and request
+ * queue, links it into nulln_list, names it "nulln<index>" and registers it
+ * with nvm_register().
+ *
+ * Returns 0 on success or a negative errno. On failure everything set up so
+ * far is undone, including the nulln_list entry, so that no freed device is
+ * left on the list.
+ */
+static int null_add_dev(void)
+{
+	struct nulln *nulln;
+	int rv;
+
+	nulln = kzalloc_node(sizeof(*nulln), GFP_KERNEL, home_node);
+	if (!nulln) {
+		rv = -ENOMEM;
+		goto out;
+	}
+
+	if (use_per_node_hctx)
+		submit_queues = nr_online_nodes;
+
+	nulln->tag_set.ops = &null_mq_ops;
+	nulln->tag_set.nr_hw_queues = submit_queues;
+	nulln->tag_set.queue_depth = hw_queue_depth;
+	nulln->tag_set.numa_node = home_node;
+	nulln->tag_set.cmd_size = sizeof(struct nulln_cmd);
+	nulln->tag_set.driver_data = nulln;
+
+	rv = blk_mq_alloc_tag_set(&nulln->tag_set);
+	if (rv)
+		goto out_free_nulln;
+
+	nulln->q = blk_mq_init_queue(&nulln->tag_set);
+	if (IS_ERR(nulln->q)) {
+		rv = PTR_ERR(nulln->q);
+		goto out_cleanup_tags;
+	}
+
+	nulln->q->queuedata = nulln;
+	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nulln->q);
+	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nulln->q);
+
+	mutex_lock(&nulln_lock);
+	list_add_tail(&nulln->list, &nulln_list);
+	nulln->index = nulln_indexes++;
+	mutex_unlock(&nulln_lock);
+
+	blk_queue_logical_block_size(nulln->q, bs);
+	blk_queue_physical_block_size(nulln->q, bs);
+
+	snprintf(nulln->disk_name, sizeof(nulln->disk_name), "nulln%d",
+		 nulln->index);
+
+	rv = nvm_register(nulln->q, nulln->disk_name, &nulln_dev_ops);
+	if (rv)
+		goto out_unlist;
+
+	return 0;
+
+out_unlist:
+	/* The device is already on nulln_list; take it off before freeing it. */
+	mutex_lock(&nulln_lock);
+	list_del_init(&nulln->list);
+	mutex_unlock(&nulln_lock);
+	blk_cleanup_queue(nulln->q);
+out_cleanup_tags:
+	blk_mq_free_tag_set(&nulln->tag_set);
+out_free_nulln:
+	kfree(nulln);
+out:
+	return rv;
+}
+
+/*
+ * Module entry point: sanitise the module parameters, initialise the per-CPU
+ * completion queues and create nr_devices devices.
+ *
+ * Returns 0 on success. If creating a device fails, the devices created so
+ * far are torn down again and the error from null_add_dev() is returned.
+ */
+static int __init null_init(void)
+{
+	struct nulln *nulln;
+	unsigned int cpu;
+	int i, rv;
+
+	/* A zero or negative block size would later divide by zero in null_id(). */
+	if (bs <= 0 || bs > PAGE_SIZE) {
+		pr_warn("null_nvm: invalid block size\n");
+		pr_warn("null_nvm: defaults block size to %lu\n", PAGE_SIZE);
+		bs = PAGE_SIZE;
+	}
+
+	if (use_per_node_hctx) {
+		if (submit_queues < nr_online_nodes) {
+			pr_warn("null_nvm: submit_queues param is set to %u.\n",
+							nr_online_nodes);
+			submit_queues = nr_online_nodes;
+		}
+	} else if (submit_queues <= 0)
+		submit_queues = 1;
+	else if (submit_queues > nr_cpu_ids)
+		submit_queues = nr_cpu_ids;
+
+	mutex_init(&nulln_lock);
+
+	/* One completion list per CPU; the timer is only needed in timer mode. */
+	for_each_possible_cpu(cpu) {
+		struct completion_queue *cq = &per_cpu(completion_queues, cpu);
+
+		init_llist_head(&cq->list);
+
+		if (irqmode != NULL_IRQ_TIMER)
+			continue;
+
+		hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+		cq->timer.function = null_cmd_timer_expired;
+	}
+
+	/* Signed index: a negative nr_devices simply creates no devices. */
+	for (i = 0; i < nr_devices; i++) {
+		rv = null_add_dev();
+		if (rv)
+			goto err_dev;
+	}
+
+	pr_info("null_nvm: module loaded\n");
+	return 0;
+
+err_dev:
+	/* Module load fails, so null_exit() will never run: clean up here. */
+	mutex_lock(&nulln_lock);
+	while (!list_empty(&nulln_list)) {
+		nulln = list_entry(nulln_list.next, struct nulln, list);
+		null_del_dev(nulln);
+	}
+	mutex_unlock(&nulln_lock);
+
+	return rv;
+}
+
+/*
+ * Module exit: tear down every device on nulln_list while holding
+ * nulln_lock. null_del_dev() unlinks and frees each entry, so the "safe"
+ * iterator is used to fetch the next entry before the current one goes away.
+ */
+static void __exit null_exit(void)
+{
+	struct nulln *dev, *next;
+
+	mutex_lock(&nulln_lock);
+	list_for_each_entry_safe(dev, next, &nulln_list, list)
+		null_del_dev(dev);
+	mutex_unlock(&nulln_lock);
+}
+
+/* Module entry/exit hooks and metadata. */
+module_init(null_init);
+module_exit(null_exit);
+
+MODULE_AUTHOR("Matias Bjorling <mb@...htnvm.io>");
+MODULE_LICENSE("GPL");
--
2.1.4
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists