[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <63a499c3-25be-5c5b-5822-124854945279@intel.com>
Date: Thu, 2 May 2019 15:12:14 -0600
From: "Heitke, Kenneth" <kenneth.heitke@...el.com>
To: Maxim Levitsky <mlevitsk@...hat.com>,
linux-nvme@...ts.infradead.org
Cc: Fam Zheng <fam@...hon.net>, Keith Busch <keith.busch@...el.com>,
Sagi Grimberg <sagi@...mberg.me>, kvm@...r.kernel.org,
"David S . Miller" <davem@...emloft.net>,
Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
Liang Cunming <cunming.liang@...el.com>,
Wolfram Sang <wsa@...-dreams.de>, linux-kernel@...r.kernel.org,
Kirti Wankhede <kwankhede@...dia.com>,
Jens Axboe <axboe@...com>,
Alex Williamson <alex.williamson@...hat.com>,
John Ferlan <jferlan@...hat.com>,
Mauro Carvalho Chehab <mchehab+samsung@...nel.org>,
Paolo Bonzini <pbonzini@...hat.com>,
Liu Changpeng <changpeng.liu@...el.com>,
"Paul E . McKenney" <paulmck@...ux.ibm.com>,
Amnon Ilan <ailan@...hat.com>, Christoph Hellwig <hch@....de>,
Nicolas Ferre <nicolas.ferre@...rochip.com>
Subject: Re: [PATCH v2 08/10] nvme/pci: implement the mdev external queue
allocation interface
On 5/2/2019 5:47 AM, Maxim Levitsky wrote:
> Note that currently the number of hw queues reserved for mdev
> has to be predetermined on module load.
>
> (I used to allocate the queues dynamically on demand, but
> recent changes to allocate polled/read queues made
> this somewhat difficult, so I dropped this for now.)
>
> Signed-off-by: Maxim Levitsky <mlevitsk@...hat.com>
> ---
> drivers/nvme/host/pci.c | 375 ++++++++++++++++++++++++++++++++++++++-
> drivers/nvme/mdev/host.c | 46 ++---
> drivers/nvme/mdev/io.c | 46 +++--
> drivers/nvme/mdev/mmio.c | 3 -
> 4 files changed, 421 insertions(+), 49 deletions(-)
>
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index 282f28c851c1..87507e710374 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -23,6 +23,7 @@
> #include <linux/io-64-nonatomic-lo-hi.h>
> #include <linux/sed-opal.h>
> #include <linux/pci-p2pdma.h>
> +#include "../mdev/mdev.h"
>
> #include "trace.h"
> #include "nvme.h"
> @@ -32,6 +33,7 @@
>
> #define SGES_PER_PAGE (PAGE_SIZE / sizeof(struct nvme_sgl_desc))
>
> +#define USE_SMALL_PRP_POOL(nprps) ((nprps) < (256 / 8))
> /*
> * These can be higher, but we need to ensure that any command doesn't
> * require an sg allocation that needs more than a page of data.
> @@ -83,12 +85,24 @@ static int poll_queues = 0;
> module_param_cb(poll_queues, &queue_count_ops, &poll_queues, 0644);
> MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO.");
>
> +static int mdev_queues;
> +#ifdef CONFIG_NVME_MDEV
> +module_param_cb(mdev_queues, &queue_count_ops, &mdev_queues, 0644);
> +MODULE_PARM_DESC(mdev_queues, "Number of queues to use for mediated VFIO");
> +#endif
> +
> struct nvme_dev;
> struct nvme_queue;
>
> static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
> static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode);
>
> +#ifdef CONFIG_NVME_MDEV
> +static void nvme_ext_queue_reset(struct nvme_dev *dev, u16 qid);
> +#else
> +static void nvme_ext_queue_reset(struct nvme_dev *dev, u16 qid) {}
> +#endif
> +
> /*
> * Represents an NVM Express device. Each nvme_dev is a PCI function.
> */
> @@ -103,6 +117,7 @@ struct nvme_dev {
> unsigned online_queues;
> unsigned max_qid;
> unsigned io_queues[HCTX_MAX_TYPES];
> + unsigned int mdev_queues;
> unsigned int num_vecs;
> int q_depth;
> u32 db_stride;
> @@ -110,6 +125,7 @@ struct nvme_dev {
> unsigned long bar_mapped_size;
> struct work_struct remove_work;
> struct mutex shutdown_lock;
> + struct mutex ext_dev_lock;
> bool subsystem;
> u64 cmb_size;
> bool cmb_use_sqes;
> @@ -172,6 +188,16 @@ static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
> return container_of(ctrl, struct nvme_dev, ctrl);
> }
>
> +/* Simplified IO descriptor for MDEV use */
> +struct nvme_ext_iod {
> + struct list_head link;
> + u32 user_tag;
> + int nprps;
> + struct nvme_ext_data_iter *saved_iter;
> + dma_addr_t first_prplist_dma;
> + __le64 *prpslists[NVME_MAX_SEGS];
> +};
> +
> /*
> * An NVM Express queue. Each device has at least two (one for admin
> * commands and one for I/O commands).
> @@ -196,15 +222,26 @@ struct nvme_queue {
> u16 qid;
> u8 cq_phase;
> unsigned long flags;
> +
> #define NVMEQ_ENABLED 0
> #define NVMEQ_SQ_CMB 1
> #define NVMEQ_DELETE_ERROR 2
> #define NVMEQ_POLLED 3
> +#define NVMEQ_EXTERNAL 4
> +
> u32 *dbbuf_sq_db;
> u32 *dbbuf_cq_db;
> u32 *dbbuf_sq_ei;
> u32 *dbbuf_cq_ei;
> struct completion delete_done;
> +
> + /* queue passthrough for external use */
> + struct {
> + int inflight;
> + struct nvme_ext_iod *iods;
> + struct list_head free_iods;
> + struct list_head used_iods;
> + } ext;
> };
>
> /*
> @@ -255,7 +292,7 @@ static inline void _nvme_check_size(void)
>
> static unsigned int max_io_queues(void)
> {
> - return num_possible_cpus() + write_queues + poll_queues;
> + return num_possible_cpus() + write_queues + poll_queues + mdev_queues;
> }
>
> static unsigned int max_queue_count(void)
> @@ -1066,6 +1103,7 @@ static irqreturn_t nvme_irq(int irq, void *data)
> * the irq handler, even if that was on another CPU.
> */
> rmb();
> +
> if (nvmeq->cq_head != nvmeq->last_cq_head)
> ret = IRQ_HANDLED;
> nvme_process_cq(nvmeq, &start, &end, -1);
> @@ -1553,7 +1591,11 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
> memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth));
> nvme_dbbuf_init(dev, nvmeq, qid);
> dev->online_queues++;
> +
> wmb(); /* ensure the first interrupt sees the initialization */
> +
> + if (test_bit(NVMEQ_EXTERNAL, &nvmeq->flags))
> + nvme_ext_queue_reset(nvmeq->dev, qid);
> }
>
> static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
> @@ -1759,7 +1801,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
> }
>
> max = min(dev->max_qid, dev->ctrl.queue_count - 1);
> - if (max != 1 && dev->io_queues[HCTX_TYPE_POLL]) {
> + if (max != 1) {
> rw_queues = dev->io_queues[HCTX_TYPE_DEFAULT] +
> dev->io_queues[HCTX_TYPE_READ];
> } else {
> @@ -2095,14 +2137,23 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
> * Poll queues don't need interrupts, but we need at least one IO
> * queue left over for non-polled IO.
> */
> - this_p_queues = poll_queues;
> + this_p_queues = poll_queues + mdev_queues;
> if (this_p_queues >= nr_io_queues) {
> this_p_queues = nr_io_queues - 1;
> irq_queues = 1;
> } else {
> irq_queues = nr_io_queues - this_p_queues + 1;
> }
> +
> + if (mdev_queues > this_p_queues) {
> + mdev_queues = this_p_queues;
> + this_p_queues = 0;
> + } else {
> + this_p_queues -= mdev_queues;
> + }
> +
> dev->io_queues[HCTX_TYPE_POLL] = this_p_queues;
> + dev->mdev_queues = mdev_queues;
>
> /* Initialize for the single interrupt case */
> dev->io_queues[HCTX_TYPE_DEFAULT] = 1;
> @@ -2170,7 +2221,8 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
>
> dev->num_vecs = result;
> result = max(result - 1, 1);
> - dev->max_qid = result + dev->io_queues[HCTX_TYPE_POLL];
> + dev->max_qid = result + dev->io_queues[HCTX_TYPE_POLL] +
> + dev->mdev_queues;
>
> /*
> * Should investigate if there's a performance win from allocating
> @@ -2193,10 +2245,11 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
> nvme_suspend_io_queues(dev);
> goto retry;
> }
> - dev_info(dev->ctrl.device, "%d/%d/%d default/read/poll queues\n",
> + dev_info(dev->ctrl.device, "%d/%d/%d/%d default/read/poll/mdev queues\n",
> dev->io_queues[HCTX_TYPE_DEFAULT],
> dev->io_queues[HCTX_TYPE_READ],
> - dev->io_queues[HCTX_TYPE_POLL]);
> + dev->io_queues[HCTX_TYPE_POLL],
> + dev->mdev_queues);
> return 0;
> }
>
> @@ -2623,6 +2676,301 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work)
> nvme_put_ctrl(&dev->ctrl);
> }
>
> +#ifdef CONFIG_NVME_MDEV
> +static void nvme_ext_free_iod(struct nvme_dev *dev, struct nvme_ext_iod *iod)
> +{
> + int i = 0, max_prp, nprps = iod->nprps;
> + dma_addr_t dma = iod->first_prplist_dma;
> +
> + if (iod->saved_iter) {
> + iod->saved_iter->release(iod->saved_iter);
> + iod->saved_iter = NULL;
> + }
> +
> + if (--nprps < 2) {
> + goto out;
> + } else if (USE_SMALL_PRP_POOL(nprps)) {
> + dma_pool_free(dev->prp_small_pool, iod->prpslists[0], dma);
> + goto out;
> + }
> +
> + max_prp = (dev->ctrl.page_size >> 3) - 1;
> + while (nprps > 0) {
> + if (i > 0) {
> + dma = iod->prpslists[i - 1][max_prp];
> + if (nprps == 1)
> + break;
> + }
> + dma_pool_free(dev->prp_page_pool, iod->prpslists[i++], dma);
> + nprps -= max_prp;
> + }
> +out:
> + iod->nprps = -1;
> + iod->first_prplist_dma = 0;
> + iod->user_tag = 0xDEADDEAD;
> +}
> +
> +static int nvme_ext_setup_iod(struct nvme_dev *dev, struct nvme_ext_iod *iod,
> + struct nvme_common_command *cmd,
> + struct nvme_ext_data_iter *iter)
> +{
> + int ret, i, j;
> + __le64 *prp_list;
> + dma_addr_t prp_dma;
> + struct dma_pool *pool;
> + int max_prp = (dev->ctrl.page_size >> 3) - 1;
> +
> + iod->saved_iter = iter && iter->release ? iter : NULL;
> + iod->nprps = iter ? iter->count : 0;
> + cmd->dptr.prp1 = 0;
> + cmd->dptr.prp2 = 0;
> + cmd->metadata = 0;
> +
> + if (!iter)
> + return 0;
> +
> + /* put first pointer*/
> + cmd->dptr.prp1 = cpu_to_le64(iter->host_iova);
> + if (iter->count == 1)
> + return 0;
> +
> + ret = iter->next(iter);
> + if (ret)
> + goto error;
> +
> + /* if only have one more pointer, put it to second data pointer*/
> + if (iter->count == 1) {
> + cmd->dptr.prp2 = cpu_to_le64(iter->host_iova);
> + return 0;
> + }
> +
> + pool = USE_SMALL_PRP_POOL(iter->count) ? dev->prp_small_pool :
> + dev->prp_page_pool;
> +
> + /* Allocate prp lists as needed and fill them */
> + for (i = 0 ; i < NVME_MAX_SEGS && iter->count ; i++) {
> + prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
> + if (!prp_list) {
> + ret = -ENOMEM;
> + goto error;
> + }
> +
> + iod->prpslists[i++] = prp_list;
> +
> + if (i == 1) {
> + iod->first_prplist_dma = prp_dma;
> + cmd->dptr.prp2 = cpu_to_le64(prp_dma);
> + j = 0;
> + } else {
> + prp_list[0] = iod->prpslists[i - 1][max_prp];
> + iod->prpslists[i - 1][max_prp] = prp_dma;
> + j = 1;
> + }
> +
> + while (j <= max_prp && iter->count) {
> + prp_list[j++] = iter->host_iova;
> + ret = iter->next(iter);
> + if (ret)
> + goto error;
> + }
> + }
> +
> + if (iter->count) {
> + ret = -ENOSPC;
> + goto error;
> + }
> + return 0;
> +error:
> + iod->nprps -= iter->count;
> + nvme_ext_free_iod(dev, iod);
> + return ret;
> +}
> +
> +static int nvme_ext_queues_available(struct nvme_ctrl *ctrl)
> +{
> + struct nvme_dev *dev = to_nvme_dev(ctrl);
> + unsigned int ret = 0, qid;
> + unsigned int first_mdev_q = dev->online_queues - dev->mdev_queues;
> +
> + for (qid = first_mdev_q; qid < dev->online_queues; qid++) {
> + struct nvme_queue *nvmeq = &dev->queues[qid];
> +
> + if (!test_bit(NVMEQ_EXTERNAL, &nvmeq->flags))
> + ret++;
> + }
> + return ret;
> +}
> +
> +static void nvme_ext_queue_reset(struct nvme_dev *dev, u16 qid)
> +{
> + struct nvme_queue *nvmeq = &dev->queues[qid];
> + struct nvme_ext_iod *iod, *tmp;
> +
> + list_for_each_entry_safe(iod, tmp, &nvmeq->ext.used_iods, link) {
> + if (iod->saved_iter && iod->saved_iter->release) {
> + iod->saved_iter->release(iod->saved_iter);
> + iod->saved_iter = NULL;
> + list_move(&iod->link, &nvmeq->ext.free_iods);
> + }
> + }
> +
> + nvmeq->ext.inflight = 0;
> +}
> +
> +static int nvme_ext_queue_alloc(struct nvme_ctrl *ctrl, u16 *ret_qid)
> +{
> + struct nvme_dev *dev = to_nvme_dev(ctrl);
> + struct nvme_queue *nvmeq;
> + int ret = 0, qid, i;
> + unsigned int first_mdev_q = dev->online_queues - dev->mdev_queues;
> +
> + mutex_lock(&dev->ext_dev_lock);
> +
> + /* find a polled queue to allocate */
> + for (qid = dev->online_queues - 1 ; qid >= first_mdev_q ; qid--) {
> + nvmeq = &dev->queues[qid];
> + if (!test_bit(NVMEQ_EXTERNAL, &nvmeq->flags))
> + break;
> + }
> +
> + if (qid < first_mdev_q) {
> + ret = -ENOSPC;
> + goto out;
> + }
> +
> + INIT_LIST_HEAD(&nvmeq->ext.free_iods);
> + INIT_LIST_HEAD(&nvmeq->ext.used_iods);
> +
> + nvmeq->ext.iods =
> + vzalloc_node(sizeof(struct nvme_ext_iod) * nvmeq->q_depth,
> + dev_to_node(dev->dev));
> +
> + if (!nvmeq->ext.iods) {
> + ret = -ENOMEM;
> + goto out;
> + }
> +
> + for (i = 0 ; i < nvmeq->q_depth ; i++)
> + list_add_tail(&nvmeq->ext.iods[i].link, &nvmeq->ext.free_iods);
> +
> + set_bit(NVMEQ_EXTERNAL, &nvmeq->flags);
> + *ret_qid = qid;
> +out:
> + mutex_unlock(&dev->ext_dev_lock);
> + return ret;
> +}
> +
> +static void nvme_ext_queue_free(struct nvme_ctrl *ctrl, u16 qid)
> +{
> + struct nvme_dev *dev = to_nvme_dev(ctrl);
> + struct nvme_queue *nvmeq;
> +
> + mutex_lock(&dev->ext_dev_lock);
> + nvmeq = &dev->queues[qid];
> +
> + if (WARN_ON(!test_bit(NVMEQ_EXTERNAL, &nvmeq->flags)))
> + return;
This condition is probably not expected to happen (since it's a warning),
but do you need to unlock the ext_dev_lock before returning?
> +
> + nvme_ext_queue_reset(dev, qid);
> +
> + vfree(nvmeq->ext.iods);
> + nvmeq->ext.iods = NULL;
> + INIT_LIST_HEAD(&nvmeq->ext.free_iods);
> + INIT_LIST_HEAD(&nvmeq->ext.used_iods);
> +
> + clear_bit(NVMEQ_EXTERNAL, &nvmeq->flags);
> + mutex_unlock(&dev->ext_dev_lock);
> +}
> +
> +static int nvme_ext_queue_submit(struct nvme_ctrl *ctrl, u16 qid, u32 user_tag,
> + struct nvme_command *command,
> + struct nvme_ext_data_iter *iter)
> +{
> + struct nvme_dev *dev = to_nvme_dev(ctrl);
> + struct nvme_queue *nvmeq = &dev->queues[qid];
> + struct nvme_ext_iod *iod;
> + int ret;
> +
> + if (WARN_ON(!test_bit(NVMEQ_EXTERNAL, &nvmeq->flags)))
> + return -EINVAL;
> +
> + if (list_empty(&nvmeq->ext.free_iods))
> + return -1;
> +
> + iod = list_first_entry(&nvmeq->ext.free_iods,
> + struct nvme_ext_iod, link);
> +
> + list_move(&iod->link, &nvmeq->ext.used_iods);
> +
> + command->common.command_id = cpu_to_le16(iod - nvmeq->ext.iods);
> + iod->user_tag = user_tag;
> +
> + ret = nvme_ext_setup_iod(dev, iod, &command->common, iter);
> + if (ret) {
> + list_move(&iod->link, &nvmeq->ext.free_iods);
> + return ret;
> + }
> +
> + nvmeq->ext.inflight++;
> + nvme_submit_cmd(nvmeq, command, true);
> + return 0;
> +}
> +
> +static int nvme_ext_queue_poll(struct nvme_ctrl *ctrl, u16 qid,
> + struct nvme_ext_cmd_result *results,
> + unsigned int max_len)
> +{
> + struct nvme_dev *dev = to_nvme_dev(ctrl);
> + struct nvme_queue *nvmeq = &dev->queues[qid];
> + u16 old_head;
> + int i, j;
> +
> + if (WARN_ON(!test_bit(NVMEQ_EXTERNAL, &nvmeq->flags)))
> + return -EINVAL;
> +
> + if (nvmeq->ext.inflight == 0)
> + return -1;
> +
> + old_head = nvmeq->cq_head;
> +
> + for (i = 0 ; nvme_cqe_pending(nvmeq) && i < max_len ; i++) {
> + u16 status = le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status);
> + u16 tag = le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].command_id);
> +
> + results[i].status = status >> 1;
> + results[i].tag = (u32)tag;
> + nvme_update_cq_head(nvmeq);
> + }
> +
> + if (old_head != nvmeq->cq_head)
> + nvme_ring_cq_doorbell(nvmeq);
> +
> + for (j = 0 ; j < i ; j++) {
> + u16 tag = results[j].tag & 0xFFFF;
> + struct nvme_ext_iod *iod = &nvmeq->ext.iods[tag];
> +
> + if (WARN_ON(tag >= nvmeq->q_depth || iod->nprps == -1))
> + continue;
> +
> + results[j].tag = iod->user_tag;
> + nvme_ext_free_iod(dev, iod);
> + list_move(&iod->link, &nvmeq->ext.free_iods);
> + nvmeq->ext.inflight--;
> + }
> +
> + WARN_ON(nvmeq->ext.inflight < 0);
> + return i;
> +}
> +
> +static bool nvme_ext_queue_full(struct nvme_ctrl *ctrl, u16 qid)
> +{
> + struct nvme_dev *dev = to_nvme_dev(ctrl);
> + struct nvme_queue *nvmeq = &dev->queues[qid];
> +
> + return nvmeq->ext.inflight < nvmeq->q_depth - 1;
> +}
> +#endif
> +
> static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
> {
> *val = readl(to_nvme_dev(ctrl)->bar + off);
> @@ -2652,13 +3000,25 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
> .name = "pcie",
> .module = THIS_MODULE,
> .flags = NVME_F_METADATA_SUPPORTED |
> - NVME_F_PCI_P2PDMA,
> + NVME_F_PCI_P2PDMA |
> + NVME_F_MDEV_SUPPORTED |
> + NVME_F_MDEV_DMA_SUPPORTED,
> +
> .reg_read32 = nvme_pci_reg_read32,
> .reg_write32 = nvme_pci_reg_write32,
> .reg_read64 = nvme_pci_reg_read64,
> .free_ctrl = nvme_pci_free_ctrl,
> .submit_async_event = nvme_pci_submit_async_event,
> .get_address = nvme_pci_get_address,
> +
> +#ifdef CONFIG_NVME_MDEV
> + .ext_queues_available = nvme_ext_queues_available,
> + .ext_queue_alloc = nvme_ext_queue_alloc,
> + .ext_queue_free = nvme_ext_queue_free,
> + .ext_queue_submit = nvme_ext_queue_submit,
> + .ext_queue_poll = nvme_ext_queue_poll,
> + .ext_queue_full = nvme_ext_queue_full,
> +#endif
> };
>
> static int nvme_dev_map(struct nvme_dev *dev)
> @@ -2747,6 +3107,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
> INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
> mutex_init(&dev->shutdown_lock);
> + mutex_init(&dev->ext_dev_lock);
>
> result = nvme_setup_prp_pools(dev);
> if (result)
> diff --git a/drivers/nvme/mdev/host.c b/drivers/nvme/mdev/host.c
> index 5766bad7e909..6590946b86c2 100644
> --- a/drivers/nvme/mdev/host.c
> +++ b/drivers/nvme/mdev/host.c
> @@ -48,19 +48,21 @@ static struct nvme_mdev_hctrl *nvme_mdev_hctrl_create(struct nvme_ctrl *ctrl)
> return NULL;
> }
>
> + hctrl = kzalloc_node(sizeof(*hctrl), GFP_KERNEL,
> + dev_to_node(ctrl->dev));
> + if (!hctrl)
> + return NULL;
> +
> nr_host_queues = ctrl->ops->ext_queues_available(ctrl);
> max_lba_transfer = ctrl->max_hw_sectors >> (PAGE_SHIFT - 9);
>
> if (nr_host_queues == 0) {
> dev_info(ctrl->dev,
> "no support for mdev - no mdev reserved queues available");
> + kfree(hctrl);
> return NULL;
> }
>
> - hctrl = kzalloc_node(sizeof(*hctrl), GFP_KERNEL,
> - dev_to_node(ctrl->dev));
> - if (!hctrl)
> - return NULL;
>
> kref_init(&hctrl->ref);
> mutex_init(&hctrl->lock);
> @@ -180,6 +182,24 @@ void nvme_mdev_hctrl_hqs_unreserve(struct nvme_mdev_hctrl *hctrl,
> mutex_unlock(&hctrl->lock);
> }
>
> +/* Check if IO passthrough is supported for given IO optcode */
> +bool nvme_mdev_hctrl_hq_check_op(struct nvme_mdev_hctrl *hctrl, u8 optcode)
> +{
> + switch (optcode) {
> + case nvme_cmd_flush:
> + case nvme_cmd_read:
> + case nvme_cmd_write:
> + /* these are mandatory*/
> + return true;
> + case nvme_cmd_write_zeroes:
> + return (hctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES);
> + case nvme_cmd_dsm:
> + return (hctrl->oncs & NVME_CTRL_ONCS_DSM);
> + default:
> + return false;
> + }
> +}
> +
> /* Allocate a host IO queue */
> int nvme_mdev_hctrl_hq_alloc(struct nvme_mdev_hctrl *hctrl)
> {
> @@ -204,23 +224,7 @@ bool nvme_mdev_hctrl_hq_can_submit(struct nvme_mdev_hctrl *hctrl, u16 qid)
> return hctrl->nvme_ctrl->ops->ext_queue_full(hctrl->nvme_ctrl, qid);
> }
>
> -/* Check if IO passthrough is supported for given IO optcode */
> -bool nvme_mdev_hctrl_hq_check_op(struct nvme_mdev_hctrl *hctrl, u8 optcode)
> -{
> - switch (optcode) {
> - case nvme_cmd_flush:
> - case nvme_cmd_read:
> - case nvme_cmd_write:
> - /* these are mandatory*/
> - return true;
> - case nvme_cmd_write_zeroes:
> - return (hctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES);
> - case nvme_cmd_dsm:
> - return (hctrl->oncs & NVME_CTRL_ONCS_DSM);
> - default:
> - return false;
> - }
> -}
> +
>
> /* Submit a IO passthrough command */
> int nvme_mdev_hctrl_hq_submit(struct nvme_mdev_hctrl *hctrl,
> diff --git a/drivers/nvme/mdev/io.c b/drivers/nvme/mdev/io.c
> index a731196d0365..59837540fec2 100644
> --- a/drivers/nvme/mdev/io.c
> +++ b/drivers/nvme/mdev/io.c
> @@ -11,14 +11,16 @@
> #include <linux/ktime.h>
> #include "priv.h"
>
> +
> struct io_ctx {
> struct nvme_mdev_hctrl *hctrl;
> struct nvme_mdev_vctrl *vctrl;
>
> const struct nvme_command *in;
> - struct nvme_command out;
> struct nvme_mdev_vns *ns;
> struct nvme_ext_data_iter udatait;
> +
> + struct nvme_command out;
> struct nvme_ext_data_iter *kdatait;
>
> ktime_t last_io_t;
> @@ -28,6 +30,20 @@ struct io_ctx {
> unsigned int arb_burst;
> };
>
> +/* Check if we need to read a command from the admin queue */
> +static bool nvme_mdev_adm_needs_processing(struct io_ctx *ctx)
> +{
> + if (!timeout(ctx->last_admin_poll_time,
> + ctx->vctrl->now, ctx->admin_poll_rate_ms))
> + return false;
> +
> + if (nvme_mdev_vsq_has_data(ctx->vctrl, &ctx->vctrl->vsqs[0]))
> + return true;
> +
> + ctx->last_admin_poll_time = ctx->vctrl->now;
> + return false;
> +}
> +
> /* Handle read/write command.*/
> static int nvme_mdev_io_translate_rw(struct io_ctx *ctx)
> {
> @@ -229,6 +245,7 @@ static int nvme_mdev_io_translate_cmd(struct io_ctx *ctx)
> }
> }
>
> +/* process a user submission queue */
> static bool nvme_mdev_io_process_sq(struct io_ctx *ctx, u16 sqid)
> {
> struct nvme_vsq *vsq = &ctx->vctrl->vsqs[sqid];
> @@ -275,7 +292,13 @@ static bool nvme_mdev_io_process_sq(struct io_ctx *ctx, u16 sqid)
> return true;
> }
>
> -/* process host replies to the passed through commands */
> +/* process a user completion queue */
> +static void nvme_mdev_io_process_cq(struct io_ctx *ctx, u16 cqid)
> +{
> + nvme_mdev_vcq_process(ctx->vctrl, cqid, true);
> +}
> +
> +/* process hardware completion queue */
> static int nvme_mdev_io_process_hwq(struct io_ctx *ctx, u16 hwq)
> {
> int n, i;
> @@ -301,22 +324,9 @@ static int nvme_mdev_io_process_hwq(struct io_ctx *ctx, u16 hwq)
> return n;
> }
>
> -/* Check if we need to read a command from the admin queue */
> -static bool nvme_mdev_adm_needs_processing(struct io_ctx *ctx)
> -{
> - if (!timeout(ctx->last_admin_poll_time,
> - ctx->vctrl->now, ctx->admin_poll_rate_ms))
> - return false;
> -
> - if (nvme_mdev_vsq_has_data(ctx->vctrl, &ctx->vctrl->vsqs[0]))
> - return true;
> -
> - ctx->last_admin_poll_time = ctx->vctrl->now;
> - return false;
> -}
>
> /* do polling till one of events stops it */
> -static void nvme_mdev_io_maintask(struct io_ctx *ctx)
> +static void nvme_mdev_io_polling_loop(struct io_ctx *ctx)
> {
> struct nvme_mdev_vctrl *vctrl = ctx->vctrl;
> u16 i, cqid, sqid, hsqcnt;
> @@ -353,7 +363,7 @@ static void nvme_mdev_io_maintask(struct io_ctx *ctx)
> /* process the completions from the guest*/
> cqid = 1;
> for_each_set_bit_from(cqid, vctrl->vcq_en, MAX_VIRTUAL_QUEUES)
> - nvme_mdev_vcq_process(vctrl, cqid, true);
> + nvme_mdev_io_process_cq(ctx, cqid);
>
> /* process the completions from the hardware*/
> for (i = 0 ; i < hsqcnt ; i++)
> @@ -470,7 +480,7 @@ static int nvme_mdev_io_polling_thread(void *data)
> if (kthread_should_stop())
> break;
>
> - nvme_mdev_io_maintask(&ctx);
> + nvme_mdev_io_polling_loop(&ctx);
> }
>
> _DBG(ctx.vctrl, "IO: iothread stopped\n");
> diff --git a/drivers/nvme/mdev/mmio.c b/drivers/nvme/mdev/mmio.c
> index cf03c1f22f4c..a80962bf4a3d 100644
> --- a/drivers/nvme/mdev/mmio.c
> +++ b/drivers/nvme/mdev/mmio.c
> @@ -54,9 +54,6 @@ static const struct vm_operations_struct nvme_mdev_mmio_dbs_vm_ops = {
> bool nvme_mdev_mmio_db_check(struct nvme_mdev_vctrl *vctrl,
> u16 qid, u16 size, u16 db)
> {
> - if (get_current() != vctrl->iothread)
> - lockdep_assert_held(&vctrl->lock);
> -
> if (db < size)
> return true;
> if (qid == 0) {
>
Powered by blists - more mailing lists