[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <71b009057582cd9c82cff2b45bc1af846408bcf7.camel@kernel.crashing.org>
Date: Mon, 15 Jul 2019 13:43:37 +1000
From: Benjamin Herrenschmidt <benh@...nel.crashing.org>
To: linux-nvme@...ts.infradead.org
Cc: "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
Keith Busch <kbusch@...nel.org>, Jens Axboe <axboe@...com>,
Christoph Hellwig <hch@....de>, Paul Pawlowski <paul@...rm.io>
Subject: [PATCH] nvme: Add support for Apple 2018+ models
Based on reverse engineering and original patch by
Paul Pawlowski <paul@...rm.io>
This adds support for Apple weird implementation of NVME in their
2018 or later machines. It accounts for the twice-as-big SQ entries
for the IO queues, and the fact that only interrupt vector 0 appears
to function properly.
Signed-off-by: Benjamin Herrenschmidt <benh@...nel.crashing.org>
---
I reworked Paul's patch to be less invasive in nvme_submit_cmd()
hot path, effectively only adding a shift with a value hopefully
coming from the same cache line as existing stuff.
It could probably be made even less by making sq_extra_shift be
instead "sq_shift" and contain the complete shift between entries,
ie, 6 or 7, and then replacing the memcpy to
&nvmeq->sq_cmds[nvmeq->sq_tail]
With something like
((char *)nvmeq->sq_cmds) + ((size_t)nvmeq->sq_tail) << nvmeq->sq_shift
But I doubt the difference will be measurable anywhere and it makes
the code grosser imho.
Note: I'm not subscribed to linux-nvme, please CC me on replies.
drivers/nvme/host/nvme.h | 5 ++++
drivers/nvme/host/pci.c | 59 ++++++++++++++++++++++++++++------------
2 files changed, 47 insertions(+), 17 deletions(-)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 55553d293a98..9ae53cbfb320 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -92,6 +92,11 @@ enum nvme_quirks {
* Broken Write Zeroes.
*/
NVME_QUIRK_DISABLE_WRITE_ZEROES = (1 << 9),
+
+ /*
+ * Apple 2018 and latter variant has a few issues
+ */
+ NVME_QUIRK_APPLE_2018 = (1 << 10),
};
/*
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 524d6bd6d095..1a41412fc48b 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -27,8 +27,8 @@
#include "trace.h"
#include "nvme.h"
-#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command))
-#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion))
+#define SQ_SIZE(q) ((((size_t)(q)->q_depth) << (q)->sq_extra_shift) * sizeof(struct nvme_command))
+#define CQ_SIZE(q) (((size_t)(q)->q_depth) * sizeof(struct nvme_completion))
#define SGES_PER_PAGE (PAGE_SIZE / sizeof(struct nvme_sgl_desc))
@@ -195,6 +195,7 @@ struct nvme_queue {
u16 last_cq_head;
u16 qid;
u8 cq_phase;
+ u8 sq_extra_shift;
unsigned long flags;
#define NVMEQ_ENABLED 0
#define NVMEQ_SQ_CMB 1
@@ -504,8 +505,11 @@ static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq)
static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
bool write_sq)
{
+ u16 sq_actual_pos;
+
spin_lock(&nvmeq->sq_lock);
- memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd));
+ sq_actual_pos = nvmeq->sq_tail << nvmeq->sq_extra_shift;
+ memcpy(&nvmeq->sq_cmds[sq_actual_pos], cmd, sizeof(*cmd));
if (++nvmeq->sq_tail == nvmeq->q_depth)
nvmeq->sq_tail = 0;
nvme_write_sq_db(nvmeq, write_sq);
@@ -1361,16 +1365,16 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
static void nvme_free_queue(struct nvme_queue *nvmeq)
{
- dma_free_coherent(nvmeq->dev->dev, CQ_SIZE(nvmeq->q_depth),
+ dma_free_coherent(nvmeq->dev->dev, CQ_SIZE(nvmeq),
(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
if (!nvmeq->sq_cmds)
return;
if (test_and_clear_bit(NVMEQ_SQ_CMB, &nvmeq->flags)) {
pci_free_p2pmem(to_pci_dev(nvmeq->dev->dev),
- nvmeq->sq_cmds, SQ_SIZE(nvmeq->q_depth));
+ nvmeq->sq_cmds, SQ_SIZE(nvmeq));
} else {
- dma_free_coherent(nvmeq->dev->dev, SQ_SIZE(nvmeq->q_depth),
+ dma_free_coherent(nvmeq->dev->dev, SQ_SIZE(nvmeq),
nvmeq->sq_cmds, nvmeq->sq_dma_addr);
}
}
@@ -1450,12 +1454,12 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
}
static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
- int qid, int depth)
+ int qid)
{
struct pci_dev *pdev = to_pci_dev(dev->dev);
if (qid && dev->cmb_use_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
- nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(depth));
+ nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(nvmeq));
nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev,
nvmeq->sq_cmds);
if (nvmeq->sq_dma_addr) {
@@ -1464,7 +1468,7 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
}
}
- nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
+ nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(nvmeq),
&nvmeq->sq_dma_addr, GFP_KERNEL);
if (!nvmeq->sq_cmds)
return -ENOMEM;
@@ -1478,12 +1482,24 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth)
if (dev->ctrl.queue_count > qid)
return 0;
- nvmeq->cqes = dma_alloc_coherent(dev->dev, CQ_SIZE(depth),
+ /*
+ * On Apple 2018 or later implementations, the IO submission
+ * queue(s) have twice as large entries. Current implementations
+ * seem to only have qid 1, let's assume this is true of all IO
+ * queues until we know better.
+ */
+ if (qid && (dev->ctrl.quirks & NVME_QUIRK_APPLE_2018))
+ nvmeq->sq_extra_shift = 1;
+ else
+ nvmeq->sq_extra_shift = 0;
+
+ nvmeq->q_depth = depth;
+ nvmeq->cqes = dma_alloc_coherent(dev->dev, CQ_SIZE(nvmeq),
&nvmeq->cq_dma_addr, GFP_KERNEL);
if (!nvmeq->cqes)
goto free_nvmeq;
- if (nvme_alloc_sq_cmds(dev, nvmeq, qid, depth))
+ if (nvme_alloc_sq_cmds(dev, nvmeq, qid))
goto free_cqdma;
nvmeq->dev = dev;
@@ -1492,15 +1508,14 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth)
nvmeq->cq_head = 0;
nvmeq->cq_phase = 1;
nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
- nvmeq->q_depth = depth;
nvmeq->qid = qid;
dev->ctrl.queue_count++;
return 0;
free_cqdma:
- dma_free_coherent(dev->dev, CQ_SIZE(depth), (void *)nvmeq->cqes,
- nvmeq->cq_dma_addr);
+ dma_free_coherent(dev->dev, CQ_SIZE(nvmeq), (void *)nvmeq->cqes,
+ nvmeq->cq_dma_addr);
free_nvmeq:
return -ENOMEM;
}
@@ -1527,8 +1542,9 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
nvmeq->last_sq_tail = 0;
nvmeq->cq_head = 0;
nvmeq->cq_phase = 1;
+
nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
- memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth));
+ memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq));
nvme_dbbuf_init(dev, nvmeq, qid);
dev->online_queues++;
wmb(); /* ensure the first interrupt sees the initialization */
@@ -1546,9 +1562,16 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
* A queue's vector matches the queue identifier unless the controller
* has only one vector available.
*/
- if (!polled)
+ if (!polled) {
vector = dev->num_vecs == 1 ? 0 : qid;
- else
+
+ /*
+ * On Apple 2018 or later implementations, only vector 0 is accepted
+ * by the drive firmware
+ */
+ if (dev->ctrl.quirks & NVME_QUIRK_APPLE_2018)
+ vector = 0;
+ } else
set_bit(NVMEQ_POLLED, &nvmeq->flags);
result = adapter_alloc_cq(dev, qid, nvmeq, vector);
@@ -2949,6 +2972,8 @@ static const struct pci_device_id nvme_id_table[] = {
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
+ { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2005),
+ .driver_data = NVME_QUIRK_APPLE_2018},
{ 0, }
};
MODULE_DEVICE_TABLE(pci, nvme_id_table);
Powered by blists - more mailing lists