Message-ID: <20260118135440.1958279-27-den@valinux.co.jp>
Date: Sun, 18 Jan 2026 22:54:28 +0900
From: Koichiro Den <den@...inux.co.jp>
To: Frank.Li@....com,
dave.jiang@...el.com,
cassel@...nel.org,
mani@...nel.org,
kwilczynski@...nel.org,
kishon@...nel.org,
bhelgaas@...gle.com,
geert+renesas@...der.be,
robh@...nel.org,
vkoul@...nel.org,
jdmason@...zu.us,
allenbh@...il.com,
jingoohan1@...il.com,
lpieralisi@...nel.org
Cc: linux-pci@...r.kernel.org,
linux-doc@...r.kernel.org,
linux-kernel@...r.kernel.org,
linux-renesas-soc@...r.kernel.org,
devicetree@...r.kernel.org,
dmaengine@...r.kernel.org,
iommu@...ts.linux.dev,
ntb@...ts.linux.dev,
netdev@...r.kernel.org,
linux-kselftest@...r.kernel.org,
arnd@...db.de,
gregkh@...uxfoundation.org,
joro@...tes.org,
will@...nel.org,
robin.murphy@....com,
magnus.damm@...il.com,
krzk+dt@...nel.org,
conor+dt@...nel.org,
corbet@....net,
skhan@...uxfoundation.org,
andriy.shevchenko@...ux.intel.com,
jbrunet@...libre.com,
utkarsh02t@...il.com
Subject: [RFC PATCH v4 26/38] NTB: ntb_transport: Add remote embedded-DMA transport client

Introduce a new NTB transport client (ntb_client) that uses a PCI
endpoint's embedded DMA engine to move data between the endpoint and
the host.

Unlike the existing CPU/DMA memcpy-based transport, this transport
offloads the data plane to an embedded DMA engine located on the
endpoint and driven by the remote host. Control and queue management
remain in the peer-exposed memory window, while bulk data movement is
performed by the remote embedded DMA engine.

This transport requires a different memory window layout from the
traditional NTB transport. A key benefit of this client implementation
is that the memory window no longer needs to carry data buffers. This
makes the design less sensitive to limited memory window space and
allows it to scale to multiple queue pairs.

The transport itself is generic and does not assume a specific vendor's
DMA implementation. Support for concrete embedded DMA engines is
provided via the ntb_edma backend registry. The initial backend
implementation is ntb_dw_edma, which integrates with the DesignWare eDMA
driver.

This separation allows additional embedded DMA backends to be added in
the future without changing the NTB transport core or client logic.

Signed-off-by: Koichiro Den <den@...inux.co.jp>
---
drivers/ntb/Kconfig | 13 +
drivers/ntb/Makefile | 1 +
drivers/ntb/ntb_transport_edma.c | 1110 ++++++++++++++++++++++++++++++
3 files changed, 1124 insertions(+)
create mode 100644 drivers/ntb/ntb_transport_edma.c
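
Note for reviewers (kept below the '---', so not part of the commit
message): the ring bookkeeping in ntb_transport_edma.c uses free-running
u32 head/tail counters that are only reduced to a slot index when a
descriptor is accessed. The stand-alone sketch below illustrates that
arithmetic with the same ring order as NTB_EDMA_RING_ORDER; the names
used here are illustrative only and not part of the patch:

	#include <stdint.h>
	#include <stdio.h>

	#define RING_ORDER	7
	#define RING_ENTRIES	(1u << RING_ORDER)
	#define RING_MASK	(RING_ENTRIES - 1)

	/* Counters run free; they are masked only when indexing a slot. */
	static uint32_t ring_idx(uint32_t v)
	{
		return v & RING_MASK;
	}

	/* Same result as the branchy form in the patch, modulo 2^32. */
	static uint32_t ring_used(uint32_t head, uint32_t tail)
	{
		return head - tail;
	}

	static uint32_t ring_free(uint32_t head, uint32_t tail)
	{
		return RING_ENTRIES - ring_used(head, tail) - 1;
	}

	int main(void)
	{
		uint32_t tail = UINT32_MAX - 2;	/* about to wrap */
		uint32_t head = tail + 5;	/* wrapped past 0 */

		printf("used=%u free=%u slot=%u\n",
		       ring_used(head, tail), ring_free(head, tail),
		       ring_idx(head));
		return 0;
	}

One slot is always left unused, so head == tail unambiguously means
"empty" and ntb_edma_ring_full() reports full at RING_ENTRIES - 1 used
entries.
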
diff --git a/drivers/ntb/Kconfig b/drivers/ntb/Kconfig
index df16c755b4da..0dfb89ec290c 100644
--- a/drivers/ntb/Kconfig
+++ b/drivers/ntb/Kconfig
@@ -37,4 +37,17 @@ config NTB_TRANSPORT
If unsure, say N.
+config NTB_TRANSPORT_EDMA
+ tristate "NTB Transport Client on PCI EP embedded DMA"
+ depends on NTB_TRANSPORT
+ select NTB_EDMA
+ help
+ Enable a transport backend that uses a peer-exposed PCI embedded DMA
+ engine through a dedicated NTB memory window.
+
+ NOTE: You also need at least one eDMA backend driver enabled/loaded
+ (e.g. NTB_DW_EDMA) so the transport can find a matching backend.
+
+ If unsure, say N.
+
endif # NTB
diff --git a/drivers/ntb/Makefile b/drivers/ntb/Makefile
index 47e6b95ef7ce..7bb952a1cf8f 100644
--- a/drivers/ntb/Makefile
+++ b/drivers/ntb/Makefile
@@ -2,6 +2,7 @@
obj-$(CONFIG_NTB) += ntb.o hw/ test/
obj-$(CONFIG_NTB_TRANSPORT) += ntb_transport.o
obj-$(CONFIG_NTB_TRANSPORT) += ntb_transport_core.o
+obj-$(CONFIG_NTB_TRANSPORT_EDMA) += ntb_transport_edma.o
ntb-y := core.o
ntb-$(CONFIG_NTB_MSI) += msi.o
diff --git a/drivers/ntb/ntb_transport_edma.c b/drivers/ntb/ntb_transport_edma.c
new file mode 100644
index 000000000000..778143a15930
--- /dev/null
+++ b/drivers/ntb/ntb_transport_edma.c
@@ -0,0 +1,1110 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * NTB transport backend for remote embedded DMA (eDMA).
+ *
+ * The backend uses an endpoint-exposed embedded DMA engine via an NTB
+ * memory window. Hardware-specific details are provided by an ntb_edma
+ * backend driver.
+ */
+
+#include <linux/bug.h>
+#include <linux/compiler.h>
+#include <linux/debugfs.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/errno.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/module.h>
+#include <linux/ntb.h>
+#include <linux/ntb_transport.h>
+#include <linux/pci.h>
+#include <linux/pci-epc.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+
+#include "ntb_transport_internal.h"
+#include "hw/edma/backend.h"
+
+static unsigned long max_mw_size;
+module_param(max_mw_size, ulong, 0644);
+MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows");
+
+static unsigned char max_num_clients;
+module_param(max_num_clients, byte, 0644);
+MODULE_PARM_DESC(max_num_clients, "Maximum number of NTB transport clients");
+
+#define NTB_EDMA_RING_ORDER 7
+#define NTB_EDMA_RING_ENTRIES BIT(NTB_EDMA_RING_ORDER)
+#define NTB_EDMA_RING_MASK (NTB_EDMA_RING_ENTRIES - 1)
+
+#define NTB_EDMA_MAX_POLL 32
+
+/*
+ * Remote eDMA mode implementation
+ */
+struct ntb_queue_entry_edma {
+ dma_addr_t addr;
+ struct scatterlist sgl;
+};
+
+struct ntb_transport_ctx_edma {
+ remote_edma_mode_t remote_edma_mode;
+ struct device *dma_dev;
+ struct workqueue_struct *wq;
+ struct ntb_edma_chans chans;
+
+ const struct ntb_edma_backend *be;
+ void *be_priv;
+};
+
+struct ntb_transport_qp_edma {
+ struct ntb_transport_qp *qp;
+
+ /*
+ * Schedule peer notification from a sleepable context.
+ * ntb_peer_db_set() may sleep.
+ */
+ struct work_struct db_work;
+
+ u32 rx_prod;
+ u32 rx_cons;
+ u32 tx_cons;
+ u32 tx_issue;
+
+ spinlock_t rx_lock;
+ spinlock_t tx_lock;
+
+ struct work_struct rx_work;
+ struct work_struct tx_work;
+};
+
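+/*
+ * Descriptor layout shared with the peer:
+ * @len: buffer/transfer length published to the peer
+ * @flags: DESC_DONE_FLAG / LINK_DOWN_FLAG
+ * @addr: bus address of the receive buffer, posted by the receiving side
+ * @data: local cookie (the owning ntb_queue_entry); never written to the
+ * peer's copy
+ */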
+struct ntb_edma_desc {
+ u32 len;
+ u32 flags;
+ u64 addr; /* DMA address */
+ u64 data;
+};
+
+struct ntb_edma_ring {
+ struct ntb_edma_desc desc[NTB_EDMA_RING_ENTRIES];
+ u32 head;
+ u32 tail;
+};
+
+static inline bool ntb_qp_edma_is_rc(struct ntb_transport_qp *qp)
+{
+ struct ntb_transport_ctx_edma *ctx = qp->transport->priv;
+
+ return ctx->remote_edma_mode == REMOTE_EDMA_RC;
+}
+
+static inline bool ntb_qp_edma_is_ep(struct ntb_transport_qp *qp)
+{
+ struct ntb_transport_ctx_edma *ctx = qp->transport->priv;
+
+ return ctx->remote_edma_mode == REMOTE_EDMA_EP;
+}
+
+static inline bool ntb_qp_edma_enabled(struct ntb_transport_qp *qp)
+{
+ return ntb_qp_edma_is_rc(qp) || ntb_qp_edma_is_ep(qp);
+}
+
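+/*
+ * Each QP carries two rings in its window. The EP side flips the ring
+ * selection so that one side's TX ring (n = 0) lines up with the other
+ * side's RX ring (n = 1), and vice versa.
+ */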
+static inline unsigned int ntb_edma_ring_sel(struct ntb_transport_qp *qp,
+ unsigned int n)
+{
+ return n ^ !!ntb_qp_edma_is_ep(qp);
+}
+
+static inline struct ntb_edma_ring *
+ntb_edma_ring_local(struct ntb_transport_qp *qp, unsigned int n)
+{
+ unsigned int r = ntb_edma_ring_sel(qp, n);
+
+ return &((struct ntb_edma_ring *)qp->rx_buff)[r];
+}
+
+static inline struct ntb_edma_ring __iomem *
+ntb_edma_ring_remote(struct ntb_transport_qp *qp, unsigned int n)
+{
+ unsigned int r = ntb_edma_ring_sel(qp, n);
+
+ return &((struct ntb_edma_ring __iomem *)qp->tx_mw)[r];
+}
+
+static inline struct ntb_edma_desc *
+ntb_edma_desc_local(struct ntb_transport_qp *qp, unsigned int n, unsigned int i)
+{
+ return &ntb_edma_ring_local(qp, n)->desc[i];
+}
+
+static inline struct ntb_edma_desc __iomem *
+ntb_edma_desc_remote(struct ntb_transport_qp *qp, unsigned int n,
+ unsigned int i)
+{
+ return &ntb_edma_ring_remote(qp, n)->desc[i];
+}
+
+static inline u32 *ntb_edma_head_local(struct ntb_transport_qp *qp,
+ unsigned int n)
+{
+ return &ntb_edma_ring_local(qp, n)->head;
+}
+
+static inline u32 __iomem *ntb_edma_head_remote(struct ntb_transport_qp *qp,
+ unsigned int n)
+{
+ return &ntb_edma_ring_remote(qp, n)->head;
+}
+
+static inline u32 *ntb_edma_tail_local(struct ntb_transport_qp *qp,
+ unsigned int n)
+{
+ return &ntb_edma_ring_local(qp, n)->tail;
+}
+
+static inline u32 __iomem *ntb_edma_tail_remote(struct ntb_transport_qp *qp,
+ unsigned int n)
+{
+ return &ntb_edma_ring_remote(qp, n)->tail;
+}
+
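+/*
+ * _O accessors reference the peer-visible copy through the TX memory
+ * window (__iomem); _I accessors reference the local copy in rx_buff.
+ */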
+/* The 'i' must be generated by ntb_edma_ring_idx() */
+#define NTB_DESC_TX_O(qp, i) ntb_edma_desc_remote(qp, 0, i)
+#define NTB_DESC_TX_I(qp, i) ntb_edma_desc_local(qp, 0, i)
+#define NTB_DESC_RX_O(qp, i) ntb_edma_desc_remote(qp, 1, i)
+#define NTB_DESC_RX_I(qp, i) ntb_edma_desc_local(qp, 1, i)
+
+#define NTB_HEAD_TX_I(qp) ntb_edma_head_local(qp, 0)
+#define NTB_HEAD_RX_O(qp) ntb_edma_head_remote(qp, 1)
+
+#define NTB_TAIL_TX_O(qp) ntb_edma_tail_remote(qp, 0)
+#define NTB_TAIL_RX_I(qp) ntb_edma_tail_local(qp, 1)
+
+/* ntb_edma_ring helpers */
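+/*
+ * head/tail and the per-QP rx_prod/rx_cons/tx_issue/tx_cons counters are
+ * free-running u32 values; ntb_edma_ring_idx() reduces them to a slot.
+ */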
+static __always_inline u32 ntb_edma_ring_idx(u32 v)
+{
+ return v & NTB_EDMA_RING_MASK;
+}
+
+static __always_inline u32 ntb_edma_ring_used_entry(u32 head, u32 tail)
+{
+ if (head >= tail) {
+ WARN_ON_ONCE((head - tail) > (NTB_EDMA_RING_ENTRIES - 1));
+ return head - tail;
+ }
+
+ WARN_ON_ONCE((U32_MAX - tail + head + 1) > (NTB_EDMA_RING_ENTRIES - 1));
+ return U32_MAX - tail + head + 1;
+}
+
+static __always_inline u32 ntb_edma_ring_free_entry(u32 head, u32 tail)
+{
+ return NTB_EDMA_RING_ENTRIES - ntb_edma_ring_used_entry(head, tail) - 1;
+}
+
+static __always_inline bool ntb_edma_ring_full(u32 head, u32 tail)
+{
+ return ntb_edma_ring_free_entry(head, tail) == 0;
+}
+
+static void *ntb_transport_edma_entry_priv_alloc(void)
+{
+ return kzalloc(sizeof(struct ntb_queue_entry_edma), GFP_KERNEL);
+}
+
+static void ntb_transport_edma_entry_priv_free(void *priv)
+{
+ kfree(priv);
+}
+
+static unsigned int ntb_transport_edma_tx_free_entry(struct ntb_transport_qp *qp)
+{
+ struct ntb_transport_qp_edma *edma = qp->priv;
+ unsigned int head, tail;
+
+ scoped_guard(spinlock_irqsave, &edma->tx_lock) {
+ /* Within this critical section, only 'head' may advance */
+ tail = READ_ONCE(edma->tx_issue);
+ head = READ_ONCE(*NTB_HEAD_TX_I(qp));
+ }
+ /*
+ * The 'used' count indicates how many entries the other end has
+ * refilled, i.e. how many are available for us to use for TX.
+ */
+ return ntb_edma_ring_used_entry(head, tail);
+}
+
+static void ntb_transport_edma_debugfs_stats_show(struct seq_file *s,
+ struct ntb_transport_qp *qp)
+{
+ seq_printf(s, "rx_bytes - \t%llu\n", qp->rx_bytes);
+ seq_printf(s, "rx_pkts - \t%llu\n", qp->rx_pkts);
+ seq_printf(s, "rx_err_no_buf - %llu\n", qp->rx_err_no_buf);
+ seq_printf(s, "rx_buff - \t0x%p\n", qp->rx_buff);
+ seq_printf(s, "rx_max_entry - \t%u\n", qp->rx_max_entry);
+ seq_printf(s, "rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry);
+
+ seq_printf(s, "tx_bytes - \t%llu\n", qp->tx_bytes);
+ seq_printf(s, "tx_pkts - \t%llu\n", qp->tx_pkts);
+ seq_printf(s, "tx_ring_full - \t%llu\n", qp->tx_ring_full);
+ seq_printf(s, "tx_err_no_buf - %llu\n", qp->tx_err_no_buf);
+ seq_printf(s, "tx_mw - \t0x%p\n", qp->tx_mw);
+ seq_printf(s, "tx_max_entry - \t%u\n", qp->tx_max_entry);
+ seq_printf(s, "free tx - \t%u\n", ntb_transport_tx_free_entry(qp));
+ seq_putc(s, '\n');
+
+ seq_puts(s, "Using Remote eDMA - Yes\n");
+ seq_printf(s, "QP Link - \t%s\n", qp->link_is_up ? "Up" : "Down");
+}
+
+static void ntb_transport_edma_db_work(struct work_struct *work)
+{
+ struct ntb_transport_qp_edma *edma =
+ container_of(work, struct ntb_transport_qp_edma, db_work);
+ struct ntb_transport_qp *qp = edma->qp;
+
+ ntb_peer_db_set(qp->ndev, qp->qp_bit);
+}
+
+static void ntb_transport_edma_notify_peer(struct ntb_transport_qp_edma *edma)
+{
+ struct ntb_transport_qp *qp = edma->qp;
+ struct ntb_transport_ctx_edma *ctx = qp->transport->priv;
+
+ if (!ctx->be->ops->notify_peer(&ctx->chans, ctx->be_priv, qp->qp_num))
+ return;
+
+ /*
+ * Called from contexts that may be atomic. Since ntb_peer_db_set()
+ * may sleep, delegate the actual doorbell write to a workqueue.
+ */
+ queue_work(system_highpri_wq, &edma->db_work);
+}
+
+static void ntb_transport_edma_isr(void *data, int qp_num)
+{
+ struct ntb_transport_ctx *nt = data;
+ struct ntb_transport_qp_edma *edma;
+ struct ntb_transport_ctx_edma *ctx;
+ struct ntb_transport_qp *qp;
+
+ if (qp_num < 0 || qp_num >= nt->qp_count)
+ return;
+
+ qp = &nt->qp_vec[qp_num];
+ if (WARN_ON(!qp))
+ return;
+
+ ctx = (struct ntb_transport_ctx_edma *)qp->transport->priv;
+ edma = qp->priv;
+ if (!edma || !ctx)
+ return;
+
+ queue_work(ctx->wq, &edma->rx_work);
+ queue_work(ctx->wq, &edma->tx_work);
+}
+
+static int ntb_transport_edma_rc_init(struct ntb_transport_ctx *nt)
+{
+ struct ntb_transport_ctx_edma *ctx = nt->priv;
+ struct ntb_dev *ndev = nt->ndev;
+ struct pci_dev *pdev = ndev->pdev;
+ int peer_mw;
+ int rc;
+
+ if (ctx->remote_edma_mode != REMOTE_EDMA_UNKNOWN)
+ return 0;
+
+ peer_mw = ntb_peer_mw_count(ndev);
+ if (peer_mw <= 0)
+ return -ENODEV;
+
+ rc = ctx->be->ops->rc_connect(ndev, ctx->be_priv, peer_mw - 1, nt->qp_count);
+ if (rc) {
+ dev_err(&pdev->dev, "Failed to enable remote eDMA: %d\n", rc);
+ return rc;
+ }
+
+ rc = ctx->be->ops->tx_chans_init(ndev, ctx->be_priv, &ctx->chans, true);
+ if (rc) {
+ dev_err(&pdev->dev, "Failed to setup eDMA channels: %d\n", rc);
+ goto err_rc_disconnect;
+ }
+
+ ctx->remote_edma_mode = REMOTE_EDMA_RC;
+ return 0;
+
+err_rc_disconnect:
+ ctx->be->ops->rc_disconnect(ndev, ctx->be_priv);
+ return rc;
+}
+
+static void ntb_transport_edma_rc_deinit(struct ntb_transport_ctx *nt)
+{
+ struct ntb_transport_ctx_edma *ctx = nt->priv;
+ struct ntb_dev *ndev = nt->ndev;
+
+ if (ctx->remote_edma_mode != REMOTE_EDMA_RC)
+ return;
+
+ ctx->be->ops->tx_chans_deinit(&ctx->chans);
+ ctx->be->ops->rc_disconnect(ndev, ctx->be_priv);
+
+ ctx->remote_edma_mode = REMOTE_EDMA_UNKNOWN;
+}
+
+static int ntb_transport_edma_ep_init(struct ntb_transport_ctx *nt)
+{
+ struct ntb_transport_ctx_edma *ctx = nt->priv;
+ struct ntb_dev *ndev = nt->ndev;
+ struct pci_dev *pdev = ndev->pdev;
+ int peer_mw;
+ int rc;
+
+ if (ctx->remote_edma_mode != REMOTE_EDMA_UNKNOWN)
+ return 0;
+
+ /*
+ * This check assumes that the endpoint (pci-epf-vntb.c)
+ * ntb_dev_ops implements .get_private_data() while the host side
+ * (ntb_hw_epf.c) does not.
+ */
+ if (!ntb_get_private_data(ndev))
+ return 0;
+
+ peer_mw = ntb_peer_mw_count(ndev);
+ if (peer_mw <= 0)
+ return -ENODEV;
+
+ rc = ctx->be->ops->ep_publish(ndev, ctx->be_priv, peer_mw - 1, nt->qp_count,
+ ntb_transport_edma_isr, nt);
+ if (rc) {
+ dev_err(&pdev->dev,
+ "Failed to set up memory window for eDMA: %d\n", rc);
+ return rc;
+ }
+
+ rc = ctx->be->ops->tx_chans_init(ndev, ctx->be_priv, &ctx->chans, false);
+ if (rc) {
+ dev_err(&pdev->dev, "Failed to setup eDMA channels: %d\n", rc);
+ ctx->be->ops->ep_unpublish(ndev, ctx->be_priv);
+ return rc;
+ }
+
+ ctx->remote_edma_mode = REMOTE_EDMA_EP;
+ return 0;
+}
+
+static void ntb_transport_edma_ep_deinit(struct ntb_transport_ctx *nt)
+{
+ struct ntb_transport_ctx_edma *ctx = nt->priv;
+ struct ntb_dev *ndev = nt->ndev;
+
+ if (ctx->remote_edma_mode != REMOTE_EDMA_EP)
+ return;
+
+ ctx->be->ops->tx_chans_deinit(&ctx->chans);
+ ctx->be->ops->ep_unpublish(ndev, ctx->be_priv);
+
+ ctx->remote_edma_mode = REMOTE_EDMA_UNKNOWN;
+}
+
+static int ntb_transport_edma_setup_qp_mw(struct ntb_transport_ctx *nt,
+ unsigned int qp_num)
+{
+ struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
+ struct ntb_dev *ndev = nt->ndev;
+ struct ntb_queue_entry *entry;
+ struct ntb_transport_mw *mw;
+ unsigned int mw_num, mw_count, qp_count;
+ unsigned int qp_offset, rx_info_offset;
+ unsigned int mw_size, mw_size_per_qp;
+ unsigned int num_qps_mw;
+ size_t edma_total;
+ unsigned int i;
+ int node;
+
+ mw_count = nt->mw_count;
+ qp_count = nt->qp_count;
+
+ mw_num = QP_TO_MW(nt, qp_num);
+ mw = &nt->mw_vec[mw_num];
+
+ if (!mw->virt_addr)
+ return -ENOMEM;
+
+ if (mw_num < qp_count % mw_count)
+ num_qps_mw = qp_count / mw_count + 1;
+ else
+ num_qps_mw = qp_count / mw_count;
+
+ mw_size = min(nt->mw_vec[mw_num].phys_size, mw->xlat_size);
+ if (max_mw_size && mw_size > max_mw_size)
+ mw_size = max_mw_size;
+
+ mw_size_per_qp = round_down((unsigned int)mw_size / num_qps_mw, SZ_64);
+ qp_offset = mw_size_per_qp * (qp_num / mw_count);
+ rx_info_offset = mw_size_per_qp - sizeof(struct ntb_rx_info);
+
+ qp->tx_mw_size = mw_size_per_qp;
+ qp->tx_mw = nt->mw_vec[mw_num].vbase + qp_offset;
+ if (!qp->tx_mw)
+ return -EINVAL;
+ qp->tx_mw_phys = nt->mw_vec[mw_num].phys_addr + qp_offset;
+ if (!qp->tx_mw_phys)
+ return -EINVAL;
+ qp->rx_info = qp->tx_mw + rx_info_offset;
+ qp->rx_buff = mw->virt_addr + qp_offset;
+ qp->remote_rx_info = qp->rx_buff + rx_info_offset;
+
+ /* Due to housekeeping, there must be at least two buffers */
+ qp->tx_max_frame = min(nt->transport_mtu, mw_size_per_qp / 2);
+ qp->rx_max_frame = min(nt->transport_mtu, mw_size_per_qp / 2);
+
+ /* In eDMA mode, decouple from MW sizing and force ring-sized entries */
+ edma_total = 2 * sizeof(struct ntb_edma_ring);
+ if (rx_info_offset < edma_total) {
+ dev_err(&ndev->dev,
+ "Ring space requires %zu bytes but only %u are available\n",
+ edma_total, rx_info_offset);
+ return -EINVAL;
+ }
+ qp->tx_max_entry = NTB_EDMA_RING_ENTRIES;
+ qp->rx_max_entry = NTB_EDMA_RING_ENTRIES;
+
+ /*
+ * Top up the RX free list so that the number of allocated entries
+ * matches the ring-sized rx_max_entry set above for this queue pair.
+ */
+ node = dev_to_node(&ndev->dev);
+ for (i = qp->rx_alloc_entry; i < qp->rx_max_entry; i++) {
+ entry = ntb_queue_entry_alloc(nt, qp, node);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->qp = qp;
+ ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
+ &qp->rx_free_q);
+ qp->rx_alloc_entry++;
+ }
+
+ memset(qp->rx_buff, 0, edma_total);
+
+ qp->rx_pkts = 0;
+ qp->tx_pkts = 0;
+
+ return 0;
+}
+
+static int ntb_transport_edma_rx_complete(struct ntb_transport_qp *qp)
+{
+ struct device *dma_dev = ntb_get_dma_dev(qp->ndev);
+ struct ntb_transport_qp_edma *edma = qp->priv;
+ struct ntb_queue_entry_edma *e;
+ struct ntb_queue_entry *entry;
+ struct ntb_edma_desc *in;
+ unsigned int len;
+ bool link_down;
+ u32 idx;
+
+ if (ntb_edma_ring_used_entry(READ_ONCE(*NTB_TAIL_RX_I(qp)),
+ edma->rx_cons) == 0)
+ return 0;
+
+ idx = ntb_edma_ring_idx(edma->rx_cons);
+ in = NTB_DESC_RX_I(qp, idx);
+ if (!(in->flags & DESC_DONE_FLAG))
+ return 0;
+
+ link_down = in->flags & LINK_DOWN_FLAG;
+ in->flags = 0;
+ len = in->len; /* might be smaller than entry->len */
+
+ entry = (struct ntb_queue_entry *)(uintptr_t)in->data;
+ if (WARN_ON(!entry))
+ return 0;
+
+ e = entry->priv;
+ dma_unmap_single(dma_dev, e->addr, entry->len, DMA_FROM_DEVICE);
+
+ if (link_down) {
+ ntb_qp_link_down(qp);
+ edma->rx_cons++;
+ ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q);
+ return 1;
+ }
+
+ qp->rx_bytes += len;
+ qp->rx_pkts++;
+ edma->rx_cons++;
+
+ if (qp->rx_handler && qp->client_ready)
+ qp->rx_handler(qp, qp->cb_data, entry->cb_data, len);
+
+ ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q);
+ return 1;
+}
+
+static void ntb_transport_edma_rx_work(struct work_struct *work)
+{
+ struct ntb_transport_qp_edma *edma =
+ container_of(work, struct ntb_transport_qp_edma, rx_work);
+ struct ntb_transport_qp *qp = edma->qp;
+ struct ntb_transport_ctx_edma *ctx = qp->transport->priv;
+ unsigned int i;
+
+ for (i = 0; i < NTB_EDMA_MAX_POLL; i++) {
+ if (!ntb_transport_edma_rx_complete(qp))
+ break;
+ }
+
+ if (ntb_transport_edma_rx_complete(qp))
+ queue_work(ctx->wq, &edma->rx_work);
+}
+
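+/*
+ * TX completion path: the DMA callback (or the submit error path) marks
+ * an entry with DESC_DONE_FLAG; entries are then retired here in ring
+ * order, the flags and the new tail are published to the peer, and the
+ * entry is returned to tx_free_q.
+ */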
+static void ntb_transport_edma_tx_work(struct work_struct *work)
+{
+ struct ntb_transport_qp_edma *edma =
+ container_of(work, struct ntb_transport_qp_edma, tx_work);
+ struct ntb_transport_qp *qp = edma->qp;
+ struct ntb_edma_desc *in, __iomem *out;
+ struct ntb_queue_entry *entry;
+ void *cb_data;
+ int len;
+ u32 idx;
+
+ while (ntb_edma_ring_used_entry(READ_ONCE(edma->tx_issue),
+ edma->tx_cons) != 0) {
+ /* Paired with smp_wmb() in ntb_transport_edma_tx_enqueue_inner() */
+ smp_rmb();
+
+ idx = ntb_edma_ring_idx(edma->tx_cons);
+ in = NTB_DESC_TX_I(qp, idx);
+ entry = (struct ntb_queue_entry *)(uintptr_t)in->data;
+ if (!entry || !(entry->flags & DESC_DONE_FLAG))
+ break;
+
+ in->data = 0;
+
+ cb_data = entry->cb_data;
+ len = entry->len;
+
+ out = NTB_DESC_TX_O(qp, idx);
+
+ WRITE_ONCE(edma->tx_cons, edma->tx_cons + 1);
+
+ iowrite32(entry->flags, &out->flags);
+ iowrite32(edma->tx_cons, NTB_TAIL_TX_O(qp));
+
+ ntb_transport_edma_notify_peer(edma);
+
+ ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry,
+ &qp->tx_free_q);
+
+ if (qp->tx_handler)
+ qp->tx_handler(qp, qp->cb_data, cb_data, len);
+
+ if (len < 0)
+ continue;
+
+ /* stat updates */
+ qp->tx_bytes += len;
+ qp->tx_pkts++;
+ }
+}
+
+static void ntb_transport_edma_tx_cb(void *data,
+ const struct dmaengine_result *res)
+{
+ struct ntb_queue_entry *entry = data;
+ struct ntb_transport_qp *qp = entry->qp;
+ struct ntb_queue_entry_edma *e = entry->priv;
+ struct ntb_transport_ctx *nt = qp->transport;
+ struct device *dma_dev = ntb_get_dma_dev(qp->ndev);
+ enum dmaengine_tx_result dma_err = res->result;
+ struct ntb_transport_ctx_edma *ctx = nt->priv;
+ struct ntb_transport_qp_edma *edma = qp->priv;
+
+ switch (dma_err) {
+ case DMA_TRANS_READ_FAILED:
+ case DMA_TRANS_WRITE_FAILED:
+ case DMA_TRANS_ABORTED:
+ entry->errors++;
+ entry->len = -EIO;
+ break;
+ case DMA_TRANS_NOERROR:
+ default:
+ break;
+ }
+ dma_unmap_sg(dma_dev, &e->sgl, 1, DMA_TO_DEVICE);
+ sg_dma_address(&e->sgl) = 0;
+
+ entry->flags |= DESC_DONE_FLAG;
+
+ queue_work(ctx->wq, &edma->tx_work);
+}
+
+static int ntb_transport_edma_submit(struct device *d, struct dma_chan *chan,
+ size_t len, void *rc_src, dma_addr_t dst,
+ struct ntb_queue_entry *entry)
+{
+ struct ntb_queue_entry_edma *e = entry->priv;
+ struct dma_async_tx_descriptor *txd;
+ struct scatterlist *sgl = &e->sgl;
+ struct dma_slave_config cfg;
+ dma_cookie_t cookie;
+ int nents, rc;
+
+ if (!d)
+ return -ENODEV;
+
+ if (!chan)
+ return -ENXIO;
+
+ if (WARN_ON(!rc_src || !dst))
+ return -EINVAL;
+
+ if (WARN_ON(sg_dma_address(sgl)))
+ return -EINVAL;
+
+ sg_init_one(sgl, rc_src, len);
+ nents = dma_map_sg(d, sgl, 1, DMA_TO_DEVICE);
+ if (nents <= 0)
+ return -EIO;
+
+ memset(&cfg, 0, sizeof(cfg));
+ cfg.dst_addr = dst;
+ cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ cfg.direction = DMA_MEM_TO_DEV;
+
+ txd = dmaengine_prep_config_sg(chan, sgl, 1, DMA_MEM_TO_DEV,
+ DMA_CTRL_ACK | DMA_PREP_INTERRUPT, &cfg);
+ if (!txd) {
+ rc = -EIO;
+ goto out_unmap;
+ }
+
+ txd->callback_result = ntb_transport_edma_tx_cb;
+ txd->callback_param = entry;
+
+ cookie = dmaengine_submit(txd);
+ if (dma_submit_error(cookie)) {
+ rc = -EIO;
+ goto out_unmap;
+ }
+ dma_async_issue_pending(chan);
+ return 0;
+out_unmap:
+ dma_unmap_sg(d, sgl, 1, DMA_TO_DEVICE);
+ return rc;
+}
+
+static struct dma_chan *ntb_transport_edma_pick_chan(struct ntb_edma_chans *chans,
+ unsigned int idx)
+{
+ return chans->chan[idx % chans->num_chans];
+}
+
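+/*
+ * Claim a TX slot, publish the transfer length, then kick a DMA from the
+ * local buffer to the bus address the peer posted via its RX enqueue
+ * (visible here as in->addr).
+ */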
+static int ntb_transport_edma_tx_enqueue_inner(struct ntb_transport_qp *qp,
+ struct ntb_queue_entry *entry)
+{
+ struct device *dma_dev = ntb_get_dma_dev(qp->ndev);
+ struct ntb_transport_qp_edma *edma = qp->priv;
+ struct ntb_transport_ctx *nt = qp->transport;
+ struct ntb_edma_desc *in, __iomem *out;
+ struct ntb_transport_ctx_edma *ctx = nt->priv;
+ unsigned int len = entry->len;
+ struct dma_chan *chan;
+ u32 issue, idx, head;
+ dma_addr_t dst;
+ int rc;
+
+ WARN_ON_ONCE(entry->flags & DESC_DONE_FLAG);
+
+ scoped_guard(spinlock_irqsave, &edma->tx_lock) {
+ head = READ_ONCE(*NTB_HEAD_TX_I(qp));
+ issue = edma->tx_issue;
+ if (ntb_edma_ring_used_entry(head, issue) == 0) {
+ qp->tx_ring_full++;
+ return -ENOSPC;
+ }
+
+ /*
+ * ntb_transport_edma_tx_work() checks entry->flags
+ * so it needs to be set before tx_issue++.
+ */
+ idx = ntb_edma_ring_idx(issue);
+ in = NTB_DESC_TX_I(qp, idx);
+ in->data = (uintptr_t)entry;
+
+ /* Make in->data visible before tx_issue++ */
+ smp_wmb();
+
+ WRITE_ONCE(edma->tx_issue, edma->tx_issue + 1);
+ }
+
+ /* Publish the final transfer length to the other end */
+ out = NTB_DESC_TX_O(qp, idx);
+ iowrite32(len, &out->len);
+ ioread32(&out->len);
+
+ if (unlikely(!len)) {
+ entry->flags |= DESC_DONE_FLAG;
+ queue_work(ctx->wq, &edma->tx_work);
+ return 0;
+ }
+
+ /* Paired with dma_wmb() in ntb_transport_edma_rx_enqueue_inner() */
+ dma_rmb();
+
+ /* kick remote eDMA read transfer */
+ dst = (dma_addr_t)in->addr;
+ chan = ntb_transport_edma_pick_chan(&ctx->chans, qp->qp_num);
+ rc = ntb_transport_edma_submit(dma_dev, chan, len, entry->buf, dst,
+ entry);
+ if (rc) {
+ entry->errors++;
+ entry->len = -EIO;
+ entry->flags |= DESC_DONE_FLAG;
+ queue_work(ctx->wq, &edma->tx_work);
+ }
+ return 0;
+}
+
+static int ntb_transport_edma_tx_enqueue(struct ntb_transport_qp *qp,
+ struct ntb_queue_entry *entry,
+ void *cb, void *data, unsigned int len,
+ unsigned int flags)
+{
+ struct ntb_queue_entry_edma *e = entry->priv;
+ struct device *dma_dev;
+
+ if (e->addr) {
+ /* Deferred unmap */
+ dma_dev = ntb_get_dma_dev(qp->ndev);
+ dma_unmap_single(dma_dev, e->addr, entry->len,
+ DMA_TO_DEVICE);
+ }
+
+ entry->cb_data = cb;
+ entry->buf = data;
+ entry->len = len;
+ entry->flags = flags;
+ entry->errors = 0;
+
+ e->addr = 0;
+
+ WARN_ON_ONCE(!ntb_qp_edma_enabled(qp));
+
+ return ntb_transport_edma_tx_enqueue_inner(qp, entry);
+}
+
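+/*
+ * Post a receive buffer: map it for DMA, publish its length and bus
+ * address to the peer's copy of the descriptor, then advance and publish
+ * the ring head so the peer can target it.
+ */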
+static int ntb_transport_edma_rx_enqueue_inner(struct ntb_transport_qp *qp,
+ struct ntb_queue_entry *entry)
+{
+ struct device *dma_dev = ntb_get_dma_dev(qp->ndev);
+ struct ntb_transport_qp_edma *edma = qp->priv;
+ struct ntb_queue_entry_edma *e = entry->priv;
+ struct ntb_edma_desc *in, __iomem *out;
+ unsigned int len = entry->len;
+ void *data = entry->buf;
+ dma_addr_t dst;
+ u32 idx;
+ int rc;
+
+ dst = dma_map_single(dma_dev, data, len, DMA_FROM_DEVICE);
+ rc = dma_mapping_error(dma_dev, dst);
+ if (rc)
+ return rc;
+
+ guard(spinlock_bh)(&edma->rx_lock);
+
+ if (ntb_edma_ring_full(READ_ONCE(edma->rx_prod),
+ READ_ONCE(edma->rx_cons))) {
+ rc = -ENOSPC;
+ goto out_unmap;
+ }
+
+ idx = ntb_edma_ring_idx(edma->rx_prod);
+ in = NTB_DESC_RX_I(qp, idx);
+ out = NTB_DESC_RX_O(qp, idx);
+
+ iowrite32(len, &out->len);
+ iowrite64(dst, &out->addr);
+
+ WARN_ON(in->flags & DESC_DONE_FLAG);
+ in->data = (uintptr_t)entry;
+ e->addr = dst;
+
+ /* Ensure len/addr are visible before the head update */
+ dma_wmb();
+
+ WRITE_ONCE(edma->rx_prod, edma->rx_prod + 1);
+ iowrite32(edma->rx_prod, NTB_HEAD_RX_O(qp));
+
+ return 0;
+out_unmap:
+ dma_unmap_single(dma_dev, dst, len, DMA_FROM_DEVICE);
+ return rc;
+}
+
+static int ntb_transport_edma_rx_enqueue(struct ntb_transport_qp *qp,
+ struct ntb_queue_entry *entry)
+{
+ int rc;
+
+ rc = ntb_transport_edma_rx_enqueue_inner(qp, entry);
+ if (rc) {
+ ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
+ &qp->rx_free_q);
+ return rc;
+ }
+
+ if (qp->active)
+ tasklet_schedule(&qp->rxc_db_work);
+
+ return 0;
+}
+
+static void ntb_transport_edma_rx_poll(struct ntb_transport_qp *qp)
+{
+ struct ntb_transport_ctx *nt = qp->transport;
+ struct ntb_transport_ctx_edma *ctx = nt->priv;
+ struct ntb_transport_qp_edma *edma = qp->priv;
+
+ queue_work(ctx->wq, &edma->rx_work);
+ queue_work(ctx->wq, &edma->tx_work);
+}
+
+static int ntb_transport_edma_qp_init(struct ntb_transport_ctx *nt,
+ unsigned int qp_num)
+{
+ struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
+ struct ntb_transport_qp_edma *edma;
+ struct ntb_dev *ndev = nt->ndev;
+ int node;
+
+ node = dev_to_node(&ndev->dev);
+
+ qp->priv = kzalloc_node(sizeof(*edma), GFP_KERNEL, node);
+ if (!qp->priv)
+ return -ENOMEM;
+
+ edma = (struct ntb_transport_qp_edma *)qp->priv;
+ edma->qp = qp;
+ edma->rx_prod = 0;
+ edma->rx_cons = 0;
+ edma->tx_cons = 0;
+ edma->tx_issue = 0;
+
+ spin_lock_init(&edma->rx_lock);
+ spin_lock_init(&edma->tx_lock);
+
+ INIT_WORK(&edma->db_work, ntb_transport_edma_db_work);
+ INIT_WORK(&edma->rx_work, ntb_transport_edma_rx_work);
+ INIT_WORK(&edma->tx_work, ntb_transport_edma_tx_work);
+
+ return 0;
+}
+
+static void ntb_transport_edma_qp_free(struct ntb_transport_qp *qp)
+{
+ struct ntb_transport_qp_edma *edma = qp->priv;
+
+ disable_work_sync(&edma->db_work);
+ disable_work_sync(&edma->rx_work);
+ disable_work_sync(&edma->tx_work);
+
+ kfree(qp->priv);
+ qp->priv = NULL;
+}
+
+static int ntb_transport_edma_link_up_pre(struct ntb_transport_ctx *nt)
+{
+ struct ntb_dev *ndev = nt->ndev;
+ struct pci_dev *pdev = ndev->pdev;
+ int rc;
+
+ rc = ntb_transport_edma_ep_init(nt);
+ if (rc)
+ dev_err(&pdev->dev, "Failed to init EP: %d\n", rc);
+
+ return rc;
+}
+
+static int ntb_transport_edma_link_up_post(struct ntb_transport_ctx *nt)
+{
+ struct ntb_dev *ndev = nt->ndev;
+ struct pci_dev *pdev = ndev->pdev;
+ int rc;
+
+ rc = ntb_transport_edma_rc_init(nt);
+ if (rc)
+ dev_err(&pdev->dev, "Failed to init RC: %d\n", rc);
+
+ return rc;
+}
+
+static void ntb_transport_edma_link_down(struct ntb_transport_ctx *nt)
+{
+ struct ntb_transport_ctx_edma *ctx = nt->priv;
+
+ WARN_ON_ONCE(!ctx);
+ switch (ctx->remote_edma_mode) {
+ case REMOTE_EDMA_EP:
+ ntb_transport_edma_ep_deinit(nt);
+ break;
+ case REMOTE_EDMA_RC:
+ ntb_transport_edma_rc_deinit(nt);
+ break;
+ default:
+ break;
+ }
+}
+
+static void ntb_transport_edma_disable(struct ntb_transport_ctx *nt)
+{
+ struct ntb_transport_ctx_edma *ctx = nt->priv;
+ struct ntb_dev *ndev = nt->ndev;
+
+ if (!ctx)
+ return;
+
+ if (ctx->wq)
+ destroy_workqueue(ctx->wq);
+ if (ctx->be_priv)
+ ctx->be->ops->free(ndev, &ctx->be_priv);
+ if (ctx->be)
+ ntb_edma_backend_put(ctx->be);
+
+ kfree(ctx);
+ nt->priv = NULL;
+}
+
+static int ntb_transport_edma_enable(struct ntb_transport_ctx *nt,
+ unsigned int *mw_count)
+{
+ struct ntb_transport_ctx_edma *ctx;
+ struct ntb_dev *ndev = nt->ndev;
+ int node;
+ int ret;
+
+ node = dev_to_node(&ndev->dev);
+ ctx = kzalloc_node(sizeof(*ctx), GFP_KERNEL, node);
+ if (!ctx)
+ return -ENOMEM;
+
+ nt->priv = ctx;
+ ctx->be = ntb_edma_backend_get(ndev);
+ if (!ctx->be) {
+ dev_err(&ndev->dev, "No suitable eDMA backend found\n");
+ ret = -ENODEV;
+ goto err;
+ }
+ dev_info(&ndev->dev, "Selected eDMA backend: %s\n", ctx->be->name);
+
+ ret = ctx->be->ops->alloc(ndev, &ctx->be_priv);
+ if (ret)
+ goto err;
+
+ /*
+ * We need at least one MW for the transport plus one MW reserved
+ * for the remote eDMA window (see ntb_edma_setup_mws/peer).
+ */
+ if (*mw_count <= 1) {
+ dev_err(&ndev->dev,
+ "remote eDMA requires at least two MWs (have %u)\n",
+ *mw_count);
+ ret = -ENODEV;
+ goto err;
+ }
+
+ ctx->wq = alloc_workqueue("ntb-edma-wq", WQ_UNBOUND | WQ_SYSFS, 0);
+ if (!ctx->wq) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ /* Reserve the last peer MW exclusively for the eDMA window. */
+ *mw_count -= 1;
+
+ return 0;
+err:
+ ntb_transport_edma_disable(nt);
+ return ret;
+}
+
+static const struct ntb_transport_backend_ops edma_transport_ops = {
+ .enable = ntb_transport_edma_enable,
+ .disable = ntb_transport_edma_disable,
+ .qp_init = ntb_transport_edma_qp_init,
+ .qp_free = ntb_transport_edma_qp_free,
+ .link_up_pre = ntb_transport_edma_link_up_pre,
+ .link_up_post = ntb_transport_edma_link_up_post,
+ .link_down = ntb_transport_edma_link_down,
+ .setup_qp_mw = ntb_transport_edma_setup_qp_mw,
+ .entry_priv_alloc = ntb_transport_edma_entry_priv_alloc,
+ .entry_priv_free = ntb_transport_edma_entry_priv_free,
+ .tx_free_entry = ntb_transport_edma_tx_free_entry,
+ .tx_enqueue = ntb_transport_edma_tx_enqueue,
+ .rx_enqueue = ntb_transport_edma_rx_enqueue,
+ .rx_poll = ntb_transport_edma_rx_poll,
+ .debugfs_stats_show = ntb_transport_edma_debugfs_stats_show,
+};
+
+static struct ntb_transport_backend ntb_edma_transport_backend = {
+ .name = "edma",
+ .ops = &edma_transport_ops,
+ .owner = THIS_MODULE,
+};
+
+static int ntb_transport_edma_client_probe(struct ntb_client *self,
+ struct ntb_dev *ndev)
+{
+ return ntb_transport_attach(ndev, "edma", false, max_mw_size, 0xffff,
+ max_num_clients, 0, false,
+ NTB_EDMA_RING_ENTRIES);
+}
+
+static void ntb_transport_edma_client_remove(struct ntb_client *self,
+ struct ntb_dev *ndev)
+{
+ ntb_transport_detach(ndev);
+}
+
+static struct ntb_client ntb_transport_edma_client = {
+ .ops = {
+ .probe = ntb_transport_edma_client_probe,
+ .remove = ntb_transport_edma_client_remove,
+ },
+};
+
+static int __init ntb_transport_edma_init(void)
+{
+ int rc;
+
+ rc = ntb_transport_backend_register(&ntb_edma_transport_backend);
+ if (rc)
+ return rc;
+
+ rc = ntb_register_client(&ntb_transport_edma_client);
+ if (rc)
+ ntb_transport_backend_unregister(&ntb_edma_transport_backend);
+
+ return rc;
+}
+module_init(ntb_transport_edma_init);
+
+static void ntb_transport_edma_exit(void)
+{
+ ntb_unregister_client(&ntb_transport_edma_client);
+ ntb_transport_backend_unregister(&ntb_edma_transport_backend);
+}
+module_exit(ntb_transport_edma_exit);
+
+MODULE_DESCRIPTION("NTB transport backend for remote PCI embedded DMA");
+MODULE_LICENSE("Dual BSD/GPL");
--
2.51.0