[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220321104209.273535-7-simon.horman@corigine.com>
Date: Mon, 21 Mar 2022 11:42:05 +0100
From: Simon Horman <simon.horman@...igine.com>
To: David Miller <davem@...emloft.net>,
Jakub Kicinski <kuba@...nel.org>
Cc: Yinjun Zhang <yinjun.zhang@...igine.com>, netdev@...r.kernel.org,
oss-drivers@...igine.com
Subject: [PATCH net-next v2 06/10] nfp: use TX ring pointer write back
From: Jakub Kicinski <jakub.kicinski@...ronome.com>
Newer versions of the PCIe microcode support writing back the
position of the TX pointer back into host memory. This speeds
up TX completions, because we avoid a read from device memory
(replacing PCIe read with DMA coherent read).
Signed-off-by: Jakub Kicinski <jakub.kicinski@...ronome.com>
Signed-off-by: Fei Qin <fei.qin@...igine.com>
Signed-off-by: Simon Horman <simon.horman@...igine.com>
---
drivers/net/ethernet/netronome/nfp/nfd3/dp.c | 5 ++--
drivers/net/ethernet/netronome/nfp/nfp_net.h | 7 +++++
.../ethernet/netronome/nfp/nfp_net_common.c | 9 +++++-
.../ethernet/netronome/nfp/nfp_net_debugfs.c | 5 +++-
.../net/ethernet/netronome/nfp/nfp_net_dp.c | 29 +++++++++++++++++--
.../net/ethernet/netronome/nfp/nfp_net_dp.h | 8 +++++
6 files changed, 56 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c
index 619f4d09e4e0..7db56abaa582 100644
--- a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c
+++ b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c
@@ -392,7 +392,7 @@ void nfp_nfd3_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget)
return;
/* Work out how many descriptors have been transmitted */
- qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);
+ qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp);
if (qcp_rd_p == tx_ring->qcp_rd_p)
return;
@@ -467,13 +467,14 @@ void nfp_nfd3_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget)
static bool nfp_nfd3_xdp_complete(struct nfp_net_tx_ring *tx_ring)
{
struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
+ struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
u32 done_pkts = 0, done_bytes = 0;
bool done_all;
int idx, todo;
u32 qcp_rd_p;
/* Work out how many descriptors have been transmitted */
- qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);
+ qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp);
if (qcp_rd_p == tx_ring->qcp_rd_p)
return true;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 4e288b8f3510..3c386972f69a 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -126,6 +126,7 @@ struct nfp_nfd3_tx_buf;
* @r_vec: Back pointer to ring vector structure
* @idx: Ring index from Linux's perspective
* @qcp_q: Pointer to base of the QCP TX queue
+ * @txrwb: TX pointer write back area
* @cnt: Size of the queue in number of descriptors
* @wr_p: TX ring write pointer (free running)
* @rd_p: TX ring read pointer (free running)
@@ -145,6 +146,7 @@ struct nfp_net_tx_ring {
u32 idx;
u8 __iomem *qcp_q;
+ u64 *txrwb;
u32 cnt;
u32 wr_p;
@@ -444,6 +446,8 @@ struct nfp_stat_pair {
* @ctrl_bar: Pointer to mapped control BAR
*
* @ops: Callbacks and parameters for this vNIC's NFD version
+ * @txrwb: TX pointer write back area (indexed by queue id)
+ * @txrwb_dma: TX pointer write back area DMA address
* @txd_cnt: Size of the TX ring in number of min size packets
* @rxd_cnt: Size of the RX ring in number of min size packets
* @num_r_vecs: Number of used ring vectors
@@ -480,6 +484,9 @@ struct nfp_net_dp {
const struct nfp_dp_ops *ops;
+ u64 *txrwb;
+ dma_addr_t txrwb_dma;
+
unsigned int txd_cnt;
unsigned int rxd_cnt;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index dd234f5228f1..5cac5563028c 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1427,6 +1427,8 @@ struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn)
new->rx_rings = NULL;
new->num_r_vecs = 0;
new->num_stack_tx_rings = 0;
+ new->txrwb = NULL;
+ new->txrwb_dma = 0;
return new;
}
@@ -1963,7 +1965,7 @@ void nfp_net_info(struct nfp_net *nn)
nn->fw_ver.resv, nn->fw_ver.class,
nn->fw_ver.major, nn->fw_ver.minor,
nn->max_mtu);
- nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+ nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
nn->cap,
nn->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "",
nn->cap & NFP_NET_CFG_CTRL_L2BC ? "L2BCFILT " : "",
@@ -1981,6 +1983,7 @@ void nfp_net_info(struct nfp_net *nn)
nn->cap & NFP_NET_CFG_CTRL_CTAG_FILTER ? "CTAG_FILTER " : "",
nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "",
nn->cap & NFP_NET_CFG_CTRL_IRQMOD ? "IRQMOD " : "",
+ nn->cap & NFP_NET_CFG_CTRL_TXRWB ? "TXRWB " : "",
nn->cap & NFP_NET_CFG_CTRL_VXLAN ? "VXLAN " : "",
nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : "",
nn->cap & NFP_NET_CFG_CTRL_CSUM_COMPLETE ?
@@ -2352,6 +2355,10 @@ int nfp_net_init(struct nfp_net *nn)
nn->dp.ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
}
+ /* Enable TX pointer writeback, if supported */
+ if (nn->cap & NFP_NET_CFG_CTRL_TXRWB)
+ nn->dp.ctrl |= NFP_NET_CFG_CTRL_TXRWB;
+
/* Stash the re-configuration queue away. First odd queue in TX Bar */
nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
index 791203d07ac7..d8b735ccf899 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
@@ -99,11 +99,14 @@ static int nfp_tx_q_show(struct seq_file *file, void *data)
d_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);
d_wr_p = nfp_qcp_wr_ptr_read(tx_ring->qcp_q);
- seq_printf(file, "TX[%02d,%02d%s]: cnt=%u dma=%pad host=%p H_RD=%u H_WR=%u D_RD=%u D_WR=%u\n",
+ seq_printf(file, "TX[%02d,%02d%s]: cnt=%u dma=%pad host=%p H_RD=%u H_WR=%u D_RD=%u D_WR=%u",
tx_ring->idx, tx_ring->qcidx,
tx_ring == r_vec->tx_ring ? "" : "xdp",
tx_ring->cnt, &tx_ring->dma, tx_ring->txds,
tx_ring->rd_p, tx_ring->wr_p, d_rd_p, d_wr_p);
+ if (tx_ring->txrwb)
+ seq_printf(file, " TXRWB=%llu", *tx_ring->txrwb);
+ seq_putc(file, '\n');
nfp_net_debugfs_print_tx_descs(file, &nn->dp, r_vec, tx_ring,
d_rd_p, d_wr_p);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_dp.c b/drivers/net/ethernet/netronome/nfp/nfp_net_dp.c
index 431bd2c13221..34dd94811df3 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_dp.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_dp.c
@@ -44,12 +44,13 @@ void *nfp_net_rx_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
/**
* nfp_net_tx_ring_init() - Fill in the boilerplate for a TX ring
* @tx_ring: TX ring structure
+ * @dp: NFP Net data path struct
* @r_vec: IRQ vector servicing this ring
* @idx: Ring index
* @is_xdp: Is this an XDP TX ring?
*/
static void
-nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring,
+nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring, struct nfp_net_dp *dp,
struct nfp_net_r_vector *r_vec, unsigned int idx,
bool is_xdp)
{
@@ -61,6 +62,7 @@ nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring,
u64_stats_init(&tx_ring->r_vec->tx_sync);
tx_ring->qcidx = tx_ring->idx * nn->stride_tx;
+ tx_ring->txrwb = dp->txrwb ? &dp->txrwb[idx] : NULL;
tx_ring->qcp_q = nn->tx_bar + NFP_QCP_QUEUE_OFF(tx_ring->qcidx);
}
@@ -187,14 +189,22 @@ int nfp_net_tx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp)
if (!dp->tx_rings)
return -ENOMEM;
+ if (dp->ctrl & NFP_NET_CFG_CTRL_TXRWB) {
+ dp->txrwb = dma_alloc_coherent(dp->dev,
+ dp->num_tx_rings * sizeof(u64),
+ &dp->txrwb_dma, GFP_KERNEL);
+ if (!dp->txrwb)
+ goto err_free_rings;
+ }
+
for (r = 0; r < dp->num_tx_rings; r++) {
int bias = 0;
if (r >= dp->num_stack_tx_rings)
bias = dp->num_stack_tx_rings;
- nfp_net_tx_ring_init(&dp->tx_rings[r], &nn->r_vecs[r - bias],
- r, bias);
+ nfp_net_tx_ring_init(&dp->tx_rings[r], dp,
+ &nn->r_vecs[r - bias], r, bias);
if (nfp_net_tx_ring_alloc(dp, &dp->tx_rings[r]))
goto err_free_prev;
@@ -211,6 +221,10 @@ int nfp_net_tx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp)
err_free_ring:
nfp_net_tx_ring_free(dp, &dp->tx_rings[r]);
}
+ if (dp->txrwb)
+ dma_free_coherent(dp->dev, dp->num_tx_rings * sizeof(u64),
+ dp->txrwb, dp->txrwb_dma);
+err_free_rings:
kfree(dp->tx_rings);
return -ENOMEM;
}
@@ -224,6 +238,9 @@ void nfp_net_tx_rings_free(struct nfp_net_dp *dp)
nfp_net_tx_ring_free(dp, &dp->tx_rings[r]);
}
+ if (dp->txrwb)
+ dma_free_coherent(dp->dev, dp->num_tx_rings * sizeof(u64),
+ dp->txrwb, dp->txrwb_dma);
kfree(dp->tx_rings);
}
@@ -377,6 +394,11 @@ nfp_net_tx_ring_hw_cfg_write(struct nfp_net *nn,
struct nfp_net_tx_ring *tx_ring, unsigned int idx)
{
nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), tx_ring->dma);
+ if (tx_ring->txrwb) {
+ *tx_ring->txrwb = 0;
+ nn_writeq(nn, NFP_NET_CFG_TXR_WB_ADDR(idx),
+ nn->dp.txrwb_dma + idx * sizeof(u64));
+ }
nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), ilog2(tx_ring->cnt));
nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), tx_ring->r_vec->irq_entry);
}
@@ -388,6 +410,7 @@ void nfp_net_vec_clear_ring_data(struct nfp_net *nn, unsigned int idx)
nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), 0);
nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), 0);
+ nn_writeq(nn, NFP_NET_CFG_TXR_WB_ADDR(idx), 0);
nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), 0);
nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), 0);
}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_dp.h b/drivers/net/ethernet/netronome/nfp/nfp_net_dp.h
index 81be8d17fa93..99579722aacf 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_dp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_dp.h
@@ -60,6 +60,14 @@ static inline void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring)
tx_ring->wr_ptr_add = 0;
}
+static inline u32
+nfp_net_read_tx_cmpl(struct nfp_net_tx_ring *tx_ring, struct nfp_net_dp *dp)
+{
+ if (tx_ring->txrwb)
+ return *tx_ring->txrwb;
+ return nfp_qcp_rd_ptr_read(tx_ring->qcp_q);
+}
+
static inline void nfp_net_free_frag(void *frag, bool xdp)
{
if (!xdp)
--
2.30.2
Powered by blists - more mailing lists