[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20200430171835.20812-16-saeedm@mellanox.com>
Date: Thu, 30 Apr 2020 10:18:35 -0700
From: Saeed Mahameed <saeedm@...lanox.com>
To: "David S. Miller" <davem@...emloft.net>, kuba@...nel.org
Cc: netdev@...r.kernel.org, Maxim Mikityanskiy <maximmi@...lanox.com>,
Tariq Toukan <tariqt@...lanox.com>,
Saeed Mahameed <saeedm@...lanox.com>
Subject: [net-next 15/15] net/mlx5e: Unify reserving space for WQEs
From: Maxim Mikityanskiy <maximmi@...lanox.com>
In our fast-path design, a WQE (Work Queue Element) must not cross the
page boundary. To enforce that, for WQEs consisting of more than one BB
(Basic Block), the driver checks the available contiguous space in the
WQ in advance, and if it's not enough, it pads it with NOPs.
This patch modifies the code that calculates the position of next WQE,
considering the padding, and prepares the WQE. This code is common for
all SQ types. In this patch it's reorganized in a way that makes the
usage pattern unified for all SQ types, and makes the implementations
self-contained and look almost the same, preparing the repeating code to
further attempts to deduplicate it.
One place is left as is: mlx5e_sq_xmit and mlx5e_fill_sq_frag_edge call
inside, because it is special in a way that it may also copy WQE's cseg
and eseg when reserving space. This will be eliminated in one of the
following patches, and this place will be converted to the new approach,
too.
Signed-off-by: Maxim Mikityanskiy <maximmi@...lanox.com>
Reviewed-by: Tariq Toukan <tariqt@...lanox.com>
Signed-off-by: Saeed Mahameed <saeedm@...lanox.com>
---
.../net/ethernet/mellanox/mlx5/core/en/txrx.h | 56 +++++++++++++++++++
.../net/ethernet/mellanox/mlx5/core/en/xdp.c | 31 ++++++++--
.../net/ethernet/mellanox/mlx5/core/en/xdp.h | 17 ------
.../mellanox/mlx5/core/en_accel/ktls_tx.c | 16 +-----
.../net/ethernet/mellanox/mlx5/core/en_rx.c | 26 +--------
.../net/ethernet/mellanox/mlx5/core/en_tx.c | 11 +---
6 files changed, 88 insertions(+), 69 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 8682d9148ab9..89fe65593c16 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -81,6 +81,62 @@ mlx5e_post_nop_fence(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
return wqe;
}
+static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 pi, contig_wqebbs;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs < size)) {
+ struct mlx5e_tx_wqe_info *wi, *edge_wi;
+
+ wi = &sq->db.wqe_info[pi];
+ edge_wi = wi + contig_wqebbs;
+
+ /* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
+ for (; wi < edge_wi; wi++) {
+ *wi = (struct mlx5e_tx_wqe_info) {
+ .num_wqebbs = 1,
+ };
+ mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ }
+ sq->stats->nop += contig_wqebbs;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ }
+
+ return pi;
+}
+
+static inline u16 mlx5e_icosq_get_next_pi(struct mlx5e_icosq *sq, u16 size)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 pi, contig_wqebbs;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs < size)) {
+ struct mlx5e_icosq_wqe_info *wi, *edge_wi;
+
+ wi = &sq->db.wqe_info[pi];
+ edge_wi = wi + contig_wqebbs;
+
+ /* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
+ for (; wi < edge_wi; wi++) {
+ *wi = (struct mlx5e_icosq_wqe_info) {
+ .opcode = MLX5_OPCODE_NOP,
+ .num_wqebbs = 1,
+ };
+ mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ }
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ }
+
+ return pi;
+}
+
static inline void
mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq,
u16 pi, u16 nnops)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index cf089520c031..c4a7fb4ecd14 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -178,21 +178,42 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
}
}
-static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
+static u16 mlx5e_xdpsq_get_next_pi(struct mlx5e_xdpsq *sq, u16 size)
{
- struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
- struct mlx5e_xdpsq_stats *stats = sq->stats;
struct mlx5_wq_cyc *wq = &sq->wq;
u16 pi, contig_wqebbs;
pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs < size)) {
+ struct mlx5e_xdp_wqe_info *wi, *edge_wi;
+
+ wi = &sq->db.wqe_info[pi];
+ edge_wi = wi + contig_wqebbs;
+
+ /* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
+ for (; wi < edge_wi; wi++) {
+ *wi = (struct mlx5e_xdp_wqe_info) {
+ .num_wqebbs = 1,
+ .num_pkts = 0,
+ };
+ mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ }
+ sq->stats->nops += contig_wqebbs;
- if (unlikely(contig_wqebbs < MLX5_SEND_WQE_MAX_WQEBBS)) {
- mlx5e_fill_xdpsq_frag_edge(sq, wq, pi, contig_wqebbs);
pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
}
+ return pi;
+}
+
+static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
+{
+ struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+ struct mlx5e_xdpsq_stats *stats = sq->stats;
+ u16 pi;
+
+ pi = mlx5e_xdpsq_get_next_pi(sq, MLX5_SEND_WQE_MAX_WQEBBS);
session->wqe = MLX5E_TX_FETCH_WQE(sq, pi);
prefetchw(session->wqe->data);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index 4fd0ff47bdc3..ed6f045febeb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -137,23 +137,6 @@ mlx5e_xdp_no_room_for_inline_pkt(struct mlx5e_xdp_mpwqe *session)
session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT > MLX5E_XDP_MPW_MAX_NUM_DS;
}
-static inline void
-mlx5e_fill_xdpsq_frag_edge(struct mlx5e_xdpsq *sq, struct mlx5_wq_cyc *wq,
- u16 pi, u16 nnops)
-{
- struct mlx5e_xdp_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
-
- edge_wi = wi + nnops;
- /* fill sq frag edge with nops to avoid wqe wrapping two pages */
- for (; wi < edge_wi; wi++) {
- wi->num_wqebbs = 1;
- wi->num_pkts = 0;
- mlx5e_post_nop(wq, sq->sqn, &sq->pc);
- }
-
- sq->stats->nops += nnops;
-}
-
static inline void
mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
struct mlx5e_xdp_xmit_data *xdptxd,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index 717d36b45aa9..ba973937f0b5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -165,14 +165,8 @@ mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq,
bool skip_static_post, bool fence_first_post)
{
bool progress_fence = skip_static_post || !fence_first_post;
- struct mlx5_wq_cyc *wq = &sq->wq;
- u16 contig_wqebbs_room, pi;
- pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
- if (unlikely(contig_wqebbs_room <
- MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS))
- mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
+ mlx5e_txqsq_get_next_pi(sq, MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS);
if (!skip_static_post)
post_static_params(sq, priv_tx, fence_first_post);
@@ -346,10 +340,8 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx,
u32 seq)
{
struct mlx5e_sq_stats *stats = sq->stats;
- struct mlx5_wq_cyc *wq = &sq->wq;
enum mlx5e_ktls_sync_retval ret;
struct tx_sync_info info = {};
- u16 contig_wqebbs_room, pi;
u8 num_wqebbs;
int i = 0;
@@ -380,11 +372,7 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx,
}
num_wqebbs = mlx5e_ktls_dumps_num_wqebbs(sq, info.nr_frags, info.sync_len);
- pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
-
- if (unlikely(contig_wqebbs_room < num_wqebbs))
- mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
+ mlx5e_txqsq_get_next_pi(sq, num_wqebbs);
for (; i < info.nr_frags; i++) {
unsigned int orig_fsz, frag_offset = 0, n = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 9f33a0e7dd9a..d9a5a669b84d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -468,22 +468,6 @@ static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq, u8 n)
mlx5_wq_ll_update_db_record(wq);
}
-static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
- struct mlx5_wq_cyc *wq,
- u16 pi, u16 nnops)
-{
- struct mlx5e_icosq_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
-
- edge_wi = wi + nnops;
-
- /* fill sq frag edge with nops to avoid wqe wrapping two pages */
- for (; wi < edge_wi; wi++) {
- wi->opcode = MLX5_OPCODE_NOP;
- wi->num_wqebbs = 1;
- mlx5e_post_nop(wq, sq->sqn, &sq->pc);
- }
-}
-
static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
@@ -492,7 +476,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
struct mlx5_wq_cyc *wq = &sq->wq;
struct mlx5e_umr_wqe *umr_wqe;
u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1);
- u16 pi, contig_wqebbs_room;
+ u16 pi;
int err;
int i;
@@ -502,13 +486,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
goto err;
}
- pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
- if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) {
- mlx5e_fill_icosq_frag_edge(sq, wq, pi, contig_wqebbs_room);
- pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- }
-
+ pi = mlx5e_icosq_get_next_pi(sq, MLX5E_UMR_WQEBBS);
umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
memcpy(umr_wqe, &rq->mpwqe.umr_wqe, offsetof(struct mlx5e_umr_wqe, inline_mtts));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index ec1429596cb7..583e1b201b75 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -572,7 +572,6 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5_av *av, u32 dqpn, u32 dqkey,
bool xmit_more)
{
- struct mlx5_wq_cyc *wq = &sq->wq;
struct mlx5i_tx_wqe *wqe;
struct mlx5_wqe_datagram_seg *datagram;
@@ -582,9 +581,9 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5e_tx_wqe_info *wi;
struct mlx5e_sq_stats *stats = sq->stats;
- u16 headlen, ihs, pi, contig_wqebbs_room;
u16 ds_cnt, ds_cnt_inl = 0;
u8 num_wqebbs, opcode;
+ u16 headlen, ihs, pi;
u32 num_bytes;
int num_dma;
__be16 mss;
@@ -620,13 +619,7 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
}
num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
- pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
- if (unlikely(contig_wqebbs_room < num_wqebbs)) {
- mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
- pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
- }
-
+ pi = mlx5e_txqsq_get_next_pi(sq, num_wqebbs);
wqe = MLX5I_SQ_FETCH_WQE(sq, pi);
/* fill wqe */
--
2.25.4
Powered by blists - more mailing lists