Message-ID: <20200618160941.879717-11-jonathan.lemon@gmail.com>
Date: Thu, 18 Jun 2020 09:09:30 -0700
From: Jonathan Lemon <jonathan.lemon@...il.com>
To: <netdev@...r.kernel.org>
CC: <kernel-team@...com>, <axboe@...nel.dk>
Subject: [RFC PATCH 10/21] mlx5: add netgpu queue functions
Add the netgpu setup/teardown functions; they are not hooked up to the
rest of the driver yet.  The driver also handles loading and unloading
of the netgpu module: it takes and drops references on the module's
exported symbols (symbol_get()/symbol_put()) at context setup and
teardown time, so netgpu remains an optional module.
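For reference, the intended call pattern is sketched below.  This is an
illustration only, not something this patch wires up (the hookup from
the netgpu side is expected later in the series), and 'netdev', 'ctx',
'qid' and 'err' are placeholder names for values the caller supplies:

	/* enable: bind a netgpu context to RX queue 'qid' */
	err = mlx5e_netgpu_setup_ctx(netdev, ctx, qid);

	/* disable: passing a NULL ctx tears the binding down again */
	err = mlx5e_netgpu_setup_ctx(netdev, NULL, qid);

Enable and disable run under priv->state_lock; enable binds the netgpu
module symbols via symbol_get() and disable releases them again.
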
Signed-off-by: Jonathan Lemon <jonathan.lemon@...il.com>
---
.../net/ethernet/mellanox/mlx5/core/Makefile | 3 +-
.../mellanox/mlx5/core/en/netgpu/setup.c | 475 ++++++++++++++++++
.../mellanox/mlx5/core/en/netgpu/setup.h | 42 ++
3 files changed, 519 insertions(+), 1 deletion(-)
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/netgpu/setup.c
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/netgpu/setup.h
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index b61e47bc16e8..27983bd074e9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -25,7 +25,8 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
 		en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
 		en_selftest.o en/port.o en/monitor_stats.o en/health.o \
 		en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/umem.o \
-		en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o
+		en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o \
+		en/netgpu/setup.o
 
 #
 # Netdev extra
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/netgpu/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/netgpu/setup.c
new file mode 100644
index 000000000000..f0578c41951d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/netgpu/setup.c
@@ -0,0 +1,475 @@
+#include <linux/prefetch.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/indirect_call_wrapper.h>
+#include <net/ip6_checksum.h>
+#include <net/page_pool.h>
+#include <net/inet_ecn.h>
+#include "en.h"
+#include "en_tc.h"
+#include "lib/clock.h"
+#include "en/xdp.h"
+#include "en/params.h"
+#include "en/netgpu/setup.h"
+
+#include <net/netgpu.h>
+#include <uapi/misc/shqueue.h>
+
+int (*fn_netgpu_get_page)(struct netgpu_ctx *ctx,
+			  struct page **page, dma_addr_t *dma);
+void (*fn_netgpu_put_page)(struct netgpu_ctx *, struct page *, bool);
+int (*fn_netgpu_get_pages)(struct sock *, struct page **,
+			   unsigned long, int);
+struct netgpu_ctx *g_ctx;
+
+static void
+netgpu_fn_unload(void)
+{
+	if (fn_netgpu_get_page)
+		symbol_put(netgpu_get_page);
+	if (fn_netgpu_put_page)
+		symbol_put(netgpu_put_page);
+	if (fn_netgpu_get_pages)
+		symbol_put(netgpu_get_pages);
+
+	fn_netgpu_get_page = NULL;
+	fn_netgpu_put_page = NULL;
+	fn_netgpu_get_pages = NULL;
+}
+
+static int
+netgpu_fn_load(void)
+{
+	fn_netgpu_get_page = symbol_get(netgpu_get_page);
+	fn_netgpu_put_page = symbol_get(netgpu_put_page);
+	fn_netgpu_get_pages = symbol_get(netgpu_get_pages);
+
+	if (fn_netgpu_get_page &&
+	    fn_netgpu_put_page &&
+	    fn_netgpu_get_pages)
+		return 0;
+
+	netgpu_fn_unload();
+
+	return -EFAULT;
+}
+
+void
+mlx5e_netgpu_put_page(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
+		      bool recycle)
+{
+	struct netgpu_ctx *ctx = rq->netgpu;
+	struct page *page = dma_info->page;
+
+	if (page) {
+		put_page(page);
+		__netgpu_put_page(ctx, page, recycle);
+	}
+}
+
+bool
+mlx5e_netgpu_avail(struct mlx5e_rq *rq, u8 count)
+{
+	struct netgpu_ctx *ctx = rq->netgpu;
+
+	/* XXX
+	 * napi_cache_count is not a total count, and this also
+	 * doesn't consider any_cache_count.
+	 */
+	return ctx->napi_cache_count >= count ||
+	       sq_cons_ready(&ctx->fill) >= (count - ctx->napi_cache_count);
+}
+
+void mlx5e_netgpu_taken(struct mlx5e_rq *rq)
+{
+	struct netgpu_ctx *ctx = rq->netgpu;
+
+	sq_cons_complete(&ctx->fill);
+}
+
+int
+mlx5e_netgpu_get_page(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info)
+{
+	struct netgpu_ctx *ctx = rq->netgpu;
+
+	return __netgpu_get_page(ctx, &dma_info->page, &dma_info->addr);
+}
+
+struct netgpu_ctx *
+mlx5e_netgpu_get_ctx(struct mlx5e_params *params, struct mlx5e_xsk *xsk,
+		     u16 ix)
+{
+	if (!xsk || !xsk->ctx_tbl)
+		return NULL;
+
+	if (unlikely(ix >= params->num_channels))
+		return NULL;
+
+	if (unlikely(!xsk->is_netgpu))
+		return NULL;
+
+	return xsk->ctx_tbl[ix];
+}
+
+static int mlx5e_netgpu_get_tbl(struct mlx5e_xsk *xsk)
+{
+	if (!xsk->ctx_tbl) {
+		xsk->ctx_tbl = kcalloc(MLX5E_MAX_NUM_CHANNELS,
+				       sizeof(*xsk->ctx_tbl), GFP_KERNEL);
+		if (unlikely(!xsk->ctx_tbl))
+			return -ENOMEM;
+		xsk->is_netgpu = true;
+	}
+	if (!xsk->is_netgpu)
+		return -EINVAL;
+
+	xsk->refcnt++;
+	xsk->ever_used = true;
+
+	return 0;
+}
+
+static void mlx5e_netgpu_put_tbl(struct mlx5e_xsk *xsk)
+{
+	if (!--xsk->refcnt) {
+		kfree(xsk->ctx_tbl);
+		xsk->ctx_tbl = NULL;
+	}
+}
+
+static void mlx5e_netgpu_remove_ctx(struct mlx5e_xsk *xsk, u16 ix)
+{
+	xsk->ctx_tbl[ix] = NULL;
+
+	mlx5e_netgpu_put_tbl(xsk);
+}
+
+static int mlx5e_netgpu_add_ctx(struct mlx5e_xsk *xsk, struct netgpu_ctx *ctx,
+				u16 ix)
+{
+	int err;
+
+	err = mlx5e_netgpu_get_tbl(xsk);
+	if (unlikely(err))
+		return err;
+
+	xsk->ctx_tbl[ix] = ctx;
+
+	return 0;
+}
+
+static int mlx5e_netgpu_enable_locked(struct mlx5e_priv *priv,
+				      struct netgpu_ctx *ctx, u16 ix)
+{
+	struct mlx5e_params *params = &priv->channels.params;
+	struct mlx5e_channel *c;
+	int err;
+
+	if (unlikely(mlx5e_netgpu_get_ctx(&priv->channels.params,
+					  &priv->xsk, ix)))
+		return -EBUSY;
+
+	err = mlx5e_netgpu_add_ctx(&priv->xsk, ctx, ix);
+	if (unlikely(err))
+		return err;
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+		/* XSK objects will be created on open. */
+		goto validate_closed;
+	}
+
+	if (!params->hd_split) {
+		/* XSK objects will be created when header split is set,
+		 * and the channels are reopened.
+		 */
+		goto validate_closed;
+	}
+
+	c = priv->channels.c[ix];
+
+	err = mlx5e_open_netgpu(priv, params, ctx, c);
+	if (unlikely(err))
+		goto err_remove_ctx;
+
+	mlx5e_activate_netgpu(c);
+
+	/* Don't wait for WQEs, because the newer xdpsock sample doesn't provide
+	 * any Fill Ring entries at the setup stage.
+	 */
+
+	err = mlx5e_netgpu_redirect_rqt_to_channel(priv, priv->channels.c[ix]);
+	if (unlikely(err))
+		goto err_deactivate;
+
+	return 0;
+
+err_deactivate:
+	mlx5e_deactivate_netgpu(c);
+	mlx5e_close_netgpu(c);
+
+err_remove_ctx:
+	mlx5e_netgpu_remove_ctx(&priv->xsk, ix);
+
+	return err;
+
+validate_closed:
+	return 0;
+}
+
+static int mlx5e_netgpu_disable_locked(struct mlx5e_priv *priv, u16 ix)
+{
+	struct mlx5e_channel *c;
+	struct netgpu_ctx *ctx;
+
+	ctx = mlx5e_netgpu_get_ctx(&priv->channels.params, &priv->xsk, ix);
+
+	if (unlikely(!ctx))
+		return -EINVAL;
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+		goto remove_ctx;
+
+	/* NETGPU RQ is only created if header split is set. */
+	if (!priv->channels.params.hd_split)
+		goto remove_ctx;
+
+	c = priv->channels.c[ix];
+	mlx5e_netgpu_redirect_rqt_to_drop(priv, ix);
+	mlx5e_deactivate_netgpu(c);
+	mlx5e_close_netgpu(c);
+
+remove_ctx:
+	mlx5e_netgpu_remove_ctx(&priv->xsk, ix);
+
+	return 0;
+}
+
+static int mlx5e_netgpu_enable_ctx(struct mlx5e_priv *priv,
+				   struct netgpu_ctx *ctx, u16 ix)
+{
+	int err;
+
+	mutex_lock(&priv->state_lock);
+	err = netgpu_fn_load();
+	if (!err)
+		err = mlx5e_netgpu_enable_locked(priv, ctx, ix);
+	g_ctx = ctx;
+	mutex_unlock(&priv->state_lock);
+
+	return err;
+}
+
+static int mlx5e_netgpu_disable_ctx(struct mlx5e_priv *priv, u16 ix)
+{
+	int err;
+
+	mutex_lock(&priv->state_lock);
+	err = mlx5e_netgpu_disable_locked(priv, ix);
+	netgpu_fn_unload();
+	g_ctx = NULL;
+	mutex_unlock(&priv->state_lock);
+
+	return err;
+}
+
+int
+mlx5e_netgpu_setup_ctx(struct net_device *dev, struct netgpu_ctx *ctx, u16 qid)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5e_params *params = &priv->channels.params;
+	u16 ix;
+
+	if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid,
+						   MLX5E_RQ_GROUP_XSK, &ix)))
+		return -EINVAL;
+
+	return ctx ? mlx5e_netgpu_enable_ctx(priv, ctx, ix) :
+		     mlx5e_netgpu_disable_ctx(priv, ix);
+}
+
+static void mlx5e_build_netgpuicosq_param(struct mlx5e_priv *priv,
+					  u8 log_wq_size,
+					  struct mlx5e_sq_param *param)
+{
+	void *sqc = param->sqc;
+	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+	mlx5e_build_sq_param_common(priv, param);
+
+	MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
+}
+
+static void mlx5e_build_netgpu_cparam(struct mlx5e_priv *priv,
+				      struct mlx5e_params *params,
+				      struct mlx5e_channel_param *cparam)
+{
+	const u8 icosq_size = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+	struct mlx5e_xsk_param *xsk = (void *)0x1;
+
+	mlx5e_build_rq_param(priv, params, xsk, &cparam->rq);
+	mlx5e_build_rx_cq_param(priv, params, NULL, &cparam->rx_cq);
+
+	mlx5e_build_netgpuicosq_param(priv, icosq_size, &cparam->icosq);
+	mlx5e_build_ico_cq_param(priv, icosq_size, &cparam->icosq_cq);
+}
+
+int mlx5e_open_netgpu(struct mlx5e_priv *priv, struct mlx5e_params *params,
+		      struct netgpu_ctx *ctx, struct mlx5e_channel *c)
+{
+	struct mlx5e_channel_param *cparam;
+	struct dim_cq_moder icocq_moder = {};
+	struct xdp_umem *umem = (void *)0x1;
+	int err;
+
+	cparam = kvzalloc(sizeof(*cparam), GFP_KERNEL);
+	if (!cparam)
+		return -ENOMEM;
+
+	mlx5e_build_netgpu_cparam(priv, params, cparam);
+
+	err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam->rx_cq,
+			    &c->xskrq.cq);
+	if (unlikely(err))
+		goto err_free_cparam;
+
+	err = mlx5e_open_rq(c, params, &cparam->rq, NULL, umem, &c->xskrq);
+	if (unlikely(err))
+		goto err_close_rx_cq;
+	c->xskrq.netgpu = ctx;
+
+	err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->xskicosq.cq);
+	if (unlikely(err))
+		goto err_close_rq;
+
+	/* Create a dedicated SQ for posting NOPs whenever we need an IRQ to be
+	 * triggered and NAPI to be called on the correct CPU.
+	 */
+	err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->xskicosq);
+	if (unlikely(err))
+		goto err_close_icocq;
+
+	kvfree(cparam);
+
+	spin_lock_init(&c->xskicosq_lock);
+
+	set_bit(MLX5E_CHANNEL_STATE_NETGPU, c->state);
+
+	return 0;
+
+err_close_icocq:
+	mlx5e_close_cq(&c->xskicosq.cq);
+
+err_close_rq:
+	mlx5e_close_rq(&c->xskrq);
+
+err_close_rx_cq:
+	mlx5e_close_cq(&c->xskrq.cq);
+
+err_free_cparam:
+	kvfree(cparam);
+
+	return err;
+}
+
+void mlx5e_close_netgpu(struct mlx5e_channel *c)
+{
+	clear_bit(MLX5E_CHANNEL_STATE_NETGPU, c->state);
+	napi_synchronize(&c->napi);
+	synchronize_rcu(); /* Sync with the XSK wakeup. */
+
+	mlx5e_close_rq(&c->xskrq);
+	mlx5e_close_cq(&c->xskrq.cq);
+	mlx5e_close_icosq(&c->xskicosq);
+	mlx5e_close_cq(&c->xskicosq.cq);
+
+	/* zero these out - so the next open has a clean slate. */
+	memset(&c->xskrq, 0, sizeof(c->xskrq));
+	memset(&c->xsksq, 0, sizeof(c->xsksq));
+	memset(&c->xskicosq, 0, sizeof(c->xskicosq));
+}
+
+void mlx5e_activate_netgpu(struct mlx5e_channel *c)
+{
+	mlx5e_activate_icosq(&c->xskicosq);
+	set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
+	/* TX queue is created active. */
+
+	spin_lock(&c->xskicosq_lock);
+	mlx5e_trigger_irq(&c->xskicosq);
+	spin_unlock(&c->xskicosq_lock);
+}
+
+void mlx5e_deactivate_netgpu(struct mlx5e_channel *c)
+{
+	mlx5e_deactivate_rq(&c->xskrq);
+	/* TX queue is disabled on close. */
+	mlx5e_deactivate_icosq(&c->xskicosq);
+}
+
+static int mlx5e_redirect_netgpu_rqt(struct mlx5e_priv *priv, u16 ix, u32 rqn)
+{
+	struct mlx5e_redirect_rqt_param direct_rrp = {
+		.is_rss = false,
+		{
+			.rqn = rqn,
+		},
+	};
+
+	u32 rqtn = priv->xsk_tir[ix].rqt.rqtn;
+
+	return mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp);
+}
+
+int mlx5e_netgpu_redirect_rqt_to_channel(struct mlx5e_priv *priv,
+					 struct mlx5e_channel *c)
+{
+	return mlx5e_redirect_netgpu_rqt(priv, c->ix, c->xskrq.rqn);
+}
+
+int mlx5e_netgpu_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix)
+{
+	return mlx5e_redirect_netgpu_rqt(priv, ix, priv->drop_rq.rqn);
+}
+
+int mlx5e_netgpu_redirect_rqts_to_channels(struct mlx5e_priv *priv,
+					   struct mlx5e_channels *chs)
+{
+	int err, i;
+
+	for (i = 0; i < chs->num; i++) {
+		struct mlx5e_channel *c = chs->c[i];
+
+		if (!test_bit(MLX5E_CHANNEL_STATE_NETGPU, c->state))
+			continue;
+
+		err = mlx5e_netgpu_redirect_rqt_to_channel(priv, c);
+		if (unlikely(err))
+			goto err_stop;
+	}
+
+	return 0;
+
+err_stop:
+	for (i--; i >= 0; i--) {
+		if (!test_bit(MLX5E_CHANNEL_STATE_NETGPU, chs->c[i]->state))
+			continue;
+
+		mlx5e_netgpu_redirect_rqt_to_drop(priv, i);
+	}
+
+	return err;
+}
+
+void mlx5e_netgpu_redirect_rqts_to_drop(struct mlx5e_priv *priv,
+					struct mlx5e_channels *chs)
+{
+	int i;
+
+	for (i = 0; i < chs->num; i++) {
+		if (!test_bit(MLX5E_CHANNEL_STATE_NETGPU, chs->c[i]->state))
+			continue;
+
+		mlx5e_netgpu_redirect_rqt_to_drop(priv, i);
+	}
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/netgpu/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/netgpu/setup.h
new file mode 100644
index 000000000000..37fde92ef89d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/netgpu/setup.h
@@ -0,0 +1,42 @@
+#pragma once
+
+struct netgpu_ctx *
+mlx5e_netgpu_get_ctx(struct mlx5e_params *params, struct mlx5e_xsk *xsk,
+		     u16 ix);
+
+int
+mlx5e_open_netgpu(struct mlx5e_priv *priv, struct mlx5e_params *params,
+		  struct netgpu_ctx *ctx, struct mlx5e_channel *c);
+
+bool mlx5e_netgpu_avail(struct mlx5e_rq *rq, u8 count);
+void mlx5e_netgpu_taken(struct mlx5e_rq *rq);
+
+int
+mlx5e_netgpu_setup_ctx(struct net_device *dev, struct netgpu_ctx *ctx, u16 qid);
+
+int
+mlx5e_netgpu_get_page(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info);
+
+void
+mlx5e_netgpu_put_page(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
+		      bool recycle);
+
+int mlx5e_open_netgpu(struct mlx5e_priv *priv, struct mlx5e_params *params,
+		      struct netgpu_ctx *ctx, struct mlx5e_channel *c);
+
+void mlx5e_close_netgpu(struct mlx5e_channel *c);
+
+void mlx5e_activate_netgpu(struct mlx5e_channel *c);
+
+void mlx5e_deactivate_netgpu(struct mlx5e_channel *c);
+
+int mlx5e_netgpu_redirect_rqt_to_channel(struct mlx5e_priv *priv,
+					 struct mlx5e_channel *c);
+
+int mlx5e_netgpu_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix);
+
+int mlx5e_netgpu_redirect_rqts_to_channels(struct mlx5e_priv *priv,
+					   struct mlx5e_channels *chs);
+
+void mlx5e_netgpu_redirect_rqts_to_drop(struct mlx5e_priv *priv,
+					struct mlx5e_channels *chs);
--
2.24.1