Message-ID: <20190427054913.GE6705@mtr-leonro.mtl.com>
Date: Sat, 27 Apr 2019 05:49:15 +0000
From: Leon Romanovsky <leonro@...lanox.com>
To: Saeed Mahameed <saeedm@...lanox.com>
CC: Jason Gunthorpe <jgg@...lanox.com>,
"netdev@...r.kernel.org" <netdev@...r.kernel.org>,
"linux-rdma@...r.kernel.org" <linux-rdma@...r.kernel.org>,
Maor Gottlieb <maorg@...lanox.com>,
Mark Bloch <markb@...lanox.com>
Subject: Re: [PATCH mlx5-next 09/11] net/mlx5: Eswitch, enable RoCE loopback
traffic
On Fri, Apr 26, 2019 at 09:58:23PM +0000, Saeed Mahameed wrote:
> From: Maor Gottlieb <maorg@...lanox.com>
>
> When in switchdev mode, we would like to treat loopback RoCE
> traffic (on the eswitch manager) as RDMA and not as regular
> Ethernet traffic.
> In order to enable this, we add a flow steering rule that forwards
> RoCE loopback traffic to the HW RoCE filter (by adding an allow
> rule). In addition, we add a RoCE address at GID index 0, which
> will be set in RoCE loopback packets.
>
> Signed-off-by: Maor Gottlieb <maorg@...lanox.com>
> Reviewed-by: Mark Bloch <markb@...lanox.com>
> Signed-off-by: Saeed Mahameed <saeedm@...lanox.com>
> ---
> .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +-
> .../mellanox/mlx5/core/eswitch_offloads.c | 9 +
> .../net/ethernet/mellanox/mlx5/core/rdma.c | 181 ++++++++++++++++++
> .../net/ethernet/mellanox/mlx5/core/rdma.h | 20 ++
> include/linux/mlx5/driver.h | 7 +
> 5 files changed, 218 insertions(+), 1 deletion(-)
> create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/rdma.c
> create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/rdma.h
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> index 1a16f6d73cbc..5f0be9b36a04 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> @@ -35,7 +35,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tu
> #
> # Core extra
> #
> -mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o ecpf.o
> +mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o ecpf.o rdma.o
> mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
> mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
> mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> index 6c8a17ca236e..4b48bb98981e 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> @@ -37,6 +37,7 @@
> #include <linux/mlx5/fs.h>
> #include "mlx5_core.h"
> #include "eswitch.h"
> +#include "rdma.h"
> #include "en.h"
> #include "fs_core.h"
> #include "lib/devcom.h"
> @@ -1713,6 +1714,13 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports,
> esw->host_info.num_vfs = vf_nvports;
> }
>
> + err = mlx5_rdma_enable_roce(esw->dev);
> + if (err) {
> + esw_debug(esw->dev, "Failed to enable RoCE, err: %d\n",
> + err);
You are already printing errors in all flows of mlx5_rdma_enable_roce();
there is no need for an extra debug print here.
> + err = 0;
If you are not interested in the return value, it is better to declare
the function as void.
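
Something along these lines would be enough (an untested sketch based on
the code already in this patch; only the return type changes, and rdma.h
would need the matching prototype):

	void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev)
	{
		int err;

		err = mlx5_nic_vport_enable_roce(dev);
		if (err) {
			mlx5_core_err(dev, "Failed to enable RoCE: %d\n", err);
			return;
		}

		err = mlx5_rdma_add_roce_addr(dev);
		if (err) {
			mlx5_core_err(dev, "Failed to add RoCE address: %d\n", err);
			goto disable_roce;
		}

		err = mlx5_rdma_enable_roce_steering(dev);
		if (err) {
			mlx5_core_err(dev, "Failed to enable RoCE steering: %d\n", err);
			goto del_roce_addr;
		}

		return;

	del_roce_addr:
		mlx5_rdma_del_roce_addr(dev);
	disable_roce:
		mlx5_nic_vport_disable_roce(dev);
	}

and the call site in esw_offloads_init() becomes a plain:

	mlx5_rdma_enable_roce(esw->dev);
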
> + }
> +
> return 0;
>
> err_reps:
> @@ -1751,6 +1759,7 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw)
> num_vfs = esw->dev->priv.sriov.num_vfs;
> }
>
> + mlx5_rdma_disable_roce(esw->dev);
> esw_offloads_devcom_cleanup(esw);
> esw_offloads_unload_all_reps(esw, num_vfs);
> esw_offloads_steering_cleanup(esw);
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
> new file mode 100644
> index 000000000000..f6c5e4f91aa8
> --- /dev/null
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
> @@ -0,0 +1,181 @@
> +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
> +/* Copyright (c) 2019 Mellanox Technologies */
> +
> +#include <linux/mlx5/vport.h>
> +#include <rdma/ib_verbs.h>
> +#include <net/addrconf.h>
> +
> +#include "lib/mlx5.h"
> +#include "eswitch.h"
> +#include "fs_core.h"
> +
> +static void mlx5_rdma_disable_roce_steering(struct mlx5_core_dev *dev)
> +{
> + struct mlx5_core_roce *roce = &dev->priv.roce;
> +
> + if (IS_ERR_OR_NULL(roce->ft))
roce->ft shouldn't be an error pointer; it should be either NULL or a
proper pointer.
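
Since the error paths of mlx5_rdma_enable_roce_steering() never assign
roce->ft, and dev->priv.roce starts out zeroed (assuming the usual zeroed
allocation of mlx5_priv), a plain NULL check should be enough here, e.g.
this untested sketch:

	/* assumes roce->ft stays NULL until steering init succeeds */
	if (!roce->ft)
		return;
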
> + return;
> +
> + mlx5_del_flow_rules(roce->allow_rule);
> + mlx5_destroy_flow_group(roce->fg);
> + mlx5_destroy_flow_table(roce->ft);
> +}
> +
> +static int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev)
> +{
> + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
> + struct mlx5_core_roce *roce = &dev->priv.roce;
> + struct mlx5_flow_handle *flow_rule = NULL;
> + struct mlx5_flow_table_attr ft_attr = {};
> + struct mlx5_flow_namespace *ns = NULL;
> + struct mlx5_flow_act flow_act = {0};
{0} -> {}
> + struct mlx5_flow_spec *spec;
> + struct mlx5_flow_table *ft;
> + struct mlx5_flow_group *fg;
> + void *match_criteria;
> + u32 *flow_group_in;
> + void *misc;
> + int err;
> +
> + if (!(MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support) &&
> + MLX5_CAP_FLOWTABLE_RDMA_RX(dev, table_miss_action_domain)))
> + return -EOPNOTSUPP;
> +
> + flow_group_in = kvzalloc(inlen, GFP_KERNEL);
> + if (!flow_group_in)
> + return -ENOMEM;
> + spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
> + if (!spec) {
> + kvfree(flow_group_in);
> + return -ENOMEM;
> + }
> +
> + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_RDMA_RX);
> + if (!ns) {
> + mlx5_core_err(dev, "Failed to get RDMA RX namespace");
> + err = -EOPNOTSUPP;
> + goto free;
> + }
> +
> + ft_attr.max_fte = 1;
> + ft = mlx5_create_flow_table(ns, &ft_attr);
> + if (IS_ERR(ft)) {
> + mlx5_core_err(dev, "Failed to create RDMA RX flow table");
> + err = PTR_ERR(ft);
> + goto free;
> + }
> +
> + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
> + MLX5_MATCH_MISC_PARAMETERS);
> + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
> + match_criteria);
> + MLX5_SET_TO_ONES(fte_match_param, match_criteria,
> + misc_parameters.source_port);
> +
> + fg = mlx5_create_flow_group(ft, flow_group_in);
> + if (IS_ERR(fg)) {
> + err = PTR_ERR(fg);
> + mlx5_core_err(dev, "Failed to create RDMA RX flow group err(%d)\n", err);
> + goto destroy_flow_table;
> + }
> +
> + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
> + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
> + misc_parameters);
> + MLX5_SET(fte_match_set_misc, misc, source_port,
> + dev->priv.eswitch->manager_vport);
> + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
> + misc_parameters);
> + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
> +
> + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
> + flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, NULL, 0);
> + if (IS_ERR(flow_rule)) {
> + err = PTR_ERR(flow_rule);
> + mlx5_core_err(dev, "Failed to add RoCE allow rule, err=%d\n",
> + err);
> + goto destroy_flow_group;
> + }
> +
> + kvfree(spec);
> + kvfree(flow_group_in);
> + roce->ft = ft;
> + roce->fg = fg;
> + roce->allow_rule = flow_rule;
> +
> + return 0;
> +
> +destroy_flow_group:
> + mlx5_destroy_flow_group(fg);
> +destroy_flow_table:
> + mlx5_destroy_flow_table(ft);
> +free:
> + kvfree(spec);
> + kvfree(flow_group_in);
> + return err;
> +}
> +
> +static void mlx5_rdma_del_roce_addr(struct mlx5_core_dev *dev)
> +{
> + mlx5_core_roce_gid_set(dev, 0, 0, 0,
> + NULL, NULL, false, 0, 0);
> +}
> +
> +static void mlx5_rdma_make_default_gid(struct mlx5_core_dev *dev, union ib_gid *gid)
> +{
> + u8 hw_id[ETH_ALEN];
> +
> + mlx5_query_nic_vport_mac_address(dev, 0, hw_id);
> + gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
> + addrconf_addr_eui48(&gid->raw[8], hw_id);
> +}
> +
> +static int mlx5_rdma_add_roce_addr(struct mlx5_core_dev *dev)
> +{
> + union ib_gid gid;
> + u8 mac[ETH_ALEN];
> +
> + mlx5_rdma_make_default_gid(dev, &gid);
> + return mlx5_core_roce_gid_set(dev, 0,
> + MLX5_ROCE_VERSION_1,
> + 0, gid.raw, mac,
> + false, 0, 1);
> +}
> +
> +void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev)
> +{
> + mlx5_rdma_disable_roce_steering(dev);
> + mlx5_rdma_del_roce_addr(dev);
> + mlx5_nic_vport_disable_roce(dev);
> +}
> +
> +int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev)
> +{
> + int err;
> +
> + err = mlx5_nic_vport_enable_roce(dev);
> + if (err) {
> + mlx5_core_err(dev, "Failed to enable RoCE: %d\n", err);
> + return err;
> + }
> +
> + err = mlx5_rdma_add_roce_addr(dev);
> + if (err) {
> + mlx5_core_err(dev, "Failed to add RoCE address: %d\n", err);
> + goto disable_roce;
> + }
> +
> + err = mlx5_rdma_enable_roce_steering(dev);
> + if (err) {
> + mlx5_core_err(dev, "Failed to enable RoCE steering: %d\n", err);
> + goto del_roce_addr;
> + }
> +
> + return 0;
> +
> +del_roce_addr:
> + mlx5_rdma_del_roce_addr(dev);
> +disable_roce:
> + mlx5_nic_vport_disable_roce(dev);
> + return err;
> +}
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.h b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h
> new file mode 100644
> index 000000000000..3d9e76c3d42f
> --- /dev/null
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h
> @@ -0,0 +1,20 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
> +/* Copyright (c) 2019 Mellanox Technologies. */
> +
> +#ifndef __MLX5_RDMA_H__
> +#define __MLX5_RDMA_H__
> +
> +#include "mlx5_core.h"
> +
> +#ifdef CONFIG_MLX5_ESWITCH
> +
> +int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev);
> +void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev);
> +
> +#else /* CONFIG_MLX5_ESWITCH */
> +
> +static inline int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) { return 0; }
> +static inline void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) {}
> +
> +#endif /* CONFIG_MLX5_ESWITCH */
> +#endif /* __MLX5_RDMA_H__ */
> diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
> index 582a9680b182..7fa95270dd59 100644
> --- a/include/linux/mlx5/driver.h
> +++ b/include/linux/mlx5/driver.h
> @@ -512,6 +512,12 @@ struct mlx5_rl_table {
> struct mlx5_rl_entry *rl_entry;
> };
>
> +struct mlx5_core_roce {
> + struct mlx5_flow_table *ft;
> + struct mlx5_flow_group *fg;
> + struct mlx5_flow_handle *allow_rule;
> +};
> +
> struct mlx5_priv {
> struct mlx5_eq_table *eq_table;
>
> @@ -565,6 +571,7 @@ struct mlx5_priv {
> struct mlx5_lag *lag;
> struct mlx5_devcom *devcom;
> unsigned long pci_dev_data;
> + struct mlx5_core_roce roce;
> struct mlx5_fc_stats fc_stats;
> struct mlx5_rl_table rl_table;
>
> --
> 2.20.1
>