[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <AM0PR05MB48664868E0B89E582807830BD1EA0@AM0PR05MB4866.eurprd05.prod.outlook.com>
Date: Tue, 18 Jun 2019 10:31:28 +0000
From: Parav Pandit <parav@...lanox.com>
To: Saeed Mahameed <saeedm@...lanox.com>,
Saeed Mahameed <saeedm@...lanox.com>,
Leon Romanovsky <leonro@...lanox.com>
CC: "netdev@...r.kernel.org" <netdev@...r.kernel.org>,
"linux-rdma@...r.kernel.org" <linux-rdma@...r.kernel.org>,
Jianbo Liu <jianbol@...lanox.com>,
Eli Britstein <elibr@...lanox.com>,
Roi Dayan <roid@...lanox.com>, Mark Bloch <markb@...lanox.com>
Subject: RE: [PATCH mlx5-next 05/15] net/mlx5: E-Switch, Tag packet with vport
number in VF vports and uplink ingress ACLs
> -----Original Message-----
> From: netdev-owner@...r.kernel.org <netdev-owner@...r.kernel.org> On
> Behalf Of Saeed Mahameed
> Sent: Tuesday, June 18, 2019 12:53 AM
> To: Saeed Mahameed <saeedm@...lanox.com>; Leon Romanovsky
> <leonro@...lanox.com>
> Cc: netdev@...r.kernel.org; linux-rdma@...r.kernel.org; Jianbo Liu
> <jianbol@...lanox.com>; Eli Britstein <elibr@...lanox.com>; Roi Dayan
> <roid@...lanox.com>; Mark Bloch <markb@...lanox.com>
> Subject: [PATCH mlx5-next 05/15] net/mlx5: E-Switch, Tag packet with vport
> number in VF vports and uplink ingress ACLs
>
> From: Jianbo Liu <jianbol@...lanox.com>
>
> When a dual-port VHCA sends a RoCE packet on its non-native port, and the
> packet arrives to its affiliated vport FDB, a mismatch might occur on the rules
> that match the packet source vport as it is not represented by single VHCA only
> in this case. So we change to match on metadata instead of source vport.
> To do that, a rule is created in all vports and uplink ingress ACLs, to save the
> source vport number and vhca id in the packet's metadata in order to match on
> it later.
> The metadata register used is the first of the 32-bit type C registers. It can be
> used for matching and header modify operations. The higher 16 bits of this
> register are for vhca id, and the lower 16 ones is for vport number.
> This change is not for dual-port RoCE only. If HW and FW allow, the vport
> metadata matching is enabled by default.
>
> Signed-off-by: Jianbo Liu <jianbol@...lanox.com>
> Reviewed-by: Eli Britstein <elibr@...lanox.com>
> Reviewed-by: Roi Dayan <roid@...lanox.com>
> Reviewed-by: Mark Bloch <markb@...lanox.com>
> Signed-off-by: Saeed Mahameed <saeedm@...lanox.com>
> ---
> .../net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +
> .../net/ethernet/mellanox/mlx5/core/eswitch.h | 9 +
> .../mellanox/mlx5/core/eswitch_offloads.c | 183 ++++++++++++++----
> include/linux/mlx5/eswitch.h | 3 +
> 4 files changed, 161 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
> b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
> index a42a23e505df..1235fd84ae3a 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
> @@ -1168,6 +1168,8 @@ void esw_vport_cleanup_ingress_rules(struct
> mlx5_eswitch *esw,
>
> vport->ingress.drop_rule = NULL;
> vport->ingress.allow_rule = NULL;
> +
> + esw_vport_del_ingress_acl_modify_metadata(esw, vport);
> }
>
> void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, diff --git
> a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
> b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
> index 8b9f2cf58e91..4417a195832e 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
> @@ -68,6 +68,8 @@ struct vport_ingress {
> struct mlx5_flow_group *allow_spoofchk_only_grp;
> struct mlx5_flow_group *allow_untagged_only_grp;
> struct mlx5_flow_group *drop_grp;
> + int modify_metadata_id;
No need for random alignment. Just have one white space after int.
> + struct mlx5_flow_handle *modify_metadata_rule;
> struct mlx5_flow_handle *allow_rule;
> struct mlx5_flow_handle *drop_rule;
> struct mlx5_fc *drop_counter;
> @@ -196,6 +198,10 @@ struct mlx5_esw_functions {
> u16 num_vfs;
> };
>
> +enum {
> + MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0), };
> +
> struct mlx5_eswitch {
> struct mlx5_core_dev *dev;
> struct mlx5_nb nb;
> @@ -203,6 +209,7 @@ struct mlx5_eswitch {
> struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE];
> struct workqueue_struct *work_queue;
> struct mlx5_vport *vports;
> + u32 flags;
Same as above, no need for extra aligment.
> int total_vports;
> int enabled_vports;
> /* Synchronize between vport change events @@ -240,6 +247,8 @@
> void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw,
> struct mlx5_vport *vport);
> void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
> struct mlx5_vport *vport);
> +void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
> + struct mlx5_vport *vport);
>
> /* E-Switch API */
> int mlx5_eswitch_init(struct mlx5_core_dev *dev); diff --git
> a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> index 17abb98b48af..871ae44dc132 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> @@ -1555,32 +1555,16 @@ static void esw_offloads_devcom_cleanup(struct
> mlx5_eswitch *esw) static int esw_vport_ingress_prio_tag_config(struct
> mlx5_eswitch *esw,
> struct mlx5_vport *vport)
> {
> - struct mlx5_core_dev *dev = esw->dev;
> struct mlx5_flow_act flow_act = {0};
> struct mlx5_flow_spec *spec;
> int err = 0;
>
> /* For prio tag mode, there is only 1 FTEs:
> - * 1) Untagged packets - push prio tag VLAN, allow
> + * 1) Untagged packets - push prio tag VLAN and modify metadata if
> + * required, allow
> * Unmatched traffic is allowed by default
> */
>
> - if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support))
> - return -EOPNOTSUPP;
> -
> - esw_vport_cleanup_ingress_rules(esw, vport);
> -
> - err = esw_vport_enable_ingress_acl(esw, vport);
> - if (err) {
> - mlx5_core_warn(esw->dev,
> - "failed to enable prio tag ingress acl (%d) on
> vport[%d]\n",
> - err, vport->vport);
> - return err;
> - }
> -
> - esw_debug(esw->dev,
> - "vport[%d] configure ingress rules\n", vport->vport);
> -
> spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
> if (!spec) {
> err = -ENOMEM;
> @@ -1596,6 +1580,12 @@ static int esw_vport_ingress_prio_tag_config(struct
> mlx5_eswitch *esw,
> flow_act.vlan[0].ethtype = ETH_P_8021Q;
> flow_act.vlan[0].vid = 0;
> flow_act.vlan[0].prio = 0;
> +
> + if (vport->ingress.modify_metadata_rule) {
> + flow_act.action |=
> MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
> + flow_act.modify_id = vport->ingress.modify_metadata_id;
> + }
> +
> vport->ingress.allow_rule =
> mlx5_add_flow_rules(vport->ingress.acl, spec,
> &flow_act, NULL, 0);
> @@ -1616,6 +1606,59 @@ static int esw_vport_ingress_prio_tag_config(struct
> mlx5_eswitch *esw,
> return err;
> }
>
> +static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch
> *esw,
> + struct mlx5_vport *vport)
> +{
> + u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] =
> {};
> + struct mlx5_flow_act flow_act = {};
> + struct mlx5_flow_spec spec = {};
> + int err = 0;
> +
> + MLX5_SET(set_action_in, action, action_type,
> MLX5_ACTION_TYPE_SET);
> + MLX5_SET(set_action_in, action, field,
> MLX5_ACTION_IN_FIELD_METADATA_REG_C_0);
> + MLX5_SET(set_action_in, action, data,
> + mlx5_eswitch_get_vport_metadata_for_match(esw, vport-
> >vport));
> +
> + err = mlx5_modify_header_alloc(esw->dev,
> MLX5_FLOW_NAMESPACE_ESW_INGRESS,
> + 1, action, &vport-
> >ingress.modify_metadata_id);
> +
> + if (err) {
> + esw_warn(esw->dev,
> + "failed to alloc modify header for vport %d ingress acl
> (%d)\n",
> + vport->vport, err);
> + return err;
> + }
> +
> + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
> MLX5_FLOW_CONTEXT_ACTION_ALLOW;
> + flow_act.modify_id = vport->ingress.modify_metadata_id;
> + vport->ingress.modify_metadata_rule = mlx5_add_flow_rules(vport-
> >ingress.acl,
> + &spec,
> &flow_act, NULL, 0);
> + if (IS_ERR(vport->ingress.modify_metadata_rule)) {
> + err = PTR_ERR(vport->ingress.modify_metadata_rule);
> + esw_warn(esw->dev,
> + "failed to add setting metadata rule for vport %d
> ingress acl, err(%d)\n",
> + vport->vport, err);
> + vport->ingress.modify_metadata_rule = NULL;
> + goto out;
> + }
> +
> +out:
> + if (err)
> + mlx5_modify_header_dealloc(esw->dev, vport-
> >ingress.modify_metadata_id);
> + return err;
> +}
> +
> +void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
> + struct mlx5_vport *vport)
> +{
> + if (vport->ingress.modify_metadata_rule) {
> + mlx5_del_flow_rules(vport->ingress.modify_metadata_rule);
> + mlx5_modify_header_dealloc(esw->dev,
> +vport->ingress.modify_metadata_id);
> +
> + vport->ingress.modify_metadata_rule = NULL;
> + }
> +}
> +
> static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw,
> struct mlx5_vport *vport)
> {
> @@ -1623,6 +1666,9 @@ static int esw_vport_egress_prio_tag_config(struct
> mlx5_eswitch *esw,
> struct mlx5_flow_spec *spec;
> int err = 0;
>
> + if (!MLX5_CAP_GEN(esw->dev, prio_tag_required))
> + return 0;
> +
> /* For prio tag mode, there is only 1 FTEs:
> * 1) prio tag packets - pop the prio tag VLAN, allow
> * Unmatched traffic is allowed by default @@ -1676,27 +1722,77 @@
> static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw,
> return err;
> }
>
> -static int esw_prio_tag_acls_config(struct mlx5_eswitch *esw, int nvports)
> +static int esw_vport_ingress_common_config(struct mlx5_eswitch *esw,
> + struct mlx5_vport *vport)
> {
> - struct mlx5_vport *vport = NULL;
> - int i, j;
> int err;
>
> - mlx5_esw_for_each_vf_vport(esw, i, vport, nvports) {
> + if (!mlx5_eswitch_vport_match_metadata_enabled(esw) &&
> + !MLX5_CAP_GEN(esw->dev, prio_tag_required))
> + return 0;
> +
> + esw_vport_cleanup_ingress_rules(esw, vport);
> +
> + err = esw_vport_enable_ingress_acl(esw, vport);
> + if (err) {
> + esw_warn(esw->dev,
> + "failed to enable ingress acl (%d) on vport[%d]\n",
> + err, vport->vport);
> + return err;
> + }
> +
> + esw_debug(esw->dev,
> + "vport[%d] configure ingress rules\n", vport->vport);
> +
> + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
> + err = esw_vport_add_ingress_acl_modify_metadata(esw,
> vport);
> + if (err)
> + goto out;
> + }
> +
> + if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
> + (vport->vport >= MLX5_VPORT_FIRST_VF &&
> + vport->vport <= esw->dev->priv.sriov.num_vfs)) {
> err = esw_vport_ingress_prio_tag_config(esw, vport);
> if (err)
> - goto err_ingress;
> - err = esw_vport_egress_prio_tag_config(esw, vport);
> + goto out;
> + }
> +
> +out:
> + if (err)
> + esw_vport_disable_ingress_acl(esw, vport);
> + return err;
> +}
> +
> +static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw) {
> + struct mlx5_vport *vport;
> + int i, j;
> + int err;
> +
> + mlx5_esw_for_all_vports(esw, i, vport) {
> + err = esw_vport_ingress_common_config(esw, vport);
> if (err)
> - goto err_egress;
> + goto err_ingress;
> +
> + if (vport->vport >= MLX5_VPORT_FIRST_VF &&
> + vport->vport <= esw->dev->priv.sriov.num_vfs) {
Add an helper API mlx5_esw_is_vport(const struct mlx5_esw *esw, const struct mlx5_vport *vport)
and use at two places in ingress and egress config.
> + err = esw_vport_egress_prio_tag_config(esw, vport);
> + if (err)
> + goto err_egress;
> + }
> }
>
> + if (mlx5_eswitch_vport_match_metadata_enabled(esw))
> + esw_info(esw->dev, "Use metadata reg_c as source vport to
> match\n");
> +
> return 0;
>
> err_egress:
> esw_vport_disable_ingress_acl(esw, vport);
> err_ingress:
> - mlx5_esw_for_each_vf_vport_reverse(esw, j, vport, i - 1) {
> + for (j = MLX5_VPORT_PF; j < i; j++) {
Keep the reverse order as before.
> + vport = &esw->vports[j];
> esw_vport_disable_egress_acl(esw, vport);
> esw_vport_disable_ingress_acl(esw, vport);
> }
> @@ -1704,15 +1800,17 @@ static int esw_prio_tag_acls_config(struct
> mlx5_eswitch *esw, int nvports)
> return err;
> }
>
> -static void esw_prio_tag_acls_cleanup(struct mlx5_eswitch *esw)
> +static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw)
> {
> struct mlx5_vport *vport;
> int i;
>
> - mlx5_esw_for_each_vf_vport(esw, i, vport, esw->nvports) {
> + mlx5_esw_for_all_vports(esw, i, vport) {
If you are changing this, please do in reverse order to keep it exact mirror of create/enable sequence.
> esw_vport_disable_egress_acl(esw, vport);
> esw_vport_disable_ingress_acl(esw, vport);
> }
> +
> + esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
> }
>
> static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports)
> @@ -1722,15 +1820,13 @@ static int esw_offloads_steering_init(struct
> mlx5_eswitch *esw, int nvports)
> memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb));
> mutex_init(&esw->fdb_table.offloads.fdb_prio_lock);
>
> - if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) {
> - err = esw_prio_tag_acls_config(esw, nvports);
> - if (err)
> - return err;
> - }
> + err = esw_create_offloads_acl_tables(esw);
> + if (err)
> + return err;
>
> err = esw_create_offloads_fdb_tables(esw, nvports);
> if (err)
> - return err;
> + goto create_fdb_err;
>
> err = esw_create_offloads_table(esw, nvports);
> if (err)
> @@ -1748,6 +1844,9 @@ static int esw_offloads_steering_init(struct
> mlx5_eswitch *esw, int nvports)
> create_ft_err:
> esw_destroy_offloads_fdb_tables(esw);
>
> +create_fdb_err:
> + esw_destroy_offloads_acl_tables(esw);
> +
> return err;
> }
>
> @@ -1756,8 +1855,7 @@ static void esw_offloads_steering_cleanup(struct
> mlx5_eswitch *esw)
> esw_destroy_vport_rx_group(esw);
> esw_destroy_offloads_table(esw);
> esw_destroy_offloads_fdb_tables(esw);
> - if (MLX5_CAP_GEN(esw->dev, prio_tag_required))
> - esw_prio_tag_acls_cleanup(esw);
> + esw_destroy_offloads_acl_tables(esw);
> }
>
> static void esw_functions_changed_event_handler(struct work_struct *work)
> @@ -2290,3 +2388,16 @@ struct mlx5_eswitch_rep
> *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
> return mlx5_eswitch_get_rep(esw, vport); }
> EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
> +
> +u32 mlx5_eswitch_vport_match_metadata_enabled(struct mlx5_eswitch
> *esw)
> +{
> + return esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA;
> +}
> +EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled);
> +
Return type should book and const *esw.
> +u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
> + u16 vport)
> +{
> + return ((MLX5_CAP_GEN(esw->dev, vhca_id) & 0xffff) << 16) | vport; }
> +EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match);
This one too.
> diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index
> 174eec0871d9..d729f5e4d70a 100644
> --- a/include/linux/mlx5/eswitch.h
> +++ b/include/linux/mlx5/eswitch.h
> @@ -64,6 +64,9 @@ struct mlx5_flow_handle *
> mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
> int vport, u32 sqn);
>
> +u32 mlx5_eswitch_vport_match_metadata_enabled(struct mlx5_eswitch
> +*esw);
> +u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
> +u16 vport);
> +
> #ifdef CONFIG_MLX5_ESWITCH
> enum devlink_eswitch_encap_mode
> mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev);
> --
> 2.21.0
Powered by blists - more mailing lists