lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 29 Oct 2021 13:56:26 -0700
From:   Saeed Mahameed <saeed@...nel.org>
To:     "David S. Miller" <davem@...emloft.net>,
        Jakub Kicinski <kuba@...nel.org>
Cc:     netdev@...r.kernel.org, Ariel Levkovich <lariel@...dia.com>,
        Vlad Buslov <vladbu@...dia.com>, Roi Dayan <roid@...dia.com>,
        Saeed Mahameed <saeedm@...dia.com>
Subject: [net-next 08/14] net/mlx5: E-Switch, Add ovs internal port mapping to metadata support

From: Ariel Levkovich <lariel@...dia.com>

Add infrastructure to map an OVS internal port device to vport
match metadata, in order to support offloading rules that use an
internal port as the filter device or as the destination device.

The infrastructure allows adding internal port devices to, and removing
them from, an eswitch database, and getting a unique vport metadata value
to be written to and matched on in reg_c0 when offloading rules that come
from or go to an internal port.

The new int port metadata can be written to the source port register
in HW to indicate that current source port of the packet is the
internal port and not one of the actual HW vports (uplink or VF).
Using this method, it is possible to offload TC rules with an OVS
internal port as their destination port (overwriting the src vport
register) or as the filter port (matching on the value of the src
vport register and making sure it matches to the internal port's
value).

There is also a need to handle a miss case where the packet's
src port value was changed in HW to an internal port but a following
rule which matches on this new src port value wasn't found in HW.

In such a case, the packet will be forwarded to the driver with
metadata which allows the driver to restore the info of the internal
port's netdevice. Once this info is restored, the uplink driver
can forward the packet to the relevant netdevice in SW.

Signed-off-by: Ariel Levkovich <lariel@...dia.com>
Reviewed-by: Vlad Buslov <vladbu@...dia.com>
Reviewed-by: Roi Dayan <roid@...dia.com>
Signed-off-by: Saeed Mahameed <saeedm@...dia.com>
---
 .../net/ethernet/mellanox/mlx5/core/Makefile  |   2 +-
 .../ethernet/mellanox/mlx5/core/en/rep/tc.c   |  43 +-
 .../ethernet/mellanox/mlx5/core/en/rep/tc.h   |   1 -
 .../mellanox/mlx5/core/en/tc/int_port.c       | 457 ++++++++++++++++++
 .../mellanox/mlx5/core/en/tc/int_port.h       |  65 +++
 .../net/ethernet/mellanox/mlx5/core/en_rep.c  |  13 +-
 .../net/ethernet/mellanox/mlx5/core/en_rep.h  |   4 +
 .../net/ethernet/mellanox/mlx5/core/en_tc.c   |   4 +
 .../net/ethernet/mellanox/mlx5/core/en_tc.h   |   3 +
 .../net/ethernet/mellanox/mlx5/core/eswitch.h |   6 +
 .../mellanox/mlx5/core/eswitch_offloads.c     |  18 +-
 11 files changed, 607 insertions(+), 9 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index bdb271b604d9..e63bb9ceb9c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -45,7 +45,7 @@ mlx5_core-$(CONFIG_MLX5_CLS_ACT)     += en_tc.o en/rep/tc.o en/rep/neigh.o \
 					esw/indir_table.o en/tc_tun_encap.o \
 					en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \
 					en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o \
-					en/tc/post_act.o
+					en/tc/post_act.o en/tc/int_port.o
 mlx5_core-$(CONFIG_MLX5_TC_CT)	     += en/tc_ct.o
 mlx5_core-$(CONFIG_MLX5_TC_SAMPLE)   += en/tc/sample.o
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index de683724e184..c69129940268 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -20,6 +20,7 @@
 #include "lib/port_tun.h"
 #include "en/tc/sample.h"
 #include "en_accel/ipsec_rxtx.h"
+#include "en/tc/int_port.h"
 
 struct mlx5e_rep_indr_block_priv {
 	struct net_device *netdev;
@@ -672,12 +673,43 @@ static void mlx5e_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *sk
 	mlx5_rep_tc_post_napi_receive(tc_priv);
 }
 
+static bool mlx5e_restore_skb_int_port(struct mlx5e_priv *priv, struct sk_buff *skb,
+				       struct mlx5_mapped_obj *mapped_obj,
+				       struct mlx5e_tc_update_priv *tc_priv,
+				       bool *forward_tx,
+				       u32 reg_c1)
+{
+	u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK;
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5e_tc_int_port_priv *int_port_priv;
+	struct mlx5e_rep_priv *rpriv;
+
+	/* A non-zero tunnel id means tunnel restore wins over int port restore */
+	if (tunnel_id)
+		return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
+
+	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+	int_port_priv = rpriv->uplink_priv.int_port_priv;
+
+	if (!mlx5e_tc_int_port_dev_fwd(int_port_priv, skb,
+				       mapped_obj->int_port_metadata, forward_tx))
+		return false;
+
+	/* dev_fwd pointed skb->dev at the int port netdev it looked up;
+	 * keep it in fwd_dev for the future dev_put.
+	 */
+	tc_priv->fwd_dev = skb->dev;
+	return true;
+}
+
 void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq,
 			  struct sk_buff *skb)
 {
+	u32 reg_c1 = be32_to_cpu(cqe->ft_metadata);
 	struct mlx5e_tc_update_priv tc_priv = {};
 	struct mlx5_mapped_obj mapped_obj;
 	struct mlx5_eswitch *esw;
+	bool forward_tx = false;
 	struct mlx5e_priv *priv;
 	u32 reg_c0;
 	int err;
@@ -702,21 +734,26 @@ void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq,
 	}
 
 	if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
-		u32 reg_c1 = be32_to_cpu(cqe->ft_metadata);
-
 		if (!mlx5e_restore_skb_chain(skb, mapped_obj.chain, reg_c1, &tc_priv) &&
 		    !mlx5_ipsec_is_rx_flow(cqe))
 			goto free_skb;
 	} else if (mapped_obj.type == MLX5_MAPPED_OBJ_SAMPLE) {
 		mlx5e_restore_skb_sample(priv, skb, &mapped_obj, &tc_priv);
 		goto free_skb;
+	} else if (mapped_obj.type == MLX5_MAPPED_OBJ_INT_PORT_METADATA) {
+		if (!mlx5e_restore_skb_int_port(priv, skb, &mapped_obj, &tc_priv,
+						&forward_tx, reg_c1))
+			goto free_skb;
 	} else {
 		netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
 		goto free_skb;
 	}
 
 forward:
-	napi_gro_receive(rq->cq.napi, skb);
+	if (forward_tx)
+		dev_queue_xmit(skb);
+	else
+		napi_gro_receive(rq->cq.napi, skb);
 
 	mlx5_rep_tc_post_napi_receive(&tc_priv);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
index 0a8334d20b3b..d6c7c81690eb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
@@ -64,7 +64,6 @@ static inline int
 mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
 		   void *type_data) { return -EOPNOTSUPP; }
 
-struct mlx5e_tc_update_priv;
 static inline void
 mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq,
 		     struct sk_buff *skb) {}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c
new file mode 100644
index 000000000000..ca834bbcb44f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c
@@ -0,0 +1,457 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/mlx5/fs.h>
+#include "en/mapping.h"
+#include "en/tc/int_port.h"
+#include "en.h"
+#include "en_rep.h"
+#include "en_tc.h"
+
+struct mlx5e_tc_int_port {
+	enum mlx5e_tc_int_port_type type; /* Ingress or egress use of the int port */
+	int ifindex; /* ifindex of the internal port netdevice */
+	u32 match_metadata; /* Value matched on (after shifting) in metadata_reg_c_0 */
+	u32 mapping; /* Id from reg_c0_obj_pool, used as the rx rule flow tag */
+	struct list_head list; /* Node in mlx5e_tc_int_port_priv.int_ports */
+	struct mlx5_flow_handle *rx_rule; /* Rule in ft_offloads; NULL once deleted */
+	refcount_t refcnt; /* Taken by lookup, dropped by mlx5e_tc_int_port_put() */
+	struct rcu_head rcu_head;
+};
+
+struct mlx5e_tc_int_port_priv {
+	struct mlx5_core_dev *dev;
+	struct mutex int_ports_lock; /* Protects int ports list */
+	struct list_head int_ports; /* Writers hold int_ports_lock; also walked under RCU */
+	u16 num_ports; /* Entries in int_ports, at most MLX5E_TC_MAX_INT_PORT_NUM */
+	bool ul_rep_rx_ready; /* True from uplink rep rx init until its cleanup */
+	struct mapping_ctx *metadata_mapping; /* Metadata for source port rewrite and matching */
+};
+
+bool mlx5e_tc_int_port_supported(const struct mlx5_eswitch *esw)
+{
+	return mlx5_eswitch_vport_match_metadata_enabled(esw) &&
+	       MLX5_CAP_GEN(esw->dev, reg_c_preserve);
+}
+
+u32 mlx5e_tc_int_port_get_metadata(struct mlx5e_tc_int_port *int_port)
+{
+	return int_port->match_metadata;
+}
+
+int mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port *int_port)
+{
+	/* An egress int port can be reached either by a packet that
+	 * arrived on a vport and was redirected to it, or by a packet
+	 * that arrived on the uplink and is hairpinned back to the
+	 * uplink through it, so any vport may be the flow source.
+	 * Every other int port type uses the uplink as flow source.
+	 */
+	if (int_port->type == MLX5E_TC_INT_PORT_EGRESS)
+		return MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT;
+	return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+}
+
+u32 mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port *int_port)
+{
+	return int_port->match_metadata << (32 - ESW_SOURCE_PORT_METADATA_BITS);
+}
+
+static struct mlx5_flow_handle *
+mlx5e_int_port_create_rx_rule(struct mlx5_eswitch *esw,
+			      struct mlx5e_tc_int_port *int_port,
+			      struct mlx5_flow_destination *dest)
+{
+	struct mlx5_flow_act flow_act = {};
+	struct mlx5_flow_handle *rule;
+	struct mlx5_flow_spec *spec;
+	void *misc2;
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return ERR_PTR(-ENOMEM);
+
+	/* Match reg_c0, under the vport metadata mask, against the
+	 * match value derived from this int port's metadata.
+	 */
+	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+	misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
+	MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0,
+		 mlx5_eswitch_get_vport_metadata_mask());
+
+	misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
+	MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0,
+		 mlx5e_tc_int_port_get_metadata_for_match(int_port));
+
+	/* Set the flow tag to the int port metadata mapping
+	 * rather than the chain mapping.
+	 */
+	spec->flow_context.flags |= FLOW_CONTEXT_HAS_TAG;
+	spec->flow_context.flow_tag = int_port->mapping;
+
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+	rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec,
+				   &flow_act, dest, 1);
+	if (IS_ERR(rule))
+		mlx5_core_warn(esw->dev, "ft offloads: Failed to add internal vport rx rule err %ld\n",
+			       PTR_ERR(rule));
+
+	kvfree(spec);
+
+	return rule;
+}
+
+static struct mlx5e_tc_int_port *
+mlx5e_int_port_lookup(struct mlx5e_tc_int_port_priv *priv,
+		      int ifindex,
+		      enum mlx5e_tc_int_port_type type)
+{
+	struct mlx5e_tc_int_port *entry;
+
+	if (!priv->ul_rep_rx_ready)
+		return NULL;
+
+	list_for_each_entry(entry, &priv->int_ports, list) {
+		if (entry->ifindex != ifindex || entry->type != type)
+			continue;
+		refcount_inc(&entry->refcnt); /* Reference for the caller */
+		return entry;
+	}
+
+	return NULL;
+}
+
+static int mlx5e_int_port_metadata_alloc(struct mlx5e_tc_int_port_priv *priv,
+					 int ifindex, enum mlx5e_tc_int_port_type type,
+					 u32 *id)
+{
+	u32 mapped_key[2] = {type, ifindex};
+	int err;
+
+	err = mapping_add(priv->metadata_mapping, mapped_key, id);
+	if (err)
+		return err;
+
+	/* Mark the id with PFNUM 0xf, which is reserved for int ports */
+	*id |= 0xf << ESW_VPORT_BITS;
+
+	return 0;
+}
+
+static void mlx5e_int_port_metadata_free(struct mlx5e_tc_int_port_priv *priv,
+					 u32 id)
+{
+	id &= (1 << ESW_VPORT_BITS) - 1; /* Drop the reserved bits OR-ed in at alloc */
+	mapping_remove(priv->metadata_mapping, id);
+}
+
+/* Allocate and list a new int port; caller holds priv->int_ports_lock */
+static struct mlx5e_tc_int_port *
+mlx5e_int_port_add(struct mlx5e_tc_int_port_priv *priv,
+		   int ifindex,
+		   enum mlx5e_tc_int_port_type type)
+{
+	struct mlx5_eswitch *esw = priv->dev->priv.eswitch;
+	struct mlx5_mapped_obj mapped_obj = {};
+	struct mlx5e_rep_priv *uplink_rpriv;
+	struct mlx5e_tc_int_port *int_port;
+	struct mlx5_flow_destination dest = {};
+	struct mapping_ctx *ctx;
+	u32 match_metadata;
+	u32 mapping;
+	int err;
+
+	if (priv->num_ports == MLX5E_TC_MAX_INT_PORT_NUM) {
+		mlx5_core_dbg(priv->dev, "Cannot add a new int port, max supported %d\n",
+			      MLX5E_TC_MAX_INT_PORT_NUM);
+		return ERR_PTR(-ENOSPC);
+	}
+
+	int_port = kzalloc(sizeof(*int_port), GFP_KERNEL);
+	if (!int_port)
+		return ERR_PTR(-ENOMEM);
+
+	err = mlx5e_int_port_metadata_alloc(priv, ifindex, type, &match_metadata);
+	if (err) {
+		mlx5_core_warn(esw->dev, "Cannot add a new internal port, metadata allocation failed for ifindex %d\n",
+			       ifindex);
+		goto err_metadata;
+	}
+
+	/* map metadata to reg_c0 object for miss handling */
+	ctx = esw->offloads.reg_c0_obj_pool;
+	mapped_obj.type = MLX5_MAPPED_OBJ_INT_PORT_METADATA;
+	mapped_obj.int_port_metadata = match_metadata;
+	err = mapping_add(ctx, &mapped_obj, &mapping);
+	if (err)
+		goto err_map;
+
+	int_port->type = type;
+	int_port->ifindex = ifindex;
+	int_port->match_metadata = match_metadata;
+	int_port->mapping = mapping;
+
+	/* Steer packets matching this int port's metadata to the uplink rep root ft */
+	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+	dest.ft = uplink_rpriv->root_ft;
+
+	int_port->rx_rule = mlx5e_int_port_create_rx_rule(esw, int_port, &dest);
+	if (IS_ERR(int_port->rx_rule)) {
+		err = PTR_ERR(int_port->rx_rule);
+		mlx5_core_warn(esw->dev, "Can't add internal port rx rule, err %d\n", err);
+		goto err_rx_rule;
+	}
+
+	refcount_set(&int_port->refcnt, 1); /* Reference handed to the caller */
+	list_add_rcu(&int_port->list, &priv->int_ports);
+	priv->num_ports++;
+
+	return int_port;
+
+err_rx_rule:
+	mapping_remove(ctx, int_port->mapping);
+
+err_map:
+	mlx5e_int_port_metadata_free(priv, match_metadata);
+
+err_metadata:
+	kfree(int_port);
+
+	return ERR_PTR(err);
+}
+
+/* Unlist and release an int port; caller holds priv->int_ports_lock */
+static void
+mlx5e_int_port_remove(struct mlx5e_tc_int_port_priv *priv,
+		      struct mlx5e_tc_int_port *int_port)
+{
+	struct mlx5_eswitch *esw = priv->dev->priv.eswitch;
+	struct mapping_ctx *ctx;
+
+	ctx = esw->offloads.reg_c0_obj_pool;
+
+	list_del_rcu(&int_port->list);
+
+	/* The rx rule and the two mappings are not accessed
+	 * by the rcu readers of this int_port object, so it
+	 * is safe to release them right away.
+	 */
+	if (int_port->rx_rule) /* NULL if mlx5e_tc_int_port_cleanup_rep_rx() deleted it */
+		mlx5_del_flow_rules(int_port->rx_rule);
+	mapping_remove(ctx, int_port->mapping);
+	mlx5e_int_port_metadata_free(priv, int_port->match_metadata);
+	kfree_rcu(int_port, rcu_head); /* Freed after a grace period via the embedded rcu_head */
+	priv->num_ports--;
+}
+
+/* Must be called with rcu_read_lock held; no reference is taken on the result */
+static struct mlx5e_tc_int_port *
+mlx5e_int_port_get_from_metadata(struct mlx5e_tc_int_port_priv *priv,
+				 u32 metadata)
+{
+	struct mlx5e_tc_int_port *int_port;
+
+	list_for_each_entry_rcu(int_port, &priv->int_ports, list)
+		if (int_port->match_metadata == metadata)
+			return int_port;
+
+	return NULL;
+}
+
+/* Take a reference on the int port of (ifindex, type), adding it first if
+ * it is not in the list yet. Returns an ERR_PTR() on failure.
+ */
+struct mlx5e_tc_int_port *
+mlx5e_tc_int_port_get(struct mlx5e_tc_int_port_priv *priv,
+		      int ifindex,
+		      enum mlx5e_tc_int_port_type type)
+{
+	struct mlx5e_tc_int_port *port;
+
+	if (!priv)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	mutex_lock(&priv->int_ports_lock);
+
+	if (!priv->ul_rep_rx_ready) {
+		/* Reject request if ul rep not ready */
+		port = ERR_PTR(-EOPNOTSUPP);
+	} else {
+		/* Reuse a listed int port when there is one */
+		port = mlx5e_int_port_lookup(priv, ifindex, type);
+		/* Otherwise alloc and add new int port to list */
+		if (!port)
+			port = mlx5e_int_port_add(priv, ifindex, type);
+	}
+
+	mutex_unlock(&priv->int_ports_lock);
+
+	return port;
+}
+
+void
+mlx5e_tc_int_port_put(struct mlx5e_tc_int_port_priv *priv,
+		      struct mlx5e_tc_int_port *int_port)
+{
+	if (!refcount_dec_and_mutex_lock(&int_port->refcnt, &priv->int_ports_lock))
+		return; /* Not the last reference */
+
+	mlx5e_int_port_remove(priv, int_port); /* Runs with int_ports_lock held */
+	mutex_unlock(&priv->int_ports_lock);
+}
+
+struct mlx5e_tc_int_port_priv *
+mlx5e_tc_int_port_init(struct mlx5e_priv *priv)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5e_tc_int_port_priv *int_port_priv;
+	u64 mapping_id;
+
+	if (!mlx5e_tc_int_port_supported(esw))
+		return NULL; /* mlx5e_tc_int_port_get() answers -EOPNOTSUPP for a NULL priv */
+
+	int_port_priv = kzalloc(sizeof(*int_port_priv), GFP_KERNEL);
+	if (!int_port_priv)
+		return NULL;
+
+	mapping_id = mlx5_query_nic_system_image_guid(priv->mdev);
+
+	int_port_priv->metadata_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_INT_PORT,
+								sizeof(u32) * 2, /* Key is {type, ifindex} */
+								(1 << ESW_VPORT_BITS) - 1, true);
+	if (IS_ERR(int_port_priv->metadata_mapping)) {
+		mlx5_core_warn(priv->mdev, "Can't allocate metadata mapping of int port offload, err=%ld\n",
+			       PTR_ERR(int_port_priv->metadata_mapping));
+		goto err_mapping;
+	}
+
+	int_port_priv->dev = priv->mdev;
+	mutex_init(&int_port_priv->int_ports_lock);
+	INIT_LIST_HEAD(&int_port_priv->int_ports);
+
+	return int_port_priv;
+
+err_mapping:
+	kfree(int_port_priv);
+
+	return NULL;
+}
+
+void
+mlx5e_tc_int_port_cleanup(struct mlx5e_tc_int_port_priv *priv)
+{
+	if (!priv)
+		return;
+
+	mutex_destroy(&priv->int_ports_lock);
+	mapping_destroy(priv->metadata_mapping);
+	kfree(priv);
+}
+
+/* Int port rx rules reside in ul rep rx tables.
+ * It is possible the ul rep will go down while there are
+ * still int port rules in its rx table so proper cleanup
+ * is required to free resources.
+ */
+void mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv *priv)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5_rep_uplink_priv *uplink_priv;
+	struct mlx5e_tc_int_port_priv *ppriv;
+	struct mlx5e_rep_priv *uplink_rpriv;
+
+	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+	uplink_priv = &uplink_rpriv->uplink_priv;
+
+	ppriv = uplink_priv->int_port_priv;
+
+	if (!ppriv)
+		return;
+
+	mutex_lock(&ppriv->int_ports_lock);
+	ppriv->ul_rep_rx_ready = true; /* mlx5e_tc_int_port_get() accepts requests from now on */
+	mutex_unlock(&ppriv->int_ports_lock);
+}
+
+void mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv *priv)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5_rep_uplink_priv *uplink_priv;
+	struct mlx5e_tc_int_port_priv *ppriv;
+	struct mlx5e_rep_priv *uplink_rpriv;
+	struct mlx5e_tc_int_port *int_port;
+
+	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+	uplink_priv = &uplink_rpriv->uplink_priv;
+
+	ppriv = uplink_priv->int_port_priv;
+
+	if (!ppriv)
+		return;
+
+	mutex_lock(&ppriv->int_ports_lock);
+
+	ppriv->ul_rep_rx_ready = false; /* mlx5e_tc_int_port_get() rejects requests from now on */
+
+	list_for_each_entry(int_port, &ppriv->int_ports, list) {
+		if (!IS_ERR_OR_NULL(int_port->rx_rule))
+			mlx5_del_flow_rules(int_port->rx_rule);
+
+		int_port->rx_rule = NULL; /* mlx5e_int_port_remove() skips a NULL rule */
+	}
+
+	mutex_unlock(&ppriv->int_ports_lock);
+}
+
+bool
+mlx5e_tc_int_port_dev_fwd(struct mlx5e_tc_int_port_priv *priv,
+			  struct sk_buff *skb, u32 int_vport_metadata,
+			  bool *forward_tx)
+{
+	enum mlx5e_tc_int_port_type fwd_type;
+	struct mlx5e_tc_int_port *int_port;
+	struct net_device *dev;
+	int ifindex;
+
+	if (!priv)
+		return false;
+
+	rcu_read_lock();
+	int_port = mlx5e_int_port_get_from_metadata(priv, int_vport_metadata);
+	if (!int_port) {
+		rcu_read_unlock();
+		mlx5_core_dbg(priv->dev, "Unable to find int port with metadata 0x%.8x\n",
+			      int_vport_metadata);
+		return false;
+	}
+
+	ifindex = int_port->ifindex;
+	fwd_type = int_port->type;
+	rcu_read_unlock(); /* Only the copied ifindex and type are used below */
+
+	dev = dev_get_by_index(&init_net, ifindex); /* Caller keeps it in fwd_dev for dev_put */
+	if (!dev) {
+		mlx5_core_dbg(priv->dev,
+			      "Couldn't find internal port device with ifindex: %d\n",
+			      ifindex);
+		return false;
+	}
+
+	skb->skb_iif = dev->ifindex;
+	skb->dev = dev;
+
+	if (fwd_type == MLX5E_TC_INT_PORT_INGRESS) {
+		skb->pkt_type = PACKET_HOST;
+		skb_set_redirected(skb, true);
+		*forward_tx = false; /* Caller hands the skb to napi_gro_receive() */
+	} else {
+		skb_reset_network_header(skb);
+		skb_push_rcsum(skb, skb->mac_len);
+		skb_set_redirected(skb, false);
+		*forward_tx = true; /* Caller sends the skb with dev_queue_xmit() */
+	}
+
+	return true;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h
new file mode 100644
index 000000000000..e72c79d308d7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_TC_INT_PORT_H__
+#define __MLX5_EN_TC_INT_PORT_H__
+
+#include "en.h"
+
+struct mlx5e_tc_int_port;
+struct mlx5e_tc_int_port_priv;
+
+enum mlx5e_tc_int_port_type {
+	MLX5E_TC_INT_PORT_INGRESS,
+	MLX5E_TC_INT_PORT_EGRESS,
+};
+
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+bool mlx5e_tc_int_port_supported(const struct mlx5_eswitch *esw);
+
+struct mlx5e_tc_int_port_priv *
+mlx5e_tc_int_port_init(struct mlx5e_priv *priv);
+void
+mlx5e_tc_int_port_cleanup(struct mlx5e_tc_int_port_priv *priv);
+
+void mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv *priv);
+void mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv *priv);
+
+bool
+mlx5e_tc_int_port_dev_fwd(struct mlx5e_tc_int_port_priv *priv,
+			  struct sk_buff *skb, u32 int_vport_metadata,
+			  bool *forward_tx);
+struct mlx5e_tc_int_port *
+mlx5e_tc_int_port_get(struct mlx5e_tc_int_port_priv *priv,
+		      int ifindex,
+		      enum mlx5e_tc_int_port_type type);
+void
+mlx5e_tc_int_port_put(struct mlx5e_tc_int_port_priv *priv,
+		      struct mlx5e_tc_int_port *int_port);
+
+u32 mlx5e_tc_int_port_get_metadata(struct mlx5e_tc_int_port *int_port);
+u32 mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port *int_port);
+int mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port *int_port);
+#else /* CONFIG_MLX5_CLS_ACT */
+static inline u32
+mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port *int_port)
+{
+	return 0; /* Stub used when CONFIG_MLX5_CLS_ACT is disabled */
+}
+
+static inline int
+mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port *int_port)
+{
+	return 0; /* Stub used when CONFIG_MLX5_CLS_ACT is disabled */
+}
+
+static inline bool mlx5e_tc_int_port_supported(const struct mlx5_eswitch *esw)
+{
+	return false;
+}
+
+static inline void mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv *priv) {}
+static inline void mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv *priv) {}
+
+#endif /* CONFIG_MLX5_CLS_ACT */
+#endif /* __MLX5_EN_TC_INT_PORT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 5230e0422cae..e58a9ec42553 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -53,6 +53,7 @@
 #define CREATE_TRACE_POINTS
 #include "diag/en_rep_tracepoint.h"
 #include "en_accel/ipsec.h"
+#include "en/tc/int_port.h"
 
 #define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \
 	max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
@@ -857,12 +858,22 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
 
 static int mlx5e_init_ul_rep_rx(struct mlx5e_priv *priv)
 {
+	int err;
+
 	mlx5e_create_q_counters(priv);
-	return mlx5e_init_rep_rx(priv);
+	err = mlx5e_init_rep_rx(priv);
+	if (err)
+		goto out;
+
+	mlx5e_tc_int_port_init_rep_rx(priv);
+
+out:
+	return err;
 }
 
 static void mlx5e_cleanup_ul_rep_rx(struct mlx5e_priv *priv)
 {
+	mlx5e_tc_int_port_cleanup_rep_rx(priv);
 	mlx5e_cleanup_rep_rx(priv);
 	mlx5e_destroy_q_counters(priv);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 48a203a9e7d9..b01dacb6f527 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -58,6 +58,7 @@ struct mlx5e_neigh_update_table {
 };
 
 struct mlx5_tc_ct_priv;
+struct mlx5e_tc_int_port_priv;
 struct mlx5e_rep_bond;
 struct mlx5e_tc_tun_encap;
 struct mlx5e_post_act;
@@ -98,6 +99,9 @@ struct mlx5_rep_uplink_priv {
 
 	/* tc tunneling encapsulation private data */
 	struct mlx5e_tc_tun_encap *encap;
+
+	/* OVS internal port support */
+	struct mlx5e_tc_int_port_priv *int_port_priv;
 };
 
 struct mlx5e_rep_priv {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index f458f7f6b299..2b2caff6c4e7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -5073,6 +5073,8 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
 					       MLX5_FLOW_NAMESPACE_FDB,
 					       uplink_priv->post_act);
 
+	uplink_priv->int_port_priv = mlx5e_tc_int_port_init(netdev_priv(priv->netdev));
+
 	uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act);
 
 	mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
@@ -5120,6 +5122,7 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
 	mapping_destroy(uplink_priv->tunnel_mapping);
 err_tun_mapping:
 	mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
+	mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
 	mlx5_tc_ct_clean(uplink_priv->ct_priv);
 	netdev_warn(priv->netdev,
 		    "Failed to initialize tc (eswitch), err: %d", err);
@@ -5140,6 +5143,7 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
 	mapping_destroy(uplink_priv->tunnel_mapping);
 
 	mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
+	mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
 	mlx5_tc_ct_clean(uplink_priv->ct_priv);
 	mlx5e_tc_post_act_destroy(uplink_priv->post_act);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index df0f63c21e72..74999dcff70b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -38,6 +38,7 @@
 #include "eswitch.h"
 #include "en/tc_ct.h"
 #include "en/tc_tun.h"
+#include "en/tc/int_port.h"
 #include "en_rep.h"
 
 #define MLX5E_TC_FLOW_ID_MASK 0x0000ffff
@@ -104,6 +105,8 @@ struct mlx5_rx_tun_attr {
 #define MLX5E_TC_TABLE_CHAIN_TAG_BITS 16
 #define MLX5E_TC_TABLE_CHAIN_TAG_MASK GENMASK(MLX5E_TC_TABLE_CHAIN_TAG_BITS - 1, 0)
 
+#define MLX5E_TC_MAX_INT_PORT_NUM (8)
+
 #if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
 
 struct tunnel_match_key {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 28467f11f04b..194ba8313d4d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -51,6 +51,7 @@
 enum mlx5_mapped_obj_type {
 	MLX5_MAPPED_OBJ_CHAIN,
 	MLX5_MAPPED_OBJ_SAMPLE,
+	MLX5_MAPPED_OBJ_INT_PORT_METADATA,
 };
 
 struct mlx5_mapped_obj {
@@ -63,6 +64,7 @@ struct mlx5_mapped_obj {
 			u32 trunc_size;
 			u32 tunnel_id;
 		} sample;
+		u32 int_port_metadata;
 	};
 };
 
@@ -88,6 +90,7 @@ enum {
 	MAPPING_TYPE_TUNNEL_ENC_OPTS,
 	MAPPING_TYPE_LABELS,
 	MAPPING_TYPE_ZONE,
+	MAPPING_TYPE_INT_PORT,
 };
 
 struct vport_ingress {
@@ -336,6 +339,9 @@ void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata);
 
 int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps);
 
+bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw);
+int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable);
+
 /* E-Switch API */
 int mlx5_eswitch_init(struct mlx5_core_dev *dev);
 void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 0ef126fd6a8e..94da4aca28c9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1857,6 +1857,17 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
 	atomic64_set(&esw->user_count, 0);
 }
 
+static int esw_get_offloads_ft_size(struct mlx5_eswitch *esw)
+{
+	int nvports;
+
+	nvports = esw->total_vports + MLX5_ESW_MISS_FLOWS;
+	if (mlx5e_tc_int_port_supported(esw))
+		nvports += MLX5E_TC_MAX_INT_PORT_NUM;
+
+	return nvports;
+}
+
 static int esw_create_offloads_table(struct mlx5_eswitch *esw)
 {
 	struct mlx5_flow_table_attr ft_attr = {};
@@ -1871,7 +1882,7 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw)
 		return -EOPNOTSUPP;
 	}
 
-	ft_attr.max_fte = esw->total_vports + MLX5_ESW_MISS_FLOWS;
+	ft_attr.max_fte = esw_get_offloads_ft_size(esw);
 	ft_attr.prio = 1;
 
 	ft_offloads = mlx5_create_flow_table(ns, &ft_attr);
@@ -1900,7 +1911,7 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw)
 	int nvports;
 	int err = 0;
 
-	nvports = esw->total_vports + MLX5_ESW_MISS_FLOWS;
+	nvports = esw_get_offloads_ft_size(esw);
 	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
 	if (!flow_group_in)
 		return -ENOMEM;
@@ -2805,7 +2816,8 @@ bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw)
 u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw)
 {
 	u32 vport_end_ida = (1 << ESW_VPORT_BITS) - 1;
-	u32 max_pf_num = (1 << ESW_PFNUM_BITS) - 1;
+	/* Reserve 0xf for internal port offload */
+	u32 max_pf_num = (1 << ESW_PFNUM_BITS) - 2;
 	u32 pf_num;
 	int id;
 
-- 
2.31.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ