lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20181214215200.31222-4-saeedm@mellanox.com>
Date:   Fri, 14 Dec 2018 13:51:50 -0800
From:   Saeed Mahameed <saeedm@...lanox.com>
To:     "David S. Miller" <davem@...emloft.net>
Cc:     netdev@...r.kernel.org, Roi Dayan <roid@...lanox.com>,
        Aviv Heller <avivh@...lanox.com>,
        Shahar Klein <shahark@...lanox.com>,
        Saeed Mahameed <saeedm@...lanox.com>
Subject: [net-next 03/13] net/mlx5e: Infrastructure for duplicated offloading of TC flows

From: Roi Dayan <roid@...lanox.com>

Under uplink LAG or multipath schemes, traffic that matches one flow
might arrive on both uplink ports and transmitted through both
as part of supporting aggregation and high-availability.

To cope with the fact that the SW model might use logical SW port
(e.g uplink team or bond) but we have two HW ports with e-switch on
each, there are cases where in order to offload a SW TC rule we
need to duplicate it to two HW flows.

Since each HW rule has its own counter we also aggregate the counter
of both rules when a flow stats query is executed from user-space.

Introduce the changes for the different elements (add/delete/stats),
currently nothing is duplicated.

Signed-off-by: Roi Dayan <roid@...lanox.com>
Signed-off-by: Aviv Heller <avivh@...lanox.com>
Signed-off-by: Shahar Klein <shahark@...lanox.com>
Signed-off-by: Saeed Mahameed <saeedm@...lanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_tc.c   | 174 +++++++++++++++++-
 .../net/ethernet/mellanox/mlx5/core/eswitch.h |   2 +
 .../mellanox/mlx5/core/eswitch_offloads.c     |   8 +-
 3 files changed, 176 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 0921213561cb..eacccac05dda 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -52,6 +52,7 @@
 #include "fs_core.h"
 #include "en/port.h"
 #include "en/tc_tun.h"
+#include "lib/devcom.h"
 
 struct mlx5_nic_flow_attr {
 	u32 action;
@@ -74,6 +75,7 @@ enum {
 	MLX5E_TC_FLOW_HAIRPIN	= BIT(MLX5E_TC_FLOW_BASE + 3),
 	MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4),
 	MLX5E_TC_FLOW_SLOW	  = BIT(MLX5E_TC_FLOW_BASE + 5),
+	MLX5E_TC_FLOW_DUP         = BIT(MLX5E_TC_FLOW_BASE + 6),
 };
 
 #define MLX5E_TC_MAX_SPLITS 1
@@ -111,8 +113,10 @@ struct mlx5e_tc_flow {
 	 * destinations.
 	 */
 	struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
+	struct mlx5e_tc_flow    *peer_flow;
 	struct list_head	mod_hdr; /* flows sharing the same mod hdr ID */
 	struct list_head	hairpin; /* flows sharing the same hairpin */
+	struct list_head	peer;    /* flows with peer flow */
 	union {
 		struct mlx5_esw_flow_attr esw_attr[0];
 		struct mlx5_nic_flow_attr nic_attr[0];
@@ -1249,13 +1253,48 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
 	}
 }
 
+static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
+{
+	struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
+
+	if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) ||
+	    !(flow->flags & MLX5E_TC_FLOW_DUP))
+		return;
+
+	mutex_lock(&esw->offloads.peer_mutex);
+	list_del(&flow->peer);
+	mutex_unlock(&esw->offloads.peer_mutex);
+
+	flow->flags &= ~MLX5E_TC_FLOW_DUP;
+
+	mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
+	kvfree(flow->peer_flow);
+	flow->peer_flow = NULL;
+}
+
+static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
+{
+	struct mlx5_core_dev *dev = flow->priv->mdev;
+	struct mlx5_devcom *devcom = dev->priv.devcom;
+	struct mlx5_eswitch *peer_esw;
+
+	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+	if (!peer_esw)
+		return;
+
+	__mlx5e_tc_del_fdb_peer_flow(flow);
+	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+}
+
 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
 			      struct mlx5e_tc_flow *flow)
 {
-	if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+	if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
+		mlx5e_tc_del_fdb_peer_flow(flow);
 		mlx5e_tc_del_fdb_flow(priv, flow);
-	else
+	} else {
 		mlx5e_tc_del_nic_flow(priv, flow);
+	}
 }
 
 
@@ -2660,6 +2699,11 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv)
 		return &priv->fs.tc.ht;
 }
 
+static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
+{
+	return false;
+}
+
 static int
 mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
 		 struct tc_cls_flower_offload *f, u16 flow_flags,
@@ -2693,11 +2737,13 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
 }
 
 static int
-mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
-		   struct tc_cls_flower_offload *f,
-		   u16 flow_flags,
-		   struct net_device *filter_dev,
-		   struct mlx5e_tc_flow **__flow)
+__mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
+		     struct tc_cls_flower_offload *f,
+		     u16 flow_flags,
+		     struct net_device *filter_dev,
+		     struct mlx5_eswitch_rep *in_rep,
+		     struct mlx5_core_dev *in_mdev,
+		     struct mlx5e_tc_flow **__flow)
 {
 	struct netlink_ext_ack *extack = f->common.extack;
 	struct mlx5e_tc_flow_parse_attr *parse_attr;
@@ -2723,6 +2769,8 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
 	if (err)
 		goto err_free;
 
+	flow->esw_attr->in_rep = in_rep;
+	flow->esw_attr->in_mdev = in_mdev;
 	err = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow, extack);
 	if (err)
 		goto err_free;
@@ -2738,6 +2786,87 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
 	return err;
 }
 
+static int mlx5e_tc_add_fdb_peer_flow(struct tc_cls_flower_offload *f,
+				      struct mlx5e_tc_flow *flow)
+{
+	struct mlx5e_priv *priv = flow->priv, *peer_priv;
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
+	struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
+	struct mlx5e_tc_flow_parse_attr *parse_attr;
+	struct mlx5e_rep_priv *peer_urpriv;
+	struct mlx5e_tc_flow *peer_flow;
+	struct mlx5_core_dev *in_mdev;
+	int err = 0;
+
+	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+	if (!peer_esw)
+		return -ENODEV;
+
+	peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
+	peer_priv = netdev_priv(peer_urpriv->netdev);
+
+	/* in_mdev is assigned of which the packet originated from.
+	 * So packets redirected to uplink use the same mdev of the
+	 * original flow and packets redirected from uplink use the
+	 * peer mdev.
+	 */
+	if (flow->esw_attr->in_rep->vport == FDB_UPLINK_VPORT)
+		in_mdev = peer_priv->mdev;
+	else
+		in_mdev = priv->mdev;
+
+	parse_attr = flow->esw_attr->parse_attr;
+	err = __mlx5e_add_fdb_flow(peer_priv, f, flow->flags,
+				   parse_attr->filter_dev,
+				   flow->esw_attr->in_rep, in_mdev, &peer_flow);
+	if (err)
+		goto out;
+
+	flow->peer_flow = peer_flow;
+	flow->flags |= MLX5E_TC_FLOW_DUP;
+	mutex_lock(&esw->offloads.peer_mutex);
+	list_add_tail(&flow->peer, &esw->offloads.peer_flows);
+	mutex_unlock(&esw->offloads.peer_mutex);
+
+out:
+	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+	return err;
+}
+
+static int
+mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
+		   struct tc_cls_flower_offload *f,
+		   u16 flow_flags,
+		   struct net_device *filter_dev,
+		   struct mlx5e_tc_flow **__flow)
+{
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+	struct mlx5_eswitch_rep *in_rep = rpriv->rep;
+	struct mlx5_core_dev *in_mdev = priv->mdev;
+	struct mlx5e_tc_flow *flow;
+	int err;
+
+	err = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
+				   in_mdev, &flow);
+	if (err)
+		goto out;
+
+	if (is_peer_flow_needed(flow)) {
+		err = mlx5e_tc_add_fdb_peer_flow(f, flow);
+		if (err) {
+			mlx5e_tc_del_fdb_flow(priv, flow);
+			goto out;
+		}
+	}
+
+	*__flow = flow;
+
+	return 0;
+
+out:
+	return err;
+}
+
 static int
 mlx5e_add_nic_flow(struct mlx5e_priv *priv,
 		   struct tc_cls_flower_offload *f,
@@ -2882,7 +3011,9 @@ int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
 int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
 		       struct tc_cls_flower_offload *f, int flags)
 {
+	struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
 	struct rhashtable *tc_ht = get_tc_ht(priv);
+	struct mlx5_eswitch *peer_esw;
 	struct mlx5e_tc_flow *flow;
 	struct mlx5_fc *counter;
 	u64 bytes;
@@ -2902,6 +3033,27 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
 
 	mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
 
+	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+	if (!peer_esw)
+		goto out;
+
+	if ((flow->flags & MLX5E_TC_FLOW_DUP) &&
+	    (flow->peer_flow->flags & MLX5E_TC_FLOW_OFFLOADED)) {
+		u64 bytes2;
+		u64 packets2;
+		u64 lastuse2;
+
+		counter = mlx5e_tc_get_counter(flow->peer_flow);
+		mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
+
+		bytes += bytes2;
+		packets += packets2;
+		lastuse = max_t(u64, lastuse, lastuse2);
+	}
+
+	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+
+out:
 	tcf_exts_stats_update(f->exts, bytes, packets, lastuse);
 
 	return 0;
@@ -3014,3 +3166,11 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv)
 
 	return atomic_read(&tc_ht->nelems);
 }
+
+void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
+{
+	struct mlx5e_tc_flow *flow, *tmp;
+
+	list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
+		__mlx5e_tc_del_fdb_peer_flow(flow);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 9dba6ad5744d..4d048f7e703b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -167,6 +167,8 @@ struct mlx5_esw_offload {
 	struct mlx5_flow_table *ft_offloads;
 	struct mlx5_flow_group *vport_rx_group;
 	struct mlx5_eswitch_rep *vport_reps;
+	struct list_head peer_flows;
+	struct mutex peer_mutex;
 	DECLARE_HASHTABLE(encap_tbl, 8);
 	DECLARE_HASHTABLE(mod_hdr_tbl, 8);
 	u8 inline_mode;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index a6927ca3d4ca..76cb57202474 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -563,7 +563,7 @@ static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev,
 	dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
 	dest->vport.num = 0;
 	dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id);
-	dest->vport.vhca_id_valid = 1;
+	dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
 }
 
 static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
@@ -1313,8 +1313,11 @@ static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
 	return 0;
 }
 
+void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw);
+
 static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
 {
+	mlx5e_tc_clean_fdb_peer_flows(esw);
 	esw_del_fdb_peer_miss_rules(esw);
 }
 
@@ -1365,6 +1368,9 @@ static void esw_offloads_devcom_init(struct mlx5_eswitch *esw)
 {
 	struct mlx5_devcom *devcom = esw->dev->priv.devcom;
 
+	INIT_LIST_HEAD(&esw->offloads.peer_flows);
+	mutex_init(&esw->offloads.peer_mutex);
+
 	if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
 		return;
 
-- 
2.19.2

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ