[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20181214215200.31222-4-saeedm@mellanox.com>
Date: Fri, 14 Dec 2018 13:51:50 -0800
From: Saeed Mahameed <saeedm@...lanox.com>
To: "David S. Miller" <davem@...emloft.net>
Cc: netdev@...r.kernel.org, Roi Dayan <roid@...lanox.com>,
Aviv Heller <avivh@...lanox.com>,
Shahar Klein <shahark@...lanox.com>,
Saeed Mahameed <saeedm@...lanox.com>
Subject: [net-next 03/13] net/mlx5e: Infrastructure for duplicated offloading of TC flows
From: Roi Dayan <roid@...lanox.com>
Under uplink LAG or multipath schemes, traffic that matches one flow
might arrive on both uplink ports and transmitted through both
as part of supporting aggregation and high-availability.
To cope with the fact that the SW model might use logical SW port
(e.g uplink team or bond) but we have two HW ports with e-switch on
each, there are cases where in order to offload a SW TC rule we
need to duplicate it to two HW flows.
Since each HW rule has its own counter we also aggregate the counter
of both rules when a flow stats query is executed from user-space.
Introduce the changes for the different elements (add/delete/stats),
currently nothing is duplicated.
Signed-off-by: Roi Dayan <roid@...lanox.com>
Signed-off-by: Aviv Heller <avivh@...lanox.com>
Signed-off-by: Shahar Klein <shahark@...lanox.com>
Signed-off-by: Saeed Mahameed <saeedm@...lanox.com>
---
.../net/ethernet/mellanox/mlx5/core/en_tc.c | 174 +++++++++++++++++-
.../net/ethernet/mellanox/mlx5/core/eswitch.h | 2 +
.../mellanox/mlx5/core/eswitch_offloads.c | 8 +-
3 files changed, 176 insertions(+), 8 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 0921213561cb..eacccac05dda 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -52,6 +52,7 @@
#include "fs_core.h"
#include "en/port.h"
#include "en/tc_tun.h"
+#include "lib/devcom.h"
struct mlx5_nic_flow_attr {
u32 action;
@@ -74,6 +75,7 @@ enum {
MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 3),
MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4),
MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 5),
+ MLX5E_TC_FLOW_DUP = BIT(MLX5E_TC_FLOW_BASE + 6),
};
#define MLX5E_TC_MAX_SPLITS 1
@@ -111,8 +113,10 @@ struct mlx5e_tc_flow {
* destinations.
*/
struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct mlx5e_tc_flow *peer_flow;
struct list_head mod_hdr; /* flows sharing the same mod hdr ID */
struct list_head hairpin; /* flows sharing the same hairpin */
+ struct list_head peer; /* flows with peer flow */
union {
struct mlx5_esw_flow_attr esw_attr[0];
struct mlx5_nic_flow_attr nic_attr[0];
@@ -1249,13 +1253,48 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
}
}
+static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
+
+ if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) ||
+ !(flow->flags & MLX5E_TC_FLOW_DUP))
+ return;
+
+ mutex_lock(&esw->offloads.peer_mutex);
+ list_del(&flow->peer);
+ mutex_unlock(&esw->offloads.peer_mutex);
+
+ flow->flags &= ~MLX5E_TC_FLOW_DUP;
+
+ mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
+ kvfree(flow->peer_flow);
+ flow->peer_flow = NULL;
+}
+
+static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
+{
+ struct mlx5_core_dev *dev = flow->priv->mdev;
+ struct mlx5_devcom *devcom = dev->priv.devcom;
+ struct mlx5_eswitch *peer_esw;
+
+ peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ if (!peer_esw)
+ return;
+
+ __mlx5e_tc_del_fdb_peer_flow(flow);
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+}
+
static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow)
{
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+ if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
+ mlx5e_tc_del_fdb_peer_flow(flow);
mlx5e_tc_del_fdb_flow(priv, flow);
- else
+ } else {
mlx5e_tc_del_nic_flow(priv, flow);
+ }
}
@@ -2660,6 +2699,11 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv)
return &priv->fs.tc.ht;
}
+static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
+{
+ return false;
+}
+
static int
mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
struct tc_cls_flower_offload *f, u16 flow_flags,
@@ -2693,11 +2737,13 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
}
static int
-mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
- struct tc_cls_flower_offload *f,
- u16 flow_flags,
- struct net_device *filter_dev,
- struct mlx5e_tc_flow **__flow)
+__mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f,
+ u16 flow_flags,
+ struct net_device *filter_dev,
+ struct mlx5_eswitch_rep *in_rep,
+ struct mlx5_core_dev *in_mdev,
+ struct mlx5e_tc_flow **__flow)
{
struct netlink_ext_ack *extack = f->common.extack;
struct mlx5e_tc_flow_parse_attr *parse_attr;
@@ -2723,6 +2769,8 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
if (err)
goto err_free;
+ flow->esw_attr->in_rep = in_rep;
+ flow->esw_attr->in_mdev = in_mdev;
err = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow, extack);
if (err)
goto err_free;
@@ -2738,6 +2786,87 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
return err;
}
+static int mlx5e_tc_add_fdb_peer_flow(struct tc_cls_flower_offload *f,
+ struct mlx5e_tc_flow *flow)
+{
+ struct mlx5e_priv *priv = flow->priv, *peer_priv;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
+ struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5e_rep_priv *peer_urpriv;
+ struct mlx5e_tc_flow *peer_flow;
+ struct mlx5_core_dev *in_mdev;
+ int err = 0;
+
+ peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ if (!peer_esw)
+ return -ENODEV;
+
+ peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
+ peer_priv = netdev_priv(peer_urpriv->netdev);
+
+ /* in_mdev is assigned of which the packet originated from.
+ * So packets redirected to uplink use the same mdev of the
+ * original flow and packets redirected from uplink use the
+ * peer mdev.
+ */
+ if (flow->esw_attr->in_rep->vport == FDB_UPLINK_VPORT)
+ in_mdev = peer_priv->mdev;
+ else
+ in_mdev = priv->mdev;
+
+ parse_attr = flow->esw_attr->parse_attr;
+ err = __mlx5e_add_fdb_flow(peer_priv, f, flow->flags,
+ parse_attr->filter_dev,
+ flow->esw_attr->in_rep, in_mdev, &peer_flow);
+ if (err)
+ goto out;
+
+ flow->peer_flow = peer_flow;
+ flow->flags |= MLX5E_TC_FLOW_DUP;
+ mutex_lock(&esw->offloads.peer_mutex);
+ list_add_tail(&flow->peer, &esw->offloads.peer_flows);
+ mutex_unlock(&esw->offloads.peer_mutex);
+
+out:
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ return err;
+}
+
+static int
+mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f,
+ u16 flow_flags,
+ struct net_device *filter_dev,
+ struct mlx5e_tc_flow **__flow)
+{
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *in_rep = rpriv->rep;
+ struct mlx5_core_dev *in_mdev = priv->mdev;
+ struct mlx5e_tc_flow *flow;
+ int err;
+
+ err = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
+ in_mdev, &flow);
+ if (err)
+ goto out;
+
+ if (is_peer_flow_needed(flow)) {
+ err = mlx5e_tc_add_fdb_peer_flow(f, flow);
+ if (err) {
+ mlx5e_tc_del_fdb_flow(priv, flow);
+ goto out;
+ }
+ }
+
+ *__flow = flow;
+
+ return 0;
+
+out:
+ return err;
+}
+
static int
mlx5e_add_nic_flow(struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f,
@@ -2882,7 +3011,9 @@ int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f, int flags)
{
+ struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
struct rhashtable *tc_ht = get_tc_ht(priv);
+ struct mlx5_eswitch *peer_esw;
struct mlx5e_tc_flow *flow;
struct mlx5_fc *counter;
u64 bytes;
@@ -2902,6 +3033,27 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
+ peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+ if (!peer_esw)
+ goto out;
+
+ if ((flow->flags & MLX5E_TC_FLOW_DUP) &&
+ (flow->peer_flow->flags & MLX5E_TC_FLOW_OFFLOADED)) {
+ u64 bytes2;
+ u64 packets2;
+ u64 lastuse2;
+
+ counter = mlx5e_tc_get_counter(flow->peer_flow);
+ mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
+
+ bytes += bytes2;
+ packets += packets2;
+ lastuse = max_t(u64, lastuse, lastuse2);
+ }
+
+ mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+
+out:
tcf_exts_stats_update(f->exts, bytes, packets, lastuse);
return 0;
@@ -3014,3 +3166,11 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv)
return atomic_read(&tc_ht->nelems);
}
+
+void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
+{
+ struct mlx5e_tc_flow *flow, *tmp;
+
+ list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
+ __mlx5e_tc_del_fdb_peer_flow(flow);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 9dba6ad5744d..4d048f7e703b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -167,6 +167,8 @@ struct mlx5_esw_offload {
struct mlx5_flow_table *ft_offloads;
struct mlx5_flow_group *vport_rx_group;
struct mlx5_eswitch_rep *vport_reps;
+ struct list_head peer_flows;
+ struct mutex peer_mutex;
DECLARE_HASHTABLE(encap_tbl, 8);
DECLARE_HASHTABLE(mod_hdr_tbl, 8);
u8 inline_mode;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index a6927ca3d4ca..76cb57202474 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -563,7 +563,7 @@ static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev,
dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
dest->vport.num = 0;
dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id);
- dest->vport.vhca_id_valid = 1;
+ dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
}
static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
@@ -1313,8 +1313,11 @@ static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
return 0;
}
+void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw);
+
static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
{
+ mlx5e_tc_clean_fdb_peer_flows(esw);
esw_del_fdb_peer_miss_rules(esw);
}
@@ -1365,6 +1368,9 @@ static void esw_offloads_devcom_init(struct mlx5_eswitch *esw)
{
struct mlx5_devcom *devcom = esw->dev->priv.devcom;
+ INIT_LIST_HEAD(&esw->offloads.peer_flows);
+ mutex_init(&esw->offloads.peer_mutex);
+
if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
return;
--
2.19.2
Powered by blists - more mailing lists