[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20180109064434.12726-5-saeedm@mellanox.com>
Date: Tue, 9 Jan 2018 08:44:28 +0200
From: Saeed Mahameed <saeedm@...lanox.com>
To: "David S. Miller" <davem@...emloft.net>
Cc: netdev@...r.kernel.org, Or Gerlitz <ogerlitz@...lanox.com>,
Saeed Mahameed <saeedm@...lanox.com>
Subject: [net-next 04/10] net/mlx5e: Support offloading TC NIC hairpin flows
From: Or Gerlitz <ogerlitz@...lanox.com>
We refer to a TC NIC rule that involves forwarding as a "hairpin" rule.
All hairpin rules from the current NIC device (called "func" in
the code) to a given NIC device ("peer") are steered into the
same hairpin RQ/SQ pair.
The hairpin pair is set up on demand and removed once no TC rule
needs it any more.
Here's a TC rule that matches on ICMP, rewrites the destination MAC
in the header and hairpins the traffic from RX/enp1s2f1 to
TX/enp1s2f2 (enp1s2f1 and enp1s2f2 are two mlx5 devices):
tc filter add dev enp1s2f1 protocol ip parent ffff: prio 2 \
flower skip_sw ip_proto icmp \
action pedit ex munge eth dst set 10:22:33:44:55:66 pipe \
action mirred egress redirect dev enp1s2f2
Signed-off-by: Or Gerlitz <ogerlitz@...lanox.com>
Signed-off-by: Saeed Mahameed <saeedm@...lanox.com>
---
drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 +
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 183 ++++++++++++++++++++++--
2 files changed, 172 insertions(+), 12 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 5299310f2481..72bab8d3f4b0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -659,6 +659,7 @@ struct mlx5e_tc_table {
struct rhashtable ht;
DECLARE_HASHTABLE(mod_hdr_tbl, 8);
+ DECLARE_HASHTABLE(hairpin_tbl, 8);
};
struct mlx5e_vlan_table {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 55a527bda2e5..cf528da51243 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -56,12 +56,14 @@ struct mlx5_nic_flow_attr {
u32 action;
u32 flow_tag;
u32 mod_hdr_id;
+ u32 hairpin_tirn;
};
enum {
MLX5E_TC_FLOW_ESWITCH = BIT(0),
MLX5E_TC_FLOW_NIC = BIT(1),
MLX5E_TC_FLOW_OFFLOADED = BIT(2),
+ MLX5E_TC_FLOW_HAIRPIN = BIT(3),
};
struct mlx5e_tc_flow {
@@ -71,6 +73,7 @@ struct mlx5e_tc_flow {
struct mlx5_flow_handle *rule;
struct list_head encap; /* flows sharing the same encap ID */
struct list_head mod_hdr; /* flows sharing the same mod hdr ID */
+ struct list_head hairpin; /* flows sharing the same hairpin */
union {
struct mlx5_esw_flow_attr esw_attr[0];
struct mlx5_nic_flow_attr nic_attr[0];
@@ -101,6 +104,17 @@ struct mlx5e_hairpin {
u32 tirn;
};
+struct mlx5e_hairpin_entry {
+ /* a node of a hash table which keeps all the hairpin entries */
+ struct hlist_node hairpin_hlist;
+
+ /* flows sharing the same hairpin */
+ struct list_head flows;
+
+ int peer_ifindex;
+ struct mlx5e_hairpin *hp;
+};
+
struct mod_hdr_key {
int num_actions;
void *actions;
@@ -319,6 +333,98 @@ static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
kvfree(hp);
}
+static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
+ int peer_ifindex)
+{
+ struct mlx5e_hairpin_entry *hpe;
+
+ hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
+ hairpin_hlist, peer_ifindex) {
+ if (hpe->peer_ifindex == peer_ifindex)
+ return hpe;
+ }
+
+ return NULL;
+}
+
+static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5e_tc_flow_parse_attr *parse_attr)
+{
+ int peer_ifindex = parse_attr->mirred_ifindex;
+ struct mlx5_hairpin_params params;
+ struct mlx5e_hairpin_entry *hpe;
+ struct mlx5e_hairpin *hp;
+ int err;
+
+ if (!MLX5_CAP_GEN(priv->mdev, hairpin)) {
+ netdev_warn(priv->netdev, "hairpin is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ hpe = mlx5e_hairpin_get(priv, peer_ifindex);
+ if (hpe)
+ goto attach_flow;
+
+ hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
+ if (!hpe)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&hpe->flows);
+ hpe->peer_ifindex = peer_ifindex;
+
+ params.log_data_size = 15;
+ params.log_data_size = min_t(u8, params.log_data_size,
+ MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
+ params.log_data_size = max_t(u8, params.log_data_size,
+ MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz));
+ params.q_counter = priv->q_counter;
+
+ hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
+ if (IS_ERR(hp)) {
+ err = PTR_ERR(hp);
+ goto create_hairpin_err;
+ }
+
+ netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x log data size %d\n",
+ hp->tirn, hp->pair->rqn, hp->pair->peer_mdev->priv.name,
+ hp->pair->sqn, params.log_data_size);
+
+ hpe->hp = hp;
+ hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist, peer_ifindex);
+
+attach_flow:
+ flow->nic_attr->hairpin_tirn = hpe->hp->tirn;
+ list_add(&flow->hairpin, &hpe->flows);
+ return 0;
+
+create_hairpin_err:
+ kfree(hpe);
+ return err;
+}
+
+static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow)
+{
+ struct list_head *next = flow->hairpin.next;
+
+ list_del(&flow->hairpin);
+
+ /* no more hairpin flows for us, release the hairpin pair */
+ if (list_empty(next)) {
+ struct mlx5e_hairpin_entry *hpe;
+
+ hpe = list_entry(next, struct mlx5e_hairpin_entry, flows);
+
+ netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
+ hpe->hp->pair->peer_mdev->priv.name);
+
+ mlx5e_hairpin_destroy(hpe->hp);
+ hash_del(&hpe->hairpin_hlist);
+ kfree(hpe);
+ }
+}
+
static struct mlx5_flow_handle *
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow_parse_attr *parse_attr,
@@ -326,7 +432,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
{
struct mlx5_nic_flow_attr *attr = flow->nic_attr;
struct mlx5_core_dev *dev = priv->mdev;
- struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_destination dest[2] = {};
struct mlx5_flow_act flow_act = {
.action = attr->action,
.flow_tag = attr->flow_tag,
@@ -335,18 +441,33 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
struct mlx5_fc *counter = NULL;
struct mlx5_flow_handle *rule;
bool table_created = false;
- int err;
+ int err, dest_ix = 0;
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
- dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest.ft = priv->fs.vlan.ft.t;
- } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
- counter = mlx5_fc_create(dev, true);
- if (IS_ERR(counter))
- return ERR_CAST(counter);
+ if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) {
+ err = mlx5e_hairpin_flow_add(priv, flow, parse_attr);
+ if (err) {
+ rule = ERR_PTR(err);
+ goto err_add_hairpin_flow;
+ }
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dest[dest_ix].tir_num = attr->hairpin_tirn;
+ } else {
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[dest_ix].ft = priv->fs.vlan.ft.t;
+ }
+ dest_ix++;
+ }
- dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dest.counter = counter;
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ counter = mlx5_fc_create(dev, true);
+ if (IS_ERR(counter)) {
+ rule = ERR_CAST(counter);
+ goto err_fc_create;
+ }
+ dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest[dest_ix].counter = counter;
+ dest_ix++;
}
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
@@ -389,7 +510,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
rule = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
- &flow_act, &dest, 1);
+ &flow_act, dest, dest_ix);
if (IS_ERR(rule))
goto err_add_rule;
@@ -406,7 +527,10 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
mlx5e_detach_mod_hdr(priv, flow);
err_create_mod_hdr_id:
mlx5_fc_destroy(dev, counter);
-
+err_fc_create:
+ if (flow->flags & MLX5E_TC_FLOW_HAIRPIN)
+ mlx5e_hairpin_flow_del(priv, flow);
+err_add_hairpin_flow:
return rule;
}
@@ -427,6 +551,9 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
mlx5e_detach_mod_hdr(priv, flow);
+
+ if (flow->flags & MLX5E_TC_FLOW_HAIRPIN)
+ mlx5e_hairpin_flow_del(priv, flow);
}
static void mlx5e_detach_encap(struct mlx5e_priv *priv,
@@ -1519,6 +1646,20 @@ static bool actions_match_supported(struct mlx5e_priv *priv,
return true;
}
+static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
+{
+ struct mlx5_core_dev *fmdev, *pmdev;
+ u16 func_id, peer_id;
+
+ fmdev = priv->mdev;
+ pmdev = peer_priv->mdev;
+
+ func_id = (u16)((fmdev->pdev->bus->number << 8) | PCI_SLOT(fmdev->pdev->devfn));
+ peer_id = (u16)((pmdev->pdev->bus->number << 8) | PCI_SLOT(pmdev->pdev->devfn));
+
+ return (func_id == peer_id);
+}
+
static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
struct mlx5e_tc_flow_parse_attr *parse_attr,
struct mlx5e_tc_flow *flow)
@@ -1563,6 +1704,23 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
return -EOPNOTSUPP;
}
+ if (is_tcf_mirred_egress_redirect(a)) {
+ struct net_device *peer_dev = tcf_mirred_dev(a);
+
+ if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
+ same_hw_devs(priv, netdev_priv(peer_dev))) {
+ parse_attr->mirred_ifindex = peer_dev->ifindex;
+ flow->flags |= MLX5E_TC_FLOW_HAIRPIN;
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ } else {
+ netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
+ peer_dev->name);
+ return -EINVAL;
+ }
+ continue;
+ }
+
if (is_tcf_skbedit_mark(a)) {
u32 mark = tcf_skbedit_mark(a);
@@ -2285,6 +2443,7 @@ int mlx5e_tc_init(struct mlx5e_priv *priv)
struct mlx5e_tc_table *tc = &priv->fs.tc;
hash_init(tc->mod_hdr_tbl);
+ hash_init(tc->hairpin_tbl);
tc->ht_params = mlx5e_tc_flow_ht_params;
return rhashtable_init(&tc->ht, &tc->ht_params);
--
2.13.0
Powered by blists - more mailing lists