lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1408637945-10390-11-git-send-email-jiri@resnulli.us>
Date:	Thu, 21 Aug 2014 18:19:03 +0200
From:	Jiri Pirko <jiri@...nulli.us>
To:	netdev@...r.kernel.org
Cc:	davem@...emloft.net, nhorman@...driver.com, andy@...yhouse.net,
	tgraf@...g.ch, dborkman@...hat.com, ogerlitz@...lanox.com,
	jesse@...ira.com, pshelar@...ira.com, azhou@...ira.com,
	ben@...adent.org.uk, stephen@...workplumber.org,
	jeffrey.t.kirsher@...el.com, vyasevic@...hat.com,
	xiyou.wangcong@...il.com, john.r.fastabend@...el.com,
	edumazet@...gle.com, jhs@...atatu.com, sfeldma@...ulusnetworks.com,
	f.fainelli@...il.com, roopa@...ulusnetworks.com,
	linville@...driver.com, dev@...nvswitch.org, jasowang@...hat.com,
	ebiederm@...ssion.com, nicolas.dichtel@...nd.com,
	ryazanov.s.a@...il.com, buytenh@...tstofly.org,
	aviadr@...lanox.com, nbd@...nwrt.org, alexei.starovoitov@...il.com,
	Neil.Jerram@...aswitch.com, ronye@...lanox.com
Subject: [patch net-next RFC 10/12] openvswitch: add support for datapath hardware offload

Take advantage of the ability to work with flows in switch devices and
use the swdev API to offload the flow datapath.

Signed-off-by: Jiri Pirko <jiri@...nulli.us>
---
 include/linux/sw_flow.h        |  14 +++
 net/openvswitch/Makefile       |   3 +-
 net/openvswitch/datapath.c     |  33 ++++++
 net/openvswitch/datapath.h     |   3 +
 net/openvswitch/flow_table.c   |   1 +
 net/openvswitch/hw_offload.c   | 235 +++++++++++++++++++++++++++++++++++++++++
 net/openvswitch/hw_offload.h   |  22 ++++
 net/openvswitch/vport-netdev.c |   3 +
 net/openvswitch/vport.h        |   2 +
 9 files changed, 315 insertions(+), 1 deletion(-)
 create mode 100644 net/openvswitch/hw_offload.c
 create mode 100644 net/openvswitch/hw_offload.h

diff --git a/include/linux/sw_flow.h b/include/linux/sw_flow.h
index b622fde..079d065 100644
--- a/include/linux/sw_flow.h
+++ b/include/linux/sw_flow.h
@@ -80,7 +80,21 @@ struct sw_flow_mask {
 	struct sw_flow_key key;
 };
 
+enum sw_flow_action_type {
+	SW_FLOW_ACTION_TYPE_OUTPUT,	/* uses output_dev */
+	SW_FLOW_ACTION_TYPE_VLAN_PUSH,	/* uses vlan.* */
+	SW_FLOW_ACTION_TYPE_VLAN_POP,	/* no operand */
+};
+
 struct sw_flow_action {
+	enum sw_flow_action_type type;	/* selects the union member in use */
+	union {
+		struct net_device *output_dev;	/* ..._TYPE_OUTPUT */
+		struct {
+			__be16 vlan_proto;
+			u16 vlan_tci;	/* NOTE(review): confirm byte order */
+		} vlan;				/* ..._TYPE_VLAN_PUSH */
+	};
 };
 
 struct sw_flow_actions {
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 3591cb5..5152437 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -13,7 +13,8 @@ openvswitch-y := \
 	flow_table.o \
 	vport.o \
 	vport-internal_dev.o \
-	vport-netdev.o
+	vport-netdev.o \
+	hw_offload.o
 
 ifneq ($(CONFIG_OPENVSWITCH_VXLAN),)
 openvswitch-y += vport-vxlan.o
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 75bb07f..3e43e1d 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -57,6 +57,7 @@
 #include "flow_netlink.h"
 #include "vport-internal_dev.h"
 #include "vport-netdev.h"
+#include "hw_offload.h"
 
 int ovs_net_id __read_mostly;
 
@@ -864,6 +865,9 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 			acts = NULL;
 			goto err_unlock_ovs;
 		}
+		error = ovs_hw_flow_insert(dp, new_flow);
+		if (error)
+			pr_warn("failed to insert flow into hw\n");
 
 		if (unlikely(reply)) {
 			error = ovs_flow_cmd_fill_info(new_flow,
@@ -896,10 +900,18 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 				goto err_unlock_ovs;
 			}
 		}
+		error = ovs_hw_flow_remove(dp, flow);
+		if (error)
+			pr_warn("failed to remove flow from hw\n");
+
 		/* Update actions. */
 		old_acts = ovsl_dereference(flow->sf_acts);
 		rcu_assign_pointer(flow->sf_acts, acts);
 
+		error = ovs_hw_flow_insert(dp, flow);
+		if (error)
+			pr_warn("failed to insert flow into hw\n");
+
 		if (unlikely(reply)) {
 			error = ovs_flow_cmd_fill_info(flow,
 						       ovs_header->dp_ifindex,
@@ -993,9 +1005,17 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
 
 	/* Update actions, if present. */
 	if (likely(acts)) {
+		error = ovs_hw_flow_remove(dp, flow);
+		if (error)
+			pr_warn("failed to remove flow from hw\n");
+
 		old_acts = ovsl_dereference(flow->sf_acts);
 		rcu_assign_pointer(flow->sf_acts, acts);
 
+		error = ovs_hw_flow_insert(dp, flow);
+		if (error)
+			pr_warn("failed to insert flow into hw\n");
+
 		if (unlikely(reply)) {
 			error = ovs_flow_cmd_fill_info(flow,
 						       ovs_header->dp_ifindex,
@@ -1109,6 +1129,9 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	if (unlikely(!a[OVS_FLOW_ATTR_KEY])) {
+		err = ovs_hw_flow_flush(dp);
+		if (err)
+			pr_warn("failed to flush flows from hw\n");
 		err = ovs_flow_tbl_flush(&dp->table);
 		goto unlock;
 	}
@@ -1120,6 +1143,9 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	ovs_flow_tbl_remove(&dp->table, flow);
+	err = ovs_hw_flow_remove(dp, flow);
+	if (err)
+		pr_warn("failed to remove flow from hw\n");
 	ovs_unlock();
 
 	reply = ovs_flow_cmd_alloc_info((const struct ovs_flow_actions __force *) flow->sf_acts,
@@ -1368,6 +1394,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
 		INIT_HLIST_HEAD(&dp->ports[i]);
 
+	INIT_LIST_HEAD(&dp->swdev_rep_list);
+
 	/* Set up our datapath device. */
 	parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
 	parms.type = OVS_VPORT_TYPE_INTERNAL;
@@ -1431,6 +1459,7 @@ err:
 static void __dp_destroy(struct datapath *dp)
 {
 	int i;
+	int err;
 
 	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
 		struct vport *vport;
@@ -1448,6 +1477,10 @@ static void __dp_destroy(struct datapath *dp)
 	 */
 	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
 
+	err = ovs_hw_flow_flush(dp);
+	if (err)
+		pr_warn("failed to flush flows from hw\n");
+
 	/* RCU destroy the flow table */
 	ovs_flow_tbl_destroy(&dp->table, true);
 
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 291f5a0..9dc11a6 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -90,6 +90,9 @@ struct datapath {
 #endif
 
 	u32 user_features;
+
+	/* List of switchdev representative ports */
+	struct list_head swdev_rep_list;
 };
 
 /**
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index e7d9a41..c01e4cb 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -85,6 +85,7 @@ struct ovs_flow *ovs_flow_alloc(void)
 
 	flow->sf_acts = NULL;
 	flow->flow.mask = NULL;
+	flow->flow.actions = NULL;
 	flow->stats_last_writer = NUMA_NO_NODE;
 
 	/* Initialize the default stat node. */
diff --git a/net/openvswitch/hw_offload.c b/net/openvswitch/hw_offload.c
new file mode 100644
index 0000000..edb8a68
--- /dev/null
+++ b/net/openvswitch/hw_offload.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2014 Jiri Pirko <jiri@...nulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/sw_flow.h>
+#include <linux/switchdev.h>
+
+#include "datapath.h"
+#include "vport-netdev.h"
+
+/* Build the hw action array for @acts and return it via @p_actions (caller
+ * kfree()s it). Returns -ENOMEM, -ENODEV for an output port lacking a vport or
+ * a get_netdev op, -EOPNOTSUPP for anything but output/push_vlan/pop_vlan.
+ */
+static int sw_flow_action_create(struct datapath *dp,
+				 struct sw_flow_actions **p_actions,
+				 struct ovs_flow_actions *acts)
+{
+	const struct nlattr *attr = acts->actions;
+	int len = acts->actions_len;
+	const struct ovs_action_push_vlan *vlan;
+	struct sw_flow_actions *actions;
+	struct sw_flow_action *cur;
+	const struct nlattr *a;
+	struct vport *vport;
+	size_t count = 0;
+	int err;
+	int rem;
+
+	for (a = attr, rem = len; rem > 0; a = nla_next(a, &rem))
+		count++;
+
+	actions = kzalloc(sizeof(*actions) + sizeof(*cur) * count, GFP_KERNEL);
+	if (!actions)
+		return -ENOMEM;
+	actions->count = count;
+
+	cur = actions->actions;
+	for (a = attr, rem = len; rem > 0; a = nla_next(a, &rem), cur++) {
+		switch (nla_type(a)) {
+		case OVS_ACTION_ATTR_OUTPUT:
+			vport = ovs_vport_ovsl_rcu(dp, nla_get_u32(a));
+			/* No vport for this port number, or no netdev op */
+			if (!vport || !vport->ops->get_netdev) {
+				err = -ENODEV;
+				goto errout;
+			}
+			cur->type = SW_FLOW_ACTION_TYPE_OUTPUT;
+			cur->output_dev = vport->ops->get_netdev(vport);
+			break;
+
+		case OVS_ACTION_ATTR_PUSH_VLAN:
+			vlan = nla_data(a);
+			cur->type = SW_FLOW_ACTION_TYPE_VLAN_PUSH;
+			cur->vlan.vlan_proto = vlan->vlan_tpid;
+			cur->vlan.vlan_tci = vlan->vlan_tci;
+			break;
+
+		case OVS_ACTION_ATTR_POP_VLAN:
+			cur->type = SW_FLOW_ACTION_TYPE_VLAN_POP;
+			break;
+
+		default:
+			err = -EOPNOTSUPP;
+			goto errout;
+		}
+	}
+	*p_actions = actions;
+	return 0;
+
+errout:
+	kfree(actions);
+	return err;
+}
+
+/* Offload @flow through every representative port of @dp. -ENODEV (out device
+ * is not in this switch) skips that switch; any other error is returned.
+ */
+int ovs_hw_flow_insert(struct datapath *dp, struct ovs_flow *flow)
+{
+	struct sw_flow_actions *actions;
+	struct vport *vport;
+	struct net_device *dev;
+	int err;
+
+	ASSERT_OVSL();
+	BUG_ON(flow->flow.actions);
+
+	err = sw_flow_action_create(dp, &actions, flow->sf_acts);
+	if (err)
+		return err;
+	flow->flow.actions = actions;
+
+	list_for_each_entry(vport, &dp->swdev_rep_list, swdev_rep_list) {
+		dev = vport->ops->get_netdev(vport);
+		BUG_ON(!dev);
+		err = swdev_flow_insert(dev, &flow->flow);
+		if (err && err != -ENODEV) {
+			kfree(actions);
+			flow->flow.actions = NULL;
+			return err;
+		}
+	}
+	/* Ending the walk on -ENODEV is not a failure. */
+	return 0;
+}
+
+/* Remove @flow from all represented switches and free its hw action list. */
+int ovs_hw_flow_remove(struct datapath *dp, struct ovs_flow *flow)
+{
+	struct vport *vport;
+	struct net_device *dev;
+	int err = 0;
+
+	ASSERT_OVSL();
+	list_for_each_entry(vport, &dp->swdev_rep_list, swdev_rep_list) {
+		dev = vport->ops->get_netdev(vport);
+		BUG_ON(!dev);
+		err = swdev_flow_remove(dev, &flow->flow);
+		if (err && err != -ENODEV)
+			break;
+	}
+	kfree(flow->flow.actions);
+	flow->flow.actions = NULL;
+	/* -ENODEV: out device is not in this switch, which is not an error */
+	return err == -ENODEV ? 0 : err;
+}
+
+/* Remove every flow in @dp's table from hw. Failures do not stop the walk, so
+ * each flow still gets its hw action list freed; the first error is returned.
+ */
+int ovs_hw_flow_flush(struct datapath *dp)
+{
+	struct table_instance *ti = ovsl_dereference(dp->table.ti);
+	int ver = ti->node_ver;
+	struct ovs_flow *flow;
+	int first_err = 0;
+	int i, err;
+
+	for (i = 0; i < ti->n_buckets; i++) {
+		struct hlist_head *head = flex_array_get(ti->buckets, i);
+
+		hlist_for_each_entry(flow, head, hash_node[ver]) {
+			err = ovs_hw_flow_remove(dp, flow);
+			if (err && !first_err)
+				first_err = err;
+		}
+	}
+	return first_err;
+}
+
+/* Tell whether @vport is currently linked on @dp's representative list. */
+static bool __is_vport_in_swdev_rep_list(struct datapath *dp,
+					 struct vport *vport)
+{
+	struct vport *rep;
+
+	list_for_each_entry(rep, &dp->swdev_rep_list, swdev_rep_list)
+		if (rep == vport)
+			return true;
+	return false;
+}
+
+/* Look through the netdev vports of @dp for one, other than @vport itself,
+ * whose switch id matches @vport's. Returns that vport, ERR_PTR(-ENOENT) when
+ * there is none, or ERR_PTR() of the swdev_get_id() error for @vport itself.
+ */
+static struct vport *__find_vport_by_swdev_id(struct datapath *dp,
+					      struct vport *vport)
+{
+	struct netdev_phys_item_id wanted;
+	struct netdev_phys_item_id candidate;
+	struct net_device *dev;
+	struct vport *other;
+	int bucket;
+	int err;
+
+	err = swdev_get_id(vport->ops->get_netdev(vport), &wanted);
+	if (err)
+		return ERR_PTR(err);
+
+	for (bucket = 0; bucket < DP_VPORT_HASH_BUCKETS; bucket++) {
+		hlist_for_each_entry(other, &dp->ports[bucket], dp_hash_node) {
+			if (other == vport ||
+			    other->ops->type != OVS_VPORT_TYPE_NETDEV)
+				continue;
+			dev = other->ops->get_netdev(other);
+			if (!dev || swdev_get_id(dev, &candidate))
+				continue;
+			if (netdev_phys_item_ids_match(&wanted, &candidate))
+				return other;
+		}
+	}
+	return ERR_PTR(-ENOENT);
+}
+
+void ovs_hw_port_add(struct datapath *dp, struct vport *vport)
+{
+	struct vport *same_switch;
+
+	ASSERT_OVSL();
+	/* A port becomes representative only if its switch dev id is new */
+	same_switch = __find_vport_by_swdev_id(dp, vport);
+	if (same_switch != ERR_PTR(-ENOENT))
+		return;
+	list_add(&vport->swdev_rep_list, &dp->swdev_rep_list);
+	pr_debug("%s added to rep_list\n", vport->ops->get_name(vport));
+}
+
+void ovs_hw_port_del(struct datapath *dp, struct vport *vport)
+{
+	struct vport *successor;
+
+	ASSERT_OVSL();
+	if (!__is_vport_in_swdev_rep_list(dp, vport))
+		return;
+	list_del(&vport->swdev_rep_list);
+	pr_debug("%s deleted from rep_list\n", vport->ops->get_name(vport));
+	/* Hand the representative role to another port with the same id */
+	successor = __find_vport_by_swdev_id(dp, vport);
+	if (IS_ERR(successor))
+		return;
+	list_add(&successor->swdev_rep_list, &dp->swdev_rep_list);
+	pr_debug("%s added to rep_list instead\n",
+		 successor->ops->get_name(successor));
+}
diff --git a/net/openvswitch/hw_offload.h b/net/openvswitch/hw_offload.h
new file mode 100644
index 0000000..83972d7
--- /dev/null
+++ b/net/openvswitch/hw_offload.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2014 Jiri Pirko <jiri@...nulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef HW_OFFLOAD_H
+#define HW_OFFLOAD_H 1
+
+#include "datapath.h"
+#include "flow.h"
+
+int ovs_hw_flow_insert(struct datapath *dp, struct ovs_flow *flow);
+int ovs_hw_flow_remove(struct datapath *dp, struct ovs_flow *flow);
+int ovs_hw_flow_flush(struct datapath *dp);
+void ovs_hw_port_add(struct datapath *dp, struct vport *vport);
+void ovs_hw_port_del(struct datapath *dp, struct vport *vport);
+
+#endif
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index aaf3d14..c5953de 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -32,6 +32,7 @@
 #include "datapath.h"
 #include "vport-internal_dev.h"
 #include "vport-netdev.h"
+#include "hw_offload.h"
 
 struct netdev_vport {
 	struct rcu_head rcu;
@@ -136,6 +137,7 @@ static struct vport *netdev_create(const struct vport_parms *parms)
 	dev_set_promiscuity(netdev_vport->dev, 1);
 	netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
 	rtnl_unlock();
+	ovs_hw_port_add(vport->dp, vport);
 
 	return vport;
 
@@ -176,6 +178,7 @@ static void netdev_destroy(struct vport *vport)
 {
 	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
 
+	ovs_hw_port_del(vport->dp, vport);
 	rtnl_lock();
 	if (netdev_vport->dev->priv_flags & IFF_OVS_DATAPATH)
 		ovs_netdev_detach_dev(vport);
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index f434271..c28604a 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -110,6 +110,8 @@ struct vport {
 
 	spinlock_t stats_lock;
 	struct vport_err_stats err_stats;
+
+	struct list_head swdev_rep_list;
 };
 
 /**
-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ