lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1380212024-3417-3-git-send-email-nikolay@redhat.com>
Date:	Thu, 26 Sep 2013 18:13:43 +0200
From:	Nikolay Aleksandrov <nikolay@...hat.com>
To:	netdev@...r.kernel.org
Cc:	davem@...emloft.net, andy@...yhouse.net, fubar@...ibm.com,
	eric.dumazet@...il.com, vfalico@...hat.com
Subject: [PATCH net-next v4 2/3] bonding: modify the old and add new xmit hash policies

This patch adds two new hash policy modes which use skb_flow_dissect:
3 - Encapsulated layer 2+3
4 - Encapsulated layer 3+4
There should be a good improvement for tunnel users in those modes.
It also changes the old hash functions to:
hash ^= (__force u32)flow.dst ^ (__force u32)flow.src;
hash ^= (hash >> 16);
hash ^= (hash >> 8);

Where hash will be initialized either to L2 hash, that is
SRCMAC[5] XOR DSTMAC[5], or to flow->ports which should be extracted
from the upper layer. Flow's dst and src are also extracted based on the
xmit policy either directly from the buffer or by using skb_flow_dissect,
but in both cases if the protocol is IPv6 then dst and src are obtained by
ipv6_addr_hash() on the real addresses. In case of a non-dissectable
packet, the algorithms fall back to L2 hashing.
The bond_set_mode_ops() function is now obsolete and thus deleted
because it was used only to set the proper hash policy. Also we trim a
pointer from struct bonding because we no longer need to keep the hash
function, now there's only a single hash function - bond_xmit_hash that
works based on bond->params.xmit_policy.

The hash function and skb_flow_dissect were suggested by Eric Dumazet.
The layer names were suggested by Andy Gospodarek, because I suck at
semantics.

Signed-off-by: Nikolay Aleksandrov <nikolay@...hat.com>
---
v2: fix a bug in bond_flow_dissect which might've caused the use of
    uninitalized flow_keys and make use of skb_flow_get_ports
v3, v4: no change
One line is intentionally left at 82 chars since it's the whole function
and IMO looks better that way.

 drivers/net/bonding/bond_3ad.c   |   2 +-
 drivers/net/bonding/bond_main.c  | 197 ++++++++++++++-------------------------
 drivers/net/bonding/bond_sysfs.c |   2 -
 drivers/net/bonding/bonding.h    |   3 +-
 include/uapi/linux/if_bonding.h  |   2 +
 5 files changed, 72 insertions(+), 134 deletions(-)

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 0d8f427..b3ab703 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2442,7 +2442,7 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
 		goto out;
 	}
 
-	slave_agg_no = bond->xmit_hash_policy(skb, slaves_in_agg);
+	slave_agg_no = bond_xmit_hash(bond, skb, slaves_in_agg);
 
 	bond_for_each_slave(bond, slave) {
 		struct aggregator *agg = SLAVE_AD_INFO(slave).port.aggregator;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 55bbb8b..ef6d2f7 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -78,6 +78,7 @@
 #include <net/netns/generic.h>
 #include <net/pkt_sched.h>
 #include <linux/rculist.h>
+#include <net/flow_keys.h>
 #include "bonding.h"
 #include "bond_3ad.h"
 #include "bond_alb.h"
@@ -159,7 +160,8 @@ MODULE_PARM_DESC(min_links, "Minimum number of available links before turning on
 module_param(xmit_hash_policy, charp, 0);
 MODULE_PARM_DESC(xmit_hash_policy, "balance-xor and 802.3ad hashing method; "
 				   "0 for layer 2 (default), 1 for layer 3+4, "
-				   "2 for layer 2+3");
+				   "2 for layer 2+3, 3 for encap layer 2+3, "
+				   "4 for encap layer 3+4");
 module_param(arp_interval, int, 0);
 MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
 module_param_array(arp_ip_target, charp, NULL, 0);
@@ -217,6 +219,8 @@ const struct bond_parm_tbl xmit_hashtype_tbl[] = {
 {	"layer2",		BOND_XMIT_POLICY_LAYER2},
 {	"layer3+4",		BOND_XMIT_POLICY_LAYER34},
 {	"layer2+3",		BOND_XMIT_POLICY_LAYER23},
+{	"encap2+3",		BOND_XMIT_POLICY_ENCAP23},
+{	"encap3+4",		BOND_XMIT_POLICY_ENCAP34},
 {	NULL,			-1},
 };
 
@@ -3026,99 +3030,85 @@ static struct notifier_block bond_netdev_notifier = {
 
 /*---------------------------- Hashing Policies -----------------------------*/
 
-/*
- * Hash for the output device based upon layer 2 data
- */
-static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count)
+/* L2 hash helper */
+static inline u32 bond_eth_hash(struct sk_buff *skb)
 {
 	struct ethhdr *data = (struct ethhdr *)skb->data;
 
 	if (skb_headlen(skb) >= offsetof(struct ethhdr, h_proto))
-		return (data->h_dest[5] ^ data->h_source[5]) % count;
+		return data->h_dest[5] ^ data->h_source[5];
 
 	return 0;
 }
 
-/*
- * Hash for the output device based upon layer 2 and layer 3 data. If
- * the packet is not IP, fall back on bond_xmit_hash_policy_l2()
- */
-static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count)
+/* Extract the appropriate headers based on bond's xmit policy */
+static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
+			      struct flow_keys *fk)
 {
-	const struct ethhdr *data;
+	const struct ipv6hdr *iph6;
 	const struct iphdr *iph;
-	const struct ipv6hdr *ipv6h;
-	u32 v6hash;
-	const __be32 *s, *d;
+	int noff, proto = -1;
 
-	if (skb->protocol == htons(ETH_P_IP) &&
-	    pskb_network_may_pull(skb, sizeof(*iph))) {
+	if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23)
+		return skb_flow_dissect(skb, fk);
+
+	fk->ports = 0;
+	noff = skb_network_offset(skb);
+	if (skb->protocol == htons(ETH_P_IP)) {
+		if (!pskb_may_pull(skb, noff + sizeof(*iph)))
+			return false;
 		iph = ip_hdr(skb);
-		data = (struct ethhdr *)skb->data;
-		return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^
-			(data->h_dest[5] ^ data->h_source[5])) % count;
-	} else if (skb->protocol == htons(ETH_P_IPV6) &&
-		   pskb_network_may_pull(skb, sizeof(*ipv6h))) {
-		ipv6h = ipv6_hdr(skb);
-		data = (struct ethhdr *)skb->data;
-		s = &ipv6h->saddr.s6_addr32[0];
-		d = &ipv6h->daddr.s6_addr32[0];
-		v6hash = (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]);
-		v6hash ^= (v6hash >> 24) ^ (v6hash >> 16) ^ (v6hash >> 8);
-		return (v6hash ^ data->h_dest[5] ^ data->h_source[5]) % count;
-	}
-
-	return bond_xmit_hash_policy_l2(skb, count);
+		fk->src = iph->saddr;
+		fk->dst = iph->daddr;
+		noff += iph->ihl << 2;
+		if (!ip_is_fragment(iph))
+			proto = iph->protocol;
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		if (!pskb_may_pull(skb, noff + sizeof(*iph6)))
+			return false;
+		iph6 = ipv6_hdr(skb);
+		fk->src = (__force __be32)ipv6_addr_hash(&iph6->saddr);
+		fk->dst = (__force __be32)ipv6_addr_hash(&iph6->daddr);
+		noff += sizeof(*iph6);
+		proto = iph6->nexthdr;
+	} else {
+		return false;
+	}
+	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0)
+		fk->ports = skb_flow_get_ports(skb, noff, proto);
+
+	return true;
 }
 
-/*
- * Hash for the output device based upon layer 3 and layer 4 data. If
- * the packet is a frag or not TCP or UDP, just use layer 3 data.  If it is
- * altogether not IP, fall back on bond_xmit_hash_policy_l2()
+/**
+ * bond_xmit_hash - generate a hash value based on the xmit policy
+ * @bond: bonding device
+ * @skb: buffer to use for headers
+ * @count: modulo value
+ *
+ * This function will extract the necessary headers from the skb buffer and use
+ * them to generate a hash based on the xmit_policy set in the bonding device
+ * which will be reduced modulo count before returning.
  */
-static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count)
+int bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, int count)
 {
-	u32 layer4_xor = 0;
-	const struct iphdr *iph;
-	const struct ipv6hdr *ipv6h;
-	const __be32 *s, *d;
-	const __be16 *l4 = NULL;
-	__be16 _l4[2];
-	int noff = skb_network_offset(skb);
-	int poff;
-
-	if (skb->protocol == htons(ETH_P_IP) &&
-	    pskb_may_pull(skb, noff + sizeof(*iph))) {
-		iph = ip_hdr(skb);
-		poff = proto_ports_offset(iph->protocol);
+	struct flow_keys flow;
+	u32 hash;
 
-		if (!ip_is_fragment(iph) && poff >= 0) {
-			l4 = skb_header_pointer(skb, noff + (iph->ihl << 2) + poff,
-						sizeof(_l4), &_l4);
-			if (l4)
-				layer4_xor = ntohs(l4[0] ^ l4[1]);
-		}
-		return (layer4_xor ^
-			((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count;
-	} else if (skb->protocol == htons(ETH_P_IPV6) &&
-		   pskb_may_pull(skb, noff + sizeof(*ipv6h))) {
-		ipv6h = ipv6_hdr(skb);
-		poff = proto_ports_offset(ipv6h->nexthdr);
-		if (poff >= 0) {
-			l4 = skb_header_pointer(skb, noff + sizeof(*ipv6h) + poff,
-						sizeof(_l4), &_l4);
-			if (l4)
-				layer4_xor = ntohs(l4[0] ^ l4[1]);
-		}
-		s = &ipv6h->saddr.s6_addr32[0];
-		d = &ipv6h->daddr.s6_addr32[0];
-		layer4_xor ^= (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]);
-		layer4_xor ^= (layer4_xor >> 24) ^ (layer4_xor >> 16) ^
-			       (layer4_xor >> 8);
-		return layer4_xor % count;
-	}
+	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
+	    !bond_flow_dissect(bond, skb, &flow))
+		return bond_eth_hash(skb) % count;
+
+	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
+	    bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23)
+		hash = bond_eth_hash(skb);
+	else
+		hash = (__force u32)flow.ports;
+	hash ^= (__force u32)flow.dst ^ (__force u32)flow.src;
+	hash ^= (hash >> 16);
+	hash ^= (hash >> 8);
 
-	return bond_xmit_hash_policy_l2(skb, count);
+	return hash % count;
 }
 
 /*-------------------------- Device entry points ----------------------------*/
@@ -3700,8 +3690,7 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
 	return NETDEV_TX_OK;
 }
 
-/*
- * In bond_xmit_xor() , we determine the output device by using a pre-
+/* In bond_xmit_xor() , we determine the output device by using a pre-
  * determined xmit_hash_policy(), If the selected device is not enabled,
  * find the next active slave.
  */
@@ -3709,8 +3698,7 @@ static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
 
-	bond_xmit_slave_id(bond, skb,
-			   bond->xmit_hash_policy(skb, bond->slave_cnt));
+	bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb, bond->slave_cnt));
 
 	return NETDEV_TX_OK;
 }
@@ -3746,22 +3734,6 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)
 
 /*------------------------- Device initialization ---------------------------*/
 
-static void bond_set_xmit_hash_policy(struct bonding *bond)
-{
-	switch (bond->params.xmit_policy) {
-	case BOND_XMIT_POLICY_LAYER23:
-		bond->xmit_hash_policy = bond_xmit_hash_policy_l23;
-		break;
-	case BOND_XMIT_POLICY_LAYER34:
-		bond->xmit_hash_policy = bond_xmit_hash_policy_l34;
-		break;
-	case BOND_XMIT_POLICY_LAYER2:
-	default:
-		bond->xmit_hash_policy = bond_xmit_hash_policy_l2;
-		break;
-	}
-}
-
 /*
  * Lookup the slave that corresponds to a qid
  */
@@ -3871,38 +3843,6 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	return ret;
 }
 
-/*
- * set bond mode specific net device operations
- */
-void bond_set_mode_ops(struct bonding *bond, int mode)
-{
-	struct net_device *bond_dev = bond->dev;
-
-	switch (mode) {
-	case BOND_MODE_ROUNDROBIN:
-		break;
-	case BOND_MODE_ACTIVEBACKUP:
-		break;
-	case BOND_MODE_XOR:
-		bond_set_xmit_hash_policy(bond);
-		break;
-	case BOND_MODE_BROADCAST:
-		break;
-	case BOND_MODE_8023AD:
-		bond_set_xmit_hash_policy(bond);
-		break;
-	case BOND_MODE_ALB:
-		/* FALLTHRU */
-	case BOND_MODE_TLB:
-		break;
-	default:
-		/* Should never happen, mode already checked */
-		pr_err("%s: Error: Unknown bonding mode %d\n",
-		       bond_dev->name, mode);
-		break;
-	}
-}
-
 static int bond_ethtool_get_settings(struct net_device *bond_dev,
 				     struct ethtool_cmd *ecmd)
 {
@@ -4004,7 +3944,6 @@ static void bond_setup(struct net_device *bond_dev)
 	ether_setup(bond_dev);
 	bond_dev->netdev_ops = &bond_netdev_ops;
 	bond_dev->ethtool_ops = &bond_ethtool_ops;
-	bond_set_mode_ops(bond, bond->params.mode);
 
 	bond_dev->destructor = bond_destructor;
 
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index c29b836..dba3b9b 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -352,7 +352,6 @@ static ssize_t bonding_store_mode(struct device *d,
 	/* don't cache arp_validate between modes */
 	bond->params.arp_validate = BOND_ARP_VALIDATE_NONE;
 	bond->params.mode = new_value;
-	bond_set_mode_ops(bond, bond->params.mode);
 	pr_info("%s: setting mode to %s (%d).\n",
 		bond->dev->name, bond_mode_tbl[new_value].modename,
 		new_value);
@@ -392,7 +391,6 @@ static ssize_t bonding_store_xmit_hash(struct device *d,
 		ret = -EINVAL;
 	} else {
 		bond->params.xmit_policy = new_value;
-		bond_set_mode_ops(bond, bond->params.mode);
 		pr_info("%s: setting xmit hash policy to %s (%d).\n",
 			bond->dev->name,
 			xmit_hashtype_tbl[new_value].modename, new_value);
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 03cf3fd..4db9ec4 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -245,7 +245,6 @@ struct bonding {
 	char     proc_file_name[IFNAMSIZ];
 #endif /* CONFIG_PROC_FS */
 	struct   list_head bond_list;
-	int      (*xmit_hash_policy)(struct sk_buff *, int);
 	u16      rr_tx_counter;
 	struct   ad_bond_info ad_info;
 	struct   alb_bond_info alb_info;
@@ -446,7 +445,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev);
 void bond_mii_monitor(struct work_struct *);
 void bond_loadbalance_arp_mon(struct work_struct *);
 void bond_activebackup_arp_mon(struct work_struct *);
-void bond_set_mode_ops(struct bonding *bond, int mode);
+int bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, int count);
 int bond_parse_parm(const char *mode_arg, const struct bond_parm_tbl *tbl);
 void bond_select_active_slave(struct bonding *bond);
 void bond_change_active_slave(struct bonding *bond, struct slave *new_active);
diff --git a/include/uapi/linux/if_bonding.h b/include/uapi/linux/if_bonding.h
index a17edda..9635a62 100644
--- a/include/uapi/linux/if_bonding.h
+++ b/include/uapi/linux/if_bonding.h
@@ -91,6 +91,8 @@
 #define BOND_XMIT_POLICY_LAYER2		0 /* layer 2 (MAC only), default */
 #define BOND_XMIT_POLICY_LAYER34	1 /* layer 3+4 (IP ^ (TCP || UDP)) */
 #define BOND_XMIT_POLICY_LAYER23	2 /* layer 2+3 (IP ^ MAC) */
+#define BOND_XMIT_POLICY_ENCAP23	3 /* encapsulated layer 2+3 */
+#define BOND_XMIT_POLICY_ENCAP34	4 /* encapsulated layer 3+4 */
 
 typedef struct ifbond {
 	__s32 bond_mode;
-- 
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ