[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20130925162606.GA27752@redhat.com>
Date: Wed, 25 Sep 2013 18:26:06 +0200
From: Veaceslav Falico <vfalico@...hat.com>
To: Nikolay Aleksandrov <nikolay@...hat.com>
Cc: netdev@...r.kernel.org, davem@...emloft.net, andy@...yhouse.net,
fubar@...ibm.com, eric.dumazet@...il.com
Subject: Re: [PATCH net-next 1/2] bonding: modify the old and add new xmit
hash functions
On Wed, Sep 25, 2013 at 05:19:57PM +0200, Nikolay Aleksandrov wrote:
>This patch adds two new hash policy modes which use skb_flow_dissect:
>3 - Encapsulated layer 2+3
>4 - Encapsulated layer 3+4
>There should be a good improvement for tunnel users in those modes.
>It also changes the old hash functions to:
>hash ^= (__force u32)flow.dst ^ (__force u32)flow.src;
>hash ^= (hash >> 16);
>hash ^= (hash >> 8);
>
>Where hash will be initialized either to L2 hash, that is
>SRCMAC[5] XOR DSTMAC[5], or to flow->ports which should be extracted
>from the upper layer. Flow's dst and src are also extracted based on the
>xmit policy either directly from the buffer or by using skb_flow_dissect,
>but in both cases if the protocol is IPv6 then dst and src are obtained by
>ipv6_addr_hash() on the real addresses. In case of a non-dissectable
>packet, the algorithms fall back to L2 hashing.
>The bond_set_mode_ops() function is now obsolete and thus deleted
>because it was used only to set the proper hash policy. Also we trim a
>pointer from struct bonding because we no longer need to keep the hash
>function, now there's only a single hash function - bond_xmit_hash that
>works based on bond->params.xmit_policy.
>
>The hash function and skb_flow_dissect were suggested by Eric Dumazet.
>The layer names were suggested by Andy Gospodarek, because I suck at
>semantics.
>
>Signed-off-by: Nikolay Aleksandrov <nikolay@...hat.com>
>---
>One line is intentionally left at 82 chars since it's the whole function
>and IMO looks better that way.
>
> drivers/net/bonding/bond_3ad.c | 2 +-
> drivers/net/bonding/bond_main.c | 211 +++++++++++++++------------------------
> drivers/net/bonding/bond_sysfs.c | 2 -
> drivers/net/bonding/bonding.h | 3 +-
> include/uapi/linux/if_bonding.h | 2 +
> 5 files changed, 86 insertions(+), 134 deletions(-)
Nice!!! Though some comments below.
>
>diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
>index 0d8f427..b3ab703 100644
>--- a/drivers/net/bonding/bond_3ad.c
>+++ b/drivers/net/bonding/bond_3ad.c
>@@ -2442,7 +2442,7 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
> goto out;
> }
>
>- slave_agg_no = bond->xmit_hash_policy(skb, slaves_in_agg);
>+ slave_agg_no = bond_xmit_hash(bond, skb, slaves_in_agg);
>
> bond_for_each_slave(bond, slave) {
> struct aggregator *agg = SLAVE_AD_INFO(slave).port.aggregator;
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 55bbb8b..73e416b 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -78,6 +78,7 @@
> #include <net/netns/generic.h>
> #include <net/pkt_sched.h>
> #include <linux/rculist.h>
>+#include <net/flow_keys.h>
> #include "bonding.h"
> #include "bond_3ad.h"
> #include "bond_alb.h"
>@@ -159,7 +160,8 @@ MODULE_PARM_DESC(min_links, "Minimum number of available links before turning on
> module_param(xmit_hash_policy, charp, 0);
> MODULE_PARM_DESC(xmit_hash_policy, "balance-xor and 802.3ad hashing method; "
> "0 for layer 2 (default), 1 for layer 3+4, "
>- "2 for layer 2+3");
>+ "2 for layer 2+3, 3 for encap layer 2+3, "
>+ "4 for encap layer 3+4");
> module_param(arp_interval, int, 0);
> MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
> module_param_array(arp_ip_target, charp, NULL, 0);
>@@ -217,6 +219,8 @@ const struct bond_parm_tbl xmit_hashtype_tbl[] = {
> { "layer2", BOND_XMIT_POLICY_LAYER2},
> { "layer3+4", BOND_XMIT_POLICY_LAYER34},
> { "layer2+3", BOND_XMIT_POLICY_LAYER23},
>+{ "encap2+3", BOND_XMIT_POLICY_ENCAP23},
>+{ "encap3+4", BOND_XMIT_POLICY_ENCAP34},
> { NULL, -1},
> };
>
>@@ -3026,99 +3030,99 @@ static struct notifier_block bond_netdev_notifier = {
>
> /*---------------------------- Hashing Policies -----------------------------*/
>
>-/*
>- * Hash for the output device based upon layer 2 data
>- */
>-static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count)
>+/* L2 hash helper */
>+static inline u32 bond_eth_hash(struct sk_buff *skb)
> {
> struct ethhdr *data = (struct ethhdr *)skb->data;
>
> if (skb_headlen(skb) >= offsetof(struct ethhdr, h_proto))
>- return (data->h_dest[5] ^ data->h_source[5]) % count;
>+ return data->h_dest[5] ^ data->h_source[5];
>
> return 0;
> }
>
>-/*
>- * Hash for the output device based upon layer 2 and layer 3 data. If
>- * the packet is not IP, fall back on bond_xmit_hash_policy_l2()
>- */
>-static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count)
>+static void bond_flow_get_ports(struct sk_buff *skb, struct flow_keys *fk,
>+ int offset)
>+{
>+ __be32 *ports;
>+
>+ ports = skb_header_pointer(skb, offset, sizeof(fk->ports), &fk->ports);
>+ if (ports)
>+ fk->ports = *ports;
>+}
>+
>+/* Extract the appropriate headers based on bond's xmit policy */
>+static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
>+ struct flow_keys *fk)
> {
>- const struct ethhdr *data;
>+ const struct ipv6hdr *iph6;
> const struct iphdr *iph;
>- const struct ipv6hdr *ipv6h;
>- u32 v6hash;
>- const __be32 *s, *d;
>+ int noff, proto = -1, poff = -1;
Nitpick: Useless initialization of poff.
>+ bool ret = false;
>+
>+ if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23)
>+ return skb_flow_dissect(skb, fk);
>
>- if (skb->protocol == htons(ETH_P_IP) &&
>- pskb_network_may_pull(skb, sizeof(*iph))) {
>+ fk->ports = 0;
>+ noff = skb_network_offset(skb);
>+ if (skb->protocol == htons(ETH_P_IP)) {
>+ if (!pskb_may_pull(skb, noff + sizeof(*iph)))
>+ goto out;
> iph = ip_hdr(skb);
>- data = (struct ethhdr *)skb->data;
>- return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^
>- (data->h_dest[5] ^ data->h_source[5])) % count;
>- } else if (skb->protocol == htons(ETH_P_IPV6) &&
>- pskb_network_may_pull(skb, sizeof(*ipv6h))) {
>- ipv6h = ipv6_hdr(skb);
>- data = (struct ethhdr *)skb->data;
>- s = &ipv6h->saddr.s6_addr32[0];
>- d = &ipv6h->daddr.s6_addr32[0];
>- v6hash = (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]);
>- v6hash ^= (v6hash >> 24) ^ (v6hash >> 16) ^ (v6hash >> 8);
>- return (v6hash ^ data->h_dest[5] ^ data->h_source[5]) % count;
>- }
>-
>- return bond_xmit_hash_policy_l2(skb, count);
>+ fk->src = iph->saddr;
>+ fk->dst = iph->daddr;
>+ noff += iph->ihl << 2;
>+ if (!ip_is_fragment(iph))
>+ proto = iph->protocol;
>+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
>+ if (!pskb_may_pull(skb, noff + sizeof(*iph6)))
>+ goto out;
>+ iph6 = ipv6_hdr(skb);
>+ fk->src = (__force __be32)ipv6_addr_hash(&iph6->saddr);
>+ fk->dst = (__force __be32)ipv6_addr_hash(&iph6->daddr);
>+ noff += sizeof(*iph6);
>+ proto = iph6->nexthdr;
>+ }
else
return false;
?
Otherwise we might report false-positive for vlans, per example, without
populating the flow keys.
>+ if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34) {
>+ poff = proto_ports_offset(proto);
>+ if (poff >= 0)
>+ bond_flow_get_ports(skb, fk, poff + noff);
>+ }
One idea would be to move the same initialization code from
skb_flow_dissect() to an external function and re-use it here?
>+ ret = true;
>+
>+out:
>+ return ret;
> }
>
>-/*
>- * Hash for the output device based upon layer 3 and layer 4 data. If
>- * the packet is a frag or not TCP or UDP, just use layer 3 data. If it is
>- * altogether not IP, fall back on bond_xmit_hash_policy_l2()
>+/**
>+ * bond_xmit_hash - generate a hash value based on the xmit policy
>+ * @bond: bonding device
>+ * @skb: buffer to use for headers
>+ * @count: modulo value
>+ *
>+ * This function will extract the necessary headers from the skb buffer and use
>+ * them to generate a hash based on the xmit_policy set in the bonding device
>+ * which will be reduced modulo count before returning.
> */
>-static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count)
>+int bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, int count)
> {
>- u32 layer4_xor = 0;
>- const struct iphdr *iph;
>- const struct ipv6hdr *ipv6h;
>- const __be32 *s, *d;
>- const __be16 *l4 = NULL;
>- __be16 _l4[2];
>- int noff = skb_network_offset(skb);
>- int poff;
>-
>- if (skb->protocol == htons(ETH_P_IP) &&
>- pskb_may_pull(skb, noff + sizeof(*iph))) {
>- iph = ip_hdr(skb);
>- poff = proto_ports_offset(iph->protocol);
>+ struct flow_keys flow;
>+ u32 hash;
>
>- if (!ip_is_fragment(iph) && poff >= 0) {
>- l4 = skb_header_pointer(skb, noff + (iph->ihl << 2) + poff,
>- sizeof(_l4), &_l4);
>- if (l4)
>- layer4_xor = ntohs(l4[0] ^ l4[1]);
>- }
>- return (layer4_xor ^
>- ((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count;
>- } else if (skb->protocol == htons(ETH_P_IPV6) &&
>- pskb_may_pull(skb, noff + sizeof(*ipv6h))) {
>- ipv6h = ipv6_hdr(skb);
>- poff = proto_ports_offset(ipv6h->nexthdr);
>- if (poff >= 0) {
>- l4 = skb_header_pointer(skb, noff + sizeof(*ipv6h) + poff,
>- sizeof(_l4), &_l4);
>- if (l4)
>- layer4_xor = ntohs(l4[0] ^ l4[1]);
>- }
>- s = &ipv6h->saddr.s6_addr32[0];
>- d = &ipv6h->daddr.s6_addr32[0];
>- layer4_xor ^= (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]);
>- layer4_xor ^= (layer4_xor >> 24) ^ (layer4_xor >> 16) ^
>- (layer4_xor >> 8);
>- return layer4_xor % count;
>- }
>+ if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
>+ !bond_flow_dissect(bond, skb, &flow))
>+ return bond_eth_hash(skb) % count;
>+
>+ if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
>+ bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23)
>+ hash = bond_eth_hash(skb);
>+ else
>+ hash = (__force u32)flow.ports;
>+ hash ^= (__force u32)flow.dst ^ (__force u32)flow.src;
>+ hash ^= (hash >> 16);
>+ hash ^= (hash >> 8);
>
>- return bond_xmit_hash_policy_l2(skb, count);
>+ return hash % count;
> }
>
> /*-------------------------- Device entry points ----------------------------*/
>@@ -3700,8 +3704,7 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
> return NETDEV_TX_OK;
> }
>
>-/*
>- * In bond_xmit_xor() , we determine the output device by using a pre-
>+/* In bond_xmit_xor() , we determine the output device by using a pre-
> * determined xmit_hash_policy(), If the selected device is not enabled,
> * find the next active slave.
> */
>@@ -3709,8 +3712,7 @@ static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
> {
> struct bonding *bond = netdev_priv(bond_dev);
>
>- bond_xmit_slave_id(bond, skb,
>- bond->xmit_hash_policy(skb, bond->slave_cnt));
>+ bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb, bond->slave_cnt));
>
> return NETDEV_TX_OK;
> }
>@@ -3746,22 +3748,6 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)
>
> /*------------------------- Device initialization ---------------------------*/
>
>-static void bond_set_xmit_hash_policy(struct bonding *bond)
>-{
>- switch (bond->params.xmit_policy) {
>- case BOND_XMIT_POLICY_LAYER23:
>- bond->xmit_hash_policy = bond_xmit_hash_policy_l23;
>- break;
>- case BOND_XMIT_POLICY_LAYER34:
>- bond->xmit_hash_policy = bond_xmit_hash_policy_l34;
>- break;
>- case BOND_XMIT_POLICY_LAYER2:
>- default:
>- bond->xmit_hash_policy = bond_xmit_hash_policy_l2;
>- break;
>- }
>-}
>-
> /*
> * Lookup the slave that corresponds to a qid
> */
>@@ -3871,38 +3857,6 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
> return ret;
> }
>
>-/*
>- * set bond mode specific net device operations
>- */
>-void bond_set_mode_ops(struct bonding *bond, int mode)
>-{
>- struct net_device *bond_dev = bond->dev;
>-
>- switch (mode) {
>- case BOND_MODE_ROUNDROBIN:
>- break;
>- case BOND_MODE_ACTIVEBACKUP:
>- break;
>- case BOND_MODE_XOR:
>- bond_set_xmit_hash_policy(bond);
>- break;
>- case BOND_MODE_BROADCAST:
>- break;
>- case BOND_MODE_8023AD:
>- bond_set_xmit_hash_policy(bond);
>- break;
>- case BOND_MODE_ALB:
>- /* FALLTHRU */
>- case BOND_MODE_TLB:
>- break;
>- default:
>- /* Should never happen, mode already checked */
>- pr_err("%s: Error: Unknown bonding mode %d\n",
>- bond_dev->name, mode);
>- break;
>- }
>-}
>-
> static int bond_ethtool_get_settings(struct net_device *bond_dev,
> struct ethtool_cmd *ecmd)
> {
>@@ -4004,7 +3958,6 @@ static void bond_setup(struct net_device *bond_dev)
> ether_setup(bond_dev);
> bond_dev->netdev_ops = &bond_netdev_ops;
> bond_dev->ethtool_ops = &bond_ethtool_ops;
>- bond_set_mode_ops(bond, bond->params.mode);
>
> bond_dev->destructor = bond_destructor;
>
>diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
>index c29b836..dba3b9b 100644
>--- a/drivers/net/bonding/bond_sysfs.c
>+++ b/drivers/net/bonding/bond_sysfs.c
>@@ -352,7 +352,6 @@ static ssize_t bonding_store_mode(struct device *d,
> /* don't cache arp_validate between modes */
> bond->params.arp_validate = BOND_ARP_VALIDATE_NONE;
> bond->params.mode = new_value;
>- bond_set_mode_ops(bond, bond->params.mode);
> pr_info("%s: setting mode to %s (%d).\n",
> bond->dev->name, bond_mode_tbl[new_value].modename,
> new_value);
>@@ -392,7 +391,6 @@ static ssize_t bonding_store_xmit_hash(struct device *d,
> ret = -EINVAL;
> } else {
> bond->params.xmit_policy = new_value;
>- bond_set_mode_ops(bond, bond->params.mode);
> pr_info("%s: setting xmit hash policy to %s (%d).\n",
> bond->dev->name,
> xmit_hashtype_tbl[new_value].modename, new_value);
>diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
>index 03cf3fd..4db9ec4 100644
>--- a/drivers/net/bonding/bonding.h
>+++ b/drivers/net/bonding/bonding.h
>@@ -245,7 +245,6 @@ struct bonding {
> char proc_file_name[IFNAMSIZ];
> #endif /* CONFIG_PROC_FS */
> struct list_head bond_list;
>- int (*xmit_hash_policy)(struct sk_buff *, int);
> u16 rr_tx_counter;
> struct ad_bond_info ad_info;
> struct alb_bond_info alb_info;
>@@ -446,7 +445,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev);
> void bond_mii_monitor(struct work_struct *);
> void bond_loadbalance_arp_mon(struct work_struct *);
> void bond_activebackup_arp_mon(struct work_struct *);
>-void bond_set_mode_ops(struct bonding *bond, int mode);
>+int bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, int count);
> int bond_parse_parm(const char *mode_arg, const struct bond_parm_tbl *tbl);
> void bond_select_active_slave(struct bonding *bond);
> void bond_change_active_slave(struct bonding *bond, struct slave *new_active);
>diff --git a/include/uapi/linux/if_bonding.h b/include/uapi/linux/if_bonding.h
>index a17edda..9635a62 100644
>--- a/include/uapi/linux/if_bonding.h
>+++ b/include/uapi/linux/if_bonding.h
>@@ -91,6 +91,8 @@
> #define BOND_XMIT_POLICY_LAYER2 0 /* layer 2 (MAC only), default */
> #define BOND_XMIT_POLICY_LAYER34 1 /* layer 3+4 (IP ^ (TCP || UDP)) */
> #define BOND_XMIT_POLICY_LAYER23 2 /* layer 2+3 (IP ^ MAC) */
>+#define BOND_XMIT_POLICY_ENCAP23 3 /* encapsulated layer 2+3 */
>+#define BOND_XMIT_POLICY_ENCAP34 4 /* encapsulated layer 3+4 */
>
> typedef struct ifbond {
> __s32 bond_mode;
>--
>1.8.1.4
>
>--
>To unsubscribe from this list: send the line "unsubscribe netdev" in
>the body of a message to majordomo@...r.kernel.org
>More majordomo info at http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists