netdev - Re: [PATCH net-next 1/2] bonding: modify the old and add new xmit hash functions

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <52431111.8000301@redhat.com>
Date:	Wed, 25 Sep 2013 18:36:33 +0200
From:	Nikolay Aleksandrov <nikolay@...hat.com>
To:	Veaceslav Falico <vfalico@...hat.com>
CC:	netdev@...r.kernel.org, davem@...emloft.net, andy@...yhouse.net,
	fubar@...ibm.com, eric.dumazet@...il.com
Subject: Re: [PATCH net-next 1/2] bonding: modify the old and add new xmit
 hash functions

On 09/25/2013 06:26 PM, Veaceslav Falico wrote:
> On Wed, Sep 25, 2013 at 05:19:57PM +0200, Nikolay Aleksandrov wrote:
>> This patch adds two new hash policy modes which use skb_flow_dissect:
>> 3 - Encapsulated layer 2+3
>> 4 - Encapsulated layer 3+4
>> There should be a good improvement for tunnel users in those modes.
>> It also changes the old hash functions to:
>> hash ^= (__force u32)flow.dst ^ (__force u32)flow.src;
>> hash ^= (hash >> 16);
>> hash ^= (hash >> 8);
>>
>> Where hash will be initialized either to L2 hash, that is
>> SRCMAC[5] XOR DSTMAC[5], or to flow->ports which should be extracted
>> from the upper layer. Flow's dst and src are also extracted based on the
>> xmit policy either directly from the buffer or by using skb_flow_dissect,
>> but in both cases if the protocol is IPv6 then dst and src are obtained by
>> ipv6_addr_hash() on the real addresses. In case of a non-dissectable
>> packet, the algorithms fall back to L2 hashing.
>> The bond_set_mode_ops() function is now obsolete and thus deleted
>> because it was used only to set the proper hash policy. Also we trim a
>> pointer from struct bonding because we no longer need to keep the hash
>> function, now there's only a single hash function - bond_xmit_hash that
>> works based on bond->params.xmit_policy.
>>
>> The hash function and skb_flow_dissect were suggested by Eric Dumazet.
>> The layer names were suggested by Andy Gospodarek, because I suck at
>> semantics.
>>
>> Signed-off-by: Nikolay Aleksandrov <nikolay@...hat.com>
>> ---
>> One line is intentionally left at 82 chars since it's the whole function
>> and IMO looks better that way.
>>
>> drivers/net/bonding/bond_3ad.c   |   2 +-
>> drivers/net/bonding/bond_main.c  | 211
>> +++++++++++++++------------------------
>> drivers/net/bonding/bond_sysfs.c |   2 -
>> drivers/net/bonding/bonding.h    |   3 +-
>> include/uapi/linux/if_bonding.h  |   2 +
>> 5 files changed, 86 insertions(+), 134 deletions(-)
> 
> Nice!!! Though some comments below.
> 
>>
>> diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
>> index 0d8f427..b3ab703 100644
>> --- a/drivers/net/bonding/bond_3ad.c
>> +++ b/drivers/net/bonding/bond_3ad.c
>> @@ -2442,7 +2442,7 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct
>> net_device *dev)
>>         goto out;
>>     }
>>
>> -    slave_agg_no = bond->xmit_hash_policy(skb, slaves_in_agg);
>> +    slave_agg_no = bond_xmit_hash(bond, skb, slaves_in_agg);
>>
>>     bond_for_each_slave(bond, slave) {
>>         struct aggregator *agg = SLAVE_AD_INFO(slave).port.aggregator;
>> diff --git a/drivers/net/bonding/bond_main.c
>> b/drivers/net/bonding/bond_main.c
>> index 55bbb8b..73e416b 100644
>> --- a/drivers/net/bonding/bond_main.c
>> +++ b/drivers/net/bonding/bond_main.c
>> @@ -78,6 +78,7 @@
>> #include <net/netns/generic.h>
>> #include <net/pkt_sched.h>
>> #include <linux/rculist.h>
>> +#include <net/flow_keys.h>
>> #include "bonding.h"
>> #include "bond_3ad.h"
>> #include "bond_alb.h"
>> @@ -159,7 +160,8 @@ MODULE_PARM_DESC(min_links, "Minimum number of
>> available links before turning on
>> module_param(xmit_hash_policy, charp, 0);
>> MODULE_PARM_DESC(xmit_hash_policy, "balance-xor and 802.3ad hashing
>> method; "
>>                    "0 for layer 2 (default), 1 for layer 3+4, "
>> -                   "2 for layer 2+3");
>> +                   "2 for layer 2+3, 3 for encap layer 2+3, "
>> +                   "4 for encap layer 3+4");
>> module_param(arp_interval, int, 0);
>> MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
>> module_param_array(arp_ip_target, charp, NULL, 0);
>> @@ -217,6 +219,8 @@ const struct bond_parm_tbl xmit_hashtype_tbl[] = {
>> {    "layer2",        BOND_XMIT_POLICY_LAYER2},
>> {    "layer3+4",        BOND_XMIT_POLICY_LAYER34},
>> {    "layer2+3",        BOND_XMIT_POLICY_LAYER23},
>> +{    "encap2+3",        BOND_XMIT_POLICY_ENCAP23},
>> +{    "encap3+4",        BOND_XMIT_POLICY_ENCAP34},
>> {    NULL,            -1},
>> };
>>
>> @@ -3026,99 +3030,99 @@ static struct notifier_block bond_netdev_notifier
>> = {
>>
>> /*---------------------------- Hashing Policies
>> -----------------------------*/
>>
>> -/*
>> - * Hash for the output device based upon layer 2 data
>> - */
>> -static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count)
>> +/* L2 hash helper */
>> +static inline u32 bond_eth_hash(struct sk_buff *skb)
>> {
>>     struct ethhdr *data = (struct ethhdr *)skb->data;
>>
>>     if (skb_headlen(skb) >= offsetof(struct ethhdr, h_proto))
>> -        return (data->h_dest[5] ^ data->h_source[5]) % count;
>> +        return data->h_dest[5] ^ data->h_source[5];
>>
>>     return 0;
>> }
>>
>> -/*
>> - * Hash for the output device based upon layer 2 and layer 3 data. If
>> - * the packet is not IP, fall back on bond_xmit_hash_policy_l2()
>> - */
>> -static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count)
>> +static void bond_flow_get_ports(struct sk_buff *skb, struct flow_keys *fk,
>> +                int offset)
>> +{
>> +    __be32 *ports;
>> +
>> +    ports = skb_header_pointer(skb, offset, sizeof(fk->ports), &fk->ports);
>> +    if (ports)
>> +        fk->ports = *ports;
>> +}
>> +
>> +/* Extract the appropriate headers based on bond's xmit policy */
>> +static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
>> +                  struct flow_keys *fk)
>> {
>> -    const struct ethhdr *data;
>> +    const struct ipv6hdr *iph6;
>>     const struct iphdr *iph;
>> -    const struct ipv6hdr *ipv6h;
>> -    u32 v6hash;
>> -    const __be32 *s, *d;
>> +    int noff, proto = -1, poff = -1;
> 
> Nitpick: Useless initialization of poff.
> 
Yeah, I saw it after posting and have this in the prepared v2 :-)

>> +    bool ret = false;
>> +
>> +    if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23)
>> +        return skb_flow_dissect(skb, fk);
>>
>> -    if (skb->protocol == htons(ETH_P_IP) &&
>> -        pskb_network_may_pull(skb, sizeof(*iph))) {
>> +    fk->ports = 0;
>> +    noff = skb_network_offset(skb);
>> +    if (skb->protocol == htons(ETH_P_IP)) {
>> +        if (!pskb_may_pull(skb, noff + sizeof(*iph)))
>> +            goto out;
>>         iph = ip_hdr(skb);
>> -        data = (struct ethhdr *)skb->data;
>> -        return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^
>> -            (data->h_dest[5] ^ data->h_source[5])) % count;
>> -    } else if (skb->protocol == htons(ETH_P_IPV6) &&
>> -           pskb_network_may_pull(skb, sizeof(*ipv6h))) {
>> -        ipv6h = ipv6_hdr(skb);
>> -        data = (struct ethhdr *)skb->data;
>> -        s = &ipv6h->saddr.s6_addr32[0];
>> -        d = &ipv6h->daddr.s6_addr32[0];
>> -        v6hash = (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]);
>> -        v6hash ^= (v6hash >> 24) ^ (v6hash >> 16) ^ (v6hash >> 8);
>> -        return (v6hash ^ data->h_dest[5] ^ data->h_source[5]) % count;
>> -    }
>> -
>> -    return bond_xmit_hash_policy_l2(skb, count);
>> +        fk->src = iph->saddr;
>> +        fk->dst = iph->daddr;
>> +        noff += iph->ihl << 2;
>> +        if (!ip_is_fragment(iph))
>> +            proto = iph->protocol;
>> +    } else if (skb->protocol == htons(ETH_P_IPV6)) {
>> +        if (!pskb_may_pull(skb, noff + sizeof(*iph6)))
>> +            goto out;
>> +        iph6 = ipv6_hdr(skb);
>> +        fk->src = (__force __be32)ipv6_addr_hash(&iph6->saddr);
>> +        fk->dst = (__force __be32)ipv6_addr_hash(&iph6->daddr);
>> +        noff += sizeof(*iph6);
>> +        proto = iph6->nexthdr;
>> +    }
> 
> else
>     return false;
> 
> ?
> 
Hehe, I actually had that in v1 few months ago, I was wondering why I did
it that way...

> Otherwise we might report false-positive for vlans, per example, without
> populating the flow keys.
> 
Right.

>> +    if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34) {
>> +        poff = proto_ports_offset(proto);
>> +        if (poff >= 0)
>> +            bond_flow_get_ports(skb, fk, poff + noff);
>> +    }
> 
> One idea would be to move the same initialization code from
> skb_flow_dissect() to an external function and re-use it here?
> 
Yep, I could do an inline skb_flow_get_ports() and re-use it.

>> +    ret = true;
>> +
>> +out:
>> +    return ret;
>> }
>>
>> -/*
>> - * Hash for the output device based upon layer 3 and layer 4 data. If
>> - * the packet is a frag or not TCP or UDP, just use layer 3 data.  If it is
>> - * altogether not IP, fall back on bond_xmit_hash_policy_l2()
>> +/**
>> + * bond_xmit_hash - generate a hash value based on the xmit policy
>> + * @bond: bonding device
>> + * @skb: buffer to use for headers
>> + * @count: modulo value
>> + *
>> + * This function will extract the necessary headers from the skb buffer
>> and use
>> + * them to generate a hash based on the xmit_policy set in the bonding
>> device
>> + * which will be reduced modulo count before returning.
>>  */
>> -static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count)
>> +int bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, int count)
>> {
>> -    u32 layer4_xor = 0;
>> -    const struct iphdr *iph;
>> -    const struct ipv6hdr *ipv6h;
>> -    const __be32 *s, *d;
>> -    const __be16 *l4 = NULL;
>> -    __be16 _l4[2];
>> -    int noff = skb_network_offset(skb);
>> -    int poff;
>> -
>> -    if (skb->protocol == htons(ETH_P_IP) &&
>> -        pskb_may_pull(skb, noff + sizeof(*iph))) {
>> -        iph = ip_hdr(skb);
>> -        poff = proto_ports_offset(iph->protocol);
>> +    struct flow_keys flow;
>> +    u32 hash;
>>
>> -        if (!ip_is_fragment(iph) && poff >= 0) {
>> -            l4 = skb_header_pointer(skb, noff + (iph->ihl << 2) + poff,
>> -                        sizeof(_l4), &_l4);
>> -            if (l4)
>> -                layer4_xor = ntohs(l4[0] ^ l4[1]);
>> -        }
>> -        return (layer4_xor ^
>> -            ((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count;
>> -    } else if (skb->protocol == htons(ETH_P_IPV6) &&
>> -           pskb_may_pull(skb, noff + sizeof(*ipv6h))) {
>> -        ipv6h = ipv6_hdr(skb);
>> -        poff = proto_ports_offset(ipv6h->nexthdr);
>> -        if (poff >= 0) {
>> -            l4 = skb_header_pointer(skb, noff + sizeof(*ipv6h) + poff,
>> -                        sizeof(_l4), &_l4);
>> -            if (l4)
>> -                layer4_xor = ntohs(l4[0] ^ l4[1]);
>> -        }
>> -        s = &ipv6h->saddr.s6_addr32[0];
>> -        d = &ipv6h->daddr.s6_addr32[0];
>> -        layer4_xor ^= (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]);
>> -        layer4_xor ^= (layer4_xor >> 24) ^ (layer4_xor >> 16) ^
>> -                   (layer4_xor >> 8);
>> -        return layer4_xor % count;
>> -    }
>> +    if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
>> +        !bond_flow_dissect(bond, skb, &flow))
>> +        return bond_eth_hash(skb) % count;
>> +
>> +    if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
>> +        bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23)
>> +        hash = bond_eth_hash(skb);
>> +    else
>> +        hash = (__force u32)flow.ports;
>> +    hash ^= (__force u32)flow.dst ^ (__force u32)flow.src;
>> +    hash ^= (hash >> 16);
>> +    hash ^= (hash >> 8);
>>
>> -    return bond_xmit_hash_policy_l2(skb, count);
>> +    return hash % count;
>> }
>>
>> /*-------------------------- Device entry points
>> ----------------------------*/
>> @@ -3700,8 +3704,7 @@ static int bond_xmit_activebackup(struct sk_buff
>> *skb, struct net_device *bond_d
>>     return NETDEV_TX_OK;
>> }
>>
>> -/*
>> - * In bond_xmit_xor() , we determine the output device by using a pre-
>> +/* In bond_xmit_xor() , we determine the output device by using a pre-
>>  * determined xmit_hash_policy(), If the selected device is not enabled,
>>  * find the next active slave.
>>  */
>> @@ -3709,8 +3712,7 @@ static int bond_xmit_xor(struct sk_buff *skb,
>> struct net_device *bond_dev)
>> {
>>     struct bonding *bond = netdev_priv(bond_dev);
>>
>> -    bond_xmit_slave_id(bond, skb,
>> -               bond->xmit_hash_policy(skb, bond->slave_cnt));
>> +    bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb,
>> bond->slave_cnt));
>>
>>     return NETDEV_TX_OK;
>> }
>> @@ -3746,22 +3748,6 @@ static int bond_xmit_broadcast(struct sk_buff
>> *skb, struct net_device *bond_dev)
>>
>> /*------------------------- Device initialization
>> ---------------------------*/
>>
>> -static void bond_set_xmit_hash_policy(struct bonding *bond)
>> -{
>> -    switch (bond->params.xmit_policy) {
>> -    case BOND_XMIT_POLICY_LAYER23:
>> -        bond->xmit_hash_policy = bond_xmit_hash_policy_l23;
>> -        break;
>> -    case BOND_XMIT_POLICY_LAYER34:
>> -        bond->xmit_hash_policy = bond_xmit_hash_policy_l34;
>> -        break;
>> -    case BOND_XMIT_POLICY_LAYER2:
>> -    default:
>> -        bond->xmit_hash_policy = bond_xmit_hash_policy_l2;
>> -        break;
>> -    }
>> -}
>> -
>> /*
>>  * Lookup the slave that corresponds to a qid
>>  */
>> @@ -3871,38 +3857,6 @@ static netdev_tx_t bond_start_xmit(struct sk_buff
>> *skb, struct net_device *dev)
>>     return ret;
>> }
>>
>> -/*
>> - * set bond mode specific net device operations
>> - */
>> -void bond_set_mode_ops(struct bonding *bond, int mode)
>> -{
>> -    struct net_device *bond_dev = bond->dev;
>> -
>> -    switch (mode) {
>> -    case BOND_MODE_ROUNDROBIN:
>> -        break;
>> -    case BOND_MODE_ACTIVEBACKUP:
>> -        break;
>> -    case BOND_MODE_XOR:
>> -        bond_set_xmit_hash_policy(bond);
>> -        break;
>> -    case BOND_MODE_BROADCAST:
>> -        break;
>> -    case BOND_MODE_8023AD:
>> -        bond_set_xmit_hash_policy(bond);
>> -        break;
>> -    case BOND_MODE_ALB:
>> -        /* FALLTHRU */
>> -    case BOND_MODE_TLB:
>> -        break;
>> -    default:
>> -        /* Should never happen, mode already checked */
>> -        pr_err("%s: Error: Unknown bonding mode %d\n",
>> -               bond_dev->name, mode);
>> -        break;
>> -    }
>> -}
>> -
>> static int bond_ethtool_get_settings(struct net_device *bond_dev,
>>                      struct ethtool_cmd *ecmd)
>> {
>> @@ -4004,7 +3958,6 @@ static void bond_setup(struct net_device *bond_dev)
>>     ether_setup(bond_dev);
>>     bond_dev->netdev_ops = &bond_netdev_ops;
>>     bond_dev->ethtool_ops = &bond_ethtool_ops;
>> -    bond_set_mode_ops(bond, bond->params.mode);
>>
>>     bond_dev->destructor = bond_destructor;
>>
>> diff --git a/drivers/net/bonding/bond_sysfs.c
>> b/drivers/net/bonding/bond_sysfs.c
>> index c29b836..dba3b9b 100644
>> --- a/drivers/net/bonding/bond_sysfs.c
>> +++ b/drivers/net/bonding/bond_sysfs.c
>> @@ -352,7 +352,6 @@ static ssize_t bonding_store_mode(struct device *d,
>>     /* don't cache arp_validate between modes */
>>     bond->params.arp_validate = BOND_ARP_VALIDATE_NONE;
>>     bond->params.mode = new_value;
>> -    bond_set_mode_ops(bond, bond->params.mode);
>>     pr_info("%s: setting mode to %s (%d).\n",
>>         bond->dev->name, bond_mode_tbl[new_value].modename,
>>         new_value);
>> @@ -392,7 +391,6 @@ static ssize_t bonding_store_xmit_hash(struct device *d,
>>         ret = -EINVAL;
>>     } else {
>>         bond->params.xmit_policy = new_value;
>> -        bond_set_mode_ops(bond, bond->params.mode);
>>         pr_info("%s: setting xmit hash policy to %s (%d).\n",
>>             bond->dev->name,
>>             xmit_hashtype_tbl[new_value].modename, new_value);
>> diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
>> index 03cf3fd..4db9ec4 100644
>> --- a/drivers/net/bonding/bonding.h
>> +++ b/drivers/net/bonding/bonding.h
>> @@ -245,7 +245,6 @@ struct bonding {
>>     char     proc_file_name[IFNAMSIZ];
>> #endif /* CONFIG_PROC_FS */
>>     struct   list_head bond_list;
>> -    int      (*xmit_hash_policy)(struct sk_buff *, int);
>>     u16      rr_tx_counter;
>>     struct   ad_bond_info ad_info;
>>     struct   alb_bond_info alb_info;
>> @@ -446,7 +445,7 @@ int bond_release(struct net_device *bond_dev, struct
>> net_device *slave_dev);
>> void bond_mii_monitor(struct work_struct *);
>> void bond_loadbalance_arp_mon(struct work_struct *);
>> void bond_activebackup_arp_mon(struct work_struct *);
>> -void bond_set_mode_ops(struct bonding *bond, int mode);
>> +int bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, int count);
>> int bond_parse_parm(const char *mode_arg, const struct bond_parm_tbl *tbl);
>> void bond_select_active_slave(struct bonding *bond);
>> void bond_change_active_slave(struct bonding *bond, struct slave
>> *new_active);
>> diff --git a/include/uapi/linux/if_bonding.h
>> b/include/uapi/linux/if_bonding.h
>> index a17edda..9635a62 100644
>> --- a/include/uapi/linux/if_bonding.h
>> +++ b/include/uapi/linux/if_bonding.h
>> @@ -91,6 +91,8 @@
>> #define BOND_XMIT_POLICY_LAYER2        0 /* layer 2 (MAC only), default */
>> #define BOND_XMIT_POLICY_LAYER34    1 /* layer 3+4 (IP ^ (TCP || UDP)) */
>> #define BOND_XMIT_POLICY_LAYER23    2 /* layer 2+3 (IP ^ MAC) */
>> +#define BOND_XMIT_POLICY_ENCAP23    3 /* encapsulated layer 2+3 */
>> +#define BOND_XMIT_POLICY_ENCAP34    4 /* encapsulated layer 3+4 */
>>
>> typedef struct ifbond {
>>     __s32 bond_mode;
>> -- 
>> 1.8.1.4
>>
>> -- 
>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>> the body of a message to majordomo@...r.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html