lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <eb4c1dfe-d7c6-3011-e550-7498ccd023d7@chinatelecom.cn>
Date:   Mon, 20 Dec 2021 09:57:03 +0800
From:   孙守鑫 <sunshouxin@...natelecom.cn>
To:     Eric Dumazet <eric.dumazet@...il.com>, j.vosburgh@...il.com,
        vfalico@...il.com, andy@...yhouse.net, davem@...emloft.net,
        kuba@...nel.org
Cc:     netdev@...r.kernel.org, linux-kernel@...r.kernel.org,
        huyd12@...natelecom.cn
Subject: Re: [PATCH V2] net: bonding: Add support for IPV6 ns/na


在 2021/12/14 16:05, Eric Dumazet 写道:
>
> On 12/10/21 5:08 AM, Sun Shouxin wrote:
>> Since ipv6 neighbor solicitation and advertisement messages
>> isn't handled gracefully in bonding6 driver, we can see packet
>> drop due to inconsistency bewteen mac address in the option
>> message and source MAC .
>>
>> Another examples is ipv6 neighbor solicitation and advertisement
>> messages from VM via tap attached to host brighe, the src mac
>> mighe be changed through balance-alb mode, but it is not synced
>> with Link-layer address in the option message.
>>
>> The patch implements bond6's tx handle for ipv6 neighbor
>> solicitation and advertisement messages.
>>
>>             Border-Leaf
>>             /        \
>>                /          \
>>             Tunnel1    Tunnel2
>>              /              \
>>                 /                \
>>           Leaf-1--Tunnel3--Leaf-2
>>             \                /
>>              \              /
>>               \            /
>>                \          /
>>                NIC1    NIC2
>>             \      /
>>             server
>>
>> We can see in our lab the Border-Leaf receives occasionally
>> a NA packet which is assigned to NIC1 mac in ND/NS option
>> message, but actaully send out via NIC2 mac due to tx-alb,
>> as a result, it will cause inconsistency between MAC table
>> and ND Table in Border-Leaf, i.e, NIC1 = Tunnel2 in ND table
>> and  NIC1 = Tunnel1 in mac table.
>>
>> And then, Border-Leaf starts to forward packet destinated
>> to the Server, it will only check the ND table entry in some
>> switch to encapsulate the destination MAC of the message as
>> NIC1 MAC, and then send it out from Tunnel2 by ND table.
>> Then, Leaf-2 receives the packet, it notices the destination
>> MAC of message is NIC1 MAC and should forword it to Tunne1
>> by Tunnel3.
>>
>> However, this traffic forward will be failure due to split
>> horizon of VxLAN tunnels.
>>
>> Suggested-by: Hu Yadi <huyd12@...natelecom.cn>
>> Signed-off-by: Sun Shouxin <sunshouxin@...natelecom.cn>
>> ---
>>   drivers/net/bonding/bond_alb.c | 131 
>> +++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 131 insertions(+)
>>
>> diff --git a/drivers/net/bonding/bond_alb.c 
>> b/drivers/net/bonding/bond_alb.c
>> index 533e476..afa386b 100644
>> --- a/drivers/net/bonding/bond_alb.c
>> +++ b/drivers/net/bonding/bond_alb.c
>> @@ -22,6 +22,7 @@
>>   #include <asm/byteorder.h>
>>   #include <net/bonding.h>
>>   #include <net/bond_alb.h>
>> +#include <net/ndisc.h>
>>     static const u8 mac_v6_allmcast[ETH_ALEN + 2] __long_aligned = {
>>       0x33, 0x33, 0x00, 0x00, 0x00, 0x01
>> @@ -1269,6 +1270,119 @@ static int alb_set_mac_address(struct bonding 
>> *bond, void *addr)
>>       return res;
>>   }
>>   +/*determine if the packet is NA or NS*/
>> +static bool alb_determine_nd(struct icmp6hdr *hdr)
>> +{
>> +    if (hdr->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT ||
>> +        hdr->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
>> +        return true;
>> +    }
>> +
>> +    return false;
>> +}
>> +
>> +static void alb_change_nd_option(struct sk_buff *skb, void *data)
>> +{
>> +    struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
>> +    struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
>> +    struct net_device *dev = skb->dev;
>> +    struct icmp6hdr *icmp6h = icmp6_hdr(skb);
>> +    struct ipv6hdr *ip6hdr = ipv6_hdr(skb);
>> +    u8 *lladdr = NULL;
>> +    u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
>> +                offsetof(struct nd_msg, opt));
>> +
>> +    while (ndoptlen) {
>> +        int l;
>> +
>> +        switch (nd_opt->nd_opt_type) {
>> +        case ND_OPT_SOURCE_LL_ADDR:
>> +        case ND_OPT_TARGET_LL_ADDR:
>> +        lladdr = ndisc_opt_addr_data(nd_opt, dev);
>> +        break;
>> +
>> +        default:
>> +        lladdr = NULL;
>> +        break;
>> +        }
>> +
>> +        l = nd_opt->nd_opt_len << 3;
>> +
>> +        if (ndoptlen < l || l == 0)
>> +            return;
>> +
>> +        if (lladdr) {
>> +            memcpy(lladdr, data, dev->addr_len);
>
> I am not sure it is allowed to change skb content without
>
> making sure skb ->head is private.
>
> (Think of tcpdump -i slaveX : we want to see the packet content before 
> your change)
>
> I would think skb_cow_head() or something similar is needed.
>
> This is tricky of course, since all cached pointers (icmp6h, ip6hdr, 
> msg, nd_opt)
>
> would need to be fetched again, since skb->head/data might be changed
>
> by skb_cow_head().
The tcpdump should show the last packet which sent off from NIC in the end.
could you light me up specific conditions?
>
>
>
>
>
>> +            icmp6h->icmp6_cksum = 0;
>> +
>> +            icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6hdr->saddr,
>> +                                  &ip6hdr->daddr,
>> +                        ntohs(ip6hdr->payload_len),
>> +                        IPPROTO_ICMPV6,
>> +                        csum_partial(icmp6h,
>> +                                 ntohs(ip6hdr->payload_len), 0));
>> +        }
>> +        ndoptlen -= l;
>> +        nd_opt = ((void *)nd_opt) + l;
>> +    }
>> +}
>> +
>> +static u8 *alb_get_lladdr(struct sk_buff *skb)
>> +{
>> +    struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
>> +    struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
>> +    struct net_device *dev = skb->dev;
>> +    u8 *lladdr = NULL;
>> +    u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
>> +                offsetof(struct nd_msg, opt));
>> +
>> +    while (ndoptlen) {
>> +        int l;
>> +
>> +        switch (nd_opt->nd_opt_type) {
>> +        case ND_OPT_SOURCE_LL_ADDR:
>> +        case ND_OPT_TARGET_LL_ADDR:
>> +            lladdr = ndisc_opt_addr_data(nd_opt, dev);
>> +            break;
>> +
>> +        default:
>> +            break;
>> +        }
>> +
>> +        l = nd_opt->nd_opt_len << 3;
>> +
>> +        if (ndoptlen < l || l == 0)
>> +            return lladdr;
>
>                          return NULL ?
>
>                     (or risk out-of-bound access ?)
Thanks your comment, I'll adjust it and send out V4 soon.
>
>> +
>> +        if (lladdr)
>> +            return lladdr;
>> +
>> +        ndoptlen -= l;
>> +        nd_opt = ((void *)nd_opt) + l;
>> +    }
>> +
>> +    return lladdr;
>> +}
>> +
>> +static void alb_set_nd_option(struct sk_buff *skb, struct bonding 
>> *bond,
>> +                  struct slave *tx_slave)
>> +{
>> +    struct ipv6hdr *ip6hdr;
>> +    struct icmp6hdr *hdr = NULL;
>> +
>> +    if (skb->protocol == htons(ETH_P_IPV6)) {
>> +        if (tx_slave && tx_slave !=
>> +            rcu_access_pointer(bond->curr_active_slave)) {
>> +            ip6hdr = ipv6_hdr(skb);
>> +            if (ip6hdr->nexthdr == IPPROTO_ICMPV6) {
>> +                hdr = icmp6_hdr(skb);
>> +                if (alb_determine_nd(hdr))
>> +                    alb_change_nd_option(skb, tx_slave->dev->dev_addr);
>> +            }
>> +        }
>> +    }
>> +}
>> +
>>   /************************ exported alb functions 
>> ************************/
>>     int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
>> @@ -1415,6 +1529,7 @@ struct slave *bond_xmit_alb_slave_get(struct 
>> bonding *bond,
>>       }
>>       case ETH_P_IPV6: {
>>           const struct ipv6hdr *ip6hdr;
>> +        struct icmp6hdr *hdr = NULL;
>>             /* IPv6 doesn't really use broadcast mac address, but leave
>>            * that here just in case.
>> @@ -1446,6 +1561,21 @@ struct slave *bond_xmit_alb_slave_get(struct 
>> bonding *bond,
>>               break;
>>           }
>>   +        if (ip6hdr->nexthdr == IPPROTO_ICMPV6) {
>> +            hdr = icmp6_hdr(skb);
>> +            if (alb_determine_nd(hdr)) {
>> +                u8 *lladdr = NULL;
>> +
>> +                lladdr = alb_get_lladdr(skb);
>> +                if (lladdr) {
>> +                    if (!bond_slave_has_mac_rx(bond, lladdr)) {
>> +                        do_tx_balance = false;
>> +                        break;
>> +                    }
>> +                }
>> +            }
>> +        }
>> +
>>           hash_start = (char *)&ip6hdr->daddr;
>>           hash_size = sizeof(ip6hdr->daddr);
>>           break;
>> @@ -1489,6 +1619,7 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, 
>> struct net_device *bond_dev)
>>       struct slave *tx_slave = NULL;
>>         tx_slave = bond_xmit_alb_slave_get(bond, skb);
>> +    alb_set_nd_option(skb, bond, tx_slave);
>>       return bond_do_alb_xmit(skb, bond, tx_slave);
>>   }

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ