[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <dad92c6d-d5b2-38a9-a8ad-e36a6a987a79@gmail.com>
Date: Tue, 14 Dec 2021 00:05:20 -0800
From: Eric Dumazet <eric.dumazet@...il.com>
To: Sun Shouxin <sunshouxin@...natelecom.cn>, j.vosburgh@...il.com,
vfalico@...il.com, andy@...yhouse.net, davem@...emloft.net,
kuba@...nel.org
Cc: netdev@...r.kernel.org, linux-kernel@...r.kernel.org,
huyd12@...natelecom.cn
Subject: Re: [PATCH V2] net: bonding: Add support for IPV6 ns/na
On 12/10/21 5:08 AM, Sun Shouxin wrote:
> Since ipv6 neighbor solicitation and advertisement messages
> isn't handled gracefully in bonding6 driver, we can see packet
> drop due to inconsistency bewteen mac address in the option
> message and source MAC .
>
> Another examples is ipv6 neighbor solicitation and advertisement
> messages from VM via tap attached to host brighe, the src mac
> mighe be changed through balance-alb mode, but it is not synced
> with Link-layer address in the option message.
>
> The patch implements bond6's tx handle for ipv6 neighbor
> solicitation and advertisement messages.
>
> Border-Leaf
> / \
> / \
> Tunnel1 Tunnel2
> / \
> / \
> Leaf-1--Tunnel3--Leaf-2
> \ /
> \ /
> \ /
> \ /
> NIC1 NIC2
> \ /
> server
>
> We can see in our lab the Border-Leaf receives occasionally
> a NA packet which is assigned to NIC1 mac in ND/NS option
> message, but actaully send out via NIC2 mac due to tx-alb,
> as a result, it will cause inconsistency between MAC table
> and ND Table in Border-Leaf, i.e, NIC1 = Tunnel2 in ND table
> and NIC1 = Tunnel1 in mac table.
>
> And then, Border-Leaf starts to forward packet destinated
> to the Server, it will only check the ND table entry in some
> switch to encapsulate the destination MAC of the message as
> NIC1 MAC, and then send it out from Tunnel2 by ND table.
> Then, Leaf-2 receives the packet, it notices the destination
> MAC of message is NIC1 MAC and should forword it to Tunne1
> by Tunnel3.
>
> However, this traffic forward will be failure due to split
> horizon of VxLAN tunnels.
>
> Suggested-by: Hu Yadi <huyd12@...natelecom.cn>
> Signed-off-by: Sun Shouxin <sunshouxin@...natelecom.cn>
> ---
> drivers/net/bonding/bond_alb.c | 131 +++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 131 insertions(+)
>
> diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
> index 533e476..afa386b 100644
> --- a/drivers/net/bonding/bond_alb.c
> +++ b/drivers/net/bonding/bond_alb.c
> @@ -22,6 +22,7 @@
> #include <asm/byteorder.h>
> #include <net/bonding.h>
> #include <net/bond_alb.h>
> +#include <net/ndisc.h>
>
> static const u8 mac_v6_allmcast[ETH_ALEN + 2] __long_aligned = {
> 0x33, 0x33, 0x00, 0x00, 0x00, 0x01
> @@ -1269,6 +1270,119 @@ static int alb_set_mac_address(struct bonding *bond, void *addr)
> return res;
> }
>
> +/*determine if the packet is NA or NS*/
> +static bool alb_determine_nd(struct icmp6hdr *hdr)
> +{
> + if (hdr->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT ||
> + hdr->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
> + return true;
> + }
> +
> + return false;
> +}
> +
> +static void alb_change_nd_option(struct sk_buff *skb, void *data)
> +{
> + struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
> + struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
> + struct net_device *dev = skb->dev;
> + struct icmp6hdr *icmp6h = icmp6_hdr(skb);
> + struct ipv6hdr *ip6hdr = ipv6_hdr(skb);
> + u8 *lladdr = NULL;
> + u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
> + offsetof(struct nd_msg, opt));
> +
> + while (ndoptlen) {
> + int l;
> +
> + switch (nd_opt->nd_opt_type) {
> + case ND_OPT_SOURCE_LL_ADDR:
> + case ND_OPT_TARGET_LL_ADDR:
> + lladdr = ndisc_opt_addr_data(nd_opt, dev);
> + break;
> +
> + default:
> + lladdr = NULL;
> + break;
> + }
> +
> + l = nd_opt->nd_opt_len << 3;
> +
> + if (ndoptlen < l || l == 0)
> + return;
> +
> + if (lladdr) {
> + memcpy(lladdr, data, dev->addr_len);
I am not sure it is allowed to change skb content without
making sure skb ->head is private.
(Think of tcpdump -i slaveX : we want to see the packet content before
your change)
I would think skb_cow_head() or something similar is needed.
This is tricky of course, since all cached pointers (icmp6h, ip6hdr,
msg, nd_opt)
would need to be fetched again, since skb->head/data might be changed
by skb_cow_head().
> + icmp6h->icmp6_cksum = 0;
> +
> + icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6hdr->saddr,
> + &ip6hdr->daddr,
> + ntohs(ip6hdr->payload_len),
> + IPPROTO_ICMPV6,
> + csum_partial(icmp6h,
> + ntohs(ip6hdr->payload_len), 0));
> + }
> + ndoptlen -= l;
> + nd_opt = ((void *)nd_opt) + l;
> + }
> +}
> +
> +static u8 *alb_get_lladdr(struct sk_buff *skb)
> +{
> + struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
> + struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)msg->opt;
> + struct net_device *dev = skb->dev;
> + u8 *lladdr = NULL;
> + u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
> + offsetof(struct nd_msg, opt));
> +
> + while (ndoptlen) {
> + int l;
> +
> + switch (nd_opt->nd_opt_type) {
> + case ND_OPT_SOURCE_LL_ADDR:
> + case ND_OPT_TARGET_LL_ADDR:
> + lladdr = ndisc_opt_addr_data(nd_opt, dev);
> + break;
> +
> + default:
> + break;
> + }
> +
> + l = nd_opt->nd_opt_len << 3;
> +
> + if (ndoptlen < l || l == 0)
> + return lladdr;
return NULL ?
(or risk out-of-bound access ?)
> +
> + if (lladdr)
> + return lladdr;
> +
> + ndoptlen -= l;
> + nd_opt = ((void *)nd_opt) + l;
> + }
> +
> + return lladdr;
> +}
> +
> +static void alb_set_nd_option(struct sk_buff *skb, struct bonding *bond,
> + struct slave *tx_slave)
> +{
> + struct ipv6hdr *ip6hdr;
> + struct icmp6hdr *hdr = NULL;
> +
> + if (skb->protocol == htons(ETH_P_IPV6)) {
> + if (tx_slave && tx_slave !=
> + rcu_access_pointer(bond->curr_active_slave)) {
> + ip6hdr = ipv6_hdr(skb);
> + if (ip6hdr->nexthdr == IPPROTO_ICMPV6) {
> + hdr = icmp6_hdr(skb);
> + if (alb_determine_nd(hdr))
> + alb_change_nd_option(skb, tx_slave->dev->dev_addr);
> + }
> + }
> + }
> +}
> +
> /************************ exported alb functions ************************/
>
> int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
> @@ -1415,6 +1529,7 @@ struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
> }
> case ETH_P_IPV6: {
> const struct ipv6hdr *ip6hdr;
> + struct icmp6hdr *hdr = NULL;
>
> /* IPv6 doesn't really use broadcast mac address, but leave
> * that here just in case.
> @@ -1446,6 +1561,21 @@ struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
> break;
> }
>
> + if (ip6hdr->nexthdr == IPPROTO_ICMPV6) {
> + hdr = icmp6_hdr(skb);
> + if (alb_determine_nd(hdr)) {
> + u8 *lladdr = NULL;
> +
> + lladdr = alb_get_lladdr(skb);
> + if (lladdr) {
> + if (!bond_slave_has_mac_rx(bond, lladdr)) {
> + do_tx_balance = false;
> + break;
> + }
> + }
> + }
> + }
> +
> hash_start = (char *)&ip6hdr->daddr;
> hash_size = sizeof(ip6hdr->daddr);
> break;
> @@ -1489,6 +1619,7 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
> struct slave *tx_slave = NULL;
>
> tx_slave = bond_xmit_alb_slave_get(bond, skb);
> + alb_set_nd_option(skb, bond, tx_slave);
> return bond_do_alb_xmit(skb, bond, tx_slave);
> }
>
Powered by blists - more mailing lists