[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <05765FB0-BF27-4648-B7B7-93490EB50C13@cumulusnetworks.com>
Date:   Tue, 14 Mar 2017 20:55:22 +0200
From:   Nikolay Aleksandrov <nikolay@...ulusnetworks.com>
To:     netdev@...r.kernel.org
Cc:     "David S. Miller" <davem@...emloft.net>,
        Roopa Prabhu <roopa@...ulusnetworks.com>,
        David Ahern <dsa@...ulusnetworks.com>, jkbs@...hat.com,
        edumazet@...gle.com, pch@...bogen.com
Subject: Re: [PATCH net-next v3] net: ipv4: add support for ECMP hash policy choice
> On Mar 14, 2017, at 5:36 PM, Nikolay Aleksandrov <nikolay@...ulusnetworks.com> wrote:
> 
> This patch adds support for ECMP hash policy choice via a new sysctl
> called fib_multipath_hash_policy and also adds support for L4 hashes.
> The current values for fib_multipath_hash_policy are:
> 0 - layer 3 (default)
> 1 - layer 4
> If there's an skb hash already set and it matches the chosen policy then it
> will be used instead of being calculated (currently only for L4).
> In L3 mode we always calculate the hash due to the ICMP error special
> case, the flow dissector's field consistentification should handle the
> address order thus we can remove the address reversals.
> 
> Signed-off-by: Nikolay Aleksandrov <nikolay@...ulusnetworks.com>
> ---
> v3:
> - keep the ICMP error special handling and always calc L3 hash
>   Jakub, could you please run your tests with this version ?
> 
> v2:
> - removed the output_key_hash as it's not needed anymore
> - reverted to my original/internal patch with L3 as default hash
> 
> Documentation/networking/ip-sysctl.txt |  8 +++
> include/net/ip_fib.h                   | 14 ++----
> include/net/netns/ipv4.h               |  1 +
> include/net/route.h                    |  9 +---
> net/ipv4/fib_semantics.c               | 11 ++--
> net/ipv4/icmp.c                        | 19 +------
> net/ipv4/route.c                       | 92 ++++++++++++++++++++++++++--------
> net/ipv4/sysctl_net_ipv4.c             |  9 ++++
> 8 files changed, 98 insertions(+), 65 deletions(-)
> 
[snip]
> /* To make ICMP packets follow the right flow, the multipath hash is
> - * calculated from the inner IP addresses in reverse order.
> + * calculated from the inner IP addresses.
>  */
> -static int ip_multipath_icmp_hash(struct sk_buff *skb)
> +static void ip_multipath_icmp_hash(const struct sk_buff *skb,
> +				   struct flowi4 *fl4)
> {
> 	const struct iphdr *outer_iph = ip_hdr(skb);
> 	struct icmphdr _icmph;
> @@ -1746,33 +1746,85 @@ static int ip_multipath_icmp_hash(struct sk_buff *skb)
> 	struct iphdr _inner_iph;
> 	const struct iphdr *inner_iph;
> 
> +	fl4->saddr = outer_iph->saddr;
> +	fl4->daddr = outer_iph->daddr;
> 	if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
> -		goto standard_hash;
> +		return;
> 
> 	icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
> 				   &_icmph);
> 	if (!icmph)
> -		goto standard_hash;
> +		return;
> 
> 	if (icmph->type != ICMP_DEST_UNREACH &&
> 	    icmph->type != ICMP_REDIRECT &&
> 	    icmph->type != ICMP_TIME_EXCEEDED &&
> -	    icmph->type != ICMP_PARAMETERPROB) {
> -		goto standard_hash;
> -	}
> +	    icmph->type != ICMP_PARAMETERPROB)
> +		return;
> 
> 	inner_iph = skb_header_pointer(skb,
> 				       outer_iph->ihl * 4 + sizeof(_icmph),
> 				       sizeof(_inner_iph), &_inner_iph);
> 	if (!inner_iph)
> -		goto standard_hash;
> +		return;
> +	fl4->saddr = inner_iph->saddr;
> +	fl4->daddr = inner_iph->daddr;
> +}
> 
> -	return fib_multipath_hash(inner_iph->daddr, inner_iph->saddr);
> +int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
> +		       const struct sk_buff *skb)
> +{
> +	struct net *net = fi->fib_net;
> +	struct flow_keys hash_keys;
> +	u32 mhash;
> 
> -standard_hash:
> -	return fib_multipath_hash(outer_iph->saddr, outer_iph->daddr);
> -}
> +	switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
> +	case 0:
> +		memset(&hash_keys, 0, sizeof(hash_keys));
> +		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
> +		if (skb && ip_hdr(skb)->protocol == IPPROTO_ICMP) {
> +			struct flowi4 _fl4;
> 
> +			ip_multipath_icmp_hash(skb, &_fl4);
Ugh, obviously I could’ve just passed hash_keys here, will  change this in v4 but I’ll wait
to see if there aren’t any other comments or issues.
Thanks,
 Nik
> +			hash_keys.addrs.v4addrs.src = _fl4.saddr;
> +			hash_keys.addrs.v4addrs.dst = _fl4.daddr;
> +		} else {
> +			hash_keys.addrs.v4addrs.src = fl4->saddr;
> +			hash_keys.addrs.v4addrs.dst = fl4->daddr;
> +		}
> +		break;
> +	case 1:
> +		/* skb is currently provided only when forwarding */
> +		if (skb) {
> +			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
> +			struct flow_keys keys;
> +
> +			/* short-circuit if we already have L4 hash present */
> +			if (skb->l4_hash)
> +				return skb_get_hash_raw(skb) >> 1;
> +			memset(&hash_keys, 0, sizeof(hash_keys));
> +			skb_flow_dissect_flow_keys(skb, &keys, flag);
> +			hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
> +			hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
> +			hash_keys.ports.src = keys.ports.src;
> +			hash_keys.ports.dst = keys.ports.dst;
> +			hash_keys.basic.ip_proto = keys.basic.ip_proto;
> +		} else {
> +			memset(&hash_keys, 0, sizeof(hash_keys));
> +			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
> +			hash_keys.addrs.v4addrs.src = fl4->saddr;
> +			hash_keys.addrs.v4addrs.dst = fl4->daddr;
> +			hash_keys.ports.src = fl4->fl4_sport;
> +			hash_keys.ports.dst = fl4->fl4_dport;
> +			hash_keys.basic.ip_proto = fl4->flowi4_proto;
> +		}
> +		break;
> +	}
> +	mhash = flow_hash_from_keys(&hash_keys);
> +
> +	return mhash >> 1;
> +}
> +EXPORT_SYMBOL_GPL(fib_multipath_hash);
> #endif /* CONFIG_IP_ROUTE_MULTIPATH */
Powered by blists - more mailing lists
 
