lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon, 18 Nov 2019 16:15:47 +0000
From:   Edward Cree <ecree@...arflare.com>
To:     Paolo Abeni <pabeni@...hat.com>, <netdev@...r.kernel.org>
CC:     "David S. Miller" <davem@...emloft.net>,
        Willem de Bruijn <willemdebruijn.kernel@...il.com>
Subject: Re: [PATCH net-next 2/2] ipv4: use dst hint for ipv4 list receive

On 16/11/2019 09:14, Paolo Abeni wrote:
> This is alike the previous change, with some additional ipv4 specific
> quirk. Even when using the route hint we still have to do perform
> additional per packet checks about source address validity: a new
> helper is added to wrap them.
>
> Moreover, the ipv4 route lookup, even in the absence of policy routing,
> may depend on pkts ToS, so we cache that values, too.
>
> Explicitly avoid hints for local broadcast: this simplify the code
> and broadcasts are slower path anyway.
>
> UDP flood performances vs recvmmsg() receiver:
>
> vanilla		patched		delta
> Kpps		Kpps		%
> 1683		1833		+8
>
> In the worst case scenario - each packet has a different
> destination address - the performance delta is within noise
> range.
>
> Signed-off-by: Paolo Abeni <pabeni@...hat.com>
> ---
>  include/net/route.h | 11 +++++++++++
>  net/ipv4/ip_input.c | 29 ++++++++++++++++++++++++-----
>  net/ipv4/route.c    | 38 ++++++++++++++++++++++++++++++++++++++
>  3 files changed, 73 insertions(+), 5 deletions(-)
>
> diff --git a/include/net/route.h b/include/net/route.h
> index 6c516840380d..f7a8a52318cd 100644
> --- a/include/net/route.h
> +++ b/include/net/route.h
> @@ -185,6 +185,17 @@ int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src,
>  		       u8 tos, struct net_device *devin,
>  		       struct fib_result *res);
>  
> +struct ip_route_input_hint {
> +	unsigned long	refdst;
> +	__be32		daddr;
> +	char		tos;
Why isn't this a u8?

> +	bool		local;
> +};
> +
> +int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src,
> +		      u8 tos, struct net_device *devin,
> +		      struct ip_route_input_hint *hint);
> +
>  static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
>  				 u8 tos, struct net_device *devin)
>  {
> diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
> index 24a95126e698..78fd60bf1c8a 100644
> --- a/net/ipv4/ip_input.c
> +++ b/net/ipv4/ip_input.c
> @@ -305,7 +305,8 @@ static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev)
>  INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *));
>  INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *));
>  static int ip_rcv_finish_core(struct net *net, struct sock *sk,
> -			      struct sk_buff *skb, struct net_device *dev)
> +			      struct sk_buff *skb, struct net_device *dev,
> +			      struct ip_route_input_hint *hint)
>  {
>  	const struct iphdr *iph = ip_hdr(skb);
>  	int (*edemux)(struct sk_buff *skb);
> @@ -335,8 +336,12 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
>  	 *	how the packet travels inside Linux networking.
>  	 */
>  	if (!skb_valid_dst(skb)) {
> -		err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
> -					   iph->tos, dev);
> +		if (hint && hint->daddr == iph->daddr && hint->tos == iph->tos)
> +			err = ip_route_use_hint(skb, iph->daddr, iph->saddr,
> +						iph->tos, dev, hint);
> +		else
> +			err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
> +						   iph->tos, dev);
>  		if (unlikely(err))
>  			goto drop_error;
>  	}
> @@ -408,7 +413,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
>  	if (!skb)
>  		return NET_RX_SUCCESS;
>  
> -	ret = ip_rcv_finish_core(net, sk, skb, dev);
> +	ret = ip_rcv_finish_core(net, sk, skb, dev, NULL);
>  	if (ret != NET_RX_DROP)
>  		ret = dst_input(skb);
>  	return ret;
> @@ -538,6 +543,7 @@ static void ip_sublist_rcv_finish(struct list_head *head)
>  static void ip_list_rcv_finish(struct net *net, struct sock *sk,
>  			       struct list_head *head)
>  {
> +	struct ip_route_input_hint _hint, *hint = NULL;
>  	struct dst_entry *curr_dst = NULL;
>  	struct sk_buff *skb, *next;
>  	struct list_head sublist;
> @@ -554,11 +560,24 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
>  		skb = l3mdev_ip_rcv(skb);
>  		if (!skb)
>  			continue;
> -		if (ip_rcv_finish_core(net, sk, skb, dev) == NET_RX_DROP)
> +		if (ip_rcv_finish_core(net, sk, skb, dev, hint) == NET_RX_DROP)
>  			continue;
>  
>  		dst = skb_dst(skb);
>  		if (curr_dst != dst) {
> +			struct rtable *rt = (struct rtable *)dst;
> +
> +			if (!net->ipv4.fib_has_custom_rules &&
> +			    rt->rt_type != RTN_BROADCAST) {
> +				_hint.refdst = skb->_skb_refdst;
> +				_hint.daddr = ip_hdr(skb)->daddr;
> +				_hint.tos = ip_hdr(skb)->tos;
> +				_hint.local = rt->rt_type == RTN_LOCAL;
> +				hint = &_hint;
> +			} else {
> +				hint = NULL;
> +			}
Perhaps factor this block out into a function?  Just because it's getting
 deeply indented and giving it a name would make it more obvious what it's
 for.  hint = ipv4_extract_route_hint(skb, &_hint)?

> +
>  			/* dispatch old sublist */
>  			if (!list_empty(&sublist))
>  				ip_sublist_rcv_finish(&sublist);
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index dcc4fa10138d..b0ddff17db80 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -2019,6 +2019,44 @@ static int ip_mkroute_input(struct sk_buff *skb,
>  	return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
>  }
>  
> +/* Implements all the saddr-related checks as ip_route_input_slow(),
> + * assuming daddr is valid and this is not a local broadcast.
> + * Uses the provided hint instead of performing a route lookup.
> + */
> +int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
> +		      u8 tos, struct net_device *dev,
> +		      struct ip_route_input_hint *hint)
Mostly I like the idea of these patches, but it bugs me that this seems
 to be reimplementing a little bit, and might get out of sync.  Is it
 possible to factor out the checks from ip_route_input_slow() and just
 call them here?
Otherwise maybe stick something in the comment to ip_route_input_slow()
 reminding to propagate changes to ip_route_use_hint()?

Or perhaps better still would be to come up with a single function that
 always takes a hint, that may be NULL, in which case it performs normal
 routing; and use that in all paths?  (Plumbing the hint through from
 ip_route_input_noref() etc.)

-Ed

> +{
> +	struct in_device *in_dev = __in_dev_get_rcu(dev);
> +	struct net *net = dev_net(dev);
> +	int err = -EINVAL;
> +	u32 itag = 0;
> +
> +	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
> +		goto martian_source;
> +
> +	if (ipv4_is_zeronet(saddr))
> +		goto martian_source;
> +
> +	if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
> +		goto martian_source;
> +
> +	if (hint->local) {
> +		err = fib_validate_source(skb, saddr, daddr, tos, 0, dev,
> +					  in_dev, &itag);
> +		if (err < 0)
> +			goto martian_source;
> +	}
> +
> +	err = 0;
> +	__skb_dst_copy(skb, hint->refdst);
> +	return err;
> +
> +martian_source:
> +	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
> +	return err;
> +}
> +
>  /*
>   *	NOTE. We drop all the packets that has local source
>   *	addresses, because every properly looped back packet

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ