[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <2393b7ba-2f58-421d-ef9b-a6ccd3804907@solarflare.com>
Date: Mon, 18 Nov 2019 16:15:47 +0000
From: Edward Cree <ecree@...arflare.com>
To: Paolo Abeni <pabeni@...hat.com>, <netdev@...r.kernel.org>
CC: "David S. Miller" <davem@...emloft.net>,
Willem de Bruijn <willemdebruijn.kernel@...il.com>
Subject: Re: [PATCH net-next 2/2] ipv4: use dst hint for ipv4 list receive
On 16/11/2019 09:14, Paolo Abeni wrote:
> This is alike the previous change, with some additional ipv4 specific
> quirk. Even when using the route hint we still have to do perform
> additional per packet checks about source address validity: a new
> helper is added to wrap them.
>
> Moreover, the ipv4 route lookup, even in the absence of policy routing,
> may depend on pkts ToS, so we cache that values, too.
>
> Explicitly avoid hints for local broadcast: this simplify the code
> and broadcasts are slower path anyway.
>
> UDP flood performances vs recvmmsg() receiver:
>
> vanilla patched delta
> Kpps Kpps %
> 1683 1833 +8
>
> In the worst case scenario - each packet has a different
> destination address - the performance delta is within noise
> range.
>
> Signed-off-by: Paolo Abeni <pabeni@...hat.com>
> ---
> include/net/route.h | 11 +++++++++++
> net/ipv4/ip_input.c | 29 ++++++++++++++++++++++++-----
> net/ipv4/route.c | 38 ++++++++++++++++++++++++++++++++++++++
> 3 files changed, 73 insertions(+), 5 deletions(-)
>
> diff --git a/include/net/route.h b/include/net/route.h
> index 6c516840380d..f7a8a52318cd 100644
> --- a/include/net/route.h
> +++ b/include/net/route.h
> @@ -185,6 +185,17 @@ int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src,
> u8 tos, struct net_device *devin,
> struct fib_result *res);
>
> +struct ip_route_input_hint {
> + unsigned long refdst;
> + __be32 daddr;
> + char tos;
Why isn't this a u8?
> + bool local;
> +};
> +
> +int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src,
> + u8 tos, struct net_device *devin,
> + struct ip_route_input_hint *hint);
> +
> static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
> u8 tos, struct net_device *devin)
> {
> diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
> index 24a95126e698..78fd60bf1c8a 100644
> --- a/net/ipv4/ip_input.c
> +++ b/net/ipv4/ip_input.c
> @@ -305,7 +305,8 @@ static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev)
> INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *));
> INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *));
> static int ip_rcv_finish_core(struct net *net, struct sock *sk,
> - struct sk_buff *skb, struct net_device *dev)
> + struct sk_buff *skb, struct net_device *dev,
> + struct ip_route_input_hint *hint)
> {
> const struct iphdr *iph = ip_hdr(skb);
> int (*edemux)(struct sk_buff *skb);
> @@ -335,8 +336,12 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
> * how the packet travels inside Linux networking.
> */
> if (!skb_valid_dst(skb)) {
> - err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
> - iph->tos, dev);
> + if (hint && hint->daddr == iph->daddr && hint->tos == iph->tos)
> + err = ip_route_use_hint(skb, iph->daddr, iph->saddr,
> + iph->tos, dev, hint);
> + else
> + err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
> + iph->tos, dev);
> if (unlikely(err))
> goto drop_error;
> }
> @@ -408,7 +413,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
> if (!skb)
> return NET_RX_SUCCESS;
>
> - ret = ip_rcv_finish_core(net, sk, skb, dev);
> + ret = ip_rcv_finish_core(net, sk, skb, dev, NULL);
> if (ret != NET_RX_DROP)
> ret = dst_input(skb);
> return ret;
> @@ -538,6 +543,7 @@ static void ip_sublist_rcv_finish(struct list_head *head)
> static void ip_list_rcv_finish(struct net *net, struct sock *sk,
> struct list_head *head)
> {
> + struct ip_route_input_hint _hint, *hint = NULL;
> struct dst_entry *curr_dst = NULL;
> struct sk_buff *skb, *next;
> struct list_head sublist;
> @@ -554,11 +560,24 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
> skb = l3mdev_ip_rcv(skb);
> if (!skb)
> continue;
> - if (ip_rcv_finish_core(net, sk, skb, dev) == NET_RX_DROP)
> + if (ip_rcv_finish_core(net, sk, skb, dev, hint) == NET_RX_DROP)
> continue;
>
> dst = skb_dst(skb);
> if (curr_dst != dst) {
> + struct rtable *rt = (struct rtable *)dst;
> +
> + if (!net->ipv4.fib_has_custom_rules &&
> + rt->rt_type != RTN_BROADCAST) {
> + _hint.refdst = skb->_skb_refdst;
> + _hint.daddr = ip_hdr(skb)->daddr;
> + _hint.tos = ip_hdr(skb)->tos;
> + _hint.local = rt->rt_type == RTN_LOCAL;
> + hint = &_hint;
> + } else {
> + hint = NULL;
> + }
Perhaps factor this block out into a function? Just because it's getting
deeply indented and giving it a name would make it more obvious what it's
for. hint = ipv4_extract_route_hint(skb, &_hint)?
> +
> /* dispatch old sublist */
> if (!list_empty(&sublist))
> ip_sublist_rcv_finish(&sublist);
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index dcc4fa10138d..b0ddff17db80 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -2019,6 +2019,44 @@ static int ip_mkroute_input(struct sk_buff *skb,
> return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
> }
>
> +/* Implements all the saddr-related checks as ip_route_input_slow(),
> + * assuming daddr is valid and this is not a local broadcast.
> + * Uses the provided hint instead of performing a route lookup.
> + */
> +int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
> + u8 tos, struct net_device *dev,
> + struct ip_route_input_hint *hint)
Mostly I like the idea of these patches, but it bugs me that this seems
to be reimplementing a little bit, and might get out of sync. Is it
possible to factor out the checks from ip_route_input_slow() and just
call them here?
Otherwise maybe stick something in the comment to ip_route_input_slow()
reminding to propagate changes to ip_route_use_hint()?
Or perhaps better still would be to come up with a single function that
always takes a hint, that may be NULL, in which case it performs normal
routing; and use that in all paths? (Plumbing the hint through from
ip_route_input_noref() etc.)
-Ed
> +{
> + struct in_device *in_dev = __in_dev_get_rcu(dev);
> + struct net *net = dev_net(dev);
> + int err = -EINVAL;
> + u32 itag = 0;
> +
> + if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
> + goto martian_source;
> +
> + if (ipv4_is_zeronet(saddr))
> + goto martian_source;
> +
> + if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
> + goto martian_source;
> +
> + if (hint->local) {
> + err = fib_validate_source(skb, saddr, daddr, tos, 0, dev,
> + in_dev, &itag);
> + if (err < 0)
> + goto martian_source;
> + }
> +
> + err = 0;
> + __skb_dst_copy(skb, hint->refdst);
> + return err;
> +
> +martian_source:
> + ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
> + return err;
> +}
> +
> /*
> * NOTE. We drop all the packets that has local source
> * addresses, because every properly looped back packet
Powered by blists - more mailing lists