[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <c6d67eb8-623e-9265-567c-3d5cc1de7477@gmail.com>
Date: Tue, 19 Nov 2019 09:34:22 -0800
From: Eric Dumazet <eric.dumazet@...il.com>
To: Paolo Abeni <pabeni@...hat.com>, netdev@...r.kernel.org
Cc: "David S. Miller" <davem@...emloft.net>,
Willem de Bruijn <willemdebruijn.kernel@...il.com>,
Edward Cree <ecree@...arflare.com>,
David Ahern <dsahern@...il.com>
Subject: Re: [PATCH net-next v3 1/2] ipv6: introduce and uses route look hints
for list input
On 11/19/19 6:38 AM, Paolo Abeni wrote:
> When doing RX batch packet processing, we currently always repeat
> the route lookup for each ingress packet. If no policy routing is
> configured, and IPV6_SUBTREES is disabled at build time, we
> know that packets with the same destination address will use
> the same dst.
>
> This change tries to avoid the per-packet route lookup by caching
> the destination address of the latest successful lookup, and
> reusing it for the next packet when the above conditions are
> in place. Ingress traffic for most servers should fit.
>
> The measured performance delta under UDP flood vs a recvmmsg
> receiver is as follows:
>
> vanilla patched delta
> Kpps Kpps %
> 1431 1674 +17
>
> In the worst-case scenario - each packet has a different
> destination address - the performance delta is within noise
> range.
>
> v2 -> v3:
> - add fib6_has_custom_rules() helpers (David A.)
> - add ip6_extract_route_hint() helper (Edward C.)
> - use hint directly in ip6_list_rcv_finish() (Willem)
>
> v1 -> v2:
> - fix build issue with !CONFIG_IPV6_MULTIPLE_TABLES
> - fix potential race when fib6_has_custom_rules is set
> while processing a packet batch
>
> Signed-off-by: Paolo Abeni <pabeni@...hat.com>
> ---
> include/net/ip6_fib.h | 9 +++++++++
> net/ipv6/ip6_input.c | 26 ++++++++++++++++++++++++--
> 2 files changed, 33 insertions(+), 2 deletions(-)
>
> diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
> index 5d1615463138..9ab60611b97b 100644
> --- a/include/net/ip6_fib.h
> +++ b/include/net/ip6_fib.h
> @@ -502,6 +502,11 @@ static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric)
> }
>
> #ifdef CONFIG_IPV6_MULTIPLE_TABLES
> +static inline bool fib6_has_custom_rules(struct net *net)
const struct net *net
> +{
> + return net->ipv6.fib6_has_custom_rules;
It would be nice to be able to detect that some custom rules only impact egress routes :/
> +}
> +
> int fib6_rules_init(void);
> void fib6_rules_cleanup(void);
> bool fib6_rule_default(const struct fib_rule *rule);
> @@ -527,6 +532,10 @@ static inline bool fib6_rules_early_flow_dissect(struct net *net,
> return true;
> }
> #else
> +static inline bool fib6_has_custom_rules(struct net *net)
const struct net *net
> +{
> + return 0;
return false;
BTW, this deserves a patch on its own :)
> +}
> static inline int fib6_rules_init(void)
> {
> return 0;
> diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
> index ef7f707d9ae3..792b52aa9fc9 100644
> --- a/net/ipv6/ip6_input.c
> +++ b/net/ipv6/ip6_input.c
> @@ -59,6 +59,7 @@ static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
> INDIRECT_CALL_2(edemux, tcp_v6_early_demux,
> udp_v6_early_demux, skb);
> }
> +
Why add a new line? Please refrain from adding noise to a patch.
> if (!skb_valid_dst(skb))
> ip6_route_input(skb);
> }
> @@ -86,11 +87,26 @@ static void ip6_sublist_rcv_finish(struct list_head *head)
> }
> }
>
> +static bool ip6_can_use_hint(struct sk_buff *skb, const struct sk_buff *hint)
> +{
> + return hint && !skb_dst(skb) &&
> + ipv6_addr_equal(&ipv6_hdr(hint)->daddr, &ipv6_hdr(skb)->daddr);
> +}
> +
Why keep the whole skb as the hint, since all you want is ipv6_hdr(skb)->daddr?
Remembering the pointer to daddr would avoid dereferencing many skb fields.
> +static struct sk_buff *ip6_extract_route_hint(struct net *net,
> + struct sk_buff *skb)
> +{
> + if (IS_ENABLED(IPV6_SUBTREES) || fib6_has_custom_rules(net))
> + return NULL;
> +
> + return skb;
> +}
> +
> static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
> struct list_head *head)
> {
> + struct sk_buff *skb, *next, *hint = NULL;
> struct dst_entry *curr_dst = NULL;
> - struct sk_buff *skb, *next;
> struct list_head sublist;
>
> INIT_LIST_HEAD(&sublist);
> @@ -104,9 +120,15 @@ static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
> skb = l3mdev_ip6_rcv(skb);
> if (!skb)
> continue;
> - ip6_rcv_finish_core(net, sk, skb);
> +
> + if (ip6_can_use_hint(skb, hint))
> + skb_dst_copy(skb, hint);
> + else
> + ip6_rcv_finish_core(net, sk, skb);
> dst = skb_dst(skb);
> if (curr_dst != dst) {
> + hint = ip6_extract_route_hint(net, skb);
> +
> /* dispatch old sublist */
> if (!list_empty(&sublist))
> ip6_sublist_rcv_finish(&sublist);
>
Powered by blists - more mailing lists