[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <b1f5fdb2-70a7-1391-19ba-5cdd6e0023dc@cumulusnetworks.com>
Date: Tue, 26 May 2020 18:09:13 +0300
From: Nikolay Aleksandrov <nikolay@...ulusnetworks.com>
To: David Ahern <dsahern@...nel.org>, netdev@...r.kernel.org
Cc: davem@...emloft.net, kuba@...nel.org,
David Ahern <dsahern@...il.com>
Subject: Re: [PATCH net 4/5] ipv4: Refactor nhc evaluation in fib_table_lookup
On 26/05/2020 18:01, David Ahern wrote:
> From: David Ahern <dsahern@...il.com>
>
> FIB lookups can return an entry that references an external nexthop.
> While walking the nexthop struct we do not want to make multiple calls
> into the nexthop code which can result in 2 different structs getting
> accessed - one returning the number of paths the rest of the loop
> seeing a different nh_grp struct. If the nexthop group shrunk, the
> result is an attempt to access a fib_nh_common that does not exist for
> the new nh_grp struct but did for the old one.
>
> To fix that move the device evaluation code to a helper that can be
> used for inline fib_nh path as well as external nexthops.
>
> Update the existing check for fi->nh in fib_table_lookup to call a
> new helper, nexthop_get_nhc_lookup, which walks the external nexthop
> with a single rcu dereference.
>
> Fixes: 430a049190de ("nexthop: Add support for nexthop groups")
> Signed-off-by: David Ahern <dsahern@...il.com>
> ---
> include/net/ip_fib.h | 2 ++
> include/net/nexthop.h | 33 ++++++++++++++++++++++++++++
> net/ipv4/fib_trie.c | 51 ++++++++++++++++++++++++++++++-------------
> 3 files changed, 71 insertions(+), 15 deletions(-)
>
Acked-by: Nikolay Aleksandrov <nikolay@...ulusnetworks.com>
> diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
> index 59e0d4e99f94..1f1dd22980e4 100644
> --- a/include/net/ip_fib.h
> +++ b/include/net/ip_fib.h
> @@ -480,6 +480,8 @@ void fib_nh_common_release(struct fib_nh_common *nhc);
> void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri);
> void fib_trie_init(void);
> struct fib_table *fib_trie_table(u32 id, struct fib_table *alias);
> +bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags,
> + const struct flowi4 *flp);
>
> static inline void fib_combine_itag(u32 *itag, const struct fib_result *res)
> {
> diff --git a/include/net/nexthop.h b/include/net/nexthop.h
> index f09e8d7d9886..9414ae46fc1c 100644
> --- a/include/net/nexthop.h
> +++ b/include/net/nexthop.h
> @@ -233,6 +233,39 @@ struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel)
> return &nhi->fib_nhc;
> }
>
> +/* called from fib_table_lookup with rcu_lock */
> +static inline
> +struct fib_nh_common *nexthop_get_nhc_lookup(const struct nexthop *nh,
> + int fib_flags,
> + const struct flowi4 *flp,
> + int *nhsel)
> +{
> + struct nh_info *nhi;
> +
> + if (nh->is_group) {
> + struct nh_group *nhg = rcu_dereference(nh->nh_grp);
> + int i;
> +
> + for (i = 0; i < nhg->num_nh; i++) {
> + struct nexthop *nhe = nhg->nh_entries[i].nh;
> +
> + nhi = rcu_dereference(nhe->nh_info);
> + if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) {
> + *nhsel = i;
> + return &nhi->fib_nhc;
> + }
> + }
> + } else {
> + nhi = rcu_dereference(nh->nh_info);
> + if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) {
> + *nhsel = 0;
> + return &nhi->fib_nhc;
> + }
> + }
> +
> + return NULL;
> +}
> +
> static inline unsigned int fib_info_num_path(const struct fib_info *fi)
> {
> if (unlikely(fi->nh))
> diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
> index 4f334b425538..248f1c1959a6 100644
> --- a/net/ipv4/fib_trie.c
> +++ b/net/ipv4/fib_trie.c
> @@ -1371,6 +1371,26 @@ static inline t_key prefix_mismatch(t_key key, struct key_vector *n)
> return (key ^ prefix) & (prefix | -prefix);
> }
>
> +bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags,
> + const struct flowi4 *flp)
> +{
> + if (nhc->nhc_flags & RTNH_F_DEAD)
> + return false;
> +
> + if (ip_ignore_linkdown(nhc->nhc_dev) &&
> + nhc->nhc_flags & RTNH_F_LINKDOWN &&
> + !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
> + return false;
> +
> + if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) {
> + if (flp->flowi4_oif &&
> + flp->flowi4_oif != nhc->nhc_oif)
> + return false;
> + }
> +
> + return true;
> +}
> +
> /* should be called with rcu_read_lock */
> int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
> struct fib_result *res, int fib_flags)
> @@ -1503,6 +1523,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
> /* Step 3: Process the leaf, if that fails fall back to backtracing */
> hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
> struct fib_info *fi = fa->fa_info;
> + struct fib_nh_common *nhc;
> int nhsel, err;
>
> if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) {
> @@ -1528,26 +1549,25 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
> if (fi->fib_flags & RTNH_F_DEAD)
> continue;
>
> - if (unlikely(fi->nh && nexthop_is_blackhole(fi->nh))) {
> - err = fib_props[RTN_BLACKHOLE].error;
> - goto out_reject;
> + if (unlikely(fi->nh)) {
> + if (nexthop_is_blackhole(fi->nh)) {
> + err = fib_props[RTN_BLACKHOLE].error;
> + goto out_reject;
> + }
> +
> + nhc = nexthop_get_nhc_lookup(fi->nh, fib_flags, flp,
> + &nhsel);
> + if (nhc)
> + goto set_result;
> + goto miss;
> }
>
> for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
> - struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel);
> + nhc = fib_info_nhc(fi, nhsel);
>
> - if (nhc->nhc_flags & RTNH_F_DEAD)
> + if (!fib_lookup_good_nhc(nhc, fib_flags, flp))
> continue;
> - if (ip_ignore_linkdown(nhc->nhc_dev) &&
> - nhc->nhc_flags & RTNH_F_LINKDOWN &&
> - !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
> - continue;
> - if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) {
> - if (flp->flowi4_oif &&
> - flp->flowi4_oif != nhc->nhc_oif)
> - continue;
> - }
> -
> +set_result:
> if (!(fib_flags & FIB_LOOKUP_NOREF))
> refcount_inc(&fi->fib_clntref);
>
> @@ -1568,6 +1588,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
> return err;
> }
> }
> +miss:
> #ifdef CONFIG_IP_FIB_TRIE_STATS
> this_cpu_inc(stats->semantic_match_miss);
> #endif
>
Powered by blists - more mailing lists