[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20100308180603.GC23634@hmsreliant.think-freely.org>
Date: Mon, 8 Mar 2010 13:06:03 -0500
From: Neil Horman <nhorman@...driver.com>
To: Eric Dumazet <eric.dumazet@...il.com>
Cc: David Miller <davem@...emloft.net>,
netdev <netdev@...r.kernel.org>,
Paweł Staszewski <pstaszewski@...are.pl>
Subject: Re: [PATCH] net: fix route cache rebuilds
On Mon, Mar 08, 2010 at 02:20:00PM +0100, Eric Dumazet wrote:
> We added an automatic route cache rebuilding in commit 1080d709fb9d8cd43
> but had to correct few bugs. One of the assumption of original patch,
> was that entries where kept sorted in a given way.
>
> This assumption is known to be wrong (commit 1ddbcb005c395518 gave an
> explanation of this and corrected a leak) and expensive to respect.
>
> Paweł Staszewski reported to me one of his machine got its routing cache
> disabled after few messages like :
>
> [ 2677.850065] Route hash chain too long!
> [ 2677.850080] Adjust your secret_interval!
> [82839.662993] Route hash chain too long!
> [82839.662996] Adjust your secret_interval!
> [155843.731650] Route hash chain too long!
> [155843.731664] Adjust your secret_interval!
> [155843.811881] Route hash chain too long!
> [155843.811891] Adjust your secret_interval!
> [155843.858209] vlan0811: 5 rebuilds is over limit, route caching
> disabled
> [155843.858212] Route hash chain too long!
> [155843.858213] Adjust your secret_interval!
>
> This is because rt_intern_hash() might be fooled when computing a chain
> length, because multiple entries with same keys can differ because of
> TOS (or mark/oif) bits.
>
> In the rare case the fast algorithm see a too long chain, and before
> taking expensive path, we call a helper function in order to not count
> duplicates of same routes, that only differ with tos/mark/oif bits. This
> helper works with data already in cpu cache and is not be very
> expensive, despite its O(N^2) implementation.
>
> Paweł Staszewski sucessfully tested this patch on his loaded router.
>
> Reported-and-tested-by: Paweł Staszewski <pstaszewski@...are.pl>
> Signed-off-by: Eric Dumazet <eric.dumazet@...il.com>
> ---
> net/ipv4/route.c | 50 ++++++++++++++++++++++++++++++++++-----------
> 1 file changed, 38 insertions(+), 12 deletions(-)
>
This looks pretty reasonable to me, Thanks Eric!
Acked-by: Neil Horman <nhorman@...driver.com>
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index b2ba558..d9b4024 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -146,7 +146,6 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
> static void ipv4_link_failure(struct sk_buff *skb);
> static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
> static int rt_garbage_collect(struct dst_ops *ops);
> -static void rt_emergency_hash_rebuild(struct net *net);
>
>
> static struct dst_ops ipv4_dst_ops = {
> @@ -780,11 +779,30 @@ static void rt_do_flush(int process_context)
> #define FRACT_BITS 3
> #define ONE (1UL << FRACT_BITS)
>
> +/*
> + * Given a hash chain and an item in this hash chain,
> + * find if a previous entry has the same hash_inputs
> + * (but differs on tos, mark or oif)
> + * Returns 0 if an alias is found.
> + * Returns ONE if rth has no alias before itself.
> + */
> +static int has_noalias(const struct rtable *head, const struct rtable *rth)
> +{
> + const struct rtable *aux = head;
> +
> + while (aux != rth) {
> + if (compare_hash_inputs(&aux->fl, &rth->fl))
> + return 0;
> + aux = aux->u.dst.rt_next;
> + }
> + return ONE;
> +}
> +
> static void rt_check_expire(void)
> {
> static unsigned int rover;
> unsigned int i = rover, goal;
> - struct rtable *rth, *aux, **rthp;
> + struct rtable *rth, **rthp;
> unsigned long samples = 0;
> unsigned long sum = 0, sum2 = 0;
> unsigned long delta;
> @@ -835,15 +853,7 @@ nofree:
> * attributes don't unfairly skew
> * the length computation
> */
> - for (aux = rt_hash_table[i].chain;;) {
> - if (aux == rth) {
> - length += ONE;
> - break;
> - }
> - if (compare_hash_inputs(&aux->fl, &rth->fl))
> - break;
> - aux = aux->u.dst.rt_next;
> - }
> + length += has_noalias(rt_hash_table[i].chain, rth);
> continue;
> }
> } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
> @@ -1073,6 +1083,21 @@ work_done:
> out: return 0;
> }
>
> +/*
> + * Returns number of entries in a hash chain that have different hash_inputs
> + */
> +static int slow_chain_length(const struct rtable *head)
> +{
> + int length = 0;
> + const struct rtable *rth = head;
> +
> + while (rth) {
> + length += has_noalias(head, rth);
> + rth = rth->u.dst.rt_next;
> + }
> + return length >> FRACT_BITS;
> +}
> +
> static int rt_intern_hash(unsigned hash, struct rtable *rt,
> struct rtable **rp, struct sk_buff *skb)
> {
> @@ -1185,7 +1210,8 @@ restart:
> rt_free(cand);
> }
> } else {
> - if (chain_length > rt_chain_length_max) {
> + if (chain_length > rt_chain_length_max &&
> + slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) {
> struct net *net = dev_net(rt->u.dst.dev);
> int num = ++net->ipv4.current_rt_cache_rebuild_count;
> if (!rt_caching(dev_net(rt->u.dst.dev))) {
>
>
>
>
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists