[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAOJe8K0+duRrj19OoPjdoX725Jwh4T=im=_aCp+N8UTNudp=3w@mail.gmail.com>
Date: Wed, 23 Sep 2015 08:43:49 +0300
From: Denis Kirjanov <kda@...ux-powerpc.org>
To: Christophe Leroy <christophe.leroy@....fr>
Cc: Benjamin Herrenschmidt <benh@...nel.crashing.org>,
Paul Mackerras <paulus@...ba.org>,
Michael Ellerman <mpe@...erman.id.au>, scottwood@...escale.com,
linux-kernel@...r.kernel.org, linuxppc-dev@...ts.ozlabs.org,
netdev@...r.kernel.org
Subject: Re: [PATCH 4/9] powerpc: inline ip_fast_csum()
On 9/22/15, Christophe Leroy <christophe.leroy@....fr> wrote:
> On several architectures, ip_fast_csum() is inlined.
> There are functions like ip_send_check() which do nothing
> much more than calling ip_fast_csum().
> Inlining ip_fast_csum() allows the compiler to optimise better.
Hi Christophe,
I tried it and saw no difference on ppc64. Did you test with socklib
with a modified loopback, and if so, do you have any numbers?
>
> Suggested-by: Eric Dumazet <eric.dumazet@...il.com>
> Signed-off-by: Christophe Leroy <christophe.leroy@....fr>
> ---
> arch/powerpc/include/asm/checksum.h | 46
> +++++++++++++++++++++++++++++++------
> arch/powerpc/lib/checksum_32.S | 21 -----------------
> arch/powerpc/lib/checksum_64.S | 27 ----------------------
> arch/powerpc/lib/ppc_ksyms.c | 1 -
> 4 files changed, 39 insertions(+), 56 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/checksum.h
> b/arch/powerpc/include/asm/checksum.h
> index afa6722..56deea8 100644
> --- a/arch/powerpc/include/asm/checksum.h
> +++ b/arch/powerpc/include/asm/checksum.h
> @@ -9,16 +9,9 @@
> * 2 of the License, or (at your option) any later version.
> */
>
> -/*
> - * This is a version of ip_compute_csum() optimized for IP headers,
> - * which always checksum on 4 octet boundaries. ihl is the number
> - * of 32-bit words and is always >= 5.
> - */
> #ifdef CONFIG_GENERIC_CSUM
> #include <asm-generic/checksum.h>
> #else
> -extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
> -
> /*
> * computes the checksum of a memory block at buff, length len,
> * and adds in "sum" (32-bit)
> @@ -137,6 +130,45 @@ static inline __wsum csum_add(__wsum csum, __wsum
> addend)
> #endif
> }
>
> +/*
> + * This is a version of ip_compute_csum() optimized for IP headers,
> + * which always checksum on 4 octet boundaries. ihl is the number
> + * of 32-bit words and is always >= 5.
> + */
> +static inline __wsum ip_fast_csum_nofold(const void *iph, unsigned int ihl)
> +{
> + u32 *ptr = (u32 *)iph + 1;
> +#ifdef __powerpc64__
> + unsigned int i;
> + u64 s = *(__force u32 *)iph;
> +
> + for (i = 0; i < ihl - 1; i++, ptr++)
> + s += *ptr;
> + s += (s >> 32);
> + return (__force __wsum)s;
> +
> +#else
> + __wsum sum, tmp;
> +
> + asm("mtctr %3;"
> + "addc %0,%4,%5;"
> + "1:lwzu %1, 4(%2);"
> + "adde %0,%0,%1;"
> + "bdnz 1b;"
> + "addze %0,%0;"
> + : "=r"(sum), "=r"(tmp), "+b"(ptr)
> + : "r"(ihl - 2), "r"(*(u32 *)iph), "r"(*ptr)
> + : "ctr", "xer", "memory");
> +
> + return sum;
> +#endif
> +}
> +
> +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
> +{
> + return csum_fold(ip_fast_csum_nofold(iph, ihl));
> +}
> +
> #endif
> #endif /* __KERNEL__ */
> #endif
> diff --git a/arch/powerpc/lib/checksum_32.S
> b/arch/powerpc/lib/checksum_32.S
> index 6d67e05..0d7eba3 100644
> --- a/arch/powerpc/lib/checksum_32.S
> +++ b/arch/powerpc/lib/checksum_32.S
> @@ -20,27 +20,6 @@
> .text
>
> /*
> - * ip_fast_csum(buf, len) -- Optimized for IP header
> - * len is in words and is always >= 5.
> - */
> -_GLOBAL(ip_fast_csum)
> - lwz r0,0(r3)
> - lwzu r5,4(r3)
> - addic. r4,r4,-2
> - addc r0,r0,r5
> - mtctr r4
> - blelr-
> -1: lwzu r4,4(r3)
> - adde r0,r0,r4
> - bdnz 1b
> - addze r0,r0 /* add in final carry */
> - rlwinm r3,r0,16,0,31 /* fold two halves together */
> - add r3,r0,r3
> - not r3,r3
> - srwi r3,r3,16
> - blr
> -
> -/*
> * computes the checksum of a memory block at buff, length len,
> * and adds in "sum" (32-bit)
> *
> diff --git a/arch/powerpc/lib/checksum_64.S
> b/arch/powerpc/lib/checksum_64.S
> index f3ef354..f53f4ab 100644
> --- a/arch/powerpc/lib/checksum_64.S
> +++ b/arch/powerpc/lib/checksum_64.S
> @@ -18,33 +18,6 @@
> #include <asm/ppc_asm.h>
>
> /*
> - * ip_fast_csum(r3=buf, r4=len) -- Optimized for IP header
> - * len is in words and is always >= 5.
> - *
> - * In practice len == 5, but this is not guaranteed. So this code does not
> - * attempt to use doubleword instructions.
> - */
> -_GLOBAL(ip_fast_csum)
> - lwz r0,0(r3)
> - lwzu r5,4(r3)
> - addic. r4,r4,-2
> - addc r0,r0,r5
> - mtctr r4
> - blelr-
> -1: lwzu r4,4(r3)
> - adde r0,r0,r4
> - bdnz 1b
> - addze r0,r0 /* add in final carry */
> - rldicl r4,r0,32,0 /* fold two 32-bit halves together */
> - add r0,r0,r4
> - srdi r0,r0,32
> - rlwinm r3,r0,16,0,31 /* fold two halves together */
> - add r3,r0,r3
> - not r3,r3
> - srwi r3,r3,16
> - blr
> -
> -/*
> * Computes the checksum of a memory block at buff, length len,
> * and adds in "sum" (32-bit).
> *
> diff --git a/arch/powerpc/lib/ppc_ksyms.c b/arch/powerpc/lib/ppc_ksyms.c
> index f5e427e..8cd5c0b 100644
> --- a/arch/powerpc/lib/ppc_ksyms.c
> +++ b/arch/powerpc/lib/ppc_ksyms.c
> @@ -19,7 +19,6 @@ EXPORT_SYMBOL(strncmp);
> #ifndef CONFIG_GENERIC_CSUM
> EXPORT_SYMBOL(csum_partial);
> EXPORT_SYMBOL(csum_partial_copy_generic);
> -EXPORT_SYMBOL(ip_fast_csum);
> #endif
>
> EXPORT_SYMBOL(__copy_tofrom_user);
> --
> 2.1.0
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists