Message-ID: <5448A72D.1050806@cumulusnetworks.com>
Date: Thu, 23 Oct 2014 02:58:53 -0400
From: Jonathan Toppins <jtoppins@...ulusnetworks.com>
To: Eric Dumazet <eric.dumazet@...il.com>,
David Ahern <dsahern@...il.com>
CC: Crestez Dan Leonard <cdleonard@...il.com>, netdev@...r.kernel.org
Subject: Re: [RFC] tcp md5 use of alloc_percpu
On 10/23/14, 1:38 AM, Eric Dumazet wrote:
> On Wed, 2014-10-22 at 22:23 -0700, Eric Dumazet wrote:
>> On Wed, 2014-10-22 at 22:40 -0600, David Ahern wrote:
>>> On 10/22/14, 12:55 PM, Crestez Dan Leonard wrote:
>>>> Hello,
>>>>
>>>> It seems that the TCP MD5 feature allocates a percpu struct tcp_md5sig_pool and uses part of that memory as a scratch buffer to do crypto on. Here is the relevant code:
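
(The code got trimmed in the quote above; for context, the pattern in
question is presumably the one in tcp_v4_md5_hash_pseudoheader(), where
a scratch buffer inside the percpu pool is fed to the scatterlist API.
A rough reconstruction from the 3.x sources, which may not match the
quoted tree exactly:

  struct tcp_md5sig_pool *hp = tcp_get_md5sig_pool();
  struct tcp4_pseudohdr *bp = &hp->md5_blk.ip4;
  struct scatterlist sg;

  /* sg_init_one() -> sg_set_buf() -> virt_to_page(), which is only
   * valid for addresses in the kernel linear mapping. Memory from
   * alloc_percpu() may be vmalloc-backed, so the page computed here
   * can be bogus.
   */
  sg_init_one(&sg, bp, sizeof(*bp));
  crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));

That is the part that breaks when the percpu chunk is not in the
linear mapping.)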
>>>
>>> This is a forward port of a local change that addresses the problem (our
>>> local kernel is 3.4, so my quick bump to top of tree may be off, but it
>>> shows the general idea). It has been on my to-do list to figure out why
>>> this change is needed, but it seems related to your problem:
>>>
>>> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
>>> index 1bec4e76d88c..833a676bd4b0 100644
>>> --- a/net/ipv4/tcp.c
>>> +++ b/net/ipv4/tcp.c
>>> @@ -2941,7 +2941,7 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
>>> local_bh_disable();
>>> p = ACCESS_ONCE(tcp_md5sig_pool);
>>> if (p)
>>> - return raw_cpu_ptr(p);
>>> + return __va(per_cpu_ptr_to_phys(raw_cpu_ptr(p)));
>>>
>>> local_bh_enable();
>>> return NULL;
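
(For anyone following along: the replacement line resolves the possibly
vmalloc-backed percpu address to its physical address and then maps it
back through __va() to an alias in the kernel linear mapping, so that
virt_to_page() on the scratch buffer is valid again. Step by step,
roughly:

  struct tcp_md5sig_pool *p = raw_cpu_ptr(tcp_md5sig_pool);
  phys_addr_t phys = per_cpu_ptr_to_phys(p); /* walks percpu chunk metadata */
  struct tcp_md5sig_pool *lin = __va(phys);  /* linear-mapping alias */

The chunk-metadata walk in the middle step is what makes this costly,
as noted just below.)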
>>
>> per_cpu_ptr_to_phys() can be pretty expensive and should not be called
>> in fast path.
>>
>
> My updated patch would be :
>
> net/ipv4/tcp.c | 66 +++++++++++++++++++----------------------------
> 1 file changed, 28 insertions(+), 38 deletions(-)
>
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 1bec4e76d88c..af4dc16b61f6 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -2868,61 +2868,51 @@ EXPORT_SYMBOL(compat_tcp_getsockopt);
> #endif
>
> #ifdef CONFIG_TCP_MD5SIG
> -static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly;
> +static DEFINE_PER_CPU(struct tcp_md5sig_pool, *tcp_md5sig_pool);
> static DEFINE_MUTEX(tcp_md5sig_mutex);
> -
> -static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool)
> -{
> - int cpu;
> -
> - for_each_possible_cpu(cpu) {
> - struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu);
> -
> - if (p->md5_desc.tfm)
> - crypto_free_hash(p->md5_desc.tfm);
> - }
> - free_percpu(pool);
> -}
> +static bool tcp_md5sig_pool_populated = false;
>
> static void __tcp_alloc_md5sig_pool(void)
> {
> int cpu;
> - struct tcp_md5sig_pool __percpu *pool;
> -
> - pool = alloc_percpu(struct tcp_md5sig_pool);
> - if (!pool)
> - return;
>
> for_each_possible_cpu(cpu) {
> + struct tcp_md5sig_pool *pool;
> struct crypto_hash *hash;
>
> - hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
> - if (IS_ERR_OR_NULL(hash))
> - goto out_free;
> -
> - per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash;
> + pool = per_cpu(tcp_md5sig_pool, cpu);
> + if (!pool) {
> + pool = kzalloc_node(sizeof(*pool), GFP_KERNEL,
> + cpu_to_node(cpu));

Should this be GFP_DMA | GFP_KERNEL? This memory could end up being
used for DMA, correct? (Thinking of crypto hardware offload.)

> + if (!pool)
> + return;
> + per_cpu(tcp_md5sig_pool, cpu) = pool;
> + }
> + if (!pool->md5_desc.tfm) {
> + hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
> + if (IS_ERR_OR_NULL(hash))
> + return;
> + pool->md5_desc.tfm = hash;
> + }
> }
> - /* before setting tcp_md5sig_pool, we must commit all writes
> - * to memory. See ACCESS_ONCE() in tcp_get_md5sig_pool()
> + /* before setting tcp_md5sig_pool_populated, we must commit all writes
> + * to memory. See smp_rmb() in tcp_get_md5sig_pool()
> */
> smp_wmb();
> - tcp_md5sig_pool = pool;
> - return;
> -out_free:
> - __tcp_free_md5sig_pool(pool);
> + tcp_md5sig_pool_populated = true;
> }
>
> bool tcp_alloc_md5sig_pool(void)
> {
> - if (unlikely(!tcp_md5sig_pool)) {
> + if (unlikely(!tcp_md5sig_pool_populated)) {
> mutex_lock(&tcp_md5sig_mutex);
>
> - if (!tcp_md5sig_pool)
> + if (!tcp_md5sig_pool_populated)
> __tcp_alloc_md5sig_pool();
>
> mutex_unlock(&tcp_md5sig_mutex);
> }
> - return tcp_md5sig_pool != NULL;
> + return tcp_md5sig_pool_populated;
> }
> EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
>
> @@ -2936,13 +2926,13 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
> */
> struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
> {
> - struct tcp_md5sig_pool __percpu *p;
> -
> local_bh_disable();
> - p = ACCESS_ONCE(tcp_md5sig_pool);
> - if (p)
> - return raw_cpu_ptr(p);
>
> + if (tcp_md5sig_pool_populated) {
> + /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool */
> + smp_rmb();
> + return this_cpu_read(tcp_md5sig_pool);
> + }
> local_bh_enable();
> return NULL;
> }
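
The populated-flag publication above is the classic pairing; a minimal
standalone sketch of the same pattern (foo and init_foo() are
illustrative names, not from the patch):

  static struct foo *shared_data;
  static bool populated;

  void publisher(void)
  {
          shared_data = init_foo(); /* initialize everything first */
          smp_wmb();        /* order the data writes before the flag write */
          populated = true;
  }

  struct foo *consumer(void)
  {
          if (!populated)
                  return NULL;
          smp_rmb();        /* order the flag read before the data reads */
          return shared_data;
  }

Since tcp_md5sig_pool_populated never goes back to false once set, any
reader that observes it true is guaranteed to see fully initialized
per-cpu pointers and tfms.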