netdev - Re: [PATCH net-next V2 2/6] net/tls: Multi-threaded calls to TX tls_dev_del

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [<thread-prev] [day] [month] [year] [list]

Message-ID: <20220713201050.3aab0cb8@kernel.org>
Date:   Wed, 13 Jul 2022 20:10:50 -0700
From:   Jakub Kicinski <kuba@...nel.org>
To:     Tariq Toukan <tariqt@...dia.com>
Cc:     Boris Pismenny <borisp@...dia.com>,
        John Fastabend <john.fastabend@...il.com>,
        "David S . Miller" <davem@...emloft.net>,
        Eric Dumazet <edumazet@...gle.com>,
        Paolo Abeni <pabeni@...hat.com>, <netdev@...r.kernel.org>,
        Saeed Mahameed <saeedm@...dia.com>,
        Gal Pressman <galp@...dia.com>,
        Maxim Mikityanskiy <maximmi@...dia.com>
Subject: Re: [PATCH net-next V2 2/6] net/tls: Multi-threaded calls to TX
 tls_dev_del

On Wed, 13 Jul 2022 08:15:59 +0300 Tariq Toukan wrote:
> @@ -99,21 +85,17 @@ static void tls_device_queue_ctx_destruction(struct tls_context *ctx)
>  	bool async_cleanup;
>  
>  	spin_lock_irqsave(&tls_device_lock, flags);
> +	list_del(&ctx->list); /* Remove from tls_device_list / tls_device_down_list */
> +	spin_unlock_irqrestore(&tls_device_lock, flags);
> +
>  	async_cleanup = ctx->netdev && ctx->tx_conf == TLS_HW;
>  	if (async_cleanup) {
> -		list_move_tail(&ctx->list, &tls_device_gc_list);
> +		struct tls_offload_context_tx *offload_ctx = tls_offload_ctx_tx(ctx);
>  
> -		/* schedule_work inside the spinlock
> -		 * to make sure tls_device_down waits for that work.
> -		 */
> -		schedule_work(&tls_device_gc_work);
> +		queue_work(destruct_wq, &offload_ctx->destruct_work);

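Doesn't queue_work() need to be under the tls_device_lock?
Otherwise I think there's a race between removing the context from
the list and the netdev down notifier searching that list and flushing
the wq: the notifier may no longer find the context on the list and
flush the wq before the destruct work has been queued.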

>  	} else {
> -		list_del(&ctx->list);
> -	}
> -	spin_unlock_irqrestore(&tls_device_lock, flags);
> -
> -	if (!async_cleanup)
>  		tls_device_free_ctx(ctx);
> +	}
>  }
>  
>  /* We assume that the socket is already connected */
> @@ -1150,6 +1132,9 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
>  	start_marker_record->len = 0;
>  	start_marker_record->num_frags = 0;
>  
> +	INIT_WORK(&offload_ctx->destruct_work, tls_device_tx_del_task);
> +	offload_ctx->ctx = ctx;
> +
>  	INIT_LIST_HEAD(&offload_ctx->records_list);
>  	list_add_tail(&start_marker_record->list, &offload_ctx->records_list);
>  	spin_lock_init(&offload_ctx->lock);
> @@ -1389,7 +1374,7 @@ static int tls_device_down(struct net_device *netdev)
>  
>  	up_write(&device_offload_lock);
>  
> -	flush_work(&tls_device_gc_work);
> +	flush_workqueue(destruct_wq);
>  
>  	return NOTIFY_DONE;
>  }
> @@ -1428,14 +1413,20 @@ static struct notifier_block tls_dev_notifier = {
>  	.notifier_call	= tls_dev_event,
>  };
>  
> -void __init tls_device_init(void)
> +int __init tls_device_init(void)
>  {
> +	destruct_wq = alloc_workqueue("ktls_device_destruct", 0, 0);
> +	if (!destruct_wq)
> +		return -ENOMEM;
> +
>  	register_netdevice_notifier(&tls_dev_notifier);

For a future cleanup - we should probably check for errors here.
Or perhaps we should take the fix via net? If you spin a quick
patch it can still make tomorrow's net -> net-next merge.

> +	return 0;
>  }