lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aXBlQJnsJ6PbqysE@devvm11784.nha0.facebook.com>
Date: Tue, 20 Jan 2026 21:33:52 -0800
From: Bobby Eshleman <bobbyeshleman@...il.com>
To: Jakub Kicinski <kuba@...nel.org>
Cc: "David S. Miller" <davem@...emloft.net>,
	Eric Dumazet <edumazet@...gle.com>, Paolo Abeni <pabeni@...hat.com>,
	Simon Horman <horms@...nel.org>,
	Kuniyuki Iwashima <kuniyu@...gle.com>,
	Willem de Bruijn <willemb@...gle.com>,
	Neal Cardwell <ncardwell@...gle.com>,
	David Ahern <dsahern@...nel.org>,
	Mina Almasry <almasrymina@...gle.com>,
	Arnd Bergmann <arnd@...db.de>, Jonathan Corbet <corbet@....net>,
	Andrew Lunn <andrew+netdev@...n.ch>, Shuah Khan <shuah@...nel.org>,
	Donald Hunter <donald.hunter@...il.com>,
	Stanislav Fomichev <sdf@...ichev.me>, netdev@...r.kernel.org,
	linux-kernel@...r.kernel.org, linux-arch@...r.kernel.org,
	linux-doc@...r.kernel.org, linux-kselftest@...r.kernel.org,
	asml.silence@...il.com, matttbe@...nel.org, skhawaja@...gle.com,
	Bobby Eshleman <bobbyeshleman@...a.com>
Subject: Re: [PATCH net-next v10 3/5] net: devmem: implement autorelease
 token management

On Tue, Jan 20, 2026 at 05:00:42PM -0800, Jakub Kicinski wrote:
> On Thu, 15 Jan 2026 21:02:14 -0800 Bobby Eshleman wrote:
> > diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
> > index 596c306ce52b..a5301b150663 100644
> > --- a/Documentation/netlink/specs/netdev.yaml
> > +++ b/Documentation/netlink/specs/netdev.yaml
> > @@ -562,6 +562,17 @@ attribute-sets:
> >          type: u32
> >          checks:
> >            min: 1
> > +      -
> > +        name: autorelease
> > +        doc: |
> > +          Token autorelease mode. If true (1), leaked tokens are automatically
> > +          released when the socket closes. If false (0), leaked tokens are only
> > +          released when the dmabuf is torn down. Once a binding is created with
> > +          a specific mode, all subsequent bindings system-wide must use the
> > +          same mode.
> > +
> > +          Optional. Defaults to false if not specified.
> > +        type: u8
> 
> If you plan to have more values, use u32; if not, use a flag.
> A u8 is an 8-bit value plus 24 bits of padding, so it's only useful for
> protocol fields.
> 
> >  operations:
> >    list:
> > @@ -769,6 +780,7 @@ operations:
> >              - ifindex
> >              - fd
> >              - queues
> > +            - autorelease
> >          reply:
> >            attributes:
> >              - id
> 
> >  static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1);
> > +static DEFINE_MUTEX(devmem_ar_lock);
> > +DEFINE_STATIC_KEY_FALSE(tcp_devmem_ar_key);
> > +EXPORT_SYMBOL(tcp_devmem_ar_key);
> 
> I don't think you need the export, perhaps move the helper in here in
> the first place (while keeping the static inline wrapper when devmem=n)?
> 
> > +	if (autorelease)
> > +		static_branch_enable(&tcp_devmem_ar_key);
> 
> This is user-controlled (non-root), right? So I think we need
> the deferred version of the static key helpers.
> 
> > -	if (direction == DMA_TO_DEVICE) {
> > -		binding->vec = kvmalloc_array(dmabuf->size / PAGE_SIZE,
> > -					      sizeof(struct net_iov *),
> > -					      GFP_KERNEL);
> > -		if (!binding->vec) {
> > -			err = -ENOMEM;
> > -			goto err_unmap;
> > -		}
> > +	binding->vec = kvmalloc_array(dmabuf->size / PAGE_SIZE,
> > +				      sizeof(struct net_iov *),
> > +				      GFP_KERNEL | __GFP_ZERO);
> 
> make it a kvcalloc() while we're touching it, pls
> 
> > +	if (!binding->vec) {
> > +		err = -ENOMEM;
> > +		goto err_unmap;
> >  	}
> >  
> >  	/* For simplicity we expect to make PAGE_SIZE allocations, but the
> > @@ -306,25 +386,41 @@ net_devmem_bind_dmabuf(struct net_device *dev,
> >  			niov = &owner->area.niovs[i];
> >  			niov->type = NET_IOV_DMABUF;
> >  			niov->owner = &owner->area;
> > +			atomic_set(&niov->uref, 0);
> 
> Isn't it zeroed during alloc?
> 
> >  			page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov),
> >  						      net_devmem_get_dma_addr(niov));
> > -			if (direction == DMA_TO_DEVICE)
> > -				binding->vec[owner->area.base_virtual / PAGE_SIZE + i] = niov;
> > +			binding->vec[owner->area.base_virtual / PAGE_SIZE + i] = niov;
> >  		}
> >  
> >  		virtual += len;
> >  	}
> >  
> 
> > +	if (info->attrs[NETDEV_A_DMABUF_AUTORELEASE])
> > +		autorelease =
> > +			!!nla_get_u8(info->attrs[NETDEV_A_DMABUF_AUTORELEASE]);
> 
> nla_get_u8_default() 
> 
> >  	priv = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk);
> >  	if (IS_ERR(priv))
> >  		return PTR_ERR(priv);
> 
> > +static noinline_for_stack int
> > +sock_devmem_dontneed_manual_release(struct sock *sk,
> > +				    struct dmabuf_token *tokens,
> > +				    unsigned int num_tokens)
> > +{
> > +	struct net_iov *niov;
> > +	unsigned int i, j;
> > +	netmem_ref netmem;
> > +	unsigned int token;
> > +	int num_frags = 0;
> > +	int ret = 0;
> > +
> > +	if (!sk->sk_devmem_info.binding)
> > +		return -EINVAL;
> > +
> > +	for (i = 0; i < num_tokens; i++) {
> > +		for (j = 0; j < tokens[i].token_count; j++) {
> > +			size_t size = sk->sk_devmem_info.binding->dmabuf->size;
> > +
> > +			token = tokens[i].token_start + j;
> > +			if (token >= size / PAGE_SIZE)
> > +				break;
> > +
> > +			if (++num_frags > MAX_DONTNEED_FRAGS)
> > +				return ret;
> > +
> > +			niov = sk->sk_devmem_info.binding->vec[token];
> > +			if (atomic_dec_and_test(&niov->uref)) {
> 
> Don't you need something like "atomic dec non-zero and test"?
> refcount has refcount_dec_not_one() 🤔️
> 

Good point, that would be better for sure.

> > +				netmem = net_iov_to_netmem(niov);
> > +				WARN_ON_ONCE(!napi_pp_put_page(netmem));
> > +			}
> > +			ret++;
> > +		}
> 
> >  frag_limit_reached:
> > -	xa_unlock_bh(&sk->sk_user_frags);
> > +	xa_unlock_bh(&sk->sk_devmem_info.frags);
> 
> may be worth separating the sk_devmem_info change out for clarity
> 
> >  	for (k = 0; k < netmem_num; k++)
> >  		WARN_ON_ONCE(!napi_pp_put_page(netmems[k]));
> 
> > @@ -2503,7 +2506,15 @@ void tcp_v4_destroy_sock(struct sock *sk)
> >  
> >  	tcp_release_user_frags(sk);
> >  
> > -	xa_destroy(&sk->sk_user_frags);
> > +	if (!net_devmem_autorelease_enabled() && sk->sk_devmem_info.binding) {
> > +		net_devmem_dmabuf_binding_user_put(sk->sk_devmem_info.binding);
> > +		net_devmem_dmabuf_binding_put(sk->sk_devmem_info.binding);
> > +		sk->sk_devmem_info.binding = NULL;
> > +		WARN_ONCE(!xa_empty(&sk->sk_devmem_info.frags),
> > +			  "non-empty xarray discovered in autorelease off mode");
> > +	}
> > +
> > +	xa_destroy(&sk->sk_devmem_info.frags);
> 
> Let's wrap this up in a helper that'll live in devmem.c

All of the above SGTM!

Thanks,
Bobby

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ