Message-ID: <20260120170042.43f038a2@kernel.org>
Date: Tue, 20 Jan 2026 17:00:42 -0800
From: Jakub Kicinski <kuba@...nel.org>
To: Bobby Eshleman <bobbyeshleman@...il.com>
Cc: "David S. Miller" <davem@...emloft.net>, Eric Dumazet
<edumazet@...gle.com>, Paolo Abeni <pabeni@...hat.com>, Simon Horman
<horms@...nel.org>, Kuniyuki Iwashima <kuniyu@...gle.com>, Willem de Bruijn
<willemb@...gle.com>, Neal Cardwell <ncardwell@...gle.com>, David Ahern
<dsahern@...nel.org>, Mina Almasry <almasrymina@...gle.com>, Arnd Bergmann
<arnd@...db.de>, Jonathan Corbet <corbet@....net>, Andrew Lunn
<andrew+netdev@...n.ch>, Shuah Khan <shuah@...nel.org>, Donald Hunter
<donald.hunter@...il.com>, Stanislav Fomichev <sdf@...ichev.me>,
netdev@...r.kernel.org, linux-kernel@...r.kernel.org,
linux-arch@...r.kernel.org, linux-doc@...r.kernel.org,
linux-kselftest@...r.kernel.org, asml.silence@...il.com,
matttbe@...nel.org, skhawaja@...gle.com, Bobby Eshleman
<bobbyeshleman@...a.com>
Subject: Re: [PATCH net-next v10 3/5] net: devmem: implement autorelease
token management
On Thu, 15 Jan 2026 21:02:14 -0800 Bobby Eshleman wrote:
> diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
> index 596c306ce52b..a5301b150663 100644
> --- a/Documentation/netlink/specs/netdev.yaml
> +++ b/Documentation/netlink/specs/netdev.yaml
> @@ -562,6 +562,17 @@ attribute-sets:
> type: u32
> checks:
> min: 1
> + -
> + name: autorelease
> + doc: |
> + Token autorelease mode. If true (1), leaked tokens are automatically
> + released when the socket closes. If false (0), leaked tokens are only
> + released when the dmabuf is torn down. Once a binding is created with
> + a specific mode, all subsequent bindings system-wide must use the
> + same mode.
> +
> + Optional. Defaults to false if not specified.
> + type: u8
If you plan to have more values - u32, if not - a flag.
A u8 attribute is an 8-bit value plus 24 bits of padding, so it's only
really useful for protocol fields.
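
FWIW with a flag the kernel side reduces to a presence check, roughly:

	/* NLA_FLAG: attribute presence alone carries the boolean */
	autorelease = !!info->attrs[NETDEV_A_DMABUF_AUTORELEASE];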
> operations:
> list:
> @@ -769,6 +780,7 @@ operations:
> - ifindex
> - fd
> - queues
> + - autorelease
> reply:
> attributes:
> - id
> static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1);
> +static DEFINE_MUTEX(devmem_ar_lock);
> +DEFINE_STATIC_KEY_FALSE(tcp_devmem_ar_key);
> +EXPORT_SYMBOL(tcp_devmem_ar_key);
I don't think you need the export; perhaps move the helper in here in
the first place (while keeping the static inline wrapper for devmem=n)?
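
Rough sketch of what I mean, untested:

	/* devmem.c: key stays private to the file, no export */
	static DEFINE_STATIC_KEY_FALSE(tcp_devmem_ar_key);

	bool net_devmem_autorelease_enabled(void)
	{
		return static_branch_unlikely(&tcp_devmem_ar_key);
	}

	/* devmem.h: keep callers building with CONFIG_NET_DEVMEM=n */
	#if IS_ENABLED(CONFIG_NET_DEVMEM)
	bool net_devmem_autorelease_enabled(void);
	#else
	static inline bool net_devmem_autorelease_enabled(void)
	{
		return false;
	}
	#endif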
> + if (autorelease)
> + static_branch_enable(&tcp_devmem_ar_key);
This is user-controlled (non-root), right? So I think we need
the deferred version of the static key helpers.
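
i.e. something along these lines (the HZ rate is arbitrary, just to
illustrate the API):

	#include <linux/jump_label_ratelimit.h>

	/* the disable path is rate-limited, so a non-root user
	 * can't hammer the text-patching path by toggling bindings
	 */
	static DEFINE_STATIC_KEY_DEFERRED_FALSE(tcp_devmem_ar_key, HZ);

	/* enable is an immediate inc */
	static_branch_deferred_inc(&tcp_devmem_ar_key);

	/* dec, and thus the actual key flip, is deferred */
	static_branch_slow_dec_deferred(&tcp_devmem_ar_key);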
> - if (direction == DMA_TO_DEVICE) {
> - binding->vec = kvmalloc_array(dmabuf->size / PAGE_SIZE,
> - sizeof(struct net_iov *),
> - GFP_KERNEL);
> - if (!binding->vec) {
> - err = -ENOMEM;
> - goto err_unmap;
> - }
> + binding->vec = kvmalloc_array(dmabuf->size / PAGE_SIZE,
> + sizeof(struct net_iov *),
> + GFP_KERNEL | __GFP_ZERO);
make it a kvcalloc() while we're touching it, pls
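
i.e. (same semantics, zeroing included):

	binding->vec = kvcalloc(dmabuf->size / PAGE_SIZE,
				sizeof(struct net_iov *), GFP_KERNEL);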
> + if (!binding->vec) {
> + err = -ENOMEM;
> + goto err_unmap;
> }
>
> /* For simplicity we expect to make PAGE_SIZE allocations, but the
> @@ -306,25 +386,41 @@ net_devmem_bind_dmabuf(struct net_device *dev,
> niov = &owner->area.niovs[i];
> niov->type = NET_IOV_DMABUF;
> niov->owner = &owner->area;
> + atomic_set(&niov->uref, 0);
Isn't it zeroed during alloc?
> page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov),
> net_devmem_get_dma_addr(niov));
> - if (direction == DMA_TO_DEVICE)
> - binding->vec[owner->area.base_virtual / PAGE_SIZE + i] = niov;
> + binding->vec[owner->area.base_virtual / PAGE_SIZE + i] = niov;
> }
>
> virtual += len;
> }
>
> + if (info->attrs[NETDEV_A_DMABUF_AUTORELEASE])
> + autorelease =
> + !!nla_get_u8(info->attrs[NETDEV_A_DMABUF_AUTORELEASE]);
nla_get_u8_default()
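
i.e. something like:

	autorelease =
		!!nla_get_u8_default(info->attrs[NETDEV_A_DMABUF_AUTORELEASE],
				     0);

(moot if this becomes a flag, per the comment on the spec above)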
> priv = genl_sk_priv_get(&netdev_nl_family, NETLINK_CB(skb).sk);
> if (IS_ERR(priv))
> return PTR_ERR(priv);
> +static noinline_for_stack int
> +sock_devmem_dontneed_manual_release(struct sock *sk,
> + struct dmabuf_token *tokens,
> + unsigned int num_tokens)
> +{
> + struct net_iov *niov;
> + unsigned int i, j;
> + netmem_ref netmem;
> + unsigned int token;
> + int num_frags = 0;
> + int ret = 0;
> +
> + if (!sk->sk_devmem_info.binding)
> + return -EINVAL;
> +
> + for (i = 0; i < num_tokens; i++) {
> + for (j = 0; j < tokens[i].token_count; j++) {
> + size_t size = sk->sk_devmem_info.binding->dmabuf->size;
> +
> + token = tokens[i].token_start + j;
> + if (token >= size / PAGE_SIZE)
> + break;
> +
> + if (++num_frags > MAX_DONTNEED_FRAGS)
> + return ret;
> +
> + niov = sk->sk_devmem_info.binding->vec[token];
> + if (atomic_dec_and_test(&niov->uref)) {
Don't you need something like "atomic dec non-zero and test"?
refcount has refcount_dec_not_one() 🤔️
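
Untested, but with raw atomics I'd expect something like:

	/* only drop the page pool ref if userspace still holds
	 * a token ref; a repeated DONTNEED must not underflow
	 */
	if (atomic_dec_if_positive(&niov->uref) == 0) {
		netmem = net_iov_to_netmem(niov);
		WARN_ON_ONCE(!napi_pp_put_page(netmem));
	}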
> + netmem = net_iov_to_netmem(niov);
> + WARN_ON_ONCE(!napi_pp_put_page(netmem));
> + }
> + ret++;
> + }
> frag_limit_reached:
> - xa_unlock_bh(&sk->sk_user_frags);
> + xa_unlock_bh(&sk->sk_devmem_info.frags);
May be worth separating the sk_devmem_info rename out into its own
patch for clarity.
> for (k = 0; k < netmem_num; k++)
> WARN_ON_ONCE(!napi_pp_put_page(netmems[k]));
> @@ -2503,7 +2506,15 @@ void tcp_v4_destroy_sock(struct sock *sk)
>
> tcp_release_user_frags(sk);
>
> - xa_destroy(&sk->sk_user_frags);
> + if (!net_devmem_autorelease_enabled() && sk->sk_devmem_info.binding) {
> + net_devmem_dmabuf_binding_user_put(sk->sk_devmem_info.binding);
> + net_devmem_dmabuf_binding_put(sk->sk_devmem_info.binding);
> + sk->sk_devmem_info.binding = NULL;
> + WARN_ONCE(!xa_empty(&sk->sk_devmem_info.frags),
> + "non-empty xarray discovered in autorelease off mode");
> + }
> +
> + xa_destroy(&sk->sk_devmem_info.frags);
Let's wrap this up in a helper that'll live in devmem.c
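
e.g. (name is just a placeholder):

	/* devmem.c */
	void net_devmem_sk_teardown(struct sock *sk)
	{
		struct net_devmem_dmabuf_binding *binding;

		binding = sk->sk_devmem_info.binding;
		if (!net_devmem_autorelease_enabled() && binding) {
			net_devmem_dmabuf_binding_user_put(binding);
			net_devmem_dmabuf_binding_put(binding);
			sk->sk_devmem_info.binding = NULL;
			WARN_ONCE(!xa_empty(&sk->sk_devmem_info.frags),
				  "non-empty xarray in autorelease off mode");
		}

		xa_destroy(&sk->sk_devmem_info.frags);
	}

plus an empty static inline for CONFIG_NET_DEVMEM=n.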