[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250320121604.3342831-1-edumazet@google.com>
Date: Thu, 20 Mar 2025 12:16:04 +0000
From: Eric Dumazet <edumazet@...gle.com>
To: "David S . Miller" <davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>, Neal Cardwell <ncardwell@...gle.com>
Cc: Kuniyuki Iwashima <kuniyu@...zon.com>, Simon Horman <horms@...nel.org>, netdev@...r.kernel.org,
eric.dumazet@...il.com, Eric Dumazet <edumazet@...gle.com>
Subject: [PATCH net-next] tcp: avoid atomic operations on sk->sk_rmem_alloc
TCP uses generic skb_set_owner_r() and sock_rfree()
for received packets, with socket lock being owned.
Switch to private versions, avoiding two atomic operations
per packet.
Signed-off-by: Eric Dumazet <edumazet@...gle.com>
---
include/net/tcp.h | 15 +++++++++++++++
net/ipv4/tcp.c | 18 ++++++++++++++++--
net/ipv4/tcp_fastopen.c | 2 +-
net/ipv4/tcp_input.c | 6 +++---
4 files changed, 35 insertions(+), 6 deletions(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index d08fbf90495de69b157d3c87c50e82d781a365df..dd6d63a6f42b99774e9461b69d3e7932cf629082 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -779,6 +779,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
/* tcp.c */
void tcp_get_info(struct sock *, struct tcp_info *);
+void tcp_sock_rfree(struct sk_buff *skb);
/* Read 'sendfile()'-style from a TCP socket */
int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
@@ -2898,4 +2899,18 @@ enum skb_drop_reason tcp_inbound_hash(struct sock *sk,
const void *saddr, const void *daddr,
int family, int dif, int sdif);
+/* version of skb_set_owner_r() avoiding one atomic_add() */
+static inline void tcp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
+{
+ skb_orphan(skb);
+ skb->sk = sk;
+ skb->destructor = tcp_sock_rfree;
+
+ sock_owned_by_me(sk);
+ atomic_set(&sk->sk_rmem_alloc,
+ atomic_read(&sk->sk_rmem_alloc) + skb->truesize);
+
+ sk_forward_alloc_add(sk, -skb->truesize);
+}
+
#endif /* _TCP_H */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 989c3c3d8e757361a0ac4a9f039a3cfca10d9612..b1306038b8e6e8c55fd1b4803c5d8ca626491aae 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1525,11 +1525,25 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
__tcp_cleanup_rbuf(sk, copied);
}
+/* private version of sock_rfree() avoiding one atomic_sub() */
+void tcp_sock_rfree(struct sk_buff *skb)
+{
+ struct sock *sk = skb->sk;
+ unsigned int len = skb->truesize;
+
+ sock_owned_by_me(sk);
+ atomic_set(&sk->sk_rmem_alloc,
+ atomic_read(&sk->sk_rmem_alloc) - len);
+
+ sk_forward_alloc_add(sk, len);
+ sk_mem_reclaim(sk);
+}
+
static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb)
{
__skb_unlink(skb, &sk->sk_receive_queue);
- if (likely(skb->destructor == sock_rfree)) {
- sock_rfree(skb);
+ if (likely(skb->destructor == tcp_sock_rfree)) {
+ tcp_sock_rfree(skb);
skb->destructor = NULL;
skb->sk = NULL;
return skb_attempt_defer_free(skb);
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 1a6b1bc5424514e27a99cbb2fcedf001afd51d98..ca40665145c692ce0de518886bb366406606f7ac 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -189,7 +189,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
tcp_segs_in(tp, skb);
__skb_pull(skb, tcp_hdrlen(skb));
sk_forced_mem_schedule(sk, skb->truesize);
- skb_set_owner_r(skb, sk);
+ tcp_skb_set_owner_r(skb, sk);
TCP_SKB_CB(skb)->seq++;
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 72382ee4456dbd89fd1b69f3bdbf6b9c8ef5aa78..cac6e86196975cc793f499126302fc5220a1dfc7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5171,7 +5171,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
if (tcp_is_sack(tp))
tcp_grow_window(sk, skb, false);
skb_condense(skb);
- skb_set_owner_r(skb, sk);
+ tcp_skb_set_owner_r(skb, sk);
}
}
@@ -5187,7 +5187,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
if (!eaten) {
tcp_add_receive_queue(sk, skb);
- skb_set_owner_r(skb, sk);
+ tcp_skb_set_owner_r(skb, sk);
}
return eaten;
}
@@ -5504,7 +5504,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
__skb_queue_before(list, skb, nskb);
else
__skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */
- skb_set_owner_r(nskb, sk);
+ tcp_skb_set_owner_r(nskb, sk);
mptcp_skb_ext_move(nskb, skb);
/* Copy data, releasing collapsed skbs. */
--
2.49.0.rc1.451.g8f38331e32-goog
Powered by blists - more mailing lists