[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230731134536.4058181-1-xukuohai@huaweicloud.com>
Date: Mon, 31 Jul 2023 09:45:36 -0400
From: Xu Kuohai <xukuohai@...weicloud.com>
To: bpf@...r.kernel.org,
netdev@...r.kernel.org
Cc: John Fastabend <john.fastabend@...il.com>,
Jakub Sitnicki <jakub@...udflare.com>,
"David S . Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>,
Daniel Borkmann <daniel@...earbox.net>,
Alexei Starovoitov <ast@...nel.org>,
Cong Wang <cong.wang@...edance.com>
Subject: [PATCH bpf] bpf, sockmap: Fix NULL deref in sk_psock_backlog
From: Xu Kuohai <xukuohai@...wei.com>
sk_psock_backlog triggers a NULL dereference:
BUG: kernel NULL pointer dereference, address: 000000000000000e
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 0 P4D 0
Oops: 0000 [#1] PREEMPT SMP PTI
CPU: 0 PID: 70 Comm: kworker/0:3 Not tainted 6.5.0-rc2-00585-gb11bbbe4c66e #26
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b3f840-p4
Workqueue: events sk_psock_backlog
RIP: 0010:0xffffffffc0205254
Code: 00 00 48 89 94 24 a0 00 00 00 41 5f 41 5e 41 5d 41 5c 5d 5b 41 5b 41 5a 41 59 41 50
RSP: 0018:ffffc90000acbcb8 EFLAGS: 00010246
RAX: ffffffff81c5ee10 RBX: ffff888018260000 RCX: 0000000000000001
RDX: 0000000000000003 RSI: ffffc90000acbd58 RDI: 0000000000000000
RBP: 0000000000000000 R08: 0000000000000003 R09: 0000000080100005
R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000003
R13: 0000000000000000 R14: 0000000000000021 R15: 0000000000000003
FS: 0000000000000000(0000) GS:ffff88803ea00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000000000000000e CR3: 000000000b0de002 CR4: 0000000000170ef0
Call Trace:
<TASK>
? __die+0x24/0x70
? page_fault_oops+0x15d/0x480
? fixup_exception+0x26/0x330
? exc_page_fault+0x72/0x1d0
? asm_exc_page_fault+0x26/0x30
? __pfx_inet_sendmsg+0x10/0x10
? 0xffffffffc0205254
? inet_sendmsg+0x20/0x80
? sock_sendmsg+0x8f/0xa0
? __skb_send_sock+0x315/0x360
? __pfx_sendmsg_unlocked+0x10/0x10
? sk_psock_backlog+0xb4/0x300
? process_one_work+0x292/0x560
? worker_thread+0x53/0x3e0
? __pfx_worker_thread+0x10/0x10
? kthread+0x102/0x130
? __pfx_kthread+0x10/0x10
? ret_from_fork+0x34/0x50
? __pfx_kthread+0x10/0x10
? ret_from_fork_asm+0x1b/0x30
</TASK>
The bug flow is as follows:
thread 1 thread 2
sk_psock_backlog sock_close
sk_psock_handle_skb __sock_release
__skb_send_sock inet_release
sendmsg_unlocked tcp_close
sock_sendmsg lock_sock
__tcp_close
release_sock
sock->sk = NULL // (1)
inet_sendmsg
sk = sock->sk // (2)
inet_send_prepare
inet_sk(sk)->inet_num // (3)
sock->sk is set to NULL by thread 2 at time (1), then fetched by
thread 1 at time (2), and used by thread 1 to access memory at
time (3), resulting in NULL pointer dereference.
To fix it, add lock_sock back on the egress path for sk_psock_handle_skb.
Fixes: 799aa7f98d53 ("skmsg: Avoid lock_sock() in sk_psock_backlog()")
Signed-off-by: Xu Kuohai <xukuohai@...wei.com>
---
net/core/skmsg.c | 44 ++++++++++++++++++++++++++++++++++----------
1 file changed, 34 insertions(+), 10 deletions(-)
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 7c2764beeb04..8b758c51aa0d 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -609,15 +609,42 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
return err;
}
+static int sk_psock_handle_ingress_skb(struct sk_psock *psock,
+ struct sk_buff *skb,
+ u32 off, u32 len)
+{
+ if (sock_flag(psock->sk, SOCK_DEAD))
+ return -EIO;
+ return sk_psock_skb_ingress(psock, skb, off, len);
+}
+
+static int sk_psock_handle_egress_skb(struct sk_psock *psock,
+ struct sk_buff *skb,
+ u32 off, u32 len)
+{
+ int ret;
+
+ lock_sock(psock->sk);
+
+ if (sock_flag(psock->sk, SOCK_DEAD))
+ ret = -EIO;
+ else if (!sock_writeable(psock->sk))
+ ret = -EAGAIN;
+ else
+ ret = skb_send_sock_locked(psock->sk, skb, off, len);
+
+ release_sock(psock->sk);
+
+ return ret;
+}
+
static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len, bool ingress)
{
- if (!ingress) {
- if (!sock_writeable(psock->sk))
- return -EAGAIN;
- return skb_send_sock(psock->sk, skb, off, len);
- }
- return sk_psock_skb_ingress(psock, skb, off, len);
+ if (ingress)
+ return sk_psock_handle_ingress_skb(psock, skb, off, len);
+ else
+ return sk_psock_handle_egress_skb(psock, skb, off, len);
}
static void sk_psock_skb_state(struct sk_psock *psock,
@@ -660,10 +687,7 @@ static void sk_psock_backlog(struct work_struct *work)
ingress = skb_bpf_ingress(skb);
skb_bpf_redirect_clear(skb);
do {
- ret = -EIO;
- if (!sock_flag(psock->sk, SOCK_DEAD))
- ret = sk_psock_handle_skb(psock, skb, off,
- len, ingress);
+ ret = sk_psock_handle_skb(psock, skb, off, len, ingress);
if (ret <= 0) {
if (ret == -EAGAIN) {
sk_psock_skb_state(psock, state, len, off);
--
2.30.2
Powered by blists - more mailing lists