lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230731134536.4058181-1-xukuohai@huaweicloud.com>
Date: Mon, 31 Jul 2023 09:45:36 -0400
From: Xu Kuohai <xukuohai@...weicloud.com>
To: bpf@...r.kernel.org,
	netdev@...r.kernel.org
Cc: John Fastabend <john.fastabend@...il.com>,
	Jakub Sitnicki <jakub@...udflare.com>,
	"David S . Miller" <davem@...emloft.net>,
	Eric Dumazet <edumazet@...gle.com>,
	Jakub Kicinski <kuba@...nel.org>,
	Paolo Abeni <pabeni@...hat.com>,
	Daniel Borkmann <daniel@...earbox.net>,
	Alexei Starovoitov <ast@...nel.org>,
	Cong Wang <cong.wang@...edance.com>
Subject: [PATCH bpf] bpf, sockmap: Fix NULL deref in sk_psock_backlog

From: Xu Kuohai <xukuohai@...wei.com>

sk_psock_backlog triggers a NULL dereference:

 BUG: kernel NULL pointer dereference, address: 000000000000000e
 #PF: supervisor read access in kernel mode
 #PF: error_code(0x0000) - not-present page
 PGD 0 P4D 0
 Oops: 0000 [#1] PREEMPT SMP PTI
 CPU: 0 PID: 70 Comm: kworker/0:3 Not tainted 6.5.0-rc2-00585-gb11bbbe4c66e #26
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b3f840-p4
 Workqueue: events sk_psock_backlog
 RIP: 0010:0xffffffffc0205254
 Code: 00 00 48 89 94 24 a0 00 00 00 41 5f 41 5e 41 5d 41 5c 5d 5b 41 5b 41 5a 41 59 41 50
 RSP: 0018:ffffc90000acbcb8 EFLAGS: 00010246
 RAX: ffffffff81c5ee10 RBX: ffff888018260000 RCX: 0000000000000001
 RDX: 0000000000000003 RSI: ffffc90000acbd58 RDI: 0000000000000000
 RBP: 0000000000000000 R08: 0000000000000003 R09: 0000000080100005
 R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000003
 R13: 0000000000000000 R14: 0000000000000021 R15: 0000000000000003
 FS:  0000000000000000(0000) GS:ffff88803ea00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 000000000000000e CR3: 000000000b0de002 CR4: 0000000000170ef0
 Call Trace:
  <TASK>
  ? __die+0x24/0x70
  ? page_fault_oops+0x15d/0x480
  ? fixup_exception+0x26/0x330
  ? exc_page_fault+0x72/0x1d0
  ? asm_exc_page_fault+0x26/0x30
  ? __pfx_inet_sendmsg+0x10/0x10
  ? 0xffffffffc0205254
  ? inet_sendmsg+0x20/0x80
  ? sock_sendmsg+0x8f/0xa0
  ? __skb_send_sock+0x315/0x360
  ? __pfx_sendmsg_unlocked+0x10/0x10
  ? sk_psock_backlog+0xb4/0x300
  ? process_one_work+0x292/0x560
  ? worker_thread+0x53/0x3e0
  ? __pfx_worker_thread+0x10/0x10
  ? kthread+0x102/0x130
  ? __pfx_kthread+0x10/0x10
  ? ret_from_fork+0x34/0x50
  ? __pfx_kthread+0x10/0x10
  ? ret_from_fork_asm+0x1b/0x30
  </TASK>

The bug flow is as follows:

thread 1                                   thread 2

sk_psock_backlog                           sock_close
  sk_psock_handle_skb                        __sock_release
    __skb_send_sock                            inet_release
      sendmsg_unlocked                           tcp_close
        sock_sendmsg                               lock_sock
                                                     __tcp_close
                                                   release_sock
                                                 sock->sk = NULL // (1)
          inet_sendmsg
            sk = sock->sk // (2)
            inet_send_prepare
              inet_sk(sk)->inet_num // (3)

sock->sk is set to NULL by thread 2 at time (1), then fetched by
thread 1 at time (2), and used by thread 1 to access memory at
time (3), resulting in NULL pointer dereference.

To fix it, add lock_sock back on the egress path for sk_psock_handle_skb.

Fixes: 799aa7f98d53 ("skmsg: Avoid lock_sock() in sk_psock_backlog()")
Signed-off-by: Xu Kuohai <xukuohai@...wei.com>
---
 net/core/skmsg.c | 44 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 34 insertions(+), 10 deletions(-)

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 7c2764beeb04..8b758c51aa0d 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -609,15 +609,42 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
 	return err;
 }
 
+static int sk_psock_handle_ingress_skb(struct sk_psock *psock,
+				       struct sk_buff *skb,
+				       u32 off, u32 len)
+{
+	if (sock_flag(psock->sk, SOCK_DEAD))
+		return -EIO;
+	return sk_psock_skb_ingress(psock, skb, off, len);
+}
+
+static int sk_psock_handle_egress_skb(struct sk_psock *psock,
+				      struct sk_buff *skb,
+				      u32 off, u32 len)
+{
+	int ret;
+
+	lock_sock(psock->sk);
+
+	if (sock_flag(psock->sk, SOCK_DEAD))
+		ret = -EIO;
+	else if (!sock_writeable(psock->sk))
+		ret = -EAGAIN;
+	else
+		ret = skb_send_sock_locked(psock->sk, skb, off, len);
+
+	release_sock(psock->sk);
+
+	return ret;
+}
+
 static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
 			       u32 off, u32 len, bool ingress)
 {
-	if (!ingress) {
-		if (!sock_writeable(psock->sk))
-			return -EAGAIN;
-		return skb_send_sock(psock->sk, skb, off, len);
-	}
-	return sk_psock_skb_ingress(psock, skb, off, len);
+	if (ingress)
+		return sk_psock_handle_ingress_skb(psock, skb, off, len);
+	else
+		return sk_psock_handle_egress_skb(psock, skb, off, len);
 }
 
 static void sk_psock_skb_state(struct sk_psock *psock,
@@ -660,10 +687,7 @@ static void sk_psock_backlog(struct work_struct *work)
 		ingress = skb_bpf_ingress(skb);
 		skb_bpf_redirect_clear(skb);
 		do {
-			ret = -EIO;
-			if (!sock_flag(psock->sk, SOCK_DEAD))
-				ret = sk_psock_handle_skb(psock, skb, off,
-							  len, ingress);
+			ret = sk_psock_handle_skb(psock, skb, off, len, ingress);
 			if (ret <= 0) {
 				if (ret == -EAGAIN) {
 					sk_psock_skb_state(psock, state, len, off);
-- 
2.30.2


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ