lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20180320155501.1370612-2-yhs@fb.com>
Date:   Tue, 20 Mar 2018 08:55:00 -0700
From:   Yonghong Song <yhs@...com>
To:     <edumazet@...gle.com>, <ast@...com>, <daniel@...earbox.net>,
        <diptanu@...com>, <netdev@...r.kernel.org>
CC:     <kernel-team@...com>
Subject: [PATCH net-next v2 1/2] net: permit skb_segment on head_frag frag_list skb

One of our in-house projects, bpf-based NAT, hits a kernel BUG_ON at
function skb_segment(), line 3667. The bpf program attaches to
clsact ingress, calls bpf_skb_change_proto to change protocol
from ipv4 to ipv6 or from ipv6 to ipv4, and then calls bpf_redirect
to send the changed packet out.

3472 struct sk_buff *skb_segment(struct sk_buff *head_skb,
3473                             netdev_features_t features)
3474 {
3475         struct sk_buff *segs = NULL;
3476         struct sk_buff *tail = NULL;
...
3665                 while (pos < offset + len) {
3666                         if (i >= nfrags) {
3667                                 BUG_ON(skb_headlen(list_skb));
3668
3669                                 i = 0;
3670                                 nfrags = skb_shinfo(list_skb)->nr_frags;
3671                                 frag = skb_shinfo(list_skb)->frags;
3672                                 frag_skb = list_skb;
...

call stack:
...
 #1 [ffff883ffef03558] __crash_kexec at ffffffff8110c525
 #2 [ffff883ffef03620] crash_kexec at ffffffff8110d5cc
 #3 [ffff883ffef03640] oops_end at ffffffff8101d7e7
 #4 [ffff883ffef03668] die at ffffffff8101deb2
 #5 [ffff883ffef03698] do_trap at ffffffff8101a700
 #6 [ffff883ffef036e8] do_error_trap at ffffffff8101abfe
 #7 [ffff883ffef037a0] do_invalid_op at ffffffff8101acd0
 #8 [ffff883ffef037b0] invalid_op at ffffffff81a00bab
    [exception RIP: skb_segment+3044]
    RIP: ffffffff817e4dd4  RSP: ffff883ffef03860  RFLAGS: 00010216
    RAX: 0000000000002bf6  RBX: ffff883feb7aaa00  RCX: 0000000000000011
    RDX: ffff883fb87910c0  RSI: 0000000000000011  RDI: ffff883feb7ab500
    RBP: ffff883ffef03928   R8: 0000000000002ce2   R9: 00000000000027da
    R10: 000001ea00000000  R11: 0000000000002d82  R12: ffff883f90a1ee80
    R13: ffff883fb8791120  R14: ffff883feb7abc00  R15: 0000000000002ce2
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
 #9 [ffff883ffef03930] tcp_gso_segment at ffffffff818713e7
--- <IRQ stack> ---
...

The triggering input skb has the following properties:
    list_skb = skb->frag_list;
    skb->nfrags != NULL && skb_headlen(list_skb) != 0
and skb_segment() is not able to handle a frag_list skb
if its headlen (list_skb->len - list_skb->data_len) is not 0.

This patch addressed the issue by handling skb_headlen(list_skb) != 0
case properly if list_skb->head_frag is true, which is expected in
most cases. A one-element frag array is created for the list_skb head
and processed before list_skb->frags are processed.

Reported-by: Diptanu Gon Choudhury <diptanu@...com>
Signed-off-by: Yonghong Song <yhs@...com>
---
 net/core/skbuff.c | 42 +++++++++++++++++++++++++++++-------------
 1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 715c134..0ad4cda 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3475,9 +3475,10 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
 	struct sk_buff *segs = NULL;
 	struct sk_buff *tail = NULL;
 	struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
-	skb_frag_t *frag = skb_shinfo(head_skb)->frags;
+	skb_frag_t *frag = skb_shinfo(head_skb)->frags, head_frag;
 	unsigned int mss = skb_shinfo(head_skb)->gso_size;
 	unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
+	struct sk_buff *check_list_skb = list_skb;
 	struct sk_buff *frag_skb = head_skb;
 	unsigned int offset = doffset;
 	unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
@@ -3590,6 +3591,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
 
 			nskb = skb_clone(list_skb, GFP_ATOMIC);
 			list_skb = list_skb->next;
+			check_list_skb = list_skb;
 
 			if (unlikely(!nskb))
 				goto err;
@@ -3664,21 +3666,35 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
 
 		while (pos < offset + len) {
 			if (i >= nfrags) {
-				BUG_ON(skb_headlen(list_skb));
-
-				i = 0;
-				nfrags = skb_shinfo(list_skb)->nr_frags;
-				frag = skb_shinfo(list_skb)->frags;
-				frag_skb = list_skb;
+				if (skb_headlen(list_skb) && check_list_skb == list_skb) {
+					struct page *page;
+
+					BUG_ON(!list_skb->head_frag);
+
+					i = 0;
+					nfrags = 1;
+					page = virt_to_head_page(list_skb->head);
+					head_frag.page.p = page;
+					head_frag.page_offset = list_skb->data -
+						(unsigned char *)page_address(page);
+					head_frag.size = skb_headlen(list_skb);
+					frag = &head_frag;
+					check_list_skb = list_skb->next;
+				} else {
+					i = 0;
+					nfrags = skb_shinfo(list_skb)->nr_frags;
+					frag = skb_shinfo(list_skb)->frags;
+					frag_skb = list_skb;
 
-				BUG_ON(!nfrags);
+					BUG_ON(!nfrags);
 
-				if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
-				    skb_zerocopy_clone(nskb, frag_skb,
-						       GFP_ATOMIC))
-					goto err;
+					if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
+					    skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
+						goto err;
 
-				list_skb = list_skb->next;
+					list_skb = list_skb->next;
+					check_list_skb = list_skb;
+				}
 			}
 
 			if (unlikely(skb_shinfo(nskb)->nr_frags >=
-- 
2.9.5

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ