[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20180320232156.3455738-2-yhs@fb.com>
Date: Tue, 20 Mar 2018 16:21:55 -0700
From: Yonghong Song <yhs@...com>
To: <edumazet@...gle.com>, <ast@...com>, <daniel@...earbox.net>,
<diptanu@...com>, <netdev@...r.kernel.org>,
<alexander.duyck@...il.com>
CC: <kernel-team@...com>
Subject: [PATCH net-next v3 1/2] net: permit skb_segment on head_frag frag_list skb
One of our in-house projects, bpf-based NAT, hits a kernel BUG_ON at
function skb_segment(), line 3667. The bpf program attaches to
clsact ingress, calls bpf_skb_change_proto to change protocol
from ipv4 to ipv6 or from ipv6 to ipv4, and then calls bpf_redirect
to send the changed packet out.
3472 struct sk_buff *skb_segment(struct sk_buff *head_skb,
3473 netdev_features_t features)
3474 {
3475 struct sk_buff *segs = NULL;
3476 struct sk_buff *tail = NULL;
...
3665 while (pos < offset + len) {
3666 if (i >= nfrags) {
3667 BUG_ON(skb_headlen(list_skb));
3668
3669 i = 0;
3670 nfrags = skb_shinfo(list_skb)->nr_frags;
3671 frag = skb_shinfo(list_skb)->frags;
3672 frag_skb = list_skb;
...
call stack:
...
#1 [ffff883ffef03558] __crash_kexec at ffffffff8110c525
#2 [ffff883ffef03620] crash_kexec at ffffffff8110d5cc
#3 [ffff883ffef03640] oops_end at ffffffff8101d7e7
#4 [ffff883ffef03668] die at ffffffff8101deb2
#5 [ffff883ffef03698] do_trap at ffffffff8101a700
#6 [ffff883ffef036e8] do_error_trap at ffffffff8101abfe
#7 [ffff883ffef037a0] do_invalid_op at ffffffff8101acd0
#8 [ffff883ffef037b0] invalid_op at ffffffff81a00bab
[exception RIP: skb_segment+3044]
RIP: ffffffff817e4dd4 RSP: ffff883ffef03860 RFLAGS: 00010216
RAX: 0000000000002bf6 RBX: ffff883feb7aaa00 RCX: 0000000000000011
RDX: ffff883fb87910c0 RSI: 0000000000000011 RDI: ffff883feb7ab500
RBP: ffff883ffef03928 R8: 0000000000002ce2 R9: 00000000000027da
R10: 000001ea00000000 R11: 0000000000002d82 R12: ffff883f90a1ee80
R13: ffff883fb8791120 R14: ffff883feb7abc00 R15: 0000000000002ce2
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
#9 [ffff883ffef03930] tcp_gso_segment at ffffffff818713e7
--- <IRQ stack> ---
...
The triggering input skb has the following properties:
list_skb = skb->frag_list;
skb->nfrags != NULL && skb_headlen(list_skb) != 0
and skb_segment() is not able to handle a frag_list skb
if its headlen (list_skb->len - list_skb->data_len) is not 0.
This patch addressed the issue by handling skb_headlen(list_skb) != 0
case properly if list_skb->head_frag is true, which is expected in
most cases. The head frag is processed before list_skb->frags
are processed.
Reported-by: Diptanu Gon Choudhury <diptanu@...com>
Signed-off-by: Yonghong Song <yhs@...com>
---
net/core/skbuff.c | 51 +++++++++++++++++++++++++++++++++++++--------------
1 file changed, 37 insertions(+), 14 deletions(-)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 715c134..59bbc06 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3475,7 +3475,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
struct sk_buff *segs = NULL;
struct sk_buff *tail = NULL;
struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
- skb_frag_t *frag = skb_shinfo(head_skb)->frags;
+ skb_frag_t *frag = skb_shinfo(head_skb)->frags, *head_frag = NULL;
unsigned int mss = skb_shinfo(head_skb)->gso_size;
unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
struct sk_buff *frag_skb = head_skb;
@@ -3664,19 +3664,39 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
while (pos < offset + len) {
if (i >= nfrags) {
- BUG_ON(skb_headlen(list_skb));
-
i = 0;
+ if (skb_headlen(list_skb)) {
+ struct page *page;
+
+ BUG_ON(!list_skb->head_frag);
+
+ page = virt_to_head_page(list_skb->head);
+ if (!head_frag) {
+ head_frag = kmalloc(sizeof(skb_frag_t),
+ GFP_KERNEL);
+ if (!head_frag)
+ goto err;
+ }
+ head_frag->page.p = page;
+ head_frag->page_offset = list_skb->data -
+ (unsigned char *)page_address(page);
+ head_frag->size = skb_headlen(list_skb);
+ /* set i = -1 so we will pick head_frag
+ * instead of skb_shinfo(list_skb)->frags
+ * when i == -1.
+ */
+ i = -1;
+ }
nfrags = skb_shinfo(list_skb)->nr_frags;
- frag = skb_shinfo(list_skb)->frags;
- frag_skb = list_skb;
-
- BUG_ON(!nfrags);
-
- if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
- skb_zerocopy_clone(nskb, frag_skb,
- GFP_ATOMIC))
- goto err;
+ if (nfrags) {
+ frag = skb_shinfo(list_skb)->frags;
+ frag_skb = list_skb;
+
+ if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
+ skb_zerocopy_clone(nskb, frag_skb,
+ GFP_ATOMIC))
+ goto err;
+ }
list_skb = list_skb->next;
}
@@ -3689,7 +3709,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
goto err;
}
- *nskb_frag = *frag;
+ *nskb_frag = (i == -1) ? *head_frag : *frag;
__skb_frag_ref(nskb_frag);
size = skb_frag_size(nskb_frag);
@@ -3702,7 +3722,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
if (pos + size <= offset + len) {
i++;
- frag++;
+ if (i != 0)
+ frag++;
pos += size;
} else {
skb_frag_size_sub(nskb_frag, pos + size - (offset + len));
@@ -3774,10 +3795,12 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
swap(tail->destructor, head_skb->destructor);
swap(tail->sk, head_skb->sk);
}
+ kfree(head_frag);
return segs;
err:
kfree_skb_list(segs);
+ kfree(head_frag);
return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(skb_segment);
--
2.9.5
Powered by blists - more mailing lists