[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1534547305-25140-4-git-send-email-tushar.n.dave@oracle.com>
Date: Sat, 18 Aug 2018 01:08:23 +0200
From: Tushar Dave <tushar.n.dave@...cle.com>
To: john.fastabend@...il.com, ast@...nel.org, daniel@...earbox.net,
davem@...emloft.net, sowmini.varadhan@...cle.com,
santosh.shilimkar@...cle.com, jakub.kicinski@...ronome.com,
quentin.monnet@...ronome.com, jiong.wang@...ronome.com,
sandipan@...ux.vnet.ibm.com, kafai@...com, rdna@...com, yhs@...com,
netdev@...r.kernel.org
Subject: [RFC v3 net-next 3/5] ebpf: fix bpf_msg_pull_data
Like sockmap (sk_msg), socksg also deals with struct scatterlist;
therefore socksg programs can use the existing bpf helper
bpf_msg_pull_data to access packet data contained in a struct
scatterlist. During some preliminary testing, a couple of issues were
found in bpf_msg_pull_data; they are fixed in this patch.
Also, there cannot be more than MAX_SKB_FRAGS entries in sg_data;
therefore all checks for an sg entry beyond MAX_SKB_FRAGS in
bpf_msg_pull_data() are removed.
Besides that, I also ran into issues when put_page() is invoked,
e.g.
[ 450.568723] BUG: Bad page state in process swapper/10 pfn:2021540
[ 450.575632] page:ffffea0080855000 count:0 mapcount:0
mapping:ffff88103d006840 index:0xffff882021540000 compound_mapcount: 0
[ 450.588069] flags: 0x6fffff80008100(slab|head)
[ 450.593033] raw: 006fffff80008100 dead000000000100 dead000000000200
ffff88103d006840
[ 450.601683] raw: ffff882021540000 0000000080080007 00000000ffffffff
0000000000000000
[ 450.610337] page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set
[ 450.617530] bad because of flags: 0x100(slab)
To avoid the above issue, put_page() is temporarily disabled in this
patch. I am working on alternatives so that a page allocated via
slab (as in this case) can be freed without any issue.
Signed-off-by: Tushar Dave <tushar.n.dave@...cle.com>
Acked-by: Sowmini Varadhan <sowmini.varadhan@...cle.com>
---
net/core/filter.c | 61 +++++++++++++++++++++++++++++--------------------------
1 file changed, 32 insertions(+), 29 deletions(-)
diff --git a/net/core/filter.c b/net/core/filter.c
index e427c8e..cc52baa 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2316,7 +2316,7 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
BPF_CALL_4(bpf_msg_pull_data,
struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
{
- unsigned int len = 0, offset = 0, copy = 0;
+ unsigned int len = 0, offset = 0, copy = 0, off = 0;
struct scatterlist *sg = msg->sg_data;
int first_sg, last_sg, i, shift;
unsigned char *p, *to, *from;
@@ -2330,22 +2330,28 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
i = msg->sg_start;
do {
len = sg[i].length;
- offset += len;
if (start < offset + len)
break;
+ offset += len;
i++;
- if (i == MAX_SKB_FRAGS)
- i = 0;
- } while (i != msg->sg_end);
+ } while (i <= msg->sg_end);
+ /* return error if start is out of range */
if (unlikely(start >= offset + len))
return -EINVAL;
- if (!msg->sg_copy[i] && bytes <= len)
- goto out;
+ /* return error if i is last entry in sglist and end is out of range */
+ if (msg->sg_copy[i] && end > offset + len)
+ return -EINVAL;
first_sg = i;
+ /* if i is not last entry in sg list and end (i.e start + bytes) is
+ * within this sg[i] then goto out and calculate data and data_end
+ */
+ if (!msg->sg_copy[i] && end <= offset + len)
+ goto out;
+
/* At this point we need to linearize multiple scatterlist
* elements or a single shared page. Either way we need to
* copy into a linear buffer exclusively owned by BPF. Then
@@ -2359,11 +2365,14 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
do {
copy += sg[i].length;
i++;
- if (i == MAX_SKB_FRAGS)
- i = 0;
- if (bytes < copy)
+ if (end < copy)
break;
- } while (i != msg->sg_end);
+ } while (i <= msg->sg_end);
+
+ /* return error if i is last entry in sglist and end is out of range */
+ if (i > msg->sg_end && end > offset + copy)
+ return -EINVAL;
+
last_sg = i;
if (unlikely(copy < end - start))
@@ -2373,23 +2382,25 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
if (unlikely(!page))
return -ENOMEM;
p = page_address(page);
- offset = 0;
i = first_sg;
do {
from = sg_virt(&sg[i]);
len = sg[i].length;
- to = p + offset;
+ to = p + off;
memcpy(to, from, len);
- offset += len;
+ off += len;
sg[i].length = 0;
- put_page(sg_page(&sg[i]));
+ /* if original page is allocated via slab then put_page
+ * causes error BUG: Bad page state in process. So temporarily
+ * disabled put_page.
+ * Todo: fix it
+ */
+ //put_page(sg_page(&sg[i]));
i++;
- if (i == MAX_SKB_FRAGS)
- i = 0;
- } while (i != last_sg);
+ } while (i < last_sg);
sg[first_sg].length = copy;
sg_set_page(&sg[first_sg], page, copy, 0);
@@ -2406,12 +2417,8 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
do {
int move_from;
- if (i + shift >= MAX_SKB_FRAGS)
- move_from = i + shift - MAX_SKB_FRAGS;
- else
- move_from = i + shift;
-
- if (move_from == msg->sg_end)
+ move_from = i + shift;
+ if (move_from > msg->sg_end)
break;
sg[i] = sg[move_from];
@@ -2420,14 +2427,10 @@ struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
sg[move_from].offset = 0;
i++;
- if (i == MAX_SKB_FRAGS)
- i = 0;
} while (1);
msg->sg_end -= shift;
- if (msg->sg_end < 0)
- msg->sg_end += MAX_SKB_FRAGS;
out:
- msg->data = sg_virt(&sg[i]) + start - offset;
+ msg->data = sg_virt(&sg[first_sg]) + start - offset;
msg->data_end = msg->data + bytes;
return 0;
--
1.8.3.1
Powered by blists - more mailing lists