[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <6ce1df16-6bcd-fb0a-f7fb-8dee45916438@iogearbox.net>
Date: Wed, 28 Nov 2018 11:34:55 +0100
From: Daniel Borkmann <daniel@...earbox.net>
To: David Miller <davem@...emloft.net>, netdev@...r.kernel.org
Cc: ast@...nel.org, john.fastabend@...il.com
Subject: Re: BPF uapi structures and 32-bit
On 11/27/2018 11:25 PM, David Miller wrote:
>
> In the linux/bpf.h UAPI header, we must absolutely avoid any
> non-fixed-sized types.
>
> Otherwise we have serious problems on 32-bit.
>
> Unfortunately I discovered today that we have take on two such cases,
> sk_msg_md and sk_reuseport_md, both of which start with two void
> pointers.
>
> I hit this because test_verifier.c on my sparc64 box was built as
> a 32-bit binary and this causes a hundred or so tests to fail, and
> many if not all are because of the changing struct layout.
>
> I could built 64-bit but this absolutely should work properly.
>
> But for fully native 32-bit it is even worse, this will really
> need to be resolved because llvm/clang is always going to build
> the BPF programs with 64-bit void pointers.
>
> I would strongly suggest we try and fix this now if we can.
Yeah fully agree. Thinking diff below should address it, do you
have a chance to give this a spin for sparc / 32 bit to check if
test_verifier still explodes?
Thanks a lot,
Daniel
include/linux/filter.h | 7 +++++++
include/uapi/linux/bpf.h | 17 ++++++++++++-----
net/core/filter.c | 16 ++++++++--------
tools/include/uapi/linux/bpf.h | 17 ++++++++++++-----
4 files changed, 39 insertions(+), 18 deletions(-)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 448dcc4..795ff0b 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -449,6 +449,13 @@ struct sock_reuseport;
offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1
#define bpf_ctx_range_till(TYPE, MEMBER1, MEMBER2) \
offsetof(TYPE, MEMBER1) ... offsetofend(TYPE, MEMBER2) - 1
+#if BITS_PER_LONG == 64
+# define bpf_ctx_range_ptr(TYPE, MEMBER) \
+ offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1
+#else
+# define bpf_ctx_range_ptr(TYPE, MEMBER) \
+ offsetof(TYPE, MEMBER) ... offsetof(TYPE, MEMBER) + 8 - 1
+#endif /* BITS_PER_LONG == 64 */
#define bpf_target_off(TYPE, MEMBER, SIZE, PTR_SIZE) \
({ \
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 852dc17..426b5c8 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2422,6 +2422,12 @@ enum bpf_lwt_encap_mode {
BPF_LWT_ENCAP_SEG6_INLINE
};
+#define __bpf_md_ptr(type, name) \
+union { \
+ type name; \
+ __u64 :64; \
+} __attribute__((aligned(8)))
+
/* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure
*/
@@ -2456,7 +2462,7 @@ struct __sk_buff {
/* ... here. */
__u32 data_meta;
- struct bpf_flow_keys *flow_keys;
+ __bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
};
struct bpf_tunnel_key {
@@ -2572,8 +2578,8 @@ enum sk_action {
* be added to the end of this structure
*/
struct sk_msg_md {
- void *data;
- void *data_end;
+ __bpf_md_ptr(void *, data);
+ __bpf_md_ptr(void *, data_end);
__u32 family;
__u32 remote_ip4; /* Stored in network byte order */
@@ -2589,8 +2595,9 @@ struct sk_reuseport_md {
* Start of directly accessible data. It begins from
* the tcp/udp header.
*/
- void *data;
- void *data_end; /* End of directly accessible data */
+ __bpf_md_ptr(void *, data);
+ /* End of directly accessible data */
+ __bpf_md_ptr(void *, data_end);
/*
* Total length of packet (starting from the tcp/udp header).
* Note that the directly accessible bytes (data_end - data)
diff --git a/net/core/filter.c b/net/core/filter.c
index 9a1327e..6ee605d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5435,8 +5435,8 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
if (size != size_default)
return false;
break;
- case bpf_ctx_range(struct __sk_buff, flow_keys):
- if (size != sizeof(struct bpf_flow_keys *))
+ case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
+ if (size != sizeof(__u64))
return false;
break;
default:
@@ -5464,7 +5464,7 @@ static bool sk_filter_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data):
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, data_end):
- case bpf_ctx_range(struct __sk_buff, flow_keys):
+ case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
return false;
}
@@ -5489,7 +5489,7 @@ static bool cg_skb_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data_meta):
- case bpf_ctx_range(struct __sk_buff, flow_keys):
+ case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
return false;
case bpf_ctx_range(struct __sk_buff, data):
case bpf_ctx_range(struct __sk_buff, data_end):
@@ -5530,7 +5530,7 @@ static bool lwt_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
case bpf_ctx_range(struct __sk_buff, data_meta):
- case bpf_ctx_range(struct __sk_buff, flow_keys):
+ case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
return false;
}
@@ -5756,7 +5756,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data_end):
info->reg_type = PTR_TO_PACKET_END;
break;
- case bpf_ctx_range(struct __sk_buff, flow_keys):
+ case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
return false;
}
@@ -5958,7 +5958,7 @@ static bool sk_skb_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data_meta):
- case bpf_ctx_range(struct __sk_buff, flow_keys):
+ case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
return false;
}
@@ -6039,7 +6039,7 @@ static bool flow_dissector_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data_end):
info->reg_type = PTR_TO_PACKET_END;
break;
- case bpf_ctx_range(struct __sk_buff, flow_keys):
+ case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
info->reg_type = PTR_TO_FLOW_KEYS;
break;
case bpf_ctx_range(struct __sk_buff, tc_classid):
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 852dc17..426b5c8 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2422,6 +2422,12 @@ enum bpf_lwt_encap_mode {
BPF_LWT_ENCAP_SEG6_INLINE
};
+#define __bpf_md_ptr(type, name) \
+union { \
+ type name; \
+ __u64 :64; \
+} __attribute__((aligned(8)))
+
/* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure
*/
@@ -2456,7 +2462,7 @@ struct __sk_buff {
/* ... here. */
__u32 data_meta;
- struct bpf_flow_keys *flow_keys;
+ __bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
};
struct bpf_tunnel_key {
@@ -2572,8 +2578,8 @@ enum sk_action {
* be added to the end of this structure
*/
struct sk_msg_md {
- void *data;
- void *data_end;
+ __bpf_md_ptr(void *, data);
+ __bpf_md_ptr(void *, data_end);
__u32 family;
__u32 remote_ip4; /* Stored in network byte order */
@@ -2589,8 +2595,9 @@ struct sk_reuseport_md {
* Start of directly accessible data. It begins from
* the tcp/udp header.
*/
- void *data;
- void *data_end; /* End of directly accessible data */
+ __bpf_md_ptr(void *, data);
+ /* End of directly accessible data */
+ __bpf_md_ptr(void *, data_end);
/*
* Total length of packet (starting from the tcp/udp header).
* Note that the directly accessible bytes (data_end - data)
--
2.9.5
Powered by blists - more mailing lists