[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1529431217-5264-2-git-send-email-tushar.n.dave@oracle.com>
Date: Tue, 19 Jun 2018 20:00:14 +0200
From: Tushar Dave <tushar.n.dave@...cle.com>
To: ast@...nel.org, daniel@...earbox.net, davem@...emloft.net,
jakub.kicinski@...ronome.com, quentin.monnet@...ronome.com,
jiong.wang@...ronome.com, guro@...com, sandipan@...ux.vnet.ibm.com,
john.fastabend@...il.com, kafai@...com, rdna@...com, brakmo@...com,
netdev@...r.kernel.org, acme@...hat.com,
sowmini.varadhan@...cle.com
Subject: [RFC v2 PATCH 1/4] eBPF: Add new eBPF prog type BPF_PROG_TYPE_SOCKET_SG_FILTER
Add new eBPF prog type BPF_PROG_TYPE_SOCKET_SG_FILTER which uses the
existing socket filter infrastructure for bpf program attach and load.
SOCKET_SG_FILTER eBPF program receives struct scatterlist as bpf context
contrast to SOCKET_FILTER which deals with struct skb. This is useful
for kernel entities that don't have skb to represent packet data but
want to run eBPF socket filter on packet data that is in form of struct
scatterlist e.g. IB/RDMA
Signed-off-by: Tushar Dave <tushar.n.dave@...cle.com>
Acked-by: Sowmini Varadhan <sowmini.varadhan@...cle.com>
---
include/linux/bpf_types.h | 1 +
include/linux/filter.h | 8 +++++
include/uapi/linux/bpf.h | 7 ++++
kernel/bpf/syscall.c | 1 +
kernel/bpf/verifier.c | 1 +
net/core/filter.c | 77 ++++++++++++++++++++++++++++++++++++++++--
samples/bpf/bpf_load.c | 11 ++++--
tools/bpf/bpftool/prog.c | 1 +
tools/include/uapi/linux/bpf.h | 7 ++++
tools/lib/bpf/libbpf.c | 3 ++
tools/lib/bpf/libbpf.h | 2 ++
11 files changed, 114 insertions(+), 5 deletions(-)
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index c5700c2..f8b4b56 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -16,6 +16,7 @@
BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops)
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb)
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_SG_FILTER, socksg_filter)
#endif
#ifdef CONFIG_BPF_EVENTS
BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 45fc0f5..71618b1 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -517,6 +517,14 @@ struct bpf_skb_data_end {
void *data_end;
};
+struct bpf_scatterlist {
+ struct scatterlist *sg;
+ void *start;
+ void *end;
+ int cur_sg;
+ int num_sg;
+};
+
struct sk_msg_buff {
void *data;
void *data_end;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 59b19b6..ef0a7b6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -144,6 +144,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
BPF_PROG_TYPE_LWT_SEG6LOCAL,
BPF_PROG_TYPE_LIRC_MODE2,
+ BPF_PROG_TYPE_SOCKET_SG_FILTER,
};
enum bpf_attach_type {
@@ -2358,6 +2359,12 @@ enum sk_action {
SK_PASS,
};
+/* use accessible scatterlist */
+struct sg_filter_md {
+ void *data; /* sg_virt(sg) */
+ void *data_end; /* sg_virt(sg) + sg->length */
+};
+
/* user accessible metadata for SK_MSG packet hook, new fields must
* be added to the end of this structure
*/
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0fa2062..74193a8 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1300,6 +1300,7 @@ static int bpf_prog_load(union bpf_attr *attr)
if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
type != BPF_PROG_TYPE_CGROUP_SKB &&
+ type != BPF_PROG_TYPE_SOCKET_SG_FILTER &&
!capable(CAP_SYS_ADMIN))
return -EPERM;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d6403b5..a00d3eb 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1320,6 +1320,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
case BPF_PROG_TYPE_LWT_XMIT:
case BPF_PROG_TYPE_SK_SKB:
case BPF_PROG_TYPE_SK_MSG:
+ case BPF_PROG_TYPE_SOCKET_SG_FILTER:
if (meta)
return meta->pkt_access;
diff --git a/net/core/filter.c b/net/core/filter.c
index 3d9ba7e..8f67942 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1130,7 +1130,8 @@ static void bpf_release_orig_filter(struct bpf_prog *fp)
static void __bpf_prog_release(struct bpf_prog *prog)
{
- if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) {
+ if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER ||
+ prog->type == BPF_PROG_TYPE_SOCKET_SG_FILTER) {
bpf_prog_put(prog);
} else {
bpf_release_orig_filter(prog);
@@ -1551,10 +1552,16 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
{
+ struct bpf_prog *prog;
+
if (sock_flag(sk, SOCK_FILTER_LOCKED))
return ERR_PTR(-EPERM);
- return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
+ prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
+ if (IS_ERR(prog))
+ prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_SG_FILTER);
+
+ return prog;
}
int sk_attach_bpf(u32 ufd, struct sock *sk)
@@ -4710,6 +4717,15 @@ bool bpf_helper_changes_pkt_data(void *func)
}
static const struct bpf_func_proto *
+socksg_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+}
+
+static const struct bpf_func_proto *
tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
@@ -5037,6 +5053,30 @@ static bool sk_filter_is_valid_access(int off, int size,
return bpf_skb_is_valid_access(off, size, type, prog, info);
}
+static bool socksg_filter_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ switch (off) {
+ case offsetof(struct sg_filter_md, data):
+ info->reg_type = PTR_TO_PACKET;
+ break;
+ case offsetof(struct sg_filter_md, data_end):
+ info->reg_type = PTR_TO_PACKET_END;
+ break;
+ }
+
+ if (off < 0 || off >= sizeof(struct sg_filter_md))
+ return false;
+ if (off % size != 0)
+ return false;
+ if (size != sizeof(__u64))
+ return false;
+
+ return true;
+}
+
static bool lwt_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
@@ -6516,6 +6556,30 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
+static u32 socksg_filter_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog,
+ u32 *target_size)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (si->off) {
+ case offsetof(struct sg_filter_md, data):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_scatterlist, start),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_scatterlist, start));
+ break;
+ case offsetof(struct sg_filter_md, data_end):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_scatterlist, end),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_scatterlist, end));
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
@@ -6654,6 +6718,15 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
.test_run = bpf_prog_test_run_skb,
};
+const struct bpf_verifier_ops socksg_filter_verifier_ops = {
+ .get_func_proto = socksg_filter_func_proto,
+ .is_valid_access = socksg_filter_is_valid_access,
+ .convert_ctx_access = socksg_filter_convert_ctx_access,
+};
+
+const struct bpf_prog_ops socksg_filter_prog_ops = {
+};
+
const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
.get_func_proto = tc_cls_act_func_proto,
.is_valid_access = tc_cls_act_is_valid_access,
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 89161c9..15c355e 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -69,6 +69,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
bool is_sockops = strncmp(event, "sockops", 7) == 0;
bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0;
bool is_sk_msg = strncmp(event, "sk_msg", 6) == 0;
+ bool is_socksg = strncmp(event, "socksg", 6) == 0;
+
size_t insns_cnt = size / sizeof(struct bpf_insn);
enum bpf_prog_type prog_type;
char buf[256];
@@ -102,6 +104,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
prog_type = BPF_PROG_TYPE_SK_SKB;
} else if (is_sk_msg) {
prog_type = BPF_PROG_TYPE_SK_MSG;
+ } else if (is_socksg) {
+ prog_type = BPF_PROG_TYPE_SOCKET_SG_FILTER;
} else {
printf("Unknown event '%s'\n", event);
return -1;
@@ -119,8 +123,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
return 0;
- if (is_socket || is_sockops || is_sk_skb || is_sk_msg) {
- if (is_socket)
+ if (is_socket || is_sockops || is_sk_skb || is_sk_msg || is_socksg) {
+ if (is_socket || is_socksg)
event += 6;
else
event += 7;
@@ -624,7 +628,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
memcmp(shname, "cgroup/", 7) == 0 ||
memcmp(shname, "sockops", 7) == 0 ||
memcmp(shname, "sk_skb", 6) == 0 ||
- memcmp(shname, "sk_msg", 6) == 0) {
+ memcmp(shname, "sk_msg", 6) == 0 ||
+ memcmp(shname, "socksg", 6) == 0) {
ret = load_and_attach(shname, data->d_buf,
data->d_size);
if (ret != 0)
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index a4f4352..06b2fef 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -72,6 +72,7 @@
[BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
[BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
[BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
+ [BPF_PROG_TYPE_SOCKET_SG_FILTER] = "socket_sg_filter",
};
static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e0b0678..c87ae16 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -144,6 +144,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
BPF_PROG_TYPE_LWT_SEG6LOCAL,
BPF_PROG_TYPE_LIRC_MODE2,
+ BPF_PROG_TYPE_SOCKET_SG_FILTER,
};
enum bpf_attach_type {
@@ -2358,6 +2359,12 @@ enum sk_action {
SK_PASS,
};
+/* use accessible scatterlist */
+struct sg_filter_md {
+ void *data; /* sg_virt(sg) */
+ void *data_end; /* sg_virt(sg) + sg->length */
+};
+
/* user accessible metadata for SK_MSG packet hook, new fields must
* be added to the end of this structure
*/
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index a1e96b5..7628278 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1463,6 +1463,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
case BPF_PROG_TYPE_SK_MSG:
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
case BPF_PROG_TYPE_LIRC_MODE2:
+ case BPF_PROG_TYPE_SOCKET_SG_FILTER:
return false;
case BPF_PROG_TYPE_UNSPEC:
case BPF_PROG_TYPE_KPROBE:
@@ -1998,6 +1999,7 @@ static bool bpf_program__is_type(struct bpf_program *prog,
BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
+BPF_PROG_TYPE_FNS(socket_sg_filter, BPF_PROG_TYPE_SOCKET_SG_FILTER);
void bpf_program__set_expected_attach_type(struct bpf_program *prog,
enum bpf_attach_type type)
@@ -2048,6 +2050,7 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog,
BPF_SA_PROG_SEC("cgroup/sendmsg6", BPF_CGROUP_UDP6_SENDMSG),
BPF_S_PROG_SEC("cgroup/post_bind4", BPF_CGROUP_INET4_POST_BIND),
BPF_S_PROG_SEC("cgroup/post_bind6", BPF_CGROUP_INET6_POST_BIND),
+ BPF_PROG_SEC("socksg", BPF_PROG_TYPE_SOCKET_SG_FILTER),
};
#undef BPF_PROG_SEC
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 0997653..3be165b 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -195,6 +195,7 @@ int bpf_program__set_prep(struct bpf_program *prog, int nr_instance,
void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type);
void bpf_program__set_expected_attach_type(struct bpf_program *prog,
enum bpf_attach_type type);
+int bpf_program__set_socket_sg_filter(struct bpf_program *prog);
bool bpf_program__is_socket_filter(struct bpf_program *prog);
bool bpf_program__is_tracepoint(struct bpf_program *prog);
@@ -204,6 +205,7 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog,
bool bpf_program__is_sched_act(struct bpf_program *prog);
bool bpf_program__is_xdp(struct bpf_program *prog);
bool bpf_program__is_perf_event(struct bpf_program *prog);
+bool bpf_program__is_socket_sg_filter(struct bpf_program *prog);
/*
* No need for __attribute__((packed)), all members of 'bpf_map_def'
--
1.8.3.1
Powered by blists - more mailing lists