[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1529431217-5264-3-git-send-email-tushar.n.dave@oracle.com>
Date: Tue, 19 Jun 2018 20:00:15 +0200
From: Tushar Dave <tushar.n.dave@...cle.com>
To: ast@...nel.org, daniel@...earbox.net, davem@...emloft.net,
jakub.kicinski@...ronome.com, quentin.monnet@...ronome.com,
jiong.wang@...ronome.com, guro@...com, sandipan@...ux.vnet.ibm.com,
john.fastabend@...il.com, kafai@...com, rdna@...com, brakmo@...com,
netdev@...r.kernel.org, acme@...hat.com,
sowmini.varadhan@...cle.com
Subject: [RFC v2 PATCH 2/4] ebpf: Add sg_filter_run and sg helper
When sg_filter_run() is invoked it runs the attached eBPF
SOCKET_SG_FILTER program which deals with struct scatterlist.
In addition, this patch adds the bpf_sg_next helper function, which
allows users to retrieve the next sg element from the sg list.
Signed-off-by: Tushar Dave <tushar.n.dave@...cle.com>
Acked-by: Sowmini Varadhan <sowmini.varadhan@...cle.com>
---
include/linux/filter.h | 2 +
include/uapi/linux/bpf.h | 10 ++++-
net/core/filter.c | 72 +++++++++++++++++++++++++++++++
tools/include/uapi/linux/bpf.h | 10 ++++-
tools/testing/selftests/bpf/bpf_helpers.h | 3 ++
5 files changed, 95 insertions(+), 2 deletions(-)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 71618b1..d176402 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1072,4 +1072,6 @@ struct bpf_sock_ops_kern {
*/
};
+int sg_filter_run(struct sock *sk, struct scatterlist *sg);
+
#endif /* __LINUX_FILTER_H__ */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ef0a7b6..036432b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2076,6 +2076,13 @@ struct bpf_stack_build_id {
* Return
* A 64-bit integer containing the current cgroup id based
* on the cgroup within which the current task is running.
+ *
+ * int bpf_sg_next(struct bpf_scatterlist *sg)
+ * Description
+ * Advance the program context to the next element of the sg
+ * list so that the program can access that element's data.
+ * Return
+ * 0 on success, or **-ENODATA** if there is no next sg element.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2158,7 +2165,8 @@ struct bpf_stack_build_id {
FN(rc_repeat), \
FN(rc_keydown), \
FN(skb_cgroup_id), \
- FN(get_current_cgroup_id),
+ FN(get_current_cgroup_id), \
+ FN(sg_next),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index 8f67942..702ff5b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -121,6 +121,53 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
}
EXPORT_SYMBOL(sk_filter_trim_cap);
+/* Run the filter program attached to @sk, if any, over the sg list @sg.
+ * Returns 0 when no filter is attached or once the program has been run,
+ * and -EINVAL when a filter is attached but @sg is NULL or has no entries.
+ */
+int sg_filter_run(struct sock *sk, struct scatterlist *sg)
+{
+	struct sk_filter *filter;
+	int err = 0;	/* stays 0 unless a sanity check below fails */
+
+	rcu_read_lock();
+	filter = rcu_dereference(sk->sk_filter);
+	if (filter) {
+		struct bpf_scatterlist bpfsg;
+		int num_sg = sg ? sg_nents(sg) : 0;
+
+		if (num_sg <= 0) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		/* We store a reference to the sg list so it can later be used
+		 * by eBPF helpers to retrieve the next sg element.
+		 */
+		bpfsg.num_sg = num_sg;
+		bpfsg.cur_sg = 0;
+		bpfsg.sg = sg;
+
+		/* For the first sg element, we store the pkt access pointers
+		 * into start and end so the eBPF program can have pkt access
+		 * using data and data_end. The pkt access for subsequent
+		 * elements of the sg list is possible when the eBPF program
+		 * invokes bpf_sg_next, which takes care of setting start and
+		 * end to the correct sg element.
+		 */
+		bpfsg.start = sg_virt(sg);
+		bpfsg.end = bpfsg.start + sg->length;
+
+		/* NOTE(review): the program's return value is discarded. */
+		BPF_PROG_RUN(filter->prog, &bpfsg);
+	}
+out:
+	rcu_read_unlock();
+
+	return err;
+}
+EXPORT_SYMBOL(sg_filter_run);
+
BPF_CALL_1(bpf_skb_get_pay_offset, struct sk_buff *, skb)
{
return skb_get_poff(skb);
@@ -3753,6 +3800,29 @@ static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
.arg1_type = ARG_PTR_TO_CTX,
};
+/* Point the ctx at the next sg entry; returns 0, or -ENODATA at list end. */
+BPF_CALL_1(bpf_sg_next, struct bpf_scatterlist *, bpfsg)
+{
+	struct scatterlist *sg = bpfsg->sg;
+	int i, next = bpfsg->cur_sg + 1;
+
+	if (next >= bpfsg->num_sg)
+		return -ENODATA;
+	for (i = 0; i < next; i++)	/* sg[next] would not follow chains */
+		sg = sg_next(sg);
+	bpfsg->cur_sg = next;
+	bpfsg->start = sg_virt(sg);
+	bpfsg->end = bpfsg->start + sg->length;
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_sg_next_proto = {
+	.func		= bpf_sg_next,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,		/* 0 or a negative errno */
+	.arg1_type	= ARG_PTR_TO_CTX,	/* the program's context */
+};
+
BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
int, level, int, optname, char *, optval, int, optlen)
{
@@ -4720,6 +4790,8 @@ bool bpf_helper_changes_pkt_data(void *func)
socksg_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
+ case BPF_FUNC_sg_next:
+ return &bpf_sg_next_proto;
default:
return bpf_base_func_proto(func_id);
}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c87ae16..a298498 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2076,6 +2076,13 @@ struct bpf_stack_build_id {
* Return
* A 64-bit integer containing the current cgroup id based
* on the cgroup within which the current task is running.
+ *
+ * int bpf_sg_next(struct bpf_scatterlist *sg)
+ * Description
+ * Advance the program context to the next element of the sg
+ * list so that the program can access that element's data.
+ * Return
+ * 0 on success, or **-ENODATA** if there is no next sg element.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2158,7 +2165,8 @@ struct bpf_stack_build_id {
FN(rc_repeat), \
FN(rc_keydown), \
FN(skb_cgroup_id), \
- FN(get_current_cgroup_id),
+ FN(get_current_cgroup_id), \
+ FN(sg_next),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index f2f28b6..1997ba2 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -133,6 +133,9 @@ static int (*bpf_rc_keydown)(void *ctx, unsigned int protocol,
(void *) BPF_FUNC_rc_keydown;
static unsigned long long (*bpf_get_current_cgroup_id)(void) =
(void *) BPF_FUNC_get_current_cgroup_id;
+static unsigned long long (*bpf_sg_next)(void *ctx) =
+ (void *) BPF_FUNC_sg_next;
+
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
--
1.8.3.1
Powered by blists - more mailing lists