[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220427070644.319661-1-imagedong@tencent.com>
Date: Wed, 27 Apr 2022 15:06:44 +0800
From: menglong8.dong@...il.com
To: ast@...nel.org
Cc: daniel@...earbox.net, andrii@...nel.org, kafai@...com,
songliubraving@...com, yhs@...com, john.fastabend@...il.com,
kpsingh@...nel.org, rostedt@...dmis.org, mingo@...hat.com,
netdev@...r.kernel.org, bpf@...r.kernel.org,
linux-kernel@...r.kernel.org,
Menglong Dong <imagedong@...cent.com>,
Jiang Biao <benbjiang@...cent.com>,
Hao Peng <flyingpeng@...cent.com>
Subject: [PATCH bpf-next] net: bpf: support direct packet access in tracing program
From: Menglong Dong <imagedong@...cent.com>
For now, eBPF program of type TRACING is able to access the arguments
of the function or raw_tracepoint directly, which makes data access
easier and more efficient. And we can also output the raw data in skb to
user space in tracing program by bpf_skb_output() helper.
However, we still can't access the packet data in 'struct sk_buff'
directly and have to use the helper bpf_probe_read_kernel() to analyse
packet data.
Network tools, which are based on eBPF TRACING, often do packet analysis
work in tracing programs for filtering, statistics, etc. For example,
we want to trace abnormal skb free through the 'kfree_skb' tracepoint for
a specific IP address or TCP port.
In this patch, 2 helpers are introduced: bpf_skb_get_header() and
bpf_skb_get_end(). The pointer returned by bpf_skb_get_header() has
the same effect as the 'data' in 'struct __sk_buff', and
bpf_skb_get_end() has the same effect as the 'data_end'.
Therefore, we can now access packet data in tracing programs in this
way:
SEC("fentry/icmp_rcv")
int BPF_PROG(tracing_open, struct sk_buff* skb)
{
void *data, *data_end;
struct ethhdr *eth;
data = bpf_skb_get_header(skb, BPF_SKB_HEADER_MAC);
data_end = bpf_skb_get_end(skb);
if (!data || !data_end)
return 0;
if (data + sizeof(*eth) > data_end)
return 0;
eth = data;
bpf_printk("proto:%d\n", bpf_ntohs(eth->h_proto));
return 0;
}
With any positive reply, I'll complete the selftest programs.
Reviewed-by: Jiang Biao <benbjiang@...cent.com>
Reviewed-by: Hao Peng <flyingpeng@...cent.com>
Signed-off-by: Menglong Dong <imagedong@...cent.com>
---
include/linux/bpf.h | 4 +++
include/uapi/linux/bpf.h | 29 ++++++++++++++++++++
kernel/bpf/verifier.c | 6 +++++
kernel/trace/bpf_trace.c | 58 ++++++++++++++++++++++++++++++++++++++++
4 files changed, 97 insertions(+)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index bdb5298735ce..69dff736331a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -419,6 +419,8 @@ enum bpf_return_type {
RET_PTR_TO_ALLOC_MEM, /* returns a pointer to dynamically allocated memory */
RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */
RET_PTR_TO_BTF_ID, /* returns a pointer to a btf_id */
+ RET_PTR_TO_PACKET, /* returns a pointer to packet */
+ RET_PTR_TO_PACKET_END, /* returns a pointer to skb->data + headlen */
__BPF_RET_TYPE_MAX,
/* Extended ret_types. */
@@ -428,6 +430,8 @@ enum bpf_return_type {
RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON,
RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_ALLOC_MEM,
RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID,
+ RET_PTR_TO_PACKET_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_PACKET,
+ RET_PTR_TO_PACKET_END_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_PACKET_END,
/* This must be the last entry. Its purpose is to ensure the enum is
* wide enough to hold the higher bits reserved for bpf_type_flag.
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d14b10b85e51..841f6e7216f4 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5143,6 +5143,27 @@ union bpf_attr {
* The **hash_algo** is returned on success,
* **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if
* invalid arguments are passed.
+ *
+ * void *bpf_skb_get_header(struct sk_buff *skb, u32 flags)
+ * Description
+ * Get packet header from skb in tracing program, which can
+ * be accessed (read) directly. According to the *flags*,
+ * different packet header is returned:
+ *
+ * **BPF_SKB_HEADER_MAC**: get mac (L2) header
+ * **BPF_SKB_HEADER_NETWORK**: get network (L3) header
+ * **BPF_SKB_HEADER_TRANSPORT**:
+ * get transport (L4) header
+ * Return
+ * The pointer to packet header on success, NULL on failure.
+ *
+ * void *bpf_skb_get_end(struct sk_buff *skb)
+ * Description
+ * Get packet head end pointer from skb in tracing program,
+ * which is equal to *data_end* in *struct __sk_buff*.
+ * Return
+ * The pointer to packet head end on success, and NULL on
+ * failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -5339,6 +5360,8 @@ union bpf_attr {
FN(copy_from_user_task), \
FN(skb_set_tstamp), \
FN(ima_file_hash), \
+ FN(skb_get_header), \
+ FN(skb_get_end), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -5537,6 +5560,12 @@ enum {
*/
};
+enum {
+ BPF_SKB_HEADER_MAC,
+ BPF_SKB_HEADER_NETWORK,
+ BPF_SKB_HEADER_TRANSPORT,
+};
+
/* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure
*/
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9c1a02b82ecd..caf4e09cc114 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6877,6 +6877,12 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
*/
regs[BPF_REG_0].btf = btf_vmlinux;
regs[BPF_REG_0].btf_id = ret_btf_id;
+ } else if (base_type(ret_type) == RET_PTR_TO_PACKET) {
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ regs[BPF_REG_0].type = PTR_TO_PACKET | ret_flag;
+ } else if (base_type(ret_type) == RET_PTR_TO_PACKET_END) {
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ regs[BPF_REG_0].type = PTR_TO_PACKET_END | ret_flag;
} else {
verbose(env, "unknown return type %u of func %s#%d\n",
base_type(ret_type), func_id_name(func_id), func_id);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b26f3da943de..6f2cd30aac07 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -810,6 +810,60 @@ static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
.arg2_type = ARG_ANYTHING,
};
+BPF_CALL_2(bpf_skb_get_header, struct sk_buff *, skb, u32, flags)
+{
+ void *header = NULL;
+
+ if (!skb)
+ return (unsigned long)NULL;
+
+ switch (flags) {
+ case BPF_SKB_HEADER_MAC:
+ if (skb_mac_header_was_set(skb) && skb->mac_header)
+ header = skb_mac_header(skb);
+ break;
+ case BPF_SKB_HEADER_TRANSPORT:
+ if (skb_transport_header_was_set(skb) &&
+ skb->transport_header)
+ header = skb_transport_header(skb);
+ break;
+ case BPF_SKB_HEADER_NETWORK:
+ if (skb->network_header)
+ header = skb_network_header(skb);
+ break;
+ default:
+ break;
+ }
+
+ return (unsigned long)header;
+}
+
+BTF_ID_LIST_SINGLE(bpf_get_skb_ids, struct, sk_buff);
+static const struct bpf_func_proto bpf_skb_get_header_proto = {
+ .func = bpf_skb_get_header,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_PACKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &bpf_get_skb_ids[0],
+ .arg2_type = ARG_ANYTHING,
+};
+
+BPF_CALL_1(bpf_skb_get_end, struct sk_buff *, skb)
+{
+ if (!skb)
+ return (unsigned long)NULL;
+
+ return (unsigned long)skb->data + skb_headlen(skb);
+}
+
+static const struct bpf_func_proto bpf_skb_get_end_proto = {
+ .func = bpf_skb_get_end,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_PACKET_END_OR_NULL,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &bpf_get_skb_ids[0],
+};
+
struct send_signal_irq_work {
struct irq_work irq_work;
struct task_struct *task;
@@ -1282,6 +1336,10 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_branch_snapshot_proto;
case BPF_FUNC_find_vma:
return &bpf_find_vma_proto;
+ case BPF_FUNC_skb_get_header:
+ return &bpf_skb_get_header_proto;
+ case BPF_FUNC_skb_get_end:
+ return &bpf_skb_get_end_proto;
case BPF_FUNC_trace_vprintk:
return bpf_get_trace_vprintk_proto();
default:
--
2.36.0
Powered by blists - more mailing lists