Message-Id: <1567892444-16344-2-git-send-email-alan.maguire@oracle.com>
Date:   Sat,  7 Sep 2019 22:40:38 +0100
From:   Alan Maguire <alan.maguire@...cle.com>
To:     ast@...nel.org, daniel@...earbox.net, kafai@...com,
        songliubraving@...com, yhs@...com, davem@...emloft.net,
        jakub.kicinski@...ronome.com, hawk@...nel.org,
        john.fastabend@...il.com, rostedt@...dmis.org, mingo@...hat.com,
        quentin.monnet@...ronome.com, rdna@...com, joe@...d.net.nz,
        acme@...hat.com, jolsa@...nel.org, alexey.budankov@...ux.intel.com,
        gregkh@...uxfoundation.org, namhyung@...nel.org, sdf@...gle.com,
        f.fainelli@...il.com, shuah@...nel.org, peter@...ensteyn.nl,
        ivan@...udflare.com, andriin@...com,
        bhole_prashant_q7@....ntt.co.jp, david.calavera@...il.com,
        danieltimlee@...il.com, ctakshak@...com, netdev@...r.kernel.org,
        bpf@...r.kernel.org, linux-kselftest@...r.kernel.org
Cc:     Alan Maguire <alan.maguire@...cle.com>
Subject: [RFC bpf-next 1/7] bpf: add bpf_pcap() helper to simplify packet capture

bpf_pcap() simplifies packet capture for skb and XDP BPF programs
by creating a BPF perf event containing the information relevant
to packet capture (protocol, actual/captured packet size, time of
capture, etc.) along with the packet payload itself.  This capture
metadata is stored in a "struct bpf_pcap_hdr".

This header information can then be retrieved from the perf
event map and used by packet capture frameworks such as libpcap
to write out the captured packets; a sketch of such a consumer
follows.
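
For illustration, a minimal sketch of such a userspace consumer is
shown below.  It assumes the patched UAPI header is available, that
the raw perf sample has already been unwrapped (e.g. by a perf ring
buffer reader), and that "dumper" was obtained via pcap_dump_open();
the handler name is hypothetical.

#include <pcap/pcap.h>
#include <linux/bpf.h>

/* dumper is assumed to come from something like
 * pcap_dump_open(pcap_open_dead(DLT_EN10MB, 1514), "capture.pcap").
 */
static void handle_pcap_sample(pcap_dumper_t *dumper, void *data, int size)
{
	struct bpf_pcap_hdr *hdr = data;
	struct pcap_pkthdr pkthdr = { 0 };

	if (size < (int)sizeof(*hdr) || hdr->magic != BPF_PCAP_MAGIC)
		return;		/* not a bpf_pcap() event */

	pkthdr.caplen = hdr->cap_len;
	pkthdr.len = hdr->tot_len;
	/* pkthdr.ts must be derived from hdr->ktime_ns; see the note
	 * on time normalization below.
	 */
	pcap_dump((u_char *)dumper, &pkthdr, hdr->data);
}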

skb and XDP programs currently deal in Ethernet-based traffic
exclusively, so they should specify BPF_PCAP_TYPE_ETH or
BPF_PCAP_TYPE_UNSET.  The protocol parameter will be used
in a later commit.

Note that libpcap expects times relative to the epoch, while
bpf_pcap() records nanoseconds since boot.  As a result, captured
times need to be normalized against the boot time before being
handed to libpcap for storage; sysinfo(2) can be used to retrieve
the boot time for this purpose, as sketched below.
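
A possible conversion helper is sketched here; the function name is
illustrative only, and the boot time derived from sysinfo(2) uptime
is an approximation.

#include <sys/sysinfo.h>
#include <sys/time.h>
#include <linux/types.h>
#include <time.h>

static void ktime_ns_to_timeval(__u64 ktime_ns, struct timeval *tv)
{
	struct sysinfo si;
	time_t boot_time;

	sysinfo(&si);
	/* si.uptime is seconds since boot, so this approximates boot time. */
	boot_time = time(NULL) - si.uptime;

	tv->tv_sec = boot_time + ktime_ns / 1000000000ULL;
	tv->tv_usec = (ktime_ns % 1000000000ULL) / 1000;
}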

Example usage for a tc-bpf program:

struct bpf_map_def SEC("maps") pcap_map = {
	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
	.key_size = sizeof(int),
	.value_size = sizeof(int),
	.max_entries = 1024,
};

SEC("cap")
int cap(struct __sk_buff *skb)
{
	/* Capture up to 1514 bytes of this packet as an Ethernet frame. */
	bpf_pcap(skb, 1514, &pcap_map, BPF_PCAP_TYPE_ETH, 0);

	return TC_ACT_OK;
}

Signed-off-by: Alan Maguire <alan.maguire@...cle.com>
---
 include/linux/bpf.h      | 20 +++++++++++++
 include/uapi/linux/bpf.h | 75 +++++++++++++++++++++++++++++++++++++++++++++++-
 kernel/bpf/verifier.c    |  4 ++-
 net/core/filter.c        | 67 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 164 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5b9d223..033c9cf 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1145,4 +1145,24 @@ static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
 }
 #endif /* CONFIG_INET */
 
+
+static inline int bpf_pcap_prepare(int protocol, u32 cap_len, u32 tot_len,
+				   u64 flags, struct bpf_pcap_hdr *pcap)
+{
+	if (protocol < 0 || pcap == NULL)
+		return -EINVAL;
+
+	pcap->magic = BPF_PCAP_MAGIC;
+	pcap->protocol = protocol;
+	pcap->flags = flags;
+
+	if (cap_len == 0 || tot_len < cap_len)
+		cap_len = tot_len;
+	pcap->cap_len = cap_len;
+	pcap->tot_len = tot_len;
+	pcap->ktime_ns = ktime_get_mono_fast_ns();
+
+	return 0;
+}
+
 #endif /* _LINUX_BPF_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 77c6be9..a27e58e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2750,6 +2750,39 @@ struct bpf_stack_build_id {
  *		**-EOPNOTSUPP** kernel configuration does not enable SYN cookies
  *
  *		**-EPROTONOSUPPORT** IP packet version is not 4 or 6
+ *
+ * int bpf_pcap(void *data, u32 size, struct bpf_map *map, int protocol,
+ *		u64 flags)
+ *	Description
+ *		Write packet data from *data* into a special BPF perf event
+ *              held by *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This
+ *		perf event has the same attributes as perf events generated
+ *		by bpf_perf_event_output.  For skb and xdp programs, *data*
+ *		is the relevant context.
+ *
+ *		Metadata for this event is a **struct bpf_pcap_hdr**; this
+ *		contains the capture length, actual packet length and
+ *		the starting protocol.
+ *
+ *		The maximum number of bytes of packet data to store is
+ *		specified via *size*.
+ *
+ *		The flags value can be used to specify an id value of up
+ *		to 48 bits; the id can be used to correlate captured packets
+ *		with other trace data, since the passed-in flags value is
+ *		stored in the **flags** field of the **struct bpf_pcap_hdr**.
+ *
+ *		The *protocol* value specifies the protocol type at the start
+ *		of the packet so that packet capture tools can interpret
+ *		the data correctly.  See **pcap-linktype** (7) for details
+ *		on the supported values.
+ *
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *		**-ENOENT** will be returned if the associated perf event
+ *		map entry is empty, or the skb is zero-length.
+ *		**-EINVAL** will be returned if the flags value is invalid.
+ *
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2862,7 +2895,8 @@ struct bpf_stack_build_id {
 	FN(sk_storage_get),		\
 	FN(sk_storage_delete),		\
 	FN(send_signal),		\
-	FN(tcp_gen_syncookie),
+	FN(tcp_gen_syncookie),		\
+	FN(pcap),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -2941,6 +2975,9 @@ enum bpf_func_id {
 /* BPF_FUNC_sk_storage_get flags */
 #define BPF_SK_STORAGE_GET_F_CREATE	(1ULL << 0)
 
+/* BPF_FUNC_pcap flags */
+#define BPF_F_PCAP_ID_MASK		0xffffffffffff
+
 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
 	BPF_ADJ_ROOM_NET,
@@ -3613,4 +3650,40 @@ struct bpf_sockopt {
 	__s32	retval;
 };
 
+/* bpf_pcap_hdr contains information related to a particular packet capture
+ * flow.  It specifies
+ *
+ * - a magic number BPF_PCAP_MAGIC which identifies the perf event as
+ *   a pcap-related event.
+ * - a starting protocol, i.e. the protocol of the first header in the
+ *   captured data.
+ * - a flags value, copied from the flags passed into bpf_pcap(); IDs
+ *   encoded there can be used to correlate capture and other trace data.
+ *
+ * bpf_pcap_hdr also describes the to-be-captured packet, and closely
+ * corresponds to the struct pcap_pkthdr used by pcap_dump (3PCAP).  The
+ * bpf_pcap helper sets ktime_ns to the capture time in nanoseconds since
+ * boot; to get sensible pcap times this value should be converted to a
+ * struct timeval relative to the epoch in the struct pcap_pkthdr.
+ *
+ * A "struct bpf_pcap_hdr" is stored with each captured packet since we
+ * need information about both the packet and the protocol being captured.
+ */
+
+#define BPF_PCAP_MAGIC		0xb7fca7
+
+struct bpf_pcap_hdr {
+	__u32			magic;
+	int			protocol;
+	__u64			flags;
+	__u64			ktime_ns;
+	__u32			tot_len;
+	__u32			cap_len;
+	__u8			data[0];
+};
+
+#define BPF_PCAP_TYPE_UNSET	-1
+#define BPF_PCAP_TYPE_ETH	1
+#define BPF_PCAP_TYPE_IP	12
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 3fb5075..a33ed24 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3440,7 +3440,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
 		if (func_id != BPF_FUNC_perf_event_read &&
 		    func_id != BPF_FUNC_perf_event_output &&
-		    func_id != BPF_FUNC_perf_event_read_value)
+		    func_id != BPF_FUNC_perf_event_read_value &&
+		    func_id != BPF_FUNC_pcap)
 			goto error;
 		break;
 	case BPF_MAP_TYPE_STACK_TRACE:
@@ -3527,6 +3528,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 	case BPF_FUNC_perf_event_read:
 	case BPF_FUNC_perf_event_output:
 	case BPF_FUNC_perf_event_read_value:
+	case BPF_FUNC_pcap:
 		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
 			goto error;
 		break;
diff --git a/net/core/filter.c b/net/core/filter.c
index ed65636..e0e23ee 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4158,6 +4158,35 @@ static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
 	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
 };
 
+BPF_CALL_5(bpf_xdp_pcap, struct xdp_buff *, xdp, u32, size,
+	   struct bpf_map *, map, int, protocol, u64, flags)
+{
+	unsigned long len = (unsigned long)(xdp->data_end - xdp->data);
+	struct bpf_pcap_hdr pcap;
+	int ret;
+
+	if (unlikely(flags & ~BPF_F_PCAP_ID_MASK))
+		return -EINVAL;
+
+	ret = bpf_pcap_prepare(protocol, size, len, flags, &pcap);
+	if (ret)
+		return ret;
+
+	return bpf_event_output(map, BPF_F_CURRENT_CPU, &pcap, sizeof(pcap),
+				xdp->data, pcap.cap_len, bpf_xdp_copy);
+}
+
+static const struct bpf_func_proto bpf_xdp_pcap_proto = {
+	.func		= bpf_xdp_pcap,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_CONST_MAP_PTR,
+	.arg4_type	= ARG_ANYTHING,
+	.arg5_type	= ARG_ANYTHING,
+};
+
 BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
 {
 	return skb->sk ? sock_gen_cookie(skb->sk) : 0;
@@ -5926,6 +5955,34 @@ u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
 
 #endif /* CONFIG_INET */
 
+BPF_CALL_5(bpf_skb_pcap, struct sk_buff *, skb, u32, size,
+	   struct bpf_map *, map, int, protocol, u64, flags)
+{
+	struct bpf_pcap_hdr pcap;
+	int ret;
+
+	if (unlikely(flags & ~BPF_F_PCAP_ID_MASK))
+		return -EINVAL;
+
+	ret = bpf_pcap_prepare(protocol, size, skb->len, flags, &pcap);
+	if (ret)
+		return ret;
+
+	return bpf_event_output(map, BPF_F_CURRENT_CPU, &pcap, sizeof(pcap),
+				skb, pcap.cap_len, bpf_skb_copy);
+}
+
+static const struct bpf_func_proto bpf_skb_pcap_proto = {
+	.func		= bpf_skb_pcap,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_CONST_MAP_PTR,
+	.arg4_type	= ARG_ANYTHING,
+	.arg5_type	= ARG_ANYTHING,
+};
+
 bool bpf_helper_changes_pkt_data(void *func)
 {
 	if (func == bpf_skb_vlan_push ||
@@ -6075,6 +6132,8 @@ bool bpf_helper_changes_pkt_data(void *func)
 		return &bpf_get_socket_uid_proto;
 	case BPF_FUNC_perf_event_output:
 		return &bpf_skb_event_output_proto;
+	case BPF_FUNC_pcap:
+		return &bpf_skb_pcap_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -6216,6 +6275,8 @@ bool bpf_helper_changes_pkt_data(void *func)
 	case BPF_FUNC_tcp_gen_syncookie:
 		return &bpf_tcp_gen_syncookie_proto;
 #endif
+	case BPF_FUNC_pcap:
+		return &bpf_skb_pcap_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -6256,6 +6317,8 @@ bool bpf_helper_changes_pkt_data(void *func)
 		return &bpf_tcp_check_syncookie_proto;
 	case BPF_FUNC_tcp_gen_syncookie:
 		return &bpf_tcp_gen_syncookie_proto;
+	case BPF_FUNC_pcap:
+		return &bpf_xdp_pcap_proto;
 #endif
 	default:
 		return bpf_base_func_proto(func_id);
@@ -6361,6 +6424,8 @@ bool bpf_helper_changes_pkt_data(void *func)
 	case BPF_FUNC_skc_lookup_tcp:
 		return &bpf_skc_lookup_tcp_proto;
 #endif
+	case BPF_FUNC_pcap:
+		return &bpf_skb_pcap_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -6399,6 +6464,8 @@ bool bpf_helper_changes_pkt_data(void *func)
 		return &bpf_get_smp_processor_id_proto;
 	case BPF_FUNC_skb_under_cgroup:
 		return &bpf_skb_under_cgroup_proto;
+	case BPF_FUNC_pcap:
+		return &bpf_skb_pcap_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
-- 
1.8.3.1
