netdev - [RFC net-next PATCH 4/5] net: new XDP feature for reading HW rxhash from drivers

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <149512210827.14733.13997041998775151648.stgit@firesoul>
Date:   Thu, 18 May 2017 17:41:48 +0200
From:   Jesper Dangaard Brouer <brouer@...hat.com>
To:     Daniel Borkmann <borkmann@...earbox.net>,
        Alexei Starovoitov <alexei.starovoitov@...il.com>
Cc:     John Fastabend <john.r.fastabend@...el.com>,
        netdev@...r.kernel.org, Jesper Dangaard Brouer <brouer@...hat.com>
Subject: [RFC net-next PATCH 4/5] net: new XDP feature for reading HW rxhash
 from drivers

Introducing a new XDP feature and associated bpf helper bpf_xdp_rxhash.

The rxhash and type allow filtering on packets without touching
packet memory.  The performance difference on my system with a
100 Gbit/s mlx5 NIC is 12Mpps to 19Mpps.

TODO: describe RXHASH and its associated type, and how XDP chooses to map
and export these to bpf_progs.

TODO: describe how this interacts with the XDP driver features system.
---
 include/linux/filter.h          |   31 ++++++++++++++++-
 include/linux/netdev_features.h |    4 ++
 include/uapi/linux/bpf.h        |   56 +++++++++++++++++++++++++++++-
 kernel/bpf/verifier.c           |    3 ++
 net/core/dev.c                  |   16 ++++++++-
 net/core/filter.c               |   73 +++++++++++++++++++++++++++++++++++++++
 samples/bpf/bpf_helpers.h       |    2 +
 tools/include/uapi/linux/bpf.h  |   10 +++++
 8 files changed, 190 insertions(+), 5 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 9a7786db14fa..33a254ccd47d 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -413,7 +413,8 @@ struct bpf_prog {
 				locked:1,	/* Program image locked? */
 				gpl_compatible:1, /* Is filter GPL compatible? */
 				cb_access:1,	/* Is control block accessed? */
-				dst_needed:1;	/* Do we need dst entry? */
+				dst_needed:1,	/* Do we need dst entry? */
+				xdp_rxhash_needed:1;/* Req XDP RXHASH support */
 	kmemcheck_bitfield_end(meta);
 	enum bpf_prog_type	type;		/* Type of BPF program */
 	u32			len;		/* Number of filter blocks */
@@ -444,12 +445,40 @@ struct bpf_skb_data_end {
 	void *data_end;
 };
 
+/* Kernel internal xdp_buff->flags */
+#define XDP_CTX_F_RXHASH_TYPE_MASK	XDP_HASH_TYPE_MASK
+#define XDP_CTX_F_RXHASH_TYPE_BITS	XDP_HASH_TYPE_BITS
+#define XDP_CTX_F_RXHASH_SW		(1ULL <<  XDP_CTX_F_RXHASH_TYPE_BITS)
+#define XDP_CTX_F_RXHASH_HW		(1ULL << (XDP_CTX_F_RXHASH_TYPE_BITS+1))
+
 struct xdp_buff {
 	void *data;
 	void *data_end;
 	void *data_hard_start;
+	u64 flags;	/* XDP_CTX_F_* bits; low bits hold the rxhash type */
+	u32 rxhash;	/* hash value recorded by driver or set by bpf_prog */
 };
 
+/* Driver helper: record the HW rxhash and its XDP hash type in xdp_buff */
+static inline void
+xdp_record_hash(struct xdp_buff *xdp, u32 hash, u32 type)
+{
+	xdp->flags |= XDP_CTX_F_RXHASH_HW;
+	xdp->flags |= type & XDP_CTX_F_RXHASH_TYPE_MASK;
+	xdp->rxhash = hash;
+}
+
+/* Copy recorded rxhash into skb; SW/L4 flags are derived from xdp->flags */
+static inline void xdp_set_skb_hash(struct xdp_buff *xdp, struct sk_buff *skb)
+{
+	if (likely(xdp->flags & (XDP_CTX_F_RXHASH_HW|XDP_CTX_F_RXHASH_SW))) {
+		bool is_sw = !!(xdp->flags & XDP_CTX_F_RXHASH_SW);
+		bool is_l4 = !!(xdp->flags & XDP_HASH_TYPE_L4_MASK);
+
+		__skb_set_hash(skb, xdp->rxhash, is_sw, is_l4);
+	}
+}
+
 /* compute the linear packet data range [data, data_end) which
  * will be accessed by cls_bpf, act_bpf and lwt programs
  */
diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index ff81ee231410..4b50e8c606c5 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -219,11 +219,13 @@ enum {
 /* XDP driver flags */
 enum {
 	XDP_DRV_F_ENABLED_BIT,
+	XDP_DRV_F_RXHASH_BIT,
 };
 
 #define __XDP_DRV_F_BIT(bit)	((netdev_features_t)1 << (bit))
 #define __XDP_DRV_F(name)	__XDP_DRV_F_BIT(XDP_DRV_F_##name##_BIT)
 #define XDP_DRV_F_ENABLED	__XDP_DRV_F(ENABLED)
+#define XDP_DRV_F_RXHASH	__XDP_DRV_F(RXHASH)
 
 /* XDP driver MUST support these features, else kernel MUST reject
  * bpf_prog to guarantee safe access to data structures
@@ -233,7 +235,7 @@ enum {
 /* Some XDP features are under development. Based on bpf_prog loading
  * detect if kernel feature can be activated.
  */
-#define XDP_DRV_FEATURES_DEVEL		0
+#define XDP_DRV_FEATURES_DEVEL		XDP_DRV_F_RXHASH
 
 /* Some XDP features are optional, like action return code, as they
  * are handled safely runtime.
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 945a1f5f63c5..1d9d3a46217d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -482,6 +482,9 @@ union bpf_attr {
  *     Get the owner uid of the socket stored inside sk_buff.
  *     @skb: pointer to skb
  *     Return: uid of the socket owner on success or overflowuid if failed.
+ *
+ * u64 bpf_xdp_rxhash(xdp_md, new_hash, type, flags)
+ *	TODO: MISSING DESC
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -531,7 +534,8 @@ union bpf_attr {
 	FN(xdp_adjust_head),		\
 	FN(probe_read_str),		\
 	FN(get_socket_cookie),		\
-	FN(get_socket_uid),
+	FN(get_socket_uid),		\
+	FN(xdp_rxhash),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -581,6 +585,10 @@ enum bpf_func_id {
 /* BPF_FUNC_perf_event_output for sk_buff input context. */
 #define BPF_F_CTXLEN_MASK		(0xfffffULL << 32)
 
+/* BPF_FUNC_xdp_rxhash flags */
+#define BPF_F_RXHASH_SET		0ULL
+#define BPF_F_RXHASH_GET		(1ULL << 0)
+
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
  */
@@ -660,6 +668,52 @@ enum xdp_action {
 struct xdp_md {
 	__u32 data;
 	__u32 data_end;
+	__u32 rxhash;
+	/* (FIXME delete comment)
+	 * Discussion: If choosing to support direct read, then I
+	 * believe having a separate 'rxhash_type' is easier and
+	 * faster to implement. (Else I have to do BPF instruction
+	 * hacks to move the type into upper bits of 'rxhash', which I
+	 * couldn't figure out how to do ;-))
+	 */
+	__u32 rxhash_type;
 };
 
+/* XDP rxhash has an associated type, which is related to the RSS
+ * (Receive Side Scaling) standard, but NIC HW differs in mapping
+ * and support. Thus, create a mapping that is interesting for XDP.  XDP
+ * would primarily want insight into L3 and L4 protocol info.
+ *
+ * TODO: Likely needs extending with "L3_IPV6_EX" due to the RSS standard
+ *
+ * The HASH_TYPE will be returned from the bpf helper as the top 32 bits of
+ * the 64-bit rxhash (internally the type is stored in xdp_buff->flags).
+ */
+#define XDP_HASH(x)		((x) & ((1ULL << 32)-1))
+#define XDP_HASH_TYPE(x)	((x) >> 32)
+
+#define XDP_HASH_TYPE_L3_SHIFT	0
+#define XDP_HASH_TYPE_L3_BITS	3
+#define XDP_HASH_TYPE_L3_MASK	((1ULL << XDP_HASH_TYPE_L3_BITS)-1)
+#define XDP_HASH_TYPE_L3(x)	((x) & XDP_HASH_TYPE_L3_MASK)
+enum {
+	XDP_HASH_TYPE_L3_IPV4 = 1,
+	XDP_HASH_TYPE_L3_IPV6,
+};
+
+#define XDP_HASH_TYPE_L4_SHIFT	XDP_HASH_TYPE_L3_BITS
+#define XDP_HASH_TYPE_L4_BITS	5
+#define XDP_HASH_TYPE_L4_MASK						\
+	(((1ULL << XDP_HASH_TYPE_L4_BITS)-1) << XDP_HASH_TYPE_L4_SHIFT)
+#define XDP_HASH_TYPE_L4(x)	((x) & XDP_HASH_TYPE_L4_MASK)
+enum {
+	_XDP_HASH_TYPE_L4_TCP = 1,
+	_XDP_HASH_TYPE_L4_UDP,
+};
+#define XDP_HASH_TYPE_L4_TCP (_XDP_HASH_TYPE_L4_TCP << XDP_HASH_TYPE_L4_SHIFT)
+#define XDP_HASH_TYPE_L4_UDP (_XDP_HASH_TYPE_L4_UDP << XDP_HASH_TYPE_L4_SHIFT)
+
+#define XDP_HASH_TYPE_BITS   (XDP_HASH_TYPE_L3_BITS + XDP_HASH_TYPE_L4_BITS)
+#define XDP_HASH_TYPE_MASK   (XDP_HASH_TYPE_L3_MASK | XDP_HASH_TYPE_L4_MASK)
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 6f8b6ed690be..248bc113ad18 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3346,6 +3346,8 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 			prog->dst_needed = 1;
 		if (insn->imm == BPF_FUNC_get_prandom_u32)
 			bpf_user_rnd_init_once();
+		if (insn->imm == BPF_FUNC_xdp_rxhash)
+			prog->xdp_rxhash_needed = 1;
 		if (insn->imm == BPF_FUNC_tail_call) {
 			/* If we tail call into other programs, we
 			 * cannot make any assumptions since they can
@@ -3353,6 +3355,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 			 * the program array.
 			 */
 			prog->cb_access = 1;
+			prog->xdp_rxhash_needed = 1;
 
 			/* mark bpf_tail_call as different opcode to avoid
 			 * conditional branch in the interpeter for every normal
diff --git a/net/core/dev.c b/net/core/dev.c
index b4af5fbbd9da..28082067ac00 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4318,9 +4318,13 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 	xdp.data_end = xdp.data + hlen;
 	xdp.data_hard_start = skb->data - skb_headroom(skb);
 	orig_data = xdp.data;
+	xdp.flags  = 0;
+	xdp.rxhash = skb->hash;
 
 	act = bpf_prog_run_xdp(xdp_prog, &xdp);
 
+	xdp_set_skb_hash(&xdp, skb);
+
 	off = xdp.data - orig_data;
 	if (off > 0)
 		__skb_pull(skb, off);
@@ -6851,10 +6855,20 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
 }
 EXPORT_SYMBOL(dev_change_proto_down);
 
+netdev_features_t bpf_get_xdp_features(struct bpf_prog *prog)
+{
+	/* Start from the mandatory set, then add what this prog needs */
+	netdev_features_t req = XDP_DRV_FEATURES_REQUIRED;
+
+	if (prog->xdp_rxhash_needed)
+		req |= XDP_DRV_F_RXHASH;
+	return req;
+}
+
 bool xdp_features_check(struct net_device *dev, struct bpf_prog *xdp_prog,
 			struct netlink_ext_ack *extack, u32 flags)
 {
-	netdev_features_t req_features = XDP_DRV_FEATURES_REQUIRED;
+	netdev_features_t req_features = bpf_get_xdp_features(xdp_prog);
 	netdev_features_t dev_xdp_features;
 
 	/* Generic XDP naturally support all features */
diff --git a/net/core/filter.c b/net/core/filter.c
index a253a6197e6b..df04ac73f581 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2272,6 +2272,54 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_4(bpf_xdp_rxhash, struct xdp_buff *, xdp, u32, new_hash, u32, type,
+	   unsigned int, flags)
+{
+	/* Read+write access to xdp_buff->rxhash is safe, because
+	 * fixup_bpf_calls() detects when the helper is used, and drivers
+	 * not implementing rxhash will not be allowed to load bpf_prog.
+	 * Returns rxhash | (type << 32), or 0 on GET type mismatch/no hash.
+	 */
+
+	/* Set hash and type; old type bits are replaced, not merged */
+	if (flags == BPF_F_RXHASH_SET) {
+		xdp->rxhash = new_hash;
+		xdp->flags &= ~XDP_CTX_F_RXHASH_TYPE_MASK;
+		xdp->flags |= XDP_CTX_F_RXHASH_SW; /* Need for skb "is_sw" */
+		xdp->flags |= type & XDP_CTX_F_RXHASH_TYPE_MASK;
+	}
+
+	/* Get can specify which "type" it is interested in */
+	if ((flags == BPF_F_RXHASH_GET) &&
+	    (type & XDP_CTX_F_RXHASH_TYPE_MASK)) {
+		u32 f_type = (xdp->flags & XDP_CTX_F_RXHASH_TYPE_MASK);
+		u32 diff = type ^ f_type;
+
+		/* Match on either L3 or L4 type rxhash; fail if both differ */
+		if ((diff & XDP_HASH_TYPE_L3_MASK) &&
+		    (diff & XDP_HASH_TYPE_L4_MASK))
+			return 0;
+	}
+
+	/* Drivers only call xdp_record_hash() if NETIF_F_RXHASH is enabled */
+	if (likely(xdp->flags & (XDP_CTX_F_RXHASH_HW|XDP_CTX_F_RXHASH_SW))) {
+		u64 rxhash_type = xdp->flags & XDP_CTX_F_RXHASH_TYPE_MASK;
+
+		return (u64)(xdp->rxhash | (rxhash_type << 32));
+	}
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_rxhash_proto = {
+	.func           = bpf_xdp_rxhash,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER, // Q: how do I say "u64" ?
+	.arg1_type      = ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_ANYTHING,
+	.arg3_type      = ARG_ANYTHING,
+	.arg4_type      = ARG_ANYTHING,
+};
+
 bool bpf_helper_changes_pkt_data(void *func)
 {
 	if (func == bpf_skb_vlan_push ||
@@ -2760,6 +2808,8 @@ xdp_func_proto(enum bpf_func_id func_id)
 		return &bpf_get_smp_processor_id_proto;
 	case BPF_FUNC_xdp_adjust_head:
 		return &bpf_xdp_adjust_head_proto;
+	case BPF_FUNC_xdp_rxhash:
+		return &bpf_xdp_rxhash_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -3308,6 +3358,29 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
 				      si->dst_reg, si->src_reg,
 				      offsetof(struct xdp_buff, data_end));
 		break;
+	case offsetof(struct xdp_md, rxhash):
+		/* Direct read-access to rxhash is safe, as drivers
+		 * not implementing will not be allowed to load bpf_prog.
+		 *
+		 * Driver gotchas: Even if NETIF_F_RXHASH is disabled
+		 * drivers must init xdp_buff->rxhash, due to this
+		 * direct read.
+		 */
+		prog->xdp_rxhash_needed = 1;
+
+		BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_buff, rxhash) != 4);
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+				      offsetof(struct xdp_buff, rxhash));
+		break;
+	case offsetof(struct xdp_md, rxhash_type):
+		/* rxhash_type stored in lower 8-bits of xdp_buff->flags */
+		prog->xdp_rxhash_needed = 1;
+
+		BUILD_BUG_ON(XDP_HASH_TYPE_BITS != 8);
+		/* Load first 8 bits (BPF_B) of flags */
+		*insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
+				      offsetof(struct xdp_buff, flags));
+		break;
 	}
 
 	return insn - insn_buf;
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 9a9c95f2c9fb..634a976a02c6 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -59,6 +59,8 @@ static unsigned long long (*bpf_get_prandom_u32)(void) =
 	(void *) BPF_FUNC_get_prandom_u32;
 static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
 	(void *) BPF_FUNC_xdp_adjust_head;
+static unsigned long long (*bpf_xdp_rxhash)(void *ctx, __u32 new_hash, __u32 type, unsigned int flags) =
+	(void *) BPF_FUNC_xdp_rxhash;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e553529929f6..a38c544bf6f0 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -483,6 +483,9 @@ union bpf_attr {
  *     @skb: pointer to skb
  *     Return: uid of the socket owner on success or 0 if the socket pointer
  *     inside sk_buff is NULL
+ *
+ * u64 bpf_xdp_rxhash(xdp_md, new_hash, type, flags)
+ *	FIXME: Copy desc from include/uapi/linux/bpf.h
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -532,7 +535,8 @@ union bpf_attr {
 	FN(xdp_adjust_head),		\
 	FN(probe_read_str),		\
 	FN(get_socket_cookie),		\
-	FN(get_socket_uid),
+	FN(get_socket_uid),		\
+	FN(xdp_rxhash),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -661,6 +665,10 @@ enum xdp_action {
 struct xdp_md {
 	__u32 data;
 	__u32 data_end;
+	__u32 rxhash;
+	__u32 rxhash_type;
 };
 
+// FIXME: Sync with include/uapi/linux/bpf.h
+
 #endif /* _UAPI__LINUX_BPF_H__ */