lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Date:   Thu, 19 Sep 2019 15:45:42 -0700
From:   Matthew Cover <werekraken@...il.com>
To:     davem@...emloft.net, ast@...nel.org, daniel@...earbox.net,
        kafai@...com, songliubraving@...com, yhs@...com,
        nikolay@...ulusnetworks.com, sd@...asysnail.net,
        sbrivio@...hat.com, vincent@...nat.ch, kda@...ux-powerpc.org,
        matthew.cover@...ckpath.com, jiri@...lanox.com,
        edumazet@...gle.com, pabeni@...hat.com, idosch@...lanox.com,
        petrm@...lanox.com, f.fainelli@...il.com,
        stephen@...workplumber.org, dsahern@...il.com,
        christian@...uner.io, jakub.kicinski@...ronome.com,
        roopa@...ulusnetworks.com, johannes.berg@...el.com,
        mkubecek@...e.cz, netdev@...r.kernel.org,
        linux-kernel@...r.kernel.org, bpf@...r.kernel.org
Subject: [RFC net-next 1/2] Introduce an eBPF hookpoint for tx queue selection in the XPS (Transmit Packet Steering) code.

WORK IN PROGRESS:
  * bpf program loading works!
  * txq steering via bpf program return code works!
  * bpf program unloading not working.
  * bpf program attached query not working.
---
 include/linux/netdevice.h    |  3 +++
 include/uapi/linux/if_link.h | 12 +++++++++
 net/core/dev.c               | 61 ++++++++++++++++++++++++++++++++++++-------
 net/core/rtnetlink.c         | 62 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 129 insertions(+), 9 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9eda1c3..88e37d5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1966,6 +1966,7 @@ struct net_device {
 #ifdef CONFIG_XPS
 	struct xps_dev_maps __rcu *xps_cpus_map;
 	struct xps_dev_maps __rcu *xps_rxqs_map;
+	struct bpf_prog __rcu     *xps_prog;
 #endif
 #ifdef CONFIG_NET_CLS_ACT
 	struct mini_Qdisc __rcu	*miniq_egress;
@@ -2147,6 +2148,8 @@ struct netdev_queue *netdev_core_pick_tx(struct net_device *dev,
 					 struct sk_buff *skb,
 					 struct net_device *sb_dev);
 
+int dev_change_xps_fd(struct net_device *dev, int fd);
+
 /* returns the headroom that the master device needs to take in account
  * when forwarding to this dev
  */
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 4a8c02c..a23d241 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -167,6 +167,7 @@ enum {
 	IFLA_NEW_IFINDEX,
 	IFLA_MIN_MTU,
 	IFLA_MAX_MTU,
+	IFLA_XPS,
 	__IFLA_MAX
 };
 
@@ -979,6 +980,17 @@ enum {
 
 #define IFLA_XDP_MAX (__IFLA_XDP_MAX - 1)
 
+/* XPS section */
+
+enum {
+	IFLA_XPS_UNSPEC,
+	IFLA_XPS_FD,
+	IFLA_XPS_ATTACHED,
+	__IFLA_XPS_MAX,
+};
+
+#define IFLA_XPS_MAX (__IFLA_XPS_MAX - 1)
+
 enum {
 	IFLA_EVENT_NONE,
 	IFLA_EVENT_REBOOT,		/* internal reset / reboot */
diff --git a/net/core/dev.c b/net/core/dev.c
index 71b18e8..a46d42b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3663,26 +3663,34 @@ static int get_xps_queue(struct net_device *dev, struct net_device *sb_dev,
 {
 #ifdef CONFIG_XPS
 	struct xps_dev_maps *dev_maps;
+	struct bpf_prog *prog;
 	struct sock *sk = skb->sk;
+	int bpf_ret = -1;
 	int queue_index = -1;
 
 	if (!static_key_false(&xps_needed))
 		return -1;
 
 	rcu_read_lock();
-	if (!static_key_false(&xps_rxqs_needed))
-		goto get_cpus_map;
 
-	dev_maps = rcu_dereference(sb_dev->xps_rxqs_map);
-	if (dev_maps) {
-		int tci = sk_rx_queue_get(sk);
+	prog = rcu_dereference(dev->xps_prog);
+	if (prog) {
+		bpf_ret = bpf_prog_run_clear_cb(prog, skb);
+		if (bpf_ret >= 0)
+			queue_index = bpf_ret % dev->num_tx_queues;
+	}
 
-		if (tci >= 0 && tci < dev->num_rx_queues)
-			queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
-							  tci);
+	if (queue_index < 0 && static_key_false(&xps_rxqs_needed)) {
+		dev_maps = rcu_dereference(sb_dev->xps_rxqs_map);
+		if (dev_maps) {
+			int tci = sk_rx_queue_get(sk);
+
+			if (tci >= 0 && tci < dev->num_rx_queues)
+				queue_index = __get_xps_queue_idx(dev, skb,
+								dev_maps, tci);
+		}
 	}
 
-get_cpus_map:
 	if (queue_index < 0) {
 		dev_maps = rcu_dereference(sb_dev->xps_cpus_map);
 		if (dev_maps) {
@@ -8170,6 +8178,41 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 	return err;
 }
 
+static void dev_xps_install(struct net_device *dev, struct bpf_prog *prog)
+{
+#ifdef CONFIG_XPS
+	struct bpf_prog *old = rtnl_dereference(dev->xps_prog);
+	struct bpf_prog *new = prog;
+
+	rcu_assign_pointer(dev->xps_prog, new);
+	if (old)
+		bpf_prog_put(old);
+#endif
+}
+
+/**
+ *	dev_change_xps_fd - set or clear a bpf program for tx queue selection for a device
+ *	@dev: device
+ *	@fd: new program fd or negative value to clear
+ *
+ *	Set or clear a bpf program for a device
+ */
+int dev_change_xps_fd(struct net_device *dev, int fd)
+{
+	struct bpf_prog *prog = NULL;
+
+	ASSERT_RTNL();
+
+	prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
+
+	if (IS_ERR(prog))
+		return PTR_ERR(prog);
+
+	dev_xps_install(dev, prog);
+
+	return 0;
+}
+
 /**
  *	dev_new_index	-	allocate an ifindex
  *	@net: the applicable net namespace
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1ee6460..202b59a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -980,6 +980,15 @@ static size_t rtnl_xdp_size(void)
 	return xdp_size;
 }
 
+static size_t rtnl_xps_size(void)
+{
+	size_t xps_size = nla_total_size(0) +	/* nest IFLA_XPS */
+			  nla_total_size(1) +	/* XPS_ATTACHED */
+			  nla_total_size(4);	/* XPS_PROG_ID */
+
+	return xps_size;
+}
+
 static noinline size_t if_nlmsg_size(const struct net_device *dev,
 				     u32 ext_filter_mask)
 {
@@ -1018,6 +1027,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
 	       + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
 	       + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
 	       + rtnl_xdp_size() /* IFLA_XDP */
+	       + rtnl_xps_size() /* IFLA_XPS */
 	       + nla_total_size(4)  /* IFLA_EVENT */
 	       + nla_total_size(4)  /* IFLA_NEW_NETNSID */
 	       + nla_total_size(4)  /* IFLA_NEW_IFINDEX */
@@ -1455,6 +1465,31 @@ static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
 	return err;
 }
 
+static int rtnl_xps_fill(struct sk_buff *skb, struct net_device *dev)
+{
+	struct nlattr *xps;
+	struct bpf_prog *xps_prog;
+	int err;
+
+	ASSERT_RTNL();
+
+	xps = nla_nest_start(skb, IFLA_XPS);
+	if (!xps)
+		return -EMSGSIZE;
+
+	xps_prog = rtnl_dereference(dev->xps_prog);
+	if (xps_prog) {
+		err = nla_put_u8(skb, IFLA_XPS_ATTACHED, xps_prog->aux->id);
+		if (err) {
+			nla_nest_cancel(skb, xps);
+			return err;
+		}
+	}
+
+	nla_nest_end(skb, xps);
+	return 0;
+}
+
 static u32 rtnl_get_event(unsigned long event)
 {
 	u32 rtnl_event_type = IFLA_EVENT_NONE;
@@ -1697,6 +1732,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
 		goto nla_put_failure_rcu;
 	rcu_read_unlock();
 
+	if (rtnl_xps_fill(skb, dev))
+		goto nla_put_failure;
+
 	nlmsg_end(skb, nlh);
 	return 0;
 
@@ -1750,6 +1788,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
 	[IFLA_CARRIER_DOWN_COUNT] = { .type = NLA_U32 },
 	[IFLA_MIN_MTU]		= { .type = NLA_U32 },
 	[IFLA_MAX_MTU]		= { .type = NLA_U32 },
+	[IFLA_XPS]		= { .type = NLA_NESTED },
 };
 
 static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -1801,6 +1840,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
 	[IFLA_XDP_PROG_ID]	= { .type = NLA_U32 },
 };
 
+static const struct nla_policy ifla_xps_policy[IFLA_XPS_MAX + 1] = {
+	[IFLA_XPS_FD]		= { .type = NLA_S32 },
+	[IFLA_XPS_ATTACHED]	= { .type = NLA_U8 },
+};
+
 static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla)
 {
 	const struct rtnl_link_ops *ops = NULL;
@@ -2709,6 +2753,24 @@ static int do_setlink(const struct sk_buff *skb,
 		}
 	}
 
+	if (tb[IFLA_XPS]) {
+		struct nlattr  *xps[IFLA_XPS_MAX + 1];
+
+		err = nla_parse_nested_deprecated(xps, IFLA_XPS_MAX,
+						  tb[IFLA_XPS],
+						  ifla_xps_policy, NULL);
+		if (err < 0)
+			goto errout;
+
+		if (xps[IFLA_XPS_FD]) {
+			err = dev_change_xps_fd(dev,
+						nla_get_s32(xps[IFLA_XPS_FD]));
+			if (err)
+				goto errout;
+			status |= DO_SETLINK_NOTIFY;
+		}
+	}
+
 errout:
 	if (status & DO_SETLINK_MODIFIED) {
 		if ((status & DO_SETLINK_NOTIFY) == DO_SETLINK_NOTIFY)
-- 
1.8.3.1

Powered by blists - more mailing lists