[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20231015141644.260646-6-akihiko.odaki@daynix.com>
Date: Sun, 15 Oct 2023 23:16:33 +0900
From: Akihiko Odaki <akihiko.odaki@...nix.com>
To:
Cc: Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Andrii Nakryiko <andrii@...nel.org>,
Martin KaFai Lau <martin.lau@...ux.dev>,
Song Liu <song@...nel.org>,
Yonghong Song <yonghong.song@...ux.dev>,
John Fastabend <john.fastabend@...il.com>,
KP Singh <kpsingh@...nel.org>,
Stanislav Fomichev <sdf@...gle.com>,
Hao Luo <haoluo@...gle.com>,
Jiri Olsa <jolsa@...nel.org>,
Jonathan Corbet <corbet@....net>,
Willem de Bruijn <willemdebruijn.kernel@...il.com>,
Jason Wang <jasowang@...hat.com>,
"David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>,
"Michael S. Tsirkin" <mst@...hat.com>,
Xuan Zhuo <xuanzhuo@...ux.alibaba.com>,
Mykola Lysenko <mykolal@...com>,
Shuah Khan <shuah@...nel.org>,
bpf@...r.kernel.org,
linux-doc@...r.kernel.org,
linux-kernel@...r.kernel.org,
netdev@...r.kernel.org,
kvm@...r.kernel.org,
virtualization@...ts.linux-foundation.org,
linux-kselftest@...r.kernel.org,
Yuri Benditovich <yuri.benditovich@...nix.com>,
Andrew Melnychenko <andrew@...nix.com>,
Akihiko Odaki <akihiko.odaki@...nix.com>
Subject: [RFC PATCH v2 5/7] tun: Support BPF_PROG_TYPE_VNET_HASH
Support BPF_PROG_TYPE_VNET_HASH with the TUNSETSTEERINGEBPF ioctl to make
it possible to report hash values and types when steering packets.
Signed-off-by: Akihiko Odaki <akihiko.odaki@...nix.com>
---
drivers/net/tun.c | 158 ++++++++++++++++++++++++++++++++++------------
1 file changed, 117 insertions(+), 41 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 89ab9efe522c..e0b453572a64 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -543,19 +543,37 @@ static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb)
{
+ struct bpf_skb_vnet_hash_end *cb = (struct bpf_skb_vnet_hash_end *)skb->cb;
+ struct tun_vnet_hash *ext;
struct tun_prog *prog;
u32 numqueues;
- u16 ret = 0;
+ u16 queue = 0;
+
+ BUILD_BUG_ON(sizeof(*cb) > sizeof(skb->cb));
numqueues = READ_ONCE(tun->numqueues);
if (!numqueues)
return 0;
prog = rcu_dereference(tun->steering_prog);
- if (prog)
- ret = bpf_prog_run_clear_cb(prog->prog, skb);
+ if (prog) {
+ if (prog->prog->type == BPF_PROG_TYPE_VNET_HASH) {
+ memset(skb->cb, 0, sizeof(*cb) - sizeof(struct qdisc_skb_cb));
+ bpf_prog_run_clear_cb(prog->prog, skb);
+
+ ext = skb_ext_add(skb, SKB_EXT_TUN_VNET_HASH);
+ if (ext) {
+ ext->value = cb->hash_value;
+ ext->report = cb->hash_report;
+ }
- return ret % numqueues;
+ queue = cb->rss_queue;
+ } else {
+ queue = bpf_prog_run_clear_cb(prog->prog, skb);
+ }
+ }
+
+ return queue % numqueues;
}
static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
@@ -2116,31 +2134,74 @@ static ssize_t tun_put_user(struct tun_struct *tun,
}
if (vnet_hdr_sz) {
- struct virtio_net_hdr gso;
+ struct bpf_skb_vnet_hash_end *cb = (struct bpf_skb_vnet_hash_end *)skb->cb;
+ struct tun_prog *prog;
+ struct tun_vnet_hash *vnet_hash_p;
+ struct tun_vnet_hash vnet_hash;
+ size_t vnet_hdr_content_sz = sizeof(struct virtio_net_hdr);
+ union {
+ struct virtio_net_hdr hdr;
+ struct virtio_net_hdr_v1_hash hdr_v1_hash;
+ } vnet_hdr;
+ int ret;
if (iov_iter_count(iter) < vnet_hdr_sz)
return -EINVAL;
- if (virtio_net_hdr_from_skb(skb, &gso,
- tun_is_little_endian(tun), true,
- vlan_hlen)) {
+ if (vnet_hdr_sz >= sizeof(struct virtio_net_hdr_v1_hash)) {
+ vnet_hash_p = skb_ext_find(skb, SKB_EXT_TUN_VNET_HASH);
+ if (vnet_hash_p) {
+ vnet_hash = *vnet_hash_p;
+ vnet_hdr_content_sz = sizeof(struct virtio_net_hdr_v1_hash);
+ } else {
+ rcu_read_lock();
+ prog = rcu_dereference(tun->steering_prog);
+ if (prog && prog->prog->type == BPF_PROG_TYPE_VNET_HASH) {
+ memset(skb->cb, 0,
+ sizeof(*cb) - sizeof(struct qdisc_skb_cb));
+ bpf_prog_run_clear_cb(prog->prog, skb);
+ vnet_hash.value = cb->hash_value;
+ vnet_hash.report = cb->hash_report;
+ vnet_hdr_content_sz =
+ sizeof(struct virtio_net_hdr_v1_hash);
+ }
+ rcu_read_unlock();
+ }
+ }
+
+ switch (vnet_hdr_content_sz) {
+ case sizeof(struct virtio_net_hdr):
+ ret = virtio_net_hdr_from_skb(skb, &vnet_hdr.hdr,
+ tun_is_little_endian(tun), true,
+ vlan_hlen);
+ break;
+
+ case sizeof(struct virtio_net_hdr_v1_hash):
+ ret = virtio_net_hdr_v1_hash_from_skb(skb, &vnet_hdr.hdr_v1_hash,
+ tun_is_little_endian(tun), true,
+ vlan_hlen,
+ vnet_hash.value, vnet_hash.report);
+ break;
+ }
+
+ if (ret) {
struct skb_shared_info *sinfo = skb_shinfo(skb);
pr_err("unexpected GSO type: "
"0x%x, gso_size %d, hdr_len %d\n",
- sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size),
- tun16_to_cpu(tun, gso.hdr_len));
+ sinfo->gso_type, tun16_to_cpu(tun, vnet_hdr.hdr.gso_size),
+ tun16_to_cpu(tun, vnet_hdr.hdr.hdr_len));
print_hex_dump(KERN_ERR, "tun: ",
DUMP_PREFIX_NONE,
16, 1, skb->head,
- min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true);
+ min((int)tun16_to_cpu(tun, vnet_hdr.hdr.hdr_len), 64), true);
WARN_ON_ONCE(1);
return -EINVAL;
}
- if (copy_to_iter(&gso, sizeof(gso), iter) != sizeof(gso))
+ if (copy_to_iter(&vnet_hdr, vnet_hdr_content_sz, iter) != vnet_hdr_content_sz)
return -EFAULT;
- iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso));
+ iov_iter_advance(iter, vnet_hdr_sz - vnet_hdr_content_sz);
}
if (vlan_hlen) {
@@ -2276,13 +2337,13 @@ static void tun_prog_free(struct rcu_head *rcu)
{
struct tun_prog *prog = container_of(rcu, struct tun_prog, rcu);
- bpf_prog_destroy(prog->prog);
+ bpf_prog_put(prog->prog);
kfree(prog);
}
-static int __tun_set_ebpf(struct tun_struct *tun,
- struct tun_prog __rcu **prog_p,
- struct bpf_prog *prog)
+static int tun_set_ebpf(struct tun_struct *tun,
+ struct tun_prog __rcu **prog_p,
+ struct bpf_prog *prog)
{
struct tun_prog *old, *new = NULL;
@@ -2314,8 +2375,8 @@ static void tun_free_netdev(struct net_device *dev)
free_percpu(dev->tstats);
tun_flow_uninit(tun);
security_tun_dev_free_security(tun->security);
- __tun_set_ebpf(tun, &tun->steering_prog, NULL);
- __tun_set_ebpf(tun, &tun->filter_prog, NULL);
+ tun_set_ebpf(tun, &tun->steering_prog, NULL);
+ tun_set_ebpf(tun, &tun->filter_prog, NULL);
}
static void tun_setup(struct net_device *dev)
@@ -3007,26 +3068,6 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
return ret;
}
-static int tun_set_ebpf(struct tun_struct *tun, struct tun_prog __rcu **prog_p,
- void __user *data)
-{
- struct bpf_prog *prog;
- int fd;
-
- if (copy_from_user(&fd, data, sizeof(fd)))
- return -EFAULT;
-
- if (fd == -1) {
- prog = NULL;
- } else {
- prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
- if (IS_ERR(prog))
- return PTR_ERR(prog);
- }
-
- return __tun_set_ebpf(tun, prog_p, prog);
-}
-
/* Return correct value for tun->dev->addr_len based on tun->dev->type. */
static unsigned char tun_get_addr_len(unsigned short type)
{
@@ -3077,6 +3118,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
struct ifreq ifr;
kuid_t owner;
kgid_t group;
+ struct bpf_prog *prog;
+ int fd;
int sndbuf;
int vnet_hdr_sz;
int le;
@@ -3360,11 +3403,44 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
break;
case TUNSETSTEERINGEBPF:
- ret = tun_set_ebpf(tun, &tun->steering_prog, argp);
+ if (copy_from_user(&fd, argp, sizeof(fd))) {
+ ret = -EFAULT;
+ break;
+ }
+
+ if (fd == -1) {
+ prog = NULL;
+ } else {
+ prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_VNET_HASH);
+ if (IS_ERR(prog)) {
+ prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
+ if (IS_ERR(prog)) {
+ ret = PTR_ERR(prog);
+ break;
+ }
+ }
+ }
+
+ ret = tun_set_ebpf(tun, &tun->steering_prog, prog);
break;
case TUNSETFILTEREBPF:
- ret = tun_set_ebpf(tun, &tun->filter_prog, argp);
+ if (copy_from_user(&fd, argp, sizeof(fd))) {
+ ret = -EFAULT;
+ break;
+ }
+
+ if (fd == -1) {
+ prog = NULL;
+ } else {
+ prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
+ if (IS_ERR(prog)) {
+ ret = PTR_ERR(prog);
+ break;
+ }
+ }
+
+ ret = tun_set_ebpf(tun, &tun->filter_prog, prog);
break;
case TUNSETCARRIER:
--
2.42.0
Powered by blists - more mailing lists