[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <yc5vdkuwpyrr7mfjp6ohqf4fzq4avgjh5pwrmox7rhipwnh7nk@26cyegopbks2>
Date: Tue, 18 Feb 2025 09:52:02 +0100
From: Stefano Garzarella <sgarzare@...hat.com>
To: Michal Luczaj <mhal@...x.co>
Cc: John Fastabend <john.fastabend@...il.com>,
Jakub Sitnicki <jakub@...udflare.com>, Eric Dumazet <edumazet@...gle.com>,
Kuniyuki Iwashima <kuniyu@...zon.com>, Paolo Abeni <pabeni@...hat.com>,
Willem de Bruijn <willemb@...gle.com>, "David S. Miller" <davem@...emloft.net>,
Jakub Kicinski <kuba@...nel.org>, Simon Horman <horms@...nel.org>,
"Michael S. Tsirkin" <mst@...hat.com>, Bobby Eshleman <bobby.eshleman@...edance.com>,
Alexei Starovoitov <ast@...nel.org>, Daniel Borkmann <daniel@...earbox.net>,
Andrii Nakryiko <andrii@...nel.org>, Martin KaFai Lau <martin.lau@...ux.dev>,
Eduard Zingerman <eddyz87@...il.com>, Song Liu <song@...nel.org>,
Yonghong Song <yonghong.song@...ux.dev>, KP Singh <kpsingh@...nel.org>,
Stanislav Fomichev <sdf@...ichev.me>, Hao Luo <haoluo@...gle.com>, Jiri Olsa <jolsa@...nel.org>,
Mykola Lysenko <mykolal@...com>, Shuah Khan <shuah@...nel.org>, netdev@...r.kernel.org,
bpf@...r.kernel.org, linux-kernel@...r.kernel.org, linux-kselftest@...r.kernel.org
Subject: Re: [PATCH net 1/4] sockmap, vsock: For connectible sockets allow
only connected
On Fri, Feb 14, 2025 at 02:11:48PM +0100, Michal Luczaj wrote:
>> ...
>> Another design detail is that listening vsocks are not supposed to have any
>> transport assigned at all. Which implies they are not supported by the
>> sockmap. But this is complicated by the fact that a socket, before
>> switching to TCP_LISTEN, may have had some transport assigned during a
>> failed connect() attempt. Hence, we may end up with a listening vsock in a
>> sockmap, which blows up quickly:
>>
>> KASAN: null-ptr-deref in range [0x0000000000000120-0x0000000000000127]
>> CPU: 7 UID: 0 PID: 56 Comm: kworker/7:0 Not tainted 6.14.0-rc1+
>> Workqueue: vsock-loopback vsock_loopback_work
>> RIP: 0010:vsock_read_skb+0x4b/0x90
>> Call Trace:
>> sk_psock_verdict_data_ready+0xa4/0x2e0
>> virtio_transport_recv_pkt+0x1ca8/0x2acc
>> vsock_loopback_work+0x27d/0x3f0
>> process_one_work+0x846/0x1420
>> worker_thread+0x5b3/0xf80
>> kthread+0x35a/0x700
>> ret_from_fork+0x2d/0x70
>> ret_from_fork_asm+0x1a/0x30
>
>Perhaps I should have expanded more on the null-ptr-deref itself.
>
>The idea is: force a vsock into assigning a transport and add it to the
>sockmap (with a verdict program), but keep it unconnected. Then, drop
>the transport and set the vsock to TCP_LISTEN. The moment a new
>connection is established:
>
>virtio_transport_recv_pkt()
> virtio_transport_recv_listen()
> sk->sk_data_ready(sk) i.e. sk_psock_verdict_data_ready()
> ops->read_skb() i.e. vsock_read_skb()
> vsk->transport->read_skb() vsk->transport is NULL, boom
>
Yes, I agree, it's a little clearer with this, but I think the concept
was already clear before. So with or without:
Acked-by: Stefano Garzarella <sgarzare@...hat.com>
>Here's a stand-alone repro:
>
>/*
> * # modprobe -a vsock_loopback vhost_vsock
> * # gcc test.c && ./a.out
> */
>#include <stdio.h>
>#include <stdint.h>
>#include <stdlib.h>
>#include <unistd.h>
>#include <errno.h>
>#include <sys/socket.h>
>#include <sys/syscall.h>
>#include <linux/bpf.h>
>#include <linux/vm_sockets.h>
>
>static void die(const char *msg) /* fatal-error helper: reports msg and terminates the process */
>{
> perror(msg); /* prints msg followed by the text for the current errno */
> exit(-1);
>}
>
>static int sockmap_create(void) /* creates a one-entry BPF sockmap and returns the syscall result (map fd); dies on failure */
>{
> union bpf_attr attr = {
> .map_type = BPF_MAP_TYPE_SOCKMAP,
> .key_size = sizeof(int),
> .value_size = sizeof(int),
> .max_entries = 1 /* one slot suffices: main() only stores a single socket, at key 0 */
> };
> int map;
>
> map = syscall(SYS_bpf, BPF_MAP_CREATE, &attr, sizeof(attr)); /* raw bpf(2) syscall, no libbpf involved */
> if (map < 0)
> die("map_create");
>
> return map;
>}
>
>static void map_update_elem(int fd, int key, int value) /* stores value under key in the BPF map fd; dies on failure */
>{
> union bpf_attr attr = {
> .map_fd = fd,
> .key = (uint64_t)&key, /* addresses of the by-value parameters are passed as 64-bit integers */
> .value = (uint64_t)&value,
> .flags = BPF_ANY /* NOTE(review): BPF_ANY should mean "create or overwrite" - confirm in linux/bpf.h */
> };
>
> if (syscall(SYS_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr))) /* any non-zero result is treated as an error */
> die("map_update_elem");
>}
>
>static int prog_load(void) /* loads a two-instruction BPF_PROG_TYPE_SK_SKB program and returns the syscall result (prog fd); dies on failure */
>{
> /* mov %r0, 1; exit */
> struct bpf_insn insns[] = {
> { .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = 0, .imm = 1 }, /* r0 = 1; presumably SK_PASS when used as a verdict - confirm against enum sk_action */
> { .code = BPF_JMP | BPF_EXIT }
> };
> union bpf_attr attr = {
> .prog_type = BPF_PROG_TYPE_SK_SKB,
> .insn_cnt = sizeof(insns)/sizeof(insns[0]), /* number of instructions in insns[] */
> .insns = (long)insns,
> .license = (long)"", /* empty license string */
> };
>
> int prog = syscall(SYS_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
> if (prog < 0)
> die("prog_load");
>
> return prog;
>}
>
>static void link_create(int prog_fd, int target_fd) /* links prog_fd to target_fd with attach type BPF_SK_SKB_VERDICT; dies on failure */
>{
> union bpf_attr attr = {
> .link_create = {
> .prog_fd = prog_fd,
> .target_fd = target_fd, /* main() passes the sockmap fd here */
> .attach_type = BPF_SK_SKB_VERDICT
> }
> };
>
> if (syscall(SYS_bpf, BPF_LINK_CREATE, &attr, sizeof(attr)) < 0) /* a non-negative result is not stored; only failure is checked */
> die("link_create");
>}
>
>int main(void) /* reproducer: follows the steps described in the message above (transport assigned, socket in sockmap, transport dropped, listen, new connection) */
>{
> struct sockaddr_vm addr = {
> .svm_family = AF_VSOCK,
> .svm_cid = VMADDR_CID_LOCAL,
> .svm_port = VMADDR_PORT_ANY
> };
> socklen_t alen = sizeof(addr);
> int s, map, prog, c;
>
> s = socket(AF_VSOCK, SOCK_SEQPACKET, 0); /* s is the socket that ends up both in the sockmap and listening */
> if (s < 0)
> die("socket");
>
> if (bind(s, (struct sockaddr *)&addr, alen))
> die("bind");
>
> if (!connect(s, (struct sockaddr *)&addr, alen) || errno != ECONNRESET) /* continue only if connect() FAILED with ECONNRESET; per the description above, this is the step that gets a transport assigned while s stays unconnected */
> die("connect #1");
>
> map = sockmap_create();
> prog = prog_load();
> link_create(prog, map); /* link the verdict program to the map before the socket is inserted */
> map_update_elem(map, 0, s); /* put the still-unconnected s into the sockmap at key 0 */
>
> addr.svm_cid = 0x42424242; /* non-existing */
> if (!connect(s, (struct sockaddr *)&addr, alen) || errno != ESOCKTNOSUPPORT) /* again a failure is required, this time ESOCKTNOSUPPORT; per the description above, this is the "drop the transport" step */
> die("connect #2");
>
> if (listen(s, 1)) /* s becomes a listening socket (TCP_LISTEN in the description above) */
> die("listen");
>
> if (getsockname(s, (struct sockaddr *)&addr, &alen)) /* overwrite addr with the local address of s so the client below can reach it */
> die("getsockname");
>
> c = socket(AF_VSOCK, SOCK_SEQPACKET, 0); /* c is the client used to open a new connection to s */
> if (c < 0)
> die("socket c");
>
> if (connect(c, (struct sockaddr *)&addr, alen)) /* the new connection to the listening s; per the call chain above, this is when vsock_read_skb() hits the NULL transport */
> die("connect #3");
>
> return 0;
>}
>
Powered by blists - more mailing lists