lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <878r7bjb1a.fsf@cloudflare.com>
Date: Mon, 06 Nov 2023 13:44:02 +0100
From: Jakub Sitnicki <jakub@...udflare.com>
To: John Fastabend <john.fastabend@...il.com>
Cc: bpf@...r.kernel.org, netdev@...r.kernel.org, yangyingliang@...wei.com,
 martin.lau@...nel.org
Subject: Re: [PATCH bpf 2/2] bpf: sockmap, add af_unix test with both
 sockets in map

On Mon, Oct 16, 2023 at 12:08 PM -07, John Fastabend wrote:
> This adds a test where both pairs of a af_unix paired socket are put into
> a BPF map. This ensures that when we tear down the af_unix pair we don't
> have any issues on sockmap side with ordering and reference counting.
>
> Signed-off-by: John Fastabend <john.fastabend@...il.com>
> ---
>  .../selftests/bpf/prog_tests/sockmap_listen.c | 39 ++++++++++++++++---
>  .../selftests/bpf/progs/test_sockmap_listen.c |  7 ++++
>  2 files changed, 40 insertions(+), 6 deletions(-)
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
> index 8df8cbb447f1..90e97907c1c1 100644
> --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
> +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
> @@ -1824,8 +1824,10 @@ static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
>  	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
>  }
>  
> -static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
> -					int verd_mapfd, enum redir_mode mode)
> +static void unix_inet_redir_to_connected(int family, int type,
> +					int sock_mapfd, int nop_mapfd,
> +					int verd_mapfd,
> +					enum redir_mode mode)
>  {
>  	const char *log_prefix = redir_mode_str(mode);
>  	int c0, c1, p0, p1;
> @@ -1849,6 +1851,12 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
>  	if (err)
>  		goto close;
>  
> +	if (nop_mapfd >= 0) {
> +		err = add_to_sockmap(nop_mapfd, c0, c1);
> +		if (err)
> +			goto close;
> +	}
> +
>  	n = write(c1, "a", 1);
>  	if (n < 0)
>  		FAIL_ERRNO("%s: write", log_prefix);
> @@ -1883,6 +1891,7 @@ static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
>  					    struct bpf_map *inner_map, int family)
>  {
>  	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
> +	int nop_map = bpf_map__fd(skel->maps.nop_map);
>  	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
>  	int sock_map = bpf_map__fd(inner_map);
>  	int err;
> @@ -1892,14 +1901,32 @@ static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
>  		return;
>  
>  	skel->bss->test_ingress = false;
> -	unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
> +	unix_inet_redir_to_connected(family, SOCK_DGRAM,
> +				     sock_map, -1, verdict_map,
> +				     REDIR_EGRESS);
> +	unix_inet_redir_to_connected(family, SOCK_DGRAM,
> +				     sock_map, -1, verdict_map,
>  				     REDIR_EGRESS);
> -	unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
> +
> +	unix_inet_redir_to_connected(family, SOCK_DGRAM,
> +				     sock_map, nop_map, verdict_map,
> +				     REDIR_EGRESS);
> +	unix_inet_redir_to_connected(family, SOCK_STREAM,
> +				     sock_map, nop_map, verdict_map,
>  				     REDIR_EGRESS);
>  	skel->bss->test_ingress = true;
> -	unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
> +	unix_inet_redir_to_connected(family, SOCK_DGRAM,
> +				     sock_map, -1, verdict_map,
> +				     REDIR_INGRESS);
> +	unix_inet_redir_to_connected(family, SOCK_STREAM,
> +				     sock_map, -1, verdict_map,
> +				     REDIR_INGRESS);
> +
> +	unix_inet_redir_to_connected(family, SOCK_DGRAM,
> +				     sock_map, nop_map, verdict_map,
>  				     REDIR_INGRESS);
> -	unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
> +	unix_inet_redir_to_connected(family, SOCK_STREAM,
> +				     sock_map, nop_map, verdict_map,
>  				     REDIR_INGRESS);
>  
>  	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
> diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
> index 464d35bd57c7..b7250eb9c30c 100644
> --- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
> +++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
> @@ -14,6 +14,13 @@ struct {
>  	__type(value, __u64);
>  } sock_map SEC(".maps");
>  
> +struct {
> +	__uint(type, BPF_MAP_TYPE_SOCKMAP);
> +	__uint(max_entries, 2);
> +	__type(key, __u32);
> +	__type(value, __u64);
> +} nop_map SEC(".maps");
> +
>  struct {
>  	__uint(type, BPF_MAP_TYPE_SOCKHASH);
>  	__uint(max_entries, 2);

So... we have a bug in unix_inet_redir_to_connected() - it happily
ignores the passed socket type, which is currently hardcoded to
SOCK_DGRAM :-)

Which means these tests don't exercise unix_stream paths where the added
psock->skpair is actually needed.

But I'm able to reproduce the bug by running the VSOCK redir test:

bash-5.2# ./test_progs -n 212/79
[   23.232282] ==================================================================
[   23.232634] BUG: KASAN: slab-use-after-free in sock_def_readable+0xe3/0x400
[   23.232942] Read of size 8 at addr ffff8881013f9860 by task kworker/1:2/220
[   23.233253]
[   23.233326] CPU: 1 PID: 220 Comm: kworker/1:2 Tainted: G           OE      6.6.0 #30
[   23.233697] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-1.fc38 04/01/2014
[   23.234074] Workqueue: events sk_psock_backlog
[   23.234271] Call Trace:
[   23.234381]  <TASK>
[   23.234477]  dump_stack_lvl+0x4a/0x90
[   23.234640]  print_address_description.constprop.0+0x33/0x400
[   23.234888]  ? preempt_count_sub+0x13/0xc0
[   23.235071]  print_report+0xb6/0x260
[   23.235228]  ? kasan_complete_mode_report_info+0x7c/0x1f0
[   23.235462]  kasan_report+0xd0/0x110
[   23.235619]  ? sock_def_readable+0xe3/0x400
[   23.235801]  ? sock_def_readable+0xe3/0x400
[   23.235989]  kasan_check_range+0xf7/0x1b0
[   23.236164]  __kasan_check_read+0x11/0x20
[   23.236340]  sock_def_readable+0xe3/0x400
[   23.236514]  unix_stream_sendmsg+0x3c5/0x7d0
[   23.236704]  ? queue_oob+0x300/0x300
[   23.236865]  sock_sendmsg+0x229/0x250
[   23.237030]  ? sock_write_iter+0x320/0x320
[   23.237211]  ? __this_cpu_preempt_check+0x13/0x20
[   23.237416]  ? lock_acquire+0x191/0x410
[   23.237607]  ? lock_sync+0x110/0x110
[   23.237766]  ? lock_is_held_type+0xd0/0x130
[   23.237948]  ? __asan_storeN+0x12/0x20
[   23.238115]  __skb_send_sock+0x4fa/0x670
[   23.238288]  ? preempt_count_sub+0x13/0xc0
[   23.238494]  ? sendmsg_locked+0x90/0x90
[   23.238721]  ? sendmsg_unlocked+0x40/0x40
[   23.238975]  ? __lock_acquire+0x765/0xf00
[   23.239252]  ? __this_cpu_preempt_check+0x13/0x20
[   23.239570]  ? lock_acquire+0x191/0x410
[   23.239831]  skb_send_sock+0x10/0x20
[   23.240079]  sk_psock_backlog+0x141/0x5e0
[   23.240340]  ? __this_cpu_preempt_check+0x13/0x20
[   23.240638]  process_one_work+0x49d/0x970
[   23.240900]  ? drain_workqueue+0x1c0/0x1c0
[   23.241173]  ? assign_work+0xe1/0x120
[   23.241404]  worker_thread+0x380/0x680
[   23.241660]  ? trace_hardirqs_on+0x22/0x100
[   23.241933]  ? preempt_count_sub+0x13/0xc0
[   23.242213]  ? process_one_work+0x970/0x970
[   23.242491]  kthread+0x1ba/0x200
[   23.242687]  ? kthread+0xfe/0x200
[   23.242890]  ? kthread_complete_and_exit+0x20/0x20
[   23.243193]  ret_from_fork+0x35/0x60
[   23.243418]  ? kthread_complete_and_exit+0x20/0x20
[   23.243718]  ret_from_fork_asm+0x11/0x20
[   23.243995]  </TASK>
[   23.244145]
[   23.244227] Allocated by task 227:
[   23.244446]  kasan_save_stack+0x26/0x50
[   23.244709]  kasan_set_track+0x25/0x40
[   23.244951]  kasan_save_alloc_info+0x1e/0x30
[   23.245220]  __kasan_slab_alloc+0x72/0x80
[   23.245491]  kmem_cache_alloc+0x182/0x360
[   23.245758]  sk_prot_alloc+0x43/0x160
[   23.246007]  sk_alloc+0x2c/0x3a0
[   23.246216]  unix_create1+0x86/0x440
[   23.246462]  unix_create+0x7d/0x100
[   23.246701]  __sock_create+0x1bc/0x420
[   23.246960]  __sys_socketpair+0x1ac/0x3a0
[   23.247237]  __x64_sys_socketpair+0x4f/0x60
[   23.247521]  do_syscall_64+0x38/0x90
[   23.247769]  entry_SYSCALL_64_after_hwframe+0x63/0xcd
[   23.248113]
[   23.248225] Freed by task 227:
[   23.248444]  kasan_save_stack+0x26/0x50
[   23.248707]  kasan_set_track+0x25/0x40
[   23.248963]  kasan_save_free_info+0x2b/0x50
[   23.249249]  ____kasan_slab_free+0x154/0x1c0
[   23.249541]  __kasan_slab_free+0x12/0x20
[   23.249810]  kmem_cache_free+0x1e7/0x480
[   23.250084]  __sk_destruct+0x270/0x3f0
[   23.250342]  sk_destruct+0x78/0x90
[   23.250577]  __sk_free+0x51/0x160
[   23.250807]  sk_free+0x45/0x70
[   23.251025]  unix_release_sock+0x5cc/0x700
[   23.251301]  unix_release+0x50/0x70
[   23.251536]  __sock_release+0x5f/0x120
[   23.251754]  sock_close+0x13/0x20
[   23.252109]  __fput+0x1f3/0x470
[   23.252451]  __fput_sync+0x2f/0x40
[   23.252811]  __x64_sys_close+0x51/0x90
[   23.253169]  do_syscall_64+0x38/0x90
[   23.253480]  entry_SYSCALL_64_after_hwframe+0x63/0xcd
[   23.253940]
[   23.254097] The buggy address belongs to the object at ffff8881013f9800
[   23.254097]  which belongs to the cache UNIX-STREAM of size 1920
[   23.254936] The buggy address is located 96 bytes inside of
[   23.254936]  freed 1920-byte region [ffff8881013f9800, ffff8881013f9f80)
[   23.255731]
[   23.255844] The buggy address belongs to the physical page:
[   23.256225] page:00000000c005ecb3 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff8881013f8000 pfn:0x1013f8
[   23.256905] head:00000000c005ecb3 order:3 entire_mapcount:0 nr_pages_mapped:0 pincount:0
[   23.257382] flags: 0x2fffe000000840(slab|head|node=0|zone=2|lastcpupid=0x7fff)
[   23.257791] page_type: 0xffffffff()
[   23.257988] raw: 002fffe000000840 ffff888100961a40 dead000000000122 0000000000000000
[   23.258418] raw: ffff8881013f8000 000000008010000e 00000001ffffffff 0000000000000000
[   23.258817] page dumped because: kasan: bad access detected
[   23.259131]
[   23.259205] Memory state around the buggy address:
[   23.259469]  ffff8881013f9700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[   23.259871]  ffff8881013f9780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[   23.260290] >ffff8881013f9800: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[   23.260704]                                                        ^
[   23.261056]  ffff8881013f9880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[   23.261469]  ffff8881013f9900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[   23.261854] ==================================================================
[   23.262453] Disabling lock debugging due to kernel taint
#212/79  sockmap_listen/sockmap VSOCK test_vsock_redir:OK
#212     sockmap_listen:OK
Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED
bash-5.2#

If I modify the test to use (AF_UNIX, SOCK_DGRAM) instead of
SOCK_STREAM, the bug no longer reproduces.

Which confirms my thinking that unix_dgram_sendmsg is safe to use from
sockmap because it grabs a ref to skpair.

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ