[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <0a16fcbb-1c17-dfe2-24b0-6f1d1e6a91bd@fb.com>
Date: Thu, 29 Jul 2021 23:54:26 -0700
From: Yonghong Song <yhs@...com>
To: Kuniyuki Iwashima <kuniyu@...zon.co.jp>,
"David S. Miller" <davem@...emloft.net>,
Jakub Kicinski <kuba@...nel.org>,
Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Andrii Nakryiko <andrii@...nel.org>,
Martin KaFai Lau <kafai@...com>,
Song Liu <songliubraving@...com>,
John Fastabend <john.fastabend@...il.com>,
KP Singh <kpsingh@...nel.org>
CC: Benjamin Herrenschmidt <benh@...zon.com>,
Kuniyuki Iwashima <kuni1840@...il.com>, <bpf@...r.kernel.org>,
<netdev@...r.kernel.org>
Subject: Re: [PATCH bpf-next 2/2] selftest/bpf: Implement sample UNIX domain
socket iterator program.
On 7/29/21 4:36 PM, Kuniyuki Iwashima wrote:
> If there are no abstract sockets, this prog can output the same result
> compared to /proc/net/unix.
>
> # cat /sys/fs/bpf/unix | head -n 2
> Num RefCount Protocol Flags Type St Inode Path
> ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
>
> # cat /proc/net/unix | head -n 2
> Num RefCount Protocol Flags Type St Inode Path
> ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@...zon.co.jp>
> ---
> .../selftests/bpf/prog_tests/bpf_iter.c | 17 +++++
> .../selftests/bpf/progs/bpf_iter_unix.c | 75 +++++++++++++++++++
> 2 files changed, 92 insertions(+)
> create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_unix.c
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> index 1f1aade56504..4746bac68d36 100644
> --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> @@ -13,6 +13,7 @@
> #include "bpf_iter_tcp6.skel.h"
> #include "bpf_iter_udp4.skel.h"
> #include "bpf_iter_udp6.skel.h"
> +#include "bpf_iter_unix.skel.h"
> #include "bpf_iter_test_kern1.skel.h"
> #include "bpf_iter_test_kern2.skel.h"
> #include "bpf_iter_test_kern3.skel.h"
> @@ -313,6 +314,20 @@ static void test_udp6(void)
> bpf_iter_udp6__destroy(skel);
> }
>
> +static void test_unix(void)
> +{
> + struct bpf_iter_unix *skel;
> +
> + skel = bpf_iter_unix__open_and_load();
> + if (CHECK(!skel, "bpf_iter_unix__open_and_load",
> + "skeleton open_and_load failed\n"))
> + return;
> +
> + do_dummy_read(skel->progs.dump_unix);
> +
> + bpf_iter_unix__destroy(skel);
> +}
> +
> /* The expected string is less than 16 bytes */
> static int do_read_with_fd(int iter_fd, const char *expected,
> bool read_one_char)
> @@ -1255,6 +1270,8 @@ void test_bpf_iter(void)
> test_udp4();
> if (test__start_subtest("udp6"))
> test_udp6();
> + if (test__start_subtest("unix"))
> + test_unix();
> if (test__start_subtest("anon"))
> test_anon_iter(false);
> if (test__start_subtest("anon-read-one-char"))
> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> new file mode 100644
> index 000000000000..285ec2f7944d
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> @@ -0,0 +1,75 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright Amazon.com Inc. or its affiliates. */
> +#include "bpf_iter.h"
Could you add bpf_iter__unix to bpf_iter.h, similar to bpf_iter__sockmap?
The main purpose is to make the test tolerate an old vmlinux.h.
> +#include "bpf_tracing_net.h"
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_endian.h>
> +
> +char _license[] SEC("license") = "GPL";
> +
> +#define __SO_ACCEPTCON (1 << 16)
> +#define UNIX_HASH_SIZE 256
> +#define UNIX_ABSTRACT(unix_sk) (unix_sk->addr->hash < UNIX_HASH_SIZE)
Could you add the above three defines to bpf_tracing_net.h?
We try to keep all these common defines in a common header for
potential reusability.
> +
> +static long sock_i_ino(const struct sock *sk)
> +{
> + const struct socket *sk_socket = sk->sk_socket;
> + const struct inode *inode;
> + unsigned long ino;
> +
> + if (!sk_socket)
> + return 0;
> +
> + inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
> + bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
> + return ino;
> +}
> +
> +SEC("iter/unix")
> +int dump_unix(struct bpf_iter__unix *ctx)
> +{
> + struct unix_sock *unix_sk = ctx->unix_sk;
> + struct sock *sk = (struct sock *)unix_sk;
> + struct seq_file *seq;
> + __u32 seq_num;
> +
> + if (!unix_sk)
> + return 0;
> +
> + seq = ctx->meta->seq;
> + seq_num = ctx->meta->seq_num;
> + if (seq_num == 0)
> + BPF_SEQ_PRINTF(seq, "Num RefCount Protocol Flags "
> + "Type St Inode Path\n");
> +
> + BPF_SEQ_PRINTF(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
> + unix_sk,
> + sk->sk_refcnt.refs.counter,
> + 0,
> + sk->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
> + sk->sk_type,
> + sk->sk_socket ?
> + (sk->sk_state == TCP_ESTABLISHED ?
> + SS_CONNECTED : SS_UNCONNECTED) :
> + (sk->sk_state == TCP_ESTABLISHED ?
> + SS_CONNECTING : SS_DISCONNECTING),
> + sock_i_ino(sk));
> +
> + if (unix_sk->addr) {
> + if (UNIX_ABSTRACT(unix_sk))
> + /* Abstract UNIX domain socket can contain '\0' in
> + * the path, and it should be escaped. However, it
> + * requires loops and the BPF verifier rejects it.
> + * So here, print only the escaped first byte to
> + * indicate it is an abstract UNIX domain socket.
> + * (See: unix_seq_show() and commit e7947ea770d0d)
> + */
> + BPF_SEQ_PRINTF(seq, " @");
> + else
> + BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
> + }
I looked at af_unix.c. For the above "if (unix_sk->addr) { ... }" code,
the corresponding kernel source code is the following:
if (u->addr) { // under unix_table_lock here
int i, len;
seq_putc(seq, ' ');
i = 0;
len = u->addr->len - sizeof(short);
if (!UNIX_ABSTRACT(s))
len--;
else {
seq_putc(seq, '@');
i++;
}
for ( ; i < len; i++)
seq_putc(seq, u->addr->name->sun_path[i] ?:
'@');
}
It does not seem to match the non-UNIX_ABSTRACT case in the bpf program.
I am not familiar with unix sockets, so it would be good if you could
explain a little more.
Regarding the verifier issue with loops, do we have an upper bound for
u->addr->len? If yes, would a bounded loop work?
> +
> + BPF_SEQ_PRINTF(seq, "\n");
> +
> + return 0;
> +}
>
Powered by blists - more mailing lists