[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20200311204106.GA2125642@mini-arch.hsd1.ca.comcast.net>
Date: Wed, 11 Mar 2020 13:41:06 -0700
From: Stanislav Fomichev <sdf@...ichev.me>
To: Andrii Nakryiko <andriin@...com>
Cc: bpf@...r.kernel.org, netdev@...r.kernel.org, ast@...com,
daniel@...earbox.net, sdf@...gle.com, andrii.nakryiko@...il.com,
kernel-team@...com
Subject: Re: [PATCH bpf-next] selftests/bpf: make tcp_rtt test more robust to
failures
On 03/11, Andrii Nakryiko wrote:
> Switch to non-blocking accept and wait for server thread to exit before
> proceeding. I noticed that sometimes tcp_rtt server thread failure would
> "spill over" into other tests (that would run after tcp_rtt), probably just
> because server thread exits much later and tcp_rtt doesn't wait for it.
>
> Fixes: 8a03222f508b ("selftests/bpf: test_progs: fix client/server race in tcp_rtt")
> Signed-off-by: Andrii Nakryiko <andriin@...com>
> ---
> .../selftests/bpf/prog_tests/tcp_rtt.c | 30 +++++++++++--------
> 1 file changed, 18 insertions(+), 12 deletions(-)
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
> index f4cd60d6fba2..d235eea0de27 100644
> --- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
> +++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
> @@ -188,7 +188,7 @@ static int start_server(void)
> };
> int fd;
>
> - fd = socket(AF_INET, SOCK_STREAM, 0);
> + fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0);
> if (fd < 0) {
> log_err("Failed to create server socket");
> return -1;
> @@ -205,6 +205,7 @@ static int start_server(void)
>
> static pthread_mutex_t server_started_mtx = PTHREAD_MUTEX_INITIALIZER;
> static pthread_cond_t server_started = PTHREAD_COND_INITIALIZER;
> +static volatile bool server_done = false;
>
> static void *server_thread(void *arg)
> {
> @@ -222,23 +223,22 @@ static void *server_thread(void *arg)
>
> if (CHECK_FAIL(err < 0)) {
> perror("Failed to listed on socket");
> - return NULL;
> + return ERR_PTR(err);
> }
>
> - client_fd = accept(fd, (struct sockaddr *)&addr, &len);
> + while (!server_done) {
> + client_fd = accept(fd, (struct sockaddr *)&addr, &len);
> + if (client_fd == -1 && errno == EAGAIN)
> + continue;
> + break;
> + }
> if (CHECK_FAIL(client_fd < 0)) {
> perror("Failed to accept client");
> - return NULL;
> + return ERR_PTR(err);
> }
>
> - /* Wait for the next connection (that never arrives)
> - * to keep this thread alive to prevent calling
> - * close() on client_fd.
> - */
> - if (CHECK_FAIL(accept(fd, (struct sockaddr *)&addr, &len) >= 0)) {
> - perror("Unexpected success in second accept");
> - return NULL;
> - }
> + while (!server_done)
> + usleep(50);
>
> close(client_fd);
>
> @@ -249,6 +249,7 @@ void test_tcp_rtt(void)
> {
> int server_fd, cgroup_fd;
> pthread_t tid;
> + void *server_res;
>
> cgroup_fd = test__join_cgroup("/tcp_rtt");
> if (CHECK_FAIL(cgroup_fd < 0))
> @@ -267,6 +268,11 @@ void test_tcp_rtt(void)
> pthread_mutex_unlock(&server_started_mtx);
>
> CHECK_FAIL(run_test(cgroup_fd, server_fd));
> +
> + server_done = true;
[..]
> + pthread_join(tid, &server_res);
> + CHECK_FAIL(IS_ERR(server_res));
I wonder: if we add (move) close(server_fd) before pthread_join(), can we
fix this issue without using a non-blocking socket? The accept() should
return as soon as server_fd is closed, so it's essentially your
'server_done'.
> +
> close_server_fd:
> close(server_fd);
> close_cgroup_fd:
> --
> 2.17.1
>
Powered by blists - more mailing lists