lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CA+FuTSewU6bjYLsyLzZ1Yne=6YBPDJZ=U1mZc+6cJVdr06BhiQ@mail.gmail.com>
Date:   Fri, 27 Jan 2023 17:03:39 -0500
From:   Willem de Bruijn <willemb@...gle.com>
To:     Andrei Gherzan <andrei.gherzan@...onical.com>
Cc:     "David S. Miller" <davem@...emloft.net>,
        Eric Dumazet <edumazet@...gle.com>,
        Jakub Kicinski <kuba@...nel.org>,
        Paolo Abeni <pabeni@...hat.com>, Shuah Khan <shuah@...nel.org>,
        netdev@...r.kernel.org, linux-kselftest@...r.kernel.org,
        linux-kernel@...r.kernel.org
Subject: Re: [PATCH] selftests: net: udpgso_bench_tx: Introduce exponential
 back-off retries

On Fri, Jan 27, 2023 at 1:16 PM Andrei Gherzan
<andrei.gherzan@...onical.com> wrote:
>
> The tx and rx test programs are used in a couple of test scripts including
> "udpgro_bench.sh". Taking this as an example, when the rx/tx programs
> are invoked subsequently, there is a chance that the rx one is not ready to
> accept socket connections. This racing bug could fail the test with at
> least one of the following:
>
> ./udpgso_bench_tx: connect: Connection refused
> ./udpgso_bench_tx: sendmsg: Connection refused
> ./udpgso_bench_tx: write: Connection refused
>
> This change addresses this by adding routines that retry the socket
> operations with an exponential back off algorithm from 100ms to 2s.
>
> Fixes: 3a687bef148d ("selftests: udp gso benchmark")
> Signed-off-by: Andrei Gherzan <andrei.gherzan@...onical.com>

Synchronizing the two processes is indeed tricky.

Perhaps more robust is opening an initial TCP connection, with
SO_RCVTIMEO to bound the waiting time. That covers all tests in one
go.

> ---
>  tools/testing/selftests/net/udpgso_bench_tx.c | 57 +++++++++++++------
>  1 file changed, 41 insertions(+), 16 deletions(-)
>
> diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
> index f1fdaa270291..4dea9ee7eb46 100644
> --- a/tools/testing/selftests/net/udpgso_bench_tx.c
> +++ b/tools/testing/selftests/net/udpgso_bench_tx.c
> @@ -53,6 +53,9 @@
>
>  #define NUM_PKT                100
>
> +#define MAX_DELAY      2000000
> +#define INIT_DELAY     100000
> +
>  static bool    cfg_cache_trash;
>  static int     cfg_cpu         = -1;
>  static int     cfg_connected   = true;
> @@ -257,13 +260,18 @@ static void flush_errqueue(int fd, const bool do_poll)
>  static int send_tcp(int fd, char *data)
>  {
>         int ret, done = 0, count = 0;
> +       useconds_t delay = INIT_DELAY;
>
>         while (done < cfg_payload_len) {
> -               ret = send(fd, data + done, cfg_payload_len - done,
> -                          cfg_zerocopy ? MSG_ZEROCOPY : 0);
> -               if (ret == -1)
> -                       error(1, errno, "write");
> -
> +               delay = INIT_DELAY;
> +               while ((ret = send(fd, data + done, cfg_payload_len - done,
> +                               cfg_zerocopy ? MSG_ZEROCOPY : 0)) == -1) {
> +                       usleep(delay);
> +                       if (delay < MAX_DELAY)
> +                               delay *= 2;
> +                       else
> +                               error(1, errno, "write");
> +               }
>                 done += ret;
>                 count++;
>         }

send_tcp should not be affected, as connect will by then already have
succeeded. Also, as a reliable protocol it will internally retry,
after returning with success.

> @@ -274,17 +282,23 @@ static int send_tcp(int fd, char *data)
>  static int send_udp(int fd, char *data)
>  {
>         int ret, total_len, len, count = 0;
> +       useconds_t delay = INIT_DELAY;
>
>         total_len = cfg_payload_len;
>
>         while (total_len) {
>                 len = total_len < cfg_mss ? total_len : cfg_mss;
>
> -               ret = sendto(fd, data, len, cfg_zerocopy ? MSG_ZEROCOPY : 0,
> -                            cfg_connected ? NULL : (void *)&cfg_dst_addr,
> -                            cfg_connected ? 0 : cfg_alen);
> -               if (ret == -1)
> -                       error(1, errno, "write");
> +               delay = INIT_DELAY;
> +               while ((ret = sendto(fd, data, len, cfg_zerocopy ? MSG_ZEROCOPY : 0,
> +                               cfg_connected ? NULL : (void *)&cfg_dst_addr,
> +                               cfg_connected ? 0 : cfg_alen)) == -1) {

should ideally only retry on the expected errno. Unreliable datagram
sendto will succeed and initially. It will fail with error later,
after reception of an ICMP dst unreachable response.

> +                       usleep(delay);
> +                       if (delay < MAX_DELAY)
> +                               delay *= 2;
> +                       else
> +                               error(1, errno, "write");
> +               }
>                 if (ret != len)
>                         error(1, errno, "write: %uB != %uB\n", ret, len);
>
> @@ -378,6 +392,7 @@ static int send_udp_segment(int fd, char *data)
>         struct iovec iov = {0};
>         size_t msg_controllen;
>         struct cmsghdr *cmsg;
> +       useconds_t delay = INIT_DELAY;
>         int ret;
>
>         iov.iov_base = data;
> @@ -401,9 +416,13 @@ static int send_udp_segment(int fd, char *data)
>         msg.msg_name = (void *)&cfg_dst_addr;
>         msg.msg_namelen = cfg_alen;
>
> -       ret = sendmsg(fd, &msg, cfg_zerocopy ? MSG_ZEROCOPY : 0);
> -       if (ret == -1)
> -               error(1, errno, "sendmsg");
> +       while ((ret = sendmsg(fd, &msg, cfg_zerocopy ? MSG_ZEROCOPY : 0)) == -1) {
> +               usleep(delay);
> +               if (delay < MAX_DELAY)
> +                       delay *= 2;
> +               else
> +                       error(1, errno, "sendmsg");
> +       }
>         if (ret != iov.iov_len)
>                 error(1, 0, "sendmsg: %u != %llu\n", ret,
>                         (unsigned long long)iov.iov_len);
> @@ -616,6 +635,7 @@ int main(int argc, char **argv)
>  {
>         unsigned long num_msgs, num_sends;
>         unsigned long tnow, treport, tstop;
> +       useconds_t delay = INIT_DELAY;
>         int fd, i, val, ret;
>
>         parse_opts(argc, argv);
> @@ -648,9 +668,14 @@ int main(int argc, char **argv)
>                 }
>         }
>
> -       if (cfg_connected &&
> -           connect(fd, (void *)&cfg_dst_addr, cfg_alen))
> -               error(1, errno, "connect");
> +       if (cfg_connected)
> +               while (connect(fd, (void *)&cfg_dst_addr, cfg_alen)) {
> +                       usleep(delay);
> +                       if (delay < MAX_DELAY)
> +                               delay *= 2;
> +                       else
> +                               error(1, errno, "connect");
> +               }
>
>         if (cfg_segment)
>                 set_pmtu_discover(fd, cfg_family == PF_INET);
> --
> 2.34.1
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ