[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <7a3b7d98-d882-5197-3dae-80ffe1e59af6@redhat.com>
Date: Sat, 27 Nov 2021 11:41:21 +0100
From: Jesper Dangaard Brouer <jbrouer@...hat.com>
To: Ong Boon Leong <boon.leong.ong@...el.com>, bpf@...r.kernel.org,
netdev@...r.kernel.org
Cc: brouer@...hat.com, bjorn@...nel.org,
Magnus Karlsson <magnus.karlsson@...el.com>,
Jonathan Lemon <jonathan.lemon@...il.com>,
Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
"David S . Miller" <davem@...emloft.net>,
Jakub Kicinski <kuba@...nel.org>,
Jesper Dangaard Brouer <hawk@...nel.org>,
John Fastabend <john.fastabend@...il.com>,
Andrii Nakryiko <andrii@...nel.org>,
Martin KaFai Lau <kafai@...com>,
Song Liu <songliubraving@...com>, Yonghong Song <yhs@...com>,
KP Singh <kpsingh@...nel.org>
Subject: Re: [PATCH bpf-next 3/4] samples/bpf: xdpsock: add period cycle time
to Tx operation
On 24/11/2021 10.18, Ong Boon Leong wrote:
> Tx cycle time is in micro-seconds unit. By combining the batch size (-b M)
> and Tx cycle time (-T|--tx-cycle N), xdpsock now can transmit batch-size of
> packets every N-us periodically.
Does this also work for --poll mode (which is a wakeup mode)?
> For example to transmit 1 packet each 1ms cycle time for total of 2000000
> packets:
>
> $ xdpsock -i eth0 -t -N -z -T 1000 -b 1 -C 2000000
>
> sock0@...0s29f1:2 txonly xdp-drv
> pps pkts 1.00
> rx 0 0
> tx 1000 1996872
>
> sock0@...0s29f1:2 txonly xdp-drv
> pps pkts 1.00
> rx 0 0
> tx 1000 1997872
>
> sock0@...0s29f1:2 txonly xdp-drv
> pps pkts 1.00
> rx 0 0
> tx 1000 1998872
>
> sock0@...0s29f1:2 txonly xdp-drv
> pps pkts 1.00
> rx 0 0
> tx 1000 1999872
>
> sock0@...0s29f1:2 txonly xdp-drv
> pps pkts 1.00
> rx 0 0
> tx 128 2000000
>
> sock0@...0s29f1:2 txonly xdp-drv
> pps pkts 0.00
> rx 0 0
> tx 0 2000000
>
> Signed-off-by: Ong Boon Leong <boon.leong.ong@...el.com>
> ---
> samples/bpf/xdpsock_user.c | 36 +++++++++++++++++++++++++++++++-----
> 1 file changed, 31 insertions(+), 5 deletions(-)
>
> diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
> index 691f442bbb2..61d4063f11a 100644
> --- a/samples/bpf/xdpsock_user.c
> +++ b/samples/bpf/xdpsock_user.c
> @@ -111,6 +111,7 @@ static u32 opt_num_xsks = 1;
> static u32 prog_id;
> static bool opt_busy_poll;
> static bool opt_reduced_cap;
> +static unsigned long opt_cycle_time;
>
> struct vlan_ethhdr {
> unsigned char h_dest[6];
> @@ -173,6 +174,8 @@ struct xsk_socket_info {
> struct xsk_app_stats app_stats;
> struct xsk_driver_stats drv_stats;
> u32 outstanding_tx;
> + unsigned long prev_tx_time;
> + unsigned long tx_cycle_time;
> };
>
> static int num_socks;
> @@ -972,6 +975,7 @@ static struct option long_options[] = {
> {"tx-vlan-pri", required_argument, 0, 'K'},
> {"tx-dmac", required_argument, 0, 'G'},
> {"tx-smac", required_argument, 0, 'H'},
> + {"tx-cycle", required_argument, 0, 'T'},
> {"extra-stats", no_argument, 0, 'x'},
> {"quiet", no_argument, 0, 'Q'},
> {"app-stats", no_argument, 0, 'a'},
> @@ -1017,6 +1021,7 @@ static void usage(const char *prog)
> " -K, --tx-vlan-pri=n Tx VLAN Priority [0-7]. Default: %d (For -V|--tx-vlan)\n"
> " -G, --tx-dmac=<MAC> Dest MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n"
> " -H, --tx-smac=<MAC> Src MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n"
> + " -T, --tx-cycle=n Tx cycle time in micro-seconds (For -t|--txonly).\n"
> " -x, --extra-stats Display extra statistics.\n"
> " -Q, --quiet Do not display any stats.\n"
> " -a, --app-stats Display application (syscall) statistics.\n"
> @@ -1039,7 +1044,7 @@ static void parse_command_line(int argc, char **argv)
> opterr = 0;
>
> for (;;) {
> - c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:VJ:K:G:H:xQaI:BR",
> + c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:VJ:K:G:H:T:xQaI:BR",
> long_options, &option_index);
> if (c == -1)
> break;
> @@ -1145,6 +1150,10 @@ static void parse_command_line(int argc, char **argv)
> usage(basename(argv[0]));
> }
> break;
> + case 'T':
> + opt_cycle_time = atoi(optarg);
> + opt_cycle_time *= 1000;
Converting to nanoseconds, right?
> + break;
> case 'x':
> opt_extra_stats = 1;
> break;
> @@ -1350,16 +1359,25 @@ static void rx_drop_all(void)
> }
> }
>
> -static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size)
> +static int tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size)
> {
> u32 idx;
> unsigned int i;
>
> + if (xsk->tx_cycle_time) {
> + unsigned long now = get_nsecs();
> +
> + if ((now - xsk->prev_tx_time) < xsk->tx_cycle_time)
> + return 0;
So, this test is actively spinning until the time is reached, spending
100% CPU time on this. I guess we can have this as a test for the most
accurate transmit (cyclic period) with AF_XDP.
Do you have a use-case for this?
I have a customer use-case, but my customer doesn't want to actively spin.
My plan is to use clock_nanosleep() and wake up slightly before the
target time, and then we can spin briefly until the Tx time slot.
I will need to code this up for the customer soon anyway... perhaps we
can extend your code with this idea?
I have coded the periodic cycle Tx with UDP packets here[1], if you would
like to see some code using clock_nanosleep(). The next step (for me) is
doing this for AF_XDP (likely in my example[2]).
[1]
https://github.com/netoptimizer/network-testing/blob/master/src/udp_pacer.c
[2]
https://github.com/xdp-project/bpf-examples/tree/master/AF_XDP-interaction
> +
> + xsk->prev_tx_time = now;
Would it be valuable to know how much we overshoot the tx_cycle_time?
For my use-case, I will be monitoring the other side receiving the
packets (and using HW RX-time) to evaluate how accurate my sender is. In
this case, I would like to know whether my software "knew" that it was
not 100% accurate.
> + }
> +
> while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) <
> batch_size) {
> complete_tx_only(xsk, batch_size);
> if (benchmark_done)
> - return;
> + return 0;
> }
I wonder if this step can introduce jitter/delay before the actual Tx
happens?
I mean, the real transmit cannot happen before xsk_ring_prod__submit()
is called. If the cycles spent are exactly the same each time, it doesn't
matter that your tx_cycle_time timestamp is taken above.
Here you have a potential call to complete_tx_only(), which can
introduce variance for your period.
I would suggest moving the TX completion handling, so it doesn't
interfere with accurate TX.
>
> for (i = 0; i < batch_size; i++) {
> @@ -1375,6 +1393,8 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size)
> *frame_nb += batch_size;
> *frame_nb %= NUM_FRAMES;
> complete_tx_only(xsk, batch_size);
> +
> + return batch_size;
> }
>
> static inline int get_batch_size(int pkt_cnt)
> @@ -1407,6 +1427,7 @@ static void complete_tx_only_all(void)
> static void tx_only_all(void)
> {
> struct pollfd fds[MAX_SOCKS] = {};
> + unsigned long now = get_nsecs();
> u32 frame_nb[MAX_SOCKS] = {};
> int pkt_cnt = 0;
> int i, ret;
> @@ -1414,10 +1435,15 @@ static void tx_only_all(void)
> for (i = 0; i < num_socks; i++) {
> fds[0].fd = xsk_socket__fd(xsks[i]->xsk);
> fds[0].events = POLLOUT;
> + if (opt_cycle_time) {
> + xsks[i]->prev_tx_time = now;
> + xsks[i]->tx_cycle_time = opt_cycle_time;
> + }
> }
>
> while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
> int batch_size = get_batch_size(pkt_cnt);
> + int tx_cnt = 0;
>
> if (opt_poll) {
> for (i = 0; i < num_socks; i++)
> @@ -1431,9 +1457,9 @@ static void tx_only_all(void)
> }
>
> for (i = 0; i < num_socks; i++)
> - tx_only(xsks[i], &frame_nb[i], batch_size);
> + tx_cnt += tx_only(xsks[i], &frame_nb[i], batch_size);
>
> - pkt_cnt += batch_size;
> + pkt_cnt += tx_cnt;
>
> if (benchmark_done)
> break;
>
Powered by blists - more mailing lists