lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 14 Aug 2019 07:53:19 -0700
From:   "Jonathan Lemon" <jonathan.lemon@...il.com>
To:     "Magnus Karlsson" <magnus.karlsson@...el.com>
Cc:     bjorn.topel@...el.com, ast@...nel.org, daniel@...earbox.net,
        netdev@...r.kernel.org, brouer@...hat.com, maximmi@...lanox.com,
        bpf@...r.kernel.org, bruce.richardson@...el.com,
        ciara.loftus@...el.com, jakub.kicinski@...ronome.com,
        xiaolong.ye@...el.com, qi.z.zhang@...el.com,
        sridhar.samudrala@...el.com, kevin.laatz@...el.com,
        ilias.apalodimas@...aro.org, kiran.patil@...el.com,
        axboe@...nel.dk, maciej.fijalkowski@...el.com,
        maciejromanfijalkowski@...il.com, intel-wired-lan@...ts.osuosl.org
Subject: Re: [PATCH bpf-next v4 6/8] samples/bpf: add use of need_wakeup flag
 in xdpsock



On 14 Aug 2019, at 0:27, Magnus Karlsson wrote:

> This commit adds using the need_wakeup flag to the xdpsock sample
> application. It is turned on by default as we think it is a feature
> that seems to always produce a performance benefit, if the application
> has been written taking advantage of it. It can be turned off in the
> sample app by using the '-m' command line option.
>
> The txpush and l2fwd sub applications have also been updated to
> support poll() with multiple sockets.
>
> Signed-off-by: Magnus Karlsson <magnus.karlsson@...el.com>

Acked-by: Jonathan Lemon <jonathan.lemon@...il.com>


> ---
>  samples/bpf/xdpsock_user.c | 192 
> ++++++++++++++++++++++++++++-----------------
>  1 file changed, 120 insertions(+), 72 deletions(-)
>
> diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
> index 93eaaf7..da84c76 100644
> --- a/samples/bpf/xdpsock_user.c
> +++ b/samples/bpf/xdpsock_user.c
> @@ -67,8 +67,10 @@ static int opt_ifindex;
>  static int opt_queue;
>  static int opt_poll;
>  static int opt_interval = 1;
> -static u32 opt_xdp_bind_flags;
> +static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
>  static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
> +static int opt_timeout = 1000;
> +static bool opt_need_wakeup = true;
>  static __u32 prog_id;
>
>  struct xsk_umem_info {
> @@ -352,6 +354,7 @@ static struct option long_options[] = {
>  	{"zero-copy", no_argument, 0, 'z'},
>  	{"copy", no_argument, 0, 'c'},
>  	{"frame-size", required_argument, 0, 'f'},
> +	{"no-need-wakeup", no_argument, 0, 'm'},
>  	{0, 0, 0, 0}
>  };
>
> @@ -372,6 +375,7 @@ static void usage(const char *prog)
>  		"  -z, --zero-copy      Force zero-copy mode.\n"
>  		"  -c, --copy           Force copy mode.\n"
>  		"  -f, --frame-size=n   Set the frame size (must be a power of two, 
> default is %d).\n"
> +		"  -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n"
>  		"\n";
>  	fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE);
>  	exit(EXIT_FAILURE);
> @@ -384,8 +388,9 @@ static void parse_command_line(int argc, char 
> **argv)
>  	opterr = 0;
>
>  	for (;;) {
> -		c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:", long_options,
> -				&option_index);
> +
> +		c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:m",
> +				long_options, &option_index);
>  		if (c == -1)
>  			break;
>
> @@ -429,6 +434,9 @@ static void parse_command_line(int argc, char 
> **argv)
>  			break;
>  		case 'f':
>  			opt_xsk_frame_size = atoi(optarg);
> +		case 'm':
> +			opt_need_wakeup = false;
> +			opt_xdp_bind_flags &= ~XDP_USE_NEED_WAKEUP;
>  			break;
>  		default:
>  			usage(basename(argv[0]));
> @@ -459,7 +467,8 @@ static void kick_tx(struct xsk_socket_info *xsk)
>  	exit_with_error(errno);
>  }
>
> -static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk)
> +static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk,
> +				     struct pollfd *fds)
>  {
>  	u32 idx_cq = 0, idx_fq = 0;
>  	unsigned int rcvd;
> @@ -468,7 +477,9 @@ static inline void complete_tx_l2fwd(struct 
> xsk_socket_info *xsk)
>  	if (!xsk->outstanding_tx)
>  		return;
>
> -	kick_tx(xsk);
> +	if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
> +		kick_tx(xsk);
> +
>  	ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE :
>  		xsk->outstanding_tx;
>
> @@ -482,6 +493,8 @@ static inline void complete_tx_l2fwd(struct 
> xsk_socket_info *xsk)
>  		while (ret != rcvd) {
>  			if (ret < 0)
>  				exit_with_error(-ret);
> +			if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
> +				ret = poll(fds, num_socks, opt_timeout);
>  			ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd,
>  						     &idx_fq);
>  		}
> @@ -505,7 +518,8 @@ static inline void complete_tx_only(struct 
> xsk_socket_info *xsk)
>  	if (!xsk->outstanding_tx)
>  		return;
>
> -	kick_tx(xsk);
> +	if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
> +		kick_tx(xsk);
>
>  	rcvd = xsk_ring_cons__peek(&xsk->umem->cq, BATCH_SIZE, &idx);
>  	if (rcvd > 0) {
> @@ -515,20 +529,25 @@ static inline void complete_tx_only(struct 
> xsk_socket_info *xsk)
>  	}
>  }
>
> -static void rx_drop(struct xsk_socket_info *xsk)
> +static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds)
>  {
>  	unsigned int rcvd, i;
>  	u32 idx_rx = 0, idx_fq = 0;
>  	int ret;
>
>  	rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
> -	if (!rcvd)
> +	if (!rcvd) {
> +		if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
> +			ret = poll(fds, num_socks, opt_timeout);
>  		return;
> +	}
>
>  	ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
>  	while (ret != rcvd) {
>  		if (ret < 0)
>  			exit_with_error(-ret);
> +		if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
> +			ret = poll(fds, num_socks, opt_timeout);
>  		ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
>  	}

I'll just point out that it seems a little odd that if one ring
needs a wakeup, all the rings get polled again.  However, I suppose
that it does amortize costs of a kernel call.
-- 
Jonathan


> @@ -549,42 +568,65 @@ static void rx_drop(struct xsk_socket_info *xsk)
>  static void rx_drop_all(void)
>  {
>  	struct pollfd fds[MAX_SOCKS + 1];
> -	int i, ret, timeout, nfds = 1;
> +	int i, ret;
>
>  	memset(fds, 0, sizeof(fds));
>
>  	for (i = 0; i < num_socks; i++) {
>  		fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
>  		fds[i].events = POLLIN;
> -		timeout = 1000; /* 1sn */
>  	}
>
>  	for (;;) {
>  		if (opt_poll) {
> -			ret = poll(fds, nfds, timeout);
> +			ret = poll(fds, num_socks, opt_timeout);
>  			if (ret <= 0)
>  				continue;
>  		}
>
>  		for (i = 0; i < num_socks; i++)
> -			rx_drop(xsks[i]);
> +			rx_drop(xsks[i], fds);
> +	}
> +}
> +
> +static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb)
> +{
> +	u32 idx;
> +
> +	if (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) == 
> BATCH_SIZE) {
> +		unsigned int i;
> +
> +		for (i = 0; i < BATCH_SIZE; i++) {
> +			xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr	=
> +				(frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
> +			xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len =
> +				sizeof(pkt_data) - 1;
> +		}
> +
> +		xsk_ring_prod__submit(&xsk->tx, BATCH_SIZE);
> +		xsk->outstanding_tx += BATCH_SIZE;
> +		frame_nb += BATCH_SIZE;
> +		frame_nb %= NUM_FRAMES;
>  	}
> +
> +	complete_tx_only(xsk);
>  }
>
> -static void tx_only(struct xsk_socket_info *xsk)
> +static void tx_only_all(void)
>  {
> -	int timeout, ret, nfds = 1;
> -	struct pollfd fds[nfds + 1];
> -	u32 idx, frame_nb = 0;
> +	struct pollfd fds[MAX_SOCKS];
> +	u32 frame_nb[MAX_SOCKS] = {};
> +	int i, ret;
>
>  	memset(fds, 0, sizeof(fds));
> -	fds[0].fd = xsk_socket__fd(xsk->xsk);
> -	fds[0].events = POLLOUT;
> -	timeout = 1000; /* 1sn */
> +	for (i = 0; i < num_socks; i++) {
> +		fds[0].fd = xsk_socket__fd(xsks[i]->xsk);
> +		fds[0].events = POLLOUT;
> +	}
>
>  	for (;;) {
>  		if (opt_poll) {
> -			ret = poll(fds, nfds, timeout);
> +			ret = poll(fds, num_socks, opt_timeout);
>  			if (ret <= 0)
>  				continue;
>
> @@ -592,69 +634,75 @@ static void tx_only(struct xsk_socket_info *xsk)
>  				continue;
>  		}
>
> -		if (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) ==
> -		    BATCH_SIZE) {
> -			unsigned int i;
> -
> -			for (i = 0; i < BATCH_SIZE; i++) {
> -				xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr
> -					= (frame_nb + i) * opt_xsk_frame_size;
> -				xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len =
> -					sizeof(pkt_data) - 1;
> -			}
> -
> -			xsk_ring_prod__submit(&xsk->tx, BATCH_SIZE);
> -			xsk->outstanding_tx += BATCH_SIZE;
> -			frame_nb += BATCH_SIZE;
> -			frame_nb %= NUM_FRAMES;
> -		}
> -
> -		complete_tx_only(xsk);
> +		for (i = 0; i < num_socks; i++)
> +			tx_only(xsks[i], frame_nb[i]);
>  	}
>  }
>
> -static void l2fwd(struct xsk_socket_info *xsk)
> +static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds)
>  {
> -	for (;;) {
> -		unsigned int rcvd, i;
> -		u32 idx_rx = 0, idx_tx = 0;
> -		int ret;
> +	unsigned int rcvd, i;
> +	u32 idx_rx = 0, idx_tx = 0;
> +	int ret;
>
> -		for (;;) {
> -			complete_tx_l2fwd(xsk);
> +	complete_tx_l2fwd(xsk, fds);
>
> -			rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE,
> -						   &idx_rx);
> -			if (rcvd > 0)
> -				break;
> -		}
> +	rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
> +	if (!rcvd) {
> +		if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
> +			ret = poll(fds, num_socks, opt_timeout);
> +		return;
> +	}
>
> +	ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
> +	while (ret != rcvd) {
> +		if (ret < 0)
> +			exit_with_error(-ret);
> +		if (xsk_ring_prod__needs_wakeup(&xsk->tx))
> +			kick_tx(xsk);
>  		ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
> -		while (ret != rcvd) {
> -			if (ret < 0)
> -				exit_with_error(-ret);
> -			ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
> -		}
> +	}
> +
> +	for (i = 0; i < rcvd; i++) {
> +		u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
> +		u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len;
> +		char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
> +
> +		swap_mac_addresses(pkt);
>
> -		for (i = 0; i < rcvd; i++) {
> -			u64 addr = xsk_ring_cons__rx_desc(&xsk->rx,
> -							  idx_rx)->addr;
> -			u32 len = xsk_ring_cons__rx_desc(&xsk->rx,
> -							 idx_rx++)->len;
> -			char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
> +		hex_dump(pkt, len, addr);
> +		xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = addr;
> +		xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len;
> +	}
>
> -			swap_mac_addresses(pkt);
> +	xsk_ring_prod__submit(&xsk->tx, rcvd);
> +	xsk_ring_cons__release(&xsk->rx, rcvd);
>
> -			hex_dump(pkt, len, addr);
> -			xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = addr;
> -			xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len;
> -		}
> +	xsk->rx_npkts += rcvd;
> +	xsk->outstanding_tx += rcvd;
> +}
> +
> +static void l2fwd_all(void)
> +{
> +	struct pollfd fds[MAX_SOCKS];
> +	int i, ret;
> +
> +	memset(fds, 0, sizeof(fds));
> +
> +	for (i = 0; i < num_socks; i++) {
> +		fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
> +		fds[i].events = POLLOUT | POLLIN;
> +	}
>
> -		xsk_ring_prod__submit(&xsk->tx, rcvd);
> -		xsk_ring_cons__release(&xsk->rx, rcvd);
> +	for (;;) {
> +		if (opt_poll) {
> +			ret = poll(fds, num_socks, opt_timeout);
> +			if (ret <= 0)
> +				continue;
> +		}
>
> -		xsk->rx_npkts += rcvd;
> -		xsk->outstanding_tx += rcvd;
> +		for (i = 0; i < num_socks; i++)
> +			l2fwd(xsks[i], fds);
>  	}
>  }
>
> @@ -705,9 +753,9 @@ int main(int argc, char **argv)
>  	if (opt_bench == BENCH_RXDROP)
>  		rx_drop_all();
>  	else if (opt_bench == BENCH_TXONLY)
> -		tx_only(xsks[0]);
> +		tx_only_all();
>  	else
> -		l2fwd(xsks[0]);
> +		l2fwd_all();
>
>  	return 0;
>  }
> -- 
> 2.7.4

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ