lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20170222163901.90834-13-willemdebruijn.kernel@gmail.com>
Date:   Wed, 22 Feb 2017 11:39:01 -0500
From:   Willem de Bruijn <willemdebruijn.kernel@...il.com>
To:     netdev@...r.kernel.org
Cc:     Willem de Bruijn <willemb@...gle.com>
Subject: [PATCH RFC v2 12/12] test: add sendmsg zerocopy tests

From: Willem de Bruijn <willemb@...gle.com>

Introduce the tests used to verify MSG_ZEROCOPY behavior:

snd_zerocopy:
  send zerocopy fragments out over the default route.

snd_zerocopy_lo:
  send data between a pair of local sockets and report throughput.

These tests are not suitable for inclusion in tools/testing/selftests
as is, because they do not return a pass/fail verdict. They are included
in this RFC for demonstration only.

Signed-off-by: Willem de Bruijn <willemb@...gle.com>
---
 tools/testing/selftests/net/.gitignore        |   2 +
 tools/testing/selftests/net/Makefile          |   1 +
 tools/testing/selftests/net/snd_zerocopy.c    | 354 +++++++++++++++
 tools/testing/selftests/net/snd_zerocopy_lo.c | 596 ++++++++++++++++++++++++++
 4 files changed, 953 insertions(+)
 create mode 100644 tools/testing/selftests/net/snd_zerocopy.c
 create mode 100644 tools/testing/selftests/net/snd_zerocopy_lo.c

diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index afe109e5508a..7dfb030f0c9b 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -5,3 +5,5 @@ reuseport_bpf
 reuseport_bpf_cpu
 reuseport_bpf_numa
 reuseport_dualstack
+snd_zerocopy
+snd_zerocopy_lo
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index e24e4c82542e..aa663c791f7a 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -7,6 +7,7 @@ NET_PROGS =  socket
 NET_PROGS += psock_fanout psock_tpacket
 NET_PROGS += reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 NET_PROGS += reuseport_dualstack
+NET_PROGS += snd_zerocopy snd_zerocopy_lo
 
 all: $(NET_PROGS)
 reuseport_bpf_numa: LDFLAGS += -lnuma
diff --git a/tools/testing/selftests/net/snd_zerocopy.c b/tools/testing/selftests/net/snd_zerocopy.c
new file mode 100644
index 000000000000..052d0d14e62d
--- /dev/null
+++ b/tools/testing/selftests/net/snd_zerocopy.c
@@ -0,0 +1,354 @@
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/errqueue.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define MSG_ZEROCOPY	0x4000000
+
+#define SK_FUDGE_FACTOR	2		/* allow for overhead in SNDBUF */
+#define BUFLEN		(400 * 1000)	/* max length of send call */
+#define DEST_PORT	9000
+
+uint32_t sent = UINT32_MAX, acked = UINT32_MAX;
+
+int cfg_batch_notify = 10;
+int cfg_num_runs = 16;
+size_t cfg_socksize = 1 << 20;
+int cfg_stress_sec;
+int cfg_verbose;
+bool cfg_zerocopy;
+
+/* Return the current time of day (gettimeofday) in milliseconds. */
+static unsigned long gettime_now_ms(void)
+{
+	struct timeval now;
+	unsigned long ms;
+
+	gettimeofday(&now, NULL);
+	ms = now.tv_sec * 1000;
+	ms += now.tv_usec / 1000;
+	return ms;
+}
+
+/*
+ * Force both the send and the receive buffer of @fd to cfg_socksize bytes.
+ *
+ * SO_SNDBUFFORCE and SO_RCVBUFFORCE take an int option value, so the
+ * size_t configuration value is narrowed to an int first: handing the
+ * kernel a pointer to the size_t object makes it read the wrong half of
+ * the value on big-endian 64-bit machines.
+ *
+ * Exits on failure, reporting errno.
+ */
+static void do_set_socksize(int fd)
+{
+	int val;
+
+	if (cfg_socksize > INT_MAX)
+		error(1, 0, "socksize too large");
+	val = cfg_socksize;
+
+	if (setsockopt(fd, SOL_SOCKET, SO_SNDBUFFORCE, &val, sizeof(val)))
+		error(1, errno, "setsockopt sndbufforce");
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val)))
+		error(1, errno, "setsockopt rcvbufforce");
+}
+
+/*
+ * Read one zerocopy completion notification from the error queue of @fd.
+ *
+ * A notification carries the range ee_info..ee_data of completed zerocopy
+ * sends. The range must start right after the last acknowledged send; on
+ * success the global 'acked' is advanced to the end of the range.
+ *
+ * Returns false if the error queue is empty (EAGAIN), true if a
+ * notification was consumed. Exits on any unexpected result.
+ */
+static bool do_read_notification(int fd)
+{
+	struct sock_extended_err *serr;
+	struct cmsghdr *cm;
+	struct msghdr msg = {};
+	char control[100];
+	uint32_t hi, lo;
+	int ret;
+
+	msg.msg_control = control;
+	msg.msg_controllen = sizeof(control);
+
+	ret = recvmsg(fd, &msg, MSG_DONTWAIT | MSG_ERRQUEUE);
+	if (ret == -1 && errno == EAGAIN)
+		return false;
+	if (ret == -1)
+		error(1, errno, "recvmsg notification");
+	/* truncation is not a syscall failure: errno holds no useful value */
+	if (msg.msg_flags & MSG_CTRUNC)
+		error(1, 0, "recvmsg notification: truncated");
+
+	cm = CMSG_FIRSTHDR(&msg);
+	if (!cm || cm->cmsg_level != SOL_IP ||
+	    (cm->cmsg_type != IP_RECVERR && cm->cmsg_type != IPV6_RECVERR))
+		error(1, 0, "cmsg: wrong type");
+
+	serr = (void *) CMSG_DATA(cm);
+	if (serr->ee_errno != 0 || serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
+		error(1, 0, "serr: wrong type");
+
+	/* ee_data and ee_info are 32-bit counters: keep them in uint32_t */
+	hi = serr->ee_data;
+	lo = serr->ee_info;
+	if (lo != (uint32_t) (acked + 1))
+		error(1, 0, "notify: %u..%u, expected %u\n",
+		      lo, hi, acked + 1);
+	acked = hi;
+
+	if (cfg_verbose)
+		fprintf(stderr, "completed: %u..%u\n", lo, hi);
+
+	return true;
+}
+
+/*
+ * Wait up to @timeout ms for @events on @fd.
+ * Exits if poll fails or does not report exactly one ready descriptor.
+ */
+static void do_poll(int fd, int events, int timeout)
+{
+	struct pollfd pfd = {
+		.fd = fd,
+		.events = events,
+		.revents = 0,
+	};
+	int nready;
+
+	nready = poll(&pfd, 1, timeout);
+	if (nready == -1)
+		error(1, errno, "poll");
+	if (nready != 1)
+		error(1, 0, "poll timeout. events=0x%x acked=%u sent=%u",
+		      pfd.events, acked, sent);
+
+	if (cfg_verbose < 2)
+		return;
+
+	fprintf(stderr, "poll ok. events=0x%x revents=0x%x\n",
+		pfd.events, pfd.revents);
+}
+
+/*
+ * Send @len bytes from a static buffer on @fd with sendmsg flags @flags.
+ *
+ * Every non-empty MSG_ZEROCOPY send is counted in the global 'sent' and
+ * is followed by one non-blocking read of the notification queue.
+ * Exits on a send error or on a short send.
+ */
+static void do_send(int fd, int len, int flags)
+{
+	static char data[BUFLEN];
+	struct msghdr msg = {};
+	struct iovec iov = {};
+	int ret;
+
+	if (len > BUFLEN)
+		error(1, 0, "write out of bounds");
+
+	iov.iov_base = data;
+	iov.iov_len = len;
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
+	ret = sendmsg(fd, &msg, flags);
+	if (ret == -1)
+		error(1, errno, "sendmsg");
+	/* a short send is not a syscall failure: errno is stale here */
+	if (ret != len)
+		error(1, 0, "sendmsg: %d < %d", ret, len);
+
+	if (cfg_verbose >= 2)
+		fprintf(stderr, "  sent %6d B\n", len);
+
+	if ((flags & MSG_ZEROCOPY) && len) {
+		sent++;
+		if (cfg_verbose)
+			fprintf(stderr, "    add %u\n", sent);
+		do_read_notification(fd);
+	}
+}
+
+/*
+ * Block until every zerocopy send has been acknowledged (acked == sent).
+ * Exits if that does not happen within roughly ten seconds.
+ */
+static void wait_for_notifications(int fd)
+{
+	unsigned long deadline, now;
+
+	if (acked == sent)
+		return;
+
+	now = gettime_now_ms();
+	deadline = now + 10000;
+	for (;;) {
+		do_poll(fd, 0 /* POLLERR is always reported */, deadline - now);
+
+		/* drain the whole error queue */
+		while (do_read_notification(fd))
+			;
+		if (acked == sent)
+			return;
+
+		now = gettime_now_ms();
+		if (now >= deadline)
+			break;
+	}
+
+	error(1, 0, "notify timeout. acked=%u sent=%u", acked, sent);
+}
+
+/*
+ * Run cfg_num_runs iterations of: an optional regular send of @len_cp
+ * bytes, followed by a send of @len_zc bytes (with MSG_ZEROCOPY if
+ * cfg_zerocopy is set). All outstanding notifications are awaited
+ * whenever the iteration index is a multiple of @batch, and once more
+ * after the last iteration.
+ *
+ * @batch is raised to at least 1 and lowered to what fits in
+ * cfg_socksize, allowing SK_FUDGE_FACTOR for overhead.
+ */
+static void run_test(int fd, int len_cp, int len_zc, int batch)
+{
+	int i;
+
+	fprintf(stderr, "\ncp=%u zc=%u batch=%u\n", len_cp, len_zc, batch);
+
+	if (acked != sent)
+		error(1, 0, "not empty when expected");
+
+	/* a batch of zero would divide by zero in the modulo below */
+	if (batch < 1)
+		batch = 1;
+
+	if ((size_t) batch * BUFLEN * SK_FUDGE_FACTOR > cfg_socksize) {
+		batch = cfg_socksize / BUFLEN / SK_FUDGE_FACTOR;
+		if (!batch)
+			error(1, 0, "cannot batch: increase socksize ('-s')");
+	}
+
+	for (i = 0; i < cfg_num_runs; i++) {
+		if (len_cp) {
+			do_poll(fd, POLLOUT, 1000);
+			do_send(fd, len_cp, 0);
+		}
+
+		do_poll(fd, POLLOUT, 1000);
+		do_send(fd, len_zc, cfg_zerocopy ? MSG_ZEROCOPY : 0);
+
+		if (i % batch == 0)
+			wait_for_notifications(fd);
+	}
+
+	wait_for_notifications(fd);
+}
+
+/* run a test with only the (zerocopy) send of @len bytes: no copied send */
+static void run_single(int fd, int len, int batch)
+{
+	run_test(fd, 0, len, batch);
+}
+
+/*
+ * combine zerocopy fragments with regular fragments: each iteration sends
+ * @len_cp bytes without and then @len_zc bytes with the zerocopy setting,
+ * using a batch size of one
+ */
+static void run_mix_zerocopy(int fd, int len_cp, int len_zc)
+{
+	run_test(fd, len_cp, len_zc, 1);
+}
+
+/*
+ * Run the fixed series of test cases on the connected socket @fd:
+ * single sends of various sizes, batched sends, mixes of copied and
+ * zerocopy sends, and finally batches of cfg_batch_notify sends.
+ */
+static void run_tests(int fd)
+{
+	/* test basic use */
+	run_single(fd, 4096, 1);
+	run_single(fd, 1500, 1);
+	run_single(fd, 1472, 1);
+	run_single(fd, 32000, 1);
+	run_single(fd, 65000, 1);
+	run_single(fd, BUFLEN, 1);
+
+	/* test notification on copybreak: data fits in skb head, no frags */
+	run_single(fd, 1, 1);
+
+	/* test coalescing */
+	run_single(fd, 32000, 4);
+	run_single(fd, 3000, 10);
+	run_single(fd, 100, 100);
+
+	run_mix_zerocopy(fd, 2000, 2000);
+	run_mix_zerocopy(fd, 100, 100);
+	run_mix_zerocopy(fd, 100, 1500);	/* fits coalesce in skb head */
+	run_mix_zerocopy(fd, 100, BUFLEN - 100);
+	run_mix_zerocopy(fd, 2000, 2000);
+
+	run_mix_zerocopy(fd, 1000, 12000);
+	run_mix_zerocopy(fd, 12000, 1000);
+	run_mix_zerocopy(fd, 12000, 12000);
+	run_mix_zerocopy(fd, 16000, 16000);
+
+	/* test more realistic async notifications */
+	run_single(fd, 1472, cfg_batch_notify);
+	run_single(fd, 1, cfg_batch_notify);
+	run_single(fd, BUFLEN, cfg_batch_notify);
+}
+
+/*
+ * Repeatedly call run_test() with random lengths and batch sizes for
+ * about @runtime_sec seconds.
+ *
+ * Overwrites cfg_socksize so that a batch of max_batch sends of BUFLEN
+ * bytes fits, and applies the new size to @fd.
+ */
+static void run_stress_test(int fd, int runtime_sec)
+{
+	const int max_batch = 32;
+	unsigned long tstop, i = 0;
+	int len, len_cp, batch;
+
+	cfg_socksize = BUFLEN * max_batch * SK_FUDGE_FACTOR;
+	do_set_socksize(fd);
+
+	tstop = gettime_now_ms() + (runtime_sec * 1000);
+	do {
+		len = random() % BUFLEN;
+
+		/* create some skbs with only zerocopy frags */
+		if (len && ((i % 200) < 100))
+			len_cp = random() % BUFLEN;
+		else
+			len_cp = 0;
+
+		/* 1..max_batch: run_test() uses batch as a modulus */
+		batch = 1 + random() % max_batch;
+
+		fprintf(stderr, "stress: cnt=%lu len_cp=%u len=%u batch=%u\n",
+			i, len_cp, len, batch);
+		run_test(fd, len_cp, len, batch);
+
+		i++;
+	} while (gettime_now_ms() < tstop);
+}
+
+/*
+ * Parse the command line into the cfg_* globals and @addr.
+ *
+ *   -b N     batch size for the notification tests
+ *   -H ADDR  IPv4 address of the destination host (required)
+ *   -n N     number of iterations per test
+ *   -s N     socket buffer size
+ *   -S N     stress test duration in seconds
+ *   -v       verbose
+ *   -V N     verbosity level
+ *   -z       send with MSG_ZEROCOPY
+ *
+ * Exits if the host is missing or cannot be parsed.
+ */
+static void parse_opts(int argc, char **argv, struct in_addr *addr)
+{
+	int c;
+
+	addr->s_addr = 0;
+
+	while ((c = getopt(argc, argv, "b:H:n:s:S:vV:z")) != -1) {
+		switch (c) {
+		case 'b':
+			cfg_batch_notify = strtol(optarg, NULL, 0);
+			break;
+		case 'H':
+			if (inet_pton(AF_INET, optarg, addr) != 1)
+				error(1, 0, "inet_pton: could not parse host");
+			break;
+		case 'n':
+			cfg_num_runs = strtol(optarg, NULL, 0);
+			break;
+		case 's':
+			cfg_socksize = strtol(optarg, NULL, 0);
+			break;
+		case 'S':
+			cfg_stress_sec = strtol(optarg, NULL, 0);
+			/* do not fall through: '-S' must not imply '-v' */
+			break;
+		case 'v':
+			cfg_verbose = 1;
+			break;
+		case 'V':
+			cfg_verbose = strtol(optarg, NULL, 0);
+			break;
+		case 'z':
+			cfg_zerocopy = true;
+			break;
+		}
+	}
+
+	if (addr->s_addr == 0)
+		error(1, 0, "host ('-H') argument required");
+
+	if (cfg_verbose) {
+		fprintf(stderr, "batch_notify:  %u\n", cfg_batch_notify);
+		fprintf(stderr, "num_runs:      %u\n", cfg_num_runs);
+		fprintf(stderr, "socksize:      %zu\n", cfg_socksize);
+		fprintf(stderr, "stress:        %u\n", cfg_stress_sec);
+		fprintf(stderr, "zerocopy:      %s\n", cfg_zerocopy ? "ON" : "OFF");
+	}
+}
+
+/*
+ * Connect a TCP socket to port DEST_PORT on the host given with '-H',
+ * run the fixed test series if cfg_num_runs is non-zero, then the stress
+ * test if cfg_stress_sec is non-zero. Any failure exits via error().
+ */
+int main(int argc, char **argv)
+{
+	struct sockaddr_in addr = {};
+	int fd;
+
+	parse_opts(argc, argv, &addr.sin_addr);
+
+	fd = socket(PF_INET, SOCK_STREAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket");
+
+	do_set_socksize(fd);
+
+	addr.sin_family = AF_INET;
+	addr.sin_port = htons(DEST_PORT);
+	if (connect(fd, (void *) &addr, sizeof(addr)))
+		error(1, errno, "connect");
+
+	if (cfg_num_runs)
+		run_tests(fd);
+
+	if (cfg_stress_sec)
+		run_stress_test(fd, cfg_stress_sec);
+
+	if (close(fd))
+		error(1, errno, "close");
+
+	fprintf(stderr, "OK. All tests passed\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/net/snd_zerocopy_lo.c b/tools/testing/selftests/net/snd_zerocopy_lo.c
new file mode 100644
index 000000000000..309b016a4fd5
--- /dev/null
+++ b/tools/testing/selftests/net/snd_zerocopy_lo.c
@@ -0,0 +1,596 @@
+/* evaluate MSG_ZEROCOPY over the loopback interface */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/socket.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#define MSG_ZEROCOPY	0x4000000
+
+#define NUM_LOOPS	4	/* MUST BE > 1 for corking to work */
+#define TXC_FUDGE	100
+
+static int  cfg_len_ms		= 4200;
+static int  cfg_report_len_ms	= 1000;
+static int  cfg_payload_len	= ((1 << 16) - 100);
+static bool cfg_test_packet;
+static bool cfg_test_raw;
+static bool cfg_test_raw_hdrincl;
+static bool cfg_test_tcp;
+static bool cfg_test_udp;
+static bool cfg_test_udp_cork;
+static bool cfg_verbose;
+static bool cfg_zerocopy;
+
+static bool flag_cork;
+
+static uint64_t tstop, treport;
+
+/* Return the current time of day (gettimeofday) in milliseconds. */
+static unsigned long gettimeofday_ms(void)
+{
+	struct timeval now;
+	unsigned long ms;
+
+	gettimeofday(&now, NULL);
+	ms = now.tv_sec * 1000;
+	ms += now.tv_usec / 1000;
+	return ms;
+}
+
+/*
+ * Sum @num_words 16-bit words starting at @start, fold the carries back
+ * into the low 16 bits and return the complement of the result.
+ */
+static uint16_t get_ip_csum(const uint16_t *start, int num_words)
+{
+	const uint16_t *word = start;
+	unsigned long acc = 0;
+
+	while (num_words-- > 0)
+		acc += *word++;
+
+	/* fold until nothing is left above bit 15 */
+	for (; acc >> 16; acc = (acc & 0xFFFF) + (acc >> 16))
+		;
+
+	return ~acc;
+}
+
+/*
+ * Arm the global deadlines relative to the current time: tstop after
+ * @timeout_ms and treport after cfg_report_len_ms.
+ */
+static void timer_start(int timeout_ms)
+{
+	const uint64_t now = gettimeofday_ms();
+
+	tstop = now + timeout_ms;
+	treport = now + cfg_report_len_ms;
+}
+
+/*
+ * Return true once the report deadline has been reached, and in that
+ * case move the deadline cfg_report_len_ms into the future.
+ */
+static bool timer_report(void)
+{
+	const uint64_t now = gettimeofday_ms();
+	const bool due = now >= treport;
+
+	if (due)
+		treport = now + cfg_report_len_ms;
+
+	return due;
+}
+
+/* Return true once the current time is past the global stop deadline. */
+static bool timer_stop(void)
+{
+	const uint64_t now = gettimeofday_ms();
+
+	return now > tstop;
+}
+
+/* Return the number of online processors; exits if it is less than one. */
+static int getnumcpus(void)
+{
+	const int online = sysconf(_SC_NPROCESSORS_ONLN);
+
+	if (online >= 1)
+		return online;
+
+	error(1, 0, "get num cpus\n");
+	return online;
+}
+
+/*
+ * Restrict the calling process to @cpu.
+ * Returns 0 on success; prints a message and returns 1 on failure.
+ */
+static int setcpu(int cpu)
+{
+	cpu_set_t cpus;
+
+	CPU_ZERO(&cpus);
+	CPU_SET(cpu, &cpus);
+
+	if (!sched_setaffinity(0, sizeof(cpus), &cpus))
+		return 0;
+
+	fprintf(stderr, "setaffinity %d\n", cpu);
+	return 1;
+}
+
+/*
+ * Query the MTU of interface "lo" through socket @fd and exit unless it
+ * equals 2^16.
+ */
+static void test_mtu_is_max(int fd)
+{
+	const int mtu_max = 1 << 16;
+	struct ifreq ifr = { .ifr_name = "lo" };
+
+	if (ioctl(fd, SIOCGIFMTU, &ifr) != 0)
+		error(1, errno, "ioctl get mtu");
+
+	if (ifr.ifr_mtu == mtu_max)
+		return;
+
+	error(1, 0, "mtu=%u expected=2^16\n", ifr.ifr_mtu);
+}
+
+/*
+ * Wait up to 10 ms for the events in @dir on @fd.
+ * Exits on a poll error or if nothing becomes ready in time.
+ */
+static void do_poll(int fd, int dir)
+{
+	struct pollfd pfd = {
+		.fd = fd,
+		.events = dir,
+		.revents = 0,
+	};
+
+	switch (poll(&pfd, 1, 10)) {
+	case -1:
+		error(1, errno, "poll");
+		break;
+	case 0:
+		error(1, 0, "poll: EAGAIN");
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Attempt one non-blocking sendmsg of @msg on @fd, with MSG_ZEROCOPY if
+ * @zcopy is set.
+ *
+ * Returns false if the send failed with EAGAIN or ENOBUFS, true if all
+ * @total_len bytes were sent. Exits on any other error or a short send.
+ */
+static bool do_write_once(int fd, struct msghdr *msg, int total_len, bool zcopy)
+{
+	const int flags = zcopy ? (MSG_DONTWAIT | MSG_ZEROCOPY) : MSG_DONTWAIT;
+	int sent;
+
+	sent = sendmsg(fd, msg, flags);
+	if (sent == -1) {
+		if (errno == EAGAIN || errno == ENOBUFS)
+			return false;
+		error(1, errno, "send");
+	}
+
+	if (sent != total_len)
+		error(1, 0, "send: ret=%u\n", sent);
+
+	return true;
+}
+
+/* Dump the first @len bytes of @tx and of @rx to stderr as hex values. */
+static void do_print_data_mismatch(char *tx, char *rx, int len)
+{
+	const char *label[] = { "tx: ", "\nrx: " };
+	char *buf[] = { tx, rx };
+	int which, off;
+
+	for (which = 0; which < 2; which++) {
+		fprintf(stderr, "%s", label[which]);
+		for (off = 0; off < len; off++)
+			fprintf(stderr, "%hx ", buf[which][off] & 0xff);
+	}
+	fprintf(stderr, "\n");
+}
+
+/*
+ * Flush @remaining bytes from the socket, blocking if necessary.
+ *
+ * Returns as soon as everything was discarded, immediately if there is
+ * nothing to discard. Exits on a receive error or if bytes are still
+ * outstanding once the 500 ms deadline has passed; the deadline is only
+ * checked between recv calls.
+ */
+static void do_flush_tcp(int fd, long remaining)
+{
+	unsigned long tstop;
+	int ret;
+
+	/* nothing to flush: do not report this as a timeout below */
+	if (remaining <= 0)
+		return;
+
+	tstop = gettimeofday_ms() + 500;
+	while (remaining > 0 && gettimeofday_ms() < tstop) {
+		/* MSG_TRUNC on a stream socket discards instead of copying */
+		ret = recv(fd, NULL, remaining, MSG_TRUNC);
+		if (ret == -1)
+			error(1, errno, "recv (flush)");
+		remaining -= ret;
+		if (!remaining)
+			return;
+		fprintf(stderr, "recv (flush): %dB, %ldB left\n",
+			ret, remaining);
+	}
+
+	error(1, 0, "recv (flush): %ldB at timeout", remaining);
+}
+
+/*
+ * Try to read one message from @fd without blocking and verify it.
+ *
+ * Only the first sizeof(rbuf) bytes are copied out and compared against
+ * the start of the transmit buffer @tbuf. @type is the socket type of the
+ * receiver; @corked tells whether NUM_LOOPS payloads were sent as one
+ * datagram. The number of payload bytes accounted for is added to @bytes.
+ *
+ * Returns false if nothing is queued (EAGAIN), true otherwise. Exits on
+ * a receive error, an unexpected length or a data mismatch.
+ */
+static bool do_read_once(int fd, char *tbuf, int type, bool corked, long *bytes)
+{
+	char rbuf[32], *payload;
+	int ret, len, expected, flags;
+
+	flags = MSG_DONTWAIT;
+	/* MSG_TRUNC differs on SOCK_STREAM: it flushes the buffer */
+	if (type != SOCK_STREAM)
+		flags |= MSG_TRUNC;
+
+	ret = recv(fd, rbuf, sizeof(rbuf), flags);
+	if (ret == -1 && errno == EAGAIN)
+		return false;
+	if (ret == -1)
+		error(1, errno, "recv");
+	/* raw sockets also deliver the IP header: do not count it */
+	if (type == SOCK_RAW)
+		ret -= sizeof(struct iphdr);
+
+	/*
+	 * with MSG_TRUNC the return value is compared against the whole
+	 * payload length, otherwise against the size of rbuf
+	 */
+	expected = sizeof(rbuf);
+	if (flags & MSG_TRUNC) {
+		expected = cfg_payload_len;
+		if (corked)
+			expected *= NUM_LOOPS;
+		*bytes += expected;
+	} else {
+		*bytes += cfg_payload_len;
+	}
+	if (ret != expected)
+		error(1, 0, "recv: ret=%u (exp=%u)\n", ret, expected);
+
+	/* skip the IP header in rbuf before comparing payload bytes */
+	payload = rbuf;
+	len = sizeof(rbuf);
+	if (type == SOCK_RAW) {
+		payload += sizeof(struct iphdr);
+		len -= sizeof(struct iphdr);
+	}
+
+	if (memcmp(payload, tbuf, len)) {
+		do_print_data_mismatch(tbuf, payload, len);
+		error(1, 0, "\nrecv: data mismatch\n");
+	}
+
+	/* Stream sockets are not truncated, so flush explicitly */
+	if (type == SOCK_STREAM)
+		do_flush_tcp(fd, cfg_payload_len - sizeof(rbuf));
+
+	return true;
+}
+
+/*
+ * Fill @iph with an IPv4 header without options for a packet from
+ * loopback to loopback that carries @payload_len bytes of protocol
+ * IPPROTO_EGP, including the header checksum.
+ */
+static void setup_iph(struct iphdr *iph, uint16_t payload_len)
+{
+	memset(iph, 0, sizeof(*iph));
+	iph->version	= 4;
+	iph->tos	= 0;
+	iph->ihl	= 5;
+	iph->ttl	= 8;
+	iph->saddr	= htonl(INADDR_LOOPBACK);
+	iph->daddr	= htonl(INADDR_LOOPBACK);
+	iph->protocol	= IPPROTO_EGP;
+	iph->tot_len	= htons(sizeof(*iph) + payload_len);
+	iph->check	= get_ip_csum((void *) iph, iph->ihl << 1);
+	/*
+	 * The checksum covers ihl * 2 16-bit words; the check field itself
+	 * is still zero from the memset above while it is computed.
+	 * NOTE(review): this comment used to claim that the kernel sets the
+	 * checksum. Presumably that holds only for some of the socket types
+	 * this header is sent on - confirm before dropping the computation.
+	 */
+}
+
+/* Set or clear the UDP_CORK option on @fd; exits on failure. */
+static void do_cork(int fd, bool enable)
+{
+	int cork = enable ? 1 : 0;
+	int ret;
+
+	ret = setsockopt(fd, IPPROTO_UDP, UDP_CORK, &cork, sizeof(cork));
+	if (ret)
+		error(1, errno, "cork %u", enable);
+}
+
+/*
+ * Read one zerocopy completion notification from the error queue of @fd.
+ *
+ * A notification carries the inclusive range ee_info..ee_data of
+ * completed sends. Both ends are 32-bit counters that may wrap, so the
+ * size of the range is computed in uint32_t arithmetic.
+ *
+ * Returns the number of completed sends covered by the notification, or
+ * 0 if the error queue is empty (EAGAIN). Exits on any unexpected result.
+ */
+static int do_read_notification(int fd)
+{
+	struct sock_extended_err *serr;
+	struct cmsghdr *cm;
+	struct msghdr msg = {};
+	char control[100];
+	uint32_t hi, lo, range;
+	int ret;
+
+	msg.msg_control = control;
+	msg.msg_controllen = sizeof(control);
+
+	ret = recvmsg(fd, &msg, MSG_DONTWAIT | MSG_ERRQUEUE);
+	if (ret == -1 && errno == EAGAIN)
+		return 0;
+
+	if (ret == -1)
+		error(1, errno, "recvmsg notification");
+	/* truncation is not a syscall failure: errno holds no useful value */
+	if (msg.msg_flags & MSG_CTRUNC)
+		error(1, 0, "recvmsg notification: truncated");
+
+	cm = CMSG_FIRSTHDR(&msg);
+	if (!cm)
+		error(1, 0, "cmsg: no cmsg");
+	if (!((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) ||
+	      (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR) ||
+	      (cm->cmsg_level == SOL_PACKET && cm->cmsg_type == PACKET_TX_TIMESTAMP)))
+		error(1, 0, "cmsg: wrong type");
+
+	serr = (void *) CMSG_DATA(cm);
+	if (serr->ee_errno != 0 || serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
+		error(1, 0, "serr: wrong type");
+
+	hi = serr->ee_data;
+	lo = serr->ee_info;
+	/* modulo 2^32: stays correct when hi has wrapped past lo */
+	range = hi - lo + 1;
+
+	if (cfg_verbose)
+		fprintf(stderr, "completed: %u (h=%u l=%u)\n",
+			range, hi, lo);
+
+	return (int) range;
+}
+
+/*
+ * Drain the notification queue of @fd and return the sum of the values
+ * returned by do_read_notification().
+ */
+static int do_read_notifications(int fd)
+{
+	int total = 0, count;
+
+	while ((count = do_read_notification(fd)) != 0)
+		total += count;
+
+	return total;
+}
+
+/*
+ * Main test loop: send from @fdt and receive on @fdr until the global
+ * timer expires, then check the counters.
+ *
+ * @domain, @type and @protocol describe the transmit socket. For packet
+ * sockets and for IPPROTO_RAW an IPv4 header is sent in front of the
+ * payload. Exits if the number of sent and received packets, or of
+ * expected and received completions, differ by more than TXC_FUDGE.
+ */
+static void do_run(int fdt, int fdr, int domain, int type, int protocol)
+{
+	static char tbuf[1 << 16];
+	struct sockaddr_ll laddr;
+	struct msghdr msg;
+	struct iovec iov[2];
+	struct iphdr iph;
+	long numtx = 0, numrx = 0, bytesrx = 0, numtxc = 0, expected_txc = 0;
+	int cpu, i, total_len = 0, type_r = type;
+
+	memset(&msg, 0, sizeof(msg));
+	memset(&iov, 0, sizeof(iov));
+	/* fill the payload with a repeating a..z pattern */
+	for (i = 0; i < sizeof(tbuf); i++)
+		tbuf[i] = 'a' + (i % 26);
+
+	/* from here on, i indexes the next free iovec slot */
+	i = 0;
+
+	/* for packet sockets, must prepare link layer information */
+	if (domain == PF_PACKET) {
+		memset(&laddr, 0, sizeof(laddr));
+		laddr.sll_family	= AF_PACKET;
+		laddr.sll_ifindex	= 1;	/* lo */
+		laddr.sll_protocol	= htons(ETH_P_IP);
+		laddr.sll_halen		= ETH_ALEN;
+
+		msg.msg_name		= &laddr;
+		msg.msg_namelen		= sizeof(laddr);
+
+		/* with PF_PACKET tx, do not expect ip_hdr on Rx */
+		type_r			= SOCK_DGRAM;
+	}
+
+	/* these socket types need the IP header supplied by the sender */
+	if (domain == PF_PACKET || protocol == IPPROTO_RAW) {
+		setup_iph(&iph, cfg_payload_len);
+		iov[i].iov_base = (void *) &iph;
+		iov[i].iov_len = sizeof(iph);
+		total_len += iov[i].iov_len;
+		i++;
+	}
+	iov[i].iov_base = tbuf;
+	iov[i].iov_len = cfg_payload_len;
+	total_len += iov[i].iov_len;
+
+	msg.msg_iovlen = i + 1;
+	msg.msg_iov = iov;
+
+	/* run on the highest numbered online cpu */
+	cpu = getnumcpus() - 1;
+	setcpu(cpu);
+	fprintf(stderr, "cpu: %u\n", cpu);
+
+	do {
+		if (cfg_zerocopy)
+			numtxc += do_read_notifications(fdt);
+
+		if (flag_cork)
+			do_cork(fdt, true);
+
+		for (i = 0; i < NUM_LOOPS; i++) {
+			bool do_zcopy = cfg_zerocopy;
+
+			/* when corking, every other send is a regular send */
+			if (flag_cork && (i & 0x1))
+				do_zcopy = false;
+
+			/* send would block: wait for room and stop this burst */
+			if (!do_write_once(fdt, &msg, total_len, do_zcopy)) {
+				do_poll(fdt, POLLOUT);
+				break;
+			}
+
+			numtx++;
+			if (do_zcopy)
+				expected_txc++;
+		}
+		if (flag_cork)
+			do_cork(fdt, false);
+
+		/* drain everything that has arrived so far */
+		while (do_read_once(fdr, tbuf, type_r, flag_cork, &bytesrx))
+			numrx++;
+
+		if (timer_report()) {
+			fprintf(stderr, "rx=%lu (%lu MB) tx=%lu txc=%lu\n",
+				numrx, bytesrx >> 20, numtx, numtxc);
+		}
+	} while (!timer_stop());
+
+	if (cfg_zerocopy)
+		numtxc += do_read_notifications(fdt);
+
+	/* when corking, NUM_LOOPS sends are counted as one received packet */
+	if (flag_cork)
+		numtx /= NUM_LOOPS;
+
+	if (labs(numtx - numrx) > TXC_FUDGE)
+		error(1, 0, "missing packets: %lu != %lu\n", numrx, numtx);
+	if (cfg_zerocopy && labs(expected_txc - numtxc) > TXC_FUDGE)
+		error(1, 0, "missing completions: rx=%lu expected=%lu\n",
+			    numtxc, expected_txc);
+}
+
+/*
+ * Open the receive socket that matches a transmit socket described by
+ * @domain, @type and @protocol, and return its descriptor.
+ * Exits if the socket cannot be created.
+ */
+static int do_setup_rx(int domain, int type, int protocol)
+{
+	const bool tx_is_packet = (domain == PF_PACKET);
+	int fdr;
+
+	/* Even when testing PF_PACKET Tx, Rx on PF_INET */
+	if (tx_is_packet) {
+		domain = PF_INET;
+		type = SOCK_RAW;
+	}
+
+	if (tx_is_packet || protocol == IPPROTO_RAW)
+		protocol = IPPROTO_EGP;
+
+	fdr = socket(domain, type, protocol);
+	if (fdr == -1)
+		error(1, errno, "socket r");
+
+	return fdr;
+}
+
+/*
+ * Run one test: create a transmit socket of the given @domain, @type and
+ * @protocol plus the matching receive socket, connect the two over
+ * loopback (except for packet sockets), set the buffer sizes, run
+ * do_run() for cfg_len_ms and close both sockets. Exits on any failure.
+ */
+static void do_setup_and_run(int domain, int type, int protocol)
+{
+	struct sockaddr_in addr;
+	socklen_t alen;
+	int fdr, fdt, ret;
+
+	fprintf(stderr, "test socket(%u, %u, %u)\n", domain, type, protocol);
+
+	fdr = do_setup_rx(domain, type, protocol);
+	fdt = socket(domain, type, protocol);
+	if (fdt == -1)
+		error(1, errno, "socket t");
+
+	test_mtu_is_max(fdr);
+
+	if (domain != PF_PACKET) {
+		memset(&addr, 0, sizeof(addr));
+		addr.sin_family = AF_INET;
+		addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+		alen = sizeof(addr);
+
+		/* port is left zero; read back the bound address below */
+		if (bind(fdr, (void *) &addr, sizeof(addr)))
+			error(1, errno, "bind");
+		if (type == SOCK_STREAM && listen(fdr, 1))
+			error(1, errno, "listen");
+		if (getsockname(fdr, (void *) &addr, &alen) ||
+		    alen != sizeof(addr))
+			error(1, 0, "getsockname");
+		if (connect(fdt, (void *) &addr, sizeof(addr)))
+			error(1, errno, "connect");
+	}
+
+	/* for streams, replace the listening socket by the accepted one */
+	if (type == SOCK_STREAM) {
+		int fda = fdr;
+
+		fdr = accept(fda, NULL, NULL);
+		if (fdr == -1)
+			error(1, errno, "accept");
+		if (close(fda))
+			error(1, errno, "close listen sock");
+	}
+
+	/* request 2 MB buffers on both ends */
+	ret = 1 << 21;
+	if (setsockopt(fdr, SOL_SOCKET, SO_RCVBUF, &ret, sizeof(ret)))
+		error(1, errno, "socklen r");
+	if (setsockopt(fdt, SOL_SOCKET, SO_SNDBUF, &ret, sizeof(ret)))
+		error(1, errno, "socklen t");
+
+	timer_start(cfg_len_ms);
+	do_run(fdt, fdr, domain, type, protocol);
+
+	if (close(fdt))
+		error(1, errno, "close t");
+	if (close(fdr))
+		error(1, errno, "close r");
+
+}
+
+/*
+ * Parse the command line into the cfg_* globals.
+ *
+ *   -l N  test length in seconds (stored in milliseconds)
+ *   -p    test packet sockets
+ *   -r    test raw sockets
+ *   -R    test raw sockets with protocol IPPROTO_RAW
+ *   -s N  payload length in bytes
+ *   -t    test tcp
+ *   -u    test udp
+ *   -U    test udp with corking
+ *   -v    verbose
+ *   -z    send with MSG_ZEROCOPY
+ *
+ * Exits if the payload length exceeds what fits in one IPv4 packet.
+ */
+static void parse_opts(int argc, char **argv)
+{
+	const char on[] = "ON", off[] = "OFF";
+	const int max_payload = IP_MAXPACKET - sizeof(struct iphdr);
+	int c;
+
+	while ((c = getopt(argc, argv, "l:prRs:tuUvz")) != -1) {
+		switch (c) {
+		case 'l':
+			cfg_len_ms = strtoul(optarg, NULL, 10) * 1000;
+			break;
+		case 'p':
+			cfg_test_packet = true;
+			break;
+		case 'r':
+			cfg_test_raw = true;
+			break;
+		case 'R':
+			cfg_test_raw_hdrincl = true;
+			break;
+		case 's':
+			cfg_payload_len = strtoul(optarg, NULL, 0);
+			break;
+		case 't':
+			cfg_test_tcp = true;
+			break;
+		case 'u':
+			cfg_test_udp = true;
+			break;
+		case 'U':
+			cfg_test_udp_cork = true;
+			break;
+		case 'v':
+			cfg_verbose = true;
+			break;
+		case 'z':
+			cfg_zerocopy = true;
+			break;
+		}
+	}
+
+	if (cfg_payload_len > max_payload)
+		error(1, 0, "-s: payload too long");
+	/* only a warning: leaves room for a tcp header plus 10 bytes */
+	if (cfg_payload_len >= (max_payload - sizeof(struct tcphdr) - 10))
+		fprintf(stderr, "warn: len may exceed limit\n");
+
+	if (cfg_verbose) {
+		fprintf(stderr, "time:     %u ms\n"
+				"size:     %u B\n"
+				"zerocopy: %s\n",
+			cfg_len_ms,
+			cfg_payload_len,
+			cfg_zerocopy ? on : off);
+	}
+}
+
+/*
+ * Run every test selected on the command line, in a fixed order:
+ * packet, udp, corked udp, raw, raw with IPPROTO_RAW, tcp.
+ * Any failure exits via error() inside the test.
+ */
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+
+	if (cfg_test_packet)
+		do_setup_and_run(PF_PACKET, SOCK_DGRAM, 0);
+	if (cfg_test_udp)
+		do_setup_and_run(PF_INET, SOCK_DGRAM, 0);
+	if (cfg_test_udp_cork) {
+		int saved_payload_len = cfg_payload_len;
+
+		/* NUM_LOOPS corked sends together make up one datagram */
+		cfg_payload_len /= NUM_LOOPS;
+
+		flag_cork = true;
+		do_setup_and_run(PF_INET, SOCK_DGRAM, 0);
+		flag_cork = false;
+
+		cfg_payload_len = saved_payload_len;
+	}
+	if (cfg_test_raw)
+		do_setup_and_run(PF_INET, SOCK_RAW, IPPROTO_EGP);
+	if (cfg_test_raw_hdrincl)
+		do_setup_and_run(PF_INET, SOCK_RAW, IPPROTO_RAW);
+	if (cfg_test_tcp)
+		do_setup_and_run(PF_INET, SOCK_STREAM, 0);
+
+	fprintf(stderr, "OK. All tests passed\n");
+	return 0;
+}
-- 
2.11.0.483.g087da7b7c-goog

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ