lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-Id: <200905252031.47868.rusty@rustcorp.com.au>
Date:	Mon, 25 May 2009 20:31:47 +0930
From:	Rusty Russell <rusty@...tcorp.com.au>
To:	David Miller <davem@...emloft.net>
Cc:	netdev@...r.kernel.org, virtualization@...ts.linux-foundation.org
Subject: Re: [RFC] virtio: orphan skbs if we're relying on timer to free them

On Thu, 21 May 2009 04:45:03 pm David Miller wrote:
> Probably the most profitable avenue is to see if this is a real issue
> afterall (see above).  If we can get away with having the socket
> buffer represent socket --> device space only, that's the most ideal
> solution.  It will probably also improve performance a lot across the
> board, especially on NUMA/SMP boxes as our TX complete events tend to
> be in difference places than the SKB producer.

OK, hacked in skb_orphan() and couldn't see any behavior change here.

Details: I wrote a little test program to do a 1 byte ping pong on a TCP
socket while doing 1k writes to a number of udp sockets.  We never completely
starve the TCP connection, though that may be because it begins at the same
time as the others and gets some traffic before the buffers fill up.

Running it with 0 to 20 sockets, from my gigabit laptop to my 100Mbit
desktop.  Sorry about the attachment, but sometimes a graph is worth about
1k words.

Not sure why write rate shoots up on serious overload though.  Perhaps we're
discarding earlier in the path?

Cheers,
Rusty.

diff --git a/net/core/dev.c b/net/core/dev.c
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1677,6 +1677,8 @@ int dev_hard_start_xmit(struct sk_buff *
 	const struct net_device_ops *ops = dev->netdev_ops;
 	int rc;
 
+	skb_orphan(skb);
+
 	if (likely(!skb->next)) {
 		if (!list_empty(&ptype_all))
 			dev_queue_xmit_nit(skb, dev);

Sender: for i in `seq 0 20`; do ~/devel/junk/udpstress 9999 $i debussy 5; sleep 1; done
Receiver: for i in `seq 0 20`; do ./udpstress -r 9999 $i; done

/* udpstress.c: Simple server & client to test multiple UDP streams. */
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/sendfile.h>
#include <sys/time.h>
#include <time.h>
#include <netinet/in.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <err.h>
#include <netdb.h>

static volatile unsigned int bytes;

static void dumpstats(int signal)
{
	char buffer[128];

	sprintf(buffer, "%u\n", bytes);
	write(STDOUT_FILENO, buffer, strlen(buffer));
	_exit(0);
}

static void tcp_dumpstats(int signal)
{
	char buffer[128];

	sprintf(buffer, "TCP: %u\n", bytes);
	write(STDOUT_FILENO, buffer, strlen(buffer));
	_exit(0);
}

static void udp_receiver(unsigned int port)
{
	struct sockaddr_in saddr;
	int sock, ret;
	char buffer[65536];
	int val = 1;

	sock = socket(PF_INET, SOCK_DGRAM, 0);
	if (sock < 0)
		err(1, "creating socket");

	if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(&val)) != 0)
		err(1, "setsockopt(SO_REUSEADDR)");

	/* Wait for incoming. */
	saddr.sin_family = AF_INET;
	saddr.sin_port = htons(port);
	saddr.sin_addr.s_addr = INADDR_ANY;
	if (bind(sock, (struct sockaddr *)&saddr, sizeof(saddr)) != 0)
		err(1, "bind to port %u", port);

	/* Read packets until we get signal. */
	signal(SIGALRM, dumpstats);

	while ((ret = read(sock, buffer, sizeof(buffer))) > 0)
		bytes += ret;
	err(1, "Short read %i", ret);
}

static void udp_client(unsigned int ip, unsigned int port, int fd)
{
	/* We're going to send UDP packets to that addr. */
	struct sockaddr_in saddr;
	int sock, ret;
	char buffer[1024];

	sock = socket(PF_INET, SOCK_DGRAM, 0);
	if (sock < 0)
		err(1, "creating socket");

	/* Connect to remote. */
	saddr.sin_family = AF_INET;
	saddr.sin_port = htons(port);
	saddr.sin_addr.s_addr = ip;
	if (connect(sock, (struct sockaddr *)&saddr, sizeof(saddr)))
		err(1, "connecting socket");

	/* Write packets until we get signal. */
	signal(SIGALRM, dumpstats);

	/* Wait for "go" signal. */
	if (read(fd, buffer, 1) != 1)
		err(1, "Reading go signal");

	while ((ret = write(sock, buffer, sizeof(buffer))) > 0)
		bytes += ret;
	err(1, "Short write %i", ret);
}

/* Exits when tcp socket closes. */
static void tcp_receiver(unsigned int port)
{
	/* We're going to send TCP packets to that addr. */
	struct sockaddr_in saddr;
	int sock;
	char c;
	int val = 1;

	sock = socket(PF_INET, SOCK_STREAM, 0);
	if (sock < 0)
		err(1, "creating socket");

	if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(&val)) != 0)
		err(1, "setsockopt(SO_REUSEADDR)");

	/* Wait for incoming. */
	saddr.sin_family = AF_INET;
	saddr.sin_port = htons(port);
	saddr.sin_addr.s_addr = INADDR_ANY;
	if (bind(sock, (struct sockaddr *)&saddr, sizeof(saddr)) != 0)
		err(1, "bind to port %u", port);

	if (listen(sock, 1) != 0)
		err(1, "listen");

	sock = accept(sock, NULL, NULL);
	if (sock < 0)
		err(1, "Accepting");

	while (read(sock, &c, 1) > 0) {
		bytes++;
		if (write(sock, &c, 1) != 1)
			break;
	}
}

static void tcp_client(unsigned int ip, unsigned int port, int fd)
{
	/* We're going to send TCP packets to that addr. */
	struct sockaddr_in saddr;
	int sock;
	char c;

	sock = socket(PF_INET, SOCK_STREAM, 0);
	if (sock < 0)
		err(1, "creating socket");

	/* Connect to remote. */
	saddr.sin_family = AF_INET;
	saddr.sin_port = htons(port);
	saddr.sin_addr.s_addr = ip;
	if (connect(sock, (struct sockaddr *)&saddr, sizeof(saddr)))
		err(1, "connecting socket");

	/* Write packets until we get signal. */
	signal(SIGALRM, tcp_dumpstats);

	/* Wait for "go" signal. */
	if (read(fd, &c, 1) != 1)
		err(1, "Reading go signal");

	while (write(sock, &c, 1) > 0) {
		bytes++;
		if (read(sock, &c, 1) != 1)
			err(1, "Short read");
	}
	err(1, "Short write");
}

static unsigned int host_to_ipaddr(const char *name)
{
	struct hostent *host;
	unsigned int ip = 0;

	host = gethostbyname(name);
	if (!host)
		err(1, "gethostbyname(%s)", name);

	if (host->h_addrtype != AF_INET ||
	    host->h_length != sizeof(struct in_addr))
		errx(1, "gethostbyname gave wrong type");

	memcpy(&ip, host->h_addr_list[0], 4);
	return ip;
}

static void usage(void)
{
	errx(1,
	     "Usage: udpstress -r portnum number OR\n"
	     "	udpstress portnum number ipaddr timeout");
}

int main(int argc, char *argv[])
{
	unsigned int ip, i, port, kids;
	int fds[2];

	if (argc < 4)
		usage();

	if (pipe(fds) != 0)
		err(1, "Creating pipe");

	signal(SIGALRM, SIG_IGN);
	setsid();

	if (strcmp(argv[1], "-r") == 0) {
		port = atoi(argv[2]);
		kids = atoi(argv[3]);
		for (i = 0; i < kids; i++) {
			switch (fork()) {
			case 0:
				udp_receiver(port + i);
			case -1:
				err(1, "forking");
			}
		}

		/* Finally, receive TCP until the sender finishes. */
		tcp_receiver(port);
		{
			char buffer[128];

			/* Don't just use printf here; gets overwritten by
			 * other children when writing to file. */
			sprintf(buffer, "TCP: %u\n", bytes);
			write(STDOUT_FILENO, buffer, strlen(buffer));
		}
	} else {
		char buffer[kids = atoi(argv[2])+1];
		port = atoi(argv[1]);
		ip = host_to_ipaddr(argv[3]);
		for (i = 0; i < kids - 1; i++) {
			switch (fork()) {
			case 0:
				udp_client(ip, port + i, fds[0]);
			case -1:
				err(1, "forking");
			}
		}
		/* Finally, TCP sender. */
		switch (fork()) {
		case 0:
			tcp_client(ip, port, fds[0]);
		case -1:
			err(1, "forking");
		}

		/* Wait for them to settle. */
		sleep(1);

		if (write(fds[1], buffer, kids) != kids)
			errx(1, "Short write on pipe");
		sleep(atoi(argv[4]));
	}

	/* Kill kids. */
	kill(-getpid(), SIGALRM);

	/* We want to exit last. */
	for (i = 0; i < kids; i++)
		wait(NULL);
	return 0;
}


Download attachment "results.gnumeric" of type "application/x-gnumeric" (12248 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ