lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <200804181442.17251.rusty@rustcorp.com.au>
Date:	Fri, 18 Apr 2008 14:42:16 +1000
From:	Rusty Russell <rusty@...tcorp.com.au>
To:	netdev@...r.kernel.org
Cc:	Max Krasnyansky <maxk@...lcomm.com>,
	virtualization@...ts.linux-foundation.org,
	linux-kernel@...r.kernel.org
Subject: [PATCH 4/5] tun: vringfd receive support.

This patch modifies tun to allow a vringfd to specify the receive
buffer.  Because we can't copy to userspace in bh context, we queue
like normal then use the "pull" hook to actually do the copy.

We use struct virtio_net_hdr prepended to packets in the ring to allow
userspace to receive GSO packets in future (at the moment, the tun
driver doesn't tell the stack it can handle them, so these cases are
never taken).  This will need to be something that userspace tells us
it can handle.

Signed-off-by: Rusty Russell <rusty@...tcorp.com.au>
---
 drivers/net/Kconfig    |    2 
 drivers/net/tun.c      |  159 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/if_tun.h |    1 
 3 files changed, 162 insertions(+)

diff -r 9bafcef88e1b drivers/net/Kconfig
--- a/drivers/net/Kconfig	Fri Apr 18 05:54:45 2008 +1000
+++ b/drivers/net/Kconfig	Fri Apr 18 05:58:40 2008 +1000
@@ -120,6 +120,8 @@ config TUN
 config TUN
 	tristate "Universal TUN/TAP device driver support"
 	select CRC32
+# If no VRING at all, that's fine, but if it's a module, we must be, too.
+	depends on !VRING || VRING
 	---help---
 	  TUN/TAP provides packet reception and transmission for user space
 	  programs.  It can be viewed as a simple Point-to-Point or Ethernet
diff -r 9bafcef88e1b drivers/net/tun.c
--- a/drivers/net/tun.c	Fri Apr 18 05:54:45 2008 +1000
+++ b/drivers/net/tun.c	Fri Apr 18 05:58:40 2008 +1000
@@ -62,6 +62,9 @@
 #include <linux/if_ether.h>
 #include <linux/if_tun.h>
 #include <linux/crc32.h>
+#include <linux/vring.h>
+#include <linux/virtio_net.h>
+#include <linux/file.h>
 #include <net/net_namespace.h>
 
 #include <asm/system.h>
@@ -98,6 +101,9 @@ struct tun_struct {
 	u8 dev_addr[ETH_ALEN];
 	u32 chr_filter[2];
 	u32 net_filter[2];
+
+	struct vring_info	*inring;
+	struct file		*infile;
 
 #ifdef TUN_DEBUG
 	int debug;
@@ -158,6 +164,10 @@ static int tun_net_xmit(struct sk_buff *
 	/* Notify and wake up reader process */
 	if (tun->flags & TUN_FASYNC)
 		kill_fasync(&tun->fasync, SIGIO, POLL_IN);
+
+	if (tun->inring)
+		vring_wake(tun->inring);
+
 	wake_up_interruptible(&tun->read_wait);
 	return 0;
 
@@ -249,6 +259,149 @@ static void tun_net_init(struct net_devi
 		break;
 	}
 }
+
+#if defined(CONFIG_VRING) || defined(CONFIG_VRING_MODULE)
+/* vring ->needs_pull hook: true while tun's readq still holds an skb. */
+static bool pending_recv_skbs(void *_tun)
+{
+	struct sk_buff_head *rxq = &((struct tun_struct *)_tun)->readq;
+
+	return !skb_queue_empty(rxq);
+}
+
+/* vring ->pull hook: copy queued skbs, each preceded by a virtio_net_hdr, into
+ * buffers taken from the receive ring.  An skb that cannot be delivered is
+ * put back at the head of readq.  Returns 0, or negative errno. */
+static int pull_recv_skbs(void *_tun)
+{
+	struct tun_struct *tun = _tun;
+	int err = 0, num_copied = 0;
+	struct sk_buff *skb;
+
+	while ((skb = skb_dequeue(&tun->readq)) != NULL) {
+		struct iovec iov[1+MAX_SKB_FRAGS];
+		struct virtio_net_hdr gso = { 0 }; /* no info leak */
+		unsigned int iovnum = ARRAY_SIZE(iov);
+		unsigned long len;
+		int id;
+
+		id = vring_get_buffer(tun->inring, iov, &iovnum, &len,
+				      NULL, NULL, NULL);
+		if (id <= 0) {
+			err = id;
+			break;
+		}
+
+		/* FIXME: we could stash this descriptor and go looking for a
+		 * better-sized one.  That would allow them to mix different
+		 * buffer sizes for efficiency. */
+		if (unlikely(len < sizeof(gso) + skb->len)) {
+			tun->dev->stats.tx_aborted_errors++;
+			err = -ENOBUFS; /* PS. You suck! */
+			break;
+		}
+
+		if (skb_is_gso(skb)) {
+			struct skb_shared_info *sinfo = skb_shinfo(skb);
+
+			/* This is a hint as to how much should be linear. */
+			gso.hdr_len = skb_headlen(skb);
+			gso.gso_size = sinfo->gso_size;
+			if (sinfo->gso_type & SKB_GSO_TCPV4)
+				gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
+			else if (sinfo->gso_type & SKB_GSO_TCPV6)
+				gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
+			else if (sinfo->gso_type & SKB_GSO_UDP)
+				gso.gso_type = VIRTIO_NET_HDR_GSO_UDP;
+			else
+				BUG();
+			if (sinfo->gso_type & SKB_GSO_TCP_ECN)
+				gso.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
+		} else
+			gso.gso_type = VIRTIO_NET_HDR_GSO_NONE;
+
+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
+			gso.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+			gso.csum_start = skb->csum_start - skb_headroom(skb);
+			gso.csum_offset = skb->csum_offset;
+		} /* else everything is zero */
+
+		err = memcpy_toiovec(iov, (void *)&gso, sizeof(gso));
+		if (likely(!err))
+			err = skb_copy_datagram_iovec(skb, 0, iov, skb->len);
+		if (unlikely(err)) {
+			tun->dev->stats.tx_fifo_errors++;
+			break;
+		}
+
+		vring_used_buffer(tun->inring, id, sizeof(gso) + skb->len);
+		/* Delivered: drop our reference so the skb is not leaked. */
+		kfree_skb(skb);
+		num_copied++;
+	}
+
+	/* We took an skb, but ring isn't ready for it.  Put it back */
+	if (skb)
+		skb_queue_head(&tun->readq, skb);
+
+	if (num_copied)
+		netif_wake_queue(tun->dev);
+
+	return err;
+}
+
+static struct vring_ops recvops = {
+	.needs_pull = pending_recv_skbs,	/* are skbs waiting on readq? */
+	.pull = pull_recv_skbs,			/* copy them into the ring */
+};
+
+/* Make the vring behind @fd tun's receive ring, holding a file reference.
+ * Returns 0, -EBUSY (ring already set), -EBADF, or vring_set_ops()'s error. */
+static int set_recv_vring(struct tun_struct *tun, int fd)
+{
+	int err = -EBADF;
+
+	if (tun->inring)
+		return -EBUSY;
+
+	tun->infile = fget(fd);
+	if (!tun->infile)
+		return -EBADF;
+
+	tun->inring = vring_get(tun->infile);
+	if (!tun->inring)
+		goto put;
+
+	err = vring_set_ops(tun->inring, &recvops, tun);
+	if (!err)
+		return 0;
+	tun->inring = NULL;
+put:
+	fput(tun->infile);
+	tun->infile = NULL;
+	return err;
+}
+
+/* Detach the receive ring, if any, and drop its file reference. */
+static void unset_vrings(struct tun_struct *tun)
+{
+	if (tun->inring) {
+		vring_unset_ops(tun->inring);
+		fput(tun->infile);
+		tun->inring = NULL;	/* in case tun outlives this close */
+		tun->infile = NULL;
+	}
+}
+#else /* ... !CONFIG_VRING */
+static int set_recv_vring(struct tun_struct *tun, int fd)
+{
+	return -ENOTTY;	/* built without vring support: ioctl unavailable */
+}
+
+static void unset_vrings(struct tun_struct *tun)
+{	/* Built without vring support: there is no ring to detach. */
+}
+#endif
 
 /* Character device part */
 
@@ -465,6 +618,7 @@ static void tun_setup(struct net_device 
 
 	tun->owner = -1;
 	tun->group = -1;
+	tun->inring = NULL;
 
 	dev->open = tun_net_open;
 	dev->hard_start_xmit = tun_net_xmit;
@@ -674,6 +828,9 @@ static int tun_chr_ioctl(struct inode *i
 		break;
 #endif
 
+	case TUNSETRECVVRING:
+		return set_recv_vring(tun, arg);
+
 	case SIOCGIFFLAGS:
 		ifr.ifr_flags = tun->if_flags;
 		if (copy_to_user( argp, &ifr, sizeof ifr))
@@ -784,6 +941,8 @@ static int tun_chr_close(struct inode *i
 	DBG(KERN_INFO "%s: tun_chr_close\n", tun->dev->name);
 
 	tun_chr_fasync(-1, file, 0);
+
+	unset_vrings(tun);
 
 	rtnl_lock();
 
diff -r 9bafcef88e1b include/linux/if_tun.h
--- a/include/linux/if_tun.h	Fri Apr 18 05:54:45 2008 +1000
+++ b/include/linux/if_tun.h	Fri Apr 18 05:58:40 2008 +1000
@@ -42,6 +42,7 @@
 #define TUNSETOWNER   _IOW('T', 204, int)
 #define TUNSETLINK    _IOW('T', 205, int)
 #define TUNSETGROUP   _IOW('T', 206, int)
+#define TUNSETRECVVRING _IOW('T', 207, int)
 
 /* TUNSETIFF ifr flags */
 #define IFF_TUN		0x0001
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ