[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20090409163134.32740.15238.stgit@dev.haskins.net>
Date: Thu, 09 Apr 2009 12:31:34 -0400
From: Gregory Haskins <ghaskins@...ell.com>
To: linux-kernel@...r.kernel.org
Cc: agraf@...e.de, pmullaney@...ell.com, pmorreale@...ell.com,
anthony@...emonkey.ws, rusty@...tcorp.com.au,
netdev@...r.kernel.org, kvm@...r.kernel.org, avi@...hat.com,
bhutchings@...arflare.com, andi@...stfloor.org, gregkh@...e.de,
herber@...dor.apana.org.au, chrisw@...s-sol.org,
shemminger@...tta.com
Subject: [RFC PATCH v2 10/19] venet-tap: Adds a "venet" compatible "tap"
device to VBUS
This module is similar in concept to a "tuntap". A tuntap module provides
a netif() interface on one side, and a char-dev interface on the other.
Packets that ingress on one interface, egress on the other (and vice versa).
This module offers a similar concept, except that it substitutes the
char-dev for a VBUS/IOQ interface. This allows a VBUS compatible entity
(e.g. userspace or a guest) to directly inject and receive packets
from the host/kernel stack.
Thanks to Pat Mullaney for contributing the maxcount modification
Signed-off-by: Gregory Haskins <ghaskins@...ell.com>
---
drivers/Makefile | 1
drivers/vbus/devices/Kconfig | 17
drivers/vbus/devices/Makefile | 1
drivers/vbus/devices/venet-tap.c | 1388 ++++++++++++++++++++++++++++++++++++++
kernel/vbus/Kconfig | 13
5 files changed, 1420 insertions(+), 0 deletions(-)
create mode 100644 drivers/vbus/devices/Kconfig
create mode 100644 drivers/vbus/devices/Makefile
create mode 100644 drivers/vbus/devices/venet-tap.c
diff --git a/drivers/Makefile b/drivers/Makefile
index c1bf417..98fab51 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -106,3 +106,4 @@ obj-$(CONFIG_SSB) += ssb/
obj-$(CONFIG_VIRTIO) += virtio/
obj-$(CONFIG_STAGING) += staging/
obj-y += platform/
+obj-$(CONFIG_VBUS_DEVICES) += vbus/devices/
diff --git a/drivers/vbus/devices/Kconfig b/drivers/vbus/devices/Kconfig
new file mode 100644
index 0000000..64e4731
--- /dev/null
+++ b/drivers/vbus/devices/Kconfig
@@ -0,0 +1,17 @@
+#
+# Virtual-Bus (VBus) configuration
+#
+
+config VBUS_VENETTAP
+ tristate "Virtual-Bus Ethernet Tap Device"
+ depends on VBUS_DEVICES
+ default n
+ help
+ Provides a virtual ethernet adapter to a vbus, which in turn
+ manifests itself as a standard netif based adapter to the
+ kernel. It can be used similarly to a "tuntap" device,
+ except that the char-dev transport is replaced with a vbus/ioq
+ interface.
+
+ If unsure, say N
+
diff --git a/drivers/vbus/devices/Makefile b/drivers/vbus/devices/Makefile
new file mode 100644
index 0000000..2ea7d2a
--- /dev/null
+++ b/drivers/vbus/devices/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_VBUS_VENETTAP) += venet-tap.o
diff --git a/drivers/vbus/devices/venet-tap.c b/drivers/vbus/devices/venet-tap.c
new file mode 100644
index 0000000..148e2c8
--- /dev/null
+++ b/drivers/vbus/devices/venet-tap.c
@@ -0,0 +1,1388 @@
+/*
+ * venettap - A 802.x virtual network device based on the VBUS/IOQ interface
+ *
+ * Copyright (C) 2009 Novell, Gregory Haskins <ghaskins@...ell.com>
+ *
+ * Derived from the SNULL example from the book "Linux Device Drivers" by
+ * Alessandro Rubini, Jonathan Corbet, and Greg Kroah-Hartman, published
+ * by O'Reilly & Associates.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/wait.h>
+
+#include <linux/in.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/skbuff.h>
+#include <linux/ioq.h>
+#include <linux/vbus.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+
+#include <linux/venet.h>
+
+#include <linux/in6.h>
+#include <asm/checksum.h>
+
+MODULE_AUTHOR("Gregory Haskins");
+MODULE_LICENSE("GPL");
+
+/*
+ * PDEBUG() emits a KERN_DEBUG message prefixed with "venet-tap: " when
+ * VENETTAP_DEBUG is defined at build time, and expands to nothing otherwise.
+ */
+#undef PDEBUG /* undef it, just in case */
+#ifdef VENETTAP_DEBUG
+# define PDEBUG(fmt, args...) printk(KERN_DEBUG "venet-tap: " fmt, ## args)
+#else
+# define PDEBUG(fmt, args...) /* not debugging: nothing */
+#endif
+
+/*
+ * Ring size handed to vbus_shm_ioq_attach() for every queue. Exposed as a
+ * module parameter with mode 0600.
+ */
+static int maxcount = 2048;
+module_param(maxcount, int, 0600);
+MODULE_PARM_DESC(maxcount, "maximum size for rx/tx ioq ring");
+
+/* Forward declarations for handlers that are referenced before definition */
+static void venettap_tx_isr(struct ioq_notifier *notifier);
+static int venettap_rx_thread(void *__priv);
+static int venettap_tx_thread(void *__priv);
+
+/* One IOQ ring together with the notifier used to receive its signals */
+struct venettap_queue {
+ struct ioq *queue;
+ struct ioq_notifier notifier;
+};
+
+struct venettap;
+
+/* Bit numbers used with the atomic bitops on venettap.flags */
+enum {
+ RX_SCHED, /* set before the rx-thread is woken; cleared once the ring is empty */
+ TX_SCHED, /* set when a packet is queued for the tx-thread */
+ TX_NETIF_CONGESTED, /* set when the netif queue is stopped for backpressure */
+ TX_IOQ_CONGESTED, /* set when packets remain queued after a tx pass */
+};
+
+/*
+ * Per-device state, allocated as the private area of the net_device
+ * (see the netdev_priv() users below).
+ *
+ * @lock protects the link flags, the netif txq bookkeeping and the
+ * notify-depth counter; @flags holds the RX_SCHED/TX_SCHED/..._CONGESTED
+ * bits and is manipulated with atomic bitops.
+ *
+ * The single-bit state flags are declared unsigned: a signed one-bit
+ * field can only hold 0 and -1, so storing "true" would read back as -1
+ * (and be printed as such by the sysfs "enabled" attribute).
+ */
+struct venettap {
+	spinlock_t lock;
+	unsigned char hmac[ETH_ALEN]; /* host-mac */
+	unsigned char cmac[ETH_ALEN]; /* client-mac */
+	struct task_struct *rxthread;
+	struct task_struct *txthread;
+	unsigned long flags;
+
+	/* host-side network interface */
+	struct {
+		struct net_device *dev;
+		struct net_device_stats stats;
+		/* packets accepted from the stack, waiting for the tx-thread */
+		struct {
+			struct sk_buff_head list;
+			size_t len;
+			int irqdepth; /* nesting count for tx-ioq notifications */
+		} txq;
+		unsigned int enabled:1; /* netdev is registered */
+		unsigned int link:1;    /* netdev has been opened */
+	} netif;
+
+	/* vbus-side connection to the client */
+	struct {
+		struct vbus_device dev;
+		struct vbus_device_interface intf;
+		struct vbus_connection conn;
+		struct vbus_memctx *ctx;
+		struct venettap_queue rxq;
+		struct venettap_queue txq;
+		wait_queue_head_t rx_empty; /* woken when the rx ring drains */
+		unsigned int connected:1;   /* attached to a bus */
+		unsigned int opened:1;      /* a client holds the interface */
+		unsigned int link:1;        /* client issued LINKUP */
+	} vbus;
+};
+
+/*
+ * Attach an IOQ of up to "maxcount" entries to the supplied shared-memory
+ * region and bind it to @q.
+ *
+ * Returns -EEXIST if @q already has a queue, a negative error from
+ * vbus_shm_ioq_attach() on failure, or 0 on success. When @func is
+ * non-NULL it is installed as the queue's notifier callback.
+ */
+static int
+venettap_queue_init(struct venettap_queue *q,
+		    struct vbus_shm *shm,
+		    struct shm_signal *signal,
+		    void (*func)(struct ioq_notifier *))
+{
+	struct ioq *attached;
+	int rc;
+
+	if (q->queue != NULL)
+		return -EEXIST;
+
+	rc = vbus_shm_ioq_attach(shm, signal, maxcount, &attached);
+	if (rc < 0)
+		return rc;
+
+	q->queue = attached;
+	ioq_get(attached);
+
+	if (func != NULL) {
+		q->notifier.signal = func;
+		attached->notifier = &q->notifier;
+	}
+
+	return 0;
+}
+
+/* Drop the reference held on @q's IOQ, if any, and forget the pointer */
+static void
+venettap_queue_release(struct venettap_queue *q)
+{
+	struct ioq *ioq = q->queue;
+
+	if (ioq != NULL) {
+		ioq_put(ioq);
+		q->queue = NULL;
+	}
+}
+
+/*
+ * Take one reference on tx-ioq notifications; the first reference enables
+ * them if the vbus link is up.
+ *
+ * Assumes priv->lock is held
+ */
+static void
+venettap_txq_notify_inc(struct venettap *priv)
+{
+	if (++priv->netif.txq.irqdepth != 1)
+		return;
+
+	if (priv->vbus.link)
+		ioq_notify_enable(priv->vbus.txq.queue, 0);
+}
+
+/*
+ * Drop one reference on tx-ioq notifications; dropping the last reference
+ * disables them if the vbus link is up.
+ *
+ * Assumes priv->lock is held
+ */
+static void
+venettap_txq_notify_dec(struct venettap *priv)
+{
+	BUG_ON(priv->netif.txq.irqdepth == 0);
+
+	if (--priv->netif.txq.irqdepth != 0)
+		return;
+
+	if (priv->vbus.link)
+		ioq_notify_disable(priv->vbus.txq.queue, 0);
+}
+
+/*
+ *----------------------------------------------------------------------
+ * netif link
+ *----------------------------------------------------------------------
+ */
+
+/* Map an embedded vbus_connection back to its owning venettap */
+static struct venettap *conn_to_priv(struct vbus_connection *conn)
+{
+ return container_of(conn, struct venettap, vbus.conn);
+}
+
+/* Map an embedded vbus_device_interface back to its owning venettap */
+static struct venettap *intf_to_priv(struct vbus_device_interface *intf)
+{
+ return container_of(intf, struct venettap, vbus.intf);
+}
+
+/* Map an embedded vbus_device back to its owning venettap */
+static struct venettap *vdev_to_priv(struct vbus_device *vdev)
+{
+ return container_of(vdev, struct venettap, vbus.dev);
+}
+
+/*
+ * ndo_open handler: create the rx/tx worker threads and mark the netif
+ * side of the link as up.
+ *
+ * Returns 0 on success or the error reported by kthread_create(). The
+ * threads are created but not woken here; they are started by the first
+ * wake_up_process() from the rx-schedule / deferred-tx paths.
+ */
+static int
+venettap_netdev_open(struct net_device *dev)
+{
+	struct venettap *priv = netdev_priv(dev);
+	struct task_struct *rxthread;
+	struct task_struct *txthread;
+	unsigned long flags;
+
+	BUG_ON(priv->netif.link);
+
+	/*
+	 * We need rx-polling to be done in process context, and we want
+	 * ingress processing to occur independent of the producer thread
+	 * to maximize multi-core distribution. Since the built in NAPI uses a
+	 * softirq, we cannot guarantee this wont call us back in interrupt
+	 * context, so we cant use it. And both a work-queue or softirq
+	 * solution would tend to process requests on the same CPU as the
+	 * producer. Therefore, we create a special thread to handle ingress.
+	 *
+	 * The downside to this type of approach is that we may still need to
+	 * ctx-switch to the NAPI polling thread (presumably running on the same
+	 * core as the rx-thread) by virtue of the netif_rx() backlog mechanism.
+	 * However, this can be mitigated by the use of netif_rx_ni().
+	 */
+	rxthread = kthread_create(venettap_rx_thread, priv,
+				  "%s-rx", priv->netif.dev->name);
+	if (IS_ERR(rxthread))
+		return PTR_ERR(rxthread);
+
+	txthread = kthread_create(venettap_tx_thread, priv,
+				  "%s-tx", priv->netif.dev->name);
+	if (IS_ERR(txthread)) {
+		/* do not leave the already-created rx thread behind */
+		kthread_stop(rxthread);
+		return PTR_ERR(txthread);
+	}
+
+	/* publish the threads only once both are known to be valid */
+	priv->rxthread = rxthread;
+	priv->txthread = txthread;
+
+	spin_lock_irqsave(&priv->lock, flags);
+
+	priv->netif.link = true;
+
+	if (!priv->vbus.link)
+		netif_carrier_off(dev);
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return 0;
+}
+
+/*
+ * ndo_stop handler (also called directly from venettap_netdev_unregister()).
+ *
+ * Clears netif.link under the lock and, only if it was previously set,
+ * stops both worker threads. Always returns 0.
+ */
+static int
+venettap_netdev_stop(struct net_device *dev)
+{
+ struct venettap *priv = netdev_priv(dev);
+ unsigned long flags;
+ int needs_stop = false;
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ if (priv->netif.link) {
+ needs_stop = true;
+ priv->netif.link = false;
+ }
+
+ /* FIXME: free priv->netif.txq */
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ /* kthread_stop() is called with the spinlock released */
+ if (needs_stop) {
+ kthread_stop(priv->rxthread);
+ priv->rxthread = NULL;
+
+ kthread_stop(priv->txthread);
+ priv->txthread = NULL;
+ }
+
+ return 0;
+}
+
+/*
+ * ndo_set_config handler (configuration changes passed on by ifconfig).
+ *
+ * Refuses with -EBUSY while the interface is up and with -EOPNOTSUPP if
+ * the caller tries to change the I/O base address; every other field of
+ * @map is ignored.
+ */
+static int
+venettap_netdev_config(struct net_device *dev, struct ifmap *map)
+{
+	/* can't act on a running interface */
+	if (dev->flags & IFF_UP)
+		return -EBUSY;
+
+	/* an unchanged I/O address is fine; the other fields are ignored */
+	if (map->base_addr == dev->base_addr)
+		return 0;
+
+	printk(KERN_WARNING "venettap: Can't change I/O address\n");
+	return -EOPNOTSUPP;
+}
+
+/*
+ * ndo_change_mtu handler: stores @new_mtu unconditionally and returns 0.
+ *
+ * NOTE(review): no range check is applied here; venettap_rx() derives its
+ * maximum accepted frame length from dev->mtu, so confirm that the caller
+ * bounds the value.
+ */
+static int
+venettap_change_mtu(struct net_device *dev, int new_mtu)
+{
+ dev->mtu = new_mtu;
+
+ return 0;
+}
+
+/*
+ * Drain the rx IOQ: copy every descriptor currently marked "sown" into a
+ * freshly allocated skb and hand it to the network stack.
+ *
+ * Runs in the context of the rx-thread. Oversized descriptors, skb
+ * allocation failures and copy failures are counted in the netif stats
+ * and the descriptor is consumed anyway. Always returns 0.
+ */
+static int
+venettap_rx(struct venettap *priv)
+{
+	struct ioq *ioq;
+	struct vbus_memctx *ctx;
+	int npackets = 0;
+	int dirty = 0;
+	struct ioq_iterator iter;
+	int ret;
+	unsigned long flags;
+	struct vbus_connection *conn;
+
+	PDEBUG("polling...\n");
+
+	spin_lock_irqsave(&priv->lock, flags);
+
+	if (!priv->vbus.link) {
+		spin_unlock_irqrestore(&priv->lock, flags);
+		return 0;
+	}
+
+	/*
+	 * We take a reference to the connection object to ensure that the
+	 * ioq/ctx references do not disappear out from under us. We could
+	 * accomplish the same thing more directly by acquiring a reference
+	 * to the ioq and ctx explicitly, but this would require an extra
+	 * atomic_inc+dec pair, for no additional benefit
+	 */
+	conn = &priv->vbus.conn;
+	vbus_connection_get(conn);
+
+	ioq = priv->vbus.rxq.queue;
+	ctx = priv->vbus.ctx;
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	/* We want to iterate on the head of the in-use index */
+	ret = ioq_iter_init(ioq, &iter, ioq_idxtype_inuse, 0);
+	BUG_ON(ret < 0);
+
+	ret = ioq_iter_seek(&iter, ioq_seek_head, 0, 0);
+	BUG_ON(ret < 0);
+
+	/*
+	 * The EOM is indicated by finding a packet that is still owned by
+	 * the north side
+	 */
+	while (iter.desc->sown) {
+		size_t len = iter.desc->len;
+		size_t maxlen = priv->netif.dev->mtu + ETH_HLEN;
+		struct sk_buff *skb = NULL;
+
+		if (unlikely(len > maxlen)) {
+			priv->netif.stats.rx_errors++;
+			priv->netif.stats.rx_length_errors++;
+			goto next;
+		}
+
+		skb = dev_alloc_skb(len+2);
+		if (unlikely(!skb)) {
+			printk(KERN_INFO "VENETTAP: skb alloc failed:" \
+			       " memory squeeze.\n");
+			priv->netif.stats.rx_errors++;
+			priv->netif.stats.rx_dropped++;
+			goto next;
+		}
+
+		/* align IP on 16B boundary */
+		skb_reserve(skb, 2);
+
+		/*
+		 * skb_put() both reserves the payload area and accounts for
+		 * it in skb->len; without it the stack would see an empty
+		 * packet.
+		 */
+		ret = ctx->ops->copy_from(ctx, skb_put(skb, len),
+					  (void *)iter.desc->ptr,
+					  len);
+		if (unlikely(ret)) {
+			priv->netif.stats.rx_errors++;
+			/* the skb never reaches the stack, so free it here */
+			dev_kfree_skb(skb);
+			goto next;
+		}
+
+		/* Maintain stats */
+		npackets++;
+		priv->netif.stats.rx_packets++;
+		priv->netif.stats.rx_bytes += len;
+
+		/* Pass the buffer up to the stack */
+		skb->dev = priv->netif.dev;
+		skb->protocol = eth_type_trans(skb, priv->netif.dev);
+
+		netif_rx_ni(skb);
+next:
+		dirty = 1;
+
+		/* Advance the in-use head */
+		ret = ioq_iter_pop(&iter, 0);
+		BUG_ON(ret < 0);
+
+		/* send up to N packets before sending tx-complete */
+		if (!(npackets % 10)) {
+			ioq_signal(ioq, 0);
+			dirty = 0;
+		}
+
+	}
+
+	PDEBUG("poll: %d packets received\n", npackets);
+
+	if (dirty)
+		ioq_signal(ioq, 0);
+
+	/*
+	 * If we processed all packets we're done, so reenable ints
+	 */
+	if (ioq_empty(ioq, ioq_idxtype_inuse)) {
+		clear_bit(RX_SCHED, &priv->flags);
+		ioq_notify_enable(ioq, 0);
+		wake_up(&priv->vbus.rx_empty);
+	}
+
+	vbus_connection_put(conn);
+
+	return 0;
+}
+
+/*
+ * Body of the "<ifname>-rx" kthread created in venettap_netdev_open().
+ *
+ * Sleeps until RX_SCHED is set, a stop is requested or the task is being
+ * frozen, then runs one venettap_rx() pass. Returns 0 when asked to stop.
+ */
+static int venettap_rx_thread(void *__priv)
+{
+ struct venettap *priv = __priv;
+
+ for (;;) {
+ /* set the task state before testing the conditions, then sleep */
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (!freezing(current) &&
+ !kthread_should_stop() &&
+ !test_bit(RX_SCHED, &priv->flags))
+ schedule();
+ set_current_state(TASK_RUNNING);
+
+ try_to_freeze();
+
+ if (kthread_should_stop())
+ break;
+
+ venettap_rx(priv);
+ }
+
+ return 0;
+}
+
+/*
+ * Lift netif backpressure once the tx IOQ has room again.
+ *
+ * If the vbus link is up, fewer packets are queued locally than the tx IOQ
+ * has entries remaining, and TX_NETIF_CONGESTED was set, the flag is
+ * cleared, one tx-notify reference is dropped and the netif queue is woken
+ * (only when the netif side is open).
+ *
+ * assumes priv->lock is held
+ */
+static void
+venettap_check_netif_congestion(struct venettap *priv)
+{
+ struct ioq *ioq = priv->vbus.txq.queue;
+
+ /* the flag is tested last so it is only cleared when the others hold */
+ if (priv->vbus.link
+ && priv->netif.txq.len < ioq_remain(ioq, ioq_idxtype_inuse)
+ && test_and_clear_bit(TX_NETIF_CONGESTED, &priv->flags)) {
+ PDEBUG("NETIF congestion cleared\n");
+ venettap_txq_notify_dec(priv);
+
+ if (priv->netif.link)
+ netif_wake_queue(priv->netif.dev);
+ }
+}
+
+/*
+ * Move packets from the local netif txq into the tx IOQ.
+ *
+ * Runs in the context of the tx-thread. For every free ("sown")
+ * descriptor a queued skb is dequeued and copied into the client buffer
+ * with priv->lock dropped around the copy. Packets that do not fit the
+ * descriptor, or whose copy fails, are counted as tx_errors and dropped.
+ *
+ * Returns the number of packets pushed onto the IOQ.
+ */
+static int
+venettap_tx(struct venettap *priv)
+{
+	struct sk_buff *skb;
+	struct ioq_iterator iter;
+	struct ioq *ioq = NULL;
+	struct vbus_memctx *ctx;
+	int ret;
+	int npackets = 0;
+	unsigned long flags;
+	struct vbus_connection *conn;
+
+	PDEBUG("tx-thread\n");
+
+	spin_lock_irqsave(&priv->lock, flags);
+
+	if (unlikely(!priv->vbus.link)) {
+		spin_unlock_irqrestore(&priv->lock, flags);
+		return 0;
+	}
+
+	/*
+	 * We take a reference to the connection object to ensure that the
+	 * ioq/ctx references do not disappear out from under us. We could
+	 * accomplish the same thing more directly by acquiring a reference
+	 * to the ioq and ctx explicitly, but this would require an extra
+	 * atomic_inc+dec pair, for no additional benefit
+	 */
+	conn = &priv->vbus.conn;
+	vbus_connection_get(conn);
+
+	ioq = priv->vbus.txq.queue;
+	ctx = priv->vbus.ctx;
+
+	ret = ioq_iter_init(ioq, &iter, ioq_idxtype_inuse, IOQ_ITER_AUTOUPDATE);
+	BUG_ON(ret < 0);
+
+	ret = ioq_iter_seek(&iter, ioq_seek_tail, 0, 0);
+	BUG_ON(ret < 0);
+
+	while (priv->vbus.link && iter.desc->sown && priv->netif.txq.len) {
+
+		skb = __skb_dequeue(&priv->netif.txq.list);
+		if (!skb)
+			break;
+
+		spin_unlock_irqrestore(&priv->lock, flags);
+
+		PDEBUG("tx-thread: sending %u bytes\n", skb->len);
+
+		if (skb->len <= iter.desc->len) {
+			ret = ctx->ops->copy_to(ctx, (void *)iter.desc->ptr,
+						skb->data, skb->len);
+			if (unlikely(ret)) {
+				/*
+				 * The destination is client-supplied memory;
+				 * treat a failed copy as a dropped packet (as
+				 * the rx path does) instead of taking the
+				 * host down. The descriptor is not consumed.
+				 */
+				priv->netif.stats.tx_errors++;
+			} else {
+				iter.desc->len = skb->len;
+
+				npackets++;
+				priv->netif.stats.tx_packets++;
+				priv->netif.stats.tx_bytes += skb->len;
+
+				ret = ioq_iter_push(&iter, 0);
+				BUG_ON(ret < 0);
+			}
+		} else {
+			printk(KERN_WARNING \
+			       "VENETTAP: discarding packet: buf too small " \
+			       "(%u > %llu)\n", skb->len,
+			       (unsigned long long)iter.desc->len);
+			priv->netif.stats.tx_errors++;
+		}
+
+		dev_kfree_skb(skb);
+		priv->netif.dev->trans_start = jiffies; /* save the timestamp */
+
+		spin_lock_irqsave(&priv->lock, flags);
+
+		/*
+		 * _venettap_vlink_down() may have purged the queue and reset
+		 * the counter while the lock was dropped; never let the
+		 * unsigned counter wrap below zero.
+		 */
+		if (priv->netif.txq.len)
+			priv->netif.txq.len--;
+	}
+
+	PDEBUG("send complete\n");
+
+	if (!priv->vbus.link || !priv->netif.txq.len) {
+		PDEBUG("descheduling TX: link=%d, len=%zu\n",
+		       priv->vbus.link, priv->netif.txq.len);
+		clear_bit(TX_SCHED, &priv->flags);
+	} else if (!test_and_set_bit(TX_IOQ_CONGESTED, &priv->flags)) {
+		PDEBUG("congested with %zu packets still queued\n",
+		       priv->netif.txq.len);
+		venettap_txq_notify_inc(priv);
+	}
+
+	venettap_check_netif_congestion(priv);
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	vbus_connection_put(conn);
+
+	return npackets;
+}
+
+/*
+ * Body of the "<ifname>-tx" kthread created in venettap_netdev_open().
+ *
+ * Sleeps while there is nothing scheduled (TX_SCHED clear) or while the
+ * tx IOQ is congested (TX_IOQ_CONGESTED set), unless a stop or freeze is
+ * pending; otherwise runs one venettap_tx() pass. Returns 0 when asked
+ * to stop.
+ */
+static int venettap_tx_thread(void *__priv)
+{
+ struct venettap *priv = __priv;
+
+ for (;;) {
+ /* set the task state before testing the conditions, then sleep */
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (!freezing(current) &&
+ !kthread_should_stop() &&
+ (test_bit(TX_IOQ_CONGESTED, &priv->flags) ||
+ !test_bit(TX_SCHED, &priv->flags)))
+ schedule();
+ set_current_state(TASK_RUNNING);
+
+ /* s = scheduled, c = ioq congested, b = netif backpressure */
+ PDEBUG("tx wakeup: %s%s%s\n",
+ test_bit(TX_SCHED, &priv->flags) ? "s" : "-",
+ test_bit(TX_IOQ_CONGESTED, &priv->flags) ? "c" : "-",
+ test_bit(TX_NETIF_CONGESTED, &priv->flags) ? "b" : "-"
+ );
+
+ try_to_freeze();
+
+ if (kthread_should_stop())
+ break;
+
+ venettap_tx(priv);
+ }
+
+ return 0;
+}
+
+/* Kick the tx-thread so that it processes the local txq */
+static void
+venettap_deferred_tx(struct venettap *priv)
+{
+ PDEBUG("wake up txthread\n");
+ wake_up_process(priv->txthread);
+}
+
+/*
+ * Stop the netif queue and take a tx-notify reference, unless
+ * TX_NETIF_CONGESTED was already set.
+ *
+ * assumes priv->lock is held
+ */
+static void
+venettap_apply_backpressure(struct venettap *priv)
+{
+	PDEBUG("backpressure\n");
+
+	/* nothing more to do if backpressure is already applied */
+	if (test_and_set_bit(TX_NETIF_CONGESTED, &priv->flags))
+		return;
+
+	/*
+	 * We must flow-control the kernel by disabling the queue
+	 */
+	netif_stop_queue(priv->netif.dev);
+	venettap_txq_notify_inc(priv);
+}
+
+/*
+ * Transmit a packet (called by the kernel)
+ *
+ * We want to perform ctx->copy_to() operations from a sleepable process
+ * context, so we defer the actual tx operations to a thread.
+ * However, we want to be careful that we do not double-buffer the
+ * queue, so we create a buffer whose space dynamically grows and
+ * shrinks with the availability of the actual IOQ. This means that
+ * the netif flow control is still managed by the actual consumer,
+ * thereby avoiding the creation of an extra servo-loop to the equation.
+ *
+ * Returns NETDEV_TX_OK once the skb has been queued for the tx-thread, or
+ * NETDEV_TX_BUSY (after stopping the netif queue) when the vbus link is
+ * down.
+ */
+static int
+venettap_netdev_tx(struct sk_buff *skb, struct net_device *dev)
+{
+ struct venettap *priv = netdev_priv(dev);
+ struct ioq *ioq = NULL;
+ unsigned long flags;
+
+ PDEBUG("queuing %d bytes\n", skb->len);
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ ioq = priv->vbus.txq.queue;
+
+ /* we should never be called while the netif queue is stopped */
+ BUG_ON(test_bit(TX_NETIF_CONGESTED, &priv->flags));
+
+ if (!priv->vbus.link) {
+ /*
+ * We have a link-down condition
+ */
+ printk(KERN_ERR "VENETTAP: tx on link down\n");
+ goto flowcontrol;
+ }
+
+ __skb_queue_tail(&priv->netif.txq.list, skb);
+ priv->netif.txq.len++;
+ set_bit(TX_SCHED, &priv->flags);
+
+ /* stop accepting packets once we hold as many as the IOQ can take */
+ if (priv->netif.txq.len >= ioq_remain(ioq, ioq_idxtype_inuse))
+ venettap_apply_backpressure(priv);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ venettap_deferred_tx(priv);
+
+ return NETDEV_TX_OK;
+
+flowcontrol:
+ venettap_apply_backpressure(priv);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ return NETDEV_TX_BUSY;
+}
+
+/*
+ * Ioctl commands
+ *
+ * No device-specific ioctls are implemented: every @cmd is accepted and
+ * 0 is returned without touching @rq.
+ * NOTE(review): returning success for unknown commands may mislead
+ * callers; confirm whether an error code would be more appropriate.
+ */
+static int
+venettap_netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+ PDEBUG("ioctl\n");
+ return 0;
+}
+
+/*
+ * Return statistics to the caller
+ *
+ * The counters live in the device's private area. The function has file
+ * scope like every other handler in this driver.
+ */
+static struct net_device_stats *
+venettap_netdev_stats(struct net_device *dev)
+{
+	struct venettap *priv = netdev_priv(dev);
+
+	return &priv->netif.stats;
+}
+
+/*
+ * Stop and unregister the net_device if it is currently enabled.
+ * Does nothing when netif.enabled is clear.
+ */
+static void
+venettap_netdev_unregister(struct venettap *priv)
+{
+	struct net_device *netdev;
+
+	if (!priv->netif.enabled)
+		return;
+
+	netdev = priv->netif.dev;
+
+	venettap_netdev_stop(netdev);
+	unregister_netdev(netdev);
+}
+
+/*
+ * Schedule the rx-thread if both sides of the link are up and the rx IOQ
+ * holds data. Further rx notifications are disabled, and the thread is
+ * woken only if RX_SCHED was not already set.
+ *
+ * Assumes priv->lock held
+ */
+static void
+venettap_rx_schedule(struct venettap *priv)
+{
+	struct ioq *rxq;
+
+	if (!priv->vbus.link)
+		return;
+
+	if (!priv->netif.link)
+		return;
+
+	rxq = priv->vbus.rxq.queue;
+	if (ioq_empty(rxq, ioq_idxtype_inuse))
+		return;
+
+	ioq_notify_disable(rxq, 0);
+
+	if (!test_and_set_bit(RX_SCHED, &priv->flags))
+		wake_up_process(priv->rxthread);
+}
+
+/*
+ * receive interrupt-service-routine - called whenever the vbus-driver signals
+ * our IOQ to indicate more inbound packets are ready.
+ *
+ * Registered as the rx queue's notifier in venettap_vlink_shm().
+ */
+static void
+venettap_rx_isr(struct ioq_notifier *notifier)
+{
+ struct venettap *priv;
+ unsigned long flags;
+
+ priv = container_of(notifier, struct venettap, vbus.rxq.notifier);
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ /* Disable future interrupts and schedule our rx-thread */
+ venettap_rx_schedule(priv);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+/*
+ * transmit interrupt-service-routine - called whenever the vbus-driver signals
+ * our IOQ to indicate there is more room in the TX queue
+ *
+ * Clears TX_IOQ_CONGESTED (dropping the matching tx-notify reference and
+ * waking the tx-thread) when the IOQ is no longer full, then re-evaluates
+ * netif backpressure.
+ */
+static void
+venettap_tx_isr(struct ioq_notifier *notifier)
+{
+ struct venettap *priv;
+ unsigned long flags;
+
+ priv = container_of(notifier, struct venettap, vbus.txq.notifier);
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ /* the flag is tested last so it is only cleared when the others hold */
+ if (priv->vbus.link
+ && !ioq_full(priv->vbus.txq.queue, ioq_idxtype_inuse)
+ && test_and_clear_bit(TX_IOQ_CONGESTED, &priv->flags)) {
+ PDEBUG("IOQ congestion cleared\n");
+ venettap_txq_notify_dec(priv);
+
+ if (priv->netif.link)
+ wake_up_process(priv->txthread);
+ }
+
+ venettap_check_netif_congestion(priv);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+/*
+ * Handle VENET_FUNC_LINKUP from the client.
+ *
+ * Returns -EEXIST if the vbus link is already up, -EINVAL if either queue
+ * has not been attached yet, or 0 after marking the link up, raising the
+ * carrier (when the netif is open) and enabling rx notifications.
+ */
+static int
+venettap_vlink_up(struct venettap *priv)
+{
+ int ret = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ if (priv->vbus.link) {
+ ret = -EEXIST;
+ goto out;
+ }
+
+ if (!priv->vbus.rxq.queue || !priv->vbus.txq.queue) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ priv->vbus.link = 1;
+
+ if (priv->netif.link)
+ netif_carrier_on(priv->netif.dev);
+
+ venettap_check_netif_congestion(priv);
+
+ ioq_notify_enable(priv->vbus.rxq.queue, 0);
+
+out:
+ spin_unlock_irqrestore(&priv->lock, flags);
+ return ret;
+}
+
+/*
+ * Take the vbus side of the link down: drop the carrier, discard every
+ * packet still queued for transmit, clear the RX/TX scheduling bits and
+ * disable rx notifications.
+ *
+ * Returns -ENOENT if the link was not up, 0 otherwise.
+ *
+ * Assumes priv->lock held
+ */
+static int
+_venettap_vlink_down(struct venettap *priv)
+{
+ struct sk_buff *skb;
+
+ if (!priv->vbus.link)
+ return -ENOENT;
+
+ priv->vbus.link = 0;
+
+ if (priv->netif.link)
+ netif_carrier_off(priv->netif.dev);
+
+ /* just trash whatever might have been pending */
+ while ((skb = __skb_dequeue(&priv->netif.txq.list)))
+ dev_kfree_skb(skb);
+
+ priv->netif.txq.len = 0;
+
+ /* And deschedule any pending processing */
+ clear_bit(RX_SCHED, &priv->flags);
+ clear_bit(TX_SCHED, &priv->flags);
+
+ ioq_notify_disable(priv->vbus.rxq.queue, 0);
+
+ return 0;
+}
+
+/*
+ * Handle VENET_FUNC_LINKDOWN from the client: locked wrapper around
+ * _venettap_vlink_down(), whose return value is passed through.
+ */
+static int
+venettap_vlink_down(struct venettap *priv)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&priv->lock, flags);
+ ret = _venettap_vlink_down(priv);
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ return ret;
+}
+
+/*
+ * Handle VENET_FUNC_MACQUERY: copy the client MAC address into the
+ * caller's buffer.
+ *
+ * Returns -EINVAL unless @len is exactly ETH_ALEN, -EFAULT if the copy
+ * through the memory context fails, 0 on success.
+ */
+static int
+venettap_macquery(struct venettap *priv, void *data, unsigned long len)
+{
+	struct vbus_memctx *ctx = priv->vbus.ctx;
+
+	if (len != ETH_ALEN)
+		return -EINVAL;
+
+	if (ctx->ops->copy_to(ctx, data, priv->cmac, ETH_ALEN))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Negotiate Capabilities - This function is provided so that the
+ * interface may be extended without breaking ABI compatibility
+ *
+ * The caller is expected to send down any capabilities they would like
+ * to enable, and the device reduces them to the capabilities that it
+ * supports. This value is then returned so that both sides may
+ * ascertain the lowest-common-denominator of features to enable
+ *
+ * No capability group is recognized at present, so the returned bits are
+ * always cleared. Returns -EINVAL on a size mismatch or when the link is
+ * already up, -EFAULT if either copy fails, 0 on success.
+ */
+static int
+venettap_negcap(struct venettap *priv, void *data, unsigned long len)
+{
+ struct vbus_memctx *ctx = priv->vbus.ctx;
+ struct venet_capabilities caps;
+ int ret;
+
+ if (len != sizeof(caps))
+ return -EINVAL;
+
+ /* capabilities may only be negotiated while the link is down */
+ if (priv->vbus.link)
+ return -EINVAL;
+
+ ret = ctx->ops->copy_from(ctx, &caps, data, sizeof(caps));
+ if (ret)
+ return -EFAULT;
+
+ switch (caps.gid) {
+ default:
+ caps.bits = 0;
+ break;
+ }
+
+ ret = ctx->ops->copy_to(ctx, data, &caps, sizeof(caps));
+ if (ret)
+ return -EFAULT;
+
+ return 0;
+}
+
+/*
+ * Walk through and flush each remaining descriptor by returning
+ * a zero length packet.
+ *
+ * This is useful, for instance, when the driver is changing the MTU
+ * and wants to reclaim all the existing buffers outstanding which
+ * are a different size than the new MTU
+ *
+ * The ring walked here is priv->vbus.txq, i.e. the queue that
+ * venettap_vlink_shm() attaches for VENET_QUEUE_RX. Returns -EINVAL if
+ * the vbus link is down, 0 otherwise.
+ */
+static int
+venettap_flushrx(struct venettap *priv)
+{
+ struct ioq_iterator iter;
+ struct ioq *ioq = NULL;
+ int ret;
+ unsigned long flags;
+
+ PDEBUG("flushrx\n");
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ if (unlikely(!priv->vbus.link)) {
+ spin_unlock_irqrestore(&priv->lock, flags);
+ return -EINVAL;
+ }
+
+ ioq = priv->vbus.txq.queue;
+
+ ret = ioq_iter_init(ioq, &iter, ioq_idxtype_inuse, 0);
+ BUG_ON(ret < 0);
+
+ ret = ioq_iter_seek(&iter, ioq_seek_tail, 0, 0);
+ BUG_ON(ret < 0);
+
+ /* hand every free descriptor back with a zero length */
+ while (iter.desc->sown) {
+ iter.desc->len = 0;
+ ret = ioq_iter_push(&iter, 0);
+ if (ret < 0)
+ SHM_SIGNAL_FAULT(ioq->signal, "could not flushrx");
+ }
+
+ PDEBUG("flushrx complete\n");
+
+ /* the ring has no free descriptors left now: mark it congested */
+ if (!test_and_set_bit(TX_IOQ_CONGESTED, &priv->flags)) {
+ PDEBUG("congested with %d packets still queued\n",
+ priv->netif.txq.len);
+ venettap_txq_notify_inc(priv);
+ }
+
+ /*
+ * we purposely do not ioq_signal() the other side here. Since
+ * this function was invoked by the client, they can take care
+ * of explicitly calling any reclaim code if they like. This also
+ * avoids a potential deadlock in case turning around and injecting
+ * a signal while we are in a call() is problematic to the
+ * connector design
+ */
+
+ venettap_check_netif_congestion(priv);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ return 0;
+}
+
+/*
+ * This is called whenever a driver wants to perform a synchronous
+ * "function call" to our device. It is similar to the notion of
+ * an ioctl(). The parameters are part of the ABI between the device
+ * and driver.
+ *
+ * Dispatches @func to the matching VENET_FUNC_* handler and returns its
+ * result; unknown function codes yield -EINVAL. @flags is unused.
+ */
+static int
+venettap_vlink_call(struct vbus_connection *conn,
+		    unsigned long func,
+		    void *data,
+		    unsigned long len,
+		    unsigned long flags)
+{
+	struct venettap *priv = conn_to_priv(conn);
+
+	/* func and len are unsigned long, so print them with %lu */
+	PDEBUG("call -> %lu with %p/%lu\n", func, data, len);
+
+	switch (func) {
+	case VENET_FUNC_LINKUP:
+		return venettap_vlink_up(priv);
+	case VENET_FUNC_LINKDOWN:
+		return venettap_vlink_down(priv);
+	case VENET_FUNC_MACQUERY:
+		return venettap_macquery(priv, data, len);
+	case VENET_FUNC_NEGCAP:
+		return venettap_negcap(priv, data, len);
+	case VENET_FUNC_FLUSHRX:
+		return venettap_flushrx(priv);
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * This is called whenever a driver wants to open a new IOQ between itself
+ * and our device. The "id" field is meant to convey meaning to the device
+ * as to what the intended use of this IOQ is. For instance, for venet "id=0"
+ * means "rx" and "id=1" = "tx". That namespace is managed by the device
+ * and should be understood by the driver as part of its ABI agreement.
+ *
+ * The device should take a reference to the IOQ via ioq_get() and hold it
+ * until the connection is released.
+ *
+ * The queue ids are named from the driver's point of view, so the
+ * driver's RX ring is our txq and vice versa. Returns the result of
+ * venettap_queue_init(), or -EINVAL for an unknown @id. @flags is unused.
+ */
+static int
+venettap_vlink_shm(struct vbus_connection *conn,
+		   unsigned long id,
+		   struct vbus_shm *shm,
+		   struct shm_signal *signal,
+		   unsigned long flags)
+{
+	struct venettap *priv = conn_to_priv(conn);
+
+	/* no ioq exists yet at this point; report the shm region instead */
+	PDEBUG("queue -> %p/%lu attached\n", shm, id);
+
+	switch (id) {
+	case VENET_QUEUE_RX:
+		return venettap_queue_init(&priv->vbus.txq, shm, signal,
+					   venettap_tx_isr);
+	case VENET_QUEUE_TX:
+		return venettap_queue_init(&priv->vbus.rxq, shm, signal,
+					   venettap_rx_isr);
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Connection close handler: wait for the rx-thread to finish any pending
+ * work, then mark the interface as no longer opened and take the vbus
+ * link down.
+ */
+static void
+venettap_vlink_close(struct vbus_connection *conn)
+{
+	struct venettap *priv = conn_to_priv(conn);
+	unsigned long flags;
+
+	PDEBUG("connection closed\n");
+
+	/*
+	 * Block until all posted packets from the client have been processed.
+	 *
+	 * wait_event() re-arms the sleeping task state before every check of
+	 * the condition, so a wakeup that finds RX_SCHED still set goes back
+	 * to sleep instead of spinning in schedule().
+	 */
+	wait_event(priv->vbus.rx_empty, !test_bit(RX_SCHED, &priv->flags));
+
+	spin_lock_irqsave(&priv->lock, flags);
+
+	priv->vbus.opened = false;
+	_venettap_vlink_down(priv);
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+/*
+ * This is called whenever the driver closes all references to our device
+ *
+ * Releases both queues and the memory context, and drops the device
+ * kobject reference taken in venettap_intf_open().
+ */
+static void
+venettap_vlink_release(struct vbus_connection *conn)
+{
+ struct venettap *priv = conn_to_priv(conn);
+
+ PDEBUG("connection released\n");
+
+ venettap_queue_release(&priv->vbus.rxq);
+ venettap_queue_release(&priv->vbus.txq);
+ vbus_memctx_put(priv->vbus.ctx);
+
+ kobject_put(priv->vbus.dev.kobj);
+}
+
+/* Connection callbacks installed by venettap_intf_open() */
+static struct vbus_connection_ops venettap_vbus_link_ops = {
+ .call = venettap_vlink_call,
+ .shm = venettap_vlink_shm,
+ .close = venettap_vlink_close,
+ .release = venettap_vlink_release,
+};
+
+/*
+ * This is called whenever a driver wants to open our device_interface
+ * for communication. The connection is represented by a
+ * vbus_connection object. It is up to the implementation to decide
+ * if it allows more than one connection at a time. This simple example
+ * does not.
+ *
+ * Returns -EINVAL on a version mismatch, -EBUSY if the interface is
+ * already opened, or 0 with *conn pointing at the embedded connection.
+ */
+static int
+venettap_intf_open(struct vbus_device_interface *intf,
+ struct vbus_memctx *ctx,
+ int version,
+ struct vbus_connection **conn)
+{
+ struct venettap *priv = intf_to_priv(intf);
+ unsigned long flags;
+
+ PDEBUG("open\n");
+
+ if (version != VENET_VERSION)
+ return -EINVAL;
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ /*
+ * We only allow one connection to this device
+ */
+ if (priv->vbus.opened) {
+ spin_unlock_irqrestore(&priv->lock, flags);
+ return -EBUSY;
+ }
+
+ /* dropped again in venettap_vlink_release() */
+ kobject_get(intf->dev->kobj);
+
+ vbus_connection_init(&priv->vbus.conn, &venettap_vbus_link_ops);
+
+ priv->vbus.opened = true;
+ priv->vbus.ctx = ctx;
+
+ /* dropped again in venettap_vlink_release() */
+ vbus_memctx_get(ctx);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ *conn = &priv->vbus.conn;
+
+ return 0;
+}
+
+/*
+ * Interface release callback: drops one reference on the owning device's
+ * kobject.
+ * NOTE(review): the matching kobject_get() is not visible in this file;
+ * presumably it is taken when the interface is registered - confirm.
+ */
+static void
+venettap_intf_release(struct vbus_device_interface *intf)
+{
+ kobject_put(intf->dev->kobj);
+}
+
+/* Interface callbacks installed by venettap_device_bus_connect() */
+static struct vbus_device_interface_ops venettap_device_interface_ops = {
+ .open = venettap_intf_open,
+ .release = venettap_intf_release,
+};
+
+/*
+ * This is called whenever the admin creates a symbolic link between
+ * a bus in /config/vbus/buses and our device. It represents a bus
+ * connection. Your device can choose to allow more than one bus to
+ * connect, or it can restrict it to one bus. It can also choose to
+ * register one or more device_interfaces on each bus that it
+ * successfully connects to.
+ *
+ * This example device only registers a single interface
+ *
+ * Returns -EBUSY if a bus is already connected, otherwise the result of
+ * vbus_device_interface_register(). On failure the device is left
+ * unconnected and the kobject reference taken here is dropped again.
+ */
+static int
+venettap_device_bus_connect(struct vbus_device *dev, struct vbus *vbus)
+{
+	struct venettap *priv = vdev_to_priv(dev);
+	struct vbus_device_interface *intf = &priv->vbus.intf;
+	int ret;
+
+	/* We only allow one bus to connect */
+	if (priv->vbus.connected)
+		return -EBUSY;
+
+	kobject_get(dev->kobj);
+
+	intf->name = "0";
+	intf->type = VENET_TYPE;
+	intf->ops = &venettap_device_interface_ops;
+
+	priv->vbus.connected = true;
+
+	/*
+	 * Our example only registers one interface. If you need
+	 * more, simply call interface_register() multiple times
+	 */
+	ret = vbus_device_interface_register(dev, vbus, intf);
+	if (ret < 0) {
+		/* undo the state above so that a later connect can succeed */
+		priv->vbus.connected = false;
+		kobject_put(dev->kobj);
+	}
+
+	return ret;
+}
+
+/*
+ * This is called whenever the admin removes the symbolic link between
+ * a bus in /config/vbus/buses and our device.
+ *
+ * Returns -EINVAL if no bus is connected; otherwise unregisters the
+ * interface, drops the reference taken in bus_connect and returns 0.
+ */
+static int
+venettap_device_bus_disconnect(struct vbus_device *dev, struct vbus *vbus)
+{
+ struct venettap *priv = vdev_to_priv(dev);
+ struct vbus_device_interface *intf = &priv->vbus.intf;
+
+ if (!priv->vbus.connected)
+ return -EINVAL;
+
+ vbus_device_interface_unregister(intf);
+
+ priv->vbus.connected = false;
+ kobject_put(dev->kobj);
+
+ return 0;
+}
+
+/*
+ * Device release callback: unregisters the net_device if it is still
+ * enabled, frees it (which also frees priv, as priv is the netdev's
+ * private area) and drops a module reference.
+ * NOTE(review): the matching module reference is presumably taken when
+ * the device is created, outside the code visible here - confirm.
+ */
+static void
+venettap_device_release(struct vbus_device *dev)
+{
+ struct venettap *priv = vdev_to_priv(dev);
+
+ venettap_netdev_unregister(priv);
+ free_netdev(priv->netif.dev);
+ module_put(THIS_MODULE);
+}
+
+
+/* Device-level callbacks: bus attach/detach and final release */
+static struct vbus_device_ops venettap_device_ops = {
+ .bus_connect = venettap_device_bus_connect,
+ .bus_disconnect = venettap_device_bus_disconnect,
+ .release = venettap_device_release,
+};
+
+/* Type name of this device class */
+#define VENETTAP_TYPE "venet-tap"
+
+/*
+ * Interface attributes show up as files under
+ * /sys/vbus/devices/$devid
+ */
+
+/* "host_mac" (read-only): formats the host-side MAC address into @buf */
+static ssize_t
+host_mac_show(struct vbus_device *dev, struct vbus_device_attribute *attr,
+ char *buf)
+{
+ struct venettap *priv = vdev_to_priv(dev);
+
+ return sysfs_format_mac(buf, priv->hmac, ETH_ALEN);
+}
+
+static struct vbus_device_attribute attr_hmac =
+ __ATTR_RO(host_mac);
+
+/* "client_mac" (read-only): formats the cmac address of this device */
+static ssize_t
+client_mac_show(struct vbus_device *dev, struct vbus_device_attribute *attr,
+		char *buf)
+{
+	struct venettap *tap = vdev_to_priv(dev);
+
+	return sysfs_format_mac(buf, tap->cmac, ETH_ALEN);
+}
+
+static struct vbus_device_attribute attr_cmac = __ATTR_RO(client_mac);
+
+/* "enabled" (read): prints the current netif.enabled value and a newline */
+static ssize_t
+enabled_show(struct vbus_device *dev, struct vbus_device_attribute *attr,
+	     char *buf)
+{
+	struct venettap *tap = vdev_to_priv(dev);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", tap->netif.enabled);
+}
+
+/*
+ * "enabled" (write): parses @buf as a decimal integer, which must be
+ * 0 or 1.  Writing 1 registers the netdev if it is not enabled yet;
+ * writing 0 unregisters it if it currently is.  Writing the state the
+ * device is already in leaves the netdev untouched.
+ *
+ * Returns @count on success, -EINVAL for any other input, or the
+ * negative error from register_netdev().
+ */
+static ssize_t
+enabled_store(struct vbus_device *dev, struct vbus_device_attribute *attr,
+	      const char *buf, size_t count)
+{
+	struct venettap *tap = vdev_to_priv(dev);
+	int want = -1;	/* stays -1 (rejected below) if nothing parses */
+
+	if (count > 0)
+		sscanf(buf, "%d", &want);
+
+	if (want != 0 && want != 1)
+		return -EINVAL;
+
+	if (want && !tap->netif.enabled) {
+		int rc = register_netdev(tap->netif.dev);
+
+		if (rc < 0)
+			return rc;
+	} else if (!want && tap->netif.enabled) {
+		venettap_netdev_unregister(tap);
+	}
+
+	tap->netif.enabled = want;
+
+	return count;
+}
+
+static struct vbus_device_attribute attr_enabled =
+	__ATTR(enabled, S_IRUGO | S_IWUSR, enabled_show, enabled_store);
+
+/*
+ * "ifname" (read-only): prints the netdev's interface name while the
+ * device is enabled, or the literal "<disabled>" otherwise.
+ */
+static ssize_t
+ifname_show(struct vbus_device *dev, struct vbus_device_attribute *attr,
+	    char *buf)
+{
+	struct venettap *tap = vdev_to_priv(dev);
+
+	if (tap->netif.enabled)
+		return snprintf(buf, PAGE_SIZE, "%s\n", tap->netif.dev->name);
+
+	return sprintf(buf, "<disabled>\n");
+}
+
+static struct vbus_device_attribute attr_ifname = __ATTR_RO(ifname);
+
+/* Every attribute exported for a venet-tap device; NULL-terminated */
+static struct attribute *venettap_attrs[] = {
+	&attr_hmac.attr,
+	&attr_cmac.attr,
+	&attr_enabled.attr,
+	&attr_ifname.attr,
+	NULL,
+};
+
+/* Attribute group installed on each device by venettap_device_create() */
+static struct attribute_group venettap_attr_group = {
+	.attrs = venettap_attrs,
+};
+
+/*
+ * net_device callbacks for the host-side interface.  The table is never
+ * modified at runtime and net_device.netdev_ops is a pointer to const,
+ * so keep it in read-only data.
+ */
+static const struct net_device_ops venettap_netdev_ops = {
+	.ndo_open        = venettap_netdev_open,
+	.ndo_stop        = venettap_netdev_stop,
+	.ndo_set_config  = venettap_netdev_config,
+	.ndo_change_mtu  = venettap_change_mtu,
+	.ndo_start_xmit  = venettap_netdev_tx,
+	.ndo_do_ioctl    = venettap_netdev_ioctl,
+	.ndo_get_stats   = venettap_netdev_stats,
+};
+
+/*
+ * This is called whenever the admin instantiates our devclass via
+ * "mkdir /config/vbus/devices/$(inst)/venet-tap"
+ *
+ * Allocates an ethernet net_device whose private area holds the
+ * struct venettap, initialises the vbus and netif halves and hands the
+ * embedded vbus_device back through @vdev.  The netdev is not registered
+ * here; enabled_store() does that when the "enabled" attribute is set.
+ *
+ * Returns 0 on success or -ENOMEM if the net_device cannot be allocated.
+ */
+static int
+venettap_device_create(struct vbus_devclass *dc,
+		       struct vbus_device **vdev)
+{
+	struct net_device *dev;
+	struct venettap *priv;
+	struct vbus_device *_vdev;
+
+	/*
+	 * alloc_etherdev() hands back zeroed memory (private area included)
+	 * that has already been run through ether_setup(), so neither needs
+	 * repeating here.
+	 */
+	dev = alloc_etherdev(sizeof(struct venettap));
+	if (!dev)
+		return -ENOMEM;
+
+	priv = netdev_priv(dev);
+
+	spin_lock_init(&priv->lock);
+	random_ether_addr(priv->hmac);
+	random_ether_addr(priv->cmac);
+
+	/*
+	 * vbus init
+	 */
+	_vdev = &priv->vbus.dev;
+
+	_vdev->type = VENETTAP_TYPE;
+	_vdev->ops = &venettap_device_ops;
+	_vdev->attrs = &venettap_attr_group;
+
+	init_waitqueue_head(&priv->vbus.rx_empty);
+
+	/*
+	 * netif init
+	 */
+	skb_queue_head_init(&priv->netif.txq.list);
+	priv->netif.txq.len = 0;
+
+	priv->netif.dev = dev;
+
+	dev->netdev_ops = &venettap_netdev_ops;
+	memcpy(dev->dev_addr, priv->hmac, ETH_ALEN);
+
+	dev->features |= NETIF_F_HIGHDMA;
+
+	*vdev = _vdev;
+
+	/*
+	 * We don't need a try_get because the reference is held by the
+	 * infrastructure during a create() operation.  The matching
+	 * module_put() is in venettap_device_release().
+	 */
+	__module_get(THIS_MODULE);
+
+	return 0;
+}
+
+/* Devclass operations: only instance creation is provided */
+static struct vbus_devclass_ops venettap_devclass_ops = {
+	.create = venettap_device_create,
+};
+
+/* The "venet-tap" device class that venettap_init() registers with vbus */
+static struct vbus_devclass venettap_devclass = {
+	.owner = THIS_MODULE,
+	.name  = VENETTAP_TYPE,
+	.ops   = &venettap_devclass_ops,
+};
+
+/* Module load: register the venet-tap devclass with vbus */
+static int __init venettap_init(void)
+{
+	int rc = vbus_devclass_register(&venettap_devclass);
+
+	return rc;
+}
+
+/* Module unload: withdraw the devclass registered by venettap_init() */
+static void __exit venettap_cleanup(void)
+{
+	vbus_devclass_unregister(&venettap_devclass);
+}
+
+module_init(venettap_init);
+module_exit(venettap_cleanup);
diff --git a/kernel/vbus/Kconfig b/kernel/vbus/Kconfig
index 71acd6f..3ce0adc 100644
--- a/kernel/vbus/Kconfig
+++ b/kernel/vbus/Kconfig
@@ -14,6 +14,17 @@ config VBUS
If unsure, say N
+config VBUS_DEVICES
+ bool "Virtual-Bus Devices"
+ depends on VBUS
+ default n
+ help
+ Provides device-class modules for instantiation on a virtual-bus
+
+ If unsure, say N
+
+source "drivers/vbus/devices/Kconfig"
+
config VBUS_DRIVERS
tristate "VBUS Driver support"
select IOQ
@@ -23,3 +34,5 @@ config VBUS_DRIVERS
If unsure, say N
+
+
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists