[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20101020085505.15579.94591.sendpatchset@krkumar2.in.ibm.com>
Date: Wed, 20 Oct 2010 14:25:05 +0530
From: Krishna Kumar <krkumar2@...ibm.com>
To: rusty@...tcorp.com.au, davem@...emloft.net, mst@...hat.com
Cc: kvm@...r.kernel.org, arnd@...db.de, netdev@...r.kernel.org,
avi@...hat.com, anthony@...emonkey.ws, eric.dumazet@...il.com,
Krishna Kumar <krkumar2@...ibm.com>
Subject: [v3 RFC PATCH 2/4] Changes for virtio-net
Implement the multiqueue (mq) virtio-net driver.
Although struct virtio_net_config changes, the driver still works
with older qemu versions, since the last element is not accessed
unless qemu sets VIRTIO_NET_F_NUMTXQS. The patch also adds a macro
for the maximum number of TX vqs (VIRTIO_MAX_SQ) that the user can
specify.
Signed-off-by: Krishna Kumar <krkumar2@...ibm.com>
---
drivers/net/virtio_net.c | 234 ++++++++++++++++++++++++++---------
include/linux/virtio_net.h | 6
2 files changed, 185 insertions(+), 55 deletions(-)
diff -ruNp org/include/linux/virtio_net.h new.dynamic.optimize_vhost/include/linux/virtio_net.h
--- org/include/linux/virtio_net.h 2010-10-11 10:20:22.000000000 +0530
+++ new.dynamic.optimize_vhost/include/linux/virtio_net.h 2010-10-19 13:24:38.000000000 +0530
@@ -7,6 +7,9 @@
#include <linux/virtio_config.h>
#include <linux/if_ether.h>
+/* Maximum number of TX queues supported */
+#define VIRTIO_MAX_SQ 32
+
/* The feature bitmap for virtio net */
#define VIRTIO_NET_F_CSUM 0 /* Host handles pkts w/ partial csum */
#define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */
@@ -26,6 +29,7 @@
#define VIRTIO_NET_F_CTRL_RX 18 /* Control channel RX mode support */
#define VIRTIO_NET_F_CTRL_VLAN 19 /* Control channel VLAN filtering */
#define VIRTIO_NET_F_CTRL_RX_EXTRA 20 /* Extra RX mode control support */
+#define VIRTIO_NET_F_NUMTXQS 21 /* Device supports multiple TX queue */
#define VIRTIO_NET_S_LINK_UP 1 /* Link is up */
@@ -34,6 +38,8 @@ struct virtio_net_config {
__u8 mac[6];
/* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
__u16 status;
+ /* number of transmit queues */
+ __u16 numtxqs;
} __attribute__((packed));
/* This is the first element of the scatter-gather list. If you don't
diff -ruNp org/drivers/net/virtio_net.c new.dynamic.optimize_vhost/drivers/net/virtio_net.c
--- org/drivers/net/virtio_net.c 2010-10-11 10:20:02.000000000 +0530
+++ new.dynamic.optimize_vhost/drivers/net/virtio_net.c 2010-10-19 17:01:53.000000000 +0530
@@ -40,11 +40,24 @@ module_param(gso, bool, 0444);
#define VIRTNET_SEND_COMMAND_SG_MAX 2
+/*
+ * Our representation of a send virtqueue: one instance per TX queue, so
+ * each queue has its own virtqueue pointer and its own scatterlist.
+ */
+struct send_queue {
+ /* Virtqueue used for transmit on this queue */
+ struct virtqueue *svq;
+
+ /* TX: fragments + linear part + virtio header */
+ struct scatterlist tx_sg[MAX_SKB_FRAGS + 2];
+};
+
struct virtnet_info {
+ struct send_queue **sq;
+ struct napi_struct napi ____cacheline_aligned_in_smp;
+
+ /* read-mostly variables */
+ int numtxqs ____cacheline_aligned_in_smp;
struct virtio_device *vdev;
- struct virtqueue *rvq, *svq, *cvq;
+ struct virtqueue *rvq;
+ struct virtqueue *cvq;
struct net_device *dev;
- struct napi_struct napi;
unsigned int status;
/* Number of input buffers, and max we've ever had. */
@@ -62,9 +75,8 @@ struct virtnet_info {
/* Chain pages by the private ptr. */
struct page *pages;
- /* fragments + linear part + virtio header */
+ /* RX: fragments + linear part + virtio header */
struct scatterlist rx_sg[MAX_SKB_FRAGS + 2];
- struct scatterlist tx_sg[MAX_SKB_FRAGS + 2];
};
struct skb_vnet_hdr {
@@ -120,12 +132,13 @@ static struct page *get_a_page(struct vi
static void skb_xmit_done(struct virtqueue *svq)
{
struct virtnet_info *vi = svq->vdev->priv;
+ int qnum = svq->queue_index - 1; /* 0 is RX vq */
/* Suppress further interrupts. */
virtqueue_disable_cb(svq);
/* We were probably waiting for more output buffers. */
- netif_wake_queue(vi->dev);
+ netif_wake_subqueue(vi->dev, qnum);
}
static void set_skb_frag(struct sk_buff *skb, struct page *page,
@@ -495,12 +508,13 @@ again:
return received;
}
-static unsigned int free_old_xmit_skbs(struct virtnet_info *vi)
+static unsigned int free_old_xmit_skbs(struct virtnet_info *vi,
+ struct virtqueue *svq)
{
struct sk_buff *skb;
unsigned int len, tot_sgs = 0;
- while ((skb = virtqueue_get_buf(vi->svq, &len)) != NULL) {
+ while ((skb = virtqueue_get_buf(svq, &len)) != NULL) {
pr_debug("Sent skb %p\n", skb);
vi->dev->stats.tx_bytes += skb->len;
vi->dev->stats.tx_packets++;
@@ -510,7 +524,8 @@ static unsigned int free_old_xmit_skbs(s
return tot_sgs;
}
-static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
+static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb,
+ struct virtqueue *svq, struct scatterlist *tx_sg)
{
struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
@@ -548,12 +563,12 @@ static int xmit_skb(struct virtnet_info
/* Encode metadata header at front. */
if (vi->mergeable_rx_bufs)
- sg_set_buf(vi->tx_sg, &hdr->mhdr, sizeof hdr->mhdr);
+ sg_set_buf(tx_sg, &hdr->mhdr, sizeof hdr->mhdr);
else
- sg_set_buf(vi->tx_sg, &hdr->hdr, sizeof hdr->hdr);
+ sg_set_buf(tx_sg, &hdr->hdr, sizeof hdr->hdr);
- hdr->num_sg = skb_to_sgvec(skb, vi->tx_sg + 1, 0, skb->len) + 1;
- return virtqueue_add_buf(vi->svq, vi->tx_sg, hdr->num_sg,
+ hdr->num_sg = skb_to_sgvec(skb, tx_sg + 1, 0, skb->len) + 1;
+ return virtqueue_add_buf(svq, tx_sg, hdr->num_sg,
0, skb);
}
@@ -561,31 +576,34 @@ static netdev_tx_t start_xmit(struct sk_
{
struct virtnet_info *vi = netdev_priv(dev);
int capacity;
+ int qnum = skb_get_queue_mapping(skb);
+ struct virtqueue *svq = vi->sq[qnum]->svq;
/* Free up any pending old buffers before queueing new ones. */
- free_old_xmit_skbs(vi);
+ free_old_xmit_skbs(vi, svq);
/* Try to transmit */
- capacity = xmit_skb(vi, skb);
+ capacity = xmit_skb(vi, skb, svq, vi->sq[qnum]->tx_sg);
/* This can happen with OOM and indirect buffers. */
if (unlikely(capacity < 0)) {
if (net_ratelimit()) {
if (likely(capacity == -ENOMEM)) {
dev_warn(&dev->dev,
- "TX queue failure: out of memory\n");
+ "TXQ (%d) failure: out of memory\n",
+ qnum);
} else {
dev->stats.tx_fifo_errors++;
dev_warn(&dev->dev,
- "Unexpected TX queue failure: %d\n",
- capacity);
+ "Unexpected TXQ (%d) failure: %d\n",
+ qnum, capacity);
}
}
dev->stats.tx_dropped++;
kfree_skb(skb);
return NETDEV_TX_OK;
}
- virtqueue_kick(vi->svq);
+ virtqueue_kick(svq);
/* Don't wait up for transmitted skbs to be freed. */
skb_orphan(skb);
@@ -594,13 +612,13 @@ static netdev_tx_t start_xmit(struct sk_
/* Apparently nice girls don't return TX_BUSY; stop the queue
* before it gets out of hand. Naturally, this wastes entries. */
if (capacity < 2+MAX_SKB_FRAGS) {
- netif_stop_queue(dev);
- if (unlikely(!virtqueue_enable_cb(vi->svq))) {
+ netif_stop_subqueue(dev, qnum);
+ if (unlikely(!virtqueue_enable_cb(svq))) {
/* More just got used, free them then recheck. */
- capacity += free_old_xmit_skbs(vi);
+ capacity += free_old_xmit_skbs(vi, svq);
if (capacity >= 2+MAX_SKB_FRAGS) {
- netif_start_queue(dev);
- virtqueue_disable_cb(vi->svq);
+ netif_start_subqueue(dev, qnum);
+ virtqueue_disable_cb(svq);
}
}
}
@@ -871,10 +889,10 @@ static void virtnet_update_status(struct
if (vi->status & VIRTIO_NET_S_LINK_UP) {
netif_carrier_on(vi->dev);
- netif_wake_queue(vi->dev);
+ netif_tx_wake_all_queues(vi->dev);
} else {
netif_carrier_off(vi->dev);
- netif_stop_queue(vi->dev);
+ netif_tx_stop_all_queues(vi->dev);
}
}
@@ -885,18 +903,122 @@ static void virtnet_config_changed(struc
virtnet_update_status(vi);
}
+#define MAX_DEVICE_NAME 16
+/*
+ * Allocate the per-TX-queue state and look up all virtqueues of the device.
+ *
+ * Virtqueues are requested from find_vqs in this order: one RX vq ("input"),
+ * then 'numtxqs' TX vqs ("output.N"), then the control vq ("control") if
+ * VIRTIO_NET_F_CTRL_VQ was negotiated.
+ *
+ * Returns 0 on success with vi->rvq, vi->sq[i]->svq and (if present)
+ * vi->cvq filled in.  On failure returns a negative errno (-ENOMEM for
+ * allocation failures, otherwise the find_vqs error); everything allocated
+ * for vi->sq is freed and vi->sq is reset to NULL.
+ */
+static int initialize_vqs(struct virtnet_info *vi, int numtxqs)
+{
+	vq_callback_t **callbacks;
+	struct virtqueue **vqs;
+	int i, err = -ENOMEM;
+	int totalvqs;
+	int has_cvq = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
+	char **names;
+
+	vi->sq = kzalloc(numtxqs * sizeof(*vi->sq), GFP_KERNEL);
+	if (!vi->sq)
+		goto out;
+	for (i = 0; i < numtxqs; i++) {
+		vi->sq[i] = kzalloc(sizeof(*vi->sq[i]), GFP_KERNEL);
+		if (!vi->sq[i])
+			goto out;
+	}
+
+	/* setup initial send queue parameters */
+	for (i = 0; i < numtxqs; i++)
+		sg_init_table(vi->sq[i]->tx_sg, ARRAY_SIZE(vi->sq[i]->tx_sg));
+
+	/*
+	 * We expect 1 RX virtqueue followed by 'numtxqs' TX virtqueues, and
+	 * optionally one control virtqueue.
+	 */
+	totalvqs = 1 + numtxqs + has_cvq;
+
+	/* Setup parameters for find_vqs */
+	vqs = kmalloc(totalvqs * sizeof(*vqs), GFP_KERNEL);
+	callbacks = kmalloc(totalvqs * sizeof(*callbacks), GFP_KERNEL);
+	/* Zeroed so the cleanup loop can kfree() slots that were never set */
+	names = kzalloc(totalvqs * sizeof(*names), GFP_KERNEL);
+	if (!vqs || !callbacks || !names)
+		goto free_mem;
+
+	/* Parameters for recv virtqueue */
+	callbacks[0] = skb_recv_done;
+	names[0] = "input";
+
+	/* Parameters for send virtqueues */
+	for (i = 1; i <= numtxqs; i++) {
+		callbacks[i] = skb_xmit_done;
+		names[i] = kmalloc(MAX_DEVICE_NAME * sizeof(*names[i]),
+				   GFP_KERNEL);
+		if (!names[i])
+			goto free_mem;
+		/* Bounded write: never overrun the MAX_DEVICE_NAME buffer */
+		snprintf(names[i], MAX_DEVICE_NAME, "output.%d", i - 1);
+	}
+
+	/* Parameters for control virtqueue, if any (i == numtxqs + 1 here) */
+	if (has_cvq) {
+		callbacks[i] = NULL;
+		names[i] = "control";
+	}
+
+	err = vi->vdev->config->find_vqs(vi->vdev, totalvqs, vqs, callbacks,
+					 (const char **)names);
+	if (err)
+		goto free_mem;
+
+	vi->rvq = vqs[0];
+	for (i = 0; i < numtxqs; i++)
+		vi->sq[i]->svq = vqs[i + 1];
+
+	/* i == numtxqs here, so the control vq is the last entry */
+	if (has_cvq) {
+		vi->cvq = vqs[i + 1];
+
+		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
+			vi->dev->features |= NETIF_F_HW_VLAN_FILTER;
+	}
+
+free_mem:
+	/*
+	 * NOTE(review): the "output.N" strings are freed here, right after
+	 * find_vqs().  Confirm the virtio core does not keep these name
+	 * pointers in the virtqueues; if it does, they must live as long as
+	 * the vqs themselves.
+	 */
+	if (names) {
+		/* Only the TX names are heap-allocated; the rest are literals */
+		for (i = 1; i <= numtxqs; i++)
+			kfree(names[i]);
+		kfree(names);
+	}
+
+	kfree(callbacks);
+	kfree(vqs);
+
+out:
+	/*
+	 * vi->sq is NULL when its own allocation failed; do not index it in
+	 * that case.  Entries that were never allocated are NULL (kzalloc),
+	 * which kfree() accepts.
+	 */
+	if (err && vi->sq) {
+		for (i = 0; i < numtxqs; i++)
+			kfree(vi->sq[i]);
+		kfree(vi->sq);
+		vi->sq = NULL;
+	}
+
+	return err;
+}
+
static int virtnet_probe(struct virtio_device *vdev)
{
- int err;
+ int i, err;
+ u16 numtxqs;
struct net_device *dev;
struct virtnet_info *vi;
- struct virtqueue *vqs[3];
- vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL};
- const char *names[] = { "input", "output", "control" };
- int nvqs;
+
+ /*
+ * Find if host passed the number of transmit queues supported
+ * by the device
+ */
+ err = virtio_config_val(vdev, VIRTIO_NET_F_NUMTXQS,
+ offsetof(struct virtio_net_config, numtxqs),
+ &numtxqs);
+
+ /* We need at least one txq */
+ if (err || !numtxqs)
+ numtxqs = 1;
+
+ if (numtxqs > VIRTIO_MAX_SQ)
+ return -EINVAL;
/* Allocate ourselves a network device with room for our info */
- dev = alloc_etherdev(sizeof(struct virtnet_info));
+ dev = alloc_etherdev_mq(sizeof(struct virtnet_info), numtxqs);
if (!dev)
return -ENOMEM;
@@ -940,9 +1062,9 @@ static int virtnet_probe(struct virtio_d
vi->vdev = vdev;
vdev->priv = vi;
vi->pages = NULL;
+ vi->numtxqs = numtxqs;
INIT_DELAYED_WORK(&vi->refill, refill_work);
sg_init_table(vi->rx_sg, ARRAY_SIZE(vi->rx_sg));
- sg_init_table(vi->tx_sg, ARRAY_SIZE(vi->tx_sg));
/* If we can receive ANY GSO packets, we must allocate large ones. */
if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
@@ -953,23 +1075,10 @@ static int virtnet_probe(struct virtio_d
if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
vi->mergeable_rx_bufs = true;
- /* We expect two virtqueues, receive then send,
- * and optionally control. */
- nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
-
- err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
+ /* Initialize our rx/tx queue parameters, and invoke find_vqs */
+ err = initialize_vqs(vi, numtxqs);
if (err)
- goto free;
-
- vi->rvq = vqs[0];
- vi->svq = vqs[1];
-
- if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
- vi->cvq = vqs[2];
-
- if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
- dev->features |= NETIF_F_HW_VLAN_FILTER;
- }
+ goto free_netdev;
err = register_netdev(dev);
if (err) {
@@ -986,6 +1095,9 @@ static int virtnet_probe(struct virtio_d
goto unregister;
}
+ dev_info(&dev->dev, "(virtio-net) Allocated 1 RX and %d TX vq's\n",
+ numtxqs);
+
vi->status = VIRTIO_NET_S_LINK_UP;
virtnet_update_status(vi);
netif_carrier_on(dev);
@@ -998,7 +1110,10 @@ unregister:
cancel_delayed_work_sync(&vi->refill);
free_vqs:
vdev->config->del_vqs(vdev);
-free:
+ for (i = 0; i < numtxqs; i++)
+ kfree(vi->sq[i]);
+ kfree(vi->sq);
+free_netdev:
free_netdev(dev);
return err;
}
@@ -1006,12 +1121,21 @@ free:
static void free_unused_bufs(struct virtnet_info *vi)
{
void *buf;
- while (1) {
- buf = virtqueue_detach_unused_buf(vi->svq);
- if (!buf)
- break;
- dev_kfree_skb(buf);
+ int i;
+
+ for (i = 0; i < vi->numtxqs; i++) {
+ struct virtqueue *svq = vi->sq[i]->svq;
+
+ while (1) {
+ buf = virtqueue_detach_unused_buf(svq);
+ if (!buf)
+ break;
+ dev_kfree_skb(buf);
+ }
+ kfree(vi->sq[i]);
}
+ kfree(vi->sq);
+
while (1) {
buf = virtqueue_detach_unused_buf(vi->rvq);
if (!buf)
@@ -1059,7 +1183,7 @@ static unsigned int features[] = {
VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO,
VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
- VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
+ VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, VIRTIO_NET_F_NUMTXQS,
};
static struct virtio_driver virtio_net_driver = {
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists