[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20110405150920.20501.12605.sendpatchset@krkumar2.in.ibm.com>
Date: Tue, 05 Apr 2011 20:39:20 +0530
From: Krishna Kumar <krkumar2@...ibm.com>
To: rusty@...tcorp.com.au, davem@...emloft.net, mst@...hat.com
Cc: eric.dumazet@...il.com, arnd@...db.de, netdev@...r.kernel.org,
horms@...ge.net.au, avi@...hat.com, anthony@...emonkey.ws,
kvm@...r.kernel.org, Krishna Kumar <krkumar2@...ibm.com>
Subject: [PATCH 4/4] [RFC rev2] qemu changes
diff -ruNp org/hw/vhost.c new/hw/vhost.c
--- org/hw/vhost.c 2011-04-05 14:15:18.000000000 +0530
+++ new/hw/vhost.c 2011-04-05 14:15:18.000000000 +0530
@@ -581,7 +581,7 @@ static void vhost_virtqueue_cleanup(stru
0, virtio_queue_get_desc_size(vdev, idx));
}
-int vhost_dev_init(struct vhost_dev *hdev, int devfd, bool force)
+int vhost_dev_init(struct vhost_dev *hdev, int devfd, bool force, int numtxqs)
{
uint64_t features;
int r;
@@ -593,11 +593,13 @@ int vhost_dev_init(struct vhost_dev *hde
return -errno;
}
}
- r = ioctl(hdev->control, VHOST_SET_OWNER, NULL);
+ r = ioctl(hdev->control, VHOST_SET_OWNER, numtxqs);
if (r < 0) {
goto fail;
}
+ hdev->nvqs = numtxqs * 2;
+
r = ioctl(hdev->control, VHOST_GET_FEATURES, &features);
if (r < 0) {
goto fail;
diff -ruNp org/hw/vhost.h new/hw/vhost.h
--- org/hw/vhost.h 2011-04-05 14:15:18.000000000 +0530
+++ new/hw/vhost.h 2011-04-05 14:15:18.000000000 +0530
@@ -41,7 +41,7 @@ struct vhost_dev {
bool force;
};
-int vhost_dev_init(struct vhost_dev *hdev, int devfd, bool force);
+int vhost_dev_init(struct vhost_dev *hdev, int devfd, bool force, int numtxqs);
void vhost_dev_cleanup(struct vhost_dev *hdev);
bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev);
int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev);
diff -ruNp org/hw/vhost_net.c new/hw/vhost_net.c
--- org/hw/vhost_net.c 2011-04-05 14:15:18.000000000 +0530
+++ new/hw/vhost_net.c 2011-04-05 20:27:01.000000000 +0530
@@ -36,8 +36,9 @@
struct vhost_net {
struct vhost_dev dev;
- struct vhost_virtqueue vqs[2];
- int backend;
+ struct vhost_virtqueue *vqs;
+ int nvqs;
+ int *backend;
VLANClientState *vc;
};
@@ -70,11 +71,11 @@ void vhost_net_ack_features(struct vhost
}
}
-static int vhost_net_get_fd(VLANClientState *backend)
+static int vhost_net_get_fd(VLANClientState *backend, int index)
{
switch (backend->info->type) {
case NET_CLIENT_TYPE_TAP:
- return tap_get_fd(backend);
+ return tap_get_fd(backend, index);
default:
fprintf(stderr, "vhost-net requires tap backend\n");
return -EBADFD;
@@ -82,27 +83,36 @@ static int vhost_net_get_fd(VLANClientSt
}
struct vhost_net *vhost_net_init(VLANClientState *backend, int devfd,
- bool force)
+ bool force, int numtxqs)
{
- int r;
+ int i, r;
struct vhost_net *net = qemu_malloc(sizeof *net);
if (!backend) {
fprintf(stderr, "vhost-net requires backend to be setup\n");
goto fail;
}
- r = vhost_net_get_fd(backend);
- if (r < 0) {
- goto fail;
+
+ net->backend = qemu_malloc(numtxqs * (sizeof *net->backend));
+ for (i = 0; i < numtxqs; i++) {
+ r = vhost_net_get_fd(backend, i);
+ if (r < 0) {
+ goto fail;
+ }
+ net->backend[i] = r;
}
+
net->vc = backend;
net->dev.backend_features = tap_has_vnet_hdr(backend) ? 0 :
(1 << VHOST_NET_F_VIRTIO_NET_HDR);
- net->backend = r;
- r = vhost_dev_init(&net->dev, devfd, force);
+ r = vhost_dev_init(&net->dev, devfd, force, numtxqs);
if (r < 0) {
goto fail;
}
+
+ net->nvqs = numtxqs * 2;
+ net->vqs = qemu_malloc(net->nvqs * (sizeof *net->vqs));
+
if (!tap_has_vnet_hdr_len(backend,
sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
net->dev.features &= ~(1 << VIRTIO_NET_F_MRG_RXBUF);
@@ -137,7 +147,6 @@ int vhost_net_start(struct vhost_net *ne
sizeof(struct virtio_net_hdr_mrg_rxbuf));
}
- net->dev.nvqs = 2;
net->dev.vqs = net->vqs;
r = vhost_dev_start(&net->dev, dev);
if (r < 0) {
@@ -145,9 +154,9 @@ int vhost_net_start(struct vhost_net *ne
}
net->vc->info->poll(net->vc, false);
- qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
- file.fd = net->backend;
for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
+ qemu_set_fd_handler(net->backend[file.index/2], NULL, NULL, NULL);
+ file.fd = net->backend[(file.index / 2) % (net->dev.nvqs / 2)];
r = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file);
if (r < 0) {
r = -errno;
@@ -195,7 +204,7 @@ void vhost_net_cleanup(struct vhost_net
}
#else
struct vhost_net *vhost_net_init(VLANClientState *backend, int devfd,
- bool force)
+ bool force, int numtxqs)
{
return NULL;
}
diff -ruNp org/hw/vhost_net.h new/hw/vhost_net.h
--- org/hw/vhost_net.h 2011-04-05 14:15:18.000000000 +0530
+++ new/hw/vhost_net.h 2011-04-05 14:15:18.000000000 +0530
@@ -6,7 +6,8 @@
struct vhost_net;
typedef struct vhost_net VHostNetState;
-VHostNetState *vhost_net_init(VLANClientState *backend, int devfd, bool force);
+VHostNetState *vhost_net_init(VLANClientState *backend, int devfd, bool force,
+ int numtxqs);
bool vhost_net_query(VHostNetState *net, VirtIODevice *dev);
int vhost_net_start(VHostNetState *net, VirtIODevice *dev);
diff -ruNp org/hw/virtio-net.c new/hw/virtio-net.c
--- org/hw/virtio-net.c 2011-04-05 14:15:18.000000000 +0530
+++ new/hw/virtio-net.c 2011-04-05 14:15:18.000000000 +0530
@@ -31,8 +31,8 @@ typedef struct VirtIONet
VirtIODevice vdev;
uint8_t mac[ETH_ALEN];
uint16_t status;
- VirtQueue *rx_vq;
- VirtQueue *tx_vq;
+ VirtQueue **rx_vq;
+ VirtQueue **tx_vq;
VirtQueue *ctrl_vq;
NICState *nic;
QEMUTimer *tx_timer;
@@ -63,6 +63,7 @@ typedef struct VirtIONet
} mac_table;
uint32_t *vlans;
DeviceState *qdev;
+ uint16_t numtxqs;
} VirtIONet;
/* TODO
@@ -80,6 +81,7 @@ static void virtio_net_get_config(VirtIO
struct virtio_net_config netcfg;
stw_p(&netcfg.status, n->status);
+ /* Store through stw_p() like 'status' so both 16-bit config fields share one byte order. */
+ stw_p(&netcfg.num_queue_pairs, n->numtxqs * 2);
memcpy(netcfg.mac, n->mac, ETH_ALEN);
memcpy(config, &netcfg, sizeof(netcfg));
}
@@ -228,6 +230,9 @@ static uint32_t virtio_net_get_features(
VirtIONet *n = to_virtio_net(vdev);
features |= (1 << VIRTIO_NET_F_MAC);
+ if (n->numtxqs > 1)
+ features |= (1 << VIRTIO_NET_F_MULTIQUEUE);
+
if (peer_has_vnet_hdr(n)) {
tap_using_vnet_hdr(n->nic->nc.peer, 1);
@@ -460,7 +465,7 @@ static int virtio_net_can_receive(VLANCl
return 0;
}
- if (!virtio_queue_ready(n->rx_vq) ||
+ if (!virtio_queue_ready(n->rx_vq[0]) ||
!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
return 0;
@@ -469,22 +474,22 @@ static int virtio_net_can_receive(VLANCl
static int virtio_net_has_buffers(VirtIONet *n, int bufsize)
{
- if (virtio_queue_empty(n->rx_vq) ||
+ if (virtio_queue_empty(n->rx_vq[0]) || /* only rx_vq[0] is examined by this function */
(n->mergeable_rx_bufs &&
- !virtqueue_avail_bytes(n->rx_vq, bufsize, 0))) {
- virtio_queue_set_notification(n->rx_vq, 1);
+ !virtqueue_avail_bytes(n->rx_vq[0], bufsize, 0))) {
+ virtio_queue_set_notification(n->rx_vq[0], 1); /* enable notification before re-checking */
/* To avoid a race condition where the guest has made some buffers
* available after the above check but before notification was
* enabled, check for available buffers again.
*/
- if (virtio_queue_empty(n->rx_vq) ||
+ if (virtio_queue_empty(n->rx_vq[0]) ||
(n->mergeable_rx_bufs &&
- !virtqueue_avail_bytes(n->rx_vq, bufsize, 0)))
+ !virtqueue_avail_bytes(n->rx_vq[0], bufsize, 0)))
return 0;
}
- virtio_queue_set_notification(n->rx_vq, 0);
+ virtio_queue_set_notification(n->rx_vq[0], 0); /* buffers are available: notification off again */
return 1;
}
@@ -623,7 +628,7 @@ static ssize_t virtio_net_receive(VLANCl
total = 0;
- if (virtqueue_pop(n->rx_vq, &elem) == 0) {
+ if (virtqueue_pop(n->rx_vq[0], &elem) == 0) {
if (i == 0)
return -1;
error_report("virtio-net unexpected empty queue: "
@@ -675,15 +680,15 @@ static ssize_t virtio_net_receive(VLANCl
}
/* signal other side */
- virtqueue_fill(n->rx_vq, &elem, total, i++);
+ virtqueue_fill(n->rx_vq[0], &elem, total, i++);
}
if (mhdr) {
stw_p(&mhdr->num_buffers, i);
}
- virtqueue_flush(n->rx_vq, i);
- virtio_notify(&n->vdev, n->rx_vq);
+ virtqueue_flush(n->rx_vq[0], i);
+ virtio_notify(&n->vdev, n->rx_vq[0]);
return size;
}
@@ -694,13 +699,13 @@ static void virtio_net_tx_complete(VLANC
{
VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
- virtqueue_push(n->tx_vq, &n->async_tx.elem, n->async_tx.len);
- virtio_notify(&n->vdev, n->tx_vq);
+ virtqueue_push(n->tx_vq[0], &n->async_tx.elem, n->async_tx.len);
+ virtio_notify(&n->vdev, n->tx_vq[0]);
n->async_tx.elem.out_num = n->async_tx.len = 0;
- virtio_queue_set_notification(n->tx_vq, 1);
- virtio_net_flush_tx(n, n->tx_vq);
+ virtio_queue_set_notification(n->tx_vq[0], 1);
+ virtio_net_flush_tx(n, n->tx_vq[0]);
}
/* TX */
@@ -715,7 +720,7 @@ static int32_t virtio_net_flush_tx(VirtI
assert(n->vdev.vm_running);
if (n->async_tx.elem.out_num) {
- virtio_queue_set_notification(n->tx_vq, 0);
+ virtio_queue_set_notification(n->tx_vq[0], 0);
return num_packets;
}
@@ -750,7 +755,7 @@ static int32_t virtio_net_flush_tx(VirtI
ret = qemu_sendv_packet_async(&n->nic->nc, out_sg, out_num,
virtio_net_tx_complete);
if (ret == 0) {
- virtio_queue_set_notification(n->tx_vq, 0);
+ virtio_queue_set_notification(n->tx_vq[0], 0);
n->async_tx.elem = elem;
n->async_tx.len = len;
return -EBUSY;
@@ -818,8 +823,8 @@ static void virtio_net_tx_timer(void *op
if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
return;
- virtio_queue_set_notification(n->tx_vq, 1);
- virtio_net_flush_tx(n, n->tx_vq);
+ virtio_queue_set_notification(n->tx_vq[0], 1);
+ virtio_net_flush_tx(n, n->tx_vq[0]);
}
static void virtio_net_tx_bh(void *opaque)
@@ -835,7 +840,7 @@ static void virtio_net_tx_bh(void *opaqu
if (unlikely(!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)))
return;
- ret = virtio_net_flush_tx(n, n->tx_vq);
+ ret = virtio_net_flush_tx(n, n->tx_vq[0]);
if (ret == -EBUSY) {
return; /* Notification re-enable handled by tx_complete */
}
@@ -851,9 +856,9 @@ static void virtio_net_tx_bh(void *opaqu
/* If less than a full burst, re-enable notification and flush
* anything that may have come in while we weren't looking. If
* we find something, assume the guest is still active and reschedule */
- virtio_queue_set_notification(n->tx_vq, 1);
- if (virtio_net_flush_tx(n, n->tx_vq) > 0) {
- virtio_queue_set_notification(n->tx_vq, 0);
+ virtio_queue_set_notification(n->tx_vq[0], 1);
+ if (virtio_net_flush_tx(n, n->tx_vq[0]) > 0) {
+ virtio_queue_set_notification(n->tx_vq[0], 0);
qemu_bh_schedule(n->tx_bh);
n->tx_waiting = 1;
}
@@ -869,6 +874,7 @@ static void virtio_net_save(QEMUFile *f,
virtio_save(&n->vdev, f);
qemu_put_buffer(f, n->mac, ETH_ALEN);
+ qemu_put_be16(f, n->numtxqs);
qemu_put_be32(f, n->tx_waiting);
qemu_put_be32(f, n->mergeable_rx_bufs);
qemu_put_be16(f, n->status);
@@ -898,6 +904,7 @@ static int virtio_net_load(QEMUFile *f,
virtio_load(&n->vdev, f);
qemu_get_buffer(f, n->mac, ETH_ALEN);
+ n->numtxqs = qemu_get_be16(f); /* width must match qemu_put_be16() in virtio_net_save() */
n->tx_waiting = qemu_get_be32(f);
n->mergeable_rx_bufs = qemu_get_be32(f);
@@ -996,11 +1003,13 @@ VirtIODevice *virtio_net_init(DeviceStat
virtio_net_conf *net)
{
VirtIONet *n;
+ int i;
n = (VirtIONet *)virtio_common_init("virtio-net", VIRTIO_ID_NET,
sizeof(struct virtio_net_config),
sizeof(VirtIONet));
+ n->numtxqs = conf->peer->numtxqs;
n->vdev.get_config = virtio_net_get_config;
n->vdev.set_config = virtio_net_set_config;
n->vdev.get_features = virtio_net_get_features;
@@ -1008,7 +1017,6 @@ VirtIODevice *virtio_net_init(DeviceStat
n->vdev.bad_features = virtio_net_bad_features;
n->vdev.reset = virtio_net_reset;
n->vdev.set_status = virtio_net_set_status;
- n->rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
if (net->tx && strcmp(net->tx, "timer") && strcmp(net->tx, "bh")) {
error_report("virtio-net: "
@@ -1017,12 +1025,25 @@ VirtIODevice *virtio_net_init(DeviceStat
error_report("Defaulting to \"bh\"");
}
+ /* Allocate per rx/tx vq's */
+ n->rx_vq = qemu_mallocz(n->numtxqs * sizeof(*n->rx_vq));
+ n->tx_vq = qemu_mallocz(n->numtxqs * sizeof(*n->tx_vq));
+
+ for (i = 0; i < n->numtxqs; i++) {
+ n->rx_vq[i] = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
+ if (net->tx && !strcmp(net->tx, "timer")) {
+ n->tx_vq[i] = virtio_add_queue(&n->vdev, 256,
+ virtio_net_handle_tx_timer);
+ } else {
+ n->tx_vq[i] = virtio_add_queue(&n->vdev, 256,
+ virtio_net_handle_tx_bh);
+ }
+ }
+
if (net->tx && !strcmp(net->tx, "timer")) {
- n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx_timer);
n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
n->tx_timeout = net->txtimer;
} else {
- n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx_bh);
n->tx_bh = qemu_bh_new(virtio_net_tx_bh, n);
}
n->ctrl_vq = virtio_add_queue(&n->vdev, 64, virtio_net_handle_ctrl);
diff -ruNp org/hw/virtio-net.h new/hw/virtio-net.h
--- org/hw/virtio-net.h 2011-04-05 14:15:18.000000000 +0530
+++ new/hw/virtio-net.h 2011-04-05 14:15:18.000000000 +0530
@@ -44,6 +44,7 @@
#define VIRTIO_NET_F_CTRL_RX 18 /* Control channel RX mode support */
#define VIRTIO_NET_F_CTRL_VLAN 19 /* Control channel VLAN filtering */
#define VIRTIO_NET_F_CTRL_RX_EXTRA 20 /* Extra RX mode control support */
+#define VIRTIO_NET_F_MULTIQUEUE 21 /* Supports multiple RX/TX queues */
#define VIRTIO_NET_S_LINK_UP 1 /* Link is up */
@@ -72,6 +73,7 @@ struct virtio_net_config
uint8_t mac[ETH_ALEN];
/* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
uint16_t status;
+ uint16_t num_queue_pairs; /* number of rx+tx queues */
} __attribute__((packed));
/* This is the first element of the scatter-gather list. If you don't
diff -ruNp org/hw/virtio-pci.c new/hw/virtio-pci.c
--- org/hw/virtio-pci.c 2011-04-05 14:15:18.000000000 +0530
+++ new/hw/virtio-pci.c 2011-04-05 14:15:18.000000000 +0530
@@ -103,6 +103,7 @@ typedef struct {
uint32_t addr;
uint32_t class_code;
uint32_t nvectors;
+ uint32_t mq;
BlockConf block;
NICConf nic;
uint32_t host_features;
@@ -965,6 +966,7 @@ static PCIDeviceInfo virtio_info[] = {
DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, false),
DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3),
+ DEFINE_PROP_UINT32("mq", VirtIOPCIProxy, mq, 1),
DEFINE_VIRTIO_NET_FEATURES(VirtIOPCIProxy, host_features),
DEFINE_NIC_PROPERTIES(VirtIOPCIProxy, nic),
DEFINE_PROP_UINT32("x-txtimer", VirtIOPCIProxy,
diff -ruNp org/net/tap.c new/net/tap.c
--- org/net/tap.c 2011-04-05 14:15:18.000000000 +0530
+++ new/net/tap.c 2011-04-05 14:15:18.000000000 +0530
@@ -49,16 +49,20 @@
*/
#define TAP_BUFSIZE (4096 + 65536)
+#define VIRTIO_MAX_TXQS 8
+
typedef struct TAPState {
VLANClientState nc;
- int fd;
+ int *fds;
+ int numfds;
char down_script[1024];
- char down_script_arg[128];
+ char down_script_arg[VIRTIO_MAX_TXQS][128];
uint8_t buf[TAP_BUFSIZE];
unsigned int read_poll : 1;
unsigned int write_poll : 1;
unsigned int using_vnet_hdr : 1;
unsigned int has_ufo: 1;
+ unsigned int do_script: 1;
VHostNetState *vhost_net;
unsigned host_vnet_hdr_len;
} TAPState;
@@ -71,11 +75,16 @@ static void tap_writable(void *opaque);
static void tap_update_fd_handler(TAPState *s)
{
- qemu_set_fd_handler2(s->fd,
- s->read_poll ? tap_can_send : NULL,
- s->read_poll ? tap_send : NULL,
- s->write_poll ? tap_writable : NULL,
- s);
+ int i;
+
+ /*
+ * Register (or clear, when the matching poll flag is off) the read and
+ * write handlers on every queue fd; all of them share the same TAPState.
+ */
+ for (i = 0; i < s->numfds; i++) {
+ qemu_set_fd_handler2(s->fds[i],
+ s->read_poll ? tap_can_send : NULL,
+ s->read_poll ? tap_send : NULL,
+ s->write_poll ? tap_writable : NULL,
+ s);
+ }
}
static void tap_read_poll(TAPState *s, int enable)
@@ -104,7 +113,7 @@ static ssize_t tap_write_packet(TAPState
ssize_t len;
do {
- len = writev(s->fd, iov, iovcnt);
+ len = writev(s->fds[0], iov, iovcnt);
} while (len == -1 && errno == EINTR);
if (len == -1 && errno == EAGAIN) {
@@ -197,7 +206,7 @@ static void tap_send(void *opaque)
do {
uint8_t *buf = s->buf;
- size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
+ size = tap_read_packet(s->fds[0], s->buf, sizeof(s->buf));
if (size <= 0) {
break;
}
@@ -238,18 +247,20 @@ int tap_has_vnet_hdr_len(VLANClientState
assert(nc->info->type == NET_CLIENT_TYPE_TAP);
- return tap_probe_vnet_hdr_len(s->fd, len);
+ return tap_probe_vnet_hdr_len(s->fds[0], len);
}
void tap_set_vnet_hdr_len(VLANClientState *nc, int len)
{
TAPState *s = DO_UPCAST(TAPState, nc, nc);
+ int q;
assert(nc->info->type == NET_CLIENT_TYPE_TAP);
assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
len == sizeof(struct virtio_net_hdr));
- tap_fd_set_vnet_hdr_len(s->fd, len);
+ /* Apply the same vnet header length to each queue's tap fd. */
+ for (q = 0; q < s->numfds; q++) {
+ tap_fd_set_vnet_hdr_len(s->fds[q], len);
+ }
s->host_vnet_hdr_len = len;
}
@@ -269,16 +280,27 @@ void tap_set_offload(VLANClientState *nc
int tso6, int ecn, int ufo)
{
TAPState *s = DO_UPCAST(TAPState, nc, nc);
- if (s->fd < 0) {
- return;
+ int i;
+
+ for (i = 0; i < s->numfds; i++) {
+ if (s->fds[i] >= 0)
+ tap_fd_set_offload(s->fds[i], csum, tso4, tso6, ecn, ufo);
}
+}
- tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo);
+/*
+ * Close the first @numtxqs descriptors in @fds. Slots holding a negative
+ * value are skipped, and every closed slot is reset to -1 so that a second
+ * call on the same array does not close an unrelated, reused descriptor.
+ */
+static void close_tap_fds(int *fds, int numtxqs)
+{
+ int i;
+
+ for (i = 0; i < numtxqs; i++) {
+ if (fds[i] >= 0) {
+ close(fds[i]);
+ fds[i] = -1;
+ }
+ }
}
static void tap_cleanup(VLANClientState *nc)
{
TAPState *s = DO_UPCAST(TAPState, nc, nc);
+ int i;
if (s->vhost_net) {
vhost_net_cleanup(s->vhost_net);
@@ -287,13 +309,15 @@ static void tap_cleanup(VLANClientState
qemu_purge_queued_packets(nc);
- if (s->down_script[0])
- launch_script(s->down_script, s->down_script_arg, s->fd);
+ for (i = 0; i < s->numfds; i++) {
+ if (s->down_script[0])
+ launch_script(s->down_script, s->down_script_arg[i], s->fds[i]);
+ }
tap_read_poll(s, 0);
tap_write_poll(s, 0);
- close(s->fd);
- s->fd = -1;
+
+ close_tap_fds(s->fds, s->numfds);
}
static void tap_poll(VLANClientState *nc, bool enable)
@@ -303,11 +327,12 @@ static void tap_poll(VLANClientState *nc
tap_write_poll(s, enable);
}
-int tap_get_fd(VLANClientState *nc)
+/* Return the tap fd backing queue @index; @index must be in [0, numfds). */
+int tap_get_fd(VLANClientState *nc, int index)
{
TAPState *s = DO_UPCAST(TAPState, nc, nc);
assert(nc->info->type == NET_CLIENT_TYPE_TAP);
- return s->fd;
+ /* index is signed: reject negative values as well as overruns */
+ assert(index >= 0 && index < s->numfds);
+ return s->fds[index];
}
/* fd support */
@@ -325,20 +350,25 @@ static NetClientInfo net_tap_info = {
static TAPState *net_tap_fd_init(VLANState *vlan,
const char *model,
const char *name,
- int fd,
+ int *fds, int numtxqs,
int vnet_hdr)
{
VLANClientState *nc;
TAPState *s;
+ int i;
nc = qemu_new_net_client(&net_tap_info, vlan, NULL, model, name);
+ nc->numtxqs = numtxqs;
s = DO_UPCAST(TAPState, nc, nc);
- s->fd = fd;
+ s->fds = fds;
+ s->numfds = numtxqs;
s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
s->using_vnet_hdr = 0;
- s->has_ufo = tap_probe_has_ufo(s->fd);
+ for (i = 0; i < s->numfds; i++) {
+ s->has_ufo = tap_probe_has_ufo(s->fds[i]);
+ }
tap_set_offload(&s->nc, 0, 0, 0, 0, 0);
tap_read_poll(s, 1);
s->vhost_net = NULL;
@@ -389,11 +419,28 @@ static int launch_script(const char *set
return -1;
}
-static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
+static int net_tap_init(QemuOpts *opts, int *vnet_hdr, int *fds, int numtxqs,
+ int *script)
{
- int fd, vnet_hdr_required;
+ int i, vnet_hdr_required;
char ifname[128] = {0,};
const char *setup_script;
+ int launch = 0;
+ const char *dev;
+
+ if (qemu_opt_get(opts, "vtap")) {
+ *vnet_hdr = 1;
+ *script = 0; /* we don't need start/stop script */
+ dev = qemu_opt_get(opts, "vtap");
+ for (i = 0; i < numtxqs; i++) {
+ TFR(fds[i] = vtap_open(dev, vnet_hdr, 1));
+ if (fds[i] < 0)
+ goto err;
+ fcntl(fds[i], F_SETFL, O_NONBLOCK);
+ }
+ *vnet_hdr = !!tap_probe_vnet_hdr(fds[0]);
+ return 0;
+ }
if (qemu_opt_get(opts, "ifname")) {
pstrcpy(ifname, sizeof(ifname), qemu_opt_get(opts, "ifname"));
@@ -406,29 +453,76 @@ static int net_tap_init(QemuOpts *opts,
vnet_hdr_required = 0;
}
- TFR(fd = tap_open(ifname, sizeof(ifname), vnet_hdr, vnet_hdr_required));
- if (fd < 0) {
- return -1;
- }
-
setup_script = qemu_opt_get(opts, "script");
if (setup_script &&
setup_script[0] != '\0' &&
- strcmp(setup_script, "no") != 0 &&
- launch_script(setup_script, ifname, fd)) {
- close(fd);
- return -1;
+ strcmp(setup_script, "no") != 0) {
+ launch = 1;
+ *script = 1;
+ }
+
+ if (numtxqs == 1) {
+ fprintf(stderr, "Device: %s\n", ifname);
+ TFR(fds[0] = tap_open(ifname, sizeof(ifname), vnet_hdr,
+ vnet_hdr_required));
+ if (fds[0] < 0) {
+ goto err;
+ }
+
+ if (launch && launch_script(setup_script, ifname, fds[0]))
+ goto err;
+ } else {
+ char alt_name[128];
+
+ for (i = 0; i < numtxqs; i++) {
+ sprintf(alt_name, "%s.%d", ifname, i);
+ fprintf(stderr, "Device: %s\n", alt_name);
+ TFR(fds[i] = tap_open(alt_name, sizeof(alt_name), vnet_hdr,
+ vnet_hdr_required));
+ if (fds[i] < 0) {
+ goto err;
+ }
+
+ if (launch && launch_script(setup_script, alt_name, fds[i]))
+ goto err;
+ }
}
qemu_opt_set(opts, "ifname", ifname);
- return fd;
+ return 0;
+
+err:
+ close_tap_fds(fds, numtxqs);
+ return -1;
}
int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan)
{
TAPState *s;
- int fd, vnet_hdr = 0;
+ int *fds, vnet_hdr = 0;
+ int i, vhost;
+ int script = 0, numtxqs = 1;
+
+ /*
+ * Keep the pre-multiqueue default: vhost is implied by "vhostfd=" or
+ * "vhostforce=on" unless "vhost=" is given explicitly.
+ */
+ vhost = qemu_opt_get_bool(opts, "vhost", !!qemu_opt_get(opts, "vhostfd") ||
+ qemu_opt_get_bool(opts, "vhostforce", false));
+
+ /*
+ * We support multiple tx queues if:
+ * 1. smp > 1
+ * 2. vhost=on
+ * 3. mq=on
+ * In this case, #txqueues = #cpus (capped at VIRTIO_MAX_TXQS). This value
+ * can be changed by using the "numtxqs" option.
+ */
+ if (vhost && smp_cpus > 1) {
+ if (qemu_opt_get_bool(opts, "mq", 0)) {
+ int dflt = MIN(smp_cpus, VIRTIO_MAX_TXQS);
+ uint64_t req = qemu_opt_get_number(opts, "numtxqs", dflt);
+
+ /* down_script_arg[] has VIRTIO_MAX_TXQS slots; reject anything else */
+ if (req < 1 || req > VIRTIO_MAX_TXQS) {
+ error_report("numtxqs must be between 1 and %d", VIRTIO_MAX_TXQS);
+ return -1;
+ }
+ numtxqs = req;
+ }
+ }
+
+ /* Start every slot at -1 so an error path never mistakes fd 0 for a tap fd */
+ fds = qemu_malloc(numtxqs * sizeof(*fds));
+ for (i = 0; i < numtxqs; i++) {
+ fds[i] = -1;
+ }
if (qemu_opt_get(opts, "fd")) {
if (qemu_opt_get(opts, "ifname") ||
@@ -439,14 +533,14 @@ int net_init_tap(QemuOpts *opts, Monitor
return -1;
}
- fd = net_handle_fd_param(mon, qemu_opt_get(opts, "fd"));
- if (fd == -1) {
+ fds[0] = net_handle_fd_param(mon, qemu_opt_get(opts, "fd"));
+ if (fds[0] == -1) {
return -1;
}
- fcntl(fd, F_SETFL, O_NONBLOCK);
+ fcntl(fds[0], F_SETFL, O_NONBLOCK);
- vnet_hdr = tap_probe_vnet_hdr(fd);
+ vnet_hdr = tap_probe_vnet_hdr(fds[0]);
} else {
if (!qemu_opt_get(opts, "script")) {
qemu_opt_set(opts, "script", DEFAULT_NETWORK_SCRIPT);
@@ -456,24 +550,28 @@ int net_init_tap(QemuOpts *opts, Monitor
qemu_opt_set(opts, "downscript", DEFAULT_NETWORK_DOWN_SCRIPT);
}
- fd = net_tap_init(opts, &vnet_hdr);
- if (fd == -1) {
+ if (net_tap_init(opts, &vnet_hdr, fds, numtxqs, &script) == -1) {
return -1;
}
}
- s = net_tap_fd_init(vlan, "tap", name, fd, vnet_hdr);
+ s = net_tap_fd_init(vlan, "tap", name, fds, numtxqs, vnet_hdr);
if (!s) {
- close(fd);
+ close_tap_fds(fds, numtxqs);
return -1;
}
- if (tap_set_sndbuf(s->fd, opts) < 0) {
- return -1;
+ s->do_script = script;
+
+ for (i = 0; i < s->numfds; i++) {
+ if (tap_set_sndbuf(s->fds[i], opts) < 0) {
+ close_tap_fds(fds, numtxqs);
+ return -1;
+ }
}
if (qemu_opt_get(opts, "fd")) {
- snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd);
+ snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fds[0]);
} else {
const char *ifname, *script, *downscript;
@@ -487,12 +585,20 @@ int net_init_tap(QemuOpts *opts, Monitor
if (strcmp(downscript, "no") != 0) {
snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
- snprintf(s->down_script_arg, sizeof(s->down_script_arg), "%s", ifname);
+ for (i = 0; i < s->numfds; i++) {
+ /*
+ * A single queue keeps the plain ifname; with several queues the
+ * per-queue device is "<ifname>.<i>". Format straight into the
+ * bounded destination instead of going through a temporary.
+ */
+ if (s->numfds == 1) {
+ snprintf(s->down_script_arg[i], sizeof(s->down_script_arg[i]), "%s", ifname);
+ } else {
+ snprintf(s->down_script_arg[i], sizeof(s->down_script_arg[i]), "%s.%d", ifname, i);
+ }
+ }
}
}
- if (qemu_opt_get_bool(opts, "vhost", !!qemu_opt_get(opts, "vhostfd") ||
- qemu_opt_get_bool(opts, "vhostforce", false))) {
+ if (vhost) {
int vhostfd, r;
bool force = qemu_opt_get_bool(opts, "vhostforce", false);
if (qemu_opt_get(opts, "vhostfd")) {
@@ -504,9 +610,13 @@ int net_init_tap(QemuOpts *opts, Monitor
} else {
vhostfd = -1;
}
- s->vhost_net = vhost_net_init(&s->nc, vhostfd, force);
+ s->vhost_net = vhost_net_init(&s->nc, vhostfd, force, numtxqs);
if (!s->vhost_net) {
error_report("vhost-net requested but could not be initialized");
+ if (numtxqs > 1) {
+ error_report("Need vhost support for numtxqs > 1, exiting...");
+ exit(1);
+ }
return -1;
}
} else if (qemu_opt_get(opts, "vhostfd")) {
diff -ruNp org/net/tap.h new/net/tap.h
--- org/net/tap.h 2011-04-05 14:15:18.000000000 +0530
+++ new/net/tap.h 2011-04-05 14:15:18.000000000 +0530
@@ -35,6 +35,7 @@
int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan);
int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required);
+int vtap_open(const char *devname, int *vnet_hdr, int vnet_hdr_required);
ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen);
@@ -52,7 +53,7 @@ int tap_probe_has_ufo(int fd);
void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo);
void tap_fd_set_vnet_hdr_len(int fd, int len);
-int tap_get_fd(VLANClientState *vc);
+int tap_get_fd(VLANClientState *vc, int index);
struct vhost_net;
struct vhost_net *tap_get_vhost_net(VLANClientState *vc);
diff -ruNp org/net/tap-linux.c new/net/tap-linux.c
--- org/net/tap-linux.c 2011-04-05 14:15:18.000000000 +0530
+++ new/net/tap-linux.c 2011-04-05 14:15:18.000000000 +0530
@@ -82,6 +82,48 @@ int tap_open(char *ifname, int ifname_si
return fd;
}
+/*
+ * Open the tap character device at @devname and configure it with
+ * TUNSETIFF as IFF_TAP | IFF_NO_PI.
+ *
+ * @vnet_hdr: on entry, non-zero asks for an IFF_VNET_HDR probe; on return it
+ * is 1 if the flag was enabled and 0 if the probe failed. It is left
+ * untouched when zero on entry.
+ * @vnet_hdr_required: fail instead of falling back when the probe fails.
+ *
+ * Returns the fd (after requesting O_NONBLOCK on it), or -1 on failure.
+ */
+int vtap_open(const char *devname, int *vnet_hdr, int vnet_hdr_required)
+{
+ struct ifreq ifr;
+ int fd;
+
+ TFR(fd = open(devname, O_RDWR));
+ if (fd < 0) {
+ fprintf(stderr, "warning: could not open %s: no virtual network emulation\n", devname);
+ return -1;
+ }
+
+ memset(&ifr, 0, sizeof(ifr));
+ ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+
+ if (*vnet_hdr) {
+ unsigned int features;
+ int supported = ioctl(fd, TUNGETFEATURES, &features) == 0 &&
+ (features & IFF_VNET_HDR);
+
+ *vnet_hdr = supported ? 1 : 0;
+ if (supported) {
+ ifr.ifr_flags |= IFF_VNET_HDR;
+ } else if (vnet_hdr_required) {
+ error_report("vnet_hdr=1 requested, but no kernel "
+ "support for IFF_VNET_HDR available");
+ close(fd);
+ return -1;
+ }
+ }
+
+ if (ioctl(fd, TUNSETIFF, (void *) &ifr) != 0) {
+ fprintf(stderr, "warning: could not configure %s: no virtual network emulation\n", devname);
+ close(fd);
+ return -1;
+ }
+
+ fcntl(fd, F_SETFL, O_NONBLOCK);
+ return fd;
+}
+
/* sndbuf implements a kind of flow control for tap.
* Unfortunately when it's enabled, and packets are sent
* to other guests on the same host, the receiver
diff -ruNp org/net.c new/net.c
--- org/net.c 2011-04-05 14:15:18.000000000 +0530
+++ new/net.c 2011-04-05 14:15:18.000000000 +0530
@@ -798,6 +798,16 @@ static int net_init_nic(QemuOpts *opts,
return -1;
}
+ if (nd->netdev->numtxqs > 1 && nd->nvectors == DEV_NVECTORS_UNSPECIFIED) {
+ /*
+ * User specified mq for guest, but no "vectors=", tune
+ * it automatically to 'numtxqs' TX + 'numtxqs' RX + 1 controlq.
+ */
+ nd->nvectors = nd->netdev->numtxqs * 2 + 1;
+ monitor_printf(mon, "nvectors tuned to %d\n", nd->nvectors);
+ }
+
+
nd->used = 1;
nb_nics++;
@@ -941,6 +951,18 @@ static const struct {
},
#ifndef _WIN32
{
+ .name = "vtap",
+ .type = QEMU_OPT_STRING,
+ .help = "name of macvtap device to use",
+ }, {
+ .name = "mq",
+ .type = QEMU_OPT_BOOL,
+ .help = "enable multiqueue on network i/f",
+ }, {
+ .name = "numtxqs",
+ .type = QEMU_OPT_NUMBER,
+ .help = "optional number of RX/TX queues, if mq is enabled",
+ }, {
.name = "fd",
.type = QEMU_OPT_STRING,
.help = "file descriptor of an already opened tap",
diff -ruNp org/net.h new/net.h
--- org/net.h 2011-04-05 14:15:18.000000000 +0530
+++ new/net.h 2011-04-05 14:15:18.000000000 +0530
@@ -64,6 +64,7 @@ struct VLANClientState {
struct VLANState *vlan;
VLANClientState *peer;
NetQueue *send_queue;
+ int numtxqs;
char *model;
char *name;
char info_str[256];
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists