[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1369994005-5943-8-git-send-email-jasowang@redhat.com>
Date: Fri, 31 May 2013 17:53:24 +0800
From: Jason Wang <jasowang@...hat.com>
To: davem@...emloft.net, netdev@...r.kernel.org,
linux-kernel@...r.kernel.org, mst@...hat.com
Cc: Jason Wang <jasowang@...hat.com>
Subject: [net-next rfc V2 7/8] macvtap: add TUNSETQUEUE ioctl
This patch adds a TUNSETQUEUE ioctl that lets userspace temporarily disable or
enable a queue of macvtap. This keeps macvtap compatible with tuntap at the API
layer, which simplifies queue management in userspace.
This is done by splitting the taps array into three different areas:
- [0, numvtaps) : enabled taps
- [numvtaps, numvtaps + numdisabled) : disabled taps
- [numvtaps + numdisabled, MAX_MACVTAP_QUEUES) : unused slots
When a tap is enabled or disabled, it is moved to the other area.
Signed-off-by: Jason Wang <jasowang@...hat.com>
---
drivers/net/macvtap.c | 167 ++++++++++++++++++++++++++++++++++++++++----
include/linux/if_macvlan.h | 7 ++
2 files changed, 159 insertions(+), 15 deletions(-)
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index eac49cb..03b781c 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -85,32 +85,126 @@ static const struct proto_ops macvtap_socket_ops;
*/
static DEFINE_SPINLOCK(macvtap_lock);
-static int macvtap_set_queue(struct net_device *dev, struct file *file,
+static void macvtap_swap_slot(struct macvlan_dev *vlan, int a, int b)
+{
+ struct macvtap_queue *q1, *q2;
+
+ if (a == b)
+ return;
+
+ q1 = rcu_dereference_protected(vlan->taps[a],
+ lockdep_is_held(&macvtap_lock));
+ q2 = rcu_dereference_protected(vlan->taps[b],
+ lockdep_is_held(&macvtap_lock));
+
+ BUG_ON(q1 == NULL || q2 == NULL);
+
+ rcu_assign_pointer(vlan->taps[a], q2);
+ rcu_assign_pointer(vlan->taps[b], q1);
+
+ q1->queue_index = b;
+ q2->queue_index = a;
+}
+
+static int macvtap_enable_queue(struct net_device *dev, struct file *file,
struct macvtap_queue *q)
{
struct macvlan_dev *vlan = netdev_priv(dev);
+ int err = -EINVAL;
+ int total;
+
+ spin_lock(&macvtap_lock);
+ total = vlan->numvtaps + vlan->numdisabled;
+
+ if (q->queue_index < vlan->numvtaps)
+ goto out;
+
+ err = 0;
+
+ BUG_ON(q->queue_index >= total);
+ macvtap_swap_slot(vlan, q->queue_index, vlan->numvtaps);
+
+ /* Make sure the pointers were seen before indices. */
+ wmb();
+
+ vlan->numdisabled--;
+ vlan->numvtaps++;
+out:
+ spin_unlock(&macvtap_lock);
+ return err;
+}
+
+static int macvtap_set_queue(struct net_device *dev, struct file *file,
+ struct macvtap_queue *q)
+{
+ struct macvlan_dev *vlan = netdev_priv(dev);
int err = -EBUSY;
+ int total;
spin_lock(&macvtap_lock);
- if (vlan->numvtaps == MAX_MACVTAP_QUEUES)
+
+ total = vlan->numvtaps + vlan->numdisabled;
+ if (total == MAX_MACVTAP_QUEUES)
goto out;
err = 0;
+
rcu_assign_pointer(q->vlan, vlan);
- rcu_assign_pointer(vlan->taps[vlan->numvtaps], q);
+ rcu_assign_pointer(vlan->taps[total], q);
sock_hold(&q->sk);
q->file = file;
- q->queue_index = vlan->numvtaps;
+ q->queue_index = total;
file->private_data = q;
+ if (vlan->numdisabled)
+ macvtap_swap_slot(vlan, vlan->numvtaps, total);
- vlan->numvtaps++;
+ /* Make sure the pointers were seen before indices. */
+ wmb();
+ vlan->numvtaps++;
out:
spin_unlock(&macvtap_lock);
return err;
}
+static int macvtap_disable_queue(struct macvtap_queue *q)
+{
+ struct macvlan_dev *vlan;
+ int err = -EINVAL;
+
+ spin_lock(&macvtap_lock);
+ vlan = rcu_dereference_protected(q->vlan,
+ lockdep_is_held(&macvtap_lock));
+
+ if (vlan) {
+ int total = vlan->numvtaps + vlan->numdisabled;
+ int index = q->queue_index;
+
+ BUG_ON(q->queue_index >= total);
+ if (q->queue_index >= vlan->numvtaps)
+ goto out;
+
+ err = 0;
+ macvtap_swap_slot(vlan, index, total - 1);
+ if (vlan->numdisabled)
+ /* If there's disabled taps, the above swap will cause
+ * a disabled tap to be moved to enabled area. So
+ * another swap is needed to keep the right order.
+ */
+ macvtap_swap_slot(vlan, index, vlan->numvtaps - 1);
+
+ /* make sure the pointers were seen before indices */
+ wmb();
+
+ vlan->numvtaps--;
+ vlan->numdisabled++;
+ }
+
+out:
+ spin_unlock(&macvtap_lock);
+ return err;
+}
/*
* The file owning the queue got closed, give up both
* the reference that the files holds as well as the
@@ -121,25 +215,38 @@ out:
*/
static void macvtap_put_queue(struct macvtap_queue *q)
{
- struct macvtap_queue *nq;
struct macvlan_dev *vlan;
spin_lock(&macvtap_lock);
vlan = rcu_dereference_protected(q->vlan,
lockdep_is_held(&macvtap_lock));
+
if (vlan) {
+ int total = vlan->numvtaps + vlan->numdisabled;
int index = q->queue_index;
- BUG_ON(index >= vlan->numvtaps);
+ bool disabled = q->queue_index >= vlan->numvtaps;
+
+ BUG_ON(q->queue_index >= total);
+ macvtap_swap_slot(vlan, index, total - 1);
+ if (!disabled && vlan->numdisabled)
+ /* If there's disabled taps, the above swap will cause
+ * a disabled tap to be moved to enabled area. So
+ * another swap is needed to keep the right order.
+ */
+ macvtap_swap_slot(vlan, index, vlan->numvtaps - 1);
+
+ RCU_INIT_POINTER(vlan->taps[total - 1], NULL);
+ RCU_INIT_POINTER(q->vlan, NULL);
+ sock_put(&q->sk);
- nq = rcu_dereference_protected(vlan->taps[vlan->numvtaps - 1],
- lockdep_is_held(&macvtap_lock));
- rcu_assign_pointer(vlan->taps[index], nq);
- nq->queue_index = index;
+ /* Make sure the pointers were seen before indices */
+ wmb();
- RCU_INIT_POINTER(q->vlan, NULL);
+ if (disabled)
+ vlan->numdisabled--;
+ else
+ vlan->numvtaps--;
- sock_put(&q->sk);
- --vlan->numvtaps;
}
spin_unlock(&macvtap_lock);
@@ -166,6 +273,9 @@ static struct macvtap_queue *macvtap_get_queue(struct net_device *dev,
if (!numvtaps)
goto out;
+ /* Check taps after numvtaps were exposed. */
+ rmb();
+
/* Check if we can use flow to select a queue */
rxq = skb_get_rxhash(skb);
if (rxq) {
@@ -201,7 +311,7 @@ static void macvtap_del_queues(struct net_device *dev)
/* macvtap_put_queue can free some slots, so go through all slots */
spin_lock(&macvtap_lock);
- for (i = 0; i < vlan->numvtaps; i++) {
+ for (i = 0; i < vlan->numvtaps + vlan->numdisabled; i++) {
q = rcu_dereference_protected(vlan->taps[i],
lockdep_is_held(&macvtap_lock));
BUG_ON(q == NULL);
@@ -211,6 +321,7 @@ static void macvtap_del_queues(struct net_device *dev)
}
/* guarantee that any future macvtap_set_queue will fail */
vlan->numvtaps = MAX_MACVTAP_QUEUES;
+ vlan->numdisabled = 0;
spin_unlock(&macvtap_lock);
synchronize_rcu();
@@ -927,6 +1038,27 @@ static int macvtap_set_iff(struct file *file, struct ifreq __user *ifr_u)
return 0;
}
+static int macvtap_ioctl_set_queue(struct file *file, unsigned int flags)
+{
+ struct macvtap_queue *q = file->private_data;
+ struct macvlan_dev *vlan;
+ int ret = -EINVAL;
+
+ vlan = macvtap_get_vlan(q);
+ if (!vlan)
+ goto done;
+
+ if (flags & IFF_ATTACH_QUEUE)
+ ret = macvtap_enable_queue(vlan->dev, file, q);
+ else if (flags & IFF_DETACH_QUEUE)
+ ret = macvtap_disable_queue(q);
+
+ macvtap_put_vlan(vlan);
+
+done:
+ return ret;
+}
+
/*
* provide compatibility with generic tun/tap interface
*/
@@ -959,6 +1091,11 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
macvtap_put_vlan(vlan);
return ret;
+ case TUNSETQUEUE:
+ if (get_user(u, &ifr->ifr_flags))
+ return -EFAULT;
+ return macvtap_ioctl_set_queue(file, u);
+
case TUNGETFEATURES:
if (put_user(IFF_TAP | IFF_NO_PI | IFF_VNET_HDR, up))
return -EFAULT;
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 62d8bda..d528f38 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -69,8 +69,15 @@ struct macvlan_dev {
u16 flags;
int (*receive)(struct sk_buff *skb);
int (*forward)(struct net_device *dev, struct sk_buff *skb);
+ /* This array tracks all taps (including disabled ones) and will be
+ * reshuffled to keep the following order:
+ * [0, numvtaps) : enabled taps,
+ * [numvtaps, numvtaps + numdisabled) : disabled taps,
+ * [numvtaps + numdisabled, MAX_MACVTAP_QUEUES) : unused slots
+ */
struct macvtap_queue *taps[MAX_MACVTAP_QUEUES];
int numvtaps;
+ int numdisabled;
int minor;
};
--
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists