[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20180605034231.31610-1-sthemmin@microsoft.com>
Date: Mon, 4 Jun 2018 20:42:31 -0700
From: Stephen Hemminger <stephen@...workplumber.org>
To: kys@...rosoft.com, haiyangz@...rosoft.com, davem@...emloft.net,
mst@...hat.com, sridhar.samudrala@...el.com
Cc: netdev@...r.kernel.org, Stephen Hemminger <sthemmin@...rosoft.com>
Subject: [PATCH net] failover: eliminate callback hell
The net failover should be a simple library, not a virtual
object with function callbacks (see callback hell).
The code is simpler and smaller for both the netvsc and virtio use cases.
The code is restructured in many ways. I should have given these
as review comments to net_failover during review
but did not want to overwhelm the original submitter.
Therefore it was merged prematurely.
Some of the many items changed are:
* The support routines should just be selected as needed in
kernel config, no need for them to be visible config items.
* Both netvsc and net_failover should keep their list of their
own devices. Not a common list.
* The matching of secondary device to primary device policy
is up to the network device. Both net_failover and netvsc
will use MAC for now but can change separately.
* The match policy is only used during initial discovery; after
that the secondary device knows what the upper device is because
of the parent/child relationship; no searching is required.
* Now, netvsc and net_failover use the same delayed work type
mechanism for setup. Previously, net_failover code was triggering off
name change but a similar policy was rejected for netvsc.
"what is good for the goose is good for the gander"
* The net_failover private device info 'struct net_failover_info'
should have been private to the driver file, not a visible
API.
* The net_failover device should not use SET_NETDEV_DEV;
that is intended only for physical devices, not virtual devices.
* No point in having DocBook style comments on a driver file.
They only make sense on an external exposed API.
* net_failover only supports Ethernet, so use ether_addr_copy.
* Set permanent and current address of net_failover device
to match the primary.
* Carrier should be marked off before registering
the net_failover device.
* Use netdev_XXX for log messages, in net_failover (not dev_xxx)
* Since failover infrastructure is about linking devices just
use RTNL no need for other locking in init and teardown.
* Don't bother with ERR_PTR() style return if only possible
return is success or no memory.
* As much as possible, the terms master and slave should be avoided
because of their cultural connotations.
Note: this code has been tested on Hyper-V
but is compile tested only on virtio.
Fixes: 30c8bd5aa8b2 ("net: Introduce generic failover module")
Signed-off-by: Stephen Hemminger <sthemmin@...rosoft.com>
---
Although this patch needs to go into 4.18 (linux-net),
this version is based against net-next because net-next
hasn't been merged into linux-net yet.
drivers/net/hyperv/hyperv_net.h | 3 +-
drivers/net/hyperv/netvsc_drv.c | 173 +++++++++++------
drivers/net/net_failover.c | 312 ++++++++++++++++++++-----------
drivers/net/virtio_net.c | 9 +-
include/net/failover.h | 31 +---
include/net/net_failover.h | 32 +---
net/Kconfig | 13 +-
net/core/failover.c | 316 ++++----------------------------
8 files changed, 373 insertions(+), 516 deletions(-)
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 99d8e7398a5b..c7d25d10765e 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -902,6 +902,8 @@ struct net_device_context {
struct hv_device *device_ctx;
/* netvsc_device */
struct netvsc_device __rcu *nvdev;
+ /* list of netvsc net_devices */
+ struct list_head list;
/* reconfigure work */
struct delayed_work dwork;
/* last reconfig time */
@@ -933,7 +935,6 @@ struct net_device_context {
/* Serial number of the VF to team with */
u32 vf_serial;
- struct failover *failover;
};
/* Per channel data */
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index bef4d55a108c..074e6b8578df 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -70,6 +70,8 @@ static int debug = -1;
module_param(debug, int, 0444);
MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+static LIST_HEAD(netvsc_dev_list);
+
static void netvsc_change_rx_flags(struct net_device *net, int change)
{
struct net_device_context *ndev_ctx = netdev_priv(net);
@@ -1846,101 +1848,120 @@ static void netvsc_vf_setup(struct work_struct *w)
}
vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
- if (vf_netdev)
+ if (vf_netdev) {
__netvsc_vf_setup(ndev, vf_netdev);
-
+ dev_put(vf_netdev);
+ }
rtnl_unlock();
}
-static int netvsc_pre_register_vf(struct net_device *vf_netdev,
- struct net_device *ndev)
+static struct net_device *get_netvsc_bymac(const u8 *mac)
{
- struct net_device_context *net_device_ctx;
- struct netvsc_device *netvsc_dev;
+ struct net_device_context *ndev_ctx;
- net_device_ctx = netdev_priv(ndev);
- netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
- if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev))
- return -ENODEV;
+ ASSERT_RTNL();
- return 0;
+ list_for_each_entry(ndev_ctx, &netvsc_dev_list, list) {
+ struct net_device *dev = hv_get_drvdata(ndev_ctx->device_ctx);
+
+ if (ether_addr_equal(mac, dev->perm_addr))
+ return dev;
+ }
+
+ return NULL;
}
-static int netvsc_register_vf(struct net_device *vf_netdev,
- struct net_device *ndev)
+static int netvsc_register_vf(struct net_device *vf_netdev)
{
- struct net_device_context *ndev_ctx = netdev_priv(ndev);
+ struct net_device *ndev;
+ struct net_device_context *ndev_ctx;
+
+ /* Must use Ethernet addresses */
+ if (vf_netdev->addr_len != ETH_ALEN)
+ return NOTIFY_DONE;
+
+ /* VF must be a physical device not VLAN, etc */
+ if (!vf_netdev->dev.parent)
+ return NOTIFY_DONE;
+
+ /* Use the MAC address to locate the synthetic interface to
+ * associate with the VF interface.
+ */
+ ndev = get_netvsc_bymac(vf_netdev->perm_addr);
+ if (!ndev)
+ return NOTIFY_DONE;
+
+ /* If network device is being removed, don't do anything */
+ ndev_ctx = netdev_priv(ndev);
+ if (!rtnl_dereference(ndev_ctx->nvdev))
+ return NOTIFY_DONE;
+
+ if (netdev_failover_join(vf_netdev, ndev, netvsc_vf_handle_frame)) {
+ netdev_err(vf_netdev, "could not join: %s", ndev->name);
+ return NOTIFY_DONE;
+ }
/* set slave flag before open to prevent IPv6 addrconf */
vf_netdev->flags |= IFF_SLAVE;
+ dev_hold(vf_netdev);
+
schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
call_netdevice_notifiers(NETDEV_JOIN, vf_netdev);
netdev_info(vf_netdev, "joined to %s\n", ndev->name);
- dev_hold(vf_netdev);
rcu_assign_pointer(ndev_ctx->vf_netdev, vf_netdev);
- return 0;
+ return NOTIFY_OK;
}
/* VF up/down change detected, schedule to change data path */
-static int netvsc_vf_changed(struct net_device *vf_netdev,
- struct net_device *ndev)
+static int netvsc_vf_changed(struct net_device *vf_netdev)
{
struct net_device_context *net_device_ctx;
struct netvsc_device *netvsc_dev;
+ struct net_device *ndev;
bool vf_is_up = netif_running(vf_netdev);
+ ndev = netdev_failover_upper_get(vf_netdev);
+ if (!ndev)
+ return NOTIFY_DONE;
+
net_device_ctx = netdev_priv(ndev);
netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
if (!netvsc_dev)
- return -ENODEV;
+ return NOTIFY_DONE;
netvsc_switch_datapath(ndev, vf_is_up);
netdev_info(ndev, "Data path switched %s VF: %s\n",
vf_is_up ? "to" : "from", vf_netdev->name);
- return 0;
+ return NOTIFY_OK;
}
-static int netvsc_pre_unregister_vf(struct net_device *vf_netdev,
- struct net_device *ndev)
+static int netvsc_unregister_vf(struct net_device *vf_netdev)
{
struct net_device_context *net_device_ctx;
+ struct net_device *ndev;
- net_device_ctx = netdev_priv(ndev);
- cancel_delayed_work_sync(&net_device_ctx->vf_takeover);
-
- return 0;
-}
-
-static int netvsc_unregister_vf(struct net_device *vf_netdev,
- struct net_device *ndev)
-{
- struct net_device_context *net_device_ctx;
+ ndev = netdev_failover_upper_get(vf_netdev);
+ if (!ndev)
+ return NOTIFY_DONE;
net_device_ctx = netdev_priv(ndev);
+ if (cancel_delayed_work_sync(&net_device_ctx->vf_takeover))
+ dev_put(vf_netdev);
netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
+ netdev_failover_unjoin(vf_netdev, ndev);
RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL);
- dev_put(vf_netdev);
- return 0;
+ return NOTIFY_OK;
}
-static struct failover_ops netvsc_failover_ops = {
- .slave_pre_register = netvsc_pre_register_vf,
- .slave_register = netvsc_register_vf,
- .slave_pre_unregister = netvsc_pre_unregister_vf,
- .slave_unregister = netvsc_unregister_vf,
- .slave_link_change = netvsc_vf_changed,
- .slave_handle_frame = netvsc_vf_handle_frame,
-};
-
static int netvsc_probe(struct hv_device *dev,
const struct hv_vmbus_device_id *dev_id)
{
@@ -2009,6 +2030,8 @@ static int netvsc_probe(struct hv_device *dev,
memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);
+ net->priv_flags |= IFF_FAILOVER;
+
/* hw_features computed in rndis_netdev_set_hwcaps() */
net->features = net->hw_features |
NETIF_F_HIGHDMA | NETIF_F_SG |
@@ -2024,23 +2047,19 @@ static int netvsc_probe(struct hv_device *dev,
else
net->max_mtu = ETH_DATA_LEN;
- ret = register_netdev(net);
+ rtnl_lock();
+ ret = register_netdevice(net);
if (ret != 0) {
pr_err("Unable to register netdev.\n");
goto register_failed;
}
- net_device_ctx->failover = failover_register(net, &netvsc_failover_ops);
- if (IS_ERR(net_device_ctx->failover)) {
- ret = PTR_ERR(net_device_ctx->failover);
- goto err_failover;
- }
-
- return ret;
+ list_add(&net_device_ctx->list, &netvsc_dev_list);
+ rtnl_unlock();
+ return 0;
-err_failover:
- unregister_netdev(net);
register_failed:
+ rtnl_unlock();
rndis_filter_device_remove(dev, nvdev);
rndis_failed:
free_percpu(net_device_ctx->vf_stats);
@@ -2079,15 +2098,17 @@ static int netvsc_remove(struct hv_device *dev)
*/
rtnl_lock();
vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
- if (vf_netdev)
- failover_slave_unregister(vf_netdev);
+ if (vf_netdev) {
+ netdev_failover_unjoin(vf_netdev, net);
+ dev_put(vf_netdev);
+ }
if (nvdev)
rndis_filter_device_remove(dev, nvdev);
unregister_netdevice(net);
- failover_unregister(ndev_ctx->failover);
+ list_del(&ndev_ctx->list);
rtnl_unlock();
rcu_read_unlock();
@@ -2115,8 +2136,47 @@ static struct hv_driver netvsc_drv = {
.remove = netvsc_remove,
};
+/* On Hyper-V, every VF interface is matched with a corresponding
+ * synthetic interface. The synthetic interface is presented first
+ * to the guest. When the corresponding VF instance is registered,
+ * we will take care of switching the data path.
+ */
+static int netvsc_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
+
+ /* Skip parent events */
+ if (netif_is_failover(event_dev))
+ return NOTIFY_DONE;
+
+ /* Avoid non-Ethernet type devices */
+ if (event_dev->type != ARPHRD_ETHER)
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ return netvsc_register_vf(event_dev);
+
+ case NETDEV_UNREGISTER:
+ return netvsc_unregister_vf(event_dev);
+
+ case NETDEV_UP:
+ case NETDEV_DOWN:
+ return netvsc_vf_changed(event_dev);
+
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
+static struct notifier_block netvsc_netdev_notifier = {
+ .notifier_call = netvsc_netdev_event,
+};
+
static void __exit netvsc_drv_exit(void)
{
+ unregister_netdevice_notifier(&netvsc_netdev_notifier);
vmbus_driver_unregister(&netvsc_drv);
}
@@ -2136,6 +2196,7 @@ static int __init netvsc_drv_init(void)
if (ret)
return ret;
+ register_netdevice_notifier(&netvsc_netdev_notifier);
return 0;
}
diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c
index 83f7420ddea5..e0d30527f748 100644
--- a/drivers/net/net_failover.c
+++ b/drivers/net/net_failover.c
@@ -28,6 +28,46 @@
#include <uapi/linux/if_arp.h>
#include <net/net_failover.h>
+static LIST_HEAD(net_failover_list);
+
+/* failover state */
+struct net_failover_info {
+ struct net_device *failover_dev;
+
+ /* list of failover virtual devices */
+ struct list_head list;
+
+ /* primary netdev with same MAC */
+ struct net_device __rcu *primary_dev;
+
+ /* standby netdev */
+ struct net_device __rcu *standby_dev;
+
+ /* primary netdev stats */
+ struct rtnl_link_stats64 primary_stats;
+
+ /* standby netdev stats */
+ struct rtnl_link_stats64 standby_stats;
+
+ /* aggregated stats */
+ struct rtnl_link_stats64 failover_stats;
+
+ /* spinlock while updating stats */
+ spinlock_t stats_lock;
+
+ /* delayed setup of slave */
+ struct delayed_work standby_init;
+};
+
+#define FAILOVER_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \
+ NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \
+ NETIF_F_HIGHDMA | NETIF_F_LRO)
+
+#define FAILOVER_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \
+ NETIF_F_RXCSUM | NETIF_F_ALL_TSO)
+
+#define FAILOVER_SETUP_INTERVAL (HZ / 10)
+
static bool net_failover_xmit_ready(struct net_device *dev)
{
return netif_running(dev) && netif_carrier_ok(dev);
@@ -460,22 +500,42 @@ static void net_failover_lower_state_changed(struct net_device *slave_dev,
netdev_lower_state_changed(slave_dev, &info);
}
-static int net_failover_slave_pre_register(struct net_device *slave_dev,
- struct net_device *failover_dev)
+static struct net_device *get_net_failover_bymac(const u8 *mac)
{
- struct net_device *standby_dev, *primary_dev;
+ struct net_failover_info *nfo_info;
+
+ ASSERT_RTNL();
+
+ list_for_each_entry(nfo_info, &net_failover_list, list) {
+ struct net_device *failover_dev = nfo_info->failover_dev;
+
+ if (ether_addr_equal(mac, failover_dev->perm_addr))
+ return failover_dev;
+ }
+
+ return NULL;
+}
+
+static int net_failover_register_event(struct net_device *slave_dev)
+{
+ struct net_device *failover_dev, *standby_dev, *primary_dev;
struct net_failover_info *nfo_info;
bool slave_is_standby;
+ failover_dev = get_net_failover_bymac(slave_dev->perm_addr);
+ if (!failover_dev)
+ return NOTIFY_DONE;
+
nfo_info = netdev_priv(failover_dev);
standby_dev = rtnl_dereference(nfo_info->standby_dev);
primary_dev = rtnl_dereference(nfo_info->primary_dev);
slave_is_standby = slave_dev->dev.parent == failover_dev->dev.parent;
if (slave_is_standby ? standby_dev : primary_dev) {
- netdev_err(failover_dev, "%s attempting to register as slave dev when %s already present\n",
+ netdev_err(failover_dev,
+ "%s attempting to register as slave dev when %s already present\n",
slave_dev->name,
slave_is_standby ? "standby" : "primary");
- return -EINVAL;
+ return NOTIFY_DONE;
}
/* We want to allow only a direct attached VF device as a primary
@@ -484,23 +544,33 @@ static int net_failover_slave_pre_register(struct net_device *slave_dev,
*/
if (!slave_is_standby && (!slave_dev->dev.parent ||
!dev_is_pci(slave_dev->dev.parent)))
- return -EINVAL;
+ return NOTIFY_DONE;
if (failover_dev->features & NETIF_F_VLAN_CHALLENGED &&
vlan_uses_dev(failover_dev)) {
- netdev_err(failover_dev, "Device %s is VLAN challenged and failover device has VLAN set up\n",
+ netdev_err(failover_dev,
+ "Device %s is VLAN challenged and failover device has VLAN set up\n",
failover_dev->name);
- return -EINVAL;
+ return NOTIFY_DONE;
}
- return 0;
+ if (netdev_failover_join(slave_dev, failover_dev,
+ net_failover_handle_frame)) {
+ netdev_err(failover_dev, "could not join: %s", slave_dev->name);
+ return NOTIFY_DONE;
+ }
+
+ /* Trigger rest of setup in process context */
+ schedule_delayed_work(&nfo_info->standby_init, FAILOVER_SETUP_INTERVAL);
+
+ return NOTIFY_OK;
}
-static int net_failover_slave_register(struct net_device *slave_dev,
- struct net_device *failover_dev)
+static void __net_failover_setup(struct net_device *failover_dev)
{
+ struct net_failover_info *nfo_info = netdev_priv(failover_dev);
+ struct net_device *slave_dev = rtnl_dereference(nfo_info->standby_dev);
struct net_device *standby_dev, *primary_dev;
- struct net_failover_info *nfo_info;
bool slave_is_standby;
u32 orig_mtu;
int err;
@@ -509,13 +579,12 @@ static int net_failover_slave_register(struct net_device *slave_dev,
orig_mtu = slave_dev->mtu;
err = dev_set_mtu(slave_dev, failover_dev->mtu);
if (err) {
- netdev_err(failover_dev, "unable to change mtu of %s to %u register failed\n",
+ netdev_err(failover_dev,
+ "unable to change mtu of %s to %u register failed\n",
slave_dev->name, failover_dev->mtu);
goto done;
}
- dev_hold(slave_dev);
-
if (netif_running(failover_dev)) {
err = dev_open(slave_dev);
if (err && (err != -EBUSY)) {
@@ -537,7 +606,6 @@ static int net_failover_slave_register(struct net_device *slave_dev,
goto err_vlan_add;
}
- nfo_info = netdev_priv(failover_dev);
standby_dev = rtnl_dereference(nfo_info->standby_dev);
primary_dev = rtnl_dereference(nfo_info->primary_dev);
slave_is_standby = slave_dev->dev.parent == failover_dev->dev.parent;
@@ -562,52 +630,56 @@ static int net_failover_slave_register(struct net_device *slave_dev,
netdev_info(failover_dev, "failover %s slave:%s registered\n",
slave_is_standby ? "standby" : "primary", slave_dev->name);
- return 0;
+ return;
err_vlan_add:
dev_uc_unsync(slave_dev, failover_dev);
dev_mc_unsync(slave_dev, failover_dev);
dev_close(slave_dev);
err_dev_open:
- dev_put(slave_dev);
dev_set_mtu(slave_dev, orig_mtu);
done:
- return err;
+ return;
}
-static int net_failover_slave_pre_unregister(struct net_device *slave_dev,
- struct net_device *failover_dev)
+static void net_failover_setup(struct work_struct *w)
{
- struct net_device *standby_dev, *primary_dev;
- struct net_failover_info *nfo_info;
+ struct net_failover_info *nfo_info
+ = container_of(w, struct net_failover_info, standby_init.work);
+ struct net_device *failover_dev = nfo_info->failover_dev;
- nfo_info = netdev_priv(failover_dev);
- primary_dev = rtnl_dereference(nfo_info->primary_dev);
- standby_dev = rtnl_dereference(nfo_info->standby_dev);
-
- if (slave_dev != primary_dev && slave_dev != standby_dev)
- return -ENODEV;
+ /* handle race with cancel delayed work on removal */
+ if (!rtnl_trylock()) {
+ schedule_delayed_work(&nfo_info->standby_init, 0);
+ return;
+ }
- return 0;
+ __net_failover_setup(failover_dev);
+ rtnl_unlock();
}
-static int net_failover_slave_unregister(struct net_device *slave_dev,
- struct net_device *failover_dev)
+static int net_failover_unregister_event(struct net_device *slave_dev)
{
- struct net_device *standby_dev, *primary_dev;
+ struct net_device *failover_dev, *primary_dev, *standby_dev;
struct net_failover_info *nfo_info;
bool slave_is_standby;
+ failover_dev = netdev_failover_upper_get(slave_dev);
+ if (!failover_dev)
+ return NOTIFY_DONE;
+
nfo_info = netdev_priv(failover_dev);
primary_dev = rtnl_dereference(nfo_info->primary_dev);
standby_dev = rtnl_dereference(nfo_info->standby_dev);
+ if (slave_dev != primary_dev && slave_dev != standby_dev)
+ return NOTIFY_DONE;
+
vlan_vids_del_by_dev(slave_dev, failover_dev);
dev_uc_unsync(slave_dev, failover_dev);
dev_mc_unsync(slave_dev, failover_dev);
dev_close(slave_dev);
- nfo_info = netdev_priv(failover_dev);
dev_get_stats(failover_dev, &nfo_info->failover_stats);
slave_is_standby = slave_dev->dev.parent == failover_dev->dev.parent;
@@ -628,22 +700,25 @@ static int net_failover_slave_unregister(struct net_device *slave_dev,
netdev_info(failover_dev, "failover %s slave:%s unregistered\n",
slave_is_standby ? "standby" : "primary", slave_dev->name);
- return 0;
+ return NOTIFY_OK;
}
-static int net_failover_slave_link_change(struct net_device *slave_dev,
- struct net_device *failover_dev)
+static int net_failover_link_event(struct net_device *slave_dev)
+
{
- struct net_device *primary_dev, *standby_dev;
+ struct net_device *failover_dev, *primary_dev, *standby_dev;
struct net_failover_info *nfo_info;
- nfo_info = netdev_priv(failover_dev);
+ failover_dev = netdev_failover_upper_get(slave_dev);
+ if (!failover_dev)
+ return NOTIFY_DONE;
+ nfo_info = netdev_priv(failover_dev);
primary_dev = rtnl_dereference(nfo_info->primary_dev);
standby_dev = rtnl_dereference(nfo_info->standby_dev);
if (slave_dev != primary_dev && slave_dev != standby_dev)
- return -ENODEV;
+ return NOTIFY_DONE;
if ((primary_dev && net_failover_xmit_ready(primary_dev)) ||
(standby_dev && net_failover_xmit_ready(standby_dev))) {
@@ -657,43 +732,11 @@ static int net_failover_slave_link_change(struct net_device *slave_dev,
net_failover_lower_state_changed(slave_dev, primary_dev, standby_dev);
- return 0;
+ return NOTIFY_DONE;
}
-static int net_failover_slave_name_change(struct net_device *slave_dev,
- struct net_device *failover_dev)
-{
- struct net_device *primary_dev, *standby_dev;
- struct net_failover_info *nfo_info;
-
- nfo_info = netdev_priv(failover_dev);
-
- primary_dev = rtnl_dereference(nfo_info->primary_dev);
- standby_dev = rtnl_dereference(nfo_info->standby_dev);
-
- if (slave_dev != primary_dev && slave_dev != standby_dev)
- return -ENODEV;
-
- /* We need to bring up the slave after the rename by udev in case
- * open failed with EBUSY when it was registered.
- */
- dev_open(slave_dev);
-
- return 0;
-}
-
-static struct failover_ops net_failover_ops = {
- .slave_pre_register = net_failover_slave_pre_register,
- .slave_register = net_failover_slave_register,
- .slave_pre_unregister = net_failover_slave_pre_unregister,
- .slave_unregister = net_failover_slave_unregister,
- .slave_link_change = net_failover_slave_link_change,
- .slave_name_change = net_failover_slave_name_change,
- .slave_handle_frame = net_failover_handle_frame,
-};
-
/**
- * net_failover_create - Create and register a failover instance
+ * net_failover_create - Create and register a failover device
*
* @dev: standby netdev
*
@@ -703,13 +746,12 @@ static struct failover_ops net_failover_ops = {
* the original standby netdev and a VF netdev with the same MAC gets
* registered as primary netdev.
*
- * Return: pointer to failover instance
+ * Return: pointer to failover network device
*/
-struct failover *net_failover_create(struct net_device *standby_dev)
+struct net_device *net_failover_create(struct net_device *standby_dev)
{
- struct device *dev = standby_dev->dev.parent;
+ struct net_failover_info *nfo_info;
struct net_device *failover_dev;
- struct failover *failover;
int err;
/* Alloc at least 2 queues, for now we are going with 16 assuming
@@ -717,18 +759,22 @@ struct failover *net_failover_create(struct net_device *standby_dev)
*/
failover_dev = alloc_etherdev_mq(sizeof(struct net_failover_info), 16);
if (!failover_dev) {
- dev_err(dev, "Unable to allocate failover_netdev!\n");
- return ERR_PTR(-ENOMEM);
+ netdev_err(standby_dev, "Unable to allocate failover_netdev!\n");
+ return NULL;
}
+ nfo_info = netdev_priv(failover_dev);
dev_net_set(failover_dev, dev_net(standby_dev));
- SET_NETDEV_DEV(failover_dev, dev);
+ nfo_info->failover_dev = failover_dev;
+ INIT_DELAYED_WORK(&nfo_info->standby_init, net_failover_setup);
failover_dev->netdev_ops = &failover_dev_ops;
failover_dev->ethtool_ops = &failover_ethtool_ops;
/* Initialize the device options */
- failover_dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE;
+ failover_dev->priv_flags |= IFF_UNICAST_FLT |
+ IFF_NO_QUEUE |
+ IFF_FAILOVER;
failover_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE |
IFF_TX_SKB_SHARING);
@@ -746,29 +792,38 @@ struct failover *net_failover_create(struct net_device *standby_dev)
failover_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
failover_dev->features |= failover_dev->hw_features;
- memcpy(failover_dev->dev_addr, standby_dev->dev_addr,
- failover_dev->addr_len);
+ ether_addr_copy(failover_dev->dev_addr, standby_dev->dev_addr);
+ ether_addr_copy(failover_dev->perm_addr, standby_dev->perm_addr);
failover_dev->min_mtu = standby_dev->min_mtu;
failover_dev->max_mtu = standby_dev->max_mtu;
- err = register_netdev(failover_dev);
+ netif_carrier_off(failover_dev);
+
+ rtnl_lock();
+ err = register_netdevice(failover_dev);
if (err) {
- dev_err(dev, "Unable to register failover_dev!\n");
+ netdev_err(standby_dev, "Unable to register failover_dev!\n");
goto err_register_netdev;
}
- netif_carrier_off(failover_dev);
+ err = netdev_failover_join(standby_dev, failover_dev,
+ net_failover_handle_frame);
+ if (err) {
+ netdev_err(failover_dev, "Unable to join with %s\n",
+ standby_dev->name);
+ goto err_failover_join;
+ }
- failover = failover_register(failover_dev, &net_failover_ops);
- if (IS_ERR(failover))
- goto err_failover_register;
+ list_add(&nfo_info->list, &net_failover_list);
+ rtnl_unlock();
- return failover;
+ return failover_dev;
-err_failover_register:
- unregister_netdev(failover_dev);
+err_failover_join:
+ unregister_netdevice(failover_dev);
err_register_netdev:
+ rtnl_unlock();
free_netdev(failover_dev);
return ERR_PTR(err);
@@ -786,31 +841,27 @@ EXPORT_SYMBOL_GPL(net_failover_create);
* netdev. Used by paravirtual drivers that use 3-netdev model.
*
*/
-void net_failover_destroy(struct failover *failover)
+void net_failover_destroy(struct net_device *failover_dev)
{
- struct net_failover_info *nfo_info;
- struct net_device *failover_dev;
+ struct net_failover_info *nfo_info = netdev_priv(failover_dev);
struct net_device *slave_dev;
- if (!failover)
- return;
-
- failover_dev = rcu_dereference(failover->failover_dev);
- nfo_info = netdev_priv(failover_dev);
-
netif_device_detach(failover_dev);
rtnl_lock();
-
slave_dev = rtnl_dereference(nfo_info->primary_dev);
- if (slave_dev)
- failover_slave_unregister(slave_dev);
+ if (slave_dev) {
+ netdev_failover_unjoin(slave_dev, failover_dev);
+ dev_put(slave_dev);
+ }
slave_dev = rtnl_dereference(nfo_info->standby_dev);
- if (slave_dev)
- failover_slave_unregister(slave_dev);
+ if (slave_dev) {
+ netdev_failover_unjoin(slave_dev, failover_dev);
+ dev_put(slave_dev);
+ }
- failover_unregister(failover);
+ list_del(&nfo_info->list);
unregister_netdevice(failover_dev);
@@ -820,9 +871,53 @@ void net_failover_destroy(struct failover *failover)
}
EXPORT_SYMBOL_GPL(net_failover_destroy);
+static int net_failover_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
+
+ /* Skip parent events */
+ if (netif_is_failover(event_dev))
+ return NOTIFY_DONE;
+
+ /* Avoid non-Ethernet type devices */
+ if (event_dev->type != ARPHRD_ETHER)
+ return NOTIFY_DONE;
+
+ /* Avoid Vlan dev with same MAC registering as VF */
+ if (is_vlan_dev(event_dev))
+ return NOTIFY_DONE;
+
+ /* Avoid Bonding master dev with same MAC registering as VF */
+ if ((event_dev->priv_flags & IFF_BONDING) &&
+ (event_dev->flags & IFF_MASTER))
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ return net_failover_register_event(event_dev);
+
+ case NETDEV_UNREGISTER:
+ return net_failover_unregister_event(event_dev);
+
+ case NETDEV_UP:
+ case NETDEV_DOWN:
+ case NETDEV_CHANGE:
+ return net_failover_link_event(event_dev);
+
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
+static struct notifier_block net_failover_notifier = {
+ .notifier_call = net_failover_event,
+};
+
static __init int
net_failover_init(void)
{
+ register_netdevice_notifier(&net_failover_notifier);
return 0;
}
module_init(net_failover_init);
@@ -830,6 +925,7 @@ module_init(net_failover_init);
static __exit
void net_failover_exit(void)
{
+ unregister_netdevice_notifier(&net_failover_notifier);
}
module_exit(net_failover_exit);
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 6d710b8b41c5..b40ae28dac93 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -215,7 +215,7 @@ struct virtnet_info {
unsigned long guest_offloads;
/* failover when STANDBY feature enabled */
- struct failover *failover;
+ struct net_device *failover;
};
struct padded_vnet_hdr {
@@ -2930,11 +2930,10 @@ static int virtnet_probe(struct virtio_device *vdev)
virtnet_init_settings(dev);
if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
- vi->failover = net_failover_create(vi->dev);
- if (IS_ERR(vi->failover)) {
- err = PTR_ERR(vi->failover);
+ err = -ENOMEM;
+ vi->failover = net_failover_create(dev);
+ if (!vi->failover)
goto free_vqs;
- }
}
err = register_netdev(dev);
diff --git a/include/net/failover.h b/include/net/failover.h
index bb15438f39c7..22d6c1369101 100644
--- a/include/net/failover.h
+++ b/include/net/failover.h
@@ -6,31 +6,10 @@
#include <linux/netdevice.h>
-struct failover_ops {
- int (*slave_pre_register)(struct net_device *slave_dev,
- struct net_device *failover_dev);
- int (*slave_register)(struct net_device *slave_dev,
- struct net_device *failover_dev);
- int (*slave_pre_unregister)(struct net_device *slave_dev,
- struct net_device *failover_dev);
- int (*slave_unregister)(struct net_device *slave_dev,
- struct net_device *failover_dev);
- int (*slave_link_change)(struct net_device *slave_dev,
- struct net_device *failover_dev);
- int (*slave_name_change)(struct net_device *slave_dev,
- struct net_device *failover_dev);
- rx_handler_result_t (*slave_handle_frame)(struct sk_buff **pskb);
-};
-
-struct failover {
- struct list_head list;
- struct net_device __rcu *failover_dev;
- struct failover_ops __rcu *ops;
-};
-
-struct failover *failover_register(struct net_device *dev,
- struct failover_ops *ops);
-void failover_unregister(struct failover *failover);
-int failover_slave_unregister(struct net_device *slave_dev);
+int netdev_failover_join(struct net_device *lower, struct net_device *upper,
+ rx_handler_func_t *rx_handler);
+struct net_device *netdev_failover_upper_get(struct net_device *lower);
+void netdev_failover_unjoin(struct net_device *lower,
+ struct net_device *upper);
#endif /* _FAILOVER_H */
diff --git a/include/net/net_failover.h b/include/net/net_failover.h
index b12a1c469d1c..a99b3b00b4e3 100644
--- a/include/net/net_failover.h
+++ b/include/net/net_failover.h
@@ -6,35 +6,7 @@
#include <net/failover.h>
-/* failover state */
-struct net_failover_info {
- /* primary netdev with same MAC */
- struct net_device __rcu *primary_dev;
-
- /* standby netdev */
- struct net_device __rcu *standby_dev;
-
- /* primary netdev stats */
- struct rtnl_link_stats64 primary_stats;
-
- /* standby netdev stats */
- struct rtnl_link_stats64 standby_stats;
-
- /* aggregated stats */
- struct rtnl_link_stats64 failover_stats;
-
- /* spinlock while updating stats */
- spinlock_t stats_lock;
-};
-
-struct failover *net_failover_create(struct net_device *standby_dev);
-void net_failover_destroy(struct failover *failover);
-
-#define FAILOVER_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \
- NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \
- NETIF_F_HIGHDMA | NETIF_F_LRO)
-
-#define FAILOVER_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \
- NETIF_F_RXCSUM | NETIF_F_ALL_TSO)
+struct net_device *net_failover_create(struct net_device *standby_dev);
+void net_failover_destroy(struct net_device *failover_dev);
#endif /* _NET_FAILOVER_H */
diff --git a/net/Kconfig b/net/Kconfig
index f738a6f27665..697d84202695 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -433,17 +433,8 @@ config PAGE_POOL
bool
config FAILOVER
- tristate "Generic failover module"
- help
- The failover module provides a generic interface for paravirtual
- drivers to register a netdev and a set of ops with a failover
- instance. The ops are used as event handlers that get called to
- handle netdev register/unregister/link change/name change events
- on slave pci ethernet devices with the same mac address as the
- failover netdev. This enables paravirtual drivers to use a
- VF as an accelerated low latency datapath. It also allows live
- migration of VMs with direct attached VFs by failing over to the
- paravirtual datapath when the VF is unplugged.
+ bool
+ default n
endif # if NET
diff --git a/net/core/failover.c b/net/core/failover.c
index 4a92a98ccce9..499f0fd7e4d3 100644
--- a/net/core/failover.c
+++ b/net/core/failover.c
@@ -1,10 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018, Intel Corporation. */
-/* A common module to handle registrations and notifications for paravirtual
+/* A library for managing chained upper/lower devices such as
* drivers to enable accelerated datapath and support VF live migration.
- *
- * The notifier and event handling code is based on netvsc driver.
*/
#include <linux/module.h>
@@ -14,302 +12,62 @@
#include <linux/if_vlan.h>
#include <net/failover.h>
-static LIST_HEAD(failover_list);
-static DEFINE_SPINLOCK(failover_lock);
-
-static struct net_device *failover_get_bymac(u8 *mac, struct failover_ops **ops)
-{
- struct net_device *failover_dev;
- struct failover *failover;
-
- spin_lock(&failover_lock);
- list_for_each_entry(failover, &failover_list, list) {
- failover_dev = rtnl_dereference(failover->failover_dev);
- if (ether_addr_equal(failover_dev->perm_addr, mac)) {
- *ops = rtnl_dereference(failover->ops);
- spin_unlock(&failover_lock);
- return failover_dev;
- }
- }
- spin_unlock(&failover_lock);
- return NULL;
-}
-
-/**
- * failover_slave_register - Register a slave netdev
- *
- * @slave_dev: slave netdev that is being registered
- *
- * Registers a slave device to a failover instance. Only ethernet devices
- * are supported.
- */
-static int failover_slave_register(struct net_device *slave_dev)
+/* netdev_failover_join - Join a lower netdev with an upper device. */
+int netdev_failover_join(struct net_device *lower_dev,
+ struct net_device *upper_dev,
+ rx_handler_func_t *rx_handler)
{
- struct netdev_lag_upper_info lag_upper_info;
- struct net_device *failover_dev;
- struct failover_ops *fops;
int err;
- if (slave_dev->type != ARPHRD_ETHER)
- goto done;
-
ASSERT_RTNL();
- failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops);
- if (!failover_dev)
- goto done;
+ /* Don't allow joining devices of different protocols */
+ if (upper_dev->type != lower_dev->type)
+ return -EINVAL;
- if (fops && fops->slave_pre_register &&
- fops->slave_pre_register(slave_dev, failover_dev))
- goto done;
-
- err = netdev_rx_handler_register(slave_dev, fops->slave_handle_frame,
- failover_dev);
+ err = netdev_rx_handler_register(lower_dev, rx_handler, upper_dev);
if (err) {
- netdev_err(slave_dev, "can not register failover rx handler (err = %d)\n",
+ netdev_err(lower_dev,
+ "can not register failover rx handler (err = %d)\n",
err);
- goto done;
+ return err;
}
- lag_upper_info.tx_type = NETDEV_LAG_TX_TYPE_ACTIVEBACKUP;
- err = netdev_master_upper_dev_link(slave_dev, failover_dev, NULL,
- &lag_upper_info, NULL);
+ err = netdev_master_upper_dev_link(lower_dev, upper_dev, NULL,
+ NULL, NULL);
if (err) {
- netdev_err(slave_dev, "can not set failover device %s (err = %d)\n",
- failover_dev->name, err);
- goto err_upper_link;
+ netdev_err(lower_dev,
+ "can not set failover device %s (err = %d)\n",
+ upper_dev->name, err);
+ netdev_rx_handler_unregister(lower_dev);
+ return err;
}
- slave_dev->priv_flags |= IFF_FAILOVER_SLAVE;
-
- if (fops && fops->slave_register &&
- !fops->slave_register(slave_dev, failover_dev))
- return NOTIFY_OK;
-
- netdev_upper_dev_unlink(slave_dev, failover_dev);
- slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
-err_upper_link:
- netdev_rx_handler_unregister(slave_dev);
-done:
- return NOTIFY_DONE;
-}
-
-/**
- * failover_slave_unregister - Unregister a slave netdev
- *
- * @slave_dev: slave netdev that is being unregistered
- *
- * Unregisters a slave device from a failover instance.
- */
-int failover_slave_unregister(struct net_device *slave_dev)
-{
- struct net_device *failover_dev;
- struct failover_ops *fops;
-
- if (!netif_is_failover_slave(slave_dev))
- goto done;
-
- ASSERT_RTNL();
-
- failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops);
- if (!failover_dev)
- goto done;
-
- if (fops && fops->slave_pre_unregister &&
- fops->slave_pre_unregister(slave_dev, failover_dev))
- goto done;
-
- netdev_rx_handler_unregister(slave_dev);
- netdev_upper_dev_unlink(slave_dev, failover_dev);
- slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
-
- if (fops && fops->slave_unregister &&
- !fops->slave_unregister(slave_dev, failover_dev))
- return NOTIFY_OK;
-
-done:
- return NOTIFY_DONE;
+ dev_hold(lower_dev);
+ lower_dev->priv_flags |= IFF_FAILOVER_SLAVE;
+ return 0;
}
-EXPORT_SYMBOL_GPL(failover_slave_unregister);
+EXPORT_SYMBOL_GPL(netdev_failover_join);
-static int failover_slave_link_change(struct net_device *slave_dev)
+/* Find upper network device for failover slave device */
+struct net_device *netdev_failover_upper_get(struct net_device *lower_dev)
{
- struct net_device *failover_dev;
- struct failover_ops *fops;
-
- if (!netif_is_failover_slave(slave_dev))
- goto done;
-
- ASSERT_RTNL();
-
- failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops);
- if (!failover_dev)
- goto done;
-
- if (!netif_running(failover_dev))
- goto done;
+ if (!netif_is_failover_slave(lower_dev))
+ return NULL;
- if (fops && fops->slave_link_change &&
- !fops->slave_link_change(slave_dev, failover_dev))
- return NOTIFY_OK;
-
-done:
- return NOTIFY_DONE;
+ return netdev_master_upper_dev_get(lower_dev);
}
+EXPORT_SYMBOL_GPL(netdev_failover_upper_get);
-static int failover_slave_name_change(struct net_device *slave_dev)
+/* netdev_failover_unjoin - Break connection between lower and upper device. */
+void netdev_failover_unjoin(struct net_device *lower_dev,
+ struct net_device *upper_dev)
{
- struct net_device *failover_dev;
- struct failover_ops *fops;
-
- if (!netif_is_failover_slave(slave_dev))
- goto done;
-
ASSERT_RTNL();
- failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops);
- if (!failover_dev)
- goto done;
-
- if (!netif_running(failover_dev))
- goto done;
-
- if (fops && fops->slave_name_change &&
- !fops->slave_name_change(slave_dev, failover_dev))
- return NOTIFY_OK;
-
-done:
- return NOTIFY_DONE;
-}
-
-static int
-failover_event(struct notifier_block *this, unsigned long event, void *ptr)
-{
- struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
-
- /* Skip parent events */
- if (netif_is_failover(event_dev))
- return NOTIFY_DONE;
-
- switch (event) {
- case NETDEV_REGISTER:
- return failover_slave_register(event_dev);
- case NETDEV_UNREGISTER:
- return failover_slave_unregister(event_dev);
- case NETDEV_UP:
- case NETDEV_DOWN:
- case NETDEV_CHANGE:
- return failover_slave_link_change(event_dev);
- case NETDEV_CHANGENAME:
- return failover_slave_name_change(event_dev);
- default:
- return NOTIFY_DONE;
- }
-}
-
-static struct notifier_block failover_notifier = {
- .notifier_call = failover_event,
-};
-
-static void
-failover_existing_slave_register(struct net_device *failover_dev)
-{
- struct net *net = dev_net(failover_dev);
- struct net_device *dev;
-
- rtnl_lock();
- for_each_netdev(net, dev) {
- if (netif_is_failover(dev))
- continue;
- if (ether_addr_equal(failover_dev->perm_addr, dev->perm_addr))
- failover_slave_register(dev);
- }
- rtnl_unlock();
-}
-
-/**
- * failover_register - Register a failover instance
- *
- * @dev: failover netdev
- * @ops: failover ops
- *
- * Allocate and register a failover instance for a failover netdev. ops
- * provides handlers for slave device register/unregister/link change/
- * name change events.
- *
- * Return: pointer to failover instance
- */
-struct failover *failover_register(struct net_device *dev,
- struct failover_ops *ops)
-{
- struct failover *failover;
-
- if (dev->type != ARPHRD_ETHER)
- return ERR_PTR(-EINVAL);
-
- failover = kzalloc(sizeof(*failover), GFP_KERNEL);
- if (!failover)
- return ERR_PTR(-ENOMEM);
-
- rcu_assign_pointer(failover->ops, ops);
- dev_hold(dev);
- dev->priv_flags |= IFF_FAILOVER;
- rcu_assign_pointer(failover->failover_dev, dev);
-
- spin_lock(&failover_lock);
- list_add_tail(&failover->list, &failover_list);
- spin_unlock(&failover_lock);
-
- netdev_info(dev, "failover master:%s registered\n", dev->name);
-
- failover_existing_slave_register(dev);
-
- return failover;
-}
-EXPORT_SYMBOL_GPL(failover_register);
-
-/**
- * failover_unregister - Unregister a failover instance
- *
- * @failover: pointer to failover instance
- *
- * Unregisters and frees a failover instance.
- */
-void failover_unregister(struct failover *failover)
-{
- struct net_device *failover_dev;
-
- failover_dev = rcu_dereference(failover->failover_dev);
-
- netdev_info(failover_dev, "failover master:%s unregistered\n",
- failover_dev->name);
-
- failover_dev->priv_flags &= ~IFF_FAILOVER;
- dev_put(failover_dev);
-
- spin_lock(&failover_lock);
- list_del(&failover->list);
- spin_unlock(&failover_lock);
-
- kfree(failover);
+ netdev_rx_handler_unregister(lower_dev);
+ netdev_upper_dev_unlink(lower_dev, upper_dev);
+ dev_put(lower_dev);
+ lower_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
}
-EXPORT_SYMBOL_GPL(failover_unregister);
-
-static __init int
-failover_init(void)
-{
- register_netdevice_notifier(&failover_notifier);
-
- return 0;
-}
-module_init(failover_init);
-
-static __exit
-void failover_exit(void)
-{
- unregister_netdevice_notifier(&failover_notifier);
-}
-module_exit(failover_exit);
-
-MODULE_DESCRIPTION("Generic failover infrastructure/interface");
-MODULE_LICENSE("GPL v2");
+EXPORT_SYMBOL_GPL(netdev_failover_unjoin);
--
2.17.1
Powered by blists - more mailing lists