[b/oracle][PATCH] UBUNTU: SAUCE: net_failover: delay taking over primary device to accommodate udevd renaming

Marcelo Henrique Cerri marcelo.cerri at canonical.com
Fri Mar 1 02:17:41 UTC 2019


From: Si-Wei Liu <si-wei.liu at oracle.com>

BugLink: http://bugs.launchpad.net/bugs/1815268

There is an inherent race with udev rename in userspace due to the exposure
of two lower slave devices while kernel attempts to manage the creation
for failover bonding itself automatically. The existing userspace naming
logic in udevd was not specifically written for this in-kernel automagic.

The clean fix for the problem is either to update the udevd to not try
rename the 3-netdev (ideally rename the device in a coordinated manner),
or to fix the kernel to hide the 2 lower devices which does not have to
be shown to userspace unless needed (1-netdev model).

However, our pursuance of 1-netdev model has not been acknowledged by
upstream, and there's no motivation in the systemd/udevd community at
this point to refactor the rename logic and make it work well with
3-netdev.

Hyper-V's netvsc mitigated this by postponing the VF's dev_open() to
allow a userspace thread to rename the device within a 100ms worth of
window. For the interim, we follow the same as done by netvsc to avoid
the renaming failure, until we move to the point where a clean solution
is available in upstream.

OraBug: 28938696

Signed-off-by: Si-Wei Liu <si-wei.liu at oracle.com>
[marcelo.cerri: small fixes to suppress checkpatch.pl warnings]
Signed-off-by: Marcelo Henrique Cerri <marcelo.cerri at canonical.com>
---
 drivers/net/net_failover.c | 73 +++++++++++++++++++++++++++++++++-----
 include/net/net_failover.h |  6 ++++
 2 files changed, 71 insertions(+), 8 deletions(-)

diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c
index 4f390fa557e4..eeeed475a6a5 100644
--- a/drivers/net/net_failover.c
+++ b/drivers/net/net_failover.c
@@ -28,6 +28,10 @@
 #include <uapi/linux/if_arp.h>
 #include <net/net_failover.h>
 
+#define TAKEOVER_DELAY_DEFAULT 100
+static unsigned long takeover_delay = TAKEOVER_DELAY_DEFAULT;
+module_param(takeover_delay, ulong, 0000);
+
 static bool net_failover_xmit_ready(struct net_device *dev)
 {
 	return netif_running(dev) && netif_carrier_ok(dev);
@@ -501,6 +505,7 @@ static int net_failover_slave_register(struct net_device *slave_dev,
 {
 	struct net_device *standby_dev, *primary_dev;
 	struct net_failover_info *nfo_info;
+	bool work_scheduled = false;
 	bool slave_is_standby;
 	u32 orig_mtu;
 	int err;
@@ -516,12 +521,21 @@ static int net_failover_slave_register(struct net_device *slave_dev,
 
 	dev_hold(slave_dev);
 
+	slave_is_standby = slave_dev->dev.parent == failover_dev->dev.parent;
+	nfo_info = netdev_priv(failover_dev);
+
 	if (netif_running(failover_dev)) {
-		err = dev_open(slave_dev);
-		if (err && (err != -EBUSY)) {
-			netdev_err(failover_dev, "Opening slave %s failed err:%d\n",
-				   slave_dev->name, err);
-			goto err_dev_open;
+		if (takeover_delay && !slave_is_standby) {
+			schedule_delayed_work(&nfo_info->takeover,
+					      takeover_delay * HZ / 1000);
+			work_scheduled = true;
+		} else {
+			err = dev_open(slave_dev);
+			if (err && (err != -EBUSY)) {
+				netdev_err(failover_dev, "Opening slave %s failed err:%d\n",
+					   slave_dev->name, err);
+				goto err_dev_open;
+			}
 		}
 	}
 
@@ -534,13 +548,13 @@ static int net_failover_slave_register(struct net_device *slave_dev,
 	if (err) {
 		netdev_err(failover_dev, "Failed to add vlan ids to device %s err:%d\n",
 			   slave_dev->name, err);
+		if (work_scheduled)
+			cancel_delayed_work(&nfo_info->takeover);
 		goto err_vlan_add;
 	}
 
-	nfo_info = netdev_priv(failover_dev);
 	standby_dev = rtnl_dereference(nfo_info->standby_dev);
 	primary_dev = rtnl_dereference(nfo_info->primary_dev);
-	slave_is_standby = slave_dev->dev.parent == failover_dev->dev.parent;
 
 	if (slave_is_standby) {
 		rcu_assign_pointer(nfo_info->standby_dev, slave_dev);
@@ -677,11 +691,48 @@ static int net_failover_slave_name_change(struct net_device *slave_dev,
 	/* We need to bring up the slave after the rename by udev in case
 	 * open failed with EBUSY when it was registered.
 	 */
-	dev_open(slave_dev);
+	if (netif_running(failover_dev)) {
+		dev_open(slave_dev);
+
+		net_failover_lower_state_changed(slave_dev,
+						 primary_dev, standby_dev);
+	}
 
 	return 0;
 }
 
+static void net_failover_takeover_primary(struct work_struct *w)
+{
+	struct net_failover_info *nfo_info
+		= container_of(w, struct net_failover_info, takeover.work);
+	struct net_device *primary_dev, *standby_dev;
+	struct net_device *failover_dev;
+	int err;
+
+	if (!rtnl_trylock()) {
+		schedule_delayed_work(&nfo_info->takeover, 0);
+		return;
+	}
+
+	failover_dev = nfo_info->failover_dev;
+	primary_dev = rtnl_dereference(nfo_info->primary_dev);
+	standby_dev = rtnl_dereference(nfo_info->standby_dev);
+
+	if (primary_dev && netif_running(failover_dev)) {
+		err = dev_open(primary_dev);
+		if (err) {
+			netdev_err(failover_dev, "Opening primary %s failed err:%d\n",
+				   primary_dev->name, err);
+		} else {
+			net_failover_lower_state_changed(primary_dev,
+							 primary_dev,
+							 standby_dev);
+		}
+	}
+
+	rtnl_unlock();
+}
+
 static struct failover_ops net_failover_ops = {
 	.slave_pre_register	= net_failover_slave_pre_register,
 	.slave_register		= net_failover_slave_register,
@@ -708,6 +759,7 @@ static struct failover_ops net_failover_ops = {
 struct failover *net_failover_create(struct net_device *standby_dev)
 {
 	struct device *dev = standby_dev->dev.parent;
+	struct net_failover_info *nfo_info;
 	struct net_device *failover_dev;
 	struct failover *failover;
 	int err;
@@ -759,6 +811,9 @@ struct failover *net_failover_create(struct net_device *standby_dev)
 	}
 
 	netif_carrier_off(failover_dev);
+	nfo_info = netdev_priv(failover_dev);
+	nfo_info->failover_dev = failover_dev;
+	INIT_DELAYED_WORK(&nfo_info->takeover, net_failover_takeover_primary);
 
 	failover = failover_register(failover_dev, &net_failover_ops);
 	if (IS_ERR(failover))
@@ -798,6 +853,8 @@ void net_failover_destroy(struct failover *failover)
 	failover_dev = rcu_dereference(failover->failover_dev);
 	nfo_info = netdev_priv(failover_dev);
 
+	cancel_delayed_work_sync(&nfo_info->takeover);
+
 	netif_device_detach(failover_dev);
 
 	rtnl_lock();
diff --git a/include/net/net_failover.h b/include/net/net_failover.h
index b12a1c469d1c..3cd0a6142b2b 100644
--- a/include/net/net_failover.h
+++ b/include/net/net_failover.h
@@ -25,6 +25,12 @@ struct net_failover_info {
 
 	/* spinlock while updating stats */
 	spinlock_t stats_lock;
+
+	/* back reference to associated net_device */
+	struct net_device *failover_dev;
+
+	/* delayed work to take over primary netdev */
+	struct delayed_work takeover;
 };
 
 struct failover *net_failover_create(struct net_device *standby_dev);
-- 
2.17.1




More information about the kernel-team mailing list