ACK/Cmnt: [b/oracle][PATCH] UBUNTU: SAUCE: net_failover: delay taking over primary device to accommodate udevd renaming

Stefan Bader stefan.bader at canonical.com
Fri Mar 1 13:32:56 UTC 2019


On 01.03.19 03:17, Marcelo Henrique Cerri wrote:
> From: Si-Wei Liu <si-wei.liu at oracle.com>
> 
> BugLink: http://bugs.launchpad.net/bugs/1815268
> 
> There is an inherent race with udev rename in userspace due to the exposure
> of two lower slave devices while kernel attempts to manage the creation
> for failover bonding itself automatically. The existing userspace naming
> logic in udevd was not specifically written for this in-kernel automagic.
> 
> The clean fix for the problem is either to update the udevd to not try
> rename the 3-netdev (ideally rename the device in a coordinated manner),
> or to fix the kernel to hide the 2 lower devices which does not have to
> be shown to userspace unless needed (1-netdev model).
> 
> However, our pursuance of 1-netdev model has not been acknowledged by
> upstream, and there's no motivation in the systemd/udevd community at
> this point to refactor the rename logic and make it work well with
> 3-netdev.
> 
> Hyper-V's netvsc mitigated this by postponing the VF's dev_open() to
> allow a userspace thread to rename the device within a 100ms worth of
> window. For the interim, we follow the same as done by netvsc to avoid
> the renaming failure, until we move to the point where a clean solution
> is available in upstream.
> 
> OraBug: 28938696
> 
> Signed-off-by: Si-Wei Liu <si-wei.liu at oracle.com>
> [marcelo.cerri: small fixes to suppress checkpatch.pl warnings]
> Signed-off-by: Marcelo Henrique Cerri <marcelo.cerri at canonical.com>
Acked-by: Stefan Bader <stefan.bader at canonical.com>
> ---

Since this is isolated to the custom kernel it is testable and only might have
any effect in that environment.

>  drivers/net/net_failover.c | 73 +++++++++++++++++++++++++++++++++-----
>  include/net/net_failover.h |  6 ++++
>  2 files changed, 71 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c
> index 4f390fa557e4..eeeed475a6a5 100644
> --- a/drivers/net/net_failover.c
> +++ b/drivers/net/net_failover.c
> @@ -28,6 +28,10 @@
>  #include <uapi/linux/if_arp.h>
>  #include <net/net_failover.h>
>  
> +#define TAKEOVER_DELAY_DEFAULT 100
> +static unsigned long takeover_delay = TAKEOVER_DELAY_DEFAULT;
> +module_param(takeover_delay, ulong, 0000);
> +
>  static bool net_failover_xmit_ready(struct net_device *dev)
>  {
>  	return netif_running(dev) && netif_carrier_ok(dev);
> @@ -501,6 +505,7 @@ static int net_failover_slave_register(struct net_device *slave_dev,
>  {
>  	struct net_device *standby_dev, *primary_dev;
>  	struct net_failover_info *nfo_info;
> +	bool work_scheduled = false;
>  	bool slave_is_standby;
>  	u32 orig_mtu;
>  	int err;
> @@ -516,12 +521,21 @@ static int net_failover_slave_register(struct net_device *slave_dev,
>  
>  	dev_hold(slave_dev);
>  
> +	slave_is_standby = slave_dev->dev.parent == failover_dev->dev.parent;
> +	nfo_info = netdev_priv(failover_dev);
> +
>  	if (netif_running(failover_dev)) {
> -		err = dev_open(slave_dev);
> -		if (err && (err != -EBUSY)) {
> -			netdev_err(failover_dev, "Opening slave %s failed err:%d\n",
> -				   slave_dev->name, err);
> -			goto err_dev_open;
> +		if (takeover_delay && !slave_is_standby) {
> +			schedule_delayed_work(&nfo_info->takeover,
> +					      takeover_delay * HZ / 1000);
> +			work_scheduled = true;
> +		} else {
> +			err = dev_open(slave_dev);
> +			if (err && (err != -EBUSY)) {
> +				netdev_err(failover_dev, "Opening slave %s failed err:%d\n",
> +					   slave_dev->name, err);
> +				goto err_dev_open;
> +			}
>  		}
>  	}
>  
> @@ -534,13 +548,13 @@ static int net_failover_slave_register(struct net_device *slave_dev,
>  	if (err) {
>  		netdev_err(failover_dev, "Failed to add vlan ids to device %s err:%d\n",
>  			   slave_dev->name, err);
> +		if (work_scheduled)
> +			cancel_delayed_work(&nfo_info->takeover);
>  		goto err_vlan_add;
>  	}
>  
> -	nfo_info = netdev_priv(failover_dev);
>  	standby_dev = rtnl_dereference(nfo_info->standby_dev);
>  	primary_dev = rtnl_dereference(nfo_info->primary_dev);
> -	slave_is_standby = slave_dev->dev.parent == failover_dev->dev.parent;
>  
>  	if (slave_is_standby) {
>  		rcu_assign_pointer(nfo_info->standby_dev, slave_dev);
> @@ -677,11 +691,48 @@ static int net_failover_slave_name_change(struct net_device *slave_dev,
>  	/* We need to bring up the slave after the rename by udev in case
>  	 * open failed with EBUSY when it was registered.
>  	 */
> -	dev_open(slave_dev);
> +	if (netif_running(failover_dev)) {
> +		dev_open(slave_dev);
> +
> +		net_failover_lower_state_changed(slave_dev,
> +						 primary_dev, standby_dev);
> +	}
>  
>  	return 0;
>  }
>  
> +static void net_failover_takeover_primary(struct work_struct *w)
> +{
> +	struct net_failover_info *nfo_info
> +		= container_of(w, struct net_failover_info, takeover.work);
> +	struct net_device *primary_dev, *standby_dev;
> +	struct net_device *failover_dev;
> +	int err;
> +
> +	if (!rtnl_trylock()) {
> +		schedule_delayed_work(&nfo_info->takeover, 0);
> +		return;
> +	}
> +
> +	failover_dev = nfo_info->failover_dev;
> +	primary_dev = rtnl_dereference(nfo_info->primary_dev);
> +	standby_dev = rtnl_dereference(nfo_info->standby_dev);
> +
> +	if (primary_dev && netif_running(failover_dev)) {
> +		err = dev_open(primary_dev);
> +		if (err) {
> +			netdev_err(failover_dev, "Opening primary %s failed err:%d\n",
> +				   primary_dev->name, err);
> +		} else {
> +			net_failover_lower_state_changed(primary_dev,
> +							 primary_dev,
> +							 standby_dev);
> +		}
> +	}
> +
> +	rtnl_unlock();
> +}
> +
>  static struct failover_ops net_failover_ops = {
>  	.slave_pre_register	= net_failover_slave_pre_register,
>  	.slave_register		= net_failover_slave_register,
> @@ -708,6 +759,7 @@ static struct failover_ops net_failover_ops = {
>  struct failover *net_failover_create(struct net_device *standby_dev)
>  {
>  	struct device *dev = standby_dev->dev.parent;
> +	struct net_failover_info *nfo_info;
>  	struct net_device *failover_dev;
>  	struct failover *failover;
>  	int err;
> @@ -759,6 +811,9 @@ struct failover *net_failover_create(struct net_device *standby_dev)
>  	}
>  
>  	netif_carrier_off(failover_dev);
> +	nfo_info = netdev_priv(failover_dev);
> +	nfo_info->failover_dev = failover_dev;
> +	INIT_DELAYED_WORK(&nfo_info->takeover, net_failover_takeover_primary);
>  
>  	failover = failover_register(failover_dev, &net_failover_ops);
>  	if (IS_ERR(failover))
> @@ -798,6 +853,8 @@ void net_failover_destroy(struct failover *failover)
>  	failover_dev = rcu_dereference(failover->failover_dev);
>  	nfo_info = netdev_priv(failover_dev);
>  
> +	cancel_delayed_work_sync(&nfo_info->takeover);
> +
>  	netif_device_detach(failover_dev);
>  
>  	rtnl_lock();
> diff --git a/include/net/net_failover.h b/include/net/net_failover.h
> index b12a1c469d1c..3cd0a6142b2b 100644
> --- a/include/net/net_failover.h
> +++ b/include/net/net_failover.h
> @@ -25,6 +25,12 @@ struct net_failover_info {
>  
>  	/* spinlock while updating stats */
>  	spinlock_t stats_lock;
> +
> +	/* back reference to associated net_device */
> +	struct net_device *failover_dev;
> +
> +	/* delayed work to take over primary netdev */
> +	struct delayed_work takeover;
>  };
>  
>  struct failover *net_failover_create(struct net_device *standby_dev);
> 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: OpenPGP digital signature
URL: <https://lists.ubuntu.com/archives/kernel-team/attachments/20190301/25051194/attachment.sig>


More information about the kernel-team mailing list