APPLIED/cmt: [b/oracle][PATCH] UBUNTU: SAUCE: net_failover: delay taking over primary device to accommodate udevd renaming
Khaled Elmously
khalid.elmously at canonical.com
Mon Mar 4 02:11:29 UTC 2019
Applied to bionic/oracle after changing BugLink to use https://
On 2019-02-28 23:17:41 , Marcelo Henrique Cerri wrote:
> From: Si-Wei Liu <si-wei.liu at oracle.com>
>
> BugLink: http://bugs.launchpad.net/bugs/1815268
>
> There is an inherent race with udev rename in userspace due to the exposure
> of two lower slave devices while kernel attempts to manage the creation
> for failover bonding itself automatically. The existing userspace naming
> logic in udevd was not specifically written for this in-kernel automagic.
>
> The clean fix for the problem is either to update the udevd to not try
> rename the 3-netdev (ideally rename the device in a coordinated manner),
> or to fix the kernel to hide the 2 lower devices which does not have to
> be shown to userspace unless needed (1-netdev model).
>
> However, our pursuance of 1-netdev model has not been acknowledged by
> upstream, and there's no motivation in the systemd/udevd community at
> this point to refactor the rename logic and make it work well with
> 3-netdev.
>
> Hyper-V's netvsc mitigated this by postponing the VF's dev_open() to
> allow a userspace thread to rename the device within a 100ms worth of
> window. For the interim, we follow the same as done by netvsc to avoid
> the renaming failure, until we move to the point where a clean solution
> is available in upstream.
>
> OraBug: 28938696
>
> Signed-off-by: Si-Wei Liu <si-wei.liu at oracle.com>
> [marcelo.cerri: small fixes to suppress checkpatch.pl warnings]
> Signed-off-by: Marcelo Henrique Cerri <marcelo.cerri at canonical.com>
> ---
> drivers/net/net_failover.c | 73 +++++++++++++++++++++++++++++++++-----
> include/net/net_failover.h | 6 ++++
> 2 files changed, 71 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c
> index 4f390fa557e4..eeeed475a6a5 100644
> --- a/drivers/net/net_failover.c
> +++ b/drivers/net/net_failover.c
> @@ -28,6 +28,10 @@
> #include <uapi/linux/if_arp.h>
> #include <net/net_failover.h>
>
> +#define TAKEOVER_DELAY_DEFAULT 100
> +static unsigned long takeover_delay = TAKEOVER_DELAY_DEFAULT;
> +module_param(takeover_delay, ulong, 0000);
> +
> static bool net_failover_xmit_ready(struct net_device *dev)
> {
> return netif_running(dev) && netif_carrier_ok(dev);
> @@ -501,6 +505,7 @@ static int net_failover_slave_register(struct net_device *slave_dev,
> {
> struct net_device *standby_dev, *primary_dev;
> struct net_failover_info *nfo_info;
> + bool work_scheduled = false;
> bool slave_is_standby;
> u32 orig_mtu;
> int err;
> @@ -516,12 +521,21 @@ static int net_failover_slave_register(struct net_device *slave_dev,
>
> dev_hold(slave_dev);
>
> + slave_is_standby = slave_dev->dev.parent == failover_dev->dev.parent;
> + nfo_info = netdev_priv(failover_dev);
> +
> if (netif_running(failover_dev)) {
> - err = dev_open(slave_dev);
> - if (err && (err != -EBUSY)) {
> - netdev_err(failover_dev, "Opening slave %s failed err:%d\n",
> - slave_dev->name, err);
> - goto err_dev_open;
> + if (takeover_delay && !slave_is_standby) {
> + schedule_delayed_work(&nfo_info->takeover,
> + takeover_delay * HZ / 1000);
> + work_scheduled = true;
> + } else {
> + err = dev_open(slave_dev);
> + if (err && (err != -EBUSY)) {
> + netdev_err(failover_dev, "Opening slave %s failed err:%d\n",
> + slave_dev->name, err);
> + goto err_dev_open;
> + }
> }
> }
>
> @@ -534,13 +548,13 @@ static int net_failover_slave_register(struct net_device *slave_dev,
> if (err) {
> netdev_err(failover_dev, "Failed to add vlan ids to device %s err:%d\n",
> slave_dev->name, err);
> + if (work_scheduled)
> + cancel_delayed_work(&nfo_info->takeover);
> goto err_vlan_add;
> }
>
> - nfo_info = netdev_priv(failover_dev);
> standby_dev = rtnl_dereference(nfo_info->standby_dev);
> primary_dev = rtnl_dereference(nfo_info->primary_dev);
> - slave_is_standby = slave_dev->dev.parent == failover_dev->dev.parent;
>
> if (slave_is_standby) {
> rcu_assign_pointer(nfo_info->standby_dev, slave_dev);
> @@ -677,11 +691,48 @@ static int net_failover_slave_name_change(struct net_device *slave_dev,
> /* We need to bring up the slave after the rename by udev in case
> * open failed with EBUSY when it was registered.
> */
> - dev_open(slave_dev);
> + if (netif_running(failover_dev)) {
> + dev_open(slave_dev);
> +
> + net_failover_lower_state_changed(slave_dev,
> + primary_dev, standby_dev);
> + }
>
> return 0;
> }
>
> +static void net_failover_takeover_primary(struct work_struct *w)
> +{
> + struct net_failover_info *nfo_info
> + = container_of(w, struct net_failover_info, takeover.work);
> + struct net_device *primary_dev, *standby_dev;
> + struct net_device *failover_dev;
> + int err;
> +
> + if (!rtnl_trylock()) {
> + schedule_delayed_work(&nfo_info->takeover, 0);
> + return;
> + }
> +
> + failover_dev = nfo_info->failover_dev;
> + primary_dev = rtnl_dereference(nfo_info->primary_dev);
> + standby_dev = rtnl_dereference(nfo_info->standby_dev);
> +
> + if (primary_dev && netif_running(failover_dev)) {
> + err = dev_open(primary_dev);
> + if (err) {
> + netdev_err(failover_dev, "Opening primary %s failed err:%d\n",
> + primary_dev->name, err);
> + } else {
> + net_failover_lower_state_changed(primary_dev,
> + primary_dev,
> + standby_dev);
> + }
> + }
> +
> + rtnl_unlock();
> +}
> +
> static struct failover_ops net_failover_ops = {
> .slave_pre_register = net_failover_slave_pre_register,
> .slave_register = net_failover_slave_register,
> @@ -708,6 +759,7 @@ static struct failover_ops net_failover_ops = {
> struct failover *net_failover_create(struct net_device *standby_dev)
> {
> struct device *dev = standby_dev->dev.parent;
> + struct net_failover_info *nfo_info;
> struct net_device *failover_dev;
> struct failover *failover;
> int err;
> @@ -759,6 +811,9 @@ struct failover *net_failover_create(struct net_device *standby_dev)
> }
>
> netif_carrier_off(failover_dev);
> + nfo_info = netdev_priv(failover_dev);
> + nfo_info->failover_dev = failover_dev;
> + INIT_DELAYED_WORK(&nfo_info->takeover, net_failover_takeover_primary);
>
> failover = failover_register(failover_dev, &net_failover_ops);
> if (IS_ERR(failover))
> @@ -798,6 +853,8 @@ void net_failover_destroy(struct failover *failover)
> failover_dev = rcu_dereference(failover->failover_dev);
> nfo_info = netdev_priv(failover_dev);
>
> + cancel_delayed_work_sync(&nfo_info->takeover);
> +
> netif_device_detach(failover_dev);
>
> rtnl_lock();
> diff --git a/include/net/net_failover.h b/include/net/net_failover.h
> index b12a1c469d1c..3cd0a6142b2b 100644
> --- a/include/net/net_failover.h
> +++ b/include/net/net_failover.h
> @@ -25,6 +25,12 @@ struct net_failover_info {
>
> /* spinlock while updating stats */
> spinlock_t stats_lock;
> +
> + /* back reference to associated net_device */
> + struct net_device *failover_dev;
> +
> + /* delayed work to take over primary netdev */
> + struct delayed_work takeover;
> };
>
> struct failover *net_failover_create(struct net_device *standby_dev);
> --
> 2.17.1
>
>
> --
> kernel-team mailing list
> kernel-team at lists.ubuntu.com
> https://lists.ubuntu.com/mailman/listinfo/kernel-team
More information about the kernel-team
mailing list