ACK: [E][F][SRU][PATCH 1/1] bonding: fix state transition issue in link monitoring

Stefan Bader stefan.bader at canonical.com
Thu Nov 28 11:23:29 UTC 2019


On 13.11.19 08:33, Po-Hsu Lin wrote:
> From: Jay Vosburgh <jay.vosburgh at canonical.com>
> 
> BugLink: https://bugs.launchpad.net/bugs/1852077
> 
> Since de77ecd4ef02 ("bonding: improve link-status update in
> mii-monitoring"), the bonding driver has utilized two separate variables
> to indicate the next link state a particular slave should transition to.
> Each is used to communicate to a different portion of the link state
> change commit logic; one to the bond_miimon_commit function itself, and
> another to the state transition logic.
> 
> 	Unfortunately, the two variables can become unsynchronized,
> resulting in incorrect link state transitions within bonding.  This can
> cause slaves to become stuck in an incorrect link state until a
> subsequent carrier state transition.
> 
> 	The issue occurs when a special case in bond_slave_netdev_event
> sets slave->link directly to BOND_LINK_FAIL.  On the next pass through
> bond_miimon_inspect after the slave goes carrier up, the BOND_LINK_FAIL
> case will set the proposed next state (link_new_state) to BOND_LINK_UP,
> but the new_link to BOND_LINK_DOWN.  The setting of the final link state
> from new_link comes after that from link_new_state, and so the slave
> will end up incorrectly in _DOWN state.
> 
> 	Resolve this by combining the two variables into one.
> 
> Reported-by: Aleksei Zakharov <zakharov.a.g at yandex.ru>
> Reported-by: Sha Zhang <zhangsha.zhang at huawei.com>
> Cc: Mahesh Bandewar <maheshb at google.com>
> Fixes: de77ecd4ef02 ("bonding: improve link-status update in mii-monitoring")
> Signed-off-by: Jay Vosburgh <jay.vosburgh at canonical.com>
> Signed-off-by: David S. Miller <davem at davemloft.net>
> (cherry picked from commit 1899bb325149e481de31a4f32b59ea6f24e176ea)
> Signed-off-by: Po-Hsu Lin <po-hsu.lin at canonical.com>
Acked-by: Stefan Bader <stefan.bader at canonical.com>
> ---
>  drivers/net/bonding/bond_main.c | 44 ++++++++++++++++++++---------------------
>  include/net/bonding.h           |  3 +--
>  2 files changed, 23 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index 21d8fcc..4edb69b 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -2086,8 +2086,7 @@ static int bond_miimon_inspect(struct bonding *bond)
>  	ignore_updelay = !rcu_dereference(bond->curr_active_slave);
>  
>  	bond_for_each_slave_rcu(bond, slave, iter) {
> -		slave->new_link = BOND_LINK_NOCHANGE;
> -		slave->link_new_state = slave->link;
> +		bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
>  
>  		link_state = bond_check_dev_link(bond, slave->dev, 0);
>  
> @@ -2121,7 +2120,7 @@ static int bond_miimon_inspect(struct bonding *bond)
>  			}
>  
>  			if (slave->delay <= 0) {
> -				slave->new_link = BOND_LINK_DOWN;
> +				bond_propose_link_state(slave, BOND_LINK_DOWN);
>  				commit++;
>  				continue;
>  			}
> @@ -2158,7 +2157,7 @@ static int bond_miimon_inspect(struct bonding *bond)
>  				slave->delay = 0;
>  
>  			if (slave->delay <= 0) {
> -				slave->new_link = BOND_LINK_UP;
> +				bond_propose_link_state(slave, BOND_LINK_UP);
>  				commit++;
>  				ignore_updelay = false;
>  				continue;
> @@ -2196,7 +2195,7 @@ static void bond_miimon_commit(struct bonding *bond)
>  	struct slave *slave, *primary;
>  
>  	bond_for_each_slave(bond, slave, iter) {
> -		switch (slave->new_link) {
> +		switch (slave->link_new_state) {
>  		case BOND_LINK_NOCHANGE:
>  			/* For 802.3ad mode, check current slave speed and
>  			 * duplex again in case its port was disabled after
> @@ -2268,8 +2267,8 @@ static void bond_miimon_commit(struct bonding *bond)
>  
>  		default:
>  			slave_err(bond->dev, slave->dev, "invalid new link %d on slave\n",
> -				  slave->new_link);
> -			slave->new_link = BOND_LINK_NOCHANGE;
> +				  slave->link_new_state);
> +			bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
>  
>  			continue;
>  		}
> @@ -2677,13 +2676,13 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
>  	bond_for_each_slave_rcu(bond, slave, iter) {
>  		unsigned long trans_start = dev_trans_start(slave->dev);
>  
> -		slave->new_link = BOND_LINK_NOCHANGE;
> +		bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
>  
>  		if (slave->link != BOND_LINK_UP) {
>  			if (bond_time_in_interval(bond, trans_start, 1) &&
>  			    bond_time_in_interval(bond, slave->last_rx, 1)) {
>  
> -				slave->new_link = BOND_LINK_UP;
> +				bond_propose_link_state(slave, BOND_LINK_UP);
>  				slave_state_changed = 1;
>  
>  				/* primary_slave has no meaning in round-robin
> @@ -2708,7 +2707,7 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
>  			if (!bond_time_in_interval(bond, trans_start, 2) ||
>  			    !bond_time_in_interval(bond, slave->last_rx, 2)) {
>  
> -				slave->new_link = BOND_LINK_DOWN;
> +				bond_propose_link_state(slave, BOND_LINK_DOWN);
>  				slave_state_changed = 1;
>  
>  				if (slave->link_failure_count < UINT_MAX)
> @@ -2739,8 +2738,8 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
>  			goto re_arm;
>  
>  		bond_for_each_slave(bond, slave, iter) {
> -			if (slave->new_link != BOND_LINK_NOCHANGE)
> -				slave->link = slave->new_link;
> +			if (slave->link_new_state != BOND_LINK_NOCHANGE)
> +				slave->link = slave->link_new_state;
>  		}
>  
>  		if (slave_state_changed) {
> @@ -2763,9 +2762,9 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
>  }
>  
>  /* Called to inspect slaves for active-backup mode ARP monitor link state
> - * changes.  Sets new_link in slaves to specify what action should take
> - * place for the slave.  Returns 0 if no changes are found, >0 if changes
> - * to link states must be committed.
> + * changes.  Sets proposed link state in slaves to specify what action
> + * should take place for the slave.  Returns 0 if no changes are found, >0
> + * if changes to link states must be committed.
>   *
>   * Called with rcu_read_lock held.
>   */
> @@ -2777,12 +2776,12 @@ static int bond_ab_arp_inspect(struct bonding *bond)
>  	int commit = 0;
>  
>  	bond_for_each_slave_rcu(bond, slave, iter) {
> -		slave->new_link = BOND_LINK_NOCHANGE;
> +		bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
>  		last_rx = slave_last_rx(bond, slave);
>  
>  		if (slave->link != BOND_LINK_UP) {
>  			if (bond_time_in_interval(bond, last_rx, 1)) {
> -				slave->new_link = BOND_LINK_UP;
> +				bond_propose_link_state(slave, BOND_LINK_UP);
>  				commit++;
>  			}
>  			continue;
> @@ -2810,7 +2809,7 @@ static int bond_ab_arp_inspect(struct bonding *bond)
>  		if (!bond_is_active_slave(slave) &&
>  		    !rcu_access_pointer(bond->current_arp_slave) &&
>  		    !bond_time_in_interval(bond, last_rx, 3)) {
> -			slave->new_link = BOND_LINK_DOWN;
> +			bond_propose_link_state(slave, BOND_LINK_DOWN);
>  			commit++;
>  		}
>  
> @@ -2823,7 +2822,7 @@ static int bond_ab_arp_inspect(struct bonding *bond)
>  		if (bond_is_active_slave(slave) &&
>  		    (!bond_time_in_interval(bond, trans_start, 2) ||
>  		     !bond_time_in_interval(bond, last_rx, 2))) {
> -			slave->new_link = BOND_LINK_DOWN;
> +			bond_propose_link_state(slave, BOND_LINK_DOWN);
>  			commit++;
>  		}
>  	}
> @@ -2843,7 +2842,7 @@ static void bond_ab_arp_commit(struct bonding *bond)
>  	struct slave *slave;
>  
>  	bond_for_each_slave(bond, slave, iter) {
> -		switch (slave->new_link) {
> +		switch (slave->link_new_state) {
>  		case BOND_LINK_NOCHANGE:
>  			continue;
>  
> @@ -2893,8 +2892,9 @@ static void bond_ab_arp_commit(struct bonding *bond)
>  			continue;
>  
>  		default:
> -			slave_err(bond->dev, slave->dev, "impossible: new_link %d on slave\n",
> -				  slave->new_link);
> +			slave_err(bond->dev, slave->dev,
> +				  "impossible: link_new_state %d on slave\n",
> +				  slave->link_new_state);
>  			continue;
>  		}
>  
> diff --git a/include/net/bonding.h b/include/net/bonding.h
> index f7fe456..d416af7 100644
> --- a/include/net/bonding.h
> +++ b/include/net/bonding.h
> @@ -159,7 +159,6 @@ struct slave {
>  	unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS];
>  	s8     link;		/* one of BOND_LINK_XXXX */
>  	s8     link_new_state;	/* one of BOND_LINK_XXXX */
> -	s8     new_link;
>  	u8     backup:1,   /* indicates backup slave. Value corresponds with
>  			      BOND_STATE_ACTIVE and BOND_STATE_BACKUP */
>  	       inactive:1, /* indicates inactive slave */
> @@ -549,7 +548,7 @@ static inline void bond_propose_link_state(struct slave *slave, int state)
>  
>  static inline void bond_commit_link_state(struct slave *slave, bool notify)
>  {
> -	if (slave->link == slave->link_new_state)
> +	if (slave->link_new_state == BOND_LINK_NOCHANGE)
>  		return;
>  
>  	slave->link = slave->link_new_state;
> 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: OpenPGP digital signature
URL: <https://lists.ubuntu.com/archives/kernel-team/attachments/20191128/0a852be0/attachment-0001.sig>


More information about the kernel-team mailing list