diff options
Diffstat (limited to 'drivers/net/bonding')
| -rw-r--r-- | drivers/net/bonding/Makefile | 2 | ||||
| -rw-r--r-- | drivers/net/bonding/bond_3ad.c | 1200 | ||||
| -rw-r--r-- | drivers/net/bonding/bond_3ad.h | 176 | ||||
| -rw-r--r-- | drivers/net/bonding/bond_alb.c | 490 | ||||
| -rw-r--r-- | drivers/net/bonding/bond_alb.h | 17 | ||||
| -rw-r--r-- | drivers/net/bonding/bond_debugfs.c | 12 | ||||
| -rw-r--r-- | drivers/net/bonding/bond_main.c | 1992 | ||||
| -rw-r--r-- | drivers/net/bonding/bond_netlink.c | 573 | ||||
| -rw-r--r-- | drivers/net/bonding/bond_options.c | 1394 | ||||
| -rw-r--r-- | drivers/net/bonding/bond_options.h | 130 | ||||
| -rw-r--r-- | drivers/net/bonding/bond_procfs.c | 86 | ||||
| -rw-r--r-- | drivers/net/bonding/bond_sysfs.c | 1344 | ||||
| -rw-r--r-- | drivers/net/bonding/bond_sysfs_slave.c | 144 | ||||
| -rw-r--r-- | drivers/net/bonding/bonding.h | 353 | 
14 files changed, 4625 insertions, 3288 deletions
diff --git a/drivers/net/bonding/Makefile b/drivers/net/bonding/Makefile index 4c21bf6b8b2..6f4e80853ed 100644 --- a/drivers/net/bonding/Makefile +++ b/drivers/net/bonding/Makefile @@ -4,7 +4,7 @@  obj-$(CONFIG_BONDING) += bonding.o -bonding-objs := bond_main.o bond_3ad.o bond_alb.o bond_sysfs.o bond_debugfs.o +bonding-objs := bond_main.o bond_3ad.o bond_alb.o bond_sysfs.o bond_sysfs_slave.o bond_debugfs.o bond_netlink.o bond_options.o  proc-$(CONFIG_PROC_FS) += bond_procfs.o  bonding-objs += $(proc-y) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 0d8f427ade9..0dfeaf5da3f 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -34,14 +34,14 @@  #include "bonding.h"  #include "bond_3ad.h" -// General definitions +/* General definitions */  #define AD_SHORT_TIMEOUT           1  #define AD_LONG_TIMEOUT            0  #define AD_STANDBY                 0x2  #define AD_MAX_TX_IN_SECOND        3  #define AD_COLLECTOR_MAX_DELAY     0 -// Timer definitions(43.4.4 in the 802.3ad standard) +/* Timer definitions (43.4.4 in the 802.3ad standard) */  #define AD_FAST_PERIODIC_TIME      1  #define AD_SLOW_PERIODIC_TIME      30  #define AD_SHORT_TIMEOUT_TIME      (3*AD_FAST_PERIODIC_TIME) @@ -49,7 +49,7 @@  #define AD_CHURN_DETECTION_TIME    60  #define AD_AGGREGATE_WAIT_TIME     2 -// Port state definitions(43.4.2.2 in the 802.3ad standard) +/* Port state definitions (43.4.2.2 in the 802.3ad standard) */  #define AD_STATE_LACP_ACTIVITY   0x1  #define AD_STATE_LACP_TIMEOUT    0x2  #define AD_STATE_AGGREGATION     0x4 @@ -59,7 +59,9 @@  #define AD_STATE_DEFAULTED       0x40  #define AD_STATE_EXPIRED         0x80 -// Port Variables definitions used by the State Machines(43.4.7 in the 802.3ad standard) +/* Port Variables definitions used by the State Machines (43.4.7 in the + * 802.3ad standard) + */  #define AD_PORT_BEGIN           0x1  #define AD_PORT_LACP_ENABLED    0x2  #define AD_PORT_ACTOR_CHURN     0x4 @@ -71,27 +73,27 @@  #define AD_PORT_SELECTED        0x100  #define AD_PORT_MOVED           0x200 -// Port Key definitions -// key is determined according to the link speed, duplex and -// user key(which is yet not supported) -//              ------------------------------------------------------------ -// Port key :   | User key                       |      Speed       |Duplex| -//              ------------------------------------------------------------ -//              16                               6               1 0 +/* Port Key definitions + * key is determined according to the link speed, duplex and + * user key (which is yet not supported) + * -------------------------------------------------------------- + * Port key :	| User key	| Speed		| Duplex	| + * -------------------------------------------------------------- + * 16		  6		  1		  0 + */  #define  AD_DUPLEX_KEY_BITS    0x1  #define  AD_SPEED_KEY_BITS     0x3E  #define  AD_USER_KEY_BITS      0xFFC0 -//dalloun  #define     AD_LINK_SPEED_BITMASK_1MBPS       0x1  #define     AD_LINK_SPEED_BITMASK_10MBPS      0x2  #define     AD_LINK_SPEED_BITMASK_100MBPS     0x4  #define     AD_LINK_SPEED_BITMASK_1000MBPS    0x8  #define     AD_LINK_SPEED_BITMASK_10000MBPS   0x10 -//endalloun -// compare MAC addresses -#define MAC_ADDRESS_COMPARE(A, B) memcmp(A, B, ETH_ALEN) +/* compare MAC addresses */ +#define MAC_ADDRESS_EQUAL(A, B)	\ +	ether_addr_equal_64bits((const u8 *)A, (const u8 *)B)  static struct mac_addr null_mac_addr = { { 0, 0, 0, 0, 0, 0 } };  static u16 ad_ticks_per_sec; @@ -99,7 +101,7 @@ static const int ad_delta_in_ticks = (AD_TIMER_INTERVAL * HZ) / 1000;  static const u8 lacpdu_mcast_addr[ETH_ALEN] = MULTICAST_LACPDU_ADDR; -// ================= main 802.3ad protocol functions ================== +/* ================= main 802.3ad protocol functions ================== */  static int ad_lacpdu_send(struct port *port);  static int ad_marker_send(struct port *port, struct bond_marker *marker);  static void ad_mux_machine(struct port *port); @@ -113,13 +115,13 @@ static void ad_initialize_agg(struct aggregator *aggregator);  static void ad_initialize_port(struct port *port, int lacp_fast);  static void ad_enable_collecting_distributing(struct port *port);  static void ad_disable_collecting_distributing(struct port *port); -static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port); -static void ad_marker_response_received(struct bond_marker *marker, struct port *port); +static void ad_marker_info_received(struct bond_marker *marker_info, +				    struct port *port); +static void ad_marker_response_received(struct bond_marker *marker, +					struct port *port); -///////////////////////////////////////////////////////////////////////////////// -// ================= api to bonding and kernel code ================== -///////////////////////////////////////////////////////////////////////////////// +/* ================= api to bonding and kernel code ================== */  /**   * __get_bond_by_port - get the port's bonding struct @@ -136,87 +138,37 @@ static inline struct bonding *__get_bond_by_port(struct port *port)  }  /** - * __get_first_port - get the first port in the bond - * @bond: the bond we're looking at - * - * Return the port of the first slave in @bond, or %NULL if it can't be found. - */ -static inline struct port *__get_first_port(struct bonding *bond) -{ -	struct slave *first_slave = bond_first_slave(bond); - -	return first_slave ? &(SLAVE_AD_INFO(first_slave).port) : NULL; -} - -/** - * __get_next_port - get the next port in the bond - * @port: the port we're looking at - * - * Return the port of the slave that is next in line of @port's slave in the - * bond, or %NULL if it can't be found. - */ -static inline struct port *__get_next_port(struct port *port) -{ -	struct bonding *bond = __get_bond_by_port(port); -	struct slave *slave = port->slave, *slave_next; - -	// If there's no bond for this port, or this is the last slave -	if (bond == NULL) -		return NULL; -	slave_next = bond_next_slave(bond, slave); -	if (!slave_next || bond_is_first_slave(bond, slave_next)) -		return NULL; - -	return &(SLAVE_AD_INFO(slave_next).port); -} - -/**   * __get_first_agg - get the first aggregator in the bond   * @bond: the bond we're looking at   *   * Return the aggregator of the first slave in @bond, or %NULL if it can't be   * found. + * The caller must hold RCU or RTNL lock.   */  static inline struct aggregator *__get_first_agg(struct port *port)  {  	struct bonding *bond = __get_bond_by_port(port);  	struct slave *first_slave; +	struct aggregator *agg; -	// If there's no bond for this port, or bond has no slaves +	/* If there's no bond for this port, or bond has no slaves */  	if (bond == NULL)  		return NULL; -	first_slave = bond_first_slave(bond); -	return first_slave ? &(SLAVE_AD_INFO(first_slave).aggregator) : NULL; -} +	rcu_read_lock(); +	first_slave = bond_first_slave_rcu(bond); +	agg = first_slave ? &(SLAVE_AD_INFO(first_slave)->aggregator) : NULL; +	rcu_read_unlock(); -/** - * __get_next_agg - get the next aggregator in the bond - * @aggregator: the aggregator we're looking at - * - * Return the aggregator of the slave that is next in line of @aggregator's - * slave in the bond, or %NULL if it can't be found. - */ -static inline struct aggregator *__get_next_agg(struct aggregator *aggregator) -{ -	struct slave *slave = aggregator->slave, *slave_next; -	struct bonding *bond = bond_get_bond_by_slave(slave); - -	// If there's no bond for this aggregator, or this is the last slave -	if (bond == NULL) -		return NULL; -	slave_next = bond_next_slave(bond, slave); -	if (!slave_next || bond_is_first_slave(bond, slave_next)) -		return NULL; - -	return &(SLAVE_AD_INFO(slave_next).aggregator); +	return agg;  } -/* - * __agg_has_partner +/** + * __agg_has_partner - see if we have a partner + * @agg: the agregator we're looking at   *   * Return nonzero if aggregator has a partner (denoted by a non-zero ether - * address for the partner).  Return 0 if not. + * address for the partner). Return 0 if not.   */  static inline int __agg_has_partner(struct aggregator *agg)  { @@ -226,30 +178,27 @@ static inline int __agg_has_partner(struct aggregator *agg)  /**   * __disable_port - disable the port's slave   * @port: the port we're looking at - *   */  static inline void __disable_port(struct port *port)  { -	bond_set_slave_inactive_flags(port->slave); +	bond_set_slave_inactive_flags(port->slave, BOND_SLAVE_NOTIFY_LATER);  }  /**   * __enable_port - enable the port's slave, if it's up   * @port: the port we're looking at - *   */  static inline void __enable_port(struct port *port)  {  	struct slave *slave = port->slave; -	if ((slave->link == BOND_LINK_UP) && IS_UP(slave->dev)) -		bond_set_slave_active_flags(slave); +	if ((slave->link == BOND_LINK_UP) && bond_slave_is_up(slave)) +		bond_set_slave_active_flags(slave, BOND_SLAVE_NOTIFY_LATER);  }  /**   * __port_is_enabled - check if the port's slave is in active state   * @port: the port we're looking at - *   */  static inline int __port_is_enabled(struct port *port)  { @@ -275,7 +224,6 @@ static inline u32 __get_agg_selection_mode(struct port *port)  /**   * __check_agg_selection_timer - check if the selection timer has expired   * @port: the port we're looking at - *   */  static inline int __check_agg_selection_timer(struct port *port)  { @@ -290,21 +238,19 @@ static inline int __check_agg_selection_timer(struct port *port)  /**   * __get_state_machine_lock - lock the port's state machines   * @port: the port we're looking at - *   */  static inline void __get_state_machine_lock(struct port *port)  { -	spin_lock_bh(&(SLAVE_AD_INFO(port->slave).state_machine_lock)); +	spin_lock_bh(&(SLAVE_AD_INFO(port->slave)->state_machine_lock));  }  /**   * __release_state_machine_lock - unlock the port's state machines   * @port: the port we're looking at - *   */  static inline void __release_state_machine_lock(struct port *port)  { -	spin_unlock_bh(&(SLAVE_AD_INFO(port->slave).state_machine_lock)); +	spin_unlock_bh(&(SLAVE_AD_INFO(port->slave)->state_machine_lock));  }  /** @@ -323,10 +269,11 @@ static u16 __get_link_speed(struct port *port)  	struct slave *slave = port->slave;  	u16 speed; -	/* this if covers only a special case: when the configuration starts with -	 * link down, it sets the speed to 0. -	 * This is done in spite of the fact that the e100 driver reports 0 to be -	 * compatible with MVT in the future.*/ +	/* this if covers only a special case: when the configuration starts +	 * with link down, it sets the speed to 0. +	 * This is done in spite of the fact that the e100 driver reports 0 +	 * to be compatible with MVT in the future. +	 */  	if (slave->link != BOND_LINK_UP)  		speed = 0;  	else { @@ -348,7 +295,8 @@ static u16 __get_link_speed(struct port *port)  			break;  		default: -			speed = 0; // unknown speed value from ethtool. shouldn't happen +			/* unknown speed value from ethtool. shouldn't happen */ +			speed = 0;  			break;  		}  	} @@ -372,8 +320,9 @@ static u8 __get_duplex(struct port *port)  	u8 retval; -	//  handling a special case: when the configuration starts with -	// link down, it sets the duplex to 0. +	/* handling a special case: when the configuration starts with +	 * link down, it sets the duplex to 0. +	 */  	if (slave->link != BOND_LINK_UP)  		retval = 0x0;  	else { @@ -397,15 +346,14 @@ static u8 __get_duplex(struct port *port)  /**   * __initialize_port_locks - initialize a port's STATE machine spinlock   * @port: the slave of the port we're looking at - *   */  static inline void __initialize_port_locks(struct slave *slave)  { -	// make sure it isn't called twice -	spin_lock_init(&(SLAVE_AD_INFO(slave).state_machine_lock)); +	/* make sure it isn't called twice */ +	spin_lock_init(&(SLAVE_AD_INFO(slave)->state_machine_lock));  } -//conversions +/* Conversions */  /**   * __ad_timer_to_ticks - convert a given timer type to AD module ticks @@ -414,39 +362,38 @@ static inline void __initialize_port_locks(struct slave *slave)   *   * If @timer_type is %current_while_timer, @par indicates long/short timer.   * If @timer_type is %periodic_timer, @par is one of %FAST_PERIODIC_TIME, - *						    %SLOW_PERIODIC_TIME. + *						     %SLOW_PERIODIC_TIME.   */  static u16 __ad_timer_to_ticks(u16 timer_type, u16 par)  {  	u16 retval = 0; /* to silence the compiler */  	switch (timer_type) { -	case AD_CURRENT_WHILE_TIMER:   // for rx machine usage +	case AD_CURRENT_WHILE_TIMER:	/* for rx machine usage */  		if (par) -			retval = (AD_SHORT_TIMEOUT_TIME*ad_ticks_per_sec); // short timeout +			retval = (AD_SHORT_TIMEOUT_TIME*ad_ticks_per_sec);  		else -			retval = (AD_LONG_TIMEOUT_TIME*ad_ticks_per_sec); // long timeout +			retval = (AD_LONG_TIMEOUT_TIME*ad_ticks_per_sec);  		break; -	case AD_ACTOR_CHURN_TIMER:	    // for local churn machine +	case AD_ACTOR_CHURN_TIMER:	/* for local churn machine */  		retval = (AD_CHURN_DETECTION_TIME*ad_ticks_per_sec);  		break; -	case AD_PERIODIC_TIMER:	    // for periodic machine -		retval = (par*ad_ticks_per_sec); // long timeout +	case AD_PERIODIC_TIMER:		/* for periodic machine */ +		retval = (par*ad_ticks_per_sec); /* long timeout */  		break; -	case AD_PARTNER_CHURN_TIMER:   // for remote churn machine +	case AD_PARTNER_CHURN_TIMER:	/* for remote churn machine */  		retval = (AD_CHURN_DETECTION_TIME*ad_ticks_per_sec);  		break; -	case AD_WAIT_WHILE_TIMER:	    // for selection machine +	case AD_WAIT_WHILE_TIMER:	/* for selection machine */  		retval = (AD_AGGREGATE_WAIT_TIME*ad_ticks_per_sec);  		break;  	} +  	return retval;  } -///////////////////////////////////////////////////////////////////////////////// -// ================= ad_rx_machine helper functions ================== -///////////////////////////////////////////////////////////////////////////////// +/* ================= ad_rx_machine helper functions ================== */  /**   * __choose_matched - update a port's matched variable from a received lacpdu @@ -473,17 +420,18 @@ static u16 __ad_timer_to_ticks(u16 timer_type, u16 par)   */  static void __choose_matched(struct lacpdu *lacpdu, struct port *port)  { -	// check if all parameters are alike +	/* check if all parameters are alike +	 * or this is individual link(aggregation == FALSE) +	 * then update the state machine Matched variable. +	 */  	if (((ntohs(lacpdu->partner_port) == port->actor_port_number) &&  	     (ntohs(lacpdu->partner_port_priority) == port->actor_port_priority) && -	     !MAC_ADDRESS_COMPARE(&(lacpdu->partner_system), &(port->actor_system)) && +	     MAC_ADDRESS_EQUAL(&(lacpdu->partner_system), &(port->actor_system)) &&  	     (ntohs(lacpdu->partner_system_priority) == port->actor_system_priority) &&  	     (ntohs(lacpdu->partner_key) == port->actor_oper_port_key) &&  	     ((lacpdu->partner_state & AD_STATE_AGGREGATION) == (port->actor_oper_port_state & AD_STATE_AGGREGATION))) || -	    // or this is individual link(aggregation == FALSE)  	    ((lacpdu->actor_state & AD_STATE_AGGREGATION) == 0)  		) { -		// update the state machine Matched variable  		port->sm_vars |= AD_PORT_MATCHED;  	} else {  		port->sm_vars &= ~AD_PORT_MATCHED; @@ -505,7 +453,9 @@ static void __record_pdu(struct lacpdu *lacpdu, struct port *port)  		struct port_params *partner = &port->partner_oper;  		__choose_matched(lacpdu, port); -		// record the new parameter values for the partner operational +		/* record the new parameter values for the partner +		 * operational +		 */  		partner->port_number = ntohs(lacpdu->actor_port);  		partner->port_priority = ntohs(lacpdu->actor_port_priority);  		partner->system = lacpdu->actor_system; @@ -513,10 +463,12 @@ static void __record_pdu(struct lacpdu *lacpdu, struct port *port)  		partner->key = ntohs(lacpdu->actor_key);  		partner->port_state = lacpdu->actor_state; -		// set actor_oper_port_state.defaulted to FALSE +		/* set actor_oper_port_state.defaulted to FALSE */  		port->actor_oper_port_state &= ~AD_STATE_DEFAULTED; -		// set the partner sync. to on if the partner is sync. and the port is matched +		/* set the partner sync. to on if the partner is sync, +		 * and the port is matched +		 */  		if ((port->sm_vars & AD_PORT_MATCHED)  		    && (lacpdu->actor_state & AD_STATE_SYNCHRONIZATION))  			partner->port_state |= AD_STATE_SYNCHRONIZATION; @@ -536,11 +488,11 @@ static void __record_pdu(struct lacpdu *lacpdu, struct port *port)  static void __record_default(struct port *port)  {  	if (port) { -		// record the partner admin parameters +		/* record the partner admin parameters */  		memcpy(&port->partner_oper, &port->partner_admin,  		       sizeof(struct port_params)); -		// set actor_oper_port_state.defaulted to true +		/* set actor_oper_port_state.defaulted to true */  		port->actor_oper_port_state |= AD_STATE_DEFAULTED;  	}  } @@ -563,14 +515,15 @@ static void __update_selected(struct lacpdu *lacpdu, struct port *port)  	if (lacpdu && port) {  		const struct port_params *partner = &port->partner_oper; -		// check if any parameter is different +		/* check if any parameter is different then +		 * update the state machine selected variable. +		 */  		if (ntohs(lacpdu->actor_port) != partner->port_number ||  		    ntohs(lacpdu->actor_port_priority) != partner->port_priority || -		    MAC_ADDRESS_COMPARE(&lacpdu->actor_system, &partner->system) || +		    !MAC_ADDRESS_EQUAL(&lacpdu->actor_system, &partner->system) ||  		    ntohs(lacpdu->actor_system_priority) != partner->system_priority ||  		    ntohs(lacpdu->actor_key) != partner->key ||  		    (lacpdu->actor_state & AD_STATE_AGGREGATION) != (partner->port_state & AD_STATE_AGGREGATION)) { -			// update the state machine Selected variable  			port->sm_vars &= ~AD_PORT_SELECTED;  		}  	} @@ -594,15 +547,16 @@ static void __update_default_selected(struct port *port)  		const struct port_params *admin = &port->partner_admin;  		const struct port_params *oper = &port->partner_oper; -		// check if any parameter is different +		/* check if any parameter is different then +		 * update the state machine selected variable. +		 */  		if (admin->port_number != oper->port_number ||  		    admin->port_priority != oper->port_priority || -		    MAC_ADDRESS_COMPARE(&admin->system, &oper->system) || +		    !MAC_ADDRESS_EQUAL(&admin->system, &oper->system) ||  		    admin->system_priority != oper->system_priority ||  		    admin->key != oper->key ||  		    (admin->port_state & AD_STATE_AGGREGATION)  			!= (oper->port_state & AD_STATE_AGGREGATION)) { -			// update the state machine Selected variable  			port->sm_vars &= ~AD_PORT_SELECTED;  		}  	} @@ -622,12 +576,14 @@ static void __update_default_selected(struct port *port)   */  static void __update_ntt(struct lacpdu *lacpdu, struct port *port)  { -	// validate lacpdu and port +	/* validate lacpdu and port */  	if (lacpdu && port) { -		// check if any parameter is different +		/* check if any parameter is different then +		 * update the port->ntt. +		 */  		if ((ntohs(lacpdu->partner_port) != port->actor_port_number) ||  		    (ntohs(lacpdu->partner_port_priority) != port->actor_port_priority) || -		    MAC_ADDRESS_COMPARE(&(lacpdu->partner_system), &(port->actor_system)) || +		    !MAC_ADDRESS_EQUAL(&(lacpdu->partner_system), &(port->actor_system)) ||  		    (ntohs(lacpdu->partner_system_priority) != port->actor_system_priority) ||  		    (ntohs(lacpdu->partner_key) != port->actor_oper_port_key) ||  		    ((lacpdu->partner_state & AD_STATE_LACP_ACTIVITY) != (port->actor_oper_port_state & AD_STATE_LACP_ACTIVITY)) || @@ -635,43 +591,12 @@ static void __update_ntt(struct lacpdu *lacpdu, struct port *port)  		    ((lacpdu->partner_state & AD_STATE_SYNCHRONIZATION) != (port->actor_oper_port_state & AD_STATE_SYNCHRONIZATION)) ||  		    ((lacpdu->partner_state & AD_STATE_AGGREGATION) != (port->actor_oper_port_state & AD_STATE_AGGREGATION))  		   ) { -  			port->ntt = true;  		}  	}  }  /** - * __attach_bond_to_agg - * @port: the port we're looking at - * - * Handle the attaching of the port's control parser/multiplexer and the - * aggregator. This function does nothing since the parser/multiplexer of the - * receive and the parser/multiplexer of the aggregator are already combined. - */ -static void __attach_bond_to_agg(struct port *port) -{ -	port = NULL; /* just to satisfy the compiler */ -	// This function does nothing since the parser/multiplexer of the receive -	// and the parser/multiplexer of the aggregator are already combined -} - -/** - * __detach_bond_from_agg - * @port: the port we're looking at - * - * Handle the detaching of the port's control parser/multiplexer from the - * aggregator. This function does nothing since the parser/multiplexer of the - * receive and the parser/multiplexer of the aggregator are already combined. - */ -static void __detach_bond_from_agg(struct port *port) -{ -	port = NULL; /* just to satisfy the compiler */ -	// This function does nothing since the parser/multiplexer of the receive -	// and the parser/multiplexer of the aggregator are already combined -} - -/**   * __agg_ports_are_ready - check if all ports in an aggregator are ready   * @aggregator: the aggregator we're looking at   * @@ -682,7 +607,9 @@ static int __agg_ports_are_ready(struct aggregator *aggregator)  	int retval = 1;  	if (aggregator) { -		// scan all ports in this aggregator to verfy if they are all ready +		/* scan all ports in this aggregator to verfy if they are +		 * all ready. +		 */  		for (port = aggregator->lag_ports;  		     port;  		     port = port->next_port_in_aggregator) { @@ -742,7 +669,7 @@ static u32 __get_agg_bandwidth(struct aggregator *aggregator)  			bandwidth = aggregator->num_of_ports * 10000;  			break;  		default: -			bandwidth = 0; /*to silence the compiler ....*/ +			bandwidth = 0; /* to silence the compiler */  		}  	}  	return bandwidth; @@ -752,33 +679,32 @@ static u32 __get_agg_bandwidth(struct aggregator *aggregator)   * __get_active_agg - get the current active aggregator   * @aggregator: the aggregator we're looking at   * + * Caller must hold RCU lock.   */  static struct aggregator *__get_active_agg(struct aggregator *aggregator)  { -	struct aggregator *retval = NULL; +	struct bonding *bond = aggregator->slave->bond; +	struct list_head *iter; +	struct slave *slave; -	for (; aggregator; aggregator = __get_next_agg(aggregator)) { -		if (aggregator->is_active) { -			retval = aggregator; -			break; -		} -	} +	bond_for_each_slave_rcu(bond, slave, iter) +		if (SLAVE_AD_INFO(slave)->aggregator.is_active) +			return &(SLAVE_AD_INFO(slave)->aggregator); -	return retval; +	return NULL;  }  /**   * __update_lacpdu_from_port - update a port's lacpdu fields   * @port: the port we're looking at - *   */  static inline void __update_lacpdu_from_port(struct port *port)  {  	struct lacpdu *lacpdu = &port->lacpdu;  	const struct port_params *partner = &port->partner_oper; -	/* update current actual Actor parameters */ -	/* lacpdu->subtype                   initialized +	/* update current actual Actor parameters +	 * lacpdu->subtype                   initialized  	 * lacpdu->version_number            initialized  	 * lacpdu->tlv_type_actor_info       initialized  	 * lacpdu->actor_information_length  initialized @@ -814,9 +740,7 @@ static inline void __update_lacpdu_from_port(struct port *port)  	 */  } -////////////////////////////////////////////////////////////////////////////////////// -// ================= main 802.3ad protocol code ====================================== -////////////////////////////////////////////////////////////////////////////////////// +/* ================= main 802.3ad protocol code ========================= */  /**   * ad_lacpdu_send - send out a lacpdu packet on a given port @@ -844,13 +768,14 @@ static int ad_lacpdu_send(struct port *port)  	lacpdu_header = (struct lacpdu_header *)skb_put(skb, length); -	memcpy(lacpdu_header->hdr.h_dest, lacpdu_mcast_addr, ETH_ALEN); +	ether_addr_copy(lacpdu_header->hdr.h_dest, lacpdu_mcast_addr);  	/* Note: source address is set to be the member's PERMANENT address, -	   because we use it to identify loopback lacpdus in receive. */ -	memcpy(lacpdu_header->hdr.h_source, slave->perm_hwaddr, ETH_ALEN); +	 * because we use it to identify loopback lacpdus in receive. +	 */ +	ether_addr_copy(lacpdu_header->hdr.h_source, slave->perm_hwaddr);  	lacpdu_header->hdr.h_proto = PKT_TYPE_LACPDU; -	lacpdu_header->lacpdu = port->lacpdu; // struct copy +	lacpdu_header->lacpdu = port->lacpdu;  	dev_queue_xmit(skb); @@ -885,13 +810,14 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker)  	marker_header = (struct bond_marker_header *)skb_put(skb, length); -	memcpy(marker_header->hdr.h_dest, lacpdu_mcast_addr, ETH_ALEN); +	ether_addr_copy(marker_header->hdr.h_dest, lacpdu_mcast_addr);  	/* Note: source address is set to be the member's PERMANENT address, -	   because we use it to identify loopback MARKERs in receive. */ -	memcpy(marker_header->hdr.h_source, slave->perm_hwaddr, ETH_ALEN); +	 * because we use it to identify loopback MARKERs in receive. +	 */ +	ether_addr_copy(marker_header->hdr.h_source, slave->perm_hwaddr);  	marker_header->hdr.h_proto = PKT_TYPE_LACPDU; -	marker_header->marker = *marker; // struct copy +	marker_header->marker = *marker;  	dev_queue_xmit(skb); @@ -901,72 +827,90 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker)  /**   * ad_mux_machine - handle a port's mux state machine   * @port: the port we're looking at - *   */  static void ad_mux_machine(struct port *port)  {  	mux_states_t last_state; -	// keep current State Machine state to compare later if it was changed +	/* keep current State Machine state to compare later if it was +	 * changed +	 */  	last_state = port->sm_mux_state;  	if (port->sm_vars & AD_PORT_BEGIN) { -		port->sm_mux_state = AD_MUX_DETACHED;		 // next state +		port->sm_mux_state = AD_MUX_DETACHED;  	} else {  		switch (port->sm_mux_state) {  		case AD_MUX_DETACHED:  			if ((port->sm_vars & AD_PORT_SELECTED)  			    || (port->sm_vars & AD_PORT_STANDBY))  				/* if SELECTED or STANDBY */ -				port->sm_mux_state = AD_MUX_WAITING; // next state +				port->sm_mux_state = AD_MUX_WAITING;  			break;  		case AD_MUX_WAITING: -			// if SELECTED == FALSE return to DETACH state -			if (!(port->sm_vars & AD_PORT_SELECTED)) { // if UNSELECTED +			/* if SELECTED == FALSE return to DETACH state */ +			if (!(port->sm_vars & AD_PORT_SELECTED)) {  				port->sm_vars &= ~AD_PORT_READY_N; -				// in order to withhold the Selection Logic to check all ports READY_N value -				// every callback cycle to update ready variable, we check READY_N and update READY here +				/* in order to withhold the Selection Logic to +				 * check all ports READY_N value every callback +				 * cycle to update ready variable, we check +				 * READY_N and update READY here +				 */  				__set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); -				port->sm_mux_state = AD_MUX_DETACHED;	 // next state +				port->sm_mux_state = AD_MUX_DETACHED;  				break;  			} -			// check if the wait_while_timer expired +			/* check if the wait_while_timer expired */  			if (port->sm_mux_timer_counter  			    && !(--port->sm_mux_timer_counter))  				port->sm_vars |= AD_PORT_READY_N; -			// in order to withhold the selection logic to check all ports READY_N value -			// every callback cycle to update ready variable, we check READY_N and update READY here +			/* in order to withhold the selection logic to check +			 * all ports READY_N value every callback cycle to +			 * update ready variable, we check READY_N and update +			 * READY here +			 */  			__set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); -			// if the wait_while_timer expired, and the port is in READY state, move to ATTACHED state +			/* if the wait_while_timer expired, and the port is +			 * in READY state, move to ATTACHED state +			 */  			if ((port->sm_vars & AD_PORT_READY)  			    && !port->sm_mux_timer_counter) -				port->sm_mux_state = AD_MUX_ATTACHED;	 // next state +				port->sm_mux_state = AD_MUX_ATTACHED;  			break;  		case AD_MUX_ATTACHED: -			// check also if agg_select_timer expired(so the edable port will take place only after this timer) -			if ((port->sm_vars & AD_PORT_SELECTED) && (port->partner_oper.port_state & AD_STATE_SYNCHRONIZATION) && !__check_agg_selection_timer(port)) { -				port->sm_mux_state = AD_MUX_COLLECTING_DISTRIBUTING;// next state -			} else if (!(port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY)) {	  // if UNSELECTED or STANDBY +			/* check also if agg_select_timer expired (so the +			 * edable port will take place only after this timer) +			 */ +			if ((port->sm_vars & AD_PORT_SELECTED) && +			    (port->partner_oper.port_state & AD_STATE_SYNCHRONIZATION) && +			    !__check_agg_selection_timer(port)) { +				port->sm_mux_state = AD_MUX_COLLECTING_DISTRIBUTING; +			} else if (!(port->sm_vars & AD_PORT_SELECTED) || +				   (port->sm_vars & AD_PORT_STANDBY)) { +				/* if UNSELECTED or STANDBY */  				port->sm_vars &= ~AD_PORT_READY_N; -				// in order to withhold the selection logic to check all ports READY_N value -				// every callback cycle to update ready variable, we check READY_N and update READY here +				/* in order to withhold the selection logic to +				 * check all ports READY_N value every callback +				 * cycle to update ready variable, we check +				 * READY_N and update READY here +				 */  				__set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); -				port->sm_mux_state = AD_MUX_DETACHED;// next state +				port->sm_mux_state = AD_MUX_DETACHED;  			}  			break;  		case AD_MUX_COLLECTING_DISTRIBUTING: -			if (!(port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY) || -			    !(port->partner_oper.port_state & AD_STATE_SYNCHRONIZATION) -			   ) { -				port->sm_mux_state = AD_MUX_ATTACHED;// next state - +			if (!(port->sm_vars & AD_PORT_SELECTED) || +			    (port->sm_vars & AD_PORT_STANDBY) || +			    !(port->partner_oper.port_state & AD_STATE_SYNCHRONIZATION)) { +				port->sm_mux_state = AD_MUX_ATTACHED;  			} else { -				// if port state hasn't changed make -				// sure that a collecting distributing -				// port in an active aggregator is enabled +				/* if port state hasn't changed make +				 * sure that a collecting distributing +				 * port in an active aggregator is enabled +				 */  				if (port->aggregator &&  				    port->aggregator->is_active &&  				    !__port_is_enabled(port)) { @@ -975,19 +919,18 @@ static void ad_mux_machine(struct port *port)  				}  			}  			break; -		default:    //to silence the compiler +		default:  			break;  		}  	} -	// check if the state machine was changed +	/* check if the state machine was changed */  	if (port->sm_mux_state != last_state) {  		pr_debug("Mux Machine: Port=%d, Last State=%d, Curr State=%d\n",  			 port->actor_port_number, last_state,  			 port->sm_mux_state);  		switch (port->sm_mux_state) {  		case AD_MUX_DETACHED: -			__detach_bond_from_agg(port);  			port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION;  			ad_disable_collecting_distributing(port);  			port->actor_oper_port_state &= ~AD_STATE_COLLECTING; @@ -998,7 +941,6 @@ static void ad_mux_machine(struct port *port)  			port->sm_mux_timer_counter = __ad_timer_to_ticks(AD_WAIT_WHILE_TIMER, 0);  			break;  		case AD_MUX_ATTACHED: -			__attach_bond_to_agg(port);  			port->actor_oper_port_state |= AD_STATE_SYNCHRONIZATION;  			port->actor_oper_port_state &= ~AD_STATE_COLLECTING;  			port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING; @@ -1011,7 +953,7 @@ static void ad_mux_machine(struct port *port)  			ad_enable_collecting_distributing(port);  			port->ntt = true;  			break; -		default:    //to silence the compiler +		default:  			break;  		}  	} @@ -1030,59 +972,63 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)  {  	rx_states_t last_state; -	// keep current State Machine state to compare later if it was changed +	/* keep current State Machine state to compare later if it was +	 * changed +	 */  	last_state = port->sm_rx_state; -	// check if state machine should change state -	// first, check if port was reinitialized +	/* check if state machine should change state */ + +	/* first, check if port was reinitialized */  	if (port->sm_vars & AD_PORT_BEGIN) -		/* next state */  		port->sm_rx_state = AD_RX_INITIALIZE; -	// check if port is not enabled +	/* check if port is not enabled */  	else if (!(port->sm_vars & AD_PORT_BEGIN)  		 && !port->is_enabled && !(port->sm_vars & AD_PORT_MOVED)) -		/* next state */  		port->sm_rx_state = AD_RX_PORT_DISABLED; -	// check if new lacpdu arrived -	else if (lacpdu && ((port->sm_rx_state == AD_RX_EXPIRED) || (port->sm_rx_state == AD_RX_DEFAULTED) || (port->sm_rx_state == AD_RX_CURRENT))) { -		port->sm_rx_timer_counter = 0; // zero timer +	/* check if new lacpdu arrived */ +	else if (lacpdu && ((port->sm_rx_state == AD_RX_EXPIRED) || +		 (port->sm_rx_state == AD_RX_DEFAULTED) || +		 (port->sm_rx_state == AD_RX_CURRENT))) { +		port->sm_rx_timer_counter = 0;  		port->sm_rx_state = AD_RX_CURRENT;  	} else { -		// if timer is on, and if it is expired -		if (port->sm_rx_timer_counter && !(--port->sm_rx_timer_counter)) { +		/* if timer is on, and if it is expired */ +		if (port->sm_rx_timer_counter && +		    !(--port->sm_rx_timer_counter)) {  			switch (port->sm_rx_state) {  			case AD_RX_EXPIRED: -				port->sm_rx_state = AD_RX_DEFAULTED;		// next state +				port->sm_rx_state = AD_RX_DEFAULTED;  				break;  			case AD_RX_CURRENT: -				port->sm_rx_state = AD_RX_EXPIRED;	    // next state +				port->sm_rx_state = AD_RX_EXPIRED;  				break; -			default:    //to silence the compiler +			default:  				break;  			}  		} else { -			// if no lacpdu arrived and no timer is on +			/* if no lacpdu arrived and no timer is on */  			switch (port->sm_rx_state) {  			case AD_RX_PORT_DISABLED:  				if (port->sm_vars & AD_PORT_MOVED) -					port->sm_rx_state = AD_RX_INITIALIZE;	    // next state +					port->sm_rx_state = AD_RX_INITIALIZE;  				else if (port->is_enabled  					 && (port->sm_vars  					     & AD_PORT_LACP_ENABLED)) -					port->sm_rx_state = AD_RX_EXPIRED;	// next state +					port->sm_rx_state = AD_RX_EXPIRED;  				else if (port->is_enabled  					 && ((port->sm_vars  					      & AD_PORT_LACP_ENABLED) == 0)) -					port->sm_rx_state = AD_RX_LACP_DISABLED;    // next state +					port->sm_rx_state = AD_RX_LACP_DISABLED;  				break; -			default:    //to silence the compiler +			default:  				break;  			}  		}  	} -	// check if the State machine was changed or new lacpdu arrived +	/* check if the State machine was changed or new lacpdu arrived */  	if ((port->sm_rx_state != last_state) || (lacpdu)) {  		pr_debug("Rx Machine: Port=%d, Last State=%d, Curr State=%d\n",  			 port->actor_port_number, last_state, @@ -1097,10 +1043,9 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)  			__record_default(port);  			port->actor_oper_port_state &= ~AD_STATE_EXPIRED;  			port->sm_vars &= ~AD_PORT_MOVED; -			port->sm_rx_state = AD_RX_PORT_DISABLED;	// next state - -			/*- Fall Through -*/ +			port->sm_rx_state = AD_RX_PORT_DISABLED; +			/* Fall Through */  		case AD_RX_PORT_DISABLED:  			port->sm_vars &= ~AD_PORT_MATCHED;  			break; @@ -1112,13 +1057,15 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)  			port->actor_oper_port_state &= ~AD_STATE_EXPIRED;  			break;  		case AD_RX_EXPIRED: -			//Reset of the Synchronization flag. (Standard 43.4.12) -			//This reset cause to disable this port in the COLLECTING_DISTRIBUTING state of the -			//mux machine in case of EXPIRED even if LINK_DOWN didn't arrive for the port. +			/* Reset of the Synchronization flag (Standard 43.4.12) +			 * This reset cause to disable this port in the +			 * COLLECTING_DISTRIBUTING state of the mux machine in +			 * case of EXPIRED even if LINK_DOWN didn't arrive for +			 * the port. +			 */  			port->partner_oper.port_state &= ~AD_STATE_SYNCHRONIZATION;  			port->sm_vars &= ~AD_PORT_MATCHED; -			port->partner_oper.port_state |= -				AD_STATE_LACP_ACTIVITY; +			port->partner_oper.port_state |= AD_STATE_LACP_ACTIVITY;  			port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(AD_SHORT_TIMEOUT));  			port->actor_oper_port_state |= AD_STATE_EXPIRED;  			break; @@ -1129,12 +1076,13 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)  			port->actor_oper_port_state &= ~AD_STATE_EXPIRED;  			break;  		case AD_RX_CURRENT: -			// detect loopback situation -			if (!MAC_ADDRESS_COMPARE(&(lacpdu->actor_system), &(port->actor_system))) { -				// INFO_RECEIVED_LOOPBACK_FRAMES -				pr_err("%s: An illegal loopback occurred on adapter (%s).\n" +			/* detect loopback situation */ +			if (MAC_ADDRESS_EQUAL(&(lacpdu->actor_system), +					      &(port->actor_system))) { +				pr_err("%s: An illegal loopback occurred on adapter (%s)\n"  				       "Check the configuration to verify that all adapters are connected to 802.3ad compliant switch ports\n", -				       port->slave->bond->dev->name, port->slave->dev->name); +				       port->slave->bond->dev->name, +				       port->slave->dev->name);  				return;  			}  			__update_selected(lacpdu, port); @@ -1143,7 +1091,7 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)  			port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(port->actor_oper_port_state & AD_STATE_LACP_TIMEOUT));  			port->actor_oper_port_state &= ~AD_STATE_EXPIRED;  			break; -		default:    //to silence the compiler +		default:  			break;  		}  	} @@ -1152,13 +1100,14 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)  /**   * ad_tx_machine - handle a port's tx state machine   * @port: the port we're looking at - *   */  static void ad_tx_machine(struct port *port)  { -	// check if tx timer expired, to verify that we do not send more than 3 packets per second +	/* check if tx timer expired, to verify that we do not send more than +	 * 3 packets per second +	 */  	if (port->sm_tx_timer_counter && !(--port->sm_tx_timer_counter)) { -		// check if there is something to send +		/* check if there is something to send */  		if (port->ntt && (port->sm_vars & AD_PORT_LACP_ENABLED)) {  			__update_lacpdu_from_port(port); @@ -1166,14 +1115,16 @@ static void ad_tx_machine(struct port *port)  				pr_debug("Sent LACPDU on port %d\n",  					 port->actor_port_number); -				/* mark ntt as false, so it will not be sent again until -				   demanded */ +				/* mark ntt as false, so it will not be sent +				 * again until demanded +				 */  				port->ntt = false;  			}  		} -		// restart tx timer(to verify that we will not exceed AD_MAX_TX_IN_SECOND -		port->sm_tx_timer_counter = -			ad_ticks_per_sec/AD_MAX_TX_IN_SECOND; +		/* restart tx timer(to verify that we will not exceed +		 * AD_MAX_TX_IN_SECOND +		 */ +		port->sm_tx_timer_counter = ad_ticks_per_sec/AD_MAX_TX_IN_SECOND;  	}  } @@ -1187,76 +1138,79 @@ static void ad_periodic_machine(struct port *port)  {  	periodic_states_t last_state; -	// keep current state machine state to compare later if it was changed +	/* keep current state machine state to compare later if it was changed */  	last_state = port->sm_periodic_state; -	// check if port was reinitialized +	/* check if port was reinitialized */  	if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) ||  	    (!(port->actor_oper_port_state & AD_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & AD_STATE_LACP_ACTIVITY))  	   ) { -		port->sm_periodic_state = AD_NO_PERIODIC;	     // next state +		port->sm_periodic_state = AD_NO_PERIODIC;  	} -	// check if state machine should change state +	/* check if state machine should change state */  	else if (port->sm_periodic_timer_counter) { -		// check if periodic state machine expired +		/* check if periodic state machine expired */  		if (!(--port->sm_periodic_timer_counter)) { -			// if expired then do tx -			port->sm_periodic_state = AD_PERIODIC_TX;    // next state +			/* if expired then do tx */ +			port->sm_periodic_state = AD_PERIODIC_TX;  		} else { -			// If not expired, check if there is some new timeout parameter from the partner state +			/* If not expired, check if there is some new timeout +			 * parameter from the partner state +			 */  			switch (port->sm_periodic_state) {  			case AD_FAST_PERIODIC:  				if (!(port->partner_oper.port_state  				      & AD_STATE_LACP_TIMEOUT)) -					port->sm_periodic_state = AD_SLOW_PERIODIC;  // next state +					port->sm_periodic_state = AD_SLOW_PERIODIC;  				break;  			case AD_SLOW_PERIODIC:  				if ((port->partner_oper.port_state & AD_STATE_LACP_TIMEOUT)) { -					// stop current timer  					port->sm_periodic_timer_counter = 0; -					port->sm_periodic_state = AD_PERIODIC_TX;	 // next state +					port->sm_periodic_state = AD_PERIODIC_TX;  				}  				break; -			default:    //to silence the compiler +			default:  				break;  			}  		}  	} else {  		switch (port->sm_periodic_state) {  		case AD_NO_PERIODIC: -			port->sm_periodic_state = AD_FAST_PERIODIC;	 // next state +			port->sm_periodic_state = AD_FAST_PERIODIC;  			break;  		case AD_PERIODIC_TX: -			if (!(port->partner_oper.port_state -			      & AD_STATE_LACP_TIMEOUT)) -				port->sm_periodic_state = AD_SLOW_PERIODIC;  // next state +			if (!(port->partner_oper.port_state & +			    AD_STATE_LACP_TIMEOUT)) +				port->sm_periodic_state = AD_SLOW_PERIODIC;  			else -				port->sm_periodic_state = AD_FAST_PERIODIC;  // next state +				port->sm_periodic_state = AD_FAST_PERIODIC;  			break; -		default:    //to silence the compiler +		default:  			break;  		}  	} -	// check if the state machine was changed +	/* check if the state machine was changed */  	if (port->sm_periodic_state != last_state) {  		pr_debug("Periodic Machine: Port=%d, Last State=%d, Curr State=%d\n",  			 port->actor_port_number, last_state,  			 port->sm_periodic_state);  		switch (port->sm_periodic_state) {  		case AD_NO_PERIODIC: -			port->sm_periodic_timer_counter = 0;	   // zero timer +			port->sm_periodic_timer_counter = 0;  			break;  		case AD_FAST_PERIODIC: -			port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_FAST_PERIODIC_TIME))-1; // decrement 1 tick we lost in the PERIODIC_TX cycle +			/* decrement 1 tick we lost in the PERIODIC_TX cycle */ +			port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_FAST_PERIODIC_TIME))-1;  			break;  		case AD_SLOW_PERIODIC: -			port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_SLOW_PERIODIC_TIME))-1; // decrement 1 tick we lost in the PERIODIC_TX cycle +			/* decrement 1 tick we lost in the PERIODIC_TX cycle */ +			port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_SLOW_PERIODIC_TIME))-1;  			break;  		case AD_PERIODIC_TX:  			port->ntt = true;  			break; -		default:    //to silence the compiler +		default:  			break;  		}  	} @@ -1274,30 +1228,43 @@ static void ad_port_selection_logic(struct port *port)  {  	struct aggregator *aggregator, *free_aggregator = NULL, *temp_aggregator;  	struct port *last_port = NULL, *curr_port; +	struct list_head *iter; +	struct bonding *bond; +	struct slave *slave;  	int found = 0; -	// if the port is already Selected, do nothing +	/* if the port is already Selected, do nothing */  	if (port->sm_vars & AD_PORT_SELECTED)  		return; -	// if the port is connected to other aggregator, detach it +	bond = __get_bond_by_port(port); + +	/* if the port is connected to other aggregator, detach it */  	if (port->aggregator) { -		// detach the port from its former aggregator +		/* detach the port from its former aggregator */  		temp_aggregator = port->aggregator;  		for (curr_port = temp_aggregator->lag_ports; curr_port;  		     last_port = curr_port, -			     curr_port = curr_port->next_port_in_aggregator) { +		     curr_port = curr_port->next_port_in_aggregator) {  			if (curr_port == port) {  				temp_aggregator->num_of_ports--; -				if (!last_port) {// if it is the first port attached to the aggregator +				/* if it is the first port attached to the +				 * aggregator +				 */ +				if (!last_port) {  					temp_aggregator->lag_ports =  						port->next_port_in_aggregator; -				} else {// not the first port attached to the aggregator +				} else { +					/* not the first port attached to the +					 * aggregator +					 */  					last_port->next_port_in_aggregator =  						port->next_port_in_aggregator;  				} -				// clear the port's relations to this aggregator +				/* clear the port's relations to this +				 * aggregator +				 */  				port->aggregator = NULL;  				port->next_port_in_aggregator = NULL;  				port->actor_port_aggregator_identifier = 0; @@ -1305,41 +1272,46 @@ static void ad_port_selection_logic(struct port *port)  				pr_debug("Port %d left LAG %d\n",  					 port->actor_port_number,  					 temp_aggregator->aggregator_identifier); -				// if the aggregator is empty, clear its parameters, and set it ready to be attached +				/* if the aggregator is empty, clear its +				 * parameters, and set it ready to be attached +				 */  				if (!temp_aggregator->lag_ports)  					ad_clear_agg(temp_aggregator);  				break;  			}  		} -		if (!curr_port) { // meaning: the port was related to an aggregator but was not on the aggregator port list -			pr_warning("%s: Warning: Port %d (on %s) was related to aggregator %d but was not on its port list\n", -				   port->slave->bond->dev->name, -				   port->actor_port_number, -				   port->slave->dev->name, -				   port->aggregator->aggregator_identifier); +		if (!curr_port) { +			/* meaning: the port was related to an aggregator +			 * but was not on the aggregator port list +			 */ +			pr_warn_ratelimited("%s: Warning: Port %d (on %s) was related to aggregator %d but was not on its port list\n", +					    port->slave->bond->dev->name, +					    port->actor_port_number, +					    port->slave->dev->name, +					    port->aggregator->aggregator_identifier);  		}  	} -	// search on all aggregators for a suitable aggregator for this port -	for (aggregator = __get_first_agg(port); aggregator; -	     aggregator = __get_next_agg(aggregator)) { +	/* search on all aggregators for a suitable aggregator for this port */ +	bond_for_each_slave(bond, slave, iter) { +		aggregator = &(SLAVE_AD_INFO(slave)->aggregator); -		// keep a free aggregator for later use(if needed) +		/* keep a free aggregator for later use(if needed) */  		if (!aggregator->lag_ports) {  			if (!free_aggregator)  				free_aggregator = aggregator;  			continue;  		} -		// check if current aggregator suits us -		if (((aggregator->actor_oper_aggregator_key == port->actor_oper_port_key) && // if all parameters match AND -		     !MAC_ADDRESS_COMPARE(&(aggregator->partner_system), &(port->partner_oper.system)) && +		/* check if current aggregator suits us */ +		if (((aggregator->actor_oper_aggregator_key == port->actor_oper_port_key) && /* if all parameters match AND */ +		     MAC_ADDRESS_EQUAL(&(aggregator->partner_system), &(port->partner_oper.system)) &&  		     (aggregator->partner_system_priority == port->partner_oper.system_priority) &&  		     (aggregator->partner_oper_aggregator_key == port->partner_oper.key)  		    ) && -		    ((MAC_ADDRESS_COMPARE(&(port->partner_oper.system), &(null_mac_addr)) && // partner answers -		      !aggregator->is_individual)  // but is not individual OR +		    ((!MAC_ADDRESS_EQUAL(&(port->partner_oper.system), &(null_mac_addr)) && /* partner answers */ +		      !aggregator->is_individual)  /* but is not individual OR */  		    )  		   ) { -			// attach to the founded aggregator +			/* attach to the founded aggregator */  			port->aggregator = aggregator;  			port->actor_port_aggregator_identifier =  				port->aggregator->aggregator_identifier; @@ -1350,23 +1322,26 @@ static void ad_port_selection_logic(struct port *port)  				 port->actor_port_number,  				 port->aggregator->aggregator_identifier); -			// mark this port as selected +			/* mark this port as selected */  			port->sm_vars |= AD_PORT_SELECTED;  			found = 1;  			break;  		}  	} -	// the port couldn't find an aggregator - attach it to a new aggregator +	/* the port couldn't find an aggregator - attach it to a new +	 * aggregator +	 */  	if (!found) {  		if (free_aggregator) { -			// assign port a new aggregator +			/* assign port a new aggregator */  			port->aggregator = free_aggregator;  			port->actor_port_aggregator_identifier =  				port->aggregator->aggregator_identifier; -			// update the new aggregator's parameters -			// if port was responsed from the end-user +			/* update the new aggregator's parameters +			 * if port was responsed from the end-user +			 */  			if (port->actor_oper_port_key & AD_DUPLEX_KEY_BITS)  				/* if port is full duplex */  				port->aggregator->is_individual = false; @@ -1385,7 +1360,7 @@ static void ad_port_selection_logic(struct port *port)  			port->aggregator->lag_ports = port;  			port->aggregator->num_of_ports++; -			// mark this port as selected +			/* mark this port as selected */  			port->sm_vars |= AD_PORT_SELECTED;  			pr_debug("Port %d joined LAG %d(new LAG)\n", @@ -1397,23 +1372,24 @@ static void ad_port_selection_logic(struct port *port)  			       port->actor_port_number, port->slave->dev->name);  		}  	} -	// if all aggregator's ports are READY_N == TRUE, set ready=TRUE in all aggregator's ports -	// else set ready=FALSE in all aggregator's ports -	__set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); +	/* if all aggregator's ports are READY_N == TRUE, set ready=TRUE +	 * in all aggregator's ports, else set ready=FALSE in all +	 * aggregator's ports +	 */ +	__set_agg_ports_ready(port->aggregator, +			      __agg_ports_are_ready(port->aggregator));  	aggregator = __get_first_agg(port);  	ad_agg_selection_logic(aggregator);  } -/* - * Decide if "agg" is a better choice for the new active aggregator that +/* Decide if "agg" is a better choice for the new active aggregator that   * the current best, according to the ad_select policy.   */  static struct aggregator *ad_agg_selection_test(struct aggregator *best,  						struct aggregator *curr)  { -	/* -	 * 0. If no best, select current. +	/* 0. If no best, select current.  	 *  	 * 1. If the current agg is not individual, and the best is  	 *    individual, select current. @@ -1469,9 +1445,9 @@ static struct aggregator *ad_agg_selection_test(struct aggregator *best,  		break;  	default: -		pr_warning("%s: Impossible agg select mode %d\n", -			   curr->slave->bond->dev->name, -			   __get_agg_selection_mode(curr->lag_ports)); +		pr_warn_ratelimited("%s: Impossible agg select mode %d\n", +				    curr->slave->bond->dev->name, +				    __get_agg_selection_mode(curr->lag_ports));  		break;  	} @@ -1481,10 +1457,12 @@ static struct aggregator *ad_agg_selection_test(struct aggregator *best,  static int agg_device_up(const struct aggregator *agg)  {  	struct port *port = agg->lag_ports; +  	if (!port)  		return 0; -	return (netif_running(port->slave->dev) && -		netif_carrier_ok(port->slave->dev)); + +	return netif_running(port->slave->dev) && +	       netif_carrier_ok(port->slave->dev);  }  /** @@ -1515,24 +1493,28 @@ static int agg_device_up(const struct aggregator *agg)  static void ad_agg_selection_logic(struct aggregator *agg)  {  	struct aggregator *best, *active, *origin; +	struct bonding *bond = agg->slave->bond; +	struct list_head *iter; +	struct slave *slave;  	struct port *port; +	rcu_read_lock();  	origin = agg;  	active = __get_active_agg(agg);  	best = (active && agg_device_up(active)) ? active : NULL; -	do { +	bond_for_each_slave_rcu(bond, slave, iter) { +		agg = &(SLAVE_AD_INFO(slave)->aggregator); +  		agg->is_active = 0;  		if (agg->num_of_ports && agg_device_up(agg))  			best = ad_agg_selection_test(best, agg); - -	} while ((agg = __get_next_agg(agg))); +	}  	if (best &&  	    __get_agg_selection_mode(best->lag_ports) == BOND_AD_STABLE) { -		/* -		 * For the STABLE policy, don't replace the old active +		/* For the STABLE policy, don't replace the old active  		 * aggregator if it's still active (it has an answering  		 * partner) or if both the best and active don't have an  		 * answering partner. @@ -1540,7 +1522,8 @@ static void ad_agg_selection_logic(struct aggregator *agg)  		if (active && active->lag_ports &&  		    active->lag_ports->is_enabled &&  		    (__agg_has_partner(active) || -		     (!__agg_has_partner(active) && !__agg_has_partner(best)))) { +		     (!__agg_has_partner(active) && +		     !__agg_has_partner(best)))) {  			if (!(!active->actor_oper_aggregator_key &&  			      best->actor_oper_aggregator_key)) {  				best = NULL; @@ -1554,7 +1537,7 @@ static void ad_agg_selection_logic(struct aggregator *agg)  		active->is_active = 1;  	} -	// if there is new best aggregator, activate it +	/* if there is new best aggregator, activate it */  	if (best) {  		pr_debug("best Agg=%d; P=%d; a k=%d; p k=%d; Ind=%d; Act=%d\n",  			 best->aggregator_identifier, best->num_of_ports, @@ -1565,8 +1548,8 @@ static void ad_agg_selection_logic(struct aggregator *agg)  			 best->lag_ports, best->slave,  			 best->slave ? best->slave->dev->name : "NULL"); -		for (agg = __get_first_agg(best->lag_ports); agg; -		     agg = __get_next_agg(agg)) { +		bond_for_each_slave_rcu(bond, slave, iter) { +			agg = &(SLAVE_AD_INFO(slave)->aggregator);  			pr_debug("Agg=%d; P=%d; a k=%d; p k=%d; Ind=%d; Act=%d\n",  				 agg->aggregator_identifier, agg->num_of_ports, @@ -1575,10 +1558,11 @@ static void ad_agg_selection_logic(struct aggregator *agg)  				 agg->is_individual, agg->is_active);  		} -		// check if any partner replys +		/* check if any partner replys */  		if (best->is_individual) { -			pr_warning("%s: Warning: No 802.3ad response from the link partner for any adapters in the bond\n", -				   best->slave ? best->slave->bond->dev->name : "NULL"); +			pr_warn_ratelimited("%s: Warning: No 802.3ad response from the link partner for any adapters in the bond\n", +					    best->slave ? +					    best->slave->bond->dev->name : "NULL");  		}  		best->is_active = 1; @@ -1590,7 +1574,9 @@ static void ad_agg_selection_logic(struct aggregator *agg)  			 best->partner_oper_aggregator_key,  			 best->is_individual, best->is_active); -		// disable the ports that were related to the former active_aggregator +		/* disable the ports that were related to the former +		 * active_aggregator +		 */  		if (active) {  			for (port = active->lag_ports; port;  			     port = port->next_port_in_aggregator) { @@ -1599,8 +1585,7 @@ static void ad_agg_selection_logic(struct aggregator *agg)  		}  	} -	/* -	 * if the selected aggregator is of join individuals +	/* if the selected aggregator is of join individuals  	 * (partner_system is NULL), enable their ports  	 */  	active = __get_active_agg(origin); @@ -1614,19 +1599,14 @@ static void ad_agg_selection_logic(struct aggregator *agg)  		}  	} -	if (origin->slave) { -		struct bonding *bond; +	rcu_read_unlock(); -		bond = bond_get_bond_by_slave(origin->slave); -		if (bond) -			bond_3ad_set_carrier(bond); -	} +	bond_3ad_set_carrier(bond);  }  /**   * ad_clear_agg - clear a given aggregator's parameters   * @aggregator: the aggregator we're looking at - *   */  static void ad_clear_agg(struct aggregator *aggregator)  { @@ -1650,7 +1630,6 @@ static void ad_clear_agg(struct aggregator *aggregator)  /**   * ad_initialize_agg - initialize a given aggregator's parameters   * @aggregator: the aggregator we're looking at - *   */  static void ad_initialize_agg(struct aggregator *aggregator)  { @@ -1667,7 +1646,6 @@ static void ad_initialize_agg(struct aggregator *aggregator)   * ad_initialize_port - initialize a given port's parameters   * @aggregator: the aggregator we're looking at   * @lacp_fast: boolean. whether fast periodic should be used - *   */  static void ad_initialize_port(struct port *port, int lacp_fast)  { @@ -1699,8 +1677,10 @@ static void ad_initialize_port(struct port *port, int lacp_fast)  		port->ntt = false;  		port->actor_admin_port_key = 1;  		port->actor_oper_port_key  = 1; -		port->actor_admin_port_state = AD_STATE_AGGREGATION | AD_STATE_LACP_ACTIVITY; -		port->actor_oper_port_state  = AD_STATE_AGGREGATION | AD_STATE_LACP_ACTIVITY; +		port->actor_admin_port_state = AD_STATE_AGGREGATION | +					       AD_STATE_LACP_ACTIVITY; +		port->actor_oper_port_state  = AD_STATE_AGGREGATION | +					       AD_STATE_LACP_ACTIVITY;  		if (lacp_fast)  			port->actor_oper_port_state |= AD_STATE_LACP_TIMEOUT; @@ -1709,7 +1689,7 @@ static void ad_initialize_port(struct port *port, int lacp_fast)  		memcpy(&port->partner_oper, &tmpl, sizeof(tmpl));  		port->is_enabled = true; -		// ****** private parameters ****** +		/* private parameters */  		port->sm_vars = 0x3;  		port->sm_rx_state = 0;  		port->sm_rx_timer_counter = 0; @@ -1747,11 +1727,12 @@ static void ad_enable_collecting_distributing(struct port *port)  /**   * ad_disable_collecting_distributing - disable a port's transmit/receive   * @port: the port we're looking at - *   */  static void ad_disable_collecting_distributing(struct port *port)  { -	if (port->aggregator && MAC_ADDRESS_COMPARE(&(port->aggregator->partner_system), &(null_mac_addr))) { +	if (port->aggregator && +	    !MAC_ADDRESS_EQUAL(&(port->aggregator->partner_system), +			       &(null_mac_addr))) {  		pr_debug("Disabling port %d(LAG %d)\n",  			 port->actor_port_number,  			 port->aggregator->aggregator_identifier); @@ -1759,66 +1740,22 @@ static void ad_disable_collecting_distributing(struct port *port)  	}  } -#if 0 -/** - * ad_marker_info_send - send a marker information frame - * @port: the port we're looking at - * - * This function does nothing since we decided not to implement send and handle - * response for marker PDU's, in this stage, but only to respond to marker - * information. - */ -static void ad_marker_info_send(struct port *port) -{ -	struct bond_marker marker; -	u16 index; - -	// fill the marker PDU with the appropriate values -	marker.subtype = 0x02; -	marker.version_number = 0x01; -	marker.tlv_type = AD_MARKER_INFORMATION_SUBTYPE; -	marker.marker_length = 0x16; -	// convert requester_port to Big Endian -	marker.requester_port = (((port->actor_port_number & 0xFF) << 8) |((u16)(port->actor_port_number & 0xFF00) >> 8)); -	marker.requester_system = port->actor_system; -	// convert requester_port(u32) to Big Endian -	marker.requester_transaction_id = -		(((++port->transaction_id & 0xFF) << 24) -		 | ((port->transaction_id & 0xFF00) << 8) -		 | ((port->transaction_id & 0xFF0000) >> 8) -		 | ((port->transaction_id & 0xFF000000) >> 24)); -	marker.pad = 0; -	marker.tlv_type_terminator = 0x00; -	marker.terminator_length = 0x00; -	for (index = 0; index < 90; index++) -		marker.reserved_90[index] = 0; - -	// send the marker information -	if (ad_marker_send(port, &marker) >= 0) { -		pr_debug("Sent Marker Information on port %d\n", -			 port->actor_port_number); -	} -} -#endif -  /**   * ad_marker_info_received - handle receive of a Marker information frame   * @marker_info: Marker info received   * @port: the port we're looking at - *   */  static void ad_marker_info_received(struct bond_marker *marker_info,  	struct port *port)  {  	struct bond_marker marker; -	// copy the received marker data to the response marker -	//marker = *marker_info; +	/* copy the received marker data to the response marker */  	memcpy(&marker, marker_info, sizeof(struct bond_marker)); -	// change the marker subtype to marker response +	/* change the marker subtype to marker response */  	marker.tlv_type = AD_MARKER_RESPONSE_SUBTYPE; -	// send the marker response +	/* send the marker response */  	if (ad_marker_send(port, &marker) >= 0) {  		pr_debug("Sent Marker Response on port %d\n",  			 port->actor_port_number); @@ -1835,22 +1772,21 @@ static void ad_marker_info_received(struct bond_marker *marker_info,   * information.   */  static void ad_marker_response_received(struct bond_marker *marker, -	struct port *port) +					struct port *port)  { -	marker = NULL; /* just to satisfy the compiler */ -	port = NULL;  /* just to satisfy the compiler */ -	// DO NOTHING, SINCE WE DECIDED NOT TO IMPLEMENT THIS FEATURE FOR NOW +	marker = NULL; +	port = NULL; +	/* DO NOTHING, SINCE WE DECIDED NOT TO IMPLEMENT THIS FEATURE FOR NOW */  } -////////////////////////////////////////////////////////////////////////////////////// -// ================= AD exported functions to the main bonding code ================== -////////////////////////////////////////////////////////////////////////////////////// +/* ========= AD exported functions to the main bonding code ========= */ -// Check aggregators status in team every T seconds +/* Check aggregators status in team every T seconds */  #define AD_AGGREGATOR_SELECTION_TIMER  8 -/* - * bond_3ad_initiate_agg_selection(struct bonding *bond) +/** + * bond_3ad_initiate_agg_selection - initate aggregator selection + * @bond: bonding struct   *   * Set the aggregation selection timer, to initiate an agg selection in   * the very near future.  Called during first initialization, and during @@ -1861,8 +1797,6 @@ void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout)  	BOND_AD_INFO(bond).agg_select_timer = timeout;  } -static u16 aggregator_identifier; -  /**   * bond_3ad_initialize - initialize a bond's 802.3ad parameters and structures   * @bond: bonding struct to work on @@ -1872,16 +1806,18 @@ static u16 aggregator_identifier;   */  void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution)  { -	// check that the bond is not initialized yet -	if (MAC_ADDRESS_COMPARE(&(BOND_AD_INFO(bond).system.sys_mac_addr), +	/* check that the bond is not initialized yet */ +	if (!MAC_ADDRESS_EQUAL(&(BOND_AD_INFO(bond).system.sys_mac_addr),  				bond->dev->dev_addr)) { -		aggregator_identifier = 0; +		BOND_AD_INFO(bond).aggregator_identifier = 0;  		BOND_AD_INFO(bond).system.sys_priority = 0xFFFF;  		BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->dev->dev_addr); -		// initialize how many times this module is called in one second(should be about every 100ms) +		/* initialize how many times this module is called in one +		 * second (should be about every 100ms) +		 */  		ad_ticks_per_sec = tick_resolution;  		bond_3ad_initiate_agg_selection(bond, @@ -1897,63 +1833,57 @@ void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution)   * Returns:   0 on success   *          < 0 on error   */ -int bond_3ad_bind_slave(struct slave *slave) +void bond_3ad_bind_slave(struct slave *slave)  {  	struct bonding *bond = bond_get_bond_by_slave(slave);  	struct port *port;  	struct aggregator *aggregator; -	if (bond == NULL) { -		pr_err("%s: The slave %s is not attached to its bond\n", -		       slave->bond->dev->name, slave->dev->name); -		return -1; -	} - -	//check that the slave has not been initialized yet. -	if (SLAVE_AD_INFO(slave).port.slave != slave) { +	/* check that the slave has not been initialized yet. */ +	if (SLAVE_AD_INFO(slave)->port.slave != slave) { -		// port initialization -		port = &(SLAVE_AD_INFO(slave).port); +		/* port initialization */ +		port = &(SLAVE_AD_INFO(slave)->port);  		ad_initialize_port(port, bond->params.lacp_fast);  		__initialize_port_locks(slave);  		port->slave = slave; -		port->actor_port_number = SLAVE_AD_INFO(slave).id; -		// key is determined according to the link speed, duplex and user key(which is yet not supported) -		//              ------------------------------------------------------------ -		// Port key :   | User key                       |      Speed       |Duplex| -		//              ------------------------------------------------------------ -		//              16                               6               1 0 -		port->actor_admin_port_key = 0;	// initialize this parameter +		port->actor_port_number = SLAVE_AD_INFO(slave)->id; +		/* key is determined according to the link speed, duplex and user key(which +		 * is yet not supported) +		 */ +		port->actor_admin_port_key = 0;  		port->actor_admin_port_key |= __get_duplex(port);  		port->actor_admin_port_key |= (__get_link_speed(port) << 1);  		port->actor_oper_port_key = port->actor_admin_port_key; -		// if the port is not full duplex, then the port should be not lacp Enabled +		/* if the port is not full duplex, then the port should be not +		 * lacp Enabled +		 */  		if (!(port->actor_oper_port_key & AD_DUPLEX_KEY_BITS))  			port->sm_vars &= ~AD_PORT_LACP_ENABLED; -		// actor system is the bond's system +		/* actor system is the bond's system */  		port->actor_system = BOND_AD_INFO(bond).system.sys_mac_addr; -		// tx timer(to verify that no more than MAX_TX_IN_SECOND lacpdu's are sent in one second) +		/* tx timer(to verify that no more than MAX_TX_IN_SECOND +		 * lacpdu's are sent in one second) +		 */  		port->sm_tx_timer_counter = ad_ticks_per_sec/AD_MAX_TX_IN_SECOND;  		port->aggregator = NULL;  		port->next_port_in_aggregator = NULL;  		__disable_port(port); -		// aggregator initialization -		aggregator = &(SLAVE_AD_INFO(slave).aggregator); +		/* aggregator initialization */ +		aggregator = &(SLAVE_AD_INFO(slave)->aggregator);  		ad_initialize_agg(aggregator);  		aggregator->aggregator_mac_address = *((struct mac_addr *)bond->dev->dev_addr); -		aggregator->aggregator_identifier = (++aggregator_identifier); +		aggregator->aggregator_identifier = ++BOND_AD_INFO(bond).aggregator_identifier;  		aggregator->slave = slave;  		aggregator->is_active = 0;  		aggregator->num_of_ports = 0;  	} - -	return 0;  }  /** @@ -1969,17 +1899,17 @@ void bond_3ad_unbind_slave(struct slave *slave)  	struct port *port, *prev_port, *temp_port;  	struct aggregator *aggregator, *new_aggregator, *temp_aggregator;  	int select_new_active_agg = 0; +	struct bonding *bond = slave->bond; +	struct slave *slave_iter; +	struct list_head *iter; -	// find the aggregator related to this slave -	aggregator = &(SLAVE_AD_INFO(slave).aggregator); +	aggregator = &(SLAVE_AD_INFO(slave)->aggregator); +	port = &(SLAVE_AD_INFO(slave)->port); -	// find the port related to this slave -	port = &(SLAVE_AD_INFO(slave).port); - -	// if slave is null, the whole port is not initialized +	/* if slave is null, the whole port is not initialized */  	if (!port->slave) { -		pr_warning("Warning: %s: Trying to unbind an uninitialized port on %s\n", -			   slave->bond->dev->name, slave->dev->name); +		pr_warn("Warning: %s: Trying to unbind an uninitialized port on %s\n", +			slave->bond->dev->name, slave->dev->name);  		return;  	} @@ -1991,32 +1921,42 @@ void bond_3ad_unbind_slave(struct slave *slave)  	__update_lacpdu_from_port(port);  	ad_lacpdu_send(port); -	// check if this aggregator is occupied +	/* check if this aggregator is occupied */  	if (aggregator->lag_ports) { -		// check if there are other ports related to this aggregator except -		// the port related to this slave(thats ensure us that there is a -		// reason to search for new aggregator, and that we will find one -		if ((aggregator->lag_ports != port) || (aggregator->lag_ports->next_port_in_aggregator)) { -			// find new aggregator for the related port(s) -			new_aggregator = __get_first_agg(port); -			for (; new_aggregator; new_aggregator = __get_next_agg(new_aggregator)) { -				// if the new aggregator is empty, or it is connected to our port only -				if (!new_aggregator->lag_ports -				    || ((new_aggregator->lag_ports == port) -					&& !new_aggregator->lag_ports->next_port_in_aggregator)) +		/* check if there are other ports related to this aggregator +		 * except the port related to this slave(thats ensure us that +		 * there is a reason to search for new aggregator, and that we +		 * will find one +		 */ +		if ((aggregator->lag_ports != port) || +		    (aggregator->lag_ports->next_port_in_aggregator)) { +			/* find new aggregator for the related port(s) */ +			bond_for_each_slave(bond, slave_iter, iter) { +				new_aggregator = &(SLAVE_AD_INFO(slave_iter)->aggregator); +				/* if the new aggregator is empty, or it is +				 * connected to our port only +				 */ +				if (!new_aggregator->lag_ports || +				    ((new_aggregator->lag_ports == port) && +				     !new_aggregator->lag_ports->next_port_in_aggregator))  					break;  			} -			// if new aggregator found, copy the aggregator's parameters -			// and connect the related lag_ports to the new aggregator +			if (!slave_iter) +				new_aggregator = NULL; + +			/* if new aggregator found, copy the aggregator's +			 * parameters and connect the related lag_ports to the +			 * new aggregator +			 */  			if ((new_aggregator) && ((!new_aggregator->lag_ports) || ((new_aggregator->lag_ports == port) && !new_aggregator->lag_ports->next_port_in_aggregator))) { -				pr_debug("Some port(s) related to LAG %d - replaceing with LAG %d\n", +				pr_debug("Some port(s) related to LAG %d - replacing with LAG %d\n",  					 aggregator->aggregator_identifier,  					 new_aggregator->aggregator_identifier); -				if ((new_aggregator->lag_ports == port) && new_aggregator->is_active) { +				if ((new_aggregator->lag_ports == port) && +				    new_aggregator->is_active) {  					pr_info("%s: Removing an active aggregator\n",  						aggregator->slave->bond->dev->name); -					// select new active aggregator  					 select_new_active_agg = 1;  				} @@ -2032,45 +1972,54 @@ void bond_3ad_unbind_slave(struct slave *slave)  				new_aggregator->is_active = aggregator->is_active;  				new_aggregator->num_of_ports = aggregator->num_of_ports; -				// update the information that is written on the ports about the aggregator +				/* update the information that is written on +				 * the ports about the aggregator +				 */  				for (temp_port = aggregator->lag_ports; temp_port;  				     temp_port = temp_port->next_port_in_aggregator) {  					temp_port->aggregator = new_aggregator;  					temp_port->actor_port_aggregator_identifier = new_aggregator->aggregator_identifier;  				} -				// clear the aggregator  				ad_clear_agg(aggregator);  				if (select_new_active_agg)  					ad_agg_selection_logic(__get_first_agg(port));  			} else { -				pr_warning("%s: Warning: unbinding aggregator, and could not find a new aggregator for its ports\n", -					   slave->bond->dev->name); +				pr_warn("%s: Warning: unbinding aggregator, and could not find a new aggregator for its ports\n", +					slave->bond->dev->name);  			} -		} else { // in case that the only port related to this aggregator is the one we want to remove +		} else { +			/* in case that the only port related to this +			 * aggregator is the one we want to remove +			 */  			select_new_active_agg = aggregator->is_active; -			// clear the aggregator  			ad_clear_agg(aggregator);  			if (select_new_active_agg) {  				pr_info("%s: Removing an active aggregator\n",  					slave->bond->dev->name); -				// select new active aggregator -				ad_agg_selection_logic(__get_first_agg(port)); +				/* select new active aggregator */ +				temp_aggregator = __get_first_agg(port); +				if (temp_aggregator) +					ad_agg_selection_logic(temp_aggregator);  			}  		}  	}  	pr_debug("Unbinding port %d\n", port->actor_port_number); -	// find the aggregator that this port is connected to -	temp_aggregator = __get_first_agg(port); -	for (; temp_aggregator; temp_aggregator = __get_next_agg(temp_aggregator)) { + +	/* find the aggregator that this port is connected to */ +	bond_for_each_slave(bond, slave_iter, iter) { +		temp_aggregator = &(SLAVE_AD_INFO(slave_iter)->aggregator);  		prev_port = NULL; -		// search the port in the aggregator's related ports +		/* search the port in the aggregator's related ports */  		for (temp_port = temp_aggregator->lag_ports; temp_port;  		     prev_port = temp_port, -			     temp_port = temp_port->next_port_in_aggregator) { -			if (temp_port == port) { // the aggregator found - detach the port from this aggregator +		     temp_port = temp_port->next_port_in_aggregator) { +			if (temp_port == port) { +				/* the aggregator found - detach the port from +				 * this aggregator +				 */  				if (prev_port)  					prev_port->next_port_in_aggregator = temp_port->next_port_in_aggregator;  				else @@ -2078,12 +2027,11 @@ void bond_3ad_unbind_slave(struct slave *slave)  				temp_aggregator->num_of_ports--;  				if (temp_aggregator->num_of_ports == 0) {  					select_new_active_agg = temp_aggregator->is_active; -					// clear the aggregator  					ad_clear_agg(temp_aggregator);  					if (select_new_active_agg) {  						pr_info("%s: Removing an active aggregator\n",  							slave->bond->dev->name); -						// select new active aggregator +						/* select new active aggregator */  						ad_agg_selection_logic(__get_first_agg(port));  					}  				} @@ -2111,22 +2059,30 @@ void bond_3ad_state_machine_handler(struct work_struct *work)  {  	struct bonding *bond = container_of(work, struct bonding,  					    ad_work.work); -	struct port *port;  	struct aggregator *aggregator; +	struct list_head *iter; +	struct slave *slave; +	struct port *port; +	bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER;  	read_lock(&bond->lock); +	rcu_read_lock(); -	//check if there are any slaves -	if (list_empty(&bond->slave_list)) +	/* check if there are any slaves */ +	if (!bond_has_slaves(bond))  		goto re_arm; -	// check if agg_select_timer timer after initialize is timed out -	if (BOND_AD_INFO(bond).agg_select_timer && !(--BOND_AD_INFO(bond).agg_select_timer)) { -		// select the active aggregator for the bond -		if ((port = __get_first_port(bond))) { +	/* check if agg_select_timer timer after initialize is timed out */ +	if (BOND_AD_INFO(bond).agg_select_timer && +	    !(--BOND_AD_INFO(bond).agg_select_timer)) { +		slave = bond_first_slave_rcu(bond); +		port = slave ? &(SLAVE_AD_INFO(slave)->port) : NULL; + +		/* select the active aggregator for the bond */ +		if (port) {  			if (!port->slave) { -				pr_warning("%s: Warning: bond's first port is uninitialized\n", -					   bond->dev->name); +				pr_warn_ratelimited("%s: Warning: bond's first port is uninitialized\n", +						    bond->dev->name);  				goto re_arm;  			} @@ -2136,11 +2092,12 @@ void bond_3ad_state_machine_handler(struct work_struct *work)  		bond_3ad_set_carrier(bond);  	} -	// for each port run the state machines -	for (port = __get_first_port(bond); port; port = __get_next_port(port)) { +	/* for each port run the state machines */ +	bond_for_each_slave_rcu(bond, slave, iter) { +		port = &(SLAVE_AD_INFO(slave)->port);  		if (!port->slave) { -			pr_warning("%s: Warning: Found an uninitialized port\n", -				   bond->dev->name); +			pr_warn_ratelimited("%s: Warning: Found an uninitialized port\n", +					    bond->dev->name);  			goto re_arm;  		} @@ -2156,7 +2113,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)  		ad_mux_machine(port);  		ad_tx_machine(port); -		// turn off the BEGIN bit, since we already handled it +		/* turn off the BEGIN bit, since we already handled it */  		if (port->sm_vars & AD_PORT_BEGIN)  			port->sm_vars &= ~AD_PORT_BEGIN; @@ -2164,9 +2121,20 @@ void bond_3ad_state_machine_handler(struct work_struct *work)  	}  re_arm: -	queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks); - +	bond_for_each_slave_rcu(bond, slave, iter) { +		if (slave->should_notify) { +			should_notify_rtnl = BOND_SLAVE_NOTIFY_NOW; +			break; +		} +	} +	rcu_read_unlock();  	read_unlock(&bond->lock); + +	if (should_notify_rtnl && rtnl_trylock()) { +		bond_slave_state_notify(bond); +		rtnl_unlock(); +	} +	queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks);  }  /** @@ -2179,18 +2147,19 @@ re_arm:   * received frames (loopback). Since only the payload is given to this   * function, it check for loopback.   */ -static int bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u16 length) +static int bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, +				  u16 length)  {  	struct port *port;  	int ret = RX_HANDLER_ANOTHER;  	if (length >= sizeof(struct lacpdu)) { -		port = &(SLAVE_AD_INFO(slave).port); +		port = &(SLAVE_AD_INFO(slave)->port);  		if (!port->slave) { -			pr_warning("%s: Warning: port of slave %s is uninitialized\n", -				   slave->dev->name, slave->bond->dev->name); +			pr_warn_ratelimited("%s: Warning: port of slave %s is uninitialized\n", +					    slave->dev->name, slave->bond->dev->name);  			return ret;  		} @@ -2207,7 +2176,9 @@ static int bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u1  		case AD_TYPE_MARKER:  			ret = RX_HANDLER_CONSUMED; -			// No need to convert fields to Little Endian since we don't use the marker's fields. +			/* No need to convert fields to Little Endian since we +			 * don't use the marker's fields. +			 */  			switch (((struct bond_marker *)lacpdu)->tlv_type) {  			case AD_MARKER_INFORMATION_SUBTYPE: @@ -2241,22 +2212,27 @@ void bond_3ad_adapter_speed_changed(struct slave *slave)  {  	struct port *port; -	port = &(SLAVE_AD_INFO(slave).port); +	port = &(SLAVE_AD_INFO(slave)->port); -	// if slave is null, the whole port is not initialized +	/* if slave is null, the whole port is not initialized */  	if (!port->slave) { -		pr_warning("Warning: %s: speed changed for uninitialized port on %s\n", -			   slave->bond->dev->name, slave->dev->name); +		pr_warn("Warning: %s: speed changed for uninitialized port on %s\n", +			slave->bond->dev->name, slave->dev->name);  		return;  	} +	__get_state_machine_lock(port); +  	port->actor_admin_port_key &= ~AD_SPEED_KEY_BITS;  	port->actor_oper_port_key = port->actor_admin_port_key |=  		(__get_link_speed(port) << 1);  	pr_debug("Port %d changed speed\n", port->actor_port_number); -	// there is no need to reselect a new aggregator, just signal the -	// state machines to reinitialize +	/* there is no need to reselect a new aggregator, just signal the +	 * state machines to reinitialize +	 */  	port->sm_vars |= AD_PORT_BEGIN; + +	__release_state_machine_lock(port);  }  /** @@ -2269,22 +2245,27 @@ void bond_3ad_adapter_duplex_changed(struct slave *slave)  {  	struct port *port; -	port = &(SLAVE_AD_INFO(slave).port); +	port = &(SLAVE_AD_INFO(slave)->port); -	// if slave is null, the whole port is not initialized +	/* if slave is null, the whole port is not initialized */  	if (!port->slave) { -		pr_warning("%s: Warning: duplex changed for uninitialized port on %s\n", -			   slave->bond->dev->name, slave->dev->name); +		pr_warn("%s: Warning: duplex changed for uninitialized port on %s\n", +			slave->bond->dev->name, slave->dev->name);  		return;  	} +	__get_state_machine_lock(port); +  	port->actor_admin_port_key &= ~AD_DUPLEX_KEY_BITS;  	port->actor_oper_port_key = port->actor_admin_port_key |=  		__get_duplex(port);  	pr_debug("Port %d changed duplex\n", port->actor_port_number); -	// there is no need to reselect a new aggregator, just signal the -	// state machines to reinitialize +	/* there is no need to reselect a new aggregator, just signal the +	 * state machines to reinitialize +	 */  	port->sm_vars |= AD_PORT_BEGIN; + +	__release_state_machine_lock(port);  }  /** @@ -2298,17 +2279,23 @@ void bond_3ad_handle_link_change(struct slave *slave, char link)  {  	struct port *port; -	port = &(SLAVE_AD_INFO(slave).port); +	port = &(SLAVE_AD_INFO(slave)->port); -	// if slave is null, the whole port is not initialized +	/* if slave is null, the whole port is not initialized */  	if (!port->slave) { -		pr_warning("Warning: %s: link status changed for uninitialized port on %s\n", -			   slave->bond->dev->name, slave->dev->name); +		pr_warn("Warning: %s: link status changed for uninitialized port on %s\n", +			slave->bond->dev->name, slave->dev->name);  		return;  	} -	// on link down we are zeroing duplex and speed since some of the adaptors(ce1000.lan) report full duplex/speed instead of N/A(duplex) / 0(speed) -	// on link up we are forcing recheck on the duplex and speed since some of he adaptors(ce1000.lan) report +	__get_state_machine_lock(port); +	/* on link down we are zeroing duplex and speed since +	 * some of the adaptors(ce1000.lan) report full duplex/speed +	 * instead of N/A(duplex) / 0(speed). +	 * +	 * on link up we are forcing recheck on the duplex and speed since +	 * some of he adaptors(ce1000.lan) report. +	 */  	if (link == BOND_LINK_UP) {  		port->is_enabled = true;  		port->actor_admin_port_key &= ~AD_DUPLEX_KEY_BITS; @@ -2324,16 +2311,24 @@ void bond_3ad_handle_link_change(struct slave *slave, char link)  		port->actor_oper_port_key = (port->actor_admin_port_key &=  					     ~AD_SPEED_KEY_BITS);  	} -	//BOND_PRINT_DBG(("Port %d changed link status to %s", port->actor_port_number, ((link == BOND_LINK_UP)?"UP":"DOWN"))); -	// there is no need to reselect a new aggregator, just signal the -	// state machines to reinitialize +	pr_debug("Port %d changed link status to %s\n", +		 port->actor_port_number, +		 link == BOND_LINK_UP ? "UP" : "DOWN"); +	/* there is no need to reselect a new aggregator, just signal the +	 * state machines to reinitialize +	 */  	port->sm_vars |= AD_PORT_BEGIN; + +	__release_state_machine_lock(port);  } -/* - * set link state for bonding master: if we have an active - * aggregator, we're up, if not, we're down.  Presumes that we cannot - * have an active aggregator if there are no slaves with link up. +/** + * bond_3ad_set_carrier - set link state for bonding master + * @bond - bonding structure + * + * if we have an active aggregator, we're up, if not, we're down. + * Presumes that we cannot have an active aggregator if there are + * no slaves with link up.   *   * This behavior complies with IEEE 802.3 section 43.3.9.   * @@ -2344,30 +2339,32 @@ int bond_3ad_set_carrier(struct bonding *bond)  {  	struct aggregator *active;  	struct slave *first_slave; +	int ret = 1; -	first_slave = bond_first_slave(bond); -	if (!first_slave) -		return 0; -	active = __get_active_agg(&(SLAVE_AD_INFO(first_slave).aggregator)); +	rcu_read_lock(); +	first_slave = bond_first_slave_rcu(bond); +	if (!first_slave) { +		ret = 0; +		goto out; +	} +	active = __get_active_agg(&(SLAVE_AD_INFO(first_slave)->aggregator));  	if (active) {  		/* are enough slaves available to consider link up? */  		if (active->num_of_ports < bond->params.min_links) {  			if (netif_carrier_ok(bond->dev)) {  				netif_carrier_off(bond->dev); -				return 1; +				goto out;  			}  		} else if (!netif_carrier_ok(bond->dev)) {  			netif_carrier_on(bond->dev); -			return 1; +			goto out;  		} -		return 0; -	} - -	if (netif_carrier_ok(bond->dev)) { +	} else if (netif_carrier_ok(bond->dev)) {  		netif_carrier_off(bond->dev); -		return 1;  	} -	return 0; +out: +	rcu_read_unlock(); +	return ret;  }  /** @@ -2382,25 +2379,28 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond,  				   struct ad_info *ad_info)  {  	struct aggregator *aggregator = NULL; +	struct list_head *iter; +	struct slave *slave;  	struct port *port; -	for (port = __get_first_port(bond); port; port = __get_next_port(port)) { +	bond_for_each_slave_rcu(bond, slave, iter) { +		port = &(SLAVE_AD_INFO(slave)->port);  		if (port->aggregator && port->aggregator->is_active) {  			aggregator = port->aggregator;  			break;  		}  	} -	if (aggregator) { -		ad_info->aggregator_id = aggregator->aggregator_identifier; -		ad_info->ports = aggregator->num_of_ports; -		ad_info->actor_key = aggregator->actor_oper_aggregator_key; -		ad_info->partner_key = aggregator->partner_oper_aggregator_key; -		memcpy(ad_info->partner_system, aggregator->partner_system.mac_addr_value, ETH_ALEN); -		return 0; -	} +	if (!aggregator) +		return -1; -	return -1; +	ad_info->aggregator_id = aggregator->aggregator_identifier; +	ad_info->ports = aggregator->num_of_ports; +	ad_info->actor_key = aggregator->actor_oper_aggregator_key; +	ad_info->partner_key = aggregator->partner_oper_aggregator_key; +	ether_addr_copy(ad_info->partner_system, +			aggregator->partner_system.mac_addr_value); +	return 0;  }  /* Wrapper used to hold bond->lock so no slave manipulation can occur */ @@ -2408,81 +2408,79 @@ int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info)  {  	int ret; -	read_lock(&bond->lock); +	rcu_read_lock();  	ret = __bond_3ad_get_active_agg_info(bond, ad_info); -	read_unlock(&bond->lock); +	rcu_read_unlock();  	return ret;  }  int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)  { -	struct slave *slave, *start_at;  	struct bonding *bond = netdev_priv(dev); -	int slave_agg_no; +	struct slave *slave, *first_ok_slave; +	struct aggregator *agg; +	struct ad_info ad_info; +	struct list_head *iter;  	int slaves_in_agg; +	int slave_agg_no;  	int agg_id; -	int i; -	struct ad_info ad_info; -	int res = 1; -	read_lock(&bond->lock);  	if (__bond_3ad_get_active_agg_info(bond, &ad_info)) {  		pr_debug("%s: Error: __bond_3ad_get_active_agg_info failed\n",  			 dev->name); -		goto out; +		goto err_free;  	}  	slaves_in_agg = ad_info.ports;  	agg_id = ad_info.aggregator_id;  	if (slaves_in_agg == 0) { -		/*the aggregator is empty*/  		pr_debug("%s: Error: active aggregator is empty\n", dev->name); -		goto out; +		goto err_free;  	} -	slave_agg_no = bond->xmit_hash_policy(skb, slaves_in_agg); +	slave_agg_no = bond_xmit_hash(bond, skb) % slaves_in_agg; +	first_ok_slave = NULL; -	bond_for_each_slave(bond, slave) { -		struct aggregator *agg = SLAVE_AD_INFO(slave).port.aggregator; +	bond_for_each_slave_rcu(bond, slave, iter) { +		agg = SLAVE_AD_INFO(slave)->port.aggregator; +		if (!agg || agg->aggregator_identifier != agg_id) +			continue; -		if (agg && (agg->aggregator_identifier == agg_id)) { +		if (slave_agg_no >= 0) { +			if (!first_ok_slave && bond_slave_can_tx(slave)) +				first_ok_slave = slave;  			slave_agg_no--; -			if (slave_agg_no < 0) -				break; +			continue; +		} + +		if (bond_slave_can_tx(slave)) { +			bond_dev_queue_xmit(bond, skb, slave->dev); +			goto out;  		}  	}  	if (slave_agg_no >= 0) {  		pr_err("%s: Error: Couldn't find a slave to tx on for aggregator ID %d\n",  		       dev->name, agg_id); -		goto out; +		goto err_free;  	} -	start_at = slave; - -	bond_for_each_slave_from(bond, slave, i, start_at) { -		int slave_agg_id = 0; -		struct aggregator *agg = SLAVE_AD_INFO(slave).port.aggregator; - -		if (agg) -			slave_agg_id = agg->aggregator_identifier; - -		if (SLAVE_IS_OK(slave) && agg && (slave_agg_id == agg_id)) { -			res = bond_dev_queue_xmit(bond, skb, slave->dev); -			break; -		} -	} +	/* we couldn't find any suitable slave after the agg_no, so use the +	 * first suitable found, if found. +	 */ +	if (first_ok_slave) +		bond_dev_queue_xmit(bond, skb, first_ok_slave->dev); +	else +		goto err_free;  out: -	read_unlock(&bond->lock); -	if (res) { -		/* no suitable interface, frame not sent */ -		kfree_skb(skb); -	} -  	return NETDEV_TX_OK; +err_free: +	/* no suitable interface, frame not sent */ +	dev_kfree_skb_any(skb); +	goto out;  }  int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond, @@ -2504,7 +2502,10 @@ int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,  	return ret;  } -/* +/** + * bond_3ad_update_lacp_rate - change the lacp rate + * @bond - bonding struct + *   * When modify lacp_rate parameter via sysfs,   * update actor_oper_port_state of each port.   * @@ -2515,12 +2516,13 @@ int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,  void bond_3ad_update_lacp_rate(struct bonding *bond)  {  	struct port *port = NULL; +	struct list_head *iter;  	struct slave *slave;  	int lacp_fast;  	lacp_fast = bond->params.lacp_fast; -	bond_for_each_slave(bond, slave) { -		port = &(SLAVE_AD_INFO(slave).port); +	bond_for_each_slave(bond, slave, iter) { +		port = &(SLAVE_AD_INFO(slave)->port);  		__get_state_machine_lock(port);  		if (lacp_fast)  			port->actor_oper_port_state |= AD_STATE_LACP_TIMEOUT; diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h index 5d91ad0cc04..bb03b1df2f3 100644 --- a/drivers/net/bonding/bond_3ad.h +++ b/drivers/net/bonding/bond_3ad.h @@ -28,7 +28,7 @@  #include <linux/netdevice.h>  #include <linux/if_ether.h> -// General definitions +/* General definitions */  #define PKT_TYPE_LACPDU         cpu_to_be16(ETH_P_SLOW)  #define AD_TIMER_INTERVAL       100 /*msec*/ @@ -47,54 +47,54 @@ enum {  	BOND_AD_COUNT = 2,  }; -// rx machine states(43.4.11 in the 802.3ad standard) +/* rx machine states(43.4.11 in the 802.3ad standard) */  typedef enum {  	AD_RX_DUMMY, -	AD_RX_INITIALIZE,     // rx Machine -	AD_RX_PORT_DISABLED,  // rx Machine -	AD_RX_LACP_DISABLED,  // rx Machine -	AD_RX_EXPIRED,	      // rx Machine -	AD_RX_DEFAULTED,      // rx Machine -	AD_RX_CURRENT	      // rx Machine +	AD_RX_INITIALIZE,	/* rx Machine */ +	AD_RX_PORT_DISABLED,	/* rx Machine */ +	AD_RX_LACP_DISABLED,	/* rx Machine */ +	AD_RX_EXPIRED,		/* rx Machine */ +	AD_RX_DEFAULTED,	/* rx Machine */ +	AD_RX_CURRENT		/* rx Machine */  } rx_states_t; -// periodic machine states(43.4.12 in the 802.3ad standard) +/* periodic machine states(43.4.12 in the 802.3ad standard) */  typedef enum {  	AD_PERIODIC_DUMMY, -	AD_NO_PERIODIC,	       // periodic machine -	AD_FAST_PERIODIC,      // periodic machine -	AD_SLOW_PERIODIC,      // periodic machine -	AD_PERIODIC_TX	   // periodic machine +	AD_NO_PERIODIC,		/* periodic machine */ +	AD_FAST_PERIODIC,	/* periodic machine */ +	AD_SLOW_PERIODIC,	/* periodic machine */ +	AD_PERIODIC_TX		/* periodic machine */  } periodic_states_t; -// mux machine states(43.4.13 in the 802.3ad standard) +/* mux machine states(43.4.13 in the 802.3ad standard) */  typedef enum {  	AD_MUX_DUMMY, -	AD_MUX_DETACHED,       // mux machine -	AD_MUX_WAITING,	       // mux machine -	AD_MUX_ATTACHED,       // mux machine -	AD_MUX_COLLECTING_DISTRIBUTING // mux machine +	AD_MUX_DETACHED,	/* mux machine */ +	AD_MUX_WAITING,		/* mux machine */ +	AD_MUX_ATTACHED,	/* mux machine */ +	AD_MUX_COLLECTING_DISTRIBUTING	/* mux machine */  } mux_states_t; -// tx machine states(43.4.15 in the 802.3ad standard) +/* tx machine states(43.4.15 in the 802.3ad standard) */  typedef enum {  	AD_TX_DUMMY, -	AD_TRANSMIT	   // tx Machine +	AD_TRANSMIT		/* tx Machine */  } tx_states_t; -// rx indication types +/* rx indication types */  typedef enum { -	AD_TYPE_LACPDU = 1,    // type lacpdu -	AD_TYPE_MARKER	   // type marker +	AD_TYPE_LACPDU = 1,	/* type lacpdu */ +	AD_TYPE_MARKER		/* type marker */  } pdu_type_t; -// rx marker indication types +/* rx marker indication types */  typedef enum { -	AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype -	AD_MARKER_RESPONSE_SUBTYPE     // marker response subtype +	AD_MARKER_INFORMATION_SUBTYPE = 1,	/* marker imformation subtype */ +	AD_MARKER_RESPONSE_SUBTYPE		/* marker response subtype */  } bond_marker_subtype_t; -// timers types(43.4.9 in the 802.3ad standard) +/* timers types(43.4.9 in the 802.3ad standard) */  typedef enum {  	AD_CURRENT_WHILE_TIMER,  	AD_ACTOR_CHURN_TIMER, @@ -105,35 +105,35 @@ typedef enum {  #pragma pack(1) -// Link Aggregation Control Protocol(LACP) data unit structure(43.4.2.2 in the 802.3ad standard) +/* Link Aggregation Control Protocol(LACP) data unit structure(43.4.2.2 in the 802.3ad standard) */  typedef struct lacpdu { -	u8 subtype;		     // = LACP(= 0x01) +	u8 subtype;		/* = LACP(= 0x01) */  	u8 version_number; -	u8 tlv_type_actor_info;	      // = actor information(type/length/value) -	u8 actor_information_length; // = 20 +	u8 tlv_type_actor_info;	/* = actor information(type/length/value) */ +	u8 actor_information_length;	/* = 20 */  	__be16 actor_system_priority;  	struct mac_addr actor_system;  	__be16 actor_key;  	__be16 actor_port_priority;  	__be16 actor_port;  	u8 actor_state; -	u8 reserved_3_1[3];	     // = 0 -	u8 tlv_type_partner_info;     // = partner information -	u8 partner_information_length;	 // = 20 +	u8 reserved_3_1[3];		/* = 0 */ +	u8 tlv_type_partner_info;	/* = partner information */ +	u8 partner_information_length;	/* = 20 */  	__be16 partner_system_priority;  	struct mac_addr partner_system;  	__be16 partner_key;  	__be16 partner_port_priority;  	__be16 partner_port;  	u8 partner_state; -	u8 reserved_3_2[3];	     // = 0 -	u8 tlv_type_collector_info;	  // = collector information -	u8 collector_information_length; // = 16 +	u8 reserved_3_2[3];		/* = 0 */ +	u8 tlv_type_collector_info;	/* = collector information */ +	u8 collector_information_length;/* = 16 */  	__be16 collector_max_delay;  	u8 reserved_12[12]; -	u8 tlv_type_terminator;	     // = terminator -	u8 terminator_length;	     // = 0 -	u8 reserved_50[50];	     // = 0 +	u8 tlv_type_terminator;		/* = terminator */ +	u8 terminator_length;		/* = 0 */ +	u8 reserved_50[50];		/* = 0 */  } __packed lacpdu_t;  typedef struct lacpdu_header { @@ -141,20 +141,20 @@ typedef struct lacpdu_header {  	struct lacpdu lacpdu;  } __packed lacpdu_header_t; -// Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard) +/* Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard) */  typedef struct bond_marker { -	u8 subtype;		 //  = 0x02  (marker PDU) -	u8 version_number;	 //  = 0x01 -	u8 tlv_type;		 //  = 0x01  (marker information) -	//  = 0x02  (marker response information) -	u8 marker_length;	 //  = 0x16 -	u16 requester_port;	 //   The number assigned to the port by the requester -	struct mac_addr requester_system;      //   The requester's system id -	u32 requester_transaction_id;	//   The transaction id allocated by the requester, -	u16 pad;		 //  = 0 -	u8 tlv_type_terminator;	     //  = 0x00 -	u8 terminator_length;	     //  = 0x00 -	u8 reserved_90[90];	     //  = 0 +	u8 subtype;		/* = 0x02  (marker PDU) */ +	u8 version_number;	/* = 0x01 */ +	u8 tlv_type;		/* = 0x01  (marker information) */ +	/* = 0x02  (marker response information) */ +	u8 marker_length;	/* = 0x16 */ +	u16 requester_port;	/* The number assigned to the port by the requester */ +	struct mac_addr requester_system;	/* The requester's system id */ +	u32 requester_transaction_id;		/* The transaction id allocated by the requester, */ +	u16 pad;		/* = 0 */ +	u8 tlv_type_terminator;	/* = 0x00 */ +	u8 terminator_length;	/* = 0x00 */ +	u8 reserved_90[90];	/* = 0 */  } __packed bond_marker_t;  typedef struct bond_marker_header { @@ -173,7 +173,7 @@ struct port;  #pragma pack(8)  #endif -// aggregator structure(43.4.5 in the 802.3ad standard) +/* aggregator structure(43.4.5 in the 802.3ad standard) */  typedef struct aggregator {  	struct mac_addr aggregator_mac_address;  	u16 aggregator_identifier; @@ -183,12 +183,12 @@ typedef struct aggregator {  	struct mac_addr partner_system;  	u16 partner_system_priority;  	u16 partner_oper_aggregator_key; -	u16 receive_state;		// BOOLEAN -	u16 transmit_state;		// BOOLEAN +	u16 receive_state;	/* BOOLEAN */ +	u16 transmit_state;	/* BOOLEAN */  	struct port *lag_ports; -	// ****** PRIVATE PARAMETERS ****** -	struct slave *slave;	    // pointer to the bond slave that this aggregator belongs to -	u16 is_active;	    // BOOLEAN. Indicates if this aggregator is active +	/* ****** PRIVATE PARAMETERS ****** */ +	struct slave *slave;	/* pointer to the bond slave that this aggregator belongs to */ +	u16 is_active;		/* BOOLEAN. Indicates if this aggregator is active */  	u16 num_of_ports;  } aggregator_t; @@ -201,12 +201,12 @@ struct port_params {  	u16 port_state;  }; -// port structure(43.4.6 in the 802.3ad standard) +/* port structure(43.4.6 in the 802.3ad standard) */  typedef struct port {  	u16 actor_port_number;  	u16 actor_port_priority; -	struct mac_addr actor_system;	       // This parameter is added here although it is not specified in the standard, just for simplification -	u16 actor_system_priority;	 // This parameter is added here although it is not specified in the standard, just for simplification +	struct mac_addr actor_system;	/* This parameter is added here although it is not specified in the standard, just for simplification */ +	u16 actor_system_priority;	/* This parameter is added here although it is not specified in the standard, just for simplification */  	u16 actor_port_aggregator_identifier;  	bool ntt;  	u16 actor_admin_port_key; @@ -219,24 +219,24 @@ typedef struct port {  	bool is_enabled; -	// ****** PRIVATE PARAMETERS ****** -	u16 sm_vars;	      // all state machines variables for this port -	rx_states_t sm_rx_state;	// state machine rx state -	u16 sm_rx_timer_counter;    // state machine rx timer counter -	periodic_states_t sm_periodic_state;// state machine periodic state -	u16 sm_periodic_timer_counter;	// state machine periodic timer counter -	mux_states_t sm_mux_state;	// state machine mux state -	u16 sm_mux_timer_counter;   // state machine mux timer counter -	tx_states_t sm_tx_state;	// state machine tx state -	u16 sm_tx_timer_counter;    // state machine tx timer counter(allways on - enter to transmit state 3 time per second) -	struct slave *slave;	    // pointer to the bond slave that this port belongs to -	struct aggregator *aggregator;	   // pointer to an aggregator that this port related to -	struct port *next_port_in_aggregator; // Next port on the linked list of the parent aggregator -	u32 transaction_id;	    // continuous number for identification of Marker PDU's; -	struct lacpdu lacpdu;	       // the lacpdu that will be sent for this port +	/* ****** PRIVATE PARAMETERS ****** */ +	u16 sm_vars;		/* all state machines variables for this port */ +	rx_states_t sm_rx_state;	/* state machine rx state */ +	u16 sm_rx_timer_counter;	/* state machine rx timer counter */ +	periodic_states_t sm_periodic_state;	/* state machine periodic state */ +	u16 sm_periodic_timer_counter;	/* state machine periodic timer counter */ +	mux_states_t sm_mux_state;	/* state machine mux state */ +	u16 sm_mux_timer_counter;	/* state machine mux timer counter */ +	tx_states_t sm_tx_state;	/* state machine tx state */ +	u16 sm_tx_timer_counter;	/* state machine tx timer counter(allways on - enter to transmit state 3 time per second) */ +	struct slave *slave;		/* pointer to the bond slave that this port belongs to */ +	struct aggregator *aggregator;	/* pointer to an aggregator that this port related to */ +	struct port *next_port_in_aggregator;	/* Next port on the linked list of the parent aggregator */ +	u32 transaction_id;		/* continuous number for identification of Marker PDU's; */ +	struct lacpdu lacpdu;		/* the lacpdu that will be sent for this port */  } port_t; -// system structure +/* system structure */  struct ad_system {  	u16 sys_priority;  	struct mac_addr sys_mac_addr; @@ -246,26 +246,26 @@ struct ad_system {  #pragma pack()  #endif -// ================= AD Exported structures to the main bonding code ================== +/* ========== AD Exported structures to the main bonding code ========== */  #define BOND_AD_INFO(bond)   ((bond)->ad_info)  #define SLAVE_AD_INFO(slave) ((slave)->ad_info)  struct ad_bond_info { -	struct ad_system system;	    /* 802.3ad system structure */ -	u32 agg_select_timer;	    // Timer to select aggregator after all adapter's hand shakes +	struct ad_system system;	/* 802.3ad system structure */ +	u32 agg_select_timer;		/* Timer to select aggregator after all adapter's hand shakes */ +	u16 aggregator_identifier;  };  struct ad_slave_info { -	struct aggregator aggregator;	    // 802.3ad aggregator structure -	struct port port;		    // 802.3ad port structure -	spinlock_t state_machine_lock; /* mutex state machines vs. -					  incoming LACPDU */ +	struct aggregator aggregator;	/* 802.3ad aggregator structure */ +	struct port port;		/* 802.3ad port structure */ +	spinlock_t state_machine_lock;	/* mutex state machines vs. incoming LACPDU */  	u16 id;  }; -// ================= AD Exported functions to the main bonding code ================== +/* ========== AD Exported functions to the main bonding code ========== */  void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution); -int  bond_3ad_bind_slave(struct slave *slave); +void bond_3ad_bind_slave(struct slave *slave);  void bond_3ad_unbind_slave(struct slave *slave);  void bond_3ad_state_machine_handler(struct work_struct *);  void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout); @@ -280,5 +280,5 @@ int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,  			 struct slave *slave);  int bond_3ad_set_carrier(struct bonding *bond);  void bond_3ad_update_lacp_rate(struct bonding *bond); -#endif //__BOND_3AD_H__ +#endif /* __BOND_3AD_H__ */ diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 91f179d5135..76c0dade233 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -12,8 +12,7 @@   * for more details.   *   * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA. + * with this program; if not, see <http://www.gnu.org/licenses/>.   *   * The full GNU General Public License is included in this distribution in the   * file called LICENSE. @@ -83,7 +82,8 @@ static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)  }  /* Forward declaration */ -static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]); +static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], +				      bool strict_match);  static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp);  static void rlb_src_unlink(struct bonding *bond, u32 index);  static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, @@ -94,9 +94,8 @@ static inline u8 _simple_hash(const u8 *hash_start, int hash_size)  	int i;  	u8 hash = 0; -	for (i = 0; i < hash_size; i++) { +	for (i = 0; i < hash_size; i++)  		hash ^= hash_start[i]; -	}  	return hash;  } @@ -191,9 +190,8 @@ static int tlb_initialize(struct bonding *bond)  	bond_info->tx_hashtbl = new_hashtbl; -	for (i = 0; i < TLB_HASH_TABLE_SIZE; i++) { +	for (i = 0; i < TLB_HASH_TABLE_SIZE; i++)  		tlb_init_table_entry(&bond_info->tx_hashtbl[i], 0); -	}  	_unlock_tx_hashtbl_bh(bond); @@ -223,14 +221,15 @@ static long long compute_gap(struct slave *slave)  static struct slave *tlb_get_least_loaded_slave(struct bonding *bond)  {  	struct slave *slave, *least_loaded; +	struct list_head *iter;  	long long max_gap;  	least_loaded = NULL;  	max_gap = LLONG_MIN;  	/* Find the slave with the largest gap */ -	bond_for_each_slave(bond, slave) { -		if (SLAVE_IS_OK(slave)) { +	bond_for_each_slave_rcu(bond, slave, iter) { +		if (bond_slave_can_tx(slave)) {  			long long gap = compute_gap(slave);  			if (max_gap < gap) { @@ -264,9 +263,8 @@ static struct slave *__tlb_choose_channel(struct bonding *bond, u32 hash_index,  			hash_table[hash_index].next = next_index;  			hash_table[hash_index].prev = TLB_NULL_INDEX; -			if (next_index != TLB_NULL_INDEX) { +			if (next_index != TLB_NULL_INDEX)  				hash_table[next_index].prev = hash_index; -			}  			slave_info->head = hash_index;  			slave_info->load += @@ -274,9 +272,8 @@ static struct slave *__tlb_choose_channel(struct bonding *bond, u32 hash_index,  		}  	} -	if (assigned_slave) { +	if (assigned_slave)  		hash_table[hash_index].tx_bytes += skb_len; -	}  	return assigned_slave;  } @@ -329,7 +326,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)  	_lock_rx_hashtbl_bh(bond); -	hash_index = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src)); +	hash_index = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src));  	client_info = &(bond_info->rx_hashtbl[hash_index]);  	if ((client_info->assigned) && @@ -337,7 +334,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)  	    (client_info->ip_dst == arp->ip_src) &&  	    (!ether_addr_equal_64bits(client_info->mac_dst, arp->mac_src))) {  		/* update the clients MAC address */ -		memcpy(client_info->mac_dst, arp->mac_src, ETH_ALEN); +		ether_addr_copy(client_info->mac_dst, arp->mac_src);  		client_info->ntt = 1;  		bond_info->rx_ntt = 1;  	} @@ -382,30 +379,64 @@ out:  static struct slave *rlb_next_rx_slave(struct bonding *bond)  {  	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); -	struct slave *rx_slave, *slave, *start_at; -	int i = 0; +	struct slave *before = NULL, *rx_slave = NULL, *slave; +	struct list_head *iter; +	bool found = false; -	if (bond_info->next_rx_slave) -		start_at = bond_info->next_rx_slave; -	else -		start_at = bond_first_slave(bond); +	bond_for_each_slave(bond, slave, iter) { +		if (!bond_slave_can_tx(slave)) +			continue; +		if (!found) { +			if (!before || before->speed < slave->speed) +				before = slave; +		} else { +			if (!rx_slave || rx_slave->speed < slave->speed) +				rx_slave = slave; +		} +		if (slave == bond_info->rx_slave) +			found = true; +	} +	/* we didn't find anything after the current or we have something +	 * better before and up to the current slave +	 */ +	if (!rx_slave || (before && rx_slave->speed < before->speed)) +		rx_slave = before; -	rx_slave = NULL; +	if (rx_slave) +		bond_info->rx_slave = rx_slave; -	bond_for_each_slave_from(bond, slave, i, start_at) { -		if (SLAVE_IS_OK(slave)) { -			if (!rx_slave) { -				rx_slave = slave; -			} else if (slave->speed > rx_slave->speed) { +	return rx_slave; +} + +/* Caller must hold rcu_read_lock() for read */ +static struct slave *__rlb_next_rx_slave(struct bonding *bond) +{ +	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); +	struct slave *before = NULL, *rx_slave = NULL, *slave; +	struct list_head *iter; +	bool found = false; + +	bond_for_each_slave_rcu(bond, slave, iter) { +		if (!bond_slave_can_tx(slave)) +			continue; +		if (!found) { +			if (!before || before->speed < slave->speed) +				before = slave; +		} else { +			if (!rx_slave || rx_slave->speed < slave->speed)  				rx_slave = slave; -			}  		} +		if (slave == bond_info->rx_slave) +			found = true;  	} +	/* we didn't find anything after the current or we have something +	 * better before and up to the current slave +	 */ +	if (!rx_slave || (before && rx_slave->speed < before->speed)) +		rx_slave = before; -	if (rx_slave) { -		slave = bond_next_slave(bond, rx_slave); -		bond_info->next_rx_slave = slave; -	} +	if (rx_slave) +		bond_info->rx_slave = rx_slave;  	return rx_slave;  } @@ -417,9 +448,8 @@ static struct slave *rlb_next_rx_slave(struct bonding *bond)   */  static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[])  { -	if (!bond->curr_active_slave) { +	if (!bond->curr_active_slave)  		return; -	}  	if (!bond->alb_info.primary_is_promisc) {  		if (!dev_set_promiscuity(bond->curr_active_slave->dev, 1)) @@ -430,12 +460,12 @@ static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[])  	bond->alb_info.rlb_promisc_timeout_counter = 0; -	alb_send_learning_packets(bond->curr_active_slave, addr); +	alb_send_learning_packets(bond->curr_active_slave, addr, true);  }  /* slave being removed should not be active at this point   * - * Caller must hold bond lock for read + * Caller must hold rtnl.   */  static void rlb_clear_slave(struct bonding *bond, struct slave *slave)  { @@ -479,9 +509,8 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)  	write_lock_bh(&bond->curr_slave_lock); -	if (slave != bond->curr_active_slave) { +	if (slave != bond->curr_active_slave)  		rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr); -	}  	write_unlock_bh(&bond->curr_slave_lock);  } @@ -490,9 +519,8 @@ static void rlb_update_client(struct rlb_client_info *client_info)  {  	int i; -	if (!client_info->slave) { +	if (!client_info->slave)  		return; -	}  	for (i = 0; i < RLB_ARP_BURST_SIZE; i++) {  		struct sk_buff *skb; @@ -540,9 +568,8 @@ static void rlb_update_rx_clients(struct bonding *bond)  		client_info = &(bond_info->rx_hashtbl[hash_index]);  		if (client_info->ntt) {  			rlb_update_client(client_info); -			if (bond_info->rlb_update_retry_counter == 0) { +			if (bond_info->rlb_update_retry_counter == 0)  				client_info->ntt = 0; -			}  		}  	} @@ -576,10 +603,10 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla  		}  	} -	// update the team's flag only after the whole iteration +	/* update the team's flag only after the whole iteration */  	if (ntt) {  		bond_info->rx_ntt = 1; -		//fasten the change +		/* fasten the change */  		bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY;  	} @@ -626,12 +653,14 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon  {  	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));  	struct arp_pkt *arp = arp_pkt(skb); -	struct slave *assigned_slave; +	struct slave *assigned_slave, *curr_active_slave;  	struct rlb_client_info *client_info;  	u32 hash_index = 0;  	_lock_rx_hashtbl(bond); +	curr_active_slave = rcu_dereference(bond->curr_active_slave); +  	hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_dst));  	client_info = &(bond_info->rx_hashtbl[hash_index]); @@ -641,9 +670,9 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon  			/* the entry is already assigned to this client */  			if (!ether_addr_equal_64bits(arp->mac_dst, mac_bcast)) {  				/* update mac address from arp */ -				memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN); +				ether_addr_copy(client_info->mac_dst, arp->mac_dst);  			} -			memcpy(client_info->mac_src, arp->mac_src, ETH_ALEN); +			ether_addr_copy(client_info->mac_src, arp->mac_src);  			assigned_slave = client_info->slave;  			if (assigned_slave) { @@ -656,14 +685,14 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon  			 * that the new client can be assigned to this entry.  			 */  			if (bond->curr_active_slave && -			    client_info->slave != bond->curr_active_slave) { -				client_info->slave = bond->curr_active_slave; +			    client_info->slave != curr_active_slave) { +				client_info->slave = curr_active_slave;  				rlb_update_client(client_info);  			}  		}  	}  	/* assign a new slave */ -	assigned_slave = rlb_next_rx_slave(bond); +	assigned_slave = __rlb_next_rx_slave(bond);  	if (assigned_slave) {  		if (!(client_info->assigned && @@ -683,8 +712,8 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon  		 * will be updated with clients actual unicast mac address  		 * upon receiving an arp reply.  		 */ -		memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN); -		memcpy(client_info->mac_src, arp->mac_src, ETH_ALEN); +		ether_addr_copy(client_info->mac_dst, arp->mac_dst); +		ether_addr_copy(client_info->mac_src, arp->mac_src);  		client_info->slave = assigned_slave;  		if (!ether_addr_equal_64bits(client_info->mac_dst, mac_bcast)) { @@ -694,7 +723,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon  			client_info->ntt = 0;  		} -		if (!vlan_get_tag(skb, &client_info->vlan_id)) +		if (vlan_get_tag(skb, &client_info->vlan_id))  			client_info->vlan_id = 0;  		if (!client_info->assigned) { @@ -726,7 +755,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)  	/* Don't modify or load balance ARPs that do not originate locally  	 * (e.g.,arrive via a bridge).  	 */ -	if (!bond_slave_has_mac(bond, arp->mac_src)) +	if (!bond_slave_has_mac_rx(bond, arp->mac_src))  		return NULL;  	if (arp->op_code == htons(ARPOP_REPLY)) { @@ -734,9 +763,8 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)  		* rx channel  		*/  		tx_slave = rlb_choose_channel(skb, bond); -		if (tx_slave) { -			memcpy(arp->mac_src,tx_slave->dev->dev_addr, ETH_ALEN); -		} +		if (tx_slave) +			ether_addr_copy(arp->mac_src, tx_slave->dev->dev_addr);  		pr_debug("Server sent ARP Reply packet\n");  	} else if (arp->op_code == htons(ARPOP_REQUEST)) {  		/* Create an entry in the rx_hashtbl for this client as a @@ -779,7 +807,7 @@ static void rlb_rebalance(struct bonding *bond)  	for (; hash_index != RLB_NULL_INDEX;  	     hash_index = client_info->used_next) {  		client_info = &(bond_info->rx_hashtbl[hash_index]); -		assigned_slave = rlb_next_rx_slave(bond); +		assigned_slave = __rlb_next_rx_slave(bond);  		if (assigned_slave && (client_info->slave != assigned_slave)) {  			client_info->slave = assigned_slave;  			client_info->ntt = 1; @@ -788,9 +816,8 @@ static void rlb_rebalance(struct bonding *bond)  	}  	/* update the team's flag only after the whole iteration */ -	if (ntt) { +	if (ntt)  		bond_info->rx_ntt = 1; -	}  	_unlock_rx_hashtbl_bh(bond);  } @@ -887,7 +914,7 @@ static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, u32 ip_dst_hash)  static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp)  {  	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); -	u32 ip_src_hash = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src)); +	u32 ip_src_hash = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src));  	u32 index;  	_lock_rx_hashtbl_bh(bond); @@ -921,9 +948,8 @@ static int rlb_initialize(struct bonding *bond)  	bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX; -	for (i = 0; i < RLB_HASH_TABLE_SIZE; i++) { +	for (i = 0; i < RLB_HASH_TABLE_SIZE; i++)  		rlb_init_table_entry(bond_info->rx_hashtbl + i); -	}  	_unlock_rx_hashtbl_bh(bond); @@ -970,7 +996,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)  /*********************** tlb/rlb shared functions *********************/  static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[], -			    u16 vid) +			    __be16 vlan_proto, u16 vid)  {  	struct learning_pkt pkt;  	struct sk_buff *skb; @@ -978,9 +1004,9 @@ static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[],  	char *data;  	memset(&pkt, 0, size); -	memcpy(pkt.mac_dst, mac_addr, ETH_ALEN); -	memcpy(pkt.mac_src, mac_addr, ETH_ALEN); -	pkt.type = cpu_to_be16(ETH_P_LOOP); +	ether_addr_copy(pkt.mac_dst, mac_addr); +	ether_addr_copy(pkt.mac_src, mac_addr); +	pkt.type = cpu_to_be16(ETH_P_LOOPBACK);  	skb = dev_alloc_skb(size);  	if (!skb) @@ -996,7 +1022,7 @@ static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[],  	skb->dev = slave->dev;  	if (vid) { -		skb = vlan_put_tag(skb, htons(ETH_P_8021Q), vid); +		skb = vlan_put_tag(skb, vlan_proto, vid);  		if (!skb) {  			pr_err("%s: Error: failed to insert VLAN tag\n",  			       slave->bond->dev->name); @@ -1007,22 +1033,45 @@ static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[],  	dev_queue_xmit(skb);  } - -static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]) +static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], +				      bool strict_match)  {  	struct bonding *bond = bond_get_bond_by_slave(slave);  	struct net_device *upper;  	struct list_head *iter; +	struct bond_vlan_tag tags[BOND_MAX_VLAN_ENCAP];  	/* send untagged */ -	alb_send_lp_vid(slave, mac_addr, 0); +	alb_send_lp_vid(slave, mac_addr, 0, 0); -	/* loop through vlans and send one packet for each */ +	/* loop through all devices and see if we need to send a packet +	 * for that device. +	 */  	rcu_read_lock(); -	netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) { -		if (upper->priv_flags & IFF_802_1Q_VLAN) -			alb_send_lp_vid(slave, mac_addr, -					vlan_dev_vlan_id(upper)); +	netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) { +		if (is_vlan_dev(upper) && vlan_get_encap_level(upper) == 0) { +			if (strict_match && +			    ether_addr_equal_64bits(mac_addr, +						    upper->dev_addr)) { +				alb_send_lp_vid(slave, mac_addr, +						vlan_dev_vlan_proto(upper), +						vlan_dev_vlan_id(upper)); +			} else if (!strict_match) { +				alb_send_lp_vid(slave, upper->dev_addr, +						vlan_dev_vlan_proto(upper), +						vlan_dev_vlan_id(upper)); +			} +		} + +		/* If this is a macvlan device, then only send updates +		 * when strict_match is turned off. +		 */ +		if (netif_is_macvlan(upper) && !strict_match) { +			memset(tags, 0, sizeof(tags)); +			bond_verify_device_path(bond->dev, upper, tags); +			alb_send_lp_vid(slave, upper->dev_addr, +					tags[0].vlan_proto, tags[0].vlan_id); +		}  	}  	rcu_read_unlock();  } @@ -1032,7 +1081,7 @@ static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[])  	struct net_device *dev = slave->dev;  	struct sockaddr s_addr; -	if (slave->bond->params.mode == BOND_MODE_TLB) { +	if (BOND_MODE(slave->bond) == BOND_MODE_TLB) {  		memcpy(dev->dev_addr, addr, dev->addr_len);  		return 0;  	} @@ -1061,7 +1110,7 @@ static void alb_swap_mac_addr(struct slave *slave1, struct slave *slave2)  {  	u8 tmp_mac_addr[ETH_ALEN]; -	memcpy(tmp_mac_addr, slave1->dev->dev_addr, ETH_ALEN); +	ether_addr_copy(tmp_mac_addr, slave1->dev->dev_addr);  	alb_set_slave_mac_addr(slave1, slave2->dev->dev_addr);  	alb_set_slave_mac_addr(slave2, tmp_mac_addr); @@ -1075,14 +1124,14 @@ static void alb_swap_mac_addr(struct slave *slave1, struct slave *slave2)  static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1,  				struct slave *slave2)  { -	int slaves_state_differ = (SLAVE_IS_OK(slave1) != SLAVE_IS_OK(slave2)); +	int slaves_state_differ = (bond_slave_can_tx(slave1) != bond_slave_can_tx(slave2));  	struct slave *disabled_slave = NULL;  	ASSERT_RTNL();  	/* fasten the change in the switch */ -	if (SLAVE_IS_OK(slave1)) { -		alb_send_learning_packets(slave1, slave1->dev->dev_addr); +	if (bond_slave_can_tx(slave1)) { +		alb_send_learning_packets(slave1, slave1->dev->dev_addr, false);  		if (bond->alb_info.rlb_enabled) {  			/* inform the clients that the mac address  			 * has changed @@ -1093,8 +1142,8 @@ static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1,  		disabled_slave = slave1;  	} -	if (SLAVE_IS_OK(slave2)) { -		alb_send_learning_packets(slave2, slave2->dev->dev_addr); +	if (bond_slave_can_tx(slave2)) { +		alb_send_learning_packets(slave2, slave2->dev->dev_addr, false);  		if (bond->alb_info.rlb_enabled) {  			/* inform the clients that the mac address  			 * has changed @@ -1172,10 +1221,11 @@ static void alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *sla   */  static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slave *slave)  { -	struct slave *tmp_slave1, *free_mac_slave = NULL;  	struct slave *has_bond_addr = bond->curr_active_slave; +	struct slave *tmp_slave1, *free_mac_slave = NULL; +	struct list_head *iter; -	if (list_empty(&bond->slave_list)) { +	if (!bond_has_slaves(bond)) {  		/* this is the first slave */  		return 0;  	} @@ -1196,7 +1246,7 @@ static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slav  	/* The slave's address is equal to the address of the bond.  	 * Search for a spare address in the bond for this slave.  	 */ -	bond_for_each_slave(bond, tmp_slave1) { +	bond_for_each_slave(bond, tmp_slave1, iter) {  		if (!bond_slave_has_mac(bond, tmp_slave1->perm_hwaddr)) {  			/* no slave has tmp_slave1's perm addr  			 * as its curr addr @@ -1217,9 +1267,9 @@ static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slav  	if (free_mac_slave) {  		alb_set_slave_mac_addr(slave, free_mac_slave->perm_hwaddr); -		pr_warning("%s: Warning: the hw address of slave %s is in use by the bond; giving it the hw address of %s\n", -			   bond->dev->name, slave->dev->name, -			   free_mac_slave->dev->name); +		pr_warn("%s: Warning: the hw address of slave %s is in use by the bond; giving it the hw address of %s\n", +			bond->dev->name, slave->dev->name, +			free_mac_slave->dev->name);  	} else if (has_bond_addr) {  		pr_err("%s: Error: the hw address of slave %s is in use by the bond; couldn't find a slave with a free hw address to give it (this should not have happened)\n", @@ -1246,22 +1296,23 @@ static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slav   */  static int alb_set_mac_address(struct bonding *bond, void *addr)  { -	char tmp_addr[ETH_ALEN]; -	struct slave *slave; +	struct slave *slave, *rollback_slave; +	struct list_head *iter;  	struct sockaddr sa; +	char tmp_addr[ETH_ALEN];  	int res;  	if (bond->alb_info.rlb_enabled)  		return 0; -	bond_for_each_slave(bond, slave) { +	bond_for_each_slave(bond, slave, iter) {  		/* save net_device's current hw address */ -		memcpy(tmp_addr, slave->dev->dev_addr, ETH_ALEN); +		ether_addr_copy(tmp_addr, slave->dev->dev_addr);  		res = dev_set_mac_address(slave->dev, addr);  		/* restore net_device's hw address */ -		memcpy(slave->dev->dev_addr, tmp_addr, ETH_ALEN); +		ether_addr_copy(slave->dev->dev_addr, tmp_addr);  		if (res)  			goto unwind; @@ -1274,10 +1325,12 @@ unwind:  	sa.sa_family = bond->dev->type;  	/* unwind from head to the slave that failed */ -	bond_for_each_slave_continue_reverse(bond, slave) { -		memcpy(tmp_addr, slave->dev->dev_addr, ETH_ALEN); -		dev_set_mac_address(slave->dev, &sa); -		memcpy(slave->dev->dev_addr, tmp_addr, ETH_ALEN); +	bond_for_each_slave(bond, rollback_slave, iter) { +		if (rollback_slave == slave) +			break; +		ether_addr_copy(tmp_addr, rollback_slave->dev->dev_addr); +		dev_set_mac_address(rollback_slave->dev, &sa); +		ether_addr_copy(rollback_slave->dev->dev_addr, tmp_addr);  	}  	return res; @@ -1290,9 +1343,8 @@ int bond_alb_initialize(struct bonding *bond, int rlb_enabled)  	int res;  	res = tlb_initialize(bond); -	if (res) { +	if (res)  		return res; -	}  	if (rlb_enabled) {  		bond->alb_info.rlb_enabled = 1; @@ -1315,9 +1367,79 @@ void bond_alb_deinitialize(struct bonding *bond)  	tlb_deinitialize(bond); -	if (bond_info->rlb_enabled) { +	if (bond_info->rlb_enabled)  		rlb_deinitialize(bond); +} + +static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond, +		struct slave *tx_slave) +{ +	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); +	struct ethhdr *eth_data = eth_hdr(skb); + +	if (!tx_slave) { +		/* unbalanced or unassigned, send through primary */ +		tx_slave = rcu_dereference(bond->curr_active_slave); +		if (bond->params.tlb_dynamic_lb) +			bond_info->unbalanced_load += skb->len; +	} + +	if (tx_slave && bond_slave_can_tx(tx_slave)) { +		if (tx_slave != rcu_dereference(bond->curr_active_slave)) { +			ether_addr_copy(eth_data->h_source, +					tx_slave->dev->dev_addr); +		} + +		bond_dev_queue_xmit(bond, skb, tx_slave->dev); +		goto out; +	} + +	if (tx_slave && bond->params.tlb_dynamic_lb) { +		_lock_tx_hashtbl(bond); +		__tlb_clear_slave(bond, tx_slave, 0); +		_unlock_tx_hashtbl(bond); +	} + +	/* no suitable interface, frame not sent */ +	dev_kfree_skb_any(skb); +out: +	return NETDEV_TX_OK; +} + +int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) +{ +	struct bonding *bond = netdev_priv(bond_dev); +	struct ethhdr *eth_data; +	struct slave *tx_slave = NULL; +	u32 hash_index; + +	skb_reset_mac_header(skb); +	eth_data = eth_hdr(skb); + +	/* Do not TX balance any multicast or broadcast */ +	if (!is_multicast_ether_addr(eth_data->h_dest)) { +		switch (skb->protocol) { +		case htons(ETH_P_IP): +		case htons(ETH_P_IPX): +		    /* In case of IPX, it will falback to L2 hash */ +		case htons(ETH_P_IPV6): +			hash_index = bond_xmit_hash(bond, skb); +			if (bond->params.tlb_dynamic_lb) { +				tx_slave = tlb_choose_channel(bond, +							      hash_index & 0xFF, +							      skb->len); +			} else { +				struct list_head *iter; +				int idx = hash_index % bond->slave_cnt; + +				bond_for_each_slave_rcu(bond, tx_slave, iter) +					if (--idx < 0) +						break; +			} +			break; +		}  	} +	return bond_do_alb_xmit(skb, bond, tx_slave);  }  int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) @@ -1328,20 +1450,14 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)  	struct slave *tx_slave = NULL;  	static const __be32 ip_bcast = htonl(0xffffffff);  	int hash_size = 0; -	int do_tx_balance = 1; +	bool do_tx_balance = true;  	u32 hash_index = 0;  	const u8 *hash_start = NULL; -	int res = 1;  	struct ipv6hdr *ip6hdr;  	skb_reset_mac_header(skb);  	eth_data = eth_hdr(skb); -	/* make sure that the curr_active_slave do not change during tx -	 */ -	read_lock(&bond->lock); -	read_lock(&bond->curr_slave_lock); -  	switch (ntohs(skb->protocol)) {  	case ETH_P_IP: {  		const struct iphdr *iph = ip_hdr(skb); @@ -1349,7 +1465,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)  		if (ether_addr_equal_64bits(eth_data->h_dest, mac_bcast) ||  		    (iph->daddr == ip_bcast) ||  		    (iph->protocol == IPPROTO_IGMP)) { -			do_tx_balance = 0; +			do_tx_balance = false;  			break;  		}  		hash_start = (char *)&(iph->daddr); @@ -1361,7 +1477,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)  		 * that here just in case.  		 */  		if (ether_addr_equal_64bits(eth_data->h_dest, mac_bcast)) { -			do_tx_balance = 0; +			do_tx_balance = false;  			break;  		} @@ -1369,7 +1485,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)  		 * broadcasts in IPv4.  		 */  		if (ether_addr_equal_64bits(eth_data->h_dest, mac_v6_allmcast)) { -			do_tx_balance = 0; +			do_tx_balance = false;  			break;  		} @@ -1379,7 +1495,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)  		 */  		ip6hdr = ipv6_hdr(skb);  		if (ipv6_addr_any(&ip6hdr->saddr)) { -			do_tx_balance = 0; +			do_tx_balance = false;  			break;  		} @@ -1389,7 +1505,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)  	case ETH_P_IPX:  		if (ipx_hdr(skb)->ipx_checksum != IPX_NO_CHECKSUM) {  			/* something is wrong with this packet */ -			do_tx_balance = 0; +			do_tx_balance = false;  			break;  		} @@ -1398,21 +1514,20 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)  			 * this family since it has an "ARP" like  			 * mechanism  			 */ -			do_tx_balance = 0; +			do_tx_balance = false;  			break;  		} -		hash_start = (char*)eth_data->h_dest; +		hash_start = (char *)eth_data->h_dest;  		hash_size = ETH_ALEN;  		break;  	case ETH_P_ARP: -		do_tx_balance = 0; -		if (bond_info->rlb_enabled) { +		do_tx_balance = false; +		if (bond_info->rlb_enabled)  			tx_slave = rlb_arp_xmit(skb, bond); -		}  		break;  	default: -		do_tx_balance = 0; +		do_tx_balance = false;  		break;  	} @@ -1421,36 +1536,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)  		tx_slave = tlb_choose_channel(bond, hash_index, skb->len);  	} -	if (!tx_slave) { -		/* unbalanced or unassigned, send through primary */ -		tx_slave = bond->curr_active_slave; -		bond_info->unbalanced_load += skb->len; -	} - -	if (tx_slave && SLAVE_IS_OK(tx_slave)) { -		if (tx_slave != bond->curr_active_slave) { -			memcpy(eth_data->h_source, -			       tx_slave->dev->dev_addr, -			       ETH_ALEN); -		} - -		res = bond_dev_queue_xmit(bond, skb, tx_slave->dev); -	} else { -		if (tx_slave) { -			_lock_tx_hashtbl(bond); -			__tlb_clear_slave(bond, tx_slave, 0); -			_unlock_tx_hashtbl(bond); -		} -	} - -	read_unlock(&bond->curr_slave_lock); -	read_unlock(&bond->lock); -	if (res) { -		/* no suitable interface, frame not sent */ -		kfree_skb(skb); -	} - -	return NETDEV_TX_OK; +	return bond_do_alb_xmit(skb, bond, tx_slave);  }  void bond_alb_monitor(struct work_struct *work) @@ -1458,21 +1544,24 @@ void bond_alb_monitor(struct work_struct *work)  	struct bonding *bond = container_of(work, struct bonding,  					    alb_work.work);  	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); +	struct list_head *iter;  	struct slave *slave; -	read_lock(&bond->lock); - -	if (list_empty(&bond->slave_list)) { +	if (!bond_has_slaves(bond)) {  		bond_info->tx_rebalance_counter = 0;  		bond_info->lp_counter = 0;  		goto re_arm;  	} +	rcu_read_lock(); +  	bond_info->tx_rebalance_counter++;  	bond_info->lp_counter++;  	/* send learning packets */ -	if (bond_info->lp_counter >= BOND_ALB_LP_TICKS) { +	if (bond_info->lp_counter >= BOND_ALB_LP_TICKS(bond)) { +		bool strict_match; +  		/* change of curr_active_slave involves swapping of mac addresses.  		 * in order to avoid this swapping from happening while  		 * sending the learning packets, the curr_slave_lock must be held for @@ -1480,8 +1569,17 @@ void bond_alb_monitor(struct work_struct *work)  		 */  		read_lock(&bond->curr_slave_lock); -		bond_for_each_slave(bond, slave) -			alb_send_learning_packets(slave, slave->dev->dev_addr); +		bond_for_each_slave_rcu(bond, slave, iter) { +			/* If updating current_active, use all currently +			 * user mac addreses (!strict_match).  Otherwise, only +			 * use mac of the slave device. +			 * In RLB mode, we always use strict matches. +			 */ +			strict_match = (slave != bond->curr_active_slave || +					bond_info->rlb_enabled); +			alb_send_learning_packets(slave, slave->dev->dev_addr, +						  strict_match); +		}  		read_unlock(&bond->curr_slave_lock); @@ -1493,7 +1591,7 @@ void bond_alb_monitor(struct work_struct *work)  		read_lock(&bond->curr_slave_lock); -		bond_for_each_slave(bond, slave) { +		bond_for_each_slave_rcu(bond, slave, iter) {  			tlb_clear_slave(bond, slave, 1);  			if (slave == bond->curr_active_slave) {  				SLAVE_TLB_INFO(slave).load = @@ -1517,11 +1615,9 @@ void bond_alb_monitor(struct work_struct *work)  			 * dev_set_promiscuity requires rtnl and  			 * nothing else.  Avoid race with bond_close.  			 */ -			read_unlock(&bond->lock); -			if (!rtnl_trylock()) { -				read_lock(&bond->lock); +			rcu_read_unlock(); +			if (!rtnl_trylock())  				goto re_arm; -			}  			bond_info->rlb_promisc_timeout_counter = 0; @@ -1533,7 +1629,7 @@ void bond_alb_monitor(struct work_struct *work)  			bond_info->primary_is_promisc = 0;  			rtnl_unlock(); -			read_lock(&bond->lock); +			rcu_read_lock();  		}  		if (bond_info->rlb_rebalance) { @@ -1547,19 +1643,16 @@ void bond_alb_monitor(struct work_struct *work)  				--bond_info->rlb_update_delay_counter;  			} else {  				rlb_update_rx_clients(bond); -				if (bond_info->rlb_update_retry_counter) { +				if (bond_info->rlb_update_retry_counter)  					--bond_info->rlb_update_retry_counter; -				} else { +				else  					bond_info->rx_ntt = 0; -				}  			}  		}  	} - +	rcu_read_unlock();  re_arm:  	queue_delayed_work(bond->wq, &bond->alb_work, alb_delta_in_ticks); - -	read_unlock(&bond->lock);  }  /* assumption: called before the slave is attached to the bond @@ -1570,23 +1663,20 @@ int bond_alb_init_slave(struct bonding *bond, struct slave *slave)  	int res;  	res = alb_set_slave_mac_addr(slave, slave->perm_hwaddr); -	if (res) { +	if (res)  		return res; -	}  	res = alb_handle_addr_collision_on_attach(bond, slave); -	if (res) { +	if (res)  		return res; -	}  	tlb_init_slave(slave);  	/* order a rebalance ASAP */  	bond->alb_info.tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; -	if (bond->alb_info.rlb_enabled) { +	if (bond->alb_info.rlb_enabled)  		bond->alb_info.rlb_rebalance = 1; -	}  	return 0;  } @@ -1599,13 +1689,13 @@ int bond_alb_init_slave(struct bonding *bond, struct slave *slave)   */  void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave)  { -	if (!list_empty(&bond->slave_list)) +	if (bond_has_slaves(bond))  		alb_change_hw_addr_on_detach(bond, slave);  	tlb_clear_slave(bond, slave, 0);  	if (bond->alb_info.rlb_enabled) { -		bond->alb_info.next_rx_slave = NULL; +		bond->alb_info.rx_slave = NULL;  		rlb_clear_slave(bond, slave);  	}  } @@ -1617,9 +1707,8 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char  	if (link == BOND_LINK_DOWN) {  		tlb_clear_slave(bond, slave, 0); -		if (bond->alb_info.rlb_enabled) { +		if (bond->alb_info.rlb_enabled)  			rlb_clear_slave(bond, slave); -		}  	} else if (link == BOND_LINK_UP) {  		/* order a rebalance ASAP */  		bond_info->tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; @@ -1645,14 +1734,11 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char   * If new_slave is NULL, caller must hold curr_slave_lock or   * bond->lock for write.   * - * If new_slave is not NULL, caller must hold RTNL, bond->lock for - * read and curr_slave_lock for write.  Processing here may sleep, so - * no other locks may be held. + * If new_slave is not NULL, caller must hold RTNL, curr_slave_lock + * for write.  Processing here may sleep, so no other locks may be held.   */  void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave)  	__releases(&bond->curr_slave_lock) -	__releases(&bond->lock) -	__acquires(&bond->lock)  	__acquires(&bond->curr_slave_lock)  {  	struct slave *swap_slave; @@ -1669,7 +1755,7 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave  	swap_slave = bond->curr_active_slave;  	rcu_assign_pointer(bond->curr_active_slave, new_slave); -	if (!new_slave || list_empty(&bond->slave_list)) +	if (!new_slave || !bond_has_slaves(bond))  		return;  	/* set the new curr_active_slave to the bonds mac address @@ -1688,21 +1774,36 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave  	tlb_clear_slave(bond, new_slave, 1);  	write_unlock_bh(&bond->curr_slave_lock); -	read_unlock(&bond->lock);  	ASSERT_RTNL(); +	/* in TLB mode, the slave might flip down/up with the old dev_addr, +	 * and thus filter bond->dev_addr's packets, so force bond's mac +	 */ +	if (BOND_MODE(bond) == BOND_MODE_TLB) { +		struct sockaddr sa; +		u8 tmp_addr[ETH_ALEN]; + +		ether_addr_copy(tmp_addr, new_slave->dev->dev_addr); + +		memcpy(sa.sa_data, bond->dev->dev_addr, bond->dev->addr_len); +		sa.sa_family = bond->dev->type; +		/* we don't care if it can't change its mac, best effort */ +		dev_set_mac_address(new_slave->dev, &sa); + +		ether_addr_copy(new_slave->dev->dev_addr, tmp_addr); +	} +  	/* curr_active_slave must be set before calling alb_swap_mac_addr */  	if (swap_slave) {  		/* swap mac address */  		alb_swap_mac_addr(swap_slave, new_slave);  		alb_fasten_mac_swap(bond, swap_slave, new_slave); -		read_lock(&bond->lock);  	} else {  		/* set the new_slave to the bond mac address */  		alb_set_slave_mac_addr(new_slave, bond->dev->dev_addr); -		read_lock(&bond->lock); -		alb_send_learning_packets(new_slave, bond->dev->dev_addr); +		alb_send_learning_packets(new_slave, bond->dev->dev_addr, +					  false);  	}  	write_lock_bh(&bond->curr_slave_lock); @@ -1720,14 +1821,12 @@ int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr)  	struct slave *swap_slave;  	int res; -	if (!is_valid_ether_addr(sa->sa_data)) { +	if (!is_valid_ether_addr(sa->sa_data))  		return -EADDRNOTAVAIL; -	}  	res = alb_set_mac_address(bond, addr); -	if (res) { +	if (res)  		return res; -	}  	memcpy(bond_dev->dev_addr, sa->sa_data, bond_dev->addr_len); @@ -1735,9 +1834,8 @@ int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr)  	 * Otherwise we'll need to pass the new address to it and handle  	 * duplications.  	 */ -	if (!bond->curr_active_slave) { +	if (!bond->curr_active_slave)  		return 0; -	}  	swap_slave = bond_slave_has_mac(bond, bond_dev->dev_addr); @@ -1748,7 +1846,8 @@ int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr)  		alb_set_slave_mac_addr(bond->curr_active_slave, bond_dev->dev_addr);  		read_lock(&bond->lock); -		alb_send_learning_packets(bond->curr_active_slave, bond_dev->dev_addr); +		alb_send_learning_packets(bond->curr_active_slave, +					  bond_dev->dev_addr, false);  		if (bond->alb_info.rlb_enabled) {  			/* inform clients mac address has changed */  			rlb_req_update_slave_clients(bond, bond->curr_active_slave); @@ -1761,8 +1860,7 @@ int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr)  void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id)  { -	if (bond->alb_info.rlb_enabled) { +	if (bond->alb_info.rlb_enabled)  		rlb_clear_vlan(bond, vlan_id); -	}  } diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h index 28d8e4c7dc0..5fc76c01636 100644 --- a/drivers/net/bonding/bond_alb.h +++ b/drivers/net/bonding/bond_alb.h @@ -12,8 +12,7 @@   * for more details.   *   * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA. + * with this program; if not, see <http://www.gnu.org/licenses/>.   *   * The full GNU General Public License is included in this distribution in the   * file called LICENSE. @@ -36,14 +35,15 @@ struct slave;  					 * Used for division - never set  					 * to zero !!!  					 */ -#define BOND_ALB_LP_INTERVAL	    1	/* In seconds, periodic send of -					 * learning packets to the switch -					 */ +#define BOND_ALB_DEFAULT_LP_INTERVAL 1 +#define BOND_ALB_LP_INTERVAL(bond) (bond->params.lp_interval)	/* In seconds, periodic send of +								 * learning packets to the switch +								 */  #define BOND_TLB_REBALANCE_TICKS (BOND_TLB_REBALANCE_INTERVAL \  				  * ALB_TIMER_TICKS_PER_SEC) -#define BOND_ALB_LP_TICKS (BOND_ALB_LP_INTERVAL \ +#define BOND_ALB_LP_TICKS(bond) (BOND_ALB_LP_INTERVAL(bond) \  			   * ALB_TIMER_TICKS_PER_SEC)  #define TLB_HASH_TABLE_SIZE 256	/* The size of the clients hash table. @@ -153,9 +153,7 @@ struct alb_bond_info {  	u8			rx_ntt;	/* flag - need to transmit  					 * to all rx clients  					 */ -	struct slave		*next_rx_slave;/* next slave to be assigned -						* to a new rx client for -						*/ +	struct slave		*rx_slave;/* last slave to xmit from */  	u8			primary_is_promisc;	   /* boolean */  	u32			rlb_promisc_timeout_counter;/* counts primary  							     * promiscuity time @@ -177,6 +175,7 @@ void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave);  void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link);  void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave);  int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev); +int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev);  void bond_alb_monitor(struct work_struct *);  int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr);  void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id); diff --git a/drivers/net/bonding/bond_debugfs.c b/drivers/net/bonding/bond_debugfs.c index 5fc4c235147..658e761c456 100644 --- a/drivers/net/bonding/bond_debugfs.c +++ b/drivers/net/bonding/bond_debugfs.c @@ -23,7 +23,7 @@ static int bond_debug_rlb_hash_show(struct seq_file *m, void *v)  	struct rlb_client_info *client_info;  	u32 hash_index; -	if (bond->params.mode != BOND_MODE_ALB) +	if (BOND_MODE(bond) != BOND_MODE_ALB)  		return 0;  	seq_printf(m, "SourceIP        DestinationIP   " @@ -69,7 +69,7 @@ void bond_debug_register(struct bonding *bond)  		debugfs_create_dir(bond->dev->name, bonding_debug_root);  	if (!bond->debug_dir) { -		pr_warning("%s: Warning: failed to register to debugfs\n", +		pr_warn("%s: Warning: failed to register to debugfs\n",  			bond->dev->name);  		return;  	} @@ -98,9 +98,8 @@ void bond_debug_reregister(struct bonding *bond)  	if (d) {  		bond->debug_dir = d;  	} else { -		pr_warning("%s: Warning: failed to reregister, " -				"so just unregister old one\n", -				bond->dev->name); +		pr_warn("%s: Warning: failed to reregister, so just unregister old one\n", +			bond->dev->name);  		bond_debug_unregister(bond);  	}  } @@ -110,8 +109,7 @@ void bond_create_debugfs(void)  	bonding_debug_root = debugfs_create_dir("bonding", NULL);  	if (!bonding_debug_root) { -		pr_warning("Warning: Cannot create bonding directory" -				" in debugfs\n"); +		pr_warn("Warning: Cannot create bonding directory in debugfs\n");  	}  } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 72df399c4ab..701f86cd599 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -78,6 +78,7 @@  #include <net/netns/generic.h>  #include <net/pkt_sched.h>  #include <linux/rculist.h> +#include <net/flow_keys.h>  #include "bonding.h"  #include "bond_3ad.h"  #include "bond_alb.h" @@ -85,13 +86,11 @@  /*---------------------------- Module parameters ----------------------------*/  /* monitor all links that often (in milliseconds). <=0 disables monitoring */ -#define BOND_LINK_MON_INTERV	0 -#define BOND_LINK_ARP_INTERV	0  static int max_bonds	= BOND_DEFAULT_MAX_BONDS;  static int tx_queues	= BOND_DEFAULT_TX_QUEUES;  static int num_peer_notif = 1; -static int miimon	= BOND_LINK_MON_INTERV; +static int miimon;  static int updelay;  static int downdelay;  static int use_carrier	= 1; @@ -102,7 +101,7 @@ static char *lacp_rate;  static int min_links;  static char *ad_select;  static char *xmit_hash_policy; -static int arp_interval = BOND_LINK_ARP_INTERV; +static int arp_interval;  static char *arp_ip_target[BOND_MAX_ARP_TARGETS];  static char *arp_validate;  static char *arp_all_targets; @@ -110,6 +109,8 @@ static char *fail_over_mac;  static int all_slaves_active;  static struct bond_params bonding_defaults;  static int resend_igmp = BOND_DEFAULT_RESEND_IGMP; +static int packets_per_slave = 1; +static int lp_interval = BOND_ALB_DEFAULT_LP_INTERVAL;  module_param(max_bonds, int, 0);  MODULE_PARM_DESC(max_bonds, "Max number of bonded devices"); @@ -159,7 +160,8 @@ MODULE_PARM_DESC(min_links, "Minimum number of available links before turning on  module_param(xmit_hash_policy, charp, 0);  MODULE_PARM_DESC(xmit_hash_policy, "balance-xor and 802.3ad hashing method; "  				   "0 for layer 2 (default), 1 for layer 3+4, " -				   "2 for layer 2+3"); +				   "2 for layer 2+3, 3 for encap layer 2+3, " +				   "4 for encap layer 3+4");  module_param(arp_interval, int, 0);  MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");  module_param_array(arp_ip_target, charp, NULL, 0); @@ -181,6 +183,14 @@ MODULE_PARM_DESC(all_slaves_active, "Keep all frames received on an interface"  module_param(resend_igmp, int, 0);  MODULE_PARM_DESC(resend_igmp, "Number of IGMP membership reports to send on "  			      "link failure"); +module_param(packets_per_slave, int, 0); +MODULE_PARM_DESC(packets_per_slave, "Packets to send per slave in balance-rr " +				    "mode; 0 for a random slave, 1 packet per " +				    "slave (default), >1 packets per slave."); +module_param(lp_interval, uint, 0); +MODULE_PARM_DESC(lp_interval, "The number of seconds between instances where " +			      "the bonding driver sends learning packets to " +			      "each slaves peer switch. The default is 1.");  /*----------------------------- Global variables ----------------------------*/ @@ -196,65 +206,6 @@ static int bond_mode	= BOND_MODE_ROUNDROBIN;  static int xmit_hashtype = BOND_XMIT_POLICY_LAYER2;  static int lacp_fast; -const struct bond_parm_tbl bond_lacp_tbl[] = { -{	"slow",		AD_LACP_SLOW}, -{	"fast",		AD_LACP_FAST}, -{	NULL,		-1}, -}; - -const struct bond_parm_tbl bond_mode_tbl[] = { -{	"balance-rr",		BOND_MODE_ROUNDROBIN}, -{	"active-backup",	BOND_MODE_ACTIVEBACKUP}, -{	"balance-xor",		BOND_MODE_XOR}, -{	"broadcast",		BOND_MODE_BROADCAST}, -{	"802.3ad",		BOND_MODE_8023AD}, -{	"balance-tlb",		BOND_MODE_TLB}, -{	"balance-alb",		BOND_MODE_ALB}, -{	NULL,			-1}, -}; - -const struct bond_parm_tbl xmit_hashtype_tbl[] = { -{	"layer2",		BOND_XMIT_POLICY_LAYER2}, -{	"layer3+4",		BOND_XMIT_POLICY_LAYER34}, -{	"layer2+3",		BOND_XMIT_POLICY_LAYER23}, -{	NULL,			-1}, -}; - -const struct bond_parm_tbl arp_all_targets_tbl[] = { -{	"any",			BOND_ARP_TARGETS_ANY}, -{	"all",			BOND_ARP_TARGETS_ALL}, -{	NULL,			-1}, -}; - -const struct bond_parm_tbl arp_validate_tbl[] = { -{	"none",			BOND_ARP_VALIDATE_NONE}, -{	"active",		BOND_ARP_VALIDATE_ACTIVE}, -{	"backup",		BOND_ARP_VALIDATE_BACKUP}, -{	"all",			BOND_ARP_VALIDATE_ALL}, -{	NULL,			-1}, -}; - -const struct bond_parm_tbl fail_over_mac_tbl[] = { -{	"none",			BOND_FOM_NONE}, -{	"active",		BOND_FOM_ACTIVE}, -{	"follow",		BOND_FOM_FOLLOW}, -{	NULL,			-1}, -}; - -const struct bond_parm_tbl pri_reselect_tbl[] = { -{	"always",		BOND_PRI_RESELECT_ALWAYS}, -{	"better",		BOND_PRI_RESELECT_BETTER}, -{	"failure",		BOND_PRI_RESELECT_FAILURE}, -{	NULL,			-1}, -}; - -struct bond_parm_tbl ad_select_tbl[] = { -{	"stable",	BOND_AD_STABLE}, -{	"bandwidth",	BOND_AD_BANDWIDTH}, -{	"count",	BOND_AD_COUNT}, -{	NULL,		-1}, -}; -  /*-------------------------- Forward declarations ---------------------------*/  static int bond_init(struct net_device *bond_dev); @@ -289,7 +240,7 @@ const char *bond_mode_name(int mode)   * @skb: hw accel VLAN tagged skb to transmit   * @slave_dev: slave that is supposed to xmit this skbuff   */ -int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, +void bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,  			struct net_device *slave_dev)  {  	skb->dev = slave_dev; @@ -302,8 +253,6 @@ int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,  		bond_netpoll_send_skb(bond_get_slave_by_dev(bond, slave_dev), skb);  	else  		dev_queue_xmit(skb); - -	return 0;  }  /* @@ -332,10 +281,11 @@ static int bond_vlan_rx_add_vid(struct net_device *bond_dev,  				__be16 proto, u16 vid)  {  	struct bonding *bond = netdev_priv(bond_dev); -	struct slave *slave; +	struct slave *slave, *rollback_slave; +	struct list_head *iter;  	int res; -	bond_for_each_slave(bond, slave) { +	bond_for_each_slave(bond, slave, iter) {  		res = vlan_vid_add(slave->dev, proto, vid);  		if (res)  			goto unwind; @@ -344,9 +294,13 @@ static int bond_vlan_rx_add_vid(struct net_device *bond_dev,  	return 0;  unwind: -	/* unwind from the slave that failed */ -	bond_for_each_slave_continue_reverse(bond, slave) -		vlan_vid_del(slave->dev, proto, vid); +	/* unwind to the slave that failed */ +	bond_for_each_slave(bond, rollback_slave, iter) { +		if (rollback_slave == slave) +			break; + +		vlan_vid_del(rollback_slave->dev, proto, vid); +	}  	return res;  } @@ -360,9 +314,10 @@ static int bond_vlan_rx_kill_vid(struct net_device *bond_dev,  				 __be16 proto, u16 vid)  {  	struct bonding *bond = netdev_priv(bond_dev); +	struct list_head *iter;  	struct slave *slave; -	bond_for_each_slave(bond, slave) +	bond_for_each_slave(bond, slave, iter)  		vlan_vid_del(slave->dev, proto, vid);  	if (bond_is_lb(bond)) @@ -382,15 +337,16 @@ static int bond_vlan_rx_kill_vid(struct net_device *bond_dev,   */  static int bond_set_carrier(struct bonding *bond)  { +	struct list_head *iter;  	struct slave *slave; -	if (list_empty(&bond->slave_list)) +	if (!bond_has_slaves(bond))  		goto down; -	if (bond->params.mode == BOND_MODE_8023AD) +	if (BOND_MODE(bond) == BOND_MODE_8023AD)  		return bond_3ad_set_carrier(bond); -	bond_for_each_slave(bond, slave) { +	bond_for_each_slave(bond, slave, iter) {  		if (slave->link == BOND_LINK_UP) {  			if (!netif_carrier_ok(bond->dev)) {  				netif_carrier_on(bond->dev); @@ -446,6 +402,22 @@ static void bond_update_speed_duplex(struct slave *slave)  	return;  } +const char *bond_slave_link_status(s8 link) +{ +	switch (link) { +	case BOND_LINK_UP: +		return "up"; +	case BOND_LINK_FAIL: +		return "going down"; +	case BOND_LINK_DOWN: +		return "down"; +	case BOND_LINK_BACK: +		return "going back"; +	default: +		return "unknown"; +	} +} +  /*   * if <dev> supports MII link status reporting, check its link status.   * @@ -522,8 +494,10 @@ static int bond_check_dev_link(struct bonding *bond,   */  static int bond_set_promiscuity(struct bonding *bond, int inc)  { +	struct list_head *iter;  	int err = 0; -	if (USES_PRIMARY(bond->params.mode)) { + +	if (bond_uses_primary(bond)) {  		/* write lock already acquired */  		if (bond->curr_active_slave) {  			err = dev_set_promiscuity(bond->curr_active_slave->dev, @@ -532,7 +506,7 @@ static int bond_set_promiscuity(struct bonding *bond, int inc)  	} else {  		struct slave *slave; -		bond_for_each_slave(bond, slave) { +		bond_for_each_slave(bond, slave, iter) {  			err = dev_set_promiscuity(slave->dev, inc);  			if (err)  				return err; @@ -546,8 +520,10 @@ static int bond_set_promiscuity(struct bonding *bond, int inc)   */  static int bond_set_allmulti(struct bonding *bond, int inc)  { +	struct list_head *iter;  	int err = 0; -	if (USES_PRIMARY(bond->params.mode)) { + +	if (bond_uses_primary(bond)) {  		/* write lock already acquired */  		if (bond->curr_active_slave) {  			err = dev_set_allmulti(bond->curr_active_slave->dev, @@ -556,7 +532,7 @@ static int bond_set_allmulti(struct bonding *bond, int inc)  	} else {  		struct slave *slave; -		bond_for_each_slave(bond, slave) { +		bond_for_each_slave(bond, slave, iter) {  			err = dev_set_allmulti(slave->dev, inc);  			if (err)  				return err; @@ -570,33 +546,22 @@ static int bond_set_allmulti(struct bonding *bond, int inc)   * device and retransmit an IGMP JOIN request to the current active   * slave.   */ -static void bond_resend_igmp_join_requests(struct bonding *bond) +static void bond_resend_igmp_join_requests_delayed(struct work_struct *work)  { +	struct bonding *bond = container_of(work, struct bonding, +					    mcast_work.work); +  	if (!rtnl_trylock()) {  		queue_delayed_work(bond->wq, &bond->mcast_work, 1);  		return;  	}  	call_netdevice_notifiers(NETDEV_RESEND_IGMP, bond->dev); -	rtnl_unlock(); -	/* We use curr_slave_lock to protect against concurrent access to -	 * igmp_retrans from multiple running instances of this function and -	 * bond_change_active_slave -	 */ -	write_lock_bh(&bond->curr_slave_lock);  	if (bond->igmp_retrans > 1) {  		bond->igmp_retrans--;  		queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5);  	} -	write_unlock_bh(&bond->curr_slave_lock); -} - -static void bond_resend_igmp_join_requests_delayed(struct work_struct *work) -{ -	struct bonding *bond = container_of(work, struct bonding, -					    mcast_work.work); - -	bond_resend_igmp_join_requests(bond); +	rtnl_unlock();  }  /* Flush bond's hardware addresses from slave @@ -609,7 +574,7 @@ static void bond_hw_addr_flush(struct net_device *bond_dev,  	dev_uc_unsync(slave_dev, bond_dev);  	dev_mc_unsync(slave_dev, bond_dev); -	if (bond->params.mode == BOND_MODE_8023AD) { +	if (BOND_MODE(bond) == BOND_MODE_8023AD) {  		/* del lacpdu mc addr from mc list */  		u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; @@ -620,8 +585,8 @@ static void bond_hw_addr_flush(struct net_device *bond_dev,  /*--------------------------- Active slave change ---------------------------*/  /* Update the hardware address list and promisc/allmulti for the new and - * old active slaves (if any).  Modes that are !USES_PRIMARY keep all - * slaves up date at all times; only the USES_PRIMARY modes need to call + * old active slaves (if any).  Modes that are not using primary keep all + * slaves up date at all times; only the modes that use primary need to call   * this function to swap these settings during a failover.   */  static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active, @@ -676,14 +641,12 @@ static void bond_set_dev_addr(struct net_device *bond_dev,   *   * Perform special MAC address swapping for fail_over_mac settings   * - * Called with RTNL, bond->lock for read, curr_slave_lock for write_bh. + * Called with RTNL, curr_slave_lock for write_bh.   */  static void bond_do_fail_over_mac(struct bonding *bond,  				  struct slave *new_active,  				  struct slave *old_active)  	__releases(&bond->curr_slave_lock) -	__releases(&bond->lock) -	__acquires(&bond->lock)  	__acquires(&bond->curr_slave_lock)  {  	u8 tmp_mac[ETH_ALEN]; @@ -694,9 +657,7 @@ static void bond_do_fail_over_mac(struct bonding *bond,  	case BOND_FOM_ACTIVE:  		if (new_active) {  			write_unlock_bh(&bond->curr_slave_lock); -			read_unlock(&bond->lock);  			bond_set_dev_addr(bond->dev, new_active->dev); -			read_lock(&bond->lock);  			write_lock_bh(&bond->curr_slave_lock);  		}  		break; @@ -710,15 +671,14 @@ static void bond_do_fail_over_mac(struct bonding *bond,  			return;  		write_unlock_bh(&bond->curr_slave_lock); -		read_unlock(&bond->lock);  		if (old_active) { -			memcpy(tmp_mac, new_active->dev->dev_addr, ETH_ALEN); -			memcpy(saddr.sa_data, old_active->dev->dev_addr, -			       ETH_ALEN); +			ether_addr_copy(tmp_mac, new_active->dev->dev_addr); +			ether_addr_copy(saddr.sa_data, +					old_active->dev->dev_addr);  			saddr.sa_family = new_active->dev->type;  		} else { -			memcpy(saddr.sa_data, bond->dev->dev_addr, ETH_ALEN); +			ether_addr_copy(saddr.sa_data, bond->dev->dev_addr);  			saddr.sa_family = bond->dev->type;  		} @@ -732,7 +692,7 @@ static void bond_do_fail_over_mac(struct bonding *bond,  		if (!old_active)  			goto out; -		memcpy(saddr.sa_data, tmp_mac, ETH_ALEN); +		ether_addr_copy(saddr.sa_data, tmp_mac);  		saddr.sa_family = old_active->dev->type;  		rv = dev_set_mac_address(old_active->dev, &saddr); @@ -740,7 +700,6 @@ static void bond_do_fail_over_mac(struct bonding *bond,  			pr_err("%s: Error %d setting MAC of slave %s\n",  			       bond->dev->name, -rv, new_active->dev->name);  out: -		read_lock(&bond->lock);  		write_lock_bh(&bond->curr_slave_lock);  		break;  	default: @@ -774,43 +733,24 @@ static bool bond_should_change_active(struct bonding *bond)  /**   * find_best_interface - select the best available slave to be the active one   * @bond: our bonding struct - * - * Warning: Caller must hold curr_slave_lock for writing.   */  static struct slave *bond_find_best_slave(struct bonding *bond)  { -	struct slave *new_active, *old_active; -	struct slave *bestslave = NULL; +	struct slave *slave, *bestslave = NULL; +	struct list_head *iter;  	int mintime = bond->params.updelay; -	int i; - -	new_active = bond->curr_active_slave; -	if (!new_active) { /* there were no active slaves left */ -		new_active = bond_first_slave(bond); -		if (!new_active) -			return NULL; /* still no slave, return NULL */ -	} - -	if ((bond->primary_slave) && -	    bond->primary_slave->link == BOND_LINK_UP && -	    bond_should_change_active(bond)) { -		new_active = bond->primary_slave; -	} - -	/* remember where to stop iterating over the slaves */ -	old_active = new_active; - -	bond_for_each_slave_from(bond, new_active, i, old_active) { -		if (new_active->link == BOND_LINK_UP) { -			return new_active; -		} else if (new_active->link == BOND_LINK_BACK && -			   IS_UP(new_active->dev)) { -			/* link up, but waiting for stabilization */ -			if (new_active->delay < mintime) { -				mintime = new_active->delay; -				bestslave = new_active; -			} +	if (bond->primary_slave && bond->primary_slave->link == BOND_LINK_UP && +	    bond_should_change_active(bond)) +		return bond->primary_slave; + +	bond_for_each_slave(bond, slave, iter) { +		if (slave->link == BOND_LINK_UP) +			return slave; +		if (slave->link == BOND_LINK_BACK && bond_slave_is_up(slave) && +		    slave->delay < mintime) { +			mintime = slave->delay; +			bestslave = slave;  		}  	} @@ -819,7 +759,11 @@ static struct slave *bond_find_best_slave(struct bonding *bond)  static bool bond_should_notify_peers(struct bonding *bond)  { -	struct slave *slave = bond->curr_active_slave; +	struct slave *slave; + +	rcu_read_lock(); +	slave = rcu_dereference(bond->curr_active_slave); +	rcu_read_unlock();  	pr_debug("bond_should_notify_peers: bond %s slave %s\n",  		 bond->dev->name, slave ? slave->dev->name : "NULL"); @@ -844,8 +788,7 @@ static bool bond_should_notify_peers(struct bonding *bond)   * because it is apparently the best available slave we have, even though its   * updelay hasn't timed out yet.   * - * If new_active is not NULL, caller must hold bond->lock for read and - * curr_slave_lock for write_bh. + * If new_active is not NULL, caller must hold curr_slave_lock for write_bh.   */  void bond_change_active_slave(struct bonding *bond, struct slave *new_active)  { @@ -855,11 +798,11 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)  		return;  	if (new_active) { -		new_active->jiffies = jiffies; +		new_active->last_link_up = jiffies;  		if (new_active->link == BOND_LINK_BACK) { -			if (USES_PRIMARY(bond->params.mode)) { -				pr_info("%s: making interface %s the new active one %d ms earlier.\n", +			if (bond_uses_primary(bond)) { +				pr_info("%s: making interface %s the new active one %d ms earlier\n",  					bond->dev->name, new_active->dev->name,  					(bond->params.updelay - new_active->delay) * bond->params.miimon);  			} @@ -867,40 +810,44 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)  			new_active->delay = 0;  			new_active->link = BOND_LINK_UP; -			if (bond->params.mode == BOND_MODE_8023AD) +			if (BOND_MODE(bond) == BOND_MODE_8023AD)  				bond_3ad_handle_link_change(new_active, BOND_LINK_UP);  			if (bond_is_lb(bond))  				bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP);  		} else { -			if (USES_PRIMARY(bond->params.mode)) { -				pr_info("%s: making interface %s the new active one.\n", +			if (bond_uses_primary(bond)) { +				pr_info("%s: making interface %s the new active one\n",  					bond->dev->name, new_active->dev->name);  			}  		}  	} -	if (USES_PRIMARY(bond->params.mode)) +	if (bond_uses_primary(bond))  		bond_hw_addr_swap(bond, new_active, old_active);  	if (bond_is_lb(bond)) {  		bond_alb_handle_active_change(bond, new_active);  		if (old_active) -			bond_set_slave_inactive_flags(old_active); +			bond_set_slave_inactive_flags(old_active, +						      BOND_SLAVE_NOTIFY_NOW);  		if (new_active) -			bond_set_slave_active_flags(new_active); +			bond_set_slave_active_flags(new_active, +						    BOND_SLAVE_NOTIFY_NOW);  	} else {  		rcu_assign_pointer(bond->curr_active_slave, new_active);  	} -	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) { +	if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) {  		if (old_active) -			bond_set_slave_inactive_flags(old_active); +			bond_set_slave_inactive_flags(old_active, +						      BOND_SLAVE_NOTIFY_NOW);  		if (new_active) {  			bool should_notify_peers = false; -			bond_set_slave_active_flags(new_active); +			bond_set_slave_active_flags(new_active, +						    BOND_SLAVE_NOTIFY_NOW);  			if (bond->params.fail_over_mac)  				bond_do_fail_over_mac(bond, new_active, @@ -914,14 +861,12 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)  			}  			write_unlock_bh(&bond->curr_slave_lock); -			read_unlock(&bond->lock);  			call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev);  			if (should_notify_peers)  				call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,  							 bond->dev); -			read_lock(&bond->lock);  			write_lock_bh(&bond->curr_slave_lock);  		}  	} @@ -931,8 +876,8 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)  	 * resend only if bond is brought up with the affected  	 * bonding modes and the retransmission is enabled */  	if (netif_running(bond->dev) && (bond->params.resend_igmp > 0) && -	    ((USES_PRIMARY(bond->params.mode) && new_active) || -	     bond->params.mode == BOND_MODE_ROUNDROBIN)) { +	    ((bond_uses_primary(bond) && new_active) || +	     BOND_MODE(bond) == BOND_MODE_ROUNDROBIN)) {  		bond->igmp_retrans = bond->params.resend_igmp;  		queue_delayed_work(bond->wq, &bond->mcast_work, 1);  	} @@ -947,7 +892,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)   * - The primary_slave has got its link back.   * - A slave has got its link back and there's no old curr_active_slave.   * - * Caller must hold bond->lock for read and curr_slave_lock for write_bh. + * Caller must hold curr_slave_lock for write_bh.   */  void bond_select_active_slave(struct bonding *bond)  { @@ -965,53 +910,24 @@ void bond_select_active_slave(struct bonding *bond)  			pr_info("%s: first active interface up!\n",  				bond->dev->name);  		} else { -			pr_info("%s: now running without any active interface !\n", +			pr_info("%s: now running without any active interface!\n",  				bond->dev->name);  		}  	}  } -/*--------------------------- slave list handling ---------------------------*/ - -/* - * This function attaches the slave to the end of list. - * - * bond->lock held for writing by caller. - */ -static void bond_attach_slave(struct bonding *bond, struct slave *new_slave) -{ -	list_add_tail_rcu(&new_slave->list, &bond->slave_list); -	bond->slave_cnt++; -} - -/* - * This function detaches the slave from the list. - * WARNING: no check is made to verify if the slave effectively - * belongs to <bond>. - * Nothing is freed on return, structures are just unchained. - * If any slave pointer in bond was pointing to <slave>, - * it should be changed by the calling function. - * - * bond->lock held for writing by caller. - */ -static void bond_detach_slave(struct bonding *bond, struct slave *slave) -{ -	list_del_rcu(&slave->list); -	bond->slave_cnt--; -} -  #ifdef CONFIG_NET_POLL_CONTROLLER  static inline int slave_enable_netpoll(struct slave *slave)  {  	struct netpoll *np;  	int err = 0; -	np = kzalloc(sizeof(*np), GFP_ATOMIC); +	np = kzalloc(sizeof(*np), GFP_KERNEL);  	err = -ENOMEM;  	if (!np)  		goto out; -	err = __netpoll_setup(np, slave->dev, GFP_ATOMIC); +	err = __netpoll_setup(np, slave->dev);  	if (err) {  		kfree(np);  		goto out; @@ -1030,14 +946,6 @@ static inline void slave_disable_netpoll(struct slave *slave)  	slave->np = NULL;  	__netpoll_free_async(np);  } -static inline bool slave_dev_support_netpoll(struct net_device *slave_dev) -{ -	if (slave_dev->priv_flags & IFF_DISABLE_NETPOLL) -		return false; -	if (!slave_dev->netdev_ops->ndo_poll_controller) -		return false; -	return true; -}  static void bond_poll_controller(struct net_device *bond_dev)  { @@ -1046,20 +954,22 @@ static void bond_poll_controller(struct net_device *bond_dev)  static void bond_netpoll_cleanup(struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev); +	struct list_head *iter;  	struct slave *slave; -	bond_for_each_slave(bond, slave) -		if (IS_UP(slave->dev)) +	bond_for_each_slave(bond, slave, iter) +		if (bond_slave_is_up(slave))  			slave_disable_netpoll(slave);  } -static int bond_netpoll_setup(struct net_device *dev, struct netpoll_info *ni, gfp_t gfp) +static int bond_netpoll_setup(struct net_device *dev, struct netpoll_info *ni)  {  	struct bonding *bond = netdev_priv(dev); +	struct list_head *iter;  	struct slave *slave;  	int err = 0; -	bond_for_each_slave(bond, slave) { +	bond_for_each_slave(bond, slave, iter) {  		err = slave_enable_netpoll(slave);  		if (err) {  			bond_netpoll_cleanup(dev); @@ -1087,10 +997,11 @@ static netdev_features_t bond_fix_features(struct net_device *dev,  					   netdev_features_t features)  {  	struct bonding *bond = netdev_priv(dev); +	struct list_head *iter;  	netdev_features_t mask;  	struct slave *slave; -	if (list_empty(&bond->slave_list)) { +	if (!bond_has_slaves(bond)) {  		/* Disable adding VLANs to empty bond. But why? --mq */  		features |= NETIF_F_VLAN_CHALLENGED;  		return features; @@ -1100,7 +1011,7 @@ static netdev_features_t bond_fix_features(struct net_device *dev,  	features &= ~NETIF_F_ONE_FOR_ALL;  	features |= NETIF_F_ALL_FOR_ALL; -	bond_for_each_slave(bond, slave) { +	bond_for_each_slave(bond, slave, iter) {  		features = netdev_increment_features(features,  						     slave->dev->features,  						     mask); @@ -1114,23 +1025,32 @@ static netdev_features_t bond_fix_features(struct net_device *dev,  				 NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \  				 NETIF_F_HIGHDMA | NETIF_F_LRO) +#define BOND_ENC_FEATURES	(NETIF_F_ALL_CSUM | NETIF_F_SG | NETIF_F_RXCSUM |\ +				 NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL) +  static void bond_compute_features(struct bonding *bond)  {  	unsigned int flags, dst_release_flag = IFF_XMIT_DST_RELEASE;  	netdev_features_t vlan_features = BOND_VLAN_FEATURES; +	netdev_features_t enc_features  = BOND_ENC_FEATURES; +	struct net_device *bond_dev = bond->dev; +	struct list_head *iter; +	struct slave *slave;  	unsigned short max_hard_header_len = ETH_HLEN;  	unsigned int gso_max_size = GSO_MAX_SIZE; -	struct net_device *bond_dev = bond->dev;  	u16 gso_max_segs = GSO_MAX_SEGS; -	struct slave *slave; -	if (list_empty(&bond->slave_list)) +	if (!bond_has_slaves(bond))  		goto done; +	vlan_features &= NETIF_F_ALL_FOR_ALL; -	bond_for_each_slave(bond, slave) { +	bond_for_each_slave(bond, slave, iter) {  		vlan_features = netdev_increment_features(vlan_features,  			slave->dev->vlan_features, BOND_VLAN_FEATURES); +		enc_features = netdev_increment_features(enc_features, +							 slave->dev->hw_enc_features, +							 BOND_ENC_FEATURES);  		dst_release_flag &= slave->dev->priv_flags;  		if (slave->dev->hard_header_len > max_hard_header_len)  			max_hard_header_len = slave->dev->hard_header_len; @@ -1141,6 +1061,7 @@ static void bond_compute_features(struct bonding *bond)  done:  	bond_dev->vlan_features = vlan_features; +	bond_dev->hw_enc_features = enc_features;  	bond_dev->hard_header_len = max_hard_header_len;  	bond_dev->gso_max_segs = gso_max_segs;  	netif_set_gso_max_size(bond_dev, gso_max_size); @@ -1172,7 +1093,7 @@ static bool bond_should_deliver_exact_match(struct sk_buff *skb,  					    struct bonding *bond)  {  	if (bond_is_slave_inactive(slave)) { -		if (bond->params.mode == BOND_MODE_ALB && +		if (BOND_MODE(bond) == BOND_MODE_ALB &&  		    skb->pkt_type != PACKET_BROADCAST &&  		    skb->pkt_type != PACKET_MULTICAST)  			return false; @@ -1199,9 +1120,6 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb)  	slave = bond_slave_get_rcu(skb->dev);  	bond = slave->bond; -	if (bond->params.arp_interval) -		slave->dev->last_rx = jiffies; -  	recv_probe = ACCESS_ONCE(bond->recv_probe);  	if (recv_probe) {  		ret = recv_probe(skb, bond, slave); @@ -1217,7 +1135,7 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb)  	skb->dev = bond->dev; -	if (bond->params.mode == BOND_MODE_ALB && +	if (BOND_MODE(bond) == BOND_MODE_ALB &&  	    bond->dev->priv_flags & IFF_BRIDGE_PORT &&  	    skb->pkt_type == PACKET_HOST) { @@ -1226,22 +1144,23 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb)  			kfree_skb(skb);  			return RX_HANDLER_CONSUMED;  		} -		memcpy(eth_hdr(skb)->h_dest, bond->dev->dev_addr, ETH_ALEN); +		ether_addr_copy(eth_hdr(skb)->h_dest, bond->dev->dev_addr);  	}  	return ret;  }  static int bond_master_upper_dev_link(struct net_device *bond_dev, -				      struct net_device *slave_dev) +				      struct net_device *slave_dev, +				      struct slave *slave)  {  	int err; -	err = netdev_master_upper_dev_link(slave_dev, bond_dev); +	err = netdev_master_upper_dev_link_private(slave_dev, bond_dev, slave);  	if (err)  		return err;  	slave_dev->flags |= IFF_SLAVE; -	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE); +	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL);  	return 0;  } @@ -1250,7 +1169,36 @@ static void bond_upper_dev_unlink(struct net_device *bond_dev,  {  	netdev_upper_dev_unlink(slave_dev, bond_dev);  	slave_dev->flags &= ~IFF_SLAVE; -	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE); +	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL); +} + +static struct slave *bond_alloc_slave(struct bonding *bond) +{ +	struct slave *slave = NULL; + +	slave = kzalloc(sizeof(struct slave), GFP_KERNEL); +	if (!slave) +		return NULL; + +	if (BOND_MODE(bond) == BOND_MODE_8023AD) { +		SLAVE_AD_INFO(slave) = kzalloc(sizeof(struct ad_slave_info), +					       GFP_KERNEL); +		if (!SLAVE_AD_INFO(slave)) { +			kfree(slave); +			return NULL; +		} +	} +	return slave; +} + +static void bond_free_slave(struct slave *slave) +{ +	struct bonding *bond = bond_get_bond_by_slave(slave); + +	if (BOND_MODE(bond) == BOND_MODE_8023AD) +		kfree(SLAVE_AD_INFO(slave)); + +	kfree(slave);  }  /* enslave device <slave> to bond device <master> */ @@ -1258,7 +1206,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  {  	struct bonding *bond = netdev_priv(bond_dev);  	const struct net_device_ops *slave_ops = slave_dev->netdev_ops; -	struct slave *new_slave = NULL; +	struct slave *new_slave = NULL, *prev_slave;  	struct sockaddr addr;  	int link_reporting;  	int res = 0, i; @@ -1266,16 +1214,21 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  	if (!bond->params.use_carrier &&  	    slave_dev->ethtool_ops->get_link == NULL &&  	    slave_ops->ndo_do_ioctl == NULL) { -		pr_warning("%s: Warning: no link monitoring support for %s\n", -			   bond_dev->name, slave_dev->name); +		pr_warn("%s: Warning: no link monitoring support for %s\n", +			bond_dev->name, slave_dev->name);  	}  	/* already enslaved */  	if (slave_dev->flags & IFF_SLAVE) { -		pr_debug("Error, Device was already enslaved\n"); +		pr_debug("Error: Device was already enslaved\n");  		return -EBUSY;  	} +	if (bond_dev == slave_dev) { +		pr_err("%s: cannot enslave bond to itself.\n", bond_dev->name); +		return -EPERM; +	} +  	/* vlan challenged mutual exclusion */  	/* no need to lock since we're protected by rtnl_lock */  	if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) { @@ -1285,9 +1238,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  			       bond_dev->name, slave_dev->name, bond_dev->name);  			return -EPERM;  		} else { -			pr_warning("%s: Warning: enslaved VLAN challenged slave %s. Adding VLANs will be blocked as long as %s is part of bond %s\n", -				   bond_dev->name, slave_dev->name, -				   slave_dev->name, bond_dev->name); +			pr_warn("%s: Warning: enslaved VLAN challenged slave %s. Adding VLANs will be blocked as long as %s is part of bond %s\n", +				bond_dev->name, slave_dev->name, +				slave_dev->name, bond_dev->name);  		}  	} else {  		pr_debug("%s: ! NETIF_F_VLAN_CHALLENGED\n", slave_dev->name); @@ -1300,7 +1253,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  	 * enslaving it; the old ifenslave will not.  	 */  	if ((slave_dev->flags & IFF_UP)) { -		pr_err("%s is up. This may be due to an out of date ifenslave.\n", +		pr_err("%s is up - this may be due to an out of date ifenslave\n",  		       slave_dev->name);  		res = -EPERM;  		goto err_undo_flags; @@ -1313,7 +1266,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  	 * bond ether type mutual exclusion - don't allow slaves of dissimilar  	 * ether type (eg ARPHRD_ETHER and ARPHRD_INFINIBAND) share the same bond  	 */ -	if (list_empty(&bond->slave_list)) { +	if (!bond_has_slaves(bond)) {  		if (bond_dev->type != slave_dev->type) {  			pr_debug("%s: change device type from %d to %d\n",  				 bond_dev->name, @@ -1344,20 +1297,23 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  						 bond_dev);  		}  	} else if (bond_dev->type != slave_dev->type) { -		pr_err("%s ether type (%d) is different from other slaves (%d), can not enslave it.\n", -		       slave_dev->name, -		       slave_dev->type, bond_dev->type); +		pr_err("%s ether type (%d) is different from other slaves (%d), can not enslave it\n", +		       slave_dev->name, slave_dev->type, bond_dev->type);  		res = -EINVAL;  		goto err_undo_flags;  	}  	if (slave_ops->ndo_set_mac_address == NULL) { -		if (list_empty(&bond->slave_list)) { -			pr_warning("%s: Warning: The first slave device specified does not support setting the MAC address. Setting fail_over_mac to active.", -				   bond_dev->name); -			bond->params.fail_over_mac = BOND_FOM_ACTIVE; +		if (!bond_has_slaves(bond)) { +			pr_warn("%s: Warning: The first slave device specified does not support setting the MAC address\n", +				bond_dev->name); +			if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) { +				bond->params.fail_over_mac = BOND_FOM_ACTIVE; +				pr_warn("%s: Setting fail_over_mac to active for active-backup mode\n", +					bond_dev->name); +			}  		} else if (bond->params.fail_over_mac != BOND_FOM_ACTIVE) { -			pr_err("%s: Error: The slave device specified does not support setting the MAC address, but fail_over_mac is not set to active.\n", +			pr_err("%s: Error: The slave device specified does not support setting the MAC address, but fail_over_mac is not set to active\n",  			       bond_dev->name);  			res = -EOPNOTSUPP;  			goto err_undo_flags; @@ -1368,16 +1324,18 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  	/* If this is the first slave, then we need to set the master's hardware  	 * address to be the same as the slave's. */ -	if (list_empty(&bond->slave_list) && +	if (!bond_has_slaves(bond) &&  	    bond->dev->addr_assign_type == NET_ADDR_RANDOM)  		bond_set_dev_addr(bond->dev, slave_dev); -	new_slave = kzalloc(sizeof(struct slave), GFP_KERNEL); +	new_slave = bond_alloc_slave(bond);  	if (!new_slave) {  		res = -ENOMEM;  		goto err_undo_flags;  	} -	INIT_LIST_HEAD(&new_slave->list); + +	new_slave->bond = bond; +	new_slave->dev = slave_dev;  	/*  	 * Set the new_slave's queue_id to be zero.  Queue ID mapping  	 * is set via sysfs or module option if desired. @@ -1397,9 +1355,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  	 * that need it, and for restoring it upon release, and then  	 * set it to the master's address  	 */ -	memcpy(new_slave->perm_hwaddr, slave_dev->dev_addr, ETH_ALEN); +	ether_addr_copy(new_slave->perm_hwaddr, slave_dev->dev_addr); -	if (!bond->params.fail_over_mac) { +	if (!bond->params.fail_over_mac || +	    BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {  		/*  		 * Set slave to master's mac address.  The application already  		 * set the master's mac address to that of the first slave @@ -1413,21 +1372,13 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  		}  	} -	res = bond_master_upper_dev_link(bond_dev, slave_dev); -	if (res) { -		pr_debug("Error %d calling bond_master_upper_dev_link\n", res); -		goto err_restore_mac; -	} -  	/* open the slave since the application closed it */  	res = dev_open(slave_dev);  	if (res) {  		pr_debug("Opening slave %s failed\n", slave_dev->name); -		goto err_unset_master; +		goto err_restore_mac;  	} -	new_slave->bond = bond; -	new_slave->dev = slave_dev;  	slave_dev->priv_flags |= IFF_BONDING;  	if (bond_is_lb(bond)) { @@ -1439,10 +1390,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  			goto err_close;  	} -	/* If the mode USES_PRIMARY, then the following is handled by +	/* If the mode uses primary, then the following is handled by  	 * bond_change_active_slave().  	 */ -	if (!USES_PRIMARY(bond->params.mode)) { +	if (!bond_uses_primary(bond)) {  		/* set promiscuity level to new slave */  		if (bond_dev->flags & IFF_PROMISC) {  			res = dev_set_promiscuity(slave_dev, 1); @@ -1465,7 +1416,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  		netif_addr_unlock_bh(bond_dev);  	} -	if (bond->params.mode == BOND_MODE_8023AD) { +	if (BOND_MODE(bond) == BOND_MODE_8023AD) {  		/* add lacpdu mc addr to mc list */  		u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; @@ -1479,25 +1430,17 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  		goto err_close;  	} -	write_lock_bh(&bond->lock); - -	bond_attach_slave(bond, new_slave); +	prev_slave = bond_last_slave(bond);  	new_slave->delay = 0;  	new_slave->link_failure_count = 0; -	write_unlock_bh(&bond->lock); - -	bond_compute_features(bond); -  	bond_update_speed_duplex(new_slave); -	read_lock(&bond->lock); - -	new_slave->last_arp_rx = jiffies - +	new_slave->last_rx = jiffies -  		(msecs_to_jiffies(bond->params.arp_interval) + 1);  	for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) -		new_slave->target_last_arp_rx[i] = new_slave->last_arp_rx; +		new_slave->target_last_arp_rx[i] = new_slave->last_rx;  	if (bond->params.miimon && !bond->params.use_carrier) {  		link_reporting = bond_check_dev_link(bond, slave_dev, 1); @@ -1512,12 +1455,12 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  			 * supported); thus, we don't need to change  			 * the messages for netif_carrier.  			 */ -			pr_warning("%s: Warning: MII and ETHTOOL support not available for interface %s, and arp_interval/arp_ip_target module parameters not specified, thus bonding will not detect link failures! see bonding.txt for details.\n", -			       bond_dev->name, slave_dev->name); +			pr_warn("%s: Warning: MII and ETHTOOL support not available for interface %s, and arp_interval/arp_ip_target module parameters not specified, thus bonding will not detect link failures! see bonding.txt for details\n", +				bond_dev->name, slave_dev->name);  		} else if (link_reporting == -1) {  			/* unable get link status using mii/ethtool */ -			pr_warning("%s: Warning: can't get link status from interface %s; the network driver associated with this interface does not support MII or ETHTOOL link status reporting, thus miimon has no effect on this interface.\n", -				   bond_dev->name, slave_dev->name); +			pr_warn("%s: Warning: can't get link status from interface %s; the network driver associated with this interface does not support MII or ETHTOOL link status reporting, thus miimon has no effect on this interface\n", +				bond_dev->name, slave_dev->name);  		}  	} @@ -1541,12 +1484,12 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  	}  	if (new_slave->link != BOND_LINK_DOWN) -		new_slave->jiffies = jiffies; +		new_slave->last_link_up = jiffies;  	pr_debug("Initial state of slave_dev is BOND_LINK_%s\n", -		new_slave->link == BOND_LINK_DOWN ? "DOWN" : -			(new_slave->link == BOND_LINK_UP ? "UP" : "BACK")); +		 new_slave->link == BOND_LINK_DOWN ? "DOWN" : +		 (new_slave->link == BOND_LINK_UP ? "UP" : "BACK")); -	if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) { +	if (bond_uses_primary(bond) && bond->params.primary[0]) {  		/* if there is a primary slave, remember it */  		if (strcmp(bond->params.primary, new_slave->dev->name) == 0) {  			bond->primary_slave = new_slave; @@ -1554,32 +1497,27 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  		}  	} -	write_lock_bh(&bond->curr_slave_lock); - -	switch (bond->params.mode) { +	switch (BOND_MODE(bond)) {  	case BOND_MODE_ACTIVEBACKUP: -		bond_set_slave_inactive_flags(new_slave); -		bond_select_active_slave(bond); +		bond_set_slave_inactive_flags(new_slave, +					      BOND_SLAVE_NOTIFY_NOW);  		break;  	case BOND_MODE_8023AD:  		/* in 802.3ad mode, the internal mechanism  		 * will activate the slaves in the selected  		 * aggregator  		 */ -		bond_set_slave_inactive_flags(new_slave); +		bond_set_slave_inactive_flags(new_slave, BOND_SLAVE_NOTIFY_NOW);  		/* if this is the first slave */ -		if (bond_first_slave(bond) == new_slave) { -			SLAVE_AD_INFO(new_slave).id = 1; +		if (!prev_slave) { +			SLAVE_AD_INFO(new_slave)->id = 1;  			/* Initialize AD with the number of times that the AD timer is called in 1 second  			 * can be called only after the mac address of the bond is set  			 */  			bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL);  		} else { -			struct slave *prev_slave; - -			prev_slave = bond_prev_slave(bond, new_slave); -			SLAVE_AD_INFO(new_slave).id = -				SLAVE_AD_INFO(prev_slave).id + 1; +			SLAVE_AD_INFO(new_slave)->id = +				SLAVE_AD_INFO(prev_slave)->id + 1;  		}  		bond_3ad_bind_slave(new_slave); @@ -1587,8 +1525,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  	case BOND_MODE_TLB:  	case BOND_MODE_ALB:  		bond_set_active_slave(new_slave); -		bond_set_slave_inactive_flags(new_slave); -		bond_select_active_slave(bond); +		bond_set_slave_inactive_flags(new_slave, BOND_SLAVE_NOTIFY_NOW);  		break;  	default:  		pr_debug("This slave is always active in trunk mode\n"); @@ -1606,68 +1543,78 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  		break;  	} /* switch(bond_mode) */ -	write_unlock_bh(&bond->curr_slave_lock); - -	bond_set_carrier(bond); -  #ifdef CONFIG_NET_POLL_CONTROLLER  	slave_dev->npinfo = bond->dev->npinfo;  	if (slave_dev->npinfo) {  		if (slave_enable_netpoll(new_slave)) { -			read_unlock(&bond->lock); -			pr_info("Error, %s: master_dev is using netpoll, " -				 "but new slave device does not support netpoll.\n", -				 bond_dev->name); +			pr_info("Error, %s: master_dev is using netpoll, but new slave device does not support netpoll\n", +				bond_dev->name);  			res = -EBUSY;  			goto err_detach;  		}  	}  #endif -	read_unlock(&bond->lock); - -	res = bond_create_slave_symlinks(bond_dev, slave_dev); -	if (res) -		goto err_detach; -  	res = netdev_rx_handler_register(slave_dev, bond_handle_frame,  					 new_slave);  	if (res) {  		pr_debug("Error %d calling netdev_rx_handler_register\n", res); -		goto err_dest_symlinks; +		goto err_detach; +	} + +	res = bond_master_upper_dev_link(bond_dev, slave_dev, new_slave); +	if (res) { +		pr_debug("Error %d calling bond_master_upper_dev_link\n", res); +		goto err_unregister;  	} -	pr_info("%s: enslaving %s as a%s interface with a%s link.\n", +	res = bond_sysfs_slave_add(new_slave); +	if (res) { +		pr_debug("Error %d calling bond_sysfs_slave_add\n", res); +		goto err_upper_unlink; +	} + +	bond->slave_cnt++; +	bond_compute_features(bond); +	bond_set_carrier(bond); + +	if (bond_uses_primary(bond)) { +		block_netpoll_tx(); +		write_lock_bh(&bond->curr_slave_lock); +		bond_select_active_slave(bond); +		write_unlock_bh(&bond->curr_slave_lock); +		unblock_netpoll_tx(); +	} + +	pr_info("%s: Enslaving %s as %s interface with %s link\n",  		bond_dev->name, slave_dev->name, -		bond_is_active_slave(new_slave) ? "n active" : " backup", -		new_slave->link != BOND_LINK_DOWN ? "n up" : " down"); +		bond_is_active_slave(new_slave) ? "an active" : "a backup", +		new_slave->link != BOND_LINK_DOWN ? "an up" : "a down");  	/* enslave is successful */  	return 0;  /* Undo stages on error */ -err_dest_symlinks: -	bond_destroy_slave_symlinks(bond_dev, slave_dev); +err_upper_unlink: +	bond_upper_dev_unlink(bond_dev, slave_dev); + +err_unregister: +	netdev_rx_handler_unregister(slave_dev);  err_detach: -	if (!USES_PRIMARY(bond->params.mode)) +	if (!bond_uses_primary(bond))  		bond_hw_addr_flush(bond_dev, slave_dev);  	vlan_vids_del_by_dev(slave_dev, bond_dev); -	write_lock_bh(&bond->lock); -	bond_detach_slave(bond, new_slave);  	if (bond->primary_slave == new_slave)  		bond->primary_slave = NULL;  	if (bond->curr_active_slave == new_slave) { -		bond_change_active_slave(bond, NULL); -		write_unlock_bh(&bond->lock); -		read_lock(&bond->lock); +		block_netpoll_tx();  		write_lock_bh(&bond->curr_slave_lock); +		bond_change_active_slave(bond, NULL);  		bond_select_active_slave(bond);  		write_unlock_bh(&bond->curr_slave_lock); -		read_unlock(&bond->lock); -	} else { -		write_unlock_bh(&bond->lock); +		unblock_netpoll_tx();  	}  	slave_disable_netpoll(new_slave); @@ -1675,16 +1622,14 @@ err_close:  	slave_dev->priv_flags &= ~IFF_BONDING;  	dev_close(slave_dev); -err_unset_master: -	bond_upper_dev_unlink(bond_dev, slave_dev); -  err_restore_mac: -	if (!bond->params.fail_over_mac) { +	if (!bond->params.fail_over_mac || +	    BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {  		/* XXX TODO - fom follow mode needs to change master's  		 * MAC if this slave's MAC is in use by the bond, or at  		 * least print a warning.  		 */ -		memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN); +		ether_addr_copy(addr.sa_data, new_slave->perm_hwaddr);  		addr.sa_family = slave_dev->type;  		dev_set_mac_address(slave_dev, &addr);  	} @@ -1693,13 +1638,12 @@ err_restore_mtu:  	dev_set_mtu(slave_dev, new_slave->original_mtu);  err_free: -	kfree(new_slave); +	bond_free_slave(new_slave);  err_undo_flags: -	bond_compute_features(bond);  	/* Enslave of first slave has failed and we need to fix master's mac */ -	if (list_empty(&bond->slave_list) && -	    ether_addr_equal(bond_dev->dev_addr, slave_dev->dev_addr)) +	if (!bond_has_slaves(bond) && +	    ether_addr_equal_64bits(bond_dev->dev_addr, slave_dev->dev_addr))  		eth_hw_addr_random(bond_dev);  	return res; @@ -1724,30 +1668,31 @@ static int __bond_release_one(struct net_device *bond_dev,  	struct bonding *bond = netdev_priv(bond_dev);  	struct slave *slave, *oldcurrent;  	struct sockaddr addr; +	int old_flags = bond_dev->flags;  	netdev_features_t old_features = bond_dev->features;  	/* slave is not a slave or master is not master of this slave */  	if (!(slave_dev->flags & IFF_SLAVE) ||  	    !netdev_has_upper_dev(slave_dev, bond_dev)) { -		pr_err("%s: Error: cannot release %s.\n", +		pr_err("%s: Error: cannot release %s\n",  		       bond_dev->name, slave_dev->name);  		return -EINVAL;  	}  	block_netpoll_tx(); -	write_lock_bh(&bond->lock);  	slave = bond_get_slave_by_dev(bond, slave_dev);  	if (!slave) {  		/* not a slave of this bond */  		pr_info("%s: %s not enslaved\n",  			bond_dev->name, slave_dev->name); -		write_unlock_bh(&bond->lock);  		unblock_netpoll_tx();  		return -EINVAL;  	} -	write_unlock_bh(&bond->lock); +	bond_sysfs_slave_del(slave); + +	bond_upper_dev_unlink(bond_dev, slave_dev);  	/* unregister rx_handler early so bond_handle_frame wouldn't be called  	 * for this slave anymore.  	 */ @@ -1755,14 +1700,12 @@ static int __bond_release_one(struct net_device *bond_dev,  	write_lock_bh(&bond->lock);  	/* Inform AD package of unbinding of slave. */ -	if (bond->params.mode == BOND_MODE_8023AD) { -		/* must be called before the slave is -		 * detached from the list -		 */ +	if (BOND_MODE(bond) == BOND_MODE_8023AD)  		bond_3ad_unbind_slave(slave); -	} -	pr_info("%s: releasing %s interface %s\n", +	write_unlock_bh(&bond->lock); + +	pr_info("%s: Releasing %s interface %s\n",  		bond_dev->name,  		bond_is_active_slave(slave) ? "active" : "backup",  		slave_dev->name); @@ -1771,23 +1714,24 @@ static int __bond_release_one(struct net_device *bond_dev,  	bond->current_arp_slave = NULL; -	/* release the slave from its bond */ -	bond_detach_slave(bond, slave); - -	if (!all && !bond->params.fail_over_mac) { -		if (ether_addr_equal(bond_dev->dev_addr, slave->perm_hwaddr) && -		    !list_empty(&bond->slave_list)) -			pr_warn("%s: Warning: the permanent HWaddr of %s - %pM - is still in use by %s. Set the HWaddr of %s to a different address to avoid conflicts.\n", -				   bond_dev->name, slave_dev->name, -				   slave->perm_hwaddr, -				   bond_dev->name, slave_dev->name); +	if (!all && (!bond->params.fail_over_mac || +		     BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP)) { +		if (ether_addr_equal_64bits(bond_dev->dev_addr, slave->perm_hwaddr) && +		    bond_has_slaves(bond)) +			pr_warn("%s: Warning: the permanent HWaddr of %s - %pM - is still in use by %s - set the HWaddr of %s to a different address to avoid conflicts\n", +				bond_dev->name, slave_dev->name, +				slave->perm_hwaddr, +				bond_dev->name, slave_dev->name);  	}  	if (bond->primary_slave == slave)  		bond->primary_slave = NULL; -	if (oldcurrent == slave) +	if (oldcurrent == slave) { +		write_lock_bh(&bond->curr_slave_lock);  		bond_change_active_slave(bond, NULL); +		write_unlock_bh(&bond->curr_slave_lock); +	}  	if (bond_is_lb(bond)) {  		/* Must be called only after the slave has been @@ -1795,47 +1739,41 @@ static int __bond_release_one(struct net_device *bond_dev,  		 * has been cleared (if our_slave == old_current),  		 * but before a new active slave is selected.  		 */ -		write_unlock_bh(&bond->lock);  		bond_alb_deinit_slave(bond, slave); -		write_lock_bh(&bond->lock);  	}  	if (all) { -		rcu_assign_pointer(bond->curr_active_slave, NULL); +		RCU_INIT_POINTER(bond->curr_active_slave, NULL);  	} else if (oldcurrent == slave) {  		/*  		 * Note that we hold RTNL over this sequence, so there  		 * is no concern that another slave add/remove event  		 * will interfere.  		 */ -		write_unlock_bh(&bond->lock); -		read_lock(&bond->lock);  		write_lock_bh(&bond->curr_slave_lock);  		bond_select_active_slave(bond);  		write_unlock_bh(&bond->curr_slave_lock); -		read_unlock(&bond->lock); -		write_lock_bh(&bond->lock);  	} -	if (list_empty(&bond->slave_list)) { +	if (!bond_has_slaves(bond)) {  		bond_set_carrier(bond);  		eth_hw_addr_random(bond_dev);  		if (vlan_uses_dev(bond_dev)) { -			pr_warning("%s: Warning: clearing HW address of %s while it still has VLANs.\n", -				   bond_dev->name, bond_dev->name); -			pr_warning("%s: When re-adding slaves, make sure the bond's HW address matches its VLANs'.\n", -				   bond_dev->name); +			pr_warn("%s: Warning: clearing HW address of %s while it still has VLANs\n", +				bond_dev->name, bond_dev->name); +			pr_warn("%s: When re-adding slaves, make sure the bond's HW address matches its VLANs\n", +				bond_dev->name);  		}  	} -	write_unlock_bh(&bond->lock);  	unblock_netpoll_tx();  	synchronize_rcu(); +	bond->slave_cnt--; -	if (list_empty(&bond->slave_list)) { +	if (!bond_has_slaves(bond)) {  		call_netdevice_notifiers(NETDEV_CHANGEADDR, bond->dev);  		call_netdevice_notifiers(NETDEV_RELEASE, bond->dev);  	} @@ -1843,39 +1781,42 @@ static int __bond_release_one(struct net_device *bond_dev,  	bond_compute_features(bond);  	if (!(bond_dev->features & NETIF_F_VLAN_CHALLENGED) &&  	    (old_features & NETIF_F_VLAN_CHALLENGED)) -		pr_info("%s: last VLAN challenged slave %s left bond %s. VLAN blocking is removed\n", +		pr_info("%s: last VLAN challenged slave %s left bond %s - VLAN blocking is removed\n",  			bond_dev->name, slave_dev->name, bond_dev->name);  	/* must do this from outside any spinlocks */ -	bond_destroy_slave_symlinks(bond_dev, slave_dev); -  	vlan_vids_del_by_dev(slave_dev, bond_dev); -	/* If the mode USES_PRIMARY, then this cases was handled above by +	/* If the mode uses primary, then this cases was handled above by  	 * bond_change_active_slave(..., NULL)  	 */ -	if (!USES_PRIMARY(bond->params.mode)) { -		/* unset promiscuity level from slave */ -		if (bond_dev->flags & IFF_PROMISC) +	if (!bond_uses_primary(bond)) { +		/* unset promiscuity level from slave +		 * NOTE: The NETDEV_CHANGEADDR call above may change the value +		 * of the IFF_PROMISC flag in the bond_dev, but we need the +		 * value of that flag before that change, as that was the value +		 * when this slave was attached, so we cache at the start of the +		 * function and use it here. Same goes for ALLMULTI below +		 */ +		if (old_flags & IFF_PROMISC)  			dev_set_promiscuity(slave_dev, -1);  		/* unset allmulti level from slave */ -		if (bond_dev->flags & IFF_ALLMULTI) +		if (old_flags & IFF_ALLMULTI)  			dev_set_allmulti(slave_dev, -1);  		bond_hw_addr_flush(bond_dev, slave_dev);  	} -	bond_upper_dev_unlink(bond_dev, slave_dev); -  	slave_disable_netpoll(slave);  	/* close slave before restoring its mac address */  	dev_close(slave_dev); -	if (bond->params.fail_over_mac != BOND_FOM_ACTIVE) { +	if (bond->params.fail_over_mac != BOND_FOM_ACTIVE || +	    BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {  		/* restore original ("permanent") mac address */ -		memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); +		ether_addr_copy(addr.sa_data, slave->perm_hwaddr);  		addr.sa_family = slave_dev->type;  		dev_set_mac_address(slave_dev, &addr);  	} @@ -1884,7 +1825,7 @@ static int __bond_release_one(struct net_device *bond_dev,  	slave_dev->priv_flags &= ~IFF_BONDING; -	kfree(slave); +	bond_free_slave(slave);  	return 0;  /* deletion OK */  } @@ -1906,80 +1847,23 @@ static int  bond_release_and_destroy(struct net_device *bond_dev,  	int ret;  	ret = bond_release(bond_dev, slave_dev); -	if (ret == 0 && list_empty(&bond->slave_list)) { +	if (ret == 0 && !bond_has_slaves(bond)) {  		bond_dev->priv_flags |= IFF_DISABLE_NETPOLL; -		pr_info("%s: destroying bond %s.\n", +		pr_info("%s: Destroying bond %s\n",  			bond_dev->name, bond_dev->name);  		unregister_netdevice(bond_dev);  	}  	return ret;  } -/* - * This function changes the active slave to slave <slave_dev>. - * It returns -EINVAL in the following cases. - *  - <slave_dev> is not found in the list. - *  - There is not active slave now. - *  - <slave_dev> is already active. - *  - The link state of <slave_dev> is not BOND_LINK_UP. - *  - <slave_dev> is not running. - * In these cases, this function does nothing. - * In the other cases, current_slave pointer is changed and 0 is returned. - */ -static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_device *slave_dev) -{ -	struct bonding *bond = netdev_priv(bond_dev); -	struct slave *old_active = NULL; -	struct slave *new_active = NULL; -	int res = 0; - -	if (!USES_PRIMARY(bond->params.mode)) -		return -EINVAL; - -	/* Verify that bond_dev is indeed the master of slave_dev */ -	if (!(slave_dev->flags & IFF_SLAVE) || -	    !netdev_has_upper_dev(slave_dev, bond_dev)) -		return -EINVAL; - -	read_lock(&bond->lock); - -	old_active = bond->curr_active_slave; -	new_active = bond_get_slave_by_dev(bond, slave_dev); -	/* -	 * Changing to the current active: do nothing; return success. -	 */ -	if (new_active && new_active == old_active) { -		read_unlock(&bond->lock); -		return 0; -	} - -	if (new_active && -	    old_active && -	    new_active->link == BOND_LINK_UP && -	    IS_UP(new_active->dev)) { -		block_netpoll_tx(); -		write_lock_bh(&bond->curr_slave_lock); -		bond_change_active_slave(bond, new_active); -		write_unlock_bh(&bond->curr_slave_lock); -		unblock_netpoll_tx(); -	} else -		res = -EINVAL; - -	read_unlock(&bond->lock); - -	return res; -} -  static int bond_info_query(struct net_device *bond_dev, struct ifbond *info)  {  	struct bonding *bond = netdev_priv(bond_dev); -	info->bond_mode = bond->params.mode; +	info->bond_mode = BOND_MODE(bond);  	info->miimon = bond->params.miimon; -	read_lock(&bond->lock);  	info->num_slaves = bond->slave_cnt; -	read_unlock(&bond->lock);  	return 0;  } @@ -1987,11 +1871,11 @@ static int bond_info_query(struct net_device *bond_dev, struct ifbond *info)  static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *info)  {  	struct bonding *bond = netdev_priv(bond_dev); +	struct list_head *iter;  	int i = 0, res = -ENODEV;  	struct slave *slave; -	read_lock(&bond->lock); -	bond_for_each_slave(bond, slave) { +	bond_for_each_slave(bond, slave, iter) {  		if (i++ == (int)info->slave_id) {  			res = 0;  			strcpy(info->slave_name, slave->dev->name); @@ -2001,7 +1885,6 @@ static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *in  			break;  		}  	} -	read_unlock(&bond->lock);  	return res;  } @@ -2012,12 +1895,13 @@ static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *in  static int bond_miimon_inspect(struct bonding *bond)  {  	int link_state, commit = 0; +	struct list_head *iter;  	struct slave *slave;  	bool ignore_updelay;  	ignore_updelay = !bond->curr_active_slave ? true : false; -	bond_for_each_slave(bond, slave) { +	bond_for_each_slave_rcu(bond, slave, iter) {  		slave->new_link = BOND_LINK_NOCHANGE;  		link_state = bond_check_dev_link(bond, slave->dev, 0); @@ -2030,9 +1914,9 @@ static int bond_miimon_inspect(struct bonding *bond)  			slave->link = BOND_LINK_FAIL;  			slave->delay = bond->params.downdelay;  			if (slave->delay) { -				pr_info("%s: link status down for %sinterface %s, disabling it in %d ms.\n", +				pr_info("%s: link status down for %sinterface %s, disabling it in %d ms\n",  					bond->dev->name, -					(bond->params.mode == +					(BOND_MODE(bond) ==  					 BOND_MODE_ACTIVEBACKUP) ?  					(bond_is_active_slave(slave) ?  					 "active " : "backup ") : "", @@ -2046,8 +1930,8 @@ static int bond_miimon_inspect(struct bonding *bond)  				 * recovered before downdelay expired  				 */  				slave->link = BOND_LINK_UP; -				slave->jiffies = jiffies; -				pr_info("%s: link status up again after %d ms for interface %s.\n", +				slave->last_link_up = jiffies; +				pr_info("%s: link status up again after %d ms for interface %s\n",  					bond->dev->name,  					(bond->params.downdelay - slave->delay) *  					bond->params.miimon, @@ -2072,7 +1956,7 @@ static int bond_miimon_inspect(struct bonding *bond)  			slave->delay = bond->params.updelay;  			if (slave->delay) { -				pr_info("%s: link status up for interface %s, enabling it in %d ms.\n", +				pr_info("%s: link status up for interface %s, enabling it in %d ms\n",  					bond->dev->name, slave->dev->name,  					ignore_updelay ? 0 :  					bond->params.updelay * @@ -2082,7 +1966,7 @@ static int bond_miimon_inspect(struct bonding *bond)  		case BOND_LINK_BACK:  			if (!link_state) {  				slave->link = BOND_LINK_DOWN; -				pr_info("%s: link status down again after %d ms for interface %s.\n", +				pr_info("%s: link status down again after %d ms for interface %s\n",  					bond->dev->name,  					(bond->params.updelay - slave->delay) *  					bond->params.miimon, @@ -2111,21 +1995,22 @@ static int bond_miimon_inspect(struct bonding *bond)  static void bond_miimon_commit(struct bonding *bond)  { +	struct list_head *iter;  	struct slave *slave; -	bond_for_each_slave(bond, slave) { +	bond_for_each_slave(bond, slave, iter) {  		switch (slave->new_link) {  		case BOND_LINK_NOCHANGE:  			continue;  		case BOND_LINK_UP:  			slave->link = BOND_LINK_UP; -			slave->jiffies = jiffies; +			slave->last_link_up = jiffies; -			if (bond->params.mode == BOND_MODE_8023AD) { +			if (BOND_MODE(bond) == BOND_MODE_8023AD) {  				/* prevent it from being the active one */  				bond_set_backup_slave(slave); -			} else if (bond->params.mode != BOND_MODE_ACTIVEBACKUP) { +			} else if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {  				/* make it immediately active */  				bond_set_active_slave(slave);  			} else if (slave != bond->primary_slave) { @@ -2133,13 +2018,13 @@ static void bond_miimon_commit(struct bonding *bond)  				bond_set_backup_slave(slave);  			} -			pr_info("%s: link status definitely up for interface %s, %u Mbps %s duplex.\n", +			pr_info("%s: link status definitely up for interface %s, %u Mbps %s duplex\n",  				bond->dev->name, slave->dev->name,  				slave->speed == SPEED_UNKNOWN ? 0 : slave->speed,  				slave->duplex ? "full" : "half");  			/* notify ad that the link status has changed */ -			if (bond->params.mode == BOND_MODE_8023AD) +			if (BOND_MODE(bond) == BOND_MODE_8023AD)  				bond_3ad_handle_link_change(slave, BOND_LINK_UP);  			if (bond_is_lb(bond)) @@ -2158,14 +2043,15 @@ static void bond_miimon_commit(struct bonding *bond)  			slave->link = BOND_LINK_DOWN; -			if (bond->params.mode == BOND_MODE_ACTIVEBACKUP || -			    bond->params.mode == BOND_MODE_8023AD) -				bond_set_slave_inactive_flags(slave); +			if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP || +			    BOND_MODE(bond) == BOND_MODE_8023AD) +				bond_set_slave_inactive_flags(slave, +							      BOND_SLAVE_NOTIFY_NOW);  			pr_info("%s: link status definitely down for interface %s, disabling it\n",  				bond->dev->name, slave->dev->name); -			if (bond->params.mode == BOND_MODE_8023AD) +			if (BOND_MODE(bond) == BOND_MODE_8023AD)  				bond_3ad_handle_link_change(slave,  							    BOND_LINK_DOWN); @@ -2207,48 +2093,42 @@ do_failover:   * an acquisition of appropriate locks followed by a commit phase to   * implement whatever link state changes are indicated.   */ -void bond_mii_monitor(struct work_struct *work) +static void bond_mii_monitor(struct work_struct *work)  {  	struct bonding *bond = container_of(work, struct bonding,  					    mii_work.work);  	bool should_notify_peers = false;  	unsigned long delay; -	read_lock(&bond->lock); -  	delay = msecs_to_jiffies(bond->params.miimon); -	if (list_empty(&bond->slave_list)) +	if (!bond_has_slaves(bond))  		goto re_arm; +	rcu_read_lock(); +  	should_notify_peers = bond_should_notify_peers(bond);  	if (bond_miimon_inspect(bond)) { -		read_unlock(&bond->lock); +		rcu_read_unlock();  		/* Race avoidance with bond_close cancel of workqueue */  		if (!rtnl_trylock()) { -			read_lock(&bond->lock);  			delay = 1;  			should_notify_peers = false;  			goto re_arm;  		} -		read_lock(&bond->lock); -  		bond_miimon_commit(bond); -		read_unlock(&bond->lock);  		rtnl_unlock();	/* might sleep, hold no other locks */ -		read_lock(&bond->lock); -	} +	} else +		rcu_read_unlock();  re_arm:  	if (bond->params.miimon)  		queue_delayed_work(bond->wq, &bond->mii_work, delay); -	read_unlock(&bond->lock); -  	if (should_notify_peers) {  		if (!rtnl_trylock())  			return; @@ -2267,7 +2147,7 @@ static bool bond_has_this_ip(struct bonding *bond, __be32 ip)  		return true;  	rcu_read_lock(); -	netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) { +	netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) {  		if (ip == bond_confirm_addr(upper, 0, ip)) {  			ret = true;  			break; @@ -2283,93 +2163,125 @@ static bool bond_has_this_ip(struct bonding *bond, __be32 ip)   * switches in VLAN mode (especially if ports are configured as   * "native" to a VLAN) might not pass non-tagged frames.   */ -static void bond_arp_send(struct net_device *slave_dev, int arp_op, __be32 dest_ip, __be32 src_ip, unsigned short vlan_id) +static void bond_arp_send(struct net_device *slave_dev, int arp_op, +			  __be32 dest_ip, __be32 src_ip, +			  struct bond_vlan_tag *tags)  {  	struct sk_buff *skb; +	int i; -	pr_debug("arp %d on slave %s: dst %pI4 src %pI4 vid %d\n", arp_op, -		 slave_dev->name, &dest_ip, &src_ip, vlan_id); +	pr_debug("arp %d on slave %s: dst %pI4 src %pI4\n", +		 arp_op, slave_dev->name, &dest_ip, &src_ip);  	skb = arp_create(arp_op, ETH_P_ARP, dest_ip, slave_dev, src_ip,  			 NULL, slave_dev->dev_addr, NULL);  	if (!skb) { -		pr_err("ARP packet allocation failed\n"); +		net_err_ratelimited("ARP packet allocation failed\n");  		return;  	} -	if (vlan_id) { -		skb = vlan_put_tag(skb, htons(ETH_P_8021Q), vlan_id); + +	/* Go through all the tags backwards and add them to the packet */ +	for (i = BOND_MAX_VLAN_ENCAP - 1; i > 0; i--) { +		if (!tags[i].vlan_id) +			continue; + +		pr_debug("inner tag: proto %X vid %X\n", +			 ntohs(tags[i].vlan_proto), tags[i].vlan_id); +		skb = __vlan_put_tag(skb, tags[i].vlan_proto, +				     tags[i].vlan_id); +		if (!skb) { +			net_err_ratelimited("failed to insert inner VLAN tag\n"); +			return; +		} +	} +	/* Set the outer tag */ +	if (tags[0].vlan_id) { +		pr_debug("outer tag: proto %X vid %X\n", +			 ntohs(tags[0].vlan_proto), tags[0].vlan_id); +		skb = vlan_put_tag(skb, tags[0].vlan_proto, tags[0].vlan_id);  		if (!skb) { -			pr_err("failed to insert VLAN tag\n"); +			net_err_ratelimited("failed to insert outer VLAN tag\n");  			return;  		}  	}  	arp_xmit(skb);  } +/* Validate the device path between the @start_dev and the @end_dev. + * The path is valid if the @end_dev is reachable through device + * stacking. + * When the path is validated, collect any vlan information in the + * path. + */ +bool bond_verify_device_path(struct net_device *start_dev, +			     struct net_device *end_dev, +			     struct bond_vlan_tag *tags) +{ +	struct net_device *upper; +	struct list_head  *iter; +	int  idx; + +	if (start_dev == end_dev) +		return true; + +	netdev_for_each_upper_dev_rcu(start_dev, upper, iter) { +		if (bond_verify_device_path(upper, end_dev, tags)) { +			if (is_vlan_dev(upper)) { +				idx = vlan_get_encap_level(upper); +				if (idx >= BOND_MAX_VLAN_ENCAP) +					return false; + +				tags[idx].vlan_proto = +						    vlan_dev_vlan_proto(upper); +				tags[idx].vlan_id = vlan_dev_vlan_id(upper); +			} +			return true; +		} +	} + +	return false; +}  static void bond_arp_send_all(struct bonding *bond, struct slave *slave)  { -	struct net_device *upper, *vlan_upper; -	struct list_head *iter, *vlan_iter;  	struct rtable *rt; +	struct bond_vlan_tag tags[BOND_MAX_VLAN_ENCAP];  	__be32 *targets = bond->params.arp_targets, addr; -	int i, vlan_id; +	int i; +	bool ret;  	for (i = 0; i < BOND_MAX_ARP_TARGETS && targets[i]; i++) {  		pr_debug("basa: target %pI4\n", &targets[i]); +		memset(tags, 0, sizeof(tags));  		/* Find out through which dev should the packet go */  		rt = ip_route_output(dev_net(bond->dev), targets[i], 0,  				     RTO_ONLINK, 0);  		if (IS_ERR(rt)) { -			pr_debug("%s: no route to arp_ip_target %pI4\n", -				 bond->dev->name, &targets[i]); +			/* there's no route to target - try to send arp +			 * probe to generate any traffic (arp_validate=0) +			 */ +			if (bond->params.arp_validate) +				net_warn_ratelimited("%s: no route to arp_ip_target %pI4 and arp_validate is set\n", +						     bond->dev->name, +						     &targets[i]); +			bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], +				      0, tags);  			continue;  		} -		vlan_id = 0; -  		/* bond device itself */  		if (rt->dst.dev == bond->dev)  			goto found;  		rcu_read_lock(); -		/* first we search only for vlan devices. for every vlan -		 * found we verify its upper dev list, searching for the -		 * rt->dst.dev. If found we save the tag of the vlan and -		 * proceed to send the packet. -		 * -		 * TODO: QinQ? -		 */ -		netdev_for_each_upper_dev_rcu(bond->dev, vlan_upper, vlan_iter) { -			if (!is_vlan_dev(vlan_upper)) -				continue; -			netdev_for_each_upper_dev_rcu(vlan_upper, upper, iter) { -				if (upper == rt->dst.dev) { -					vlan_id = vlan_dev_vlan_id(vlan_upper); -					rcu_read_unlock(); -					goto found; -				} -			} -		} - -		/* if the device we're looking for is not on top of any of -		 * our upper vlans, then just search for any dev that -		 * matches, and in case it's a vlan - save the id -		 */ -		netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) { -			if (upper == rt->dst.dev) { -				/* if it's a vlan - get its VID */ -				if (is_vlan_dev(upper)) -					vlan_id = vlan_dev_vlan_id(upper); - -				rcu_read_unlock(); -				goto found; -			} -		} +		ret = bond_verify_device_path(bond->dev, rt->dst.dev, tags);  		rcu_read_unlock(); +		if (ret) +			goto found; +  		/* Not our device - skip */  		pr_debug("%s: no path to arp_ip_target %pI4 via rt.dev %s\n",  			 bond->dev->name, &targets[i], @@ -2382,7 +2294,7 @@ found:  		addr = bond_confirm_addr(rt->dst.dev, targets[i], 0);  		ip_rt_put(rt);  		bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], -			      addr, vlan_id); +			      addr, tags);  	}  } @@ -2400,7 +2312,7 @@ static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32  		pr_debug("bva: sip %pI4 not found in targets\n", &sip);  		return;  	} -	slave->last_arp_rx = jiffies; +	slave->last_rx = jiffies;  	slave->target_last_arp_rx[i] = jiffies;  } @@ -2408,17 +2320,19 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond,  		 struct slave *slave)  {  	struct arphdr *arp = (struct arphdr *)skb->data; +	struct slave *curr_active_slave;  	unsigned char *arp_ptr;  	__be32 sip, tip; -	int alen; +	int alen, is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP); -	if (skb->protocol != __cpu_to_be16(ETH_P_ARP)) +	if (!slave_do_arp_validate(bond, slave)) { +		if ((slave_do_arp_validate_only(bond) && is_arp) || +		    !slave_do_arp_validate_only(bond)) +			slave->last_rx = jiffies;  		return RX_HANDLER_ANOTHER; - -	read_lock(&bond->lock); - -	if (!slave_do_arp_validate(bond, slave)) -		goto out_unlock; +	} else if (!is_arp) { +		return RX_HANDLER_ANOTHER; +	}  	alen = arp_hdr_len(bond->dev); @@ -2452,6 +2366,8 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond,  		 bond->params.arp_validate, slave_do_arp_validate(bond, slave),  		 &sip, &tip); +	curr_active_slave = rcu_dereference(bond->curr_active_slave); +  	/*  	 * Backup slaves won't see the ARP reply, but do come through  	 * here for each ARP probe (so we swap the sip/tip to validate @@ -2465,15 +2381,15 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond,  	 * is done to avoid endless looping when we can't reach the  	 * arp_ip_target and fool ourselves with our own arp requests.  	 */ +  	if (bond_is_active_slave(slave))  		bond_validate_arp(bond, slave, sip, tip); -	else if (bond->curr_active_slave && -		 time_after(slave_last_rx(bond, bond->curr_active_slave), -			    bond->curr_active_slave->jiffies)) +	else if (curr_active_slave && +		 time_after(slave_last_rx(bond, curr_active_slave), +			    curr_active_slave->last_link_up))  		bond_validate_arp(bond, slave, tip, sip);  out_unlock: -	read_unlock(&bond->lock);  	if (arp != (struct arphdr *)skb->data)  		kfree(arp);  	return RX_HANDLER_ANOTHER; @@ -2500,36 +2416,37 @@ static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act,   * arp is transmitted to generate traffic. see activebackup_arp_monitor for   * arp monitoring in active backup mode.   */ -void bond_loadbalance_arp_mon(struct work_struct *work) +static void bond_loadbalance_arp_mon(struct work_struct *work)  {  	struct bonding *bond = container_of(work, struct bonding,  					    arp_work.work);  	struct slave *slave, *oldcurrent; -	int do_failover = 0; - -	read_lock(&bond->lock); +	struct list_head *iter; +	int do_failover = 0, slave_state_changed = 0; -	if (list_empty(&bond->slave_list)) +	if (!bond_has_slaves(bond))  		goto re_arm; -	oldcurrent = bond->curr_active_slave; +	rcu_read_lock(); + +	oldcurrent = ACCESS_ONCE(bond->curr_active_slave);  	/* see if any of the previous devices are up now (i.e. they have  	 * xmt and rcv traffic). the curr_active_slave does not come into -	 * the picture unless it is null. also, slave->jiffies is not needed -	 * here because we send an arp on each slave and give a slave as -	 * long as it needs to get the tx/rx within the delta. +	 * the picture unless it is null. also, slave->last_link_up is not +	 * needed here because we send an arp on each slave and give a slave +	 * as long as it needs to get the tx/rx within the delta.  	 * TODO: what about up/down delay in arp mode? it wasn't here before  	 *       so it can wait  	 */ -	bond_for_each_slave(bond, slave) { +	bond_for_each_slave_rcu(bond, slave, iter) {  		unsigned long trans_start = dev_trans_start(slave->dev);  		if (slave->link != BOND_LINK_UP) {  			if (bond_time_in_interval(bond, trans_start, 1) && -			    bond_time_in_interval(bond, slave->dev->last_rx, 1)) { +			    bond_time_in_interval(bond, slave->last_rx, 1)) {  				slave->link  = BOND_LINK_UP; -				bond_set_active_slave(slave); +				slave_state_changed = 1;  				/* primary_slave has no meaning in round-robin  				 * mode. the window of a slave being up and @@ -2537,7 +2454,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work)  				 * is closed.  				 */  				if (!oldcurrent) { -					pr_info("%s: link status definitely up for interface %s, ", +					pr_info("%s: link status definitely up for interface %s\n",  						bond->dev->name,  						slave->dev->name);  					do_failover = 1; @@ -2555,17 +2472,16 @@ void bond_loadbalance_arp_mon(struct work_struct *work)  			 * if we don't know our ip yet  			 */  			if (!bond_time_in_interval(bond, trans_start, 2) || -			    !bond_time_in_interval(bond, slave->dev->last_rx, 2)) { +			    !bond_time_in_interval(bond, slave->last_rx, 2)) {  				slave->link  = BOND_LINK_DOWN; -				bond_set_backup_slave(slave); +				slave_state_changed = 1;  				if (slave->link_failure_count < UINT_MAX)  					slave->link_failure_count++; -				pr_info("%s: interface %s is now down.\n", -					bond->dev->name, -					slave->dev->name); +				pr_info("%s: interface %s is now down\n", +					bond->dev->name, slave->dev->name);  				if (slave == oldcurrent)  					do_failover = 1; @@ -2579,26 +2495,37 @@ void bond_loadbalance_arp_mon(struct work_struct *work)  		 * do - all replies will be rx'ed on same link causing slaves  		 * to be unstable during low/no traffic periods  		 */ -		if (IS_UP(slave->dev)) +		if (bond_slave_is_up(slave))  			bond_arp_send_all(bond, slave);  	} -	if (do_failover) { -		block_netpoll_tx(); -		write_lock_bh(&bond->curr_slave_lock); +	rcu_read_unlock(); -		bond_select_active_slave(bond); +	if (do_failover || slave_state_changed) { +		if (!rtnl_trylock()) +			goto re_arm; -		write_unlock_bh(&bond->curr_slave_lock); -		unblock_netpoll_tx(); +		if (slave_state_changed) { +			bond_slave_state_change(bond); +		} else if (do_failover) { +			/* the bond_select_active_slave must hold RTNL +			 * and curr_slave_lock for write. +			 */ +			block_netpoll_tx(); +			write_lock_bh(&bond->curr_slave_lock); + +			bond_select_active_slave(bond); + +			write_unlock_bh(&bond->curr_slave_lock); +			unblock_netpoll_tx(); +		} +		rtnl_unlock();  	}  re_arm:  	if (bond->params.arp_interval)  		queue_delayed_work(bond->wq, &bond->arp_work,  				   msecs_to_jiffies(bond->params.arp_interval)); - -	read_unlock(&bond->lock);  }  /* @@ -2607,15 +2534,16 @@ re_arm:   * place for the slave.  Returns 0 if no changes are found, >0 if changes   * to link states must be committed.   * - * Called with bond->lock held for read. + * Called with rcu_read_lock hold.   */  static int bond_ab_arp_inspect(struct bonding *bond)  {  	unsigned long trans_start, last_rx; +	struct list_head *iter;  	struct slave *slave;  	int commit = 0; -	bond_for_each_slave(bond, slave) { +	bond_for_each_slave_rcu(bond, slave, iter) {  		slave->new_link = BOND_LINK_NOCHANGE;  		last_rx = slave_last_rx(bond, slave); @@ -2632,7 +2560,7 @@ static int bond_ab_arp_inspect(struct bonding *bond)  		 * active.  This avoids bouncing, as the last receive  		 * times need a full ARP monitor cycle to be updated.  		 */ -		if (bond_time_in_interval(bond, slave->jiffies, 2)) +		if (bond_time_in_interval(bond, slave->last_link_up, 2))  			continue;  		/* @@ -2677,14 +2605,15 @@ static int bond_ab_arp_inspect(struct bonding *bond)   * Called to commit link state changes noted by inspection step of   * active-backup mode ARP monitor.   * - * Called with RTNL and bond->lock for read. + * Called with RTNL hold.   */  static void bond_ab_arp_commit(struct bonding *bond)  {  	unsigned long trans_start; +	struct list_head *iter;  	struct slave *slave; -	bond_for_each_slave(bond, slave) { +	bond_for_each_slave(bond, slave, iter) {  		switch (slave->new_link) {  		case BOND_LINK_NOCHANGE:  			continue; @@ -2697,11 +2626,12 @@ static void bond_ab_arp_commit(struct bonding *bond)  				slave->link = BOND_LINK_UP;  				if (bond->current_arp_slave) {  					bond_set_slave_inactive_flags( -						bond->current_arp_slave); +						bond->current_arp_slave, +						BOND_SLAVE_NOTIFY_NOW);  					bond->current_arp_slave = NULL;  				} -				pr_info("%s: link status definitely up for interface %s.\n", +				pr_info("%s: link status definitely up for interface %s\n",  					bond->dev->name, slave->dev->name);  				if (!bond->curr_active_slave || @@ -2717,7 +2647,8 @@ static void bond_ab_arp_commit(struct bonding *bond)  				slave->link_failure_count++;  			slave->link = BOND_LINK_DOWN; -			bond_set_slave_inactive_flags(slave); +			bond_set_slave_inactive_flags(slave, +						      BOND_SLAVE_NOTIFY_NOW);  			pr_info("%s: link status definitely down for interface %s, disabling it\n",  				bond->dev->name, slave->dev->name); @@ -2751,53 +2682,46 @@ do_failover:  /*   * Send ARP probes for active-backup mode ARP monitor.   * - * Called with bond->lock held for read. + * Called with rcu_read_lock hold.   */ -static void bond_ab_arp_probe(struct bonding *bond) +static bool bond_ab_arp_probe(struct bonding *bond)  { -	struct slave *slave, *next_slave; -	int i; - -	read_lock(&bond->curr_slave_lock); +	struct slave *slave, *before = NULL, *new_slave = NULL, +		     *curr_arp_slave = rcu_dereference(bond->current_arp_slave), +		     *curr_active_slave = rcu_dereference(bond->curr_active_slave); +	struct list_head *iter; +	bool found = false; +	bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER; -	if (bond->current_arp_slave && bond->curr_active_slave) +	if (curr_arp_slave && curr_active_slave)  		pr_info("PROBE: c_arp %s && cas %s BAD\n", -			bond->current_arp_slave->dev->name, -			bond->curr_active_slave->dev->name); +			curr_arp_slave->dev->name, +			curr_active_slave->dev->name); -	if (bond->curr_active_slave) { -		bond_arp_send_all(bond, bond->curr_active_slave); -		read_unlock(&bond->curr_slave_lock); -		return; +	if (curr_active_slave) { +		bond_arp_send_all(bond, curr_active_slave); +		return should_notify_rtnl;  	} -	read_unlock(&bond->curr_slave_lock); -  	/* if we don't have a curr_active_slave, search for the next available  	 * backup slave from the current_arp_slave and make it the candidate  	 * for becoming the curr_active_slave  	 */ -	if (!bond->current_arp_slave) { -		bond->current_arp_slave = bond_first_slave(bond); -		if (!bond->current_arp_slave) -			return; +	if (!curr_arp_slave) { +		curr_arp_slave = bond_first_slave_rcu(bond); +		if (!curr_arp_slave) +			return should_notify_rtnl;  	} -	bond_set_slave_inactive_flags(bond->current_arp_slave); +	bond_set_slave_inactive_flags(curr_arp_slave, BOND_SLAVE_NOTIFY_LATER); -	/* search for next candidate */ -	next_slave = bond_next_slave(bond, bond->current_arp_slave); -	bond_for_each_slave_from(bond, slave, i, next_slave) { -		if (IS_UP(slave->dev)) { -			slave->link = BOND_LINK_BACK; -			bond_set_slave_active_flags(slave); -			bond_arp_send_all(bond, slave); -			slave->jiffies = jiffies; -			bond->current_arp_slave = slave; -			break; -		} +	bond_for_each_slave_rcu(bond, slave, iter) { +		if (!found && !before && bond_slave_is_up(slave)) +			before = slave; +		if (found && !new_slave && bond_slave_is_up(slave)) +			new_slave = slave;  		/* if the link state is up at this point, we  		 * mark it down - this can happen if we have  		 * simultaneous link failures and @@ -2805,67 +2729,93 @@ static void bond_ab_arp_probe(struct bonding *bond)  		 * one the current slave so it is still marked  		 * up when it is actually down  		 */ -		if (slave->link == BOND_LINK_UP) { +		if (!bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) {  			slave->link = BOND_LINK_DOWN;  			if (slave->link_failure_count < UINT_MAX)  				slave->link_failure_count++; -			bond_set_slave_inactive_flags(slave); +			bond_set_slave_inactive_flags(slave, +						      BOND_SLAVE_NOTIFY_LATER); -			pr_info("%s: backup interface %s is now down.\n", +			pr_info("%s: backup interface %s is now down\n",  				bond->dev->name, slave->dev->name);  		} +		if (slave == curr_arp_slave) +			found = true; +	} + +	if (!new_slave && before) +		new_slave = before; + +	if (!new_slave) +		goto check_state; + +	new_slave->link = BOND_LINK_BACK; +	bond_set_slave_active_flags(new_slave, BOND_SLAVE_NOTIFY_LATER); +	bond_arp_send_all(bond, new_slave); +	new_slave->last_link_up = jiffies; +	rcu_assign_pointer(bond->current_arp_slave, new_slave); + +check_state: +	bond_for_each_slave_rcu(bond, slave, iter) { +		if (slave->should_notify) { +			should_notify_rtnl = BOND_SLAVE_NOTIFY_NOW; +			break; +		}  	} +	return should_notify_rtnl;  } -void bond_activebackup_arp_mon(struct work_struct *work) +static void bond_activebackup_arp_mon(struct work_struct *work)  {  	struct bonding *bond = container_of(work, struct bonding,  					    arp_work.work);  	bool should_notify_peers = false; +	bool should_notify_rtnl = false;  	int delta_in_ticks; -	read_lock(&bond->lock); -  	delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); -	if (list_empty(&bond->slave_list)) +	if (!bond_has_slaves(bond))  		goto re_arm; +	rcu_read_lock(); +  	should_notify_peers = bond_should_notify_peers(bond);  	if (bond_ab_arp_inspect(bond)) { -		read_unlock(&bond->lock); +		rcu_read_unlock();  		/* Race avoidance with bond_close flush of workqueue */  		if (!rtnl_trylock()) { -			read_lock(&bond->lock);  			delta_in_ticks = 1;  			should_notify_peers = false;  			goto re_arm;  		} -		read_lock(&bond->lock); -  		bond_ab_arp_commit(bond); -		read_unlock(&bond->lock);  		rtnl_unlock(); -		read_lock(&bond->lock); +		rcu_read_lock();  	} -	bond_ab_arp_probe(bond); +	should_notify_rtnl = bond_ab_arp_probe(bond); +	rcu_read_unlock();  re_arm:  	if (bond->params.arp_interval)  		queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); -	read_unlock(&bond->lock); - -	if (should_notify_peers) { +	if (should_notify_peers || should_notify_rtnl) {  		if (!rtnl_trylock())  			return; -		call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); + +		if (should_notify_peers) +			call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, +						 bond->dev); +		if (should_notify_rtnl) +			bond_slave_state_notify(bond); +  		rtnl_unlock();  	}  } @@ -2942,7 +2892,7 @@ static int bond_slave_netdev_event(unsigned long event,  		bond_update_speed_duplex(slave); -		if (bond->params.mode == BOND_MODE_8023AD) { +		if (BOND_MODE(bond) == BOND_MODE_8023AD) {  			if (old_speed != slave->speed)  				bond_3ad_adapter_speed_changed(slave);  			if (old_duplex != slave->duplex) @@ -2969,9 +2919,30 @@ static int bond_slave_netdev_event(unsigned long event,  		 */  		break;  	case NETDEV_CHANGENAME: -		/* -		 * TODO: handle changing the primary's name -		 */ +		/* we don't care if we don't have primary set */ +		if (!bond_uses_primary(bond) || +		    !bond->params.primary[0]) +			break; + +		if (slave == bond->primary_slave) { +			/* slave's name changed - he's no longer primary */ +			bond->primary_slave = NULL; +		} else if (!strcmp(slave_dev->name, bond->params.primary)) { +			/* we have a new primary slave */ +			bond->primary_slave = slave; +		} else { /* we didn't change primary - exit */ +			break; +		} + +		pr_info("%s: Primary slave changed to %s, reselecting active slave\n", +			bond->dev->name, +			bond->primary_slave ? slave_dev->name : "none"); + +		block_netpoll_tx(); +		write_lock_bh(&bond->curr_slave_lock); +		bond_select_active_slave(bond); +		write_unlock_bh(&bond->curr_slave_lock); +		unblock_netpoll_tx();  		break;  	case NETDEV_FEAT_CHANGE:  		bond_compute_features(bond); @@ -3001,8 +2972,7 @@ static int bond_netdev_event(struct notifier_block *this,  	struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);  	pr_debug("event_dev: %s, event: %lx\n", -		 event_dev ? event_dev->name : "None", -		 event); +		 event_dev ? event_dev->name : "None", event);  	if (!(event_dev->priv_flags & IFF_BONDING))  		return NOTIFY_DONE; @@ -3026,99 +2996,83 @@ static struct notifier_block bond_netdev_notifier = {  /*---------------------------- Hashing Policies -----------------------------*/ -/* - * Hash for the output device based upon layer 2 data - */ -static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count) +/* L2 hash helper */ +static inline u32 bond_eth_hash(struct sk_buff *skb)  {  	struct ethhdr *data = (struct ethhdr *)skb->data;  	if (skb_headlen(skb) >= offsetof(struct ethhdr, h_proto)) -		return (data->h_dest[5] ^ data->h_source[5]) % count; +		return data->h_dest[5] ^ data->h_source[5];  	return 0;  } -/* - * Hash for the output device based upon layer 2 and layer 3 data. If - * the packet is not IP, fall back on bond_xmit_hash_policy_l2() - */ -static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count) +/* Extract the appropriate headers based on bond's xmit policy */ +static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, +			      struct flow_keys *fk)  { -	const struct ethhdr *data; +	const struct ipv6hdr *iph6;  	const struct iphdr *iph; -	const struct ipv6hdr *ipv6h; -	u32 v6hash; -	const __be32 *s, *d; +	int noff, proto = -1; -	if (skb->protocol == htons(ETH_P_IP) && -	    pskb_network_may_pull(skb, sizeof(*iph))) { +	if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23) +		return skb_flow_dissect(skb, fk); + +	fk->ports = 0; +	noff = skb_network_offset(skb); +	if (skb->protocol == htons(ETH_P_IP)) { +		if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph)))) +			return false;  		iph = ip_hdr(skb); -		data = (struct ethhdr *)skb->data; -		return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^ -			(data->h_dest[5] ^ data->h_source[5])) % count; -	} else if (skb->protocol == htons(ETH_P_IPV6) && -		   pskb_network_may_pull(skb, sizeof(*ipv6h))) { -		ipv6h = ipv6_hdr(skb); -		data = (struct ethhdr *)skb->data; -		s = &ipv6h->saddr.s6_addr32[0]; -		d = &ipv6h->daddr.s6_addr32[0]; -		v6hash = (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]); -		v6hash ^= (v6hash >> 24) ^ (v6hash >> 16) ^ (v6hash >> 8); -		return (v6hash ^ data->h_dest[5] ^ data->h_source[5]) % count; -	} - -	return bond_xmit_hash_policy_l2(skb, count); +		fk->src = iph->saddr; +		fk->dst = iph->daddr; +		noff += iph->ihl << 2; +		if (!ip_is_fragment(iph)) +			proto = iph->protocol; +	} else if (skb->protocol == htons(ETH_P_IPV6)) { +		if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph6)))) +			return false; +		iph6 = ipv6_hdr(skb); +		fk->src = (__force __be32)ipv6_addr_hash(&iph6->saddr); +		fk->dst = (__force __be32)ipv6_addr_hash(&iph6->daddr); +		noff += sizeof(*iph6); +		proto = iph6->nexthdr; +	} else { +		return false; +	} +	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0) +		fk->ports = skb_flow_get_ports(skb, noff, proto); + +	return true;  } -/* - * Hash for the output device based upon layer 3 and layer 4 data. If - * the packet is a frag or not TCP or UDP, just use layer 3 data.  If it is - * altogether not IP, fall back on bond_xmit_hash_policy_l2() +/** + * bond_xmit_hash - generate a hash value based on the xmit policy + * @bond: bonding device + * @skb: buffer to use for headers + * + * This function will extract the necessary headers from the skb buffer and use + * them to generate a hash based on the xmit_policy set in the bonding device   */ -static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count) +u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)  { -	u32 layer4_xor = 0; -	const struct iphdr *iph; -	const struct ipv6hdr *ipv6h; -	const __be32 *s, *d; -	const __be16 *l4 = NULL; -	__be16 _l4[2]; -	int noff = skb_network_offset(skb); -	int poff; - -	if (skb->protocol == htons(ETH_P_IP) && -	    pskb_may_pull(skb, noff + sizeof(*iph))) { -		iph = ip_hdr(skb); -		poff = proto_ports_offset(iph->protocol); +	struct flow_keys flow; +	u32 hash; -		if (!ip_is_fragment(iph) && poff >= 0) { -			l4 = skb_header_pointer(skb, noff + (iph->ihl << 2) + poff, -						sizeof(_l4), &_l4); -			if (l4) -				layer4_xor = ntohs(l4[0] ^ l4[1]); -		} -		return (layer4_xor ^ -			((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count; -	} else if (skb->protocol == htons(ETH_P_IPV6) && -		   pskb_may_pull(skb, noff + sizeof(*ipv6h))) { -		ipv6h = ipv6_hdr(skb); -		poff = proto_ports_offset(ipv6h->nexthdr); -		if (poff >= 0) { -			l4 = skb_header_pointer(skb, noff + sizeof(*ipv6h) + poff, -						sizeof(_l4), &_l4); -			if (l4) -				layer4_xor = ntohs(l4[0] ^ l4[1]); -		} -		s = &ipv6h->saddr.s6_addr32[0]; -		d = &ipv6h->daddr.s6_addr32[0]; -		layer4_xor ^= (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]); -		layer4_xor ^= (layer4_xor >> 24) ^ (layer4_xor >> 16) ^ -			       (layer4_xor >> 8); -		return layer4_xor % count; -	} +	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 || +	    !bond_flow_dissect(bond, skb, &flow)) +		return bond_eth_hash(skb); + +	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 || +	    bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) +		hash = bond_eth_hash(skb); +	else +		hash = (__force u32)flow.ports; +	hash ^= (__force u32)flow.dst ^ (__force u32)flow.src; +	hash ^= (hash >> 16); +	hash ^= (hash >> 8); -	return bond_xmit_hash_policy_l2(skb, count); +	return hash;  }  /*-------------------------- Device entry points ----------------------------*/ @@ -3129,7 +3083,7 @@ static void bond_work_init_all(struct bonding *bond)  			  bond_resend_igmp_join_requests_delayed);  	INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor);  	INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor); -	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) +	if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP)  		INIT_DELAYED_WORK(&bond->arp_work, bond_activebackup_arp_mon);  	else  		INIT_DELAYED_WORK(&bond->arp_work, bond_loadbalance_arp_mon); @@ -3148,18 +3102,21 @@ static void bond_work_cancel_all(struct bonding *bond)  static int bond_open(struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev); +	struct list_head *iter;  	struct slave *slave;  	/* reset slave->backup and slave->inactive */  	read_lock(&bond->lock); -	if (!list_empty(&bond->slave_list)) { +	if (bond_has_slaves(bond)) {  		read_lock(&bond->curr_slave_lock); -		bond_for_each_slave(bond, slave) { -			if ((bond->params.mode == BOND_MODE_ACTIVEBACKUP) +		bond_for_each_slave(bond, slave, iter) { +			if (bond_uses_primary(bond)  				&& (slave != bond->curr_active_slave)) { -				bond_set_slave_inactive_flags(slave); +				bond_set_slave_inactive_flags(slave, +							      BOND_SLAVE_NOTIFY_NOW);  			} else { -				bond_set_slave_active_flags(slave); +				bond_set_slave_active_flags(slave, +							    BOND_SLAVE_NOTIFY_NOW);  			}  		}  		read_unlock(&bond->curr_slave_lock); @@ -3172,9 +3129,10 @@ static int bond_open(struct net_device *bond_dev)  		/* bond_alb_initialize must be called before the timer  		 * is started.  		 */ -		if (bond_alb_initialize(bond, (bond->params.mode == BOND_MODE_ALB))) +		if (bond_alb_initialize(bond, (BOND_MODE(bond) == BOND_MODE_ALB)))  			return -ENOMEM; -		queue_delayed_work(bond->wq, &bond->alb_work, 0); +		if (bond->params.tlb_dynamic_lb) +			queue_delayed_work(bond->wq, &bond->alb_work, 0);  	}  	if (bond->params.miimon)  /* link check interval, in milliseconds. */ @@ -3182,11 +3140,10 @@ static int bond_open(struct net_device *bond_dev)  	if (bond->params.arp_interval) {  /* arp interval, in milliseconds. */  		queue_delayed_work(bond->wq, &bond->arp_work, 0); -		if (bond->params.arp_validate) -			bond->recv_probe = bond_arp_rcv; +		bond->recv_probe = bond_arp_rcv;  	} -	if (bond->params.mode == BOND_MODE_8023AD) { +	if (BOND_MODE(bond) == BOND_MODE_8023AD) {  		queue_delayed_work(bond->wq, &bond->ad_work, 0);  		/* register to receive LACPDUs */  		bond->recv_probe = bond_3ad_lacpdu_recv; @@ -3214,12 +3171,13 @@ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,  {  	struct bonding *bond = netdev_priv(bond_dev);  	struct rtnl_link_stats64 temp; +	struct list_head *iter;  	struct slave *slave;  	memset(stats, 0, sizeof(*stats));  	read_lock_bh(&bond->lock); -	bond_for_each_slave(bond, slave) { +	bond_for_each_slave(bond, slave, iter) {  		const struct rtnl_link_stats64 *sstats =  			dev_get_stats(slave->dev, &temp); @@ -3256,12 +3214,14 @@ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,  static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd)  { +	struct bonding *bond = netdev_priv(bond_dev);  	struct net_device *slave_dev = NULL;  	struct ifbond k_binfo;  	struct ifbond __user *u_binfo = NULL;  	struct ifslave k_sinfo;  	struct ifslave __user *u_sinfo = NULL;  	struct mii_ioctl_data *mii = NULL; +	struct bond_opt_value newval;  	struct net *net;  	int res = 0; @@ -3286,7 +3246,6 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd  		if (mii->reg_num == 1) { -			struct bonding *bond = netdev_priv(bond_dev);  			mii->val_out = 0;  			read_lock(&bond->lock);  			read_lock(&bond->curr_slave_lock); @@ -3334,37 +3293,35 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd  	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))  		return -EPERM; -	slave_dev = dev_get_by_name(net, ifr->ifr_slave); +	slave_dev = __dev_get_by_name(net, ifr->ifr_slave);  	pr_debug("slave_dev=%p:\n", slave_dev);  	if (!slave_dev) -		res = -ENODEV; -	else { -		pr_debug("slave_dev->name=%s:\n", slave_dev->name); -		switch (cmd) { -		case BOND_ENSLAVE_OLD: -		case SIOCBONDENSLAVE: -			res = bond_enslave(bond_dev, slave_dev); -			break; -		case BOND_RELEASE_OLD: -		case SIOCBONDRELEASE: -			res = bond_release(bond_dev, slave_dev); -			break; -		case BOND_SETHWADDR_OLD: -		case SIOCBONDSETHWADDR: -			bond_set_dev_addr(bond_dev, slave_dev); -			res = 0; -			break; -		case BOND_CHANGE_ACTIVE_OLD: -		case SIOCBONDCHANGEACTIVE: -			res = bond_ioctl_change_active(bond_dev, slave_dev); -			break; -		default: -			res = -EOPNOTSUPP; -		} +		return -ENODEV; -		dev_put(slave_dev); +	pr_debug("slave_dev->name=%s:\n", slave_dev->name); +	switch (cmd) { +	case BOND_ENSLAVE_OLD: +	case SIOCBONDENSLAVE: +		res = bond_enslave(bond_dev, slave_dev); +		break; +	case BOND_RELEASE_OLD: +	case SIOCBONDRELEASE: +		res = bond_release(bond_dev, slave_dev); +		break; +	case BOND_SETHWADDR_OLD: +	case SIOCBONDSETHWADDR: +		bond_set_dev_addr(bond_dev, slave_dev); +		res = 0; +		break; +	case BOND_CHANGE_ACTIVE_OLD: +	case SIOCBONDCHANGEACTIVE: +		bond_opt_initstr(&newval, slave_dev->name); +		res = __bond_opt_set(bond, BOND_OPT_ACTIVE_SLAVE, &newval); +		break; +	default: +		res = -EOPNOTSUPP;  	}  	return res; @@ -3386,22 +3343,24 @@ static void bond_change_rx_flags(struct net_device *bond_dev, int change)  static void bond_set_rx_mode(struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev); +	struct list_head *iter;  	struct slave *slave; -	ASSERT_RTNL(); -	if (USES_PRIMARY(bond->params.mode)) { -		slave = rtnl_dereference(bond->curr_active_slave); +	rcu_read_lock(); +	if (bond_uses_primary(bond)) { +		slave = rcu_dereference(bond->curr_active_slave);  		if (slave) {  			dev_uc_sync(slave->dev, bond_dev);  			dev_mc_sync(slave->dev, bond_dev);  		}  	} else { -		bond_for_each_slave(bond, slave) { +		bond_for_each_slave_rcu(bond, slave, iter) {  			dev_uc_sync_multiple(slave->dev, bond_dev);  			dev_mc_sync_multiple(slave->dev, bond_dev);  		}  	} +	rcu_read_unlock();  }  static int bond_neigh_init(struct neighbour *n) @@ -3464,11 +3423,12 @@ static int bond_neigh_setup(struct net_device *dev,  static int bond_change_mtu(struct net_device *bond_dev, int new_mtu)  {  	struct bonding *bond = netdev_priv(bond_dev); -	struct slave *slave; +	struct slave *slave, *rollback_slave; +	struct list_head *iter;  	int res = 0; -	pr_debug("bond=%p, name=%s, new_mtu=%d\n", bond, -		 (bond_dev ? bond_dev->name : "None"), new_mtu); +	pr_debug("bond=%p, name=%s, new_mtu=%d\n", +		 bond, bond_dev ? bond_dev->name : "None", new_mtu);  	/* Can't hold bond->lock with bh disabled here since  	 * some base drivers panic. On the other hand we can't @@ -3485,11 +3445,9 @@ static int bond_change_mtu(struct net_device *bond_dev, int new_mtu)  	 * call to the base driver.  	 */ -	bond_for_each_slave(bond, slave) { -		pr_debug("s %p s->p %p c_m %p\n", -			 slave, -			 bond_prev_slave(bond, slave), -			 slave->dev->netdev_ops->ndo_change_mtu); +	bond_for_each_slave(bond, slave, iter) { +		pr_debug("s %p c_m %p\n", +			 slave, slave->dev->netdev_ops->ndo_change_mtu);  		res = dev_set_mtu(slave->dev, new_mtu); @@ -3513,13 +3471,16 @@ static int bond_change_mtu(struct net_device *bond_dev, int new_mtu)  unwind:  	/* unwind from head to the slave that failed */ -	bond_for_each_slave_continue_reverse(bond, slave) { +	bond_for_each_slave(bond, rollback_slave, iter) {  		int tmp_res; -		tmp_res = dev_set_mtu(slave->dev, bond_dev->mtu); +		if (rollback_slave == slave) +			break; + +		tmp_res = dev_set_mtu(rollback_slave->dev, bond_dev->mtu);  		if (tmp_res) {  			pr_debug("unwind err %d dev %s\n", -				 tmp_res, slave->dev->name); +				 tmp_res, rollback_slave->dev->name);  		}  	} @@ -3536,11 +3497,12 @@ unwind:  static int bond_set_mac_address(struct net_device *bond_dev, void *addr)  {  	struct bonding *bond = netdev_priv(bond_dev); +	struct slave *slave, *rollback_slave;  	struct sockaddr *sa = addr, tmp_sa; -	struct slave *slave; +	struct list_head *iter;  	int res = 0; -	if (bond->params.mode == BOND_MODE_ALB) +	if (BOND_MODE(bond) == BOND_MODE_ALB)  		return bond_alb_set_mac_address(bond_dev, addr); @@ -3550,7 +3512,8 @@ static int bond_set_mac_address(struct net_device *bond_dev, void *addr)  	/* If fail_over_mac is enabled, do nothing and return success.  	 * Returning an error causes ifenslave to fail.  	 */ -	if (bond->params.fail_over_mac) +	if (bond->params.fail_over_mac && +	    BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP)  		return 0;  	if (!is_valid_ether_addr(sa->sa_data)) @@ -3571,16 +3534,8 @@ static int bond_set_mac_address(struct net_device *bond_dev, void *addr)  	 * call to the base driver.  	 */ -	bond_for_each_slave(bond, slave) { -		const struct net_device_ops *slave_ops = slave->dev->netdev_ops; +	bond_for_each_slave(bond, slave, iter) {  		pr_debug("slave %p %s\n", slave, slave->dev->name); - -		if (slave_ops->ndo_set_mac_address == NULL) { -			res = -EOPNOTSUPP; -			pr_debug("EOPNOTSUPP %s\n", slave->dev->name); -			goto unwind; -		} -  		res = dev_set_mac_address(slave->dev, addr);  		if (res) {  			/* TODO: consider downing the slave @@ -3603,13 +3558,16 @@ unwind:  	tmp_sa.sa_family = bond_dev->type;  	/* unwind from head to the slave that failed */ -	bond_for_each_slave_continue_reverse(bond, slave) { +	bond_for_each_slave(bond, rollback_slave, iter) {  		int tmp_res; -		tmp_res = dev_set_mac_address(slave->dev, &tmp_sa); +		if (rollback_slave == slave) +			break; + +		tmp_res = dev_set_mac_address(rollback_slave->dev, &tmp_sa);  		if (tmp_res) {  			pr_debug("unwind err %d dev %s\n", -				 tmp_res, slave->dev->name); +				 tmp_res, rollback_slave->dev->name);  		}  	} @@ -3626,15 +3584,16 @@ unwind:   * it fails, it tries to find the first available slave for transmission.   * The skb is consumed in all cases, thus the function is void.   */ -void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id) +static void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id)  { +	struct list_head *iter;  	struct slave *slave;  	int i = slave_id;  	/* Here we start from the slave with slave_id */ -	bond_for_each_slave_rcu(bond, slave) { +	bond_for_each_slave_rcu(bond, slave, iter) {  		if (--i < 0) { -			if (slave_can_tx(slave)) { +			if (bond_slave_can_tx(slave)) {  				bond_dev_queue_xmit(bond, skb, slave->dev);  				return;  			} @@ -3643,16 +3602,49 @@ void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id)  	/* Here we start from the first slave up to slave_id */  	i = slave_id; -	bond_for_each_slave_rcu(bond, slave) { +	bond_for_each_slave_rcu(bond, slave, iter) {  		if (--i < 0)  			break; -		if (slave_can_tx(slave)) { +		if (bond_slave_can_tx(slave)) {  			bond_dev_queue_xmit(bond, skb, slave->dev);  			return;  		}  	}  	/* no slave that can tx has been found */ -	kfree_skb(skb); +	dev_kfree_skb_any(skb); +} + +/** + * bond_rr_gen_slave_id - generate slave id based on packets_per_slave + * @bond: bonding device to use + * + * Based on the value of the bonding device's packets_per_slave parameter + * this function generates a slave id, which is usually used as the next + * slave to transmit through. + */ +static u32 bond_rr_gen_slave_id(struct bonding *bond) +{ +	u32 slave_id; +	struct reciprocal_value reciprocal_packets_per_slave; +	int packets_per_slave = bond->params.packets_per_slave; + +	switch (packets_per_slave) { +	case 0: +		slave_id = prandom_u32(); +		break; +	case 1: +		slave_id = bond->rr_tx_counter; +		break; +	default: +		reciprocal_packets_per_slave = +			bond->params.reciprocal_packets_per_slave; +		slave_id = reciprocal_divide(bond->rr_tx_counter, +					     reciprocal_packets_per_slave); +		break; +	} +	bond->rr_tx_counter++; + +	return slave_id;  }  static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev) @@ -3660,9 +3652,9 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev  	struct bonding *bond = netdev_priv(bond_dev);  	struct iphdr *iph = ip_hdr(skb);  	struct slave *slave; +	u32 slave_id; -	/* -	 * Start with the curr_active_slave that joined the bond as the +	/* Start with the curr_active_slave that joined the bond as the  	 * default for sending IGMP traffic.  For failover purposes one  	 * needs to maintain some consistency for the interface that will  	 * send the join/membership reports.  The curr_active_slave found @@ -3670,13 +3662,13 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev  	 */  	if (iph->protocol == IPPROTO_IGMP && skb->protocol == htons(ETH_P_IP)) {  		slave = rcu_dereference(bond->curr_active_slave); -		if (slave && slave_can_tx(slave)) +		if (slave && bond_slave_can_tx(slave))  			bond_dev_queue_xmit(bond, skb, slave->dev);  		else  			bond_xmit_slave_id(bond, skb, 0);  	} else { -		bond_xmit_slave_id(bond, skb, -				   bond->rr_tx_counter++ % bond->slave_cnt); +		slave_id = bond_rr_gen_slave_id(bond); +		bond_xmit_slave_id(bond, skb, slave_id % bond->slave_cnt);  	}  	return NETDEV_TX_OK; @@ -3695,13 +3687,12 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d  	if (slave)  		bond_dev_queue_xmit(bond, skb, slave->dev);  	else -		kfree_skb(skb); +		dev_kfree_skb_any(skb);  	return NETDEV_TX_OK;  } -/* - * In bond_xmit_xor() , we determine the output device by using a pre- +/* In bond_xmit_xor() , we determine the output device by using a pre-   * determined xmit_hash_policy(), If the selected device is not enabled,   * find the next active slave.   */ @@ -3709,8 +3700,7 @@ static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev); -	bond_xmit_slave_id(bond, skb, -			   bond->xmit_hash_policy(skb, bond->slave_cnt)); +	bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb) % bond->slave_cnt);  	return NETDEV_TX_OK;  } @@ -3720,48 +3710,33 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev);  	struct slave *slave = NULL; +	struct list_head *iter; -	bond_for_each_slave_rcu(bond, slave) { +	bond_for_each_slave_rcu(bond, slave, iter) {  		if (bond_is_last_slave(bond, slave))  			break; -		if (IS_UP(slave->dev) && slave->link == BOND_LINK_UP) { +		if (bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) {  			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);  			if (!skb2) { -				pr_err("%s: Error: bond_xmit_broadcast(): skb_clone() failed\n", -				       bond_dev->name); +				net_err_ratelimited("%s: Error: %s: skb_clone() failed\n", +						    bond_dev->name, __func__);  				continue;  			}  			/* bond_dev_queue_xmit always returns 0 */  			bond_dev_queue_xmit(bond, skb2, slave->dev);  		}  	} -	if (slave && IS_UP(slave->dev) && slave->link == BOND_LINK_UP) +	if (slave && bond_slave_is_up(slave) && slave->link == BOND_LINK_UP)  		bond_dev_queue_xmit(bond, skb, slave->dev);  	else -		kfree_skb(skb); +		dev_kfree_skb_any(skb);  	return NETDEV_TX_OK;  }  /*------------------------- Device initialization ---------------------------*/ -static void bond_set_xmit_hash_policy(struct bonding *bond) -{ -	switch (bond->params.xmit_policy) { -	case BOND_XMIT_POLICY_LAYER23: -		bond->xmit_hash_policy = bond_xmit_hash_policy_l23; -		break; -	case BOND_XMIT_POLICY_LAYER34: -		bond->xmit_hash_policy = bond_xmit_hash_policy_l34; -		break; -	case BOND_XMIT_POLICY_LAYER2: -	default: -		bond->xmit_hash_policy = bond_xmit_hash_policy_l2; -		break; -	} -} -  /*   * Lookup the slave that corresponds to a qid   */ @@ -3769,31 +3744,29 @@ static inline int bond_slave_override(struct bonding *bond,  				      struct sk_buff *skb)  {  	struct slave *slave = NULL; -	struct slave *check_slave; -	int res = 1; +	struct list_head *iter;  	if (!skb->queue_mapping)  		return 1;  	/* Find out if any slaves have the same mapping as this skb. */ -	bond_for_each_slave_rcu(bond, check_slave) { -		if (check_slave->queue_id == skb->queue_mapping) { -			slave = check_slave; +	bond_for_each_slave_rcu(bond, slave, iter) { +		if (slave->queue_id == skb->queue_mapping) { +			if (bond_slave_can_tx(slave)) { +				bond_dev_queue_xmit(bond, skb, slave->dev); +				return 0; +			} +			/* If the slave isn't UP, use default transmit policy. */  			break;  		}  	} -	/* If the slave isn't UP, use default transmit policy. */ -	if (slave && slave->queue_id && IS_UP(slave->dev) && -	    (slave->link == BOND_LINK_UP)) { -		res = bond_dev_queue_xmit(bond, skb, slave->dev); -	} - -	return res; +	return 1;  } -static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb) +static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb, +			     void *accel_priv, select_queue_fallback_t fallback)  {  	/*  	 * This helper function exists to help dev_pick_tx get the correct @@ -3820,12 +3793,11 @@ static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev  {  	struct bonding *bond = netdev_priv(dev); -	if (TX_QUEUE_OVERRIDE(bond->params.mode)) { -		if (!bond_slave_override(bond, skb)) -			return NETDEV_TX_OK; -	} +	if (bond_should_override_tx_queue(bond) && +	    !bond_slave_override(bond, skb)) +		return NETDEV_TX_OK; -	switch (bond->params.mode) { +	switch (BOND_MODE(bond)) {  	case BOND_MODE_ROUNDROBIN:  		return bond_xmit_roundrobin(skb, dev);  	case BOND_MODE_ACTIVEBACKUP: @@ -3837,14 +3809,15 @@ static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev  	case BOND_MODE_8023AD:  		return bond_3ad_xmit_xor(skb, dev);  	case BOND_MODE_ALB: -	case BOND_MODE_TLB:  		return bond_alb_xmit(skb, dev); +	case BOND_MODE_TLB: +		return bond_tlb_xmit(skb, dev);  	default:  		/* Should never happen, mode already checked */  		pr_err("%s: Error: Unknown bonding mode %d\n", -		       dev->name, bond->params.mode); +		       dev->name, BOND_MODE(bond));  		WARN_ON_ONCE(1); -		kfree_skb(skb); +		dev_kfree_skb_any(skb);  		return NETDEV_TX_OK;  	}  } @@ -3858,69 +3831,38 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)  	 * If we risk deadlock from transmitting this in the  	 * netpoll path, tell netpoll to queue the frame for later tx  	 */ -	if (is_netpoll_tx_blocked(dev)) +	if (unlikely(is_netpoll_tx_blocked(dev)))  		return NETDEV_TX_BUSY;  	rcu_read_lock(); -	if (!list_empty(&bond->slave_list)) +	if (bond_has_slaves(bond))  		ret = __bond_start_xmit(skb, dev);  	else -		kfree_skb(skb); +		dev_kfree_skb_any(skb);  	rcu_read_unlock();  	return ret;  } -/* - * set bond mode specific net device operations - */ -void bond_set_mode_ops(struct bonding *bond, int mode) -{ -	struct net_device *bond_dev = bond->dev; - -	switch (mode) { -	case BOND_MODE_ROUNDROBIN: -		break; -	case BOND_MODE_ACTIVEBACKUP: -		break; -	case BOND_MODE_XOR: -		bond_set_xmit_hash_policy(bond); -		break; -	case BOND_MODE_BROADCAST: -		break; -	case BOND_MODE_8023AD: -		bond_set_xmit_hash_policy(bond); -		break; -	case BOND_MODE_ALB: -		/* FALLTHRU */ -	case BOND_MODE_TLB: -		break; -	default: -		/* Should never happen, mode already checked */ -		pr_err("%s: Error: Unknown bonding mode %d\n", -		       bond_dev->name, mode); -		break; -	} -} -  static int bond_ethtool_get_settings(struct net_device *bond_dev,  				     struct ethtool_cmd *ecmd)  {  	struct bonding *bond = netdev_priv(bond_dev);  	unsigned long speed = 0; +	struct list_head *iter;  	struct slave *slave;  	ecmd->duplex = DUPLEX_UNKNOWN;  	ecmd->port = PORT_OTHER; -	/* Since SLAVE_IS_OK returns false for all inactive or down slaves, we +	/* Since bond_slave_can_tx returns false for all inactive or down slaves, we  	 * do not need to check mode.  Though link speed might not represent  	 * the true receive or transmit bandwidth (not all modes are symmetric)  	 * this is an accurate maximum.  	 */  	read_lock(&bond->lock); -	bond_for_each_slave(bond, slave) { -		if (SLAVE_IS_OK(slave)) { +	bond_for_each_slave(bond, slave, iter) { +		if (bond_slave_can_tx(slave)) {  			if (slave->speed != SPEED_UNKNOWN)  				speed += slave->speed;  			if (ecmd->duplex == DUPLEX_UNKNOWN && @@ -3987,14 +3929,13 @@ static void bond_destructor(struct net_device *bond_dev)  	free_netdev(bond_dev);  } -static void bond_setup(struct net_device *bond_dev) +void bond_setup(struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev);  	/* initialize rwlocks */  	rwlock_init(&bond->lock);  	rwlock_init(&bond->curr_slave_lock); -	INIT_LIST_HEAD(&bond->slave_list);  	bond->params = bonding_defaults;  	/* Initialize pointers */ @@ -4004,7 +3945,6 @@ static void bond_setup(struct net_device *bond_dev)  	ether_setup(bond_dev);  	bond_dev->netdev_ops = &bond_netdev_ops;  	bond_dev->ethtool_ops = &bond_ethtool_ops; -	bond_set_mode_ops(bond, bond->params.mode);  	bond_dev->destructor = bond_destructor; @@ -4013,7 +3953,7 @@ static void bond_setup(struct net_device *bond_dev)  	/* Initialize the device options */  	bond_dev->tx_queue_len = 0;  	bond_dev->flags |= IFF_MASTER|IFF_MULTICAST; -	bond_dev->priv_flags |= IFF_BONDING; +	bond_dev->priv_flags |= IFF_BONDING | IFF_UNICAST_FLT;  	bond_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);  	/* At first, we block adding VLANs. That's the only way to @@ -4034,12 +3974,16 @@ static void bond_setup(struct net_device *bond_dev)  	 * capable  	 */ +	/* Don't allow bond devices to change network namespaces. */ +	bond_dev->features |= NETIF_F_NETNS_LOCAL; +  	bond_dev->hw_features = BOND_VLAN_FEATURES |  				NETIF_F_HW_VLAN_CTAG_TX |  				NETIF_F_HW_VLAN_CTAG_RX |  				NETIF_F_HW_VLAN_CTAG_FILTER;  	bond_dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_HW_CSUM); +	bond_dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;  	bond_dev->features |= bond_dev->hw_features;  } @@ -4050,14 +3994,15 @@ static void bond_setup(struct net_device *bond_dev)  static void bond_uninit(struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev); -	struct slave *slave, *tmp_slave; +	struct list_head *iter; +	struct slave *slave;  	bond_netpoll_cleanup(bond_dev);  	/* Release the bonded slaves */ -	list_for_each_entry_safe(slave, tmp_slave, &bond->slave_list, list) +	bond_for_each_slave(bond, slave, iter)  		__bond_release_one(bond_dev, slave->dev, true); -	pr_info("%s: released all slaves\n", bond_dev->name); +	pr_info("%s: Released all slaves\n", bond_dev->name);  	list_del(&bond->bond_list); @@ -4066,70 +4011,42 @@ static void bond_uninit(struct net_device *bond_dev)  /*------------------------- Module initialization ---------------------------*/ -/* - * Convert string input module parms.  Accept either the - * number of the mode or its string name.  A bit complicated because - * some mode names are substrings of other names, and calls from sysfs - * may have whitespace in the name (trailing newlines, for example). - */ -int bond_parse_parm(const char *buf, const struct bond_parm_tbl *tbl) -{ -	int modeint = -1, i, rv; -	char *p, modestr[BOND_MAX_MODENAME_LEN + 1] = { 0, }; - -	for (p = (char *)buf; *p; p++) -		if (!(isdigit(*p) || isspace(*p))) -			break; - -	if (*p) -		rv = sscanf(buf, "%20s", modestr); -	else -		rv = sscanf(buf, "%d", &modeint); - -	if (!rv) -		return -1; - -	for (i = 0; tbl[i].modename; i++) { -		if (modeint == tbl[i].mode) -			return tbl[i].mode; -		if (strcmp(modestr, tbl[i].modename) == 0) -			return tbl[i].mode; -	} - -	return -1; -} -  static int bond_check_params(struct bond_params *params)  {  	int arp_validate_value, fail_over_mac_value, primary_reselect_value, i; +	struct bond_opt_value newval; +	const struct bond_opt_value *valptr;  	int arp_all_targets_value;  	/*  	 * Convert string parameters.  	 */  	if (mode) { -		bond_mode = bond_parse_parm(mode, bond_mode_tbl); -		if (bond_mode == -1) { -			pr_err("Error: Invalid bonding mode \"%s\"\n", -			       mode == NULL ? "NULL" : mode); +		bond_opt_initstr(&newval, mode); +		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_MODE), &newval); +		if (!valptr) { +			pr_err("Error: Invalid bonding mode \"%s\"\n", mode);  			return -EINVAL;  		} +		bond_mode = valptr->value;  	}  	if (xmit_hash_policy) {  		if ((bond_mode != BOND_MODE_XOR) && -		    (bond_mode != BOND_MODE_8023AD)) { +		    (bond_mode != BOND_MODE_8023AD) && +		    (bond_mode != BOND_MODE_TLB)) {  			pr_info("xmit_hash_policy param is irrelevant in mode %s\n", -			       bond_mode_name(bond_mode)); +				bond_mode_name(bond_mode));  		} else { -			xmit_hashtype = bond_parse_parm(xmit_hash_policy, -							xmit_hashtype_tbl); -			if (xmit_hashtype == -1) { +			bond_opt_initstr(&newval, xmit_hash_policy); +			valptr = bond_opt_parse(bond_opt_get(BOND_OPT_XMIT_HASH), +						&newval); +			if (!valptr) {  				pr_err("Error: Invalid xmit_hash_policy \"%s\"\n", -				       xmit_hash_policy == NULL ? "NULL" :  				       xmit_hash_policy);  				return -EINVAL;  			} +			xmit_hashtype = valptr->value;  		}  	} @@ -4138,104 +4055,101 @@ static int bond_check_params(struct bond_params *params)  			pr_info("lacp_rate param is irrelevant in mode %s\n",  				bond_mode_name(bond_mode));  		} else { -			lacp_fast = bond_parse_parm(lacp_rate, bond_lacp_tbl); -			if (lacp_fast == -1) { +			bond_opt_initstr(&newval, lacp_rate); +			valptr = bond_opt_parse(bond_opt_get(BOND_OPT_LACP_RATE), +						&newval); +			if (!valptr) {  				pr_err("Error: Invalid lacp rate \"%s\"\n", -				       lacp_rate == NULL ? "NULL" : lacp_rate); +				       lacp_rate);  				return -EINVAL;  			} +			lacp_fast = valptr->value;  		}  	}  	if (ad_select) { -		params->ad_select = bond_parse_parm(ad_select, ad_select_tbl); -		if (params->ad_select == -1) { -			pr_err("Error: Invalid ad_select \"%s\"\n", -			       ad_select == NULL ? "NULL" : ad_select); +		bond_opt_initstr(&newval, ad_select); +		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_AD_SELECT), +					&newval); +		if (!valptr) { +			pr_err("Error: Invalid ad_select \"%s\"\n", ad_select);  			return -EINVAL;  		} - -		if (bond_mode != BOND_MODE_8023AD) { -			pr_warning("ad_select param only affects 802.3ad mode\n"); -		} +		params->ad_select = valptr->value; +		if (bond_mode != BOND_MODE_8023AD) +			pr_warn("ad_select param only affects 802.3ad mode\n");  	} else {  		params->ad_select = BOND_AD_STABLE;  	}  	if (max_bonds < 0) { -		pr_warning("Warning: max_bonds (%d) not in range %d-%d, so it was reset to BOND_DEFAULT_MAX_BONDS (%d)\n", -			   max_bonds, 0, INT_MAX, BOND_DEFAULT_MAX_BONDS); +		pr_warn("Warning: max_bonds (%d) not in range %d-%d, so it was reset to BOND_DEFAULT_MAX_BONDS (%d)\n", +			max_bonds, 0, INT_MAX, BOND_DEFAULT_MAX_BONDS);  		max_bonds = BOND_DEFAULT_MAX_BONDS;  	}  	if (miimon < 0) { -		pr_warning("Warning: miimon module parameter (%d), not in range 0-%d, so it was reset to %d\n", -			   miimon, INT_MAX, BOND_LINK_MON_INTERV); -		miimon = BOND_LINK_MON_INTERV; +		pr_warn("Warning: miimon module parameter (%d), not in range 0-%d, so it was reset to 0\n", +			miimon, INT_MAX); +		miimon = 0;  	}  	if (updelay < 0) { -		pr_warning("Warning: updelay module parameter (%d), not in range 0-%d, so it was reset to 0\n", -			   updelay, INT_MAX); +		pr_warn("Warning: updelay module parameter (%d), not in range 0-%d, so it was reset to 0\n", +			updelay, INT_MAX);  		updelay = 0;  	}  	if (downdelay < 0) { -		pr_warning("Warning: downdelay module parameter (%d), not in range 0-%d, so it was reset to 0\n", -			   downdelay, INT_MAX); +		pr_warn("Warning: downdelay module parameter (%d), not in range 0-%d, so it was reset to 0\n", +			downdelay, INT_MAX);  		downdelay = 0;  	}  	if ((use_carrier != 0) && (use_carrier != 1)) { -		pr_warning("Warning: use_carrier module parameter (%d), not of valid value (0/1), so it was set to 1\n", -			   use_carrier); +		pr_warn("Warning: use_carrier module parameter (%d), not of valid value (0/1), so it was set to 1\n", +			use_carrier);  		use_carrier = 1;  	}  	if (num_peer_notif < 0 || num_peer_notif > 255) { -		pr_warning("Warning: num_grat_arp/num_unsol_na (%d) not in range 0-255 so it was reset to 1\n", -			   num_peer_notif); +		pr_warn("Warning: num_grat_arp/num_unsol_na (%d) not in range 0-255 so it was reset to 1\n", +			num_peer_notif);  		num_peer_notif = 1;  	} -	/* reset values for 802.3ad */ -	if (bond_mode == BOND_MODE_8023AD) { +	/* reset values for 802.3ad/TLB/ALB */ +	if (!bond_mode_uses_arp(bond_mode)) {  		if (!miimon) { -			pr_warning("Warning: miimon must be specified, otherwise bonding will not detect link failure, speed and duplex which are essential for 802.3ad operation\n"); -			pr_warning("Forcing miimon to 100msec\n"); -			miimon = 100; +			pr_warn("Warning: miimon must be specified, otherwise bonding will not detect link failure, speed and duplex which are essential for 802.3ad operation\n"); +			pr_warn("Forcing miimon to 100msec\n"); +			miimon = BOND_DEFAULT_MIIMON;  		}  	}  	if (tx_queues < 1 || tx_queues > 255) { -		pr_warning("Warning: tx_queues (%d) should be between " -			   "1 and 255, resetting to %d\n", -			   tx_queues, BOND_DEFAULT_TX_QUEUES); +		pr_warn("Warning: tx_queues (%d) should be between 1 and 255, resetting to %d\n", +			tx_queues, BOND_DEFAULT_TX_QUEUES);  		tx_queues = BOND_DEFAULT_TX_QUEUES;  	}  	if ((all_slaves_active != 0) && (all_slaves_active != 1)) { -		pr_warning("Warning: all_slaves_active module parameter (%d), " -			   "not of valid value (0/1), so it was set to " -			   "0\n", all_slaves_active); +		pr_warn("Warning: all_slaves_active module parameter (%d), not of valid value (0/1), so it was set to 0\n", +			all_slaves_active);  		all_slaves_active = 0;  	}  	if (resend_igmp < 0 || resend_igmp > 255) { -		pr_warning("Warning: resend_igmp (%d) should be between " -			   "0 and 255, resetting to %d\n", -			   resend_igmp, BOND_DEFAULT_RESEND_IGMP); +		pr_warn("Warning: resend_igmp (%d) should be between 0 and 255, resetting to %d\n", +			resend_igmp, BOND_DEFAULT_RESEND_IGMP);  		resend_igmp = BOND_DEFAULT_RESEND_IGMP;  	} -	/* reset values for TLB/ALB */ -	if ((bond_mode == BOND_MODE_TLB) || -	    (bond_mode == BOND_MODE_ALB)) { -		if (!miimon) { -			pr_warning("Warning: miimon must be specified, otherwise bonding will not detect link failure and link speed which are essential for TLB/ALB load balancing\n"); -			pr_warning("Forcing miimon to 100msec\n"); -			miimon = 100; -		} +	bond_opt_initval(&newval, packets_per_slave); +	if (!bond_opt_parse(bond_opt_get(BOND_OPT_PACKETS_PER_SLAVE), &newval)) { +		pr_warn("Warning: packets_per_slave (%d) should be between 0 and %u resetting to 1\n", +			packets_per_slave, USHRT_MAX); +		packets_per_slave = 1;  	}  	if (bond_mode == BOND_MODE_ALB) { @@ -4248,155 +4162,163 @@ static int bond_check_params(struct bond_params *params)  			/* just warn the user the up/down delay will have  			 * no effect since miimon is zero...  			 */ -			pr_warning("Warning: miimon module parameter not set and updelay (%d) or downdelay (%d) module parameter is set; updelay and downdelay have no effect unless miimon is set\n", -				   updelay, downdelay); +			pr_warn("Warning: miimon module parameter not set and updelay (%d) or downdelay (%d) module parameter is set; updelay and downdelay have no effect unless miimon is set\n", +				updelay, downdelay);  		}  	} else {  		/* don't allow arp monitoring */  		if (arp_interval) { -			pr_warning("Warning: miimon (%d) and arp_interval (%d) can't be used simultaneously, disabling ARP monitoring\n", -				   miimon, arp_interval); +			pr_warn("Warning: miimon (%d) and arp_interval (%d) can't be used simultaneously, disabling ARP monitoring\n", +				miimon, arp_interval);  			arp_interval = 0;  		}  		if ((updelay % miimon) != 0) { -			pr_warning("Warning: updelay (%d) is not a multiple of miimon (%d), updelay rounded to %d ms\n", -				   updelay, miimon, -				   (updelay / miimon) * miimon); +			pr_warn("Warning: updelay (%d) is not a multiple of miimon (%d), updelay rounded to %d ms\n", +				updelay, miimon, (updelay / miimon) * miimon);  		}  		updelay /= miimon;  		if ((downdelay % miimon) != 0) { -			pr_warning("Warning: downdelay (%d) is not a multiple of miimon (%d), downdelay rounded to %d ms\n", -				   downdelay, miimon, -				   (downdelay / miimon) * miimon); +			pr_warn("Warning: downdelay (%d) is not a multiple of miimon (%d), downdelay rounded to %d ms\n", +				downdelay, miimon, +				(downdelay / miimon) * miimon);  		}  		downdelay /= miimon;  	}  	if (arp_interval < 0) { -		pr_warning("Warning: arp_interval module parameter (%d) , not in range 0-%d, so it was reset to %d\n", -			   arp_interval, INT_MAX, BOND_LINK_ARP_INTERV); -		arp_interval = BOND_LINK_ARP_INTERV; +		pr_warn("Warning: arp_interval module parameter (%d), not in range 0-%d, so it was reset to 0\n", +			arp_interval, INT_MAX); +		arp_interval = 0;  	}  	for (arp_ip_count = 0, i = 0;  	     (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[i]; i++) {  		/* not complete check, but should be good enough to  		   catch mistakes */ -		__be32 ip = in_aton(arp_ip_target[i]); -		if (!isdigit(arp_ip_target[i][0]) || ip == 0 || -		    ip == htonl(INADDR_BROADCAST)) { -			pr_warning("Warning: bad arp_ip_target module parameter (%s), ARP monitoring will not be performed\n", -				   arp_ip_target[i]); +		__be32 ip; +		if (!in4_pton(arp_ip_target[i], -1, (u8 *)&ip, -1, NULL) || +		    !bond_is_ip_target_ok(ip)) { +			pr_warn("Warning: bad arp_ip_target module parameter (%s), ARP monitoring will not be performed\n", +				arp_ip_target[i]);  			arp_interval = 0;  		} else {  			if (bond_get_targets_ip(arp_target, ip) == -1)  				arp_target[arp_ip_count++] = ip;  			else -				pr_warning("Warning: duplicate address %pI4 in arp_ip_target, skipping\n", -					   &ip); +				pr_warn("Warning: duplicate address %pI4 in arp_ip_target, skipping\n", +					&ip);  		}  	}  	if (arp_interval && !arp_ip_count) {  		/* don't allow arping if no arp_ip_target given... */ -		pr_warning("Warning: arp_interval module parameter (%d) specified without providing an arp_ip_target parameter, arp_interval was reset to 0\n", -			   arp_interval); +		pr_warn("Warning: arp_interval module parameter (%d) specified without providing an arp_ip_target parameter, arp_interval was reset to 0\n", +			arp_interval);  		arp_interval = 0;  	}  	if (arp_validate) { -		if (bond_mode != BOND_MODE_ACTIVEBACKUP) { -			pr_err("arp_validate only supported in active-backup mode\n"); -			return -EINVAL; -		}  		if (!arp_interval) {  			pr_err("arp_validate requires arp_interval\n");  			return -EINVAL;  		} -		arp_validate_value = bond_parse_parm(arp_validate, -						     arp_validate_tbl); -		if (arp_validate_value == -1) { +		bond_opt_initstr(&newval, arp_validate); +		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_ARP_VALIDATE), +					&newval); +		if (!valptr) {  			pr_err("Error: invalid arp_validate \"%s\"\n", -			       arp_validate == NULL ? "NULL" : arp_validate); +			       arp_validate);  			return -EINVAL;  		} -	} else +		arp_validate_value = valptr->value; +	} else {  		arp_validate_value = 0; +	}  	arp_all_targets_value = 0;  	if (arp_all_targets) { -		arp_all_targets_value = bond_parse_parm(arp_all_targets, -							arp_all_targets_tbl); - -		if (arp_all_targets_value == -1) { +		bond_opt_initstr(&newval, arp_all_targets); +		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_ARP_ALL_TARGETS), +					&newval); +		if (!valptr) {  			pr_err("Error: invalid arp_all_targets_value \"%s\"\n",  			       arp_all_targets);  			arp_all_targets_value = 0; +		} else { +			arp_all_targets_value = valptr->value;  		}  	}  	if (miimon) {  		pr_info("MII link monitoring set to %d ms\n", miimon);  	} else if (arp_interval) { +		valptr = bond_opt_get_val(BOND_OPT_ARP_VALIDATE, +					  arp_validate_value);  		pr_info("ARP monitoring set to %d ms, validate %s, with %d target(s):", -			arp_interval, -			arp_validate_tbl[arp_validate_value].modename, -			arp_ip_count); +			arp_interval, valptr->string, arp_ip_count);  		for (i = 0; i < arp_ip_count; i++) -			pr_info(" %s", arp_ip_target[i]); +			pr_cont(" %s", arp_ip_target[i]); -		pr_info("\n"); +		pr_cont("\n");  	} else if (max_bonds) {  		/* miimon and arp_interval not set, we need one so things  		 * work as expected, see bonding.txt for details  		 */ -		pr_debug("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details.\n"); +		pr_debug("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details\n");  	} -	if (primary && !USES_PRIMARY(bond_mode)) { +	if (primary && !bond_mode_uses_primary(bond_mode)) {  		/* currently, using a primary only makes sense  		 * in active backup, TLB or ALB modes  		 */ -		pr_warning("Warning: %s primary device specified but has no effect in %s mode\n", -			   primary, bond_mode_name(bond_mode)); +		pr_warn("Warning: %s primary device specified but has no effect in %s mode\n", +			primary, bond_mode_name(bond_mode));  		primary = NULL;  	}  	if (primary && primary_reselect) { -		primary_reselect_value = bond_parse_parm(primary_reselect, -							 pri_reselect_tbl); -		if (primary_reselect_value == -1) { +		bond_opt_initstr(&newval, primary_reselect); +		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_PRIMARY_RESELECT), +					&newval); +		if (!valptr) {  			pr_err("Error: Invalid primary_reselect \"%s\"\n", -			       primary_reselect == -					NULL ? "NULL" : primary_reselect); +			       primary_reselect);  			return -EINVAL;  		} +		primary_reselect_value = valptr->value;  	} else {  		primary_reselect_value = BOND_PRI_RESELECT_ALWAYS;  	}  	if (fail_over_mac) { -		fail_over_mac_value = bond_parse_parm(fail_over_mac, -						      fail_over_mac_tbl); -		if (fail_over_mac_value == -1) { +		bond_opt_initstr(&newval, fail_over_mac); +		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_FAIL_OVER_MAC), +					&newval); +		if (!valptr) {  			pr_err("Error: invalid fail_over_mac \"%s\"\n", -			       arp_validate == NULL ? "NULL" : arp_validate); +			       fail_over_mac);  			return -EINVAL;  		} - +		fail_over_mac_value = valptr->value;  		if (bond_mode != BOND_MODE_ACTIVEBACKUP) -			pr_warning("Warning: fail_over_mac only affects active-backup mode.\n"); +			pr_warn("Warning: fail_over_mac only affects active-backup mode\n");  	} else {  		fail_over_mac_value = BOND_FOM_NONE;  	} +	if (lp_interval == 0) { +		pr_warn("Warning: ip_interval must be between 1 and %d, so it was reset to %d\n", +			INT_MAX, BOND_ALB_DEFAULT_LP_INTERVAL); +		lp_interval = BOND_ALB_DEFAULT_LP_INTERVAL; +	} +  	/* fill params struct with the proper values */  	params->mode = bond_mode;  	params->xmit_policy = xmit_hashtype; @@ -4416,6 +4338,19 @@ static int bond_check_params(struct bond_params *params)  	params->all_slaves_active = all_slaves_active;  	params->resend_igmp = resend_igmp;  	params->min_links = min_links; +	params->lp_interval = lp_interval; +	params->packets_per_slave = packets_per_slave; +	params->tlb_dynamic_lb = 1; /* Default value */ +	if (packets_per_slave > 0) { +		params->reciprocal_packets_per_slave = +			reciprocal_value(packets_per_slave); +	} else { +		/* reciprocal_packets_per_slave is unused if +		 * packets_per_slave is 0 or 1, just initialize it +		 */ +		params->reciprocal_packets_per_slave = +			(struct reciprocal_value) { 0 }; +	}  	if (primary) {  		strncpy(params->primary, primary, IFNAMSIZ); @@ -4487,32 +4422,11 @@ static int bond_init(struct net_device *bond_dev)  	return 0;  } -static int bond_validate(struct nlattr *tb[], struct nlattr *data[]) -{ -	if (tb[IFLA_ADDRESS]) { -		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) -			return -EINVAL; -		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) -			return -EADDRNOTAVAIL; -	} -	return 0; -} - -static unsigned int bond_get_num_tx_queues(void) +unsigned int bond_get_num_tx_queues(void)  {  	return tx_queues;  } -static struct rtnl_link_ops bond_link_ops __read_mostly = { -	.kind			= "bond", -	.priv_size		= sizeof(struct bonding), -	.setup			= bond_setup, -	.validate		= bond_validate, -	.get_num_tx_queues	= bond_get_num_tx_queues, -	.get_num_rx_queues	= bond_get_num_tx_queues, /* Use the same number -							     as for TX queues */ -}; -  /* Create a new bond based on the specified name and bonding parameters.   * If name is NULL, obtain a suitable "bond%d" name for us.   * Caller must NOT hold rtnl_lock; we need to release it here before we @@ -4599,7 +4513,7 @@ static int __init bonding_init(void)  	if (res)  		goto out; -	res = rtnl_link_register(&bond_link_ops); +	res = bond_netlink_init();  	if (res)  		goto err_link; @@ -4615,7 +4529,8 @@ static int __init bonding_init(void)  out:  	return res;  err: -	rtnl_link_unregister(&bond_link_ops); +	bond_destroy_debugfs(); +	bond_netlink_fini();  err_link:  	unregister_pernet_subsys(&bond_net_ops);  	goto out; @@ -4628,7 +4543,7 @@ static void __exit bonding_exit(void)  	bond_destroy_debugfs(); -	rtnl_link_unregister(&bond_link_ops); +	bond_netlink_fini();  	unregister_pernet_subsys(&bond_net_ops);  #ifdef CONFIG_NET_POLL_CONTROLLER @@ -4645,4 +4560,3 @@ MODULE_LICENSE("GPL");  MODULE_VERSION(DRV_VERSION);  MODULE_DESCRIPTION(DRV_DESCRIPTION ", v" DRV_VERSION);  MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others"); -MODULE_ALIAS_RTNL_LINK("bond"); diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c new file mode 100644 index 00000000000..5ab3c1847e6 --- /dev/null +++ b/drivers/net/bonding/bond_netlink.c @@ -0,0 +1,573 @@ +/* + * drivers/net/bond/bond_netlink.c - Netlink interface for bonding + * Copyright (c) 2013 Jiri Pirko <jiri@resnulli.us> + * Copyright (c) 2013 Scott Feldman <sfeldma@cumulusnetworks.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/if_link.h> +#include <linux/if_ether.h> +#include <net/netlink.h> +#include <net/rtnetlink.h> +#include "bonding.h" + +static size_t bond_get_slave_size(const struct net_device *bond_dev, +				  const struct net_device *slave_dev) +{ +	return nla_total_size(sizeof(u8)) +	/* IFLA_BOND_SLAVE_STATE */ +		nla_total_size(sizeof(u8)) +	/* IFLA_BOND_SLAVE_MII_STATUS */ +		nla_total_size(sizeof(u32)) +	/* IFLA_BOND_SLAVE_LINK_FAILURE_COUNT */ +		nla_total_size(MAX_ADDR_LEN) +	/* IFLA_BOND_SLAVE_PERM_HWADDR */ +		nla_total_size(sizeof(u16)) +	/* IFLA_BOND_SLAVE_QUEUE_ID */ +		nla_total_size(sizeof(u16)) +	/* IFLA_BOND_SLAVE_AD_AGGREGATOR_ID */ +		0; +} + +static int bond_fill_slave_info(struct sk_buff *skb, +				const struct net_device *bond_dev, +				const struct net_device *slave_dev) +{ +	struct slave *slave = bond_slave_get_rtnl(slave_dev); + +	if (nla_put_u8(skb, IFLA_BOND_SLAVE_STATE, bond_slave_state(slave))) +		goto nla_put_failure; + +	if (nla_put_u8(skb, IFLA_BOND_SLAVE_MII_STATUS, slave->link)) +		goto nla_put_failure; + +	if (nla_put_u32(skb, IFLA_BOND_SLAVE_LINK_FAILURE_COUNT, +			slave->link_failure_count)) +		goto nla_put_failure; + +	if (nla_put(skb, IFLA_BOND_SLAVE_PERM_HWADDR, +		    slave_dev->addr_len, slave->perm_hwaddr)) +		goto nla_put_failure; + +	if (nla_put_u16(skb, IFLA_BOND_SLAVE_QUEUE_ID, slave->queue_id)) +		goto nla_put_failure; + +	if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { +		const struct aggregator *agg; + +		agg = SLAVE_AD_INFO(slave)->port.aggregator; +		if (agg) +			if (nla_put_u16(skb, IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, +					agg->aggregator_identifier)) +				goto nla_put_failure; +	} + +	return 0; + +nla_put_failure: +	return -EMSGSIZE; +} + +static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { +	[IFLA_BOND_MODE]		= { .type = NLA_U8 }, +	[IFLA_BOND_ACTIVE_SLAVE]	= { .type = NLA_U32 }, +	[IFLA_BOND_MIIMON]		= { .type = NLA_U32 }, +	[IFLA_BOND_UPDELAY]		= { .type = NLA_U32 }, +	[IFLA_BOND_DOWNDELAY]		= { .type = NLA_U32 }, +	[IFLA_BOND_USE_CARRIER]		= { .type = NLA_U8 }, +	[IFLA_BOND_ARP_INTERVAL]	= { .type = NLA_U32 }, +	[IFLA_BOND_ARP_IP_TARGET]	= { .type = NLA_NESTED }, +	[IFLA_BOND_ARP_VALIDATE]	= { .type = NLA_U32 }, +	[IFLA_BOND_ARP_ALL_TARGETS]	= { .type = NLA_U32 }, +	[IFLA_BOND_PRIMARY]		= { .type = NLA_U32 }, +	[IFLA_BOND_PRIMARY_RESELECT]	= { .type = NLA_U8 }, +	[IFLA_BOND_FAIL_OVER_MAC]	= { .type = NLA_U8 }, +	[IFLA_BOND_XMIT_HASH_POLICY]	= { .type = NLA_U8 }, +	[IFLA_BOND_RESEND_IGMP]		= { .type = NLA_U32 }, +	[IFLA_BOND_NUM_PEER_NOTIF]	= { .type = NLA_U8 }, +	[IFLA_BOND_ALL_SLAVES_ACTIVE]	= { .type = NLA_U8 }, +	[IFLA_BOND_MIN_LINKS]		= { .type = NLA_U32 }, +	[IFLA_BOND_LP_INTERVAL]		= { .type = NLA_U32 }, +	[IFLA_BOND_PACKETS_PER_SLAVE]	= { .type = NLA_U32 }, +	[IFLA_BOND_AD_LACP_RATE]	= { .type = NLA_U8 }, +	[IFLA_BOND_AD_SELECT]		= { .type = NLA_U8 }, +	[IFLA_BOND_AD_INFO]		= { .type = NLA_NESTED }, +}; + +static int bond_validate(struct nlattr *tb[], struct nlattr *data[]) +{ +	if (tb[IFLA_ADDRESS]) { +		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) +			return -EINVAL; +		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) +			return -EADDRNOTAVAIL; +	} +	return 0; +} + +static int bond_changelink(struct net_device *bond_dev, +			   struct nlattr *tb[], struct nlattr *data[]) +{ +	struct bonding *bond = netdev_priv(bond_dev); +	struct bond_opt_value newval; +	int miimon = 0; +	int err; + +	if (!data) +		return 0; + +	if (data[IFLA_BOND_MODE]) { +		int mode = nla_get_u8(data[IFLA_BOND_MODE]); + +		bond_opt_initval(&newval, mode); +		err = __bond_opt_set(bond, BOND_OPT_MODE, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_ACTIVE_SLAVE]) { +		int ifindex = nla_get_u32(data[IFLA_BOND_ACTIVE_SLAVE]); +		struct net_device *slave_dev; +		char *active_slave = ""; + +		if (ifindex != 0) { +			slave_dev = __dev_get_by_index(dev_net(bond_dev), +						       ifindex); +			if (!slave_dev) +				return -ENODEV; +			active_slave = slave_dev->name; +		} +		bond_opt_initstr(&newval, active_slave); +		err = __bond_opt_set(bond, BOND_OPT_ACTIVE_SLAVE, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_MIIMON]) { +		miimon = nla_get_u32(data[IFLA_BOND_MIIMON]); + +		bond_opt_initval(&newval, miimon); +		err = __bond_opt_set(bond, BOND_OPT_MIIMON, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_UPDELAY]) { +		int updelay = nla_get_u32(data[IFLA_BOND_UPDELAY]); + +		bond_opt_initval(&newval, updelay); +		err = __bond_opt_set(bond, BOND_OPT_UPDELAY, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_DOWNDELAY]) { +		int downdelay = nla_get_u32(data[IFLA_BOND_DOWNDELAY]); + +		bond_opt_initval(&newval, downdelay); +		err = __bond_opt_set(bond, BOND_OPT_DOWNDELAY, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_USE_CARRIER]) { +		int use_carrier = nla_get_u8(data[IFLA_BOND_USE_CARRIER]); + +		bond_opt_initval(&newval, use_carrier); +		err = __bond_opt_set(bond, BOND_OPT_USE_CARRIER, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_ARP_INTERVAL]) { +		int arp_interval = nla_get_u32(data[IFLA_BOND_ARP_INTERVAL]); + +		if (arp_interval && miimon) { +			pr_err("%s: ARP monitoring cannot be used with MII monitoring\n", +			       bond->dev->name); +			return -EINVAL; +		} + +		bond_opt_initval(&newval, arp_interval); +		err = __bond_opt_set(bond, BOND_OPT_ARP_INTERVAL, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_ARP_IP_TARGET]) { +		struct nlattr *attr; +		int i = 0, rem; + +		bond_option_arp_ip_targets_clear(bond); +		nla_for_each_nested(attr, data[IFLA_BOND_ARP_IP_TARGET], rem) { +			__be32 target = nla_get_be32(attr); + +			bond_opt_initval(&newval, (__force u64)target); +			err = __bond_opt_set(bond, BOND_OPT_ARP_TARGETS, +					     &newval); +			if (err) +				break; +			i++; +		} +		if (i == 0 && bond->params.arp_interval) +			pr_warn("%s: Removing last arp target with arp_interval on\n", +				bond->dev->name); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_ARP_VALIDATE]) { +		int arp_validate = nla_get_u32(data[IFLA_BOND_ARP_VALIDATE]); + +		if (arp_validate && miimon) { +			pr_err("%s: ARP validating cannot be used with MII monitoring\n", +			       bond->dev->name); +			return -EINVAL; +		} + +		bond_opt_initval(&newval, arp_validate); +		err = __bond_opt_set(bond, BOND_OPT_ARP_VALIDATE, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_ARP_ALL_TARGETS]) { +		int arp_all_targets = +			nla_get_u32(data[IFLA_BOND_ARP_ALL_TARGETS]); + +		bond_opt_initval(&newval, arp_all_targets); +		err = __bond_opt_set(bond, BOND_OPT_ARP_ALL_TARGETS, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_PRIMARY]) { +		int ifindex = nla_get_u32(data[IFLA_BOND_PRIMARY]); +		struct net_device *dev; +		char *primary = ""; + +		dev = __dev_get_by_index(dev_net(bond_dev), ifindex); +		if (dev) +			primary = dev->name; + +		bond_opt_initstr(&newval, primary); +		err = __bond_opt_set(bond, BOND_OPT_PRIMARY, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_PRIMARY_RESELECT]) { +		int primary_reselect = +			nla_get_u8(data[IFLA_BOND_PRIMARY_RESELECT]); + +		bond_opt_initval(&newval, primary_reselect); +		err = __bond_opt_set(bond, BOND_OPT_PRIMARY_RESELECT, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_FAIL_OVER_MAC]) { +		int fail_over_mac = +			nla_get_u8(data[IFLA_BOND_FAIL_OVER_MAC]); + +		bond_opt_initval(&newval, fail_over_mac); +		err = __bond_opt_set(bond, BOND_OPT_FAIL_OVER_MAC, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_XMIT_HASH_POLICY]) { +		int xmit_hash_policy = +			nla_get_u8(data[IFLA_BOND_XMIT_HASH_POLICY]); + +		bond_opt_initval(&newval, xmit_hash_policy); +		err = __bond_opt_set(bond, BOND_OPT_XMIT_HASH, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_RESEND_IGMP]) { +		int resend_igmp = +			nla_get_u32(data[IFLA_BOND_RESEND_IGMP]); + +		bond_opt_initval(&newval, resend_igmp); +		err = __bond_opt_set(bond, BOND_OPT_RESEND_IGMP, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_NUM_PEER_NOTIF]) { +		int num_peer_notif = +			nla_get_u8(data[IFLA_BOND_NUM_PEER_NOTIF]); + +		bond_opt_initval(&newval, num_peer_notif); +		err = __bond_opt_set(bond, BOND_OPT_NUM_PEER_NOTIF, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_ALL_SLAVES_ACTIVE]) { +		int all_slaves_active = +			nla_get_u8(data[IFLA_BOND_ALL_SLAVES_ACTIVE]); + +		bond_opt_initval(&newval, all_slaves_active); +		err = __bond_opt_set(bond, BOND_OPT_ALL_SLAVES_ACTIVE, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_MIN_LINKS]) { +		int min_links = +			nla_get_u32(data[IFLA_BOND_MIN_LINKS]); + +		bond_opt_initval(&newval, min_links); +		err = __bond_opt_set(bond, BOND_OPT_MINLINKS, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_LP_INTERVAL]) { +		int lp_interval = +			nla_get_u32(data[IFLA_BOND_LP_INTERVAL]); + +		bond_opt_initval(&newval, lp_interval); +		err = __bond_opt_set(bond, BOND_OPT_LP_INTERVAL, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_PACKETS_PER_SLAVE]) { +		int packets_per_slave = +			nla_get_u32(data[IFLA_BOND_PACKETS_PER_SLAVE]); + +		bond_opt_initval(&newval, packets_per_slave); +		err = __bond_opt_set(bond, BOND_OPT_PACKETS_PER_SLAVE, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_AD_LACP_RATE]) { +		int lacp_rate = +			nla_get_u8(data[IFLA_BOND_AD_LACP_RATE]); + +		bond_opt_initval(&newval, lacp_rate); +		err = __bond_opt_set(bond, BOND_OPT_LACP_RATE, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_AD_SELECT]) { +		int ad_select = +			nla_get_u8(data[IFLA_BOND_AD_SELECT]); + +		bond_opt_initval(&newval, ad_select); +		err = __bond_opt_set(bond, BOND_OPT_AD_SELECT, &newval); +		if (err) +			return err; +	} +	return 0; +} + +static int bond_newlink(struct net *src_net, struct net_device *bond_dev, +			struct nlattr *tb[], struct nlattr *data[]) +{ +	int err; + +	err = bond_changelink(bond_dev, tb, data); +	if (err < 0) +		return err; + +	return register_netdevice(bond_dev); +} + +static size_t bond_get_size(const struct net_device *bond_dev) +{ +	return nla_total_size(sizeof(u8)) +	/* IFLA_BOND_MODE */ +		nla_total_size(sizeof(u32)) +	/* IFLA_BOND_ACTIVE_SLAVE */ +		nla_total_size(sizeof(u32)) +	/* IFLA_BOND_MIIMON */ +		nla_total_size(sizeof(u32)) +	/* IFLA_BOND_UPDELAY */ +		nla_total_size(sizeof(u32)) +	/* IFLA_BOND_DOWNDELAY */ +		nla_total_size(sizeof(u8)) +	/* IFLA_BOND_USE_CARRIER */ +		nla_total_size(sizeof(u32)) +	/* IFLA_BOND_ARP_INTERVAL */ +						/* IFLA_BOND_ARP_IP_TARGET */ +		nla_total_size(sizeof(struct nlattr)) + +		nla_total_size(sizeof(u32)) * BOND_MAX_ARP_TARGETS + +		nla_total_size(sizeof(u32)) +	/* IFLA_BOND_ARP_VALIDATE */ +		nla_total_size(sizeof(u32)) +	/* IFLA_BOND_ARP_ALL_TARGETS */ +		nla_total_size(sizeof(u32)) +	/* IFLA_BOND_PRIMARY */ +		nla_total_size(sizeof(u8)) +	/* IFLA_BOND_PRIMARY_RESELECT */ +		nla_total_size(sizeof(u8)) +	/* IFLA_BOND_FAIL_OVER_MAC */ +		nla_total_size(sizeof(u8)) +	/* IFLA_BOND_XMIT_HASH_POLICY */ +		nla_total_size(sizeof(u32)) +	/* IFLA_BOND_RESEND_IGMP */ +		nla_total_size(sizeof(u8)) +	/* IFLA_BOND_NUM_PEER_NOTIF */ +		nla_total_size(sizeof(u8)) +   /* IFLA_BOND_ALL_SLAVES_ACTIVE */ +		nla_total_size(sizeof(u32)) +	/* IFLA_BOND_MIN_LINKS */ +		nla_total_size(sizeof(u32)) +	/* IFLA_BOND_LP_INTERVAL */ +		nla_total_size(sizeof(u32)) +  /* IFLA_BOND_PACKETS_PER_SLAVE */ +		nla_total_size(sizeof(u8)) +	/* IFLA_BOND_AD_LACP_RATE */ +		nla_total_size(sizeof(u8)) +	/* IFLA_BOND_AD_SELECT */ +		nla_total_size(sizeof(struct nlattr)) + /* IFLA_BOND_AD_INFO */ +		nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_AGGREGATOR */ +		nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_NUM_PORTS */ +		nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_ACTOR_KEY */ +		nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_PARTNER_KEY*/ +		nla_total_size(ETH_ALEN) +    /* IFLA_BOND_AD_INFO_PARTNER_MAC*/ +		0; +} + +static int bond_fill_info(struct sk_buff *skb, +			  const struct net_device *bond_dev) +{ +	struct bonding *bond = netdev_priv(bond_dev); +	struct net_device *slave_dev = bond_option_active_slave_get(bond); +	struct nlattr *targets; +	unsigned int packets_per_slave; +	int i, targets_added; + +	if (nla_put_u8(skb, IFLA_BOND_MODE, BOND_MODE(bond))) +		goto nla_put_failure; + +	if (slave_dev && +	    nla_put_u32(skb, IFLA_BOND_ACTIVE_SLAVE, slave_dev->ifindex)) +		goto nla_put_failure; + +	if (nla_put_u32(skb, IFLA_BOND_MIIMON, bond->params.miimon)) +		goto nla_put_failure; + +	if (nla_put_u32(skb, IFLA_BOND_UPDELAY, +			bond->params.updelay * bond->params.miimon)) +		goto nla_put_failure; + +	if (nla_put_u32(skb, IFLA_BOND_DOWNDELAY, +			bond->params.downdelay * bond->params.miimon)) +		goto nla_put_failure; + +	if (nla_put_u8(skb, IFLA_BOND_USE_CARRIER, bond->params.use_carrier)) +		goto nla_put_failure; + +	if (nla_put_u32(skb, IFLA_BOND_ARP_INTERVAL, bond->params.arp_interval)) +		goto nla_put_failure; + +	targets = nla_nest_start(skb, IFLA_BOND_ARP_IP_TARGET); +	if (!targets) +		goto nla_put_failure; + +	targets_added = 0; +	for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) { +		if (bond->params.arp_targets[i]) { +			nla_put_be32(skb, i, bond->params.arp_targets[i]); +			targets_added = 1; +		} +	} + +	if (targets_added) +		nla_nest_end(skb, targets); +	else +		nla_nest_cancel(skb, targets); + +	if (nla_put_u32(skb, IFLA_BOND_ARP_VALIDATE, bond->params.arp_validate)) +		goto nla_put_failure; + +	if (nla_put_u32(skb, IFLA_BOND_ARP_ALL_TARGETS, +			bond->params.arp_all_targets)) +		goto nla_put_failure; + +	if (bond->primary_slave && +	    nla_put_u32(skb, IFLA_BOND_PRIMARY, +			bond->primary_slave->dev->ifindex)) +		goto nla_put_failure; + +	if (nla_put_u8(skb, IFLA_BOND_PRIMARY_RESELECT, +		       bond->params.primary_reselect)) +		goto nla_put_failure; + +	if (nla_put_u8(skb, IFLA_BOND_FAIL_OVER_MAC, +		       bond->params.fail_over_mac)) +		goto nla_put_failure; + +	if (nla_put_u8(skb, IFLA_BOND_XMIT_HASH_POLICY, +		       bond->params.xmit_policy)) +		goto nla_put_failure; + +	if (nla_put_u32(skb, IFLA_BOND_RESEND_IGMP, +		        bond->params.resend_igmp)) +		goto nla_put_failure; + +	if (nla_put_u8(skb, IFLA_BOND_NUM_PEER_NOTIF, +		       bond->params.num_peer_notif)) +		goto nla_put_failure; + +	if (nla_put_u8(skb, IFLA_BOND_ALL_SLAVES_ACTIVE, +		       bond->params.all_slaves_active)) +		goto nla_put_failure; + +	if (nla_put_u32(skb, IFLA_BOND_MIN_LINKS, +			bond->params.min_links)) +		goto nla_put_failure; + +	if (nla_put_u32(skb, IFLA_BOND_LP_INTERVAL, +			bond->params.lp_interval)) +		goto nla_put_failure; + +	packets_per_slave = bond->params.packets_per_slave; +	if (nla_put_u32(skb, IFLA_BOND_PACKETS_PER_SLAVE, +			packets_per_slave)) +		goto nla_put_failure; + +	if (nla_put_u8(skb, IFLA_BOND_AD_LACP_RATE, +		       bond->params.lacp_fast)) +		goto nla_put_failure; + +	if (nla_put_u8(skb, IFLA_BOND_AD_SELECT, +		       bond->params.ad_select)) +		goto nla_put_failure; + +	if (BOND_MODE(bond) == BOND_MODE_8023AD) { +		struct ad_info info; + +		if (!bond_3ad_get_active_agg_info(bond, &info)) { +			struct nlattr *nest; + +			nest = nla_nest_start(skb, IFLA_BOND_AD_INFO); +			if (!nest) +				goto nla_put_failure; + +			if (nla_put_u16(skb, IFLA_BOND_AD_INFO_AGGREGATOR, +					info.aggregator_id)) +				goto nla_put_failure; +			if (nla_put_u16(skb, IFLA_BOND_AD_INFO_NUM_PORTS, +					info.ports)) +				goto nla_put_failure; +			if (nla_put_u16(skb, IFLA_BOND_AD_INFO_ACTOR_KEY, +					info.actor_key)) +				goto nla_put_failure; +			if (nla_put_u16(skb, IFLA_BOND_AD_INFO_PARTNER_KEY, +					info.partner_key)) +				goto nla_put_failure; +			if (nla_put(skb, IFLA_BOND_AD_INFO_PARTNER_MAC, +				    sizeof(info.partner_system), +				    &info.partner_system)) +				goto nla_put_failure; + +			nla_nest_end(skb, nest); +		} +	} + +	return 0; + +nla_put_failure: +	return -EMSGSIZE; +} + +struct rtnl_link_ops bond_link_ops __read_mostly = { +	.kind			= "bond", +	.priv_size		= sizeof(struct bonding), +	.setup			= bond_setup, +	.maxtype		= IFLA_BOND_MAX, +	.policy			= bond_policy, +	.validate		= bond_validate, +	.newlink		= bond_newlink, +	.changelink		= bond_changelink, +	.get_size		= bond_get_size, +	.fill_info		= bond_fill_info, +	.get_num_tx_queues	= bond_get_num_tx_queues, +	.get_num_rx_queues	= bond_get_num_tx_queues, /* Use the same number +							     as for TX queues */ +	.get_slave_size		= bond_get_slave_size, +	.fill_slave_info	= bond_fill_slave_info, +}; + +int __init bond_netlink_init(void) +{ +	return rtnl_link_register(&bond_link_ops); +} + +void bond_netlink_fini(void) +{ +	rtnl_link_unregister(&bond_link_ops); +} + +MODULE_ALIAS_RTNL_LINK("bond"); diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c new file mode 100644 index 00000000000..540e0167bf2 --- /dev/null +++ b/drivers/net/bonding/bond_options.c @@ -0,0 +1,1394 @@ +/* + * drivers/net/bond/bond_options.c - bonding options + * Copyright (c) 2013 Jiri Pirko <jiri@resnulli.us> + * Copyright (c) 2013 Scott Feldman <sfeldma@cumulusnetworks.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/errno.h> +#include <linux/if.h> +#include <linux/netdevice.h> +#include <linux/spinlock.h> +#include <linux/rcupdate.h> +#include <linux/ctype.h> +#include <linux/inet.h> +#include "bonding.h" + +static int bond_option_active_slave_set(struct bonding *bond, +					const struct bond_opt_value *newval); +static int bond_option_miimon_set(struct bonding *bond, +				  const struct bond_opt_value *newval); +static int bond_option_updelay_set(struct bonding *bond, +				   const struct bond_opt_value *newval); +static int bond_option_downdelay_set(struct bonding *bond, +				     const struct bond_opt_value *newval); +static int bond_option_use_carrier_set(struct bonding *bond, +				       const struct bond_opt_value *newval); +static int bond_option_arp_interval_set(struct bonding *bond, +					const struct bond_opt_value *newval); +static int bond_option_arp_ip_target_add(struct bonding *bond, __be32 target); +static int bond_option_arp_ip_target_rem(struct bonding *bond, __be32 target); +static int bond_option_arp_ip_targets_set(struct bonding *bond, +					  const struct bond_opt_value *newval); +static int bond_option_arp_validate_set(struct bonding *bond, +					const struct bond_opt_value *newval); +static int bond_option_arp_all_targets_set(struct bonding *bond, +					   const struct bond_opt_value *newval); +static int bond_option_primary_set(struct bonding *bond, +				   const struct bond_opt_value *newval); +static int bond_option_primary_reselect_set(struct bonding *bond, +					    const struct bond_opt_value *newval); +static int bond_option_fail_over_mac_set(struct bonding *bond, +					 const struct bond_opt_value *newval); +static int bond_option_xmit_hash_policy_set(struct bonding *bond, +					    const struct bond_opt_value *newval); +static int bond_option_resend_igmp_set(struct bonding *bond, +				       const struct bond_opt_value *newval); +static int bond_option_num_peer_notif_set(struct bonding *bond, +					  const struct bond_opt_value *newval); +static int bond_option_all_slaves_active_set(struct bonding *bond, +					     const struct bond_opt_value *newval); +static int bond_option_min_links_set(struct bonding *bond, +				     const struct bond_opt_value *newval); +static int bond_option_lp_interval_set(struct bonding *bond, +				       const struct bond_opt_value *newval); +static int bond_option_pps_set(struct bonding *bond, +			       const struct bond_opt_value *newval); +static int bond_option_lacp_rate_set(struct bonding *bond, +				     const struct bond_opt_value *newval); +static int bond_option_ad_select_set(struct bonding *bond, +				     const struct bond_opt_value *newval); +static int bond_option_queue_id_set(struct bonding *bond, +				    const struct bond_opt_value *newval); +static int bond_option_mode_set(struct bonding *bond, +				const struct bond_opt_value *newval); +static int bond_option_slaves_set(struct bonding *bond, +				  const struct bond_opt_value *newval); +static int bond_option_tlb_dynamic_lb_set(struct bonding *bond, +				  const struct bond_opt_value *newval); + + +static const struct bond_opt_value bond_mode_tbl[] = { +	{ "balance-rr",    BOND_MODE_ROUNDROBIN,   BOND_VALFLAG_DEFAULT}, +	{ "active-backup", BOND_MODE_ACTIVEBACKUP, 0}, +	{ "balance-xor",   BOND_MODE_XOR,          0}, +	{ "broadcast",     BOND_MODE_BROADCAST,    0}, +	{ "802.3ad",       BOND_MODE_8023AD,       0}, +	{ "balance-tlb",   BOND_MODE_TLB,          0}, +	{ "balance-alb",   BOND_MODE_ALB,          0}, +	{ NULL,            -1,                     0}, +}; + +static const struct bond_opt_value bond_pps_tbl[] = { +	{ "default", 1,         BOND_VALFLAG_DEFAULT}, +	{ "maxval",  USHRT_MAX, BOND_VALFLAG_MAX}, +	{ NULL,      -1,        0}, +}; + +static const struct bond_opt_value bond_xmit_hashtype_tbl[] = { +	{ "layer2",   BOND_XMIT_POLICY_LAYER2, BOND_VALFLAG_DEFAULT}, +	{ "layer3+4", BOND_XMIT_POLICY_LAYER34, 0}, +	{ "layer2+3", BOND_XMIT_POLICY_LAYER23, 0}, +	{ "encap2+3", BOND_XMIT_POLICY_ENCAP23, 0}, +	{ "encap3+4", BOND_XMIT_POLICY_ENCAP34, 0}, +	{ NULL,       -1,                       0}, +}; + +static const struct bond_opt_value bond_arp_validate_tbl[] = { +	{ "none",		BOND_ARP_VALIDATE_NONE,		BOND_VALFLAG_DEFAULT}, +	{ "active",		BOND_ARP_VALIDATE_ACTIVE,	0}, +	{ "backup",		BOND_ARP_VALIDATE_BACKUP,	0}, +	{ "all",		BOND_ARP_VALIDATE_ALL,		0}, +	{ "filter",		BOND_ARP_FILTER,		0}, +	{ "filter_active",	BOND_ARP_FILTER_ACTIVE,		0}, +	{ "filter_backup",	BOND_ARP_FILTER_BACKUP,		0}, +	{ NULL,			-1,				0}, +}; + +static const struct bond_opt_value bond_arp_all_targets_tbl[] = { +	{ "any", BOND_ARP_TARGETS_ANY, BOND_VALFLAG_DEFAULT}, +	{ "all", BOND_ARP_TARGETS_ALL, 0}, +	{ NULL,  -1,                   0}, +}; + +static const struct bond_opt_value bond_fail_over_mac_tbl[] = { +	{ "none",   BOND_FOM_NONE,   BOND_VALFLAG_DEFAULT}, +	{ "active", BOND_FOM_ACTIVE, 0}, +	{ "follow", BOND_FOM_FOLLOW, 0}, +	{ NULL,     -1,              0}, +}; + +static const struct bond_opt_value bond_intmax_tbl[] = { +	{ "off",     0,       BOND_VALFLAG_DEFAULT}, +	{ "maxval",  INT_MAX, BOND_VALFLAG_MAX}, +	{ NULL,      -1,      0} +}; + +static const struct bond_opt_value bond_lacp_rate_tbl[] = { +	{ "slow", AD_LACP_SLOW, 0}, +	{ "fast", AD_LACP_FAST, 0}, +	{ NULL,   -1,           0}, +}; + +static const struct bond_opt_value bond_ad_select_tbl[] = { +	{ "stable",    BOND_AD_STABLE,    BOND_VALFLAG_DEFAULT}, +	{ "bandwidth", BOND_AD_BANDWIDTH, 0}, +	{ "count",     BOND_AD_COUNT,     0}, +	{ NULL,        -1,                0}, +}; + +static const struct bond_opt_value bond_num_peer_notif_tbl[] = { +	{ "off",     0,   0}, +	{ "maxval",  255, BOND_VALFLAG_MAX}, +	{ "default", 1,   BOND_VALFLAG_DEFAULT}, +	{ NULL,      -1,  0} +}; + +static const struct bond_opt_value bond_primary_reselect_tbl[] = { +	{ "always",  BOND_PRI_RESELECT_ALWAYS,  BOND_VALFLAG_DEFAULT}, +	{ "better",  BOND_PRI_RESELECT_BETTER,  0}, +	{ "failure", BOND_PRI_RESELECT_FAILURE, 0}, +	{ NULL,      -1}, +}; + +static const struct bond_opt_value bond_use_carrier_tbl[] = { +	{ "off", 0,  0}, +	{ "on",  1,  BOND_VALFLAG_DEFAULT}, +	{ NULL,  -1, 0} +}; + +static const struct bond_opt_value bond_all_slaves_active_tbl[] = { +	{ "off", 0,  BOND_VALFLAG_DEFAULT}, +	{ "on",  1,  0}, +	{ NULL,  -1, 0} +}; + +static const struct bond_opt_value bond_resend_igmp_tbl[] = { +	{ "off",     0,   0}, +	{ "maxval",  255, BOND_VALFLAG_MAX}, +	{ "default", 1,   BOND_VALFLAG_DEFAULT}, +	{ NULL,      -1,  0} +}; + +static const struct bond_opt_value bond_lp_interval_tbl[] = { +	{ "minval",  1,       BOND_VALFLAG_MIN | BOND_VALFLAG_DEFAULT}, +	{ "maxval",  INT_MAX, BOND_VALFLAG_MAX}, +	{ NULL,      -1,      0}, +}; + +static const struct bond_opt_value bond_tlb_dynamic_lb_tbl[] = { +	{ "off", 0,  0}, +	{ "on",  1,  BOND_VALFLAG_DEFAULT}, +	{ NULL,  -1, 0} +}; + +static const struct bond_option bond_opts[] = { +	[BOND_OPT_MODE] = { +		.id = BOND_OPT_MODE, +		.name = "mode", +		.desc = "bond device mode", +		.flags = BOND_OPTFLAG_NOSLAVES | BOND_OPTFLAG_IFDOWN, +		.values = bond_mode_tbl, +		.set = bond_option_mode_set +	}, +	[BOND_OPT_PACKETS_PER_SLAVE] = { +		.id = BOND_OPT_PACKETS_PER_SLAVE, +		.name = "packets_per_slave", +		.desc = "Packets to send per slave in RR mode", +		.unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_ROUNDROBIN)), +		.values = bond_pps_tbl, +		.set = bond_option_pps_set +	}, +	[BOND_OPT_XMIT_HASH] = { +		.id = BOND_OPT_XMIT_HASH, +		.name = "xmit_hash_policy", +		.desc = "balance-xor, 802.3ad, and tlb hashing method", +		.values = bond_xmit_hashtype_tbl, +		.set = bond_option_xmit_hash_policy_set +	}, +	[BOND_OPT_ARP_VALIDATE] = { +		.id = BOND_OPT_ARP_VALIDATE, +		.name = "arp_validate", +		.desc = "validate src/dst of ARP probes", +		.unsuppmodes = BIT(BOND_MODE_8023AD) | BIT(BOND_MODE_TLB) | +			       BIT(BOND_MODE_ALB), +		.values = bond_arp_validate_tbl, +		.set = bond_option_arp_validate_set +	}, +	[BOND_OPT_ARP_ALL_TARGETS] = { +		.id = BOND_OPT_ARP_ALL_TARGETS, +		.name = "arp_all_targets", +		.desc = "fail on any/all arp targets timeout", +		.values = bond_arp_all_targets_tbl, +		.set = bond_option_arp_all_targets_set +	}, +	[BOND_OPT_FAIL_OVER_MAC] = { +		.id = BOND_OPT_FAIL_OVER_MAC, +		.name = "fail_over_mac", +		.desc = "For active-backup, do not set all slaves to the same MAC", +		.flags = BOND_OPTFLAG_NOSLAVES, +		.values = bond_fail_over_mac_tbl, +		.set = bond_option_fail_over_mac_set +	}, +	[BOND_OPT_ARP_INTERVAL] = { +		.id = BOND_OPT_ARP_INTERVAL, +		.name = "arp_interval", +		.desc = "arp interval in milliseconds", +		.unsuppmodes = BIT(BOND_MODE_8023AD) | BIT(BOND_MODE_TLB) | +			       BIT(BOND_MODE_ALB), +		.values = bond_intmax_tbl, +		.set = bond_option_arp_interval_set +	}, +	[BOND_OPT_ARP_TARGETS] = { +		.id = BOND_OPT_ARP_TARGETS, +		.name = "arp_ip_target", +		.desc = "arp targets in n.n.n.n form", +		.flags = BOND_OPTFLAG_RAWVAL, +		.set = bond_option_arp_ip_targets_set +	}, +	[BOND_OPT_DOWNDELAY] = { +		.id = BOND_OPT_DOWNDELAY, +		.name = "downdelay", +		.desc = "Delay before considering link down, in milliseconds", +		.values = bond_intmax_tbl, +		.set = bond_option_downdelay_set +	}, +	[BOND_OPT_UPDELAY] = { +		.id = BOND_OPT_UPDELAY, +		.name = "updelay", +		.desc = "Delay before considering link up, in milliseconds", +		.values = bond_intmax_tbl, +		.set = bond_option_updelay_set +	}, +	[BOND_OPT_LACP_RATE] = { +		.id = BOND_OPT_LACP_RATE, +		.name = "lacp_rate", +		.desc = "LACPDU tx rate to request from 802.3ad partner", +		.flags = BOND_OPTFLAG_IFDOWN, +		.unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), +		.values = bond_lacp_rate_tbl, +		.set = bond_option_lacp_rate_set +	}, +	[BOND_OPT_MINLINKS] = { +		.id = BOND_OPT_MINLINKS, +		.name = "min_links", +		.desc = "Minimum number of available links before turning on carrier", +		.values = bond_intmax_tbl, +		.set = bond_option_min_links_set +	}, +	[BOND_OPT_AD_SELECT] = { +		.id = BOND_OPT_AD_SELECT, +		.name = "ad_select", +		.desc = "803.ad aggregation selection logic", +		.flags = BOND_OPTFLAG_IFDOWN, +		.values = bond_ad_select_tbl, +		.set = bond_option_ad_select_set +	}, +	[BOND_OPT_NUM_PEER_NOTIF] = { +		.id = BOND_OPT_NUM_PEER_NOTIF, +		.name = "num_unsol_na", +		.desc = "Number of peer notifications to send on failover event", +		.values = bond_num_peer_notif_tbl, +		.set = bond_option_num_peer_notif_set +	}, +	[BOND_OPT_MIIMON] = { +		.id = BOND_OPT_MIIMON, +		.name = "miimon", +		.desc = "Link check interval in milliseconds", +		.values = bond_intmax_tbl, +		.set = bond_option_miimon_set +	}, +	[BOND_OPT_PRIMARY] = { +		.id = BOND_OPT_PRIMARY, +		.name = "primary", +		.desc = "Primary network device to use", +		.flags = BOND_OPTFLAG_RAWVAL, +		.unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_ACTIVEBACKUP) | +						BIT(BOND_MODE_TLB) | +						BIT(BOND_MODE_ALB)), +		.set = bond_option_primary_set +	}, +	[BOND_OPT_PRIMARY_RESELECT] = { +		.id = BOND_OPT_PRIMARY_RESELECT, +		.name = "primary_reselect", +		.desc = "Reselect primary slave once it comes up", +		.values = bond_primary_reselect_tbl, +		.set = bond_option_primary_reselect_set +	}, +	[BOND_OPT_USE_CARRIER] = { +		.id = BOND_OPT_USE_CARRIER, +		.name = "use_carrier", +		.desc = "Use netif_carrier_ok (vs MII ioctls) in miimon", +		.values = bond_use_carrier_tbl, +		.set = bond_option_use_carrier_set +	}, +	[BOND_OPT_ACTIVE_SLAVE] = { +		.id = BOND_OPT_ACTIVE_SLAVE, +		.name = "active_slave", +		.desc = "Currently active slave", +		.flags = BOND_OPTFLAG_RAWVAL, +		.unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_ACTIVEBACKUP) | +						BIT(BOND_MODE_TLB) | +						BIT(BOND_MODE_ALB)), +		.set = bond_option_active_slave_set +	}, +	[BOND_OPT_QUEUE_ID] = { +		.id = BOND_OPT_QUEUE_ID, +		.name = "queue_id", +		.desc = "Set queue id of a slave", +		.flags = BOND_OPTFLAG_RAWVAL, +		.set = bond_option_queue_id_set +	}, +	[BOND_OPT_ALL_SLAVES_ACTIVE] = { +		.id = BOND_OPT_ALL_SLAVES_ACTIVE, +		.name = "all_slaves_active", +		.desc = "Keep all frames received on an interface by setting active flag for all slaves", +		.values = bond_all_slaves_active_tbl, +		.set = bond_option_all_slaves_active_set +	}, +	[BOND_OPT_RESEND_IGMP] = { +		.id = BOND_OPT_RESEND_IGMP, +		.name = "resend_igmp", +		.desc = "Number of IGMP membership reports to send on link failure", +		.values = bond_resend_igmp_tbl, +		.set = bond_option_resend_igmp_set +	}, +	[BOND_OPT_LP_INTERVAL] = { +		.id = BOND_OPT_LP_INTERVAL, +		.name = "lp_interval", +		.desc = "The number of seconds between instances where the bonding driver sends learning packets to each slave's peer switch", +		.values = bond_lp_interval_tbl, +		.set = bond_option_lp_interval_set +	}, +	[BOND_OPT_SLAVES] = { +		.id = BOND_OPT_SLAVES, +		.name = "slaves", +		.desc = "Slave membership management", +		.flags = BOND_OPTFLAG_RAWVAL, +		.set = bond_option_slaves_set +	}, +	[BOND_OPT_TLB_DYNAMIC_LB] = { +		.id = BOND_OPT_TLB_DYNAMIC_LB, +		.name = "tlb_dynamic_lb", +		.desc = "Enable dynamic flow shuffling", +		.unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_TLB)), +		.values = bond_tlb_dynamic_lb_tbl, +		.flags = BOND_OPTFLAG_IFDOWN, +		.set = bond_option_tlb_dynamic_lb_set, +	}, +	{ } +}; + +/* Searches for an option by name */ +const struct bond_option *bond_opt_get_by_name(const char *name) +{ +	const struct bond_option *opt; +	int option; + +	for (option = 0; option < BOND_OPT_LAST; option++) { +		opt = bond_opt_get(option); +		if (opt && !strcmp(opt->name, name)) +			return opt; +	} + +	return NULL; +} + +/* Searches for a value in opt's values[] table */ +const struct bond_opt_value *bond_opt_get_val(unsigned int option, u64 val) +{ +	const struct bond_option *opt; +	int i; + +	opt = bond_opt_get(option); +	if (WARN_ON(!opt)) +		return NULL; +	for (i = 0; opt->values && opt->values[i].string; i++) +		if (opt->values[i].value == val) +			return &opt->values[i]; + +	return NULL; +} + +/* Searches for a value in opt's values[] table which matches the flagmask */ +static const struct bond_opt_value *bond_opt_get_flags(const struct bond_option *opt, +						 u32 flagmask) +{ +	int i; + +	for (i = 0; opt->values && opt->values[i].string; i++) +		if (opt->values[i].flags & flagmask) +			return &opt->values[i]; + +	return NULL; +} + +/* If maxval is missing then there's no range to check. In case minval is + * missing then it's considered to be 0. + */ +static bool bond_opt_check_range(const struct bond_option *opt, u64 val) +{ +	const struct bond_opt_value *minval, *maxval; + +	minval = bond_opt_get_flags(opt, BOND_VALFLAG_MIN); +	maxval = bond_opt_get_flags(opt, BOND_VALFLAG_MAX); +	if (!maxval || (minval && val < minval->value) || val > maxval->value) +		return false; + +	return true; +} + +/** + * bond_opt_parse - parse option value + * @opt: the option to parse against + * @val: value to parse + * + * This function tries to extract the value from @val and check if it's + * a possible match for the option and returns NULL if a match isn't found, + * or the struct_opt_value that matched. It also strips the new line from + * @val->string if it's present. + */ +const struct bond_opt_value *bond_opt_parse(const struct bond_option *opt, +					    struct bond_opt_value *val) +{ +	char *p, valstr[BOND_OPT_MAX_NAMELEN + 1] = { 0, }; +	const struct bond_opt_value *tbl; +	const struct bond_opt_value *ret = NULL; +	bool checkval; +	int i, rv; + +	/* No parsing if the option wants a raw val */ +	if (opt->flags & BOND_OPTFLAG_RAWVAL) +		return val; + +	tbl = opt->values; +	if (!tbl) +		goto out; + +	/* ULLONG_MAX is used to bypass string processing */ +	checkval = val->value != ULLONG_MAX; +	if (!checkval) { +		if (!val->string) +			goto out; +		p = strchr(val->string, '\n'); +		if (p) +			*p = '\0'; +		for (p = val->string; *p; p++) +			if (!(isdigit(*p) || isspace(*p))) +				break; +		/* The following code extracts the string to match or the value +		 * and sets checkval appropriately +		 */ +		if (*p) { +			rv = sscanf(val->string, "%32s", valstr); +		} else { +			rv = sscanf(val->string, "%llu", &val->value); +			checkval = true; +		} +		if (!rv) +			goto out; +	} + +	for (i = 0; tbl[i].string; i++) { +		/* Check for exact match */ +		if (checkval) { +			if (val->value == tbl[i].value) +				ret = &tbl[i]; +		} else { +			if (!strcmp(valstr, "default") && +			    (tbl[i].flags & BOND_VALFLAG_DEFAULT)) +				ret = &tbl[i]; + +			if (!strcmp(valstr, tbl[i].string)) +				ret = &tbl[i]; +		} +		/* Found an exact match */ +		if (ret) +			goto out; +	} +	/* Possible range match */ +	if (checkval && bond_opt_check_range(opt, val->value)) +		ret = val; +out: +	return ret; +} + +/* Check opt's dependencies against bond mode and currently set options */ +static int bond_opt_check_deps(struct bonding *bond, +			       const struct bond_option *opt) +{ +	struct bond_params *params = &bond->params; + +	if (test_bit(params->mode, &opt->unsuppmodes)) +		return -EACCES; +	if ((opt->flags & BOND_OPTFLAG_NOSLAVES) && bond_has_slaves(bond)) +		return -ENOTEMPTY; +	if ((opt->flags & BOND_OPTFLAG_IFDOWN) && (bond->dev->flags & IFF_UP)) +		return -EBUSY; + +	return 0; +} + +static void bond_opt_dep_print(struct bonding *bond, +			       const struct bond_option *opt) +{ +	const struct bond_opt_value *modeval; +	struct bond_params *params; + +	params = &bond->params; +	modeval = bond_opt_get_val(BOND_OPT_MODE, params->mode); +	if (test_bit(params->mode, &opt->unsuppmodes)) +		pr_err("%s: option %s: mode dependency failed, not supported in mode %s(%llu)\n", +		       bond->dev->name, opt->name, +		       modeval->string, modeval->value); +} + +static void bond_opt_error_interpret(struct bonding *bond, +				     const struct bond_option *opt, +				     int error, const struct bond_opt_value *val) +{ +	const struct bond_opt_value *minval, *maxval; +	char *p; + +	switch (error) { +	case -EINVAL: +		if (val) { +			if (val->string) { +				/* sometimes RAWVAL opts may have new lines */ +				p = strchr(val->string, '\n'); +				if (p) +					*p = '\0'; +				pr_err("%s: option %s: invalid value (%s)\n", +				       bond->dev->name, opt->name, val->string); +			} else { +				pr_err("%s: option %s: invalid value (%llu)\n", +				       bond->dev->name, opt->name, val->value); +			} +		} +		minval = bond_opt_get_flags(opt, BOND_VALFLAG_MIN); +		maxval = bond_opt_get_flags(opt, BOND_VALFLAG_MAX); +		if (!maxval) +			break; +		pr_err("%s: option %s: allowed values %llu - %llu\n", +		       bond->dev->name, opt->name, minval ? minval->value : 0, +		       maxval->value); +		break; +	case -EACCES: +		bond_opt_dep_print(bond, opt); +		break; +	case -ENOTEMPTY: +		pr_err("%s: option %s: unable to set because the bond device has slaves\n", +		       bond->dev->name, opt->name); +		break; +	case -EBUSY: +		pr_err("%s: option %s: unable to set because the bond device is up\n", +		       bond->dev->name, opt->name); +		break; +	default: +		break; +	} +} + +/** + * __bond_opt_set - set a bonding option + * @bond: target bond device + * @option: option to set + * @val: value to set it to + * + * This function is used to change the bond's option value, it can be + * used for both enabling/changing an option and for disabling it. RTNL lock + * must be obtained before calling this function. + */ +int __bond_opt_set(struct bonding *bond, +		   unsigned int option, struct bond_opt_value *val) +{ +	const struct bond_opt_value *retval = NULL; +	const struct bond_option *opt; +	int ret = -ENOENT; + +	ASSERT_RTNL(); + +	opt = bond_opt_get(option); +	if (WARN_ON(!val) || WARN_ON(!opt)) +		goto out; +	ret = bond_opt_check_deps(bond, opt); +	if (ret) +		goto out; +	retval = bond_opt_parse(opt, val); +	if (!retval) { +		ret = -EINVAL; +		goto out; +	} +	ret = opt->set(bond, retval); +out: +	if (ret) +		bond_opt_error_interpret(bond, opt, ret, val); + +	return ret; +} + +/** + * bond_opt_tryset_rtnl - try to acquire rtnl and call __bond_opt_set + * @bond: target bond device + * @option: option to set + * @buf: value to set it to + * + * This function tries to acquire RTNL without blocking and if successful + * calls __bond_opt_set. It is mainly used for sysfs option manipulation. + */ +int bond_opt_tryset_rtnl(struct bonding *bond, unsigned int option, char *buf) +{ +	struct bond_opt_value optval; +	int ret; + +	if (!rtnl_trylock()) +		return restart_syscall(); +	bond_opt_initstr(&optval, buf); +	ret = __bond_opt_set(bond, option, &optval); +	rtnl_unlock(); + +	return ret; +} + +/** + * bond_opt_get - get a pointer to an option + * @option: option for which to return a pointer + * + * This function checks if option is valid and if so returns a pointer + * to its entry in the bond_opts[] option array. + */ +const struct bond_option *bond_opt_get(unsigned int option) +{ +	if (!BOND_OPT_VALID(option)) +		return NULL; + +	return &bond_opts[option]; +} + +int bond_option_mode_set(struct bonding *bond, const struct bond_opt_value *newval) +{ +	if (!bond_mode_uses_arp(newval->value) && bond->params.arp_interval) { +		pr_info("%s: %s mode is incompatible with arp monitoring, start mii monitoring\n", +			bond->dev->name, newval->string); +		/* disable arp monitoring */ +		bond->params.arp_interval = 0; +		/* set miimon to default value */ +		bond->params.miimon = BOND_DEFAULT_MIIMON; +		pr_info("%s: Setting MII monitoring interval to %d\n", +			bond->dev->name, bond->params.miimon); +	} + +	/* don't cache arp_validate between modes */ +	bond->params.arp_validate = BOND_ARP_VALIDATE_NONE; +	bond->params.mode = newval->value; + +	return 0; +} + +static struct net_device *__bond_option_active_slave_get(struct bonding *bond, +							 struct slave *slave) +{ +	return bond_uses_primary(bond) && slave ? slave->dev : NULL; +} + +struct net_device *bond_option_active_slave_get_rcu(struct bonding *bond) +{ +	struct slave *slave = rcu_dereference(bond->curr_active_slave); + +	return __bond_option_active_slave_get(bond, slave); +} + +struct net_device *bond_option_active_slave_get(struct bonding *bond) +{ +	return __bond_option_active_slave_get(bond, bond->curr_active_slave); +} + +static int bond_option_active_slave_set(struct bonding *bond, +					const struct bond_opt_value *newval) +{ +	char ifname[IFNAMSIZ] = { 0, }; +	struct net_device *slave_dev; +	int ret = 0; + +	sscanf(newval->string, "%15s", ifname); /* IFNAMSIZ */ +	if (!strlen(ifname) || newval->string[0] == '\n') { +		slave_dev = NULL; +	} else { +		slave_dev = __dev_get_by_name(dev_net(bond->dev), ifname); +		if (!slave_dev) +			return -ENODEV; +	} + +	if (slave_dev) { +		if (!netif_is_bond_slave(slave_dev)) { +			pr_err("Device %s is not bonding slave\n", +			       slave_dev->name); +			return -EINVAL; +		} + +		if (bond->dev != netdev_master_upper_dev_get(slave_dev)) { +			pr_err("%s: Device %s is not our slave\n", +			       bond->dev->name, slave_dev->name); +			return -EINVAL; +		} +	} + +	block_netpoll_tx(); +	write_lock_bh(&bond->curr_slave_lock); + +	/* check to see if we are clearing active */ +	if (!slave_dev) { +		pr_info("%s: Clearing current active slave\n", bond->dev->name); +		RCU_INIT_POINTER(bond->curr_active_slave, NULL); +		bond_select_active_slave(bond); +	} else { +		struct slave *old_active = bond->curr_active_slave; +		struct slave *new_active = bond_slave_get_rtnl(slave_dev); + +		BUG_ON(!new_active); + +		if (new_active == old_active) { +			/* do nothing */ +			pr_info("%s: %s is already the current active slave\n", +				bond->dev->name, new_active->dev->name); +		} else { +			if (old_active && (new_active->link == BOND_LINK_UP) && +			    bond_slave_is_up(new_active)) { +				pr_info("%s: Setting %s as active slave\n", +					bond->dev->name, new_active->dev->name); +				bond_change_active_slave(bond, new_active); +			} else { +				pr_err("%s: Could not set %s as active slave; either %s is down or the link is down\n", +				       bond->dev->name, new_active->dev->name, +				       new_active->dev->name); +				ret = -EINVAL; +			} +		} +	} + +	write_unlock_bh(&bond->curr_slave_lock); +	unblock_netpoll_tx(); + +	return ret; +} + +/* There are two tricky bits here.  First, if MII monitoring is activated, then + * we must disable ARP monitoring.  Second, if the timer isn't running, we must + * start it. + */ +static int bond_option_miimon_set(struct bonding *bond, +				  const struct bond_opt_value *newval) +{ +	pr_info("%s: Setting MII monitoring interval to %llu\n", +		bond->dev->name, newval->value); +	bond->params.miimon = newval->value; +	if (bond->params.updelay) +		pr_info("%s: Note: Updating updelay (to %d) since it is a multiple of the miimon value\n", +			bond->dev->name, +			bond->params.updelay * bond->params.miimon); +	if (bond->params.downdelay) +		pr_info("%s: Note: Updating downdelay (to %d) since it is a multiple of the miimon value\n", +			bond->dev->name, +			bond->params.downdelay * bond->params.miimon); +	if (newval->value && bond->params.arp_interval) { +		pr_info("%s: MII monitoring cannot be used with ARP monitoring - disabling ARP monitoring...\n", +			bond->dev->name); +		bond->params.arp_interval = 0; +		if (bond->params.arp_validate) +			bond->params.arp_validate = BOND_ARP_VALIDATE_NONE; +	} +	if (bond->dev->flags & IFF_UP) { +		/* If the interface is up, we may need to fire off +		 * the MII timer. If the interface is down, the +		 * timer will get fired off when the open function +		 * is called. +		 */ +		if (!newval->value) { +			cancel_delayed_work_sync(&bond->mii_work); +		} else { +			cancel_delayed_work_sync(&bond->arp_work); +			queue_delayed_work(bond->wq, &bond->mii_work, 0); +		} +	} + +	return 0; +} + +/* Set up and down delays. These must be multiples of the + * MII monitoring value, and are stored internally as the multiplier. + * Thus, we must translate to MS for the real world. + */ +static int bond_option_updelay_set(struct bonding *bond, +				   const struct bond_opt_value *newval) +{ +	int value = newval->value; + +	if (!bond->params.miimon) { +		pr_err("%s: Unable to set up delay as MII monitoring is disabled\n", +		       bond->dev->name); +		return -EPERM; +	} +	if ((value % bond->params.miimon) != 0) { +		pr_warn("%s: Warning: up delay (%d) is not a multiple of miimon (%d), updelay rounded to %d ms\n", +			bond->dev->name, value, +			bond->params.miimon, +			(value / bond->params.miimon) * +			bond->params.miimon); +	} +	bond->params.updelay = value / bond->params.miimon; +	pr_info("%s: Setting up delay to %d\n", +		bond->dev->name, bond->params.updelay * bond->params.miimon); + +	return 0; +} + +static int bond_option_downdelay_set(struct bonding *bond, +				     const struct bond_opt_value *newval) +{ +	int value = newval->value; + +	if (!bond->params.miimon) { +		pr_err("%s: Unable to set down delay as MII monitoring is disabled\n", +		       bond->dev->name); +		return -EPERM; +	} +	if ((value % bond->params.miimon) != 0) { +		pr_warn("%s: Warning: down delay (%d) is not a multiple of miimon (%d), delay rounded to %d ms\n", +			bond->dev->name, value, +			bond->params.miimon, +			(value / bond->params.miimon) * +			bond->params.miimon); +	} +	bond->params.downdelay = value / bond->params.miimon; +	pr_info("%s: Setting down delay to %d\n", +		bond->dev->name, bond->params.downdelay * bond->params.miimon); + +	return 0; +} + +static int bond_option_use_carrier_set(struct bonding *bond, +				       const struct bond_opt_value *newval) +{ +	pr_info("%s: Setting use_carrier to %llu\n", +		bond->dev->name, newval->value); +	bond->params.use_carrier = newval->value; + +	return 0; +} + +/* There are two tricky bits here.  First, if ARP monitoring is activated, then + * we must disable MII monitoring.  Second, if the ARP timer isn't running, + * we must start it. + */ +static int bond_option_arp_interval_set(struct bonding *bond, +					const struct bond_opt_value *newval) +{ +	pr_info("%s: Setting ARP monitoring interval to %llu\n", +		bond->dev->name, newval->value); +	bond->params.arp_interval = newval->value; +	if (newval->value) { +		if (bond->params.miimon) { +			pr_info("%s: ARP monitoring cannot be used with MII monitoring. %s Disabling MII monitoring\n", +				bond->dev->name, bond->dev->name); +			bond->params.miimon = 0; +		} +		if (!bond->params.arp_targets[0]) +			pr_info("%s: ARP monitoring has been set up, but no ARP targets have been specified\n", +				bond->dev->name); +	} +	if (bond->dev->flags & IFF_UP) { +		/* If the interface is up, we may need to fire off +		 * the ARP timer.  If the interface is down, the +		 * timer will get fired off when the open function +		 * is called. +		 */ +		if (!newval->value) { +			if (bond->params.arp_validate) +				bond->recv_probe = NULL; +			cancel_delayed_work_sync(&bond->arp_work); +		} else { +			/* arp_validate can be set only in active-backup mode */ +			bond->recv_probe = bond_arp_rcv; +			cancel_delayed_work_sync(&bond->mii_work); +			queue_delayed_work(bond->wq, &bond->arp_work, 0); +		} +	} + +	return 0; +} + +static void _bond_options_arp_ip_target_set(struct bonding *bond, int slot, +					    __be32 target, +					    unsigned long last_rx) +{ +	__be32 *targets = bond->params.arp_targets; +	struct list_head *iter; +	struct slave *slave; + +	if (slot >= 0 && slot < BOND_MAX_ARP_TARGETS) { +		bond_for_each_slave(bond, slave, iter) +			slave->target_last_arp_rx[slot] = last_rx; +		targets[slot] = target; +	} +} + +static int _bond_option_arp_ip_target_add(struct bonding *bond, __be32 target) +{ +	__be32 *targets = bond->params.arp_targets; +	int ind; + +	if (!bond_is_ip_target_ok(target)) { +		pr_err("%s: invalid ARP target %pI4 specified for addition\n", +		       bond->dev->name, &target); +		return -EINVAL; +	} + +	if (bond_get_targets_ip(targets, target) != -1) { /* dup */ +		pr_err("%s: ARP target %pI4 is already present\n", +		       bond->dev->name, &target); +		return -EINVAL; +	} + +	ind = bond_get_targets_ip(targets, 0); /* first free slot */ +	if (ind == -1) { +		pr_err("%s: ARP target table is full!\n", bond->dev->name); +		return -EINVAL; +	} + +	pr_info("%s: Adding ARP target %pI4\n", bond->dev->name, &target); + +	_bond_options_arp_ip_target_set(bond, ind, target, jiffies); + +	return 0; +} + +static int bond_option_arp_ip_target_add(struct bonding *bond, __be32 target) +{ +	int ret; + +	/* not to race with bond_arp_rcv */ +	write_lock_bh(&bond->lock); +	ret = _bond_option_arp_ip_target_add(bond, target); +	write_unlock_bh(&bond->lock); + +	return ret; +} + +static int bond_option_arp_ip_target_rem(struct bonding *bond, __be32 target) +{ +	__be32 *targets = bond->params.arp_targets; +	struct list_head *iter; +	struct slave *slave; +	unsigned long *targets_rx; +	int ind, i; + +	if (!bond_is_ip_target_ok(target)) { +		pr_err("%s: invalid ARP target %pI4 specified for removal\n", +		       bond->dev->name, &target); +		return -EINVAL; +	} + +	ind = bond_get_targets_ip(targets, target); +	if (ind == -1) { +		pr_err("%s: unable to remove nonexistent ARP target %pI4\n", +		       bond->dev->name, &target); +		return -EINVAL; +	} + +	if (ind == 0 && !targets[1] && bond->params.arp_interval) +		pr_warn("%s: Removing last arp target with arp_interval on\n", +			bond->dev->name); + +	pr_info("%s: Removing ARP target %pI4\n", bond->dev->name, &target); + +	/* not to race with bond_arp_rcv */ +	write_lock_bh(&bond->lock); + +	bond_for_each_slave(bond, slave, iter) { +		targets_rx = slave->target_last_arp_rx; +		for (i = ind; (i < BOND_MAX_ARP_TARGETS-1) && targets[i+1]; i++) +			targets_rx[i] = targets_rx[i+1]; +		targets_rx[i] = 0; +	} +	for (i = ind; (i < BOND_MAX_ARP_TARGETS-1) && targets[i+1]; i++) +		targets[i] = targets[i+1]; +	targets[i] = 0; + +	write_unlock_bh(&bond->lock); + +	return 0; +} + +void bond_option_arp_ip_targets_clear(struct bonding *bond) +{ +	int i; + +	/* not to race with bond_arp_rcv */ +	write_lock_bh(&bond->lock); +	for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) +		_bond_options_arp_ip_target_set(bond, i, 0, 0); +	write_unlock_bh(&bond->lock); +} + +static int bond_option_arp_ip_targets_set(struct bonding *bond, +					  const struct bond_opt_value *newval) +{ +	int ret = -EPERM; +	__be32 target; + +	if (newval->string) { +		if (!in4_pton(newval->string+1, -1, (u8 *)&target, -1, NULL)) { +			pr_err("%s: invalid ARP target %pI4 specified\n", +			       bond->dev->name, &target); +			return ret; +		} +		if (newval->string[0] == '+') +			ret = bond_option_arp_ip_target_add(bond, target); +		else if (newval->string[0] == '-') +			ret = bond_option_arp_ip_target_rem(bond, target); +		else +			pr_err("no command found in arp_ip_targets file for bond %s - use +<addr> or -<addr>\n", +			       bond->dev->name); +	} else { +		target = newval->value; +		ret = bond_option_arp_ip_target_add(bond, target); +	} + +	return ret; +} + +static int bond_option_arp_validate_set(struct bonding *bond, +					const struct bond_opt_value *newval) +{ +	pr_info("%s: Setting arp_validate to %s (%llu)\n", +		bond->dev->name, newval->string, newval->value); + +	if (bond->dev->flags & IFF_UP) { +		if (!newval->value) +			bond->recv_probe = NULL; +		else if (bond->params.arp_interval) +			bond->recv_probe = bond_arp_rcv; +	} +	bond->params.arp_validate = newval->value; + +	return 0; +} + +static int bond_option_arp_all_targets_set(struct bonding *bond, +					   const struct bond_opt_value *newval) +{ +	pr_info("%s: Setting arp_all_targets to %s (%llu)\n", +		bond->dev->name, newval->string, newval->value); +	bond->params.arp_all_targets = newval->value; + +	return 0; +} + +static int bond_option_primary_set(struct bonding *bond, +				   const struct bond_opt_value *newval) +{ +	char *p, *primary = newval->string; +	struct list_head *iter; +	struct slave *slave; + +	block_netpoll_tx(); +	read_lock(&bond->lock); +	write_lock_bh(&bond->curr_slave_lock); + +	p = strchr(primary, '\n'); +	if (p) +		*p = '\0'; +	/* check to see if we are clearing primary */ +	if (!strlen(primary)) { +		pr_info("%s: Setting primary slave to None\n", bond->dev->name); +		bond->primary_slave = NULL; +		memset(bond->params.primary, 0, sizeof(bond->params.primary)); +		bond_select_active_slave(bond); +		goto out; +	} + +	bond_for_each_slave(bond, slave, iter) { +		if (strncmp(slave->dev->name, primary, IFNAMSIZ) == 0) { +			pr_info("%s: Setting %s as primary slave\n", +				bond->dev->name, slave->dev->name); +			bond->primary_slave = slave; +			strcpy(bond->params.primary, slave->dev->name); +			bond_select_active_slave(bond); +			goto out; +		} +	} + +	if (bond->primary_slave) { +		pr_info("%s: Setting primary slave to None\n", bond->dev->name); +		bond->primary_slave = NULL; +		bond_select_active_slave(bond); +	} +	strncpy(bond->params.primary, primary, IFNAMSIZ); +	bond->params.primary[IFNAMSIZ - 1] = 0; + +	pr_info("%s: Recording %s as primary, but it has not been enslaved to %s yet\n", +		bond->dev->name, primary, bond->dev->name); + +out: +	write_unlock_bh(&bond->curr_slave_lock); +	read_unlock(&bond->lock); +	unblock_netpoll_tx(); + +	return 0; +} + +static int bond_option_primary_reselect_set(struct bonding *bond, +					    const struct bond_opt_value *newval) +{ +	pr_info("%s: Setting primary_reselect to %s (%llu)\n", +		bond->dev->name, newval->string, newval->value); +	bond->params.primary_reselect = newval->value; + +	block_netpoll_tx(); +	write_lock_bh(&bond->curr_slave_lock); +	bond_select_active_slave(bond); +	write_unlock_bh(&bond->curr_slave_lock); +	unblock_netpoll_tx(); + +	return 0; +} + +static int bond_option_fail_over_mac_set(struct bonding *bond, +					 const struct bond_opt_value *newval) +{ +	pr_info("%s: Setting fail_over_mac to %s (%llu)\n", +		bond->dev->name, newval->string, newval->value); +	bond->params.fail_over_mac = newval->value; + +	return 0; +} + +static int bond_option_xmit_hash_policy_set(struct bonding *bond, +					    const struct bond_opt_value *newval) +{ +	pr_info("%s: Setting xmit hash policy to %s (%llu)\n", +		bond->dev->name, newval->string, newval->value); +	bond->params.xmit_policy = newval->value; + +	return 0; +} + +static int bond_option_resend_igmp_set(struct bonding *bond, +				       const struct bond_opt_value *newval) +{ +	pr_info("%s: Setting resend_igmp to %llu\n", +		bond->dev->name, newval->value); +	bond->params.resend_igmp = newval->value; + +	return 0; +} + +static int bond_option_num_peer_notif_set(struct bonding *bond, +				   const struct bond_opt_value *newval) +{ +	bond->params.num_peer_notif = newval->value; + +	return 0; +} + +static int bond_option_all_slaves_active_set(struct bonding *bond, +					     const struct bond_opt_value *newval) +{ +	struct list_head *iter; +	struct slave *slave; + +	if (newval->value == bond->params.all_slaves_active) +		return 0; +	bond->params.all_slaves_active = newval->value; +	bond_for_each_slave(bond, slave, iter) { +		if (!bond_is_active_slave(slave)) { +			if (newval->value) +				slave->inactive = 0; +			else +				slave->inactive = 1; +		} +	} + +	return 0; +} + +static int bond_option_min_links_set(struct bonding *bond, +				     const struct bond_opt_value *newval) +{ +	pr_info("%s: Setting min links value to %llu\n", +		bond->dev->name, newval->value); +	bond->params.min_links = newval->value; + +	return 0; +} + +static int bond_option_lp_interval_set(struct bonding *bond, +				       const struct bond_opt_value *newval) +{ +	bond->params.lp_interval = newval->value; + +	return 0; +} + +static int bond_option_pps_set(struct bonding *bond, +			       const struct bond_opt_value *newval) +{ +	bond->params.packets_per_slave = newval->value; +	if (newval->value > 0) { +		bond->params.reciprocal_packets_per_slave = +			reciprocal_value(newval->value); +	} else { +		/* reciprocal_packets_per_slave is unused if +		 * packets_per_slave is 0 or 1, just initialize it +		 */ +		bond->params.reciprocal_packets_per_slave = +			(struct reciprocal_value) { 0 }; +	} + +	return 0; +} + +static int bond_option_lacp_rate_set(struct bonding *bond, +				     const struct bond_opt_value *newval) +{ +	pr_info("%s: Setting LACP rate to %s (%llu)\n", +		bond->dev->name, newval->string, newval->value); +	bond->params.lacp_fast = newval->value; +	bond_3ad_update_lacp_rate(bond); + +	return 0; +} + +static int bond_option_ad_select_set(struct bonding *bond, +				     const struct bond_opt_value *newval) +{ +	pr_info("%s: Setting ad_select to %s (%llu)\n", +		bond->dev->name, newval->string, newval->value); +	bond->params.ad_select = newval->value; + +	return 0; +} + +static int bond_option_queue_id_set(struct bonding *bond, +				    const struct bond_opt_value *newval) +{ +	struct slave *slave, *update_slave; +	struct net_device *sdev; +	struct list_head *iter; +	char *delim; +	int ret = 0; +	u16 qid; + +	/* delim will point to queue id if successful */ +	delim = strchr(newval->string, ':'); +	if (!delim) +		goto err_no_cmd; + +	/* Terminate string that points to device name and bump it +	 * up one, so we can read the queue id there. +	 */ +	*delim = '\0'; +	if (sscanf(++delim, "%hd\n", &qid) != 1) +		goto err_no_cmd; + +	/* Check buffer length, valid ifname and queue id */ +	if (!dev_valid_name(newval->string) || +	    qid > bond->dev->real_num_tx_queues) +		goto err_no_cmd; + +	/* Get the pointer to that interface if it exists */ +	sdev = __dev_get_by_name(dev_net(bond->dev), newval->string); +	if (!sdev) +		goto err_no_cmd; + +	/* Search for thes slave and check for duplicate qids */ +	update_slave = NULL; +	bond_for_each_slave(bond, slave, iter) { +		if (sdev == slave->dev) +			/* We don't need to check the matching +			 * slave for dups, since we're overwriting it +			 */ +			update_slave = slave; +		else if (qid && qid == slave->queue_id) { +			goto err_no_cmd; +		} +	} + +	if (!update_slave) +		goto err_no_cmd; + +	/* Actually set the qids for the slave */ +	update_slave->queue_id = qid; + +out: +	return ret; + +err_no_cmd: +	pr_info("invalid input for queue_id set for %s\n", bond->dev->name); +	ret = -EPERM; +	goto out; + +} + +static int bond_option_slaves_set(struct bonding *bond, +				  const struct bond_opt_value *newval) +{ +	char command[IFNAMSIZ + 1] = { 0, }; +	struct net_device *dev; +	char *ifname; +	int ret; + +	sscanf(newval->string, "%16s", command); /* IFNAMSIZ*/ +	ifname = command + 1; +	if ((strlen(command) <= 1) || +	    !dev_valid_name(ifname)) +		goto err_no_cmd; + +	dev = __dev_get_by_name(dev_net(bond->dev), ifname); +	if (!dev) { +		pr_info("%s: interface %s does not exist!\n", +			bond->dev->name, ifname); +		ret = -ENODEV; +		goto out; +	} + +	switch (command[0]) { +	case '+': +		pr_info("%s: Adding slave %s\n", bond->dev->name, dev->name); +		ret = bond_enslave(bond->dev, dev); +		break; + +	case '-': +		pr_info("%s: Removing slave %s\n", bond->dev->name, dev->name); +		ret = bond_release(bond->dev, dev); +		break; + +	default: +		goto err_no_cmd; +	} + +out: +	return ret; + +err_no_cmd: +	pr_err("no command found in slaves file for bond %s - use +ifname or -ifname\n", +	       bond->dev->name); +	ret = -EPERM; +	goto out; +} + +static int bond_option_tlb_dynamic_lb_set(struct bonding *bond, +					  const struct bond_opt_value *newval) +{ +	pr_info("%s: Setting dynamic-lb to %s (%llu)\n", +		bond->dev->name, newval->string, newval->value); +	bond->params.tlb_dynamic_lb = newval->value; + +	return 0; +} diff --git a/drivers/net/bonding/bond_options.h b/drivers/net/bonding/bond_options.h new file mode 100644 index 00000000000..17ded5b2917 --- /dev/null +++ b/drivers/net/bonding/bond_options.h @@ -0,0 +1,130 @@ +/* + * drivers/net/bond/bond_options.h - bonding options + * Copyright (c) 2013 Nikolay Aleksandrov <nikolay@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _BOND_OPTIONS_H +#define _BOND_OPTIONS_H + +#define BOND_OPT_MAX_NAMELEN 32 +#define BOND_OPT_VALID(opt) ((opt) < BOND_OPT_LAST) +#define BOND_MODE_ALL_EX(x) (~(x)) + +/* Option flags: + * BOND_OPTFLAG_NOSLAVES - check if the bond device is empty before setting + * BOND_OPTFLAG_IFDOWN - check if the bond device is down before setting + * BOND_OPTFLAG_RAWVAL - the option parses the value itself + */ +enum { +	BOND_OPTFLAG_NOSLAVES	= BIT(0), +	BOND_OPTFLAG_IFDOWN	= BIT(1), +	BOND_OPTFLAG_RAWVAL	= BIT(2) +}; + +/* Value type flags: + * BOND_VALFLAG_DEFAULT - mark the value as default + * BOND_VALFLAG_(MIN|MAX) - mark the value as min/max + */ +enum { +	BOND_VALFLAG_DEFAULT	= BIT(0), +	BOND_VALFLAG_MIN	= BIT(1), +	BOND_VALFLAG_MAX	= BIT(2) +}; + +/* Option IDs, their bit positions correspond to their IDs */ +enum { +	BOND_OPT_MODE, +	BOND_OPT_PACKETS_PER_SLAVE, +	BOND_OPT_XMIT_HASH, +	BOND_OPT_ARP_VALIDATE, +	BOND_OPT_ARP_ALL_TARGETS, +	BOND_OPT_FAIL_OVER_MAC, +	BOND_OPT_ARP_INTERVAL, +	BOND_OPT_ARP_TARGETS, +	BOND_OPT_DOWNDELAY, +	BOND_OPT_UPDELAY, +	BOND_OPT_LACP_RATE, +	BOND_OPT_MINLINKS, +	BOND_OPT_AD_SELECT, +	BOND_OPT_NUM_PEER_NOTIF, +	BOND_OPT_MIIMON, +	BOND_OPT_PRIMARY, +	BOND_OPT_PRIMARY_RESELECT, +	BOND_OPT_USE_CARRIER, +	BOND_OPT_ACTIVE_SLAVE, +	BOND_OPT_QUEUE_ID, +	BOND_OPT_ALL_SLAVES_ACTIVE, +	BOND_OPT_RESEND_IGMP, +	BOND_OPT_LP_INTERVAL, +	BOND_OPT_SLAVES, +	BOND_OPT_TLB_DYNAMIC_LB, +	BOND_OPT_LAST +}; + +/* This structure is used for storing option values and for passing option + * values when changing an option. The logic when used as an arg is as follows: + * - if string != NULL -> parse it, if the opt is RAW type then return it, else + *   return the parse result + * - if string == NULL -> parse value + */ +struct bond_opt_value { +	char *string; +	u64 value; +	u32 flags; +}; + +struct bonding; + +struct bond_option { +	int id; +	const char *name; +	const char *desc; +	u32 flags; + +	/* unsuppmodes is used to denote modes in which the option isn't +	 * supported. +	 */ +	unsigned long unsuppmodes; +	/* supported values which this option can have, can be a subset of +	 * BOND_OPTVAL_RANGE's value range +	 */ +	const struct bond_opt_value *values; + +	int (*set)(struct bonding *bond, const struct bond_opt_value *val); +}; + +int __bond_opt_set(struct bonding *bond, unsigned int option, +		   struct bond_opt_value *val); +int bond_opt_tryset_rtnl(struct bonding *bond, unsigned int option, char *buf); + +const struct bond_opt_value *bond_opt_parse(const struct bond_option *opt, +					    struct bond_opt_value *val); +const struct bond_option *bond_opt_get(unsigned int option); +const struct bond_option *bond_opt_get_by_name(const char *name); +const struct bond_opt_value *bond_opt_get_val(unsigned int option, u64 val); + +/* This helper is used to initialize a bond_opt_value structure for parameter + * passing. There should be either a valid string or value, but not both. + * When value is ULLONG_MAX then string will be used. + */ +static inline void __bond_opt_init(struct bond_opt_value *optval, +				   char *string, u64 value) +{ +	memset(optval, 0, sizeof(*optval)); +	optval->value = ULLONG_MAX; +	if (value == ULLONG_MAX) +		optval->string = string; +	else +		optval->value = value; +} +#define bond_opt_initval(optval, value) __bond_opt_init(optval, NULL, value) +#define bond_opt_initstr(optval, str) __bond_opt_init(optval, str, ULLONG_MAX) + +void bond_option_arp_ip_targets_clear(struct bonding *bond); + +#endif /* _BOND_OPTIONS_H */ diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c index 20a6ee25bb6..b215b479bb3 100644 --- a/drivers/net/bonding/bond_procfs.c +++ b/drivers/net/bonding/bond_procfs.c @@ -10,8 +10,9 @@ static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos)  	__acquires(&bond->lock)  {  	struct bonding *bond = seq->private; -	loff_t off = 0; +	struct list_head *iter;  	struct slave *slave; +	loff_t off = 0;  	/* make sure the bond won't be taken away */  	rcu_read_lock(); @@ -20,7 +21,7 @@ static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos)  	if (*pos == 0)  		return SEQ_START_TOKEN; -	bond_for_each_slave(bond, slave) +	bond_for_each_slave(bond, slave, iter)  		if (++off == *pos)  			return slave; @@ -30,17 +31,25 @@ static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos)  static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)  {  	struct bonding *bond = seq->private; -	struct slave *slave = v; +	struct list_head *iter; +	struct slave *slave; +	bool found = false;  	++*pos;  	if (v == SEQ_START_TOKEN)  		return bond_first_slave(bond); -	if (bond_is_last_slave(bond, slave)) +	if (bond_is_last_slave(bond, v))  		return NULL; -	slave = bond_next_slave(bond, slave); -	return slave; +	bond_for_each_slave(bond, slave, iter) { +		if (found) +			return slave; +		if (slave == v) +			found = true; +	} + +	return NULL;  }  static void bond_info_seq_stop(struct seq_file *seq, void *v) @@ -56,37 +65,42 @@ static void bond_info_seq_stop(struct seq_file *seq, void *v)  static void bond_info_show_master(struct seq_file *seq)  {  	struct bonding *bond = seq->private; +	const struct bond_opt_value *optval;  	struct slave *curr;  	int i; -	read_lock(&bond->curr_slave_lock); -	curr = bond->curr_active_slave; -	read_unlock(&bond->curr_slave_lock); +	curr = rcu_dereference(bond->curr_active_slave);  	seq_printf(seq, "Bonding Mode: %s", -		   bond_mode_name(bond->params.mode)); +		   bond_mode_name(BOND_MODE(bond))); -	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP && -	    bond->params.fail_over_mac) -		seq_printf(seq, " (fail_over_mac %s)", -		   fail_over_mac_tbl[bond->params.fail_over_mac].modename); +	if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP && +	    bond->params.fail_over_mac) { +		optval = bond_opt_get_val(BOND_OPT_FAIL_OVER_MAC, +					  bond->params.fail_over_mac); +		seq_printf(seq, " (fail_over_mac %s)", optval->string); +	}  	seq_printf(seq, "\n"); -	if (bond->params.mode == BOND_MODE_XOR || -		bond->params.mode == BOND_MODE_8023AD) { +	if (BOND_MODE(bond) == BOND_MODE_XOR || +		BOND_MODE(bond) == BOND_MODE_8023AD) { +		optval = bond_opt_get_val(BOND_OPT_XMIT_HASH, +					  bond->params.xmit_policy);  		seq_printf(seq, "Transmit Hash Policy: %s (%d)\n", -			xmit_hashtype_tbl[bond->params.xmit_policy].modename, -			bond->params.xmit_policy); +			   optval->string, bond->params.xmit_policy);  	} -	if (USES_PRIMARY(bond->params.mode)) { +	if (bond_uses_primary(bond)) {  		seq_printf(seq, "Primary Slave: %s",  			   (bond->primary_slave) ?  			   bond->primary_slave->dev->name : "None"); -		if (bond->primary_slave) +		if (bond->primary_slave) { +			optval = bond_opt_get_val(BOND_OPT_PRIMARY_RESELECT, +						  bond->params.primary_reselect);  			seq_printf(seq, " (primary_reselect %s)", -		   pri_reselect_tbl[bond->params.primary_reselect].modename); +				   optval->string); +		}  		seq_printf(seq, "\nCurrently Active Slave: %s\n",  			   (curr) ? curr->dev->name : "None"); @@ -120,15 +134,17 @@ static void bond_info_show_master(struct seq_file *seq)  		seq_printf(seq, "\n");  	} -	if (bond->params.mode == BOND_MODE_8023AD) { +	if (BOND_MODE(bond) == BOND_MODE_8023AD) {  		struct ad_info ad_info;  		seq_puts(seq, "\n802.3ad info\n");  		seq_printf(seq, "LACP rate: %s\n",  			   (bond->params.lacp_fast) ? "fast" : "slow");  		seq_printf(seq, "Min links: %d\n", bond->params.min_links); +		optval = bond_opt_get_val(BOND_OPT_AD_SELECT, +					  bond->params.ad_select);  		seq_printf(seq, "Aggregator selection policy (ad_select): %s\n", -			   ad_select_tbl[bond->params.ad_select].modename); +			   optval->string);  		if (__bond_3ad_get_active_agg_info(bond, &ad_info)) {  			seq_printf(seq, "bond %s has no active aggregator\n", @@ -150,18 +166,6 @@ static void bond_info_show_master(struct seq_file *seq)  	}  } -static const char *bond_slave_link_status(s8 link) -{ -	static const char * const status[] = { -		[BOND_LINK_UP] = "up", -		[BOND_LINK_FAIL] = "going down", -		[BOND_LINK_DOWN] = "down", -		[BOND_LINK_BACK] = "going back", -	}; - -	return status[link]; -} -  static void bond_info_show_slave(struct seq_file *seq,  				 const struct slave *slave)  { @@ -184,9 +188,9 @@ static void bond_info_show_slave(struct seq_file *seq,  	seq_printf(seq, "Permanent HW addr: %pM\n", slave->perm_hwaddr); -	if (bond->params.mode == BOND_MODE_8023AD) { +	if (BOND_MODE(bond) == BOND_MODE_8023AD) {  		const struct aggregator *agg -			= SLAVE_AD_INFO(slave).port.aggregator; +			= SLAVE_AD_INFO(slave)->port.aggregator;  		if (agg)  			seq_printf(seq, "Aggregator ID: %d\n", @@ -248,8 +252,8 @@ void bond_create_proc_entry(struct bonding *bond)  						    S_IRUGO, bn->proc_dir,  						    &bond_info_fops, bond);  		if (bond->proc_entry == NULL) -			pr_warning("Warning: Cannot create /proc/net/%s/%s\n", -				   DRV_NAME, bond_dev->name); +			pr_warn("Warning: Cannot create /proc/net/%s/%s\n", +				DRV_NAME, bond_dev->name);  		else  			memcpy(bond->proc_file_name, bond_dev->name, IFNAMSIZ);  	} @@ -275,8 +279,8 @@ void __net_init bond_create_proc_dir(struct bond_net *bn)  	if (!bn->proc_dir) {  		bn->proc_dir = proc_mkdir(DRV_NAME, bn->net->proc_net);  		if (!bn->proc_dir) -			pr_warning("Warning: cannot create /proc/net/%s\n", -				   DRV_NAME); +			pr_warn("Warning: Cannot create /proc/net/%s\n", +				DRV_NAME);  	}  } diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index eeab40b01b7..daed52f68ce 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -12,8 +12,7 @@   * for more details.   *   * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA. + * with this program; if not, see <http://www.gnu.org/licenses/>.   *   * The full GNU General Public License is included in this distribution in the   * file called LICENSE. @@ -46,8 +45,7 @@  #define to_dev(obj)	container_of(obj, struct device, kobj)  #define to_bond(cd)	((struct bonding *)(netdev_priv(to_net_dev(cd)))) -/* - * "show" function for the bond_masters attribute. +/* "show" function for the bond_masters attribute.   * The class parameter is ignored.   */  static ssize_t bonding_show_bonds(struct class *cls, @@ -89,14 +87,12 @@ static struct net_device *bond_get_by_name(struct bond_net *bn, const char *ifna  	return NULL;  } -/* - * "store" function for the bond_masters attribute.  This is what +/* "store" function for the bond_masters attribute.  This is what   * creates and deletes entire bonds.   *   * The class parameter is ignored.   *   */ -  static ssize_t bonding_store_bonds(struct class *cls,  				   struct class_attribute *attr,  				   const char *buffer, size_t count) @@ -118,9 +114,9 @@ static ssize_t bonding_store_bonds(struct class *cls,  		rv = bond_create(bn->net, ifname);  		if (rv) {  			if (rv == -EEXIST) -				pr_info("%s already exists.\n", ifname); +				pr_info("%s already exists\n", ifname);  			else -				pr_info("%s creation failed.\n", ifname); +				pr_info("%s creation failed\n", ifname);  			res = rv;  		}  	} else if (command[0] == '-') { @@ -145,18 +141,10 @@ static ssize_t bonding_store_bonds(struct class *cls,  	return res;  err_no_cmd: -	pr_err("no command found in bonding_masters. Use +ifname or -ifname.\n"); +	pr_err("no command found in bonding_masters - use +ifname or -ifname\n");  	return -EPERM;  } -static const void *bonding_namespace(struct class *cls, -				     const struct class_attribute *attr) -{ -	const struct bond_net *bn = -		container_of(attr, struct bond_net, class_attr_bonding_masters); -	return bn->net; -} -  /* class attribute for bond_masters file.  This ends up in /sys/class/net */  static const struct class_attribute class_attr_bonding_masters = {  	.attr = { @@ -165,56 +153,40 @@ static const struct class_attribute class_attr_bonding_masters = {  	},  	.show = bonding_show_bonds,  	.store = bonding_store_bonds, -	.namespace = bonding_namespace,  }; -int bond_create_slave_symlinks(struct net_device *master, -			       struct net_device *slave) +/* Generic "store" method for bonding sysfs option setting */ +static ssize_t bonding_sysfs_store_option(struct device *d, +					  struct device_attribute *attr, +					  const char *buffer, size_t count)  { -	char linkname[IFNAMSIZ+7]; -	int ret = 0; - -	/* first, create a link from the slave back to the master */ -	ret = sysfs_create_link(&(slave->dev.kobj), &(master->dev.kobj), -				"master"); -	if (ret) -		return ret; -	/* next, create a link from the master to the slave */ -	sprintf(linkname, "slave_%s", slave->name); -	ret = sysfs_create_link(&(master->dev.kobj), &(slave->dev.kobj), -				linkname); - -	/* free the master link created earlier in case of error */ -	if (ret) -		sysfs_remove_link(&(slave->dev.kobj), "master"); - -	return ret; - -} +	struct bonding *bond = to_bond(d); +	const struct bond_option *opt; +	int ret; -void bond_destroy_slave_symlinks(struct net_device *master, -				 struct net_device *slave) -{ -	char linkname[IFNAMSIZ+7]; +	opt = bond_opt_get_by_name(attr->attr.name); +	if (WARN_ON(!opt)) +		return -ENOENT; +	ret = bond_opt_tryset_rtnl(bond, opt->id, (char *)buffer); +	if (!ret) +		ret = count; -	sysfs_remove_link(&(slave->dev.kobj), "master"); -	sprintf(linkname, "slave_%s", slave->name); -	sysfs_remove_link(&(master->dev.kobj), linkname); +	return ret;  } - -/* - * Show the slaves in the current bond. - */ +/* Show the slaves in the current bond. */  static ssize_t bonding_show_slaves(struct device *d,  				   struct device_attribute *attr, char *buf)  {  	struct bonding *bond = to_bond(d); +	struct list_head *iter;  	struct slave *slave;  	int res = 0; -	read_lock(&bond->lock); -	bond_for_each_slave(bond, slave) { +	if (!rtnl_trylock()) +		return restart_syscall(); + +	bond_for_each_slave(bond, slave, iter) {  		if (res > (PAGE_SIZE - IFNAMSIZ)) {  			/* not enough space for another interface name */  			if ((PAGE_SIZE - res) > 10) @@ -224,338 +196,95 @@ static ssize_t bonding_show_slaves(struct device *d,  		}  		res += sprintf(buf + res, "%s ", slave->dev->name);  	} -	read_unlock(&bond->lock); -	if (res) -		buf[res-1] = '\n'; /* eat the leftover space */ - -	return res; -} - -/* - * Set the slaves in the current bond. - * This is supposed to be only thin wrapper for bond_enslave and bond_release. - * All hard work should be done there. - */ -static ssize_t bonding_store_slaves(struct device *d, -				    struct device_attribute *attr, -				    const char *buffer, size_t count) -{ -	char command[IFNAMSIZ + 1] = { 0, }; -	char *ifname; -	int res, ret = count; -	struct net_device *dev; -	struct bonding *bond = to_bond(d); - -	if (!rtnl_trylock()) -		return restart_syscall(); - -	sscanf(buffer, "%16s", command); /* IFNAMSIZ*/ -	ifname = command + 1; -	if ((strlen(command) <= 1) || -	    !dev_valid_name(ifname)) -		goto err_no_cmd; - -	dev = __dev_get_by_name(dev_net(bond->dev), ifname); -	if (!dev) { -		pr_info("%s: Interface %s does not exist!\n", -			bond->dev->name, ifname); -		ret = -ENODEV; -		goto out; -	} - -	switch (command[0]) { -	case '+': -		pr_info("%s: Adding slave %s.\n", bond->dev->name, dev->name); -		res = bond_enslave(bond->dev, dev); -		break; -	case '-': -		pr_info("%s: Removing slave %s.\n", bond->dev->name, dev->name); -		res = bond_release(bond->dev, dev); -		break; - -	default: -		goto err_no_cmd; -	} +	rtnl_unlock();  	if (res) -		ret = res; -	goto out; - -err_no_cmd: -	pr_err("no command found in slaves file for bond %s. Use +ifname or -ifname.\n", -	       bond->dev->name); -	ret = -EPERM; +		buf[res-1] = '\n'; /* eat the leftover space */ -out: -	rtnl_unlock(); -	return ret; +	return res;  } -  static DEVICE_ATTR(slaves, S_IRUGO | S_IWUSR, bonding_show_slaves, -		   bonding_store_slaves); +		   bonding_sysfs_store_option); -/* - * Show and set the bonding mode.  The bond interface must be down to - * change the mode. - */ +/* Show the bonding mode. */  static ssize_t bonding_show_mode(struct device *d,  				 struct device_attribute *attr, char *buf)  {  	struct bonding *bond = to_bond(d); +	const struct bond_opt_value *val; -	return sprintf(buf, "%s %d\n", -			bond_mode_tbl[bond->params.mode].modename, -			bond->params.mode); -} - -static ssize_t bonding_store_mode(struct device *d, -				  struct device_attribute *attr, -				  const char *buf, size_t count) -{ -	int new_value, ret = count; -	struct bonding *bond = to_bond(d); - -	if (!rtnl_trylock()) -		return restart_syscall(); - -	if (bond->dev->flags & IFF_UP) { -		pr_err("unable to update mode of %s because interface is up.\n", -		       bond->dev->name); -		ret = -EPERM; -		goto out; -	} - -	if (!list_empty(&bond->slave_list)) { -		pr_err("unable to update mode of %s because it has slaves.\n", -			bond->dev->name); -		ret = -EPERM; -		goto out; -	} - -	new_value = bond_parse_parm(buf, bond_mode_tbl); -	if (new_value < 0)  { -		pr_err("%s: Ignoring invalid mode value %.*s.\n", -		       bond->dev->name, (int)strlen(buf) - 1, buf); -		ret = -EINVAL; -		goto out; -	} -	if ((new_value == BOND_MODE_ALB || -	     new_value == BOND_MODE_TLB) && -	    bond->params.arp_interval) { -		pr_err("%s: %s mode is incompatible with arp monitoring.\n", -		       bond->dev->name, bond_mode_tbl[new_value].modename); -		ret = -EINVAL; -		goto out; -	} +	val = bond_opt_get_val(BOND_OPT_MODE, BOND_MODE(bond)); -	/* don't cache arp_validate between modes */ -	bond->params.arp_validate = BOND_ARP_VALIDATE_NONE; -	bond->params.mode = new_value; -	bond_set_mode_ops(bond, bond->params.mode); -	pr_info("%s: setting mode to %s (%d).\n", -		bond->dev->name, bond_mode_tbl[new_value].modename, -		new_value); -out: -	rtnl_unlock(); -	return ret; +	return sprintf(buf, "%s %d\n", val->string, BOND_MODE(bond));  }  static DEVICE_ATTR(mode, S_IRUGO | S_IWUSR, -		   bonding_show_mode, bonding_store_mode); +		   bonding_show_mode, bonding_sysfs_store_option); -/* - * Show and set the bonding transmit hash method. - */ +/* Show the bonding transmit hash method. */  static ssize_t bonding_show_xmit_hash(struct device *d,  				      struct device_attribute *attr,  				      char *buf)  {  	struct bonding *bond = to_bond(d); +	const struct bond_opt_value *val; -	return sprintf(buf, "%s %d\n", -		       xmit_hashtype_tbl[bond->params.xmit_policy].modename, -		       bond->params.xmit_policy); -} - -static ssize_t bonding_store_xmit_hash(struct device *d, -				       struct device_attribute *attr, -				       const char *buf, size_t count) -{ -	int new_value, ret = count; -	struct bonding *bond = to_bond(d); +	val = bond_opt_get_val(BOND_OPT_XMIT_HASH, bond->params.xmit_policy); -	new_value = bond_parse_parm(buf, xmit_hashtype_tbl); -	if (new_value < 0)  { -		pr_err("%s: Ignoring invalid xmit hash policy value %.*s.\n", -		       bond->dev->name, -		       (int)strlen(buf) - 1, buf); -		ret = -EINVAL; -	} else { -		bond->params.xmit_policy = new_value; -		bond_set_mode_ops(bond, bond->params.mode); -		pr_info("%s: setting xmit hash policy to %s (%d).\n", -			bond->dev->name, -			xmit_hashtype_tbl[new_value].modename, new_value); -	} - -	return ret; +	return sprintf(buf, "%s %d\n", val->string, bond->params.xmit_policy);  }  static DEVICE_ATTR(xmit_hash_policy, S_IRUGO | S_IWUSR, -		   bonding_show_xmit_hash, bonding_store_xmit_hash); +		   bonding_show_xmit_hash, bonding_sysfs_store_option); -/* - * Show and set arp_validate. - */ +/* Show arp_validate. */  static ssize_t bonding_show_arp_validate(struct device *d,  					 struct device_attribute *attr,  					 char *buf)  {  	struct bonding *bond = to_bond(d); +	const struct bond_opt_value *val; -	return sprintf(buf, "%s %d\n", -		       arp_validate_tbl[bond->params.arp_validate].modename, -		       bond->params.arp_validate); -} - -static ssize_t bonding_store_arp_validate(struct device *d, -					  struct device_attribute *attr, -					  const char *buf, size_t count) -{ -	struct bonding *bond = to_bond(d); -	int new_value, ret = count; +	val = bond_opt_get_val(BOND_OPT_ARP_VALIDATE, +			       bond->params.arp_validate); -	if (!rtnl_trylock()) -		return restart_syscall(); -	new_value = bond_parse_parm(buf, arp_validate_tbl); -	if (new_value < 0) { -		pr_err("%s: Ignoring invalid arp_validate value %s\n", -		       bond->dev->name, buf); -		ret = -EINVAL; -		goto out; -	} -	if (bond->params.mode != BOND_MODE_ACTIVEBACKUP) { -		pr_err("%s: arp_validate only supported in active-backup mode.\n", -		       bond->dev->name); -		ret = -EINVAL; -		goto out; -	} -	pr_info("%s: setting arp_validate to %s (%d).\n", -		bond->dev->name, arp_validate_tbl[new_value].modename, -		new_value); - -	if (bond->dev->flags & IFF_UP) { -		if (!new_value) -			bond->recv_probe = NULL; -		else if (bond->params.arp_interval) -			bond->recv_probe = bond_arp_rcv; -	} -	bond->params.arp_validate = new_value; -out: -	rtnl_unlock(); - -	return ret; +	return sprintf(buf, "%s %d\n", val->string, bond->params.arp_validate);  } -  static DEVICE_ATTR(arp_validate, S_IRUGO | S_IWUSR, bonding_show_arp_validate, -		   bonding_store_arp_validate); -/* - * Show and set arp_all_targets. - */ +		   bonding_sysfs_store_option); + +/* Show arp_all_targets. */  static ssize_t bonding_show_arp_all_targets(struct device *d,  					 struct device_attribute *attr,  					 char *buf)  {  	struct bonding *bond = to_bond(d); -	int value = bond->params.arp_all_targets; - -	return sprintf(buf, "%s %d\n", arp_all_targets_tbl[value].modename, -		       value); -} +	const struct bond_opt_value *val; -static ssize_t bonding_store_arp_all_targets(struct device *d, -					  struct device_attribute *attr, -					  const char *buf, size_t count) -{ -	struct bonding *bond = to_bond(d); -	int new_value; - -	new_value = bond_parse_parm(buf, arp_all_targets_tbl); -	if (new_value < 0) { -		pr_err("%s: Ignoring invalid arp_all_targets value %s\n", -		       bond->dev->name, buf); -		return -EINVAL; -	} -	pr_info("%s: setting arp_all_targets to %s (%d).\n", -		bond->dev->name, arp_all_targets_tbl[new_value].modename, -		new_value); - -	bond->params.arp_all_targets = new_value; - -	return count; +	val = bond_opt_get_val(BOND_OPT_ARP_ALL_TARGETS, +			       bond->params.arp_all_targets); +	return sprintf(buf, "%s %d\n", +		       val->string, bond->params.arp_all_targets);  } -  static DEVICE_ATTR(arp_all_targets, S_IRUGO | S_IWUSR, -		   bonding_show_arp_all_targets, bonding_store_arp_all_targets); +		   bonding_show_arp_all_targets, bonding_sysfs_store_option); -/* - * Show and store fail_over_mac.  User only allowed to change the - * value when there are no slaves. - */ +/* Show fail_over_mac. */  static ssize_t bonding_show_fail_over_mac(struct device *d,  					  struct device_attribute *attr,  					  char *buf)  {  	struct bonding *bond = to_bond(d); +	const struct bond_opt_value *val; -	return sprintf(buf, "%s %d\n", -		       fail_over_mac_tbl[bond->params.fail_over_mac].modename, -		       bond->params.fail_over_mac); -} - -static ssize_t bonding_store_fail_over_mac(struct device *d, -					   struct device_attribute *attr, -					   const char *buf, size_t count) -{ -	int new_value, ret = count; -	struct bonding *bond = to_bond(d); - -	if (!rtnl_trylock()) -		return restart_syscall(); - -	if (!list_empty(&bond->slave_list)) { -		pr_err("%s: Can't alter fail_over_mac with slaves in bond.\n", -		       bond->dev->name); -		ret = -EPERM; -		goto out; -	} - -	new_value = bond_parse_parm(buf, fail_over_mac_tbl); -	if (new_value < 0) { -		pr_err("%s: Ignoring invalid fail_over_mac value %s.\n", -		       bond->dev->name, buf); -		ret = -EINVAL; -		goto out; -	} - -	bond->params.fail_over_mac = new_value; -	pr_info("%s: Setting fail_over_mac to %s (%d).\n", -		bond->dev->name, fail_over_mac_tbl[new_value].modename, -		new_value); +	val = bond_opt_get_val(BOND_OPT_FAIL_OVER_MAC, +			       bond->params.fail_over_mac); -out: -	rtnl_unlock(); -	return ret; +	return sprintf(buf, "%s %d\n", val->string, bond->params.fail_over_mac);  } -  static DEVICE_ATTR(fail_over_mac, S_IRUGO | S_IWUSR, -		   bonding_show_fail_over_mac, bonding_store_fail_over_mac); +		   bonding_show_fail_over_mac, bonding_sysfs_store_option); -/* - * Show and set the arp timer interval.  There are two tricky bits - * here.  First, if ARP monitoring is activated, then we must disable - * MII monitoring.  Second, if the ARP timer isn't running, we must - * start it. - */ +/* Show the arp timer interval. */  static ssize_t bonding_show_arp_interval(struct device *d,  					 struct device_attribute *attr,  					 char *buf) @@ -564,82 +293,16 @@ static ssize_t bonding_show_arp_interval(struct device *d,  	return sprintf(buf, "%d\n", bond->params.arp_interval);  } - -static ssize_t bonding_store_arp_interval(struct device *d, -					  struct device_attribute *attr, -					  const char *buf, size_t count) -{ -	struct bonding *bond = to_bond(d); -	int new_value, ret = count; - -	if (!rtnl_trylock()) -		return restart_syscall(); -	if (sscanf(buf, "%d", &new_value) != 1) { -		pr_err("%s: no arp_interval value specified.\n", -		       bond->dev->name); -		ret = -EINVAL; -		goto out; -	} -	if (new_value < 0) { -		pr_err("%s: Invalid arp_interval value %d not in range 0-%d; rejected.\n", -		       bond->dev->name, new_value, INT_MAX); -		ret = -EINVAL; -		goto out; -	} -	if (bond->params.mode == BOND_MODE_ALB || -	    bond->params.mode == BOND_MODE_TLB) { -		pr_info("%s: ARP monitoring cannot be used with ALB/TLB. Only MII monitoring is supported on %s.\n", -			bond->dev->name, bond->dev->name); -		ret = -EINVAL; -		goto out; -	} -	pr_info("%s: Setting ARP monitoring interval to %d.\n", -		bond->dev->name, new_value); -	bond->params.arp_interval = new_value; -	if (new_value) { -		if (bond->params.miimon) { -			pr_info("%s: ARP monitoring cannot be used with MII monitoring. %s Disabling MII monitoring.\n", -				bond->dev->name, bond->dev->name); -			bond->params.miimon = 0; -		} -		if (!bond->params.arp_targets[0]) -			pr_info("%s: ARP monitoring has been set up, but no ARP targets have been specified.\n", -				bond->dev->name); -	} -	if (bond->dev->flags & IFF_UP) { -		/* If the interface is up, we may need to fire off -		 * the ARP timer.  If the interface is down, the -		 * timer will get fired off when the open function -		 * is called. -		 */ -		if (!new_value) { -			if (bond->params.arp_validate) -				bond->recv_probe = NULL; -			cancel_delayed_work_sync(&bond->arp_work); -		} else { -			/* arp_validate can be set only in active-backup mode */ -			if (bond->params.arp_validate) -				bond->recv_probe = bond_arp_rcv; -			cancel_delayed_work_sync(&bond->mii_work); -			queue_delayed_work(bond->wq, &bond->arp_work, 0); -		} -	} -out: -	rtnl_unlock(); -	return ret; -}  static DEVICE_ATTR(arp_interval, S_IRUGO | S_IWUSR, -		   bonding_show_arp_interval, bonding_store_arp_interval); +		   bonding_show_arp_interval, bonding_sysfs_store_option); -/* - * Show and set the arp targets. - */ +/* Show the arp targets. */  static ssize_t bonding_show_arp_targets(struct device *d,  					struct device_attribute *attr,  					char *buf)  { -	int i, res = 0;  	struct bonding *bond = to_bond(d); +	int i, res = 0;  	for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) {  		if (bond->params.arp_targets[i]) @@ -648,101 +311,13 @@ static ssize_t bonding_show_arp_targets(struct device *d,  	}  	if (res)  		buf[res-1] = '\n'; /* eat the leftover space */ -	return res; -} - -static ssize_t bonding_store_arp_targets(struct device *d, -					 struct device_attribute *attr, -					 const char *buf, size_t count) -{ -	struct bonding *bond = to_bond(d); -	struct slave *slave; -	__be32 newtarget, *targets; -	unsigned long *targets_rx; -	int ind, i, j, ret = -EINVAL; - -	targets = bond->params.arp_targets; -	newtarget = in_aton(buf + 1); -	/* look for adds */ -	if (buf[0] == '+') { -		if ((newtarget == 0) || (newtarget == htonl(INADDR_BROADCAST))) { -			pr_err("%s: invalid ARP target %pI4 specified for addition\n", -			       bond->dev->name, &newtarget); -			goto out; -		} - -		if (bond_get_targets_ip(targets, newtarget) != -1) { /* dup */ -			pr_err("%s: ARP target %pI4 is already present\n", -			       bond->dev->name, &newtarget); -			goto out; -		} - -		ind = bond_get_targets_ip(targets, 0); /* first free slot */ -		if (ind == -1) { -			pr_err("%s: ARP target table is full!\n", -			       bond->dev->name); -			goto out; -		} - -		pr_info("%s: adding ARP target %pI4.\n", bond->dev->name, -			 &newtarget); -		/* not to race with bond_arp_rcv */ -		write_lock_bh(&bond->lock); -		bond_for_each_slave(bond, slave) -			slave->target_last_arp_rx[ind] = jiffies; -		targets[ind] = newtarget; -		write_unlock_bh(&bond->lock); -	} else if (buf[0] == '-')	{ -		if ((newtarget == 0) || (newtarget == htonl(INADDR_BROADCAST))) { -			pr_err("%s: invalid ARP target %pI4 specified for removal\n", -			       bond->dev->name, &newtarget); -			goto out; -		} - -		ind = bond_get_targets_ip(targets, newtarget); -		if (ind == -1) { -			pr_err("%s: unable to remove nonexistent ARP target %pI4.\n", -				bond->dev->name, &newtarget); -			goto out; -		} - -		if (ind == 0 && !targets[1] && bond->params.arp_interval) -			pr_warn("%s: removing last arp target with arp_interval on\n", -				bond->dev->name); - -		pr_info("%s: removing ARP target %pI4.\n", bond->dev->name, -			&newtarget); -		write_lock_bh(&bond->lock); -		bond_for_each_slave(bond, slave) { -			targets_rx = slave->target_last_arp_rx; -			j = ind; -			for (; (j < BOND_MAX_ARP_TARGETS-1) && targets[j+1]; j++) -				targets_rx[j] = targets_rx[j+1]; -			targets_rx[j] = 0; -		} -		for (i = ind; (i < BOND_MAX_ARP_TARGETS-1) && targets[i+1]; i++) -			targets[i] = targets[i+1]; -		targets[i] = 0; -		write_unlock_bh(&bond->lock); -	} else { -		pr_err("no command found in arp_ip_targets file for bond %s. Use +<addr> or -<addr>.\n", -		       bond->dev->name); -		ret = -EPERM; -		goto out; -	} - -	ret = count; -out: -	return ret; +	return res;  } -static DEVICE_ATTR(arp_ip_target, S_IRUGO | S_IWUSR , bonding_show_arp_targets, bonding_store_arp_targets); +static DEVICE_ATTR(arp_ip_target, S_IRUGO | S_IWUSR, +		   bonding_show_arp_targets, bonding_sysfs_store_option); -/* - * Show and set the up and down delays.  These must be multiples of the - * MII monitoring value, and are stored internally as the multiplier. - * Thus, we must translate to MS for the real world. - */ +/* Show the up and down delays. */  static ssize_t bonding_show_downdelay(struct device *d,  				      struct device_attribute *attr,  				      char *buf) @@ -751,51 +326,8 @@ static ssize_t bonding_show_downdelay(struct device *d,  	return sprintf(buf, "%d\n", bond->params.downdelay * bond->params.miimon);  } - -static ssize_t bonding_store_downdelay(struct device *d, -				       struct device_attribute *attr, -				       const char *buf, size_t count) -{ -	int new_value, ret = count; -	struct bonding *bond = to_bond(d); - -	if (!(bond->params.miimon)) { -		pr_err("%s: Unable to set down delay as MII monitoring is disabled\n", -		       bond->dev->name); -		ret = -EPERM; -		goto out; -	} - -	if (sscanf(buf, "%d", &new_value) != 1) { -		pr_err("%s: no down delay value specified.\n", bond->dev->name); -		ret = -EINVAL; -		goto out; -	} -	if (new_value < 0) { -		pr_err("%s: Invalid down delay value %d not in range %d-%d; rejected.\n", -		       bond->dev->name, new_value, 0, INT_MAX); -		ret = -EINVAL; -		goto out; -	} else { -		if ((new_value % bond->params.miimon) != 0) { -			pr_warning("%s: Warning: down delay (%d) is not a multiple of miimon (%d), delay rounded to %d ms\n", -				   bond->dev->name, new_value, -				   bond->params.miimon, -				   (new_value / bond->params.miimon) * -				   bond->params.miimon); -		} -		bond->params.downdelay = new_value / bond->params.miimon; -		pr_info("%s: Setting down delay to %d.\n", -			bond->dev->name, -			bond->params.downdelay * bond->params.miimon); - -	} - -out: -	return ret; -}  static DEVICE_ATTR(downdelay, S_IRUGO | S_IWUSR, -		   bonding_show_downdelay, bonding_store_downdelay); +		   bonding_show_downdelay, bonding_sysfs_store_option);  static ssize_t bonding_show_updelay(struct device *d,  				    struct device_attribute *attr, @@ -806,111 +338,23 @@ static ssize_t bonding_show_updelay(struct device *d,  	return sprintf(buf, "%d\n", bond->params.updelay * bond->params.miimon);  } - -static ssize_t bonding_store_updelay(struct device *d, -				     struct device_attribute *attr, -				     const char *buf, size_t count) -{ -	int new_value, ret = count; -	struct bonding *bond = to_bond(d); - -	if (!(bond->params.miimon)) { -		pr_err("%s: Unable to set up delay as MII monitoring is disabled\n", -		       bond->dev->name); -		ret = -EPERM; -		goto out; -	} - -	if (sscanf(buf, "%d", &new_value) != 1) { -		pr_err("%s: no up delay value specified.\n", -		       bond->dev->name); -		ret = -EINVAL; -		goto out; -	} -	if (new_value < 0) { -		pr_err("%s: Invalid up delay value %d not in range %d-%d; rejected.\n", -		       bond->dev->name, new_value, 0, INT_MAX); -		ret = -EINVAL; -		goto out; -	} else { -		if ((new_value % bond->params.miimon) != 0) { -			pr_warning("%s: Warning: up delay (%d) is not a multiple of miimon (%d), updelay rounded to %d ms\n", -				   bond->dev->name, new_value, -				   bond->params.miimon, -				   (new_value / bond->params.miimon) * -				   bond->params.miimon); -		} -		bond->params.updelay = new_value / bond->params.miimon; -		pr_info("%s: Setting up delay to %d.\n", -			bond->dev->name, -			bond->params.updelay * bond->params.miimon); -	} - -out: -	return ret; -}  static DEVICE_ATTR(updelay, S_IRUGO | S_IWUSR, -		   bonding_show_updelay, bonding_store_updelay); +		   bonding_show_updelay, bonding_sysfs_store_option); -/* - * Show and set the LACP interval.  Interface must be down, and the mode - * must be set to 802.3ad mode. - */ +/* Show the LACP interval. */  static ssize_t bonding_show_lacp(struct device *d,  				 struct device_attribute *attr,  				 char *buf)  {  	struct bonding *bond = to_bond(d); +	const struct bond_opt_value *val; -	return sprintf(buf, "%s %d\n", -		bond_lacp_tbl[bond->params.lacp_fast].modename, -		bond->params.lacp_fast); -} - -static ssize_t bonding_store_lacp(struct device *d, -				  struct device_attribute *attr, -				  const char *buf, size_t count) -{ -	struct bonding *bond = to_bond(d); -	int new_value, ret = count; +	val = bond_opt_get_val(BOND_OPT_LACP_RATE, bond->params.lacp_fast); -	if (!rtnl_trylock()) -		return restart_syscall(); - -	if (bond->dev->flags & IFF_UP) { -		pr_err("%s: Unable to update LACP rate because interface is up.\n", -		       bond->dev->name); -		ret = -EPERM; -		goto out; -	} - -	if (bond->params.mode != BOND_MODE_8023AD) { -		pr_err("%s: Unable to update LACP rate because bond is not in 802.3ad mode.\n", -		       bond->dev->name); -		ret = -EPERM; -		goto out; -	} - -	new_value = bond_parse_parm(buf, bond_lacp_tbl); - -	if ((new_value == 1) || (new_value == 0)) { -		bond->params.lacp_fast = new_value; -		bond_3ad_update_lacp_rate(bond); -		pr_info("%s: Setting LACP rate to %s (%d).\n", -			bond->dev->name, bond_lacp_tbl[new_value].modename, -			new_value); -	} else { -		pr_err("%s: Ignoring invalid LACP rate value %.*s.\n", -		       bond->dev->name, (int)strlen(buf) - 1, buf); -		ret = -EINVAL; -	} -out: -	rtnl_unlock(); - -	return ret; +	return sprintf(buf, "%s %d\n", val->string, bond->params.lacp_fast);  }  static DEVICE_ATTR(lacp_rate, S_IRUGO | S_IWUSR, -		   bonding_show_lacp, bonding_store_lacp); +		   bonding_show_lacp, bonding_sysfs_store_option);  static ssize_t bonding_show_min_links(struct device *d,  				      struct device_attribute *attr, @@ -918,79 +362,26 @@ static ssize_t bonding_show_min_links(struct device *d,  {  	struct bonding *bond = to_bond(d); -	return sprintf(buf, "%d\n", bond->params.min_links); -} - -static ssize_t bonding_store_min_links(struct device *d, -				       struct device_attribute *attr, -				       const char *buf, size_t count) -{ -	struct bonding *bond = to_bond(d); -	int ret; -	unsigned int new_value; - -	ret = kstrtouint(buf, 0, &new_value); -	if (ret < 0) { -		pr_err("%s: Ignoring invalid min links value %s.\n", -		       bond->dev->name, buf); -		return ret; -	} - -	pr_info("%s: Setting min links value to %u\n", -		bond->dev->name, new_value); -	bond->params.min_links = new_value; -	return count; +	return sprintf(buf, "%u\n", bond->params.min_links);  }  static DEVICE_ATTR(min_links, S_IRUGO | S_IWUSR, -		   bonding_show_min_links, bonding_store_min_links); +		   bonding_show_min_links, bonding_sysfs_store_option);  static ssize_t bonding_show_ad_select(struct device *d,  				      struct device_attribute *attr,  				      char *buf)  {  	struct bonding *bond = to_bond(d); +	const struct bond_opt_value *val; -	return sprintf(buf, "%s %d\n", -		ad_select_tbl[bond->params.ad_select].modename, -		bond->params.ad_select); -} - - -static ssize_t bonding_store_ad_select(struct device *d, -				       struct device_attribute *attr, -				       const char *buf, size_t count) -{ -	int new_value, ret = count; -	struct bonding *bond = to_bond(d); +	val = bond_opt_get_val(BOND_OPT_AD_SELECT, bond->params.ad_select); -	if (bond->dev->flags & IFF_UP) { -		pr_err("%s: Unable to update ad_select because interface is up.\n", -		       bond->dev->name); -		ret = -EPERM; -		goto out; -	} - -	new_value = bond_parse_parm(buf, ad_select_tbl); - -	if (new_value != -1) { -		bond->params.ad_select = new_value; -		pr_info("%s: Setting ad_select to %s (%d).\n", -			bond->dev->name, ad_select_tbl[new_value].modename, -			new_value); -	} else { -		pr_err("%s: Ignoring invalid ad_select value %.*s.\n", -		       bond->dev->name, (int)strlen(buf) - 1, buf); -		ret = -EINVAL; -	} -out: -	return ret; +	return sprintf(buf, "%s %d\n", val->string, bond->params.ad_select);  }  static DEVICE_ATTR(ad_select, S_IRUGO | S_IWUSR, -		   bonding_show_ad_select, bonding_store_ad_select); +		   bonding_show_ad_select, bonding_sysfs_store_option); -/* - * Show and set the number of peer notifications to send after a failover event. - */ +/* Show and set the number of peer notifications to send after a failover event. */  static ssize_t bonding_show_num_peer_notif(struct device *d,  					   struct device_attribute *attr,  					   char *buf) @@ -1004,20 +395,20 @@ static ssize_t bonding_store_num_peer_notif(struct device *d,  					    const char *buf, size_t count)  {  	struct bonding *bond = to_bond(d); -	int err = kstrtou8(buf, 10, &bond->params.num_peer_notif); -	return err ? err : count; +	int ret; + +	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_NUM_PEER_NOTIF, (char *)buf); +	if (!ret) +		ret = count; + +	return ret;  }  static DEVICE_ATTR(num_grat_arp, S_IRUGO | S_IWUSR,  		   bonding_show_num_peer_notif, bonding_store_num_peer_notif);  static DEVICE_ATTR(num_unsol_na, S_IRUGO | S_IWUSR,  		   bonding_show_num_peer_notif, bonding_store_num_peer_notif); -/* - * Show and set the MII monitor interval.  There are two tricky bits - * here.  First, if MII monitoring is activated, then we must disable - * ARP monitoring.  Second, if the timer isn't running, we must - * start it. - */ +/* Show the MII monitor interval. */  static ssize_t bonding_show_miimon(struct device *d,  				   struct device_attribute *attr,  				   char *buf) @@ -1026,73 +417,10 @@ static ssize_t bonding_show_miimon(struct device *d,  	return sprintf(buf, "%d\n", bond->params.miimon);  } - -static ssize_t bonding_store_miimon(struct device *d, -				    struct device_attribute *attr, -				    const char *buf, size_t count) -{ -	int new_value, ret = count; -	struct bonding *bond = to_bond(d); - -	if (!rtnl_trylock()) -		return restart_syscall(); -	if (sscanf(buf, "%d", &new_value) != 1) { -		pr_err("%s: no miimon value specified.\n", -		       bond->dev->name); -		ret = -EINVAL; -		goto out; -	} -	if (new_value < 0) { -		pr_err("%s: Invalid miimon value %d not in range %d-%d; rejected.\n", -		       bond->dev->name, new_value, 0, INT_MAX); -		ret = -EINVAL; -		goto out; -	} -	pr_info("%s: Setting MII monitoring interval to %d.\n", -		bond->dev->name, new_value); -	bond->params.miimon = new_value; -	if (bond->params.updelay) -		pr_info("%s: Note: Updating updelay (to %d) since it is a multiple of the miimon value.\n", -			bond->dev->name, -			bond->params.updelay * bond->params.miimon); -	if (bond->params.downdelay) -		pr_info("%s: Note: Updating downdelay (to %d) since it is a multiple of the miimon value.\n", -			bond->dev->name, -			bond->params.downdelay * bond->params.miimon); -	if (new_value && bond->params.arp_interval) { -		pr_info("%s: MII monitoring cannot be used with ARP monitoring. Disabling ARP monitoring...\n", -			bond->dev->name); -		bond->params.arp_interval = 0; -		if (bond->params.arp_validate) -			bond->params.arp_validate = BOND_ARP_VALIDATE_NONE; -	} -	if (bond->dev->flags & IFF_UP) { -		/* If the interface is up, we may need to fire off -		 * the MII timer. If the interface is down, the -		 * timer will get fired off when the open function -		 * is called. -		 */ -		if (!new_value) { -			cancel_delayed_work_sync(&bond->mii_work); -		} else { -			cancel_delayed_work_sync(&bond->arp_work); -			queue_delayed_work(bond->wq, &bond->mii_work, 0); -		} -	} -out: -	rtnl_unlock(); -	return ret; -}  static DEVICE_ATTR(miimon, S_IRUGO | S_IWUSR, -		   bonding_show_miimon, bonding_store_miimon); +		   bonding_show_miimon, bonding_sysfs_store_option); -/* - * Show and set the primary slave.  The store function is much - * simpler than bonding_store_slaves function because it only needs to - * handle one interface name. - * The bond must be a mode that supports a primary for this be - * set. - */ +/* Show the primary slave. */  static ssize_t bonding_show_primary(struct device *d,  				    struct device_attribute *attr,  				    char *buf) @@ -1105,123 +433,27 @@ static ssize_t bonding_show_primary(struct device *d,  	return count;  } - -static ssize_t bonding_store_primary(struct device *d, -				     struct device_attribute *attr, -				     const char *buf, size_t count) -{ -	struct bonding *bond = to_bond(d); -	char ifname[IFNAMSIZ]; -	struct slave *slave; - -	if (!rtnl_trylock()) -		return restart_syscall(); -	block_netpoll_tx(); -	read_lock(&bond->lock); -	write_lock_bh(&bond->curr_slave_lock); - -	if (!USES_PRIMARY(bond->params.mode)) { -		pr_info("%s: Unable to set primary slave; %s is in mode %d\n", -			bond->dev->name, bond->dev->name, bond->params.mode); -		goto out; -	} - -	sscanf(buf, "%15s", ifname); /* IFNAMSIZ */ - -	/* check to see if we are clearing primary */ -	if (!strlen(ifname) || buf[0] == '\n') { -		pr_info("%s: Setting primary slave to None.\n", -			bond->dev->name); -		bond->primary_slave = NULL; -		memset(bond->params.primary, 0, sizeof(bond->params.primary)); -		bond_select_active_slave(bond); -		goto out; -	} - -	bond_for_each_slave(bond, slave) { -		if (strncmp(slave->dev->name, ifname, IFNAMSIZ) == 0) { -			pr_info("%s: Setting %s as primary slave.\n", -				bond->dev->name, slave->dev->name); -			bond->primary_slave = slave; -			strcpy(bond->params.primary, slave->dev->name); -			bond_select_active_slave(bond); -			goto out; -		} -	} - -	strncpy(bond->params.primary, ifname, IFNAMSIZ); -	bond->params.primary[IFNAMSIZ - 1] = 0; - -	pr_info("%s: Recording %s as primary, " -		"but it has not been enslaved to %s yet.\n", -		bond->dev->name, ifname, bond->dev->name); -out: -	write_unlock_bh(&bond->curr_slave_lock); -	read_unlock(&bond->lock); -	unblock_netpoll_tx(); -	rtnl_unlock(); - -	return count; -}  static DEVICE_ATTR(primary, S_IRUGO | S_IWUSR, -		   bonding_show_primary, bonding_store_primary); +		   bonding_show_primary, bonding_sysfs_store_option); -/* - * Show and set the primary_reselect flag. - */ +/* Show the primary_reselect flag. */  static ssize_t bonding_show_primary_reselect(struct device *d,  					     struct device_attribute *attr,  					     char *buf)  {  	struct bonding *bond = to_bond(d); +	const struct bond_opt_value *val; -	return sprintf(buf, "%s %d\n", -		       pri_reselect_tbl[bond->params.primary_reselect].modename, -		       bond->params.primary_reselect); -} +	val = bond_opt_get_val(BOND_OPT_PRIMARY_RESELECT, +			       bond->params.primary_reselect); -static ssize_t bonding_store_primary_reselect(struct device *d, -					      struct device_attribute *attr, -					      const char *buf, size_t count) -{ -	int new_value, ret = count; -	struct bonding *bond = to_bond(d); - -	if (!rtnl_trylock()) -		return restart_syscall(); - -	new_value = bond_parse_parm(buf, pri_reselect_tbl); -	if (new_value < 0)  { -		pr_err("%s: Ignoring invalid primary_reselect value %.*s.\n", -		       bond->dev->name, -		       (int) strlen(buf) - 1, buf); -		ret = -EINVAL; -		goto out; -	} - -	bond->params.primary_reselect = new_value; -	pr_info("%s: setting primary_reselect to %s (%d).\n", -		bond->dev->name, pri_reselect_tbl[new_value].modename, -		new_value); - -	block_netpoll_tx(); -	read_lock(&bond->lock); -	write_lock_bh(&bond->curr_slave_lock); -	bond_select_active_slave(bond); -	write_unlock_bh(&bond->curr_slave_lock); -	read_unlock(&bond->lock); -	unblock_netpoll_tx(); -out: -	rtnl_unlock(); -	return ret; +	return sprintf(buf, "%s %d\n", +		       val->string, bond->params.primary_reselect);  }  static DEVICE_ATTR(primary_reselect, S_IRUGO | S_IWUSR, -		   bonding_show_primary_reselect, -		   bonding_store_primary_reselect); +		   bonding_show_primary_reselect, bonding_sysfs_store_option); -/* - * Show and set the use_carrier flag. - */ +/* Show the use_carrier flag. */  static ssize_t bonding_show_carrier(struct device *d,  				    struct device_attribute *attr,  				    char *buf) @@ -1230,143 +462,31 @@ static ssize_t bonding_show_carrier(struct device *d,  	return sprintf(buf, "%d\n", bond->params.use_carrier);  } - -static ssize_t bonding_store_carrier(struct device *d, -				     struct device_attribute *attr, -				     const char *buf, size_t count) -{ -	int new_value, ret = count; -	struct bonding *bond = to_bond(d); - - -	if (sscanf(buf, "%d", &new_value) != 1) { -		pr_err("%s: no use_carrier value specified.\n", -		       bond->dev->name); -		ret = -EINVAL; -		goto out; -	} -	if ((new_value == 0) || (new_value == 1)) { -		bond->params.use_carrier = new_value; -		pr_info("%s: Setting use_carrier to %d.\n", -			bond->dev->name, new_value); -	} else { -		pr_info("%s: Ignoring invalid use_carrier value %d.\n", -			bond->dev->name, new_value); -	} -out: -	return ret; -}  static DEVICE_ATTR(use_carrier, S_IRUGO | S_IWUSR, -		   bonding_show_carrier, bonding_store_carrier); +		   bonding_show_carrier, bonding_sysfs_store_option); -/* - * Show and set currently active_slave. - */ +/* Show currently active_slave. */  static ssize_t bonding_show_active_slave(struct device *d,  					 struct device_attribute *attr,  					 char *buf)  {  	struct bonding *bond = to_bond(d); -	struct slave *curr; +	struct net_device *slave_dev;  	int count = 0;  	rcu_read_lock(); -	curr = rcu_dereference(bond->curr_active_slave); -	if (USES_PRIMARY(bond->params.mode) && curr) -		count = sprintf(buf, "%s\n", curr->dev->name); +	slave_dev = bond_option_active_slave_get_rcu(bond); +	if (slave_dev) +		count = sprintf(buf, "%s\n", slave_dev->name);  	rcu_read_unlock();  	return count;  } - -static ssize_t bonding_store_active_slave(struct device *d, -					  struct device_attribute *attr, -					  const char *buf, size_t count) -{ -	struct slave *slave, *old_active, *new_active; -	struct bonding *bond = to_bond(d); -	char ifname[IFNAMSIZ]; - -	if (!rtnl_trylock()) -		return restart_syscall(); - -	old_active = new_active = NULL; -	block_netpoll_tx(); -	read_lock(&bond->lock); -	write_lock_bh(&bond->curr_slave_lock); - -	if (!USES_PRIMARY(bond->params.mode)) { -		pr_info("%s: Unable to change active slave; %s is in mode %d\n", -			bond->dev->name, bond->dev->name, bond->params.mode); -		goto out; -	} - -	sscanf(buf, "%15s", ifname); /* IFNAMSIZ */ - -	/* check to see if we are clearing active */ -	if (!strlen(ifname) || buf[0] == '\n') { -		pr_info("%s: Clearing current active slave.\n", -			bond->dev->name); -		rcu_assign_pointer(bond->curr_active_slave, NULL); -		bond_select_active_slave(bond); -		goto out; -	} - -	bond_for_each_slave(bond, slave) { -		if (strncmp(slave->dev->name, ifname, IFNAMSIZ) == 0) { -			old_active = bond->curr_active_slave; -			new_active = slave; -			if (new_active == old_active) { -				/* do nothing */ -				pr_info("%s: %s is already the current" -					" active slave.\n", -					bond->dev->name, -					slave->dev->name); -				goto out; -			} else { -				if ((new_active) && -				    (old_active) && -				    (new_active->link == BOND_LINK_UP) && -				    IS_UP(new_active->dev)) { -					pr_info("%s: Setting %s as active" -						" slave.\n", -						bond->dev->name, -						slave->dev->name); -					bond_change_active_slave(bond, -								 new_active); -				} else { -					pr_info("%s: Could not set %s as" -						" active slave; either %s is" -						" down or the link is down.\n", -						bond->dev->name, -						slave->dev->name, -						slave->dev->name); -				} -				goto out; -			} -		} -	} - -	pr_info("%s: Unable to set %.*s as active slave.\n", -		bond->dev->name, (int)strlen(buf) - 1, buf); - out: -	write_unlock_bh(&bond->curr_slave_lock); -	read_unlock(&bond->lock); -	unblock_netpoll_tx(); - -	rtnl_unlock(); - -	return count; - -}  static DEVICE_ATTR(active_slave, S_IRUGO | S_IWUSR, -		   bonding_show_active_slave, bonding_store_active_slave); - +		   bonding_show_active_slave, bonding_sysfs_store_option); -/* - * Show link status of the bond interface. - */ +/* Show link status of the bond interface. */  static ssize_t bonding_show_mii_status(struct device *d,  				       struct device_attribute *attr,  				       char *buf) @@ -1377,9 +497,7 @@ static ssize_t bonding_show_mii_status(struct device *d,  }  static DEVICE_ATTR(mii_status, S_IRUGO, bonding_show_mii_status, NULL); -/* - * Show current 802.3ad aggregator ID. - */ +/* Show current 802.3ad aggregator ID. */  static ssize_t bonding_show_ad_aggregator(struct device *d,  					  struct device_attribute *attr,  					  char *buf) @@ -1387,7 +505,7 @@ static ssize_t bonding_show_ad_aggregator(struct device *d,  	int count = 0;  	struct bonding *bond = to_bond(d); -	if (bond->params.mode == BOND_MODE_8023AD) { +	if (BOND_MODE(bond) == BOND_MODE_8023AD) {  		struct ad_info ad_info;  		count = sprintf(buf, "%d\n",  				bond_3ad_get_active_agg_info(bond, &ad_info) @@ -1399,9 +517,7 @@ static ssize_t bonding_show_ad_aggregator(struct device *d,  static DEVICE_ATTR(ad_aggregator, S_IRUGO, bonding_show_ad_aggregator, NULL); -/* - * Show number of active 802.3ad ports. - */ +/* Show number of active 802.3ad ports. */  static ssize_t bonding_show_ad_num_ports(struct device *d,  					 struct device_attribute *attr,  					 char *buf) @@ -1409,7 +525,7 @@ static ssize_t bonding_show_ad_num_ports(struct device *d,  	int count = 0;  	struct bonding *bond = to_bond(d); -	if (bond->params.mode == BOND_MODE_8023AD) { +	if (BOND_MODE(bond) == BOND_MODE_8023AD) {  		struct ad_info ad_info;  		count = sprintf(buf, "%d\n",  				bond_3ad_get_active_agg_info(bond, &ad_info) @@ -1421,9 +537,7 @@ static ssize_t bonding_show_ad_num_ports(struct device *d,  static DEVICE_ATTR(ad_num_ports, S_IRUGO, bonding_show_ad_num_ports, NULL); -/* - * Show current 802.3ad actor key. - */ +/* Show current 802.3ad actor key. */  static ssize_t bonding_show_ad_actor_key(struct device *d,  					 struct device_attribute *attr,  					 char *buf) @@ -1431,7 +545,7 @@ static ssize_t bonding_show_ad_actor_key(struct device *d,  	int count = 0;  	struct bonding *bond = to_bond(d); -	if (bond->params.mode == BOND_MODE_8023AD) { +	if (BOND_MODE(bond) == BOND_MODE_8023AD) {  		struct ad_info ad_info;  		count = sprintf(buf, "%d\n",  				bond_3ad_get_active_agg_info(bond, &ad_info) @@ -1443,9 +557,7 @@ static ssize_t bonding_show_ad_actor_key(struct device *d,  static DEVICE_ATTR(ad_actor_key, S_IRUGO, bonding_show_ad_actor_key, NULL); -/* - * Show current 802.3ad partner key. - */ +/* Show current 802.3ad partner key. */  static ssize_t bonding_show_ad_partner_key(struct device *d,  					   struct device_attribute *attr,  					   char *buf) @@ -1453,7 +565,7 @@ static ssize_t bonding_show_ad_partner_key(struct device *d,  	int count = 0;  	struct bonding *bond = to_bond(d); -	if (bond->params.mode == BOND_MODE_8023AD) { +	if (BOND_MODE(bond) == BOND_MODE_8023AD) {  		struct ad_info ad_info;  		count = sprintf(buf, "%d\n",  				bond_3ad_get_active_agg_info(bond, &ad_info) @@ -1465,9 +577,7 @@ static ssize_t bonding_show_ad_partner_key(struct device *d,  static DEVICE_ATTR(ad_partner_key, S_IRUGO, bonding_show_ad_partner_key, NULL); -/* - * Show current 802.3ad partner mac. - */ +/* Show current 802.3ad partner mac. */  static ssize_t bonding_show_ad_partner_mac(struct device *d,  					   struct device_attribute *attr,  					   char *buf) @@ -1475,7 +585,7 @@ static ssize_t bonding_show_ad_partner_mac(struct device *d,  	int count = 0;  	struct bonding *bond = to_bond(d); -	if (bond->params.mode == BOND_MODE_8023AD) { +	if (BOND_MODE(bond) == BOND_MODE_8023AD) {  		struct ad_info ad_info;  		if (!bond_3ad_get_active_agg_info(bond, &ad_info))  			count = sprintf(buf, "%pM\n", ad_info.partner_system); @@ -1485,22 +595,20 @@ static ssize_t bonding_show_ad_partner_mac(struct device *d,  }  static DEVICE_ATTR(ad_partner_mac, S_IRUGO, bonding_show_ad_partner_mac, NULL); -/* - * Show the queue_ids of the slaves in the current bond. - */ +/* Show the queue_ids of the slaves in the current bond. */  static ssize_t bonding_show_queue_id(struct device *d,  				     struct device_attribute *attr,  				     char *buf)  {  	struct bonding *bond = to_bond(d); +	struct list_head *iter;  	struct slave *slave;  	int res = 0;  	if (!rtnl_trylock())  		return restart_syscall(); -	read_lock(&bond->lock); -	bond_for_each_slave(bond, slave) { +	bond_for_each_slave(bond, slave, iter) {  		if (res > (PAGE_SIZE - IFNAMSIZ - 6)) {  			/* not enough space for another interface_name:queue_id pair */  			if ((PAGE_SIZE - res) > 10) @@ -1511,99 +619,18 @@ static ssize_t bonding_show_queue_id(struct device *d,  		res += sprintf(buf + res, "%s:%d ",  			       slave->dev->name, slave->queue_id);  	} -	read_unlock(&bond->lock);  	if (res)  		buf[res-1] = '\n'; /* eat the leftover space */ -	rtnl_unlock(); - -	return res; -} - -/* - * Set the queue_ids of the  slaves in the current bond.  The bond - * interface must be enslaved for this to work. - */ -static ssize_t bonding_store_queue_id(struct device *d, -				      struct device_attribute *attr, -				      const char *buffer, size_t count) -{ -	struct slave *slave, *update_slave; -	struct bonding *bond = to_bond(d); -	u16 qid; -	int ret = count; -	char *delim; -	struct net_device *sdev = NULL; - -	if (!rtnl_trylock()) -		return restart_syscall(); - -	/* delim will point to queue id if successful */ -	delim = strchr(buffer, ':'); -	if (!delim) -		goto err_no_cmd; - -	/* -	 * Terminate string that points to device name and bump it -	 * up one, so we can read the queue id there. -	 */ -	*delim = '\0'; -	if (sscanf(++delim, "%hd\n", &qid) != 1) -		goto err_no_cmd; -	/* Check buffer length, valid ifname and queue id */ -	if (strlen(buffer) > IFNAMSIZ || -	    !dev_valid_name(buffer) || -	    qid > bond->dev->real_num_tx_queues) -		goto err_no_cmd; - -	/* Get the pointer to that interface if it exists */ -	sdev = __dev_get_by_name(dev_net(bond->dev), buffer); -	if (!sdev) -		goto err_no_cmd; - -	read_lock(&bond->lock); - -	/* Search for thes slave and check for duplicate qids */ -	update_slave = NULL; -	bond_for_each_slave(bond, slave) { -		if (sdev == slave->dev) -			/* -			 * We don't need to check the matching -			 * slave for dups, since we're overwriting it -			 */ -			update_slave = slave; -		else if (qid && qid == slave->queue_id) { -			goto err_no_cmd_unlock; -		} -	} - -	if (!update_slave) -		goto err_no_cmd_unlock; - -	/* Actually set the qids for the slave */ -	update_slave->queue_id = qid; - -	read_unlock(&bond->lock); -out:  	rtnl_unlock(); -	return ret; -err_no_cmd_unlock: -	read_unlock(&bond->lock); -err_no_cmd: -	pr_info("invalid input for queue_id set for %s.\n", -		bond->dev->name); -	ret = -EPERM; -	goto out; +	return res;  } -  static DEVICE_ATTR(queue_id, S_IRUGO | S_IWUSR, bonding_show_queue_id, -		   bonding_store_queue_id); +		   bonding_sysfs_store_option); -/* - * Show and set the all_slaves_active flag. - */ +/* Show the all_slaves_active flag. */  static ssize_t bonding_show_slaves_active(struct device *d,  					  struct device_attribute *attr,  					  char *buf) @@ -1612,92 +639,54 @@ static ssize_t bonding_show_slaves_active(struct device *d,  	return sprintf(buf, "%d\n", bond->params.all_slaves_active);  } +static DEVICE_ATTR(all_slaves_active, S_IRUGO | S_IWUSR, +		   bonding_show_slaves_active, bonding_sysfs_store_option); -static ssize_t bonding_store_slaves_active(struct device *d, -					   struct device_attribute *attr, -					   const char *buf, size_t count) +/* Show the number of IGMP membership reports to send on link failure */ +static ssize_t bonding_show_resend_igmp(struct device *d, +					struct device_attribute *attr, +					char *buf)  {  	struct bonding *bond = to_bond(d); -	int new_value, ret = count; -	struct slave *slave; -	if (sscanf(buf, "%d", &new_value) != 1) { -		pr_err("%s: no all_slaves_active value specified.\n", -		       bond->dev->name); -		ret = -EINVAL; -		goto out; -	} - -	if (new_value == bond->params.all_slaves_active) -		goto out; - -	if ((new_value == 0) || (new_value == 1)) { -		bond->params.all_slaves_active = new_value; -	} else { -		pr_info("%s: Ignoring invalid all_slaves_active value %d.\n", -			bond->dev->name, new_value); -		ret = -EINVAL; -		goto out; -	} - -	read_lock(&bond->lock); -	bond_for_each_slave(bond, slave) { -		if (!bond_is_active_slave(slave)) { -			if (new_value) -				slave->inactive = 0; -			else -				slave->inactive = 1; -		} -	} -	read_unlock(&bond->lock); -out: -	return ret; +	return sprintf(buf, "%d\n", bond->params.resend_igmp);  } -static DEVICE_ATTR(all_slaves_active, S_IRUGO | S_IWUSR, -		   bonding_show_slaves_active, bonding_store_slaves_active); +static DEVICE_ATTR(resend_igmp, S_IRUGO | S_IWUSR, +		   bonding_show_resend_igmp, bonding_sysfs_store_option); -/* - * Show and set the number of IGMP membership reports to send on link failure - */ -static ssize_t bonding_show_resend_igmp(struct device *d, + +static ssize_t bonding_show_lp_interval(struct device *d,  					struct device_attribute *attr,  					char *buf)  {  	struct bonding *bond = to_bond(d); -	return sprintf(buf, "%d\n", bond->params.resend_igmp); +	return sprintf(buf, "%d\n", bond->params.lp_interval);  } +static DEVICE_ATTR(lp_interval, S_IRUGO | S_IWUSR, +		   bonding_show_lp_interval, bonding_sysfs_store_option); -static ssize_t bonding_store_resend_igmp(struct device *d, -					 struct device_attribute *attr, -					 const char *buf, size_t count) +static ssize_t bonding_show_tlb_dynamic_lb(struct device *d, +					   struct device_attribute *attr, +					   char *buf)  { -	int new_value, ret = count;  	struct bonding *bond = to_bond(d); +	return sprintf(buf, "%d\n", bond->params.tlb_dynamic_lb); +} +static DEVICE_ATTR(tlb_dynamic_lb, S_IRUGO | S_IWUSR, +		   bonding_show_tlb_dynamic_lb, bonding_sysfs_store_option); -	if (sscanf(buf, "%d", &new_value) != 1) { -		pr_err("%s: no resend_igmp value specified.\n", -		       bond->dev->name); -		ret = -EINVAL; -		goto out; -	} - -	if (new_value < 0 || new_value > 255) { -		pr_err("%s: Invalid resend_igmp value %d not in range 0-255; rejected.\n", -		       bond->dev->name, new_value); -		ret = -EINVAL; -		goto out; -	} +static ssize_t bonding_show_packets_per_slave(struct device *d, +					      struct device_attribute *attr, +					      char *buf) +{ +	struct bonding *bond = to_bond(d); +	unsigned int packets_per_slave = bond->params.packets_per_slave; -	pr_info("%s: Setting resend_igmp to %d.\n", -		bond->dev->name, new_value); -	bond->params.resend_igmp = new_value; -out: -	return ret; +	return sprintf(buf, "%u\n", packets_per_slave);  } - -static DEVICE_ATTR(resend_igmp, S_IRUGO | S_IWUSR, -		   bonding_show_resend_igmp, bonding_store_resend_igmp); +static DEVICE_ATTR(packets_per_slave, S_IRUGO | S_IWUSR, +		   bonding_show_packets_per_slave, bonding_sysfs_store_option);  static struct attribute *per_bond_attrs[] = {  	&dev_attr_slaves.attr, @@ -1729,6 +718,9 @@ static struct attribute *per_bond_attrs[] = {  	&dev_attr_all_slaves_active.attr,  	&dev_attr_resend_igmp.attr,  	&dev_attr_min_links.attr, +	&dev_attr_lp_interval.attr, +	&dev_attr_packets_per_slave.attr, +	&dev_attr_tlb_dynamic_lb.attr,  	NULL,  }; @@ -1737,8 +729,7 @@ static struct attribute_group bonding_group = {  	.attrs = per_bond_attrs,  }; -/* - * Initialize sysfs.  This sets up the bonding_masters file in +/* Initialize sysfs.  This sets up the bonding_masters file in   * /sys/class/net.   */  int bond_create_sysfs(struct bond_net *bn) @@ -1748,9 +739,9 @@ int bond_create_sysfs(struct bond_net *bn)  	bn->class_attr_bonding_masters = class_attr_bonding_masters;  	sysfs_attr_init(&bn->class_attr_bonding_masters.attr); -	ret = netdev_class_create_file(&bn->class_attr_bonding_masters); -	/* -	 * Permit multiple loads of the module by ignoring failures to +	ret = netdev_class_create_file_ns(&bn->class_attr_bonding_masters, +					  bn->net); +	/* Permit multiple loads of the module by ignoring failures to  	 * create the bonding_masters sysfs file.  Bonding devices  	 * created by second or subsequent loads of the module will  	 * not be listed in, or controllable by, bonding_masters, but @@ -1764,7 +755,7 @@ int bond_create_sysfs(struct bond_net *bn)  		/* Is someone being kinky and naming a device bonding_master? */  		if (__dev_get_by_name(bn->net,  				      class_attr_bonding_masters.attr.name)) -			pr_err("network device named %s already exists in sysfs", +			pr_err("network device named %s already exists in sysfs\n",  			       class_attr_bonding_masters.attr.name);  		ret = 0;  	} @@ -1773,16 +764,13 @@ int bond_create_sysfs(struct bond_net *bn)  } -/* - * Remove /sys/class/net/bonding_masters. - */ +/* Remove /sys/class/net/bonding_masters. */  void bond_destroy_sysfs(struct bond_net *bn)  { -	netdev_class_remove_file(&bn->class_attr_bonding_masters); +	netdev_class_remove_file_ns(&bn->class_attr_bonding_masters, bn->net);  } -/* - * Initialize sysfs for each bond.  This sets up and registers +/* Initialize sysfs for each bond.  This sets up and registers   * the 'bondctl' directory for each individual bond under /sys/class/net.   */  void bond_prepare_sysfs_group(struct bonding *bond) diff --git a/drivers/net/bonding/bond_sysfs_slave.c b/drivers/net/bonding/bond_sysfs_slave.c new file mode 100644 index 00000000000..198677f58ce --- /dev/null +++ b/drivers/net/bonding/bond_sysfs_slave.c @@ -0,0 +1,144 @@ +/*	Sysfs attributes of bond slaves + * + *      Copyright (c) 2014 Scott Feldman <sfeldma@cumulusnetworks.com> + * + *	This program is free software; you can redistribute it and/or + *	modify it under the terms of the GNU General Public License + *	as published by the Free Software Foundation; either version + *	2 of the License, or (at your option) any later version. + */ + +#include <linux/capability.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> + +#include "bonding.h" + +struct slave_attribute { +	struct attribute attr; +	ssize_t (*show)(struct slave *, char *); +}; + +#define SLAVE_ATTR(_name, _mode, _show)				\ +const struct slave_attribute slave_attr_##_name = {		\ +	.attr = {.name = __stringify(_name),			\ +		 .mode = _mode },				\ +	.show	= _show,					\ +}; +#define SLAVE_ATTR_RO(_name) \ +	SLAVE_ATTR(_name, S_IRUGO, _name##_show) + +static ssize_t state_show(struct slave *slave, char *buf) +{ +	switch (bond_slave_state(slave)) { +	case BOND_STATE_ACTIVE: +		return sprintf(buf, "active\n"); +	case BOND_STATE_BACKUP: +		return sprintf(buf, "backup\n"); +	default: +		return sprintf(buf, "UNKONWN\n"); +	} +} +static SLAVE_ATTR_RO(state); + +static ssize_t mii_status_show(struct slave *slave, char *buf) +{ +	return sprintf(buf, "%s\n", bond_slave_link_status(slave->link)); +} +static SLAVE_ATTR_RO(mii_status); + +static ssize_t link_failure_count_show(struct slave *slave, char *buf) +{ +	return sprintf(buf, "%d\n", slave->link_failure_count); +} +static SLAVE_ATTR_RO(link_failure_count); + +static ssize_t perm_hwaddr_show(struct slave *slave, char *buf) +{ +	return sprintf(buf, "%pM\n", slave->perm_hwaddr); +} +static SLAVE_ATTR_RO(perm_hwaddr); + +static ssize_t queue_id_show(struct slave *slave, char *buf) +{ +	return sprintf(buf, "%d\n", slave->queue_id); +} +static SLAVE_ATTR_RO(queue_id); + +static ssize_t ad_aggregator_id_show(struct slave *slave, char *buf) +{ +	const struct aggregator *agg; + +	if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { +		agg = SLAVE_AD_INFO(slave)->port.aggregator; +		if (agg) +			return sprintf(buf, "%d\n", +				       agg->aggregator_identifier); +	} + +	return sprintf(buf, "N/A\n"); +} +static SLAVE_ATTR_RO(ad_aggregator_id); + +static const struct slave_attribute *slave_attrs[] = { +	&slave_attr_state, +	&slave_attr_mii_status, +	&slave_attr_link_failure_count, +	&slave_attr_perm_hwaddr, +	&slave_attr_queue_id, +	&slave_attr_ad_aggregator_id, +	NULL +}; + +#define to_slave_attr(_at) container_of(_at, struct slave_attribute, attr) +#define to_slave(obj)	container_of(obj, struct slave, kobj) + +static ssize_t slave_show(struct kobject *kobj, +			  struct attribute *attr, char *buf) +{ +	struct slave_attribute *slave_attr = to_slave_attr(attr); +	struct slave *slave = to_slave(kobj); + +	return slave_attr->show(slave, buf); +} + +static const struct sysfs_ops slave_sysfs_ops = { +	.show = slave_show, +}; + +static struct kobj_type slave_ktype = { +#ifdef CONFIG_SYSFS +	.sysfs_ops = &slave_sysfs_ops, +#endif +}; + +int bond_sysfs_slave_add(struct slave *slave) +{ +	const struct slave_attribute **a; +	int err; + +	err = kobject_init_and_add(&slave->kobj, &slave_ktype, +				   &(slave->dev->dev.kobj), "bonding_slave"); +	if (err) +		return err; + +	for (a = slave_attrs; *a; ++a) { +		err = sysfs_create_file(&slave->kobj, &((*a)->attr)); +		if (err) { +			kobject_del(&slave->kobj); +			return err; +		} +	} + +	return 0; +} + +void bond_sysfs_slave_del(struct slave *slave) +{ +	const struct slave_attribute **a; + +	for (a = slave_attrs; *a; ++a) +		sysfs_remove_file(&slave->kobj, &((*a)->attr)); + +	kobject_del(&slave->kobj); +} diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index 7ad8bd5cc94..0b4d9cde0b0 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -23,8 +23,11 @@  #include <linux/netpoll.h>  #include <linux/inetdevice.h>  #include <linux/etherdevice.h> +#include <linux/reciprocal_div.h> +  #include "bond_3ad.h"  #include "bond_alb.h" +#include "bond_options.h"  #define DRV_VERSION	"3.7.1"  #define DRV_RELDATE	"April 27, 2011" @@ -33,32 +36,12 @@  #define bond_version DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n" +#define BOND_MAX_VLAN_ENCAP	2  #define BOND_MAX_ARP_TARGETS	16 -#define IS_UP(dev)					   \ -	      ((((dev)->flags & IFF_UP) == IFF_UP)	&& \ -	       netif_running(dev)			&& \ -	       netif_carrier_ok(dev)) +#define BOND_DEFAULT_MIIMON	100  /* - * Checks whether slave is ready for transmit. - */ -#define SLAVE_IS_OK(slave)			        \ -		    (((slave)->dev->flags & IFF_UP)  && \ -		     netif_running((slave)->dev)     && \ -		     ((slave)->link == BOND_LINK_UP) && \ -		     bond_is_active_slave(slave)) - - -#define USES_PRIMARY(mode)				\ -		(((mode) == BOND_MODE_ACTIVEBACKUP) ||	\ -		 ((mode) == BOND_MODE_TLB)          ||	\ -		 ((mode) == BOND_MODE_ALB)) - -#define TX_QUEUE_OVERRIDE(mode)				\ -			(((mode) == BOND_MODE_ACTIVEBACKUP) ||	\ -			 ((mode) == BOND_MODE_ROUNDROBIN)) -/*   * Less bad way to call ioctl from within the kernel; this needs to be   * done some other way to get the call out of interrupt context.   * Needs "ioctl" variable to be supplied by calling context. @@ -71,64 +54,44 @@  	set_fs(fs);			\  	res; }) +#define BOND_MODE(bond) ((bond)->params.mode) +  /* slave list primitives */ -#define bond_to_slave(ptr) list_entry(ptr, struct slave, list) +#define bond_slave_list(bond) (&(bond)->dev->adj_list.lower) + +#define bond_has_slaves(bond) !list_empty(bond_slave_list(bond))  /* IMPORTANT: bond_first/last_slave can return NULL in case of an empty list */  #define bond_first_slave(bond) \ -	list_first_entry_or_null(&(bond)->slave_list, struct slave, list) +	(bond_has_slaves(bond) ? \ +		netdev_adjacent_get_private(bond_slave_list(bond)->next) : \ +		NULL)  #define bond_last_slave(bond) \ -	(list_empty(&(bond)->slave_list) ? NULL : \ -					   bond_to_slave((bond)->slave_list.prev)) - -#define bond_is_first_slave(bond, pos) ((pos)->list.prev == &(bond)->slave_list) -#define bond_is_last_slave(bond, pos) ((pos)->list.next == &(bond)->slave_list) +	(bond_has_slaves(bond) ? \ +		netdev_adjacent_get_private(bond_slave_list(bond)->prev) : \ +		NULL) -/* Since bond_first/last_slave can return NULL, these can return NULL too */ -#define bond_next_slave(bond, pos) \ -	(bond_is_last_slave(bond, pos) ? bond_first_slave(bond) : \ -					 bond_to_slave((pos)->list.next)) - -#define bond_prev_slave(bond, pos) \ -	(bond_is_first_slave(bond, pos) ? bond_last_slave(bond) : \ -					  bond_to_slave((pos)->list.prev)) +/* Caller must have rcu_read_lock */ +#define bond_first_slave_rcu(bond) \ +	netdev_lower_get_first_private_rcu(bond->dev) -/** - * bond_for_each_slave_from - iterate the slaves list from a starting point - * @bond:	the bond holding this list. - * @pos:	current slave. - * @cnt:	counter for max number of moves - * @start:	starting point. - * - * Caller must hold bond->lock - */ -#define bond_for_each_slave_from(bond, pos, cnt, start) \ -	for (cnt = 0, pos = start; pos && cnt < (bond)->slave_cnt; \ -	     cnt++, pos = bond_next_slave(bond, pos)) +#define bond_is_first_slave(bond, pos) (pos == bond_first_slave(bond)) +#define bond_is_last_slave(bond, pos) (pos == bond_last_slave(bond))  /**   * bond_for_each_slave - iterate over all slaves   * @bond:	the bond holding this list   * @pos:	current slave + * @iter:	list_head * iterator   *   * Caller must hold bond->lock   */ -#define bond_for_each_slave(bond, pos) \ -	list_for_each_entry(pos, &(bond)->slave_list, list) +#define bond_for_each_slave(bond, pos, iter) \ +	netdev_for_each_lower_private((bond)->dev, pos, iter)  /* Caller must have rcu_read_lock */ -#define bond_for_each_slave_rcu(bond, pos) \ -	list_for_each_entry_rcu(pos, &(bond)->slave_list, list) - -/** - * bond_for_each_slave_reverse - iterate in reverse from a given position - * @bond:	the bond holding this list - * @pos:	slave to continue from - * - * Caller must hold bond->lock - */ -#define bond_for_each_slave_continue_reverse(bond, pos) \ -	list_for_each_entry_continue_reverse(pos, &(bond)->slave_list, list) +#define bond_for_each_slave_rcu(bond, pos, iter) \ +	netdev_for_each_lower_private_rcu((bond)->dev, pos, iter)  #ifdef CONFIG_NET_POLL_CONTROLLER  extern atomic_t netpoll_block_tx; @@ -176,6 +139,10 @@ struct bond_params {  	int tx_queues;  	int all_slaves_active;  	int resend_igmp; +	int lp_interval; +	int packets_per_slave; +	int tlb_dynamic_lb; +	struct reciprocal_value reciprocal_packets_per_slave;  };  struct bond_parm_tbl { @@ -183,32 +150,32 @@ struct bond_parm_tbl {  	int mode;  }; -#define BOND_MAX_MODENAME_LEN 20 -  struct slave {  	struct net_device *dev; /* first - useful for panic debug */ -	struct list_head list;  	struct bonding *bond; /* our master */  	int    delay; -	unsigned long jiffies; -	unsigned long last_arp_rx; +	/* all three in jiffies */ +	unsigned long last_link_up; +	unsigned long last_rx;  	unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS];  	s8     link;    /* one of BOND_LINK_XXXX */  	s8     new_link;  	u8     backup:1,   /* indicates backup slave. Value corresponds with  			      BOND_STATE_ACTIVE and BOND_STATE_BACKUP */ -	       inactive:1; /* indicates inactive slave */ +	       inactive:1, /* indicates inactive slave */ +	       should_notify:1; /* indicateds whether the state changed */  	u8     duplex;  	u32    original_mtu;  	u32    link_failure_count;  	u32    speed;  	u16    queue_id;  	u8     perm_hwaddr[ETH_ALEN]; -	struct ad_slave_info ad_info; /* HUGE - better to dynamically alloc */ +	struct ad_slave_info *ad_info;  	struct tlb_slave_info tlb_info;  #ifdef CONFIG_NET_POLL_CONTROLLER  	struct netpoll *np;  #endif +	struct kobject kobj;  };  /* @@ -227,7 +194,6 @@ struct slave {   */  struct bonding {  	struct   net_device *dev; /* first - useful for panic debug */ -	struct   list_head slave_list;  	struct   slave *curr_active_slave;  	struct   slave *current_arp_slave;  	struct   slave *primary_slave; @@ -244,8 +210,7 @@ struct bonding {  	char     proc_file_name[IFNAMSIZ];  #endif /* CONFIG_PROC_FS */  	struct   list_head bond_list; -	int      (*xmit_hash_policy)(struct sk_buff *, int); -	u16      rr_tx_counter; +	u32      rr_tx_counter;  	struct   ad_bond_info ad_info;  	struct   alb_bond_info alb_info;  	struct   bond_params params; @@ -267,6 +232,11 @@ struct bonding {  #define bond_slave_get_rtnl(dev) \  	((struct slave *) rtnl_dereference(dev->rx_handler_data)) +struct bond_vlan_tag { +	__be16		vlan_proto; +	unsigned short	vlan_id; +}; +  /**   * Returns NULL if the net_device does not belong to any of the bond's slaves   * @@ -275,36 +245,106 @@ struct bonding {  static inline struct slave *bond_get_slave_by_dev(struct bonding *bond,  						  struct net_device *slave_dev)  { -	struct slave *slave = NULL; - -	bond_for_each_slave(bond, slave) -		if (slave->dev == slave_dev) -			return slave; - -	return NULL; +	return netdev_lower_dev_get_private(bond->dev, slave_dev);  }  static inline struct bonding *bond_get_bond_by_slave(struct slave *slave)  { -	if (!slave || !slave->bond) -		return NULL;  	return slave->bond;  } +static inline bool bond_should_override_tx_queue(struct bonding *bond) +{ +	return BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP || +	       BOND_MODE(bond) == BOND_MODE_ROUNDROBIN; +} +  static inline bool bond_is_lb(const struct bonding *bond)  { -	return (bond->params.mode == BOND_MODE_TLB || -		bond->params.mode == BOND_MODE_ALB); +	return BOND_MODE(bond) == BOND_MODE_TLB || +	       BOND_MODE(bond) == BOND_MODE_ALB; +} + +static inline bool bond_mode_uses_arp(int mode) +{ +	return mode != BOND_MODE_8023AD && mode != BOND_MODE_TLB && +	       mode != BOND_MODE_ALB; +} + +static inline bool bond_mode_uses_primary(int mode) +{ +	return mode == BOND_MODE_ACTIVEBACKUP || mode == BOND_MODE_TLB || +	       mode == BOND_MODE_ALB; +} + +static inline bool bond_uses_primary(struct bonding *bond) +{ +	return bond_mode_uses_primary(BOND_MODE(bond)); +} + +static inline bool bond_slave_is_up(struct slave *slave) +{ +	return netif_running(slave->dev) && netif_carrier_ok(slave->dev);  }  static inline void bond_set_active_slave(struct slave *slave)  { -	slave->backup = 0; +	if (slave->backup) { +		slave->backup = 0; +		rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); +	}  }  static inline void bond_set_backup_slave(struct slave *slave)  { -	slave->backup = 1; +	if (!slave->backup) { +		slave->backup = 1; +		rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); +	} +} + +static inline void bond_set_slave_state(struct slave *slave, +					int slave_state, bool notify) +{ +	if (slave->backup == slave_state) +		return; + +	slave->backup = slave_state; +	if (notify) { +		rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); +		slave->should_notify = 0; +	} else { +		if (slave->should_notify) +			slave->should_notify = 0; +		else +			slave->should_notify = 1; +	} +} + +static inline void bond_slave_state_change(struct bonding *bond) +{ +	struct list_head *iter; +	struct slave *tmp; + +	bond_for_each_slave(bond, tmp, iter) { +		if (tmp->link == BOND_LINK_UP) +			bond_set_active_slave(tmp); +		else if (tmp->link == BOND_LINK_DOWN) +			bond_set_backup_slave(tmp); +	} +} + +static inline void bond_slave_state_notify(struct bonding *bond) +{ +	struct list_head *iter; +	struct slave *tmp; + +	bond_for_each_slave(bond, tmp, iter) { +		if (tmp->should_notify) { +			rtmsg_ifinfo(RTM_NEWLINK, tmp->dev, 0, GFP_ATOMIC); +			tmp->should_notify = 0; +		} +	}  }  static inline int bond_slave_state(struct slave *slave) @@ -317,6 +357,12 @@ static inline bool bond_is_active_slave(struct slave *slave)  	return !bond_slave_state(slave);  } +static inline bool bond_slave_can_tx(struct slave *slave) +{ +	return bond_slave_is_up(slave) && slave->link == BOND_LINK_UP && +	       bond_is_active_slave(slave); +} +  #define BOND_PRI_RESELECT_ALWAYS	0  #define BOND_PRI_RESELECT_BETTER	1  #define BOND_PRI_RESELECT_FAILURE	2 @@ -333,6 +379,14 @@ static inline bool bond_is_active_slave(struct slave *slave)  #define BOND_ARP_VALIDATE_BACKUP	(1 << BOND_STATE_BACKUP)  #define BOND_ARP_VALIDATE_ALL		(BOND_ARP_VALIDATE_ACTIVE | \  					 BOND_ARP_VALIDATE_BACKUP) +#define BOND_ARP_FILTER			(BOND_ARP_VALIDATE_ALL + 1) +#define BOND_ARP_FILTER_ACTIVE		(BOND_ARP_VALIDATE_ACTIVE | \ +					 BOND_ARP_FILTER) +#define BOND_ARP_FILTER_BACKUP		(BOND_ARP_VALIDATE_BACKUP | \ +					 BOND_ARP_FILTER) + +#define BOND_SLAVE_NOTIFY_NOW		true +#define BOND_SLAVE_NOTIFY_LATER		false  static inline int slave_do_arp_validate(struct bonding *bond,  					struct slave *slave) @@ -340,6 +394,16 @@ static inline int slave_do_arp_validate(struct bonding *bond,  	return bond->params.arp_validate & (1 << bond_slave_state(slave));  } +static inline int slave_do_arp_validate_only(struct bonding *bond) +{ +	return bond->params.arp_validate & BOND_ARP_FILTER; +} + +static inline int bond_is_ip_target_ok(__be32 addr) +{ +	return !ipv4_is_lbcast(addr) && !ipv4_is_zeronet(addr); +} +  /* Get the oldest arp which we've received on this slave for bond's   * arp_targets.   */ @@ -359,14 +423,10 @@ static inline unsigned long slave_oldest_target_arp_rx(struct bonding *bond,  static inline unsigned long slave_last_rx(struct bonding *bond,  					struct slave *slave)  { -	if (slave_do_arp_validate(bond, slave)) { -		if (bond->params.arp_all_targets == BOND_ARP_TARGETS_ALL) -			return slave_oldest_target_arp_rx(bond, slave); -		else -			return slave->last_arp_rx; -	} +	if (bond->params.arp_all_targets == BOND_ARP_TARGETS_ALL) +		return slave_oldest_target_arp_rx(bond, slave); -	return slave->dev->last_rx; +	return slave->last_rx;  }  #ifdef CONFIG_NET_POLL_CONTROLLER @@ -385,17 +445,19 @@ static inline void bond_netpoll_send_skb(const struct slave *slave,  }  #endif -static inline void bond_set_slave_inactive_flags(struct slave *slave) +static inline void bond_set_slave_inactive_flags(struct slave *slave, +						 bool notify)  {  	if (!bond_is_lb(slave->bond)) -		bond_set_backup_slave(slave); +		bond_set_slave_state(slave, BOND_STATE_BACKUP, notify);  	if (!slave->bond->params.all_slaves_active)  		slave->inactive = 1;  } -static inline void bond_set_slave_active_flags(struct slave *slave) +static inline void bond_set_slave_active_flags(struct slave *slave, +					       bool notify)  { -	bond_set_active_slave(slave); +	bond_set_slave_state(slave, BOND_STATE_ACTIVE, notify);  	slave->inactive = 0;  } @@ -413,40 +475,32 @@ static inline __be32 bond_confirm_addr(struct net_device *dev, __be32 dst, __be3  	in_dev = __in_dev_get_rcu(dev);  	if (in_dev) -		addr = inet_confirm_addr(in_dev, dst, local, RT_SCOPE_HOST); - +		addr = inet_confirm_addr(dev_net(dev), in_dev, dst, local, +					 RT_SCOPE_HOST);  	rcu_read_unlock();  	return addr;  } -static inline bool slave_can_tx(struct slave *slave) -{ -	if (IS_UP(slave->dev) && slave->link == BOND_LINK_UP && -	    bond_is_active_slave(slave)) -		return true; -	else -		return false; -} - -struct bond_net; +struct bond_net { +	struct net		*net;	/* Associated network namespace */ +	struct list_head	dev_list; +#ifdef CONFIG_PROC_FS +	struct proc_dir_entry	*proc_dir; +#endif +	struct class_attribute	class_attr_bonding_masters; +};  int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave); -struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr); -int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev); -void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id); +void bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev);  int bond_create(struct net *net, const char *name);  int bond_create_sysfs(struct bond_net *net);  void bond_destroy_sysfs(struct bond_net *net);  void bond_prepare_sysfs_group(struct bonding *bond); -int bond_create_slave_symlinks(struct net_device *master, struct net_device *slave); -void bond_destroy_slave_symlinks(struct net_device *master, struct net_device *slave); +int bond_sysfs_slave_add(struct slave *slave); +void bond_sysfs_slave_del(struct slave *slave);  int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev);  int bond_release(struct net_device *bond_dev, struct net_device *slave_dev); -void bond_mii_monitor(struct work_struct *); -void bond_loadbalance_arp_mon(struct work_struct *); -void bond_activebackup_arp_mon(struct work_struct *); -void bond_set_mode_ops(struct bonding *bond, int mode); -int bond_parse_parm(const char *mode_arg, const struct bond_parm_tbl *tbl); +u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb);  void bond_select_active_slave(struct bonding *bond);  void bond_change_active_slave(struct bonding *bond, struct slave *new_active);  void bond_create_debugfs(void); @@ -455,15 +509,16 @@ void bond_debug_register(struct bonding *bond);  void bond_debug_unregister(struct bonding *bond);  void bond_debug_reregister(struct bonding *bond);  const char *bond_mode_name(int mode); - -struct bond_net { -	struct net *		net;	/* Associated network namespace */ -	struct list_head	dev_list; -#ifdef CONFIG_PROC_FS -	struct proc_dir_entry *	proc_dir; -#endif -	struct class_attribute	class_attr_bonding_masters; -}; +void bond_setup(struct net_device *bond_dev); +unsigned int bond_get_num_tx_queues(void); +int bond_netlink_init(void); +void bond_netlink_fini(void); +struct net_device *bond_option_active_slave_get_rcu(struct bonding *bond); +struct net_device *bond_option_active_slave_get(struct bonding *bond); +const char *bond_slave_link_status(s8 link); +bool bond_verify_device_path(struct net_device *start_dev, +			     struct net_device *end_dev, +			     struct bond_vlan_tag *tags);  #ifdef CONFIG_PROC_FS  void bond_create_proc_entry(struct bonding *bond); @@ -491,15 +546,51 @@ static inline void bond_destroy_proc_dir(struct bond_net *bn)  static inline struct slave *bond_slave_has_mac(struct bonding *bond,  					       const u8 *mac)  { +	struct list_head *iter; +	struct slave *tmp; + +	bond_for_each_slave(bond, tmp, iter) +		if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr)) +			return tmp; + +	return NULL; +} + +/* Caller must hold rcu_read_lock() for read */ +static inline struct slave *bond_slave_has_mac_rcu(struct bonding *bond, +					       const u8 *mac) +{ +	struct list_head *iter;  	struct slave *tmp; -	bond_for_each_slave(bond, tmp) +	bond_for_each_slave_rcu(bond, tmp, iter)  		if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr))  			return tmp;  	return NULL;  } +/* Caller must hold rcu_read_lock() for read */ +static inline bool bond_slave_has_mac_rx(struct bonding *bond, const u8 *mac) +{ +	struct list_head *iter; +	struct slave *tmp; +	struct netdev_hw_addr *ha; + +	bond_for_each_slave_rcu(bond, tmp, iter) +		if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr)) +			return true; + +	if (netdev_uc_empty(bond->dev)) +		return false; + +	netdev_for_each_uc_addr(ha, bond->dev) +		if (ether_addr_equal_64bits(mac, ha->addr)) +			return true; + +	return false; +} +  /* Check if the ip is present in arp ip list, or first free slot if ip == 0   * Returns -1 if not found, index if found   */ @@ -519,7 +610,6 @@ static inline int bond_get_targets_ip(__be32 *targets, __be32 ip)  /* exported from bond_main.c */  extern int bond_net_id;  extern const struct bond_parm_tbl bond_lacp_tbl[]; -extern const struct bond_parm_tbl bond_mode_tbl[];  extern const struct bond_parm_tbl xmit_hashtype_tbl[];  extern const struct bond_parm_tbl arp_validate_tbl[];  extern const struct bond_parm_tbl arp_all_targets_tbl[]; @@ -527,4 +617,7 @@ extern const struct bond_parm_tbl fail_over_mac_tbl[];  extern const struct bond_parm_tbl pri_reselect_tbl[];  extern struct bond_parm_tbl ad_select_tbl[]; +/* exported from bond_netlink.c */ +extern struct rtnl_link_ops bond_link_ops; +  #endif /* _LINUX_BONDING_H */  | 
