Diffstat (limited to 'drivers/net/bonding')
| -rw-r--r-- | drivers/net/bonding/Makefile           |    6 |
| -rw-r--r-- | drivers/net/bonding/bond_3ad.c         | 1356 |
| -rw-r--r-- | drivers/net/bonding/bond_3ad.h         |  199 |
| -rw-r--r-- | drivers/net/bonding/bond_alb.c         | 1131 |
| -rw-r--r-- | drivers/net/bonding/bond_alb.h         |   85 |
| -rw-r--r-- | drivers/net/bonding/bond_debugfs.c     |  145 |
| -rw-r--r-- | drivers/net/bonding/bond_ipv6.c        |  220 |
| -rw-r--r-- | drivers/net/bonding/bond_main.c        | 4570 |
| -rw-r--r-- | drivers/net/bonding/bond_netlink.c     |  573 |
| -rw-r--r-- | drivers/net/bonding/bond_options.c     | 1394 |
| -rw-r--r-- | drivers/net/bonding/bond_options.h     |  130 |
| -rw-r--r-- | drivers/net/bonding/bond_procfs.c      |  296 |
| -rw-r--r-- | drivers/net/bonding/bond_sysfs.c       | 1391 |
| -rw-r--r-- | drivers/net/bonding/bond_sysfs_slave.c |  144 |
| -rw-r--r-- | drivers/net/bonding/bonding.h          |  586 |
15 files changed, 6669 insertions, 5557 deletions
diff --git a/drivers/net/bonding/Makefile b/drivers/net/bonding/Makefile
index 6f9c6faef24..6f4e80853ed 100644
--- a/drivers/net/bonding/Makefile
+++ b/drivers/net/bonding/Makefile
@@ -4,8 +4,8 @@
 
 obj-$(CONFIG_BONDING) += bonding.o
 
-bonding-objs := bond_main.o bond_3ad.o bond_alb.o bond_sysfs.o
+bonding-objs := bond_main.o bond_3ad.o bond_alb.o bond_sysfs.o bond_sysfs_slave.o bond_debugfs.o bond_netlink.o bond_options.o
 
-ipv6-$(subst m,y,$(CONFIG_IPV6)) += bond_ipv6.o
-bonding-objs += $(ipv6-y)
+proc-$(CONFIG_PROC_FS) += bond_procfs.o
+bonding-objs += $(proc-y)
 
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 48cf24ff4e6..0dfeaf5da3f 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -34,14 +34,14 @@
 #include "bonding.h"
 #include "bond_3ad.h"
 
-// General definitions
+/* General definitions */
 #define AD_SHORT_TIMEOUT           1
 #define AD_LONG_TIMEOUT            0
 #define AD_STANDBY                 0x2
 #define AD_MAX_TX_IN_SECOND        3
 #define AD_COLLECTOR_MAX_DELAY     0
 
-// Timer definitions(43.4.4 in the 802.3ad standard)
+/* Timer definitions (43.4.4 in the 802.3ad standard) */
 #define AD_FAST_PERIODIC_TIME      1
 #define AD_SLOW_PERIODIC_TIME      30
 #define AD_SHORT_TIMEOUT_TIME      (3*AD_FAST_PERIODIC_TIME)
@@ -49,7 +49,7 @@
 #define AD_CHURN_DETECTION_TIME    60
 #define AD_AGGREGATE_WAIT_TIME     2
 
-// Port state definitions(43.4.2.2 in the 802.3ad standard)
+/* Port state definitions (43.4.2.2 in the 802.3ad standard) */
 #define AD_STATE_LACP_ACTIVITY   0x1
 #define AD_STATE_LACP_TIMEOUT    0x2
 #define AD_STATE_AGGREGATION     0x4
@@ -59,7 +59,9 @@
 #define AD_STATE_DEFAULTED       0x40
 #define AD_STATE_EXPIRED         0x80
 
-// Port Variables definitions used by the State Machines(43.4.7 in the 802.3ad standard)
+/* Port Variables definitions used by the State Machines (43.4.7 in the
+ * 802.3ad standard)
+ */
 #define AD_PORT_BEGIN           0x1
 #define AD_PORT_LACP_ENABLED    0x2
 #define AD_PORT_ACTOR_CHURN     0x4
@@ -71,27 +73,27 @@
 #define AD_PORT_SELECTED        0x100
 #define AD_PORT_MOVED           0x200
 
-// Port Key definitions
-// key is determined according to the link speed, duplex and
-// user key(which is yet not supported)
-//              ------------------------------------------------------------
-// Port key :   | User key                       |      Speed       |Duplex|
-//              ------------------------------------------------------------
-//              16                               6               1 0
+/* Port Key definitions
+ * key is determined according to the link speed, duplex and
+ * user key (which is yet not supported)
+ * --------------------------------------------------------------
+ * Port key :	| User key	| Speed		| Duplex	|
+ * --------------------------------------------------------------
+ * 16		  6		  1		  0
+ */
 #define  AD_DUPLEX_KEY_BITS    0x1
 #define  AD_SPEED_KEY_BITS     0x3E
 #define  AD_USER_KEY_BITS      0xFFC0
 
-//dalloun
 #define     AD_LINK_SPEED_BITMASK_1MBPS       0x1
 #define     AD_LINK_SPEED_BITMASK_10MBPS      0x2
 #define     AD_LINK_SPEED_BITMASK_100MBPS     0x4
 #define     AD_LINK_SPEED_BITMASK_1000MBPS    0x8
 #define     AD_LINK_SPEED_BITMASK_10000MBPS   0x10
-//endalloun
 
-// compare MAC addresses
-#define MAC_ADDRESS_COMPARE(A, B) memcmp(A, B, ETH_ALEN)
+/* compare MAC addresses */
+#define MAC_ADDRESS_EQUAL(A, B)	\
+	ether_addr_equal_64bits((const u8 *)A, (const u8 *)B)
 
 static struct mac_addr null_mac_addr = { {
0, 0, 0, 0, 0, 0 } };  static u16 ad_ticks_per_sec; @@ -99,7 +101,7 @@ static const int ad_delta_in_ticks = (AD_TIMER_INTERVAL * HZ) / 1000;  static const u8 lacpdu_mcast_addr[ETH_ALEN] = MULTICAST_LACPDU_ADDR; -// ================= main 802.3ad protocol functions ================== +/* ================= main 802.3ad protocol functions ================== */  static int ad_lacpdu_send(struct port *port);  static int ad_marker_send(struct port *port, struct bond_marker *marker);  static void ad_mux_machine(struct port *port); @@ -113,13 +115,13 @@ static void ad_initialize_agg(struct aggregator *aggregator);  static void ad_initialize_port(struct port *port, int lacp_fast);  static void ad_enable_collecting_distributing(struct port *port);  static void ad_disable_collecting_distributing(struct port *port); -static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port); -static void ad_marker_response_received(struct bond_marker *marker, struct port *port); +static void ad_marker_info_received(struct bond_marker *marker_info, +				    struct port *port); +static void ad_marker_response_received(struct bond_marker *marker, +					struct port *port); -///////////////////////////////////////////////////////////////////////////////// -// ================= api to bonding and kernel code ================== -///////////////////////////////////////////////////////////////////////////////// +/* ================= api to bonding and kernel code ================== */  /**   * __get_bond_by_port - get the port's bonding struct @@ -136,80 +138,37 @@ static inline struct bonding *__get_bond_by_port(struct port *port)  }  /** - * __get_first_port - get the first port in the bond - * @bond: the bond we're looking at - * - * Return the port of the first slave in @bond, or %NULL if it can't be found. - */ -static inline struct port *__get_first_port(struct bonding *bond) -{ -	if (bond->slave_cnt == 0) -		return NULL; - -	return &(SLAVE_AD_INFO(bond->first_slave).port); -} - -/** - * __get_next_port - get the next port in the bond - * @port: the port we're looking at - * - * Return the port of the slave that is next in line of @port's slave in the - * bond, or %NULL if it can't be found. - */ -static inline struct port *__get_next_port(struct port *port) -{ -	struct bonding *bond = __get_bond_by_port(port); -	struct slave *slave = port->slave; - -	// If there's no bond for this port, or this is the last slave -	if ((bond == NULL) || (slave->next == bond->first_slave)) -		return NULL; - -	return &(SLAVE_AD_INFO(slave->next).port); -} - -/**   * __get_first_agg - get the first aggregator in the bond   * @bond: the bond we're looking at   *   * Return the aggregator of the first slave in @bond, or %NULL if it can't be   * found. + * The caller must hold RCU or RTNL lock.   */  static inline struct aggregator *__get_first_agg(struct port *port)  {  	struct bonding *bond = __get_bond_by_port(port); +	struct slave *first_slave; +	struct aggregator *agg; -	// If there's no bond for this port, or bond has no slaves -	if ((bond == NULL) || (bond->slave_cnt == 0)) +	/* If there's no bond for this port, or bond has no slaves */ +	if (bond == NULL)  		return NULL; -	return &(SLAVE_AD_INFO(bond->first_slave).aggregator); -} - -/** - * __get_next_agg - get the next aggregator in the bond - * @aggregator: the aggregator we're looking at - * - * Return the aggregator of the slave that is next in line of @aggregator's - * slave in the bond, or %NULL if it can't be found. 
- */ -static inline struct aggregator *__get_next_agg(struct aggregator *aggregator) -{ -	struct slave *slave = aggregator->slave; -	struct bonding *bond = bond_get_bond_by_slave(slave); - -	// If there's no bond for this aggregator, or this is the last slave -	if ((bond == NULL) || (slave->next == bond->first_slave)) -		return NULL; +	rcu_read_lock(); +	first_slave = bond_first_slave_rcu(bond); +	agg = first_slave ? &(SLAVE_AD_INFO(first_slave)->aggregator) : NULL; +	rcu_read_unlock(); -	return &(SLAVE_AD_INFO(slave->next).aggregator); +	return agg;  } -/* - * __agg_has_partner +/** + * __agg_has_partner - see if we have a partner + * @agg: the agregator we're looking at   *   * Return nonzero if aggregator has a partner (denoted by a non-zero ether - * address for the partner).  Return 0 if not. + * address for the partner). Return 0 if not.   */  static inline int __agg_has_partner(struct aggregator *agg)  { @@ -219,34 +178,31 @@ static inline int __agg_has_partner(struct aggregator *agg)  /**   * __disable_port - disable the port's slave   * @port: the port we're looking at - *   */  static inline void __disable_port(struct port *port)  { -	bond_set_slave_inactive_flags(port->slave); +	bond_set_slave_inactive_flags(port->slave, BOND_SLAVE_NOTIFY_LATER);  }  /**   * __enable_port - enable the port's slave, if it's up   * @port: the port we're looking at - *   */  static inline void __enable_port(struct port *port)  {  	struct slave *slave = port->slave; -	if ((slave->link == BOND_LINK_UP) && IS_UP(slave->dev)) -		bond_set_slave_active_flags(slave); +	if ((slave->link == BOND_LINK_UP) && bond_slave_is_up(slave)) +		bond_set_slave_active_flags(slave, BOND_SLAVE_NOTIFY_LATER);  }  /**   * __port_is_enabled - check if the port's slave is in active state   * @port: the port we're looking at - *   */  static inline int __port_is_enabled(struct port *port)  { -	return port->slave->state == BOND_STATE_ACTIVE; +	return bond_is_active_slave(port->slave);  }  /** @@ -262,13 +218,12 @@ static inline u32 __get_agg_selection_mode(struct port *port)  	if (bond == NULL)  		return BOND_AD_STABLE; -	return BOND_AD_INFO(bond).agg_select_mode; +	return bond->params.ad_select;  }  /**   * __check_agg_selection_timer - check if the selection timer has expired   * @port: the port we're looking at - *   */  static inline int __check_agg_selection_timer(struct port *port)  { @@ -281,23 +236,21 @@ static inline int __check_agg_selection_timer(struct port *port)  }  /** - * __get_rx_machine_lock - lock the port's RX machine + * __get_state_machine_lock - lock the port's state machines   * @port: the port we're looking at - *   */ -static inline void __get_rx_machine_lock(struct port *port) +static inline void __get_state_machine_lock(struct port *port)  { -	spin_lock_bh(&(SLAVE_AD_INFO(port->slave).rx_machine_lock)); +	spin_lock_bh(&(SLAVE_AD_INFO(port->slave)->state_machine_lock));  }  /** - * __release_rx_machine_lock - unlock the port's RX machine + * __release_state_machine_lock - unlock the port's state machines   * @port: the port we're looking at - *   */ -static inline void __release_rx_machine_lock(struct port *port) +static inline void __release_state_machine_lock(struct port *port)  { -	spin_unlock_bh(&(SLAVE_AD_INFO(port->slave).rx_machine_lock)); +	spin_unlock_bh(&(SLAVE_AD_INFO(port->slave)->state_machine_lock));  }  /** @@ -316,10 +269,11 @@ static u16 __get_link_speed(struct port *port)  	struct slave *slave = port->slave;  	u16 speed; -	/* this if covers only a special case: when the 
configuration starts with -	 * link down, it sets the speed to 0. -	 * This is done in spite of the fact that the e100 driver reports 0 to be -	 * compatible with MVT in the future.*/ +	/* this if covers only a special case: when the configuration starts +	 * with link down, it sets the speed to 0. +	 * This is done in spite of the fact that the e100 driver reports 0 +	 * to be compatible with MVT in the future. +	 */  	if (slave->link != BOND_LINK_UP)  		speed = 0;  	else { @@ -341,7 +295,8 @@ static u16 __get_link_speed(struct port *port)  			break;  		default: -			speed = 0; // unknown speed value from ethtool. shouldn't happen +			/* unknown speed value from ethtool. shouldn't happen */ +			speed = 0;  			break;  		}  	} @@ -365,8 +320,9 @@ static u8 __get_duplex(struct port *port)  	u8 retval; -	//  handling a special case: when the configuration starts with -	// link down, it sets the duplex to 0. +	/* handling a special case: when the configuration starts with +	 * link down, it sets the duplex to 0. +	 */  	if (slave->link != BOND_LINK_UP)  		retval = 0x0;  	else { @@ -388,17 +344,16 @@ static u8 __get_duplex(struct port *port)  }  /** - * __initialize_port_locks - initialize a port's RX machine spinlock - * @port: the port we're looking at - * + * __initialize_port_locks - initialize a port's STATE machine spinlock + * @port: the slave of the port we're looking at   */ -static inline void __initialize_port_locks(struct port *port) +static inline void __initialize_port_locks(struct slave *slave)  { -	// make sure it isn't called twice -	spin_lock_init(&(SLAVE_AD_INFO(port->slave).rx_machine_lock)); +	/* make sure it isn't called twice */ +	spin_lock_init(&(SLAVE_AD_INFO(slave)->state_machine_lock));  } -//conversions +/* Conversions */  /**   * __ad_timer_to_ticks - convert a given timer type to AD module ticks @@ -407,39 +362,38 @@ static inline void __initialize_port_locks(struct port *port)   *   * If @timer_type is %current_while_timer, @par indicates long/short timer.   * If @timer_type is %periodic_timer, @par is one of %FAST_PERIODIC_TIME, - *						    %SLOW_PERIODIC_TIME. + *						     %SLOW_PERIODIC_TIME.   
*/  static u16 __ad_timer_to_ticks(u16 timer_type, u16 par)  {  	u16 retval = 0; /* to silence the compiler */  	switch (timer_type) { -	case AD_CURRENT_WHILE_TIMER:   // for rx machine usage +	case AD_CURRENT_WHILE_TIMER:	/* for rx machine usage */  		if (par) -			retval = (AD_SHORT_TIMEOUT_TIME*ad_ticks_per_sec); // short timeout +			retval = (AD_SHORT_TIMEOUT_TIME*ad_ticks_per_sec);  		else -			retval = (AD_LONG_TIMEOUT_TIME*ad_ticks_per_sec); // long timeout +			retval = (AD_LONG_TIMEOUT_TIME*ad_ticks_per_sec);  		break; -	case AD_ACTOR_CHURN_TIMER:	    // for local churn machine +	case AD_ACTOR_CHURN_TIMER:	/* for local churn machine */  		retval = (AD_CHURN_DETECTION_TIME*ad_ticks_per_sec);  		break; -	case AD_PERIODIC_TIMER:	    // for periodic machine -		retval = (par*ad_ticks_per_sec); // long timeout +	case AD_PERIODIC_TIMER:		/* for periodic machine */ +		retval = (par*ad_ticks_per_sec); /* long timeout */  		break; -	case AD_PARTNER_CHURN_TIMER:   // for remote churn machine +	case AD_PARTNER_CHURN_TIMER:	/* for remote churn machine */  		retval = (AD_CHURN_DETECTION_TIME*ad_ticks_per_sec);  		break; -	case AD_WAIT_WHILE_TIMER:	    // for selection machine +	case AD_WAIT_WHILE_TIMER:	/* for selection machine */  		retval = (AD_AGGREGATE_WAIT_TIME*ad_ticks_per_sec);  		break;  	} +  	return retval;  } -///////////////////////////////////////////////////////////////////////////////// -// ================= ad_rx_machine helper functions ================== -///////////////////////////////////////////////////////////////////////////////// +/* ================= ad_rx_machine helper functions ================== */  /**   * __choose_matched - update a port's matched variable from a received lacpdu @@ -466,17 +420,18 @@ static u16 __ad_timer_to_ticks(u16 timer_type, u16 par)   */  static void __choose_matched(struct lacpdu *lacpdu, struct port *port)  { -	// check if all parameters are alike +	/* check if all parameters are alike +	 * or this is individual link(aggregation == FALSE) +	 * then update the state machine Matched variable. 
+	 */  	if (((ntohs(lacpdu->partner_port) == port->actor_port_number) &&  	     (ntohs(lacpdu->partner_port_priority) == port->actor_port_priority) && -	     !MAC_ADDRESS_COMPARE(&(lacpdu->partner_system), &(port->actor_system)) && +	     MAC_ADDRESS_EQUAL(&(lacpdu->partner_system), &(port->actor_system)) &&  	     (ntohs(lacpdu->partner_system_priority) == port->actor_system_priority) &&  	     (ntohs(lacpdu->partner_key) == port->actor_oper_port_key) &&  	     ((lacpdu->partner_state & AD_STATE_AGGREGATION) == (port->actor_oper_port_state & AD_STATE_AGGREGATION))) || -	    // or this is individual link(aggregation == FALSE)  	    ((lacpdu->actor_state & AD_STATE_AGGREGATION) == 0)  		) { -		// update the state machine Matched variable  		port->sm_vars |= AD_PORT_MATCHED;  	} else {  		port->sm_vars &= ~AD_PORT_MATCHED; @@ -498,7 +453,9 @@ static void __record_pdu(struct lacpdu *lacpdu, struct port *port)  		struct port_params *partner = &port->partner_oper;  		__choose_matched(lacpdu, port); -		// record the new parameter values for the partner operational +		/* record the new parameter values for the partner +		 * operational +		 */  		partner->port_number = ntohs(lacpdu->actor_port);  		partner->port_priority = ntohs(lacpdu->actor_port_priority);  		partner->system = lacpdu->actor_system; @@ -506,10 +463,12 @@ static void __record_pdu(struct lacpdu *lacpdu, struct port *port)  		partner->key = ntohs(lacpdu->actor_key);  		partner->port_state = lacpdu->actor_state; -		// set actor_oper_port_state.defaulted to FALSE +		/* set actor_oper_port_state.defaulted to FALSE */  		port->actor_oper_port_state &= ~AD_STATE_DEFAULTED; -		// set the partner sync. to on if the partner is sync. and the port is matched +		/* set the partner sync. to on if the partner is sync, +		 * and the port is matched +		 */  		if ((port->sm_vars & AD_PORT_MATCHED)  		    && (lacpdu->actor_state & AD_STATE_SYNCHRONIZATION))  			partner->port_state |= AD_STATE_SYNCHRONIZATION; @@ -529,11 +488,11 @@ static void __record_pdu(struct lacpdu *lacpdu, struct port *port)  static void __record_default(struct port *port)  {  	if (port) { -		// record the partner admin parameters +		/* record the partner admin parameters */  		memcpy(&port->partner_oper, &port->partner_admin,  		       sizeof(struct port_params)); -		// set actor_oper_port_state.defaulted to true +		/* set actor_oper_port_state.defaulted to true */  		port->actor_oper_port_state |= AD_STATE_DEFAULTED;  	}  } @@ -556,14 +515,15 @@ static void __update_selected(struct lacpdu *lacpdu, struct port *port)  	if (lacpdu && port) {  		const struct port_params *partner = &port->partner_oper; -		// check if any parameter is different +		/* check if any parameter is different then +		 * update the state machine selected variable. 
+		 */  		if (ntohs(lacpdu->actor_port) != partner->port_number ||  		    ntohs(lacpdu->actor_port_priority) != partner->port_priority || -		    MAC_ADDRESS_COMPARE(&lacpdu->actor_system, &partner->system) || +		    !MAC_ADDRESS_EQUAL(&lacpdu->actor_system, &partner->system) ||  		    ntohs(lacpdu->actor_system_priority) != partner->system_priority ||  		    ntohs(lacpdu->actor_key) != partner->key ||  		    (lacpdu->actor_state & AD_STATE_AGGREGATION) != (partner->port_state & AD_STATE_AGGREGATION)) { -			// update the state machine Selected variable  			port->sm_vars &= ~AD_PORT_SELECTED;  		}  	} @@ -587,15 +547,16 @@ static void __update_default_selected(struct port *port)  		const struct port_params *admin = &port->partner_admin;  		const struct port_params *oper = &port->partner_oper; -		// check if any parameter is different +		/* check if any parameter is different then +		 * update the state machine selected variable. +		 */  		if (admin->port_number != oper->port_number ||  		    admin->port_priority != oper->port_priority || -		    MAC_ADDRESS_COMPARE(&admin->system, &oper->system) || +		    !MAC_ADDRESS_EQUAL(&admin->system, &oper->system) ||  		    admin->system_priority != oper->system_priority ||  		    admin->key != oper->key ||  		    (admin->port_state & AD_STATE_AGGREGATION)  			!= (oper->port_state & AD_STATE_AGGREGATION)) { -			// update the state machine Selected variable  			port->sm_vars &= ~AD_PORT_SELECTED;  		}  	} @@ -615,12 +576,14 @@ static void __update_default_selected(struct port *port)   */  static void __update_ntt(struct lacpdu *lacpdu, struct port *port)  { -	// validate lacpdu and port +	/* validate lacpdu and port */  	if (lacpdu && port) { -		// check if any parameter is different +		/* check if any parameter is different then +		 * update the port->ntt. +		 */  		if ((ntohs(lacpdu->partner_port) != port->actor_port_number) ||  		    (ntohs(lacpdu->partner_port_priority) != port->actor_port_priority) || -		    MAC_ADDRESS_COMPARE(&(lacpdu->partner_system), &(port->actor_system)) || +		    !MAC_ADDRESS_EQUAL(&(lacpdu->partner_system), &(port->actor_system)) ||  		    (ntohs(lacpdu->partner_system_priority) != port->actor_system_priority) ||  		    (ntohs(lacpdu->partner_key) != port->actor_oper_port_key) ||  		    ((lacpdu->partner_state & AD_STATE_LACP_ACTIVITY) != (port->actor_oper_port_state & AD_STATE_LACP_ACTIVITY)) || @@ -628,43 +591,12 @@ static void __update_ntt(struct lacpdu *lacpdu, struct port *port)  		    ((lacpdu->partner_state & AD_STATE_SYNCHRONIZATION) != (port->actor_oper_port_state & AD_STATE_SYNCHRONIZATION)) ||  		    ((lacpdu->partner_state & AD_STATE_AGGREGATION) != (port->actor_oper_port_state & AD_STATE_AGGREGATION))  		   ) { -  			port->ntt = true;  		}  	}  }  /** - * __attach_bond_to_agg - * @port: the port we're looking at - * - * Handle the attaching of the port's control parser/multiplexer and the - * aggregator. This function does nothing since the parser/multiplexer of the - * receive and the parser/multiplexer of the aggregator are already combined. - */ -static void __attach_bond_to_agg(struct port *port) -{ -	port = NULL; /* just to satisfy the compiler */ -	// This function does nothing since the parser/multiplexer of the receive -	// and the parser/multiplexer of the aggregator are already combined -} - -/** - * __detach_bond_from_agg - * @port: the port we're looking at - * - * Handle the detaching of the port's control parser/multiplexer from the - * aggregator. 
This function does nothing since the parser/multiplexer of the - * receive and the parser/multiplexer of the aggregator are already combined. - */ -static void __detach_bond_from_agg(struct port *port) -{ -	port = NULL; /* just to satisfy the compiler */ -	// This function does nothing sience the parser/multiplexer of the receive -	// and the parser/multiplexer of the aggregator are already combined -} - -/**   * __agg_ports_are_ready - check if all ports in an aggregator are ready   * @aggregator: the aggregator we're looking at   * @@ -675,7 +607,9 @@ static int __agg_ports_are_ready(struct aggregator *aggregator)  	int retval = 1;  	if (aggregator) { -		// scan all ports in this aggregator to verfy if they are all ready +		/* scan all ports in this aggregator to verfy if they are +		 * all ready. +		 */  		for (port = aggregator->lag_ports;  		     port;  		     port = port->next_port_in_aggregator) { @@ -716,11 +650,9 @@ static void __set_agg_ports_ready(struct aggregator *aggregator, int val)  static u32 __get_agg_bandwidth(struct aggregator *aggregator)  {  	u32 bandwidth = 0; -	u32 basic_speed;  	if (aggregator->num_of_ports) { -		basic_speed = __get_link_speed(aggregator->lag_ports); -		switch (basic_speed) { +		switch (__get_link_speed(aggregator->lag_ports)) {  		case AD_LINK_SPEED_BITMASK_1MBPS:  			bandwidth = aggregator->num_of_ports;  			break; @@ -737,7 +669,7 @@ static u32 __get_agg_bandwidth(struct aggregator *aggregator)  			bandwidth = aggregator->num_of_ports * 10000;  			break;  		default: -			bandwidth = 0; /*to silence the compiler ....*/ +			bandwidth = 0; /* to silence the compiler */  		}  	}  	return bandwidth; @@ -747,33 +679,32 @@ static u32 __get_agg_bandwidth(struct aggregator *aggregator)   * __get_active_agg - get the current active aggregator   * @aggregator: the aggregator we're looking at   * + * Caller must hold RCU lock.   
*/  static struct aggregator *__get_active_agg(struct aggregator *aggregator)  { -	struct aggregator *retval = NULL; +	struct bonding *bond = aggregator->slave->bond; +	struct list_head *iter; +	struct slave *slave; -	for (; aggregator; aggregator = __get_next_agg(aggregator)) { -		if (aggregator->is_active) { -			retval = aggregator; -			break; -		} -	} +	bond_for_each_slave_rcu(bond, slave, iter) +		if (SLAVE_AD_INFO(slave)->aggregator.is_active) +			return &(SLAVE_AD_INFO(slave)->aggregator); -	return retval; +	return NULL;  }  /**   * __update_lacpdu_from_port - update a port's lacpdu fields   * @port: the port we're looking at - *   */  static inline void __update_lacpdu_from_port(struct port *port)  {  	struct lacpdu *lacpdu = &port->lacpdu;  	const struct port_params *partner = &port->partner_oper; -	/* update current actual Actor parameters */ -	/* lacpdu->subtype                   initialized +	/* update current actual Actor parameters +	 * lacpdu->subtype                   initialized  	 * lacpdu->version_number            initialized  	 * lacpdu->tlv_type_actor_info       initialized  	 * lacpdu->actor_information_length  initialized @@ -809,9 +740,7 @@ static inline void __update_lacpdu_from_port(struct port *port)  	 */  } -////////////////////////////////////////////////////////////////////////////////////// -// ================= main 802.3ad protocol code ====================================== -////////////////////////////////////////////////////////////////////////////////////// +/* ================= main 802.3ad protocol code ========================= */  /**   * ad_lacpdu_send - send out a lacpdu packet on a given port @@ -839,13 +768,14 @@ static int ad_lacpdu_send(struct port *port)  	lacpdu_header = (struct lacpdu_header *)skb_put(skb, length); -	memcpy(lacpdu_header->hdr.h_dest, lacpdu_mcast_addr, ETH_ALEN); -	/* Note: source addres is set to be the member's PERMANENT address, -	   because we use it to identify loopback lacpdus in receive. */ -	memcpy(lacpdu_header->hdr.h_source, slave->perm_hwaddr, ETH_ALEN); +	ether_addr_copy(lacpdu_header->hdr.h_dest, lacpdu_mcast_addr); +	/* Note: source address is set to be the member's PERMANENT address, +	 * because we use it to identify loopback lacpdus in receive. +	 */ +	ether_addr_copy(lacpdu_header->hdr.h_source, slave->perm_hwaddr);  	lacpdu_header->hdr.h_proto = PKT_TYPE_LACPDU; -	lacpdu_header->lacpdu = port->lacpdu; // struct copy +	lacpdu_header->lacpdu = port->lacpdu;  	dev_queue_xmit(skb); @@ -880,13 +810,14 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker)  	marker_header = (struct bond_marker_header *)skb_put(skb, length); -	memcpy(marker_header->hdr.h_dest, lacpdu_mcast_addr, ETH_ALEN); -	/* Note: source addres is set to be the member's PERMANENT address, -	   because we use it to identify loopback MARKERs in receive. */ -	memcpy(marker_header->hdr.h_source, slave->perm_hwaddr, ETH_ALEN); +	ether_addr_copy(marker_header->hdr.h_dest, lacpdu_mcast_addr); +	/* Note: source address is set to be the member's PERMANENT address, +	 * because we use it to identify loopback MARKERs in receive. 
+	 */ +	ether_addr_copy(marker_header->hdr.h_source, slave->perm_hwaddr);  	marker_header->hdr.h_proto = PKT_TYPE_LACPDU; -	marker_header->marker = *marker; // struct copy +	marker_header->marker = *marker;  	dev_queue_xmit(skb); @@ -896,72 +827,90 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker)  /**   * ad_mux_machine - handle a port's mux state machine   * @port: the port we're looking at - *   */  static void ad_mux_machine(struct port *port)  {  	mux_states_t last_state; -	// keep current State Machine state to compare later if it was changed +	/* keep current State Machine state to compare later if it was +	 * changed +	 */  	last_state = port->sm_mux_state;  	if (port->sm_vars & AD_PORT_BEGIN) { -		port->sm_mux_state = AD_MUX_DETACHED;		 // next state +		port->sm_mux_state = AD_MUX_DETACHED;  	} else {  		switch (port->sm_mux_state) {  		case AD_MUX_DETACHED:  			if ((port->sm_vars & AD_PORT_SELECTED)  			    || (port->sm_vars & AD_PORT_STANDBY))  				/* if SELECTED or STANDBY */ -				port->sm_mux_state = AD_MUX_WAITING; // next state +				port->sm_mux_state = AD_MUX_WAITING;  			break;  		case AD_MUX_WAITING: -			// if SELECTED == FALSE return to DETACH state -			if (!(port->sm_vars & AD_PORT_SELECTED)) { // if UNSELECTED +			/* if SELECTED == FALSE return to DETACH state */ +			if (!(port->sm_vars & AD_PORT_SELECTED)) {  				port->sm_vars &= ~AD_PORT_READY_N; -				// in order to withhold the Selection Logic to check all ports READY_N value -				// every callback cycle to update ready variable, we check READY_N and update READY here +				/* in order to withhold the Selection Logic to +				 * check all ports READY_N value every callback +				 * cycle to update ready variable, we check +				 * READY_N and update READY here +				 */  				__set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); -				port->sm_mux_state = AD_MUX_DETACHED;	 // next state +				port->sm_mux_state = AD_MUX_DETACHED;  				break;  			} -			// check if the wait_while_timer expired +			/* check if the wait_while_timer expired */  			if (port->sm_mux_timer_counter  			    && !(--port->sm_mux_timer_counter))  				port->sm_vars |= AD_PORT_READY_N; -			// in order to withhold the selection logic to check all ports READY_N value -			// every callback cycle to update ready variable, we check READY_N and update READY here +			/* in order to withhold the selection logic to check +			 * all ports READY_N value every callback cycle to +			 * update ready variable, we check READY_N and update +			 * READY here +			 */  			__set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); -			// if the wait_while_timer expired, and the port is in READY state, move to ATTACHED state +			/* if the wait_while_timer expired, and the port is +			 * in READY state, move to ATTACHED state +			 */  			if ((port->sm_vars & AD_PORT_READY)  			    && !port->sm_mux_timer_counter) -				port->sm_mux_state = AD_MUX_ATTACHED;	 // next state +				port->sm_mux_state = AD_MUX_ATTACHED;  			break;  		case AD_MUX_ATTACHED: -			// check also if agg_select_timer expired(so the edable port will take place only after this timer) -			if ((port->sm_vars & AD_PORT_SELECTED) && (port->partner_oper.port_state & AD_STATE_SYNCHRONIZATION) && !__check_agg_selection_timer(port)) { -				port->sm_mux_state = AD_MUX_COLLECTING_DISTRIBUTING;// next state -			} else if (!(port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY)) {	  // if UNSELECTED or STANDBY +			/* check also if 
agg_select_timer expired (so the +			 * edable port will take place only after this timer) +			 */ +			if ((port->sm_vars & AD_PORT_SELECTED) && +			    (port->partner_oper.port_state & AD_STATE_SYNCHRONIZATION) && +			    !__check_agg_selection_timer(port)) { +				port->sm_mux_state = AD_MUX_COLLECTING_DISTRIBUTING; +			} else if (!(port->sm_vars & AD_PORT_SELECTED) || +				   (port->sm_vars & AD_PORT_STANDBY)) { +				/* if UNSELECTED or STANDBY */  				port->sm_vars &= ~AD_PORT_READY_N; -				// in order to withhold the selection logic to check all ports READY_N value -				// every callback cycle to update ready variable, we check READY_N and update READY here +				/* in order to withhold the selection logic to +				 * check all ports READY_N value every callback +				 * cycle to update ready variable, we check +				 * READY_N and update READY here +				 */  				__set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); -				port->sm_mux_state = AD_MUX_DETACHED;// next state +				port->sm_mux_state = AD_MUX_DETACHED;  			}  			break;  		case AD_MUX_COLLECTING_DISTRIBUTING: -			if (!(port->sm_vars & AD_PORT_SELECTED) || (port->sm_vars & AD_PORT_STANDBY) || -			    !(port->partner_oper.port_state & AD_STATE_SYNCHRONIZATION) -			   ) { -				port->sm_mux_state = AD_MUX_ATTACHED;// next state - +			if (!(port->sm_vars & AD_PORT_SELECTED) || +			    (port->sm_vars & AD_PORT_STANDBY) || +			    !(port->partner_oper.port_state & AD_STATE_SYNCHRONIZATION)) { +				port->sm_mux_state = AD_MUX_ATTACHED;  			} else { -				// if port state hasn't changed make -				// sure that a collecting distributing -				// port in an active aggregator is enabled +				/* if port state hasn't changed make +				 * sure that a collecting distributing +				 * port in an active aggregator is enabled +				 */  				if (port->aggregator &&  				    port->aggregator->is_active &&  				    !__port_is_enabled(port)) { @@ -970,19 +919,18 @@ static void ad_mux_machine(struct port *port)  				}  			}  			break; -		default:    //to silence the compiler +		default:  			break;  		}  	} -	// check if the state machine was changed +	/* check if the state machine was changed */  	if (port->sm_mux_state != last_state) {  		pr_debug("Mux Machine: Port=%d, Last State=%d, Curr State=%d\n",  			 port->actor_port_number, last_state,  			 port->sm_mux_state);  		switch (port->sm_mux_state) {  		case AD_MUX_DETACHED: -			__detach_bond_from_agg(port);  			port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION;  			ad_disable_collecting_distributing(port);  			port->actor_oper_port_state &= ~AD_STATE_COLLECTING; @@ -993,7 +941,6 @@ static void ad_mux_machine(struct port *port)  			port->sm_mux_timer_counter = __ad_timer_to_ticks(AD_WAIT_WHILE_TIMER, 0);  			break;  		case AD_MUX_ATTACHED: -			__attach_bond_to_agg(port);  			port->actor_oper_port_state |= AD_STATE_SYNCHRONIZATION;  			port->actor_oper_port_state &= ~AD_STATE_COLLECTING;  			port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING; @@ -1006,7 +953,7 @@ static void ad_mux_machine(struct port *port)  			ad_enable_collecting_distributing(port);  			port->ntt = true;  			break; -		default:    //to silence the compiler +		default:  			break;  		}  	} @@ -1025,62 +972,63 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)  {  	rx_states_t last_state; -	// Lock to prevent 2 instances of this function to run simultaneously(rx interrupt and periodic machine callback) -	__get_rx_machine_lock(port); - -	// keep current State Machine state to compare 
later if it was changed +	/* keep current State Machine state to compare later if it was +	 * changed +	 */  	last_state = port->sm_rx_state; -	// check if state machine should change state -	// first, check if port was reinitialized +	/* check if state machine should change state */ + +	/* first, check if port was reinitialized */  	if (port->sm_vars & AD_PORT_BEGIN) -		/* next state */  		port->sm_rx_state = AD_RX_INITIALIZE; -	// check if port is not enabled +	/* check if port is not enabled */  	else if (!(port->sm_vars & AD_PORT_BEGIN)  		 && !port->is_enabled && !(port->sm_vars & AD_PORT_MOVED)) -		/* next state */  		port->sm_rx_state = AD_RX_PORT_DISABLED; -	// check if new lacpdu arrived -	else if (lacpdu && ((port->sm_rx_state == AD_RX_EXPIRED) || (port->sm_rx_state == AD_RX_DEFAULTED) || (port->sm_rx_state == AD_RX_CURRENT))) { -		port->sm_rx_timer_counter = 0; // zero timer +	/* check if new lacpdu arrived */ +	else if (lacpdu && ((port->sm_rx_state == AD_RX_EXPIRED) || +		 (port->sm_rx_state == AD_RX_DEFAULTED) || +		 (port->sm_rx_state == AD_RX_CURRENT))) { +		port->sm_rx_timer_counter = 0;  		port->sm_rx_state = AD_RX_CURRENT;  	} else { -		// if timer is on, and if it is expired -		if (port->sm_rx_timer_counter && !(--port->sm_rx_timer_counter)) { +		/* if timer is on, and if it is expired */ +		if (port->sm_rx_timer_counter && +		    !(--port->sm_rx_timer_counter)) {  			switch (port->sm_rx_state) {  			case AD_RX_EXPIRED: -				port->sm_rx_state = AD_RX_DEFAULTED;		// next state +				port->sm_rx_state = AD_RX_DEFAULTED;  				break;  			case AD_RX_CURRENT: -				port->sm_rx_state = AD_RX_EXPIRED;	    // next state +				port->sm_rx_state = AD_RX_EXPIRED;  				break; -			default:    //to silence the compiler +			default:  				break;  			}  		} else { -			// if no lacpdu arrived and no timer is on +			/* if no lacpdu arrived and no timer is on */  			switch (port->sm_rx_state) {  			case AD_RX_PORT_DISABLED:  				if (port->sm_vars & AD_PORT_MOVED) -					port->sm_rx_state = AD_RX_INITIALIZE;	    // next state +					port->sm_rx_state = AD_RX_INITIALIZE;  				else if (port->is_enabled  					 && (port->sm_vars  					     & AD_PORT_LACP_ENABLED)) -					port->sm_rx_state = AD_RX_EXPIRED;	// next state +					port->sm_rx_state = AD_RX_EXPIRED;  				else if (port->is_enabled  					 && ((port->sm_vars  					      & AD_PORT_LACP_ENABLED) == 0)) -					port->sm_rx_state = AD_RX_LACP_DISABLED;    // next state +					port->sm_rx_state = AD_RX_LACP_DISABLED;  				break; -			default:    //to silence the compiler +			default:  				break;  			}  		}  	} -	// check if the State machine was changed or new lacpdu arrived +	/* check if the State machine was changed or new lacpdu arrived */  	if ((port->sm_rx_state != last_state) || (lacpdu)) {  		pr_debug("Rx Machine: Port=%d, Last State=%d, Curr State=%d\n",  			 port->actor_port_number, last_state, @@ -1095,10 +1043,9 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)  			__record_default(port);  			port->actor_oper_port_state &= ~AD_STATE_EXPIRED;  			port->sm_vars &= ~AD_PORT_MOVED; -			port->sm_rx_state = AD_RX_PORT_DISABLED;	// next state - -			/*- Fall Through -*/ +			port->sm_rx_state = AD_RX_PORT_DISABLED; +			/* Fall Through */  		case AD_RX_PORT_DISABLED:  			port->sm_vars &= ~AD_PORT_MATCHED;  			break; @@ -1110,13 +1057,15 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)  			port->actor_oper_port_state &= ~AD_STATE_EXPIRED;  			break;  		case AD_RX_EXPIRED: -			//Reset of the Synchronization 
flag. (Standard 43.4.12) -			//This reset cause to disable this port in the COLLECTING_DISTRIBUTING state of the -			//mux machine in case of EXPIRED even if LINK_DOWN didn't arrive for the port. +			/* Reset of the Synchronization flag (Standard 43.4.12) +			 * This reset cause to disable this port in the +			 * COLLECTING_DISTRIBUTING state of the mux machine in +			 * case of EXPIRED even if LINK_DOWN didn't arrive for +			 * the port. +			 */  			port->partner_oper.port_state &= ~AD_STATE_SYNCHRONIZATION;  			port->sm_vars &= ~AD_PORT_MATCHED; -			port->partner_oper.port_state |= -				AD_STATE_LACP_ACTIVITY; +			port->partner_oper.port_state |= AD_STATE_LACP_ACTIVITY;  			port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(AD_SHORT_TIMEOUT));  			port->actor_oper_port_state |= AD_STATE_EXPIRED;  			break; @@ -1127,13 +1076,13 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)  			port->actor_oper_port_state &= ~AD_STATE_EXPIRED;  			break;  		case AD_RX_CURRENT: -			// detect loopback situation -			if (!MAC_ADDRESS_COMPARE(&(lacpdu->actor_system), &(port->actor_system))) { -				// INFO_RECEIVED_LOOPBACK_FRAMES -				pr_err("%s: An illegal loopback occurred on adapter (%s).\n" +			/* detect loopback situation */ +			if (MAC_ADDRESS_EQUAL(&(lacpdu->actor_system), +					      &(port->actor_system))) { +				pr_err("%s: An illegal loopback occurred on adapter (%s)\n"  				       "Check the configuration to verify that all adapters are connected to 802.3ad compliant switch ports\n", -				       port->slave->dev->master->name, port->slave->dev->name); -				__release_rx_machine_lock(port); +				       port->slave->bond->dev->name, +				       port->slave->dev->name);  				return;  			}  			__update_selected(lacpdu, port); @@ -1141,31 +1090,24 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)  			__record_pdu(lacpdu, port);  			port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(port->actor_oper_port_state & AD_STATE_LACP_TIMEOUT));  			port->actor_oper_port_state &= ~AD_STATE_EXPIRED; -			// verify that if the aggregator is enabled, the port is enabled too. 
-			//(because if the link goes down for a short time, the 802.3ad will not -			// catch it, and the port will continue to be disabled) -			if (port->aggregator -			    && port->aggregator->is_active -			    && !__port_is_enabled(port)) -				__enable_port(port);  			break; -		default:    //to silence the compiler +		default:  			break;  		}  	} -	__release_rx_machine_lock(port);  }  /**   * ad_tx_machine - handle a port's tx state machine   * @port: the port we're looking at - *   */  static void ad_tx_machine(struct port *port)  { -	// check if tx timer expired, to verify that we do not send more than 3 packets per second +	/* check if tx timer expired, to verify that we do not send more than +	 * 3 packets per second +	 */  	if (port->sm_tx_timer_counter && !(--port->sm_tx_timer_counter)) { -		// check if there is something to send +		/* check if there is something to send */  		if (port->ntt && (port->sm_vars & AD_PORT_LACP_ENABLED)) {  			__update_lacpdu_from_port(port); @@ -1173,14 +1115,16 @@ static void ad_tx_machine(struct port *port)  				pr_debug("Sent LACPDU on port %d\n",  					 port->actor_port_number); -				/* mark ntt as false, so it will not be sent again until -				   demanded */ +				/* mark ntt as false, so it will not be sent +				 * again until demanded +				 */  				port->ntt = false;  			}  		} -		// restart tx timer(to verify that we will not exceed AD_MAX_TX_IN_SECOND -		port->sm_tx_timer_counter = -			ad_ticks_per_sec/AD_MAX_TX_IN_SECOND; +		/* restart tx timer(to verify that we will not exceed +		 * AD_MAX_TX_IN_SECOND +		 */ +		port->sm_tx_timer_counter = ad_ticks_per_sec/AD_MAX_TX_IN_SECOND;  	}  } @@ -1194,76 +1138,79 @@ static void ad_periodic_machine(struct port *port)  {  	periodic_states_t last_state; -	// keep current state machine state to compare later if it was changed +	/* keep current state machine state to compare later if it was changed */  	last_state = port->sm_periodic_state; -	// check if port was reinitialized +	/* check if port was reinitialized */  	if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) ||  	    (!(port->actor_oper_port_state & AD_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & AD_STATE_LACP_ACTIVITY))  	   ) { -		port->sm_periodic_state = AD_NO_PERIODIC;	     // next state +		port->sm_periodic_state = AD_NO_PERIODIC;  	} -	// check if state machine should change state +	/* check if state machine should change state */  	else if (port->sm_periodic_timer_counter) { -		// check if periodic state machine expired +		/* check if periodic state machine expired */  		if (!(--port->sm_periodic_timer_counter)) { -			// if expired then do tx -			port->sm_periodic_state = AD_PERIODIC_TX;    // next state +			/* if expired then do tx */ +			port->sm_periodic_state = AD_PERIODIC_TX;  		} else { -			// If not expired, check if there is some new timeout parameter from the partner state +			/* If not expired, check if there is some new timeout +			 * parameter from the partner state +			 */  			switch (port->sm_periodic_state) {  			case AD_FAST_PERIODIC:  				if (!(port->partner_oper.port_state  				      & AD_STATE_LACP_TIMEOUT)) -					port->sm_periodic_state = AD_SLOW_PERIODIC;  // next state +					port->sm_periodic_state = AD_SLOW_PERIODIC;  				break;  			case AD_SLOW_PERIODIC:  				if ((port->partner_oper.port_state & AD_STATE_LACP_TIMEOUT)) { -					// stop current timer  					port->sm_periodic_timer_counter = 0; -					port->sm_periodic_state = AD_PERIODIC_TX;	 // next state +			
		port->sm_periodic_state = AD_PERIODIC_TX;  				}  				break; -			default:    //to silence the compiler +			default:  				break;  			}  		}  	} else {  		switch (port->sm_periodic_state) {  		case AD_NO_PERIODIC: -			port->sm_periodic_state = AD_FAST_PERIODIC;	 // next state +			port->sm_periodic_state = AD_FAST_PERIODIC;  			break;  		case AD_PERIODIC_TX: -			if (!(port->partner_oper.port_state -			      & AD_STATE_LACP_TIMEOUT)) -				port->sm_periodic_state = AD_SLOW_PERIODIC;  // next state +			if (!(port->partner_oper.port_state & +			    AD_STATE_LACP_TIMEOUT)) +				port->sm_periodic_state = AD_SLOW_PERIODIC;  			else -				port->sm_periodic_state = AD_FAST_PERIODIC;  // next state +				port->sm_periodic_state = AD_FAST_PERIODIC;  			break; -		default:    //to silence the compiler +		default:  			break;  		}  	} -	// check if the state machine was changed +	/* check if the state machine was changed */  	if (port->sm_periodic_state != last_state) {  		pr_debug("Periodic Machine: Port=%d, Last State=%d, Curr State=%d\n",  			 port->actor_port_number, last_state,  			 port->sm_periodic_state);  		switch (port->sm_periodic_state) {  		case AD_NO_PERIODIC: -			port->sm_periodic_timer_counter = 0;	   // zero timer +			port->sm_periodic_timer_counter = 0;  			break;  		case AD_FAST_PERIODIC: -			port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_FAST_PERIODIC_TIME))-1; // decrement 1 tick we lost in the PERIODIC_TX cycle +			/* decrement 1 tick we lost in the PERIODIC_TX cycle */ +			port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_FAST_PERIODIC_TIME))-1;  			break;  		case AD_SLOW_PERIODIC: -			port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_SLOW_PERIODIC_TIME))-1; // decrement 1 tick we lost in the PERIODIC_TX cycle +			/* decrement 1 tick we lost in the PERIODIC_TX cycle */ +			port->sm_periodic_timer_counter = __ad_timer_to_ticks(AD_PERIODIC_TIMER, (u16)(AD_SLOW_PERIODIC_TIME))-1;  			break;  		case AD_PERIODIC_TX:  			port->ntt = true;  			break; -		default:    //to silence the compiler +		default:  			break;  		}  	} @@ -1281,30 +1228,43 @@ static void ad_port_selection_logic(struct port *port)  {  	struct aggregator *aggregator, *free_aggregator = NULL, *temp_aggregator;  	struct port *last_port = NULL, *curr_port; +	struct list_head *iter; +	struct bonding *bond; +	struct slave *slave;  	int found = 0; -	// if the port is already Selected, do nothing +	/* if the port is already Selected, do nothing */  	if (port->sm_vars & AD_PORT_SELECTED)  		return; -	// if the port is connected to other aggregator, detach it +	bond = __get_bond_by_port(port); + +	/* if the port is connected to other aggregator, detach it */  	if (port->aggregator) { -		// detach the port from its former aggregator +		/* detach the port from its former aggregator */  		temp_aggregator = port->aggregator;  		for (curr_port = temp_aggregator->lag_ports; curr_port;  		     last_port = curr_port, -			     curr_port = curr_port->next_port_in_aggregator) { +		     curr_port = curr_port->next_port_in_aggregator) {  			if (curr_port == port) {  				temp_aggregator->num_of_ports--; -				if (!last_port) {// if it is the first port attached to the aggregator +				/* if it is the first port attached to the +				 * aggregator +				 */ +				if (!last_port) {  					temp_aggregator->lag_ports =  						port->next_port_in_aggregator; -				} else {// not the first port attached to the aggregator +				} else { +					/* not the first port 
attached to the +					 * aggregator +					 */  					last_port->next_port_in_aggregator =  						port->next_port_in_aggregator;  				} -				// clear the port's relations to this aggregator +				/* clear the port's relations to this +				 * aggregator +				 */  				port->aggregator = NULL;  				port->next_port_in_aggregator = NULL;  				port->actor_port_aggregator_identifier = 0; @@ -1312,41 +1272,46 @@ static void ad_port_selection_logic(struct port *port)  				pr_debug("Port %d left LAG %d\n",  					 port->actor_port_number,  					 temp_aggregator->aggregator_identifier); -				// if the aggregator is empty, clear its parameters, and set it ready to be attached +				/* if the aggregator is empty, clear its +				 * parameters, and set it ready to be attached +				 */  				if (!temp_aggregator->lag_ports)  					ad_clear_agg(temp_aggregator);  				break;  			}  		} -		if (!curr_port) { // meaning: the port was related to an aggregator but was not on the aggregator port list -			pr_warning("%s: Warning: Port %d (on %s) was related to aggregator %d but was not on its port list\n", -				   port->slave->dev->master->name, -				   port->actor_port_number, -				   port->slave->dev->name, -				   port->aggregator->aggregator_identifier); +		if (!curr_port) { +			/* meaning: the port was related to an aggregator +			 * but was not on the aggregator port list +			 */ +			pr_warn_ratelimited("%s: Warning: Port %d (on %s) was related to aggregator %d but was not on its port list\n", +					    port->slave->bond->dev->name, +					    port->actor_port_number, +					    port->slave->dev->name, +					    port->aggregator->aggregator_identifier);  		}  	} -	// search on all aggregators for a suitable aggregator for this port -	for (aggregator = __get_first_agg(port); aggregator; -	     aggregator = __get_next_agg(aggregator)) { +	/* search on all aggregators for a suitable aggregator for this port */ +	bond_for_each_slave(bond, slave, iter) { +		aggregator = &(SLAVE_AD_INFO(slave)->aggregator); -		// keep a free aggregator for later use(if needed) +		/* keep a free aggregator for later use(if needed) */  		if (!aggregator->lag_ports) {  			if (!free_aggregator)  				free_aggregator = aggregator;  			continue;  		} -		// check if current aggregator suits us -		if (((aggregator->actor_oper_aggregator_key == port->actor_oper_port_key) && // if all parameters match AND -		     !MAC_ADDRESS_COMPARE(&(aggregator->partner_system), &(port->partner_oper.system)) && +		/* check if current aggregator suits us */ +		if (((aggregator->actor_oper_aggregator_key == port->actor_oper_port_key) && /* if all parameters match AND */ +		     MAC_ADDRESS_EQUAL(&(aggregator->partner_system), &(port->partner_oper.system)) &&  		     (aggregator->partner_system_priority == port->partner_oper.system_priority) &&  		     (aggregator->partner_oper_aggregator_key == port->partner_oper.key)  		    ) && -		    ((MAC_ADDRESS_COMPARE(&(port->partner_oper.system), &(null_mac_addr)) && // partner answers -		      !aggregator->is_individual)  // but is not individual OR +		    ((!MAC_ADDRESS_EQUAL(&(port->partner_oper.system), &(null_mac_addr)) && /* partner answers */ +		      !aggregator->is_individual)  /* but is not individual OR */  		    )  		   ) { -			// attach to the founded aggregator +			/* attach to the founded aggregator */  			port->aggregator = aggregator;  			port->actor_port_aggregator_identifier =  				port->aggregator->aggregator_identifier; @@ -1357,23 +1322,26 @@ static void ad_port_selection_logic(struct port *port)  				 
port->actor_port_number,  				 port->aggregator->aggregator_identifier); -			// mark this port as selected +			/* mark this port as selected */  			port->sm_vars |= AD_PORT_SELECTED;  			found = 1;  			break;  		}  	} -	// the port couldn't find an aggregator - attach it to a new aggregator +	/* the port couldn't find an aggregator - attach it to a new +	 * aggregator +	 */  	if (!found) {  		if (free_aggregator) { -			// assign port a new aggregator +			/* assign port a new aggregator */  			port->aggregator = free_aggregator;  			port->actor_port_aggregator_identifier =  				port->aggregator->aggregator_identifier; -			// update the new aggregator's parameters -			// if port was responsed from the end-user +			/* update the new aggregator's parameters +			 * if port was responsed from the end-user +			 */  			if (port->actor_oper_port_key & AD_DUPLEX_KEY_BITS)  				/* if port is full duplex */  				port->aggregator->is_individual = false; @@ -1392,7 +1360,7 @@ static void ad_port_selection_logic(struct port *port)  			port->aggregator->lag_ports = port;  			port->aggregator->num_of_ports++; -			// mark this port as selected +			/* mark this port as selected */  			port->sm_vars |= AD_PORT_SELECTED;  			pr_debug("Port %d joined LAG %d(new LAG)\n", @@ -1400,27 +1368,28 @@ static void ad_port_selection_logic(struct port *port)  				 port->aggregator->aggregator_identifier);  		} else {  			pr_err("%s: Port %d (on %s) did not find a suitable aggregator\n", -			       port->slave->dev->master->name, +			       port->slave->bond->dev->name,  			       port->actor_port_number, port->slave->dev->name);  		}  	} -	// if all aggregator's ports are READY_N == TRUE, set ready=TRUE in all aggregator's ports -	// else set ready=FALSE in all aggregator's ports -	__set_agg_ports_ready(port->aggregator, __agg_ports_are_ready(port->aggregator)); +	/* if all aggregator's ports are READY_N == TRUE, set ready=TRUE +	 * in all aggregator's ports, else set ready=FALSE in all +	 * aggregator's ports +	 */ +	__set_agg_ports_ready(port->aggregator, +			      __agg_ports_are_ready(port->aggregator));  	aggregator = __get_first_agg(port);  	ad_agg_selection_logic(aggregator);  } -/* - * Decide if "agg" is a better choice for the new active aggregator that +/* Decide if "agg" is a better choice for the new active aggregator that   * the current best, according to the ad_select policy.   */  static struct aggregator *ad_agg_selection_test(struct aggregator *best,  						struct aggregator *curr)  { -	/* -	 * 0. If no best, select current. +	/* 0. If no best, select current.  	 *  	 * 1. If the current agg is not individual, and the best is  	 *    individual, select current. 
@@ -1476,9 +1445,9 @@ static struct aggregator *ad_agg_selection_test(struct aggregator *best,  		break;  	default: -		pr_warning("%s: Impossible agg select mode %d\n", -			   curr->slave->dev->master->name, -			   __get_agg_selection_mode(curr->lag_ports)); +		pr_warn_ratelimited("%s: Impossible agg select mode %d\n", +				    curr->slave->bond->dev->name, +				    __get_agg_selection_mode(curr->lag_ports));  		break;  	} @@ -1487,8 +1456,13 @@ static struct aggregator *ad_agg_selection_test(struct aggregator *best,  static int agg_device_up(const struct aggregator *agg)  { -	return (netif_running(agg->slave->dev) && -		netif_carrier_ok(agg->slave->dev)); +	struct port *port = agg->lag_ports; + +	if (!port) +		return 0; + +	return netif_running(port->slave->dev) && +	       netif_carrier_ok(port->slave->dev);  }  /** @@ -1519,24 +1493,28 @@ static int agg_device_up(const struct aggregator *agg)  static void ad_agg_selection_logic(struct aggregator *agg)  {  	struct aggregator *best, *active, *origin; +	struct bonding *bond = agg->slave->bond; +	struct list_head *iter; +	struct slave *slave;  	struct port *port; +	rcu_read_lock();  	origin = agg;  	active = __get_active_agg(agg);  	best = (active && agg_device_up(active)) ? active : NULL; -	do { +	bond_for_each_slave_rcu(bond, slave, iter) { +		agg = &(SLAVE_AD_INFO(slave)->aggregator); +  		agg->is_active = 0;  		if (agg->num_of_ports && agg_device_up(agg))  			best = ad_agg_selection_test(best, agg); - -	} while ((agg = __get_next_agg(agg))); +	}  	if (best &&  	    __get_agg_selection_mode(best->lag_ports) == BOND_AD_STABLE) { -		/* -		 * For the STABLE policy, don't replace the old active +		/* For the STABLE policy, don't replace the old active  		 * aggregator if it's still active (it has an answering  		 * partner) or if both the best and active don't have an  		 * answering partner. @@ -1544,7 +1522,8 @@ static void ad_agg_selection_logic(struct aggregator *agg)  		if (active && active->lag_ports &&  		    active->lag_ports->is_enabled &&  		    (__agg_has_partner(active) || -		     (!__agg_has_partner(active) && !__agg_has_partner(best)))) { +		     (!__agg_has_partner(active) && +		     !__agg_has_partner(best)))) {  			if (!(!active->actor_oper_aggregator_key &&  			      best->actor_oper_aggregator_key)) {  				best = NULL; @@ -1558,7 +1537,7 @@ static void ad_agg_selection_logic(struct aggregator *agg)  		active->is_active = 1;  	} -	// if there is new best aggregator, activate it +	/* if there is new best aggregator, activate it */  	if (best) {  		pr_debug("best Agg=%d; P=%d; a k=%d; p k=%d; Ind=%d; Act=%d\n",  			 best->aggregator_identifier, best->num_of_ports, @@ -1569,8 +1548,8 @@ static void ad_agg_selection_logic(struct aggregator *agg)  			 best->lag_ports, best->slave,  			 best->slave ? best->slave->dev->name : "NULL"); -		for (agg = __get_first_agg(best->lag_ports); agg; -		     agg = __get_next_agg(agg)) { +		bond_for_each_slave_rcu(bond, slave, iter) { +			agg = &(SLAVE_AD_INFO(slave)->aggregator);  			pr_debug("Agg=%d; P=%d; a k=%d; p k=%d; Ind=%d; Act=%d\n",  				 agg->aggregator_identifier, agg->num_of_ports, @@ -1579,10 +1558,11 @@ static void ad_agg_selection_logic(struct aggregator *agg)  				 agg->is_individual, agg->is_active);  		} -		// check if any partner replys +		/* check if any partner replys */  		if (best->is_individual) { -			pr_warning("%s: Warning: No 802.3ad response from the link partner for any adapters in the bond\n", -				   best->slave ? 
best->slave->dev->master->name : "NULL"); +			pr_warn_ratelimited("%s: Warning: No 802.3ad response from the link partner for any adapters in the bond\n", +					    best->slave ? +					    best->slave->bond->dev->name : "NULL");  		}  		best->is_active = 1; @@ -1594,7 +1574,9 @@ static void ad_agg_selection_logic(struct aggregator *agg)  			 best->partner_oper_aggregator_key,  			 best->is_individual, best->is_active); -		// disable the ports that were related to the former active_aggregator +		/* disable the ports that were related to the former +		 * active_aggregator +		 */  		if (active) {  			for (port = active->lag_ports; port;  			     port = port->next_port_in_aggregator) { @@ -1603,8 +1585,7 @@ static void ad_agg_selection_logic(struct aggregator *agg)  		}  	} -	/* -	 * if the selected aggregator is of join individuals +	/* if the selected aggregator is of join individuals  	 * (partner_system is NULL), enable their ports  	 */  	active = __get_active_agg(origin); @@ -1618,19 +1599,14 @@ static void ad_agg_selection_logic(struct aggregator *agg)  		}  	} -	if (origin->slave) { -		struct bonding *bond; +	rcu_read_unlock(); -		bond = bond_get_bond_by_slave(origin->slave); -		if (bond) -			bond_3ad_set_carrier(bond); -	} +	bond_3ad_set_carrier(bond);  }  /**   * ad_clear_agg - clear a given aggregator's parameters   * @aggregator: the aggregator we're looking at - *   */  static void ad_clear_agg(struct aggregator *aggregator)  { @@ -1654,7 +1630,6 @@ static void ad_clear_agg(struct aggregator *aggregator)  /**   * ad_initialize_agg - initialize a given aggregator's parameters   * @aggregator: the aggregator we're looking at - *   */  static void ad_initialize_agg(struct aggregator *aggregator)  { @@ -1671,7 +1646,6 @@ static void ad_initialize_agg(struct aggregator *aggregator)   * ad_initialize_port - initialize a given port's parameters   * @aggregator: the aggregator we're looking at   * @lacp_fast: boolean. 
whether fast periodic should be used - *   */  static void ad_initialize_port(struct port *port, int lacp_fast)  { @@ -1703,8 +1677,10 @@ static void ad_initialize_port(struct port *port, int lacp_fast)  		port->ntt = false;  		port->actor_admin_port_key = 1;  		port->actor_oper_port_key  = 1; -		port->actor_admin_port_state = AD_STATE_AGGREGATION | AD_STATE_LACP_ACTIVITY; -		port->actor_oper_port_state  = AD_STATE_AGGREGATION | AD_STATE_LACP_ACTIVITY; +		port->actor_admin_port_state = AD_STATE_AGGREGATION | +					       AD_STATE_LACP_ACTIVITY; +		port->actor_oper_port_state  = AD_STATE_AGGREGATION | +					       AD_STATE_LACP_ACTIVITY;  		if (lacp_fast)  			port->actor_oper_port_state |= AD_STATE_LACP_TIMEOUT; @@ -1713,7 +1689,7 @@ static void ad_initialize_port(struct port *port, int lacp_fast)  		memcpy(&port->partner_oper, &tmpl, sizeof(tmpl));  		port->is_enabled = true; -		// ****** private parameters ****** +		/* private parameters */  		port->sm_vars = 0x3;  		port->sm_rx_state = 0;  		port->sm_rx_timer_counter = 0; @@ -1751,11 +1727,12 @@ static void ad_enable_collecting_distributing(struct port *port)  /**   * ad_disable_collecting_distributing - disable a port's transmit/receive   * @port: the port we're looking at - *   */  static void ad_disable_collecting_distributing(struct port *port)  { -	if (port->aggregator && MAC_ADDRESS_COMPARE(&(port->aggregator->partner_system), &(null_mac_addr))) { +	if (port->aggregator && +	    !MAC_ADDRESS_EQUAL(&(port->aggregator->partner_system), +			       &(null_mac_addr))) {  		pr_debug("Disabling port %d(LAG %d)\n",  			 port->actor_port_number,  			 port->aggregator->aggregator_identifier); @@ -1763,66 +1740,22 @@ static void ad_disable_collecting_distributing(struct port *port)  	}  } -#if 0 -/** - * ad_marker_info_send - send a marker information frame - * @port: the port we're looking at - * - * This function does nothing since we decided not to implement send and handle - * response for marker PDU's, in this stage, but only to respond to marker - * information. 
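
Editorial note: a detail worth calling out in the ad_disable_collecting_distributing() hunk above is that the comparison-style MAC_ADDRESS_COMPARE() test (truthy when the addresses differ) becomes a boolean MAC_ADDRESS_EQUAL() test, which inverts the truth sense and is why the condition gains a leading '!'. Below is a self-contained userspace illustration of the two polarities; mac_compare() and mac_equal() are invented names built on plain memcmp(), not the kernel helpers.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define ETH_ALEN 6

/* comparison style: zero means "same address", non-zero means "different" */
static int mac_compare(const unsigned char *a, const unsigned char *b)
{
	return memcmp(a, b, ETH_ALEN);
}

/* boolean style: true means "addresses are equal" */
static bool mac_equal(const unsigned char *a, const unsigned char *b)
{
	return memcmp(a, b, ETH_ALEN) == 0;
}

int main(void)
{
	unsigned char null_mac[ETH_ALEN] = { 0 };
	unsigned char partner[ETH_ALEN] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };

	/* both ask "is the partner address non-null?"; note the added '!' */
	printf("%d %d\n", mac_compare(partner, null_mac) != 0,
	       !mac_equal(partner, null_mac));	/* prints: 1 1 */
	return 0;
}
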
- */ -static void ad_marker_info_send(struct port *port) -{ -	struct bond_marker marker; -	u16 index; - -	// fill the marker PDU with the appropriate values -	marker.subtype = 0x02; -	marker.version_number = 0x01; -	marker.tlv_type = AD_MARKER_INFORMATION_SUBTYPE; -	marker.marker_length = 0x16; -	// convert requester_port to Big Endian -	marker.requester_port = (((port->actor_port_number & 0xFF) << 8) |((u16)(port->actor_port_number & 0xFF00) >> 8)); -	marker.requester_system = port->actor_system; -	// convert requester_port(u32) to Big Endian -	marker.requester_transaction_id = -		(((++port->transaction_id & 0xFF) << 24) -		 | ((port->transaction_id & 0xFF00) << 8) -		 | ((port->transaction_id & 0xFF0000) >> 8) -		 | ((port->transaction_id & 0xFF000000) >> 24)); -	marker.pad = 0; -	marker.tlv_type_terminator = 0x00; -	marker.terminator_length = 0x00; -	for (index = 0; index < 90; index++) -		marker.reserved_90[index] = 0; - -	// send the marker information -	if (ad_marker_send(port, &marker) >= 0) { -		pr_debug("Sent Marker Information on port %d\n", -			 port->actor_port_number); -	} -} -#endif -  /**   * ad_marker_info_received - handle receive of a Marker information frame   * @marker_info: Marker info received   * @port: the port we're looking at - *   */  static void ad_marker_info_received(struct bond_marker *marker_info,  	struct port *port)  {  	struct bond_marker marker; -	// copy the received marker data to the response marker -	//marker = *marker_info; +	/* copy the received marker data to the response marker */  	memcpy(&marker, marker_info, sizeof(struct bond_marker)); -	// change the marker subtype to marker response +	/* change the marker subtype to marker response */  	marker.tlv_type = AD_MARKER_RESPONSE_SUBTYPE; -	// send the marker response +	/* send the marker response */  	if (ad_marker_send(port, &marker) >= 0) {  		pr_debug("Sent Marker Response on port %d\n",  			 port->actor_port_number); @@ -1839,22 +1772,21 @@ static void ad_marker_info_received(struct bond_marker *marker_info,   * information.   */  static void ad_marker_response_received(struct bond_marker *marker, -	struct port *port) +					struct port *port)  { -	marker = NULL; /* just to satisfy the compiler */ -	port = NULL;  /* just to satisfy the compiler */ -	// DO NOTHING, SINCE WE DECIDED NOT TO IMPLEMENT THIS FEATURE FOR NOW +	marker = NULL; +	port = NULL; +	/* DO NOTHING, SINCE WE DECIDED NOT TO IMPLEMENT THIS FEATURE FOR NOW */  } -////////////////////////////////////////////////////////////////////////////////////// -// ================= AD exported functions to the main bonding code ================== -////////////////////////////////////////////////////////////////////////////////////// +/* ========= AD exported functions to the main bonding code ========= */ -// Check aggregators status in team every T seconds +/* Check aggregators status in team every T seconds */  #define AD_AGGREGATOR_SELECTION_TIMER  8 -/* - * bond_3ad_initiate_agg_selection(struct bonding *bond) +/** + * bond_3ad_initiate_agg_selection - initate aggregator selection + * @bond: bonding struct   *   * Set the aggregation selection timer, to initiate an agg selection in   * the very near future.  
Called during first initialization, and during @@ -1863,32 +1795,29 @@ static void ad_marker_response_received(struct bond_marker *marker,  void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout)  {  	BOND_AD_INFO(bond).agg_select_timer = timeout; -	BOND_AD_INFO(bond).agg_select_mode = bond->params.ad_select;  } -static u16 aggregator_identifier; -  /**   * bond_3ad_initialize - initialize a bond's 802.3ad parameters and structures   * @bond: bonding struct to work on   * @tick_resolution: tick duration (millisecond resolution) - * @lacp_fast: boolean. whether fast periodic should be used   *   * Can be called only after the mac address of the bond is set.   */ -void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution, int lacp_fast) +void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution)  { -	// check that the bond is not initialized yet -	if (MAC_ADDRESS_COMPARE(&(BOND_AD_INFO(bond).system.sys_mac_addr), +	/* check that the bond is not initialized yet */ +	if (!MAC_ADDRESS_EQUAL(&(BOND_AD_INFO(bond).system.sys_mac_addr),  				bond->dev->dev_addr)) { -		aggregator_identifier = 0; +		BOND_AD_INFO(bond).aggregator_identifier = 0; -		BOND_AD_INFO(bond).lacp_fast = lacp_fast;  		BOND_AD_INFO(bond).system.sys_priority = 0xFFFF;  		BOND_AD_INFO(bond).system.sys_mac_addr = *((struct mac_addr *)bond->dev->dev_addr); -		// initialize how many times this module is called in one second(should be about every 100ms) +		/* initialize how many times this module is called in one +		 * second (should be about every 100ms) +		 */  		ad_ticks_per_sec = tick_resolution;  		bond_3ad_initiate_agg_selection(bond, @@ -1904,64 +1833,57 @@ void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution, int lacp_fas   * Returns:   0 on success   *          < 0 on error   */ -int bond_3ad_bind_slave(struct slave *slave) +void bond_3ad_bind_slave(struct slave *slave)  {  	struct bonding *bond = bond_get_bond_by_slave(slave);  	struct port *port;  	struct aggregator *aggregator; -	if (bond == NULL) { -		pr_err("%s: The slave %s is not attached to its bond\n", -		       slave->dev->master->name, slave->dev->name); -		return -1; -	} +	/* check that the slave has not been initialized yet. */ +	if (SLAVE_AD_INFO(slave)->port.slave != slave) { -	//check that the slave has not been intialized yet. 
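
Editorial note: the hunks above drop the file-scope "static u16 aggregator_identifier" and instead zero a counter stored in the per-bond ad_bond_info from bond_3ad_initialize() (the counter is incremented a little further down when each slave's aggregator is set up). The practical effect is that each bond numbers its aggregators independently rather than sharing one global sequence. A tiny userspace sketch of that idea; struct ad_bond_sketch and next_agg_id() are invented for illustration.

#include <stdio.h>

struct ad_bond_sketch {
	unsigned short aggregator_identifier;	/* per-bond, not file-global */
};

static unsigned short next_agg_id(struct ad_bond_sketch *ad)
{
	return ++ad->aggregator_identifier;
}

int main(void)
{
	struct ad_bond_sketch bond0 = { 0 }, bond1 = { 0 };
	unsigned short a = next_agg_id(&bond0);
	unsigned short b = next_agg_id(&bond0);
	unsigned short c = next_agg_id(&bond1);

	printf("bond0: %u %u  bond1: %u\n", a, b, c);	/* bond0: 1 2  bond1: 1 */
	return 0;
}
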
-	if (SLAVE_AD_INFO(slave).port.slave != slave) { +		/* port initialization */ +		port = &(SLAVE_AD_INFO(slave)->port); -		// port initialization -		port = &(SLAVE_AD_INFO(slave).port); - -		ad_initialize_port(port, BOND_AD_INFO(bond).lacp_fast); +		ad_initialize_port(port, bond->params.lacp_fast); +		__initialize_port_locks(slave);  		port->slave = slave; -		port->actor_port_number = SLAVE_AD_INFO(slave).id; -		// key is determined according to the link speed, duplex and user key(which is yet not supported) -		//              ------------------------------------------------------------ -		// Port key :   | User key                       |      Speed       |Duplex| -		//              ------------------------------------------------------------ -		//              16                               6               1 0 -		port->actor_admin_port_key = 0;	// initialize this parameter +		port->actor_port_number = SLAVE_AD_INFO(slave)->id; +		/* key is determined according to the link speed, duplex and user key(which +		 * is yet not supported) +		 */ +		port->actor_admin_port_key = 0;  		port->actor_admin_port_key |= __get_duplex(port);  		port->actor_admin_port_key |= (__get_link_speed(port) << 1);  		port->actor_oper_port_key = port->actor_admin_port_key; -		// if the port is not full duplex, then the port should be not lacp Enabled +		/* if the port is not full duplex, then the port should be not +		 * lacp Enabled +		 */  		if (!(port->actor_oper_port_key & AD_DUPLEX_KEY_BITS))  			port->sm_vars &= ~AD_PORT_LACP_ENABLED; -		// actor system is the bond's system +		/* actor system is the bond's system */  		port->actor_system = BOND_AD_INFO(bond).system.sys_mac_addr; -		// tx timer(to verify that no more than MAX_TX_IN_SECOND lacpdu's are sent in one second) +		/* tx timer(to verify that no more than MAX_TX_IN_SECOND +		 * lacpdu's are sent in one second) +		 */  		port->sm_tx_timer_counter = ad_ticks_per_sec/AD_MAX_TX_IN_SECOND;  		port->aggregator = NULL;  		port->next_port_in_aggregator = NULL;  		__disable_port(port); -		__initialize_port_locks(port); - -		// aggregator initialization -		aggregator = &(SLAVE_AD_INFO(slave).aggregator); +		/* aggregator initialization */ +		aggregator = &(SLAVE_AD_INFO(slave)->aggregator);  		ad_initialize_agg(aggregator);  		aggregator->aggregator_mac_address = *((struct mac_addr *)bond->dev->dev_addr); -		aggregator->aggregator_identifier = (++aggregator_identifier); +		aggregator->aggregator_identifier = ++BOND_AD_INFO(bond).aggregator_identifier;  		aggregator->slave = slave;  		aggregator->is_active = 0;  		aggregator->num_of_ports = 0;  	} - -	return 0;  }  /** @@ -1977,17 +1899,17 @@ void bond_3ad_unbind_slave(struct slave *slave)  	struct port *port, *prev_port, *temp_port;  	struct aggregator *aggregator, *new_aggregator, *temp_aggregator;  	int select_new_active_agg = 0; +	struct bonding *bond = slave->bond; +	struct slave *slave_iter; +	struct list_head *iter; -	// find the aggregator related to this slave -	aggregator = &(SLAVE_AD_INFO(slave).aggregator); - -	// find the port related to this slave -	port = &(SLAVE_AD_INFO(slave).port); +	aggregator = &(SLAVE_AD_INFO(slave)->aggregator); +	port = &(SLAVE_AD_INFO(slave)->port); -	// if slave is null, the whole port is not initialized +	/* if slave is null, the whole port is not initialized */  	if (!port->slave) { -		pr_warning("Warning: %s: Trying to unbind an uninitialized port on %s\n", -			   slave->dev->master->name, slave->dev->name); +		pr_warn("Warning: %s: Trying to unbind an uninitialized 
port on %s\n", +			slave->bond->dev->name, slave->dev->name);  		return;  	} @@ -1999,32 +1921,42 @@ void bond_3ad_unbind_slave(struct slave *slave)  	__update_lacpdu_from_port(port);  	ad_lacpdu_send(port); -	// check if this aggregator is occupied +	/* check if this aggregator is occupied */  	if (aggregator->lag_ports) { -		// check if there are other ports related to this aggregator except -		// the port related to this slave(thats ensure us that there is a -		// reason to search for new aggregator, and that we will find one -		if ((aggregator->lag_ports != port) || (aggregator->lag_ports->next_port_in_aggregator)) { -			// find new aggregator for the related port(s) -			new_aggregator = __get_first_agg(port); -			for (; new_aggregator; new_aggregator = __get_next_agg(new_aggregator)) { -				// if the new aggregator is empty, or it is connected to our port only -				if (!new_aggregator->lag_ports -				    || ((new_aggregator->lag_ports == port) -					&& !new_aggregator->lag_ports->next_port_in_aggregator)) +		/* check if there are other ports related to this aggregator +		 * except the port related to this slave(thats ensure us that +		 * there is a reason to search for new aggregator, and that we +		 * will find one +		 */ +		if ((aggregator->lag_ports != port) || +		    (aggregator->lag_ports->next_port_in_aggregator)) { +			/* find new aggregator for the related port(s) */ +			bond_for_each_slave(bond, slave_iter, iter) { +				new_aggregator = &(SLAVE_AD_INFO(slave_iter)->aggregator); +				/* if the new aggregator is empty, or it is +				 * connected to our port only +				 */ +				if (!new_aggregator->lag_ports || +				    ((new_aggregator->lag_ports == port) && +				     !new_aggregator->lag_ports->next_port_in_aggregator))  					break;  			} -			// if new aggregator found, copy the aggregator's parameters -			// and connect the related lag_ports to the new aggregator +			if (!slave_iter) +				new_aggregator = NULL; + +			/* if new aggregator found, copy the aggregator's +			 * parameters and connect the related lag_ports to the +			 * new aggregator +			 */  			if ((new_aggregator) && ((!new_aggregator->lag_ports) || ((new_aggregator->lag_ports == port) && !new_aggregator->lag_ports->next_port_in_aggregator))) { -				pr_debug("Some port(s) related to LAG %d - replaceing with LAG %d\n", +				pr_debug("Some port(s) related to LAG %d - replacing with LAG %d\n",  					 aggregator->aggregator_identifier,  					 new_aggregator->aggregator_identifier); -				if ((new_aggregator->lag_ports == port) && new_aggregator->is_active) { +				if ((new_aggregator->lag_ports == port) && +				    new_aggregator->is_active) {  					pr_info("%s: Removing an active aggregator\n", -						aggregator->slave->dev->master->name); -					// select new active aggregator +						aggregator->slave->bond->dev->name);  					 select_new_active_agg = 1;  				} @@ -2040,45 +1972,54 @@ void bond_3ad_unbind_slave(struct slave *slave)  				new_aggregator->is_active = aggregator->is_active;  				new_aggregator->num_of_ports = aggregator->num_of_ports; -				// update the information that is written on the ports about the aggregator +				/* update the information that is written on +				 * the ports about the aggregator +				 */  				for (temp_port = aggregator->lag_ports; temp_port;  				     temp_port = temp_port->next_port_in_aggregator) {  					temp_port->aggregator = new_aggregator;  					temp_port->actor_port_aggregator_identifier = new_aggregator->aggregator_identifier;  				} -				// clear the aggregator  				
ad_clear_agg(aggregator);  				if (select_new_active_agg)  					ad_agg_selection_logic(__get_first_agg(port));  			} else { -				pr_warning("%s: Warning: unbinding aggregator, and could not find a new aggregator for its ports\n", -					   slave->dev->master->name); +				pr_warn("%s: Warning: unbinding aggregator, and could not find a new aggregator for its ports\n", +					slave->bond->dev->name);  			} -		} else { // in case that the only port related to this aggregator is the one we want to remove +		} else { +			/* in case that the only port related to this +			 * aggregator is the one we want to remove +			 */  			select_new_active_agg = aggregator->is_active; -			// clear the aggregator  			ad_clear_agg(aggregator);  			if (select_new_active_agg) {  				pr_info("%s: Removing an active aggregator\n", -					slave->dev->master->name); -				// select new active aggregator -				ad_agg_selection_logic(__get_first_agg(port)); +					slave->bond->dev->name); +				/* select new active aggregator */ +				temp_aggregator = __get_first_agg(port); +				if (temp_aggregator) +					ad_agg_selection_logic(temp_aggregator);  			}  		}  	}  	pr_debug("Unbinding port %d\n", port->actor_port_number); -	// find the aggregator that this port is connected to -	temp_aggregator = __get_first_agg(port); -	for (; temp_aggregator; temp_aggregator = __get_next_agg(temp_aggregator)) { + +	/* find the aggregator that this port is connected to */ +	bond_for_each_slave(bond, slave_iter, iter) { +		temp_aggregator = &(SLAVE_AD_INFO(slave_iter)->aggregator);  		prev_port = NULL; -		// search the port in the aggregator's related ports +		/* search the port in the aggregator's related ports */  		for (temp_port = temp_aggregator->lag_ports; temp_port;  		     prev_port = temp_port, -			     temp_port = temp_port->next_port_in_aggregator) { -			if (temp_port == port) { // the aggregator found - detach the port from this aggregator +		     temp_port = temp_port->next_port_in_aggregator) { +			if (temp_port == port) { +				/* the aggregator found - detach the port from +				 * this aggregator +				 */  				if (prev_port)  					prev_port->next_port_in_aggregator = temp_port->next_port_in_aggregator;  				else @@ -2086,12 +2027,11 @@ void bond_3ad_unbind_slave(struct slave *slave)  				temp_aggregator->num_of_ports--;  				if (temp_aggregator->num_of_ports == 0) {  					select_new_active_agg = temp_aggregator->is_active; -					// clear the aggregator  					ad_clear_agg(temp_aggregator);  					if (select_new_active_agg) {  						pr_info("%s: Removing an active aggregator\n", -							slave->dev->master->name); -						// select new active aggregator +							slave->bond->dev->name); +						/* select new active aggregator */  						ad_agg_selection_logic(__get_first_agg(port));  					}  				} @@ -2119,25 +2059,30 @@ void bond_3ad_state_machine_handler(struct work_struct *work)  {  	struct bonding *bond = container_of(work, struct bonding,  					    ad_work.work); -	struct port *port;  	struct aggregator *aggregator; +	struct list_head *iter; +	struct slave *slave; +	struct port *port; +	bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER;  	read_lock(&bond->lock); +	rcu_read_lock(); -	if (bond->kill_timers) -		goto out; - -	//check if there are any slaves -	if (bond->slave_cnt == 0) +	/* check if there are any slaves */ +	if (!bond_has_slaves(bond))  		goto re_arm; -	// check if agg_select_timer timer after initialize is timed out -	if (BOND_AD_INFO(bond).agg_select_timer && !(--BOND_AD_INFO(bond).agg_select_timer)) { -		// select the 
active aggregator for the bond -		if ((port = __get_first_port(bond))) { +	/* check if agg_select_timer timer after initialize is timed out */ +	if (BOND_AD_INFO(bond).agg_select_timer && +	    !(--BOND_AD_INFO(bond).agg_select_timer)) { +		slave = bond_first_slave_rcu(bond); +		port = slave ? &(SLAVE_AD_INFO(slave)->port) : NULL; + +		/* select the active aggregator for the bond */ +		if (port) {  			if (!port->slave) { -				pr_warning("%s: Warning: bond's first port is uninitialized\n", -					   bond->dev->name); +				pr_warn_ratelimited("%s: Warning: bond's first port is uninitialized\n", +						    bond->dev->name);  				goto re_arm;  			} @@ -2147,29 +2092,49 @@ void bond_3ad_state_machine_handler(struct work_struct *work)  		bond_3ad_set_carrier(bond);  	} -	// for each port run the state machines -	for (port = __get_first_port(bond); port; port = __get_next_port(port)) { +	/* for each port run the state machines */ +	bond_for_each_slave_rcu(bond, slave, iter) { +		port = &(SLAVE_AD_INFO(slave)->port);  		if (!port->slave) { -			pr_warning("%s: Warning: Found an uninitialized port\n", -				   bond->dev->name); +			pr_warn_ratelimited("%s: Warning: Found an uninitialized port\n", +					    bond->dev->name);  			goto re_arm;  		} +		/* Lock around state machines to protect data accessed +		 * by all (e.g., port->sm_vars).  ad_rx_machine may run +		 * concurrently due to incoming LACPDU. +		 */ +		__get_state_machine_lock(port); +  		ad_rx_machine(NULL, port);  		ad_periodic_machine(port);  		ad_port_selection_logic(port);  		ad_mux_machine(port);  		ad_tx_machine(port); -		// turn off the BEGIN bit, since we already handled it +		/* turn off the BEGIN bit, since we already handled it */  		if (port->sm_vars & AD_PORT_BEGIN)  			port->sm_vars &= ~AD_PORT_BEGIN; + +		__release_state_machine_lock(port);  	}  re_arm: -	queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks); -out: +	bond_for_each_slave_rcu(bond, slave, iter) { +		if (slave->should_notify) { +			should_notify_rtnl = BOND_SLAVE_NOTIFY_NOW; +			break; +		} +	} +	rcu_read_unlock();  	read_unlock(&bond->lock); + +	if (should_notify_rtnl && rtnl_trylock()) { +		bond_slave_state_notify(bond); +		rtnl_unlock(); +	} +	queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks);  }  /** @@ -2182,29 +2147,38 @@ out:   * received frames (loopback). Since only the payload is given to this   * function, it check for loopback.   
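
Editorial note: the reworked bond_3ad_state_machine_handler() above now brackets the whole per-port pass (rx, periodic, selection, mux, tx machines) with __get_state_machine_lock()/__release_state_machine_lock(), because the receive path may run ad_rx_machine() concurrently on the same port. The following is only a userspace analogy of that locking discipline, with a pthread mutex standing in for the per-port lock; struct port_sketch and both function names are invented. Build with cc -pthread; the kernel code of course uses a spinlock and reaches the two paths from different contexts.

#include <pthread.h>
#include <stdio.h>

struct port_sketch {
	pthread_mutex_t sm_lock;	/* stands in for the per-port state machine lock */
	unsigned int sm_vars;		/* shared by every state machine */
};

static struct port_sketch port = { PTHREAD_MUTEX_INITIALIZER, 0 };

/* periodic worker: one critical section around all the machines */
static void state_machine_pass(struct port_sketch *p)
{
	pthread_mutex_lock(&p->sm_lock);
	p->sm_vars |= 0x1;	/* e.g. one machine sets a flag ... */
	p->sm_vars &= ~0x1;	/* ... and a later machine clears it */
	pthread_mutex_unlock(&p->sm_lock);
}

/* receive path: takes the same lock, so it never interleaves with the pass */
static void rx_indication(struct port_sketch *p)
{
	pthread_mutex_lock(&p->sm_lock);
	p->sm_vars |= 0x2;
	pthread_mutex_unlock(&p->sm_lock);
}

int main(void)
{
	state_machine_pass(&port);
	rx_indication(&port);
	printf("sm_vars = 0x%x\n", port.sm_vars);	/* prints 0x2 */
	return 0;
}
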
*/ -static void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u16 length) +static int bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, +				  u16 length)  {  	struct port *port; +	int ret = RX_HANDLER_ANOTHER;  	if (length >= sizeof(struct lacpdu)) { -		port = &(SLAVE_AD_INFO(slave).port); +		port = &(SLAVE_AD_INFO(slave)->port);  		if (!port->slave) { -			pr_warning("%s: Warning: port of slave %s is uninitialized\n", -				   slave->dev->name, slave->dev->master->name); -			return; +			pr_warn_ratelimited("%s: Warning: port of slave %s is uninitialized\n", +					    slave->dev->name, slave->bond->dev->name); +			return ret;  		}  		switch (lacpdu->subtype) {  		case AD_TYPE_LACPDU: +			ret = RX_HANDLER_CONSUMED;  			pr_debug("Received LACPDU on port %d\n",  				 port->actor_port_number); +			/* Protect against concurrent state machines */ +			__get_state_machine_lock(port);  			ad_rx_machine(lacpdu, port); +			__release_state_machine_lock(port);  			break;  		case AD_TYPE_MARKER: -			// No need to convert fields to Little Endian since we don't use the marker's fields. +			ret = RX_HANDLER_CONSUMED; +			/* No need to convert fields to Little Endian since we +			 * don't use the marker's fields. +			 */  			switch (((struct bond_marker *)lacpdu)->tlv_type) {  			case AD_MARKER_INFORMATION_SUBTYPE: @@ -2225,6 +2199,7 @@ static void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u  			}  		}  	} +	return ret;  }  /** @@ -2237,22 +2212,27 @@ void bond_3ad_adapter_speed_changed(struct slave *slave)  {  	struct port *port; -	port = &(SLAVE_AD_INFO(slave).port); +	port = &(SLAVE_AD_INFO(slave)->port); -	// if slave is null, the whole port is not initialized +	/* if slave is null, the whole port is not initialized */  	if (!port->slave) { -		pr_warning("Warning: %s: speed changed for uninitialized port on %s\n", -			   slave->dev->master->name, slave->dev->name); +		pr_warn("Warning: %s: speed changed for uninitialized port on %s\n", +			slave->bond->dev->name, slave->dev->name);  		return;  	} +	__get_state_machine_lock(port); +  	port->actor_admin_port_key &= ~AD_SPEED_KEY_BITS;  	port->actor_oper_port_key = port->actor_admin_port_key |=  		(__get_link_speed(port) << 1);  	pr_debug("Port %d changed speed\n", port->actor_port_number); -	// there is no need to reselect a new aggregator, just signal the -	// state machines to reinitialize +	/* there is no need to reselect a new aggregator, just signal the +	 * state machines to reinitialize +	 */  	port->sm_vars |= AD_PORT_BEGIN; + +	__release_state_machine_lock(port);  }  /** @@ -2265,22 +2245,27 @@ void bond_3ad_adapter_duplex_changed(struct slave *slave)  {  	struct port *port; -	port = &(SLAVE_AD_INFO(slave).port); +	port = &(SLAVE_AD_INFO(slave)->port); -	// if slave is null, the whole port is not initialized +	/* if slave is null, the whole port is not initialized */  	if (!port->slave) { -		pr_warning("%s: Warning: duplex changed for uninitialized port on %s\n", -			   slave->dev->master->name, slave->dev->name); +		pr_warn("%s: Warning: duplex changed for uninitialized port on %s\n", +			slave->bond->dev->name, slave->dev->name);  		return;  	} +	__get_state_machine_lock(port); +  	port->actor_admin_port_key &= ~AD_DUPLEX_KEY_BITS;  	port->actor_oper_port_key = port->actor_admin_port_key |=  		__get_duplex(port);  	pr_debug("Port %d changed duplex\n", port->actor_port_number); -	// there is no need to reselect a new aggregator, just signal the -	// state machines to reinitialize +	/* 
there is no need to reselect a new aggregator, just signal the +	 * state machines to reinitialize +	 */  	port->sm_vars |= AD_PORT_BEGIN; + +	__release_state_machine_lock(port);  }  /** @@ -2294,17 +2279,23 @@ void bond_3ad_handle_link_change(struct slave *slave, char link)  {  	struct port *port; -	port = &(SLAVE_AD_INFO(slave).port); +	port = &(SLAVE_AD_INFO(slave)->port); -	// if slave is null, the whole port is not initialized +	/* if slave is null, the whole port is not initialized */  	if (!port->slave) { -		pr_warning("Warning: %s: link status changed for uninitialized port on %s\n", -			   slave->dev->master->name, slave->dev->name); +		pr_warn("Warning: %s: link status changed for uninitialized port on %s\n", +			slave->bond->dev->name, slave->dev->name);  		return;  	} -	// on link down we are zeroing duplex and speed since some of the adaptors(ce1000.lan) report full duplex/speed instead of N/A(duplex) / 0(speed) -	// on link up we are forcing recheck on the duplex and speed since some of he adaptors(ce1000.lan) report +	__get_state_machine_lock(port); +	/* on link down we are zeroing duplex and speed since +	 * some of the adaptors(ce1000.lan) report full duplex/speed +	 * instead of N/A(duplex) / 0(speed). +	 * +	 * on link up we are forcing recheck on the duplex and speed since +	 * some of he adaptors(ce1000.lan) report. +	 */  	if (link == BOND_LINK_UP) {  		port->is_enabled = true;  		port->actor_admin_port_key &= ~AD_DUPLEX_KEY_BITS; @@ -2320,16 +2311,24 @@ void bond_3ad_handle_link_change(struct slave *slave, char link)  		port->actor_oper_port_key = (port->actor_admin_port_key &=  					     ~AD_SPEED_KEY_BITS);  	} -	//BOND_PRINT_DBG(("Port %d changed link status to %s", port->actor_port_number, ((link == BOND_LINK_UP)?"UP":"DOWN"))); -	// there is no need to reselect a new aggregator, just signal the -	// state machines to reinitialize +	pr_debug("Port %d changed link status to %s\n", +		 port->actor_port_number, +		 link == BOND_LINK_UP ? "UP" : "DOWN"); +	/* there is no need to reselect a new aggregator, just signal the +	 * state machines to reinitialize +	 */  	port->sm_vars |= AD_PORT_BEGIN; + +	__release_state_machine_lock(port);  } -/* - * set link state for bonding master: if we have an active - * aggregator, we're up, if not, we're down.  Presumes that we cannot - * have an active aggregator if there are no slaves with link up. +/** + * bond_3ad_set_carrier - set link state for bonding master + * @bond - bonding structure + * + * if we have an active aggregator, we're up, if not, we're down. + * Presumes that we cannot have an active aggregator if there are + * no slaves with link up.   *   * This behavior complies with IEEE 802.3 section 43.3.9.   * @@ -2338,154 +2337,197 @@ void bond_3ad_handle_link_change(struct slave *slave, char link)   */  int bond_3ad_set_carrier(struct bonding *bond)  { -	if (__get_active_agg(&(SLAVE_AD_INFO(bond->first_slave).aggregator))) { -		if (!netif_carrier_ok(bond->dev)) { +	struct aggregator *active; +	struct slave *first_slave; +	int ret = 1; + +	rcu_read_lock(); +	first_slave = bond_first_slave_rcu(bond); +	if (!first_slave) { +		ret = 0; +		goto out; +	} +	active = __get_active_agg(&(SLAVE_AD_INFO(first_slave)->aggregator)); +	if (active) { +		/* are enough slaves available to consider link up? 
*/ +		if (active->num_of_ports < bond->params.min_links) { +			if (netif_carrier_ok(bond->dev)) { +				netif_carrier_off(bond->dev); +				goto out; +			} +		} else if (!netif_carrier_ok(bond->dev)) {  			netif_carrier_on(bond->dev); -			return 1; +			goto out;  		} -		return 0; -	} - -	if (netif_carrier_ok(bond->dev)) { +	} else if (netif_carrier_ok(bond->dev)) {  		netif_carrier_off(bond->dev); -		return 1;  	} -	return 0; +out: +	rcu_read_unlock(); +	return ret;  }  /** - * bond_3ad_get_active_agg_info - get information of the active aggregator + * __bond_3ad_get_active_agg_info - get information of the active aggregator   * @bond: bonding struct to work on   * @ad_info: ad_info struct to fill with the bond's info   *   * Returns:   0 on success   *          < 0 on error   */ -int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info) +int __bond_3ad_get_active_agg_info(struct bonding *bond, +				   struct ad_info *ad_info)  {  	struct aggregator *aggregator = NULL; +	struct list_head *iter; +	struct slave *slave;  	struct port *port; -	for (port = __get_first_port(bond); port; port = __get_next_port(port)) { +	bond_for_each_slave_rcu(bond, slave, iter) { +		port = &(SLAVE_AD_INFO(slave)->port);  		if (port->aggregator && port->aggregator->is_active) {  			aggregator = port->aggregator;  			break;  		}  	} -	if (aggregator) { -		ad_info->aggregator_id = aggregator->aggregator_identifier; -		ad_info->ports = aggregator->num_of_ports; -		ad_info->actor_key = aggregator->actor_oper_aggregator_key; -		ad_info->partner_key = aggregator->partner_oper_aggregator_key; -		memcpy(ad_info->partner_system, aggregator->partner_system.mac_addr_value, ETH_ALEN); -		return 0; -	} +	if (!aggregator) +		return -1; -	return -1; +	ad_info->aggregator_id = aggregator->aggregator_identifier; +	ad_info->ports = aggregator->num_of_ports; +	ad_info->actor_key = aggregator->actor_oper_aggregator_key; +	ad_info->partner_key = aggregator->partner_oper_aggregator_key; +	ether_addr_copy(ad_info->partner_system, +			aggregator->partner_system.mac_addr_value); +	return 0; +} + +/* Wrapper used to hold bond->lock so no slave manipulation can occur */ +int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info) +{ +	int ret; + +	rcu_read_lock(); +	ret = __bond_3ad_get_active_agg_info(bond, ad_info); +	rcu_read_unlock(); + +	return ret;  }  int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)  { -	struct slave *slave, *start_at;  	struct bonding *bond = netdev_priv(dev); -	int slave_agg_no; +	struct slave *slave, *first_ok_slave; +	struct aggregator *agg; +	struct ad_info ad_info; +	struct list_head *iter;  	int slaves_in_agg; +	int slave_agg_no;  	int agg_id; -	int i; -	struct ad_info ad_info; -	int res = 1; - -	/* make sure that the slaves list will -	 * not change during tx -	 */ -	read_lock(&bond->lock); -	if (!BOND_IS_OK(bond)) -		goto out; - -	if (bond_3ad_get_active_agg_info(bond, &ad_info)) { -		pr_debug("%s: Error: bond_3ad_get_active_agg_info failed\n", +	if (__bond_3ad_get_active_agg_info(bond, &ad_info)) { +		pr_debug("%s: Error: __bond_3ad_get_active_agg_info failed\n",  			 dev->name); -		goto out; +		goto err_free;  	}  	slaves_in_agg = ad_info.ports;  	agg_id = ad_info.aggregator_id;  	if (slaves_in_agg == 0) { -		/*the aggregator is empty*/  		pr_debug("%s: Error: active aggregator is empty\n", dev->name); -		goto out; +		goto err_free;  	} -	slave_agg_no = bond->xmit_hash_policy(skb, slaves_in_agg); +	slave_agg_no = bond_xmit_hash(bond, skb) % 
slaves_in_agg; +	first_ok_slave = NULL; -	bond_for_each_slave(bond, slave, i) { -		struct aggregator *agg = SLAVE_AD_INFO(slave).port.aggregator; +	bond_for_each_slave_rcu(bond, slave, iter) { +		agg = SLAVE_AD_INFO(slave)->port.aggregator; +		if (!agg || agg->aggregator_identifier != agg_id) +			continue; -		if (agg && (agg->aggregator_identifier == agg_id)) { +		if (slave_agg_no >= 0) { +			if (!first_ok_slave && bond_slave_can_tx(slave)) +				first_ok_slave = slave;  			slave_agg_no--; -			if (slave_agg_no < 0) -				break; +			continue; +		} + +		if (bond_slave_can_tx(slave)) { +			bond_dev_queue_xmit(bond, skb, slave->dev); +			goto out;  		}  	}  	if (slave_agg_no >= 0) {  		pr_err("%s: Error: Couldn't find a slave to tx on for aggregator ID %d\n",  		       dev->name, agg_id); -		goto out; +		goto err_free;  	} -	start_at = slave; - -	bond_for_each_slave_from(bond, slave, i, start_at) { -		int slave_agg_id = 0; -		struct aggregator *agg = SLAVE_AD_INFO(slave).port.aggregator; - -		if (agg) -			slave_agg_id = agg->aggregator_identifier; - -		if (SLAVE_IS_OK(slave) && agg && (slave_agg_id == agg_id)) { -			res = bond_dev_queue_xmit(bond, skb, slave->dev); -			break; -		} -	} +	/* we couldn't find any suitable slave after the agg_no, so use the +	 * first suitable found, if found. +	 */ +	if (first_ok_slave) +		bond_dev_queue_xmit(bond, skb, first_ok_slave->dev); +	else +		goto err_free;  out: -	if (res) { -		/* no suitable interface, frame not sent */ -		dev_kfree_skb(skb); -	} -	read_unlock(&bond->lock);  	return NETDEV_TX_OK; +err_free: +	/* no suitable interface, frame not sent */ +	dev_kfree_skb_any(skb); +	goto out;  } -int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype, struct net_device *orig_dev) +int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond, +			 struct slave *slave)  { -	struct bonding *bond = netdev_priv(dev); -	struct slave *slave = NULL; -	int ret = NET_RX_DROP; +	int ret = RX_HANDLER_ANOTHER; +	struct lacpdu *lacpdu, _lacpdu; -	if (!(dev->flags & IFF_MASTER)) -		goto out; +	if (skb->protocol != PKT_TYPE_LACPDU) +		return ret; -	if (!pskb_may_pull(skb, sizeof(struct lacpdu))) -		goto out; +	lacpdu = skb_header_pointer(skb, 0, sizeof(_lacpdu), &_lacpdu); +	if (!lacpdu) +		return ret;  	read_lock(&bond->lock); -	slave = bond_get_slave_by_dev(netdev_priv(dev), orig_dev); -	if (!slave) -		goto out_unlock; - -	bond_3ad_rx_indication((struct lacpdu *) skb->data, slave, skb->len); - -	ret = NET_RX_SUCCESS; - -out_unlock: +	ret = bond_3ad_rx_indication(lacpdu, slave, skb->len);  	read_unlock(&bond->lock); -out: -	dev_kfree_skb(skb); -  	return ret;  } + +/** + * bond_3ad_update_lacp_rate - change the lacp rate + * @bond - bonding struct + * + * When modify lacp_rate parameter via sysfs, + * update actor_oper_port_state of each port. + * + * Hold slave->state_machine_lock, + * so we can modify port->actor_oper_port_state, + * no matter bond is up or down. 
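
Editorial note: the rewritten bond_3ad_xmit_xor() above reduces the transmit decision to: hash the frame, take hash % slaves_in_agg as a position among the active aggregator's slaves, send on the first transmit-capable slave after that position, and otherwise fall back to the first transmit-capable slave seen while counting (or drop the frame if none exists). Below is a self-contained userspace sketch of just that walk; struct slave_sketch, pick_tx_slave() and the flat array are simplifications, while the kernel iterates the bond's slave list under RCU and filters by aggregator id.

#include <stdbool.h>
#include <stdio.h>

struct slave_sketch {
	const char *name;
	bool can_tx;
};

static const struct slave_sketch *pick_tx_slave(const struct slave_sketch *s,
						int n, unsigned int hash)
{
	const struct slave_sketch *first_ok = NULL;
	int agg_no = hash % n;
	int i;

	for (i = 0; i < n; i++) {
		if (agg_no >= 0) {
			/* still counting toward the hashed position */
			if (!first_ok && s[i].can_tx)
				first_ok = &s[i];
			agg_no--;
			continue;
		}
		/* past the hashed position: first usable slave wins */
		if (s[i].can_tx)
			return &s[i];
	}
	return first_ok;	/* may be NULL: no usable slave at all */
}

int main(void)
{
	struct slave_sketch slaves[] = {
		{ "eth0", true }, { "eth1", false }, { "eth2", true },
	};
	const struct slave_sketch *s = pick_tx_slave(slaves, 3, 7);

	printf("tx on %s\n", s ? s->name : "none");	/* prints: tx on eth2 */
	return 0;
}
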
+ */ +void bond_3ad_update_lacp_rate(struct bonding *bond) +{ +	struct port *port = NULL; +	struct list_head *iter; +	struct slave *slave; +	int lacp_fast; + +	lacp_fast = bond->params.lacp_fast; +	bond_for_each_slave(bond, slave, iter) { +		port = &(SLAVE_AD_INFO(slave)->port); +		__get_state_machine_lock(port); +		if (lacp_fast) +			port->actor_oper_port_state |= AD_STATE_LACP_TIMEOUT; +		else +			port->actor_oper_port_state &= ~AD_STATE_LACP_TIMEOUT; +		__release_state_machine_lock(port); +	} +} diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h index 2c46a154f2c..bb03b1df2f3 100644 --- a/drivers/net/bonding/bond_3ad.h +++ b/drivers/net/bonding/bond_3ad.h @@ -28,7 +28,7 @@  #include <linux/netdevice.h>  #include <linux/if_ether.h> -// General definitions +/* General definitions */  #define PKT_TYPE_LACPDU         cpu_to_be16(ETH_P_SLOW)  #define AD_TIMER_INTERVAL       100 /*msec*/ @@ -39,7 +39,7 @@  typedef struct mac_addr {  	u8 mac_addr_value[ETH_ALEN]; -} mac_addr_t; +} __packed mac_addr_t;  enum {  	BOND_AD_STABLE = 0, @@ -47,54 +47,54 @@ enum {  	BOND_AD_COUNT = 2,  }; -// rx machine states(43.4.11 in the 802.3ad standard) +/* rx machine states(43.4.11 in the 802.3ad standard) */  typedef enum {  	AD_RX_DUMMY, -	AD_RX_INITIALIZE,     // rx Machine -	AD_RX_PORT_DISABLED,  // rx Machine -	AD_RX_LACP_DISABLED,  // rx Machine -	AD_RX_EXPIRED,	      // rx Machine -	AD_RX_DEFAULTED,      // rx Machine -	AD_RX_CURRENT	      // rx Machine +	AD_RX_INITIALIZE,	/* rx Machine */ +	AD_RX_PORT_DISABLED,	/* rx Machine */ +	AD_RX_LACP_DISABLED,	/* rx Machine */ +	AD_RX_EXPIRED,		/* rx Machine */ +	AD_RX_DEFAULTED,	/* rx Machine */ +	AD_RX_CURRENT		/* rx Machine */  } rx_states_t; -// periodic machine states(43.4.12 in the 802.3ad standard) +/* periodic machine states(43.4.12 in the 802.3ad standard) */  typedef enum {  	AD_PERIODIC_DUMMY, -	AD_NO_PERIODIC,	       // periodic machine -	AD_FAST_PERIODIC,      // periodic machine -	AD_SLOW_PERIODIC,      // periodic machine -	AD_PERIODIC_TX	   // periodic machine +	AD_NO_PERIODIC,		/* periodic machine */ +	AD_FAST_PERIODIC,	/* periodic machine */ +	AD_SLOW_PERIODIC,	/* periodic machine */ +	AD_PERIODIC_TX		/* periodic machine */  } periodic_states_t; -// mux machine states(43.4.13 in the 802.3ad standard) +/* mux machine states(43.4.13 in the 802.3ad standard) */  typedef enum {  	AD_MUX_DUMMY, -	AD_MUX_DETACHED,       // mux machine -	AD_MUX_WAITING,	       // mux machine -	AD_MUX_ATTACHED,       // mux machine -	AD_MUX_COLLECTING_DISTRIBUTING // mux machine +	AD_MUX_DETACHED,	/* mux machine */ +	AD_MUX_WAITING,		/* mux machine */ +	AD_MUX_ATTACHED,	/* mux machine */ +	AD_MUX_COLLECTING_DISTRIBUTING	/* mux machine */  } mux_states_t; -// tx machine states(43.4.15 in the 802.3ad standard) +/* tx machine states(43.4.15 in the 802.3ad standard) */  typedef enum {  	AD_TX_DUMMY, -	AD_TRANSMIT	   // tx Machine +	AD_TRANSMIT		/* tx Machine */  } tx_states_t; -// rx indication types +/* rx indication types */  typedef enum { -	AD_TYPE_LACPDU = 1,    // type lacpdu -	AD_TYPE_MARKER	   // type marker +	AD_TYPE_LACPDU = 1,	/* type lacpdu */ +	AD_TYPE_MARKER		/* type marker */  } pdu_type_t; -// rx marker indication types +/* rx marker indication types */  typedef enum { -	AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype -	AD_MARKER_RESPONSE_SUBTYPE     // marker response subtype +	AD_MARKER_INFORMATION_SUBTYPE = 1,	/* marker imformation subtype */ +	AD_MARKER_RESPONSE_SUBTYPE		/* marker response subtype */  } 
bond_marker_subtype_t; -// timers types(43.4.9 in the 802.3ad standard) +/* timers types(43.4.9 in the 802.3ad standard) */  typedef enum {  	AD_CURRENT_WHILE_TIMER,  	AD_ACTOR_CHURN_TIMER, @@ -105,62 +105,62 @@ typedef enum {  #pragma pack(1) -// Link Aggregation Control Protocol(LACP) data unit structure(43.4.2.2 in the 802.3ad standard) +/* Link Aggregation Control Protocol(LACP) data unit structure(43.4.2.2 in the 802.3ad standard) */  typedef struct lacpdu { -	u8 subtype;		     // = LACP(= 0x01) +	u8 subtype;		/* = LACP(= 0x01) */  	u8 version_number; -	u8 tlv_type_actor_info;	      // = actor information(type/length/value) -	u8 actor_information_length; // = 20 +	u8 tlv_type_actor_info;	/* = actor information(type/length/value) */ +	u8 actor_information_length;	/* = 20 */  	__be16 actor_system_priority;  	struct mac_addr actor_system;  	__be16 actor_key;  	__be16 actor_port_priority;  	__be16 actor_port;  	u8 actor_state; -	u8 reserved_3_1[3];	     // = 0 -	u8 tlv_type_partner_info;     // = partner information -	u8 partner_information_length;	 // = 20 +	u8 reserved_3_1[3];		/* = 0 */ +	u8 tlv_type_partner_info;	/* = partner information */ +	u8 partner_information_length;	/* = 20 */  	__be16 partner_system_priority;  	struct mac_addr partner_system;  	__be16 partner_key;  	__be16 partner_port_priority;  	__be16 partner_port;  	u8 partner_state; -	u8 reserved_3_2[3];	     // = 0 -	u8 tlv_type_collector_info;	  // = collector information -	u8 collector_information_length; // = 16 +	u8 reserved_3_2[3];		/* = 0 */ +	u8 tlv_type_collector_info;	/* = collector information */ +	u8 collector_information_length;/* = 16 */  	__be16 collector_max_delay;  	u8 reserved_12[12]; -	u8 tlv_type_terminator;	     // = terminator -	u8 terminator_length;	     // = 0 -	u8 reserved_50[50];	     // = 0 -} lacpdu_t; +	u8 tlv_type_terminator;		/* = terminator */ +	u8 terminator_length;		/* = 0 */ +	u8 reserved_50[50];		/* = 0 */ +} __packed lacpdu_t;  typedef struct lacpdu_header {  	struct ethhdr hdr;  	struct lacpdu lacpdu; -} lacpdu_header_t; +} __packed lacpdu_header_t; -// Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard) +/* Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard) */  typedef struct bond_marker { -	u8 subtype;		 //  = 0x02  (marker PDU) -	u8 version_number;	 //  = 0x01 -	u8 tlv_type;		 //  = 0x01  (marker information) -	//  = 0x02  (marker response information) -	u8 marker_length;	 //  = 0x16 -	u16 requester_port;	 //   The number assigned to the port by the requester -	struct mac_addr requester_system;      //   The requester's system id -	u32 requester_transaction_id;	//   The transaction id allocated by the requester, -	u16 pad;		 //  = 0 -	u8 tlv_type_terminator;	     //  = 0x00 -	u8 terminator_length;	     //  = 0x00 -	u8 reserved_90[90];	     //  = 0 -} bond_marker_t; +	u8 subtype;		/* = 0x02  (marker PDU) */ +	u8 version_number;	/* = 0x01 */ +	u8 tlv_type;		/* = 0x01  (marker information) */ +	/* = 0x02  (marker response information) */ +	u8 marker_length;	/* = 0x16 */ +	u16 requester_port;	/* The number assigned to the port by the requester */ +	struct mac_addr requester_system;	/* The requester's system id */ +	u32 requester_transaction_id;		/* The transaction id allocated by the requester, */ +	u16 pad;		/* = 0 */ +	u8 tlv_type_terminator;	/* = 0x00 */ +	u8 terminator_length;	/* = 0x00 */ +	u8 reserved_90[90];	/* = 0 */ +} __packed bond_marker_t;  typedef struct bond_marker_header {  	struct ethhdr hdr;  	struct bond_marker marker; -} 
bond_marker_header_t; +} __packed bond_marker_header_t;  #pragma pack() @@ -173,7 +173,7 @@ struct port;  #pragma pack(8)  #endif -// aggregator structure(43.4.5 in the 802.3ad standard) +/* aggregator structure(43.4.5 in the 802.3ad standard) */  typedef struct aggregator {  	struct mac_addr aggregator_mac_address;  	u16 aggregator_identifier; @@ -183,12 +183,12 @@ typedef struct aggregator {  	struct mac_addr partner_system;  	u16 partner_system_priority;  	u16 partner_oper_aggregator_key; -	u16 receive_state;		// BOOLEAN -	u16 transmit_state;		// BOOLEAN +	u16 receive_state;	/* BOOLEAN */ +	u16 transmit_state;	/* BOOLEAN */  	struct port *lag_ports; -	// ****** PRIVATE PARAMETERS ****** -	struct slave *slave;	    // pointer to the bond slave that this aggregator belongs to -	u16 is_active;	    // BOOLEAN. Indicates if this aggregator is active +	/* ****** PRIVATE PARAMETERS ****** */ +	struct slave *slave;	/* pointer to the bond slave that this aggregator belongs to */ +	u16 is_active;		/* BOOLEAN. Indicates if this aggregator is active */  	u16 num_of_ports;  } aggregator_t; @@ -201,12 +201,12 @@ struct port_params {  	u16 port_state;  }; -// port structure(43.4.6 in the 802.3ad standard) +/* port structure(43.4.6 in the 802.3ad standard) */  typedef struct port {  	u16 actor_port_number;  	u16 actor_port_priority; -	struct mac_addr actor_system;	       // This parameter is added here although it is not specified in the standard, just for simplification -	u16 actor_system_priority;	 // This parameter is added here although it is not specified in the standard, just for simplification +	struct mac_addr actor_system;	/* This parameter is added here although it is not specified in the standard, just for simplification */ +	u16 actor_system_priority;	/* This parameter is added here although it is not specified in the standard, just for simplification */  	u16 actor_port_aggregator_identifier;  	bool ntt;  	u16 actor_admin_port_key; @@ -219,24 +219,24 @@ typedef struct port {  	bool is_enabled; -	// ****** PRIVATE PARAMETERS ****** -	u16 sm_vars;	      // all state machines variables for this port -	rx_states_t sm_rx_state;	// state machine rx state -	u16 sm_rx_timer_counter;    // state machine rx timer counter -	periodic_states_t sm_periodic_state;// state machine periodic state -	u16 sm_periodic_timer_counter;	// state machine periodic timer counter -	mux_states_t sm_mux_state;	// state machine mux state -	u16 sm_mux_timer_counter;   // state machine mux timer counter -	tx_states_t sm_tx_state;	// state machine tx state -	u16 sm_tx_timer_counter;    // state machine tx timer counter(allways on - enter to transmit state 3 time per second) -	struct slave *slave;	    // pointer to the bond slave that this port belongs to -	struct aggregator *aggregator;	   // pointer to an aggregator that this port related to -	struct port *next_port_in_aggregator; // Next port on the linked list of the parent aggregator -	u32 transaction_id;	    // continuous number for identification of Marker PDU's; -	struct lacpdu lacpdu;	       // the lacpdu that will be sent for this port +	/* ****** PRIVATE PARAMETERS ****** */ +	u16 sm_vars;		/* all state machines variables for this port */ +	rx_states_t sm_rx_state;	/* state machine rx state */ +	u16 sm_rx_timer_counter;	/* state machine rx timer counter */ +	periodic_states_t sm_periodic_state;	/* state machine periodic state */ +	u16 sm_periodic_timer_counter;	/* state machine periodic timer counter */ +	mux_states_t sm_mux_state;	/* state machine mux state */ +	
u16 sm_mux_timer_counter;	/* state machine mux timer counter */ +	tx_states_t sm_tx_state;	/* state machine tx state */ +	u16 sm_tx_timer_counter;	/* state machine tx timer counter(allways on - enter to transmit state 3 time per second) */ +	struct slave *slave;		/* pointer to the bond slave that this port belongs to */ +	struct aggregator *aggregator;	/* pointer to an aggregator that this port related to */ +	struct port *next_port_in_aggregator;	/* Next port on the linked list of the parent aggregator */ +	u32 transaction_id;		/* continuous number for identification of Marker PDU's; */ +	struct lacpdu lacpdu;		/* the lacpdu that will be sent for this port */  } port_t; -// system structure +/* system structure */  struct ad_system {  	u16 sys_priority;  	struct mac_addr sys_mac_addr; @@ -246,31 +246,26 @@ struct ad_system {  #pragma pack()  #endif -// ================= AD Exported structures to the main bonding code ================== +/* ========== AD Exported structures to the main bonding code ========== */  #define BOND_AD_INFO(bond)   ((bond)->ad_info)  #define SLAVE_AD_INFO(slave) ((slave)->ad_info)  struct ad_bond_info { -	struct ad_system system;	    /* 802.3ad system structure */ -	u32 agg_select_timer;	    // Timer to select aggregator after all adapter's hand shakes -	u32 agg_select_mode;	    // Mode of selection of active aggregator(bandwidth/count) -	int lacp_fast;		/* whether fast periodic tx should be -				 * requested -				 */ -	struct timer_list ad_timer; -	struct packet_type ad_pkt_type; +	struct ad_system system;	/* 802.3ad system structure */ +	u32 agg_select_timer;		/* Timer to select aggregator after all adapter's hand shakes */ +	u16 aggregator_identifier;  };  struct ad_slave_info { -	struct aggregator aggregator;	    // 802.3ad aggregator structure -	struct port port;		    // 802.3ad port structure -	spinlock_t rx_machine_lock; // To avoid race condition between callback and receive interrupt +	struct aggregator aggregator;	/* 802.3ad aggregator structure */ +	struct port port;		/* 802.3ad port structure */ +	spinlock_t state_machine_lock;	/* mutex state machines vs. 
incoming LACPDU */  	u16 id;  }; -// ================= AD Exported functions to the main bonding code ================== -void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution, int lacp_fast); -int  bond_3ad_bind_slave(struct slave *slave); +/* ========== AD Exported functions to the main bonding code ========== */ +void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution); +void bond_3ad_bind_slave(struct slave *slave);  void bond_3ad_unbind_slave(struct slave *slave);  void bond_3ad_state_machine_handler(struct work_struct *);  void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout); @@ -278,8 +273,12 @@ void bond_3ad_adapter_speed_changed(struct slave *slave);  void bond_3ad_adapter_duplex_changed(struct slave *slave);  void bond_3ad_handle_link_change(struct slave *slave, char link);  int  bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info); +int  __bond_3ad_get_active_agg_info(struct bonding *bond, +				    struct ad_info *ad_info);  int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev); -int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype, struct net_device *orig_dev); +int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond, +			 struct slave *slave);  int bond_3ad_set_carrier(struct bonding *bond); -#endif //__BOND_3AD_H__ +void bond_3ad_update_lacp_rate(struct bonding *bond); +#endif /* __BOND_3AD_H__ */ diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 26bb118c453..76c0dade233 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -12,8 +12,7 @@   * for more details.   *   * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA. + * with this program; if not, see <http://www.gnu.org/licenses/>.   *   * The full GNU General Public License is included in this distribution in the   * file called LICENSE. @@ -44,42 +43,6 @@  #include "bond_alb.h" -#define ALB_TIMER_TICKS_PER_SEC	    10	/* should be a divisor of HZ */ -#define BOND_TLB_REBALANCE_INTERVAL 10	/* In seconds, periodic re-balancing. -					 * Used for division - never set -					 * to zero !!! -					 */ -#define BOND_ALB_LP_INTERVAL	    1	/* In seconds, periodic send of -					 * learning packets to the switch -					 */ - -#define BOND_TLB_REBALANCE_TICKS (BOND_TLB_REBALANCE_INTERVAL \ -				  * ALB_TIMER_TICKS_PER_SEC) - -#define BOND_ALB_LP_TICKS (BOND_ALB_LP_INTERVAL \ -			   * ALB_TIMER_TICKS_PER_SEC) - -#define TLB_HASH_TABLE_SIZE 256	/* The size of the clients hash table. -				 * Note that this value MUST NOT be smaller -				 * because the key hash table is BYTE wide ! -				 */ - - -#define TLB_NULL_INDEX		0xffffffff -#define MAX_LP_BURST		3 - -/* rlb defs */ -#define RLB_HASH_TABLE_SIZE	256 -#define RLB_NULL_INDEX		0xffffffff -#define RLB_UPDATE_DELAY	2*ALB_TIMER_TICKS_PER_SEC /* 2 seconds */ -#define RLB_ARP_BURST_SIZE	2 -#define RLB_UPDATE_RETRY	3	/* 3-ticks - must be smaller than the rlb -					 * rebalance interval (5 min). 
-					 */ -/* RLB_PROMISC_TIMEOUT = 10 sec equals the time that the current slave is - * promiscuous after failover - */ -#define RLB_PROMISC_TIMEOUT	10*ALB_TIMER_TICKS_PER_SEC  #ifndef __long_aligned  #define __long_aligned __attribute__((aligned((sizeof(long))))) @@ -119,32 +82,46 @@ static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)  }  /* Forward declaration */ -static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]); +static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], +				      bool strict_match); +static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp); +static void rlb_src_unlink(struct bonding *bond, u32 index); +static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, +			 u32 ip_dst_hash);  static inline u8 _simple_hash(const u8 *hash_start, int hash_size)  {  	int i;  	u8 hash = 0; -	for (i = 0; i < hash_size; i++) { +	for (i = 0; i < hash_size; i++)  		hash ^= hash_start[i]; -	}  	return hash;  }  /*********************** tlb specific functions ***************************/ -static inline void _lock_tx_hashtbl(struct bonding *bond) +static inline void _lock_tx_hashtbl_bh(struct bonding *bond)  {  	spin_lock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));  } -static inline void _unlock_tx_hashtbl(struct bonding *bond) +static inline void _unlock_tx_hashtbl_bh(struct bonding *bond)  {  	spin_unlock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));  } +static inline void _lock_tx_hashtbl(struct bonding *bond) +{ +	spin_lock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); +} + +static inline void _unlock_tx_hashtbl(struct bonding *bond) +{ +	spin_unlock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); +} +  /* Caller must hold tx_hashtbl lock */  static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load)  { @@ -165,14 +142,13 @@ static inline void tlb_init_slave(struct slave *slave)  	SLAVE_TLB_INFO(slave).head = TLB_NULL_INDEX;  } -/* Caller must hold bond lock for read */ -static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_load) +/* Caller must hold bond lock for read, BH disabled */ +static void __tlb_clear_slave(struct bonding *bond, struct slave *slave, +			 int save_load)  {  	struct tlb_client_info *tx_hash_table;  	u32 index; -	_lock_tx_hashtbl(bond); -  	/* clear slave from tx_hashtbl */  	tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl; @@ -187,8 +163,15 @@ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_  	}  	tlb_init_slave(slave); +} -	_unlock_tx_hashtbl(bond); +/* Caller must hold bond lock for read */ +static void tlb_clear_slave(struct bonding *bond, struct slave *slave, +			 int save_load) +{ +	_lock_tx_hashtbl_bh(bond); +	__tlb_clear_slave(bond, slave, save_load); +	_unlock_tx_hashtbl_bh(bond);  }  /* Must be called before starting the monitor timer */ @@ -199,23 +182,18 @@ static int tlb_initialize(struct bonding *bond)  	struct tlb_client_info *new_hashtbl;  	int i; -	spin_lock_init(&(bond_info->tx_hashtbl_lock)); -  	new_hashtbl = kzalloc(size, GFP_KERNEL); -	if (!new_hashtbl) { -		pr_err("%s: Error: Failed to allocate TLB hash table\n", -		       bond->dev->name); +	if (!new_hashtbl)  		return -1; -	} -	_lock_tx_hashtbl(bond); + +	_lock_tx_hashtbl_bh(bond);  	bond_info->tx_hashtbl = new_hashtbl; -	for (i = 0; i < TLB_HASH_TABLE_SIZE; i++) { -		tlb_init_table_entry(&bond_info->tx_hashtbl[i], 1); -	} +	for (i = 0; i < TLB_HASH_TABLE_SIZE; i++) +		tlb_init_table_entry(&bond_info->tx_hashtbl[i], 0); -	_unlock_tx_hashtbl(bond); 
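
Editorial note: the tlb_clear_slave() change above follows a common kernel refactoring pattern: the body moves into a double-underscore helper that assumes the hash-table lock is already held, and the original name becomes a thin wrapper that takes and releases the lock itself. A minimal userspace rendering of that pattern, with a pthread mutex standing in for tx_hashtbl_lock; clear_table()/__clear_table() are invented names, and the sketch cannot show the separate _bh (bottom halves disabled) locking variants also introduced here.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t tbl_lock = PTHREAD_MUTEX_INITIALIZER;
static int tbl_entries;

/* __clear_table: caller must already hold tbl_lock */
static void __clear_table(void)
{
	tbl_entries = 0;
}

/* clear_table: locking wrapper for callers that do not hold the lock */
static void clear_table(void)
{
	pthread_mutex_lock(&tbl_lock);
	__clear_table();
	pthread_mutex_unlock(&tbl_lock);
}

int main(void)
{
	tbl_entries = 5;
	clear_table();
	printf("entries after clear: %d\n", tbl_entries);	/* prints 0 */
	return 0;
}
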
+	_unlock_tx_hashtbl_bh(bond);  	return 0;  } @@ -225,12 +203,12 @@ static void tlb_deinitialize(struct bonding *bond)  {  	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); -	_lock_tx_hashtbl(bond); +	_lock_tx_hashtbl_bh(bond);  	kfree(bond_info->tx_hashtbl);  	bond_info->tx_hashtbl = NULL; -	_unlock_tx_hashtbl(bond); +	_unlock_tx_hashtbl_bh(bond);  }  static long long compute_gap(struct slave *slave) @@ -243,15 +221,15 @@ static long long compute_gap(struct slave *slave)  static struct slave *tlb_get_least_loaded_slave(struct bonding *bond)  {  	struct slave *slave, *least_loaded; +	struct list_head *iter;  	long long max_gap; -	int i;  	least_loaded = NULL;  	max_gap = LLONG_MIN;  	/* Find the slave with the largest gap */ -	bond_for_each_slave(bond, slave, i) { -		if (SLAVE_IS_OK(slave)) { +	bond_for_each_slave_rcu(bond, slave, iter) { +		if (bond_slave_can_tx(slave)) {  			long long gap = compute_gap(slave);  			if (max_gap < gap) { @@ -264,15 +242,13 @@ static struct slave *tlb_get_least_loaded_slave(struct bonding *bond)  	return least_loaded;  } -/* Caller must hold bond lock for read */ -static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len) +static struct slave *__tlb_choose_channel(struct bonding *bond, u32 hash_index, +						u32 skb_len)  {  	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));  	struct tlb_client_info *hash_table;  	struct slave *assigned_slave; -	_lock_tx_hashtbl(bond); -  	hash_table = bond_info->tx_hashtbl;  	assigned_slave = hash_table[hash_index].tx_slave;  	if (!assigned_slave) { @@ -287,9 +263,8 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u3  			hash_table[hash_index].next = next_index;  			hash_table[hash_index].prev = TLB_NULL_INDEX; -			if (next_index != TLB_NULL_INDEX) { +			if (next_index != TLB_NULL_INDEX)  				hash_table[next_index].prev = hash_index; -			}  			slave_info->head = hash_index;  			slave_info->load += @@ -297,26 +272,49 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u3  		}  	} -	if (assigned_slave) { +	if (assigned_slave)  		hash_table[hash_index].tx_bytes += skb_len; -	} - -	_unlock_tx_hashtbl(bond);  	return assigned_slave;  } +/* Caller must hold bond lock for read */ +static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, +					u32 skb_len) +{ +	struct slave *tx_slave; +	/* +	 * We don't need to disable softirq here, becase +	 * tlb_choose_channel() is only called by bond_alb_xmit() +	 * which already has softirq disabled. 
+	 */ +	_lock_tx_hashtbl(bond); +	tx_slave = __tlb_choose_channel(bond, hash_index, skb_len); +	_unlock_tx_hashtbl(bond); +	return tx_slave; +} +  /*********************** rlb specific functions ***************************/ -static inline void _lock_rx_hashtbl(struct bonding *bond) +static inline void _lock_rx_hashtbl_bh(struct bonding *bond)  {  	spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));  } -static inline void _unlock_rx_hashtbl(struct bonding *bond) +static inline void _unlock_rx_hashtbl_bh(struct bonding *bond)  {  	spin_unlock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));  } +static inline void _lock_rx_hashtbl(struct bonding *bond) +{ +	spin_lock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); +} + +static inline void _unlock_rx_hashtbl(struct bonding *bond) +{ +	spin_unlock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); +} +  /* when an ARP REPLY is received from a client update its info   * in the rx_hashtbl   */ @@ -326,93 +324,119 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)  	struct rlb_client_info *client_info;  	u32 hash_index; -	_lock_rx_hashtbl(bond); +	_lock_rx_hashtbl_bh(bond); -	hash_index = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src)); +	hash_index = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src));  	client_info = &(bond_info->rx_hashtbl[hash_index]);  	if ((client_info->assigned) &&  	    (client_info->ip_src == arp->ip_dst) &&  	    (client_info->ip_dst == arp->ip_src) && -	    (compare_ether_addr_64bits(client_info->mac_dst, arp->mac_src))) { +	    (!ether_addr_equal_64bits(client_info->mac_dst, arp->mac_src))) {  		/* update the clients MAC address */ -		memcpy(client_info->mac_dst, arp->mac_src, ETH_ALEN); +		ether_addr_copy(client_info->mac_dst, arp->mac_src);  		client_info->ntt = 1;  		bond_info->rx_ntt = 1;  	} -	_unlock_rx_hashtbl(bond); +	_unlock_rx_hashtbl_bh(bond);  } -static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype, struct net_device *orig_dev) +static int rlb_arp_recv(const struct sk_buff *skb, struct bonding *bond, +			struct slave *slave)  { -	struct bonding *bond; -	struct arp_pkt *arp = (struct arp_pkt *)skb->data; -	int res = NET_RX_DROP; - -	while (bond_dev->priv_flags & IFF_802_1Q_VLAN) -		bond_dev = vlan_dev_real_dev(bond_dev); +	struct arp_pkt *arp, _arp; -	if (!(bond_dev->priv_flags & IFF_BONDING) || -	    !(bond_dev->flags & IFF_MASTER)) +	if (skb->protocol != cpu_to_be16(ETH_P_ARP))  		goto out; -	if (!arp) { -		pr_debug("Packet has no ARP data\n"); -		goto out; -	} - -	if (!pskb_may_pull(skb, arp_hdr_len(bond_dev))) +	arp = skb_header_pointer(skb, 0, sizeof(_arp), &_arp); +	if (!arp)  		goto out; -	if (skb->len < sizeof(struct arp_pkt)) { -		pr_debug("Packet is too small to be an ARP\n"); -		goto out; -	} +	/* We received an ARP from arp->ip_src. +	 * We might have used this IP address previously (on the bonding host +	 * itself or on a system that is bridged together with the bond). +	 * However, if arp->mac_src is different than what is stored in +	 * rx_hashtbl, some other host is now using the IP and we must prevent +	 * sending out client updates with this IP address and the old MAC +	 * address. +	 * Clean up all hash table entries that have this address as ip_src but +	 * have a different mac_src. 
+	 */ +	rlb_purge_src_ip(bond, arp);  	if (arp->op_code == htons(ARPOP_REPLY)) {  		/* update rx hash table for this ARP */ -		bond = netdev_priv(bond_dev);  		rlb_update_entry_from_arp(bond, arp);  		pr_debug("Server received an ARP Reply from client\n");  	} - -	res = NET_RX_SUCCESS; -  out: -	dev_kfree_skb(skb); - -	return res; +	return RX_HANDLER_ANOTHER;  }  /* Caller must hold bond lock for read */  static struct slave *rlb_next_rx_slave(struct bonding *bond)  {  	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); -	struct slave *rx_slave, *slave, *start_at; -	int i = 0; +	struct slave *before = NULL, *rx_slave = NULL, *slave; +	struct list_head *iter; +	bool found = false; -	if (bond_info->next_rx_slave) { -		start_at = bond_info->next_rx_slave; -	} else { -		start_at = bond->first_slave; +	bond_for_each_slave(bond, slave, iter) { +		if (!bond_slave_can_tx(slave)) +			continue; +		if (!found) { +			if (!before || before->speed < slave->speed) +				before = slave; +		} else { +			if (!rx_slave || rx_slave->speed < slave->speed) +				rx_slave = slave; +		} +		if (slave == bond_info->rx_slave) +			found = true;  	} +	/* we didn't find anything after the current or we have something +	 * better before and up to the current slave +	 */ +	if (!rx_slave || (before && rx_slave->speed < before->speed)) +		rx_slave = before; -	rx_slave = NULL; +	if (rx_slave) +		bond_info->rx_slave = rx_slave; -	bond_for_each_slave_from(bond, slave, i, start_at) { -		if (SLAVE_IS_OK(slave)) { -			if (!rx_slave) { -				rx_slave = slave; -			} else if (slave->speed > rx_slave->speed) { +	return rx_slave; +} + +/* Caller must hold rcu_read_lock() for read */ +static struct slave *__rlb_next_rx_slave(struct bonding *bond) +{ +	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); +	struct slave *before = NULL, *rx_slave = NULL, *slave; +	struct list_head *iter; +	bool found = false; + +	bond_for_each_slave_rcu(bond, slave, iter) { +		if (!bond_slave_can_tx(slave)) +			continue; +		if (!found) { +			if (!before || before->speed < slave->speed) +				before = slave; +		} else { +			if (!rx_slave || rx_slave->speed < slave->speed)  				rx_slave = slave; -			}  		} +		if (slave == bond_info->rx_slave) +			found = true;  	} +	/* we didn't find anything after the current or we have something +	 * better before and up to the current slave +	 */ +	if (!rx_slave || (before && rx_slave->speed < before->speed)) +		rx_slave = before; -	if (rx_slave) { -		bond_info->next_rx_slave = rx_slave->next; -	} +	if (rx_slave) +		bond_info->rx_slave = rx_slave;  	return rx_slave;  } @@ -424,9 +448,8 @@ static struct slave *rlb_next_rx_slave(struct bonding *bond)   */  static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[])  { -	if (!bond->curr_active_slave) { +	if (!bond->curr_active_slave)  		return; -	}  	if (!bond->alb_info.primary_is_promisc) {  		if (!dev_set_promiscuity(bond->curr_active_slave->dev, 1)) @@ -437,12 +460,12 @@ static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[])  	bond->alb_info.rlb_promisc_timeout_counter = 0; -	alb_send_learning_packets(bond->curr_active_slave, addr); +	alb_send_learning_packets(bond->curr_active_slave, addr, true);  }  /* slave being removed should not be active at this point   * - * Caller must hold bond lock for read + * Caller must hold rtnl.   
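
/* Standalone sketch (not kernel code) of the selection rule used by
 * rlb_next_rx_slave()/__rlb_next_rx_slave() above: prefer the fastest usable
 * slave that comes after the currently assigned one in list order, unless a
 * strictly faster usable slave exists at or before it.  The slave array,
 * speeds and names below are invented test data.
 */
#include <stdio.h>
#include <stdbool.h>

struct fake_slave {
	const char *name;
	int speed;	/* arbitrary units */
	bool can_tx;
};

static int next_rx_slave(const struct fake_slave *s, int n, int current)
{
	int before = -1, after = -1;
	bool found = false;
	int i;

	for (i = 0; i < n; i++) {
		if (!s[i].can_tx)
			continue;
		if (!found) {
			if (before < 0 || s[before].speed < s[i].speed)
				before = i;
		} else {
			if (after < 0 || s[after].speed < s[i].speed)
				after = i;
		}
		if (i == current)
			found = true;
	}
	/* nothing usable after the current one, or something faster before */
	if (after < 0 || (before >= 0 && s[after].speed < s[before].speed))
		after = before;
	return after;
}

int main(void)
{
	struct fake_slave slaves[] = {
		{ "eth0", 1000, true },
		{ "eth1",  100, true },
		{ "eth2", 1000, false },	/* link down: skipped */
		{ "eth3",  100, true },
	};
	int cur = 1;	/* currently assigned rx slave: eth1 */
	int next = next_rx_slave(slaves, 4, cur);

	/* picks eth0 here: nothing faster comes after eth1 */
	printf("next rx slave: %s\n", next >= 0 ? slaves[next].name : "none");
	return 0;
}
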
*/  static void rlb_clear_slave(struct bonding *bond, struct slave *slave)  { @@ -451,19 +474,19 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)  	u32 index, next_index;  	/* clear slave from rx_hashtbl */ -	_lock_rx_hashtbl(bond); +	_lock_rx_hashtbl_bh(bond);  	rx_hash_table = bond_info->rx_hashtbl; -	index = bond_info->rx_hashtbl_head; +	index = bond_info->rx_hashtbl_used_head;  	for (; index != RLB_NULL_INDEX; index = next_index) { -		next_index = rx_hash_table[index].next; +		next_index = rx_hash_table[index].used_next;  		if (rx_hash_table[index].slave == slave) {  			struct slave *assigned_slave = rlb_next_rx_slave(bond);  			if (assigned_slave) {  				rx_hash_table[index].slave = assigned_slave; -				if (compare_ether_addr_64bits(rx_hash_table[index].mac_dst, -							      mac_bcast)) { +				if (!ether_addr_equal_64bits(rx_hash_table[index].mac_dst, +							     mac_bcast)) {  					bond_info->rx_hashtbl[index].ntt = 1;  					bond_info->rx_ntt = 1;  					/* A slave has been removed from the @@ -482,13 +505,12 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)  		}  	} -	_unlock_rx_hashtbl(bond); +	_unlock_rx_hashtbl_bh(bond);  	write_lock_bh(&bond->curr_slave_lock); -	if (slave != bond->curr_active_slave) { +	if (slave != bond->curr_active_slave)  		rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr); -	}  	write_unlock_bh(&bond->curr_slave_lock);  } @@ -497,9 +519,8 @@ static void rlb_update_client(struct rlb_client_info *client_info)  {  	int i; -	if (!client_info->slave) { +	if (!client_info->slave)  		return; -	}  	for (i = 0; i < RLB_ARP_BURST_SIZE; i++) {  		struct sk_buff *skb; @@ -513,17 +534,17 @@ static void rlb_update_client(struct rlb_client_info *client_info)  				 client_info->mac_dst);  		if (!skb) {  			pr_err("%s: Error: failed to create an ARP packet\n", -			       client_info->slave->dev->master->name); +			       client_info->slave->bond->dev->name);  			continue;  		}  		skb->dev = client_info->slave->dev; -		if (client_info->tag) { -			skb = vlan_put_tag(skb, client_info->vlan_id); +		if (client_info->vlan_id) { +			skb = vlan_put_tag(skb, htons(ETH_P_8021Q), client_info->vlan_id);  			if (!skb) {  				pr_err("%s: Error: failed to insert VLAN tag\n", -				       client_info->slave->dev->master->name); +				       client_info->slave->bond->dev->name);  				continue;  			}  		} @@ -539,16 +560,16 @@ static void rlb_update_rx_clients(struct bonding *bond)  	struct rlb_client_info *client_info;  	u32 hash_index; -	_lock_rx_hashtbl(bond); +	_lock_rx_hashtbl_bh(bond); -	hash_index = bond_info->rx_hashtbl_head; -	for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { +	hash_index = bond_info->rx_hashtbl_used_head; +	for (; hash_index != RLB_NULL_INDEX; +	     hash_index = client_info->used_next) {  		client_info = &(bond_info->rx_hashtbl[hash_index]);  		if (client_info->ntt) {  			rlb_update_client(client_info); -			if (bond_info->rlb_update_retry_counter == 0) { +			if (bond_info->rlb_update_retry_counter == 0)  				client_info->ntt = 0; -			}  		}  	} @@ -557,7 +578,7 @@ static void rlb_update_rx_clients(struct bonding *bond)  	 */  	bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY; -	_unlock_rx_hashtbl(bond); +	_unlock_rx_hashtbl_bh(bond);  }  /* The slave was assigned a new mac address - update the clients */ @@ -568,27 +589,28 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla  	int ntt = 0;  	u32 hash_index; -	_lock_rx_hashtbl(bond); +	
_lock_rx_hashtbl_bh(bond); -	hash_index = bond_info->rx_hashtbl_head; -	for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { +	hash_index = bond_info->rx_hashtbl_used_head; +	for (; hash_index != RLB_NULL_INDEX; +	     hash_index = client_info->used_next) {  		client_info = &(bond_info->rx_hashtbl[hash_index]);  		if ((client_info->slave == slave) && -		    compare_ether_addr_64bits(client_info->mac_dst, mac_bcast)) { +		    !ether_addr_equal_64bits(client_info->mac_dst, mac_bcast)) {  			client_info->ntt = 1;  			ntt = 1;  		}  	} -	// update the team's flag only after the whole iteration +	/* update the team's flag only after the whole iteration */  	if (ntt) {  		bond_info->rx_ntt = 1; -		//fasten the change +		/* fasten the change */  		bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY;  	} -	_unlock_rx_hashtbl(bond); +	_unlock_rx_hashtbl_bh(bond);  }  /* mark all clients using src_ip to be updated */ @@ -600,8 +622,9 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)  	_lock_rx_hashtbl(bond); -	hash_index = bond_info->rx_hashtbl_head; -	for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { +	hash_index = bond_info->rx_hashtbl_used_head; +	for (; hash_index != RLB_NULL_INDEX; +	     hash_index = client_info->used_next) {  		client_info = &(bond_info->rx_hashtbl[hash_index]);  		if (!client_info->slave) { @@ -614,9 +637,9 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)  		 * unicast mac address.  		 */  		if ((client_info->ip_src == src_ip) && -		    compare_ether_addr_64bits(client_info->slave->dev->dev_addr, -			   bond->dev->dev_addr) && -		    compare_ether_addr_64bits(client_info->mac_dst, mac_bcast)) { +		    !ether_addr_equal_64bits(client_info->slave->dev->dev_addr, +					     bond->dev->dev_addr) && +		    !ether_addr_equal_64bits(client_info->mac_dst, mac_bcast)) {  			client_info->ntt = 1;  			bond_info->rx_ntt = 1;  		} @@ -630,23 +653,26 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon  {  	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));  	struct arp_pkt *arp = arp_pkt(skb); -	struct slave *assigned_slave; +	struct slave *assigned_slave, *curr_active_slave;  	struct rlb_client_info *client_info;  	u32 hash_index = 0;  	_lock_rx_hashtbl(bond); -	hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_src)); +	curr_active_slave = rcu_dereference(bond->curr_active_slave); + +	hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_dst));  	client_info = &(bond_info->rx_hashtbl[hash_index]);  	if (client_info->assigned) {  		if ((client_info->ip_src == arp->ip_src) &&  		    (client_info->ip_dst == arp->ip_dst)) {  			/* the entry is already assigned to this client */ -			if (compare_ether_addr_64bits(arp->mac_dst, mac_bcast)) { +			if (!ether_addr_equal_64bits(arp->mac_dst, mac_bcast)) {  				/* update mac address from arp */ -				memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN); +				ether_addr_copy(client_info->mac_dst, arp->mac_dst);  			} +			ether_addr_copy(client_info->mac_src, arp->mac_src);  			assigned_slave = client_info->slave;  			if (assigned_slave) { @@ -659,43 +685,53 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon  			 * that the new client can be assigned to this entry.  			 
*/  			if (bond->curr_active_slave && -			    client_info->slave != bond->curr_active_slave) { -				client_info->slave = bond->curr_active_slave; +			    client_info->slave != curr_active_slave) { +				client_info->slave = curr_active_slave;  				rlb_update_client(client_info);  			}  		}  	}  	/* assign a new slave */ -	assigned_slave = rlb_next_rx_slave(bond); +	assigned_slave = __rlb_next_rx_slave(bond);  	if (assigned_slave) { +		if (!(client_info->assigned && +		      client_info->ip_src == arp->ip_src)) { +			/* ip_src is going to be updated, +			 * fix the src hash list +			 */ +			u32 hash_src = _simple_hash((u8 *)&arp->ip_src, +						    sizeof(arp->ip_src)); +			rlb_src_unlink(bond, hash_index); +			rlb_src_link(bond, hash_src, hash_index); +		} +  		client_info->ip_src = arp->ip_src;  		client_info->ip_dst = arp->ip_dst;  		/* arp->mac_dst is broadcast for arp reqeusts.  		 * will be updated with clients actual unicast mac address  		 * upon receiving an arp reply.  		 */ -		memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN); +		ether_addr_copy(client_info->mac_dst, arp->mac_dst); +		ether_addr_copy(client_info->mac_src, arp->mac_src);  		client_info->slave = assigned_slave; -		if (compare_ether_addr_64bits(client_info->mac_dst, mac_bcast)) { +		if (!ether_addr_equal_64bits(client_info->mac_dst, mac_bcast)) {  			client_info->ntt = 1;  			bond->alb_info.rx_ntt = 1;  		} else {  			client_info->ntt = 0;  		} -		if (bond->vlgrp) { -			if (!vlan_get_tag(skb, &client_info->vlan_id)) -				client_info->tag = 1; -		} +		if (vlan_get_tag(skb, &client_info->vlan_id)) +			client_info->vlan_id = 0;  		if (!client_info->assigned) { -			u32 prev_tbl_head = bond_info->rx_hashtbl_head; -			bond_info->rx_hashtbl_head = hash_index; -			client_info->next = prev_tbl_head; +			u32 prev_tbl_head = bond_info->rx_hashtbl_used_head; +			bond_info->rx_hashtbl_used_head = hash_index; +			client_info->used_next = prev_tbl_head;  			if (prev_tbl_head != RLB_NULL_INDEX) { -				bond_info->rx_hashtbl[prev_tbl_head].prev = +				bond_info->rx_hashtbl[prev_tbl_head].used_prev =  					hash_index;  			}  			client_info->assigned = 1; @@ -716,14 +752,19 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)  	struct arp_pkt *arp = arp_pkt(skb);  	struct slave *tx_slave = NULL; +	/* Don't modify or load balance ARPs that do not originate locally +	 * (e.g.,arrive via a bridge). +	 */ +	if (!bond_slave_has_mac_rx(bond, arp->mac_src)) +		return NULL; +  	if (arp->op_code == htons(ARPOP_REPLY)) {  		/* the arp must be sent on the selected  		* rx channel  		*/  		tx_slave = rlb_choose_channel(skb, bond); -		if (tx_slave) { -			memcpy(arp->mac_src,tx_slave->dev->dev_addr, ETH_ALEN); -		} +		if (tx_slave) +			ether_addr_copy(arp->mac_src, tx_slave->dev->dev_addr);  		pr_debug("Server sent ARP Reply packet\n");  	} else if (arp->op_code == htons(ARPOP_REQUEST)) {  		/* Create an entry in the rx_hashtbl for this client as a @@ -733,7 +774,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)  		 */  		rlb_choose_channel(skb, bond); -		/* The ARP relpy packets must be delayed so that +		/* The ARP reply packets must be delayed so that  		 * they can cancel out the influence of the ARP request.  		 
*/  		bond->alb_info.rlb_update_delay_counter = RLB_UPDATE_DELAY; @@ -759,13 +800,14 @@ static void rlb_rebalance(struct bonding *bond)  	int ntt;  	u32 hash_index; -	_lock_rx_hashtbl(bond); +	_lock_rx_hashtbl_bh(bond);  	ntt = 0; -	hash_index = bond_info->rx_hashtbl_head; -	for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { +	hash_index = bond_info->rx_hashtbl_used_head; +	for (; hash_index != RLB_NULL_INDEX; +	     hash_index = client_info->used_next) {  		client_info = &(bond_info->rx_hashtbl[hash_index]); -		assigned_slave = rlb_next_rx_slave(bond); +		assigned_slave = __rlb_next_rx_slave(bond);  		if (assigned_slave && (client_info->slave != assigned_slave)) {  			client_info->slave = assigned_slave;  			client_info->ntt = 1; @@ -774,55 +816,145 @@ static void rlb_rebalance(struct bonding *bond)  	}  	/* update the team's flag only after the whole iteration */ -	if (ntt) { +	if (ntt)  		bond_info->rx_ntt = 1; -	} -	_unlock_rx_hashtbl(bond); +	_unlock_rx_hashtbl_bh(bond);  }  /* Caller must hold rx_hashtbl lock */ +static void rlb_init_table_entry_dst(struct rlb_client_info *entry) +{ +	entry->used_next = RLB_NULL_INDEX; +	entry->used_prev = RLB_NULL_INDEX; +	entry->assigned = 0; +	entry->slave = NULL; +	entry->vlan_id = 0; +} +static void rlb_init_table_entry_src(struct rlb_client_info *entry) +{ +	entry->src_first = RLB_NULL_INDEX; +	entry->src_prev = RLB_NULL_INDEX; +	entry->src_next = RLB_NULL_INDEX; +} +  static void rlb_init_table_entry(struct rlb_client_info *entry)  {  	memset(entry, 0, sizeof(struct rlb_client_info)); -	entry->next = RLB_NULL_INDEX; -	entry->prev = RLB_NULL_INDEX; +	rlb_init_table_entry_dst(entry); +	rlb_init_table_entry_src(entry); +} + +static void rlb_delete_table_entry_dst(struct bonding *bond, u32 index) +{ +	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); +	u32 next_index = bond_info->rx_hashtbl[index].used_next; +	u32 prev_index = bond_info->rx_hashtbl[index].used_prev; + +	if (index == bond_info->rx_hashtbl_used_head) +		bond_info->rx_hashtbl_used_head = next_index; +	if (prev_index != RLB_NULL_INDEX) +		bond_info->rx_hashtbl[prev_index].used_next = next_index; +	if (next_index != RLB_NULL_INDEX) +		bond_info->rx_hashtbl[next_index].used_prev = prev_index; +} + +/* unlink a rlb hash table entry from the src list */ +static void rlb_src_unlink(struct bonding *bond, u32 index) +{ +	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); +	u32 next_index = bond_info->rx_hashtbl[index].src_next; +	u32 prev_index = bond_info->rx_hashtbl[index].src_prev; + +	bond_info->rx_hashtbl[index].src_next = RLB_NULL_INDEX; +	bond_info->rx_hashtbl[index].src_prev = RLB_NULL_INDEX; + +	if (next_index != RLB_NULL_INDEX) +		bond_info->rx_hashtbl[next_index].src_prev = prev_index; + +	if (prev_index == RLB_NULL_INDEX) +		return; + +	/* is prev_index pointing to the head of this list? 
*/ +	if (bond_info->rx_hashtbl[prev_index].src_first == index) +		bond_info->rx_hashtbl[prev_index].src_first = next_index; +	else +		bond_info->rx_hashtbl[prev_index].src_next = next_index; + +} + +static void rlb_delete_table_entry(struct bonding *bond, u32 index) +{ +	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); +	struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]); + +	rlb_delete_table_entry_dst(bond, index); +	rlb_init_table_entry_dst(entry); + +	rlb_src_unlink(bond, index); +} + +/* add the rx_hashtbl[ip_dst_hash] entry to the list + * of entries with identical ip_src_hash + */ +static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, u32 ip_dst_hash) +{ +	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); +	u32 next; + +	bond_info->rx_hashtbl[ip_dst_hash].src_prev = ip_src_hash; +	next = bond_info->rx_hashtbl[ip_src_hash].src_first; +	bond_info->rx_hashtbl[ip_dst_hash].src_next = next; +	if (next != RLB_NULL_INDEX) +		bond_info->rx_hashtbl[next].src_prev = ip_dst_hash; +	bond_info->rx_hashtbl[ip_src_hash].src_first = ip_dst_hash; +} + +/* deletes all rx_hashtbl entries with  arp->ip_src if their mac_src does + * not match arp->mac_src */ +static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp) +{ +	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); +	u32 ip_src_hash = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src)); +	u32 index; + +	_lock_rx_hashtbl_bh(bond); + +	index = bond_info->rx_hashtbl[ip_src_hash].src_first; +	while (index != RLB_NULL_INDEX) { +		struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]); +		u32 next_index = entry->src_next; +		if (entry->ip_src == arp->ip_src && +		    !ether_addr_equal_64bits(arp->mac_src, entry->mac_src)) +				rlb_delete_table_entry(bond, index); +		index = next_index; +	} +	_unlock_rx_hashtbl_bh(bond);  }  static int rlb_initialize(struct bonding *bond)  {  	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); -	struct packet_type *pk_type = &(BOND_ALB_INFO(bond).rlb_pkt_type);  	struct rlb_client_info	*new_hashtbl;  	int size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info);  	int i; -	spin_lock_init(&(bond_info->rx_hashtbl_lock)); -  	new_hashtbl = kmalloc(size, GFP_KERNEL); -	if (!new_hashtbl) { -		pr_err("%s: Error: Failed to allocate RLB hash table\n", -		       bond->dev->name); +	if (!new_hashtbl)  		return -1; -	} -	_lock_rx_hashtbl(bond); + +	_lock_rx_hashtbl_bh(bond);  	bond_info->rx_hashtbl = new_hashtbl; -	bond_info->rx_hashtbl_head = RLB_NULL_INDEX; +	bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX; -	for (i = 0; i < RLB_HASH_TABLE_SIZE; i++) { +	for (i = 0; i < RLB_HASH_TABLE_SIZE; i++)  		rlb_init_table_entry(bond_info->rx_hashtbl + i); -	} - -	_unlock_rx_hashtbl(bond); -	/*initialize packet type*/ -	pk_type->type = cpu_to_be16(ETH_P_ARP); -	pk_type->dev = bond->dev; -	pk_type->func = rlb_arp_recv; +	_unlock_rx_hashtbl_bh(bond);  	/* register to receive ARPs */ -	dev_add_pack(pk_type); +	bond->recv_probe = rlb_arp_recv;  	return 0;  } @@ -831,15 +963,13 @@ static void rlb_deinitialize(struct bonding *bond)  {  	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); -	dev_remove_pack(&(bond_info->rlb_pkt_type)); - -	_lock_rx_hashtbl(bond); +	_lock_rx_hashtbl_bh(bond);  	kfree(bond_info->rx_hashtbl);  	bond_info->rx_hashtbl = NULL; -	bond_info->rx_hashtbl_head = RLB_NULL_INDEX; +	bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX; -	_unlock_rx_hashtbl(bond); +	_unlock_rx_hashtbl_bh(bond);  }  static void rlb_clear_vlan(struct bonding 
*bond, unsigned short vlan_id) @@ -847,100 +977,111 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)  	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));  	u32 curr_index; -	_lock_rx_hashtbl(bond); +	_lock_rx_hashtbl_bh(bond); -	curr_index = bond_info->rx_hashtbl_head; +	curr_index = bond_info->rx_hashtbl_used_head;  	while (curr_index != RLB_NULL_INDEX) {  		struct rlb_client_info *curr = &(bond_info->rx_hashtbl[curr_index]); -		u32 next_index = bond_info->rx_hashtbl[curr_index].next; -		u32 prev_index = bond_info->rx_hashtbl[curr_index].prev; +		u32 next_index = bond_info->rx_hashtbl[curr_index].used_next; -		if (curr->tag && (curr->vlan_id == vlan_id)) { -			if (curr_index == bond_info->rx_hashtbl_head) { -				bond_info->rx_hashtbl_head = next_index; -			} -			if (prev_index != RLB_NULL_INDEX) { -				bond_info->rx_hashtbl[prev_index].next = next_index; -			} -			if (next_index != RLB_NULL_INDEX) { -				bond_info->rx_hashtbl[next_index].prev = prev_index; -			} - -			rlb_init_table_entry(curr); -		} +		if (curr->vlan_id == vlan_id) +			rlb_delete_table_entry(bond, curr_index);  		curr_index = next_index;  	} -	_unlock_rx_hashtbl(bond); +	_unlock_rx_hashtbl_bh(bond);  }  /*********************** tlb/rlb shared functions *********************/ -static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]) +static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[], +			    __be16 vlan_proto, u16 vid)  { -	struct bonding *bond = bond_get_bond_by_slave(slave);  	struct learning_pkt pkt; +	struct sk_buff *skb;  	int size = sizeof(struct learning_pkt); -	int i; +	char *data;  	memset(&pkt, 0, size); -	memcpy(pkt.mac_dst, mac_addr, ETH_ALEN); -	memcpy(pkt.mac_src, mac_addr, ETH_ALEN); -	pkt.type = cpu_to_be16(ETH_P_LOOP); +	ether_addr_copy(pkt.mac_dst, mac_addr); +	ether_addr_copy(pkt.mac_src, mac_addr); +	pkt.type = cpu_to_be16(ETH_P_LOOPBACK); -	for (i = 0; i < MAX_LP_BURST; i++) { -		struct sk_buff *skb; -		char *data; +	skb = dev_alloc_skb(size); +	if (!skb) +		return; + +	data = skb_put(skb, size); +	memcpy(data, &pkt, size); -		skb = dev_alloc_skb(size); +	skb_reset_mac_header(skb); +	skb->network_header = skb->mac_header + ETH_HLEN; +	skb->protocol = pkt.type; +	skb->priority = TC_PRIO_CONTROL; +	skb->dev = slave->dev; + +	if (vid) { +		skb = vlan_put_tag(skb, vlan_proto, vid);  		if (!skb) { +			pr_err("%s: Error: failed to insert VLAN tag\n", +			       slave->bond->dev->name);  			return;  		} +	} -		data = skb_put(skb, size); -		memcpy(data, &pkt, size); - -		skb_reset_mac_header(skb); -		skb->network_header = skb->mac_header + ETH_HLEN; -		skb->protocol = pkt.type; -		skb->priority = TC_PRIO_CONTROL; -		skb->dev = slave->dev; - -		if (bond->vlgrp) { -			struct vlan_entry *vlan; +	dev_queue_xmit(skb); +} -			vlan = bond_next_vlan(bond, -					      bond->alb_info.current_alb_vlan); +static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], +				      bool strict_match) +{ +	struct bonding *bond = bond_get_bond_by_slave(slave); +	struct net_device *upper; +	struct list_head *iter; +	struct bond_vlan_tag tags[BOND_MAX_VLAN_ENCAP]; -			bond->alb_info.current_alb_vlan = vlan; -			if (!vlan) { -				kfree_skb(skb); -				continue; -			} +	/* send untagged */ +	alb_send_lp_vid(slave, mac_addr, 0, 0); -			skb = vlan_put_tag(skb, vlan->vlan_id); -			if (!skb) { -				pr_err("%s: Error: failed to insert VLAN tag\n", -				       bond->dev->name); -				continue; +	/* loop through all devices and see if we need to send a packet +	 * 
for that device. +	 */ +	rcu_read_lock(); +	netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) { +		if (is_vlan_dev(upper) && vlan_get_encap_level(upper) == 0) { +			if (strict_match && +			    ether_addr_equal_64bits(mac_addr, +						    upper->dev_addr)) { +				alb_send_lp_vid(slave, mac_addr, +						vlan_dev_vlan_proto(upper), +						vlan_dev_vlan_id(upper)); +			} else if (!strict_match) { +				alb_send_lp_vid(slave, upper->dev_addr, +						vlan_dev_vlan_proto(upper), +						vlan_dev_vlan_id(upper));  			}  		} -		dev_queue_xmit(skb); +		/* If this is a macvlan device, then only send updates +		 * when strict_match is turned off. +		 */ +		if (netif_is_macvlan(upper) && !strict_match) { +			memset(tags, 0, sizeof(tags)); +			bond_verify_device_path(bond->dev, upper, tags); +			alb_send_lp_vid(slave, upper->dev_addr, +					tags[0].vlan_proto, tags[0].vlan_id); +		}  	} +	rcu_read_unlock();  } -/* hw is a boolean parameter that determines whether we should try and - * set the hw address of the device as well as the hw address of the - * net_device - */ -static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[], int hw) +static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[])  {  	struct net_device *dev = slave->dev;  	struct sockaddr s_addr; -	if (!hw) { +	if (BOND_MODE(slave->bond) == BOND_MODE_TLB) {  		memcpy(dev->dev_addr, addr, dev->addr_len);  		return 0;  	} @@ -952,7 +1093,7 @@ static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[], int hw)  	if (dev_set_mac_address(dev, &s_addr)) {  		pr_err("%s: Error: dev_set_mac_address of dev %s failed!\n"  		       "ALB mode requires that the base driver support setting the hw address also when the network device's interface is open\n", -		       dev->master->name, dev->name); +		       slave->bond->dev->name, dev->name);  		return -EOPNOTSUPP;  	}  	return 0; @@ -965,13 +1106,13 @@ static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[], int hw)   *   */ -static void alb_swap_mac_addr(struct bonding *bond, struct slave *slave1, struct slave *slave2) +static void alb_swap_mac_addr(struct slave *slave1, struct slave *slave2)  {  	u8 tmp_mac_addr[ETH_ALEN]; -	memcpy(tmp_mac_addr, slave1->dev->dev_addr, ETH_ALEN); -	alb_set_slave_mac_addr(slave1, slave2->dev->dev_addr, bond->alb_info.rlb_enabled); -	alb_set_slave_mac_addr(slave2, tmp_mac_addr, bond->alb_info.rlb_enabled); +	ether_addr_copy(tmp_mac_addr, slave1->dev->dev_addr); +	alb_set_slave_mac_addr(slave1, slave2->dev->dev_addr); +	alb_set_slave_mac_addr(slave2, tmp_mac_addr);  } @@ -983,14 +1124,14 @@ static void alb_swap_mac_addr(struct bonding *bond, struct slave *slave1, struct  static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1,  				struct slave *slave2)  { -	int slaves_state_differ = (SLAVE_IS_OK(slave1) != SLAVE_IS_OK(slave2)); +	int slaves_state_differ = (bond_slave_can_tx(slave1) != bond_slave_can_tx(slave2));  	struct slave *disabled_slave = NULL;  	ASSERT_RTNL();  	/* fasten the change in the switch */ -	if (SLAVE_IS_OK(slave1)) { -		alb_send_learning_packets(slave1, slave1->dev->dev_addr); +	if (bond_slave_can_tx(slave1)) { +		alb_send_learning_packets(slave1, slave1->dev->dev_addr, false);  		if (bond->alb_info.rlb_enabled) {  			/* inform the clients that the mac address  			 * has changed @@ -1001,8 +1142,8 @@ static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1,  		disabled_slave = slave1;  	} -	if (SLAVE_IS_OK(slave2)) { -		alb_send_learning_packets(slave2, slave2->dev->dev_addr); +	if 
(bond_slave_can_tx(slave2)) { +		alb_send_learning_packets(slave2, slave2->dev->dev_addr, false);  		if (bond->alb_info.rlb_enabled) {  			/* inform the clients that the mac address  			 * has changed @@ -1038,28 +1179,20 @@ static void alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *sla  {  	int perm_curr_diff;  	int perm_bond_diff; +	struct slave *found_slave; -	perm_curr_diff = compare_ether_addr_64bits(slave->perm_hwaddr, -						   slave->dev->dev_addr); -	perm_bond_diff = compare_ether_addr_64bits(slave->perm_hwaddr, -						   bond->dev->dev_addr); +	perm_curr_diff = !ether_addr_equal_64bits(slave->perm_hwaddr, +						  slave->dev->dev_addr); +	perm_bond_diff = !ether_addr_equal_64bits(slave->perm_hwaddr, +						  bond->dev->dev_addr);  	if (perm_curr_diff && perm_bond_diff) { -		struct slave *tmp_slave; -		int i, found = 0; - -		bond_for_each_slave(bond, tmp_slave, i) { -			if (!compare_ether_addr_64bits(slave->perm_hwaddr, -						       tmp_slave->dev->dev_addr)) { -				found = 1; -				break; -			} -		} +		found_slave = bond_slave_has_mac(bond, slave->perm_hwaddr); -		if (found) { +		if (found_slave) {  			/* locking: needs RTNL and nothing else */ -			alb_swap_mac_addr(bond, slave, tmp_slave); -			alb_fasten_mac_swap(bond, slave, tmp_slave); +			alb_swap_mac_addr(slave, found_slave); +			alb_fasten_mac_swap(bond, slave, found_slave);  		}  	}  } @@ -1074,7 +1207,7 @@ static void alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *sla   *   * If the permanent hw address of @slave is @bond's hw address, we need to   * find a different hw address to give @slave, that isn't in use by any other - * slave in the bond. This address must be, of course, one of the premanent + * slave in the bond. This address must be, of course, one of the permanent   * addresses of the other slaves.   *   * We go over the slave list, and for each slave there we compare its @@ -1084,18 +1217,15 @@ static void alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *sla   * @slave.   *   * assumption: this function is called before @slave is attached to the - * 	       bond slave list. - * - * caller must hold the bond lock for write since the mac addresses are compared - * and may be swapped. + *	       bond slave list.   */  static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slave *slave)  { -	struct slave *tmp_slave1, *tmp_slave2, *free_mac_slave;  	struct slave *has_bond_addr = bond->curr_active_slave; -	int i, j, found = 0; +	struct slave *tmp_slave1, *free_mac_slave = NULL; +	struct list_head *iter; -	if (bond->slave_cnt == 0) { +	if (!bond_has_slaves(bond)) {  		/* this is the first slave */  		return 0;  	} @@ -1104,40 +1234,20 @@ static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slav  	 * check uniqueness of slave's mac address against the other  	 * slaves in the bond.  	 */ -	if (compare_ether_addr_64bits(slave->perm_hwaddr, bond->dev->dev_addr)) { -		bond_for_each_slave(bond, tmp_slave1, i) { -			if (!compare_ether_addr_64bits(tmp_slave1->dev->dev_addr, -						       slave->dev->dev_addr)) { -				found = 1; -				break; -			} -		} - -		if (!found) +	if (!ether_addr_equal_64bits(slave->perm_hwaddr, bond->dev->dev_addr)) { +		if (!bond_slave_has_mac(bond, slave->dev->dev_addr))  			return 0;  		/* Try setting slave mac to bond address and fall-through  		   to code handling that situation below... 
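
/* Rough userspace illustration of the frame that alb_send_lp_vid() above
 * emits (this is not the kernel's struct learning_pkt, whose exact layout is
 * not shown in this patch): an Ethernet frame whose source *and* destination
 * are the MAC being advertised, with EtherType ETH_P_LOOPBACK (0x9000), so
 * the attached switch (re)learns that MAC on the transmitting slave's port
 * without any host having to process the frame.  The MAC value is invented.
 */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define FRAME_MIN 60	/* minimum Ethernet frame size, without FCS */

static size_t build_learning_frame(uint8_t *buf, const uint8_t mac[6])
{
	memset(buf, 0, FRAME_MIN);
	memcpy(buf, mac, 6);		/* destination = advertised MAC */
	memcpy(buf + 6, mac, 6);	/* source      = advertised MAC */
	buf[12] = 0x90;			/* EtherType ETH_P_LOOPBACK ... */
	buf[13] = 0x00;			/* ... 0x9000, big endian       */
	return FRAME_MIN;
}

int main(void)
{
	uint8_t mac[6] = { 0x02, 0x00, 0x00, 0xaa, 0xbb, 0xcc };
	uint8_t frame[FRAME_MIN];
	size_t len = build_learning_frame(frame, mac);

	printf("learning frame: %zu bytes, type 0x%02x%02x\n",
	       len, (unsigned int)frame[12], (unsigned int)frame[13]);
	return 0;
}
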
*/ -		alb_set_slave_mac_addr(slave, bond->dev->dev_addr, -				       bond->alb_info.rlb_enabled); +		alb_set_slave_mac_addr(slave, bond->dev->dev_addr);  	}  	/* The slave's address is equal to the address of the bond.  	 * Search for a spare address in the bond for this slave.  	 */ -	free_mac_slave = NULL; - -	bond_for_each_slave(bond, tmp_slave1, i) { -		found = 0; -		bond_for_each_slave(bond, tmp_slave2, j) { -			if (!compare_ether_addr_64bits(tmp_slave1->perm_hwaddr, -						       tmp_slave2->dev->dev_addr)) { -				found = 1; -				break; -			} -		} - -		if (!found) { +	bond_for_each_slave(bond, tmp_slave1, iter) { +		if (!bond_slave_has_mac(bond, tmp_slave1->perm_hwaddr)) {  			/* no slave has tmp_slave1's perm addr  			 * as its curr addr  			 */ @@ -1146,8 +1256,8 @@ static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slav  		}  		if (!has_bond_addr) { -			if (!compare_ether_addr_64bits(tmp_slave1->dev->dev_addr, -						       bond->dev->dev_addr)) { +			if (ether_addr_equal_64bits(tmp_slave1->dev->dev_addr, +						    bond->dev->dev_addr)) {  				has_bond_addr = tmp_slave1;  			} @@ -1155,12 +1265,11 @@ static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slav  	}  	if (free_mac_slave) { -		alb_set_slave_mac_addr(slave, free_mac_slave->perm_hwaddr, -				       bond->alb_info.rlb_enabled); +		alb_set_slave_mac_addr(slave, free_mac_slave->perm_hwaddr); -		pr_warning("%s: Warning: the hw address of slave %s is in use by the bond; giving it the hw address of %s\n", -			   bond->dev->name, slave->dev->name, -			   free_mac_slave->dev->name); +		pr_warn("%s: Warning: the hw address of slave %s is in use by the bond; giving it the hw address of %s\n", +			bond->dev->name, slave->dev->name, +			free_mac_slave->dev->name);  	} else if (has_bond_addr) {  		pr_err("%s: Error: the hw address of slave %s is in use by the bond; couldn't find a slave with a free hw address to give it (this should not have happened)\n", @@ -1187,24 +1296,23 @@ static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slav   */  static int alb_set_mac_address(struct bonding *bond, void *addr)  { +	struct slave *slave, *rollback_slave; +	struct list_head *iter;  	struct sockaddr sa; -	struct slave *slave, *stop_at;  	char tmp_addr[ETH_ALEN];  	int res; -	int i; -	if (bond->alb_info.rlb_enabled) { +	if (bond->alb_info.rlb_enabled)  		return 0; -	} -	bond_for_each_slave(bond, slave, i) { +	bond_for_each_slave(bond, slave, iter) {  		/* save net_device's current hw address */ -		memcpy(tmp_addr, slave->dev->dev_addr, ETH_ALEN); +		ether_addr_copy(tmp_addr, slave->dev->dev_addr);  		res = dev_set_mac_address(slave->dev, addr);  		/* restore net_device's hw address */ -		memcpy(slave->dev->dev_addr, tmp_addr, ETH_ALEN); +		ether_addr_copy(slave->dev->dev_addr, tmp_addr);  		if (res)  			goto unwind; @@ -1217,11 +1325,12 @@ unwind:  	sa.sa_family = bond->dev->type;  	/* unwind from head to the slave that failed */ -	stop_at = slave; -	bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) { -		memcpy(tmp_addr, slave->dev->dev_addr, ETH_ALEN); -		dev_set_mac_address(slave->dev, &sa); -		memcpy(slave->dev->dev_addr, tmp_addr, ETH_ALEN); +	bond_for_each_slave(bond, rollback_slave, iter) { +		if (rollback_slave == slave) +			break; +		ether_addr_copy(tmp_addr, rollback_slave->dev->dev_addr); +		dev_set_mac_address(rollback_slave->dev, &sa); +		ether_addr_copy(rollback_slave->dev->dev_addr, tmp_addr);  	}  	return res; @@ -1234,9 +1343,8 @@ int 
bond_alb_initialize(struct bonding *bond, int rlb_enabled)  	int res;  	res = tlb_initialize(bond); -	if (res) { +	if (res)  		return res; -	}  	if (rlb_enabled) {  		bond->alb_info.rlb_enabled = 1; @@ -1259,9 +1367,79 @@ void bond_alb_deinitialize(struct bonding *bond)  	tlb_deinitialize(bond); -	if (bond_info->rlb_enabled) { +	if (bond_info->rlb_enabled)  		rlb_deinitialize(bond); +} + +static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond, +		struct slave *tx_slave) +{ +	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); +	struct ethhdr *eth_data = eth_hdr(skb); + +	if (!tx_slave) { +		/* unbalanced or unassigned, send through primary */ +		tx_slave = rcu_dereference(bond->curr_active_slave); +		if (bond->params.tlb_dynamic_lb) +			bond_info->unbalanced_load += skb->len;  	} + +	if (tx_slave && bond_slave_can_tx(tx_slave)) { +		if (tx_slave != rcu_dereference(bond->curr_active_slave)) { +			ether_addr_copy(eth_data->h_source, +					tx_slave->dev->dev_addr); +		} + +		bond_dev_queue_xmit(bond, skb, tx_slave->dev); +		goto out; +	} + +	if (tx_slave && bond->params.tlb_dynamic_lb) { +		_lock_tx_hashtbl(bond); +		__tlb_clear_slave(bond, tx_slave, 0); +		_unlock_tx_hashtbl(bond); +	} + +	/* no suitable interface, frame not sent */ +	dev_kfree_skb_any(skb); +out: +	return NETDEV_TX_OK; +} + +int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) +{ +	struct bonding *bond = netdev_priv(bond_dev); +	struct ethhdr *eth_data; +	struct slave *tx_slave = NULL; +	u32 hash_index; + +	skb_reset_mac_header(skb); +	eth_data = eth_hdr(skb); + +	/* Do not TX balance any multicast or broadcast */ +	if (!is_multicast_ether_addr(eth_data->h_dest)) { +		switch (skb->protocol) { +		case htons(ETH_P_IP): +		case htons(ETH_P_IPX): +		    /* In case of IPX, it will falback to L2 hash */ +		case htons(ETH_P_IPV6): +			hash_index = bond_xmit_hash(bond, skb); +			if (bond->params.tlb_dynamic_lb) { +				tx_slave = tlb_choose_channel(bond, +							      hash_index & 0xFF, +							      skb->len); +			} else { +				struct list_head *iter; +				int idx = hash_index % bond->slave_cnt; + +				bond_for_each_slave_rcu(bond, tx_slave, iter) +					if (--idx < 0) +						break; +			} +			break; +		} +	} +	return bond_do_alb_xmit(skb, bond, tx_slave);  }  int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) @@ -1272,33 +1450,22 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)  	struct slave *tx_slave = NULL;  	static const __be32 ip_bcast = htonl(0xffffffff);  	int hash_size = 0; -	int do_tx_balance = 1; +	bool do_tx_balance = true;  	u32 hash_index = 0;  	const u8 *hash_start = NULL; -	int res = 1;  	struct ipv6hdr *ip6hdr;  	skb_reset_mac_header(skb);  	eth_data = eth_hdr(skb); -	/* make sure that the curr_active_slave and the slaves list do -	 * not change during tx -	 */ -	read_lock(&bond->lock); -	read_lock(&bond->curr_slave_lock); - -	if (!BOND_IS_OK(bond)) { -		goto out; -	} -  	switch (ntohs(skb->protocol)) {  	case ETH_P_IP: {  		const struct iphdr *iph = ip_hdr(skb); -		if (!compare_ether_addr_64bits(eth_data->h_dest, mac_bcast) || +		if (ether_addr_equal_64bits(eth_data->h_dest, mac_bcast) ||  		    (iph->daddr == ip_bcast) ||  		    (iph->protocol == IPPROTO_IGMP)) { -			do_tx_balance = 0; +			do_tx_balance = false;  			break;  		}  		hash_start = (char *)&(iph->daddr); @@ -1309,16 +1476,16 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)  		/* IPv6 doesn't really use broadcast mac address, but leave  		 * that here just in 
case.  		 */ -		if (!compare_ether_addr_64bits(eth_data->h_dest, mac_bcast)) { -			do_tx_balance = 0; +		if (ether_addr_equal_64bits(eth_data->h_dest, mac_bcast)) { +			do_tx_balance = false;  			break;  		}  		/* IPv6 uses all-nodes multicast as an equivalent to  		 * broadcasts in IPv4.  		 */ -		if (!compare_ether_addr_64bits(eth_data->h_dest, mac_v6_allmcast)) { -			do_tx_balance = 0; +		if (ether_addr_equal_64bits(eth_data->h_dest, mac_v6_allmcast)) { +			do_tx_balance = false;  			break;  		} @@ -1328,7 +1495,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)  		 */  		ip6hdr = ipv6_hdr(skb);  		if (ipv6_addr_any(&ip6hdr->saddr)) { -			do_tx_balance = 0; +			do_tx_balance = false;  			break;  		} @@ -1338,7 +1505,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)  	case ETH_P_IPX:  		if (ipx_hdr(skb)->ipx_checksum != IPX_NO_CHECKSUM) {  			/* something is wrong with this packet */ -			do_tx_balance = 0; +			do_tx_balance = false;  			break;  		} @@ -1347,21 +1514,20 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)  			 * this family since it has an "ARP" like  			 * mechanism  			 */ -			do_tx_balance = 0; +			do_tx_balance = false;  			break;  		} -		hash_start = (char*)eth_data->h_dest; +		hash_start = (char *)eth_data->h_dest;  		hash_size = ETH_ALEN;  		break;  	case ETH_P_ARP: -		do_tx_balance = 0; -		if (bond_info->rlb_enabled) { +		do_tx_balance = false; +		if (bond_info->rlb_enabled)  			tx_slave = rlb_arp_xmit(skb, bond); -		}  		break;  	default: -		do_tx_balance = 0; +		do_tx_balance = false;  		break;  	} @@ -1370,34 +1536,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)  		tx_slave = tlb_choose_channel(bond, hash_index, skb->len);  	} -	if (!tx_slave) { -		/* unbalanced or unassigned, send through primary */ -		tx_slave = bond->curr_active_slave; -		bond_info->unbalanced_load += skb->len; -	} - -	if (tx_slave && SLAVE_IS_OK(tx_slave)) { -		if (tx_slave != bond->curr_active_slave) { -			memcpy(eth_data->h_source, -			       tx_slave->dev->dev_addr, -			       ETH_ALEN); -		} - -		res = bond_dev_queue_xmit(bond, skb, tx_slave->dev); -	} else { -		if (tx_slave) { -			tlb_clear_slave(bond, tx_slave, 0); -		} -	} - -out: -	if (res) { -		/* no suitable interface, frame not sent */ -		dev_kfree_skb(skb); -	} -	read_unlock(&bond->curr_slave_lock); -	read_unlock(&bond->lock); -	return NETDEV_TX_OK; +	return bond_do_alb_xmit(skb, bond, tx_slave);  }  void bond_alb_monitor(struct work_struct *work) @@ -1405,26 +1544,24 @@ void bond_alb_monitor(struct work_struct *work)  	struct bonding *bond = container_of(work, struct bonding,  					    alb_work.work);  	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); +	struct list_head *iter;  	struct slave *slave; -	int i; - -	read_lock(&bond->lock); - -	if (bond->kill_timers) { -		goto out; -	} -	if (bond->slave_cnt == 0) { +	if (!bond_has_slaves(bond)) {  		bond_info->tx_rebalance_counter = 0;  		bond_info->lp_counter = 0;  		goto re_arm;  	} +	rcu_read_lock(); +  	bond_info->tx_rebalance_counter++;  	bond_info->lp_counter++;  	/* send learning packets */ -	if (bond_info->lp_counter >= BOND_ALB_LP_TICKS) { +	if (bond_info->lp_counter >= BOND_ALB_LP_TICKS(bond)) { +		bool strict_match; +  		/* change of curr_active_slave involves swapping of mac addresses.  		 
* in order to avoid this swapping from happening while  		 * sending the learning packets, the curr_slave_lock must be held for @@ -1432,8 +1569,16 @@ void bond_alb_monitor(struct work_struct *work)  		 */  		read_lock(&bond->curr_slave_lock); -		bond_for_each_slave(bond, slave, i) { -			alb_send_learning_packets(slave, slave->dev->dev_addr); +		bond_for_each_slave_rcu(bond, slave, iter) { +			/* If updating current_active, use all currently +			 * user mac addreses (!strict_match).  Otherwise, only +			 * use mac of the slave device. +			 * In RLB mode, we always use strict matches. +			 */ +			strict_match = (slave != bond->curr_active_slave || +					bond_info->rlb_enabled); +			alb_send_learning_packets(slave, slave->dev->dev_addr, +						  strict_match);  		}  		read_unlock(&bond->curr_slave_lock); @@ -1446,7 +1591,7 @@ void bond_alb_monitor(struct work_struct *work)  		read_lock(&bond->curr_slave_lock); -		bond_for_each_slave(bond, slave, i) { +		bond_for_each_slave_rcu(bond, slave, iter) {  			tlb_clear_slave(bond, slave, 1);  			if (slave == bond->curr_active_slave) {  				SLAVE_TLB_INFO(slave).load = @@ -1468,10 +1613,11 @@ void bond_alb_monitor(struct work_struct *work)  			/*  			 * dev_set_promiscuity requires rtnl and -			 * nothing else. +			 * nothing else.  Avoid race with bond_close.  			 */ -			read_unlock(&bond->lock); -			rtnl_lock(); +			rcu_read_unlock(); +			if (!rtnl_trylock()) +				goto re_arm;  			bond_info->rlb_promisc_timeout_counter = 0; @@ -1483,7 +1629,7 @@ void bond_alb_monitor(struct work_struct *work)  			bond_info->primary_is_promisc = 0;  			rtnl_unlock(); -			read_lock(&bond->lock); +			rcu_read_lock();  		}  		if (bond_info->rlb_rebalance) { @@ -1497,19 +1643,16 @@ void bond_alb_monitor(struct work_struct *work)  				--bond_info->rlb_update_delay_counter;  			} else {  				rlb_update_rx_clients(bond); -				if (bond_info->rlb_update_retry_counter) { +				if (bond_info->rlb_update_retry_counter)  					--bond_info->rlb_update_retry_counter; -				} else { +				else  					bond_info->rx_ntt = 0; -				}  			}  		}  	} - +	rcu_read_unlock();  re_arm:  	queue_delayed_work(bond->wq, &bond->alb_work, alb_delta_in_ticks); -out: -	read_unlock(&bond->lock);  }  /* assumption: called before the slave is attached to the bond @@ -1519,33 +1662,21 @@ int bond_alb_init_slave(struct bonding *bond, struct slave *slave)  {  	int res; -	res = alb_set_slave_mac_addr(slave, slave->perm_hwaddr, -				     bond->alb_info.rlb_enabled); -	if (res) { +	res = alb_set_slave_mac_addr(slave, slave->perm_hwaddr); +	if (res)  		return res; -	} - -	/* caller must hold the bond lock for write since the mac addresses -	 * are compared and may be swapped. 
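
/* Small userspace sketch of the static (tlb_dynamic_lb == 0) transmit-slave
 * selection added in bond_tlb_xmit() above: the packet hash is reduced modulo
 * the slave count and the slave list is walked that many entries.  The hash
 * values and slave names are made up; bond_xmit_hash() itself is not
 * reproduced here.
 */
#include <stdio.h>

static const char *pick_slave(const char * const *slaves, int slave_cnt,
			      unsigned int hash)
{
	int idx = hash % slave_cnt;
	int i;

	for (i = 0; i < slave_cnt; i++)
		if (--idx < 0)
			return slaves[i];
	return NULL;	/* not reached while slave_cnt > 0 */
}

int main(void)
{
	const char * const slaves[] = { "eth0", "eth1", "eth2" };
	unsigned int hashes[] = { 0x1234, 0x1235, 0x1236, 0x1237 };
	unsigned int i;

	for (i = 0; i < 4; i++)
		printf("hash 0x%04x -> %s\n", hashes[i],
		       pick_slave(slaves, 3, hashes[i]));
	return 0;
}
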
-	 */ -	read_lock(&bond->lock);  	res = alb_handle_addr_collision_on_attach(bond, slave); - -	read_unlock(&bond->lock); - -	if (res) { +	if (res)  		return res; -	}  	tlb_init_slave(slave);  	/* order a rebalance ASAP */  	bond->alb_info.tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; -	if (bond->alb_info.rlb_enabled) { +	if (bond->alb_info.rlb_enabled)  		bond->alb_info.rlb_rebalance = 1; -	}  	return 0;  } @@ -1558,14 +1689,13 @@ int bond_alb_init_slave(struct bonding *bond, struct slave *slave)   */  void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave)  { -	if (bond->slave_cnt > 1) { +	if (bond_has_slaves(bond))  		alb_change_hw_addr_on_detach(bond, slave); -	}  	tlb_clear_slave(bond, slave, 0);  	if (bond->alb_info.rlb_enabled) { -		bond->alb_info.next_rx_slave = NULL; +		bond->alb_info.rx_slave = NULL;  		rlb_clear_slave(bond, slave);  	}  } @@ -1577,9 +1707,8 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char  	if (link == BOND_LINK_DOWN) {  		tlb_clear_slave(bond, slave, 0); -		if (bond->alb_info.rlb_enabled) { +		if (bond->alb_info.rlb_enabled)  			rlb_clear_slave(bond, slave); -		}  	} else if (link == BOND_LINK_UP) {  		/* order a rebalance ASAP */  		bond_info->tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; @@ -1605,22 +1734,17 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char   * If new_slave is NULL, caller must hold curr_slave_lock or   * bond->lock for write.   * - * If new_slave is not NULL, caller must hold RTNL, bond->lock for - * read and curr_slave_lock for write.  Processing here may sleep, so - * no other locks may be held. + * If new_slave is not NULL, caller must hold RTNL, curr_slave_lock + * for write.  Processing here may sleep, so no other locks may be held.   */  void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave)  	__releases(&bond->curr_slave_lock) -	__releases(&bond->lock) -	__acquires(&bond->lock)  	__acquires(&bond->curr_slave_lock)  {  	struct slave *swap_slave; -	int i; -	if (bond->curr_active_slave == new_slave) { +	if (bond->curr_active_slave == new_slave)  		return; -	}  	if (bond->curr_active_slave && bond->alb_info.primary_is_promisc) {  		dev_set_promiscuity(bond->curr_active_slave->dev, -1); @@ -1629,58 +1753,57 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave  	}  	swap_slave = bond->curr_active_slave; -	bond->curr_active_slave = new_slave; +	rcu_assign_pointer(bond->curr_active_slave, new_slave); -	if (!new_slave || (bond->slave_cnt == 0)) { +	if (!new_slave || !bond_has_slaves(bond))  		return; -	}  	/* set the new curr_active_slave to the bonds mac address  	 * i.e. swap mac addresses of old curr_active_slave and new curr_active_slave  	 */ -	if (!swap_slave) { -		struct slave *tmp_slave; -		/* find slave that is holding the bond's mac address */ -		bond_for_each_slave(bond, tmp_slave, i) { -			if (!compare_ether_addr_64bits(tmp_slave->dev->dev_addr, -						       bond->dev->dev_addr)) { -				swap_slave = tmp_slave; -				break; -			} -		} -	} +	if (!swap_slave) +		swap_slave = bond_slave_has_mac(bond, bond->dev->dev_addr);  	/*  	 * Arrange for swap_slave and new_slave to temporarily be  	 * ignored so we can mess with their MAC addresses without  	 * fear of interference from transmit activity.  	 
*/ -	if (swap_slave) { +	if (swap_slave)  		tlb_clear_slave(bond, swap_slave, 1); -	}  	tlb_clear_slave(bond, new_slave, 1);  	write_unlock_bh(&bond->curr_slave_lock); -	read_unlock(&bond->lock);  	ASSERT_RTNL(); -	/* curr_active_slave must be set before calling alb_swap_mac_addr */ -	if (swap_slave) { -		/* swap mac address */ -		alb_swap_mac_addr(bond, swap_slave, new_slave); -	} else { -		/* set the new_slave to the bond mac address */ -		alb_set_slave_mac_addr(new_slave, bond->dev->dev_addr, -				       bond->alb_info.rlb_enabled); +	/* in TLB mode, the slave might flip down/up with the old dev_addr, +	 * and thus filter bond->dev_addr's packets, so force bond's mac +	 */ +	if (BOND_MODE(bond) == BOND_MODE_TLB) { +		struct sockaddr sa; +		u8 tmp_addr[ETH_ALEN]; + +		ether_addr_copy(tmp_addr, new_slave->dev->dev_addr); + +		memcpy(sa.sa_data, bond->dev->dev_addr, bond->dev->addr_len); +		sa.sa_family = bond->dev->type; +		/* we don't care if it can't change its mac, best effort */ +		dev_set_mac_address(new_slave->dev, &sa); + +		ether_addr_copy(new_slave->dev->dev_addr, tmp_addr);  	} +	/* curr_active_slave must be set before calling alb_swap_mac_addr */  	if (swap_slave) { +		/* swap mac address */ +		alb_swap_mac_addr(swap_slave, new_slave);  		alb_fasten_mac_swap(bond, swap_slave, new_slave); -		read_lock(&bond->lock);  	} else { -		read_lock(&bond->lock); -		alb_send_learning_packets(new_slave, bond->dev->dev_addr); +		/* set the new_slave to the bond mac address */ +		alb_set_slave_mac_addr(new_slave, bond->dev->dev_addr); +		alb_send_learning_packets(new_slave, bond->dev->dev_addr, +					  false);  	}  	write_lock_bh(&bond->curr_slave_lock); @@ -1695,18 +1818,15 @@ int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr)  {  	struct bonding *bond = netdev_priv(bond_dev);  	struct sockaddr *sa = addr; -	struct slave *slave, *swap_slave; +	struct slave *swap_slave;  	int res; -	int i; -	if (!is_valid_ether_addr(sa->sa_data)) { +	if (!is_valid_ether_addr(sa->sa_data))  		return -EADDRNOTAVAIL; -	}  	res = alb_set_mac_address(bond, addr); -	if (res) { +	if (res)  		return res; -	}  	memcpy(bond_dev->dev_addr, sa->sa_data, bond_dev->addr_len); @@ -1714,29 +1834,20 @@ int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr)  	 * Otherwise we'll need to pass the new address to it and handle  	 * duplications.  	 
*/ -	if (!bond->curr_active_slave) { +	if (!bond->curr_active_slave)  		return 0; -	} - -	swap_slave = NULL; -	bond_for_each_slave(bond, slave, i) { -		if (!compare_ether_addr_64bits(slave->dev->dev_addr, -					       bond_dev->dev_addr)) { -			swap_slave = slave; -			break; -		} -	} +	swap_slave = bond_slave_has_mac(bond, bond_dev->dev_addr);  	if (swap_slave) { -		alb_swap_mac_addr(bond, swap_slave, bond->curr_active_slave); +		alb_swap_mac_addr(swap_slave, bond->curr_active_slave);  		alb_fasten_mac_swap(bond, swap_slave, bond->curr_active_slave);  	} else { -		alb_set_slave_mac_addr(bond->curr_active_slave, bond_dev->dev_addr, -				       bond->alb_info.rlb_enabled); +		alb_set_slave_mac_addr(bond->curr_active_slave, bond_dev->dev_addr);  		read_lock(&bond->lock); -		alb_send_learning_packets(bond->curr_active_slave, bond_dev->dev_addr); +		alb_send_learning_packets(bond->curr_active_slave, +					  bond_dev->dev_addr, false);  		if (bond->alb_info.rlb_enabled) {  			/* inform clients mac address has changed */  			rlb_req_update_slave_clients(bond, bond->curr_active_slave); @@ -1749,13 +1860,7 @@ int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr)  void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id)  { -	if (bond->alb_info.current_alb_vlan && -	    (bond->alb_info.current_alb_vlan->vlan_id == vlan_id)) { -		bond->alb_info.current_alb_vlan = NULL; -	} - -	if (bond->alb_info.rlb_enabled) { +	if (bond->alb_info.rlb_enabled)  		rlb_clear_vlan(bond, vlan_id); -	}  } diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h index 50968f8196c..5fc76c01636 100644 --- a/drivers/net/bonding/bond_alb.h +++ b/drivers/net/bonding/bond_alb.h @@ -12,8 +12,7 @@   * for more details.   *   * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA. + * with this program; if not, see <http://www.gnu.org/licenses/>.   *   * The full GNU General Public License is included in this distribution in the   * file called LICENSE. @@ -31,14 +30,52 @@ struct slave;  #define BOND_ALB_INFO(bond)   ((bond)->alb_info)  #define SLAVE_TLB_INFO(slave) ((slave)->tlb_info) +#define ALB_TIMER_TICKS_PER_SEC	    10	/* should be a divisor of HZ */ +#define BOND_TLB_REBALANCE_INTERVAL 10	/* In seconds, periodic re-balancing. +					 * Used for division - never set +					 * to zero !!! +					 */ +#define BOND_ALB_DEFAULT_LP_INTERVAL 1 +#define BOND_ALB_LP_INTERVAL(bond) (bond->params.lp_interval)	/* In seconds, periodic send of +								 * learning packets to the switch +								 */ + +#define BOND_TLB_REBALANCE_TICKS (BOND_TLB_REBALANCE_INTERVAL \ +				  * ALB_TIMER_TICKS_PER_SEC) + +#define BOND_ALB_LP_TICKS(bond) (BOND_ALB_LP_INTERVAL(bond) \ +			   * ALB_TIMER_TICKS_PER_SEC) + +#define TLB_HASH_TABLE_SIZE 256	/* The size of the clients hash table. +				 * Note that this value MUST NOT be smaller +				 * because the key hash table is BYTE wide ! +				 */ + + +#define TLB_NULL_INDEX		0xffffffff + +/* rlb defs */ +#define RLB_HASH_TABLE_SIZE	256 +#define RLB_NULL_INDEX		0xffffffff +#define RLB_UPDATE_DELAY	(2*ALB_TIMER_TICKS_PER_SEC) /* 2 seconds */ +#define RLB_ARP_BURST_SIZE	2 +#define RLB_UPDATE_RETRY	3 /* 3-ticks - must be smaller than the rlb +				   * rebalance interval (5 min). 
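
/* Worked example (plain C, not kernel code) of the tick arithmetic behind the
 * counters tested in bond_alb_monitor() above: the monitor work runs
 * ALB_TIMER_TICKS_PER_SEC times per second, and the timeouts in this header
 * are expressed as multiples of that tick.  The constants are copied from the
 * definitions in this header; lp_interval = 1 is the default
 * (BOND_ALB_DEFAULT_LP_INTERVAL).
 */
#include <stdio.h>

#define TICKS_PER_SEC		10	/* ALB_TIMER_TICKS_PER_SEC */
#define TLB_REBALANCE_INTERVAL	10	/* seconds */
#define RLB_PROMISC_SECONDS	10	/* seconds */

int main(void)
{
	int lp_interval = 1;	/* bond->params.lp_interval default */

	printf("learning packets every %d ticks (%d s)\n",
	       lp_interval * TICKS_PER_SEC, lp_interval);
	printf("TLB rebalance every %d ticks (%d s)\n",
	       TLB_REBALANCE_INTERVAL * TICKS_PER_SEC, TLB_REBALANCE_INTERVAL);
	printf("failover promiscuity dropped after %d ticks (%d s)\n",
	       RLB_PROMISC_SECONDS * TICKS_PER_SEC, RLB_PROMISC_SECONDS);
	return 0;
}
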
+				   */ +/* RLB_PROMISC_TIMEOUT = 10 sec equals the time that the current slave is + * promiscuous after failover + */ +#define RLB_PROMISC_TIMEOUT	(10*ALB_TIMER_TICKS_PER_SEC) + +  struct tlb_client_info {  	struct slave *tx_slave;	/* A pointer to slave used for transmiting  				 * packets to a Client that the Hash function  				 * gave this entry index.  				 */ -	u32 tx_bytes;		/* Each Client acumulates the BytesTx that -				 * were tranmitted to it, and after each -				 * CallBack the LoadHistory is devided +	u32 tx_bytes;		/* Each Client accumulates the BytesTx that +				 * were transmitted to it, and after each +				 * CallBack the LoadHistory is divided  				 * by the balance interval  				 */  	u32 load_history;	/* This field contains the amount of Bytes @@ -56,19 +93,38 @@ struct tlb_client_info {  /* -------------------------------------------------------------------------   * struct rlb_client_info contains all info related to a specific rx client - * connection. This is the Clients Hash Table entry struct + * connection. This is the Clients Hash Table entry struct. + * Note that this is not a proper hash table; if a new client's IP address + * hash collides with an existing client entry, the old entry is replaced. + * + * There is a linked list (linked by the used_next and used_prev members) + * linking all the used entries of the hash table. This allows updating + * all the clients without walking over all the unused elements of the table. + * + * There are also linked lists of entries with identical hash(ip_src). These + * allow cleaning up the table from ip_src<->mac_src associations that have + * become outdated and would cause sending out invalid ARP updates to the + * network. These are linked by the (src_next and src_prev members).   
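
/* Standalone illustration (invented table size and values) of the index-based
 * "used" list described in the comment above: hash-table slots are linked to
 * each other by array indices rather than pointers, with RLB_NULL_INDEX
 * playing the role of NULL, so all assigned entries can be walked without
 * scanning the whole table.  Only the dst/used list is sketched; the src_*
 * list added by this patch chains entries the same way per hash(ip_src).
 */
#include <stdio.h>
#include <stdint.h>

#define TBL_SIZE	8
#define NULL_INDEX	0xffffffff

struct entry {
	uint32_t used_next;
	uint32_t used_prev;
	int assigned;
	int payload;		/* stand-in for the real client fields */
};

static struct entry tbl[TBL_SIZE];
static uint32_t used_head = NULL_INDEX;

static void link_entry(uint32_t idx, int payload)
{
	tbl[idx].payload = payload;
	tbl[idx].assigned = 1;
	tbl[idx].used_prev = NULL_INDEX;
	tbl[idx].used_next = used_head;
	if (used_head != NULL_INDEX)
		tbl[used_head].used_prev = idx;
	used_head = idx;
}

static void unlink_entry(uint32_t idx)
{
	uint32_t next = tbl[idx].used_next, prev = tbl[idx].used_prev;

	if (idx == used_head)
		used_head = next;
	if (prev != NULL_INDEX)
		tbl[prev].used_next = next;
	if (next != NULL_INDEX)
		tbl[next].used_prev = prev;
	tbl[idx].assigned = 0;
}

int main(void)
{
	uint32_t i;

	for (i = 0; i < TBL_SIZE; i++)
		tbl[i].used_next = tbl[i].used_prev = NULL_INDEX;

	link_entry(3, 30);
	link_entry(5, 50);
	link_entry(1, 10);
	unlink_entry(5);

	/* walk only the assigned entries, newest first */
	for (i = used_head; i != NULL_INDEX; i = tbl[i].used_next)
		printf("slot %u -> payload %d\n", (unsigned int)i,
		       tbl[i].payload);
	return 0;
}
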
* -------------------------------------------------------------------------   */  struct rlb_client_info {  	__be32 ip_src;		/* the server IP address */  	__be32 ip_dst;		/* the client IP address */ +	u8  mac_src[ETH_ALEN];	/* the server MAC address */  	u8  mac_dst[ETH_ALEN];	/* the client MAC address */ -	u32 next;		/* The next Hash table entry index */ -	u32 prev;		/* The previous Hash table entry index */ + +	/* list of used hash table entries, starting at rx_hashtbl_used_head */ +	u32 used_next; +	u32 used_prev; + +	/* ip_src based hashing */ +	u32 src_next;	/* next entry with same hash(ip_src) */ +	u32 src_prev;	/* prev entry with same hash(ip_src) */ +	u32 src_first;	/* first entry with hash(ip_src) == this entry's index */ +  	u8  assigned;		/* checking whether this entry is assigned */  	u8  ntt;		/* flag - need to transmit client info */  	struct slave *slave;	/* the slave assigned to this client */ -	u8 tag;			/* flag - need to tag skb */  	unsigned short vlan_id;	/* VLAN tag associated with IP address */  }; @@ -84,7 +140,6 @@ struct tlb_slave_info {  };  struct alb_bond_info { -	struct timer_list	alb_timer;  	struct tlb_client_info	*tx_hashtbl; /* Dynamically allocated */  	spinlock_t		tx_hashtbl_lock;  	u32			unbalanced_load; @@ -92,17 +147,13 @@ struct alb_bond_info {  	int			lp_counter;  	/* -------- rlb parameters -------- */  	int rlb_enabled; -	struct packet_type	rlb_pkt_type;  	struct rlb_client_info	*rx_hashtbl;	/* Receive hash table */  	spinlock_t		rx_hashtbl_lock; -	u32			rx_hashtbl_head; +	u32			rx_hashtbl_used_head;  	u8			rx_ntt;	/* flag - need to transmit  					 * to all rx clients  					 */ -	struct slave		*next_rx_slave;/* next slave to be assigned -						* to a new rx client for -						*/ -	u32			rlb_interval_counter; +	struct slave		*rx_slave;/* last slave to xmit from */  	u8			primary_is_promisc;	   /* boolean */  	u32			rlb_promisc_timeout_counter;/* counts primary  							     * promiscuity time @@ -115,7 +166,6 @@ struct alb_bond_info {  						 * rx traffic should be  						 * rebalanced  						 */ -	struct vlan_entry	*current_alb_vlan;  };  int bond_alb_initialize(struct bonding *bond, int rlb_enabled); @@ -125,6 +175,7 @@ void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave);  void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link);  void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave);  int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev); +int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev);  void bond_alb_monitor(struct work_struct *);  int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr);  void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id); diff --git a/drivers/net/bonding/bond_debugfs.c b/drivers/net/bonding/bond_debugfs.c new file mode 100644 index 00000000000..658e761c456 --- /dev/null +++ b/drivers/net/bonding/bond_debugfs.c @@ -0,0 +1,145 @@ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/netdevice.h> + +#include "bonding.h" +#include "bond_alb.h" + +#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_NET_NS) + +#include <linux/debugfs.h> +#include <linux/seq_file.h> + +static struct dentry *bonding_debug_root; + +/* + *  Show RLB hash table + */ +static int bond_debug_rlb_hash_show(struct seq_file *m, void *v) +{ +	struct bonding *bond = m->private; +	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); +	struct rlb_client_info *client_info; +	u32 
hash_index; + +	if (BOND_MODE(bond) != BOND_MODE_ALB) +		return 0; + +	seq_printf(m, "SourceIP        DestinationIP   " +			"Destination MAC   DEV\n"); + +	spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); + +	hash_index = bond_info->rx_hashtbl_used_head; +	for (; hash_index != RLB_NULL_INDEX; +	     hash_index = client_info->used_next) { +		client_info = &(bond_info->rx_hashtbl[hash_index]); +		seq_printf(m, "%-15pI4 %-15pI4 %-17pM %s\n", +			&client_info->ip_src, +			&client_info->ip_dst, +			&client_info->mac_dst, +			client_info->slave->dev->name); +	} + +	spin_unlock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); + +	return 0; +} + +static int bond_debug_rlb_hash_open(struct inode *inode, struct file *file) +{ +	return single_open(file, bond_debug_rlb_hash_show, inode->i_private); +} + +static const struct file_operations bond_debug_rlb_hash_fops = { +	.owner		= THIS_MODULE, +	.open		= bond_debug_rlb_hash_open, +	.read		= seq_read, +	.llseek		= seq_lseek, +	.release	= single_release, +}; + +void bond_debug_register(struct bonding *bond) +{ +	if (!bonding_debug_root) +		return; + +	bond->debug_dir = +		debugfs_create_dir(bond->dev->name, bonding_debug_root); + +	if (!bond->debug_dir) { +		pr_warn("%s: Warning: failed to register to debugfs\n", +			bond->dev->name); +		return; +	} + +	debugfs_create_file("rlb_hash_table", 0400, bond->debug_dir, +				bond, &bond_debug_rlb_hash_fops); +} + +void bond_debug_unregister(struct bonding *bond) +{ +	if (!bonding_debug_root) +		return; + +	debugfs_remove_recursive(bond->debug_dir); +} + +void bond_debug_reregister(struct bonding *bond) +{ +	struct dentry *d; + +	if (!bonding_debug_root) +		return; + +	d = debugfs_rename(bonding_debug_root, bond->debug_dir, +			   bonding_debug_root, bond->dev->name); +	if (d) { +		bond->debug_dir = d; +	} else { +		pr_warn("%s: Warning: failed to reregister, so just unregister old one\n", +			bond->dev->name); +		bond_debug_unregister(bond); +	} +} + +void bond_create_debugfs(void) +{ +	bonding_debug_root = debugfs_create_dir("bonding", NULL); + +	if (!bonding_debug_root) { +		pr_warn("Warning: Cannot create bonding directory in debugfs\n"); +	} +} + +void bond_destroy_debugfs(void) +{ +	debugfs_remove_recursive(bonding_debug_root); +	bonding_debug_root = NULL; +} + + +#else /* !CONFIG_DEBUG_FS */ + +void bond_debug_register(struct bonding *bond) +{ +} + +void bond_debug_unregister(struct bonding *bond) +{ +} + +void bond_debug_reregister(struct bonding *bond) +{ +} + +void bond_create_debugfs(void) +{ +} + +void bond_destroy_debugfs(void) +{ +} + +#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/net/bonding/bond_ipv6.c b/drivers/net/bonding/bond_ipv6.c deleted file mode 100644 index 121b073a6c3..00000000000 --- a/drivers/net/bonding/bond_ipv6.c +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Copyright(c) 2008 Hewlett-Packard Development Company, L.P. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License - * for more details. 
- * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA. - * - * The full GNU General Public License is included in this distribution in the - * file called LICENSE. - * - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/types.h> -#include <linux/if_vlan.h> -#include <net/ipv6.h> -#include <net/ndisc.h> -#include <net/addrconf.h> -#include <net/netns/generic.h> -#include "bonding.h" - -/* - * Assign bond->master_ipv6 to the next IPv6 address in the list, or - * zero it out if there are none. - */ -static void bond_glean_dev_ipv6(struct net_device *dev, struct in6_addr *addr) -{ -	struct inet6_dev *idev; - -	if (!dev) -		return; - -	idev = in6_dev_get(dev); -	if (!idev) -		return; - -	read_lock_bh(&idev->lock); -	if (!list_empty(&idev->addr_list)) { -		struct inet6_ifaddr *ifa -			= list_first_entry(&idev->addr_list, -					   struct inet6_ifaddr, if_list); -		ipv6_addr_copy(addr, &ifa->addr); -	} else -		ipv6_addr_set(addr, 0, 0, 0, 0); - -	read_unlock_bh(&idev->lock); - -	in6_dev_put(idev); -} - -static void bond_na_send(struct net_device *slave_dev, -			 struct in6_addr *daddr, -			 int router, -			 unsigned short vlan_id) -{ -	struct in6_addr mcaddr; -	struct icmp6hdr icmp6h = { -		.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT, -	}; -	struct sk_buff *skb; - -	icmp6h.icmp6_router = router; -	icmp6h.icmp6_solicited = 0; -	icmp6h.icmp6_override = 1; - -	addrconf_addr_solict_mult(daddr, &mcaddr); - -	pr_debug("ipv6 na on slave %s: dest %pI6, src %pI6\n", -		 slave_dev->name, &mcaddr, daddr); - -	skb = ndisc_build_skb(slave_dev, &mcaddr, daddr, &icmp6h, daddr, -			      ND_OPT_TARGET_LL_ADDR); - -	if (!skb) { -		pr_err("NA packet allocation failed\n"); -		return; -	} - -	if (vlan_id) { -		skb = vlan_put_tag(skb, vlan_id); -		if (!skb) { -			pr_err("failed to insert VLAN tag\n"); -			return; -		} -	} - -	ndisc_send_skb(skb, slave_dev, NULL, &mcaddr, daddr, &icmp6h); -} - -/* - * Kick out an unsolicited Neighbor Advertisement for an IPv6 address on - * the bonding master.  This will help the switch learn our address - * if in active-backup mode. - * - * Caller must hold curr_slave_lock for read or better - */ -void bond_send_unsolicited_na(struct bonding *bond) -{ -	struct slave *slave = bond->curr_active_slave; -	struct vlan_entry *vlan; -	struct inet6_dev *idev; -	int is_router; - -	pr_debug("%s: bond %s slave %s\n", bond->dev->name, -		 __func__, slave ? slave->dev->name : "NULL"); - -	if (!slave || !bond->send_unsol_na || -	    test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state)) -		return; - -	bond->send_unsol_na--; - -	idev = in6_dev_get(bond->dev); -	if (!idev) -		return; - -	is_router = !!idev->cnf.forwarding; - -	in6_dev_put(idev); - -	if (!ipv6_addr_any(&bond->master_ipv6)) -		bond_na_send(slave->dev, &bond->master_ipv6, is_router, 0); - -	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { -		if (!ipv6_addr_any(&vlan->vlan_ipv6)) { -			bond_na_send(slave->dev, &vlan->vlan_ipv6, is_router, -				     vlan->vlan_id); -		} -	} -} - -/* - * bond_inet6addr_event: handle inet6addr notifier chain events. - * - * We keep track of device IPv6 addresses primarily to use as source - * addresses in NS probes. - * - * We track one IPv6 for the main device (if it has one). 
- */ -static int bond_inet6addr_event(struct notifier_block *this, -				unsigned long event, -				void *ptr) -{ -	struct inet6_ifaddr *ifa = ptr; -	struct net_device *vlan_dev, *event_dev = ifa->idev->dev; -	struct bonding *bond; -	struct vlan_entry *vlan; -	struct bond_net *bn = net_generic(dev_net(event_dev), bond_net_id); - -	list_for_each_entry(bond, &bn->dev_list, bond_list) { -		if (bond->dev == event_dev) { -			switch (event) { -			case NETDEV_UP: -				if (ipv6_addr_any(&bond->master_ipv6)) -					ipv6_addr_copy(&bond->master_ipv6, -						       &ifa->addr); -				return NOTIFY_OK; -			case NETDEV_DOWN: -				if (ipv6_addr_equal(&bond->master_ipv6, -						    &ifa->addr)) -					bond_glean_dev_ipv6(bond->dev, -							    &bond->master_ipv6); -				return NOTIFY_OK; -			default: -				return NOTIFY_DONE; -			} -		} - -		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { -			if (!bond->vlgrp) -				continue; -			vlan_dev = vlan_group_get_device(bond->vlgrp, -							 vlan->vlan_id); -			if (vlan_dev == event_dev) { -				switch (event) { -				case NETDEV_UP: -					if (ipv6_addr_any(&vlan->vlan_ipv6)) -						ipv6_addr_copy(&vlan->vlan_ipv6, -							       &ifa->addr); -					return NOTIFY_OK; -				case NETDEV_DOWN: -					if (ipv6_addr_equal(&vlan->vlan_ipv6, -							    &ifa->addr)) -						bond_glean_dev_ipv6(vlan_dev, -								    &vlan->vlan_ipv6); -					return NOTIFY_OK; -				default: -					return NOTIFY_DONE; -				} -			} -		} -	} -	return NOTIFY_DONE; -} - -static struct notifier_block bond_inet6addr_notifier = { -	.notifier_call = bond_inet6addr_event, -}; - -void bond_register_ipv6_notifier(void) -{ -	register_inet6addr_notifier(&bond_inet6addr_notifier); -} - -void bond_unregister_ipv6_notifier(void) -{ -	unregister_inet6addr_notifier(&bond_inet6addr_notifier); -} - diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 0273ad0b57b..701f86cd599 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -54,20 +54,16 @@  #include <linux/inet.h>  #include <linux/bitops.h>  #include <linux/io.h> -#include <asm/system.h>  #include <asm/dma.h>  #include <linux/uaccess.h>  #include <linux/errno.h>  #include <linux/netdevice.h> -#include <linux/netpoll.h>  #include <linux/inetdevice.h>  #include <linux/igmp.h>  #include <linux/etherdevice.h>  #include <linux/skbuff.h>  #include <net/sock.h>  #include <linux/rtnetlink.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h>  #include <linux/smp.h>  #include <linux/if_ether.h>  #include <net/arp.h> @@ -80,6 +76,9 @@  #include <net/route.h>  #include <net/net_namespace.h>  #include <net/netns/generic.h> +#include <net/pkt_sched.h> +#include <linux/rculist.h> +#include <net/flow_keys.h>  #include "bonding.h"  #include "bond_3ad.h"  #include "bond_alb.h" @@ -87,14 +86,11 @@  /*---------------------------- Module parameters ----------------------------*/  /* monitor all links that often (in milliseconds). 
<=0 disables monitoring */ -#define BOND_LINK_MON_INTERV	0 -#define BOND_LINK_ARP_INTERV	0  static int max_bonds	= BOND_DEFAULT_MAX_BONDS;  static int tx_queues	= BOND_DEFAULT_TX_QUEUES; -static int num_grat_arp = 1; -static int num_unsol_na = 1; -static int miimon	= BOND_LINK_MON_INTERV; +static int num_peer_notif = 1; +static int miimon;  static int updelay;  static int downdelay;  static int use_carrier	= 1; @@ -102,24 +98,30 @@ static char *mode;  static char *primary;  static char *primary_reselect;  static char *lacp_rate; +static int min_links;  static char *ad_select;  static char *xmit_hash_policy; -static int arp_interval = BOND_LINK_ARP_INTERV; +static int arp_interval;  static char *arp_ip_target[BOND_MAX_ARP_TARGETS];  static char *arp_validate; +static char *arp_all_targets;  static char *fail_over_mac; -static int all_slaves_active = 0; +static int all_slaves_active;  static struct bond_params bonding_defaults;  static int resend_igmp = BOND_DEFAULT_RESEND_IGMP; +static int packets_per_slave = 1; +static int lp_interval = BOND_ALB_DEFAULT_LP_INTERVAL;  module_param(max_bonds, int, 0);  MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");  module_param(tx_queues, int, 0);  MODULE_PARM_DESC(tx_queues, "Max number of transmit queues (default = 16)"); -module_param(num_grat_arp, int, 0644); -MODULE_PARM_DESC(num_grat_arp, "Number of gratuitous ARP packets to send on failover event"); -module_param(num_unsol_na, int, 0644); -MODULE_PARM_DESC(num_unsol_na, "Number of unsolicited IPv6 Neighbor Advertisements packets to send on failover event"); +module_param_named(num_grat_arp, num_peer_notif, int, 0644); +MODULE_PARM_DESC(num_grat_arp, "Number of peer notifications to send on " +			       "failover event (alias of num_unsol_na)"); +module_param_named(num_unsol_na, num_peer_notif, int, 0644); +MODULE_PARM_DESC(num_unsol_na, "Number of peer notifications to send on " +			       "failover event (alias of num_grat_arp)");  module_param(miimon, int, 0);  MODULE_PARM_DESC(miimon, "Link check interval in milliseconds");  module_param(updelay, int, 0); @@ -131,7 +133,7 @@ module_param(use_carrier, int, 0);  MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; "  			      "0 for off, 1 for on (default)");  module_param(mode, charp, 0); -MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, " +MODULE_PARM_DESC(mode, "Mode of operation; 0 for balance-rr, "  		       "1 for active-backup, 2 for balance-xor, "  		       "3 for broadcast, 4 for 802.3ad, 5 for balance-tlb, "  		       "6 for balance-alb"); @@ -146,37 +148,56 @@ MODULE_PARM_DESC(primary_reselect, "Reselect primary slave "  				   "2 for only on active slave "  				   "failure");  module_param(lacp_rate, charp, 0); -MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner " -			    "(slow/fast)"); +MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner; " +			    "0 for slow, 1 for fast");  module_param(ad_select, charp, 0); -MODULE_PARM_DESC(ad_select, "803.ad aggregation selection logic: stable (0, default), bandwidth (1), count (2)"); +MODULE_PARM_DESC(ad_select, "803.ad aggregation selection logic; " +			    "0 for stable (default), 1 for bandwidth, " +			    "2 for count"); +module_param(min_links, int, 0); +MODULE_PARM_DESC(min_links, "Minimum number of available links before turning on carrier"); +  module_param(xmit_hash_policy, charp, 0); -MODULE_PARM_DESC(xmit_hash_policy, "XOR hashing method: 0 for layer 2 (default)" -				   ", 1 for layer 
3+4"); +MODULE_PARM_DESC(xmit_hash_policy, "balance-xor and 802.3ad hashing method; " +				   "0 for layer 2 (default), 1 for layer 3+4, " +				   "2 for layer 2+3, 3 for encap layer 2+3, " +				   "4 for encap layer 3+4");  module_param(arp_interval, int, 0);  MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");  module_param_array(arp_ip_target, charp, NULL, 0);  MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form");  module_param(arp_validate, charp, 0); -MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all"); +MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes; " +			       "0 for none (default), 1 for active, " +			       "2 for backup, 3 for all"); +module_param(arp_all_targets, charp, 0); +MODULE_PARM_DESC(arp_all_targets, "fail on any/all arp targets timeout; 0 for any (default), 1 for all");  module_param(fail_over_mac, charp, 0); -MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to the same MAC.  none (default), active or follow"); +MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to " +				"the same MAC; 0 for none (default), " +				"1 for active, 2 for follow");  module_param(all_slaves_active, int, 0);  MODULE_PARM_DESC(all_slaves_active, "Keep all frames received on an interface" -				     "by setting active flag for all slaves.  " +				     "by setting active flag for all slaves; "  				     "0 for never (default), 1 for always.");  module_param(resend_igmp, int, 0); -MODULE_PARM_DESC(resend_igmp, "Number of IGMP membership reports to send on link failure"); +MODULE_PARM_DESC(resend_igmp, "Number of IGMP membership reports to send on " +			      "link failure"); +module_param(packets_per_slave, int, 0); +MODULE_PARM_DESC(packets_per_slave, "Packets to send per slave in balance-rr " +				    "mode; 0 for a random slave, 1 packet per " +				    "slave (default), >1 packets per slave."); +module_param(lp_interval, uint, 0); +MODULE_PARM_DESC(lp_interval, "The number of seconds between instances where " +			      "the bonding driver sends learning packets to " +			      "each slaves peer switch. 
The default is 1.");  /*----------------------------- Global variables ----------------------------*/  #ifdef CONFIG_NET_POLL_CONTROLLER -cpumask_var_t netpoll_block_tx; +atomic_t netpoll_block_tx = ATOMIC_INIT(0);  #endif -static const char * const version = -	DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n"; -  int bond_net_id __read_mostly;  static __be32 arp_target[BOND_MAX_ARP_TARGETS]; @@ -185,68 +206,14 @@ static int bond_mode	= BOND_MODE_ROUNDROBIN;  static int xmit_hashtype = BOND_XMIT_POLICY_LAYER2;  static int lacp_fast; -const struct bond_parm_tbl bond_lacp_tbl[] = { -{	"slow",		AD_LACP_SLOW}, -{	"fast",		AD_LACP_FAST}, -{	NULL,		-1}, -}; - -const struct bond_parm_tbl bond_mode_tbl[] = { -{	"balance-rr",		BOND_MODE_ROUNDROBIN}, -{	"active-backup",	BOND_MODE_ACTIVEBACKUP}, -{	"balance-xor",		BOND_MODE_XOR}, -{	"broadcast",		BOND_MODE_BROADCAST}, -{	"802.3ad",		BOND_MODE_8023AD}, -{	"balance-tlb",		BOND_MODE_TLB}, -{	"balance-alb",		BOND_MODE_ALB}, -{	NULL,			-1}, -}; - -const struct bond_parm_tbl xmit_hashtype_tbl[] = { -{	"layer2",		BOND_XMIT_POLICY_LAYER2}, -{	"layer3+4",		BOND_XMIT_POLICY_LAYER34}, -{	"layer2+3",		BOND_XMIT_POLICY_LAYER23}, -{	NULL,			-1}, -}; - -const struct bond_parm_tbl arp_validate_tbl[] = { -{	"none",			BOND_ARP_VALIDATE_NONE}, -{	"active",		BOND_ARP_VALIDATE_ACTIVE}, -{	"backup",		BOND_ARP_VALIDATE_BACKUP}, -{	"all",			BOND_ARP_VALIDATE_ALL}, -{	NULL,			-1}, -}; - -const struct bond_parm_tbl fail_over_mac_tbl[] = { -{	"none",			BOND_FOM_NONE}, -{	"active",		BOND_FOM_ACTIVE}, -{	"follow",		BOND_FOM_FOLLOW}, -{	NULL,			-1}, -}; - -const struct bond_parm_tbl pri_reselect_tbl[] = { -{	"always",		BOND_PRI_RESELECT_ALWAYS}, -{	"better",		BOND_PRI_RESELECT_BETTER}, -{	"failure",		BOND_PRI_RESELECT_FAILURE}, -{	NULL,			-1}, -}; - -struct bond_parm_tbl ad_select_tbl[] = { -{	"stable",	BOND_AD_STABLE}, -{	"bandwidth",	BOND_AD_BANDWIDTH}, -{	"count",	BOND_AD_COUNT}, -{	NULL,		-1}, -}; -  /*-------------------------- Forward declarations ---------------------------*/ -static void bond_send_gratuitous_arp(struct bonding *bond);  static int bond_init(struct net_device *bond_dev);  static void bond_uninit(struct net_device *bond_dev);  /*---------------------------- General routines -----------------------------*/ -static const char *bond_mode_name(int mode) +const char *bond_mode_name(int mode)  {  	static const char *names[] = {  		[BOND_MODE_ROUNDROBIN] = "load balancing (round-robin)", @@ -258,7 +225,7 @@ static const char *bond_mode_name(int mode)  		[BOND_MODE_ALB] = "adaptive load balancing",  	}; -	if (mode < 0 || mode > BOND_MODE_ALB) +	if (mode < BOND_MODE_ROUNDROBIN || mode > BOND_MODE_ALB)  		return "unknown";  	return names[mode]; @@ -267,206 +234,30 @@ static const char *bond_mode_name(int mode)  /*---------------------------------- VLAN -----------------------------------*/  /** - * bond_add_vlan - add a new vlan id on bond - * @bond: bond that got the notification - * @vlan_id: the vlan id to add - * - * Returns -ENOMEM if allocation failed. - */ -static int bond_add_vlan(struct bonding *bond, unsigned short vlan_id) -{ -	struct vlan_entry *vlan; - -	pr_debug("bond: %s, vlan id %d\n", -		 (bond ? 
bond->dev->name : "None"), vlan_id); - -	vlan = kzalloc(sizeof(struct vlan_entry), GFP_KERNEL); -	if (!vlan) -		return -ENOMEM; - -	INIT_LIST_HEAD(&vlan->vlan_list); -	vlan->vlan_id = vlan_id; - -	write_lock_bh(&bond->lock); - -	list_add_tail(&vlan->vlan_list, &bond->vlan_list); - -	write_unlock_bh(&bond->lock); - -	pr_debug("added VLAN ID %d on bond %s\n", vlan_id, bond->dev->name); - -	return 0; -} - -/** - * bond_del_vlan - delete a vlan id from bond - * @bond: bond that got the notification - * @vlan_id: the vlan id to delete - * - * returns -ENODEV if @vlan_id was not found in @bond. - */ -static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id) -{ -	struct vlan_entry *vlan; -	int res = -ENODEV; - -	pr_debug("bond: %s, vlan id %d\n", bond->dev->name, vlan_id); - -	block_netpoll_tx(); -	write_lock_bh(&bond->lock); - -	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { -		if (vlan->vlan_id == vlan_id) { -			list_del(&vlan->vlan_list); - -			if (bond_is_lb(bond)) -				bond_alb_clear_vlan(bond, vlan_id); - -			pr_debug("removed VLAN ID %d from bond %s\n", -				 vlan_id, bond->dev->name); - -			kfree(vlan); - -			if (list_empty(&bond->vlan_list) && -			    (bond->slave_cnt == 0)) { -				/* Last VLAN removed and no slaves, so -				 * restore block on adding VLANs. This will -				 * be removed once new slaves that are not -				 * VLAN challenged will be added. -				 */ -				bond->dev->features |= NETIF_F_VLAN_CHALLENGED; -			} - -			res = 0; -			goto out; -		} -	} - -	pr_debug("couldn't find VLAN ID %d in bond %s\n", -		 vlan_id, bond->dev->name); - -out: -	write_unlock_bh(&bond->lock); -	unblock_netpoll_tx(); -	return res; -} - -/** - * bond_has_challenged_slaves - * @bond: the bond we're working on - * - * Searches the slave list. Returns 1 if a vlan challenged slave - * was found, 0 otherwise. - * - * Assumes bond->lock is held. - */ -static int bond_has_challenged_slaves(struct bonding *bond) -{ -	struct slave *slave; -	int i; - -	bond_for_each_slave(bond, slave, i) { -		if (slave->dev->features & NETIF_F_VLAN_CHALLENGED) { -			pr_debug("found VLAN challenged slave - %s\n", -				 slave->dev->name); -			return 1; -		} -	} - -	pr_debug("no VLAN challenged slaves found\n"); -	return 0; -} - -/** - * bond_next_vlan - safely skip to the next item in the vlans list. - * @bond: the bond we're working on - * @curr: item we're advancing from - * - * Returns %NULL if list is empty, bond->next_vlan if @curr is %NULL, - * or @curr->next otherwise (even if it is @curr itself again). - * - * Caller must hold bond->lock - */ -struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr) -{ -	struct vlan_entry *next, *last; - -	if (list_empty(&bond->vlan_list)) -		return NULL; - -	if (!curr) { -		next = list_entry(bond->vlan_list.next, -				  struct vlan_entry, vlan_list); -	} else { -		last = list_entry(bond->vlan_list.prev, -				  struct vlan_entry, vlan_list); -		if (last == curr) { -			next = list_entry(bond->vlan_list.next, -					  struct vlan_entry, vlan_list); -		} else { -			next = list_entry(curr->vlan_list.next, -					  struct vlan_entry, vlan_list); -		} -	} - -	return next; -} - -/**   * bond_dev_queue_xmit - Prepare skb for xmit.   *   * @bond: bond device that got this skb for tx.   
* @skb: hw accel VLAN tagged skb to transmit   * @slave_dev: slave that is supposed to xmit this skbuff - * - * When the bond gets an skb to transmit that is - * already hardware accelerated VLAN tagged, and it - * needs to relay this skb to a slave that is not - * hw accel capable, the skb needs to be "unaccelerated", - * i.e. strip the hwaccel tag and re-insert it as part - * of the payload.   */ -int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, +void bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,  			struct net_device *slave_dev)  { -	unsigned short uninitialized_var(vlan_id); - -	/* Test vlan_list not vlgrp to catch and handle 802.1p tags */ -	if (!list_empty(&bond->vlan_list) && -	    !(slave_dev->features & NETIF_F_HW_VLAN_TX) && -	    vlan_get_tag(skb, &vlan_id) == 0) { -		skb->dev = slave_dev; -		skb = vlan_put_tag(skb, vlan_id); -		if (!skb) { -			/* vlan_put_tag() frees the skb in case of error, -			 * so return success here so the calling functions -			 * won't attempt to free is again. -			 */ -			return 0; -		} -	} else { -		skb->dev = slave_dev; -	} +	skb->dev = slave_dev; -	skb->priority = 1; -#ifdef CONFIG_NET_POLL_CONTROLLER -	if (unlikely(bond->dev->priv_flags & IFF_IN_NETPOLL)) { -		struct netpoll *np = bond->dev->npinfo->netpoll; -		slave_dev->npinfo = bond->dev->npinfo; -		slave_dev->priv_flags |= IFF_IN_NETPOLL; -		netpoll_send_skb_on_dev(np, skb, slave_dev); -		slave_dev->priv_flags &= ~IFF_IN_NETPOLL; -	} else -#endif -		dev_queue_xmit(skb); +	BUILD_BUG_ON(sizeof(skb->queue_mapping) != +		     sizeof(qdisc_skb_cb(skb)->slave_dev_queue_mapping)); +	skb->queue_mapping = qdisc_skb_cb(skb)->slave_dev_queue_mapping; -	return 0; +	if (unlikely(netpoll_tx_running(bond->dev))) +		bond_netpoll_send_skb(bond_get_slave_by_dev(bond, slave_dev), skb); +	else +		dev_queue_xmit(skb);  }  /* - * In the following 3 functions, bond_vlan_rx_register(), bond_vlan_rx_add_vid - * and bond_vlan_rx_kill_vid, We don't protect the slave list iteration with a - * lock because: + * In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid, + * We don't protect the slave list iteration with a lock because:   * a. This operation is performed in IOCTL context,   * b. The operation is protected by the RTNL semaphore in the 8021q code,   * c. 
Holding a lock with BH disabled while directly calling a base driver @@ -482,58 +273,36 @@ int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,  */  /** - * bond_vlan_rx_register - Propagates registration to slaves + * bond_vlan_rx_add_vid - Propagates adding an id to slaves   * @bond_dev: bonding net device that got called - * @grp: vlan group being registered + * @vid: vlan id being added   */ -static void bond_vlan_rx_register(struct net_device *bond_dev, -				  struct vlan_group *grp) +static int bond_vlan_rx_add_vid(struct net_device *bond_dev, +				__be16 proto, u16 vid)  {  	struct bonding *bond = netdev_priv(bond_dev); -	struct slave *slave; -	int i; - -	write_lock_bh(&bond->lock); -	bond->vlgrp = grp; -	write_unlock_bh(&bond->lock); - -	bond_for_each_slave(bond, slave, i) { -		struct net_device *slave_dev = slave->dev; -		const struct net_device_ops *slave_ops = slave_dev->netdev_ops; +	struct slave *slave, *rollback_slave; +	struct list_head *iter; +	int res; -		if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && -		    slave_ops->ndo_vlan_rx_register) { -			slave_ops->ndo_vlan_rx_register(slave_dev, grp); -		} +	bond_for_each_slave(bond, slave, iter) { +		res = vlan_vid_add(slave->dev, proto, vid); +		if (res) +			goto unwind;  	} -} -/** - * bond_vlan_rx_add_vid - Propagates adding an id to slaves - * @bond_dev: bonding net device that got called - * @vid: vlan id being added - */ -static void bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid) -{ -	struct bonding *bond = netdev_priv(bond_dev); -	struct slave *slave; -	int i, res; +	return 0; -	bond_for_each_slave(bond, slave, i) { -		struct net_device *slave_dev = slave->dev; -		const struct net_device_ops *slave_ops = slave_dev->netdev_ops; +unwind: +	/* unwind to the slave that failed */ +	bond_for_each_slave(bond, rollback_slave, iter) { +		if (rollback_slave == slave) +			break; -		if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) && -		    slave_ops->ndo_vlan_rx_add_vid) { -			slave_ops->ndo_vlan_rx_add_vid(slave_dev, vid); -		} +		vlan_vid_del(rollback_slave->dev, proto, vid);  	} -	res = bond_add_vlan(bond, vid); -	if (res) { -		pr_err("%s: Error: Failed to add vlan id %d\n", -		       bond_dev->name, vid); -	} +	return res;  }  /** @@ -541,84 +310,20 @@ static void bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid)   * @bond_dev: bonding net device that got called   * @vid: vlan id being removed   */ -static void bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid) +static int bond_vlan_rx_kill_vid(struct net_device *bond_dev, +				 __be16 proto, u16 vid)  {  	struct bonding *bond = netdev_priv(bond_dev); +	struct list_head *iter;  	struct slave *slave; -	struct net_device *vlan_dev; -	int i, res; -	bond_for_each_slave(bond, slave, i) { -		struct net_device *slave_dev = slave->dev; -		const struct net_device_ops *slave_ops = slave_dev->netdev_ops; +	bond_for_each_slave(bond, slave, iter) +		vlan_vid_del(slave->dev, proto, vid); -		if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) && -		    slave_ops->ndo_vlan_rx_kill_vid) { -			/* Save and then restore vlan_dev in the grp array, -			 * since the slave's driver might clear it. 
-			 */ -			vlan_dev = vlan_group_get_device(bond->vlgrp, vid); -			slave_ops->ndo_vlan_rx_kill_vid(slave_dev, vid); -			vlan_group_set_device(bond->vlgrp, vid, vlan_dev); -		} -	} - -	res = bond_del_vlan(bond, vid); -	if (res) { -		pr_err("%s: Error: Failed to remove vlan id %d\n", -		       bond_dev->name, vid); -	} -} - -static void bond_add_vlans_on_slave(struct bonding *bond, struct net_device *slave_dev) -{ -	struct vlan_entry *vlan; -	const struct net_device_ops *slave_ops = slave_dev->netdev_ops; - -	if (!bond->vlgrp) -		return; - -	if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && -	    slave_ops->ndo_vlan_rx_register) -		slave_ops->ndo_vlan_rx_register(slave_dev, bond->vlgrp); - -	if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) || -	    !(slave_ops->ndo_vlan_rx_add_vid)) -		return; - -	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) -		slave_ops->ndo_vlan_rx_add_vid(slave_dev, vlan->vlan_id); -} - -static void bond_del_vlans_from_slave(struct bonding *bond, -				      struct net_device *slave_dev) -{ -	const struct net_device_ops *slave_ops = slave_dev->netdev_ops; -	struct vlan_entry *vlan; -	struct net_device *vlan_dev; - -	if (!bond->vlgrp) -		return; - -	if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) || -	    !(slave_ops->ndo_vlan_rx_kill_vid)) -		goto unreg; +	if (bond_is_lb(bond)) +		bond_alb_clear_vlan(bond, vid); -	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { -		if (!vlan->vlan_id) -			continue; -		/* Save and then restore vlan_dev in the grp array, -		 * since the slave's driver might clear it. -		 */ -		vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); -		slave_ops->ndo_vlan_rx_kill_vid(slave_dev, vlan->vlan_id); -		vlan_group_set_device(bond->vlgrp, vlan->vlan_id, vlan_dev); -	} - -unreg: -	if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && -	    slave_ops->ndo_vlan_rx_register) -		slave_ops->ndo_vlan_rx_register(slave_dev, NULL); +	return 0;  }  /*------------------------------- Link status -------------------------------*/ @@ -632,16 +337,16 @@ unreg:   */  static int bond_set_carrier(struct bonding *bond)  { +	struct list_head *iter;  	struct slave *slave; -	int i; -	if (bond->slave_cnt == 0) +	if (!bond_has_slaves(bond))  		goto down; -	if (bond->params.mode == BOND_MODE_8023AD) +	if (BOND_MODE(bond) == BOND_MODE_8023AD)  		return bond_3ad_set_carrier(bond); -	bond_for_each_slave(bond, slave, i) { +	bond_for_each_slave(bond, slave, iter) {  		if (slave->link == BOND_LINK_UP) {  			if (!netif_carrier_ok(bond->dev)) {  				netif_carrier_on(bond->dev); @@ -662,48 +367,55 @@ down:  /*   * Get link speed and duplex from the slave's base driver   * using ethtool. If for some reason the call fails or the - * values are invalid, fake speed and duplex to 100/Full - * and return error. + * values are invalid, set speed and duplex to -1, + * and return.   
*/ -static int bond_update_speed_duplex(struct slave *slave) +static void bond_update_speed_duplex(struct slave *slave)  {  	struct net_device *slave_dev = slave->dev; -	struct ethtool_cmd etool; +	struct ethtool_cmd ecmd; +	u32 slave_speed;  	int res; -	/* Fake speed and duplex */ -	slave->speed = SPEED_100; -	slave->duplex = DUPLEX_FULL; +	slave->speed = SPEED_UNKNOWN; +	slave->duplex = DUPLEX_UNKNOWN; -	if (!slave_dev->ethtool_ops || !slave_dev->ethtool_ops->get_settings) -		return -1; - -	res = slave_dev->ethtool_ops->get_settings(slave_dev, &etool); +	res = __ethtool_get_settings(slave_dev, &ecmd);  	if (res < 0) -		return -1; +		return; -	switch (etool.speed) { -	case SPEED_10: -	case SPEED_100: -	case SPEED_1000: -	case SPEED_10000: -		break; -	default: -		return -1; -	} +	slave_speed = ethtool_cmd_speed(&ecmd); +	if (slave_speed == 0 || slave_speed == ((__u32) -1)) +		return; -	switch (etool.duplex) { +	switch (ecmd.duplex) {  	case DUPLEX_FULL:  	case DUPLEX_HALF:  		break;  	default: -		return -1; +		return;  	} -	slave->speed = etool.speed; -	slave->duplex = etool.duplex; +	slave->speed = slave_speed; +	slave->duplex = ecmd.duplex; -	return 0; +	return; +} + +const char *bond_slave_link_status(s8 link) +{ +	switch (link) { +	case BOND_LINK_UP: +		return "up"; +	case BOND_LINK_FAIL: +		return "going down"; +	case BOND_LINK_DOWN: +		return "down"; +	case BOND_LINK_BACK: +		return "going back"; +	default: +		return "unknown"; +	}  }  /* @@ -737,15 +449,9 @@ static int bond_check_dev_link(struct bonding *bond,  		return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0;  	/* Try to get link status using Ethtool first. */ -	if (slave_dev->ethtool_ops) { -		if (slave_dev->ethtool_ops->get_link) { -			u32 link; - -			link = slave_dev->ethtool_ops->get_link(slave_dev); - -			return link ? BMSR_LSTATUS : 0; -		} -	} +	if (slave_dev->ethtool_ops->get_link) +		return slave_dev->ethtool_ops->get_link(slave_dev) ? +			BMSR_LSTATUS : 0;  	/* Ethtool can't be used, fallback to MII ioctls. 
*/  	ioctl = slave_ops->ndo_do_ioctl; @@ -788,8 +494,10 @@ static int bond_check_dev_link(struct bonding *bond,   */  static int bond_set_promiscuity(struct bonding *bond, int inc)  { +	struct list_head *iter;  	int err = 0; -	if (USES_PRIMARY(bond->params.mode)) { + +	if (bond_uses_primary(bond)) {  		/* write lock already acquired */  		if (bond->curr_active_slave) {  			err = dev_set_promiscuity(bond->curr_active_slave->dev, @@ -797,8 +505,8 @@ static int bond_set_promiscuity(struct bonding *bond, int inc)  		}  	} else {  		struct slave *slave; -		int i; -		bond_for_each_slave(bond, slave, i) { + +		bond_for_each_slave(bond, slave, iter) {  			err = dev_set_promiscuity(slave->dev, inc);  			if (err)  				return err; @@ -812,8 +520,10 @@ static int bond_set_promiscuity(struct bonding *bond, int inc)   */  static int bond_set_allmulti(struct bonding *bond, int inc)  { +	struct list_head *iter;  	int err = 0; -	if (USES_PRIMARY(bond->params.mode)) { + +	if (bond_uses_primary(bond)) {  		/* write lock already acquired */  		if (bond->curr_active_slave) {  			err = dev_set_allmulti(bond->curr_active_slave->dev, @@ -821,8 +531,8 @@ static int bond_set_allmulti(struct bonding *bond, int inc)  		}  	} else {  		struct slave *slave; -		int i; -		bond_for_each_slave(bond, slave, i) { + +		bond_for_each_slave(bond, slave, iter) {  			err = dev_set_allmulti(slave->dev, inc);  			if (err)  				return err; @@ -832,106 +542,39 @@ static int bond_set_allmulti(struct bonding *bond, int inc)  }  /* - * Add a Multicast address to slaves - * according to mode - */ -static void bond_mc_add(struct bonding *bond, void *addr) -{ -	if (USES_PRIMARY(bond->params.mode)) { -		/* write lock already acquired */ -		if (bond->curr_active_slave) -			dev_mc_add(bond->curr_active_slave->dev, addr); -	} else { -		struct slave *slave; -		int i; - -		bond_for_each_slave(bond, slave, i) -			dev_mc_add(slave->dev, addr); -	} -} - -/* - * Remove a multicast address from slave - * according to mode - */ -static void bond_mc_del(struct bonding *bond, void *addr) -{ -	if (USES_PRIMARY(bond->params.mode)) { -		/* write lock already acquired */ -		if (bond->curr_active_slave) -			dev_mc_del(bond->curr_active_slave->dev, addr); -	} else { -		struct slave *slave; -		int i; -		bond_for_each_slave(bond, slave, i) { -			dev_mc_del(slave->dev, addr); -		} -	} -} - - -static void __bond_resend_igmp_join_requests(struct net_device *dev) -{ -	struct in_device *in_dev; - -	rcu_read_lock(); -	in_dev = __in_dev_get_rcu(dev); -	if (in_dev) -		ip_mc_rejoin_groups(in_dev); -	rcu_read_unlock(); -} - -/*   * Retrieve the list of registered multicast addresses for the bonding   * device and retransmit an IGMP JOIN request to the current active   * slave.   
*/ -static void bond_resend_igmp_join_requests(struct bonding *bond) +static void bond_resend_igmp_join_requests_delayed(struct work_struct *work)  { -	struct net_device *vlan_dev; -	struct vlan_entry *vlan; - -	read_lock(&bond->lock); - -	/* rejoin all groups on bond device */ -	__bond_resend_igmp_join_requests(bond->dev); +	struct bonding *bond = container_of(work, struct bonding, +					    mcast_work.work); -	/* rejoin all groups on vlan devices */ -	if (bond->vlgrp) { -		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { -			vlan_dev = vlan_group_get_device(bond->vlgrp, -							 vlan->vlan_id); -			if (vlan_dev) -				__bond_resend_igmp_join_requests(vlan_dev); -		} +	if (!rtnl_trylock()) { +		queue_delayed_work(bond->wq, &bond->mcast_work, 1); +		return;  	} +	call_netdevice_notifiers(NETDEV_RESEND_IGMP, bond->dev); -	if (--bond->igmp_retrans > 0) +	if (bond->igmp_retrans > 1) { +		bond->igmp_retrans--;  		queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5); - -	read_unlock(&bond->lock); -} - -static void bond_resend_igmp_join_requests_delayed(struct work_struct *work) -{ -	struct bonding *bond = container_of(work, struct bonding, -							mcast_work.work); -	bond_resend_igmp_join_requests(bond); +	} +	rtnl_unlock();  } -/* - * flush all members of flush->mc_list from device dev->mc_list +/* Flush bond's hardware addresses from slave   */ -static void bond_mc_list_flush(struct net_device *bond_dev, +static void bond_hw_addr_flush(struct net_device *bond_dev,  			       struct net_device *slave_dev)  {  	struct bonding *bond = netdev_priv(bond_dev); -	struct netdev_hw_addr *ha; -	netdev_for_each_mc_addr(ha, bond_dev) -		dev_mc_del(slave_dev, ha->addr); +	dev_uc_unsync(slave_dev, bond_dev); +	dev_mc_unsync(slave_dev, bond_dev); -	if (bond->params.mode == BOND_MODE_8023AD) { +	if (BOND_MODE(bond) == BOND_MODE_8023AD) {  		/* del lacpdu mc addr from mc list */  		u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; @@ -941,21 +584,15 @@ static void bond_mc_list_flush(struct net_device *bond_dev,  /*--------------------------- Active slave change ---------------------------*/ -/* - * Update the mc list and multicast-related flags for the new and - * old active slaves (if any) according to the multicast mode, and - * promiscuous flags unconditionally. +/* Update the hardware address list and promisc/allmulti for the new and + * old active slaves (if any).  Modes that are not using primary keep all + * slaves up date at all times; only the modes that use primary need to call + * this function to swap these settings during a failover.   
*/ -static void bond_mc_swap(struct bonding *bond, struct slave *new_active, -			 struct slave *old_active) +static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active, +			      struct slave *old_active)  { -	struct netdev_hw_addr *ha; - -	if (!USES_PRIMARY(bond->params.mode)) -		/* nothing to do -  mc list is already up-to-date on -		 * all slaves -		 */ -		return; +	ASSERT_RTNL();  	if (old_active) {  		if (bond->dev->flags & IFF_PROMISC) @@ -964,8 +601,7 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active,  		if (bond->dev->flags & IFF_ALLMULTI)  			dev_set_allmulti(old_active->dev, -1); -		netdev_for_each_mc_addr(ha, bond->dev) -			dev_mc_del(old_active->dev, ha->addr); +		bond_hw_addr_flush(bond->dev, old_active->dev);  	}  	if (new_active) { @@ -976,24 +612,41 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active,  		if (bond->dev->flags & IFF_ALLMULTI)  			dev_set_allmulti(new_active->dev, 1); -		netdev_for_each_mc_addr(ha, bond->dev) -			dev_mc_add(new_active->dev, ha->addr); +		netif_addr_lock_bh(bond->dev); +		dev_uc_sync(new_active->dev, bond->dev); +		dev_mc_sync(new_active->dev, bond->dev); +		netif_addr_unlock_bh(bond->dev);  	}  } +/** + * bond_set_dev_addr - clone slave's address to bond + * @bond_dev: bond net device + * @slave_dev: slave net device + * + * Should be called with RTNL held. + */ +static void bond_set_dev_addr(struct net_device *bond_dev, +			      struct net_device *slave_dev) +{ +	pr_debug("bond_dev=%p slave_dev=%p slave_dev->addr_len=%d\n", +		 bond_dev, slave_dev, slave_dev->addr_len); +	memcpy(bond_dev->dev_addr, slave_dev->dev_addr, slave_dev->addr_len); +	bond_dev->addr_assign_type = NET_ADDR_STOLEN; +	call_netdevice_notifiers(NETDEV_CHANGEADDR, bond_dev); +} +  /*   * bond_do_fail_over_mac   *   * Perform special MAC address swapping for fail_over_mac settings   * - * Called with RTNL, bond->lock for read, curr_slave_lock for write_bh. + * Called with RTNL, curr_slave_lock for write_bh.   
*/  static void bond_do_fail_over_mac(struct bonding *bond,  				  struct slave *new_active,  				  struct slave *old_active)  	__releases(&bond->curr_slave_lock) -	__releases(&bond->lock) -	__acquires(&bond->lock)  	__acquires(&bond->curr_slave_lock)  {  	u8 tmp_mac[ETH_ALEN]; @@ -1002,9 +655,11 @@ static void bond_do_fail_over_mac(struct bonding *bond,  	switch (bond->params.fail_over_mac) {  	case BOND_FOM_ACTIVE: -		if (new_active) -			memcpy(bond->dev->dev_addr,  new_active->dev->dev_addr, -			       new_active->dev->addr_len); +		if (new_active) { +			write_unlock_bh(&bond->curr_slave_lock); +			bond_set_dev_addr(bond->dev, new_active->dev); +			write_lock_bh(&bond->curr_slave_lock); +		}  		break;  	case BOND_FOM_FOLLOW:  		/* @@ -1016,15 +671,14 @@ static void bond_do_fail_over_mac(struct bonding *bond,  			return;  		write_unlock_bh(&bond->curr_slave_lock); -		read_unlock(&bond->lock);  		if (old_active) { -			memcpy(tmp_mac, new_active->dev->dev_addr, ETH_ALEN); -			memcpy(saddr.sa_data, old_active->dev->dev_addr, -			       ETH_ALEN); +			ether_addr_copy(tmp_mac, new_active->dev->dev_addr); +			ether_addr_copy(saddr.sa_data, +					old_active->dev->dev_addr);  			saddr.sa_family = new_active->dev->type;  		} else { -			memcpy(saddr.sa_data, bond->dev->dev_addr, ETH_ALEN); +			ether_addr_copy(saddr.sa_data, bond->dev->dev_addr);  			saddr.sa_family = bond->dev->type;  		} @@ -1038,7 +692,7 @@ static void bond_do_fail_over_mac(struct bonding *bond,  		if (!old_active)  			goto out; -		memcpy(saddr.sa_data, tmp_mac, ETH_ALEN); +		ether_addr_copy(saddr.sa_data, tmp_mac);  		saddr.sa_family = old_active->dev->type;  		rv = dev_set_mac_address(old_active->dev, &saddr); @@ -1046,7 +700,6 @@ static void bond_do_fail_over_mac(struct bonding *bond,  			pr_err("%s: Error %d setting MAC of slave %s\n",  			       bond->dev->name, -rv, new_active->dev->name);  out: -		read_lock(&bond->lock);  		write_lock_bh(&bond->curr_slave_lock);  		break;  	default: @@ -1080,48 +733,46 @@ static bool bond_should_change_active(struct bonding *bond)  /**   * find_best_interface - select the best available slave to be the active one   * @bond: our bonding struct - * - * Warning: Caller must hold curr_slave_lock for writing.   
*/  static struct slave *bond_find_best_slave(struct bonding *bond)  { -	struct slave *new_active, *old_active; -	struct slave *bestslave = NULL; +	struct slave *slave, *bestslave = NULL; +	struct list_head *iter;  	int mintime = bond->params.updelay; -	int i; -	new_active = bond->curr_active_slave; +	if (bond->primary_slave && bond->primary_slave->link == BOND_LINK_UP && +	    bond_should_change_active(bond)) +		return bond->primary_slave; -	if (!new_active) { /* there were no active slaves left */ -		if (bond->slave_cnt > 0)   /* found one slave */ -			new_active = bond->first_slave; -		else -			return NULL; /* still no slave, return NULL */ +	bond_for_each_slave(bond, slave, iter) { +		if (slave->link == BOND_LINK_UP) +			return slave; +		if (slave->link == BOND_LINK_BACK && bond_slave_is_up(slave) && +		    slave->delay < mintime) { +			mintime = slave->delay; +			bestslave = slave; +		}  	} -	if ((bond->primary_slave) && -	    bond->primary_slave->link == BOND_LINK_UP && -	    bond_should_change_active(bond)) { -		new_active = bond->primary_slave; -	} +	return bestslave; +} -	/* remember where to stop iterating over the slaves */ -	old_active = new_active; +static bool bond_should_notify_peers(struct bonding *bond) +{ +	struct slave *slave; -	bond_for_each_slave_from(bond, new_active, i, old_active) { -		if (new_active->link == BOND_LINK_UP) { -			return new_active; -		} else if (new_active->link == BOND_LINK_BACK && -			   IS_UP(new_active->dev)) { -			/* link up, but waiting for stabilization */ -			if (new_active->delay < mintime) { -				mintime = new_active->delay; -				bestslave = new_active; -			} -		} -	} +	rcu_read_lock(); +	slave = rcu_dereference(bond->curr_active_slave); +	rcu_read_unlock(); -	return bestslave; +	pr_debug("bond_should_notify_peers: bond %s slave %s\n", +		 bond->dev->name, slave ? slave->dev->name : "NULL"); + +	if (!slave || !bond->send_peer_notif || +	    test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state)) +		return false; + +	return true;  }  /** @@ -1137,8 +788,7 @@ static struct slave *bond_find_best_slave(struct bonding *bond)   * because it is apparently the best available slave we have, even though its   * updelay hasn't timed out yet.   * - * If new_active is not NULL, caller must hold bond->lock for read and - * curr_slave_lock for write_bh. + * If new_active is not NULL, caller must hold curr_slave_lock for write_bh.   
*/  void bond_change_active_slave(struct bonding *bond, struct slave *new_active)  { @@ -1148,11 +798,11 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)  		return;  	if (new_active) { -		new_active->jiffies = jiffies; +		new_active->last_link_up = jiffies;  		if (new_active->link == BOND_LINK_BACK) { -			if (USES_PRIMARY(bond->params.mode)) { -				pr_info("%s: making interface %s the new active one %d ms earlier.\n", +			if (bond_uses_primary(bond)) { +				pr_info("%s: making interface %s the new active one %d ms earlier\n",  					bond->dev->name, new_active->dev->name,  					(bond->params.updelay - new_active->delay) * bond->params.miimon);  			} @@ -1160,65 +810,76 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)  			new_active->delay = 0;  			new_active->link = BOND_LINK_UP; -			if (bond->params.mode == BOND_MODE_8023AD) +			if (BOND_MODE(bond) == BOND_MODE_8023AD)  				bond_3ad_handle_link_change(new_active, BOND_LINK_UP);  			if (bond_is_lb(bond))  				bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP);  		} else { -			if (USES_PRIMARY(bond->params.mode)) { -				pr_info("%s: making interface %s the new active one.\n", +			if (bond_uses_primary(bond)) { +				pr_info("%s: making interface %s the new active one\n",  					bond->dev->name, new_active->dev->name);  			}  		}  	} -	if (USES_PRIMARY(bond->params.mode)) -		bond_mc_swap(bond, new_active, old_active); +	if (bond_uses_primary(bond)) +		bond_hw_addr_swap(bond, new_active, old_active);  	if (bond_is_lb(bond)) {  		bond_alb_handle_active_change(bond, new_active);  		if (old_active) -			bond_set_slave_inactive_flags(old_active); +			bond_set_slave_inactive_flags(old_active, +						      BOND_SLAVE_NOTIFY_NOW);  		if (new_active) -			bond_set_slave_active_flags(new_active); +			bond_set_slave_active_flags(new_active, +						    BOND_SLAVE_NOTIFY_NOW);  	} else { -		bond->curr_active_slave = new_active; +		rcu_assign_pointer(bond->curr_active_slave, new_active);  	} -	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) { +	if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) {  		if (old_active) -			bond_set_slave_inactive_flags(old_active); +			bond_set_slave_inactive_flags(old_active, +						      BOND_SLAVE_NOTIFY_NOW);  		if (new_active) { -			bond_set_slave_active_flags(new_active); +			bool should_notify_peers = false; + +			bond_set_slave_active_flags(new_active, +						    BOND_SLAVE_NOTIFY_NOW);  			if (bond->params.fail_over_mac)  				bond_do_fail_over_mac(bond, new_active,  						      old_active); -			bond->send_grat_arp = bond->params.num_grat_arp; -			bond_send_gratuitous_arp(bond); - -			bond->send_unsol_na = bond->params.num_unsol_na; -			bond_send_unsolicited_na(bond); +			if (netif_running(bond->dev)) { +				bond->send_peer_notif = +					bond->params.num_peer_notif; +				should_notify_peers = +					bond_should_notify_peers(bond); +			}  			write_unlock_bh(&bond->curr_slave_lock); -			read_unlock(&bond->lock); -			netdev_bonding_change(bond->dev, NETDEV_BONDING_FAILOVER); +			call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev); +			if (should_notify_peers) +				call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, +							 bond->dev); -			read_lock(&bond->lock);  			write_lock_bh(&bond->curr_slave_lock);  		}  	}  	/* resend IGMP joins since active slave has changed or -	 * all were sent on curr_active_slave */ -	if ((USES_PRIMARY(bond->params.mode) && new_active) || -	    bond->params.mode == BOND_MODE_ROUNDROBIN) { +	 * all were sent on 
curr_active_slave. +	 * resend only if bond is brought up with the affected +	 * bonding modes and the retransmission is enabled */ +	if (netif_running(bond->dev) && (bond->params.resend_igmp > 0) && +	    ((bond_uses_primary(bond) && new_active) || +	     BOND_MODE(bond) == BOND_MODE_ROUNDROBIN)) {  		bond->igmp_retrans = bond->params.resend_igmp; -		queue_delayed_work(bond->wq, &bond->mcast_work, 0); +		queue_delayed_work(bond->wq, &bond->mcast_work, 1);  	}  } @@ -1231,7 +892,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)   * - The primary_slave has got its link back.   * - A slave has got its link back and there's no old curr_active_slave.   * - * Caller must hold bond->lock for read and curr_slave_lock for write_bh. + * Caller must hold curr_slave_lock for write_bh.   */  void bond_select_active_slave(struct bonding *bond)  { @@ -1249,191 +910,171 @@ void bond_select_active_slave(struct bonding *bond)  			pr_info("%s: first active interface up!\n",  				bond->dev->name);  		} else { -			pr_info("%s: now running without any active interface !\n", +			pr_info("%s: now running without any active interface!\n",  				bond->dev->name);  		}  	}  } -/*--------------------------- slave list handling ---------------------------*/ - -/* - * This function attaches the slave to the end of list. - * - * bond->lock held for writing by caller. - */ -static void bond_attach_slave(struct bonding *bond, struct slave *new_slave) +#ifdef CONFIG_NET_POLL_CONTROLLER +static inline int slave_enable_netpoll(struct slave *slave)  { -	if (bond->first_slave == NULL) { /* attaching the first slave */ -		new_slave->next = new_slave; -		new_slave->prev = new_slave; -		bond->first_slave = new_slave; -	} else { -		new_slave->next = bond->first_slave; -		new_slave->prev = bond->first_slave->prev; -		new_slave->next->prev = new_slave; -		new_slave->prev->next = new_slave; -	} +	struct netpoll *np; +	int err = 0; -	bond->slave_cnt++; -} +	np = kzalloc(sizeof(*np), GFP_KERNEL); +	err = -ENOMEM; +	if (!np) +		goto out; -/* - * This function detaches the slave from the list. - * WARNING: no check is made to verify if the slave effectively - * belongs to <bond>. - * Nothing is freed on return, structures are just unchained. - * If any slave pointer in bond was pointing to <slave>, - * it should be changed by the calling function. - * - * bond->lock held for writing by caller. - */ -static void bond_detach_slave(struct bonding *bond, struct slave *slave) +	err = __netpoll_setup(np, slave->dev); +	if (err) { +		kfree(np); +		goto out; +	} +	slave->np = np; +out: +	return err; +} +static inline void slave_disable_netpoll(struct slave *slave)  { -	if (slave->next) -		slave->next->prev = slave->prev; +	struct netpoll *np = slave->np; -	if (slave->prev) -		slave->prev->next = slave->next; - -	if (bond->first_slave == slave) { /* slave is the first slave */ -		if (bond->slave_cnt > 1) { /* there are more slave */ -			bond->first_slave = slave->next; -		} else { -			bond->first_slave = NULL; /* slave was the last one */ -		} -	} +	if (!np) +		return; -	slave->next = NULL; -	slave->prev = NULL; -	bond->slave_cnt--; +	slave->np = NULL; +	__netpoll_free_async(np);  } -#ifdef CONFIG_NET_POLL_CONTROLLER -/* - * You must hold read lock on bond->lock before calling this. 
- */ -static bool slaves_support_netpoll(struct net_device *bond_dev) +static void bond_poll_controller(struct net_device *bond_dev)  { -	struct bonding *bond = netdev_priv(bond_dev); -	struct slave *slave; -	int i = 0; -	bool ret = true; - -	bond_for_each_slave(bond, slave, i) { -		if ((slave->dev->priv_flags & IFF_DISABLE_NETPOLL) || -		    !slave->dev->netdev_ops->ndo_poll_controller) -			ret = false; -	} -	return i != 0 && ret;  } -static void bond_poll_controller(struct net_device *bond_dev) +static void bond_netpoll_cleanup(struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev); +	struct list_head *iter;  	struct slave *slave; -	int i; -	bond_for_each_slave(bond, slave, i) { -		if (slave->dev && IS_UP(slave->dev)) -			netpoll_poll_dev(slave->dev); -	} +	bond_for_each_slave(bond, slave, iter) +		if (bond_slave_is_up(slave)) +			slave_disable_netpoll(slave);  } -static void bond_netpoll_cleanup(struct net_device *bond_dev) +static int bond_netpoll_setup(struct net_device *dev, struct netpoll_info *ni)  { -	struct bonding *bond = netdev_priv(bond_dev); +	struct bonding *bond = netdev_priv(dev); +	struct list_head *iter;  	struct slave *slave; -	const struct net_device_ops *ops; -	int i; +	int err = 0; -	read_lock(&bond->lock); -	bond_dev->npinfo = NULL; -	bond_for_each_slave(bond, slave, i) { -		if (slave->dev) { -			ops = slave->dev->netdev_ops; -			if (ops->ndo_netpoll_cleanup) -				ops->ndo_netpoll_cleanup(slave->dev); -			else -				slave->dev->npinfo = NULL; +	bond_for_each_slave(bond, slave, iter) { +		err = slave_enable_netpoll(slave); +		if (err) { +			bond_netpoll_cleanup(dev); +			break;  		}  	} -	read_unlock(&bond->lock); +	return err;  } -  #else - +static inline int slave_enable_netpoll(struct slave *slave) +{ +	return 0; +} +static inline void slave_disable_netpoll(struct slave *slave) +{ +}  static void bond_netpoll_cleanup(struct net_device *bond_dev)  {  } -  #endif  /*---------------------------------- IOCTL ----------------------------------*/ -static int bond_sethwaddr(struct net_device *bond_dev, -			  struct net_device *slave_dev) +static netdev_features_t bond_fix_features(struct net_device *dev, +					   netdev_features_t features)  { -	pr_debug("bond_dev=%p\n", bond_dev); -	pr_debug("slave_dev=%p\n", slave_dev); -	pr_debug("slave_dev->addr_len=%d\n", slave_dev->addr_len); -	memcpy(bond_dev->dev_addr, slave_dev->dev_addr, slave_dev->addr_len); -	return 0; +	struct bonding *bond = netdev_priv(dev); +	struct list_head *iter; +	netdev_features_t mask; +	struct slave *slave; + +	if (!bond_has_slaves(bond)) { +		/* Disable adding VLANs to empty bond. But why? --mq */ +		features |= NETIF_F_VLAN_CHALLENGED; +		return features; +	} + +	mask = features; +	features &= ~NETIF_F_ONE_FOR_ALL; +	features |= NETIF_F_ALL_FOR_ALL; + +	bond_for_each_slave(bond, slave, iter) { +		features = netdev_increment_features(features, +						     slave->dev->features, +						     mask); +	} +	features = netdev_add_tso_features(features, mask); + +	return features;  } -#define BOND_VLAN_FEATURES \ -	(NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX | \ -	 NETIF_F_HW_VLAN_FILTER) +#define BOND_VLAN_FEATURES	(NETIF_F_ALL_CSUM | NETIF_F_SG | \ +				 NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ +				 NETIF_F_HIGHDMA | NETIF_F_LRO) -/* - * Compute the common dev->feature set available to all slaves.  Some - * feature bits are managed elsewhere, so preserve those feature bits - * on the master device. 
- */ -static int bond_compute_features(struct bonding *bond) +#define BOND_ENC_FEATURES	(NETIF_F_ALL_CSUM | NETIF_F_SG | NETIF_F_RXCSUM |\ +				 NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL) + +static void bond_compute_features(struct bonding *bond)  { -	struct slave *slave; +	unsigned int flags, dst_release_flag = IFF_XMIT_DST_RELEASE; +	netdev_features_t vlan_features = BOND_VLAN_FEATURES; +	netdev_features_t enc_features  = BOND_ENC_FEATURES;  	struct net_device *bond_dev = bond->dev; -	unsigned long features = bond_dev->features; -	unsigned long vlan_features = 0; -	unsigned short max_hard_header_len = max((u16)ETH_HLEN, -						bond_dev->hard_header_len); -	int i; - -	features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES); -	features |=  NETIF_F_GSO_MASK | NETIF_F_NO_CSUM; +	struct list_head *iter; +	struct slave *slave; +	unsigned short max_hard_header_len = ETH_HLEN; +	unsigned int gso_max_size = GSO_MAX_SIZE; +	u16 gso_max_segs = GSO_MAX_SEGS; -	if (!bond->first_slave) +	if (!bond_has_slaves(bond))  		goto done; +	vlan_features &= NETIF_F_ALL_FOR_ALL; -	features &= ~NETIF_F_ONE_FOR_ALL; - -	vlan_features = bond->first_slave->dev->vlan_features; -	bond_for_each_slave(bond, slave, i) { -		features = netdev_increment_features(features, -						     slave->dev->features, -						     NETIF_F_ONE_FOR_ALL); +	bond_for_each_slave(bond, slave, iter) {  		vlan_features = netdev_increment_features(vlan_features, -							slave->dev->vlan_features, -							NETIF_F_ONE_FOR_ALL); +			slave->dev->vlan_features, BOND_VLAN_FEATURES); + +		enc_features = netdev_increment_features(enc_features, +							 slave->dev->hw_enc_features, +							 BOND_ENC_FEATURES); +		dst_release_flag &= slave->dev->priv_flags;  		if (slave->dev->hard_header_len > max_hard_header_len)  			max_hard_header_len = slave->dev->hard_header_len; + +		gso_max_size = min(gso_max_size, slave->dev->gso_max_size); +		gso_max_segs = min(gso_max_segs, slave->dev->gso_max_segs);  	}  done: -	features |= (bond_dev->features & BOND_VLAN_FEATURES); -	bond_dev->features = netdev_fix_features(features, NULL); -	bond_dev->vlan_features = netdev_fix_features(vlan_features, NULL); +	bond_dev->vlan_features = vlan_features; +	bond_dev->hw_enc_features = enc_features;  	bond_dev->hard_header_len = max_hard_header_len; +	bond_dev->gso_max_segs = gso_max_segs; +	netif_set_gso_max_size(bond_dev, gso_max_size); -	return 0; +	flags = bond_dev->priv_flags & ~IFF_XMIT_DST_RELEASE; +	bond_dev->priv_flags = flags | dst_release_flag; + +	netdev_change_features(bond_dev);  }  static void bond_setup_by_slave(struct net_device *bond_dev,  				struct net_device *slave_dev)  { -	struct bonding *bond = netdev_priv(bond_dev); -  	bond_dev->header_ops	    = slave_dev->header_ops;  	bond_dev->type		    = slave_dev->type; @@ -1442,7 +1083,122 @@ static void bond_setup_by_slave(struct net_device *bond_dev,  	memcpy(bond_dev->broadcast, slave_dev->broadcast,  		slave_dev->addr_len); -	bond->setup_by_slave = 1; +} + +/* On bonding slaves other than the currently active slave, suppress + * duplicates except for alb non-mcast/bcast. 
+ */ +static bool bond_should_deliver_exact_match(struct sk_buff *skb, +					    struct slave *slave, +					    struct bonding *bond) +{ +	if (bond_is_slave_inactive(slave)) { +		if (BOND_MODE(bond) == BOND_MODE_ALB && +		    skb->pkt_type != PACKET_BROADCAST && +		    skb->pkt_type != PACKET_MULTICAST) +			return false; +		return true; +	} +	return false; +} + +static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb) +{ +	struct sk_buff *skb = *pskb; +	struct slave *slave; +	struct bonding *bond; +	int (*recv_probe)(const struct sk_buff *, struct bonding *, +			  struct slave *); +	int ret = RX_HANDLER_ANOTHER; + +	skb = skb_share_check(skb, GFP_ATOMIC); +	if (unlikely(!skb)) +		return RX_HANDLER_CONSUMED; + +	*pskb = skb; + +	slave = bond_slave_get_rcu(skb->dev); +	bond = slave->bond; + +	recv_probe = ACCESS_ONCE(bond->recv_probe); +	if (recv_probe) { +		ret = recv_probe(skb, bond, slave); +		if (ret == RX_HANDLER_CONSUMED) { +			consume_skb(skb); +			return ret; +		} +	} + +	if (bond_should_deliver_exact_match(skb, slave, bond)) { +		return RX_HANDLER_EXACT; +	} + +	skb->dev = bond->dev; + +	if (BOND_MODE(bond) == BOND_MODE_ALB && +	    bond->dev->priv_flags & IFF_BRIDGE_PORT && +	    skb->pkt_type == PACKET_HOST) { + +		if (unlikely(skb_cow_head(skb, +					  skb->data - skb_mac_header(skb)))) { +			kfree_skb(skb); +			return RX_HANDLER_CONSUMED; +		} +		ether_addr_copy(eth_hdr(skb)->h_dest, bond->dev->dev_addr); +	} + +	return ret; +} + +static int bond_master_upper_dev_link(struct net_device *bond_dev, +				      struct net_device *slave_dev, +				      struct slave *slave) +{ +	int err; + +	err = netdev_master_upper_dev_link_private(slave_dev, bond_dev, slave); +	if (err) +		return err; +	slave_dev->flags |= IFF_SLAVE; +	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL); +	return 0; +} + +static void bond_upper_dev_unlink(struct net_device *bond_dev, +				  struct net_device *slave_dev) +{ +	netdev_upper_dev_unlink(slave_dev, bond_dev); +	slave_dev->flags &= ~IFF_SLAVE; +	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL); +} + +static struct slave *bond_alloc_slave(struct bonding *bond) +{ +	struct slave *slave = NULL; + +	slave = kzalloc(sizeof(struct slave), GFP_KERNEL); +	if (!slave) +		return NULL; + +	if (BOND_MODE(bond) == BOND_MODE_8023AD) { +		SLAVE_AD_INFO(slave) = kzalloc(sizeof(struct ad_slave_info), +					       GFP_KERNEL); +		if (!SLAVE_AD_INFO(slave)) { +			kfree(slave); +			return NULL; +		} +	} +	return slave; +} + +static void bond_free_slave(struct slave *slave) +{ +	struct bonding *bond = bond_get_bond_by_slave(slave); + +	if (BOND_MODE(bond) == BOND_MODE_8023AD) +		kfree(SLAVE_AD_INFO(slave)); + +	kfree(slave);  }  /* enslave device <slave> to bond device <master> */ @@ -1450,53 +1206,44 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  {  	struct bonding *bond = netdev_priv(bond_dev);  	const struct net_device_ops *slave_ops = slave_dev->netdev_ops; -	struct slave *new_slave = NULL; -	struct netdev_hw_addr *ha; +	struct slave *new_slave = NULL, *prev_slave;  	struct sockaddr addr;  	int link_reporting; -	int old_features = bond_dev->features; -	int res = 0; +	int res = 0, i; -	if (!bond->params.use_carrier && slave_dev->ethtool_ops == NULL && -		slave_ops->ndo_do_ioctl == NULL) { -		pr_warning("%s: Warning: no link monitoring support for %s\n", -			   bond_dev->name, slave_dev->name); -	} - -	/* bond must be initialized by bond_open() before enslaving */ -	if (!(bond_dev->flags & IFF_UP)) { -		
pr_warning("%s: master_dev is not up in bond_enslave\n", -			   bond_dev->name); +	if (!bond->params.use_carrier && +	    slave_dev->ethtool_ops->get_link == NULL && +	    slave_ops->ndo_do_ioctl == NULL) { +		pr_warn("%s: Warning: no link monitoring support for %s\n", +			bond_dev->name, slave_dev->name);  	}  	/* already enslaved */  	if (slave_dev->flags & IFF_SLAVE) { -		pr_debug("Error, Device was already enslaved\n"); +		pr_debug("Error: Device was already enslaved\n");  		return -EBUSY;  	} +	if (bond_dev == slave_dev) { +		pr_err("%s: cannot enslave bond to itself.\n", bond_dev->name); +		return -EPERM; +	} +  	/* vlan challenged mutual exclusion */  	/* no need to lock since we're protected by rtnl_lock */  	if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) {  		pr_debug("%s: NETIF_F_VLAN_CHALLENGED\n", slave_dev->name); -		if (bond->vlgrp) { +		if (vlan_uses_dev(bond_dev)) {  			pr_err("%s: Error: cannot enslave VLAN challenged slave %s on VLAN enabled bond %s\n",  			       bond_dev->name, slave_dev->name, bond_dev->name);  			return -EPERM;  		} else { -			pr_warning("%s: Warning: enslaved VLAN challenged slave %s. Adding VLANs will be blocked as long as %s is part of bond %s\n", -				   bond_dev->name, slave_dev->name, -				   slave_dev->name, bond_dev->name); -			bond_dev->features |= NETIF_F_VLAN_CHALLENGED; +			pr_warn("%s: Warning: enslaved VLAN challenged slave %s. Adding VLANs will be blocked as long as %s is part of bond %s\n", +				bond_dev->name, slave_dev->name, +				slave_dev->name, bond_dev->name);  		}  	} else {  		pr_debug("%s: ! NETIF_F_VLAN_CHALLENGED\n", slave_dev->name); -		if (bond->slave_cnt == 0) { -			/* First slave, and it is not VLAN challenged, -			 * so remove the block of adding VLANs over the bond. -			 */ -			bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED; -		}  	}  	/* @@ -1506,7 +1253,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  	 * enslaving it; the old ifenslave will not.  	 */  	if ((slave_dev->flags & IFF_UP)) { -		pr_err("%s is up. 
This may be due to an out of date ifenslave.\n", +		pr_err("%s is up - this may be due to an out of date ifenslave\n",  		       slave_dev->name);  		res = -EPERM;  		goto err_undo_flags; @@ -1519,14 +1266,14 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  	 * bond ether type mutual exclusion - don't allow slaves of dissimilar  	 * ether type (eg ARPHRD_ETHER and ARPHRD_INFINIBAND) share the same bond  	 */ -	if (bond->slave_cnt == 0) { +	if (!bond_has_slaves(bond)) {  		if (bond_dev->type != slave_dev->type) {  			pr_debug("%s: change device type from %d to %d\n",  				 bond_dev->name,  				 bond_dev->type, slave_dev->type); -			res = netdev_bonding_change(bond_dev, -						    NETDEV_PRE_TYPE_CHANGE); +			res = call_netdevice_notifiers(NETDEV_PRE_TYPE_CHANGE, +						       bond_dev);  			res = notifier_to_errno(res);  			if (res) {  				pr_err("%s: refused to change device type\n", @@ -1541,46 +1288,54 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  			if (slave_dev->type != ARPHRD_ETHER)  				bond_setup_by_slave(bond_dev, slave_dev); -			else +			else {  				ether_setup(bond_dev); +				bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING; +			} -			netdev_bonding_change(bond_dev, -					      NETDEV_POST_TYPE_CHANGE); +			call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE, +						 bond_dev);  		}  	} else if (bond_dev->type != slave_dev->type) { -		pr_err("%s ether type (%d) is different from other slaves (%d), can not enslave it.\n", -		       slave_dev->name, -		       slave_dev->type, bond_dev->type); +		pr_err("%s ether type (%d) is different from other slaves (%d), can not enslave it\n", +		       slave_dev->name, slave_dev->type, bond_dev->type);  		res = -EINVAL;  		goto err_undo_flags;  	}  	if (slave_ops->ndo_set_mac_address == NULL) { -		if (bond->slave_cnt == 0) { -			pr_warning("%s: Warning: The first slave device specified does not support setting the MAC address. Setting fail_over_mac to active.", -				   bond_dev->name); -			bond->params.fail_over_mac = BOND_FOM_ACTIVE; +		if (!bond_has_slaves(bond)) { +			pr_warn("%s: Warning: The first slave device specified does not support setting the MAC address\n", +				bond_dev->name); +			if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) { +				bond->params.fail_over_mac = BOND_FOM_ACTIVE; +				pr_warn("%s: Setting fail_over_mac to active for active-backup mode\n", +					bond_dev->name); +			}  		} else if (bond->params.fail_over_mac != BOND_FOM_ACTIVE) { -			pr_err("%s: Error: The slave device specified does not support setting the MAC address, but fail_over_mac is not set to active.\n", +			pr_err("%s: Error: The slave device specified does not support setting the MAC address, but fail_over_mac is not set to active\n",  			       bond_dev->name);  			res = -EOPNOTSUPP;  			goto err_undo_flags;  		}  	} +	call_netdevice_notifiers(NETDEV_JOIN, slave_dev); +  	/* If this is the first slave, then we need to set the master's hardware  	 * address to be the same as the slave's. */ -	if (bond->slave_cnt == 0) -		memcpy(bond->dev->dev_addr, slave_dev->dev_addr, -		       slave_dev->addr_len); +	if (!bond_has_slaves(bond) && +	    bond->dev->addr_assign_type == NET_ADDR_RANDOM) +		bond_set_dev_addr(bond->dev, slave_dev); - -	new_slave = kzalloc(sizeof(struct slave), GFP_KERNEL); +	new_slave = bond_alloc_slave(bond);  	if (!new_slave) {  		res = -ENOMEM;  		goto err_undo_flags;  	} +	new_slave->bond = bond; +	new_slave->dev = slave_dev;  	/*  	 * Set the new_slave's queue_id to be zero.  
Queue ID mapping  	 * is set via sysfs or module option if desired. @@ -1600,9 +1355,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  	 * that need it, and for restoring it upon release, and then  	 * set it to the master's address  	 */ -	memcpy(new_slave->perm_hwaddr, slave_dev->dev_addr, ETH_ALEN); +	ether_addr_copy(new_slave->perm_hwaddr, slave_dev->dev_addr); -	if (!bond->params.fail_over_mac) { +	if (!bond->params.fail_over_mac || +	    BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {  		/*  		 * Set slave to master's mac address.  The application already  		 * set the master's mac address to that of the first slave @@ -1616,19 +1372,13 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  		}  	} -	res = netdev_set_master(slave_dev, bond_dev); -	if (res) { -		pr_debug("Error %d calling netdev_set_master\n", res); -		goto err_restore_mac; -	}  	/* open the slave since the application closed it */  	res = dev_open(slave_dev);  	if (res) {  		pr_debug("Opening slave %s failed\n", slave_dev->name); -		goto err_unset_master; +		goto err_restore_mac;  	} -	new_slave->dev = slave_dev;  	slave_dev->priv_flags |= IFF_BONDING;  	if (bond_is_lb(bond)) { @@ -1640,12 +1390,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  			goto err_close;  	} -	/* If the mode USES_PRIMARY, then the new slave gets the -	 * master's promisc (and mc) settings only if it becomes the -	 * curr_active_slave, and that is taken care of later when calling -	 * bond_change_active() +	/* If the mode uses primary, then the following is handled by +	 * bond_change_active_slave().  	 */ -	if (!USES_PRIMARY(bond->params.mode)) { +	if (!bond_uses_primary(bond)) {  		/* set promiscuity level to new slave */  		if (bond_dev->flags & IFF_PROMISC) {  			res = dev_set_promiscuity(slave_dev, 1); @@ -1661,35 +1409,38 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  		}  		netif_addr_lock_bh(bond_dev); -		/* upload master's mc_list to new slave */ -		netdev_for_each_mc_addr(ha, bond_dev) -			dev_mc_add(slave_dev, ha->addr); + +		dev_mc_sync_multiple(slave_dev, bond_dev); +		dev_uc_sync_multiple(slave_dev, bond_dev); +  		netif_addr_unlock_bh(bond_dev);  	} -	if (bond->params.mode == BOND_MODE_8023AD) { +	if (BOND_MODE(bond) == BOND_MODE_8023AD) {  		/* add lacpdu mc addr to mc list */  		u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;  		dev_mc_add(slave_dev, lacpdu_multicast);  	} -	bond_add_vlans_on_slave(bond, slave_dev); - -	write_lock_bh(&bond->lock); +	res = vlan_vids_add_by_dev(slave_dev, bond_dev); +	if (res) { +		pr_err("%s: Error: Couldn't add bond vlan ids to %s\n", +		       bond_dev->name, slave_dev->name); +		goto err_close; +	} -	bond_attach_slave(bond, new_slave); +	prev_slave = bond_last_slave(bond);  	new_slave->delay = 0;  	new_slave->link_failure_count = 0; -	bond_compute_features(bond); - -	write_unlock_bh(&bond->lock); - -	read_lock(&bond->lock); +	bond_update_speed_duplex(new_slave); -	new_slave->last_arp_rx = jiffies; +	new_slave->last_rx = jiffies - +		(msecs_to_jiffies(bond->params.arp_interval) + 1); +	for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) +		new_slave->target_last_arp_rx[i] = new_slave->last_rx;  	if (bond->params.miimon && !bond->params.use_carrier) {  		link_reporting = bond_check_dev_link(bond, slave_dev, 1); @@ -1704,44 +1455,41 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  			 * supported); thus, we don't need to change  			 * the 
messages for netif_carrier.  			 */ -			pr_warning("%s: Warning: MII and ETHTOOL support not available for interface %s, and arp_interval/arp_ip_target module parameters not specified, thus bonding will not detect link failures! see bonding.txt for details.\n", -			       bond_dev->name, slave_dev->name); +			pr_warn("%s: Warning: MII and ETHTOOL support not available for interface %s, and arp_interval/arp_ip_target module parameters not specified, thus bonding will not detect link failures! see bonding.txt for details\n", +				bond_dev->name, slave_dev->name);  		} else if (link_reporting == -1) {  			/* unable get link status using mii/ethtool */ -			pr_warning("%s: Warning: can't get link status from interface %s; the network driver associated with this interface does not support MII or ETHTOOL link status reporting, thus miimon has no effect on this interface.\n", -				   bond_dev->name, slave_dev->name); +			pr_warn("%s: Warning: can't get link status from interface %s; the network driver associated with this interface does not support MII or ETHTOOL link status reporting, thus miimon has no effect on this interface\n", +				bond_dev->name, slave_dev->name);  		}  	}  	/* check for initial state */ -	if (!bond->params.miimon || -	    (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS)) { -		if (bond->params.updelay) { -			pr_debug("Initial state of slave_dev is BOND_LINK_BACK\n"); -			new_slave->link  = BOND_LINK_BACK; -			new_slave->delay = bond->params.updelay; +	if (bond->params.miimon) { +		if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) { +			if (bond->params.updelay) { +				new_slave->link = BOND_LINK_BACK; +				new_slave->delay = bond->params.updelay; +			} else { +				new_slave->link = BOND_LINK_UP; +			}  		} else { -			pr_debug("Initial state of slave_dev is BOND_LINK_UP\n"); -			new_slave->link  = BOND_LINK_UP; +			new_slave->link = BOND_LINK_DOWN;  		} -		new_slave->jiffies = jiffies; +	} else if (bond->params.arp_interval) { +		new_slave->link = (netif_carrier_ok(slave_dev) ? +			BOND_LINK_UP : BOND_LINK_DOWN);  	} else { -		pr_debug("Initial state of slave_dev is BOND_LINK_DOWN\n"); -		new_slave->link  = BOND_LINK_DOWN; +		new_slave->link = BOND_LINK_UP;  	} -	if (bond_update_speed_duplex(new_slave) && -	    (new_slave->link != BOND_LINK_DOWN)) { -		pr_warning("%s: Warning: failed to get speed and duplex from %s, assumed to be 100Mb/sec and Full.\n", -			   bond_dev->name, new_slave->dev->name); +	if (new_slave->link != BOND_LINK_DOWN) +		new_slave->last_link_up = jiffies; +	pr_debug("Initial state of slave_dev is BOND_LINK_%s\n", +		 new_slave->link == BOND_LINK_DOWN ? "DOWN" : +		 (new_slave->link == BOND_LINK_UP ? 
"UP" : "BACK")); -		if (bond->params.mode == BOND_MODE_8023AD) { -			pr_warning("%s: Warning: Operation of 802.3ad mode requires ETHTOOL support in base driver for proper aggregator selection.\n", -				   bond_dev->name); -		} -	} - -	if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) { +	if (bond_uses_primary(bond) && bond->params.primary[0]) {  		/* if there is a primary slave, remember it */  		if (strcmp(bond->params.primary, new_slave->dev->name) == 0) {  			bond->primary_slave = new_slave; @@ -1749,100 +1497,139 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  		}  	} -	write_lock_bh(&bond->curr_slave_lock); - -	switch (bond->params.mode) { +	switch (BOND_MODE(bond)) {  	case BOND_MODE_ACTIVEBACKUP: -		bond_set_slave_inactive_flags(new_slave); -		bond_select_active_slave(bond); +		bond_set_slave_inactive_flags(new_slave, +					      BOND_SLAVE_NOTIFY_NOW);  		break;  	case BOND_MODE_8023AD:  		/* in 802.3ad mode, the internal mechanism  		 * will activate the slaves in the selected  		 * aggregator  		 */ -		bond_set_slave_inactive_flags(new_slave); +		bond_set_slave_inactive_flags(new_slave, BOND_SLAVE_NOTIFY_NOW);  		/* if this is the first slave */ -		if (bond->slave_cnt == 1) { -			SLAVE_AD_INFO(new_slave).id = 1; +		if (!prev_slave) { +			SLAVE_AD_INFO(new_slave)->id = 1;  			/* Initialize AD with the number of times that the AD timer is called in 1 second  			 * can be called only after the mac address of the bond is set  			 */ -			bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL, -					    bond->params.lacp_fast); +			bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL);  		} else { -			SLAVE_AD_INFO(new_slave).id = -				SLAVE_AD_INFO(new_slave->prev).id + 1; +			SLAVE_AD_INFO(new_slave)->id = +				SLAVE_AD_INFO(prev_slave)->id + 1;  		}  		bond_3ad_bind_slave(new_slave);  		break;  	case BOND_MODE_TLB:  	case BOND_MODE_ALB: -		new_slave->state = BOND_STATE_ACTIVE; -		bond_set_slave_inactive_flags(new_slave); -		bond_select_active_slave(bond); +		bond_set_active_slave(new_slave); +		bond_set_slave_inactive_flags(new_slave, BOND_SLAVE_NOTIFY_NOW);  		break;  	default:  		pr_debug("This slave is always active in trunk mode\n");  		/* always active in trunk mode */ -		new_slave->state = BOND_STATE_ACTIVE; +		bond_set_active_slave(new_slave);  		/* In trunking mode there is little meaning to curr_active_slave  		 * anyway (it holds no special properties of the bond device),  		 * so we can change it without calling change_active_interface()  		 */ -		if (!bond->curr_active_slave) -			bond->curr_active_slave = new_slave; +		if (!bond->curr_active_slave && new_slave->link == BOND_LINK_UP) +			rcu_assign_pointer(bond->curr_active_slave, new_slave);  		break;  	} /* switch(bond_mode) */ -	write_unlock_bh(&bond->curr_slave_lock); - -	bond_set_carrier(bond); -  #ifdef CONFIG_NET_POLL_CONTROLLER -	if (slaves_support_netpoll(bond_dev)) { -		bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL; -		if (bond_dev->npinfo) -			slave_dev->npinfo = bond_dev->npinfo; -	} else if (!(bond_dev->priv_flags & IFF_DISABLE_NETPOLL)) { -		bond_dev->priv_flags |= IFF_DISABLE_NETPOLL; -		pr_info("New slave device %s does not support netpoll\n", -			slave_dev->name); -		pr_info("Disabling netpoll support for %s\n", bond_dev->name); +	slave_dev->npinfo = bond->dev->npinfo; +	if (slave_dev->npinfo) { +		if (slave_enable_netpoll(new_slave)) { +			pr_info("Error, %s: master_dev is using netpoll, but new slave device does not support netpoll\n", +				bond_dev->name); +			res = 
-EBUSY; +			goto err_detach; +		}  	}  #endif -	read_unlock(&bond->lock); -	res = bond_create_slave_symlinks(bond_dev, slave_dev); -	if (res) -		goto err_close; +	res = netdev_rx_handler_register(slave_dev, bond_handle_frame, +					 new_slave); +	if (res) { +		pr_debug("Error %d calling netdev_rx_handler_register\n", res); +		goto err_detach; +	} -	pr_info("%s: enslaving %s as a%s interface with a%s link.\n", +	res = bond_master_upper_dev_link(bond_dev, slave_dev, new_slave); +	if (res) { +		pr_debug("Error %d calling bond_master_upper_dev_link\n", res); +		goto err_unregister; +	} + +	res = bond_sysfs_slave_add(new_slave); +	if (res) { +		pr_debug("Error %d calling bond_sysfs_slave_add\n", res); +		goto err_upper_unlink; +	} + +	bond->slave_cnt++; +	bond_compute_features(bond); +	bond_set_carrier(bond); + +	if (bond_uses_primary(bond)) { +		block_netpoll_tx(); +		write_lock_bh(&bond->curr_slave_lock); +		bond_select_active_slave(bond); +		write_unlock_bh(&bond->curr_slave_lock); +		unblock_netpoll_tx(); +	} + +	pr_info("%s: Enslaving %s as %s interface with %s link\n",  		bond_dev->name, slave_dev->name, -		new_slave->state == BOND_STATE_ACTIVE ? "n active" : " backup", -		new_slave->link != BOND_LINK_DOWN ? "n up" : " down"); +		bond_is_active_slave(new_slave) ? "an active" : "a backup", +		new_slave->link != BOND_LINK_DOWN ? "an up" : "a down");  	/* enslave is successful */  	return 0;  /* Undo stages on error */ +err_upper_unlink: +	bond_upper_dev_unlink(bond_dev, slave_dev); + +err_unregister: +	netdev_rx_handler_unregister(slave_dev); + +err_detach: +	if (!bond_uses_primary(bond)) +		bond_hw_addr_flush(bond_dev, slave_dev); + +	vlan_vids_del_by_dev(slave_dev, bond_dev); +	if (bond->primary_slave == new_slave) +		bond->primary_slave = NULL; +	if (bond->curr_active_slave == new_slave) { +		block_netpoll_tx(); +		write_lock_bh(&bond->curr_slave_lock); +		bond_change_active_slave(bond, NULL); +		bond_select_active_slave(bond); +		write_unlock_bh(&bond->curr_slave_lock); +		unblock_netpoll_tx(); +	} +	slave_disable_netpoll(new_slave); +  err_close: +	slave_dev->priv_flags &= ~IFF_BONDING;  	dev_close(slave_dev); -err_unset_master: -	netdev_set_master(slave_dev, NULL); -  err_restore_mac: -	if (!bond->params.fail_over_mac) { +	if (!bond->params.fail_over_mac || +	    BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {  		/* XXX TODO - fom follow mode needs to change master's  		 * MAC if this slave's MAC is in use by the bond, or at  		 * least print a warning.  		 */ -		memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN); +		ether_addr_copy(addr.sa_data, new_slave->perm_hwaddr);  		addr.sa_family = slave_dev->type;  		dev_set_mac_address(slave_dev, &addr);  	} @@ -1851,10 +1638,13 @@ err_restore_mtu:  	dev_set_mtu(slave_dev, new_slave->original_mtu);  err_free: -	kfree(new_slave); +	bond_free_slave(new_slave);  err_undo_flags: -	bond_dev->features = old_features; +	/* Enslave of first slave has failed and we need to fix master's mac */ +	if (!bond_has_slaves(bond) && +	    ether_addr_equal_64bits(bond_dev->dev_addr, slave_dev->dev_addr)) +		eth_hw_addr_random(bond_dev);  	return res;  } @@ -1862,7 +1652,8 @@ err_undo_flags:  /*   * Try to release the slave device <slave> from the bond device <master>   * It is legal to access curr_active_slave without a lock because all the function - * is write-locked. + * is write-locked. If "all" is true it means that the function is being called + * while destroying a bond interface and all slaves are being released.   
*   * The rules for slave state should be:   *   for Active/Backup: @@ -1870,70 +1661,77 @@ err_undo_flags:   *   for Bonded connections:   *     The first up interface should be left on and all others downed.   */ -int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) +static int __bond_release_one(struct net_device *bond_dev, +			      struct net_device *slave_dev, +			      bool all)  {  	struct bonding *bond = netdev_priv(bond_dev);  	struct slave *slave, *oldcurrent;  	struct sockaddr addr; +	int old_flags = bond_dev->flags; +	netdev_features_t old_features = bond_dev->features;  	/* slave is not a slave or master is not master of this slave */  	if (!(slave_dev->flags & IFF_SLAVE) || -	    (slave_dev->master != bond_dev)) { -		pr_err("%s: Error: cannot release %s.\n", +	    !netdev_has_upper_dev(slave_dev, bond_dev)) { +		pr_err("%s: Error: cannot release %s\n",  		       bond_dev->name, slave_dev->name);  		return -EINVAL;  	}  	block_netpoll_tx(); -	netdev_bonding_change(bond_dev, NETDEV_BONDING_DESLAVE); -	write_lock_bh(&bond->lock);  	slave = bond_get_slave_by_dev(bond, slave_dev);  	if (!slave) {  		/* not a slave of this bond */  		pr_info("%s: %s not enslaved\n",  			bond_dev->name, slave_dev->name); -		write_unlock_bh(&bond->lock);  		unblock_netpoll_tx();  		return -EINVAL;  	} -	if (!bond->params.fail_over_mac) { -		if (!compare_ether_addr(bond_dev->dev_addr, slave->perm_hwaddr) && -		    bond->slave_cnt > 1) -			pr_warning("%s: Warning: the permanent HWaddr of %s - %pM - is still in use by %s. Set the HWaddr of %s to a different address to avoid conflicts.\n", -				   bond_dev->name, slave_dev->name, -				   slave->perm_hwaddr, -				   bond_dev->name, slave_dev->name); -	} +	bond_sysfs_slave_del(slave); + +	bond_upper_dev_unlink(bond_dev, slave_dev); +	/* unregister rx_handler early so bond_handle_frame wouldn't be called +	 * for this slave anymore. +	 */ +	netdev_rx_handler_unregister(slave_dev); +	write_lock_bh(&bond->lock);  	/* Inform AD package of unbinding of slave. */ -	if (bond->params.mode == BOND_MODE_8023AD) { -		/* must be called before the slave is -		 * detached from the list -		 */ +	if (BOND_MODE(bond) == BOND_MODE_8023AD)  		bond_3ad_unbind_slave(slave); -	} -	pr_info("%s: releasing %s interface %s\n", +	write_unlock_bh(&bond->lock); + +	pr_info("%s: Releasing %s interface %s\n",  		bond_dev->name, -		(slave->state == BOND_STATE_ACTIVE) ? "active" : "backup", +		bond_is_active_slave(slave) ? 
"active" : "backup",  		slave_dev->name);  	oldcurrent = bond->curr_active_slave;  	bond->current_arp_slave = NULL; -	/* release the slave from its bond */ -	bond_detach_slave(bond, slave); - -	bond_compute_features(bond); +	if (!all && (!bond->params.fail_over_mac || +		     BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP)) { +		if (ether_addr_equal_64bits(bond_dev->dev_addr, slave->perm_hwaddr) && +		    bond_has_slaves(bond)) +			pr_warn("%s: Warning: the permanent HWaddr of %s - %pM - is still in use by %s - set the HWaddr of %s to a different address to avoid conflicts\n", +				bond_dev->name, slave_dev->name, +				slave->perm_hwaddr, +				bond_dev->name, slave_dev->name); +	}  	if (bond->primary_slave == slave)  		bond->primary_slave = NULL; -	if (oldcurrent == slave) +	if (oldcurrent == slave) { +		write_lock_bh(&bond->curr_slave_lock);  		bond_change_active_slave(bond, NULL); +		write_unlock_bh(&bond->curr_slave_lock); +	}  	if (bond_is_lb(bond)) {  		/* Must be called only after the slave has been @@ -1941,116 +1739,105 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)  		 * has been cleared (if our_slave == old_current),  		 * but before a new active slave is selected.  		 */ -		write_unlock_bh(&bond->lock);  		bond_alb_deinit_slave(bond, slave); -		write_lock_bh(&bond->lock);  	} -	if (oldcurrent == slave) { +	if (all) { +		RCU_INIT_POINTER(bond->curr_active_slave, NULL); +	} else if (oldcurrent == slave) {  		/*  		 * Note that we hold RTNL over this sequence, so there  		 * is no concern that another slave add/remove event  		 * will interfere.  		 */ -		write_unlock_bh(&bond->lock); -		read_lock(&bond->lock);  		write_lock_bh(&bond->curr_slave_lock);  		bond_select_active_slave(bond);  		write_unlock_bh(&bond->curr_slave_lock); -		read_unlock(&bond->lock); -		write_lock_bh(&bond->lock);  	} -	if (bond->slave_cnt == 0) { +	if (!bond_has_slaves(bond)) {  		bond_set_carrier(bond); +		eth_hw_addr_random(bond_dev); -		/* if the last slave was removed, zero the mac address -		 * of the master so it will be set by the application -		 * to the mac address of the first slave -		 */ -		memset(bond_dev->dev_addr, 0, bond_dev->addr_len); - -		if (!bond->vlgrp) { -			bond_dev->features |= NETIF_F_VLAN_CHALLENGED; -		} else { -			pr_warning("%s: Warning: clearing HW address of %s while it still has VLANs.\n", -				   bond_dev->name, bond_dev->name); -			pr_warning("%s: When re-adding slaves, make sure the bond's HW address matches its VLANs'.\n", -				   bond_dev->name); +		if (vlan_uses_dev(bond_dev)) { +			pr_warn("%s: Warning: clearing HW address of %s while it still has VLANs\n", +				bond_dev->name, bond_dev->name); +			pr_warn("%s: When re-adding slaves, make sure the bond's HW address matches its VLANs\n", +				bond_dev->name);  		} -	} else if ((bond_dev->features & NETIF_F_VLAN_CHALLENGED) && -		   !bond_has_challenged_slaves(bond)) { -		pr_info("%s: last VLAN challenged slave %s left bond %s. 
VLAN blocking is removed\n", -			bond_dev->name, slave_dev->name, bond_dev->name); -		bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED;  	} -	write_unlock_bh(&bond->lock);  	unblock_netpoll_tx(); +	synchronize_rcu(); +	bond->slave_cnt--; -	/* must do this from outside any spinlocks */ -	bond_destroy_slave_symlinks(bond_dev, slave_dev); +	if (!bond_has_slaves(bond)) { +		call_netdevice_notifiers(NETDEV_CHANGEADDR, bond->dev); +		call_netdevice_notifiers(NETDEV_RELEASE, bond->dev); +	} + +	bond_compute_features(bond); +	if (!(bond_dev->features & NETIF_F_VLAN_CHALLENGED) && +	    (old_features & NETIF_F_VLAN_CHALLENGED)) +		pr_info("%s: last VLAN challenged slave %s left bond %s - VLAN blocking is removed\n", +			bond_dev->name, slave_dev->name, bond_dev->name); -	bond_del_vlans_from_slave(bond, slave_dev); +	/* must do this from outside any spinlocks */ +	vlan_vids_del_by_dev(slave_dev, bond_dev); -	/* If the mode USES_PRIMARY, then we should only remove its -	 * promisc and mc settings if it was the curr_active_slave, but that was -	 * already taken care of above when we detached the slave +	/* If the mode uses primary, then this cases was handled above by +	 * bond_change_active_slave(..., NULL)  	 */ -	if (!USES_PRIMARY(bond->params.mode)) { -		/* unset promiscuity level from slave */ -		if (bond_dev->flags & IFF_PROMISC) +	if (!bond_uses_primary(bond)) { +		/* unset promiscuity level from slave +		 * NOTE: The NETDEV_CHANGEADDR call above may change the value +		 * of the IFF_PROMISC flag in the bond_dev, but we need the +		 * value of that flag before that change, as that was the value +		 * when this slave was attached, so we cache at the start of the +		 * function and use it here. Same goes for ALLMULTI below +		 */ +		if (old_flags & IFF_PROMISC)  			dev_set_promiscuity(slave_dev, -1);  		/* unset allmulti level from slave */ -		if (bond_dev->flags & IFF_ALLMULTI) +		if (old_flags & IFF_ALLMULTI)  			dev_set_allmulti(slave_dev, -1); -		/* flush master's mc_list from slave */ -		netif_addr_lock_bh(bond_dev); -		bond_mc_list_flush(bond_dev, slave_dev); -		netif_addr_unlock_bh(bond_dev); +		bond_hw_addr_flush(bond_dev, slave_dev);  	} -	netdev_set_master(slave_dev, NULL); - -#ifdef CONFIG_NET_POLL_CONTROLLER -	read_lock_bh(&bond->lock); - -	if (slaves_support_netpoll(bond_dev)) -		bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL; -	read_unlock_bh(&bond->lock); -	if (slave_dev->netdev_ops->ndo_netpoll_cleanup) -		slave_dev->netdev_ops->ndo_netpoll_cleanup(slave_dev); -	else -		slave_dev->npinfo = NULL; -#endif +	slave_disable_netpoll(slave);  	/* close slave before restoring its mac address */  	dev_close(slave_dev); -	if (bond->params.fail_over_mac != BOND_FOM_ACTIVE) { +	if (bond->params.fail_over_mac != BOND_FOM_ACTIVE || +	    BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {  		/* restore original ("permanent") mac address */ -		memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); +		ether_addr_copy(addr.sa_data, slave->perm_hwaddr);  		addr.sa_family = slave_dev->type;  		dev_set_mac_address(slave_dev, &addr);  	}  	dev_set_mtu(slave_dev, slave->original_mtu); -	slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB | -				   IFF_SLAVE_INACTIVE | IFF_BONDING | -				   IFF_SLAVE_NEEDARP); +	slave_dev->priv_flags &= ~IFF_BONDING; -	kfree(slave); +	bond_free_slave(slave);  	return 0;  /* deletion OK */  } +/* A wrapper used because of ndo_del_link */ +int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) +{ +	return __bond_release_one(bond_dev, slave_dev, false); +} 
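The bond_release() wrapper above preserves the original two-argument entry point, while __bond_release_one() adds an "all" flag for the case where the whole bond is being torn down: with all == true the hunks above clear curr_active_slave with RCU_INIT_POINTER() and skip both the per-slave "permanent HWaddr still in use" warning and the active-slave reselection. As a rough illustration only (not a hunk from this patch), a teardown path could drain the slave list as in the sketch below; the helper name is hypothetical, bond_first_slave() is assumed to be available from bonding.h, and rtnl_lock is assumed to be held as on every other (un)enslave path.

static void example_release_all_slaves(struct net_device *bond_dev)
{
	struct bonding *bond = netdev_priv(bond_dev);

	while (bond_has_slaves(bond)) {
		struct slave *slave = bond_first_slave(bond);

		/* all == true: the bond itself is going away, so
		 * __bond_release_one() nulls curr_active_slave via RCU
		 * instead of reselecting an active slave after each
		 * removal, and skips the MAC-conflict warning.
		 */
		if (__bond_release_one(bond_dev, slave->dev, true))
			break;
	}
}

Keeping the unwind logic in a single __bond_release_one() also explains why the patch can delete the old, largely duplicated bond_release_all() in the hunk that follows.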
+  /* -* First release a slave and than destroy the bond if no more slaves are left. +* First release a slave and then destroy the bond if no more slaves are left.  * Must be under rtnl_lock when this function is called.  */  static int  bond_release_and_destroy(struct net_device *bond_dev, @@ -2060,193 +1847,23 @@ static int  bond_release_and_destroy(struct net_device *bond_dev,  	int ret;  	ret = bond_release(bond_dev, slave_dev); -	if ((ret == 0) && (bond->slave_cnt == 0)) { -		pr_info("%s: destroying bond %s.\n", +	if (ret == 0 && !bond_has_slaves(bond)) { +		bond_dev->priv_flags |= IFF_DISABLE_NETPOLL; +		pr_info("%s: Destroying bond %s\n",  			bond_dev->name, bond_dev->name);  		unregister_netdevice(bond_dev);  	}  	return ret;  } -/* - * This function releases all slaves. - */ -static int bond_release_all(struct net_device *bond_dev) -{ -	struct bonding *bond = netdev_priv(bond_dev); -	struct slave *slave; -	struct net_device *slave_dev; -	struct sockaddr addr; - -	write_lock_bh(&bond->lock); - -	netif_carrier_off(bond_dev); - -	if (bond->slave_cnt == 0) -		goto out; - -	bond->current_arp_slave = NULL; -	bond->primary_slave = NULL; -	bond_change_active_slave(bond, NULL); - -	while ((slave = bond->first_slave) != NULL) { -		/* Inform AD package of unbinding of slave -		 * before slave is detached from the list. -		 */ -		if (bond->params.mode == BOND_MODE_8023AD) -			bond_3ad_unbind_slave(slave); - -		slave_dev = slave->dev; -		bond_detach_slave(bond, slave); - -		/* now that the slave is detached, unlock and perform -		 * all the undo steps that should not be called from -		 * within a lock. -		 */ -		write_unlock_bh(&bond->lock); - -		if (bond_is_lb(bond)) { -			/* must be called only after the slave -			 * has been detached from the list -			 */ -			bond_alb_deinit_slave(bond, slave); -		} - -		bond_compute_features(bond); - -		bond_destroy_slave_symlinks(bond_dev, slave_dev); -		bond_del_vlans_from_slave(bond, slave_dev); - -		/* If the mode USES_PRIMARY, then we should only remove its -		 * promisc and mc settings if it was the curr_active_slave, but that was -		 * already taken care of above when we detached the slave -		 */ -		if (!USES_PRIMARY(bond->params.mode)) { -			/* unset promiscuity level from slave */ -			if (bond_dev->flags & IFF_PROMISC) -				dev_set_promiscuity(slave_dev, -1); - -			/* unset allmulti level from slave */ -			if (bond_dev->flags & IFF_ALLMULTI) -				dev_set_allmulti(slave_dev, -1); - -			/* flush master's mc_list from slave */ -			netif_addr_lock_bh(bond_dev); -			bond_mc_list_flush(bond_dev, slave_dev); -			netif_addr_unlock_bh(bond_dev); -		} - -		netdev_set_master(slave_dev, NULL); - -		/* close slave before restoring its mac address */ -		dev_close(slave_dev); - -		if (!bond->params.fail_over_mac) { -			/* restore original ("permanent") mac address*/ -			memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); -			addr.sa_family = slave_dev->type; -			dev_set_mac_address(slave_dev, &addr); -		} - -		slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB | -					   IFF_SLAVE_INACTIVE); - -		kfree(slave); - -		/* re-acquire the lock before getting the next slave */ -		write_lock_bh(&bond->lock); -	} - -	/* zero the mac address of the master so it will be -	 * set by the application to the mac address of the -	 * first slave -	 */ -	memset(bond_dev->dev_addr, 0, bond_dev->addr_len); - -	if (!bond->vlgrp) { -		bond_dev->features |= NETIF_F_VLAN_CHALLENGED; -	} else { -		pr_warning("%s: Warning: clearing HW address of %s while it still has 
VLANs.\n", -			   bond_dev->name, bond_dev->name); -		pr_warning("%s: When re-adding slaves, make sure the bond's HW address matches its VLANs'.\n", -			   bond_dev->name); -	} - -	pr_info("%s: released all slaves\n", bond_dev->name); - -out: -	write_unlock_bh(&bond->lock); -	return 0; -} - -/* - * This function changes the active slave to slave <slave_dev>. - * It returns -EINVAL in the following cases. - *  - <slave_dev> is not found in the list. - *  - There is not active slave now. - *  - <slave_dev> is already active. - *  - The link state of <slave_dev> is not BOND_LINK_UP. - *  - <slave_dev> is not running. - * In these cases, this function does nothing. - * In the other cases, current_slave pointer is changed and 0 is returned. - */ -static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_device *slave_dev) -{ -	struct bonding *bond = netdev_priv(bond_dev); -	struct slave *old_active = NULL; -	struct slave *new_active = NULL; -	int res = 0; - -	if (!USES_PRIMARY(bond->params.mode)) -		return -EINVAL; - -	/* Verify that master_dev is indeed the master of slave_dev */ -	if (!(slave_dev->flags & IFF_SLAVE) || (slave_dev->master != bond_dev)) -		return -EINVAL; - -	read_lock(&bond->lock); - -	read_lock(&bond->curr_slave_lock); -	old_active = bond->curr_active_slave; -	read_unlock(&bond->curr_slave_lock); - -	new_active = bond_get_slave_by_dev(bond, slave_dev); - -	/* -	 * Changing to the current active: do nothing; return success. -	 */ -	if (new_active && (new_active == old_active)) { -		read_unlock(&bond->lock); -		return 0; -	} - -	if ((new_active) && -	    (old_active) && -	    (new_active->link == BOND_LINK_UP) && -	    IS_UP(new_active->dev)) { -		block_netpoll_tx(); -		write_lock_bh(&bond->curr_slave_lock); -		bond_change_active_slave(bond, new_active); -		write_unlock_bh(&bond->curr_slave_lock); -		unblock_netpoll_tx(); -	} else -		res = -EINVAL; - -	read_unlock(&bond->lock); - -	return res; -} -  static int bond_info_query(struct net_device *bond_dev, struct ifbond *info)  {  	struct bonding *bond = netdev_priv(bond_dev); -	info->bond_mode = bond->params.mode; +	info->bond_mode = BOND_MODE(bond);  	info->miimon = bond->params.miimon; -	read_lock(&bond->lock);  	info->num_slaves = bond->slave_cnt; -	read_unlock(&bond->lock);  	return 0;  } @@ -2254,24 +1871,21 @@ static int bond_info_query(struct net_device *bond_dev, struct ifbond *info)  static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *info)  {  	struct bonding *bond = netdev_priv(bond_dev); +	struct list_head *iter; +	int i = 0, res = -ENODEV;  	struct slave *slave; -	int i, res = -ENODEV; -	read_lock(&bond->lock); - -	bond_for_each_slave(bond, slave, i) { -		if (i == (int)info->slave_id) { +	bond_for_each_slave(bond, slave, iter) { +		if (i++ == (int)info->slave_id) {  			res = 0;  			strcpy(info->slave_name, slave->dev->name);  			info->link = slave->link; -			info->state = slave->state; +			info->state = bond_slave_state(slave);  			info->link_failure_count = slave->link_failure_count;  			break;  		}  	} -	read_unlock(&bond->lock); -  	return res;  } @@ -2280,13 +1894,14 @@ static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *in  static int bond_miimon_inspect(struct bonding *bond)  { +	int link_state, commit = 0; +	struct list_head *iter;  	struct slave *slave; -	int i, link_state, commit = 0;  	bool ignore_updelay;  	ignore_updelay = !bond->curr_active_slave ? 
true : false; -	bond_for_each_slave(bond, slave, i) { +	bond_for_each_slave_rcu(bond, slave, iter) {  		slave->new_link = BOND_LINK_NOCHANGE;  		link_state = bond_check_dev_link(bond, slave->dev, 0); @@ -2299,11 +1914,11 @@ static int bond_miimon_inspect(struct bonding *bond)  			slave->link = BOND_LINK_FAIL;  			slave->delay = bond->params.downdelay;  			if (slave->delay) { -				pr_info("%s: link status down for %sinterface %s, disabling it in %d ms.\n", +				pr_info("%s: link status down for %sinterface %s, disabling it in %d ms\n",  					bond->dev->name, -					(bond->params.mode == +					(BOND_MODE(bond) ==  					 BOND_MODE_ACTIVEBACKUP) ? -					((slave->state == BOND_STATE_ACTIVE) ? +					(bond_is_active_slave(slave) ?  					 "active " : "backup ") : "",  					slave->dev->name,  					bond->params.downdelay * bond->params.miimon); @@ -2315,8 +1930,8 @@ static int bond_miimon_inspect(struct bonding *bond)  				 * recovered before downdelay expired  				 */  				slave->link = BOND_LINK_UP; -				slave->jiffies = jiffies; -				pr_info("%s: link status up again after %d ms for interface %s.\n", +				slave->last_link_up = jiffies; +				pr_info("%s: link status up again after %d ms for interface %s\n",  					bond->dev->name,  					(bond->params.downdelay - slave->delay) *  					bond->params.miimon, @@ -2341,7 +1956,7 @@ static int bond_miimon_inspect(struct bonding *bond)  			slave->delay = bond->params.updelay;  			if (slave->delay) { -				pr_info("%s: link status up for interface %s, enabling it in %d ms.\n", +				pr_info("%s: link status up for interface %s, enabling it in %d ms\n",  					bond->dev->name, slave->dev->name,  					ignore_updelay ? 0 :  					bond->params.updelay * @@ -2351,7 +1966,7 @@ static int bond_miimon_inspect(struct bonding *bond)  		case BOND_LINK_BACK:  			if (!link_state) {  				slave->link = BOND_LINK_DOWN; -				pr_info("%s: link status down again after %d ms for interface %s.\n", +				pr_info("%s: link status down again after %d ms for interface %s\n",  					bond->dev->name,  					(bond->params.updelay - slave->delay) *  					bond->params.miimon, @@ -2380,37 +1995,36 @@ static int bond_miimon_inspect(struct bonding *bond)  static void bond_miimon_commit(struct bonding *bond)  { +	struct list_head *iter;  	struct slave *slave; -	int i; -	bond_for_each_slave(bond, slave, i) { +	bond_for_each_slave(bond, slave, iter) {  		switch (slave->new_link) {  		case BOND_LINK_NOCHANGE:  			continue;  		case BOND_LINK_UP:  			slave->link = BOND_LINK_UP; -			slave->jiffies = jiffies; +			slave->last_link_up = jiffies; -			if (bond->params.mode == BOND_MODE_8023AD) { +			if (BOND_MODE(bond) == BOND_MODE_8023AD) {  				/* prevent it from being the active one */ -				slave->state = BOND_STATE_BACKUP; -			} else if (bond->params.mode != BOND_MODE_ACTIVEBACKUP) { +				bond_set_backup_slave(slave); +			} else if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {  				/* make it immediately active */ -				slave->state = BOND_STATE_ACTIVE; +				bond_set_active_slave(slave);  			} else if (slave != bond->primary_slave) {  				/* prevent it from being the active one */ -				slave->state = BOND_STATE_BACKUP; +				bond_set_backup_slave(slave);  			} -			bond_update_speed_duplex(slave); - -			pr_info("%s: link status definitely up for interface %s, %d Mbps %s duplex.\n", +			pr_info("%s: link status definitely up for interface %s, %u Mbps %s duplex\n",  				bond->dev->name, slave->dev->name, -				slave->speed, slave->duplex ? "full" : "half"); +				slave->speed == SPEED_UNKNOWN ? 
0 : slave->speed, +				slave->duplex ? "full" : "half");  			/* notify ad that the link status has changed */ -			if (bond->params.mode == BOND_MODE_8023AD) +			if (BOND_MODE(bond) == BOND_MODE_8023AD)  				bond_3ad_handle_link_change(slave, BOND_LINK_UP);  			if (bond_is_lb(bond)) @@ -2429,14 +2043,15 @@ static void bond_miimon_commit(struct bonding *bond)  			slave->link = BOND_LINK_DOWN; -			if (bond->params.mode == BOND_MODE_ACTIVEBACKUP || -			    bond->params.mode == BOND_MODE_8023AD) -				bond_set_slave_inactive_flags(slave); +			if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP || +			    BOND_MODE(bond) == BOND_MODE_8023AD) +				bond_set_slave_inactive_flags(slave, +							      BOND_SLAVE_NOTIFY_NOW);  			pr_info("%s: link status definitely down for interface %s, disabling it\n",  				bond->dev->name, slave->dev->name); -			if (bond->params.mode == BOND_MODE_8023AD) +			if (BOND_MODE(bond) == BOND_MODE_8023AD)  				bond_3ad_handle_link_change(slave,  							    BOND_LINK_DOWN); @@ -2478,87 +2093,69 @@ do_failover:   * an acquisition of appropriate locks followed by a commit phase to   * implement whatever link state changes are indicated.   */ -void bond_mii_monitor(struct work_struct *work) +static void bond_mii_monitor(struct work_struct *work)  {  	struct bonding *bond = container_of(work, struct bonding,  					    mii_work.work); +	bool should_notify_peers = false; +	unsigned long delay; -	read_lock(&bond->lock); -	if (bond->kill_timers) -		goto out; +	delay = msecs_to_jiffies(bond->params.miimon); -	if (bond->slave_cnt == 0) +	if (!bond_has_slaves(bond))  		goto re_arm; -	if (bond->send_grat_arp) { -		read_lock(&bond->curr_slave_lock); -		bond_send_gratuitous_arp(bond); -		read_unlock(&bond->curr_slave_lock); -	} +	rcu_read_lock(); -	if (bond->send_unsol_na) { -		read_lock(&bond->curr_slave_lock); -		bond_send_unsolicited_na(bond); -		read_unlock(&bond->curr_slave_lock); -	} +	should_notify_peers = bond_should_notify_peers(bond);  	if (bond_miimon_inspect(bond)) { -		read_unlock(&bond->lock); -		rtnl_lock(); -		read_lock(&bond->lock); +		rcu_read_unlock(); + +		/* Race avoidance with bond_close cancel of workqueue */ +		if (!rtnl_trylock()) { +			delay = 1; +			should_notify_peers = false; +			goto re_arm; +		}  		bond_miimon_commit(bond); -		read_unlock(&bond->lock);  		rtnl_unlock();	/* might sleep, hold no other locks */ -		read_lock(&bond->lock); -	} +	} else +		rcu_read_unlock();  re_arm:  	if (bond->params.miimon) -		queue_delayed_work(bond->wq, &bond->mii_work, -				   msecs_to_jiffies(bond->params.miimon)); -out: -	read_unlock(&bond->lock); -} +		queue_delayed_work(bond->wq, &bond->mii_work, delay); -static __be32 bond_glean_dev_ip(struct net_device *dev) -{ -	struct in_device *idev; -	struct in_ifaddr *ifa; -	__be32 addr = 0; - -	if (!dev) -		return 0; - -	rcu_read_lock(); -	idev = __in_dev_get_rcu(dev); -	if (!idev) -		goto out; - -	ifa = idev->ifa_list; -	if (!ifa) -		goto out; - -	addr = ifa->ifa_local; -out: -	rcu_read_unlock(); -	return addr; +	if (should_notify_peers) { +		if (!rtnl_trylock()) +			return; +		call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); +		rtnl_unlock(); +	}  } -static int bond_has_this_ip(struct bonding *bond, __be32 ip) +static bool bond_has_this_ip(struct bonding *bond, __be32 ip)  { -	struct vlan_entry *vlan; +	struct net_device *upper; +	struct list_head *iter; +	bool ret = false; -	if (ip == bond->master_ip) -		return 1; +	if (ip == bond_confirm_addr(bond->dev, 0, ip)) +		return true; -	list_for_each_entry(vlan, &bond->vlan_list, 
vlan_list) { -		if (ip == vlan->vlan_ip) -			return 1; +	rcu_read_lock(); +	netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) { +		if (ip == bond_confirm_addr(upper, 0, ip)) { +			ret = true; +			break; +		}  	} +	rcu_read_unlock(); -	return 0; +	return ret;  }  /* @@ -2566,200 +2163,191 @@ static int bond_has_this_ip(struct bonding *bond, __be32 ip)   * switches in VLAN mode (especially if ports are configured as   * "native" to a VLAN) might not pass non-tagged frames.   */ -static void bond_arp_send(struct net_device *slave_dev, int arp_op, __be32 dest_ip, __be32 src_ip, unsigned short vlan_id) +static void bond_arp_send(struct net_device *slave_dev, int arp_op, +			  __be32 dest_ip, __be32 src_ip, +			  struct bond_vlan_tag *tags)  {  	struct sk_buff *skb; +	int i; -	pr_debug("arp %d on slave %s: dst %x src %x vid %d\n", arp_op, -		 slave_dev->name, dest_ip, src_ip, vlan_id); +	pr_debug("arp %d on slave %s: dst %pI4 src %pI4\n", +		 arp_op, slave_dev->name, &dest_ip, &src_ip);  	skb = arp_create(arp_op, ETH_P_ARP, dest_ip, slave_dev, src_ip,  			 NULL, slave_dev->dev_addr, NULL);  	if (!skb) { -		pr_err("ARP packet allocation failed\n"); +		net_err_ratelimited("ARP packet allocation failed\n");  		return;  	} -	if (vlan_id) { -		skb = vlan_put_tag(skb, vlan_id); + +	/* Go through all the tags backwards and add them to the packet */ +	for (i = BOND_MAX_VLAN_ENCAP - 1; i > 0; i--) { +		if (!tags[i].vlan_id) +			continue; + +		pr_debug("inner tag: proto %X vid %X\n", +			 ntohs(tags[i].vlan_proto), tags[i].vlan_id); +		skb = __vlan_put_tag(skb, tags[i].vlan_proto, +				     tags[i].vlan_id);  		if (!skb) { -			pr_err("failed to insert VLAN tag\n"); +			net_err_ratelimited("failed to insert inner VLAN tag\n"); +			return; +		} +	} +	/* Set the outer tag */ +	if (tags[0].vlan_id) { +		pr_debug("outer tag: proto %X vid %X\n", +			 ntohs(tags[0].vlan_proto), tags[0].vlan_id); +		skb = vlan_put_tag(skb, tags[0].vlan_proto, tags[0].vlan_id); +		if (!skb) { +			net_err_ratelimited("failed to insert outer VLAN tag\n");  			return;  		}  	}  	arp_xmit(skb);  } - -static void bond_arp_send_all(struct bonding *bond, struct slave *slave) +/* Validate the device path between the @start_dev and the @end_dev. + * The path is valid if the @end_dev is reachable through device + * stacking. + * When the path is validated, collect any vlan information in the + * path. + */ +bool bond_verify_device_path(struct net_device *start_dev, +			     struct net_device *end_dev, +			     struct bond_vlan_tag *tags)  { -	int i, vlan_id, rv; -	__be32 *targets = bond->params.arp_targets; -	struct vlan_entry *vlan; -	struct net_device *vlan_dev; -	struct flowi fl; -	struct rtable *rt; +	struct net_device *upper; +	struct list_head  *iter; +	int  idx; -	for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) { -		if (!targets[i]) -			break; -		pr_debug("basa: target %x\n", targets[i]); -		if (!bond->vlgrp) { -			pr_debug("basa: empty vlan: arp_send\n"); -			bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], -				      bond->master_ip, 0); -			continue; -		} +	if (start_dev == end_dev) +		return true; -		/* -		 * If VLANs are configured, we do a route lookup to -		 * determine which VLAN interface would be used, so we -		 * can tag the ARP with the proper VLAN tag. 
-		 */ -		memset(&fl, 0, sizeof(fl)); -		fl.fl4_dst = targets[i]; -		fl.fl4_tos = RTO_ONLINK; +	netdev_for_each_upper_dev_rcu(start_dev, upper, iter) { +		if (bond_verify_device_path(upper, end_dev, tags)) { +			if (is_vlan_dev(upper)) { +				idx = vlan_get_encap_level(upper); +				if (idx >= BOND_MAX_VLAN_ENCAP) +					return false; -		rv = ip_route_output_key(dev_net(bond->dev), &rt, &fl); -		if (rv) { -			if (net_ratelimit()) { -				pr_warning("%s: no route to arp_ip_target %pI4\n", -					   bond->dev->name, &fl.fl4_dst); +				tags[idx].vlan_proto = +						    vlan_dev_vlan_proto(upper); +				tags[idx].vlan_id = vlan_dev_vlan_id(upper);  			} -			continue; -		} - -		/* -		 * This target is not on a VLAN -		 */ -		if (rt->dst.dev == bond->dev) { -			ip_rt_put(rt); -			pr_debug("basa: rtdev == bond->dev: arp_send\n"); -			bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], -				      bond->master_ip, 0); -			continue; +			return true;  		} +	} -		vlan_id = 0; -		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { -			vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); -			if (vlan_dev == rt->dst.dev) { -				vlan_id = vlan->vlan_id; -				pr_debug("basa: vlan match on %s %d\n", -				       vlan_dev->name, vlan_id); -				break; -			} -		} +	return false; +} -		if (vlan_id) { -			ip_rt_put(rt); +static void bond_arp_send_all(struct bonding *bond, struct slave *slave) +{ +	struct rtable *rt; +	struct bond_vlan_tag tags[BOND_MAX_VLAN_ENCAP]; +	__be32 *targets = bond->params.arp_targets, addr; +	int i; +	bool ret; + +	for (i = 0; i < BOND_MAX_ARP_TARGETS && targets[i]; i++) { +		pr_debug("basa: target %pI4\n", &targets[i]); +		memset(tags, 0, sizeof(tags)); + +		/* Find out through which dev should the packet go */ +		rt = ip_route_output(dev_net(bond->dev), targets[i], 0, +				     RTO_ONLINK, 0); +		if (IS_ERR(rt)) { +			/* there's no route to target - try to send arp +			 * probe to generate any traffic (arp_validate=0) +			 */ +			if (bond->params.arp_validate) +				net_warn_ratelimited("%s: no route to arp_ip_target %pI4 and arp_validate is set\n", +						     bond->dev->name, +						     &targets[i]);  			bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], -				      vlan->vlan_ip, vlan_id); +				      0, tags);  			continue;  		} -		if (net_ratelimit()) { -			pr_warning("%s: no path to arp_ip_target %pI4 via rt.dev %s\n", -				   bond->dev->name, &fl.fl4_dst, -				   rt->dst.dev ? rt->dst.dev->name : "NULL"); -		} -		ip_rt_put(rt); -	} -} - -/* - * Kick out a gratuitous ARP for an IP on the bonding master plus one - * for each VLAN above us. - * - * Caller must hold curr_slave_lock for read or better - */ -static void bond_send_gratuitous_arp(struct bonding *bond) -{ -	struct slave *slave = bond->curr_active_slave; -	struct vlan_entry *vlan; -	struct net_device *vlan_dev; - -	pr_debug("bond_send_grat_arp: bond %s slave %s\n", -		 bond->dev->name, slave ? 
slave->dev->name : "NULL"); +		/* bond device itself */ +		if (rt->dst.dev == bond->dev) +			goto found; -	if (!slave || !bond->send_grat_arp || -	    test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state)) -		return; +		rcu_read_lock(); +		ret = bond_verify_device_path(bond->dev, rt->dst.dev, tags); +		rcu_read_unlock(); -	bond->send_grat_arp--; +		if (ret) +			goto found; -	if (bond->master_ip) { -		bond_arp_send(slave->dev, ARPOP_REPLY, bond->master_ip, -				bond->master_ip, 0); -	} +		/* Not our device - skip */ +		pr_debug("%s: no path to arp_ip_target %pI4 via rt.dev %s\n", +			 bond->dev->name, &targets[i], +			 rt->dst.dev ? rt->dst.dev->name : "NULL"); -	if (!bond->vlgrp) -		return; +		ip_rt_put(rt); +		continue; -	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { -		vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); -		if (vlan->vlan_ip) { -			bond_arp_send(slave->dev, ARPOP_REPLY, vlan->vlan_ip, -				      vlan->vlan_ip, vlan->vlan_id); -		} +found: +		addr = bond_confirm_addr(rt->dst.dev, targets[i], 0); +		ip_rt_put(rt); +		bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], +			      addr, tags);  	}  }  static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32 sip, __be32 tip)  {  	int i; -	__be32 *targets = bond->params.arp_targets; - -	for (i = 0; (i < BOND_MAX_ARP_TARGETS) && targets[i]; i++) { -		pr_debug("bva: sip %pI4 tip %pI4 t[%d] %pI4 bhti(tip) %d\n", -			 &sip, &tip, i, &targets[i], -			 bond_has_this_ip(bond, tip)); -		if (sip == targets[i]) { -			if (bond_has_this_ip(bond, tip)) -				slave->last_arp_rx = jiffies; -			return; -		} + +	if (!sip || !bond_has_this_ip(bond, tip)) { +		pr_debug("bva: sip %pI4 tip %pI4 not found\n", &sip, &tip); +		return; +	} + +	i = bond_get_targets_ip(bond->params.arp_targets, sip); +	if (i == -1) { +		pr_debug("bva: sip %pI4 not found in targets\n", &sip); +		return;  	} +	slave->last_rx = jiffies; +	slave->target_last_arp_rx[i] = jiffies;  } -static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) +int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, +		 struct slave *slave)  { -	struct arphdr *arp; -	struct slave *slave; -	struct bonding *bond; +	struct arphdr *arp = (struct arphdr *)skb->data; +	struct slave *curr_active_slave;  	unsigned char *arp_ptr;  	__be32 sip, tip; +	int alen, is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP); -	if (dev->priv_flags & IFF_802_1Q_VLAN) { -		/* -		 * When using VLANS and bonding, dev and oriv_dev may be -		 * incorrect if the physical interface supports VLAN -		 * acceleration.  With this change ARP validation now -		 * works for hosts only reachable on the VLAN interface. -		 */ -		dev = vlan_dev_real_dev(dev); -		orig_dev = dev_get_by_index_rcu(dev_net(skb->dev),skb->skb_iif); +	if (!slave_do_arp_validate(bond, slave)) { +		if ((slave_do_arp_validate_only(bond) && is_arp) || +		    !slave_do_arp_validate_only(bond)) +			slave->last_rx = jiffies; +		return RX_HANDLER_ANOTHER; +	} else if (!is_arp) { +		return RX_HANDLER_ANOTHER;  	} -	if (!(dev->priv_flags & IFF_BONDING) || !(dev->flags & IFF_MASTER)) -		goto out; +	alen = arp_hdr_len(bond->dev); -	bond = netdev_priv(dev); -	read_lock(&bond->lock); - -	pr_debug("bond_arp_rcv: bond %s skb->dev %s orig_dev %s\n", -		 bond->dev->name, skb->dev ? skb->dev->name : "NULL", -		 orig_dev ? 
orig_dev->name : "NULL"); - -	slave = bond_get_slave_by_dev(bond, orig_dev); -	if (!slave || !slave_do_arp_validate(bond, slave)) -		goto out_unlock; +	pr_debug("bond_arp_rcv: bond %s skb->dev %s\n", +		 bond->dev->name, skb->dev->name); -	if (!pskb_may_pull(skb, arp_hdr_len(dev))) -		goto out_unlock; +	if (alen > skb_headlen(skb)) { +		arp = kmalloc(alen, GFP_ATOMIC); +		if (!arp) +			goto out_unlock; +		if (skb_copy_bits(skb, 0, arp, alen) < 0) +			goto out_unlock; +	} -	arp = arp_hdr(skb); -	if (arp->ar_hln != dev->addr_len || +	if (arp->ar_hln != bond->dev->addr_len ||  	    skb->pkt_type == PACKET_OTHERHOST ||  	    skb->pkt_type == PACKET_LOOPBACK ||  	    arp->ar_hrd != htons(ARPHRD_ETHER) || @@ -2768,16 +2356,18 @@ static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct pack  		goto out_unlock;  	arp_ptr = (unsigned char *)(arp + 1); -	arp_ptr += dev->addr_len; +	arp_ptr += bond->dev->addr_len;  	memcpy(&sip, arp_ptr, 4); -	arp_ptr += 4 + dev->addr_len; +	arp_ptr += 4 + bond->dev->addr_len;  	memcpy(&tip, arp_ptr, 4);  	pr_debug("bond_arp_rcv: %s %s/%d av %d sv %d sip %pI4 tip %pI4\n", -		 bond->dev->name, slave->dev->name, slave->state, +		 bond->dev->name, slave->dev->name, bond_slave_state(slave),  		 bond->params.arp_validate, slave_do_arp_validate(bond, slave),  		 &sip, &tip); +	curr_active_slave = rcu_dereference(bond->curr_active_slave); +  	/*  	 * Backup slaves won't see the ARP reply, but do come through  	 * here for each ARP probe (so we swap the sip/tip to validate @@ -2785,17 +2375,38 @@ static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct pack  	 * configuration, the ARP probe will (hopefully) travel from  	 * the active, through one switch, the router, then the other  	 * switch before reaching the backup. +	 * +	 * We 'trust' the arp requests if there is an active slave and +	 * it received valid arp reply(s) after it became active. This +	 * is done to avoid endless looping when we can't reach the +	 * arp_ip_target and fool ourselves with our own arp requests.  	 */ -	if (slave->state == BOND_STATE_ACTIVE) + +	if (bond_is_active_slave(slave))  		bond_validate_arp(bond, slave, sip, tip); -	else +	else if (curr_active_slave && +		 time_after(slave_last_rx(bond, curr_active_slave), +			    curr_active_slave->last_link_up))  		bond_validate_arp(bond, slave, tip, sip);  out_unlock: -	read_unlock(&bond->lock); -out: -	dev_kfree_skb(skb); -	return NET_RX_SUCCESS; +	if (arp != (struct arphdr *)skb->data) +		kfree(arp); +	return RX_HANDLER_ANOTHER; +} + +/* function to verify if we're in the arp_interval timeslice, returns true if + * (last_act - arp_interval) <= jiffies <= (last_act + mod * arp_interval + + * arp_interval/2) . the arp_interval/2 is needed for really fast networks. + */ +static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act, +				  int mod) +{ +	int delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); + +	return time_in_range(jiffies, +			     last_act - delta_in_ticks, +			     last_act + mod * delta_in_ticks + delta_in_ticks/2);  }  /* @@ -2805,50 +2416,37 @@ out:   * arp is transmitted to generate traffic. see activebackup_arp_monitor for   * arp monitoring in active backup mode.   
*/ -void bond_loadbalance_arp_mon(struct work_struct *work) +static void bond_loadbalance_arp_mon(struct work_struct *work)  {  	struct bonding *bond = container_of(work, struct bonding,  					    arp_work.work);  	struct slave *slave, *oldcurrent; -	int do_failover = 0; -	int delta_in_ticks; -	int i; - -	read_lock(&bond->lock); - -	delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); - -	if (bond->kill_timers) -		goto out; +	struct list_head *iter; +	int do_failover = 0, slave_state_changed = 0; -	if (bond->slave_cnt == 0) +	if (!bond_has_slaves(bond))  		goto re_arm; -	read_lock(&bond->curr_slave_lock); -	oldcurrent = bond->curr_active_slave; -	read_unlock(&bond->curr_slave_lock); +	rcu_read_lock(); +	oldcurrent = ACCESS_ONCE(bond->curr_active_slave);  	/* see if any of the previous devices are up now (i.e. they have  	 * xmt and rcv traffic). the curr_active_slave does not come into -	 * the picture unless it is null. also, slave->jiffies is not needed -	 * here because we send an arp on each slave and give a slave as -	 * long as it needs to get the tx/rx within the delta. +	 * the picture unless it is null. also, slave->last_link_up is not +	 * needed here because we send an arp on each slave and give a slave +	 * as long as it needs to get the tx/rx within the delta.  	 * TODO: what about up/down delay in arp mode? it wasn't here before  	 *       so it can wait  	 */ -	bond_for_each_slave(bond, slave, i) { +	bond_for_each_slave_rcu(bond, slave, iter) {  		unsigned long trans_start = dev_trans_start(slave->dev);  		if (slave->link != BOND_LINK_UP) { -			if (time_in_range(jiffies, -				trans_start - delta_in_ticks, -				trans_start + delta_in_ticks) && -			    time_in_range(jiffies, -				slave->dev->last_rx - delta_in_ticks, -				slave->dev->last_rx + delta_in_ticks)) { +			if (bond_time_in_interval(bond, trans_start, 1) && +			    bond_time_in_interval(bond, slave->last_rx, 1)) {  				slave->link  = BOND_LINK_UP; -				slave->state = BOND_STATE_ACTIVE; +				slave_state_changed = 1;  				/* primary_slave has no meaning in round-robin  				 * mode. the window of a slave being up and @@ -2856,7 +2454,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work)  				 * is closed.  				 
*/  				if (!oldcurrent) { -					pr_info("%s: link status definitely up for interface %s, ", +					pr_info("%s: link status definitely up for interface %s\n",  						bond->dev->name,  						slave->dev->name);  					do_failover = 1; @@ -2873,22 +2471,17 @@ void bond_loadbalance_arp_mon(struct work_struct *work)  			 * when the source ip is 0, so don't take the link down  			 * if we don't know our ip yet  			 */ -			if (!time_in_range(jiffies, -				trans_start - delta_in_ticks, -				trans_start + 2 * delta_in_ticks) || -			    !time_in_range(jiffies, -				slave->dev->last_rx - delta_in_ticks, -				slave->dev->last_rx + 2 * delta_in_ticks)) { +			if (!bond_time_in_interval(bond, trans_start, 2) || +			    !bond_time_in_interval(bond, slave->last_rx, 2)) {  				slave->link  = BOND_LINK_DOWN; -				slave->state = BOND_STATE_BACKUP; +				slave_state_changed = 1;  				if (slave->link_failure_count < UINT_MAX)  					slave->link_failure_count++; -				pr_info("%s: interface %s is now down.\n", -					bond->dev->name, -					slave->dev->name); +				pr_info("%s: interface %s is now down\n", +					bond->dev->name, slave->dev->name);  				if (slave == oldcurrent)  					do_failover = 1; @@ -2902,25 +2495,37 @@ void bond_loadbalance_arp_mon(struct work_struct *work)  		 * do - all replies will be rx'ed on same link causing slaves  		 * to be unstable during low/no traffic periods  		 */ -		if (IS_UP(slave->dev)) +		if (bond_slave_is_up(slave))  			bond_arp_send_all(bond, slave);  	} -	if (do_failover) { -		block_netpoll_tx(); -		write_lock_bh(&bond->curr_slave_lock); +	rcu_read_unlock(); -		bond_select_active_slave(bond); +	if (do_failover || slave_state_changed) { +		if (!rtnl_trylock()) +			goto re_arm; -		write_unlock_bh(&bond->curr_slave_lock); -		unblock_netpoll_tx(); +		if (slave_state_changed) { +			bond_slave_state_change(bond); +		} else if (do_failover) { +			/* the bond_select_active_slave must hold RTNL +			 * and curr_slave_lock for write. +			 */ +			block_netpoll_tx(); +			write_lock_bh(&bond->curr_slave_lock); + +			bond_select_active_slave(bond); + +			write_unlock_bh(&bond->curr_slave_lock); +			unblock_netpoll_tx(); +		} +		rtnl_unlock();  	}  re_arm:  	if (bond->params.arp_interval) -		queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); -out: -	read_unlock(&bond->lock); +		queue_delayed_work(bond->wq, &bond->arp_work, +				   msecs_to_jiffies(bond->params.arp_interval));  }  /* @@ -2929,26 +2534,24 @@ out:   * place for the slave.  Returns 0 if no changes are found, >0 if changes   * to link states must be committed.   * - * Called with bond->lock held for read. + * Called with rcu_read_lock hold.   */ -static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) +static int bond_ab_arp_inspect(struct bonding *bond)  { +	unsigned long trans_start, last_rx; +	struct list_head *iter;  	struct slave *slave; -	int i, commit = 0; -	unsigned long trans_start; +	int commit = 0; -	bond_for_each_slave(bond, slave, i) { +	bond_for_each_slave_rcu(bond, slave, iter) {  		slave->new_link = BOND_LINK_NOCHANGE; +		last_rx = slave_last_rx(bond, slave);  		if (slave->link != BOND_LINK_UP) { -			if (time_in_range(jiffies, -				slave_last_rx(bond, slave) - delta_in_ticks, -				slave_last_rx(bond, slave) + delta_in_ticks)) { - +			if (bond_time_in_interval(bond, last_rx, 1)) {  				slave->new_link = BOND_LINK_UP;  				commit++;  			} -  			continue;  		} @@ -2957,9 +2560,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)  		 * active.  
This avoids bouncing, as the last receive  		 * times need a full ARP monitor cycle to be updated.  		 */ -		if (time_in_range(jiffies, -				  slave->jiffies - delta_in_ticks, -				  slave->jiffies + 2 * delta_in_ticks)) +		if (bond_time_in_interval(bond, slave->last_link_up, 2))  			continue;  		/* @@ -2975,12 +2576,9 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)  		 * gives each slave a chance to tx/rx traffic  		 * before being taken out  		 */ -		if (slave->state == BOND_STATE_BACKUP && +		if (!bond_is_active_slave(slave) &&  		    !bond->current_arp_slave && -		    !time_in_range(jiffies, -			slave_last_rx(bond, slave) - delta_in_ticks, -			slave_last_rx(bond, slave) + 3 * delta_in_ticks)) { - +		    !bond_time_in_interval(bond, last_rx, 3)) {  			slave->new_link = BOND_LINK_DOWN;  			commit++;  		} @@ -2992,14 +2590,9 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)  		 *    the bond has an IP address)  		 */  		trans_start = dev_trans_start(slave->dev); -		if ((slave->state == BOND_STATE_ACTIVE) && -		    (!time_in_range(jiffies, -			trans_start - delta_in_ticks, -			trans_start + 2 * delta_in_ticks) || -		     !time_in_range(jiffies, -			slave_last_rx(bond, slave) - delta_in_ticks, -			slave_last_rx(bond, slave) + 2 * delta_in_ticks))) { - +		if (bond_is_active_slave(slave) && +		    (!bond_time_in_interval(bond, trans_start, 2) || +		     !bond_time_in_interval(bond, last_rx, 2))) {  			slave->new_link = BOND_LINK_DOWN;  			commit++;  		} @@ -3012,30 +2605,33 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)   * Called to commit link state changes noted by inspection step of   * active-backup mode ARP monitor.   * - * Called with RTNL and bond->lock for read. + * Called with RTNL hold.   
*/ -static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks) +static void bond_ab_arp_commit(struct bonding *bond)  { -	struct slave *slave; -	int i;  	unsigned long trans_start; +	struct list_head *iter; +	struct slave *slave; -	bond_for_each_slave(bond, slave, i) { +	bond_for_each_slave(bond, slave, iter) {  		switch (slave->new_link) {  		case BOND_LINK_NOCHANGE:  			continue;  		case BOND_LINK_UP:  			trans_start = dev_trans_start(slave->dev); -			if ((!bond->curr_active_slave && -			     time_in_range(jiffies, -					   trans_start - delta_in_ticks, -					   trans_start + delta_in_ticks)) || -			    bond->curr_active_slave != slave) { +			if (bond->curr_active_slave != slave || +			    (!bond->curr_active_slave && +			     bond_time_in_interval(bond, trans_start, 1))) {  				slave->link = BOND_LINK_UP; -				bond->current_arp_slave = NULL; +				if (bond->current_arp_slave) { +					bond_set_slave_inactive_flags( +						bond->current_arp_slave, +						BOND_SLAVE_NOTIFY_NOW); +					bond->current_arp_slave = NULL; +				} -				pr_info("%s: link status definitely up for interface %s.\n", +				pr_info("%s: link status definitely up for interface %s\n",  					bond->dev->name, slave->dev->name);  				if (!bond->curr_active_slave || @@ -3051,7 +2647,8 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)  				slave->link_failure_count++;  			slave->link = BOND_LINK_DOWN; -			bond_set_slave_inactive_flags(slave); +			bond_set_slave_inactive_flags(slave, +						      BOND_SLAVE_NOTIFY_NOW);  			pr_info("%s: link status definitely down for interface %s, disabling it\n",  				bond->dev->name, slave->dev->name); @@ -3085,52 +2682,46 @@ do_failover:  /*   * Send ARP probes for active-backup mode ARP monitor.   * - * Called with bond->lock held for read. + * Called with rcu_read_lock hold.   
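
The split between bond_ab_arp_inspect() and bond_ab_arp_commit() above follows a two-phase pattern: a lockless pass only records a proposed state in new_link and counts how many slaves need attention, and the heavier lock is taken afterwards, solely to apply the proposals. The sketch below models that pattern in plain userspace C; the fake_slave structure, the rx_ok flag and the two-entry array are stand-ins invented for the example.

        #include <stdbool.h>
        #include <stdio.h>

        enum { LINK_NOCHANGE, LINK_UP, LINK_DOWN };

        struct fake_slave {
                const char *name;
                int link;       /* currently committed state */
                int new_link;   /* proposal recorded by the inspect pass */
                bool rx_ok;     /* stand-in for "traffic seen in the window" */
        };

        /* lockless pass: record proposals, return how many need committing */
        static int inspect(struct fake_slave *s, int n)
        {
                int i, pending = 0;

                for (i = 0; i < n; i++) {
                        s[i].new_link = LINK_NOCHANGE;
                        if (s[i].link == LINK_UP && !s[i].rx_ok)
                                s[i].new_link = LINK_DOWN;
                        else if (s[i].link == LINK_DOWN && s[i].rx_ok)
                                s[i].new_link = LINK_UP;
                        if (s[i].new_link != LINK_NOCHANGE)
                                pending++;
                }
                return pending;
        }

        /* second pass, run only when inspect() reported work (under RTNL in the driver) */
        static void commit_links(struct fake_slave *s, int n)
        {
                int i;

                for (i = 0; i < n; i++) {
                        if (s[i].new_link == LINK_NOCHANGE)
                                continue;
                        s[i].link = s[i].new_link;
                        printf("%s is now %s\n", s[i].name,
                               s[i].link == LINK_UP ? "up" : "down");
                }
        }

        int main(void)
        {
                struct fake_slave slaves[] = {
                        { "eth0", LINK_UP,   LINK_NOCHANGE, false },
                        { "eth1", LINK_DOWN, LINK_NOCHANGE, true  },
                };

                if (inspect(slaves, 2))
                        commit_links(slaves, 2);
                return 0;
        }
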
*/ -static void bond_ab_arp_probe(struct bonding *bond) +static bool bond_ab_arp_probe(struct bonding *bond)  { -	struct slave *slave; -	int i; - -	read_lock(&bond->curr_slave_lock); - -	if (bond->current_arp_slave && bond->curr_active_slave) +	struct slave *slave, *before = NULL, *new_slave = NULL, +		     *curr_arp_slave = rcu_dereference(bond->current_arp_slave), +		     *curr_active_slave = rcu_dereference(bond->curr_active_slave); +	struct list_head *iter; +	bool found = false; +	bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER; + +	if (curr_arp_slave && curr_active_slave)  		pr_info("PROBE: c_arp %s && cas %s BAD\n", -			bond->current_arp_slave->dev->name, -			bond->curr_active_slave->dev->name); +			curr_arp_slave->dev->name, +			curr_active_slave->dev->name); -	if (bond->curr_active_slave) { -		bond_arp_send_all(bond, bond->curr_active_slave); -		read_unlock(&bond->curr_slave_lock); -		return; +	if (curr_active_slave) { +		bond_arp_send_all(bond, curr_active_slave); +		return should_notify_rtnl;  	} -	read_unlock(&bond->curr_slave_lock); -  	/* if we don't have a curr_active_slave, search for the next available  	 * backup slave from the current_arp_slave and make it the candidate  	 * for becoming the curr_active_slave  	 */ -	if (!bond->current_arp_slave) { -		bond->current_arp_slave = bond->first_slave; -		if (!bond->current_arp_slave) -			return; +	if (!curr_arp_slave) { +		curr_arp_slave = bond_first_slave_rcu(bond); +		if (!curr_arp_slave) +			return should_notify_rtnl;  	} -	bond_set_slave_inactive_flags(bond->current_arp_slave); +	bond_set_slave_inactive_flags(curr_arp_slave, BOND_SLAVE_NOTIFY_LATER); -	/* search for next candidate */ -	bond_for_each_slave_from(bond, slave, i, bond->current_arp_slave->next) { -		if (IS_UP(slave->dev)) { -			slave->link = BOND_LINK_BACK; -			bond_set_slave_active_flags(slave); -			bond_arp_send_all(bond, slave); -			slave->jiffies = jiffies; -			bond->current_arp_slave = slave; -			break; -		} +	bond_for_each_slave_rcu(bond, slave, iter) { +		if (!found && !before && bond_slave_is_up(slave)) +			before = slave; +		if (found && !new_slave && bond_slave_is_up(slave)) +			new_slave = slave;  		/* if the link state is up at this point, we  		 * mark it down - this can happen if we have  		 * simultaneous link failures and @@ -3138,361 +2729,97 @@ static void bond_ab_arp_probe(struct bonding *bond)  		 * one the current slave so it is still marked  		 * up when it is actually down  		 */ -		if (slave->link == BOND_LINK_UP) { +		if (!bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) {  			slave->link = BOND_LINK_DOWN;  			if (slave->link_failure_count < UINT_MAX)  				slave->link_failure_count++; -			bond_set_slave_inactive_flags(slave); +			bond_set_slave_inactive_flags(slave, +						      BOND_SLAVE_NOTIFY_LATER); -			pr_info("%s: backup interface %s is now down.\n", +			pr_info("%s: backup interface %s is now down\n",  				bond->dev->name, slave->dev->name);  		} +		if (slave == curr_arp_slave) +			found = true;  	} + +	if (!new_slave && before) +		new_slave = before; + +	if (!new_slave) +		goto check_state; + +	new_slave->link = BOND_LINK_BACK; +	bond_set_slave_active_flags(new_slave, BOND_SLAVE_NOTIFY_LATER); +	bond_arp_send_all(bond, new_slave); +	new_slave->last_link_up = jiffies; +	rcu_assign_pointer(bond->current_arp_slave, new_slave); + +check_state: +	bond_for_each_slave_rcu(bond, slave, iter) { +		if (slave->should_notify) { +			should_notify_rtnl = BOND_SLAVE_NOTIFY_NOW; +			break; +		} +	} +	return should_notify_rtnl;  } -void 
bond_activebackup_arp_mon(struct work_struct *work) +static void bond_activebackup_arp_mon(struct work_struct *work)  {  	struct bonding *bond = container_of(work, struct bonding,  					    arp_work.work); +	bool should_notify_peers = false; +	bool should_notify_rtnl = false;  	int delta_in_ticks; -	read_lock(&bond->lock); - -	if (bond->kill_timers) -		goto out; -  	delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); -	if (bond->slave_cnt == 0) +	if (!bond_has_slaves(bond))  		goto re_arm; -	if (bond->send_grat_arp) { -		read_lock(&bond->curr_slave_lock); -		bond_send_gratuitous_arp(bond); -		read_unlock(&bond->curr_slave_lock); -	} +	rcu_read_lock(); -	if (bond->send_unsol_na) { -		read_lock(&bond->curr_slave_lock); -		bond_send_unsolicited_na(bond); -		read_unlock(&bond->curr_slave_lock); -	} +	should_notify_peers = bond_should_notify_peers(bond); -	if (bond_ab_arp_inspect(bond, delta_in_ticks)) { -		read_unlock(&bond->lock); -		rtnl_lock(); -		read_lock(&bond->lock); +	if (bond_ab_arp_inspect(bond)) { +		rcu_read_unlock(); -		bond_ab_arp_commit(bond, delta_in_ticks); +		/* Race avoidance with bond_close flush of workqueue */ +		if (!rtnl_trylock()) { +			delta_in_ticks = 1; +			should_notify_peers = false; +			goto re_arm; +		} + +		bond_ab_arp_commit(bond); -		read_unlock(&bond->lock);  		rtnl_unlock(); -		read_lock(&bond->lock); +		rcu_read_lock();  	} -	bond_ab_arp_probe(bond); +	should_notify_rtnl = bond_ab_arp_probe(bond); +	rcu_read_unlock();  re_arm:  	if (bond->params.arp_interval)  		queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); -out: -	read_unlock(&bond->lock); -} - -/*------------------------------ proc/seq_file-------------------------------*/ - -#ifdef CONFIG_PROC_FS - -static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos) -	__acquires(RCU) -	__acquires(&bond->lock) -{ -	struct bonding *bond = seq->private; -	loff_t off = 0; -	struct slave *slave; -	int i; - -	/* make sure the bond won't be taken away */ -	rcu_read_lock(); -	read_lock(&bond->lock); - -	if (*pos == 0) -		return SEQ_START_TOKEN; - -	bond_for_each_slave(bond, slave, i) { -		if (++off == *pos) -			return slave; -	} - -	return NULL; -} - -static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ -	struct bonding *bond = seq->private; -	struct slave *slave = v; - -	++*pos; -	if (v == SEQ_START_TOKEN) -		return bond->first_slave; - -	slave = slave->next; - -	return (slave == bond->first_slave) ? 
NULL : slave; -} - -static void bond_info_seq_stop(struct seq_file *seq, void *v) -	__releases(&bond->lock) -	__releases(RCU) -{ -	struct bonding *bond = seq->private; - -	read_unlock(&bond->lock); -	rcu_read_unlock(); -} - -static void bond_info_show_master(struct seq_file *seq) -{ -	struct bonding *bond = seq->private; -	struct slave *curr; -	int i; -	read_lock(&bond->curr_slave_lock); -	curr = bond->curr_active_slave; -	read_unlock(&bond->curr_slave_lock); - -	seq_printf(seq, "Bonding Mode: %s", -		   bond_mode_name(bond->params.mode)); - -	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP && -	    bond->params.fail_over_mac) -		seq_printf(seq, " (fail_over_mac %s)", -		   fail_over_mac_tbl[bond->params.fail_over_mac].modename); - -	seq_printf(seq, "\n"); - -	if (bond->params.mode == BOND_MODE_XOR || -		bond->params.mode == BOND_MODE_8023AD) { -		seq_printf(seq, "Transmit Hash Policy: %s (%d)\n", -			xmit_hashtype_tbl[bond->params.xmit_policy].modename, -			bond->params.xmit_policy); -	} - -	if (USES_PRIMARY(bond->params.mode)) { -		seq_printf(seq, "Primary Slave: %s", -			   (bond->primary_slave) ? -			   bond->primary_slave->dev->name : "None"); -		if (bond->primary_slave) -			seq_printf(seq, " (primary_reselect %s)", -		   pri_reselect_tbl[bond->params.primary_reselect].modename); - -		seq_printf(seq, "\nCurrently Active Slave: %s\n", -			   (curr) ? curr->dev->name : "None"); -	} - -	seq_printf(seq, "MII Status: %s\n", netif_carrier_ok(bond->dev) ? -		   "up" : "down"); -	seq_printf(seq, "MII Polling Interval (ms): %d\n", bond->params.miimon); -	seq_printf(seq, "Up Delay (ms): %d\n", -		   bond->params.updelay * bond->params.miimon); -	seq_printf(seq, "Down Delay (ms): %d\n", -		   bond->params.downdelay * bond->params.miimon); - - -	/* ARP information */ -	if (bond->params.arp_interval > 0) { -		int printed = 0; -		seq_printf(seq, "ARP Polling Interval (ms): %d\n", -				bond->params.arp_interval); - -		seq_printf(seq, "ARP IP target/s (n.n.n.n form):"); - -		for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) { -			if (!bond->params.arp_targets[i]) -				break; -			if (printed) -				seq_printf(seq, ","); -			seq_printf(seq, " %pI4", &bond->params.arp_targets[i]); -			printed = 1; -		} -		seq_printf(seq, "\n"); -	} - -	if (bond->params.mode == BOND_MODE_8023AD) { -		struct ad_info ad_info; - -		seq_puts(seq, "\n802.3ad info\n"); -		seq_printf(seq, "LACP rate: %s\n", -			   (bond->params.lacp_fast) ? "fast" : "slow"); -		seq_printf(seq, "Aggregator selection policy (ad_select): %s\n", -			   ad_select_tbl[bond->params.ad_select].modename); - -		if (bond_3ad_get_active_agg_info(bond, &ad_info)) { -			seq_printf(seq, "bond %s has no active aggregator\n", -				   bond->dev->name); -		} else { -			seq_printf(seq, "Active Aggregator Info:\n"); - -			seq_printf(seq, "\tAggregator ID: %d\n", -				   ad_info.aggregator_id); -			seq_printf(seq, "\tNumber of ports: %d\n", -				   ad_info.ports); -			seq_printf(seq, "\tActor Key: %d\n", -				   ad_info.actor_key); -			seq_printf(seq, "\tPartner Key: %d\n", -				   ad_info.partner_key); -			seq_printf(seq, "\tPartner Mac Address: %pM\n", -				   ad_info.partner_system); -		} -	} -} - -static void bond_info_show_slave(struct seq_file *seq, -				 const struct slave *slave) -{ -	struct bonding *bond = seq->private; - -	seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name); -	seq_printf(seq, "MII Status: %s\n", -		   (slave->link == BOND_LINK_UP) ?  
"up" : "down"); -	seq_printf(seq, "Speed: %d Mbps\n", slave->speed); -	seq_printf(seq, "Duplex: %s\n", slave->duplex ? "full" : "half"); -	seq_printf(seq, "Link Failure Count: %u\n", -		   slave->link_failure_count); - -	seq_printf(seq, "Permanent HW addr: %pM\n", slave->perm_hwaddr); - -	if (bond->params.mode == BOND_MODE_8023AD) { -		const struct aggregator *agg -			= SLAVE_AD_INFO(slave).port.aggregator; - -		if (agg) -			seq_printf(seq, "Aggregator ID: %d\n", -				   agg->aggregator_identifier); -		else -			seq_puts(seq, "Aggregator ID: N/A\n"); -	} -	seq_printf(seq, "Slave queue ID: %d\n", slave->queue_id); -} - -static int bond_info_seq_show(struct seq_file *seq, void *v) -{ -	if (v == SEQ_START_TOKEN) { -		seq_printf(seq, "%s\n", version); -		bond_info_show_master(seq); -	} else -		bond_info_show_slave(seq, v); - -	return 0; -} - -static const struct seq_operations bond_info_seq_ops = { -	.start = bond_info_seq_start, -	.next  = bond_info_seq_next, -	.stop  = bond_info_seq_stop, -	.show  = bond_info_seq_show, -}; - -static int bond_info_open(struct inode *inode, struct file *file) -{ -	struct seq_file *seq; -	struct proc_dir_entry *proc; -	int res; - -	res = seq_open(file, &bond_info_seq_ops); -	if (!res) { -		/* recover the pointer buried in proc_dir_entry data */ -		seq = file->private_data; -		proc = PDE(inode); -		seq->private = proc->data; -	} - -	return res; -} - -static const struct file_operations bond_info_fops = { -	.owner   = THIS_MODULE, -	.open    = bond_info_open, -	.read    = seq_read, -	.llseek  = seq_lseek, -	.release = seq_release, -}; - -static void bond_create_proc_entry(struct bonding *bond) -{ -	struct net_device *bond_dev = bond->dev; -	struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id); - -	if (bn->proc_dir) { -		bond->proc_entry = proc_create_data(bond_dev->name, -						    S_IRUGO, bn->proc_dir, -						    &bond_info_fops, bond); -		if (bond->proc_entry == NULL) -			pr_warning("Warning: Cannot create /proc/net/%s/%s\n", -				   DRV_NAME, bond_dev->name); -		else -			memcpy(bond->proc_file_name, bond_dev->name, IFNAMSIZ); -	} -} +	if (should_notify_peers || should_notify_rtnl) { +		if (!rtnl_trylock()) +			return; -static void bond_remove_proc_entry(struct bonding *bond) -{ -	struct net_device *bond_dev = bond->dev; -	struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id); +		if (should_notify_peers) +			call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, +						 bond->dev); +		if (should_notify_rtnl) +			bond_slave_state_notify(bond); -	if (bn->proc_dir && bond->proc_entry) { -		remove_proc_entry(bond->proc_file_name, bn->proc_dir); -		memset(bond->proc_file_name, 0, IFNAMSIZ); -		bond->proc_entry = NULL; -	} -} - -/* Create the bonding directory under /proc/net, if doesn't exist yet. - * Caller must hold rtnl_lock. - */ -static void __net_init bond_create_proc_dir(struct bond_net *bn) -{ -	if (!bn->proc_dir) { -		bn->proc_dir = proc_mkdir(DRV_NAME, bn->net->proc_net); -		if (!bn->proc_dir) -			pr_warning("Warning: cannot create /proc/net/%s\n", -				   DRV_NAME); -	} -} - -/* Destroy the bonding directory under /proc/net, if empty. - * Caller must hold rtnl_lock. 
- */ -static void __net_exit bond_destroy_proc_dir(struct bond_net *bn) -{ -	if (bn->proc_dir) { -		remove_proc_entry(DRV_NAME, bn->net->proc_net); -		bn->proc_dir = NULL; +		rtnl_unlock();  	}  } -#else /* !CONFIG_PROC_FS */ - -static void bond_create_proc_entry(struct bonding *bond) -{ -} - -static void bond_remove_proc_entry(struct bonding *bond) -{ -} - -static inline void bond_create_proc_dir(struct bond_net *bn) -{ -} - -static inline void bond_destroy_proc_dir(struct bond_net *bn) -{ -} - -#endif /* CONFIG_PROC_FS */ - -  /*-------------------------- netdev event handling --------------------------*/  /* @@ -3503,6 +2830,8 @@ static int bond_event_changename(struct bonding *bond)  	bond_remove_proc_entry(bond);  	bond_create_proc_entry(bond); +	bond_debug_reregister(bond); +  	return NOTIFY_DONE;  } @@ -3514,6 +2843,16 @@ static int bond_master_netdev_event(unsigned long event,  	switch (event) {  	case NETDEV_CHANGENAME:  		return bond_event_changename(event_bond); +	case NETDEV_UNREGISTER: +		bond_remove_proc_entry(event_bond); +		break; +	case NETDEV_REGISTER: +		bond_create_proc_entry(event_bond); +		break; +	case NETDEV_NOTIFY_PEERS: +		if (event_bond->send_peer_notif) +			event_bond->send_peer_notif--; +		break;  	default:  		break;  	} @@ -3524,39 +2863,41 @@ static int bond_master_netdev_event(unsigned long event,  static int bond_slave_netdev_event(unsigned long event,  				   struct net_device *slave_dev)  { -	struct net_device *bond_dev = slave_dev->master; -	struct bonding *bond = netdev_priv(bond_dev); +	struct slave *slave = bond_slave_get_rtnl(slave_dev); +	struct bonding *bond; +	struct net_device *bond_dev; +	u32 old_speed; +	u8 old_duplex; + +	/* A netdev event can be generated while enslaving a device +	 * before netdev_rx_handler_register is called in which case +	 * slave will be NULL +	 */ +	if (!slave) +		return NOTIFY_DONE; +	bond_dev = slave->bond->dev; +	bond = slave->bond;  	switch (event) {  	case NETDEV_UNREGISTER: -		if (bond_dev) { -			if (bond->setup_by_slave) -				bond_release_and_destroy(bond_dev, slave_dev); -			else -				bond_release(bond_dev, slave_dev); -		} +		if (bond_dev->type != ARPHRD_ETHER) +			bond_release_and_destroy(bond_dev, slave_dev); +		else +			bond_release(bond_dev, slave_dev);  		break; +	case NETDEV_UP:  	case NETDEV_CHANGE: -		if (bond->params.mode == BOND_MODE_8023AD || bond_is_lb(bond)) { -			struct slave *slave; - -			slave = bond_get_slave_by_dev(bond, slave_dev); -			if (slave) { -				u16 old_speed = slave->speed; -				u16 old_duplex = slave->duplex; - -				bond_update_speed_duplex(slave); +		old_speed = slave->speed; +		old_duplex = slave->duplex; -				if (bond_is_lb(bond)) -					break; +		bond_update_speed_duplex(slave); -				if (old_speed != slave->speed) -					bond_3ad_adapter_speed_changed(slave); -				if (old_duplex != slave->duplex) -					bond_3ad_adapter_duplex_changed(slave); -			} +		if (BOND_MODE(bond) == BOND_MODE_8023AD) { +			if (old_speed != slave->speed) +				bond_3ad_adapter_speed_changed(slave); +			if (old_duplex != slave->duplex) +				bond_3ad_adapter_duplex_changed(slave);  		} -  		break;  	case NETDEV_DOWN:  		/* @@ -3578,13 +2919,38 @@ static int bond_slave_netdev_event(unsigned long event,  		 */  		break;  	case NETDEV_CHANGENAME: -		/* -		 * TODO: handle changing the primary's name -		 */ +		/* we don't care if we don't have primary set */ +		if (!bond_uses_primary(bond) || +		    !bond->params.primary[0]) +			break; + +		if (slave == bond->primary_slave) { +			/* slave's name changed - he's no 
longer primary */ +			bond->primary_slave = NULL; +		} else if (!strcmp(slave_dev->name, bond->params.primary)) { +			/* we have a new primary slave */ +			bond->primary_slave = slave; +		} else { /* we didn't change primary - exit */ +			break; +		} + +		pr_info("%s: Primary slave changed to %s, reselecting active slave\n", +			bond->dev->name, +			bond->primary_slave ? slave_dev->name : "none"); + +		block_netpoll_tx(); +		write_lock_bh(&bond->curr_slave_lock); +		bond_select_active_slave(bond); +		write_unlock_bh(&bond->curr_slave_lock); +		unblock_netpoll_tx();  		break;  	case NETDEV_FEAT_CHANGE:  		bond_compute_features(bond);  		break; +	case NETDEV_RESEND_IGMP: +		/* Propagate to master device */ +		call_netdevice_notifiers(event, slave->bond->dev); +		break;  	default:  		break;  	} @@ -3603,11 +2969,10 @@ static int bond_slave_netdev_event(unsigned long event,  static int bond_netdev_event(struct notifier_block *this,  			     unsigned long event, void *ptr)  { -	struct net_device *event_dev = (struct net_device *)ptr; +	struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);  	pr_debug("event_dev: %s, event: %lx\n", -		 event_dev ? event_dev->name : "None", -		 event); +		 event_dev ? event_dev->name : "None", event);  	if (!(event_dev->priv_flags & IFF_BONDING))  		return NOTIFY_DONE; @@ -3625,209 +2990,163 @@ static int bond_netdev_event(struct notifier_block *this,  	return NOTIFY_DONE;  } -/* - * bond_inetaddr_event: handle inetaddr notifier chain events. - * - * We keep track of device IPs primarily to use as source addresses in - * ARP monitor probes (rather than spewing out broadcasts all the time). - * - * We track one IP for the main device (if it has one), plus one per VLAN. - */ -static int bond_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) -{ -	struct in_ifaddr *ifa = ptr; -	struct net_device *vlan_dev, *event_dev = ifa->ifa_dev->dev; -	struct bond_net *bn = net_generic(dev_net(event_dev), bond_net_id); -	struct bonding *bond; -	struct vlan_entry *vlan; - -	list_for_each_entry(bond, &bn->dev_list, bond_list) { -		if (bond->dev == event_dev) { -			switch (event) { -			case NETDEV_UP: -				bond->master_ip = ifa->ifa_local; -				return NOTIFY_OK; -			case NETDEV_DOWN: -				bond->master_ip = bond_glean_dev_ip(bond->dev); -				return NOTIFY_OK; -			default: -				return NOTIFY_DONE; -			} -		} - -		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { -			if (!bond->vlgrp) -				continue; -			vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); -			if (vlan_dev == event_dev) { -				switch (event) { -				case NETDEV_UP: -					vlan->vlan_ip = ifa->ifa_local; -					return NOTIFY_OK; -				case NETDEV_DOWN: -					vlan->vlan_ip = -						bond_glean_dev_ip(vlan_dev); -					return NOTIFY_OK; -				default: -					return NOTIFY_DONE; -				} -			} -		} -	} -	return NOTIFY_DONE; -} -  static struct notifier_block bond_netdev_notifier = {  	.notifier_call = bond_netdev_event,  }; -static struct notifier_block bond_inetaddr_notifier = { -	.notifier_call = bond_inetaddr_event, -}; - -/*-------------------------- Packet type handling ---------------------------*/ - -/* register to receive lacpdus on a bond */ -static void bond_register_lacpdu(struct bonding *bond) -{ -	struct packet_type *pk_type = &(BOND_AD_INFO(bond).ad_pkt_type); - -	/* initialize packet type */ -	pk_type->type = PKT_TYPE_LACPDU; -	pk_type->dev = bond->dev; -	pk_type->func = bond_3ad_lacpdu_recv; - -	dev_add_pack(pk_type); -} - -/* unregister to receive lacpdus on a bond 
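
bond_master_netdev_event() and bond_slave_netdev_event() above are dispatched from a single netdevice notifier. The fragment below is only a minimal sketch of that registration pattern as an out-of-tree module (the demo_ names are invented); it merely logs NETDEV_CHANGENAME, but the notifier_block plumbing and the netdev_notifier_info_to_dev() accessor are the same ones the bonding driver uses here.

        #include <linux/module.h>
        #include <linux/netdevice.h>

        static int demo_netdev_event(struct notifier_block *nb,
                                     unsigned long event, void *ptr)
        {
                struct net_device *dev = netdev_notifier_info_to_dev(ptr);

                if (event == NETDEV_CHANGENAME)
                        pr_info("demo: %s was renamed\n", dev->name);
                return NOTIFY_DONE;
        }

        static struct notifier_block demo_notifier = {
                .notifier_call = demo_netdev_event,
        };

        static int __init demo_init(void)
        {
                return register_netdevice_notifier(&demo_notifier);
        }

        static void __exit demo_exit(void)
        {
                unregister_netdevice_notifier(&demo_notifier);
        }

        module_init(demo_init);
        module_exit(demo_exit);
        MODULE_LICENSE("GPL");
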
*/ -static void bond_unregister_lacpdu(struct bonding *bond) -{ -	dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type)); -} +/*---------------------------- Hashing Policies -----------------------------*/ -void bond_register_arp(struct bonding *bond) +/* L2 hash helper */ +static inline u32 bond_eth_hash(struct sk_buff *skb)  { -	struct packet_type *pt = &bond->arp_mon_pt; +	struct ethhdr *data = (struct ethhdr *)skb->data; -	if (pt->type) -		return; +	if (skb_headlen(skb) >= offsetof(struct ethhdr, h_proto)) +		return data->h_dest[5] ^ data->h_source[5]; -	pt->type = htons(ETH_P_ARP); -	pt->dev = bond->dev; -	pt->func = bond_arp_rcv; -	dev_add_pack(pt); +	return 0;  } -void bond_unregister_arp(struct bonding *bond) +/* Extract the appropriate headers based on bond's xmit policy */ +static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, +			      struct flow_keys *fk)  { -	struct packet_type *pt = &bond->arp_mon_pt; +	const struct ipv6hdr *iph6; +	const struct iphdr *iph; +	int noff, proto = -1; -	dev_remove_pack(pt); -	pt->type = 0; -} - -/*---------------------------- Hashing Policies -----------------------------*/ - -/* - * Hash for the output device based upon layer 2 and layer 3 data. If - * the packet is not IP mimic bond_xmit_hash_policy_l2() - */ -static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count) -{ -	struct ethhdr *data = (struct ethhdr *)skb->data; -	struct iphdr *iph = ip_hdr(skb); +	if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23) +		return skb_flow_dissect(skb, fk); +	fk->ports = 0; +	noff = skb_network_offset(skb);  	if (skb->protocol == htons(ETH_P_IP)) { -		return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^ -			(data->h_dest[5] ^ data->h_source[5])) % count; +		if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph)))) +			return false; +		iph = ip_hdr(skb); +		fk->src = iph->saddr; +		fk->dst = iph->daddr; +		noff += iph->ihl << 2; +		if (!ip_is_fragment(iph)) +			proto = iph->protocol; +	} else if (skb->protocol == htons(ETH_P_IPV6)) { +		if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph6)))) +			return false; +		iph6 = ipv6_hdr(skb); +		fk->src = (__force __be32)ipv6_addr_hash(&iph6->saddr); +		fk->dst = (__force __be32)ipv6_addr_hash(&iph6->daddr); +		noff += sizeof(*iph6); +		proto = iph6->nexthdr; +	} else { +		return false;  	} +	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0) +		fk->ports = skb_flow_get_ports(skb, noff, proto); -	return (data->h_dest[5] ^ data->h_source[5]) % count; +	return true;  } -/* - * Hash for the output device based upon layer 3 and layer 4 data. If - * the packet is a frag or not TCP or UDP, just use layer 3 data.  
If it is - * altogether not IP, mimic bond_xmit_hash_policy_l2() +/** + * bond_xmit_hash - generate a hash value based on the xmit policy + * @bond: bonding device + * @skb: buffer to use for headers + * + * This function will extract the necessary headers from the skb buffer and use + * them to generate a hash based on the xmit_policy set in the bonding device   */ -static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count) +u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)  { -	struct ethhdr *data = (struct ethhdr *)skb->data; -	struct iphdr *iph = ip_hdr(skb); -	__be16 *layer4hdr = (__be16 *)((u32 *)iph + iph->ihl); -	int layer4_xor = 0; +	struct flow_keys flow; +	u32 hash; -	if (skb->protocol == htons(ETH_P_IP)) { -		if (!(iph->frag_off & htons(IP_MF|IP_OFFSET)) && -		    (iph->protocol == IPPROTO_TCP || -		     iph->protocol == IPPROTO_UDP)) { -			layer4_xor = ntohs((*layer4hdr ^ *(layer4hdr + 1))); -		} -		return (layer4_xor ^ -			((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count; +	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 || +	    !bond_flow_dissect(bond, skb, &flow)) +		return bond_eth_hash(skb); -	} +	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 || +	    bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) +		hash = bond_eth_hash(skb); +	else +		hash = (__force u32)flow.ports; +	hash ^= (__force u32)flow.dst ^ (__force u32)flow.src; +	hash ^= (hash >> 16); +	hash ^= (hash >> 8); -	return (data->h_dest[5] ^ data->h_source[5]) % count; +	return hash;  } -/* - * Hash for the output device based upon layer 2 data - */ -static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count) -{ -	struct ethhdr *data = (struct ethhdr *)skb->data; +/*-------------------------- Device entry points ----------------------------*/ -	return (data->h_dest[5] ^ data->h_source[5]) % count; +static void bond_work_init_all(struct bonding *bond) +{ +	INIT_DELAYED_WORK(&bond->mcast_work, +			  bond_resend_igmp_join_requests_delayed); +	INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor); +	INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor); +	if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) +		INIT_DELAYED_WORK(&bond->arp_work, bond_activebackup_arp_mon); +	else +		INIT_DELAYED_WORK(&bond->arp_work, bond_loadbalance_arp_mon); +	INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);  } -/*-------------------------- Device entry points ----------------------------*/ +static void bond_work_cancel_all(struct bonding *bond) +{ +	cancel_delayed_work_sync(&bond->mii_work); +	cancel_delayed_work_sync(&bond->arp_work); +	cancel_delayed_work_sync(&bond->alb_work); +	cancel_delayed_work_sync(&bond->ad_work); +	cancel_delayed_work_sync(&bond->mcast_work); +}  static int bond_open(struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev); +	struct list_head *iter; +	struct slave *slave; -	bond->kill_timers = 0; +	/* reset slave->backup and slave->inactive */ +	read_lock(&bond->lock); +	if (bond_has_slaves(bond)) { +		read_lock(&bond->curr_slave_lock); +		bond_for_each_slave(bond, slave, iter) { +			if (bond_uses_primary(bond) +				&& (slave != bond->curr_active_slave)) { +				bond_set_slave_inactive_flags(slave, +							      BOND_SLAVE_NOTIFY_NOW); +			} else { +				bond_set_slave_active_flags(slave, +							    BOND_SLAVE_NOTIFY_NOW); +			} +		} +		read_unlock(&bond->curr_slave_lock); +	} +	read_unlock(&bond->lock); -	INIT_DELAYED_WORK(&bond->mcast_work, bond_resend_igmp_join_requests_delayed); +	bond_work_init_all(bond);  	if 
(bond_is_lb(bond)) {  		/* bond_alb_initialize must be called before the timer  		 * is started.  		 */ -		if (bond_alb_initialize(bond, (bond->params.mode == BOND_MODE_ALB))) { -			/* something went wrong - fail the open operation */ +		if (bond_alb_initialize(bond, (BOND_MODE(bond) == BOND_MODE_ALB)))  			return -ENOMEM; -		} - -		INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor); -		queue_delayed_work(bond->wq, &bond->alb_work, 0); +		if (bond->params.tlb_dynamic_lb) +			queue_delayed_work(bond->wq, &bond->alb_work, 0);  	} -	if (bond->params.miimon) {  /* link check interval, in milliseconds. */ -		INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor); +	if (bond->params.miimon)  /* link check interval, in milliseconds. */  		queue_delayed_work(bond->wq, &bond->mii_work, 0); -	}  	if (bond->params.arp_interval) {  /* arp interval, in milliseconds. */ -		if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) -			INIT_DELAYED_WORK(&bond->arp_work, -					  bond_activebackup_arp_mon); -		else -			INIT_DELAYED_WORK(&bond->arp_work, -					  bond_loadbalance_arp_mon); -  		queue_delayed_work(bond->wq, &bond->arp_work, 0); -		if (bond->params.arp_validate) -			bond_register_arp(bond); +		bond->recv_probe = bond_arp_rcv;  	} -	if (bond->params.mode == BOND_MODE_8023AD) { -		INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler); +	if (BOND_MODE(bond) == BOND_MODE_8023AD) {  		queue_delayed_work(bond->wq, &bond->ad_work, 0);  		/* register to receive LACPDUs */ -		bond_register_lacpdu(bond); +		bond->recv_probe = bond_3ad_lacpdu_recv;  		bond_3ad_initiate_agg_selection(bond, 1);  	} @@ -3838,53 +3157,11 @@ static int bond_close(struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev); -	if (bond->params.mode == BOND_MODE_8023AD) { -		/* Unregister the receive of LACPDUs */ -		bond_unregister_lacpdu(bond); -	} - -	if (bond->params.arp_validate) -		bond_unregister_arp(bond); - -	write_lock_bh(&bond->lock); - -	bond->send_grat_arp = 0; -	bond->send_unsol_na = 0; - -	/* signal timers not to re-arm */ -	bond->kill_timers = 1; - -	write_unlock_bh(&bond->lock); - -	if (bond->params.miimon) {  /* link check interval, in milliseconds. */ -		cancel_delayed_work(&bond->mii_work); -	} - -	if (bond->params.arp_interval) {  /* arp interval, in milliseconds. 
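
bond_work_init_all()/bond_work_cancel_all() above centralise the driver's delayed-work handling: each monitor re-queues itself, and teardown relies on cancel_delayed_work_sync() to stop it safely. A stripped-down illustration of that lifecycle as a standalone module might look as follows; demo_wq, demo_work and the one-second period are invented for the example.

        #include <linux/module.h>
        #include <linux/workqueue.h>
        #include <linux/jiffies.h>

        static struct workqueue_struct *demo_wq;
        static struct delayed_work demo_work;

        static void demo_monitor(struct work_struct *work)
        {
                pr_info("demo: periodic check\n");
                /* re-arm, exactly like the bonding monitors re-queue themselves */
                queue_delayed_work(demo_wq, &demo_work, msecs_to_jiffies(1000));
        }

        static int __init demo_init(void)
        {
                demo_wq = create_singlethread_workqueue("demo_wq");
                if (!demo_wq)
                        return -ENOMEM;
                INIT_DELAYED_WORK(&demo_work, demo_monitor);
                queue_delayed_work(demo_wq, &demo_work, 0);
                return 0;
        }

        static void __exit demo_exit(void)
        {
                /* waits for a running instance and blocks re-queueing,
                 * mirroring bond_work_cancel_all() above */
                cancel_delayed_work_sync(&demo_work);
                destroy_workqueue(demo_wq);
        }

        module_init(demo_init);
        module_exit(demo_exit);
        MODULE_LICENSE("GPL");
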
*/ -		cancel_delayed_work(&bond->arp_work); -	} - -	switch (bond->params.mode) { -	case BOND_MODE_8023AD: -		cancel_delayed_work(&bond->ad_work); -		break; -	case BOND_MODE_TLB: -	case BOND_MODE_ALB: -		cancel_delayed_work(&bond->alb_work); -		break; -	default: -		break; -	} - -	if (delayed_work_pending(&bond->mcast_work)) -		cancel_delayed_work(&bond->mcast_work); - -	if (bond_is_lb(bond)) { -		/* Must be called only after all -		 * slaves have been released -		 */ +	bond_work_cancel_all(bond); +	bond->send_peer_notif = 0; +	if (bond_is_lb(bond))  		bond_alb_deinitialize(bond); -	} +	bond->recv_probe = NULL;  	return 0;  } @@ -3894,14 +3171,13 @@ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,  {  	struct bonding *bond = netdev_priv(bond_dev);  	struct rtnl_link_stats64 temp; +	struct list_head *iter;  	struct slave *slave; -	int i;  	memset(stats, 0, sizeof(*stats));  	read_lock_bh(&bond->lock); - -	bond_for_each_slave(bond, slave, i) { +	bond_for_each_slave(bond, slave, iter) {  		const struct rtnl_link_stats64 *sstats =  			dev_get_stats(slave->dev, &temp); @@ -3931,7 +3207,6 @@ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,  		stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors;  		stats->tx_window_errors += sstats->tx_window_errors;  	} -  	read_unlock_bh(&bond->lock);  	return stats; @@ -3939,12 +3214,15 @@ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,  static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd)  { +	struct bonding *bond = netdev_priv(bond_dev);  	struct net_device *slave_dev = NULL;  	struct ifbond k_binfo;  	struct ifbond __user *u_binfo = NULL;  	struct ifslave k_sinfo;  	struct ifslave __user *u_sinfo = NULL;  	struct mii_ioctl_data *mii = NULL; +	struct bond_opt_value newval; +	struct net *net;  	int res = 0;  	pr_debug("bond_ioctl: master=%s, cmd=%d\n", bond_dev->name, cmd); @@ -3968,7 +3246,6 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd  		if (mii->reg_num == 1) { -			struct bonding *bond = netdev_priv(bond_dev);  			mii->val_out = 0;  			read_lock(&bond->lock);  			read_lock(&bond->curr_slave_lock); @@ -4011,130 +3288,132 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd  		break;  	} -	if (!capable(CAP_NET_ADMIN)) +	net = dev_net(bond_dev); + +	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))  		return -EPERM; -	slave_dev = dev_get_by_name(dev_net(bond_dev), ifr->ifr_slave); +	slave_dev = __dev_get_by_name(net, ifr->ifr_slave);  	pr_debug("slave_dev=%p:\n", slave_dev);  	if (!slave_dev) -		res = -ENODEV; -	else { -		pr_debug("slave_dev->name=%s:\n", slave_dev->name); -		switch (cmd) { -		case BOND_ENSLAVE_OLD: -		case SIOCBONDENSLAVE: -			res = bond_enslave(bond_dev, slave_dev); -			break; -		case BOND_RELEASE_OLD: -		case SIOCBONDRELEASE: -			res = bond_release(bond_dev, slave_dev); -			break; -		case BOND_SETHWADDR_OLD: -		case SIOCBONDSETHWADDR: -			res = bond_sethwaddr(bond_dev, slave_dev); -			break; -		case BOND_CHANGE_ACTIVE_OLD: -		case SIOCBONDCHANGEACTIVE: -			res = bond_ioctl_change_active(bond_dev, slave_dev); -			break; -		default: -			res = -EOPNOTSUPP; -		} +		return -ENODEV; -		dev_put(slave_dev); +	pr_debug("slave_dev->name=%s:\n", slave_dev->name); +	switch (cmd) { +	case BOND_ENSLAVE_OLD: +	case SIOCBONDENSLAVE: +		res = bond_enslave(bond_dev, slave_dev); +		break; +	case BOND_RELEASE_OLD: +	case SIOCBONDRELEASE: +		res = bond_release(bond_dev, 
slave_dev); +		break; +	case BOND_SETHWADDR_OLD: +	case SIOCBONDSETHWADDR: +		bond_set_dev_addr(bond_dev, slave_dev); +		res = 0; +		break; +	case BOND_CHANGE_ACTIVE_OLD: +	case SIOCBONDCHANGEACTIVE: +		bond_opt_initstr(&newval, slave_dev->name); +		res = __bond_opt_set(bond, BOND_OPT_ACTIVE_SLAVE, &newval); +		break; +	default: +		res = -EOPNOTSUPP;  	}  	return res;  } -static bool bond_addr_in_mc_list(unsigned char *addr, -				 struct netdev_hw_addr_list *list, -				 int addrlen) +static void bond_change_rx_flags(struct net_device *bond_dev, int change)  { -	struct netdev_hw_addr *ha; +	struct bonding *bond = netdev_priv(bond_dev); -	netdev_hw_addr_list_for_each(ha, list) -		if (!memcmp(ha->addr, addr, addrlen)) -			return true; +	if (change & IFF_PROMISC) +		bond_set_promiscuity(bond, +				     bond_dev->flags & IFF_PROMISC ? 1 : -1); -	return false; +	if (change & IFF_ALLMULTI) +		bond_set_allmulti(bond, +				  bond_dev->flags & IFF_ALLMULTI ? 1 : -1);  } -static void bond_set_multicast_list(struct net_device *bond_dev) +static void bond_set_rx_mode(struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev); -	struct netdev_hw_addr *ha; -	bool found; - -	/* -	 * Do promisc before checking multicast_mode -	 */ -	if ((bond_dev->flags & IFF_PROMISC) && !(bond->flags & IFF_PROMISC)) -		/* -		 * FIXME: Need to handle the error when one of the multi-slaves -		 * encounters error. -		 */ -		bond_set_promiscuity(bond, 1); - - -	if (!(bond_dev->flags & IFF_PROMISC) && (bond->flags & IFF_PROMISC)) -		bond_set_promiscuity(bond, -1); - - -	/* set allmulti flag to slaves */ -	if ((bond_dev->flags & IFF_ALLMULTI) && !(bond->flags & IFF_ALLMULTI)) -		/* -		 * FIXME: Need to handle the error when one of the multi-slaves -		 * encounters error. -		 */ -		bond_set_allmulti(bond, 1); - +	struct list_head *iter; +	struct slave *slave; -	if (!(bond_dev->flags & IFF_ALLMULTI) && (bond->flags & IFF_ALLMULTI)) -		bond_set_allmulti(bond, -1); +	rcu_read_lock(); +	if (bond_uses_primary(bond)) { +		slave = rcu_dereference(bond->curr_active_slave); +		if (slave) { +			dev_uc_sync(slave->dev, bond_dev); +			dev_mc_sync(slave->dev, bond_dev); +		} +	} else { +		bond_for_each_slave_rcu(bond, slave, iter) { +			dev_uc_sync_multiple(slave->dev, bond_dev); +			dev_mc_sync_multiple(slave->dev, bond_dev); +		} +	} +	rcu_read_unlock(); +} -	read_lock(&bond->lock); +static int bond_neigh_init(struct neighbour *n) +{ +	struct bonding *bond = netdev_priv(n->dev); +	const struct net_device_ops *slave_ops; +	struct neigh_parms parms; +	struct slave *slave; +	int ret; -	bond->flags = bond_dev->flags; +	slave = bond_first_slave(bond); +	if (!slave) +		return 0; +	slave_ops = slave->dev->netdev_ops; +	if (!slave_ops->ndo_neigh_setup) +		return 0; -	/* looking for addresses to add to slaves' mc list */ -	netdev_for_each_mc_addr(ha, bond_dev) { -		found = bond_addr_in_mc_list(ha->addr, &bond->mc_list, -					     bond_dev->addr_len); -		if (!found) -			bond_mc_add(bond, ha->addr); -	} +	parms.neigh_setup = NULL; +	parms.neigh_cleanup = NULL; +	ret = slave_ops->ndo_neigh_setup(slave->dev, &parms); +	if (ret) +		return ret; -	/* looking for addresses to delete from slaves' list */ -	netdev_hw_addr_list_for_each(ha, &bond->mc_list) { -		found = bond_addr_in_mc_list(ha->addr, &bond_dev->mc, -					     bond_dev->addr_len); -		if (!found) -			bond_mc_del(bond, ha->addr); -	} +	/* +	 * Assign slave's neigh_cleanup to neighbour in case cleanup is called +	 * after the last slave has been detached.  
Assumes that all slaves +	 * utilize the same neigh_cleanup (true at this writing as only user +	 * is ipoib). +	 */ +	n->parms->neigh_cleanup = parms.neigh_cleanup; -	/* save master's multicast list */ -	__hw_addr_flush(&bond->mc_list); -	__hw_addr_add_multiple(&bond->mc_list, &bond_dev->mc, -			       bond_dev->addr_len, NETDEV_HW_ADDR_T_MULTICAST); +	if (!parms.neigh_setup) +		return 0; -	read_unlock(&bond->lock); +	return parms.neigh_setup(n);  } -static int bond_neigh_setup(struct net_device *dev, struct neigh_parms *parms) +/* + * The bonding ndo_neigh_setup is called at init time beofre any + * slave exists. So we must declare proxy setup function which will + * be used at run time to resolve the actual slave neigh param setup. + * + * It's also called by master devices (such as vlans) to setup their + * underlying devices. In that case - do nothing, we're already set up from + * our init. + */ +static int bond_neigh_setup(struct net_device *dev, +			    struct neigh_parms *parms)  { -	struct bonding *bond = netdev_priv(dev); -	struct slave *slave = bond->first_slave; +	/* modify only our neigh_parms */ +	if (parms->dev == dev) +		parms->neigh_setup = bond_neigh_init; -	if (slave) { -		const struct net_device_ops *slave_ops -			= slave->dev->netdev_ops; -		if (slave_ops->ndo_neigh_setup) -			return slave_ops->ndo_neigh_setup(slave->dev, parms); -	}  	return 0;  } @@ -4144,12 +3423,12 @@ static int bond_neigh_setup(struct net_device *dev, struct neigh_parms *parms)  static int bond_change_mtu(struct net_device *bond_dev, int new_mtu)  {  	struct bonding *bond = netdev_priv(bond_dev); -	struct slave *slave, *stop_at; +	struct slave *slave, *rollback_slave; +	struct list_head *iter;  	int res = 0; -	int i; -	pr_debug("bond=%p, name=%s, new_mtu=%d\n", bond, -		 (bond_dev ? bond_dev->name : "None"), new_mtu); +	pr_debug("bond=%p, name=%s, new_mtu=%d\n", +		 bond, bond_dev ? bond_dev->name : "None", new_mtu);  	/* Can't hold bond->lock with bh disabled here since  	 * some base drivers panic. On the other hand we can't @@ -4166,11 +3445,9 @@ static int bond_change_mtu(struct net_device *bond_dev, int new_mtu)  	 * call to the base driver.  	 
*/ -	bond_for_each_slave(bond, slave, i) { -		pr_debug("s %p s->p %p c_m %p\n", -			 slave, -			 slave->prev, -			 slave->dev->netdev_ops->ndo_change_mtu); +	bond_for_each_slave(bond, slave, iter) { +		pr_debug("s %p c_m %p\n", +			 slave, slave->dev->netdev_ops->ndo_change_mtu);  		res = dev_set_mtu(slave->dev, new_mtu); @@ -4194,14 +3471,16 @@ static int bond_change_mtu(struct net_device *bond_dev, int new_mtu)  unwind:  	/* unwind from head to the slave that failed */ -	stop_at = slave; -	bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) { +	bond_for_each_slave(bond, rollback_slave, iter) {  		int tmp_res; -		tmp_res = dev_set_mtu(slave->dev, bond_dev->mtu); +		if (rollback_slave == slave) +			break; + +		tmp_res = dev_set_mtu(rollback_slave->dev, bond_dev->mtu);  		if (tmp_res) {  			pr_debug("unwind err %d dev %s\n", -				 tmp_res, slave->dev->name); +				 tmp_res, rollback_slave->dev->name);  		}  	} @@ -4218,23 +3497,23 @@ unwind:  static int bond_set_mac_address(struct net_device *bond_dev, void *addr)  {  	struct bonding *bond = netdev_priv(bond_dev); +	struct slave *slave, *rollback_slave;  	struct sockaddr *sa = addr, tmp_sa; -	struct slave *slave, *stop_at; +	struct list_head *iter;  	int res = 0; -	int i; -	if (bond->params.mode == BOND_MODE_ALB) +	if (BOND_MODE(bond) == BOND_MODE_ALB)  		return bond_alb_set_mac_address(bond_dev, addr);  	pr_debug("bond=%p, name=%s\n",  		 bond, bond_dev ? bond_dev->name : "None"); -	/* -	 * If fail_over_mac is set to active, do nothing and return -	 * success.  Returning an error causes ifenslave to fail. +	/* If fail_over_mac is enabled, do nothing and return success. +	 * Returning an error causes ifenslave to fail.  	 */ -	if (bond->params.fail_over_mac == BOND_FOM_ACTIVE) +	if (bond->params.fail_over_mac && +	    BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP)  		return 0;  	if (!is_valid_ether_addr(sa->sa_data)) @@ -4255,16 +3534,8 @@ static int bond_set_mac_address(struct net_device *bond_dev, void *addr)  	 * call to the base driver.  	 */ -	bond_for_each_slave(bond, slave, i) { -		const struct net_device_ops *slave_ops = slave->dev->netdev_ops; +	bond_for_each_slave(bond, slave, iter) {  		pr_debug("slave %p %s\n", slave, slave->dev->name); - -		if (slave_ops->ndo_set_mac_address == NULL) { -			res = -EOPNOTSUPP; -			pr_debug("EOPNOTSUPP %s\n", slave->dev->name); -			goto unwind; -		} -  		res = dev_set_mac_address(slave->dev, addr);  		if (res) {  			/* TODO: consider downing the slave @@ -4287,82 +3558,122 @@ unwind:  	tmp_sa.sa_family = bond_dev->type;  	/* unwind from head to the slave that failed */ -	stop_at = slave; -	bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) { +	bond_for_each_slave(bond, rollback_slave, iter) {  		int tmp_res; -		tmp_res = dev_set_mac_address(slave->dev, &tmp_sa); +		if (rollback_slave == slave) +			break; + +		tmp_res = dev_set_mac_address(rollback_slave->dev, &tmp_sa);  		if (tmp_res) {  			pr_debug("unwind err %d dev %s\n", -				 tmp_res, slave->dev->name); +				 tmp_res, rollback_slave->dev->name);  		}  	}  	return res;  } +/** + * bond_xmit_slave_id - transmit skb through slave with slave_id + * @bond: bonding device that is transmitting + * @skb: buffer to transmit + * @slave_id: slave id up to slave_cnt-1 through which to transmit + * + * This function tries to transmit through slave with slave_id but in case + * it fails, it tries to find the first available slave for transmission. + * The skb is consumed in all cases, thus the function is void. 
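
bond_change_mtu() and bond_set_mac_address() above share an "apply to every slave, unwind on failure" shape: walk the slave list, and if one device rejects the change, walk it again from the head and restore the previous value on everything already modified. A self-contained sketch of that shape, with invented fake_dev and set_mtu() stand-ins:

        #include <stdio.h>

        struct fake_dev {
                const char *name;
                int mtu;
                int max_mtu;    /* stand-in for a driver that rejects large MTUs */
        };

        static int set_mtu(struct fake_dev *dev, int mtu)
        {
                if (mtu > dev->max_mtu)
                        return -1;
                dev->mtu = mtu;
                return 0;
        }

        static int change_mtu_all(struct fake_dev *devs, int n, int new_mtu, int old_mtu)
        {
                int i, failed = -1;

                for (i = 0; i < n; i++) {
                        if (set_mtu(&devs[i], new_mtu)) {
                                failed = i;
                                break;
                        }
                }
                if (failed < 0)
                        return 0;

                /* unwind from the head up to (not including) the one that failed */
                for (i = 0; i < failed; i++)
                        set_mtu(&devs[i], old_mtu);
                return -1;
        }

        int main(void)
        {
                struct fake_dev devs[] = {
                        { "eth0", 1500, 9000 },
                        { "eth1", 1500, 1500 }, /* will refuse jumbo frames */
                };

                if (change_mtu_all(devs, 2, 9000, 1500))
                        printf("MTU change rejected, %s still at %d\n",
                               devs[0].name, devs[0].mtu);
                return 0;
        }
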
+ */ +static void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id) +{ +	struct list_head *iter; +	struct slave *slave; +	int i = slave_id; + +	/* Here we start from the slave with slave_id */ +	bond_for_each_slave_rcu(bond, slave, iter) { +		if (--i < 0) { +			if (bond_slave_can_tx(slave)) { +				bond_dev_queue_xmit(bond, skb, slave->dev); +				return; +			} +		} +	} + +	/* Here we start from the first slave up to slave_id */ +	i = slave_id; +	bond_for_each_slave_rcu(bond, slave, iter) { +		if (--i < 0) +			break; +		if (bond_slave_can_tx(slave)) { +			bond_dev_queue_xmit(bond, skb, slave->dev); +			return; +		} +	} +	/* no slave that can tx has been found */ +	dev_kfree_skb_any(skb); +} + +/** + * bond_rr_gen_slave_id - generate slave id based on packets_per_slave + * @bond: bonding device to use + * + * Based on the value of the bonding device's packets_per_slave parameter + * this function generates a slave id, which is usually used as the next + * slave to transmit through. + */ +static u32 bond_rr_gen_slave_id(struct bonding *bond) +{ +	u32 slave_id; +	struct reciprocal_value reciprocal_packets_per_slave; +	int packets_per_slave = bond->params.packets_per_slave; + +	switch (packets_per_slave) { +	case 0: +		slave_id = prandom_u32(); +		break; +	case 1: +		slave_id = bond->rr_tx_counter; +		break; +	default: +		reciprocal_packets_per_slave = +			bond->params.reciprocal_packets_per_slave; +		slave_id = reciprocal_divide(bond->rr_tx_counter, +					     reciprocal_packets_per_slave); +		break; +	} +	bond->rr_tx_counter++; + +	return slave_id; +} +  static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev); -	struct slave *slave, *start_at; -	int i, slave_no, res = 1;  	struct iphdr *iph = ip_hdr(skb); +	struct slave *slave; +	u32 slave_id; -	read_lock(&bond->lock); - -	if (!BOND_IS_OK(bond)) -		goto out; -	/* -	 * Start with the curr_active_slave that joined the bond as the +	/* Start with the curr_active_slave that joined the bond as the  	 * default for sending IGMP traffic.  For failover purposes one  	 * needs to maintain some consistency for the interface that will  	 * send the join/membership reports.  The curr_active_slave found  	 * will send all of this type of traffic.  	 */ -	if ((iph->protocol == IPPROTO_IGMP) && -	    (skb->protocol == htons(ETH_P_IP))) { - -		read_lock(&bond->curr_slave_lock); -		slave = bond->curr_active_slave; -		read_unlock(&bond->curr_slave_lock); - -		if (!slave) -			goto out; +	if (iph->protocol == IPPROTO_IGMP && skb->protocol == htons(ETH_P_IP)) { +		slave = rcu_dereference(bond->curr_active_slave); +		if (slave && bond_slave_can_tx(slave)) +			bond_dev_queue_xmit(bond, skb, slave->dev); +		else +			bond_xmit_slave_id(bond, skb, 0);  	} else { -		/* -		 * Concurrent TX may collide on rr_tx_counter; we accept -		 * that as being rare enough not to justify using an -		 * atomic op here. 
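
bond_rr_gen_slave_id() above maps the free-running rr_tx_counter to a slave index: 0 means pick randomly, 1 means strict round robin, and any larger packets_per_slave value keeps the same slave for that many packets in a row by dividing the counter. The kernel avoids the runtime division with reciprocal_divide(); the sketch below uses plain division for clarity and rand() as a stand-in for prandom_u32().

        #include <stdint.h>
        #include <stdio.h>
        #include <stdlib.h>

        static uint32_t rr_tx_counter;

        static uint32_t rr_gen_slave_id(unsigned int packets_per_slave)
        {
                uint32_t slave_id;

                switch (packets_per_slave) {
                case 0:
                        slave_id = (uint32_t)rand();    /* prandom_u32() stand-in */
                        break;
                case 1:
                        slave_id = rr_tx_counter;
                        break;
                default:
                        slave_id = rr_tx_counter / packets_per_slave;
                        break;
                }
                rr_tx_counter++;
                return slave_id;
        }

        int main(void)
        {
                unsigned int slave_cnt = 2;
                int i;

                /* with packets_per_slave = 3, the pattern is 0 0 0 1 1 1 0 0 0 */
                for (i = 0; i < 9; i++)
                        printf("%u ", rr_gen_slave_id(3) % slave_cnt);
                printf("\n");
                return 0;
        }
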
-		 */ -		slave_no = bond->rr_tx_counter++ % bond->slave_cnt; - -		bond_for_each_slave(bond, slave, i) { -			slave_no--; -			if (slave_no < 0) -				break; -		} -	} - -	start_at = slave; -	bond_for_each_slave_from(bond, slave, i, start_at) { -		if (IS_UP(slave->dev) && -		    (slave->link == BOND_LINK_UP) && -		    (slave->state == BOND_STATE_ACTIVE)) { -			res = bond_dev_queue_xmit(bond, skb, slave->dev); -			break; -		} +		slave_id = bond_rr_gen_slave_id(bond); +		bond_xmit_slave_id(bond, skb, slave_id % bond->slave_cnt);  	} -out: -	if (res) { -		/* no suitable interface, frame not sent */ -		dev_kfree_skb(skb); -	} -	read_unlock(&bond->lock);  	return NETDEV_TX_OK;  } -  /*   * in active-backup mode, we know that bond->curr_active_slave is always valid if   * the bond has a usable interface. @@ -4370,213 +3681,123 @@ out:  static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev); -	int res = 1; - -	read_lock(&bond->lock); -	read_lock(&bond->curr_slave_lock); - -	if (!BOND_IS_OK(bond)) -		goto out; - -	if (!bond->curr_active_slave) -		goto out; - -	res = bond_dev_queue_xmit(bond, skb, bond->curr_active_slave->dev); +	struct slave *slave; -out: -	if (res) -		/* no suitable interface, frame not sent */ -		dev_kfree_skb(skb); +	slave = rcu_dereference(bond->curr_active_slave); +	if (slave) +		bond_dev_queue_xmit(bond, skb, slave->dev); +	else +		dev_kfree_skb_any(skb); -	read_unlock(&bond->curr_slave_lock); -	read_unlock(&bond->lock);  	return NETDEV_TX_OK;  } -/* - * In bond_xmit_xor() , we determine the output device by using a pre- +/* In bond_xmit_xor() , we determine the output device by using a pre-   * determined xmit_hash_policy(), If the selected device is not enabled,   * find the next active slave.   */  static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev); -	struct slave *slave, *start_at; -	int slave_no; -	int i; -	int res = 1; - -	read_lock(&bond->lock); - -	if (!BOND_IS_OK(bond)) -		goto out; - -	slave_no = bond->xmit_hash_policy(skb, bond->slave_cnt); - -	bond_for_each_slave(bond, slave, i) { -		slave_no--; -		if (slave_no < 0) -			break; -	} - -	start_at = slave; -	bond_for_each_slave_from(bond, slave, i, start_at) { -		if (IS_UP(slave->dev) && -		    (slave->link == BOND_LINK_UP) && -		    (slave->state == BOND_STATE_ACTIVE)) { -			res = bond_dev_queue_xmit(bond, skb, slave->dev); -			break; -		} -	} +	bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb) % bond->slave_cnt); -out: -	if (res) { -		/* no suitable interface, frame not sent */ -		dev_kfree_skb(skb); -	} -	read_unlock(&bond->lock);  	return NETDEV_TX_OK;  } -/* - * in broadcast mode, we send everything to all usable interfaces. - */ +/* in broadcast mode, we send everything to all usable interfaces. 
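
bond_xmit_xor() above simply reduces bond_xmit_hash(), added earlier in this diff, modulo the slave count. The userspace sketch below reproduces the layer-3+4 fold for one invented flow; the struct name, field packing and example values are illustrative only, since in the kernel they come from skb_flow_dissect().

        #include <stdint.h>
        #include <stdio.h>

        struct flow {
                uint32_t src;   /* IPv4 saddr (or folded IPv6 hash) */
                uint32_t dst;   /* IPv4 daddr (or folded IPv6 hash) */
                uint32_t ports; /* source/destination ports packed into one word */
        };

        static uint32_t xmit_hash_l34(const struct flow *fk)
        {
                uint32_t hash = fk->ports;

                /* same fold as bond_xmit_hash(): mix addresses, then xor-shift */
                hash ^= fk->dst ^ fk->src;
                hash ^= hash >> 16;
                hash ^= hash >> 8;
                return hash;
        }

        int main(void)
        {
                struct flow fk = {
                        .src   = 0xc0a80001,            /* 192.168.0.1 */
                        .dst   = 0xc0a80002,            /* 192.168.0.2 */
                        .ports = (12345u << 16) | 80u,
                };
                unsigned int slave_cnt = 2;

                /* the xor mode then picks hash % slave_cnt, as in bond_xmit_xor() */
                printf("hash %#x -> slave %u\n", xmit_hash_l34(&fk),
                       xmit_hash_l34(&fk) % slave_cnt);
                return 0;
        }
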
*/  static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev); -	struct slave *slave, *start_at; -	struct net_device *tx_dev = NULL; -	int i; -	int res = 1; - -	read_lock(&bond->lock); - -	if (!BOND_IS_OK(bond)) -		goto out; - -	read_lock(&bond->curr_slave_lock); -	start_at = bond->curr_active_slave; -	read_unlock(&bond->curr_slave_lock); - -	if (!start_at) -		goto out; +	struct slave *slave = NULL; +	struct list_head *iter; -	bond_for_each_slave_from(bond, slave, i, start_at) { -		if (IS_UP(slave->dev) && -		    (slave->link == BOND_LINK_UP) && -		    (slave->state == BOND_STATE_ACTIVE)) { -			if (tx_dev) { -				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); -				if (!skb2) { -					pr_err("%s: Error: bond_xmit_broadcast(): skb_clone() failed\n", -					       bond_dev->name); -					continue; -				} +	bond_for_each_slave_rcu(bond, slave, iter) { +		if (bond_is_last_slave(bond, slave)) +			break; +		if (bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) { +			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); -				res = bond_dev_queue_xmit(bond, skb2, tx_dev); -				if (res) { -					dev_kfree_skb(skb2); -					continue; -				} +			if (!skb2) { +				net_err_ratelimited("%s: Error: %s: skb_clone() failed\n", +						    bond_dev->name, __func__); +				continue;  			} -			tx_dev = slave->dev; +			/* bond_dev_queue_xmit always returns 0 */ +			bond_dev_queue_xmit(bond, skb2, slave->dev);  		}  	} +	if (slave && bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) +		bond_dev_queue_xmit(bond, skb, slave->dev); +	else +		dev_kfree_skb_any(skb); -	if (tx_dev) -		res = bond_dev_queue_xmit(bond, skb, tx_dev); - -out: -	if (res) -		/* no suitable interface, frame not sent */ -		dev_kfree_skb(skb); - -	/* frame sent to all suitable interfaces */ -	read_unlock(&bond->lock);  	return NETDEV_TX_OK;  }  /*------------------------- Device initialization ---------------------------*/ -static void bond_set_xmit_hash_policy(struct bonding *bond) -{ -	switch (bond->params.xmit_policy) { -	case BOND_XMIT_POLICY_LAYER23: -		bond->xmit_hash_policy = bond_xmit_hash_policy_l23; -		break; -	case BOND_XMIT_POLICY_LAYER34: -		bond->xmit_hash_policy = bond_xmit_hash_policy_l34; -		break; -	case BOND_XMIT_POLICY_LAYER2: -	default: -		bond->xmit_hash_policy = bond_xmit_hash_policy_l2; -		break; -	} -} -  /*   * Lookup the slave that corresponds to a qid   */  static inline int bond_slave_override(struct bonding *bond,  				      struct sk_buff *skb)  { -	int i, res = 1;  	struct slave *slave = NULL; -	struct slave *check_slave; +	struct list_head *iter; -	read_lock(&bond->lock); - -	if (!BOND_IS_OK(bond) || !skb->queue_mapping) -		goto out; +	if (!skb->queue_mapping) +		return 1;  	/* Find out if any slaves have the same mapping as this skb. */ -	bond_for_each_slave(bond, check_slave, i) { -		if (check_slave->queue_id == skb->queue_mapping) { -			slave = check_slave; +	bond_for_each_slave_rcu(bond, slave, iter) { +		if (slave->queue_id == skb->queue_mapping) { +			if (bond_slave_can_tx(slave)) { +				bond_dev_queue_xmit(bond, skb, slave->dev); +				return 0; +			} +			/* If the slave isn't UP, use default transmit policy. */  			break;  		}  	} -	/* If the slave isn't UP, use default transmit policy. 
*/ -	if (slave && slave->queue_id && IS_UP(slave->dev) && -	    (slave->link == BOND_LINK_UP)) { -		res = bond_dev_queue_xmit(bond, skb, slave->dev); -	} - -out: -	read_unlock(&bond->lock); -	return res; +	return 1;  } -static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb) + +static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb, +			     void *accel_priv, select_queue_fallback_t fallback)  {  	/*  	 * This helper function exists to help dev_pick_tx get the correct -	 * destination queue.  Using a helper function skips the a call to +	 * destination queue.  Using a helper function skips a call to  	 * skb_tx_hash and will put the skbs in the queue we expect on their  	 * way down to the bonding driver.  	 */ -	return skb->queue_mapping; -} - -static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev) -{ -	struct bonding *bond = netdev_priv(dev); +	u16 txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0;  	/* -	 * If we risk deadlock from transmitting this in the -	 * netpoll path, tell netpoll to queue the frame for later tx +	 * Save the original txq to restore before passing to the driver  	 */ -	if (is_netpoll_tx_blocked(dev)) -		return NETDEV_TX_BUSY; +	qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping; -	if (TX_QUEUE_OVERRIDE(bond->params.mode)) { -		if (!bond_slave_override(bond, skb)) -			return NETDEV_TX_OK; +	if (unlikely(txq >= dev->real_num_tx_queues)) { +		do { +			txq -= dev->real_num_tx_queues; +		} while (txq >= dev->real_num_tx_queues);  	} +	return txq; +} -	switch (bond->params.mode) { +static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ +	struct bonding *bond = netdev_priv(dev); + +	if (bond_should_override_tx_queue(bond) && +	    !bond_slave_override(bond, skb)) +		return NETDEV_TX_OK; + +	switch (BOND_MODE(bond)) {  	case BOND_MODE_ROUNDROBIN:  		return bond_xmit_roundrobin(skb, dev);  	case BOND_MODE_ACTIVEBACKUP: @@ -4588,69 +3809,86 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)  	case BOND_MODE_8023AD:  		return bond_3ad_xmit_xor(skb, dev);  	case BOND_MODE_ALB: -	case BOND_MODE_TLB:  		return bond_alb_xmit(skb, dev); +	case BOND_MODE_TLB: +		return bond_tlb_xmit(skb, dev);  	default:  		/* Should never happen, mode already checked */  		pr_err("%s: Error: Unknown bonding mode %d\n", -		       dev->name, bond->params.mode); +		       dev->name, BOND_MODE(bond));  		WARN_ON_ONCE(1); -		dev_kfree_skb(skb); +		dev_kfree_skb_any(skb);  		return NETDEV_TX_OK;  	}  } +static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ +	struct bonding *bond = netdev_priv(dev); +	netdev_tx_t ret = NETDEV_TX_OK; -/* - * set bond mode specific net device operations - */ -void bond_set_mode_ops(struct bonding *bond, int mode) +	/* +	 * If we risk deadlock from transmitting this in the +	 * netpoll path, tell netpoll to queue the frame for later tx +	 */ +	if (unlikely(is_netpoll_tx_blocked(dev))) +		return NETDEV_TX_BUSY; + +	rcu_read_lock(); +	if (bond_has_slaves(bond)) +		ret = __bond_start_xmit(skb, dev); +	else +		dev_kfree_skb_any(skb); +	rcu_read_unlock(); + +	return ret; +} + +static int bond_ethtool_get_settings(struct net_device *bond_dev, +				     struct ethtool_cmd *ecmd)  { -	struct net_device *bond_dev = bond->dev; +	struct bonding *bond = netdev_priv(bond_dev); +	unsigned long speed = 0; +	struct list_head *iter; +	struct slave *slave; -	switch (mode) { -	case BOND_MODE_ROUNDROBIN: -		break; -	
case BOND_MODE_ACTIVEBACKUP: -		break; -	case BOND_MODE_XOR: -		bond_set_xmit_hash_policy(bond); -		break; -	case BOND_MODE_BROADCAST: -		break; -	case BOND_MODE_8023AD: -		bond_set_master_3ad_flags(bond); -		bond_set_xmit_hash_policy(bond); -		break; -	case BOND_MODE_ALB: -		bond_set_master_alb_flags(bond); -		/* FALLTHRU */ -	case BOND_MODE_TLB: -		break; -	default: -		/* Should never happen, mode already checked */ -		pr_err("%s: Error: Unknown bonding mode %d\n", -		       bond_dev->name, mode); -		break; +	ecmd->duplex = DUPLEX_UNKNOWN; +	ecmd->port = PORT_OTHER; + +	/* Since bond_slave_can_tx returns false for all inactive or down slaves, we +	 * do not need to check mode.  Though link speed might not represent +	 * the true receive or transmit bandwidth (not all modes are symmetric) +	 * this is an accurate maximum. +	 */ +	read_lock(&bond->lock); +	bond_for_each_slave(bond, slave, iter) { +		if (bond_slave_can_tx(slave)) { +			if (slave->speed != SPEED_UNKNOWN) +				speed += slave->speed; +			if (ecmd->duplex == DUPLEX_UNKNOWN && +			    slave->duplex != DUPLEX_UNKNOWN) +				ecmd->duplex = slave->duplex; +		}  	} +	ethtool_cmd_speed_set(ecmd, speed ? : SPEED_UNKNOWN); +	read_unlock(&bond->lock); + +	return 0;  }  static void bond_ethtool_get_drvinfo(struct net_device *bond_dev, -				    struct ethtool_drvinfo *drvinfo) +				     struct ethtool_drvinfo *drvinfo)  { -	strncpy(drvinfo->driver, DRV_NAME, 32); -	strncpy(drvinfo->version, DRV_VERSION, 32); -	snprintf(drvinfo->fw_version, 32, "%d", BOND_ABI_VERSION); +	strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver)); +	strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version)); +	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d", +		 BOND_ABI_VERSION);  }  static const struct ethtool_ops bond_ethtool_ops = {  	.get_drvinfo		= bond_ethtool_get_drvinfo, +	.get_settings		= bond_ethtool_get_settings,  	.get_link		= ethtool_op_get_link, -	.get_tx_csum		= ethtool_op_get_tx_csum, -	.get_sg			= ethtool_op_get_sg, -	.get_tso		= ethtool_op_get_tso, -	.get_ufo		= ethtool_op_get_ufo, -	.get_flags		= ethtool_op_get_flags,  };  static const struct net_device_ops bond_netdev_ops = { @@ -4662,17 +3900,25 @@ static const struct net_device_ops bond_netdev_ops = {  	.ndo_select_queue	= bond_select_queue,  	.ndo_get_stats64	= bond_get_stats,  	.ndo_do_ioctl		= bond_do_ioctl, -	.ndo_set_multicast_list	= bond_set_multicast_list, +	.ndo_change_rx_flags	= bond_change_rx_flags, +	.ndo_set_rx_mode	= bond_set_rx_mode,  	.ndo_change_mtu		= bond_change_mtu, -	.ndo_set_mac_address 	= bond_set_mac_address, +	.ndo_set_mac_address	= bond_set_mac_address,  	.ndo_neigh_setup	= bond_neigh_setup, -	.ndo_vlan_rx_register	= bond_vlan_rx_register, -	.ndo_vlan_rx_add_vid 	= bond_vlan_rx_add_vid, +	.ndo_vlan_rx_add_vid	= bond_vlan_rx_add_vid,  	.ndo_vlan_rx_kill_vid	= bond_vlan_rx_kill_vid,  #ifdef CONFIG_NET_POLL_CONTROLLER +	.ndo_netpoll_setup	= bond_netpoll_setup,  	.ndo_netpoll_cleanup	= bond_netpoll_cleanup,  	.ndo_poll_controller	= bond_poll_controller,  #endif +	.ndo_add_slave		= bond_enslave, +	.ndo_del_slave		= bond_release, +	.ndo_fix_features	= bond_fix_features, +}; + +static const struct device_type bond_type = { +	.name = "bond",  };  static void bond_destructor(struct net_device *bond_dev) @@ -4683,36 +3929,32 @@ static void bond_destructor(struct net_device *bond_dev)  	free_netdev(bond_dev);  } -static void bond_setup(struct net_device *bond_dev) +void bond_setup(struct net_device *bond_dev)  {  	struct bonding *bond = 
netdev_priv(bond_dev);  	/* initialize rwlocks */  	rwlock_init(&bond->lock);  	rwlock_init(&bond->curr_slave_lock); -  	bond->params = bonding_defaults;  	/* Initialize pointers */  	bond->dev = bond_dev; -	INIT_LIST_HEAD(&bond->vlan_list);  	/* Initialize the device entry points */  	ether_setup(bond_dev);  	bond_dev->netdev_ops = &bond_netdev_ops;  	bond_dev->ethtool_ops = &bond_ethtool_ops; -	bond_set_mode_ops(bond, bond->params.mode);  	bond_dev->destructor = bond_destructor; +	SET_NETDEV_DEVTYPE(bond_dev, &bond_type); +  	/* Initialize the device options */  	bond_dev->tx_queue_len = 0;  	bond_dev->flags |= IFF_MASTER|IFF_MULTICAST; -	bond_dev->priv_flags |= IFF_BONDING; -	bond_dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; - -	if (bond->params.arp_interval) -		bond_dev->priv_flags |= IFF_MASTER_ARPMON; +	bond_dev->priv_flags |= IFF_BONDING | IFF_UNICAST_FLT; +	bond_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);  	/* At first, we block adding VLANs. That's the only way to  	 * prevent problems that occur when adding VLANs over an @@ -4731,38 +3973,18 @@ static void bond_setup(struct net_device *bond_dev)  	 * when there are slaves that are not hw accel  	 * capable  	 */ -	bond_dev->features |= (NETIF_F_HW_VLAN_TX | -			       NETIF_F_HW_VLAN_RX | -			       NETIF_F_HW_VLAN_FILTER); - -	/* By default, we enable GRO on bonding devices. -	 * Actual support requires lowlevel drivers are GRO ready. -	 */ -	bond_dev->features |= NETIF_F_GRO; -} - -static void bond_work_cancel_all(struct bonding *bond) -{ -	write_lock_bh(&bond->lock); -	bond->kill_timers = 1; -	write_unlock_bh(&bond->lock); -	if (bond->params.miimon && delayed_work_pending(&bond->mii_work)) -		cancel_delayed_work(&bond->mii_work); +	/* Don't allow bond devices to change network namespaces. 
*/ +	bond_dev->features |= NETIF_F_NETNS_LOCAL; -	if (bond->params.arp_interval && delayed_work_pending(&bond->arp_work)) -		cancel_delayed_work(&bond->arp_work); +	bond_dev->hw_features = BOND_VLAN_FEATURES | +				NETIF_F_HW_VLAN_CTAG_TX | +				NETIF_F_HW_VLAN_CTAG_RX | +				NETIF_F_HW_VLAN_CTAG_FILTER; -	if (bond->params.mode == BOND_MODE_ALB && -	    delayed_work_pending(&bond->alb_work)) -		cancel_delayed_work(&bond->alb_work); - -	if (bond->params.mode == BOND_MODE_8023AD && -	    delayed_work_pending(&bond->ad_work)) -		cancel_delayed_work(&bond->ad_work); - -	if (delayed_work_pending(&bond->mcast_work)) -		cancel_delayed_work(&bond->mcast_work); +	bond_dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_HW_CSUM); +	bond_dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; +	bond_dev->features |= bond_dev->hw_features;  }  /* @@ -4772,92 +3994,59 @@ static void bond_work_cancel_all(struct bonding *bond)  static void bond_uninit(struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev); -	struct vlan_entry *vlan, *tmp; +	struct list_head *iter; +	struct slave *slave;  	bond_netpoll_cleanup(bond_dev);  	/* Release the bonded slaves */ -	bond_release_all(bond_dev); +	bond_for_each_slave(bond, slave, iter) +		__bond_release_one(bond_dev, slave->dev, true); +	pr_info("%s: Released all slaves\n", bond_dev->name);  	list_del(&bond->bond_list); -	bond_work_cancel_all(bond); - -	bond_remove_proc_entry(bond); - -	__hw_addr_flush(&bond->mc_list); - -	list_for_each_entry_safe(vlan, tmp, &bond->vlan_list, vlan_list) { -		list_del(&vlan->vlan_list); -		kfree(vlan); -	} +	bond_debug_unregister(bond);  }  /*------------------------- Module initialization ---------------------------*/ -/* - * Convert string input module parms.  Accept either the - * number of the mode or its string name.  A bit complicated because - * some mode names are substrings of other names, and calls from sysfs - * may have whitespace in the name (trailing newlines, for example). - */ -int bond_parse_parm(const char *buf, const struct bond_parm_tbl *tbl) -{ -	int modeint = -1, i, rv; -	char *p, modestr[BOND_MAX_MODENAME_LEN + 1] = { 0, }; - -	for (p = (char *)buf; *p; p++) -		if (!(isdigit(*p) || isspace(*p))) -			break; - -	if (*p) -		rv = sscanf(buf, "%20s", modestr); -	else -		rv = sscanf(buf, "%d", &modeint); - -	if (!rv) -		return -1; - -	for (i = 0; tbl[i].modename; i++) { -		if (modeint == tbl[i].mode) -			return tbl[i].mode; -		if (strcmp(modestr, tbl[i].modename) == 0) -			return tbl[i].mode; -	} - -	return -1; -} -  static int bond_check_params(struct bond_params *params)  { -	int arp_validate_value, fail_over_mac_value, primary_reselect_value; +	int arp_validate_value, fail_over_mac_value, primary_reselect_value, i; +	struct bond_opt_value newval; +	const struct bond_opt_value *valptr; +	int arp_all_targets_value;  	/*  	 * Convert string parameters.  	 */  	if (mode) { -		bond_mode = bond_parse_parm(mode, bond_mode_tbl); -		if (bond_mode == -1) { -			pr_err("Error: Invalid bonding mode \"%s\"\n", -			       mode == NULL ? 
"NULL" : mode); +		bond_opt_initstr(&newval, mode); +		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_MODE), &newval); +		if (!valptr) { +			pr_err("Error: Invalid bonding mode \"%s\"\n", mode);  			return -EINVAL;  		} +		bond_mode = valptr->value;  	}  	if (xmit_hash_policy) {  		if ((bond_mode != BOND_MODE_XOR) && -		    (bond_mode != BOND_MODE_8023AD)) { +		    (bond_mode != BOND_MODE_8023AD) && +		    (bond_mode != BOND_MODE_TLB)) {  			pr_info("xmit_hash_policy param is irrelevant in mode %s\n", -			       bond_mode_name(bond_mode)); +				bond_mode_name(bond_mode));  		} else { -			xmit_hashtype = bond_parse_parm(xmit_hash_policy, -							xmit_hashtype_tbl); -			if (xmit_hashtype == -1) { +			bond_opt_initstr(&newval, xmit_hash_policy); +			valptr = bond_opt_parse(bond_opt_get(BOND_OPT_XMIT_HASH), +						&newval); +			if (!valptr) {  				pr_err("Error: Invalid xmit_hash_policy \"%s\"\n", -				       xmit_hash_policy == NULL ? "NULL" :  				       xmit_hash_policy);  				return -EINVAL;  			} +			xmit_hashtype = valptr->value;  		}  	} @@ -4866,110 +4055,101 @@ static int bond_check_params(struct bond_params *params)  			pr_info("lacp_rate param is irrelevant in mode %s\n",  				bond_mode_name(bond_mode));  		} else { -			lacp_fast = bond_parse_parm(lacp_rate, bond_lacp_tbl); -			if (lacp_fast == -1) { +			bond_opt_initstr(&newval, lacp_rate); +			valptr = bond_opt_parse(bond_opt_get(BOND_OPT_LACP_RATE), +						&newval); +			if (!valptr) {  				pr_err("Error: Invalid lacp rate \"%s\"\n", -				       lacp_rate == NULL ? "NULL" : lacp_rate); +				       lacp_rate);  				return -EINVAL;  			} +			lacp_fast = valptr->value;  		}  	}  	if (ad_select) { -		params->ad_select = bond_parse_parm(ad_select, ad_select_tbl); -		if (params->ad_select == -1) { -			pr_err("Error: Invalid ad_select \"%s\"\n", -			       ad_select == NULL ? 
"NULL" : ad_select); +		bond_opt_initstr(&newval, ad_select); +		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_AD_SELECT), +					&newval); +		if (!valptr) { +			pr_err("Error: Invalid ad_select \"%s\"\n", ad_select);  			return -EINVAL;  		} - -		if (bond_mode != BOND_MODE_8023AD) { -			pr_warning("ad_select param only affects 802.3ad mode\n"); -		} +		params->ad_select = valptr->value; +		if (bond_mode != BOND_MODE_8023AD) +			pr_warn("ad_select param only affects 802.3ad mode\n");  	} else {  		params->ad_select = BOND_AD_STABLE;  	}  	if (max_bonds < 0) { -		pr_warning("Warning: max_bonds (%d) not in range %d-%d, so it was reset to BOND_DEFAULT_MAX_BONDS (%d)\n", -			   max_bonds, 0, INT_MAX, BOND_DEFAULT_MAX_BONDS); +		pr_warn("Warning: max_bonds (%d) not in range %d-%d, so it was reset to BOND_DEFAULT_MAX_BONDS (%d)\n", +			max_bonds, 0, INT_MAX, BOND_DEFAULT_MAX_BONDS);  		max_bonds = BOND_DEFAULT_MAX_BONDS;  	}  	if (miimon < 0) { -		pr_warning("Warning: miimon module parameter (%d), not in range 0-%d, so it was reset to %d\n", -			   miimon, INT_MAX, BOND_LINK_MON_INTERV); -		miimon = BOND_LINK_MON_INTERV; +		pr_warn("Warning: miimon module parameter (%d), not in range 0-%d, so it was reset to 0\n", +			miimon, INT_MAX); +		miimon = 0;  	}  	if (updelay < 0) { -		pr_warning("Warning: updelay module parameter (%d), not in range 0-%d, so it was reset to 0\n", -			   updelay, INT_MAX); +		pr_warn("Warning: updelay module parameter (%d), not in range 0-%d, so it was reset to 0\n", +			updelay, INT_MAX);  		updelay = 0;  	}  	if (downdelay < 0) { -		pr_warning("Warning: downdelay module parameter (%d), not in range 0-%d, so it was reset to 0\n", -			   downdelay, INT_MAX); +		pr_warn("Warning: downdelay module parameter (%d), not in range 0-%d, so it was reset to 0\n", +			downdelay, INT_MAX);  		downdelay = 0;  	}  	if ((use_carrier != 0) && (use_carrier != 1)) { -		pr_warning("Warning: use_carrier module parameter (%d), not of valid value (0/1), so it was set to 1\n", -			   use_carrier); +		pr_warn("Warning: use_carrier module parameter (%d), not of valid value (0/1), so it was set to 1\n", +			use_carrier);  		use_carrier = 1;  	} -	if (num_grat_arp < 0 || num_grat_arp > 255) { -		pr_warning("Warning: num_grat_arp (%d) not in range 0-255 so it was reset to 1\n", -			   num_grat_arp); -		num_grat_arp = 1; +	if (num_peer_notif < 0 || num_peer_notif > 255) { +		pr_warn("Warning: num_grat_arp/num_unsol_na (%d) not in range 0-255 so it was reset to 1\n", +			num_peer_notif); +		num_peer_notif = 1;  	} -	if (num_unsol_na < 0 || num_unsol_na > 255) { -		pr_warning("Warning: num_unsol_na (%d) not in range 0-255 so it was reset to 1\n", -			   num_unsol_na); -		num_unsol_na = 1; -	} - -	/* reset values for 802.3ad */ -	if (bond_mode == BOND_MODE_8023AD) { +	/* reset values for 802.3ad/TLB/ALB */ +	if (!bond_mode_uses_arp(bond_mode)) {  		if (!miimon) { -			pr_warning("Warning: miimon must be specified, otherwise bonding will not detect link failure, speed and duplex which are essential for 802.3ad operation\n"); -			pr_warning("Forcing miimon to 100msec\n"); -			miimon = 100; +			pr_warn("Warning: miimon must be specified, otherwise bonding will not detect link failure, speed and duplex which are essential for 802.3ad operation\n"); +			pr_warn("Forcing miimon to 100msec\n"); +			miimon = BOND_DEFAULT_MIIMON;  		}  	}  	if (tx_queues < 1 || tx_queues > 255) { -		pr_warning("Warning: tx_queues (%d) should be between " -			   "1 and 255, resetting to %d\n", -			   tx_queues, 
BOND_DEFAULT_TX_QUEUES); +		pr_warn("Warning: tx_queues (%d) should be between 1 and 255, resetting to %d\n", +			tx_queues, BOND_DEFAULT_TX_QUEUES);  		tx_queues = BOND_DEFAULT_TX_QUEUES;  	}  	if ((all_slaves_active != 0) && (all_slaves_active != 1)) { -		pr_warning("Warning: all_slaves_active module parameter (%d), " -			   "not of valid value (0/1), so it was set to " -			   "0\n", all_slaves_active); +		pr_warn("Warning: all_slaves_active module parameter (%d), not of valid value (0/1), so it was set to 0\n", +			all_slaves_active);  		all_slaves_active = 0;  	}  	if (resend_igmp < 0 || resend_igmp > 255) { -		pr_warning("Warning: resend_igmp (%d) should be between " -			   "0 and 255, resetting to %d\n", -			   resend_igmp, BOND_DEFAULT_RESEND_IGMP); +		pr_warn("Warning: resend_igmp (%d) should be between 0 and 255, resetting to %d\n", +			resend_igmp, BOND_DEFAULT_RESEND_IGMP);  		resend_igmp = BOND_DEFAULT_RESEND_IGMP;  	} -	/* reset values for TLB/ALB */ -	if ((bond_mode == BOND_MODE_TLB) || -	    (bond_mode == BOND_MODE_ALB)) { -		if (!miimon) { -			pr_warning("Warning: miimon must be specified, otherwise bonding will not detect link failure and link speed which are essential for TLB/ALB load balancing\n"); -			pr_warning("Forcing miimon to 100msec\n"); -			miimon = 100; -		} +	bond_opt_initval(&newval, packets_per_slave); +	if (!bond_opt_parse(bond_opt_get(BOND_OPT_PACKETS_PER_SLAVE), &newval)) { +		pr_warn("Warning: packets_per_slave (%d) should be between 0 and %u resetting to 1\n", +			packets_per_slave, USHRT_MAX); +		packets_per_slave = 1;  	}  	if (bond_mode == BOND_MODE_ALB) { @@ -4982,149 +4162,171 @@ static int bond_check_params(struct bond_params *params)  			/* just warn the user the up/down delay will have  			 * no effect since miimon is zero...  			 
*/ -			pr_warning("Warning: miimon module parameter not set and updelay (%d) or downdelay (%d) module parameter is set; updelay and downdelay have no effect unless miimon is set\n", -				   updelay, downdelay); +			pr_warn("Warning: miimon module parameter not set and updelay (%d) or downdelay (%d) module parameter is set; updelay and downdelay have no effect unless miimon is set\n", +				updelay, downdelay);  		}  	} else {  		/* don't allow arp monitoring */  		if (arp_interval) { -			pr_warning("Warning: miimon (%d) and arp_interval (%d) can't be used simultaneously, disabling ARP monitoring\n", -				   miimon, arp_interval); +			pr_warn("Warning: miimon (%d) and arp_interval (%d) can't be used simultaneously, disabling ARP monitoring\n", +				miimon, arp_interval);  			arp_interval = 0;  		}  		if ((updelay % miimon) != 0) { -			pr_warning("Warning: updelay (%d) is not a multiple of miimon (%d), updelay rounded to %d ms\n", -				   updelay, miimon, -				   (updelay / miimon) * miimon); +			pr_warn("Warning: updelay (%d) is not a multiple of miimon (%d), updelay rounded to %d ms\n", +				updelay, miimon, (updelay / miimon) * miimon);  		}  		updelay /= miimon;  		if ((downdelay % miimon) != 0) { -			pr_warning("Warning: downdelay (%d) is not a multiple of miimon (%d), downdelay rounded to %d ms\n", -				   downdelay, miimon, -				   (downdelay / miimon) * miimon); +			pr_warn("Warning: downdelay (%d) is not a multiple of miimon (%d), downdelay rounded to %d ms\n", +				downdelay, miimon, +				(downdelay / miimon) * miimon);  		}  		downdelay /= miimon;  	}  	if (arp_interval < 0) { -		pr_warning("Warning: arp_interval module parameter (%d) , not in range 0-%d, so it was reset to %d\n", -			   arp_interval, INT_MAX, BOND_LINK_ARP_INTERV); -		arp_interval = BOND_LINK_ARP_INTERV; +		pr_warn("Warning: arp_interval module parameter (%d), not in range 0-%d, so it was reset to 0\n", +			arp_interval, INT_MAX); +		arp_interval = 0;  	} -	for (arp_ip_count = 0; -	     (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[arp_ip_count]; -	     arp_ip_count++) { +	for (arp_ip_count = 0, i = 0; +	     (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[i]; i++) {  		/* not complete check, but should be good enough to  		   catch mistakes */ -		if (!isdigit(arp_ip_target[arp_ip_count][0])) { -			pr_warning("Warning: bad arp_ip_target module parameter (%s), ARP monitoring will not be performed\n", -				   arp_ip_target[arp_ip_count]); +		__be32 ip; +		if (!in4_pton(arp_ip_target[i], -1, (u8 *)&ip, -1, NULL) || +		    !bond_is_ip_target_ok(ip)) { +			pr_warn("Warning: bad arp_ip_target module parameter (%s), ARP monitoring will not be performed\n", +				arp_ip_target[i]);  			arp_interval = 0;  		} else { -			__be32 ip = in_aton(arp_ip_target[arp_ip_count]); -			arp_target[arp_ip_count] = ip; +			if (bond_get_targets_ip(arp_target, ip) == -1) +				arp_target[arp_ip_count++] = ip; +			else +				pr_warn("Warning: duplicate address %pI4 in arp_ip_target, skipping\n", +					&ip);  		}  	}  	if (arp_interval && !arp_ip_count) {  		/* don't allow arping if no arp_ip_target given... 
*/ -		pr_warning("Warning: arp_interval module parameter (%d) specified without providing an arp_ip_target parameter, arp_interval was reset to 0\n", -			   arp_interval); +		pr_warn("Warning: arp_interval module parameter (%d) specified without providing an arp_ip_target parameter, arp_interval was reset to 0\n", +			arp_interval);  		arp_interval = 0;  	}  	if (arp_validate) { -		if (bond_mode != BOND_MODE_ACTIVEBACKUP) { -			pr_err("arp_validate only supported in active-backup mode\n"); -			return -EINVAL; -		}  		if (!arp_interval) {  			pr_err("arp_validate requires arp_interval\n");  			return -EINVAL;  		} -		arp_validate_value = bond_parse_parm(arp_validate, -						     arp_validate_tbl); -		if (arp_validate_value == -1) { +		bond_opt_initstr(&newval, arp_validate); +		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_ARP_VALIDATE), +					&newval); +		if (!valptr) {  			pr_err("Error: invalid arp_validate \"%s\"\n", -			       arp_validate == NULL ? "NULL" : arp_validate); +			       arp_validate);  			return -EINVAL;  		} -	} else +		arp_validate_value = valptr->value; +	} else {  		arp_validate_value = 0; +	} + +	arp_all_targets_value = 0; +	if (arp_all_targets) { +		bond_opt_initstr(&newval, arp_all_targets); +		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_ARP_ALL_TARGETS), +					&newval); +		if (!valptr) { +			pr_err("Error: invalid arp_all_targets_value \"%s\"\n", +			       arp_all_targets); +			arp_all_targets_value = 0; +		} else { +			arp_all_targets_value = valptr->value; +		} +	}  	if (miimon) {  		pr_info("MII link monitoring set to %d ms\n", miimon);  	} else if (arp_interval) { -		int i; - +		valptr = bond_opt_get_val(BOND_OPT_ARP_VALIDATE, +					  arp_validate_value);  		pr_info("ARP monitoring set to %d ms, validate %s, with %d target(s):", -			arp_interval, -			arp_validate_tbl[arp_validate_value].modename, -			arp_ip_count); +			arp_interval, valptr->string, arp_ip_count);  		for (i = 0; i < arp_ip_count; i++) -			pr_info(" %s", arp_ip_target[i]); +			pr_cont(" %s", arp_ip_target[i]); -		pr_info("\n"); +		pr_cont("\n");  	} else if (max_bonds) {  		/* miimon and arp_interval not set, we need one so things  		 * work as expected, see bonding.txt for details  		 */ -		pr_warning("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details.\n"); +		pr_debug("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details\n");  	} -	if (primary && !USES_PRIMARY(bond_mode)) { +	if (primary && !bond_mode_uses_primary(bond_mode)) {  		/* currently, using a primary only makes sense  		 * in active backup, TLB or ALB modes  		 */ -		pr_warning("Warning: %s primary device specified but has no effect in %s mode\n", -			   primary, bond_mode_name(bond_mode)); +		pr_warn("Warning: %s primary device specified but has no effect in %s mode\n", +			primary, bond_mode_name(bond_mode));  		primary = NULL;  	}  	if (primary && primary_reselect) { -		primary_reselect_value = bond_parse_parm(primary_reselect, -							 pri_reselect_tbl); -		if (primary_reselect_value == -1) { +		bond_opt_initstr(&newval, primary_reselect); +		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_PRIMARY_RESELECT), +					&newval); +		if (!valptr) {  			pr_err("Error: Invalid primary_reselect \"%s\"\n", -			       primary_reselect == -					NULL ? 
"NULL" : primary_reselect); +			       primary_reselect);  			return -EINVAL;  		} +		primary_reselect_value = valptr->value;  	} else {  		primary_reselect_value = BOND_PRI_RESELECT_ALWAYS;  	}  	if (fail_over_mac) { -		fail_over_mac_value = bond_parse_parm(fail_over_mac, -						      fail_over_mac_tbl); -		if (fail_over_mac_value == -1) { +		bond_opt_initstr(&newval, fail_over_mac); +		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_FAIL_OVER_MAC), +					&newval); +		if (!valptr) {  			pr_err("Error: invalid fail_over_mac \"%s\"\n", -			       arp_validate == NULL ? "NULL" : arp_validate); +			       fail_over_mac);  			return -EINVAL;  		} - +		fail_over_mac_value = valptr->value;  		if (bond_mode != BOND_MODE_ACTIVEBACKUP) -			pr_warning("Warning: fail_over_mac only affects active-backup mode.\n"); +			pr_warn("Warning: fail_over_mac only affects active-backup mode\n");  	} else {  		fail_over_mac_value = BOND_FOM_NONE;  	} +	if (lp_interval == 0) { +		pr_warn("Warning: ip_interval must be between 1 and %d, so it was reset to %d\n", +			INT_MAX, BOND_ALB_DEFAULT_LP_INTERVAL); +		lp_interval = BOND_ALB_DEFAULT_LP_INTERVAL; +	} +  	/* fill params struct with the proper values */  	params->mode = bond_mode;  	params->xmit_policy = xmit_hashtype;  	params->miimon = miimon; -	params->num_grat_arp = num_grat_arp; -	params->num_unsol_na = num_unsol_na; +	params->num_peer_notif = num_peer_notif;  	params->arp_interval = arp_interval;  	params->arp_validate = arp_validate_value; +	params->arp_all_targets = arp_all_targets_value;  	params->updelay = updelay;  	params->downdelay = downdelay;  	params->use_carrier = use_carrier; @@ -5135,6 +4337,20 @@ static int bond_check_params(struct bond_params *params)  	params->tx_queues = tx_queues;  	params->all_slaves_active = all_slaves_active;  	params->resend_igmp = resend_igmp; +	params->min_links = min_links; +	params->lp_interval = lp_interval; +	params->packets_per_slave = packets_per_slave; +	params->tlb_dynamic_lb = 1; /* Default value */ +	if (packets_per_slave > 0) { +		params->reciprocal_packets_per_slave = +			reciprocal_value(packets_per_slave); +	} else { +		/* reciprocal_packets_per_slave is unused if +		 * packets_per_slave is 0 or 1, just initialize it +		 */ +		params->reciprocal_packets_per_slave = +			(struct reciprocal_value) { 0 }; +	}  	if (primary) {  		strncpy(params->primary, primary, IFNAMSIZ); @@ -5148,6 +4364,7 @@ static int bond_check_params(struct bond_params *params)  static struct lock_class_key bonding_netdev_xmit_lock_key;  static struct lock_class_key bonding_netdev_addr_lock_key; +static struct lock_class_key bonding_tx_busylock_key;  static void bond_set_lockdep_class_one(struct net_device *dev,  				       struct netdev_queue *txq, @@ -5162,6 +4379,7 @@ static void bond_set_lockdep_class(struct net_device *dev)  	lockdep_set_class(&dev->addr_list_lock,  			  &bonding_netdev_addr_lock_key);  	netdev_for_each_tx_queue(dev, bond_set_lockdep_class_one, NULL); +	dev->qdisc_tx_busylock = &bonding_tx_busylock_key;  }  /* @@ -5171,44 +4389,44 @@ static int bond_init(struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev);  	struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id); +	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));  	pr_debug("Begin bond_init for %s\n", bond_dev->name); +	/* +	 * Initialize locks that may be required during +	 * en/deslave operations.  
All of the bond_open work +	 * (of which this is part) should really be moved to +	 * a phase prior to dev_open +	 */ +	spin_lock_init(&(bond_info->tx_hashtbl_lock)); +	spin_lock_init(&(bond_info->rx_hashtbl_lock)); +  	bond->wq = create_singlethread_workqueue(bond_dev->name);  	if (!bond->wq)  		return -ENOMEM;  	bond_set_lockdep_class(bond_dev); -	netif_carrier_off(bond_dev); - -	bond_create_proc_entry(bond);  	list_add_tail(&bond->bond_list, &bn->dev_list);  	bond_prepare_sysfs_group(bond); -	__hw_addr_init(&bond->mc_list); +	bond_debug_register(bond); + +	/* Ensure valid dev_addr */ +	if (is_zero_ether_addr(bond_dev->dev_addr) && +	    bond_dev->addr_assign_type == NET_ADDR_PERM) +		eth_hw_addr_random(bond_dev); +  	return 0;  } -static int bond_validate(struct nlattr *tb[], struct nlattr *data[]) +unsigned int bond_get_num_tx_queues(void)  { -	if (tb[IFLA_ADDRESS]) { -		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) -			return -EINVAL; -		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) -			return -EADDRNOTAVAIL; -	} -	return 0; +	return tx_queues;  } -static struct rtnl_link_ops bond_link_ops __read_mostly = { -	.kind		= "bond", -	.priv_size	= sizeof(struct bonding), -	.setup		= bond_setup, -	.validate	= bond_validate, -}; -  /* Create a new bond based on the specified name and bonding parameters.   * If name is NULL, obtain a suitable "bond%d" name for us.   * Caller must NOT hold rtnl_lock; we need to release it here before we @@ -5221,8 +4439,9 @@ int bond_create(struct net *net, const char *name)  	rtnl_lock(); -	bond_dev = alloc_netdev_mq(sizeof(struct bonding), name ? name : "", -				bond_setup, tx_queues); +	bond_dev = alloc_netdev_mq(sizeof(struct bonding), +				   name ? name : "bond%d", +				   bond_setup, tx_queues);  	if (!bond_dev) {  		pr_err("%s: eek! 
can't alloc netdev!\n", name);  		rtnl_unlock(); @@ -5232,24 +4451,10 @@ int bond_create(struct net *net, const char *name)  	dev_net_set(bond_dev, net);  	bond_dev->rtnl_link_ops = &bond_link_ops; -	if (!name) { -		res = dev_alloc_name(bond_dev, "bond%d"); -		if (res < 0) -			goto out; -	} else { -		/* -		 * If we're given a name to register -		 * we need to ensure that its not already -		 * registered -		 */ -		res = -EEXIST; -		if (__dev_get_by_name(net, name) != NULL) -			goto out; -	} -  	res = register_netdevice(bond_dev); -out: +	netif_carrier_off(bond_dev); +  	rtnl_unlock();  	if (res < 0)  		bond_destructor(bond_dev); @@ -5264,15 +4469,26 @@ static int __net_init bond_net_init(struct net *net)  	INIT_LIST_HEAD(&bn->dev_list);  	bond_create_proc_dir(bn); -	 +	bond_create_sysfs(bn); +  	return 0;  }  static void __net_exit bond_net_exit(struct net *net)  {  	struct bond_net *bn = net_generic(net, bond_net_id); +	struct bonding *bond, *tmp_bond; +	LIST_HEAD(list); +	bond_destroy_sysfs(bn);  	bond_destroy_proc_dir(bn); + +	/* Kill off any bonds created after unregistering bond rtnl ops */ +	rtnl_lock(); +	list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list) +		unregister_netdevice_queue(bond->dev, &list); +	unregister_netdevice_many(&list); +	rtnl_unlock();  }  static struct pernet_operations bond_net_ops = { @@ -5287,50 +4503,36 @@ static int __init bonding_init(void)  	int i;  	int res; -	pr_info("%s", version); +	pr_info("%s", bond_version);  	res = bond_check_params(&bonding_defaults);  	if (res)  		goto out; -#ifdef CONFIG_NET_POLL_CONTROLLER -	if (!alloc_cpumask_var(&netpoll_block_tx, GFP_KERNEL)) { -		res = -ENOMEM; -		goto out; -	} -#endif -  	res = register_pernet_subsys(&bond_net_ops);  	if (res)  		goto out; -	res = rtnl_link_register(&bond_link_ops); +	res = bond_netlink_init();  	if (res)  		goto err_link; +	bond_create_debugfs(); +  	for (i = 0; i < max_bonds; i++) {  		res = bond_create(&init_net, NULL);  		if (res)  			goto err;  	} -	res = bond_create_sysfs(); -	if (res) -		goto err; - -  	register_netdevice_notifier(&bond_netdev_notifier); -	register_inetaddr_notifier(&bond_inetaddr_notifier); -	bond_register_ipv6_notifier();  out:  	return res;  err: -	rtnl_link_unregister(&bond_link_ops); +	bond_destroy_debugfs(); +	bond_netlink_fini();  err_link:  	unregister_pernet_subsys(&bond_net_ops); -#ifdef CONFIG_NET_POLL_CONTROLLER -	free_cpumask_var(netpoll_block_tx); -#endif  	goto out;  } @@ -5338,16 +4540,17 @@ err_link:  static void __exit bonding_exit(void)  {  	unregister_netdevice_notifier(&bond_netdev_notifier); -	unregister_inetaddr_notifier(&bond_inetaddr_notifier); -	bond_unregister_ipv6_notifier(); -	bond_destroy_sysfs(); +	bond_destroy_debugfs(); -	rtnl_link_unregister(&bond_link_ops); +	bond_netlink_fini();  	unregister_pernet_subsys(&bond_net_ops);  #ifdef CONFIG_NET_POLL_CONTROLLER -	free_cpumask_var(netpoll_block_tx); +	/* +	 * Make sure we don't have an imbalance on our netpoll blocking +	 */ +	WARN_ON(atomic_read(&netpoll_block_tx));  #endif  } @@ -5357,4 +4560,3 @@ MODULE_LICENSE("GPL");  MODULE_VERSION(DRV_VERSION);  MODULE_DESCRIPTION(DRV_DESCRIPTION ", v" DRV_VERSION);  MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others"); -MODULE_ALIAS_RTNL_LINK("bond"); diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c new file mode 100644 index 00000000000..5ab3c1847e6 --- /dev/null +++ b/drivers/net/bonding/bond_netlink.c @@ -0,0 +1,573 @@ +/* + * drivers/net/bond/bond_netlink.c - Netlink interface for 
bonding + * Copyright (c) 2013 Jiri Pirko <jiri@resnulli.us> + * Copyright (c) 2013 Scott Feldman <sfeldma@cumulusnetworks.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/if_link.h> +#include <linux/if_ether.h> +#include <net/netlink.h> +#include <net/rtnetlink.h> +#include "bonding.h" + +static size_t bond_get_slave_size(const struct net_device *bond_dev, +				  const struct net_device *slave_dev) +{ +	return nla_total_size(sizeof(u8)) +	/* IFLA_BOND_SLAVE_STATE */ +		nla_total_size(sizeof(u8)) +	/* IFLA_BOND_SLAVE_MII_STATUS */ +		nla_total_size(sizeof(u32)) +	/* IFLA_BOND_SLAVE_LINK_FAILURE_COUNT */ +		nla_total_size(MAX_ADDR_LEN) +	/* IFLA_BOND_SLAVE_PERM_HWADDR */ +		nla_total_size(sizeof(u16)) +	/* IFLA_BOND_SLAVE_QUEUE_ID */ +		nla_total_size(sizeof(u16)) +	/* IFLA_BOND_SLAVE_AD_AGGREGATOR_ID */ +		0; +} + +static int bond_fill_slave_info(struct sk_buff *skb, +				const struct net_device *bond_dev, +				const struct net_device *slave_dev) +{ +	struct slave *slave = bond_slave_get_rtnl(slave_dev); + +	if (nla_put_u8(skb, IFLA_BOND_SLAVE_STATE, bond_slave_state(slave))) +		goto nla_put_failure; + +	if (nla_put_u8(skb, IFLA_BOND_SLAVE_MII_STATUS, slave->link)) +		goto nla_put_failure; + +	if (nla_put_u32(skb, IFLA_BOND_SLAVE_LINK_FAILURE_COUNT, +			slave->link_failure_count)) +		goto nla_put_failure; + +	if (nla_put(skb, IFLA_BOND_SLAVE_PERM_HWADDR, +		    slave_dev->addr_len, slave->perm_hwaddr)) +		goto nla_put_failure; + +	if (nla_put_u16(skb, IFLA_BOND_SLAVE_QUEUE_ID, slave->queue_id)) +		goto nla_put_failure; + +	if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { +		const struct aggregator *agg; + +		agg = SLAVE_AD_INFO(slave)->port.aggregator; +		if (agg) +			if (nla_put_u16(skb, IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, +					agg->aggregator_identifier)) +				goto nla_put_failure; +	} + +	return 0; + +nla_put_failure: +	return -EMSGSIZE; +} + +static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { +	[IFLA_BOND_MODE]		= { .type = NLA_U8 }, +	[IFLA_BOND_ACTIVE_SLAVE]	= { .type = NLA_U32 }, +	[IFLA_BOND_MIIMON]		= { .type = NLA_U32 }, +	[IFLA_BOND_UPDELAY]		= { .type = NLA_U32 }, +	[IFLA_BOND_DOWNDELAY]		= { .type = NLA_U32 }, +	[IFLA_BOND_USE_CARRIER]		= { .type = NLA_U8 }, +	[IFLA_BOND_ARP_INTERVAL]	= { .type = NLA_U32 }, +	[IFLA_BOND_ARP_IP_TARGET]	= { .type = NLA_NESTED }, +	[IFLA_BOND_ARP_VALIDATE]	= { .type = NLA_U32 }, +	[IFLA_BOND_ARP_ALL_TARGETS]	= { .type = NLA_U32 }, +	[IFLA_BOND_PRIMARY]		= { .type = NLA_U32 }, +	[IFLA_BOND_PRIMARY_RESELECT]	= { .type = NLA_U8 }, +	[IFLA_BOND_FAIL_OVER_MAC]	= { .type = NLA_U8 }, +	[IFLA_BOND_XMIT_HASH_POLICY]	= { .type = NLA_U8 }, +	[IFLA_BOND_RESEND_IGMP]		= { .type = NLA_U32 }, +	[IFLA_BOND_NUM_PEER_NOTIF]	= { .type = NLA_U8 }, +	[IFLA_BOND_ALL_SLAVES_ACTIVE]	= { .type = NLA_U8 }, +	[IFLA_BOND_MIN_LINKS]		= { .type = NLA_U32 }, +	[IFLA_BOND_LP_INTERVAL]		= { .type = NLA_U32 }, +	[IFLA_BOND_PACKETS_PER_SLAVE]	= { .type = NLA_U32 }, +	[IFLA_BOND_AD_LACP_RATE]	= { .type = NLA_U8 }, +	[IFLA_BOND_AD_SELECT]		= { .type = NLA_U8 }, +	[IFLA_BOND_AD_INFO]		= { .type = NLA_NESTED }, +}; + +static int bond_validate(struct nlattr *tb[], struct 
nlattr *data[]) +{ +	if (tb[IFLA_ADDRESS]) { +		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) +			return -EINVAL; +		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) +			return -EADDRNOTAVAIL; +	} +	return 0; +} + +static int bond_changelink(struct net_device *bond_dev, +			   struct nlattr *tb[], struct nlattr *data[]) +{ +	struct bonding *bond = netdev_priv(bond_dev); +	struct bond_opt_value newval; +	int miimon = 0; +	int err; + +	if (!data) +		return 0; + +	if (data[IFLA_BOND_MODE]) { +		int mode = nla_get_u8(data[IFLA_BOND_MODE]); + +		bond_opt_initval(&newval, mode); +		err = __bond_opt_set(bond, BOND_OPT_MODE, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_ACTIVE_SLAVE]) { +		int ifindex = nla_get_u32(data[IFLA_BOND_ACTIVE_SLAVE]); +		struct net_device *slave_dev; +		char *active_slave = ""; + +		if (ifindex != 0) { +			slave_dev = __dev_get_by_index(dev_net(bond_dev), +						       ifindex); +			if (!slave_dev) +				return -ENODEV; +			active_slave = slave_dev->name; +		} +		bond_opt_initstr(&newval, active_slave); +		err = __bond_opt_set(bond, BOND_OPT_ACTIVE_SLAVE, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_MIIMON]) { +		miimon = nla_get_u32(data[IFLA_BOND_MIIMON]); + +		bond_opt_initval(&newval, miimon); +		err = __bond_opt_set(bond, BOND_OPT_MIIMON, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_UPDELAY]) { +		int updelay = nla_get_u32(data[IFLA_BOND_UPDELAY]); + +		bond_opt_initval(&newval, updelay); +		err = __bond_opt_set(bond, BOND_OPT_UPDELAY, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_DOWNDELAY]) { +		int downdelay = nla_get_u32(data[IFLA_BOND_DOWNDELAY]); + +		bond_opt_initval(&newval, downdelay); +		err = __bond_opt_set(bond, BOND_OPT_DOWNDELAY, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_USE_CARRIER]) { +		int use_carrier = nla_get_u8(data[IFLA_BOND_USE_CARRIER]); + +		bond_opt_initval(&newval, use_carrier); +		err = __bond_opt_set(bond, BOND_OPT_USE_CARRIER, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_ARP_INTERVAL]) { +		int arp_interval = nla_get_u32(data[IFLA_BOND_ARP_INTERVAL]); + +		if (arp_interval && miimon) { +			pr_err("%s: ARP monitoring cannot be used with MII monitoring\n", +			       bond->dev->name); +			return -EINVAL; +		} + +		bond_opt_initval(&newval, arp_interval); +		err = __bond_opt_set(bond, BOND_OPT_ARP_INTERVAL, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_ARP_IP_TARGET]) { +		struct nlattr *attr; +		int i = 0, rem; + +		bond_option_arp_ip_targets_clear(bond); +		nla_for_each_nested(attr, data[IFLA_BOND_ARP_IP_TARGET], rem) { +			__be32 target = nla_get_be32(attr); + +			bond_opt_initval(&newval, (__force u64)target); +			err = __bond_opt_set(bond, BOND_OPT_ARP_TARGETS, +					     &newval); +			if (err) +				break; +			i++; +		} +		if (i == 0 && bond->params.arp_interval) +			pr_warn("%s: Removing last arp target with arp_interval on\n", +				bond->dev->name); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_ARP_VALIDATE]) { +		int arp_validate = nla_get_u32(data[IFLA_BOND_ARP_VALIDATE]); + +		if (arp_validate && miimon) { +			pr_err("%s: ARP validating cannot be used with MII monitoring\n", +			       bond->dev->name); +			return -EINVAL; +		} + +		bond_opt_initval(&newval, arp_validate); +		err = __bond_opt_set(bond, BOND_OPT_ARP_VALIDATE, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_ARP_ALL_TARGETS]) { +		int arp_all_targets = +			nla_get_u32(data[IFLA_BOND_ARP_ALL_TARGETS]); + +		
bond_opt_initval(&newval, arp_all_targets); +		err = __bond_opt_set(bond, BOND_OPT_ARP_ALL_TARGETS, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_PRIMARY]) { +		int ifindex = nla_get_u32(data[IFLA_BOND_PRIMARY]); +		struct net_device *dev; +		char *primary = ""; + +		dev = __dev_get_by_index(dev_net(bond_dev), ifindex); +		if (dev) +			primary = dev->name; + +		bond_opt_initstr(&newval, primary); +		err = __bond_opt_set(bond, BOND_OPT_PRIMARY, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_PRIMARY_RESELECT]) { +		int primary_reselect = +			nla_get_u8(data[IFLA_BOND_PRIMARY_RESELECT]); + +		bond_opt_initval(&newval, primary_reselect); +		err = __bond_opt_set(bond, BOND_OPT_PRIMARY_RESELECT, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_FAIL_OVER_MAC]) { +		int fail_over_mac = +			nla_get_u8(data[IFLA_BOND_FAIL_OVER_MAC]); + +		bond_opt_initval(&newval, fail_over_mac); +		err = __bond_opt_set(bond, BOND_OPT_FAIL_OVER_MAC, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_XMIT_HASH_POLICY]) { +		int xmit_hash_policy = +			nla_get_u8(data[IFLA_BOND_XMIT_HASH_POLICY]); + +		bond_opt_initval(&newval, xmit_hash_policy); +		err = __bond_opt_set(bond, BOND_OPT_XMIT_HASH, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_RESEND_IGMP]) { +		int resend_igmp = +			nla_get_u32(data[IFLA_BOND_RESEND_IGMP]); + +		bond_opt_initval(&newval, resend_igmp); +		err = __bond_opt_set(bond, BOND_OPT_RESEND_IGMP, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_NUM_PEER_NOTIF]) { +		int num_peer_notif = +			nla_get_u8(data[IFLA_BOND_NUM_PEER_NOTIF]); + +		bond_opt_initval(&newval, num_peer_notif); +		err = __bond_opt_set(bond, BOND_OPT_NUM_PEER_NOTIF, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_ALL_SLAVES_ACTIVE]) { +		int all_slaves_active = +			nla_get_u8(data[IFLA_BOND_ALL_SLAVES_ACTIVE]); + +		bond_opt_initval(&newval, all_slaves_active); +		err = __bond_opt_set(bond, BOND_OPT_ALL_SLAVES_ACTIVE, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_MIN_LINKS]) { +		int min_links = +			nla_get_u32(data[IFLA_BOND_MIN_LINKS]); + +		bond_opt_initval(&newval, min_links); +		err = __bond_opt_set(bond, BOND_OPT_MINLINKS, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_LP_INTERVAL]) { +		int lp_interval = +			nla_get_u32(data[IFLA_BOND_LP_INTERVAL]); + +		bond_opt_initval(&newval, lp_interval); +		err = __bond_opt_set(bond, BOND_OPT_LP_INTERVAL, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_PACKETS_PER_SLAVE]) { +		int packets_per_slave = +			nla_get_u32(data[IFLA_BOND_PACKETS_PER_SLAVE]); + +		bond_opt_initval(&newval, packets_per_slave); +		err = __bond_opt_set(bond, BOND_OPT_PACKETS_PER_SLAVE, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_AD_LACP_RATE]) { +		int lacp_rate = +			nla_get_u8(data[IFLA_BOND_AD_LACP_RATE]); + +		bond_opt_initval(&newval, lacp_rate); +		err = __bond_opt_set(bond, BOND_OPT_LACP_RATE, &newval); +		if (err) +			return err; +	} +	if (data[IFLA_BOND_AD_SELECT]) { +		int ad_select = +			nla_get_u8(data[IFLA_BOND_AD_SELECT]); + +		bond_opt_initval(&newval, ad_select); +		err = __bond_opt_set(bond, BOND_OPT_AD_SELECT, &newval); +		if (err) +			return err; +	} +	return 0; +} + +static int bond_newlink(struct net *src_net, struct net_device *bond_dev, +			struct nlattr *tb[], struct nlattr *data[]) +{ +	int err; + +	err = bond_changelink(bond_dev, tb, data); +	if (err < 0) +		return err; + +	return register_netdevice(bond_dev); +} + 
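
Every attribute branch in bond_changelink() above follows the same three steps: check that the attribute was supplied, wrap its value with bond_opt_initval() (or bond_opt_initstr() for the string-valued cases such as the active-slave and primary device names), and apply it through __bond_opt_set(). As an illustration only (this helper is not something the patch adds), the u32-valued branches could be expressed as:

static int bond_changelink_u32(struct bonding *bond, struct nlattr *attr,
			       unsigned int option)
{
	struct bond_opt_value newval;

	if (!attr)		/* attribute not supplied, nothing to change */
		return 0;

	bond_opt_initval(&newval, nla_get_u32(attr));
	return __bond_opt_set(bond, option, &newval);
}
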
+static size_t bond_get_size(const struct net_device *bond_dev) +{ +	return nla_total_size(sizeof(u8)) +	/* IFLA_BOND_MODE */ +		nla_total_size(sizeof(u32)) +	/* IFLA_BOND_ACTIVE_SLAVE */ +		nla_total_size(sizeof(u32)) +	/* IFLA_BOND_MIIMON */ +		nla_total_size(sizeof(u32)) +	/* IFLA_BOND_UPDELAY */ +		nla_total_size(sizeof(u32)) +	/* IFLA_BOND_DOWNDELAY */ +		nla_total_size(sizeof(u8)) +	/* IFLA_BOND_USE_CARRIER */ +		nla_total_size(sizeof(u32)) +	/* IFLA_BOND_ARP_INTERVAL */ +						/* IFLA_BOND_ARP_IP_TARGET */ +		nla_total_size(sizeof(struct nlattr)) + +		nla_total_size(sizeof(u32)) * BOND_MAX_ARP_TARGETS + +		nla_total_size(sizeof(u32)) +	/* IFLA_BOND_ARP_VALIDATE */ +		nla_total_size(sizeof(u32)) +	/* IFLA_BOND_ARP_ALL_TARGETS */ +		nla_total_size(sizeof(u32)) +	/* IFLA_BOND_PRIMARY */ +		nla_total_size(sizeof(u8)) +	/* IFLA_BOND_PRIMARY_RESELECT */ +		nla_total_size(sizeof(u8)) +	/* IFLA_BOND_FAIL_OVER_MAC */ +		nla_total_size(sizeof(u8)) +	/* IFLA_BOND_XMIT_HASH_POLICY */ +		nla_total_size(sizeof(u32)) +	/* IFLA_BOND_RESEND_IGMP */ +		nla_total_size(sizeof(u8)) +	/* IFLA_BOND_NUM_PEER_NOTIF */ +		nla_total_size(sizeof(u8)) +   /* IFLA_BOND_ALL_SLAVES_ACTIVE */ +		nla_total_size(sizeof(u32)) +	/* IFLA_BOND_MIN_LINKS */ +		nla_total_size(sizeof(u32)) +	/* IFLA_BOND_LP_INTERVAL */ +		nla_total_size(sizeof(u32)) +  /* IFLA_BOND_PACKETS_PER_SLAVE */ +		nla_total_size(sizeof(u8)) +	/* IFLA_BOND_AD_LACP_RATE */ +		nla_total_size(sizeof(u8)) +	/* IFLA_BOND_AD_SELECT */ +		nla_total_size(sizeof(struct nlattr)) + /* IFLA_BOND_AD_INFO */ +		nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_AGGREGATOR */ +		nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_NUM_PORTS */ +		nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_ACTOR_KEY */ +		nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_PARTNER_KEY*/ +		nla_total_size(ETH_ALEN) +    /* IFLA_BOND_AD_INFO_PARTNER_MAC*/ +		0; +} + +static int bond_fill_info(struct sk_buff *skb, +			  const struct net_device *bond_dev) +{ +	struct bonding *bond = netdev_priv(bond_dev); +	struct net_device *slave_dev = bond_option_active_slave_get(bond); +	struct nlattr *targets; +	unsigned int packets_per_slave; +	int i, targets_added; + +	if (nla_put_u8(skb, IFLA_BOND_MODE, BOND_MODE(bond))) +		goto nla_put_failure; + +	if (slave_dev && +	    nla_put_u32(skb, IFLA_BOND_ACTIVE_SLAVE, slave_dev->ifindex)) +		goto nla_put_failure; + +	if (nla_put_u32(skb, IFLA_BOND_MIIMON, bond->params.miimon)) +		goto nla_put_failure; + +	if (nla_put_u32(skb, IFLA_BOND_UPDELAY, +			bond->params.updelay * bond->params.miimon)) +		goto nla_put_failure; + +	if (nla_put_u32(skb, IFLA_BOND_DOWNDELAY, +			bond->params.downdelay * bond->params.miimon)) +		goto nla_put_failure; + +	if (nla_put_u8(skb, IFLA_BOND_USE_CARRIER, bond->params.use_carrier)) +		goto nla_put_failure; + +	if (nla_put_u32(skb, IFLA_BOND_ARP_INTERVAL, bond->params.arp_interval)) +		goto nla_put_failure; + +	targets = nla_nest_start(skb, IFLA_BOND_ARP_IP_TARGET); +	if (!targets) +		goto nla_put_failure; + +	targets_added = 0; +	for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) { +		if (bond->params.arp_targets[i]) { +			nla_put_be32(skb, i, bond->params.arp_targets[i]); +			targets_added = 1; +		} +	} + +	if (targets_added) +		nla_nest_end(skb, targets); +	else +		nla_nest_cancel(skb, targets); + +	if (nla_put_u32(skb, IFLA_BOND_ARP_VALIDATE, bond->params.arp_validate)) +		goto nla_put_failure; + +	if (nla_put_u32(skb, IFLA_BOND_ARP_ALL_TARGETS, +			bond->params.arp_all_targets)) +		goto nla_put_failure; + +	if 
(bond->primary_slave && +	    nla_put_u32(skb, IFLA_BOND_PRIMARY, +			bond->primary_slave->dev->ifindex)) +		goto nla_put_failure; + +	if (nla_put_u8(skb, IFLA_BOND_PRIMARY_RESELECT, +		       bond->params.primary_reselect)) +		goto nla_put_failure; + +	if (nla_put_u8(skb, IFLA_BOND_FAIL_OVER_MAC, +		       bond->params.fail_over_mac)) +		goto nla_put_failure; + +	if (nla_put_u8(skb, IFLA_BOND_XMIT_HASH_POLICY, +		       bond->params.xmit_policy)) +		goto nla_put_failure; + +	if (nla_put_u32(skb, IFLA_BOND_RESEND_IGMP, +		        bond->params.resend_igmp)) +		goto nla_put_failure; + +	if (nla_put_u8(skb, IFLA_BOND_NUM_PEER_NOTIF, +		       bond->params.num_peer_notif)) +		goto nla_put_failure; + +	if (nla_put_u8(skb, IFLA_BOND_ALL_SLAVES_ACTIVE, +		       bond->params.all_slaves_active)) +		goto nla_put_failure; + +	if (nla_put_u32(skb, IFLA_BOND_MIN_LINKS, +			bond->params.min_links)) +		goto nla_put_failure; + +	if (nla_put_u32(skb, IFLA_BOND_LP_INTERVAL, +			bond->params.lp_interval)) +		goto nla_put_failure; + +	packets_per_slave = bond->params.packets_per_slave; +	if (nla_put_u32(skb, IFLA_BOND_PACKETS_PER_SLAVE, +			packets_per_slave)) +		goto nla_put_failure; + +	if (nla_put_u8(skb, IFLA_BOND_AD_LACP_RATE, +		       bond->params.lacp_fast)) +		goto nla_put_failure; + +	if (nla_put_u8(skb, IFLA_BOND_AD_SELECT, +		       bond->params.ad_select)) +		goto nla_put_failure; + +	if (BOND_MODE(bond) == BOND_MODE_8023AD) { +		struct ad_info info; + +		if (!bond_3ad_get_active_agg_info(bond, &info)) { +			struct nlattr *nest; + +			nest = nla_nest_start(skb, IFLA_BOND_AD_INFO); +			if (!nest) +				goto nla_put_failure; + +			if (nla_put_u16(skb, IFLA_BOND_AD_INFO_AGGREGATOR, +					info.aggregator_id)) +				goto nla_put_failure; +			if (nla_put_u16(skb, IFLA_BOND_AD_INFO_NUM_PORTS, +					info.ports)) +				goto nla_put_failure; +			if (nla_put_u16(skb, IFLA_BOND_AD_INFO_ACTOR_KEY, +					info.actor_key)) +				goto nla_put_failure; +			if (nla_put_u16(skb, IFLA_BOND_AD_INFO_PARTNER_KEY, +					info.partner_key)) +				goto nla_put_failure; +			if (nla_put(skb, IFLA_BOND_AD_INFO_PARTNER_MAC, +				    sizeof(info.partner_system), +				    &info.partner_system)) +				goto nla_put_failure; + +			nla_nest_end(skb, nest); +		} +	} + +	return 0; + +nla_put_failure: +	return -EMSGSIZE; +} + +struct rtnl_link_ops bond_link_ops __read_mostly = { +	.kind			= "bond", +	.priv_size		= sizeof(struct bonding), +	.setup			= bond_setup, +	.maxtype		= IFLA_BOND_MAX, +	.policy			= bond_policy, +	.validate		= bond_validate, +	.newlink		= bond_newlink, +	.changelink		= bond_changelink, +	.get_size		= bond_get_size, +	.fill_info		= bond_fill_info, +	.get_num_tx_queues	= bond_get_num_tx_queues, +	.get_num_rx_queues	= bond_get_num_tx_queues, /* Use the same number +							     as for TX queues */ +	.get_slave_size		= bond_get_slave_size, +	.fill_slave_info	= bond_fill_slave_info, +}; + +int __init bond_netlink_init(void) +{ +	return rtnl_link_register(&bond_link_ops); +} + +void bond_netlink_fini(void) +{ +	rtnl_link_unregister(&bond_link_ops); +} + +MODULE_ALIAS_RTNL_LINK("bond"); diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c new file mode 100644 index 00000000000..540e0167bf2 --- /dev/null +++ b/drivers/net/bonding/bond_options.c @@ -0,0 +1,1394 @@ +/* + * drivers/net/bond/bond_options.c - bonding options + * Copyright (c) 2013 Jiri Pirko <jiri@resnulli.us> + * Copyright (c) 2013 Scott Feldman <sfeldma@cumulusnetworks.com> + * + * This program is free software; you can redistribute 
it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/errno.h> +#include <linux/if.h> +#include <linux/netdevice.h> +#include <linux/spinlock.h> +#include <linux/rcupdate.h> +#include <linux/ctype.h> +#include <linux/inet.h> +#include "bonding.h" + +static int bond_option_active_slave_set(struct bonding *bond, +					const struct bond_opt_value *newval); +static int bond_option_miimon_set(struct bonding *bond, +				  const struct bond_opt_value *newval); +static int bond_option_updelay_set(struct bonding *bond, +				   const struct bond_opt_value *newval); +static int bond_option_downdelay_set(struct bonding *bond, +				     const struct bond_opt_value *newval); +static int bond_option_use_carrier_set(struct bonding *bond, +				       const struct bond_opt_value *newval); +static int bond_option_arp_interval_set(struct bonding *bond, +					const struct bond_opt_value *newval); +static int bond_option_arp_ip_target_add(struct bonding *bond, __be32 target); +static int bond_option_arp_ip_target_rem(struct bonding *bond, __be32 target); +static int bond_option_arp_ip_targets_set(struct bonding *bond, +					  const struct bond_opt_value *newval); +static int bond_option_arp_validate_set(struct bonding *bond, +					const struct bond_opt_value *newval); +static int bond_option_arp_all_targets_set(struct bonding *bond, +					   const struct bond_opt_value *newval); +static int bond_option_primary_set(struct bonding *bond, +				   const struct bond_opt_value *newval); +static int bond_option_primary_reselect_set(struct bonding *bond, +					    const struct bond_opt_value *newval); +static int bond_option_fail_over_mac_set(struct bonding *bond, +					 const struct bond_opt_value *newval); +static int bond_option_xmit_hash_policy_set(struct bonding *bond, +					    const struct bond_opt_value *newval); +static int bond_option_resend_igmp_set(struct bonding *bond, +				       const struct bond_opt_value *newval); +static int bond_option_num_peer_notif_set(struct bonding *bond, +					  const struct bond_opt_value *newval); +static int bond_option_all_slaves_active_set(struct bonding *bond, +					     const struct bond_opt_value *newval); +static int bond_option_min_links_set(struct bonding *bond, +				     const struct bond_opt_value *newval); +static int bond_option_lp_interval_set(struct bonding *bond, +				       const struct bond_opt_value *newval); +static int bond_option_pps_set(struct bonding *bond, +			       const struct bond_opt_value *newval); +static int bond_option_lacp_rate_set(struct bonding *bond, +				     const struct bond_opt_value *newval); +static int bond_option_ad_select_set(struct bonding *bond, +				     const struct bond_opt_value *newval); +static int bond_option_queue_id_set(struct bonding *bond, +				    const struct bond_opt_value *newval); +static int bond_option_mode_set(struct bonding *bond, +				const struct bond_opt_value *newval); +static int bond_option_slaves_set(struct bonding *bond, +				  const struct bond_opt_value *newval); +static int bond_option_tlb_dynamic_lb_set(struct bonding *bond, +				  const struct bond_opt_value *newval); + + +static const struct bond_opt_value bond_mode_tbl[] = { +	{ "balance-rr",    BOND_MODE_ROUNDROBIN,   BOND_VALFLAG_DEFAULT}, +	{ "active-backup", BOND_MODE_ACTIVEBACKUP, 0}, +	{ "balance-xor",   BOND_MODE_XOR,        
  0}, +	{ "broadcast",     BOND_MODE_BROADCAST,    0}, +	{ "802.3ad",       BOND_MODE_8023AD,       0}, +	{ "balance-tlb",   BOND_MODE_TLB,          0}, +	{ "balance-alb",   BOND_MODE_ALB,          0}, +	{ NULL,            -1,                     0}, +}; + +static const struct bond_opt_value bond_pps_tbl[] = { +	{ "default", 1,         BOND_VALFLAG_DEFAULT}, +	{ "maxval",  USHRT_MAX, BOND_VALFLAG_MAX}, +	{ NULL,      -1,        0}, +}; + +static const struct bond_opt_value bond_xmit_hashtype_tbl[] = { +	{ "layer2",   BOND_XMIT_POLICY_LAYER2, BOND_VALFLAG_DEFAULT}, +	{ "layer3+4", BOND_XMIT_POLICY_LAYER34, 0}, +	{ "layer2+3", BOND_XMIT_POLICY_LAYER23, 0}, +	{ "encap2+3", BOND_XMIT_POLICY_ENCAP23, 0}, +	{ "encap3+4", BOND_XMIT_POLICY_ENCAP34, 0}, +	{ NULL,       -1,                       0}, +}; + +static const struct bond_opt_value bond_arp_validate_tbl[] = { +	{ "none",		BOND_ARP_VALIDATE_NONE,		BOND_VALFLAG_DEFAULT}, +	{ "active",		BOND_ARP_VALIDATE_ACTIVE,	0}, +	{ "backup",		BOND_ARP_VALIDATE_BACKUP,	0}, +	{ "all",		BOND_ARP_VALIDATE_ALL,		0}, +	{ "filter",		BOND_ARP_FILTER,		0}, +	{ "filter_active",	BOND_ARP_FILTER_ACTIVE,		0}, +	{ "filter_backup",	BOND_ARP_FILTER_BACKUP,		0}, +	{ NULL,			-1,				0}, +}; + +static const struct bond_opt_value bond_arp_all_targets_tbl[] = { +	{ "any", BOND_ARP_TARGETS_ANY, BOND_VALFLAG_DEFAULT}, +	{ "all", BOND_ARP_TARGETS_ALL, 0}, +	{ NULL,  -1,                   0}, +}; + +static const struct bond_opt_value bond_fail_over_mac_tbl[] = { +	{ "none",   BOND_FOM_NONE,   BOND_VALFLAG_DEFAULT}, +	{ "active", BOND_FOM_ACTIVE, 0}, +	{ "follow", BOND_FOM_FOLLOW, 0}, +	{ NULL,     -1,              0}, +}; + +static const struct bond_opt_value bond_intmax_tbl[] = { +	{ "off",     0,       BOND_VALFLAG_DEFAULT}, +	{ "maxval",  INT_MAX, BOND_VALFLAG_MAX}, +	{ NULL,      -1,      0} +}; + +static const struct bond_opt_value bond_lacp_rate_tbl[] = { +	{ "slow", AD_LACP_SLOW, 0}, +	{ "fast", AD_LACP_FAST, 0}, +	{ NULL,   -1,           0}, +}; + +static const struct bond_opt_value bond_ad_select_tbl[] = { +	{ "stable",    BOND_AD_STABLE,    BOND_VALFLAG_DEFAULT}, +	{ "bandwidth", BOND_AD_BANDWIDTH, 0}, +	{ "count",     BOND_AD_COUNT,     0}, +	{ NULL,        -1,                0}, +}; + +static const struct bond_opt_value bond_num_peer_notif_tbl[] = { +	{ "off",     0,   0}, +	{ "maxval",  255, BOND_VALFLAG_MAX}, +	{ "default", 1,   BOND_VALFLAG_DEFAULT}, +	{ NULL,      -1,  0} +}; + +static const struct bond_opt_value bond_primary_reselect_tbl[] = { +	{ "always",  BOND_PRI_RESELECT_ALWAYS,  BOND_VALFLAG_DEFAULT}, +	{ "better",  BOND_PRI_RESELECT_BETTER,  0}, +	{ "failure", BOND_PRI_RESELECT_FAILURE, 0}, +	{ NULL,      -1}, +}; + +static const struct bond_opt_value bond_use_carrier_tbl[] = { +	{ "off", 0,  0}, +	{ "on",  1,  BOND_VALFLAG_DEFAULT}, +	{ NULL,  -1, 0} +}; + +static const struct bond_opt_value bond_all_slaves_active_tbl[] = { +	{ "off", 0,  BOND_VALFLAG_DEFAULT}, +	{ "on",  1,  0}, +	{ NULL,  -1, 0} +}; + +static const struct bond_opt_value bond_resend_igmp_tbl[] = { +	{ "off",     0,   0}, +	{ "maxval",  255, BOND_VALFLAG_MAX}, +	{ "default", 1,   BOND_VALFLAG_DEFAULT}, +	{ NULL,      -1,  0} +}; + +static const struct bond_opt_value bond_lp_interval_tbl[] = { +	{ "minval",  1,       BOND_VALFLAG_MIN | BOND_VALFLAG_DEFAULT}, +	{ "maxval",  INT_MAX, BOND_VALFLAG_MAX}, +	{ NULL,      -1,      0}, +}; + +static const struct bond_opt_value bond_tlb_dynamic_lb_tbl[] = { +	{ "off", 0,  0}, +	{ "on",  1,  BOND_VALFLAG_DEFAULT}, +	{ NULL,  -1, 0} +}; + +static 
const struct bond_option bond_opts[] = { +	[BOND_OPT_MODE] = { +		.id = BOND_OPT_MODE, +		.name = "mode", +		.desc = "bond device mode", +		.flags = BOND_OPTFLAG_NOSLAVES | BOND_OPTFLAG_IFDOWN, +		.values = bond_mode_tbl, +		.set = bond_option_mode_set +	}, +	[BOND_OPT_PACKETS_PER_SLAVE] = { +		.id = BOND_OPT_PACKETS_PER_SLAVE, +		.name = "packets_per_slave", +		.desc = "Packets to send per slave in RR mode", +		.unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_ROUNDROBIN)), +		.values = bond_pps_tbl, +		.set = bond_option_pps_set +	}, +	[BOND_OPT_XMIT_HASH] = { +		.id = BOND_OPT_XMIT_HASH, +		.name = "xmit_hash_policy", +		.desc = "balance-xor, 802.3ad, and tlb hashing method", +		.values = bond_xmit_hashtype_tbl, +		.set = bond_option_xmit_hash_policy_set +	}, +	[BOND_OPT_ARP_VALIDATE] = { +		.id = BOND_OPT_ARP_VALIDATE, +		.name = "arp_validate", +		.desc = "validate src/dst of ARP probes", +		.unsuppmodes = BIT(BOND_MODE_8023AD) | BIT(BOND_MODE_TLB) | +			       BIT(BOND_MODE_ALB), +		.values = bond_arp_validate_tbl, +		.set = bond_option_arp_validate_set +	}, +	[BOND_OPT_ARP_ALL_TARGETS] = { +		.id = BOND_OPT_ARP_ALL_TARGETS, +		.name = "arp_all_targets", +		.desc = "fail on any/all arp targets timeout", +		.values = bond_arp_all_targets_tbl, +		.set = bond_option_arp_all_targets_set +	}, +	[BOND_OPT_FAIL_OVER_MAC] = { +		.id = BOND_OPT_FAIL_OVER_MAC, +		.name = "fail_over_mac", +		.desc = "For active-backup, do not set all slaves to the same MAC", +		.flags = BOND_OPTFLAG_NOSLAVES, +		.values = bond_fail_over_mac_tbl, +		.set = bond_option_fail_over_mac_set +	}, +	[BOND_OPT_ARP_INTERVAL] = { +		.id = BOND_OPT_ARP_INTERVAL, +		.name = "arp_interval", +		.desc = "arp interval in milliseconds", +		.unsuppmodes = BIT(BOND_MODE_8023AD) | BIT(BOND_MODE_TLB) | +			       BIT(BOND_MODE_ALB), +		.values = bond_intmax_tbl, +		.set = bond_option_arp_interval_set +	}, +	[BOND_OPT_ARP_TARGETS] = { +		.id = BOND_OPT_ARP_TARGETS, +		.name = "arp_ip_target", +		.desc = "arp targets in n.n.n.n form", +		.flags = BOND_OPTFLAG_RAWVAL, +		.set = bond_option_arp_ip_targets_set +	}, +	[BOND_OPT_DOWNDELAY] = { +		.id = BOND_OPT_DOWNDELAY, +		.name = "downdelay", +		.desc = "Delay before considering link down, in milliseconds", +		.values = bond_intmax_tbl, +		.set = bond_option_downdelay_set +	}, +	[BOND_OPT_UPDELAY] = { +		.id = BOND_OPT_UPDELAY, +		.name = "updelay", +		.desc = "Delay before considering link up, in milliseconds", +		.values = bond_intmax_tbl, +		.set = bond_option_updelay_set +	}, +	[BOND_OPT_LACP_RATE] = { +		.id = BOND_OPT_LACP_RATE, +		.name = "lacp_rate", +		.desc = "LACPDU tx rate to request from 802.3ad partner", +		.flags = BOND_OPTFLAG_IFDOWN, +		.unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), +		.values = bond_lacp_rate_tbl, +		.set = bond_option_lacp_rate_set +	}, +	[BOND_OPT_MINLINKS] = { +		.id = BOND_OPT_MINLINKS, +		.name = "min_links", +		.desc = "Minimum number of available links before turning on carrier", +		.values = bond_intmax_tbl, +		.set = bond_option_min_links_set +	}, +	[BOND_OPT_AD_SELECT] = { +		.id = BOND_OPT_AD_SELECT, +		.name = "ad_select", +		.desc = "803.ad aggregation selection logic", +		.flags = BOND_OPTFLAG_IFDOWN, +		.values = bond_ad_select_tbl, +		.set = bond_option_ad_select_set +	}, +	[BOND_OPT_NUM_PEER_NOTIF] = { +		.id = BOND_OPT_NUM_PEER_NOTIF, +		.name = "num_unsol_na", +		.desc = "Number of peer notifications to send on failover event", +		.values = bond_num_peer_notif_tbl, +		.set = bond_option_num_peer_notif_set +	}, +	
[BOND_OPT_MIIMON] = { +		.id = BOND_OPT_MIIMON, +		.name = "miimon", +		.desc = "Link check interval in milliseconds", +		.values = bond_intmax_tbl, +		.set = bond_option_miimon_set +	}, +	[BOND_OPT_PRIMARY] = { +		.id = BOND_OPT_PRIMARY, +		.name = "primary", +		.desc = "Primary network device to use", +		.flags = BOND_OPTFLAG_RAWVAL, +		.unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_ACTIVEBACKUP) | +						BIT(BOND_MODE_TLB) | +						BIT(BOND_MODE_ALB)), +		.set = bond_option_primary_set +	}, +	[BOND_OPT_PRIMARY_RESELECT] = { +		.id = BOND_OPT_PRIMARY_RESELECT, +		.name = "primary_reselect", +		.desc = "Reselect primary slave once it comes up", +		.values = bond_primary_reselect_tbl, +		.set = bond_option_primary_reselect_set +	}, +	[BOND_OPT_USE_CARRIER] = { +		.id = BOND_OPT_USE_CARRIER, +		.name = "use_carrier", +		.desc = "Use netif_carrier_ok (vs MII ioctls) in miimon", +		.values = bond_use_carrier_tbl, +		.set = bond_option_use_carrier_set +	}, +	[BOND_OPT_ACTIVE_SLAVE] = { +		.id = BOND_OPT_ACTIVE_SLAVE, +		.name = "active_slave", +		.desc = "Currently active slave", +		.flags = BOND_OPTFLAG_RAWVAL, +		.unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_ACTIVEBACKUP) | +						BIT(BOND_MODE_TLB) | +						BIT(BOND_MODE_ALB)), +		.set = bond_option_active_slave_set +	}, +	[BOND_OPT_QUEUE_ID] = { +		.id = BOND_OPT_QUEUE_ID, +		.name = "queue_id", +		.desc = "Set queue id of a slave", +		.flags = BOND_OPTFLAG_RAWVAL, +		.set = bond_option_queue_id_set +	}, +	[BOND_OPT_ALL_SLAVES_ACTIVE] = { +		.id = BOND_OPT_ALL_SLAVES_ACTIVE, +		.name = "all_slaves_active", +		.desc = "Keep all frames received on an interface by setting active flag for all slaves", +		.values = bond_all_slaves_active_tbl, +		.set = bond_option_all_slaves_active_set +	}, +	[BOND_OPT_RESEND_IGMP] = { +		.id = BOND_OPT_RESEND_IGMP, +		.name = "resend_igmp", +		.desc = "Number of IGMP membership reports to send on link failure", +		.values = bond_resend_igmp_tbl, +		.set = bond_option_resend_igmp_set +	}, +	[BOND_OPT_LP_INTERVAL] = { +		.id = BOND_OPT_LP_INTERVAL, +		.name = "lp_interval", +		.desc = "The number of seconds between instances where the bonding driver sends learning packets to each slave's peer switch", +		.values = bond_lp_interval_tbl, +		.set = bond_option_lp_interval_set +	}, +	[BOND_OPT_SLAVES] = { +		.id = BOND_OPT_SLAVES, +		.name = "slaves", +		.desc = "Slave membership management", +		.flags = BOND_OPTFLAG_RAWVAL, +		.set = bond_option_slaves_set +	}, +	[BOND_OPT_TLB_DYNAMIC_LB] = { +		.id = BOND_OPT_TLB_DYNAMIC_LB, +		.name = "tlb_dynamic_lb", +		.desc = "Enable dynamic flow shuffling", +		.unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_TLB)), +		.values = bond_tlb_dynamic_lb_tbl, +		.flags = BOND_OPTFLAG_IFDOWN, +		.set = bond_option_tlb_dynamic_lb_set, +	}, +	{ } +}; + +/* Searches for an option by name */ +const struct bond_option *bond_opt_get_by_name(const char *name) +{ +	const struct bond_option *opt; +	int option; + +	for (option = 0; option < BOND_OPT_LAST; option++) { +		opt = bond_opt_get(option); +		if (opt && !strcmp(opt->name, name)) +			return opt; +	} + +	return NULL; +} + +/* Searches for a value in opt's values[] table */ +const struct bond_opt_value *bond_opt_get_val(unsigned int option, u64 val) +{ +	const struct bond_option *opt; +	int i; + +	opt = bond_opt_get(option); +	if (WARN_ON(!opt)) +		return NULL; +	for (i = 0; opt->values && opt->values[i].string; i++) +		if (opt->values[i].value == val) +			return &opt->values[i]; + +	return NULL; +} + +/* Searches for a value in opt's 
values[] table which matches the flagmask */ +static const struct bond_opt_value *bond_opt_get_flags(const struct bond_option *opt, +						 u32 flagmask) +{ +	int i; + +	for (i = 0; opt->values && opt->values[i].string; i++) +		if (opt->values[i].flags & flagmask) +			return &opt->values[i]; + +	return NULL; +} + +/* If maxval is missing then there's no range to check. In case minval is + * missing then it's considered to be 0. + */ +static bool bond_opt_check_range(const struct bond_option *opt, u64 val) +{ +	const struct bond_opt_value *minval, *maxval; + +	minval = bond_opt_get_flags(opt, BOND_VALFLAG_MIN); +	maxval = bond_opt_get_flags(opt, BOND_VALFLAG_MAX); +	if (!maxval || (minval && val < minval->value) || val > maxval->value) +		return false; + +	return true; +} + +/** + * bond_opt_parse - parse option value + * @opt: the option to parse against + * @val: value to parse + * + * This function tries to extract the value from @val and check if it's + * a possible match for the option and returns NULL if a match isn't found, + * or the struct_opt_value that matched. It also strips the new line from + * @val->string if it's present. + */ +const struct bond_opt_value *bond_opt_parse(const struct bond_option *opt, +					    struct bond_opt_value *val) +{ +	char *p, valstr[BOND_OPT_MAX_NAMELEN + 1] = { 0, }; +	const struct bond_opt_value *tbl; +	const struct bond_opt_value *ret = NULL; +	bool checkval; +	int i, rv; + +	/* No parsing if the option wants a raw val */ +	if (opt->flags & BOND_OPTFLAG_RAWVAL) +		return val; + +	tbl = opt->values; +	if (!tbl) +		goto out; + +	/* ULLONG_MAX is used to bypass string processing */ +	checkval = val->value != ULLONG_MAX; +	if (!checkval) { +		if (!val->string) +			goto out; +		p = strchr(val->string, '\n'); +		if (p) +			*p = '\0'; +		for (p = val->string; *p; p++) +			if (!(isdigit(*p) || isspace(*p))) +				break; +		/* The following code extracts the string to match or the value +		 * and sets checkval appropriately +		 */ +		if (*p) { +			rv = sscanf(val->string, "%32s", valstr); +		} else { +			rv = sscanf(val->string, "%llu", &val->value); +			checkval = true; +		} +		if (!rv) +			goto out; +	} + +	for (i = 0; tbl[i].string; i++) { +		/* Check for exact match */ +		if (checkval) { +			if (val->value == tbl[i].value) +				ret = &tbl[i]; +		} else { +			if (!strcmp(valstr, "default") && +			    (tbl[i].flags & BOND_VALFLAG_DEFAULT)) +				ret = &tbl[i]; + +			if (!strcmp(valstr, tbl[i].string)) +				ret = &tbl[i]; +		} +		/* Found an exact match */ +		if (ret) +			goto out; +	} +	/* Possible range match */ +	if (checkval && bond_opt_check_range(opt, val->value)) +		ret = val; +out: +	return ret; +} + +/* Check opt's dependencies against bond mode and currently set options */ +static int bond_opt_check_deps(struct bonding *bond, +			       const struct bond_option *opt) +{ +	struct bond_params *params = &bond->params; + +	if (test_bit(params->mode, &opt->unsuppmodes)) +		return -EACCES; +	if ((opt->flags & BOND_OPTFLAG_NOSLAVES) && bond_has_slaves(bond)) +		return -ENOTEMPTY; +	if ((opt->flags & BOND_OPTFLAG_IFDOWN) && (bond->dev->flags & IFF_UP)) +		return -EBUSY; + +	return 0; +} + +static void bond_opt_dep_print(struct bonding *bond, +			       const struct bond_option *opt) +{ +	const struct bond_opt_value *modeval; +	struct bond_params *params; + +	params = &bond->params; +	modeval = bond_opt_get_val(BOND_OPT_MODE, params->mode); +	if (test_bit(params->mode, &opt->unsuppmodes)) +		pr_err("%s: option %s: mode dependency failed, not supported in 
mode %s(%llu)\n", +		       bond->dev->name, opt->name, +		       modeval->string, modeval->value); +} + +static void bond_opt_error_interpret(struct bonding *bond, +				     const struct bond_option *opt, +				     int error, const struct bond_opt_value *val) +{ +	const struct bond_opt_value *minval, *maxval; +	char *p; + +	switch (error) { +	case -EINVAL: +		if (val) { +			if (val->string) { +				/* sometimes RAWVAL opts may have new lines */ +				p = strchr(val->string, '\n'); +				if (p) +					*p = '\0'; +				pr_err("%s: option %s: invalid value (%s)\n", +				       bond->dev->name, opt->name, val->string); +			} else { +				pr_err("%s: option %s: invalid value (%llu)\n", +				       bond->dev->name, opt->name, val->value); +			} +		} +		minval = bond_opt_get_flags(opt, BOND_VALFLAG_MIN); +		maxval = bond_opt_get_flags(opt, BOND_VALFLAG_MAX); +		if (!maxval) +			break; +		pr_err("%s: option %s: allowed values %llu - %llu\n", +		       bond->dev->name, opt->name, minval ? minval->value : 0, +		       maxval->value); +		break; +	case -EACCES: +		bond_opt_dep_print(bond, opt); +		break; +	case -ENOTEMPTY: +		pr_err("%s: option %s: unable to set because the bond device has slaves\n", +		       bond->dev->name, opt->name); +		break; +	case -EBUSY: +		pr_err("%s: option %s: unable to set because the bond device is up\n", +		       bond->dev->name, opt->name); +		break; +	default: +		break; +	} +} + +/** + * __bond_opt_set - set a bonding option + * @bond: target bond device + * @option: option to set + * @val: value to set it to + * + * This function is used to change the bond's option value, it can be + * used for both enabling/changing an option and for disabling it. RTNL lock + * must be obtained before calling this function. + */ +int __bond_opt_set(struct bonding *bond, +		   unsigned int option, struct bond_opt_value *val) +{ +	const struct bond_opt_value *retval = NULL; +	const struct bond_option *opt; +	int ret = -ENOENT; + +	ASSERT_RTNL(); + +	opt = bond_opt_get(option); +	if (WARN_ON(!val) || WARN_ON(!opt)) +		goto out; +	ret = bond_opt_check_deps(bond, opt); +	if (ret) +		goto out; +	retval = bond_opt_parse(opt, val); +	if (!retval) { +		ret = -EINVAL; +		goto out; +	} +	ret = opt->set(bond, retval); +out: +	if (ret) +		bond_opt_error_interpret(bond, opt, ret, val); + +	return ret; +} + +/** + * bond_opt_tryset_rtnl - try to acquire rtnl and call __bond_opt_set + * @bond: target bond device + * @option: option to set + * @buf: value to set it to + * + * This function tries to acquire RTNL without blocking and if successful + * calls __bond_opt_set. It is mainly used for sysfs option manipulation. + */ +int bond_opt_tryset_rtnl(struct bonding *bond, unsigned int option, char *buf) +{ +	struct bond_opt_value optval; +	int ret; + +	if (!rtnl_trylock()) +		return restart_syscall(); +	bond_opt_initstr(&optval, buf); +	ret = __bond_opt_set(bond, option, &optval); +	rtnl_unlock(); + +	return ret; +} + +/** + * bond_opt_get - get a pointer to an option + * @option: option for which to return a pointer + * + * This function checks if option is valid and if so returns a pointer + * to its entry in the bond_opts[] option array. 
+ */ +const struct bond_option *bond_opt_get(unsigned int option) +{ +	if (!BOND_OPT_VALID(option)) +		return NULL; + +	return &bond_opts[option]; +} + +int bond_option_mode_set(struct bonding *bond, const struct bond_opt_value *newval) +{ +	if (!bond_mode_uses_arp(newval->value) && bond->params.arp_interval) { +		pr_info("%s: %s mode is incompatible with arp monitoring, start mii monitoring\n", +			bond->dev->name, newval->string); +		/* disable arp monitoring */ +		bond->params.arp_interval = 0; +		/* set miimon to default value */ +		bond->params.miimon = BOND_DEFAULT_MIIMON; +		pr_info("%s: Setting MII monitoring interval to %d\n", +			bond->dev->name, bond->params.miimon); +	} + +	/* don't cache arp_validate between modes */ +	bond->params.arp_validate = BOND_ARP_VALIDATE_NONE; +	bond->params.mode = newval->value; + +	return 0; +} + +static struct net_device *__bond_option_active_slave_get(struct bonding *bond, +							 struct slave *slave) +{ +	return bond_uses_primary(bond) && slave ? slave->dev : NULL; +} + +struct net_device *bond_option_active_slave_get_rcu(struct bonding *bond) +{ +	struct slave *slave = rcu_dereference(bond->curr_active_slave); + +	return __bond_option_active_slave_get(bond, slave); +} + +struct net_device *bond_option_active_slave_get(struct bonding *bond) +{ +	return __bond_option_active_slave_get(bond, bond->curr_active_slave); +} + +static int bond_option_active_slave_set(struct bonding *bond, +					const struct bond_opt_value *newval) +{ +	char ifname[IFNAMSIZ] = { 0, }; +	struct net_device *slave_dev; +	int ret = 0; + +	sscanf(newval->string, "%15s", ifname); /* IFNAMSIZ */ +	if (!strlen(ifname) || newval->string[0] == '\n') { +		slave_dev = NULL; +	} else { +		slave_dev = __dev_get_by_name(dev_net(bond->dev), ifname); +		if (!slave_dev) +			return -ENODEV; +	} + +	if (slave_dev) { +		if (!netif_is_bond_slave(slave_dev)) { +			pr_err("Device %s is not bonding slave\n", +			       slave_dev->name); +			return -EINVAL; +		} + +		if (bond->dev != netdev_master_upper_dev_get(slave_dev)) { +			pr_err("%s: Device %s is not our slave\n", +			       bond->dev->name, slave_dev->name); +			return -EINVAL; +		} +	} + +	block_netpoll_tx(); +	write_lock_bh(&bond->curr_slave_lock); + +	/* check to see if we are clearing active */ +	if (!slave_dev) { +		pr_info("%s: Clearing current active slave\n", bond->dev->name); +		RCU_INIT_POINTER(bond->curr_active_slave, NULL); +		bond_select_active_slave(bond); +	} else { +		struct slave *old_active = bond->curr_active_slave; +		struct slave *new_active = bond_slave_get_rtnl(slave_dev); + +		BUG_ON(!new_active); + +		if (new_active == old_active) { +			/* do nothing */ +			pr_info("%s: %s is already the current active slave\n", +				bond->dev->name, new_active->dev->name); +		} else { +			if (old_active && (new_active->link == BOND_LINK_UP) && +			    bond_slave_is_up(new_active)) { +				pr_info("%s: Setting %s as active slave\n", +					bond->dev->name, new_active->dev->name); +				bond_change_active_slave(bond, new_active); +			} else { +				pr_err("%s: Could not set %s as active slave; either %s is down or the link is down\n", +				       bond->dev->name, new_active->dev->name, +				       new_active->dev->name); +				ret = -EINVAL; +			} +		} +	} + +	write_unlock_bh(&bond->curr_slave_lock); +	unblock_netpoll_tx(); + +	return ret; +} + +/* There are two tricky bits here.  First, if MII monitoring is activated, then + * we must disable ARP monitoring.  Second, if the timer isn't running, we must + * start it. 
+ */ +static int bond_option_miimon_set(struct bonding *bond, +				  const struct bond_opt_value *newval) +{ +	pr_info("%s: Setting MII monitoring interval to %llu\n", +		bond->dev->name, newval->value); +	bond->params.miimon = newval->value; +	if (bond->params.updelay) +		pr_info("%s: Note: Updating updelay (to %d) since it is a multiple of the miimon value\n", +			bond->dev->name, +			bond->params.updelay * bond->params.miimon); +	if (bond->params.downdelay) +		pr_info("%s: Note: Updating downdelay (to %d) since it is a multiple of the miimon value\n", +			bond->dev->name, +			bond->params.downdelay * bond->params.miimon); +	if (newval->value && bond->params.arp_interval) { +		pr_info("%s: MII monitoring cannot be used with ARP monitoring - disabling ARP monitoring...\n", +			bond->dev->name); +		bond->params.arp_interval = 0; +		if (bond->params.arp_validate) +			bond->params.arp_validate = BOND_ARP_VALIDATE_NONE; +	} +	if (bond->dev->flags & IFF_UP) { +		/* If the interface is up, we may need to fire off +		 * the MII timer. If the interface is down, the +		 * timer will get fired off when the open function +		 * is called. +		 */ +		if (!newval->value) { +			cancel_delayed_work_sync(&bond->mii_work); +		} else { +			cancel_delayed_work_sync(&bond->arp_work); +			queue_delayed_work(bond->wq, &bond->mii_work, 0); +		} +	} + +	return 0; +} + +/* Set up and down delays. These must be multiples of the + * MII monitoring value, and are stored internally as the multiplier. + * Thus, we must translate to MS for the real world. + */ +static int bond_option_updelay_set(struct bonding *bond, +				   const struct bond_opt_value *newval) +{ +	int value = newval->value; + +	if (!bond->params.miimon) { +		pr_err("%s: Unable to set up delay as MII monitoring is disabled\n", +		       bond->dev->name); +		return -EPERM; +	} +	if ((value % bond->params.miimon) != 0) { +		pr_warn("%s: Warning: up delay (%d) is not a multiple of miimon (%d), updelay rounded to %d ms\n", +			bond->dev->name, value, +			bond->params.miimon, +			(value / bond->params.miimon) * +			bond->params.miimon); +	} +	bond->params.updelay = value / bond->params.miimon; +	pr_info("%s: Setting up delay to %d\n", +		bond->dev->name, bond->params.updelay * bond->params.miimon); + +	return 0; +} + +static int bond_option_downdelay_set(struct bonding *bond, +				     const struct bond_opt_value *newval) +{ +	int value = newval->value; + +	if (!bond->params.miimon) { +		pr_err("%s: Unable to set down delay as MII monitoring is disabled\n", +		       bond->dev->name); +		return -EPERM; +	} +	if ((value % bond->params.miimon) != 0) { +		pr_warn("%s: Warning: down delay (%d) is not a multiple of miimon (%d), delay rounded to %d ms\n", +			bond->dev->name, value, +			bond->params.miimon, +			(value / bond->params.miimon) * +			bond->params.miimon); +	} +	bond->params.downdelay = value / bond->params.miimon; +	pr_info("%s: Setting down delay to %d\n", +		bond->dev->name, bond->params.downdelay * bond->params.miimon); + +	return 0; +} + +static int bond_option_use_carrier_set(struct bonding *bond, +				       const struct bond_opt_value *newval) +{ +	pr_info("%s: Setting use_carrier to %llu\n", +		bond->dev->name, newval->value); +	bond->params.use_carrier = newval->value; + +	return 0; +} + +/* There are two tricky bits here.  First, if ARP monitoring is activated, then + * we must disable MII monitoring.  Second, if the ARP timer isn't running, + * we must start it. 
+ */ +static int bond_option_arp_interval_set(struct bonding *bond, +					const struct bond_opt_value *newval) +{ +	pr_info("%s: Setting ARP monitoring interval to %llu\n", +		bond->dev->name, newval->value); +	bond->params.arp_interval = newval->value; +	if (newval->value) { +		if (bond->params.miimon) { +			pr_info("%s: ARP monitoring cannot be used with MII monitoring. %s Disabling MII monitoring\n", +				bond->dev->name, bond->dev->name); +			bond->params.miimon = 0; +		} +		if (!bond->params.arp_targets[0]) +			pr_info("%s: ARP monitoring has been set up, but no ARP targets have been specified\n", +				bond->dev->name); +	} +	if (bond->dev->flags & IFF_UP) { +		/* If the interface is up, we may need to fire off +		 * the ARP timer.  If the interface is down, the +		 * timer will get fired off when the open function +		 * is called. +		 */ +		if (!newval->value) { +			if (bond->params.arp_validate) +				bond->recv_probe = NULL; +			cancel_delayed_work_sync(&bond->arp_work); +		} else { +			/* arp_validate can be set only in active-backup mode */ +			bond->recv_probe = bond_arp_rcv; +			cancel_delayed_work_sync(&bond->mii_work); +			queue_delayed_work(bond->wq, &bond->arp_work, 0); +		} +	} + +	return 0; +} + +static void _bond_options_arp_ip_target_set(struct bonding *bond, int slot, +					    __be32 target, +					    unsigned long last_rx) +{ +	__be32 *targets = bond->params.arp_targets; +	struct list_head *iter; +	struct slave *slave; + +	if (slot >= 0 && slot < BOND_MAX_ARP_TARGETS) { +		bond_for_each_slave(bond, slave, iter) +			slave->target_last_arp_rx[slot] = last_rx; +		targets[slot] = target; +	} +} + +static int _bond_option_arp_ip_target_add(struct bonding *bond, __be32 target) +{ +	__be32 *targets = bond->params.arp_targets; +	int ind; + +	if (!bond_is_ip_target_ok(target)) { +		pr_err("%s: invalid ARP target %pI4 specified for addition\n", +		       bond->dev->name, &target); +		return -EINVAL; +	} + +	if (bond_get_targets_ip(targets, target) != -1) { /* dup */ +		pr_err("%s: ARP target %pI4 is already present\n", +		       bond->dev->name, &target); +		return -EINVAL; +	} + +	ind = bond_get_targets_ip(targets, 0); /* first free slot */ +	if (ind == -1) { +		pr_err("%s: ARP target table is full!\n", bond->dev->name); +		return -EINVAL; +	} + +	pr_info("%s: Adding ARP target %pI4\n", bond->dev->name, &target); + +	_bond_options_arp_ip_target_set(bond, ind, target, jiffies); + +	return 0; +} + +static int bond_option_arp_ip_target_add(struct bonding *bond, __be32 target) +{ +	int ret; + +	/* not to race with bond_arp_rcv */ +	write_lock_bh(&bond->lock); +	ret = _bond_option_arp_ip_target_add(bond, target); +	write_unlock_bh(&bond->lock); + +	return ret; +} + +static int bond_option_arp_ip_target_rem(struct bonding *bond, __be32 target) +{ +	__be32 *targets = bond->params.arp_targets; +	struct list_head *iter; +	struct slave *slave; +	unsigned long *targets_rx; +	int ind, i; + +	if (!bond_is_ip_target_ok(target)) { +		pr_err("%s: invalid ARP target %pI4 specified for removal\n", +		       bond->dev->name, &target); +		return -EINVAL; +	} + +	ind = bond_get_targets_ip(targets, target); +	if (ind == -1) { +		pr_err("%s: unable to remove nonexistent ARP target %pI4\n", +		       bond->dev->name, &target); +		return -EINVAL; +	} + +	if (ind == 0 && !targets[1] && bond->params.arp_interval) +		pr_warn("%s: Removing last arp target with arp_interval on\n", +			bond->dev->name); + +	pr_info("%s: Removing ARP target %pI4\n", bond->dev->name, &target); + +	/* not to race with 
bond_arp_rcv */ +	write_lock_bh(&bond->lock); + +	bond_for_each_slave(bond, slave, iter) { +		targets_rx = slave->target_last_arp_rx; +		for (i = ind; (i < BOND_MAX_ARP_TARGETS-1) && targets[i+1]; i++) +			targets_rx[i] = targets_rx[i+1]; +		targets_rx[i] = 0; +	} +	for (i = ind; (i < BOND_MAX_ARP_TARGETS-1) && targets[i+1]; i++) +		targets[i] = targets[i+1]; +	targets[i] = 0; + +	write_unlock_bh(&bond->lock); + +	return 0; +} + +void bond_option_arp_ip_targets_clear(struct bonding *bond) +{ +	int i; + +	/* not to race with bond_arp_rcv */ +	write_lock_bh(&bond->lock); +	for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) +		_bond_options_arp_ip_target_set(bond, i, 0, 0); +	write_unlock_bh(&bond->lock); +} + +static int bond_option_arp_ip_targets_set(struct bonding *bond, +					  const struct bond_opt_value *newval) +{ +	int ret = -EPERM; +	__be32 target; + +	if (newval->string) { +		if (!in4_pton(newval->string+1, -1, (u8 *)&target, -1, NULL)) { +			pr_err("%s: invalid ARP target %pI4 specified\n", +			       bond->dev->name, &target); +			return ret; +		} +		if (newval->string[0] == '+') +			ret = bond_option_arp_ip_target_add(bond, target); +		else if (newval->string[0] == '-') +			ret = bond_option_arp_ip_target_rem(bond, target); +		else +			pr_err("no command found in arp_ip_targets file for bond %s - use +<addr> or -<addr>\n", +			       bond->dev->name); +	} else { +		target = newval->value; +		ret = bond_option_arp_ip_target_add(bond, target); +	} + +	return ret; +} + +static int bond_option_arp_validate_set(struct bonding *bond, +					const struct bond_opt_value *newval) +{ +	pr_info("%s: Setting arp_validate to %s (%llu)\n", +		bond->dev->name, newval->string, newval->value); + +	if (bond->dev->flags & IFF_UP) { +		if (!newval->value) +			bond->recv_probe = NULL; +		else if (bond->params.arp_interval) +			bond->recv_probe = bond_arp_rcv; +	} +	bond->params.arp_validate = newval->value; + +	return 0; +} + +static int bond_option_arp_all_targets_set(struct bonding *bond, +					   const struct bond_opt_value *newval) +{ +	pr_info("%s: Setting arp_all_targets to %s (%llu)\n", +		bond->dev->name, newval->string, newval->value); +	bond->params.arp_all_targets = newval->value; + +	return 0; +} + +static int bond_option_primary_set(struct bonding *bond, +				   const struct bond_opt_value *newval) +{ +	char *p, *primary = newval->string; +	struct list_head *iter; +	struct slave *slave; + +	block_netpoll_tx(); +	read_lock(&bond->lock); +	write_lock_bh(&bond->curr_slave_lock); + +	p = strchr(primary, '\n'); +	if (p) +		*p = '\0'; +	/* check to see if we are clearing primary */ +	if (!strlen(primary)) { +		pr_info("%s: Setting primary slave to None\n", bond->dev->name); +		bond->primary_slave = NULL; +		memset(bond->params.primary, 0, sizeof(bond->params.primary)); +		bond_select_active_slave(bond); +		goto out; +	} + +	bond_for_each_slave(bond, slave, iter) { +		if (strncmp(slave->dev->name, primary, IFNAMSIZ) == 0) { +			pr_info("%s: Setting %s as primary slave\n", +				bond->dev->name, slave->dev->name); +			bond->primary_slave = slave; +			strcpy(bond->params.primary, slave->dev->name); +			bond_select_active_slave(bond); +			goto out; +		} +	} + +	if (bond->primary_slave) { +		pr_info("%s: Setting primary slave to None\n", bond->dev->name); +		bond->primary_slave = NULL; +		bond_select_active_slave(bond); +	} +	strncpy(bond->params.primary, primary, IFNAMSIZ); +	bond->params.primary[IFNAMSIZ - 1] = 0; + +	pr_info("%s: Recording %s as primary, but it has not been enslaved to %s yet\n", +		
bond->dev->name, primary, bond->dev->name); + +out: +	write_unlock_bh(&bond->curr_slave_lock); +	read_unlock(&bond->lock); +	unblock_netpoll_tx(); + +	return 0; +} + +static int bond_option_primary_reselect_set(struct bonding *bond, +					    const struct bond_opt_value *newval) +{ +	pr_info("%s: Setting primary_reselect to %s (%llu)\n", +		bond->dev->name, newval->string, newval->value); +	bond->params.primary_reselect = newval->value; + +	block_netpoll_tx(); +	write_lock_bh(&bond->curr_slave_lock); +	bond_select_active_slave(bond); +	write_unlock_bh(&bond->curr_slave_lock); +	unblock_netpoll_tx(); + +	return 0; +} + +static int bond_option_fail_over_mac_set(struct bonding *bond, +					 const struct bond_opt_value *newval) +{ +	pr_info("%s: Setting fail_over_mac to %s (%llu)\n", +		bond->dev->name, newval->string, newval->value); +	bond->params.fail_over_mac = newval->value; + +	return 0; +} + +static int bond_option_xmit_hash_policy_set(struct bonding *bond, +					    const struct bond_opt_value *newval) +{ +	pr_info("%s: Setting xmit hash policy to %s (%llu)\n", +		bond->dev->name, newval->string, newval->value); +	bond->params.xmit_policy = newval->value; + +	return 0; +} + +static int bond_option_resend_igmp_set(struct bonding *bond, +				       const struct bond_opt_value *newval) +{ +	pr_info("%s: Setting resend_igmp to %llu\n", +		bond->dev->name, newval->value); +	bond->params.resend_igmp = newval->value; + +	return 0; +} + +static int bond_option_num_peer_notif_set(struct bonding *bond, +				   const struct bond_opt_value *newval) +{ +	bond->params.num_peer_notif = newval->value; + +	return 0; +} + +static int bond_option_all_slaves_active_set(struct bonding *bond, +					     const struct bond_opt_value *newval) +{ +	struct list_head *iter; +	struct slave *slave; + +	if (newval->value == bond->params.all_slaves_active) +		return 0; +	bond->params.all_slaves_active = newval->value; +	bond_for_each_slave(bond, slave, iter) { +		if (!bond_is_active_slave(slave)) { +			if (newval->value) +				slave->inactive = 0; +			else +				slave->inactive = 1; +		} +	} + +	return 0; +} + +static int bond_option_min_links_set(struct bonding *bond, +				     const struct bond_opt_value *newval) +{ +	pr_info("%s: Setting min links value to %llu\n", +		bond->dev->name, newval->value); +	bond->params.min_links = newval->value; + +	return 0; +} + +static int bond_option_lp_interval_set(struct bonding *bond, +				       const struct bond_opt_value *newval) +{ +	bond->params.lp_interval = newval->value; + +	return 0; +} + +static int bond_option_pps_set(struct bonding *bond, +			       const struct bond_opt_value *newval) +{ +	bond->params.packets_per_slave = newval->value; +	if (newval->value > 0) { +		bond->params.reciprocal_packets_per_slave = +			reciprocal_value(newval->value); +	} else { +		/* reciprocal_packets_per_slave is unused if +		 * packets_per_slave is 0 or 1, just initialize it +		 */ +		bond->params.reciprocal_packets_per_slave = +			(struct reciprocal_value) { 0 }; +	} + +	return 0; +} + +static int bond_option_lacp_rate_set(struct bonding *bond, +				     const struct bond_opt_value *newval) +{ +	pr_info("%s: Setting LACP rate to %s (%llu)\n", +		bond->dev->name, newval->string, newval->value); +	bond->params.lacp_fast = newval->value; +	bond_3ad_update_lacp_rate(bond); + +	return 0; +} + +static int bond_option_ad_select_set(struct bonding *bond, +				     const struct bond_opt_value *newval) +{ +	pr_info("%s: Setting ad_select to %s (%llu)\n", +		bond->dev->name, newval->string, 
newval->value); +	bond->params.ad_select = newval->value; + +	return 0; +} + +static int bond_option_queue_id_set(struct bonding *bond, +				    const struct bond_opt_value *newval) +{ +	struct slave *slave, *update_slave; +	struct net_device *sdev; +	struct list_head *iter; +	char *delim; +	int ret = 0; +	u16 qid; + +	/* delim will point to queue id if successful */ +	delim = strchr(newval->string, ':'); +	if (!delim) +		goto err_no_cmd; + +	/* Terminate string that points to device name and bump it +	 * up one, so we can read the queue id there. +	 */ +	*delim = '\0'; +	if (sscanf(++delim, "%hd\n", &qid) != 1) +		goto err_no_cmd; + +	/* Check buffer length, valid ifname and queue id */ +	if (!dev_valid_name(newval->string) || +	    qid > bond->dev->real_num_tx_queues) +		goto err_no_cmd; + +	/* Get the pointer to that interface if it exists */ +	sdev = __dev_get_by_name(dev_net(bond->dev), newval->string); +	if (!sdev) +		goto err_no_cmd; + +	/* Search for thes slave and check for duplicate qids */ +	update_slave = NULL; +	bond_for_each_slave(bond, slave, iter) { +		if (sdev == slave->dev) +			/* We don't need to check the matching +			 * slave for dups, since we're overwriting it +			 */ +			update_slave = slave; +		else if (qid && qid == slave->queue_id) { +			goto err_no_cmd; +		} +	} + +	if (!update_slave) +		goto err_no_cmd; + +	/* Actually set the qids for the slave */ +	update_slave->queue_id = qid; + +out: +	return ret; + +err_no_cmd: +	pr_info("invalid input for queue_id set for %s\n", bond->dev->name); +	ret = -EPERM; +	goto out; + +} + +static int bond_option_slaves_set(struct bonding *bond, +				  const struct bond_opt_value *newval) +{ +	char command[IFNAMSIZ + 1] = { 0, }; +	struct net_device *dev; +	char *ifname; +	int ret; + +	sscanf(newval->string, "%16s", command); /* IFNAMSIZ*/ +	ifname = command + 1; +	if ((strlen(command) <= 1) || +	    !dev_valid_name(ifname)) +		goto err_no_cmd; + +	dev = __dev_get_by_name(dev_net(bond->dev), ifname); +	if (!dev) { +		pr_info("%s: interface %s does not exist!\n", +			bond->dev->name, ifname); +		ret = -ENODEV; +		goto out; +	} + +	switch (command[0]) { +	case '+': +		pr_info("%s: Adding slave %s\n", bond->dev->name, dev->name); +		ret = bond_enslave(bond->dev, dev); +		break; + +	case '-': +		pr_info("%s: Removing slave %s\n", bond->dev->name, dev->name); +		ret = bond_release(bond->dev, dev); +		break; + +	default: +		goto err_no_cmd; +	} + +out: +	return ret; + +err_no_cmd: +	pr_err("no command found in slaves file for bond %s - use +ifname or -ifname\n", +	       bond->dev->name); +	ret = -EPERM; +	goto out; +} + +static int bond_option_tlb_dynamic_lb_set(struct bonding *bond, +					  const struct bond_opt_value *newval) +{ +	pr_info("%s: Setting dynamic-lb to %s (%llu)\n", +		bond->dev->name, newval->string, newval->value); +	bond->params.tlb_dynamic_lb = newval->value; + +	return 0; +} diff --git a/drivers/net/bonding/bond_options.h b/drivers/net/bonding/bond_options.h new file mode 100644 index 00000000000..17ded5b2917 --- /dev/null +++ b/drivers/net/bonding/bond_options.h @@ -0,0 +1,130 @@ +/* + * drivers/net/bond/bond_options.h - bonding options + * Copyright (c) 2013 Nikolay Aleksandrov <nikolay@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#ifndef _BOND_OPTIONS_H +#define _BOND_OPTIONS_H + +#define BOND_OPT_MAX_NAMELEN 32 +#define BOND_OPT_VALID(opt) ((opt) < BOND_OPT_LAST) +#define BOND_MODE_ALL_EX(x) (~(x)) + +/* Option flags: + * BOND_OPTFLAG_NOSLAVES - check if the bond device is empty before setting + * BOND_OPTFLAG_IFDOWN - check if the bond device is down before setting + * BOND_OPTFLAG_RAWVAL - the option parses the value itself + */ +enum { +	BOND_OPTFLAG_NOSLAVES	= BIT(0), +	BOND_OPTFLAG_IFDOWN	= BIT(1), +	BOND_OPTFLAG_RAWVAL	= BIT(2) +}; + +/* Value type flags: + * BOND_VALFLAG_DEFAULT - mark the value as default + * BOND_VALFLAG_(MIN|MAX) - mark the value as min/max + */ +enum { +	BOND_VALFLAG_DEFAULT	= BIT(0), +	BOND_VALFLAG_MIN	= BIT(1), +	BOND_VALFLAG_MAX	= BIT(2) +}; + +/* Option IDs, their bit positions correspond to their IDs */ +enum { +	BOND_OPT_MODE, +	BOND_OPT_PACKETS_PER_SLAVE, +	BOND_OPT_XMIT_HASH, +	BOND_OPT_ARP_VALIDATE, +	BOND_OPT_ARP_ALL_TARGETS, +	BOND_OPT_FAIL_OVER_MAC, +	BOND_OPT_ARP_INTERVAL, +	BOND_OPT_ARP_TARGETS, +	BOND_OPT_DOWNDELAY, +	BOND_OPT_UPDELAY, +	BOND_OPT_LACP_RATE, +	BOND_OPT_MINLINKS, +	BOND_OPT_AD_SELECT, +	BOND_OPT_NUM_PEER_NOTIF, +	BOND_OPT_MIIMON, +	BOND_OPT_PRIMARY, +	BOND_OPT_PRIMARY_RESELECT, +	BOND_OPT_USE_CARRIER, +	BOND_OPT_ACTIVE_SLAVE, +	BOND_OPT_QUEUE_ID, +	BOND_OPT_ALL_SLAVES_ACTIVE, +	BOND_OPT_RESEND_IGMP, +	BOND_OPT_LP_INTERVAL, +	BOND_OPT_SLAVES, +	BOND_OPT_TLB_DYNAMIC_LB, +	BOND_OPT_LAST +}; + +/* This structure is used for storing option values and for passing option + * values when changing an option. The logic when used as an arg is as follows: + * - if string != NULL -> parse it, if the opt is RAW type then return it, else + *   return the parse result + * - if string == NULL -> parse value + */ +struct bond_opt_value { +	char *string; +	u64 value; +	u32 flags; +}; + +struct bonding; + +struct bond_option { +	int id; +	const char *name; +	const char *desc; +	u32 flags; + +	/* unsuppmodes is used to denote modes in which the option isn't +	 * supported. +	 */ +	unsigned long unsuppmodes; +	/* supported values which this option can have, can be a subset of +	 * BOND_OPTVAL_RANGE's value range +	 */ +	const struct bond_opt_value *values; + +	int (*set)(struct bonding *bond, const struct bond_opt_value *val); +}; + +int __bond_opt_set(struct bonding *bond, unsigned int option, +		   struct bond_opt_value *val); +int bond_opt_tryset_rtnl(struct bonding *bond, unsigned int option, char *buf); + +const struct bond_opt_value *bond_opt_parse(const struct bond_option *opt, +					    struct bond_opt_value *val); +const struct bond_option *bond_opt_get(unsigned int option); +const struct bond_option *bond_opt_get_by_name(const char *name); +const struct bond_opt_value *bond_opt_get_val(unsigned int option, u64 val); + +/* This helper is used to initialize a bond_opt_value structure for parameter + * passing. There should be either a valid string or value, but not both. + * When value is ULLONG_MAX then string will be used. 
+ */ +static inline void __bond_opt_init(struct bond_opt_value *optval, +				   char *string, u64 value) +{ +	memset(optval, 0, sizeof(*optval)); +	optval->value = ULLONG_MAX; +	if (value == ULLONG_MAX) +		optval->string = string; +	else +		optval->value = value; +} +#define bond_opt_initval(optval, value) __bond_opt_init(optval, NULL, value) +#define bond_opt_initstr(optval, str) __bond_opt_init(optval, str, ULLONG_MAX) + +void bond_option_arp_ip_targets_clear(struct bonding *bond); + +#endif /* _BOND_OPTIONS_H */ diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c new file mode 100644 index 00000000000..b215b479bb3 --- /dev/null +++ b/drivers/net/bonding/bond_procfs.c @@ -0,0 +1,296 @@ +#include <linux/proc_fs.h> +#include <linux/export.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> +#include "bonding.h" + + +static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos) +	__acquires(RCU) +	__acquires(&bond->lock) +{ +	struct bonding *bond = seq->private; +	struct list_head *iter; +	struct slave *slave; +	loff_t off = 0; + +	/* make sure the bond won't be taken away */ +	rcu_read_lock(); +	read_lock(&bond->lock); + +	if (*pos == 0) +		return SEQ_START_TOKEN; + +	bond_for_each_slave(bond, slave, iter) +		if (++off == *pos) +			return slave; + +	return NULL; +} + +static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ +	struct bonding *bond = seq->private; +	struct list_head *iter; +	struct slave *slave; +	bool found = false; + +	++*pos; +	if (v == SEQ_START_TOKEN) +		return bond_first_slave(bond); + +	if (bond_is_last_slave(bond, v)) +		return NULL; + +	bond_for_each_slave(bond, slave, iter) { +		if (found) +			return slave; +		if (slave == v) +			found = true; +	} + +	return NULL; +} + +static void bond_info_seq_stop(struct seq_file *seq, void *v) +	__releases(&bond->lock) +	__releases(RCU) +{ +	struct bonding *bond = seq->private; + +	read_unlock(&bond->lock); +	rcu_read_unlock(); +} + +static void bond_info_show_master(struct seq_file *seq) +{ +	struct bonding *bond = seq->private; +	const struct bond_opt_value *optval; +	struct slave *curr; +	int i; + +	curr = rcu_dereference(bond->curr_active_slave); + +	seq_printf(seq, "Bonding Mode: %s", +		   bond_mode_name(BOND_MODE(bond))); + +	if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP && +	    bond->params.fail_over_mac) { +		optval = bond_opt_get_val(BOND_OPT_FAIL_OVER_MAC, +					  bond->params.fail_over_mac); +		seq_printf(seq, " (fail_over_mac %s)", optval->string); +	} + +	seq_printf(seq, "\n"); + +	if (BOND_MODE(bond) == BOND_MODE_XOR || +		BOND_MODE(bond) == BOND_MODE_8023AD) { +		optval = bond_opt_get_val(BOND_OPT_XMIT_HASH, +					  bond->params.xmit_policy); +		seq_printf(seq, "Transmit Hash Policy: %s (%d)\n", +			   optval->string, bond->params.xmit_policy); +	} + +	if (bond_uses_primary(bond)) { +		seq_printf(seq, "Primary Slave: %s", +			   (bond->primary_slave) ? +			   bond->primary_slave->dev->name : "None"); +		if (bond->primary_slave) { +			optval = bond_opt_get_val(BOND_OPT_PRIMARY_RESELECT, +						  bond->params.primary_reselect); +			seq_printf(seq, " (primary_reselect %s)", +				   optval->string); +		} + +		seq_printf(seq, "\nCurrently Active Slave: %s\n", +			   (curr) ? curr->dev->name : "None"); +	} + +	seq_printf(seq, "MII Status: %s\n", netif_carrier_ok(bond->dev) ? 
+		   "up" : "down"); +	seq_printf(seq, "MII Polling Interval (ms): %d\n", bond->params.miimon); +	seq_printf(seq, "Up Delay (ms): %d\n", +		   bond->params.updelay * bond->params.miimon); +	seq_printf(seq, "Down Delay (ms): %d\n", +		   bond->params.downdelay * bond->params.miimon); + + +	/* ARP information */ +	if (bond->params.arp_interval > 0) { +		int printed = 0; +		seq_printf(seq, "ARP Polling Interval (ms): %d\n", +				bond->params.arp_interval); + +		seq_printf(seq, "ARP IP target/s (n.n.n.n form):"); + +		for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) { +			if (!bond->params.arp_targets[i]) +				break; +			if (printed) +				seq_printf(seq, ","); +			seq_printf(seq, " %pI4", &bond->params.arp_targets[i]); +			printed = 1; +		} +		seq_printf(seq, "\n"); +	} + +	if (BOND_MODE(bond) == BOND_MODE_8023AD) { +		struct ad_info ad_info; + +		seq_puts(seq, "\n802.3ad info\n"); +		seq_printf(seq, "LACP rate: %s\n", +			   (bond->params.lacp_fast) ? "fast" : "slow"); +		seq_printf(seq, "Min links: %d\n", bond->params.min_links); +		optval = bond_opt_get_val(BOND_OPT_AD_SELECT, +					  bond->params.ad_select); +		seq_printf(seq, "Aggregator selection policy (ad_select): %s\n", +			   optval->string); + +		if (__bond_3ad_get_active_agg_info(bond, &ad_info)) { +			seq_printf(seq, "bond %s has no active aggregator\n", +				   bond->dev->name); +		} else { +			seq_printf(seq, "Active Aggregator Info:\n"); + +			seq_printf(seq, "\tAggregator ID: %d\n", +				   ad_info.aggregator_id); +			seq_printf(seq, "\tNumber of ports: %d\n", +				   ad_info.ports); +			seq_printf(seq, "\tActor Key: %d\n", +				   ad_info.actor_key); +			seq_printf(seq, "\tPartner Key: %d\n", +				   ad_info.partner_key); +			seq_printf(seq, "\tPartner Mac Address: %pM\n", +				   ad_info.partner_system); +		} +	} +} + +static void bond_info_show_slave(struct seq_file *seq, +				 const struct slave *slave) +{ +	struct bonding *bond = seq->private; + +	seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name); +	seq_printf(seq, "MII Status: %s\n", bond_slave_link_status(slave->link)); +	if (slave->speed == SPEED_UNKNOWN) +		seq_printf(seq, "Speed: %s\n", "Unknown"); +	else +		seq_printf(seq, "Speed: %d Mbps\n", slave->speed); + +	if (slave->duplex == DUPLEX_UNKNOWN) +		seq_printf(seq, "Duplex: %s\n", "Unknown"); +	else +		seq_printf(seq, "Duplex: %s\n", slave->duplex ? 
"full" : "half"); + +	seq_printf(seq, "Link Failure Count: %u\n", +		   slave->link_failure_count); + +	seq_printf(seq, "Permanent HW addr: %pM\n", slave->perm_hwaddr); + +	if (BOND_MODE(bond) == BOND_MODE_8023AD) { +		const struct aggregator *agg +			= SLAVE_AD_INFO(slave)->port.aggregator; + +		if (agg) +			seq_printf(seq, "Aggregator ID: %d\n", +				   agg->aggregator_identifier); +		else +			seq_puts(seq, "Aggregator ID: N/A\n"); +	} +	seq_printf(seq, "Slave queue ID: %d\n", slave->queue_id); +} + +static int bond_info_seq_show(struct seq_file *seq, void *v) +{ +	if (v == SEQ_START_TOKEN) { +		seq_printf(seq, "%s\n", bond_version); +		bond_info_show_master(seq); +	} else +		bond_info_show_slave(seq, v); + +	return 0; +} + +static const struct seq_operations bond_info_seq_ops = { +	.start = bond_info_seq_start, +	.next  = bond_info_seq_next, +	.stop  = bond_info_seq_stop, +	.show  = bond_info_seq_show, +}; + +static int bond_info_open(struct inode *inode, struct file *file) +{ +	struct seq_file *seq; +	int res; + +	res = seq_open(file, &bond_info_seq_ops); +	if (!res) { +		/* recover the pointer buried in proc_dir_entry data */ +		seq = file->private_data; +		seq->private = PDE_DATA(inode); +	} + +	return res; +} + +static const struct file_operations bond_info_fops = { +	.owner   = THIS_MODULE, +	.open    = bond_info_open, +	.read    = seq_read, +	.llseek  = seq_lseek, +	.release = seq_release, +}; + +void bond_create_proc_entry(struct bonding *bond) +{ +	struct net_device *bond_dev = bond->dev; +	struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id); + +	if (bn->proc_dir) { +		bond->proc_entry = proc_create_data(bond_dev->name, +						    S_IRUGO, bn->proc_dir, +						    &bond_info_fops, bond); +		if (bond->proc_entry == NULL) +			pr_warn("Warning: Cannot create /proc/net/%s/%s\n", +				DRV_NAME, bond_dev->name); +		else +			memcpy(bond->proc_file_name, bond_dev->name, IFNAMSIZ); +	} +} + +void bond_remove_proc_entry(struct bonding *bond) +{ +	struct net_device *bond_dev = bond->dev; +	struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id); + +	if (bn->proc_dir && bond->proc_entry) { +		remove_proc_entry(bond->proc_file_name, bn->proc_dir); +		memset(bond->proc_file_name, 0, IFNAMSIZ); +		bond->proc_entry = NULL; +	} +} + +/* Create the bonding directory under /proc/net, if doesn't exist yet. + * Caller must hold rtnl_lock. + */ +void __net_init bond_create_proc_dir(struct bond_net *bn) +{ +	if (!bn->proc_dir) { +		bn->proc_dir = proc_mkdir(DRV_NAME, bn->net->proc_net); +		if (!bn->proc_dir) +			pr_warn("Warning: Cannot create /proc/net/%s\n", +				DRV_NAME); +	} +} + +/* Destroy the bonding directory under /proc/net, if empty. + * Caller must hold rtnl_lock. + */ +void __net_exit bond_destroy_proc_dir(struct bond_net *bn) +{ +	if (bn->proc_dir) { +		remove_proc_entry(DRV_NAME, bn->net->proc_net); +		bn->proc_dir = NULL; +	} +} diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 8fd0174c538..daed52f68ce 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -12,8 +12,7 @@   * for more details.   *   * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA. + * with this program; if not, see <http://www.gnu.org/licenses/>.   *   * The full GNU General Public License is included in this distribution in the   * file called LICENSE. 
@@ -26,7 +25,6 @@  #include <linux/module.h>  #include <linux/device.h>  #include <linux/sched.h> -#include <linux/sysdev.h>  #include <linux/fs.h>  #include <linux/types.h>  #include <linux/string.h> @@ -47,16 +45,15 @@  #define to_dev(obj)	container_of(obj, struct device, kobj)  #define to_bond(cd)	((struct bonding *)(netdev_priv(to_net_dev(cd)))) -/* - * "show" function for the bond_masters attribute. +/* "show" function for the bond_masters attribute.   * The class parameter is ignored.   */  static ssize_t bonding_show_bonds(struct class *cls,  				  struct class_attribute *attr,  				  char *buf)  { -	struct net *net = current->nsproxy->net_ns; -	struct bond_net *bn = net_generic(net, bond_net_id); +	struct bond_net *bn = +		container_of(attr, struct bond_net, class_attr_bonding_masters);  	int res = 0;  	struct bonding *bond; @@ -79,9 +76,8 @@ static ssize_t bonding_show_bonds(struct class *cls,  	return res;  } -static struct net_device *bond_get_by_name(struct net *net, const char *ifname) +static struct net_device *bond_get_by_name(struct bond_net *bn, const char *ifname)  { -	struct bond_net *bn = net_generic(net, bond_net_id);  	struct bonding *bond;  	list_for_each_entry(bond, &bn->dev_list, bond_list) { @@ -91,19 +87,18 @@ static struct net_device *bond_get_by_name(struct net *net, const char *ifname)  	return NULL;  } -/* - * "store" function for the bond_masters attribute.  This is what +/* "store" function for the bond_masters attribute.  This is what   * creates and deletes entire bonds.   *   * The class parameter is ignored.   *   */ -  static ssize_t bonding_store_bonds(struct class *cls,  				   struct class_attribute *attr,  				   const char *buffer, size_t count)  { -	struct net *net = current->nsproxy->net_ns; +	struct bond_net *bn = +		container_of(attr, struct bond_net, class_attr_bonding_masters);  	char command[IFNAMSIZ + 1] = {0, };  	char *ifname;  	int rv, res = count; @@ -116,16 +111,19 @@ static ssize_t bonding_store_bonds(struct class *cls,  	if (command[0] == '+') {  		pr_info("%s is being created...\n", ifname); -		rv = bond_create(net, ifname); +		rv = bond_create(bn->net, ifname);  		if (rv) { -			pr_info("Bond creation failed.\n"); +			if (rv == -EEXIST) +				pr_info("%s already exists\n", ifname); +			else +				pr_info("%s creation failed\n", ifname);  			res = rv;  		}  	} else if (command[0] == '-') {  		struct net_device *bond_dev;  		rtnl_lock(); -		bond_dev = bond_get_by_name(net, ifname); +		bond_dev = bond_get_by_name(bn, ifname);  		if (bond_dev) {  			pr_info("%s is being deleted...\n", ifname);  			unregister_netdevice(bond_dev); @@ -143,56 +141,52 @@ static ssize_t bonding_store_bonds(struct class *cls,  	return res;  err_no_cmd: -	pr_err("no command found in bonding_masters. Use +ifname or -ifname.\n"); +	pr_err("no command found in bonding_masters - use +ifname or -ifname\n");  	return -EPERM;  }  /* class attribute for bond_masters file.  
This ends up in /sys/class/net */ -static CLASS_ATTR(bonding_masters,  S_IWUSR | S_IRUGO, -		  bonding_show_bonds, bonding_store_bonds); +static const struct class_attribute class_attr_bonding_masters = { +	.attr = { +		.name = "bonding_masters", +		.mode = S_IWUSR | S_IRUGO, +	}, +	.show = bonding_show_bonds, +	.store = bonding_store_bonds, +}; -int bond_create_slave_symlinks(struct net_device *master, -			       struct net_device *slave) +/* Generic "store" method for bonding sysfs option setting */ +static ssize_t bonding_sysfs_store_option(struct device *d, +					  struct device_attribute *attr, +					  const char *buffer, size_t count)  { -	char linkname[IFNAMSIZ+7]; -	int ret = 0; - -	/* first, create a link from the slave back to the master */ -	ret = sysfs_create_link(&(slave->dev.kobj), &(master->dev.kobj), -				"master"); -	if (ret) -		return ret; -	/* next, create a link from the master to the slave */ -	sprintf(linkname, "slave_%s", slave->name); -	ret = sysfs_create_link(&(master->dev.kobj), &(slave->dev.kobj), -				linkname); -	return ret; +	struct bonding *bond = to_bond(d); +	const struct bond_option *opt; +	int ret; -} +	opt = bond_opt_get_by_name(attr->attr.name); +	if (WARN_ON(!opt)) +		return -ENOENT; +	ret = bond_opt_tryset_rtnl(bond, opt->id, (char *)buffer); +	if (!ret) +		ret = count; -void bond_destroy_slave_symlinks(struct net_device *master, -				 struct net_device *slave) -{ -	char linkname[IFNAMSIZ+7]; - -	sysfs_remove_link(&(slave->dev.kobj), "master"); -	sprintf(linkname, "slave_%s", slave->name); -	sysfs_remove_link(&(master->dev.kobj), linkname); +	return ret;  } - -/* - * Show the slaves in the current bond. - */ +/* Show the slaves in the current bond. */  static ssize_t bonding_show_slaves(struct device *d,  				   struct device_attribute *attr, char *buf)  { -	struct slave *slave; -	int i, res = 0;  	struct bonding *bond = to_bond(d); +	struct list_head *iter; +	struct slave *slave; +	int res = 0; -	read_lock(&bond->lock); -	bond_for_each_slave(bond, slave, i) { +	if (!rtnl_trylock()) +		return restart_syscall(); + +	bond_for_each_slave(bond, slave, iter) {  		if (res > (PAGE_SIZE - IFNAMSIZ)) {  			/* not enough space for another interface name */  			if ((PAGE_SIZE - res) > 10) @@ -202,293 +196,95 @@ static ssize_t bonding_show_slaves(struct device *d,  		}  		res += sprintf(buf + res, "%s ", slave->dev->name);  	} -	read_unlock(&bond->lock); -	if (res) -		buf[res-1] = '\n'; /* eat the leftover space */ -	return res; -} -/* - * Set the slaves in the current bond.  The bond interface must be - * up for this to succeed. - * This is supposed to be only thin wrapper for bond_enslave and bond_release. - * All hard work should be done there. - */ -static ssize_t bonding_store_slaves(struct device *d, -				    struct device_attribute *attr, -				    const char *buffer, size_t count) -{ -	char command[IFNAMSIZ + 1] = { 0, }; -	char *ifname; -	int res, ret = count; -	struct net_device *dev; -	struct bonding *bond = to_bond(d); - -	/* Quick sanity check -- is the bond interface up? 
*/ -	if (!(bond->dev->flags & IFF_UP)) { -		pr_warning("%s: doing slave updates when interface is down.\n", -			   bond->dev->name); -	} - -	if (!rtnl_trylock()) -		return restart_syscall(); - -	sscanf(buffer, "%16s", command); /* IFNAMSIZ*/ -	ifname = command + 1; -	if ((strlen(command) <= 1) || -	    !dev_valid_name(ifname)) -		goto err_no_cmd; - -	dev = __dev_get_by_name(dev_net(bond->dev), ifname); -	if (!dev) { -		pr_info("%s: Interface %s does not exist!\n", -			bond->dev->name, ifname); -		ret = -ENODEV; -		goto out; -	} - -	switch (command[0]) { -	case '+': -		pr_info("%s: Adding slave %s.\n", bond->dev->name, dev->name); -		res = bond_enslave(bond->dev, dev); -		break; - -	case '-': -		pr_info("%s: Removing slave %s.\n", bond->dev->name, dev->name); -		res = bond_release(bond->dev, dev); -		break; - -	default: -		goto err_no_cmd; -	} +	rtnl_unlock();  	if (res) -		ret = res; -	goto out; - -err_no_cmd: -	pr_err("no command found in slaves file for bond %s. Use +ifname or -ifname.\n", -	       bond->dev->name); -	ret = -EPERM; +		buf[res-1] = '\n'; /* eat the leftover space */ -out: -	rtnl_unlock(); -	return ret; +	return res;  } -  static DEVICE_ATTR(slaves, S_IRUGO | S_IWUSR, bonding_show_slaves, -		   bonding_store_slaves); +		   bonding_sysfs_store_option); -/* - * Show and set the bonding mode.  The bond interface must be down to - * change the mode. - */ +/* Show the bonding mode. */  static ssize_t bonding_show_mode(struct device *d,  				 struct device_attribute *attr, char *buf)  {  	struct bonding *bond = to_bond(d); +	const struct bond_opt_value *val; -	return sprintf(buf, "%s %d\n", -			bond_mode_tbl[bond->params.mode].modename, -			bond->params.mode); -} - -static ssize_t bonding_store_mode(struct device *d, -				  struct device_attribute *attr, -				  const char *buf, size_t count) -{ -	int new_value, ret = count; -	struct bonding *bond = to_bond(d); - -	if (bond->dev->flags & IFF_UP) { -		pr_err("unable to update mode of %s because interface is up.\n", -		       bond->dev->name); -		ret = -EPERM; -		goto out; -	} +	val = bond_opt_get_val(BOND_OPT_MODE, BOND_MODE(bond)); -	new_value = bond_parse_parm(buf, bond_mode_tbl); -	if (new_value < 0)  { -		pr_err("%s: Ignoring invalid mode value %.*s.\n", -		       bond->dev->name, (int)strlen(buf) - 1, buf); -		ret = -EINVAL; -		goto out; -	} -	if ((new_value == BOND_MODE_ALB || -	     new_value == BOND_MODE_TLB) && -	    bond->params.arp_interval) { -		pr_err("%s: %s mode is incompatible with arp monitoring.\n", -		       bond->dev->name, bond_mode_tbl[new_value].modename); -		ret = -EINVAL; -		goto out; -	} -	if (bond->params.mode == BOND_MODE_8023AD) -		bond_unset_master_3ad_flags(bond); - -	if (bond->params.mode == BOND_MODE_ALB) -		bond_unset_master_alb_flags(bond); - -	bond->params.mode = new_value; -	bond_set_mode_ops(bond, bond->params.mode); -	pr_info("%s: setting mode to %s (%d).\n", -		bond->dev->name, bond_mode_tbl[new_value].modename, -		new_value); -out: -	return ret; +	return sprintf(buf, "%s %d\n", val->string, BOND_MODE(bond));  }  static DEVICE_ATTR(mode, S_IRUGO | S_IWUSR, -		   bonding_show_mode, bonding_store_mode); +		   bonding_show_mode, bonding_sysfs_store_option); -/* - * Show and set the bonding transmit hash method. - * The bond interface must be down to change the xmit hash policy. - */ +/* Show the bonding transmit hash method. 
*/  static ssize_t bonding_show_xmit_hash(struct device *d,  				      struct device_attribute *attr,  				      char *buf)  {  	struct bonding *bond = to_bond(d); +	const struct bond_opt_value *val; -	return sprintf(buf, "%s %d\n", -		       xmit_hashtype_tbl[bond->params.xmit_policy].modename, -		       bond->params.xmit_policy); -} - -static ssize_t bonding_store_xmit_hash(struct device *d, -				       struct device_attribute *attr, -				       const char *buf, size_t count) -{ -	int new_value, ret = count; -	struct bonding *bond = to_bond(d); - -	if (bond->dev->flags & IFF_UP) { -		pr_err("%s: Interface is up. Unable to update xmit policy.\n", -		       bond->dev->name); -		ret = -EPERM; -		goto out; -	} +	val = bond_opt_get_val(BOND_OPT_XMIT_HASH, bond->params.xmit_policy); -	new_value = bond_parse_parm(buf, xmit_hashtype_tbl); -	if (new_value < 0)  { -		pr_err("%s: Ignoring invalid xmit hash policy value %.*s.\n", -		       bond->dev->name, -		       (int)strlen(buf) - 1, buf); -		ret = -EINVAL; -		goto out; -	} else { -		bond->params.xmit_policy = new_value; -		bond_set_mode_ops(bond, bond->params.mode); -		pr_info("%s: setting xmit hash policy to %s (%d).\n", -			bond->dev->name, -			xmit_hashtype_tbl[new_value].modename, new_value); -	} -out: -	return ret; +	return sprintf(buf, "%s %d\n", val->string, bond->params.xmit_policy);  }  static DEVICE_ATTR(xmit_hash_policy, S_IRUGO | S_IWUSR, -		   bonding_show_xmit_hash, bonding_store_xmit_hash); +		   bonding_show_xmit_hash, bonding_sysfs_store_option); -/* - * Show and set arp_validate. - */ +/* Show arp_validate. */  static ssize_t bonding_show_arp_validate(struct device *d,  					 struct device_attribute *attr,  					 char *buf)  {  	struct bonding *bond = to_bond(d); +	const struct bond_opt_value *val; -	return sprintf(buf, "%s %d\n", -		       arp_validate_tbl[bond->params.arp_validate].modename, -		       bond->params.arp_validate); -} - -static ssize_t bonding_store_arp_validate(struct device *d, -					  struct device_attribute *attr, -					  const char *buf, size_t count) -{ -	int new_value; -	struct bonding *bond = to_bond(d); - -	new_value = bond_parse_parm(buf, arp_validate_tbl); -	if (new_value < 0) { -		pr_err("%s: Ignoring invalid arp_validate value %s\n", -		       bond->dev->name, buf); -		return -EINVAL; -	} -	if (new_value && (bond->params.mode != BOND_MODE_ACTIVEBACKUP)) { -		pr_err("%s: arp_validate only supported in active-backup mode.\n", -		       bond->dev->name); -		return -EINVAL; -	} -	pr_info("%s: setting arp_validate to %s (%d).\n", -		bond->dev->name, arp_validate_tbl[new_value].modename, -		new_value); - -	if (!bond->params.arp_validate && new_value) -		bond_register_arp(bond); -	else if (bond->params.arp_validate && !new_value) -		bond_unregister_arp(bond); - -	bond->params.arp_validate = new_value; +	val = bond_opt_get_val(BOND_OPT_ARP_VALIDATE, +			       bond->params.arp_validate); -	return count; +	return sprintf(buf, "%s %d\n", val->string, bond->params.arp_validate);  } -  static DEVICE_ATTR(arp_validate, S_IRUGO | S_IWUSR, bonding_show_arp_validate, -		   bonding_store_arp_validate); +		   bonding_sysfs_store_option); -/* - * Show and store fail_over_mac.  User only allowed to change the - * value when there are no slaves. - */ -static ssize_t bonding_show_fail_over_mac(struct device *d, -					  struct device_attribute *attr, -					  char *buf) +/* Show arp_all_targets. 
*/ +static ssize_t bonding_show_arp_all_targets(struct device *d, +					 struct device_attribute *attr, +					 char *buf)  {  	struct bonding *bond = to_bond(d); +	const struct bond_opt_value *val; +	val = bond_opt_get_val(BOND_OPT_ARP_ALL_TARGETS, +			       bond->params.arp_all_targets);  	return sprintf(buf, "%s %d\n", -		       fail_over_mac_tbl[bond->params.fail_over_mac].modename, -		       bond->params.fail_over_mac); +		       val->string, bond->params.arp_all_targets);  } +static DEVICE_ATTR(arp_all_targets, S_IRUGO | S_IWUSR, +		   bonding_show_arp_all_targets, bonding_sysfs_store_option); -static ssize_t bonding_store_fail_over_mac(struct device *d, -					   struct device_attribute *attr, -					   const char *buf, size_t count) +/* Show fail_over_mac. */ +static ssize_t bonding_show_fail_over_mac(struct device *d, +					  struct device_attribute *attr, +					  char *buf)  { -	int new_value;  	struct bonding *bond = to_bond(d); +	const struct bond_opt_value *val; -	if (bond->slave_cnt != 0) { -		pr_err("%s: Can't alter fail_over_mac with slaves in bond.\n", -		       bond->dev->name); -		return -EPERM; -	} - -	new_value = bond_parse_parm(buf, fail_over_mac_tbl); -	if (new_value < 0) { -		pr_err("%s: Ignoring invalid fail_over_mac value %s.\n", -		       bond->dev->name, buf); -		return -EINVAL; -	} - -	bond->params.fail_over_mac = new_value; -	pr_info("%s: Setting fail_over_mac to %s (%d).\n", -		bond->dev->name, fail_over_mac_tbl[new_value].modename, -		new_value); +	val = bond_opt_get_val(BOND_OPT_FAIL_OVER_MAC, +			       bond->params.fail_over_mac); -	return count; +	return sprintf(buf, "%s %d\n", val->string, bond->params.fail_over_mac);  } -  static DEVICE_ATTR(fail_over_mac, S_IRUGO | S_IWUSR, -		   bonding_show_fail_over_mac, bonding_store_fail_over_mac); +		   bonding_show_fail_over_mac, bonding_sysfs_store_option); -/* - * Show and set the arp timer interval.  There are two tricky bits - * here.  First, if ARP monitoring is activated, then we must disable - * MII monitoring.  Second, if the ARP timer isn't running, we must - * start it. - */ +/* Show the arp timer interval. */  static ssize_t bonding_show_arp_interval(struct device *d,  					 struct device_attribute *attr,  					 char *buf) @@ -497,84 +293,16 @@ static ssize_t bonding_show_arp_interval(struct device *d,  	return sprintf(buf, "%d\n", bond->params.arp_interval);  } - -static ssize_t bonding_store_arp_interval(struct device *d, -					  struct device_attribute *attr, -					  const char *buf, size_t count) -{ -	int new_value, ret = count; -	struct bonding *bond = to_bond(d); - -	if (sscanf(buf, "%d", &new_value) != 1) { -		pr_err("%s: no arp_interval value specified.\n", -		       bond->dev->name); -		ret = -EINVAL; -		goto out; -	} -	if (new_value < 0) { -		pr_err("%s: Invalid arp_interval value %d not in range 1-%d; rejected.\n", -		       bond->dev->name, new_value, INT_MAX); -		ret = -EINVAL; -		goto out; -	} -	if (bond->params.mode == BOND_MODE_ALB || -	    bond->params.mode == BOND_MODE_TLB) { -		pr_info("%s: ARP monitoring cannot be used with ALB/TLB. Only MII monitoring is supported on %s.\n", -			bond->dev->name, bond->dev->name); -		ret = -EINVAL; -		goto out; -	} -	pr_info("%s: Setting ARP monitoring interval to %d.\n", -		bond->dev->name, new_value); -	bond->params.arp_interval = new_value; -	if (bond->params.arp_interval) -		bond->dev->priv_flags |= IFF_MASTER_ARPMON; -	if (bond->params.miimon) { -		pr_info("%s: ARP monitoring cannot be used with MII monitoring. 
%s Disabling MII monitoring.\n", -			bond->dev->name, bond->dev->name); -		bond->params.miimon = 0; -		if (delayed_work_pending(&bond->mii_work)) { -			cancel_delayed_work(&bond->mii_work); -			flush_workqueue(bond->wq); -		} -	} -	if (!bond->params.arp_targets[0]) { -		pr_info("%s: ARP monitoring has been set up, but no ARP targets have been specified.\n", -			bond->dev->name); -	} -	if (bond->dev->flags & IFF_UP) { -		/* If the interface is up, we may need to fire off -		 * the ARP timer.  If the interface is down, the -		 * timer will get fired off when the open function -		 * is called. -		 */ -		if (!delayed_work_pending(&bond->arp_work)) { -			if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) -				INIT_DELAYED_WORK(&bond->arp_work, -						  bond_activebackup_arp_mon); -			else -				INIT_DELAYED_WORK(&bond->arp_work, -						  bond_loadbalance_arp_mon); - -			queue_delayed_work(bond->wq, &bond->arp_work, 0); -		} -	} - -out: -	return ret; -}  static DEVICE_ATTR(arp_interval, S_IRUGO | S_IWUSR, -		   bonding_show_arp_interval, bonding_store_arp_interval); +		   bonding_show_arp_interval, bonding_sysfs_store_option); -/* - * Show and set the arp targets. - */ +/* Show the arp targets. */  static ssize_t bonding_show_arp_targets(struct device *d,  					struct device_attribute *attr,  					char *buf)  { -	int i, res = 0;  	struct bonding *bond = to_bond(d); +	int i, res = 0;  	for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) {  		if (bond->params.arp_targets[i]) @@ -583,93 +311,13 @@ static ssize_t bonding_show_arp_targets(struct device *d,  	}  	if (res)  		buf[res-1] = '\n'; /* eat the leftover space */ -	return res; -} - -static ssize_t bonding_store_arp_targets(struct device *d, -					 struct device_attribute *attr, -					 const char *buf, size_t count) -{ -	__be32 newtarget; -	int i = 0, done = 0, ret = count; -	struct bonding *bond = to_bond(d); -	__be32 *targets; - -	targets = bond->params.arp_targets; -	newtarget = in_aton(buf + 1); -	/* look for adds */ -	if (buf[0] == '+') { -		if ((newtarget == 0) || (newtarget == htonl(INADDR_BROADCAST))) { -			pr_err("%s: invalid ARP target %pI4 specified for addition\n", -			       bond->dev->name, &newtarget); -			ret = -EINVAL; -			goto out; -		} -		/* look for an empty slot to put the target in, and check for dupes */ -		for (i = 0; (i < BOND_MAX_ARP_TARGETS) && !done; i++) { -			if (targets[i] == newtarget) { /* duplicate */ -				pr_err("%s: ARP target %pI4 is already present\n", -				       bond->dev->name, &newtarget); -				ret = -EINVAL; -				goto out; -			} -			if (targets[i] == 0) { -				pr_info("%s: adding ARP target %pI4.\n", -					bond->dev->name, &newtarget); -				done = 1; -				targets[i] = newtarget; -			} -		} -		if (!done) { -			pr_err("%s: ARP target table is full!\n", -			       bond->dev->name); -			ret = -EINVAL; -			goto out; -		} -	} else if (buf[0] == '-')	{ -		if ((newtarget == 0) || (newtarget == htonl(INADDR_BROADCAST))) { -			pr_err("%s: invalid ARP target %pI4 specified for removal\n", -			       bond->dev->name, &newtarget); -			ret = -EINVAL; -			goto out; -		} - -		for (i = 0; (i < BOND_MAX_ARP_TARGETS) && !done; i++) { -			if (targets[i] == newtarget) { -				int j; -				pr_info("%s: removing ARP target %pI4.\n", -					bond->dev->name, &newtarget); -				for (j = i; (j < (BOND_MAX_ARP_TARGETS-1)) && targets[j+1]; j++) -					targets[j] = targets[j+1]; - -				targets[j] = 0; -				done = 1; -			} -		} -		if (!done) { -			pr_info("%s: unable to remove nonexistent ARP target %pI4.\n", -				bond->dev->name, &newtarget); -			
ret = -EINVAL; -			goto out; -		} -	} else { -		pr_err("no command found in arp_ip_targets file for bond %s. Use +<addr> or -<addr>.\n", -		       bond->dev->name); -		ret = -EPERM; -		goto out; -	} - -out: -	return ret; +	return res;  } -static DEVICE_ATTR(arp_ip_target, S_IRUGO | S_IWUSR , bonding_show_arp_targets, bonding_store_arp_targets); +static DEVICE_ATTR(arp_ip_target, S_IRUGO | S_IWUSR, +		   bonding_show_arp_targets, bonding_sysfs_store_option); -/* - * Show and set the up and down delays.  These must be multiples of the - * MII monitoring value, and are stored internally as the multiplier. - * Thus, we must translate to MS for the real world. - */ +/* Show the up and down delays. */  static ssize_t bonding_show_downdelay(struct device *d,  				      struct device_attribute *attr,  				      char *buf) @@ -678,51 +326,8 @@ static ssize_t bonding_show_downdelay(struct device *d,  	return sprintf(buf, "%d\n", bond->params.downdelay * bond->params.miimon);  } - -static ssize_t bonding_store_downdelay(struct device *d, -				       struct device_attribute *attr, -				       const char *buf, size_t count) -{ -	int new_value, ret = count; -	struct bonding *bond = to_bond(d); - -	if (!(bond->params.miimon)) { -		pr_err("%s: Unable to set down delay as MII monitoring is disabled\n", -		       bond->dev->name); -		ret = -EPERM; -		goto out; -	} - -	if (sscanf(buf, "%d", &new_value) != 1) { -		pr_err("%s: no down delay value specified.\n", bond->dev->name); -		ret = -EINVAL; -		goto out; -	} -	if (new_value < 0) { -		pr_err("%s: Invalid down delay value %d not in range %d-%d; rejected.\n", -		       bond->dev->name, new_value, 1, INT_MAX); -		ret = -EINVAL; -		goto out; -	} else { -		if ((new_value % bond->params.miimon) != 0) { -			pr_warning("%s: Warning: down delay (%d) is not a multiple of miimon (%d), delay rounded to %d ms\n", -				   bond->dev->name, new_value, -				   bond->params.miimon, -				   (new_value / bond->params.miimon) * -				   bond->params.miimon); -		} -		bond->params.downdelay = new_value / bond->params.miimon; -		pr_info("%s: Setting down delay to %d.\n", -			bond->dev->name, -			bond->params.downdelay * bond->params.miimon); - -	} - -out: -	return ret; -}  static DEVICE_ATTR(downdelay, S_IRUGO | S_IWUSR, -		   bonding_show_downdelay, bonding_store_downdelay); +		   bonding_show_downdelay, bonding_sysfs_store_option);  static ssize_t bonding_show_updelay(struct device *d,  				    struct device_attribute *attr, @@ -733,234 +338,77 @@ static ssize_t bonding_show_updelay(struct device *d,  	return sprintf(buf, "%d\n", bond->params.updelay * bond->params.miimon);  } - -static ssize_t bonding_store_updelay(struct device *d, -				     struct device_attribute *attr, -				     const char *buf, size_t count) -{ -	int new_value, ret = count; -	struct bonding *bond = to_bond(d); - -	if (!(bond->params.miimon)) { -		pr_err("%s: Unable to set up delay as MII monitoring is disabled\n", -		       bond->dev->name); -		ret = -EPERM; -		goto out; -	} - -	if (sscanf(buf, "%d", &new_value) != 1) { -		pr_err("%s: no up delay value specified.\n", -		       bond->dev->name); -		ret = -EINVAL; -		goto out; -	} -	if (new_value < 0) { -		pr_err("%s: Invalid down delay value %d not in range %d-%d; rejected.\n", -		       bond->dev->name, new_value, 1, INT_MAX); -		ret = -EINVAL; -		goto out; -	} else { -		if ((new_value % bond->params.miimon) != 0) { -			pr_warning("%s: Warning: up delay (%d) is not a multiple of miimon (%d), updelay rounded to %d ms\n", -				   bond->dev->name, new_value, 
-				   bond->params.miimon, -				   (new_value / bond->params.miimon) * -				   bond->params.miimon); -		} -		bond->params.updelay = new_value / bond->params.miimon; -		pr_info("%s: Setting up delay to %d.\n", -			bond->dev->name, -			bond->params.updelay * bond->params.miimon); -	} - -out: -	return ret; -}  static DEVICE_ATTR(updelay, S_IRUGO | S_IWUSR, -		   bonding_show_updelay, bonding_store_updelay); +		   bonding_show_updelay, bonding_sysfs_store_option); -/* - * Show and set the LACP interval.  Interface must be down, and the mode - * must be set to 802.3ad mode. - */ +/* Show the LACP interval. */  static ssize_t bonding_show_lacp(struct device *d,  				 struct device_attribute *attr,  				 char *buf)  {  	struct bonding *bond = to_bond(d); +	const struct bond_opt_value *val; -	return sprintf(buf, "%s %d\n", -		bond_lacp_tbl[bond->params.lacp_fast].modename, -		bond->params.lacp_fast); -} +	val = bond_opt_get_val(BOND_OPT_LACP_RATE, bond->params.lacp_fast); -static ssize_t bonding_store_lacp(struct device *d, -				  struct device_attribute *attr, -				  const char *buf, size_t count) -{ -	int new_value, ret = count; -	struct bonding *bond = to_bond(d); - -	if (bond->dev->flags & IFF_UP) { -		pr_err("%s: Unable to update LACP rate because interface is up.\n", -		       bond->dev->name); -		ret = -EPERM; -		goto out; -	} - -	if (bond->params.mode != BOND_MODE_8023AD) { -		pr_err("%s: Unable to update LACP rate because bond is not in 802.3ad mode.\n", -		       bond->dev->name); -		ret = -EPERM; -		goto out; -	} - -	new_value = bond_parse_parm(buf, bond_lacp_tbl); - -	if ((new_value == 1) || (new_value == 0)) { -		bond->params.lacp_fast = new_value; -		pr_info("%s: Setting LACP rate to %s (%d).\n", -			bond->dev->name, bond_lacp_tbl[new_value].modename, -			new_value); -	} else { -		pr_err("%s: Ignoring invalid LACP rate value %.*s.\n", -		       bond->dev->name, (int)strlen(buf) - 1, buf); -		ret = -EINVAL; -	} -out: -	return ret; +	return sprintf(buf, "%s %d\n", val->string, bond->params.lacp_fast);  }  static DEVICE_ATTR(lacp_rate, S_IRUGO | S_IWUSR, -		   bonding_show_lacp, bonding_store_lacp); +		   bonding_show_lacp, bonding_sysfs_store_option); -static ssize_t bonding_show_ad_select(struct device *d, +static ssize_t bonding_show_min_links(struct device *d,  				      struct device_attribute *attr,  				      char *buf)  {  	struct bonding *bond = to_bond(d); -	return sprintf(buf, "%s %d\n", -		ad_select_tbl[bond->params.ad_select].modename, -		bond->params.ad_select); +	return sprintf(buf, "%u\n", bond->params.min_links);  } +static DEVICE_ATTR(min_links, S_IRUGO | S_IWUSR, +		   bonding_show_min_links, bonding_sysfs_store_option); - -static ssize_t bonding_store_ad_select(struct device *d, -				       struct device_attribute *attr, -				       const char *buf, size_t count) +static ssize_t bonding_show_ad_select(struct device *d, +				      struct device_attribute *attr, +				      char *buf)  { -	int new_value, ret = count;  	struct bonding *bond = to_bond(d); +	const struct bond_opt_value *val; -	if (bond->dev->flags & IFF_UP) { -		pr_err("%s: Unable to update ad_select because interface is up.\n", -		       bond->dev->name); -		ret = -EPERM; -		goto out; -	} +	val = bond_opt_get_val(BOND_OPT_AD_SELECT, bond->params.ad_select); -	new_value = bond_parse_parm(buf, ad_select_tbl); - -	if (new_value != -1) { -		bond->params.ad_select = new_value; -		pr_info("%s: Setting ad_select to %s (%d).\n", -			bond->dev->name, ad_select_tbl[new_value].modename, -			new_value); -	} else { 
-		pr_err("%s: Ignoring invalid ad_select value %.*s.\n", -		       bond->dev->name, (int)strlen(buf) - 1, buf); -		ret = -EINVAL; -	} -out: -	return ret; +	return sprintf(buf, "%s %d\n", val->string, bond->params.ad_select);  }  static DEVICE_ATTR(ad_select, S_IRUGO | S_IWUSR, -		   bonding_show_ad_select, bonding_store_ad_select); - -/* - * Show and set the number of grat ARP to send after a failover event. - */ -static ssize_t bonding_show_n_grat_arp(struct device *d, -				   struct device_attribute *attr, -				   char *buf) -{ -	struct bonding *bond = to_bond(d); - -	return sprintf(buf, "%d\n", bond->params.num_grat_arp); -} +		   bonding_show_ad_select, bonding_sysfs_store_option); -static ssize_t bonding_store_n_grat_arp(struct device *d, -				    struct device_attribute *attr, -				    const char *buf, size_t count) -{ -	int new_value, ret = count; -	struct bonding *bond = to_bond(d); - -	if (sscanf(buf, "%d", &new_value) != 1) { -		pr_err("%s: no num_grat_arp value specified.\n", -		       bond->dev->name); -		ret = -EINVAL; -		goto out; -	} -	if (new_value < 0 || new_value > 255) { -		pr_err("%s: Invalid num_grat_arp value %d not in range 0-255; rejected.\n", -		       bond->dev->name, new_value); -		ret = -EINVAL; -		goto out; -	} else { -		bond->params.num_grat_arp = new_value; -	} -out: -	return ret; -} -static DEVICE_ATTR(num_grat_arp, S_IRUGO | S_IWUSR, -		   bonding_show_n_grat_arp, bonding_store_n_grat_arp); - -/* - * Show and set the number of unsolicited NA's to send after a failover event. - */ -static ssize_t bonding_show_n_unsol_na(struct device *d, -				       struct device_attribute *attr, -				       char *buf) +/* Show and set the number of peer notifications to send after a failover event. */ +static ssize_t bonding_show_num_peer_notif(struct device *d, +					   struct device_attribute *attr, +					   char *buf)  {  	struct bonding *bond = to_bond(d); - -	return sprintf(buf, "%d\n", bond->params.num_unsol_na); +	return sprintf(buf, "%d\n", bond->params.num_peer_notif);  } -static ssize_t bonding_store_n_unsol_na(struct device *d, -					struct device_attribute *attr, -					const char *buf, size_t count) +static ssize_t bonding_store_num_peer_notif(struct device *d, +					    struct device_attribute *attr, +					    const char *buf, size_t count)  { -	int new_value, ret = count;  	struct bonding *bond = to_bond(d); +	int ret; -	if (sscanf(buf, "%d", &new_value) != 1) { -		pr_err("%s: no num_unsol_na value specified.\n", -		       bond->dev->name); -		ret = -EINVAL; -		goto out; -	} +	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_NUM_PEER_NOTIF, (char *)buf); +	if (!ret) +		ret = count; -	if (new_value < 0 || new_value > 255) { -		pr_err("%s: Invalid num_unsol_na value %d not in range 0-255; rejected.\n", -		       bond->dev->name, new_value); -		ret = -EINVAL; -		goto out; -	} else -		bond->params.num_unsol_na = new_value; -out:  	return ret;  } +static DEVICE_ATTR(num_grat_arp, S_IRUGO | S_IWUSR, +		   bonding_show_num_peer_notif, bonding_store_num_peer_notif);  static DEVICE_ATTR(num_unsol_na, S_IRUGO | S_IWUSR, -		   bonding_show_n_unsol_na, bonding_store_n_unsol_na); +		   bonding_show_num_peer_notif, bonding_store_num_peer_notif); -/* - * Show and set the MII monitor interval.  There are two tricky bits - * here.  First, if MII monitoring is activated, then we must disable - * ARP monitoring.  Second, if the timer isn't running, we must - * start it. - */ +/* Show the MII monitor interval. 
*/  static ssize_t bonding_show_miimon(struct device *d,  				   struct device_attribute *attr,  				   char *buf) @@ -969,80 +417,10 @@ static ssize_t bonding_show_miimon(struct device *d,  	return sprintf(buf, "%d\n", bond->params.miimon);  } - -static ssize_t bonding_store_miimon(struct device *d, -				    struct device_attribute *attr, -				    const char *buf, size_t count) -{ -	int new_value, ret = count; -	struct bonding *bond = to_bond(d); - -	if (sscanf(buf, "%d", &new_value) != 1) { -		pr_err("%s: no miimon value specified.\n", -		       bond->dev->name); -		ret = -EINVAL; -		goto out; -	} -	if (new_value < 0) { -		pr_err("%s: Invalid miimon value %d not in range %d-%d; rejected.\n", -		       bond->dev->name, new_value, 1, INT_MAX); -		ret = -EINVAL; -		goto out; -	} else { -		pr_info("%s: Setting MII monitoring interval to %d.\n", -			bond->dev->name, new_value); -		bond->params.miimon = new_value; -		if (bond->params.updelay) -			pr_info("%s: Note: Updating updelay (to %d) since it is a multiple of the miimon value.\n", -				bond->dev->name, -				bond->params.updelay * bond->params.miimon); -		if (bond->params.downdelay) -			pr_info("%s: Note: Updating downdelay (to %d) since it is a multiple of the miimon value.\n", -				bond->dev->name, -				bond->params.downdelay * bond->params.miimon); -		if (bond->params.arp_interval) { -			pr_info("%s: MII monitoring cannot be used with ARP monitoring. Disabling ARP monitoring...\n", -				bond->dev->name); -			bond->params.arp_interval = 0; -			bond->dev->priv_flags &= ~IFF_MASTER_ARPMON; -			if (bond->params.arp_validate) { -				bond_unregister_arp(bond); -				bond->params.arp_validate = -					BOND_ARP_VALIDATE_NONE; -			} -			if (delayed_work_pending(&bond->arp_work)) { -				cancel_delayed_work(&bond->arp_work); -				flush_workqueue(bond->wq); -			} -		} - -		if (bond->dev->flags & IFF_UP) { -			/* If the interface is up, we may need to fire off -			 * the MII timer. If the interface is down, the -			 * timer will get fired off when the open function -			 * is called. -			 */ -			if (!delayed_work_pending(&bond->mii_work)) { -				INIT_DELAYED_WORK(&bond->mii_work, -						  bond_mii_monitor); -				queue_delayed_work(bond->wq, -						   &bond->mii_work, 0); -			} -		} -	} -out: -	return ret; -}  static DEVICE_ATTR(miimon, S_IRUGO | S_IWUSR, -		   bonding_show_miimon, bonding_store_miimon); +		   bonding_show_miimon, bonding_sysfs_store_option); -/* - * Show and set the primary slave.  The store function is much - * simpler than bonding_store_slaves function because it only needs to - * handle one interface name. - * The bond must be a mode that supports a primary for this be - * set. - */ +/* Show the primary slave. 
*/  static ssize_t bonding_show_primary(struct device *d,  				    struct device_attribute *attr,  				    char *buf) @@ -1055,117 +433,27 @@ static ssize_t bonding_show_primary(struct device *d,  	return count;  } - -static ssize_t bonding_store_primary(struct device *d, -				     struct device_attribute *attr, -				     const char *buf, size_t count) -{ -	int i; -	struct slave *slave; -	struct bonding *bond = to_bond(d); - -	if (!rtnl_trylock()) -		return restart_syscall(); -	block_netpoll_tx(); -	read_lock(&bond->lock); -	write_lock_bh(&bond->curr_slave_lock); - -	if (!USES_PRIMARY(bond->params.mode)) { -		pr_info("%s: Unable to set primary slave; %s is in mode %d\n", -			bond->dev->name, bond->dev->name, bond->params.mode); -	} else { -		bond_for_each_slave(bond, slave, i) { -			if (strnicmp -			    (slave->dev->name, buf, -			     strlen(slave->dev->name)) == 0) { -				pr_info("%s: Setting %s as primary slave.\n", -					bond->dev->name, slave->dev->name); -				bond->primary_slave = slave; -				strcpy(bond->params.primary, slave->dev->name); -				bond_select_active_slave(bond); -				goto out; -			} -		} - -		/* if we got here, then we didn't match the name of any slave */ - -		if (strlen(buf) == 0 || buf[0] == '\n') { -			pr_info("%s: Setting primary slave to None.\n", -				bond->dev->name); -			bond->primary_slave = NULL; -				bond_select_active_slave(bond); -		} else { -			pr_info("%s: Unable to set %.*s as primary slave as it is not a slave.\n", -				bond->dev->name, (int)strlen(buf) - 1, buf); -		} -	} -out: -	write_unlock_bh(&bond->curr_slave_lock); -	read_unlock(&bond->lock); -	unblock_netpoll_tx(); -	rtnl_unlock(); - -	return count; -}  static DEVICE_ATTR(primary, S_IRUGO | S_IWUSR, -		   bonding_show_primary, bonding_store_primary); +		   bonding_show_primary, bonding_sysfs_store_option); -/* - * Show and set the primary_reselect flag. - */ +/* Show the primary_reselect flag. 
*/  static ssize_t bonding_show_primary_reselect(struct device *d,  					     struct device_attribute *attr,  					     char *buf)  {  	struct bonding *bond = to_bond(d); +	const struct bond_opt_value *val; -	return sprintf(buf, "%s %d\n", -		       pri_reselect_tbl[bond->params.primary_reselect].modename, -		       bond->params.primary_reselect); -} - -static ssize_t bonding_store_primary_reselect(struct device *d, -					      struct device_attribute *attr, -					      const char *buf, size_t count) -{ -	int new_value, ret = count; -	struct bonding *bond = to_bond(d); - -	if (!rtnl_trylock()) -		return restart_syscall(); - -	new_value = bond_parse_parm(buf, pri_reselect_tbl); -	if (new_value < 0)  { -		pr_err("%s: Ignoring invalid primary_reselect value %.*s.\n", -		       bond->dev->name, -		       (int) strlen(buf) - 1, buf); -		ret = -EINVAL; -		goto out; -	} +	val = bond_opt_get_val(BOND_OPT_PRIMARY_RESELECT, +			       bond->params.primary_reselect); -	bond->params.primary_reselect = new_value; -	pr_info("%s: setting primary_reselect to %s (%d).\n", -		bond->dev->name, pri_reselect_tbl[new_value].modename, -		new_value); - -	block_netpoll_tx(); -	read_lock(&bond->lock); -	write_lock_bh(&bond->curr_slave_lock); -	bond_select_active_slave(bond); -	write_unlock_bh(&bond->curr_slave_lock); -	read_unlock(&bond->lock); -	unblock_netpoll_tx(); -out: -	rtnl_unlock(); -	return ret; +	return sprintf(buf, "%s %d\n", +		       val->string, bond->params.primary_reselect);  }  static DEVICE_ATTR(primary_reselect, S_IRUGO | S_IWUSR, -		   bonding_show_primary_reselect, -		   bonding_store_primary_reselect); +		   bonding_show_primary_reselect, bonding_sysfs_store_option); -/* - * Show and set the use_carrier flag. - */ +/* Show the use_carrier flag. */  static ssize_t bonding_show_carrier(struct device *d,  				    struct device_attribute *attr,  				    char *buf) @@ -1174,159 +462,42 @@ static ssize_t bonding_show_carrier(struct device *d,  	return sprintf(buf, "%d\n", bond->params.use_carrier);  } - -static ssize_t bonding_store_carrier(struct device *d, -				     struct device_attribute *attr, -				     const char *buf, size_t count) -{ -	int new_value, ret = count; -	struct bonding *bond = to_bond(d); - - -	if (sscanf(buf, "%d", &new_value) != 1) { -		pr_err("%s: no use_carrier value specified.\n", -		       bond->dev->name); -		ret = -EINVAL; -		goto out; -	} -	if ((new_value == 0) || (new_value == 1)) { -		bond->params.use_carrier = new_value; -		pr_info("%s: Setting use_carrier to %d.\n", -			bond->dev->name, new_value); -	} else { -		pr_info("%s: Ignoring invalid use_carrier value %d.\n", -			bond->dev->name, new_value); -	} -out: -	return count; -}  static DEVICE_ATTR(use_carrier, S_IRUGO | S_IWUSR, -		   bonding_show_carrier, bonding_store_carrier); +		   bonding_show_carrier, bonding_sysfs_store_option); -/* - * Show and set currently active_slave. - */ +/* Show currently active_slave. 
*/  static ssize_t bonding_show_active_slave(struct device *d,  					 struct device_attribute *attr,  					 char *buf)  { -	struct slave *curr;  	struct bonding *bond = to_bond(d); +	struct net_device *slave_dev;  	int count = 0; -	read_lock(&bond->curr_slave_lock); -	curr = bond->curr_active_slave; -	read_unlock(&bond->curr_slave_lock); - -	if (USES_PRIMARY(bond->params.mode) && curr) -		count = sprintf(buf, "%s\n", curr->dev->name); -	return count; -} - -static ssize_t bonding_store_active_slave(struct device *d, -					  struct device_attribute *attr, -					  const char *buf, size_t count) -{ -	int i; -	struct slave *slave; -	struct slave *old_active = NULL; -	struct slave *new_active = NULL; -	struct bonding *bond = to_bond(d); - -	if (!rtnl_trylock()) -		return restart_syscall(); - -	block_netpoll_tx(); -	read_lock(&bond->lock); -	write_lock_bh(&bond->curr_slave_lock); - -	if (!USES_PRIMARY(bond->params.mode)) -		pr_info("%s: Unable to change active slave; %s is in mode %d\n", -			bond->dev->name, bond->dev->name, bond->params.mode); -	else { -		bond_for_each_slave(bond, slave, i) { -			if (strnicmp -			    (slave->dev->name, buf, -			     strlen(slave->dev->name)) == 0) { -        			old_active = bond->curr_active_slave; -        			new_active = slave; -        			if (new_active == old_active) { -					/* do nothing */ -					pr_info("%s: %s is already the current active slave.\n", -						bond->dev->name, -						slave->dev->name); -					goto out; -				} -				else { -        				if ((new_active) && -            				    (old_active) && -				            (new_active->link == BOND_LINK_UP) && -				            IS_UP(new_active->dev)) { -						pr_info("%s: Setting %s as active slave.\n", -							bond->dev->name, -							slave->dev->name); -							bond_change_active_slave(bond, new_active); -        				} -					else { -						pr_info("%s: Could not set %s as active slave; either %s is down or the link is down.\n", -							bond->dev->name, -							slave->dev->name, -							slave->dev->name); -					} -					goto out; -				} -			} -		} - -		/* if we got here, then we didn't match the name of any slave */ - -		if (strlen(buf) == 0 || buf[0] == '\n') { -			pr_info("%s: Setting active slave to None.\n", -				bond->dev->name); -			bond->primary_slave = NULL; -			bond_select_active_slave(bond); -		} else { -			pr_info("%s: Unable to set %.*s as active slave as it is not a slave.\n", -				bond->dev->name, (int)strlen(buf) - 1, buf); -		} -	} - out: -	write_unlock_bh(&bond->curr_slave_lock); -	read_unlock(&bond->lock); -	unblock_netpoll_tx(); - -	rtnl_unlock(); +	rcu_read_lock(); +	slave_dev = bond_option_active_slave_get_rcu(bond); +	if (slave_dev) +		count = sprintf(buf, "%s\n", slave_dev->name); +	rcu_read_unlock();  	return count; -  }  static DEVICE_ATTR(active_slave, S_IRUGO | S_IWUSR, -		   bonding_show_active_slave, bonding_store_active_slave); +		   bonding_show_active_slave, bonding_sysfs_store_option); - -/* - * Show link status of the bond interface. - */ +/* Show link status of the bond interface. */  static ssize_t bonding_show_mii_status(struct device *d,  				       struct device_attribute *attr,  				       char *buf)  { -	struct slave *curr;  	struct bonding *bond = to_bond(d); -	read_lock(&bond->curr_slave_lock); -	curr = bond->curr_active_slave; -	read_unlock(&bond->curr_slave_lock); - -	return sprintf(buf, "%s\n", curr ? "up" : "down"); +	return sprintf(buf, "%s\n", bond->curr_active_slave ? 
"up" : "down");  }  static DEVICE_ATTR(mii_status, S_IRUGO, bonding_show_mii_status, NULL); - -/* - * Show current 802.3ad aggregator ID. - */ +/* Show current 802.3ad aggregator ID. */  static ssize_t bonding_show_ad_aggregator(struct device *d,  					  struct device_attribute *attr,  					  char *buf) @@ -1334,10 +505,10 @@ static ssize_t bonding_show_ad_aggregator(struct device *d,  	int count = 0;  	struct bonding *bond = to_bond(d); -	if (bond->params.mode == BOND_MODE_8023AD) { +	if (BOND_MODE(bond) == BOND_MODE_8023AD) {  		struct ad_info ad_info;  		count = sprintf(buf, "%d\n", -				(bond_3ad_get_active_agg_info(bond, &ad_info)) +				bond_3ad_get_active_agg_info(bond, &ad_info)  				?  0 : ad_info.aggregator_id);  	} @@ -1346,9 +517,7 @@ static ssize_t bonding_show_ad_aggregator(struct device *d,  static DEVICE_ATTR(ad_aggregator, S_IRUGO, bonding_show_ad_aggregator, NULL); -/* - * Show number of active 802.3ad ports. - */ +/* Show number of active 802.3ad ports. */  static ssize_t bonding_show_ad_num_ports(struct device *d,  					 struct device_attribute *attr,  					 char *buf) @@ -1356,10 +525,10 @@ static ssize_t bonding_show_ad_num_ports(struct device *d,  	int count = 0;  	struct bonding *bond = to_bond(d); -	if (bond->params.mode == BOND_MODE_8023AD) { +	if (BOND_MODE(bond) == BOND_MODE_8023AD) {  		struct ad_info ad_info;  		count = sprintf(buf, "%d\n", -				(bond_3ad_get_active_agg_info(bond, &ad_info)) +				bond_3ad_get_active_agg_info(bond, &ad_info)  				?  0 : ad_info.ports);  	} @@ -1368,9 +537,7 @@ static ssize_t bonding_show_ad_num_ports(struct device *d,  static DEVICE_ATTR(ad_num_ports, S_IRUGO, bonding_show_ad_num_ports, NULL); -/* - * Show current 802.3ad actor key. - */ +/* Show current 802.3ad actor key. */  static ssize_t bonding_show_ad_actor_key(struct device *d,  					 struct device_attribute *attr,  					 char *buf) @@ -1378,10 +545,10 @@ static ssize_t bonding_show_ad_actor_key(struct device *d,  	int count = 0;  	struct bonding *bond = to_bond(d); -	if (bond->params.mode == BOND_MODE_8023AD) { +	if (BOND_MODE(bond) == BOND_MODE_8023AD) {  		struct ad_info ad_info;  		count = sprintf(buf, "%d\n", -				(bond_3ad_get_active_agg_info(bond, &ad_info)) +				bond_3ad_get_active_agg_info(bond, &ad_info)  				?  0 : ad_info.actor_key);  	} @@ -1390,9 +557,7 @@ static ssize_t bonding_show_ad_actor_key(struct device *d,  static DEVICE_ATTR(ad_actor_key, S_IRUGO, bonding_show_ad_actor_key, NULL); -/* - * Show current 802.3ad partner key. - */ +/* Show current 802.3ad partner key. */  static ssize_t bonding_show_ad_partner_key(struct device *d,  					   struct device_attribute *attr,  					   char *buf) @@ -1400,10 +565,10 @@ static ssize_t bonding_show_ad_partner_key(struct device *d,  	int count = 0;  	struct bonding *bond = to_bond(d); -	if (bond->params.mode == BOND_MODE_8023AD) { +	if (BOND_MODE(bond) == BOND_MODE_8023AD) {  		struct ad_info ad_info;  		count = sprintf(buf, "%d\n", -				(bond_3ad_get_active_agg_info(bond, &ad_info)) +				bond_3ad_get_active_agg_info(bond, &ad_info)  				?  0 : ad_info.partner_key);  	} @@ -1412,9 +577,7 @@ static ssize_t bonding_show_ad_partner_key(struct device *d,  static DEVICE_ATTR(ad_partner_key, S_IRUGO, bonding_show_ad_partner_key, NULL); -/* - * Show current 802.3ad partner mac. - */ +/* Show current 802.3ad partner mac. 
*/  static ssize_t bonding_show_ad_partner_mac(struct device *d,  					   struct device_attribute *attr,  					   char *buf) @@ -1422,7 +585,7 @@ static ssize_t bonding_show_ad_partner_mac(struct device *d,  	int count = 0;  	struct bonding *bond = to_bond(d); -	if (bond->params.mode == BOND_MODE_8023AD) { +	if (BOND_MODE(bond) == BOND_MODE_8023AD) {  		struct ad_info ad_info;  		if (!bond_3ad_get_active_agg_info(bond, &ad_info))  			count = sprintf(buf, "%pM\n", ad_info.partner_system); @@ -1432,22 +595,20 @@ static ssize_t bonding_show_ad_partner_mac(struct device *d,  }  static DEVICE_ATTR(ad_partner_mac, S_IRUGO, bonding_show_ad_partner_mac, NULL); -/* - * Show the queue_ids of the slaves in the current bond. - */ +/* Show the queue_ids of the slaves in the current bond. */  static ssize_t bonding_show_queue_id(struct device *d,  				     struct device_attribute *attr,  				     char *buf)  { -	struct slave *slave; -	int i, res = 0;  	struct bonding *bond = to_bond(d); +	struct list_head *iter; +	struct slave *slave; +	int res = 0;  	if (!rtnl_trylock())  		return restart_syscall(); -	read_lock(&bond->lock); -	bond_for_each_slave(bond, slave, i) { +	bond_for_each_slave(bond, slave, iter) {  		if (res > (PAGE_SIZE - IFNAMSIZ - 6)) {  			/* not enough space for another interface_name:queue_id pair */  			if ((PAGE_SIZE - res) > 10) @@ -1458,98 +619,18 @@ static ssize_t bonding_show_queue_id(struct device *d,  		res += sprintf(buf + res, "%s:%d ",  			       slave->dev->name, slave->queue_id);  	} -	read_unlock(&bond->lock);  	if (res)  		buf[res-1] = '\n'; /* eat the leftover space */ -	rtnl_unlock(); -	return res; -} - -/* - * Set the queue_ids of the  slaves in the current bond.  The bond - * interface must be enslaved for this to work. - */ -static ssize_t bonding_store_queue_id(struct device *d, -				      struct device_attribute *attr, -				      const char *buffer, size_t count) -{ -	struct slave *slave, *update_slave; -	struct bonding *bond = to_bond(d); -	u16 qid; -	int i, ret = count; -	char *delim; -	struct net_device *sdev = NULL; - -	if (!rtnl_trylock()) -		return restart_syscall(); - -	/* delim will point to queue id if successful */ -	delim = strchr(buffer, ':'); -	if (!delim) -		goto err_no_cmd; - -	/* -	 * Terminate string that points to device name and bump it -	 * up one, so we can read the queue id there. 
-	 */ -	*delim = '\0'; -	if (sscanf(++delim, "%hd\n", &qid) != 1) -		goto err_no_cmd; -	/* Check buffer length, valid ifname and queue id */ -	if (strlen(buffer) > IFNAMSIZ || -	    !dev_valid_name(buffer) || -	    qid > bond->params.tx_queues) -		goto err_no_cmd; - -	/* Get the pointer to that interface if it exists */ -	sdev = __dev_get_by_name(dev_net(bond->dev), buffer); -	if (!sdev) -		goto err_no_cmd; - -	read_lock(&bond->lock); - -	/* Search for thes slave and check for duplicate qids */ -	update_slave = NULL; -	bond_for_each_slave(bond, slave, i) { -		if (sdev == slave->dev) -			/* -			 * We don't need to check the matching -			 * slave for dups, since we're overwriting it -			 */ -			update_slave = slave; -		else if (qid && qid == slave->queue_id) { -			goto err_no_cmd_unlock; -		} -	} - -	if (!update_slave) -		goto err_no_cmd_unlock; - -	/* Actually set the qids for the slave */ -	update_slave->queue_id = qid; - -	read_unlock(&bond->lock); -out:  	rtnl_unlock(); -	return ret; -err_no_cmd_unlock: -	read_unlock(&bond->lock); -err_no_cmd: -	pr_info("invalid input for queue_id set for %s.\n", -		bond->dev->name); -	ret = -EPERM; -	goto out; +	return res;  } -  static DEVICE_ATTR(queue_id, S_IRUGO | S_IWUSR, bonding_show_queue_id, -		   bonding_store_queue_id); +		   bonding_sysfs_store_option); -/* - * Show and set the all_slaves_active flag. - */ +/* Show the all_slaves_active flag. */  static ssize_t bonding_show_slaves_active(struct device *d,  					  struct device_attribute *attr,  					  char *buf) @@ -1558,96 +639,61 @@ static ssize_t bonding_show_slaves_active(struct device *d,  	return sprintf(buf, "%d\n", bond->params.all_slaves_active);  } +static DEVICE_ATTR(all_slaves_active, S_IRUGO | S_IWUSR, +		   bonding_show_slaves_active, bonding_sysfs_store_option); -static ssize_t bonding_store_slaves_active(struct device *d, -					   struct device_attribute *attr, -					   const char *buf, size_t count) +/* Show the number of IGMP membership reports to send on link failure */ +static ssize_t bonding_show_resend_igmp(struct device *d, +					struct device_attribute *attr, +					char *buf)  { -	int i, new_value, ret = count;  	struct bonding *bond = to_bond(d); -	struct slave *slave; -	if (sscanf(buf, "%d", &new_value) != 1) { -		pr_err("%s: no all_slaves_active value specified.\n", -		       bond->dev->name); -		ret = -EINVAL; -		goto out; -	} +	return sprintf(buf, "%d\n", bond->params.resend_igmp); +} +static DEVICE_ATTR(resend_igmp, S_IRUGO | S_IWUSR, +		   bonding_show_resend_igmp, bonding_sysfs_store_option); -	if (new_value == bond->params.all_slaves_active) -		goto out; -	if ((new_value == 0) || (new_value == 1)) { -		bond->params.all_slaves_active = new_value; -	} else { -		pr_info("%s: Ignoring invalid all_slaves_active value %d.\n", -			bond->dev->name, new_value); -		ret = -EINVAL; -		goto out; -	} +static ssize_t bonding_show_lp_interval(struct device *d, +					struct device_attribute *attr, +					char *buf) +{ +	struct bonding *bond = to_bond(d); -	bond_for_each_slave(bond, slave, i) { -		if (slave->state == BOND_STATE_BACKUP) { -			if (new_value) -				slave->dev->priv_flags &= ~IFF_SLAVE_INACTIVE; -			else -				slave->dev->priv_flags |= IFF_SLAVE_INACTIVE; -		} -	} -out: -	return count; +	return sprintf(buf, "%d\n", bond->params.lp_interval);  } -static DEVICE_ATTR(all_slaves_active, S_IRUGO | S_IWUSR, -		   bonding_show_slaves_active, bonding_store_slaves_active); +static DEVICE_ATTR(lp_interval, S_IRUGO | S_IWUSR, +		   bonding_show_lp_interval, 
bonding_sysfs_store_option); -/* - * Show and set the number of IGMP membership reports to send on link failure - */ -static ssize_t bonding_show_resend_igmp(struct device *d, -					 struct device_attribute *attr, -					 char *buf) +static ssize_t bonding_show_tlb_dynamic_lb(struct device *d, +					   struct device_attribute *attr, +					   char *buf)  {  	struct bonding *bond = to_bond(d); - -	return sprintf(buf, "%d\n", bond->params.resend_igmp); +	return sprintf(buf, "%d\n", bond->params.tlb_dynamic_lb);  } +static DEVICE_ATTR(tlb_dynamic_lb, S_IRUGO | S_IWUSR, +		   bonding_show_tlb_dynamic_lb, bonding_sysfs_store_option); -static ssize_t bonding_store_resend_igmp(struct device *d, -					  struct device_attribute *attr, -					  const char *buf, size_t count) +static ssize_t bonding_show_packets_per_slave(struct device *d, +					      struct device_attribute *attr, +					      char *buf)  { -	int new_value, ret = count;  	struct bonding *bond = to_bond(d); +	unsigned int packets_per_slave = bond->params.packets_per_slave; -	if (sscanf(buf, "%d", &new_value) != 1) { -		pr_err("%s: no resend_igmp value specified.\n", -		       bond->dev->name); -		ret = -EINVAL; -		goto out; -	} - -	if (new_value < 0) { -		pr_err("%s: Invalid resend_igmp value %d not in range 0-255; rejected.\n", -		       bond->dev->name, new_value); -		ret = -EINVAL; -		goto out; -	} - -	pr_info("%s: Setting resend_igmp to %d.\n", -		bond->dev->name, new_value); -	bond->params.resend_igmp = new_value; -out: -	return ret; +	return sprintf(buf, "%u\n", packets_per_slave);  } - -static DEVICE_ATTR(resend_igmp, S_IRUGO | S_IWUSR, -		   bonding_show_resend_igmp, bonding_store_resend_igmp); +static DEVICE_ATTR(packets_per_slave, S_IRUGO | S_IWUSR, +		   bonding_show_packets_per_slave, bonding_sysfs_store_option);  static struct attribute *per_bond_attrs[] = {  	&dev_attr_slaves.attr,  	&dev_attr_mode.attr,  	&dev_attr_fail_over_mac.attr,  	&dev_attr_arp_validate.attr, +	&dev_attr_arp_all_targets.attr,  	&dev_attr_arp_interval.attr,  	&dev_attr_arp_ip_target.attr,  	&dev_attr_downdelay.attr, @@ -1671,6 +717,10 @@ static struct attribute *per_bond_attrs[] = {  	&dev_attr_queue_id.attr,  	&dev_attr_all_slaves_active.attr,  	&dev_attr_resend_igmp.attr, +	&dev_attr_min_links.attr, +	&dev_attr_lp_interval.attr, +	&dev_attr_packets_per_slave.attr, +	&dev_attr_tlb_dynamic_lb.attr,  	NULL,  }; @@ -1679,17 +729,19 @@ static struct attribute_group bonding_group = {  	.attrs = per_bond_attrs,  }; -/* - * Initialize sysfs.  This sets up the bonding_masters file in +/* Initialize sysfs.  This sets up the bonding_masters file in   * /sys/class/net.   */ -int bond_create_sysfs(void) +int bond_create_sysfs(struct bond_net *bn)  {  	int ret; -	ret = netdev_class_create_file(&class_attr_bonding_masters); -	/* -	 * Permit multiple loads of the module by ignoring failures to +	bn->class_attr_bonding_masters = class_attr_bonding_masters; +	sysfs_attr_init(&bn->class_attr_bonding_masters.attr); + +	ret = netdev_class_create_file_ns(&bn->class_attr_bonding_masters, +					  bn->net); +	/* Permit multiple loads of the module by ignoring failures to  	 * create the bonding_masters sysfs file.  Bonding devices  	 * created by second or subsequent loads of the module will  	 * not be listed in, or controllable by, bonding_masters, but @@ -1701,9 +753,9 @@ int bond_create_sysfs(void)  	 */  	if (ret == -EEXIST) {  		/* Is someone being kinky and naming a device bonding_master? 
*/ -		if (__dev_get_by_name(&init_net, +		if (__dev_get_by_name(bn->net,  				      class_attr_bonding_masters.attr.name)) -			pr_err("network device named %s already exists in sysfs", +			pr_err("network device named %s already exists in sysfs\n",  			       class_attr_bonding_masters.attr.name);  		ret = 0;  	} @@ -1712,16 +764,13 @@ int bond_create_sysfs(void)  } -/* - * Remove /sys/class/net/bonding_masters. - */ -void bond_destroy_sysfs(void) +/* Remove /sys/class/net/bonding_masters. */ +void bond_destroy_sysfs(struct bond_net *bn)  { -	netdev_class_remove_file(&class_attr_bonding_masters); +	netdev_class_remove_file_ns(&bn->class_attr_bonding_masters, bn->net);  } -/* - * Initialize sysfs for each bond.  This sets up and registers +/* Initialize sysfs for each bond.  This sets up and registers   * the 'bondctl' directory for each individual bond under /sys/class/net.   */  void bond_prepare_sysfs_group(struct bonding *bond) diff --git a/drivers/net/bonding/bond_sysfs_slave.c b/drivers/net/bonding/bond_sysfs_slave.c new file mode 100644 index 00000000000..198677f58ce --- /dev/null +++ b/drivers/net/bonding/bond_sysfs_slave.c @@ -0,0 +1,144 @@ +/*	Sysfs attributes of bond slaves + * + *      Copyright (c) 2014 Scott Feldman <sfeldma@cumulusnetworks.com> + * + *	This program is free software; you can redistribute it and/or + *	modify it under the terms of the GNU General Public License + *	as published by the Free Software Foundation; either version + *	2 of the License, or (at your option) any later version. + */ + +#include <linux/capability.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> + +#include "bonding.h" + +struct slave_attribute { +	struct attribute attr; +	ssize_t (*show)(struct slave *, char *); +}; + +#define SLAVE_ATTR(_name, _mode, _show)				\ +const struct slave_attribute slave_attr_##_name = {		\ +	.attr = {.name = __stringify(_name),			\ +		 .mode = _mode },				\ +	.show	= _show,					\ +}; +#define SLAVE_ATTR_RO(_name) \ +	SLAVE_ATTR(_name, S_IRUGO, _name##_show) + +static ssize_t state_show(struct slave *slave, char *buf) +{ +	switch (bond_slave_state(slave)) { +	case BOND_STATE_ACTIVE: +		return sprintf(buf, "active\n"); +	case BOND_STATE_BACKUP: +		return sprintf(buf, "backup\n"); +	default: +		return sprintf(buf, "UNKONWN\n"); +	} +} +static SLAVE_ATTR_RO(state); + +static ssize_t mii_status_show(struct slave *slave, char *buf) +{ +	return sprintf(buf, "%s\n", bond_slave_link_status(slave->link)); +} +static SLAVE_ATTR_RO(mii_status); + +static ssize_t link_failure_count_show(struct slave *slave, char *buf) +{ +	return sprintf(buf, "%d\n", slave->link_failure_count); +} +static SLAVE_ATTR_RO(link_failure_count); + +static ssize_t perm_hwaddr_show(struct slave *slave, char *buf) +{ +	return sprintf(buf, "%pM\n", slave->perm_hwaddr); +} +static SLAVE_ATTR_RO(perm_hwaddr); + +static ssize_t queue_id_show(struct slave *slave, char *buf) +{ +	return sprintf(buf, "%d\n", slave->queue_id); +} +static SLAVE_ATTR_RO(queue_id); + +static ssize_t ad_aggregator_id_show(struct slave *slave, char *buf) +{ +	const struct aggregator *agg; + +	if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { +		agg = SLAVE_AD_INFO(slave)->port.aggregator; +		if (agg) +			return sprintf(buf, "%d\n", +				       agg->aggregator_identifier); +	} + +	return sprintf(buf, "N/A\n"); +} +static SLAVE_ATTR_RO(ad_aggregator_id); + +static const struct slave_attribute *slave_attrs[] = { +	&slave_attr_state, +	&slave_attr_mii_status, +	&slave_attr_link_failure_count, +	&slave_attr_perm_hwaddr, +	
&slave_attr_queue_id, +	&slave_attr_ad_aggregator_id, +	NULL +}; + +#define to_slave_attr(_at) container_of(_at, struct slave_attribute, attr) +#define to_slave(obj)	container_of(obj, struct slave, kobj) + +static ssize_t slave_show(struct kobject *kobj, +			  struct attribute *attr, char *buf) +{ +	struct slave_attribute *slave_attr = to_slave_attr(attr); +	struct slave *slave = to_slave(kobj); + +	return slave_attr->show(slave, buf); +} + +static const struct sysfs_ops slave_sysfs_ops = { +	.show = slave_show, +}; + +static struct kobj_type slave_ktype = { +#ifdef CONFIG_SYSFS +	.sysfs_ops = &slave_sysfs_ops, +#endif +}; + +int bond_sysfs_slave_add(struct slave *slave) +{ +	const struct slave_attribute **a; +	int err; + +	err = kobject_init_and_add(&slave->kobj, &slave_ktype, +				   &(slave->dev->dev.kobj), "bonding_slave"); +	if (err) +		return err; + +	for (a = slave_attrs; *a; ++a) { +		err = sysfs_create_file(&slave->kobj, &((*a)->attr)); +		if (err) { +			kobject_del(&slave->kobj); +			return err; +		} +	} + +	return 0; +} + +void bond_sysfs_slave_del(struct slave *slave) +{ +	const struct slave_attribute **a; + +	for (a = slave_attrs; *a; ++a) +		sysfs_remove_file(&slave->kobj, &((*a)->attr)); + +	kobject_del(&slave->kobj); +} diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index ad3ae46a4c0..0b4d9cde0b0 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -18,52 +18,29 @@  #include <linux/timer.h>  #include <linux/proc_fs.h>  #include <linux/if_bonding.h> -#include <linux/kobject.h>  #include <linux/cpumask.h>  #include <linux/in6.h> +#include <linux/netpoll.h> +#include <linux/inetdevice.h> +#include <linux/etherdevice.h> +#include <linux/reciprocal_div.h> +  #include "bond_3ad.h"  #include "bond_alb.h" +#include "bond_options.h" -#define DRV_VERSION	"3.7.0" -#define DRV_RELDATE	"June 2, 2010" +#define DRV_VERSION	"3.7.1" +#define DRV_RELDATE	"April 27, 2011"  #define DRV_NAME	"bonding"  #define DRV_DESCRIPTION	"Ethernet Channel Bonding Driver" -#define BOND_MAX_ARP_TARGETS	16 - -#define IS_UP(dev)					   \ -	      ((((dev)->flags & IFF_UP) == IFF_UP)	&& \ -	       netif_running(dev)			&& \ -	       netif_carrier_ok(dev)) - -/* - * Checks whether bond is ready for transmit. - * - * Caller must hold bond->lock - */ -#define BOND_IS_OK(bond)			     \ -		   (((bond)->dev->flags & IFF_UP) && \ -		    netif_running((bond)->dev)	  && \ -		    ((bond)->slave_cnt > 0)) - -/* - * Checks whether slave is ready for transmit. - */ -#define SLAVE_IS_OK(slave)			        \ -		    (((slave)->dev->flags & IFF_UP)  && \ -		     netif_running((slave)->dev)     && \ -		     ((slave)->link == BOND_LINK_UP) && \ -		     ((slave)->state == BOND_STATE_ACTIVE)) +#define bond_version DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n" +#define BOND_MAX_VLAN_ENCAP	2 +#define BOND_MAX_ARP_TARGETS	16 -#define USES_PRIMARY(mode)				\ -		(((mode) == BOND_MODE_ACTIVEBACKUP) ||	\ -		 ((mode) == BOND_MODE_TLB)          ||	\ -		 ((mode) == BOND_MODE_ALB)) +#define BOND_DEFAULT_MIIMON	100 -#define TX_QUEUE_OVERRIDE(mode)				\ -			(((mode) == BOND_MODE_ACTIVEBACKUP) ||	\ -			 ((mode) == BOND_MODE_ROUNDROBIN))  /*   * Less bad way to call ioctl from within the kernel; this needs to be   * done some other way to get the call out of interrupt context. @@ -77,68 +54,62 @@  	set_fs(fs);			\  	res; }) -/** - * bond_for_each_slave_from - iterate the slaves list from a starting point - * @bond:	the bond holding this list. - * @pos:	current slave. 
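/* Editor's note -- illustrative sketch only, not part of this patch:
 * bond_sysfs_slave_add() above registers every entry of slave_attrs[]
 * with sysfs_create_file() on the per-slave "bonding_slave" kobject, so
 * another read-only attribute only needs a show helper, a SLAVE_ATTR_RO()
 * definition and a table entry.  The name "speed" is an editor's example
 * (slave->speed is the u32 field in struct slave further below):
 */
static ssize_t speed_show(struct slave *slave, char *buf)
{
	return sprintf(buf, "%u\n", slave->speed);
}
static SLAVE_ATTR_RO(speed);
/* ... and &slave_attr_speed would be added to slave_attrs[] before the
 * terminating NULL.
 */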
- * @cnt:	counter for max number of moves - * @start:	starting point. - * - * Caller must hold bond->lock - */ -#define bond_for_each_slave_from(bond, pos, cnt, start)	\ -	for (cnt = 0, pos = start;				\ -	     cnt < (bond)->slave_cnt;				\ -             cnt++, pos = (pos)->next) +#define BOND_MODE(bond) ((bond)->params.mode) -/** - * bond_for_each_slave_from_to - iterate the slaves list from start point to stop point - * @bond:	the bond holding this list. - * @pos:	current slave. - * @cnt:	counter for number max of moves - * @start:	start point. - * @stop:	stop point. - * - * Caller must hold bond->lock - */ -#define bond_for_each_slave_from_to(bond, pos, cnt, start, stop)	\ -	for (cnt = 0, pos = start;					\ -	     ((cnt < (bond)->slave_cnt) && (pos != (stop)->next));	\ -             cnt++, pos = (pos)->next) +/* slave list primitives */ +#define bond_slave_list(bond) (&(bond)->dev->adj_list.lower) + +#define bond_has_slaves(bond) !list_empty(bond_slave_list(bond)) + +/* IMPORTANT: bond_first/last_slave can return NULL in case of an empty list */ +#define bond_first_slave(bond) \ +	(bond_has_slaves(bond) ? \ +		netdev_adjacent_get_private(bond_slave_list(bond)->next) : \ +		NULL) +#define bond_last_slave(bond) \ +	(bond_has_slaves(bond) ? \ +		netdev_adjacent_get_private(bond_slave_list(bond)->prev) : \ +		NULL) + +/* Caller must have rcu_read_lock */ +#define bond_first_slave_rcu(bond) \ +	netdev_lower_get_first_private_rcu(bond->dev) + +#define bond_is_first_slave(bond, pos) (pos == bond_first_slave(bond)) +#define bond_is_last_slave(bond, pos) (pos == bond_last_slave(bond))  /** - * bond_for_each_slave - iterate the slaves list from head - * @bond:	the bond holding this list. - * @pos:	current slave. - * @cnt:	counter for max number of moves + * bond_for_each_slave - iterate over all slaves + * @bond:	the bond holding this list + * @pos:	current slave + * @iter:	list_head * iterator   *   * Caller must hold bond->lock   */ -#define bond_for_each_slave(bond, pos, cnt)	\ -		bond_for_each_slave_from(bond, pos, cnt, (bond)->first_slave) +#define bond_for_each_slave(bond, pos, iter) \ +	netdev_for_each_lower_private((bond)->dev, pos, iter) +/* Caller must have rcu_read_lock */ +#define bond_for_each_slave_rcu(bond, pos, iter) \ +	netdev_for_each_lower_private_rcu((bond)->dev, pos, iter)  #ifdef CONFIG_NET_POLL_CONTROLLER -extern cpumask_var_t netpoll_block_tx; +extern atomic_t netpoll_block_tx;  static inline void block_netpoll_tx(void)  { -	preempt_disable(); -	BUG_ON(cpumask_test_and_set_cpu(smp_processor_id(), -					netpoll_block_tx)); +	atomic_inc(&netpoll_block_tx);  }  static inline void unblock_netpoll_tx(void)  { -	BUG_ON(!cpumask_test_and_clear_cpu(smp_processor_id(), -					   netpoll_block_tx)); -	preempt_enable(); +	atomic_dec(&netpoll_block_tx);  }  static inline int is_netpoll_tx_blocked(struct net_device *dev)  { -	if (unlikely(dev->priv_flags & IFF_IN_NETPOLL)) -		return cpumask_test_cpu(smp_processor_id(), netpoll_block_tx); +	if (unlikely(netpoll_tx_running(dev))) +		return atomic_read(&netpoll_block_tx);  	return 0;  }  #else @@ -151,15 +122,16 @@ struct bond_params {  	int mode;  	int xmit_policy;  	int miimon; -	int num_grat_arp; -	int num_unsol_na; +	u8 num_peer_notif;  	int arp_interval;  	int arp_validate; +	int arp_all_targets;  	int use_carrier;  	int fail_over_mac;  	int updelay;  	int downdelay;  	int lacp_fast; +	unsigned int min_links;  	int ad_select;  	char primary[IFNAMSIZ];  	int primary_reselect; @@ -167,6 +139,10 @@ struct bond_params {  	int tx_queues;  	
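/* Editor's note -- illustrative sketch only, not part of this patch:
 * with the slave list now kept in the net_device lower-adjacency list,
 * the macros above walk it either under RTNL (bond_for_each_slave) or
 * under RCU (bond_for_each_slave_rcu).  A minimal RCU walk that counts
 * carrier-up slaves could look like this ("bond" is assumed to be a
 * valid struct bonding *, and the helper name is made up):
 */
static int bond_count_up_slaves_example(struct bonding *bond)
{
	struct list_head *iter;
	struct slave *slave;
	int up = 0;

	rcu_read_lock();
	bond_for_each_slave_rcu(bond, slave, iter)
		if (bond_slave_is_up(slave))
			up++;
	rcu_read_unlock();

	return up;
}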
int all_slaves_active;  	int resend_igmp; +	int lp_interval; +	int packets_per_slave; +	int tlb_dynamic_lb; +	struct reciprocal_value reciprocal_packets_per_slave;  };  struct bond_parm_tbl { @@ -174,35 +150,32 @@ struct bond_parm_tbl {  	int mode;  }; -#define BOND_MAX_MODENAME_LEN 20 - -struct vlan_entry { -	struct list_head vlan_list; -	__be32 vlan_ip; -	unsigned short vlan_id; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -	struct in6_addr vlan_ipv6; -#endif -}; -  struct slave {  	struct net_device *dev; /* first - useful for panic debug */ -	struct slave *next; -	struct slave *prev; +	struct bonding *bond; /* our master */  	int    delay; -	unsigned long jiffies; -	unsigned long last_arp_rx; +	/* all three in jiffies */ +	unsigned long last_link_up; +	unsigned long last_rx; +	unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS];  	s8     link;    /* one of BOND_LINK_XXXX */  	s8     new_link; -	s8     state;   /* one of BOND_STATE_XXXX */ +	u8     backup:1,   /* indicates backup slave. Value corresponds with +			      BOND_STATE_ACTIVE and BOND_STATE_BACKUP */ +	       inactive:1, /* indicates inactive slave */ +	       should_notify:1; /* indicateds whether the state changed */ +	u8     duplex;  	u32    original_mtu;  	u32    link_failure_count; -	u8     perm_hwaddr[ETH_ALEN]; -	u16    speed; -	u8     duplex; +	u32    speed;  	u16    queue_id; -	struct ad_slave_info ad_info; /* HUGE - better to dynamically alloc */ +	u8     perm_hwaddr[ETH_ALEN]; +	struct ad_slave_info *ad_info;  	struct tlb_slave_info tlb_info; +#ifdef CONFIG_NET_POLL_CONTROLLER +	struct netpoll *np; +#endif +	struct kobject kobj;  };  /* @@ -221,44 +194,47 @@ struct slave {   */  struct bonding {  	struct   net_device *dev; /* first - useful for panic debug */ -	struct   slave *first_slave;  	struct   slave *curr_active_slave;  	struct   slave *current_arp_slave;  	struct   slave *primary_slave;  	bool     force_primary;  	s32      slave_cnt; /* never change this value outside the attach/detach wrappers */ +	int     (*recv_probe)(const struct sk_buff *, struct bonding *, +			      struct slave *);  	rwlock_t lock;  	rwlock_t curr_slave_lock; -	s8       kill_timers; -	s8	 send_grat_arp; -	s8	 send_unsol_na; -	s8	 setup_by_slave; -	s8       igmp_retrans; +	u8	 send_peer_notif; +	u8       igmp_retrans;  #ifdef CONFIG_PROC_FS  	struct   proc_dir_entry *proc_entry;  	char     proc_file_name[IFNAMSIZ];  #endif /* CONFIG_PROC_FS */  	struct   list_head bond_list; -	struct   netdev_hw_addr_list mc_list; -	int      (*xmit_hash_policy)(struct sk_buff *, int); -	__be32   master_ip; -	u16      flags; -	u16      rr_tx_counter; +	u32      rr_tx_counter;  	struct   ad_bond_info ad_info;  	struct   alb_bond_info alb_info;  	struct   bond_params params; -	struct   list_head vlan_list; -	struct   vlan_group *vlgrp; -	struct   packet_type arp_mon_pt;  	struct   workqueue_struct *wq;  	struct   delayed_work mii_work;  	struct   delayed_work arp_work;  	struct   delayed_work alb_work;  	struct   delayed_work ad_work;  	struct   delayed_work mcast_work; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -	struct   in6_addr master_ipv6; -#endif +#ifdef CONFIG_DEBUG_FS +	/* debugging support via debugfs */ +	struct	 dentry *debug_dir; +#endif /* CONFIG_DEBUG_FS */ +}; + +#define bond_slave_get_rcu(dev) \ +	((struct slave *) rcu_dereference(dev->rx_handler_data)) + +#define bond_slave_get_rtnl(dev) \ +	((struct slave *) rtnl_dereference(dev->rx_handler_data)) + +struct bond_vlan_tag { +	__be16		vlan_proto; +	unsigned 
short	vlan_id;  };  /** @@ -266,33 +242,125 @@ struct bonding {   *   * Caller must hold bond lock for read   */ -static inline struct slave *bond_get_slave_by_dev(struct bonding *bond, struct net_device *slave_dev) +static inline struct slave *bond_get_slave_by_dev(struct bonding *bond, +						  struct net_device *slave_dev)  { -	struct slave *slave = NULL; -	int i; +	return netdev_lower_dev_get_private(bond->dev, slave_dev); +} -	bond_for_each_slave(bond, slave, i) { -		if (slave->dev == slave_dev) { -			break; -		} +static inline struct bonding *bond_get_bond_by_slave(struct slave *slave) +{ +	return slave->bond; +} + +static inline bool bond_should_override_tx_queue(struct bonding *bond) +{ +	return BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP || +	       BOND_MODE(bond) == BOND_MODE_ROUNDROBIN; +} + +static inline bool bond_is_lb(const struct bonding *bond) +{ +	return BOND_MODE(bond) == BOND_MODE_TLB || +	       BOND_MODE(bond) == BOND_MODE_ALB; +} + +static inline bool bond_mode_uses_arp(int mode) +{ +	return mode != BOND_MODE_8023AD && mode != BOND_MODE_TLB && +	       mode != BOND_MODE_ALB; +} + +static inline bool bond_mode_uses_primary(int mode) +{ +	return mode == BOND_MODE_ACTIVEBACKUP || mode == BOND_MODE_TLB || +	       mode == BOND_MODE_ALB; +} + +static inline bool bond_uses_primary(struct bonding *bond) +{ +	return bond_mode_uses_primary(BOND_MODE(bond)); +} + +static inline bool bond_slave_is_up(struct slave *slave) +{ +	return netif_running(slave->dev) && netif_carrier_ok(slave->dev); +} + +static inline void bond_set_active_slave(struct slave *slave) +{ +	if (slave->backup) { +		slave->backup = 0; +		rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC);  	} +} -	return slave; +static inline void bond_set_backup_slave(struct slave *slave) +{ +	if (!slave->backup) { +		slave->backup = 1; +		rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); +	}  } -static inline struct bonding *bond_get_bond_by_slave(struct slave *slave) +static inline void bond_set_slave_state(struct slave *slave, +					int slave_state, bool notify)  { -	if (!slave || !slave->dev->master) { -		return NULL; +	if (slave->backup == slave_state) +		return; + +	slave->backup = slave_state; +	if (notify) { +		rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); +		slave->should_notify = 0; +	} else { +		if (slave->should_notify) +			slave->should_notify = 0; +		else +			slave->should_notify = 1;  	} +} -	return netdev_priv(slave->dev->master); +static inline void bond_slave_state_change(struct bonding *bond) +{ +	struct list_head *iter; +	struct slave *tmp; + +	bond_for_each_slave(bond, tmp, iter) { +		if (tmp->link == BOND_LINK_UP) +			bond_set_active_slave(tmp); +		else if (tmp->link == BOND_LINK_DOWN) +			bond_set_backup_slave(tmp); +	}  } -static inline bool bond_is_lb(const struct bonding *bond) +static inline void bond_slave_state_notify(struct bonding *bond) +{ +	struct list_head *iter; +	struct slave *tmp; + +	bond_for_each_slave(bond, tmp, iter) { +		if (tmp->should_notify) { +			rtmsg_ifinfo(RTM_NEWLINK, tmp->dev, 0, GFP_ATOMIC); +			tmp->should_notify = 0; +		} +	} +} + +static inline int bond_slave_state(struct slave *slave)  { -	return (bond->params.mode == BOND_MODE_TLB || -		bond->params.mode == BOND_MODE_ALB); +	return slave->backup; +} + +static inline bool bond_is_active_slave(struct slave *slave) +{ +	return !bond_slave_state(slave); +} + +static inline bool bond_slave_can_tx(struct slave *slave) +{ +	return bond_slave_is_up(slave) && slave->link == BOND_LINK_UP && +	       
bond_is_active_slave(slave);  }  #define BOND_PRI_RESELECT_ALWAYS	0 @@ -303,119 +371,253 @@ static inline bool bond_is_lb(const struct bonding *bond)  #define BOND_FOM_ACTIVE			1  #define BOND_FOM_FOLLOW			2 +#define BOND_ARP_TARGETS_ANY		0 +#define BOND_ARP_TARGETS_ALL		1 +  #define BOND_ARP_VALIDATE_NONE		0  #define BOND_ARP_VALIDATE_ACTIVE	(1 << BOND_STATE_ACTIVE)  #define BOND_ARP_VALIDATE_BACKUP	(1 << BOND_STATE_BACKUP)  #define BOND_ARP_VALIDATE_ALL		(BOND_ARP_VALIDATE_ACTIVE | \  					 BOND_ARP_VALIDATE_BACKUP) +#define BOND_ARP_FILTER			(BOND_ARP_VALIDATE_ALL + 1) +#define BOND_ARP_FILTER_ACTIVE		(BOND_ARP_VALIDATE_ACTIVE | \ +					 BOND_ARP_FILTER) +#define BOND_ARP_FILTER_BACKUP		(BOND_ARP_VALIDATE_BACKUP | \ +					 BOND_ARP_FILTER) + +#define BOND_SLAVE_NOTIFY_NOW		true +#define BOND_SLAVE_NOTIFY_LATER		false  static inline int slave_do_arp_validate(struct bonding *bond,  					struct slave *slave)  { -	return bond->params.arp_validate & (1 << slave->state); +	return bond->params.arp_validate & (1 << bond_slave_state(slave)); +} + +static inline int slave_do_arp_validate_only(struct bonding *bond) +{ +	return bond->params.arp_validate & BOND_ARP_FILTER; +} + +static inline int bond_is_ip_target_ok(__be32 addr) +{ +	return !ipv4_is_lbcast(addr) && !ipv4_is_zeronet(addr); +} + +/* Get the oldest arp which we've received on this slave for bond's + * arp_targets. + */ +static inline unsigned long slave_oldest_target_arp_rx(struct bonding *bond, +						       struct slave *slave) +{ +	int i = 1; +	unsigned long ret = slave->target_last_arp_rx[0]; + +	for (; (i < BOND_MAX_ARP_TARGETS) && bond->params.arp_targets[i]; i++) +		if (time_before(slave->target_last_arp_rx[i], ret)) +			ret = slave->target_last_arp_rx[i]; + +	return ret;  }  static inline unsigned long slave_last_rx(struct bonding *bond,  					struct slave *slave)  { -	if (slave_do_arp_validate(bond, slave)) -		return slave->last_arp_rx; +	if (bond->params.arp_all_targets == BOND_ARP_TARGETS_ALL) +		return slave_oldest_target_arp_rx(bond, slave); -	return slave->dev->last_rx; +	return slave->last_rx;  } -static inline void bond_set_slave_inactive_flags(struct slave *slave) +#ifdef CONFIG_NET_POLL_CONTROLLER +static inline void bond_netpoll_send_skb(const struct slave *slave, +					 struct sk_buff *skb)  { -	struct bonding *bond = netdev_priv(slave->dev->master); -	if (!bond_is_lb(bond)) -		slave->state = BOND_STATE_BACKUP; -	if (!bond->params.all_slaves_active) -		slave->dev->priv_flags |= IFF_SLAVE_INACTIVE; -	if (slave_do_arp_validate(bond, slave)) -		slave->dev->priv_flags |= IFF_SLAVE_NEEDARP; -} +	struct netpoll *np = slave->np; -static inline void bond_set_slave_active_flags(struct slave *slave) +	if (np) +		netpoll_send_skb(np, skb); +} +#else +static inline void bond_netpoll_send_skb(const struct slave *slave, +					 struct sk_buff *skb)  { -	slave->state = BOND_STATE_ACTIVE; -	slave->dev->priv_flags &= ~(IFF_SLAVE_INACTIVE | IFF_SLAVE_NEEDARP);  } +#endif -static inline void bond_set_master_3ad_flags(struct bonding *bond) +static inline void bond_set_slave_inactive_flags(struct slave *slave, +						 bool notify)  { -	bond->dev->priv_flags |= IFF_MASTER_8023AD; +	if (!bond_is_lb(slave->bond)) +		bond_set_slave_state(slave, BOND_STATE_BACKUP, notify); +	if (!slave->bond->params.all_slaves_active) +		slave->inactive = 1;  } -static inline void bond_unset_master_3ad_flags(struct bonding *bond) +static inline void bond_set_slave_active_flags(struct slave *slave, +					       bool notify)  { -	bond->dev->priv_flags &= 
~IFF_MASTER_8023AD; +	bond_set_slave_state(slave, BOND_STATE_ACTIVE, notify); +	slave->inactive = 0;  } -static inline void bond_set_master_alb_flags(struct bonding *bond) +static inline bool bond_is_slave_inactive(struct slave *slave)  { -	bond->dev->priv_flags |= IFF_MASTER_ALB; +	return slave->inactive;  } -static inline void bond_unset_master_alb_flags(struct bonding *bond) +static inline __be32 bond_confirm_addr(struct net_device *dev, __be32 dst, __be32 local)  { -	bond->dev->priv_flags &= ~IFF_MASTER_ALB; +	struct in_device *in_dev; +	__be32 addr = 0; + +	rcu_read_lock(); +	in_dev = __in_dev_get_rcu(dev); + +	if (in_dev) +		addr = inet_confirm_addr(dev_net(dev), in_dev, dst, local, +					 RT_SCOPE_HOST); +	rcu_read_unlock(); +	return addr;  } -struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr); -int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev); +struct bond_net { +	struct net		*net;	/* Associated network namespace */ +	struct list_head	dev_list; +#ifdef CONFIG_PROC_FS +	struct proc_dir_entry	*proc_dir; +#endif +	struct class_attribute	class_attr_bonding_masters; +}; + +int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave); +void bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev);  int bond_create(struct net *net, const char *name); -int bond_create_sysfs(void); -void bond_destroy_sysfs(void); +int bond_create_sysfs(struct bond_net *net); +void bond_destroy_sysfs(struct bond_net *net);  void bond_prepare_sysfs_group(struct bonding *bond); -int bond_create_slave_symlinks(struct net_device *master, struct net_device *slave); -void bond_destroy_slave_symlinks(struct net_device *master, struct net_device *slave); +int bond_sysfs_slave_add(struct slave *slave); +void bond_sysfs_slave_del(struct slave *slave);  int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev);  int bond_release(struct net_device *bond_dev, struct net_device *slave_dev); -void bond_mii_monitor(struct work_struct *); -void bond_loadbalance_arp_mon(struct work_struct *); -void bond_activebackup_arp_mon(struct work_struct *); -void bond_set_mode_ops(struct bonding *bond, int mode); -int bond_parse_parm(const char *mode_arg, const struct bond_parm_tbl *tbl); +u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb);  void bond_select_active_slave(struct bonding *bond);  void bond_change_active_slave(struct bonding *bond, struct slave *new_active); -void bond_register_arp(struct bonding *); -void bond_unregister_arp(struct bonding *); +void bond_create_debugfs(void); +void bond_destroy_debugfs(void); +void bond_debug_register(struct bonding *bond); +void bond_debug_unregister(struct bonding *bond); +void bond_debug_reregister(struct bonding *bond); +const char *bond_mode_name(int mode); +void bond_setup(struct net_device *bond_dev); +unsigned int bond_get_num_tx_queues(void); +int bond_netlink_init(void); +void bond_netlink_fini(void); +struct net_device *bond_option_active_slave_get_rcu(struct bonding *bond); +struct net_device *bond_option_active_slave_get(struct bonding *bond); +const char *bond_slave_link_status(s8 link); +bool bond_verify_device_path(struct net_device *start_dev, +			     struct net_device *end_dev, +			     struct bond_vlan_tag *tags); -struct bond_net { -	struct net *		net;	/* Associated network namespace */ -	struct list_head	dev_list;  #ifdef CONFIG_PROC_FS -	struct proc_dir_entry *	proc_dir; +void 
bond_create_proc_entry(struct bonding *bond); +void bond_remove_proc_entry(struct bonding *bond); +void bond_create_proc_dir(struct bond_net *bn); +void bond_destroy_proc_dir(struct bond_net *bn); +#else +static inline void bond_create_proc_entry(struct bonding *bond) +{ +} + +static inline void bond_remove_proc_entry(struct bonding *bond) +{ +} + +static inline void bond_create_proc_dir(struct bond_net *bn) +{ +} + +static inline void bond_destroy_proc_dir(struct bond_net *bn) +{ +}  #endif -}; + +static inline struct slave *bond_slave_has_mac(struct bonding *bond, +					       const u8 *mac) +{ +	struct list_head *iter; +	struct slave *tmp; + +	bond_for_each_slave(bond, tmp, iter) +		if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr)) +			return tmp; + +	return NULL; +} + +/* Caller must hold rcu_read_lock() for read */ +static inline struct slave *bond_slave_has_mac_rcu(struct bonding *bond, +					       const u8 *mac) +{ +	struct list_head *iter; +	struct slave *tmp; + +	bond_for_each_slave_rcu(bond, tmp, iter) +		if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr)) +			return tmp; + +	return NULL; +} + +/* Caller must hold rcu_read_lock() for read */ +static inline bool bond_slave_has_mac_rx(struct bonding *bond, const u8 *mac) +{ +	struct list_head *iter; +	struct slave *tmp; +	struct netdev_hw_addr *ha; + +	bond_for_each_slave_rcu(bond, tmp, iter) +		if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr)) +			return true; + +	if (netdev_uc_empty(bond->dev)) +		return false; + +	netdev_for_each_uc_addr(ha, bond->dev) +		if (ether_addr_equal_64bits(mac, ha->addr)) +			return true; + +	return false; +} + +/* Check if the ip is present in arp ip list, or first free slot if ip == 0 + * Returns -1 if not found, index if found + */ +static inline int bond_get_targets_ip(__be32 *targets, __be32 ip) +{ +	int i; + +	for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) +		if (targets[i] == ip) +			return i; +		else if (targets[i] == 0) +			break; + +	return -1; +}  /* exported from bond_main.c */  extern int bond_net_id;  extern const struct bond_parm_tbl bond_lacp_tbl[]; -extern const struct bond_parm_tbl bond_mode_tbl[];  extern const struct bond_parm_tbl xmit_hashtype_tbl[];  extern const struct bond_parm_tbl arp_validate_tbl[]; +extern const struct bond_parm_tbl arp_all_targets_tbl[];  extern const struct bond_parm_tbl fail_over_mac_tbl[];  extern const struct bond_parm_tbl pri_reselect_tbl[];  extern struct bond_parm_tbl ad_select_tbl[]; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -void bond_send_unsolicited_na(struct bonding *bond); -void bond_register_ipv6_notifier(void); -void bond_unregister_ipv6_notifier(void); -#else -static inline void bond_send_unsolicited_na(struct bonding *bond) -{ -	return; -} -static inline void bond_register_ipv6_notifier(void) -{ -	return; -} -static inline void bond_unregister_ipv6_notifier(void) -{ -	return; -} -#endif +/* exported from bond_netlink.c */ +extern struct rtnl_link_ops bond_link_ops;  #endif /* _LINUX_BONDING_H */  | 
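
For readers tracking the iterator change in this hunk: the removed bond_for_each_slave*() macros walked bond->first_slave with an integer counter, while the replacements walk the netdev lower-adjacency list through a struct list_head cursor, with an _rcu variant for lock-free readers. The sketch below shows the intended calling pattern; the helper names bond_count_tx_capable*() are invented for illustration and are not part of this patch, but the macros and inlines used are exactly the ones introduced above.

#include "bonding.h"

/* Hypothetical helper: count slaves that can currently transmit.
 * Per the header comment, the plain iterator requires bond->lock.
 */
static int bond_count_tx_capable(struct bonding *bond)
{
	struct list_head *iter;
	struct slave *slave;
	int count = 0;

	bond_for_each_slave(bond, slave, iter)
		if (bond_slave_can_tx(slave))
			count++;

	return count;
}

/* Same walk for a hot path: caller must hold rcu_read_lock(). */
static int bond_count_tx_capable_rcu(struct bonding *bond)
{
	struct list_head *iter;
	struct slave *slave;
	int count = 0;

	bond_for_each_slave_rcu(bond, slave, iter)
		if (bond_slave_can_tx(slave))
			count++;

	return count;
}
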

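The new should_notify bit, together with BOND_SLAVE_NOTIFY_NOW / BOND_SLAVE_NOTIFY_LATER, lets a state flip made under a lock defer its RTM_NEWLINK message until the lock is released, at which point bond_slave_state_notify() flushes the queued notifications. A sketch of that calling pattern follows; bond_failover_to() and the choice of curr_slave_lock here are illustrative assumptions, not code taken from this patch.

#include "bonding.h"

/* Hypothetical failover helper: flip slave states with deferred
 * netlink notification, then flush once the lock is dropped.
 */
static void bond_failover_to(struct bonding *bond, struct slave *new_active,
			     struct slave *old_active)
{
	write_lock_bh(&bond->curr_slave_lock);

	/* With NOTIFY_LATER, a real state change only marks should_notify
	 * instead of calling rtmsg_ifinfo() while the lock is held.
	 */
	bond_set_slave_active_flags(new_active, BOND_SLAVE_NOTIFY_LATER);
	bond_set_slave_inactive_flags(old_active, BOND_SLAVE_NOTIFY_LATER);

	write_unlock_bh(&bond->curr_slave_lock);

	/* Sends RTM_NEWLINK for every slave left with should_notify set */
	bond_slave_state_notify(bond);
}
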