diff options
Diffstat (limited to 'drivers/net/bonding/bond_main.c')
| -rw-r--r-- | drivers/net/bonding/bond_main.c | 4570 | 
1 files changed, 1886 insertions, 2684 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 0273ad0b57b..701f86cd599 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -54,20 +54,16 @@  #include <linux/inet.h>  #include <linux/bitops.h>  #include <linux/io.h> -#include <asm/system.h>  #include <asm/dma.h>  #include <linux/uaccess.h>  #include <linux/errno.h>  #include <linux/netdevice.h> -#include <linux/netpoll.h>  #include <linux/inetdevice.h>  #include <linux/igmp.h>  #include <linux/etherdevice.h>  #include <linux/skbuff.h>  #include <net/sock.h>  #include <linux/rtnetlink.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h>  #include <linux/smp.h>  #include <linux/if_ether.h>  #include <net/arp.h> @@ -80,6 +76,9 @@  #include <net/route.h>  #include <net/net_namespace.h>  #include <net/netns/generic.h> +#include <net/pkt_sched.h> +#include <linux/rculist.h> +#include <net/flow_keys.h>  #include "bonding.h"  #include "bond_3ad.h"  #include "bond_alb.h" @@ -87,14 +86,11 @@  /*---------------------------- Module parameters ----------------------------*/  /* monitor all links that often (in milliseconds). <=0 disables monitoring */ -#define BOND_LINK_MON_INTERV	0 -#define BOND_LINK_ARP_INTERV	0  static int max_bonds	= BOND_DEFAULT_MAX_BONDS;  static int tx_queues	= BOND_DEFAULT_TX_QUEUES; -static int num_grat_arp = 1; -static int num_unsol_na = 1; -static int miimon	= BOND_LINK_MON_INTERV; +static int num_peer_notif = 1; +static int miimon;  static int updelay;  static int downdelay;  static int use_carrier	= 1; @@ -102,24 +98,30 @@ static char *mode;  static char *primary;  static char *primary_reselect;  static char *lacp_rate; +static int min_links;  static char *ad_select;  static char *xmit_hash_policy; -static int arp_interval = BOND_LINK_ARP_INTERV; +static int arp_interval;  static char *arp_ip_target[BOND_MAX_ARP_TARGETS];  static char *arp_validate; +static char *arp_all_targets;  static char *fail_over_mac; -static int all_slaves_active = 0; +static int all_slaves_active;  static struct bond_params bonding_defaults;  static int resend_igmp = BOND_DEFAULT_RESEND_IGMP; +static int packets_per_slave = 1; +static int lp_interval = BOND_ALB_DEFAULT_LP_INTERVAL;  module_param(max_bonds, int, 0);  MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");  module_param(tx_queues, int, 0);  MODULE_PARM_DESC(tx_queues, "Max number of transmit queues (default = 16)"); -module_param(num_grat_arp, int, 0644); -MODULE_PARM_DESC(num_grat_arp, "Number of gratuitous ARP packets to send on failover event"); -module_param(num_unsol_na, int, 0644); -MODULE_PARM_DESC(num_unsol_na, "Number of unsolicited IPv6 Neighbor Advertisements packets to send on failover event"); +module_param_named(num_grat_arp, num_peer_notif, int, 0644); +MODULE_PARM_DESC(num_grat_arp, "Number of peer notifications to send on " +			       "failover event (alias of num_unsol_na)"); +module_param_named(num_unsol_na, num_peer_notif, int, 0644); +MODULE_PARM_DESC(num_unsol_na, "Number of peer notifications to send on " +			       "failover event (alias of num_grat_arp)");  module_param(miimon, int, 0);  MODULE_PARM_DESC(miimon, "Link check interval in milliseconds");  module_param(updelay, int, 0); @@ -131,7 +133,7 @@ module_param(use_carrier, int, 0);  MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; "  			      "0 for off, 1 for on (default)");  module_param(mode, charp, 0); -MODULE_PARM_DESC(mode, "Mode of operation : 0 for balance-rr, " +MODULE_PARM_DESC(mode, "Mode of operation; 0 for balance-rr, "  		       "1 for active-backup, 2 for balance-xor, "  		       "3 for broadcast, 4 for 802.3ad, 5 for balance-tlb, "  		       "6 for balance-alb"); @@ -146,37 +148,56 @@ MODULE_PARM_DESC(primary_reselect, "Reselect primary slave "  				   "2 for only on active slave "  				   "failure");  module_param(lacp_rate, charp, 0); -MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner " -			    "(slow/fast)"); +MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner; " +			    "0 for slow, 1 for fast");  module_param(ad_select, charp, 0); -MODULE_PARM_DESC(ad_select, "803.ad aggregation selection logic: stable (0, default), bandwidth (1), count (2)"); +MODULE_PARM_DESC(ad_select, "803.ad aggregation selection logic; " +			    "0 for stable (default), 1 for bandwidth, " +			    "2 for count"); +module_param(min_links, int, 0); +MODULE_PARM_DESC(min_links, "Minimum number of available links before turning on carrier"); +  module_param(xmit_hash_policy, charp, 0); -MODULE_PARM_DESC(xmit_hash_policy, "XOR hashing method: 0 for layer 2 (default)" -				   ", 1 for layer 3+4"); +MODULE_PARM_DESC(xmit_hash_policy, "balance-xor and 802.3ad hashing method; " +				   "0 for layer 2 (default), 1 for layer 3+4, " +				   "2 for layer 2+3, 3 for encap layer 2+3, " +				   "4 for encap layer 3+4");  module_param(arp_interval, int, 0);  MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");  module_param_array(arp_ip_target, charp, NULL, 0);  MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form");  module_param(arp_validate, charp, 0); -MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all"); +MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes; " +			       "0 for none (default), 1 for active, " +			       "2 for backup, 3 for all"); +module_param(arp_all_targets, charp, 0); +MODULE_PARM_DESC(arp_all_targets, "fail on any/all arp targets timeout; 0 for any (default), 1 for all");  module_param(fail_over_mac, charp, 0); -MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to the same MAC.  none (default), active or follow"); +MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to " +				"the same MAC; 0 for none (default), " +				"1 for active, 2 for follow");  module_param(all_slaves_active, int, 0);  MODULE_PARM_DESC(all_slaves_active, "Keep all frames received on an interface" -				     "by setting active flag for all slaves.  " +				     "by setting active flag for all slaves; "  				     "0 for never (default), 1 for always.");  module_param(resend_igmp, int, 0); -MODULE_PARM_DESC(resend_igmp, "Number of IGMP membership reports to send on link failure"); +MODULE_PARM_DESC(resend_igmp, "Number of IGMP membership reports to send on " +			      "link failure"); +module_param(packets_per_slave, int, 0); +MODULE_PARM_DESC(packets_per_slave, "Packets to send per slave in balance-rr " +				    "mode; 0 for a random slave, 1 packet per " +				    "slave (default), >1 packets per slave."); +module_param(lp_interval, uint, 0); +MODULE_PARM_DESC(lp_interval, "The number of seconds between instances where " +			      "the bonding driver sends learning packets to " +			      "each slaves peer switch. The default is 1.");  /*----------------------------- Global variables ----------------------------*/  #ifdef CONFIG_NET_POLL_CONTROLLER -cpumask_var_t netpoll_block_tx; +atomic_t netpoll_block_tx = ATOMIC_INIT(0);  #endif -static const char * const version = -	DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n"; -  int bond_net_id __read_mostly;  static __be32 arp_target[BOND_MAX_ARP_TARGETS]; @@ -185,68 +206,14 @@ static int bond_mode	= BOND_MODE_ROUNDROBIN;  static int xmit_hashtype = BOND_XMIT_POLICY_LAYER2;  static int lacp_fast; -const struct bond_parm_tbl bond_lacp_tbl[] = { -{	"slow",		AD_LACP_SLOW}, -{	"fast",		AD_LACP_FAST}, -{	NULL,		-1}, -}; - -const struct bond_parm_tbl bond_mode_tbl[] = { -{	"balance-rr",		BOND_MODE_ROUNDROBIN}, -{	"active-backup",	BOND_MODE_ACTIVEBACKUP}, -{	"balance-xor",		BOND_MODE_XOR}, -{	"broadcast",		BOND_MODE_BROADCAST}, -{	"802.3ad",		BOND_MODE_8023AD}, -{	"balance-tlb",		BOND_MODE_TLB}, -{	"balance-alb",		BOND_MODE_ALB}, -{	NULL,			-1}, -}; - -const struct bond_parm_tbl xmit_hashtype_tbl[] = { -{	"layer2",		BOND_XMIT_POLICY_LAYER2}, -{	"layer3+4",		BOND_XMIT_POLICY_LAYER34}, -{	"layer2+3",		BOND_XMIT_POLICY_LAYER23}, -{	NULL,			-1}, -}; - -const struct bond_parm_tbl arp_validate_tbl[] = { -{	"none",			BOND_ARP_VALIDATE_NONE}, -{	"active",		BOND_ARP_VALIDATE_ACTIVE}, -{	"backup",		BOND_ARP_VALIDATE_BACKUP}, -{	"all",			BOND_ARP_VALIDATE_ALL}, -{	NULL,			-1}, -}; - -const struct bond_parm_tbl fail_over_mac_tbl[] = { -{	"none",			BOND_FOM_NONE}, -{	"active",		BOND_FOM_ACTIVE}, -{	"follow",		BOND_FOM_FOLLOW}, -{	NULL,			-1}, -}; - -const struct bond_parm_tbl pri_reselect_tbl[] = { -{	"always",		BOND_PRI_RESELECT_ALWAYS}, -{	"better",		BOND_PRI_RESELECT_BETTER}, -{	"failure",		BOND_PRI_RESELECT_FAILURE}, -{	NULL,			-1}, -}; - -struct bond_parm_tbl ad_select_tbl[] = { -{	"stable",	BOND_AD_STABLE}, -{	"bandwidth",	BOND_AD_BANDWIDTH}, -{	"count",	BOND_AD_COUNT}, -{	NULL,		-1}, -}; -  /*-------------------------- Forward declarations ---------------------------*/ -static void bond_send_gratuitous_arp(struct bonding *bond);  static int bond_init(struct net_device *bond_dev);  static void bond_uninit(struct net_device *bond_dev);  /*---------------------------- General routines -----------------------------*/ -static const char *bond_mode_name(int mode) +const char *bond_mode_name(int mode)  {  	static const char *names[] = {  		[BOND_MODE_ROUNDROBIN] = "load balancing (round-robin)", @@ -258,7 +225,7 @@ static const char *bond_mode_name(int mode)  		[BOND_MODE_ALB] = "adaptive load balancing",  	}; -	if (mode < 0 || mode > BOND_MODE_ALB) +	if (mode < BOND_MODE_ROUNDROBIN || mode > BOND_MODE_ALB)  		return "unknown";  	return names[mode]; @@ -267,206 +234,30 @@ static const char *bond_mode_name(int mode)  /*---------------------------------- VLAN -----------------------------------*/  /** - * bond_add_vlan - add a new vlan id on bond - * @bond: bond that got the notification - * @vlan_id: the vlan id to add - * - * Returns -ENOMEM if allocation failed. - */ -static int bond_add_vlan(struct bonding *bond, unsigned short vlan_id) -{ -	struct vlan_entry *vlan; - -	pr_debug("bond: %s, vlan id %d\n", -		 (bond ? bond->dev->name : "None"), vlan_id); - -	vlan = kzalloc(sizeof(struct vlan_entry), GFP_KERNEL); -	if (!vlan) -		return -ENOMEM; - -	INIT_LIST_HEAD(&vlan->vlan_list); -	vlan->vlan_id = vlan_id; - -	write_lock_bh(&bond->lock); - -	list_add_tail(&vlan->vlan_list, &bond->vlan_list); - -	write_unlock_bh(&bond->lock); - -	pr_debug("added VLAN ID %d on bond %s\n", vlan_id, bond->dev->name); - -	return 0; -} - -/** - * bond_del_vlan - delete a vlan id from bond - * @bond: bond that got the notification - * @vlan_id: the vlan id to delete - * - * returns -ENODEV if @vlan_id was not found in @bond. - */ -static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id) -{ -	struct vlan_entry *vlan; -	int res = -ENODEV; - -	pr_debug("bond: %s, vlan id %d\n", bond->dev->name, vlan_id); - -	block_netpoll_tx(); -	write_lock_bh(&bond->lock); - -	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { -		if (vlan->vlan_id == vlan_id) { -			list_del(&vlan->vlan_list); - -			if (bond_is_lb(bond)) -				bond_alb_clear_vlan(bond, vlan_id); - -			pr_debug("removed VLAN ID %d from bond %s\n", -				 vlan_id, bond->dev->name); - -			kfree(vlan); - -			if (list_empty(&bond->vlan_list) && -			    (bond->slave_cnt == 0)) { -				/* Last VLAN removed and no slaves, so -				 * restore block on adding VLANs. This will -				 * be removed once new slaves that are not -				 * VLAN challenged will be added. -				 */ -				bond->dev->features |= NETIF_F_VLAN_CHALLENGED; -			} - -			res = 0; -			goto out; -		} -	} - -	pr_debug("couldn't find VLAN ID %d in bond %s\n", -		 vlan_id, bond->dev->name); - -out: -	write_unlock_bh(&bond->lock); -	unblock_netpoll_tx(); -	return res; -} - -/** - * bond_has_challenged_slaves - * @bond: the bond we're working on - * - * Searches the slave list. Returns 1 if a vlan challenged slave - * was found, 0 otherwise. - * - * Assumes bond->lock is held. - */ -static int bond_has_challenged_slaves(struct bonding *bond) -{ -	struct slave *slave; -	int i; - -	bond_for_each_slave(bond, slave, i) { -		if (slave->dev->features & NETIF_F_VLAN_CHALLENGED) { -			pr_debug("found VLAN challenged slave - %s\n", -				 slave->dev->name); -			return 1; -		} -	} - -	pr_debug("no VLAN challenged slaves found\n"); -	return 0; -} - -/** - * bond_next_vlan - safely skip to the next item in the vlans list. - * @bond: the bond we're working on - * @curr: item we're advancing from - * - * Returns %NULL if list is empty, bond->next_vlan if @curr is %NULL, - * or @curr->next otherwise (even if it is @curr itself again). - * - * Caller must hold bond->lock - */ -struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr) -{ -	struct vlan_entry *next, *last; - -	if (list_empty(&bond->vlan_list)) -		return NULL; - -	if (!curr) { -		next = list_entry(bond->vlan_list.next, -				  struct vlan_entry, vlan_list); -	} else { -		last = list_entry(bond->vlan_list.prev, -				  struct vlan_entry, vlan_list); -		if (last == curr) { -			next = list_entry(bond->vlan_list.next, -					  struct vlan_entry, vlan_list); -		} else { -			next = list_entry(curr->vlan_list.next, -					  struct vlan_entry, vlan_list); -		} -	} - -	return next; -} - -/**   * bond_dev_queue_xmit - Prepare skb for xmit.   *   * @bond: bond device that got this skb for tx.   * @skb: hw accel VLAN tagged skb to transmit   * @slave_dev: slave that is supposed to xmit this skbuff - * - * When the bond gets an skb to transmit that is - * already hardware accelerated VLAN tagged, and it - * needs to relay this skb to a slave that is not - * hw accel capable, the skb needs to be "unaccelerated", - * i.e. strip the hwaccel tag and re-insert it as part - * of the payload.   */ -int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, +void bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,  			struct net_device *slave_dev)  { -	unsigned short uninitialized_var(vlan_id); - -	/* Test vlan_list not vlgrp to catch and handle 802.1p tags */ -	if (!list_empty(&bond->vlan_list) && -	    !(slave_dev->features & NETIF_F_HW_VLAN_TX) && -	    vlan_get_tag(skb, &vlan_id) == 0) { -		skb->dev = slave_dev; -		skb = vlan_put_tag(skb, vlan_id); -		if (!skb) { -			/* vlan_put_tag() frees the skb in case of error, -			 * so return success here so the calling functions -			 * won't attempt to free is again. -			 */ -			return 0; -		} -	} else { -		skb->dev = slave_dev; -	} +	skb->dev = slave_dev; -	skb->priority = 1; -#ifdef CONFIG_NET_POLL_CONTROLLER -	if (unlikely(bond->dev->priv_flags & IFF_IN_NETPOLL)) { -		struct netpoll *np = bond->dev->npinfo->netpoll; -		slave_dev->npinfo = bond->dev->npinfo; -		slave_dev->priv_flags |= IFF_IN_NETPOLL; -		netpoll_send_skb_on_dev(np, skb, slave_dev); -		slave_dev->priv_flags &= ~IFF_IN_NETPOLL; -	} else -#endif -		dev_queue_xmit(skb); +	BUILD_BUG_ON(sizeof(skb->queue_mapping) != +		     sizeof(qdisc_skb_cb(skb)->slave_dev_queue_mapping)); +	skb->queue_mapping = qdisc_skb_cb(skb)->slave_dev_queue_mapping; -	return 0; +	if (unlikely(netpoll_tx_running(bond->dev))) +		bond_netpoll_send_skb(bond_get_slave_by_dev(bond, slave_dev), skb); +	else +		dev_queue_xmit(skb);  }  /* - * In the following 3 functions, bond_vlan_rx_register(), bond_vlan_rx_add_vid - * and bond_vlan_rx_kill_vid, We don't protect the slave list iteration with a - * lock because: + * In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid, + * We don't protect the slave list iteration with a lock because:   * a. This operation is performed in IOCTL context,   * b. The operation is protected by the RTNL semaphore in the 8021q code,   * c. Holding a lock with BH disabled while directly calling a base driver @@ -482,58 +273,36 @@ int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,  */  /** - * bond_vlan_rx_register - Propagates registration to slaves + * bond_vlan_rx_add_vid - Propagates adding an id to slaves   * @bond_dev: bonding net device that got called - * @grp: vlan group being registered + * @vid: vlan id being added   */ -static void bond_vlan_rx_register(struct net_device *bond_dev, -				  struct vlan_group *grp) +static int bond_vlan_rx_add_vid(struct net_device *bond_dev, +				__be16 proto, u16 vid)  {  	struct bonding *bond = netdev_priv(bond_dev); -	struct slave *slave; -	int i; - -	write_lock_bh(&bond->lock); -	bond->vlgrp = grp; -	write_unlock_bh(&bond->lock); - -	bond_for_each_slave(bond, slave, i) { -		struct net_device *slave_dev = slave->dev; -		const struct net_device_ops *slave_ops = slave_dev->netdev_ops; +	struct slave *slave, *rollback_slave; +	struct list_head *iter; +	int res; -		if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && -		    slave_ops->ndo_vlan_rx_register) { -			slave_ops->ndo_vlan_rx_register(slave_dev, grp); -		} +	bond_for_each_slave(bond, slave, iter) { +		res = vlan_vid_add(slave->dev, proto, vid); +		if (res) +			goto unwind;  	} -} -/** - * bond_vlan_rx_add_vid - Propagates adding an id to slaves - * @bond_dev: bonding net device that got called - * @vid: vlan id being added - */ -static void bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid) -{ -	struct bonding *bond = netdev_priv(bond_dev); -	struct slave *slave; -	int i, res; +	return 0; -	bond_for_each_slave(bond, slave, i) { -		struct net_device *slave_dev = slave->dev; -		const struct net_device_ops *slave_ops = slave_dev->netdev_ops; +unwind: +	/* unwind to the slave that failed */ +	bond_for_each_slave(bond, rollback_slave, iter) { +		if (rollback_slave == slave) +			break; -		if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) && -		    slave_ops->ndo_vlan_rx_add_vid) { -			slave_ops->ndo_vlan_rx_add_vid(slave_dev, vid); -		} +		vlan_vid_del(rollback_slave->dev, proto, vid);  	} -	res = bond_add_vlan(bond, vid); -	if (res) { -		pr_err("%s: Error: Failed to add vlan id %d\n", -		       bond_dev->name, vid); -	} +	return res;  }  /** @@ -541,84 +310,20 @@ static void bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid)   * @bond_dev: bonding net device that got called   * @vid: vlan id being removed   */ -static void bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid) +static int bond_vlan_rx_kill_vid(struct net_device *bond_dev, +				 __be16 proto, u16 vid)  {  	struct bonding *bond = netdev_priv(bond_dev); +	struct list_head *iter;  	struct slave *slave; -	struct net_device *vlan_dev; -	int i, res; -	bond_for_each_slave(bond, slave, i) { -		struct net_device *slave_dev = slave->dev; -		const struct net_device_ops *slave_ops = slave_dev->netdev_ops; +	bond_for_each_slave(bond, slave, iter) +		vlan_vid_del(slave->dev, proto, vid); -		if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) && -		    slave_ops->ndo_vlan_rx_kill_vid) { -			/* Save and then restore vlan_dev in the grp array, -			 * since the slave's driver might clear it. -			 */ -			vlan_dev = vlan_group_get_device(bond->vlgrp, vid); -			slave_ops->ndo_vlan_rx_kill_vid(slave_dev, vid); -			vlan_group_set_device(bond->vlgrp, vid, vlan_dev); -		} -	} - -	res = bond_del_vlan(bond, vid); -	if (res) { -		pr_err("%s: Error: Failed to remove vlan id %d\n", -		       bond_dev->name, vid); -	} -} - -static void bond_add_vlans_on_slave(struct bonding *bond, struct net_device *slave_dev) -{ -	struct vlan_entry *vlan; -	const struct net_device_ops *slave_ops = slave_dev->netdev_ops; - -	if (!bond->vlgrp) -		return; - -	if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && -	    slave_ops->ndo_vlan_rx_register) -		slave_ops->ndo_vlan_rx_register(slave_dev, bond->vlgrp); - -	if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) || -	    !(slave_ops->ndo_vlan_rx_add_vid)) -		return; - -	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) -		slave_ops->ndo_vlan_rx_add_vid(slave_dev, vlan->vlan_id); -} - -static void bond_del_vlans_from_slave(struct bonding *bond, -				      struct net_device *slave_dev) -{ -	const struct net_device_ops *slave_ops = slave_dev->netdev_ops; -	struct vlan_entry *vlan; -	struct net_device *vlan_dev; - -	if (!bond->vlgrp) -		return; - -	if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) || -	    !(slave_ops->ndo_vlan_rx_kill_vid)) -		goto unreg; +	if (bond_is_lb(bond)) +		bond_alb_clear_vlan(bond, vid); -	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { -		if (!vlan->vlan_id) -			continue; -		/* Save and then restore vlan_dev in the grp array, -		 * since the slave's driver might clear it. -		 */ -		vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); -		slave_ops->ndo_vlan_rx_kill_vid(slave_dev, vlan->vlan_id); -		vlan_group_set_device(bond->vlgrp, vlan->vlan_id, vlan_dev); -	} - -unreg: -	if ((slave_dev->features & NETIF_F_HW_VLAN_RX) && -	    slave_ops->ndo_vlan_rx_register) -		slave_ops->ndo_vlan_rx_register(slave_dev, NULL); +	return 0;  }  /*------------------------------- Link status -------------------------------*/ @@ -632,16 +337,16 @@ unreg:   */  static int bond_set_carrier(struct bonding *bond)  { +	struct list_head *iter;  	struct slave *slave; -	int i; -	if (bond->slave_cnt == 0) +	if (!bond_has_slaves(bond))  		goto down; -	if (bond->params.mode == BOND_MODE_8023AD) +	if (BOND_MODE(bond) == BOND_MODE_8023AD)  		return bond_3ad_set_carrier(bond); -	bond_for_each_slave(bond, slave, i) { +	bond_for_each_slave(bond, slave, iter) {  		if (slave->link == BOND_LINK_UP) {  			if (!netif_carrier_ok(bond->dev)) {  				netif_carrier_on(bond->dev); @@ -662,48 +367,55 @@ down:  /*   * Get link speed and duplex from the slave's base driver   * using ethtool. If for some reason the call fails or the - * values are invalid, fake speed and duplex to 100/Full - * and return error. + * values are invalid, set speed and duplex to -1, + * and return.   */ -static int bond_update_speed_duplex(struct slave *slave) +static void bond_update_speed_duplex(struct slave *slave)  {  	struct net_device *slave_dev = slave->dev; -	struct ethtool_cmd etool; +	struct ethtool_cmd ecmd; +	u32 slave_speed;  	int res; -	/* Fake speed and duplex */ -	slave->speed = SPEED_100; -	slave->duplex = DUPLEX_FULL; +	slave->speed = SPEED_UNKNOWN; +	slave->duplex = DUPLEX_UNKNOWN; -	if (!slave_dev->ethtool_ops || !slave_dev->ethtool_ops->get_settings) -		return -1; - -	res = slave_dev->ethtool_ops->get_settings(slave_dev, &etool); +	res = __ethtool_get_settings(slave_dev, &ecmd);  	if (res < 0) -		return -1; +		return; -	switch (etool.speed) { -	case SPEED_10: -	case SPEED_100: -	case SPEED_1000: -	case SPEED_10000: -		break; -	default: -		return -1; -	} +	slave_speed = ethtool_cmd_speed(&ecmd); +	if (slave_speed == 0 || slave_speed == ((__u32) -1)) +		return; -	switch (etool.duplex) { +	switch (ecmd.duplex) {  	case DUPLEX_FULL:  	case DUPLEX_HALF:  		break;  	default: -		return -1; +		return;  	} -	slave->speed = etool.speed; -	slave->duplex = etool.duplex; +	slave->speed = slave_speed; +	slave->duplex = ecmd.duplex; -	return 0; +	return; +} + +const char *bond_slave_link_status(s8 link) +{ +	switch (link) { +	case BOND_LINK_UP: +		return "up"; +	case BOND_LINK_FAIL: +		return "going down"; +	case BOND_LINK_DOWN: +		return "down"; +	case BOND_LINK_BACK: +		return "going back"; +	default: +		return "unknown"; +	}  }  /* @@ -737,15 +449,9 @@ static int bond_check_dev_link(struct bonding *bond,  		return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0;  	/* Try to get link status using Ethtool first. */ -	if (slave_dev->ethtool_ops) { -		if (slave_dev->ethtool_ops->get_link) { -			u32 link; - -			link = slave_dev->ethtool_ops->get_link(slave_dev); - -			return link ? BMSR_LSTATUS : 0; -		} -	} +	if (slave_dev->ethtool_ops->get_link) +		return slave_dev->ethtool_ops->get_link(slave_dev) ? +			BMSR_LSTATUS : 0;  	/* Ethtool can't be used, fallback to MII ioctls. */  	ioctl = slave_ops->ndo_do_ioctl; @@ -788,8 +494,10 @@ static int bond_check_dev_link(struct bonding *bond,   */  static int bond_set_promiscuity(struct bonding *bond, int inc)  { +	struct list_head *iter;  	int err = 0; -	if (USES_PRIMARY(bond->params.mode)) { + +	if (bond_uses_primary(bond)) {  		/* write lock already acquired */  		if (bond->curr_active_slave) {  			err = dev_set_promiscuity(bond->curr_active_slave->dev, @@ -797,8 +505,8 @@ static int bond_set_promiscuity(struct bonding *bond, int inc)  		}  	} else {  		struct slave *slave; -		int i; -		bond_for_each_slave(bond, slave, i) { + +		bond_for_each_slave(bond, slave, iter) {  			err = dev_set_promiscuity(slave->dev, inc);  			if (err)  				return err; @@ -812,8 +520,10 @@ static int bond_set_promiscuity(struct bonding *bond, int inc)   */  static int bond_set_allmulti(struct bonding *bond, int inc)  { +	struct list_head *iter;  	int err = 0; -	if (USES_PRIMARY(bond->params.mode)) { + +	if (bond_uses_primary(bond)) {  		/* write lock already acquired */  		if (bond->curr_active_slave) {  			err = dev_set_allmulti(bond->curr_active_slave->dev, @@ -821,8 +531,8 @@ static int bond_set_allmulti(struct bonding *bond, int inc)  		}  	} else {  		struct slave *slave; -		int i; -		bond_for_each_slave(bond, slave, i) { + +		bond_for_each_slave(bond, slave, iter) {  			err = dev_set_allmulti(slave->dev, inc);  			if (err)  				return err; @@ -832,106 +542,39 @@ static int bond_set_allmulti(struct bonding *bond, int inc)  }  /* - * Add a Multicast address to slaves - * according to mode - */ -static void bond_mc_add(struct bonding *bond, void *addr) -{ -	if (USES_PRIMARY(bond->params.mode)) { -		/* write lock already acquired */ -		if (bond->curr_active_slave) -			dev_mc_add(bond->curr_active_slave->dev, addr); -	} else { -		struct slave *slave; -		int i; - -		bond_for_each_slave(bond, slave, i) -			dev_mc_add(slave->dev, addr); -	} -} - -/* - * Remove a multicast address from slave - * according to mode - */ -static void bond_mc_del(struct bonding *bond, void *addr) -{ -	if (USES_PRIMARY(bond->params.mode)) { -		/* write lock already acquired */ -		if (bond->curr_active_slave) -			dev_mc_del(bond->curr_active_slave->dev, addr); -	} else { -		struct slave *slave; -		int i; -		bond_for_each_slave(bond, slave, i) { -			dev_mc_del(slave->dev, addr); -		} -	} -} - - -static void __bond_resend_igmp_join_requests(struct net_device *dev) -{ -	struct in_device *in_dev; - -	rcu_read_lock(); -	in_dev = __in_dev_get_rcu(dev); -	if (in_dev) -		ip_mc_rejoin_groups(in_dev); -	rcu_read_unlock(); -} - -/*   * Retrieve the list of registered multicast addresses for the bonding   * device and retransmit an IGMP JOIN request to the current active   * slave.   */ -static void bond_resend_igmp_join_requests(struct bonding *bond) +static void bond_resend_igmp_join_requests_delayed(struct work_struct *work)  { -	struct net_device *vlan_dev; -	struct vlan_entry *vlan; - -	read_lock(&bond->lock); - -	/* rejoin all groups on bond device */ -	__bond_resend_igmp_join_requests(bond->dev); +	struct bonding *bond = container_of(work, struct bonding, +					    mcast_work.work); -	/* rejoin all groups on vlan devices */ -	if (bond->vlgrp) { -		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { -			vlan_dev = vlan_group_get_device(bond->vlgrp, -							 vlan->vlan_id); -			if (vlan_dev) -				__bond_resend_igmp_join_requests(vlan_dev); -		} +	if (!rtnl_trylock()) { +		queue_delayed_work(bond->wq, &bond->mcast_work, 1); +		return;  	} +	call_netdevice_notifiers(NETDEV_RESEND_IGMP, bond->dev); -	if (--bond->igmp_retrans > 0) +	if (bond->igmp_retrans > 1) { +		bond->igmp_retrans--;  		queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5); - -	read_unlock(&bond->lock); -} - -static void bond_resend_igmp_join_requests_delayed(struct work_struct *work) -{ -	struct bonding *bond = container_of(work, struct bonding, -							mcast_work.work); -	bond_resend_igmp_join_requests(bond); +	} +	rtnl_unlock();  } -/* - * flush all members of flush->mc_list from device dev->mc_list +/* Flush bond's hardware addresses from slave   */ -static void bond_mc_list_flush(struct net_device *bond_dev, +static void bond_hw_addr_flush(struct net_device *bond_dev,  			       struct net_device *slave_dev)  {  	struct bonding *bond = netdev_priv(bond_dev); -	struct netdev_hw_addr *ha; -	netdev_for_each_mc_addr(ha, bond_dev) -		dev_mc_del(slave_dev, ha->addr); +	dev_uc_unsync(slave_dev, bond_dev); +	dev_mc_unsync(slave_dev, bond_dev); -	if (bond->params.mode == BOND_MODE_8023AD) { +	if (BOND_MODE(bond) == BOND_MODE_8023AD) {  		/* del lacpdu mc addr from mc list */  		u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; @@ -941,21 +584,15 @@ static void bond_mc_list_flush(struct net_device *bond_dev,  /*--------------------------- Active slave change ---------------------------*/ -/* - * Update the mc list and multicast-related flags for the new and - * old active slaves (if any) according to the multicast mode, and - * promiscuous flags unconditionally. +/* Update the hardware address list and promisc/allmulti for the new and + * old active slaves (if any).  Modes that are not using primary keep all + * slaves up date at all times; only the modes that use primary need to call + * this function to swap these settings during a failover.   */ -static void bond_mc_swap(struct bonding *bond, struct slave *new_active, -			 struct slave *old_active) +static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active, +			      struct slave *old_active)  { -	struct netdev_hw_addr *ha; - -	if (!USES_PRIMARY(bond->params.mode)) -		/* nothing to do -  mc list is already up-to-date on -		 * all slaves -		 */ -		return; +	ASSERT_RTNL();  	if (old_active) {  		if (bond->dev->flags & IFF_PROMISC) @@ -964,8 +601,7 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active,  		if (bond->dev->flags & IFF_ALLMULTI)  			dev_set_allmulti(old_active->dev, -1); -		netdev_for_each_mc_addr(ha, bond->dev) -			dev_mc_del(old_active->dev, ha->addr); +		bond_hw_addr_flush(bond->dev, old_active->dev);  	}  	if (new_active) { @@ -976,24 +612,41 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active,  		if (bond->dev->flags & IFF_ALLMULTI)  			dev_set_allmulti(new_active->dev, 1); -		netdev_for_each_mc_addr(ha, bond->dev) -			dev_mc_add(new_active->dev, ha->addr); +		netif_addr_lock_bh(bond->dev); +		dev_uc_sync(new_active->dev, bond->dev); +		dev_mc_sync(new_active->dev, bond->dev); +		netif_addr_unlock_bh(bond->dev);  	}  } +/** + * bond_set_dev_addr - clone slave's address to bond + * @bond_dev: bond net device + * @slave_dev: slave net device + * + * Should be called with RTNL held. + */ +static void bond_set_dev_addr(struct net_device *bond_dev, +			      struct net_device *slave_dev) +{ +	pr_debug("bond_dev=%p slave_dev=%p slave_dev->addr_len=%d\n", +		 bond_dev, slave_dev, slave_dev->addr_len); +	memcpy(bond_dev->dev_addr, slave_dev->dev_addr, slave_dev->addr_len); +	bond_dev->addr_assign_type = NET_ADDR_STOLEN; +	call_netdevice_notifiers(NETDEV_CHANGEADDR, bond_dev); +} +  /*   * bond_do_fail_over_mac   *   * Perform special MAC address swapping for fail_over_mac settings   * - * Called with RTNL, bond->lock for read, curr_slave_lock for write_bh. + * Called with RTNL, curr_slave_lock for write_bh.   */  static void bond_do_fail_over_mac(struct bonding *bond,  				  struct slave *new_active,  				  struct slave *old_active)  	__releases(&bond->curr_slave_lock) -	__releases(&bond->lock) -	__acquires(&bond->lock)  	__acquires(&bond->curr_slave_lock)  {  	u8 tmp_mac[ETH_ALEN]; @@ -1002,9 +655,11 @@ static void bond_do_fail_over_mac(struct bonding *bond,  	switch (bond->params.fail_over_mac) {  	case BOND_FOM_ACTIVE: -		if (new_active) -			memcpy(bond->dev->dev_addr,  new_active->dev->dev_addr, -			       new_active->dev->addr_len); +		if (new_active) { +			write_unlock_bh(&bond->curr_slave_lock); +			bond_set_dev_addr(bond->dev, new_active->dev); +			write_lock_bh(&bond->curr_slave_lock); +		}  		break;  	case BOND_FOM_FOLLOW:  		/* @@ -1016,15 +671,14 @@ static void bond_do_fail_over_mac(struct bonding *bond,  			return;  		write_unlock_bh(&bond->curr_slave_lock); -		read_unlock(&bond->lock);  		if (old_active) { -			memcpy(tmp_mac, new_active->dev->dev_addr, ETH_ALEN); -			memcpy(saddr.sa_data, old_active->dev->dev_addr, -			       ETH_ALEN); +			ether_addr_copy(tmp_mac, new_active->dev->dev_addr); +			ether_addr_copy(saddr.sa_data, +					old_active->dev->dev_addr);  			saddr.sa_family = new_active->dev->type;  		} else { -			memcpy(saddr.sa_data, bond->dev->dev_addr, ETH_ALEN); +			ether_addr_copy(saddr.sa_data, bond->dev->dev_addr);  			saddr.sa_family = bond->dev->type;  		} @@ -1038,7 +692,7 @@ static void bond_do_fail_over_mac(struct bonding *bond,  		if (!old_active)  			goto out; -		memcpy(saddr.sa_data, tmp_mac, ETH_ALEN); +		ether_addr_copy(saddr.sa_data, tmp_mac);  		saddr.sa_family = old_active->dev->type;  		rv = dev_set_mac_address(old_active->dev, &saddr); @@ -1046,7 +700,6 @@ static void bond_do_fail_over_mac(struct bonding *bond,  			pr_err("%s: Error %d setting MAC of slave %s\n",  			       bond->dev->name, -rv, new_active->dev->name);  out: -		read_lock(&bond->lock);  		write_lock_bh(&bond->curr_slave_lock);  		break;  	default: @@ -1080,48 +733,46 @@ static bool bond_should_change_active(struct bonding *bond)  /**   * find_best_interface - select the best available slave to be the active one   * @bond: our bonding struct - * - * Warning: Caller must hold curr_slave_lock for writing.   */  static struct slave *bond_find_best_slave(struct bonding *bond)  { -	struct slave *new_active, *old_active; -	struct slave *bestslave = NULL; +	struct slave *slave, *bestslave = NULL; +	struct list_head *iter;  	int mintime = bond->params.updelay; -	int i; -	new_active = bond->curr_active_slave; +	if (bond->primary_slave && bond->primary_slave->link == BOND_LINK_UP && +	    bond_should_change_active(bond)) +		return bond->primary_slave; -	if (!new_active) { /* there were no active slaves left */ -		if (bond->slave_cnt > 0)   /* found one slave */ -			new_active = bond->first_slave; -		else -			return NULL; /* still no slave, return NULL */ +	bond_for_each_slave(bond, slave, iter) { +		if (slave->link == BOND_LINK_UP) +			return slave; +		if (slave->link == BOND_LINK_BACK && bond_slave_is_up(slave) && +		    slave->delay < mintime) { +			mintime = slave->delay; +			bestslave = slave; +		}  	} -	if ((bond->primary_slave) && -	    bond->primary_slave->link == BOND_LINK_UP && -	    bond_should_change_active(bond)) { -		new_active = bond->primary_slave; -	} +	return bestslave; +} -	/* remember where to stop iterating over the slaves */ -	old_active = new_active; +static bool bond_should_notify_peers(struct bonding *bond) +{ +	struct slave *slave; -	bond_for_each_slave_from(bond, new_active, i, old_active) { -		if (new_active->link == BOND_LINK_UP) { -			return new_active; -		} else if (new_active->link == BOND_LINK_BACK && -			   IS_UP(new_active->dev)) { -			/* link up, but waiting for stabilization */ -			if (new_active->delay < mintime) { -				mintime = new_active->delay; -				bestslave = new_active; -			} -		} -	} +	rcu_read_lock(); +	slave = rcu_dereference(bond->curr_active_slave); +	rcu_read_unlock(); -	return bestslave; +	pr_debug("bond_should_notify_peers: bond %s slave %s\n", +		 bond->dev->name, slave ? slave->dev->name : "NULL"); + +	if (!slave || !bond->send_peer_notif || +	    test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state)) +		return false; + +	return true;  }  /** @@ -1137,8 +788,7 @@ static struct slave *bond_find_best_slave(struct bonding *bond)   * because it is apparently the best available slave we have, even though its   * updelay hasn't timed out yet.   * - * If new_active is not NULL, caller must hold bond->lock for read and - * curr_slave_lock for write_bh. + * If new_active is not NULL, caller must hold curr_slave_lock for write_bh.   */  void bond_change_active_slave(struct bonding *bond, struct slave *new_active)  { @@ -1148,11 +798,11 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)  		return;  	if (new_active) { -		new_active->jiffies = jiffies; +		new_active->last_link_up = jiffies;  		if (new_active->link == BOND_LINK_BACK) { -			if (USES_PRIMARY(bond->params.mode)) { -				pr_info("%s: making interface %s the new active one %d ms earlier.\n", +			if (bond_uses_primary(bond)) { +				pr_info("%s: making interface %s the new active one %d ms earlier\n",  					bond->dev->name, new_active->dev->name,  					(bond->params.updelay - new_active->delay) * bond->params.miimon);  			} @@ -1160,65 +810,76 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)  			new_active->delay = 0;  			new_active->link = BOND_LINK_UP; -			if (bond->params.mode == BOND_MODE_8023AD) +			if (BOND_MODE(bond) == BOND_MODE_8023AD)  				bond_3ad_handle_link_change(new_active, BOND_LINK_UP);  			if (bond_is_lb(bond))  				bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP);  		} else { -			if (USES_PRIMARY(bond->params.mode)) { -				pr_info("%s: making interface %s the new active one.\n", +			if (bond_uses_primary(bond)) { +				pr_info("%s: making interface %s the new active one\n",  					bond->dev->name, new_active->dev->name);  			}  		}  	} -	if (USES_PRIMARY(bond->params.mode)) -		bond_mc_swap(bond, new_active, old_active); +	if (bond_uses_primary(bond)) +		bond_hw_addr_swap(bond, new_active, old_active);  	if (bond_is_lb(bond)) {  		bond_alb_handle_active_change(bond, new_active);  		if (old_active) -			bond_set_slave_inactive_flags(old_active); +			bond_set_slave_inactive_flags(old_active, +						      BOND_SLAVE_NOTIFY_NOW);  		if (new_active) -			bond_set_slave_active_flags(new_active); +			bond_set_slave_active_flags(new_active, +						    BOND_SLAVE_NOTIFY_NOW);  	} else { -		bond->curr_active_slave = new_active; +		rcu_assign_pointer(bond->curr_active_slave, new_active);  	} -	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) { +	if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) {  		if (old_active) -			bond_set_slave_inactive_flags(old_active); +			bond_set_slave_inactive_flags(old_active, +						      BOND_SLAVE_NOTIFY_NOW);  		if (new_active) { -			bond_set_slave_active_flags(new_active); +			bool should_notify_peers = false; + +			bond_set_slave_active_flags(new_active, +						    BOND_SLAVE_NOTIFY_NOW);  			if (bond->params.fail_over_mac)  				bond_do_fail_over_mac(bond, new_active,  						      old_active); -			bond->send_grat_arp = bond->params.num_grat_arp; -			bond_send_gratuitous_arp(bond); - -			bond->send_unsol_na = bond->params.num_unsol_na; -			bond_send_unsolicited_na(bond); +			if (netif_running(bond->dev)) { +				bond->send_peer_notif = +					bond->params.num_peer_notif; +				should_notify_peers = +					bond_should_notify_peers(bond); +			}  			write_unlock_bh(&bond->curr_slave_lock); -			read_unlock(&bond->lock); -			netdev_bonding_change(bond->dev, NETDEV_BONDING_FAILOVER); +			call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev); +			if (should_notify_peers) +				call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, +							 bond->dev); -			read_lock(&bond->lock);  			write_lock_bh(&bond->curr_slave_lock);  		}  	}  	/* resend IGMP joins since active slave has changed or -	 * all were sent on curr_active_slave */ -	if ((USES_PRIMARY(bond->params.mode) && new_active) || -	    bond->params.mode == BOND_MODE_ROUNDROBIN) { +	 * all were sent on curr_active_slave. +	 * resend only if bond is brought up with the affected +	 * bonding modes and the retransmission is enabled */ +	if (netif_running(bond->dev) && (bond->params.resend_igmp > 0) && +	    ((bond_uses_primary(bond) && new_active) || +	     BOND_MODE(bond) == BOND_MODE_ROUNDROBIN)) {  		bond->igmp_retrans = bond->params.resend_igmp; -		queue_delayed_work(bond->wq, &bond->mcast_work, 0); +		queue_delayed_work(bond->wq, &bond->mcast_work, 1);  	}  } @@ -1231,7 +892,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)   * - The primary_slave has got its link back.   * - A slave has got its link back and there's no old curr_active_slave.   * - * Caller must hold bond->lock for read and curr_slave_lock for write_bh. + * Caller must hold curr_slave_lock for write_bh.   */  void bond_select_active_slave(struct bonding *bond)  { @@ -1249,191 +910,171 @@ void bond_select_active_slave(struct bonding *bond)  			pr_info("%s: first active interface up!\n",  				bond->dev->name);  		} else { -			pr_info("%s: now running without any active interface !\n", +			pr_info("%s: now running without any active interface!\n",  				bond->dev->name);  		}  	}  } -/*--------------------------- slave list handling ---------------------------*/ - -/* - * This function attaches the slave to the end of list. - * - * bond->lock held for writing by caller. - */ -static void bond_attach_slave(struct bonding *bond, struct slave *new_slave) +#ifdef CONFIG_NET_POLL_CONTROLLER +static inline int slave_enable_netpoll(struct slave *slave)  { -	if (bond->first_slave == NULL) { /* attaching the first slave */ -		new_slave->next = new_slave; -		new_slave->prev = new_slave; -		bond->first_slave = new_slave; -	} else { -		new_slave->next = bond->first_slave; -		new_slave->prev = bond->first_slave->prev; -		new_slave->next->prev = new_slave; -		new_slave->prev->next = new_slave; -	} +	struct netpoll *np; +	int err = 0; -	bond->slave_cnt++; -} +	np = kzalloc(sizeof(*np), GFP_KERNEL); +	err = -ENOMEM; +	if (!np) +		goto out; -/* - * This function detaches the slave from the list. - * WARNING: no check is made to verify if the slave effectively - * belongs to <bond>. - * Nothing is freed on return, structures are just unchained. - * If any slave pointer in bond was pointing to <slave>, - * it should be changed by the calling function. - * - * bond->lock held for writing by caller. - */ -static void bond_detach_slave(struct bonding *bond, struct slave *slave) +	err = __netpoll_setup(np, slave->dev); +	if (err) { +		kfree(np); +		goto out; +	} +	slave->np = np; +out: +	return err; +} +static inline void slave_disable_netpoll(struct slave *slave)  { -	if (slave->next) -		slave->next->prev = slave->prev; +	struct netpoll *np = slave->np; -	if (slave->prev) -		slave->prev->next = slave->next; - -	if (bond->first_slave == slave) { /* slave is the first slave */ -		if (bond->slave_cnt > 1) { /* there are more slave */ -			bond->first_slave = slave->next; -		} else { -			bond->first_slave = NULL; /* slave was the last one */ -		} -	} +	if (!np) +		return; -	slave->next = NULL; -	slave->prev = NULL; -	bond->slave_cnt--; +	slave->np = NULL; +	__netpoll_free_async(np);  } -#ifdef CONFIG_NET_POLL_CONTROLLER -/* - * You must hold read lock on bond->lock before calling this. - */ -static bool slaves_support_netpoll(struct net_device *bond_dev) +static void bond_poll_controller(struct net_device *bond_dev)  { -	struct bonding *bond = netdev_priv(bond_dev); -	struct slave *slave; -	int i = 0; -	bool ret = true; - -	bond_for_each_slave(bond, slave, i) { -		if ((slave->dev->priv_flags & IFF_DISABLE_NETPOLL) || -		    !slave->dev->netdev_ops->ndo_poll_controller) -			ret = false; -	} -	return i != 0 && ret;  } -static void bond_poll_controller(struct net_device *bond_dev) +static void bond_netpoll_cleanup(struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev); +	struct list_head *iter;  	struct slave *slave; -	int i; -	bond_for_each_slave(bond, slave, i) { -		if (slave->dev && IS_UP(slave->dev)) -			netpoll_poll_dev(slave->dev); -	} +	bond_for_each_slave(bond, slave, iter) +		if (bond_slave_is_up(slave)) +			slave_disable_netpoll(slave);  } -static void bond_netpoll_cleanup(struct net_device *bond_dev) +static int bond_netpoll_setup(struct net_device *dev, struct netpoll_info *ni)  { -	struct bonding *bond = netdev_priv(bond_dev); +	struct bonding *bond = netdev_priv(dev); +	struct list_head *iter;  	struct slave *slave; -	const struct net_device_ops *ops; -	int i; +	int err = 0; -	read_lock(&bond->lock); -	bond_dev->npinfo = NULL; -	bond_for_each_slave(bond, slave, i) { -		if (slave->dev) { -			ops = slave->dev->netdev_ops; -			if (ops->ndo_netpoll_cleanup) -				ops->ndo_netpoll_cleanup(slave->dev); -			else -				slave->dev->npinfo = NULL; +	bond_for_each_slave(bond, slave, iter) { +		err = slave_enable_netpoll(slave); +		if (err) { +			bond_netpoll_cleanup(dev); +			break;  		}  	} -	read_unlock(&bond->lock); +	return err;  } -  #else - +static inline int slave_enable_netpoll(struct slave *slave) +{ +	return 0; +} +static inline void slave_disable_netpoll(struct slave *slave) +{ +}  static void bond_netpoll_cleanup(struct net_device *bond_dev)  {  } -  #endif  /*---------------------------------- IOCTL ----------------------------------*/ -static int bond_sethwaddr(struct net_device *bond_dev, -			  struct net_device *slave_dev) +static netdev_features_t bond_fix_features(struct net_device *dev, +					   netdev_features_t features)  { -	pr_debug("bond_dev=%p\n", bond_dev); -	pr_debug("slave_dev=%p\n", slave_dev); -	pr_debug("slave_dev->addr_len=%d\n", slave_dev->addr_len); -	memcpy(bond_dev->dev_addr, slave_dev->dev_addr, slave_dev->addr_len); -	return 0; +	struct bonding *bond = netdev_priv(dev); +	struct list_head *iter; +	netdev_features_t mask; +	struct slave *slave; + +	if (!bond_has_slaves(bond)) { +		/* Disable adding VLANs to empty bond. But why? --mq */ +		features |= NETIF_F_VLAN_CHALLENGED; +		return features; +	} + +	mask = features; +	features &= ~NETIF_F_ONE_FOR_ALL; +	features |= NETIF_F_ALL_FOR_ALL; + +	bond_for_each_slave(bond, slave, iter) { +		features = netdev_increment_features(features, +						     slave->dev->features, +						     mask); +	} +	features = netdev_add_tso_features(features, mask); + +	return features;  } -#define BOND_VLAN_FEATURES \ -	(NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX | \ -	 NETIF_F_HW_VLAN_FILTER) +#define BOND_VLAN_FEATURES	(NETIF_F_ALL_CSUM | NETIF_F_SG | \ +				 NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ +				 NETIF_F_HIGHDMA | NETIF_F_LRO) -/* - * Compute the common dev->feature set available to all slaves.  Some - * feature bits are managed elsewhere, so preserve those feature bits - * on the master device. - */ -static int bond_compute_features(struct bonding *bond) +#define BOND_ENC_FEATURES	(NETIF_F_ALL_CSUM | NETIF_F_SG | NETIF_F_RXCSUM |\ +				 NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL) + +static void bond_compute_features(struct bonding *bond)  { -	struct slave *slave; +	unsigned int flags, dst_release_flag = IFF_XMIT_DST_RELEASE; +	netdev_features_t vlan_features = BOND_VLAN_FEATURES; +	netdev_features_t enc_features  = BOND_ENC_FEATURES;  	struct net_device *bond_dev = bond->dev; -	unsigned long features = bond_dev->features; -	unsigned long vlan_features = 0; -	unsigned short max_hard_header_len = max((u16)ETH_HLEN, -						bond_dev->hard_header_len); -	int i; - -	features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES); -	features |=  NETIF_F_GSO_MASK | NETIF_F_NO_CSUM; +	struct list_head *iter; +	struct slave *slave; +	unsigned short max_hard_header_len = ETH_HLEN; +	unsigned int gso_max_size = GSO_MAX_SIZE; +	u16 gso_max_segs = GSO_MAX_SEGS; -	if (!bond->first_slave) +	if (!bond_has_slaves(bond))  		goto done; +	vlan_features &= NETIF_F_ALL_FOR_ALL; -	features &= ~NETIF_F_ONE_FOR_ALL; - -	vlan_features = bond->first_slave->dev->vlan_features; -	bond_for_each_slave(bond, slave, i) { -		features = netdev_increment_features(features, -						     slave->dev->features, -						     NETIF_F_ONE_FOR_ALL); +	bond_for_each_slave(bond, slave, iter) {  		vlan_features = netdev_increment_features(vlan_features, -							slave->dev->vlan_features, -							NETIF_F_ONE_FOR_ALL); +			slave->dev->vlan_features, BOND_VLAN_FEATURES); + +		enc_features = netdev_increment_features(enc_features, +							 slave->dev->hw_enc_features, +							 BOND_ENC_FEATURES); +		dst_release_flag &= slave->dev->priv_flags;  		if (slave->dev->hard_header_len > max_hard_header_len)  			max_hard_header_len = slave->dev->hard_header_len; + +		gso_max_size = min(gso_max_size, slave->dev->gso_max_size); +		gso_max_segs = min(gso_max_segs, slave->dev->gso_max_segs);  	}  done: -	features |= (bond_dev->features & BOND_VLAN_FEATURES); -	bond_dev->features = netdev_fix_features(features, NULL); -	bond_dev->vlan_features = netdev_fix_features(vlan_features, NULL); +	bond_dev->vlan_features = vlan_features; +	bond_dev->hw_enc_features = enc_features;  	bond_dev->hard_header_len = max_hard_header_len; +	bond_dev->gso_max_segs = gso_max_segs; +	netif_set_gso_max_size(bond_dev, gso_max_size); -	return 0; +	flags = bond_dev->priv_flags & ~IFF_XMIT_DST_RELEASE; +	bond_dev->priv_flags = flags | dst_release_flag; + +	netdev_change_features(bond_dev);  }  static void bond_setup_by_slave(struct net_device *bond_dev,  				struct net_device *slave_dev)  { -	struct bonding *bond = netdev_priv(bond_dev); -  	bond_dev->header_ops	    = slave_dev->header_ops;  	bond_dev->type		    = slave_dev->type; @@ -1442,7 +1083,122 @@ static void bond_setup_by_slave(struct net_device *bond_dev,  	memcpy(bond_dev->broadcast, slave_dev->broadcast,  		slave_dev->addr_len); -	bond->setup_by_slave = 1; +} + +/* On bonding slaves other than the currently active slave, suppress + * duplicates except for alb non-mcast/bcast. + */ +static bool bond_should_deliver_exact_match(struct sk_buff *skb, +					    struct slave *slave, +					    struct bonding *bond) +{ +	if (bond_is_slave_inactive(slave)) { +		if (BOND_MODE(bond) == BOND_MODE_ALB && +		    skb->pkt_type != PACKET_BROADCAST && +		    skb->pkt_type != PACKET_MULTICAST) +			return false; +		return true; +	} +	return false; +} + +static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb) +{ +	struct sk_buff *skb = *pskb; +	struct slave *slave; +	struct bonding *bond; +	int (*recv_probe)(const struct sk_buff *, struct bonding *, +			  struct slave *); +	int ret = RX_HANDLER_ANOTHER; + +	skb = skb_share_check(skb, GFP_ATOMIC); +	if (unlikely(!skb)) +		return RX_HANDLER_CONSUMED; + +	*pskb = skb; + +	slave = bond_slave_get_rcu(skb->dev); +	bond = slave->bond; + +	recv_probe = ACCESS_ONCE(bond->recv_probe); +	if (recv_probe) { +		ret = recv_probe(skb, bond, slave); +		if (ret == RX_HANDLER_CONSUMED) { +			consume_skb(skb); +			return ret; +		} +	} + +	if (bond_should_deliver_exact_match(skb, slave, bond)) { +		return RX_HANDLER_EXACT; +	} + +	skb->dev = bond->dev; + +	if (BOND_MODE(bond) == BOND_MODE_ALB && +	    bond->dev->priv_flags & IFF_BRIDGE_PORT && +	    skb->pkt_type == PACKET_HOST) { + +		if (unlikely(skb_cow_head(skb, +					  skb->data - skb_mac_header(skb)))) { +			kfree_skb(skb); +			return RX_HANDLER_CONSUMED; +		} +		ether_addr_copy(eth_hdr(skb)->h_dest, bond->dev->dev_addr); +	} + +	return ret; +} + +static int bond_master_upper_dev_link(struct net_device *bond_dev, +				      struct net_device *slave_dev, +				      struct slave *slave) +{ +	int err; + +	err = netdev_master_upper_dev_link_private(slave_dev, bond_dev, slave); +	if (err) +		return err; +	slave_dev->flags |= IFF_SLAVE; +	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL); +	return 0; +} + +static void bond_upper_dev_unlink(struct net_device *bond_dev, +				  struct net_device *slave_dev) +{ +	netdev_upper_dev_unlink(slave_dev, bond_dev); +	slave_dev->flags &= ~IFF_SLAVE; +	rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL); +} + +static struct slave *bond_alloc_slave(struct bonding *bond) +{ +	struct slave *slave = NULL; + +	slave = kzalloc(sizeof(struct slave), GFP_KERNEL); +	if (!slave) +		return NULL; + +	if (BOND_MODE(bond) == BOND_MODE_8023AD) { +		SLAVE_AD_INFO(slave) = kzalloc(sizeof(struct ad_slave_info), +					       GFP_KERNEL); +		if (!SLAVE_AD_INFO(slave)) { +			kfree(slave); +			return NULL; +		} +	} +	return slave; +} + +static void bond_free_slave(struct slave *slave) +{ +	struct bonding *bond = bond_get_bond_by_slave(slave); + +	if (BOND_MODE(bond) == BOND_MODE_8023AD) +		kfree(SLAVE_AD_INFO(slave)); + +	kfree(slave);  }  /* enslave device <slave> to bond device <master> */ @@ -1450,53 +1206,44 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  {  	struct bonding *bond = netdev_priv(bond_dev);  	const struct net_device_ops *slave_ops = slave_dev->netdev_ops; -	struct slave *new_slave = NULL; -	struct netdev_hw_addr *ha; +	struct slave *new_slave = NULL, *prev_slave;  	struct sockaddr addr;  	int link_reporting; -	int old_features = bond_dev->features; -	int res = 0; +	int res = 0, i; -	if (!bond->params.use_carrier && slave_dev->ethtool_ops == NULL && -		slave_ops->ndo_do_ioctl == NULL) { -		pr_warning("%s: Warning: no link monitoring support for %s\n", -			   bond_dev->name, slave_dev->name); -	} - -	/* bond must be initialized by bond_open() before enslaving */ -	if (!(bond_dev->flags & IFF_UP)) { -		pr_warning("%s: master_dev is not up in bond_enslave\n", -			   bond_dev->name); +	if (!bond->params.use_carrier && +	    slave_dev->ethtool_ops->get_link == NULL && +	    slave_ops->ndo_do_ioctl == NULL) { +		pr_warn("%s: Warning: no link monitoring support for %s\n", +			bond_dev->name, slave_dev->name);  	}  	/* already enslaved */  	if (slave_dev->flags & IFF_SLAVE) { -		pr_debug("Error, Device was already enslaved\n"); +		pr_debug("Error: Device was already enslaved\n");  		return -EBUSY;  	} +	if (bond_dev == slave_dev) { +		pr_err("%s: cannot enslave bond to itself.\n", bond_dev->name); +		return -EPERM; +	} +  	/* vlan challenged mutual exclusion */  	/* no need to lock since we're protected by rtnl_lock */  	if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) {  		pr_debug("%s: NETIF_F_VLAN_CHALLENGED\n", slave_dev->name); -		if (bond->vlgrp) { +		if (vlan_uses_dev(bond_dev)) {  			pr_err("%s: Error: cannot enslave VLAN challenged slave %s on VLAN enabled bond %s\n",  			       bond_dev->name, slave_dev->name, bond_dev->name);  			return -EPERM;  		} else { -			pr_warning("%s: Warning: enslaved VLAN challenged slave %s. Adding VLANs will be blocked as long as %s is part of bond %s\n", -				   bond_dev->name, slave_dev->name, -				   slave_dev->name, bond_dev->name); -			bond_dev->features |= NETIF_F_VLAN_CHALLENGED; +			pr_warn("%s: Warning: enslaved VLAN challenged slave %s. Adding VLANs will be blocked as long as %s is part of bond %s\n", +				bond_dev->name, slave_dev->name, +				slave_dev->name, bond_dev->name);  		}  	} else {  		pr_debug("%s: ! NETIF_F_VLAN_CHALLENGED\n", slave_dev->name); -		if (bond->slave_cnt == 0) { -			/* First slave, and it is not VLAN challenged, -			 * so remove the block of adding VLANs over the bond. -			 */ -			bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED; -		}  	}  	/* @@ -1506,7 +1253,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  	 * enslaving it; the old ifenslave will not.  	 */  	if ((slave_dev->flags & IFF_UP)) { -		pr_err("%s is up. This may be due to an out of date ifenslave.\n", +		pr_err("%s is up - this may be due to an out of date ifenslave\n",  		       slave_dev->name);  		res = -EPERM;  		goto err_undo_flags; @@ -1519,14 +1266,14 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  	 * bond ether type mutual exclusion - don't allow slaves of dissimilar  	 * ether type (eg ARPHRD_ETHER and ARPHRD_INFINIBAND) share the same bond  	 */ -	if (bond->slave_cnt == 0) { +	if (!bond_has_slaves(bond)) {  		if (bond_dev->type != slave_dev->type) {  			pr_debug("%s: change device type from %d to %d\n",  				 bond_dev->name,  				 bond_dev->type, slave_dev->type); -			res = netdev_bonding_change(bond_dev, -						    NETDEV_PRE_TYPE_CHANGE); +			res = call_netdevice_notifiers(NETDEV_PRE_TYPE_CHANGE, +						       bond_dev);  			res = notifier_to_errno(res);  			if (res) {  				pr_err("%s: refused to change device type\n", @@ -1541,46 +1288,54 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  			if (slave_dev->type != ARPHRD_ETHER)  				bond_setup_by_slave(bond_dev, slave_dev); -			else +			else {  				ether_setup(bond_dev); +				bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING; +			} -			netdev_bonding_change(bond_dev, -					      NETDEV_POST_TYPE_CHANGE); +			call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE, +						 bond_dev);  		}  	} else if (bond_dev->type != slave_dev->type) { -		pr_err("%s ether type (%d) is different from other slaves (%d), can not enslave it.\n", -		       slave_dev->name, -		       slave_dev->type, bond_dev->type); +		pr_err("%s ether type (%d) is different from other slaves (%d), can not enslave it\n", +		       slave_dev->name, slave_dev->type, bond_dev->type);  		res = -EINVAL;  		goto err_undo_flags;  	}  	if (slave_ops->ndo_set_mac_address == NULL) { -		if (bond->slave_cnt == 0) { -			pr_warning("%s: Warning: The first slave device specified does not support setting the MAC address. Setting fail_over_mac to active.", -				   bond_dev->name); -			bond->params.fail_over_mac = BOND_FOM_ACTIVE; +		if (!bond_has_slaves(bond)) { +			pr_warn("%s: Warning: The first slave device specified does not support setting the MAC address\n", +				bond_dev->name); +			if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) { +				bond->params.fail_over_mac = BOND_FOM_ACTIVE; +				pr_warn("%s: Setting fail_over_mac to active for active-backup mode\n", +					bond_dev->name); +			}  		} else if (bond->params.fail_over_mac != BOND_FOM_ACTIVE) { -			pr_err("%s: Error: The slave device specified does not support setting the MAC address, but fail_over_mac is not set to active.\n", +			pr_err("%s: Error: The slave device specified does not support setting the MAC address, but fail_over_mac is not set to active\n",  			       bond_dev->name);  			res = -EOPNOTSUPP;  			goto err_undo_flags;  		}  	} +	call_netdevice_notifiers(NETDEV_JOIN, slave_dev); +  	/* If this is the first slave, then we need to set the master's hardware  	 * address to be the same as the slave's. */ -	if (bond->slave_cnt == 0) -		memcpy(bond->dev->dev_addr, slave_dev->dev_addr, -		       slave_dev->addr_len); +	if (!bond_has_slaves(bond) && +	    bond->dev->addr_assign_type == NET_ADDR_RANDOM) +		bond_set_dev_addr(bond->dev, slave_dev); - -	new_slave = kzalloc(sizeof(struct slave), GFP_KERNEL); +	new_slave = bond_alloc_slave(bond);  	if (!new_slave) {  		res = -ENOMEM;  		goto err_undo_flags;  	} +	new_slave->bond = bond; +	new_slave->dev = slave_dev;  	/*  	 * Set the new_slave's queue_id to be zero.  Queue ID mapping  	 * is set via sysfs or module option if desired. @@ -1600,9 +1355,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  	 * that need it, and for restoring it upon release, and then  	 * set it to the master's address  	 */ -	memcpy(new_slave->perm_hwaddr, slave_dev->dev_addr, ETH_ALEN); +	ether_addr_copy(new_slave->perm_hwaddr, slave_dev->dev_addr); -	if (!bond->params.fail_over_mac) { +	if (!bond->params.fail_over_mac || +	    BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {  		/*  		 * Set slave to master's mac address.  The application already  		 * set the master's mac address to that of the first slave @@ -1616,19 +1372,13 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  		}  	} -	res = netdev_set_master(slave_dev, bond_dev); -	if (res) { -		pr_debug("Error %d calling netdev_set_master\n", res); -		goto err_restore_mac; -	}  	/* open the slave since the application closed it */  	res = dev_open(slave_dev);  	if (res) {  		pr_debug("Opening slave %s failed\n", slave_dev->name); -		goto err_unset_master; +		goto err_restore_mac;  	} -	new_slave->dev = slave_dev;  	slave_dev->priv_flags |= IFF_BONDING;  	if (bond_is_lb(bond)) { @@ -1640,12 +1390,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  			goto err_close;  	} -	/* If the mode USES_PRIMARY, then the new slave gets the -	 * master's promisc (and mc) settings only if it becomes the -	 * curr_active_slave, and that is taken care of later when calling -	 * bond_change_active() +	/* If the mode uses primary, then the following is handled by +	 * bond_change_active_slave().  	 */ -	if (!USES_PRIMARY(bond->params.mode)) { +	if (!bond_uses_primary(bond)) {  		/* set promiscuity level to new slave */  		if (bond_dev->flags & IFF_PROMISC) {  			res = dev_set_promiscuity(slave_dev, 1); @@ -1661,35 +1409,38 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  		}  		netif_addr_lock_bh(bond_dev); -		/* upload master's mc_list to new slave */ -		netdev_for_each_mc_addr(ha, bond_dev) -			dev_mc_add(slave_dev, ha->addr); + +		dev_mc_sync_multiple(slave_dev, bond_dev); +		dev_uc_sync_multiple(slave_dev, bond_dev); +  		netif_addr_unlock_bh(bond_dev);  	} -	if (bond->params.mode == BOND_MODE_8023AD) { +	if (BOND_MODE(bond) == BOND_MODE_8023AD) {  		/* add lacpdu mc addr to mc list */  		u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;  		dev_mc_add(slave_dev, lacpdu_multicast);  	} -	bond_add_vlans_on_slave(bond, slave_dev); - -	write_lock_bh(&bond->lock); +	res = vlan_vids_add_by_dev(slave_dev, bond_dev); +	if (res) { +		pr_err("%s: Error: Couldn't add bond vlan ids to %s\n", +		       bond_dev->name, slave_dev->name); +		goto err_close; +	} -	bond_attach_slave(bond, new_slave); +	prev_slave = bond_last_slave(bond);  	new_slave->delay = 0;  	new_slave->link_failure_count = 0; -	bond_compute_features(bond); - -	write_unlock_bh(&bond->lock); - -	read_lock(&bond->lock); +	bond_update_speed_duplex(new_slave); -	new_slave->last_arp_rx = jiffies; +	new_slave->last_rx = jiffies - +		(msecs_to_jiffies(bond->params.arp_interval) + 1); +	for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) +		new_slave->target_last_arp_rx[i] = new_slave->last_rx;  	if (bond->params.miimon && !bond->params.use_carrier) {  		link_reporting = bond_check_dev_link(bond, slave_dev, 1); @@ -1704,44 +1455,41 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  			 * supported); thus, we don't need to change  			 * the messages for netif_carrier.  			 */ -			pr_warning("%s: Warning: MII and ETHTOOL support not available for interface %s, and arp_interval/arp_ip_target module parameters not specified, thus bonding will not detect link failures! see bonding.txt for details.\n", -			       bond_dev->name, slave_dev->name); +			pr_warn("%s: Warning: MII and ETHTOOL support not available for interface %s, and arp_interval/arp_ip_target module parameters not specified, thus bonding will not detect link failures! see bonding.txt for details\n", +				bond_dev->name, slave_dev->name);  		} else if (link_reporting == -1) {  			/* unable get link status using mii/ethtool */ -			pr_warning("%s: Warning: can't get link status from interface %s; the network driver associated with this interface does not support MII or ETHTOOL link status reporting, thus miimon has no effect on this interface.\n", -				   bond_dev->name, slave_dev->name); +			pr_warn("%s: Warning: can't get link status from interface %s; the network driver associated with this interface does not support MII or ETHTOOL link status reporting, thus miimon has no effect on this interface\n", +				bond_dev->name, slave_dev->name);  		}  	}  	/* check for initial state */ -	if (!bond->params.miimon || -	    (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS)) { -		if (bond->params.updelay) { -			pr_debug("Initial state of slave_dev is BOND_LINK_BACK\n"); -			new_slave->link  = BOND_LINK_BACK; -			new_slave->delay = bond->params.updelay; +	if (bond->params.miimon) { +		if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) { +			if (bond->params.updelay) { +				new_slave->link = BOND_LINK_BACK; +				new_slave->delay = bond->params.updelay; +			} else { +				new_slave->link = BOND_LINK_UP; +			}  		} else { -			pr_debug("Initial state of slave_dev is BOND_LINK_UP\n"); -			new_slave->link  = BOND_LINK_UP; +			new_slave->link = BOND_LINK_DOWN;  		} -		new_slave->jiffies = jiffies; +	} else if (bond->params.arp_interval) { +		new_slave->link = (netif_carrier_ok(slave_dev) ? +			BOND_LINK_UP : BOND_LINK_DOWN);  	} else { -		pr_debug("Initial state of slave_dev is BOND_LINK_DOWN\n"); -		new_slave->link  = BOND_LINK_DOWN; +		new_slave->link = BOND_LINK_UP;  	} -	if (bond_update_speed_duplex(new_slave) && -	    (new_slave->link != BOND_LINK_DOWN)) { -		pr_warning("%s: Warning: failed to get speed and duplex from %s, assumed to be 100Mb/sec and Full.\n", -			   bond_dev->name, new_slave->dev->name); +	if (new_slave->link != BOND_LINK_DOWN) +		new_slave->last_link_up = jiffies; +	pr_debug("Initial state of slave_dev is BOND_LINK_%s\n", +		 new_slave->link == BOND_LINK_DOWN ? "DOWN" : +		 (new_slave->link == BOND_LINK_UP ? "UP" : "BACK")); -		if (bond->params.mode == BOND_MODE_8023AD) { -			pr_warning("%s: Warning: Operation of 802.3ad mode requires ETHTOOL support in base driver for proper aggregator selection.\n", -				   bond_dev->name); -		} -	} - -	if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) { +	if (bond_uses_primary(bond) && bond->params.primary[0]) {  		/* if there is a primary slave, remember it */  		if (strcmp(bond->params.primary, new_slave->dev->name) == 0) {  			bond->primary_slave = new_slave; @@ -1749,100 +1497,139 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)  		}  	} -	write_lock_bh(&bond->curr_slave_lock); - -	switch (bond->params.mode) { +	switch (BOND_MODE(bond)) {  	case BOND_MODE_ACTIVEBACKUP: -		bond_set_slave_inactive_flags(new_slave); -		bond_select_active_slave(bond); +		bond_set_slave_inactive_flags(new_slave, +					      BOND_SLAVE_NOTIFY_NOW);  		break;  	case BOND_MODE_8023AD:  		/* in 802.3ad mode, the internal mechanism  		 * will activate the slaves in the selected  		 * aggregator  		 */ -		bond_set_slave_inactive_flags(new_slave); +		bond_set_slave_inactive_flags(new_slave, BOND_SLAVE_NOTIFY_NOW);  		/* if this is the first slave */ -		if (bond->slave_cnt == 1) { -			SLAVE_AD_INFO(new_slave).id = 1; +		if (!prev_slave) { +			SLAVE_AD_INFO(new_slave)->id = 1;  			/* Initialize AD with the number of times that the AD timer is called in 1 second  			 * can be called only after the mac address of the bond is set  			 */ -			bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL, -					    bond->params.lacp_fast); +			bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL);  		} else { -			SLAVE_AD_INFO(new_slave).id = -				SLAVE_AD_INFO(new_slave->prev).id + 1; +			SLAVE_AD_INFO(new_slave)->id = +				SLAVE_AD_INFO(prev_slave)->id + 1;  		}  		bond_3ad_bind_slave(new_slave);  		break;  	case BOND_MODE_TLB:  	case BOND_MODE_ALB: -		new_slave->state = BOND_STATE_ACTIVE; -		bond_set_slave_inactive_flags(new_slave); -		bond_select_active_slave(bond); +		bond_set_active_slave(new_slave); +		bond_set_slave_inactive_flags(new_slave, BOND_SLAVE_NOTIFY_NOW);  		break;  	default:  		pr_debug("This slave is always active in trunk mode\n");  		/* always active in trunk mode */ -		new_slave->state = BOND_STATE_ACTIVE; +		bond_set_active_slave(new_slave);  		/* In trunking mode there is little meaning to curr_active_slave  		 * anyway (it holds no special properties of the bond device),  		 * so we can change it without calling change_active_interface()  		 */ -		if (!bond->curr_active_slave) -			bond->curr_active_slave = new_slave; +		if (!bond->curr_active_slave && new_slave->link == BOND_LINK_UP) +			rcu_assign_pointer(bond->curr_active_slave, new_slave);  		break;  	} /* switch(bond_mode) */ -	write_unlock_bh(&bond->curr_slave_lock); - -	bond_set_carrier(bond); -  #ifdef CONFIG_NET_POLL_CONTROLLER -	if (slaves_support_netpoll(bond_dev)) { -		bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL; -		if (bond_dev->npinfo) -			slave_dev->npinfo = bond_dev->npinfo; -	} else if (!(bond_dev->priv_flags & IFF_DISABLE_NETPOLL)) { -		bond_dev->priv_flags |= IFF_DISABLE_NETPOLL; -		pr_info("New slave device %s does not support netpoll\n", -			slave_dev->name); -		pr_info("Disabling netpoll support for %s\n", bond_dev->name); +	slave_dev->npinfo = bond->dev->npinfo; +	if (slave_dev->npinfo) { +		if (slave_enable_netpoll(new_slave)) { +			pr_info("Error, %s: master_dev is using netpoll, but new slave device does not support netpoll\n", +				bond_dev->name); +			res = -EBUSY; +			goto err_detach; +		}  	}  #endif -	read_unlock(&bond->lock); -	res = bond_create_slave_symlinks(bond_dev, slave_dev); -	if (res) -		goto err_close; +	res = netdev_rx_handler_register(slave_dev, bond_handle_frame, +					 new_slave); +	if (res) { +		pr_debug("Error %d calling netdev_rx_handler_register\n", res); +		goto err_detach; +	} -	pr_info("%s: enslaving %s as a%s interface with a%s link.\n", +	res = bond_master_upper_dev_link(bond_dev, slave_dev, new_slave); +	if (res) { +		pr_debug("Error %d calling bond_master_upper_dev_link\n", res); +		goto err_unregister; +	} + +	res = bond_sysfs_slave_add(new_slave); +	if (res) { +		pr_debug("Error %d calling bond_sysfs_slave_add\n", res); +		goto err_upper_unlink; +	} + +	bond->slave_cnt++; +	bond_compute_features(bond); +	bond_set_carrier(bond); + +	if (bond_uses_primary(bond)) { +		block_netpoll_tx(); +		write_lock_bh(&bond->curr_slave_lock); +		bond_select_active_slave(bond); +		write_unlock_bh(&bond->curr_slave_lock); +		unblock_netpoll_tx(); +	} + +	pr_info("%s: Enslaving %s as %s interface with %s link\n",  		bond_dev->name, slave_dev->name, -		new_slave->state == BOND_STATE_ACTIVE ? "n active" : " backup", -		new_slave->link != BOND_LINK_DOWN ? "n up" : " down"); +		bond_is_active_slave(new_slave) ? "an active" : "a backup", +		new_slave->link != BOND_LINK_DOWN ? "an up" : "a down");  	/* enslave is successful */  	return 0;  /* Undo stages on error */ +err_upper_unlink: +	bond_upper_dev_unlink(bond_dev, slave_dev); + +err_unregister: +	netdev_rx_handler_unregister(slave_dev); + +err_detach: +	if (!bond_uses_primary(bond)) +		bond_hw_addr_flush(bond_dev, slave_dev); + +	vlan_vids_del_by_dev(slave_dev, bond_dev); +	if (bond->primary_slave == new_slave) +		bond->primary_slave = NULL; +	if (bond->curr_active_slave == new_slave) { +		block_netpoll_tx(); +		write_lock_bh(&bond->curr_slave_lock); +		bond_change_active_slave(bond, NULL); +		bond_select_active_slave(bond); +		write_unlock_bh(&bond->curr_slave_lock); +		unblock_netpoll_tx(); +	} +	slave_disable_netpoll(new_slave); +  err_close: +	slave_dev->priv_flags &= ~IFF_BONDING;  	dev_close(slave_dev); -err_unset_master: -	netdev_set_master(slave_dev, NULL); -  err_restore_mac: -	if (!bond->params.fail_over_mac) { +	if (!bond->params.fail_over_mac || +	    BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {  		/* XXX TODO - fom follow mode needs to change master's  		 * MAC if this slave's MAC is in use by the bond, or at  		 * least print a warning.  		 */ -		memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN); +		ether_addr_copy(addr.sa_data, new_slave->perm_hwaddr);  		addr.sa_family = slave_dev->type;  		dev_set_mac_address(slave_dev, &addr);  	} @@ -1851,10 +1638,13 @@ err_restore_mtu:  	dev_set_mtu(slave_dev, new_slave->original_mtu);  err_free: -	kfree(new_slave); +	bond_free_slave(new_slave);  err_undo_flags: -	bond_dev->features = old_features; +	/* Enslave of first slave has failed and we need to fix master's mac */ +	if (!bond_has_slaves(bond) && +	    ether_addr_equal_64bits(bond_dev->dev_addr, slave_dev->dev_addr)) +		eth_hw_addr_random(bond_dev);  	return res;  } @@ -1862,7 +1652,8 @@ err_undo_flags:  /*   * Try to release the slave device <slave> from the bond device <master>   * It is legal to access curr_active_slave without a lock because all the function - * is write-locked. + * is write-locked. If "all" is true it means that the function is being called + * while destroying a bond interface and all slaves are being released.   *   * The rules for slave state should be:   *   for Active/Backup: @@ -1870,70 +1661,77 @@ err_undo_flags:   *   for Bonded connections:   *     The first up interface should be left on and all others downed.   */ -int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) +static int __bond_release_one(struct net_device *bond_dev, +			      struct net_device *slave_dev, +			      bool all)  {  	struct bonding *bond = netdev_priv(bond_dev);  	struct slave *slave, *oldcurrent;  	struct sockaddr addr; +	int old_flags = bond_dev->flags; +	netdev_features_t old_features = bond_dev->features;  	/* slave is not a slave or master is not master of this slave */  	if (!(slave_dev->flags & IFF_SLAVE) || -	    (slave_dev->master != bond_dev)) { -		pr_err("%s: Error: cannot release %s.\n", +	    !netdev_has_upper_dev(slave_dev, bond_dev)) { +		pr_err("%s: Error: cannot release %s\n",  		       bond_dev->name, slave_dev->name);  		return -EINVAL;  	}  	block_netpoll_tx(); -	netdev_bonding_change(bond_dev, NETDEV_BONDING_DESLAVE); -	write_lock_bh(&bond->lock);  	slave = bond_get_slave_by_dev(bond, slave_dev);  	if (!slave) {  		/* not a slave of this bond */  		pr_info("%s: %s not enslaved\n",  			bond_dev->name, slave_dev->name); -		write_unlock_bh(&bond->lock);  		unblock_netpoll_tx();  		return -EINVAL;  	} -	if (!bond->params.fail_over_mac) { -		if (!compare_ether_addr(bond_dev->dev_addr, slave->perm_hwaddr) && -		    bond->slave_cnt > 1) -			pr_warning("%s: Warning: the permanent HWaddr of %s - %pM - is still in use by %s. Set the HWaddr of %s to a different address to avoid conflicts.\n", -				   bond_dev->name, slave_dev->name, -				   slave->perm_hwaddr, -				   bond_dev->name, slave_dev->name); -	} +	bond_sysfs_slave_del(slave); + +	bond_upper_dev_unlink(bond_dev, slave_dev); +	/* unregister rx_handler early so bond_handle_frame wouldn't be called +	 * for this slave anymore. +	 */ +	netdev_rx_handler_unregister(slave_dev); +	write_lock_bh(&bond->lock);  	/* Inform AD package of unbinding of slave. */ -	if (bond->params.mode == BOND_MODE_8023AD) { -		/* must be called before the slave is -		 * detached from the list -		 */ +	if (BOND_MODE(bond) == BOND_MODE_8023AD)  		bond_3ad_unbind_slave(slave); -	} -	pr_info("%s: releasing %s interface %s\n", +	write_unlock_bh(&bond->lock); + +	pr_info("%s: Releasing %s interface %s\n",  		bond_dev->name, -		(slave->state == BOND_STATE_ACTIVE) ? "active" : "backup", +		bond_is_active_slave(slave) ? "active" : "backup",  		slave_dev->name);  	oldcurrent = bond->curr_active_slave;  	bond->current_arp_slave = NULL; -	/* release the slave from its bond */ -	bond_detach_slave(bond, slave); - -	bond_compute_features(bond); +	if (!all && (!bond->params.fail_over_mac || +		     BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP)) { +		if (ether_addr_equal_64bits(bond_dev->dev_addr, slave->perm_hwaddr) && +		    bond_has_slaves(bond)) +			pr_warn("%s: Warning: the permanent HWaddr of %s - %pM - is still in use by %s - set the HWaddr of %s to a different address to avoid conflicts\n", +				bond_dev->name, slave_dev->name, +				slave->perm_hwaddr, +				bond_dev->name, slave_dev->name); +	}  	if (bond->primary_slave == slave)  		bond->primary_slave = NULL; -	if (oldcurrent == slave) +	if (oldcurrent == slave) { +		write_lock_bh(&bond->curr_slave_lock);  		bond_change_active_slave(bond, NULL); +		write_unlock_bh(&bond->curr_slave_lock); +	}  	if (bond_is_lb(bond)) {  		/* Must be called only after the slave has been @@ -1941,116 +1739,105 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)  		 * has been cleared (if our_slave == old_current),  		 * but before a new active slave is selected.  		 */ -		write_unlock_bh(&bond->lock);  		bond_alb_deinit_slave(bond, slave); -		write_lock_bh(&bond->lock);  	} -	if (oldcurrent == slave) { +	if (all) { +		RCU_INIT_POINTER(bond->curr_active_slave, NULL); +	} else if (oldcurrent == slave) {  		/*  		 * Note that we hold RTNL over this sequence, so there  		 * is no concern that another slave add/remove event  		 * will interfere.  		 */ -		write_unlock_bh(&bond->lock); -		read_lock(&bond->lock);  		write_lock_bh(&bond->curr_slave_lock);  		bond_select_active_slave(bond);  		write_unlock_bh(&bond->curr_slave_lock); -		read_unlock(&bond->lock); -		write_lock_bh(&bond->lock);  	} -	if (bond->slave_cnt == 0) { +	if (!bond_has_slaves(bond)) {  		bond_set_carrier(bond); +		eth_hw_addr_random(bond_dev); -		/* if the last slave was removed, zero the mac address -		 * of the master so it will be set by the application -		 * to the mac address of the first slave -		 */ -		memset(bond_dev->dev_addr, 0, bond_dev->addr_len); - -		if (!bond->vlgrp) { -			bond_dev->features |= NETIF_F_VLAN_CHALLENGED; -		} else { -			pr_warning("%s: Warning: clearing HW address of %s while it still has VLANs.\n", -				   bond_dev->name, bond_dev->name); -			pr_warning("%s: When re-adding slaves, make sure the bond's HW address matches its VLANs'.\n", -				   bond_dev->name); +		if (vlan_uses_dev(bond_dev)) { +			pr_warn("%s: Warning: clearing HW address of %s while it still has VLANs\n", +				bond_dev->name, bond_dev->name); +			pr_warn("%s: When re-adding slaves, make sure the bond's HW address matches its VLANs\n", +				bond_dev->name);  		} -	} else if ((bond_dev->features & NETIF_F_VLAN_CHALLENGED) && -		   !bond_has_challenged_slaves(bond)) { -		pr_info("%s: last VLAN challenged slave %s left bond %s. VLAN blocking is removed\n", -			bond_dev->name, slave_dev->name, bond_dev->name); -		bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED;  	} -	write_unlock_bh(&bond->lock);  	unblock_netpoll_tx(); +	synchronize_rcu(); +	bond->slave_cnt--; -	/* must do this from outside any spinlocks */ -	bond_destroy_slave_symlinks(bond_dev, slave_dev); +	if (!bond_has_slaves(bond)) { +		call_netdevice_notifiers(NETDEV_CHANGEADDR, bond->dev); +		call_netdevice_notifiers(NETDEV_RELEASE, bond->dev); +	} + +	bond_compute_features(bond); +	if (!(bond_dev->features & NETIF_F_VLAN_CHALLENGED) && +	    (old_features & NETIF_F_VLAN_CHALLENGED)) +		pr_info("%s: last VLAN challenged slave %s left bond %s - VLAN blocking is removed\n", +			bond_dev->name, slave_dev->name, bond_dev->name); -	bond_del_vlans_from_slave(bond, slave_dev); +	/* must do this from outside any spinlocks */ +	vlan_vids_del_by_dev(slave_dev, bond_dev); -	/* If the mode USES_PRIMARY, then we should only remove its -	 * promisc and mc settings if it was the curr_active_slave, but that was -	 * already taken care of above when we detached the slave +	/* If the mode uses primary, then this cases was handled above by +	 * bond_change_active_slave(..., NULL)  	 */ -	if (!USES_PRIMARY(bond->params.mode)) { -		/* unset promiscuity level from slave */ -		if (bond_dev->flags & IFF_PROMISC) +	if (!bond_uses_primary(bond)) { +		/* unset promiscuity level from slave +		 * NOTE: The NETDEV_CHANGEADDR call above may change the value +		 * of the IFF_PROMISC flag in the bond_dev, but we need the +		 * value of that flag before that change, as that was the value +		 * when this slave was attached, so we cache at the start of the +		 * function and use it here. Same goes for ALLMULTI below +		 */ +		if (old_flags & IFF_PROMISC)  			dev_set_promiscuity(slave_dev, -1);  		/* unset allmulti level from slave */ -		if (bond_dev->flags & IFF_ALLMULTI) +		if (old_flags & IFF_ALLMULTI)  			dev_set_allmulti(slave_dev, -1); -		/* flush master's mc_list from slave */ -		netif_addr_lock_bh(bond_dev); -		bond_mc_list_flush(bond_dev, slave_dev); -		netif_addr_unlock_bh(bond_dev); +		bond_hw_addr_flush(bond_dev, slave_dev);  	} -	netdev_set_master(slave_dev, NULL); - -#ifdef CONFIG_NET_POLL_CONTROLLER -	read_lock_bh(&bond->lock); - -	if (slaves_support_netpoll(bond_dev)) -		bond_dev->priv_flags &= ~IFF_DISABLE_NETPOLL; -	read_unlock_bh(&bond->lock); -	if (slave_dev->netdev_ops->ndo_netpoll_cleanup) -		slave_dev->netdev_ops->ndo_netpoll_cleanup(slave_dev); -	else -		slave_dev->npinfo = NULL; -#endif +	slave_disable_netpoll(slave);  	/* close slave before restoring its mac address */  	dev_close(slave_dev); -	if (bond->params.fail_over_mac != BOND_FOM_ACTIVE) { +	if (bond->params.fail_over_mac != BOND_FOM_ACTIVE || +	    BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {  		/* restore original ("permanent") mac address */ -		memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); +		ether_addr_copy(addr.sa_data, slave->perm_hwaddr);  		addr.sa_family = slave_dev->type;  		dev_set_mac_address(slave_dev, &addr);  	}  	dev_set_mtu(slave_dev, slave->original_mtu); -	slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB | -				   IFF_SLAVE_INACTIVE | IFF_BONDING | -				   IFF_SLAVE_NEEDARP); +	slave_dev->priv_flags &= ~IFF_BONDING; -	kfree(slave); +	bond_free_slave(slave);  	return 0;  /* deletion OK */  } +/* A wrapper used because of ndo_del_link */ +int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) +{ +	return __bond_release_one(bond_dev, slave_dev, false); +} +  /* -* First release a slave and than destroy the bond if no more slaves are left. +* First release a slave and then destroy the bond if no more slaves are left.  * Must be under rtnl_lock when this function is called.  */  static int  bond_release_and_destroy(struct net_device *bond_dev, @@ -2060,193 +1847,23 @@ static int  bond_release_and_destroy(struct net_device *bond_dev,  	int ret;  	ret = bond_release(bond_dev, slave_dev); -	if ((ret == 0) && (bond->slave_cnt == 0)) { -		pr_info("%s: destroying bond %s.\n", +	if (ret == 0 && !bond_has_slaves(bond)) { +		bond_dev->priv_flags |= IFF_DISABLE_NETPOLL; +		pr_info("%s: Destroying bond %s\n",  			bond_dev->name, bond_dev->name);  		unregister_netdevice(bond_dev);  	}  	return ret;  } -/* - * This function releases all slaves. - */ -static int bond_release_all(struct net_device *bond_dev) -{ -	struct bonding *bond = netdev_priv(bond_dev); -	struct slave *slave; -	struct net_device *slave_dev; -	struct sockaddr addr; - -	write_lock_bh(&bond->lock); - -	netif_carrier_off(bond_dev); - -	if (bond->slave_cnt == 0) -		goto out; - -	bond->current_arp_slave = NULL; -	bond->primary_slave = NULL; -	bond_change_active_slave(bond, NULL); - -	while ((slave = bond->first_slave) != NULL) { -		/* Inform AD package of unbinding of slave -		 * before slave is detached from the list. -		 */ -		if (bond->params.mode == BOND_MODE_8023AD) -			bond_3ad_unbind_slave(slave); - -		slave_dev = slave->dev; -		bond_detach_slave(bond, slave); - -		/* now that the slave is detached, unlock and perform -		 * all the undo steps that should not be called from -		 * within a lock. -		 */ -		write_unlock_bh(&bond->lock); - -		if (bond_is_lb(bond)) { -			/* must be called only after the slave -			 * has been detached from the list -			 */ -			bond_alb_deinit_slave(bond, slave); -		} - -		bond_compute_features(bond); - -		bond_destroy_slave_symlinks(bond_dev, slave_dev); -		bond_del_vlans_from_slave(bond, slave_dev); - -		/* If the mode USES_PRIMARY, then we should only remove its -		 * promisc and mc settings if it was the curr_active_slave, but that was -		 * already taken care of above when we detached the slave -		 */ -		if (!USES_PRIMARY(bond->params.mode)) { -			/* unset promiscuity level from slave */ -			if (bond_dev->flags & IFF_PROMISC) -				dev_set_promiscuity(slave_dev, -1); - -			/* unset allmulti level from slave */ -			if (bond_dev->flags & IFF_ALLMULTI) -				dev_set_allmulti(slave_dev, -1); - -			/* flush master's mc_list from slave */ -			netif_addr_lock_bh(bond_dev); -			bond_mc_list_flush(bond_dev, slave_dev); -			netif_addr_unlock_bh(bond_dev); -		} - -		netdev_set_master(slave_dev, NULL); - -		/* close slave before restoring its mac address */ -		dev_close(slave_dev); - -		if (!bond->params.fail_over_mac) { -			/* restore original ("permanent") mac address*/ -			memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); -			addr.sa_family = slave_dev->type; -			dev_set_mac_address(slave_dev, &addr); -		} - -		slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB | -					   IFF_SLAVE_INACTIVE); - -		kfree(slave); - -		/* re-acquire the lock before getting the next slave */ -		write_lock_bh(&bond->lock); -	} - -	/* zero the mac address of the master so it will be -	 * set by the application to the mac address of the -	 * first slave -	 */ -	memset(bond_dev->dev_addr, 0, bond_dev->addr_len); - -	if (!bond->vlgrp) { -		bond_dev->features |= NETIF_F_VLAN_CHALLENGED; -	} else { -		pr_warning("%s: Warning: clearing HW address of %s while it still has VLANs.\n", -			   bond_dev->name, bond_dev->name); -		pr_warning("%s: When re-adding slaves, make sure the bond's HW address matches its VLANs'.\n", -			   bond_dev->name); -	} - -	pr_info("%s: released all slaves\n", bond_dev->name); - -out: -	write_unlock_bh(&bond->lock); -	return 0; -} - -/* - * This function changes the active slave to slave <slave_dev>. - * It returns -EINVAL in the following cases. - *  - <slave_dev> is not found in the list. - *  - There is not active slave now. - *  - <slave_dev> is already active. - *  - The link state of <slave_dev> is not BOND_LINK_UP. - *  - <slave_dev> is not running. - * In these cases, this function does nothing. - * In the other cases, current_slave pointer is changed and 0 is returned. - */ -static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_device *slave_dev) -{ -	struct bonding *bond = netdev_priv(bond_dev); -	struct slave *old_active = NULL; -	struct slave *new_active = NULL; -	int res = 0; - -	if (!USES_PRIMARY(bond->params.mode)) -		return -EINVAL; - -	/* Verify that master_dev is indeed the master of slave_dev */ -	if (!(slave_dev->flags & IFF_SLAVE) || (slave_dev->master != bond_dev)) -		return -EINVAL; - -	read_lock(&bond->lock); - -	read_lock(&bond->curr_slave_lock); -	old_active = bond->curr_active_slave; -	read_unlock(&bond->curr_slave_lock); - -	new_active = bond_get_slave_by_dev(bond, slave_dev); - -	/* -	 * Changing to the current active: do nothing; return success. -	 */ -	if (new_active && (new_active == old_active)) { -		read_unlock(&bond->lock); -		return 0; -	} - -	if ((new_active) && -	    (old_active) && -	    (new_active->link == BOND_LINK_UP) && -	    IS_UP(new_active->dev)) { -		block_netpoll_tx(); -		write_lock_bh(&bond->curr_slave_lock); -		bond_change_active_slave(bond, new_active); -		write_unlock_bh(&bond->curr_slave_lock); -		unblock_netpoll_tx(); -	} else -		res = -EINVAL; - -	read_unlock(&bond->lock); - -	return res; -} -  static int bond_info_query(struct net_device *bond_dev, struct ifbond *info)  {  	struct bonding *bond = netdev_priv(bond_dev); -	info->bond_mode = bond->params.mode; +	info->bond_mode = BOND_MODE(bond);  	info->miimon = bond->params.miimon; -	read_lock(&bond->lock);  	info->num_slaves = bond->slave_cnt; -	read_unlock(&bond->lock);  	return 0;  } @@ -2254,24 +1871,21 @@ static int bond_info_query(struct net_device *bond_dev, struct ifbond *info)  static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *info)  {  	struct bonding *bond = netdev_priv(bond_dev); +	struct list_head *iter; +	int i = 0, res = -ENODEV;  	struct slave *slave; -	int i, res = -ENODEV; -	read_lock(&bond->lock); - -	bond_for_each_slave(bond, slave, i) { -		if (i == (int)info->slave_id) { +	bond_for_each_slave(bond, slave, iter) { +		if (i++ == (int)info->slave_id) {  			res = 0;  			strcpy(info->slave_name, slave->dev->name);  			info->link = slave->link; -			info->state = slave->state; +			info->state = bond_slave_state(slave);  			info->link_failure_count = slave->link_failure_count;  			break;  		}  	} -	read_unlock(&bond->lock); -  	return res;  } @@ -2280,13 +1894,14 @@ static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *in  static int bond_miimon_inspect(struct bonding *bond)  { +	int link_state, commit = 0; +	struct list_head *iter;  	struct slave *slave; -	int i, link_state, commit = 0;  	bool ignore_updelay;  	ignore_updelay = !bond->curr_active_slave ? true : false; -	bond_for_each_slave(bond, slave, i) { +	bond_for_each_slave_rcu(bond, slave, iter) {  		slave->new_link = BOND_LINK_NOCHANGE;  		link_state = bond_check_dev_link(bond, slave->dev, 0); @@ -2299,11 +1914,11 @@ static int bond_miimon_inspect(struct bonding *bond)  			slave->link = BOND_LINK_FAIL;  			slave->delay = bond->params.downdelay;  			if (slave->delay) { -				pr_info("%s: link status down for %sinterface %s, disabling it in %d ms.\n", +				pr_info("%s: link status down for %sinterface %s, disabling it in %d ms\n",  					bond->dev->name, -					(bond->params.mode == +					(BOND_MODE(bond) ==  					 BOND_MODE_ACTIVEBACKUP) ? -					((slave->state == BOND_STATE_ACTIVE) ? +					(bond_is_active_slave(slave) ?  					 "active " : "backup ") : "",  					slave->dev->name,  					bond->params.downdelay * bond->params.miimon); @@ -2315,8 +1930,8 @@ static int bond_miimon_inspect(struct bonding *bond)  				 * recovered before downdelay expired  				 */  				slave->link = BOND_LINK_UP; -				slave->jiffies = jiffies; -				pr_info("%s: link status up again after %d ms for interface %s.\n", +				slave->last_link_up = jiffies; +				pr_info("%s: link status up again after %d ms for interface %s\n",  					bond->dev->name,  					(bond->params.downdelay - slave->delay) *  					bond->params.miimon, @@ -2341,7 +1956,7 @@ static int bond_miimon_inspect(struct bonding *bond)  			slave->delay = bond->params.updelay;  			if (slave->delay) { -				pr_info("%s: link status up for interface %s, enabling it in %d ms.\n", +				pr_info("%s: link status up for interface %s, enabling it in %d ms\n",  					bond->dev->name, slave->dev->name,  					ignore_updelay ? 0 :  					bond->params.updelay * @@ -2351,7 +1966,7 @@ static int bond_miimon_inspect(struct bonding *bond)  		case BOND_LINK_BACK:  			if (!link_state) {  				slave->link = BOND_LINK_DOWN; -				pr_info("%s: link status down again after %d ms for interface %s.\n", +				pr_info("%s: link status down again after %d ms for interface %s\n",  					bond->dev->name,  					(bond->params.updelay - slave->delay) *  					bond->params.miimon, @@ -2380,37 +1995,36 @@ static int bond_miimon_inspect(struct bonding *bond)  static void bond_miimon_commit(struct bonding *bond)  { +	struct list_head *iter;  	struct slave *slave; -	int i; -	bond_for_each_slave(bond, slave, i) { +	bond_for_each_slave(bond, slave, iter) {  		switch (slave->new_link) {  		case BOND_LINK_NOCHANGE:  			continue;  		case BOND_LINK_UP:  			slave->link = BOND_LINK_UP; -			slave->jiffies = jiffies; +			slave->last_link_up = jiffies; -			if (bond->params.mode == BOND_MODE_8023AD) { +			if (BOND_MODE(bond) == BOND_MODE_8023AD) {  				/* prevent it from being the active one */ -				slave->state = BOND_STATE_BACKUP; -			} else if (bond->params.mode != BOND_MODE_ACTIVEBACKUP) { +				bond_set_backup_slave(slave); +			} else if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {  				/* make it immediately active */ -				slave->state = BOND_STATE_ACTIVE; +				bond_set_active_slave(slave);  			} else if (slave != bond->primary_slave) {  				/* prevent it from being the active one */ -				slave->state = BOND_STATE_BACKUP; +				bond_set_backup_slave(slave);  			} -			bond_update_speed_duplex(slave); - -			pr_info("%s: link status definitely up for interface %s, %d Mbps %s duplex.\n", +			pr_info("%s: link status definitely up for interface %s, %u Mbps %s duplex\n",  				bond->dev->name, slave->dev->name, -				slave->speed, slave->duplex ? "full" : "half"); +				slave->speed == SPEED_UNKNOWN ? 0 : slave->speed, +				slave->duplex ? "full" : "half");  			/* notify ad that the link status has changed */ -			if (bond->params.mode == BOND_MODE_8023AD) +			if (BOND_MODE(bond) == BOND_MODE_8023AD)  				bond_3ad_handle_link_change(slave, BOND_LINK_UP);  			if (bond_is_lb(bond)) @@ -2429,14 +2043,15 @@ static void bond_miimon_commit(struct bonding *bond)  			slave->link = BOND_LINK_DOWN; -			if (bond->params.mode == BOND_MODE_ACTIVEBACKUP || -			    bond->params.mode == BOND_MODE_8023AD) -				bond_set_slave_inactive_flags(slave); +			if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP || +			    BOND_MODE(bond) == BOND_MODE_8023AD) +				bond_set_slave_inactive_flags(slave, +							      BOND_SLAVE_NOTIFY_NOW);  			pr_info("%s: link status definitely down for interface %s, disabling it\n",  				bond->dev->name, slave->dev->name); -			if (bond->params.mode == BOND_MODE_8023AD) +			if (BOND_MODE(bond) == BOND_MODE_8023AD)  				bond_3ad_handle_link_change(slave,  							    BOND_LINK_DOWN); @@ -2478,87 +2093,69 @@ do_failover:   * an acquisition of appropriate locks followed by a commit phase to   * implement whatever link state changes are indicated.   */ -void bond_mii_monitor(struct work_struct *work) +static void bond_mii_monitor(struct work_struct *work)  {  	struct bonding *bond = container_of(work, struct bonding,  					    mii_work.work); +	bool should_notify_peers = false; +	unsigned long delay; -	read_lock(&bond->lock); -	if (bond->kill_timers) -		goto out; +	delay = msecs_to_jiffies(bond->params.miimon); -	if (bond->slave_cnt == 0) +	if (!bond_has_slaves(bond))  		goto re_arm; -	if (bond->send_grat_arp) { -		read_lock(&bond->curr_slave_lock); -		bond_send_gratuitous_arp(bond); -		read_unlock(&bond->curr_slave_lock); -	} +	rcu_read_lock(); -	if (bond->send_unsol_na) { -		read_lock(&bond->curr_slave_lock); -		bond_send_unsolicited_na(bond); -		read_unlock(&bond->curr_slave_lock); -	} +	should_notify_peers = bond_should_notify_peers(bond);  	if (bond_miimon_inspect(bond)) { -		read_unlock(&bond->lock); -		rtnl_lock(); -		read_lock(&bond->lock); +		rcu_read_unlock(); + +		/* Race avoidance with bond_close cancel of workqueue */ +		if (!rtnl_trylock()) { +			delay = 1; +			should_notify_peers = false; +			goto re_arm; +		}  		bond_miimon_commit(bond); -		read_unlock(&bond->lock);  		rtnl_unlock();	/* might sleep, hold no other locks */ -		read_lock(&bond->lock); -	} +	} else +		rcu_read_unlock();  re_arm:  	if (bond->params.miimon) -		queue_delayed_work(bond->wq, &bond->mii_work, -				   msecs_to_jiffies(bond->params.miimon)); -out: -	read_unlock(&bond->lock); -} +		queue_delayed_work(bond->wq, &bond->mii_work, delay); -static __be32 bond_glean_dev_ip(struct net_device *dev) -{ -	struct in_device *idev; -	struct in_ifaddr *ifa; -	__be32 addr = 0; - -	if (!dev) -		return 0; - -	rcu_read_lock(); -	idev = __in_dev_get_rcu(dev); -	if (!idev) -		goto out; - -	ifa = idev->ifa_list; -	if (!ifa) -		goto out; - -	addr = ifa->ifa_local; -out: -	rcu_read_unlock(); -	return addr; +	if (should_notify_peers) { +		if (!rtnl_trylock()) +			return; +		call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); +		rtnl_unlock(); +	}  } -static int bond_has_this_ip(struct bonding *bond, __be32 ip) +static bool bond_has_this_ip(struct bonding *bond, __be32 ip)  { -	struct vlan_entry *vlan; +	struct net_device *upper; +	struct list_head *iter; +	bool ret = false; -	if (ip == bond->master_ip) -		return 1; +	if (ip == bond_confirm_addr(bond->dev, 0, ip)) +		return true; -	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { -		if (ip == vlan->vlan_ip) -			return 1; +	rcu_read_lock(); +	netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) { +		if (ip == bond_confirm_addr(upper, 0, ip)) { +			ret = true; +			break; +		}  	} +	rcu_read_unlock(); -	return 0; +	return ret;  }  /* @@ -2566,200 +2163,191 @@ static int bond_has_this_ip(struct bonding *bond, __be32 ip)   * switches in VLAN mode (especially if ports are configured as   * "native" to a VLAN) might not pass non-tagged frames.   */ -static void bond_arp_send(struct net_device *slave_dev, int arp_op, __be32 dest_ip, __be32 src_ip, unsigned short vlan_id) +static void bond_arp_send(struct net_device *slave_dev, int arp_op, +			  __be32 dest_ip, __be32 src_ip, +			  struct bond_vlan_tag *tags)  {  	struct sk_buff *skb; +	int i; -	pr_debug("arp %d on slave %s: dst %x src %x vid %d\n", arp_op, -		 slave_dev->name, dest_ip, src_ip, vlan_id); +	pr_debug("arp %d on slave %s: dst %pI4 src %pI4\n", +		 arp_op, slave_dev->name, &dest_ip, &src_ip);  	skb = arp_create(arp_op, ETH_P_ARP, dest_ip, slave_dev, src_ip,  			 NULL, slave_dev->dev_addr, NULL);  	if (!skb) { -		pr_err("ARP packet allocation failed\n"); +		net_err_ratelimited("ARP packet allocation failed\n");  		return;  	} -	if (vlan_id) { -		skb = vlan_put_tag(skb, vlan_id); + +	/* Go through all the tags backwards and add them to the packet */ +	for (i = BOND_MAX_VLAN_ENCAP - 1; i > 0; i--) { +		if (!tags[i].vlan_id) +			continue; + +		pr_debug("inner tag: proto %X vid %X\n", +			 ntohs(tags[i].vlan_proto), tags[i].vlan_id); +		skb = __vlan_put_tag(skb, tags[i].vlan_proto, +				     tags[i].vlan_id);  		if (!skb) { -			pr_err("failed to insert VLAN tag\n"); +			net_err_ratelimited("failed to insert inner VLAN tag\n"); +			return; +		} +	} +	/* Set the outer tag */ +	if (tags[0].vlan_id) { +		pr_debug("outer tag: proto %X vid %X\n", +			 ntohs(tags[0].vlan_proto), tags[0].vlan_id); +		skb = vlan_put_tag(skb, tags[0].vlan_proto, tags[0].vlan_id); +		if (!skb) { +			net_err_ratelimited("failed to insert outer VLAN tag\n");  			return;  		}  	}  	arp_xmit(skb);  } - -static void bond_arp_send_all(struct bonding *bond, struct slave *slave) +/* Validate the device path between the @start_dev and the @end_dev. + * The path is valid if the @end_dev is reachable through device + * stacking. + * When the path is validated, collect any vlan information in the + * path. + */ +bool bond_verify_device_path(struct net_device *start_dev, +			     struct net_device *end_dev, +			     struct bond_vlan_tag *tags)  { -	int i, vlan_id, rv; -	__be32 *targets = bond->params.arp_targets; -	struct vlan_entry *vlan; -	struct net_device *vlan_dev; -	struct flowi fl; -	struct rtable *rt; +	struct net_device *upper; +	struct list_head  *iter; +	int  idx; -	for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) { -		if (!targets[i]) -			break; -		pr_debug("basa: target %x\n", targets[i]); -		if (!bond->vlgrp) { -			pr_debug("basa: empty vlan: arp_send\n"); -			bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], -				      bond->master_ip, 0); -			continue; -		} +	if (start_dev == end_dev) +		return true; -		/* -		 * If VLANs are configured, we do a route lookup to -		 * determine which VLAN interface would be used, so we -		 * can tag the ARP with the proper VLAN tag. -		 */ -		memset(&fl, 0, sizeof(fl)); -		fl.fl4_dst = targets[i]; -		fl.fl4_tos = RTO_ONLINK; +	netdev_for_each_upper_dev_rcu(start_dev, upper, iter) { +		if (bond_verify_device_path(upper, end_dev, tags)) { +			if (is_vlan_dev(upper)) { +				idx = vlan_get_encap_level(upper); +				if (idx >= BOND_MAX_VLAN_ENCAP) +					return false; -		rv = ip_route_output_key(dev_net(bond->dev), &rt, &fl); -		if (rv) { -			if (net_ratelimit()) { -				pr_warning("%s: no route to arp_ip_target %pI4\n", -					   bond->dev->name, &fl.fl4_dst); +				tags[idx].vlan_proto = +						    vlan_dev_vlan_proto(upper); +				tags[idx].vlan_id = vlan_dev_vlan_id(upper);  			} -			continue; -		} - -		/* -		 * This target is not on a VLAN -		 */ -		if (rt->dst.dev == bond->dev) { -			ip_rt_put(rt); -			pr_debug("basa: rtdev == bond->dev: arp_send\n"); -			bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], -				      bond->master_ip, 0); -			continue; +			return true;  		} +	} -		vlan_id = 0; -		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { -			vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); -			if (vlan_dev == rt->dst.dev) { -				vlan_id = vlan->vlan_id; -				pr_debug("basa: vlan match on %s %d\n", -				       vlan_dev->name, vlan_id); -				break; -			} -		} +	return false; +} -		if (vlan_id) { -			ip_rt_put(rt); +static void bond_arp_send_all(struct bonding *bond, struct slave *slave) +{ +	struct rtable *rt; +	struct bond_vlan_tag tags[BOND_MAX_VLAN_ENCAP]; +	__be32 *targets = bond->params.arp_targets, addr; +	int i; +	bool ret; + +	for (i = 0; i < BOND_MAX_ARP_TARGETS && targets[i]; i++) { +		pr_debug("basa: target %pI4\n", &targets[i]); +		memset(tags, 0, sizeof(tags)); + +		/* Find out through which dev should the packet go */ +		rt = ip_route_output(dev_net(bond->dev), targets[i], 0, +				     RTO_ONLINK, 0); +		if (IS_ERR(rt)) { +			/* there's no route to target - try to send arp +			 * probe to generate any traffic (arp_validate=0) +			 */ +			if (bond->params.arp_validate) +				net_warn_ratelimited("%s: no route to arp_ip_target %pI4 and arp_validate is set\n", +						     bond->dev->name, +						     &targets[i]);  			bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], -				      vlan->vlan_ip, vlan_id); +				      0, tags);  			continue;  		} -		if (net_ratelimit()) { -			pr_warning("%s: no path to arp_ip_target %pI4 via rt.dev %s\n", -				   bond->dev->name, &fl.fl4_dst, -				   rt->dst.dev ? rt->dst.dev->name : "NULL"); -		} -		ip_rt_put(rt); -	} -} - -/* - * Kick out a gratuitous ARP for an IP on the bonding master plus one - * for each VLAN above us. - * - * Caller must hold curr_slave_lock for read or better - */ -static void bond_send_gratuitous_arp(struct bonding *bond) -{ -	struct slave *slave = bond->curr_active_slave; -	struct vlan_entry *vlan; -	struct net_device *vlan_dev; - -	pr_debug("bond_send_grat_arp: bond %s slave %s\n", -		 bond->dev->name, slave ? slave->dev->name : "NULL"); +		/* bond device itself */ +		if (rt->dst.dev == bond->dev) +			goto found; -	if (!slave || !bond->send_grat_arp || -	    test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state)) -		return; +		rcu_read_lock(); +		ret = bond_verify_device_path(bond->dev, rt->dst.dev, tags); +		rcu_read_unlock(); -	bond->send_grat_arp--; +		if (ret) +			goto found; -	if (bond->master_ip) { -		bond_arp_send(slave->dev, ARPOP_REPLY, bond->master_ip, -				bond->master_ip, 0); -	} +		/* Not our device - skip */ +		pr_debug("%s: no path to arp_ip_target %pI4 via rt.dev %s\n", +			 bond->dev->name, &targets[i], +			 rt->dst.dev ? rt->dst.dev->name : "NULL"); -	if (!bond->vlgrp) -		return; +		ip_rt_put(rt); +		continue; -	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { -		vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); -		if (vlan->vlan_ip) { -			bond_arp_send(slave->dev, ARPOP_REPLY, vlan->vlan_ip, -				      vlan->vlan_ip, vlan->vlan_id); -		} +found: +		addr = bond_confirm_addr(rt->dst.dev, targets[i], 0); +		ip_rt_put(rt); +		bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], +			      addr, tags);  	}  }  static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32 sip, __be32 tip)  {  	int i; -	__be32 *targets = bond->params.arp_targets; - -	for (i = 0; (i < BOND_MAX_ARP_TARGETS) && targets[i]; i++) { -		pr_debug("bva: sip %pI4 tip %pI4 t[%d] %pI4 bhti(tip) %d\n", -			 &sip, &tip, i, &targets[i], -			 bond_has_this_ip(bond, tip)); -		if (sip == targets[i]) { -			if (bond_has_this_ip(bond, tip)) -				slave->last_arp_rx = jiffies; -			return; -		} + +	if (!sip || !bond_has_this_ip(bond, tip)) { +		pr_debug("bva: sip %pI4 tip %pI4 not found\n", &sip, &tip); +		return; +	} + +	i = bond_get_targets_ip(bond->params.arp_targets, sip); +	if (i == -1) { +		pr_debug("bva: sip %pI4 not found in targets\n", &sip); +		return;  	} +	slave->last_rx = jiffies; +	slave->target_last_arp_rx[i] = jiffies;  } -static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) +int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, +		 struct slave *slave)  { -	struct arphdr *arp; -	struct slave *slave; -	struct bonding *bond; +	struct arphdr *arp = (struct arphdr *)skb->data; +	struct slave *curr_active_slave;  	unsigned char *arp_ptr;  	__be32 sip, tip; +	int alen, is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP); -	if (dev->priv_flags & IFF_802_1Q_VLAN) { -		/* -		 * When using VLANS and bonding, dev and oriv_dev may be -		 * incorrect if the physical interface supports VLAN -		 * acceleration.  With this change ARP validation now -		 * works for hosts only reachable on the VLAN interface. -		 */ -		dev = vlan_dev_real_dev(dev); -		orig_dev = dev_get_by_index_rcu(dev_net(skb->dev),skb->skb_iif); +	if (!slave_do_arp_validate(bond, slave)) { +		if ((slave_do_arp_validate_only(bond) && is_arp) || +		    !slave_do_arp_validate_only(bond)) +			slave->last_rx = jiffies; +		return RX_HANDLER_ANOTHER; +	} else if (!is_arp) { +		return RX_HANDLER_ANOTHER;  	} -	if (!(dev->priv_flags & IFF_BONDING) || !(dev->flags & IFF_MASTER)) -		goto out; +	alen = arp_hdr_len(bond->dev); -	bond = netdev_priv(dev); -	read_lock(&bond->lock); - -	pr_debug("bond_arp_rcv: bond %s skb->dev %s orig_dev %s\n", -		 bond->dev->name, skb->dev ? skb->dev->name : "NULL", -		 orig_dev ? orig_dev->name : "NULL"); - -	slave = bond_get_slave_by_dev(bond, orig_dev); -	if (!slave || !slave_do_arp_validate(bond, slave)) -		goto out_unlock; +	pr_debug("bond_arp_rcv: bond %s skb->dev %s\n", +		 bond->dev->name, skb->dev->name); -	if (!pskb_may_pull(skb, arp_hdr_len(dev))) -		goto out_unlock; +	if (alen > skb_headlen(skb)) { +		arp = kmalloc(alen, GFP_ATOMIC); +		if (!arp) +			goto out_unlock; +		if (skb_copy_bits(skb, 0, arp, alen) < 0) +			goto out_unlock; +	} -	arp = arp_hdr(skb); -	if (arp->ar_hln != dev->addr_len || +	if (arp->ar_hln != bond->dev->addr_len ||  	    skb->pkt_type == PACKET_OTHERHOST ||  	    skb->pkt_type == PACKET_LOOPBACK ||  	    arp->ar_hrd != htons(ARPHRD_ETHER) || @@ -2768,16 +2356,18 @@ static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct pack  		goto out_unlock;  	arp_ptr = (unsigned char *)(arp + 1); -	arp_ptr += dev->addr_len; +	arp_ptr += bond->dev->addr_len;  	memcpy(&sip, arp_ptr, 4); -	arp_ptr += 4 + dev->addr_len; +	arp_ptr += 4 + bond->dev->addr_len;  	memcpy(&tip, arp_ptr, 4);  	pr_debug("bond_arp_rcv: %s %s/%d av %d sv %d sip %pI4 tip %pI4\n", -		 bond->dev->name, slave->dev->name, slave->state, +		 bond->dev->name, slave->dev->name, bond_slave_state(slave),  		 bond->params.arp_validate, slave_do_arp_validate(bond, slave),  		 &sip, &tip); +	curr_active_slave = rcu_dereference(bond->curr_active_slave); +  	/*  	 * Backup slaves won't see the ARP reply, but do come through  	 * here for each ARP probe (so we swap the sip/tip to validate @@ -2785,17 +2375,38 @@ static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct pack  	 * configuration, the ARP probe will (hopefully) travel from  	 * the active, through one switch, the router, then the other  	 * switch before reaching the backup. +	 * +	 * We 'trust' the arp requests if there is an active slave and +	 * it received valid arp reply(s) after it became active. This +	 * is done to avoid endless looping when we can't reach the +	 * arp_ip_target and fool ourselves with our own arp requests.  	 */ -	if (slave->state == BOND_STATE_ACTIVE) + +	if (bond_is_active_slave(slave))  		bond_validate_arp(bond, slave, sip, tip); -	else +	else if (curr_active_slave && +		 time_after(slave_last_rx(bond, curr_active_slave), +			    curr_active_slave->last_link_up))  		bond_validate_arp(bond, slave, tip, sip);  out_unlock: -	read_unlock(&bond->lock); -out: -	dev_kfree_skb(skb); -	return NET_RX_SUCCESS; +	if (arp != (struct arphdr *)skb->data) +		kfree(arp); +	return RX_HANDLER_ANOTHER; +} + +/* function to verify if we're in the arp_interval timeslice, returns true if + * (last_act - arp_interval) <= jiffies <= (last_act + mod * arp_interval + + * arp_interval/2) . the arp_interval/2 is needed for really fast networks. + */ +static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act, +				  int mod) +{ +	int delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); + +	return time_in_range(jiffies, +			     last_act - delta_in_ticks, +			     last_act + mod * delta_in_ticks + delta_in_ticks/2);  }  /* @@ -2805,50 +2416,37 @@ out:   * arp is transmitted to generate traffic. see activebackup_arp_monitor for   * arp monitoring in active backup mode.   */ -void bond_loadbalance_arp_mon(struct work_struct *work) +static void bond_loadbalance_arp_mon(struct work_struct *work)  {  	struct bonding *bond = container_of(work, struct bonding,  					    arp_work.work);  	struct slave *slave, *oldcurrent; -	int do_failover = 0; -	int delta_in_ticks; -	int i; - -	read_lock(&bond->lock); - -	delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); - -	if (bond->kill_timers) -		goto out; +	struct list_head *iter; +	int do_failover = 0, slave_state_changed = 0; -	if (bond->slave_cnt == 0) +	if (!bond_has_slaves(bond))  		goto re_arm; -	read_lock(&bond->curr_slave_lock); -	oldcurrent = bond->curr_active_slave; -	read_unlock(&bond->curr_slave_lock); +	rcu_read_lock(); +	oldcurrent = ACCESS_ONCE(bond->curr_active_slave);  	/* see if any of the previous devices are up now (i.e. they have  	 * xmt and rcv traffic). the curr_active_slave does not come into -	 * the picture unless it is null. also, slave->jiffies is not needed -	 * here because we send an arp on each slave and give a slave as -	 * long as it needs to get the tx/rx within the delta. +	 * the picture unless it is null. also, slave->last_link_up is not +	 * needed here because we send an arp on each slave and give a slave +	 * as long as it needs to get the tx/rx within the delta.  	 * TODO: what about up/down delay in arp mode? it wasn't here before  	 *       so it can wait  	 */ -	bond_for_each_slave(bond, slave, i) { +	bond_for_each_slave_rcu(bond, slave, iter) {  		unsigned long trans_start = dev_trans_start(slave->dev);  		if (slave->link != BOND_LINK_UP) { -			if (time_in_range(jiffies, -				trans_start - delta_in_ticks, -				trans_start + delta_in_ticks) && -			    time_in_range(jiffies, -				slave->dev->last_rx - delta_in_ticks, -				slave->dev->last_rx + delta_in_ticks)) { +			if (bond_time_in_interval(bond, trans_start, 1) && +			    bond_time_in_interval(bond, slave->last_rx, 1)) {  				slave->link  = BOND_LINK_UP; -				slave->state = BOND_STATE_ACTIVE; +				slave_state_changed = 1;  				/* primary_slave has no meaning in round-robin  				 * mode. the window of a slave being up and @@ -2856,7 +2454,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work)  				 * is closed.  				 */  				if (!oldcurrent) { -					pr_info("%s: link status definitely up for interface %s, ", +					pr_info("%s: link status definitely up for interface %s\n",  						bond->dev->name,  						slave->dev->name);  					do_failover = 1; @@ -2873,22 +2471,17 @@ void bond_loadbalance_arp_mon(struct work_struct *work)  			 * when the source ip is 0, so don't take the link down  			 * if we don't know our ip yet  			 */ -			if (!time_in_range(jiffies, -				trans_start - delta_in_ticks, -				trans_start + 2 * delta_in_ticks) || -			    !time_in_range(jiffies, -				slave->dev->last_rx - delta_in_ticks, -				slave->dev->last_rx + 2 * delta_in_ticks)) { +			if (!bond_time_in_interval(bond, trans_start, 2) || +			    !bond_time_in_interval(bond, slave->last_rx, 2)) {  				slave->link  = BOND_LINK_DOWN; -				slave->state = BOND_STATE_BACKUP; +				slave_state_changed = 1;  				if (slave->link_failure_count < UINT_MAX)  					slave->link_failure_count++; -				pr_info("%s: interface %s is now down.\n", -					bond->dev->name, -					slave->dev->name); +				pr_info("%s: interface %s is now down\n", +					bond->dev->name, slave->dev->name);  				if (slave == oldcurrent)  					do_failover = 1; @@ -2902,25 +2495,37 @@ void bond_loadbalance_arp_mon(struct work_struct *work)  		 * do - all replies will be rx'ed on same link causing slaves  		 * to be unstable during low/no traffic periods  		 */ -		if (IS_UP(slave->dev)) +		if (bond_slave_is_up(slave))  			bond_arp_send_all(bond, slave);  	} -	if (do_failover) { -		block_netpoll_tx(); -		write_lock_bh(&bond->curr_slave_lock); +	rcu_read_unlock(); -		bond_select_active_slave(bond); +	if (do_failover || slave_state_changed) { +		if (!rtnl_trylock()) +			goto re_arm; -		write_unlock_bh(&bond->curr_slave_lock); -		unblock_netpoll_tx(); +		if (slave_state_changed) { +			bond_slave_state_change(bond); +		} else if (do_failover) { +			/* the bond_select_active_slave must hold RTNL +			 * and curr_slave_lock for write. +			 */ +			block_netpoll_tx(); +			write_lock_bh(&bond->curr_slave_lock); + +			bond_select_active_slave(bond); + +			write_unlock_bh(&bond->curr_slave_lock); +			unblock_netpoll_tx(); +		} +		rtnl_unlock();  	}  re_arm:  	if (bond->params.arp_interval) -		queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); -out: -	read_unlock(&bond->lock); +		queue_delayed_work(bond->wq, &bond->arp_work, +				   msecs_to_jiffies(bond->params.arp_interval));  }  /* @@ -2929,26 +2534,24 @@ out:   * place for the slave.  Returns 0 if no changes are found, >0 if changes   * to link states must be committed.   * - * Called with bond->lock held for read. + * Called with rcu_read_lock hold.   */ -static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) +static int bond_ab_arp_inspect(struct bonding *bond)  { +	unsigned long trans_start, last_rx; +	struct list_head *iter;  	struct slave *slave; -	int i, commit = 0; -	unsigned long trans_start; +	int commit = 0; -	bond_for_each_slave(bond, slave, i) { +	bond_for_each_slave_rcu(bond, slave, iter) {  		slave->new_link = BOND_LINK_NOCHANGE; +		last_rx = slave_last_rx(bond, slave);  		if (slave->link != BOND_LINK_UP) { -			if (time_in_range(jiffies, -				slave_last_rx(bond, slave) - delta_in_ticks, -				slave_last_rx(bond, slave) + delta_in_ticks)) { - +			if (bond_time_in_interval(bond, last_rx, 1)) {  				slave->new_link = BOND_LINK_UP;  				commit++;  			} -  			continue;  		} @@ -2957,9 +2560,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)  		 * active.  This avoids bouncing, as the last receive  		 * times need a full ARP monitor cycle to be updated.  		 */ -		if (time_in_range(jiffies, -				  slave->jiffies - delta_in_ticks, -				  slave->jiffies + 2 * delta_in_ticks)) +		if (bond_time_in_interval(bond, slave->last_link_up, 2))  			continue;  		/* @@ -2975,12 +2576,9 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)  		 * gives each slave a chance to tx/rx traffic  		 * before being taken out  		 */ -		if (slave->state == BOND_STATE_BACKUP && +		if (!bond_is_active_slave(slave) &&  		    !bond->current_arp_slave && -		    !time_in_range(jiffies, -			slave_last_rx(bond, slave) - delta_in_ticks, -			slave_last_rx(bond, slave) + 3 * delta_in_ticks)) { - +		    !bond_time_in_interval(bond, last_rx, 3)) {  			slave->new_link = BOND_LINK_DOWN;  			commit++;  		} @@ -2992,14 +2590,9 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)  		 *    the bond has an IP address)  		 */  		trans_start = dev_trans_start(slave->dev); -		if ((slave->state == BOND_STATE_ACTIVE) && -		    (!time_in_range(jiffies, -			trans_start - delta_in_ticks, -			trans_start + 2 * delta_in_ticks) || -		     !time_in_range(jiffies, -			slave_last_rx(bond, slave) - delta_in_ticks, -			slave_last_rx(bond, slave) + 2 * delta_in_ticks))) { - +		if (bond_is_active_slave(slave) && +		    (!bond_time_in_interval(bond, trans_start, 2) || +		     !bond_time_in_interval(bond, last_rx, 2))) {  			slave->new_link = BOND_LINK_DOWN;  			commit++;  		} @@ -3012,30 +2605,33 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)   * Called to commit link state changes noted by inspection step of   * active-backup mode ARP monitor.   * - * Called with RTNL and bond->lock for read. + * Called with RTNL hold.   */ -static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks) +static void bond_ab_arp_commit(struct bonding *bond)  { -	struct slave *slave; -	int i;  	unsigned long trans_start; +	struct list_head *iter; +	struct slave *slave; -	bond_for_each_slave(bond, slave, i) { +	bond_for_each_slave(bond, slave, iter) {  		switch (slave->new_link) {  		case BOND_LINK_NOCHANGE:  			continue;  		case BOND_LINK_UP:  			trans_start = dev_trans_start(slave->dev); -			if ((!bond->curr_active_slave && -			     time_in_range(jiffies, -					   trans_start - delta_in_ticks, -					   trans_start + delta_in_ticks)) || -			    bond->curr_active_slave != slave) { +			if (bond->curr_active_slave != slave || +			    (!bond->curr_active_slave && +			     bond_time_in_interval(bond, trans_start, 1))) {  				slave->link = BOND_LINK_UP; -				bond->current_arp_slave = NULL; +				if (bond->current_arp_slave) { +					bond_set_slave_inactive_flags( +						bond->current_arp_slave, +						BOND_SLAVE_NOTIFY_NOW); +					bond->current_arp_slave = NULL; +				} -				pr_info("%s: link status definitely up for interface %s.\n", +				pr_info("%s: link status definitely up for interface %s\n",  					bond->dev->name, slave->dev->name);  				if (!bond->curr_active_slave || @@ -3051,7 +2647,8 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)  				slave->link_failure_count++;  			slave->link = BOND_LINK_DOWN; -			bond_set_slave_inactive_flags(slave); +			bond_set_slave_inactive_flags(slave, +						      BOND_SLAVE_NOTIFY_NOW);  			pr_info("%s: link status definitely down for interface %s, disabling it\n",  				bond->dev->name, slave->dev->name); @@ -3085,52 +2682,46 @@ do_failover:  /*   * Send ARP probes for active-backup mode ARP monitor.   * - * Called with bond->lock held for read. + * Called with rcu_read_lock hold.   */ -static void bond_ab_arp_probe(struct bonding *bond) +static bool bond_ab_arp_probe(struct bonding *bond)  { -	struct slave *slave; -	int i; - -	read_lock(&bond->curr_slave_lock); - -	if (bond->current_arp_slave && bond->curr_active_slave) +	struct slave *slave, *before = NULL, *new_slave = NULL, +		     *curr_arp_slave = rcu_dereference(bond->current_arp_slave), +		     *curr_active_slave = rcu_dereference(bond->curr_active_slave); +	struct list_head *iter; +	bool found = false; +	bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER; + +	if (curr_arp_slave && curr_active_slave)  		pr_info("PROBE: c_arp %s && cas %s BAD\n", -			bond->current_arp_slave->dev->name, -			bond->curr_active_slave->dev->name); +			curr_arp_slave->dev->name, +			curr_active_slave->dev->name); -	if (bond->curr_active_slave) { -		bond_arp_send_all(bond, bond->curr_active_slave); -		read_unlock(&bond->curr_slave_lock); -		return; +	if (curr_active_slave) { +		bond_arp_send_all(bond, curr_active_slave); +		return should_notify_rtnl;  	} -	read_unlock(&bond->curr_slave_lock); -  	/* if we don't have a curr_active_slave, search for the next available  	 * backup slave from the current_arp_slave and make it the candidate  	 * for becoming the curr_active_slave  	 */ -	if (!bond->current_arp_slave) { -		bond->current_arp_slave = bond->first_slave; -		if (!bond->current_arp_slave) -			return; +	if (!curr_arp_slave) { +		curr_arp_slave = bond_first_slave_rcu(bond); +		if (!curr_arp_slave) +			return should_notify_rtnl;  	} -	bond_set_slave_inactive_flags(bond->current_arp_slave); +	bond_set_slave_inactive_flags(curr_arp_slave, BOND_SLAVE_NOTIFY_LATER); -	/* search for next candidate */ -	bond_for_each_slave_from(bond, slave, i, bond->current_arp_slave->next) { -		if (IS_UP(slave->dev)) { -			slave->link = BOND_LINK_BACK; -			bond_set_slave_active_flags(slave); -			bond_arp_send_all(bond, slave); -			slave->jiffies = jiffies; -			bond->current_arp_slave = slave; -			break; -		} +	bond_for_each_slave_rcu(bond, slave, iter) { +		if (!found && !before && bond_slave_is_up(slave)) +			before = slave; +		if (found && !new_slave && bond_slave_is_up(slave)) +			new_slave = slave;  		/* if the link state is up at this point, we  		 * mark it down - this can happen if we have  		 * simultaneous link failures and @@ -3138,361 +2729,97 @@ static void bond_ab_arp_probe(struct bonding *bond)  		 * one the current slave so it is still marked  		 * up when it is actually down  		 */ -		if (slave->link == BOND_LINK_UP) { +		if (!bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) {  			slave->link = BOND_LINK_DOWN;  			if (slave->link_failure_count < UINT_MAX)  				slave->link_failure_count++; -			bond_set_slave_inactive_flags(slave); +			bond_set_slave_inactive_flags(slave, +						      BOND_SLAVE_NOTIFY_LATER); -			pr_info("%s: backup interface %s is now down.\n", +			pr_info("%s: backup interface %s is now down\n",  				bond->dev->name, slave->dev->name);  		} +		if (slave == curr_arp_slave) +			found = true;  	} + +	if (!new_slave && before) +		new_slave = before; + +	if (!new_slave) +		goto check_state; + +	new_slave->link = BOND_LINK_BACK; +	bond_set_slave_active_flags(new_slave, BOND_SLAVE_NOTIFY_LATER); +	bond_arp_send_all(bond, new_slave); +	new_slave->last_link_up = jiffies; +	rcu_assign_pointer(bond->current_arp_slave, new_slave); + +check_state: +	bond_for_each_slave_rcu(bond, slave, iter) { +		if (slave->should_notify) { +			should_notify_rtnl = BOND_SLAVE_NOTIFY_NOW; +			break; +		} +	} +	return should_notify_rtnl;  } -void bond_activebackup_arp_mon(struct work_struct *work) +static void bond_activebackup_arp_mon(struct work_struct *work)  {  	struct bonding *bond = container_of(work, struct bonding,  					    arp_work.work); +	bool should_notify_peers = false; +	bool should_notify_rtnl = false;  	int delta_in_ticks; -	read_lock(&bond->lock); - -	if (bond->kill_timers) -		goto out; -  	delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); -	if (bond->slave_cnt == 0) +	if (!bond_has_slaves(bond))  		goto re_arm; -	if (bond->send_grat_arp) { -		read_lock(&bond->curr_slave_lock); -		bond_send_gratuitous_arp(bond); -		read_unlock(&bond->curr_slave_lock); -	} +	rcu_read_lock(); -	if (bond->send_unsol_na) { -		read_lock(&bond->curr_slave_lock); -		bond_send_unsolicited_na(bond); -		read_unlock(&bond->curr_slave_lock); -	} +	should_notify_peers = bond_should_notify_peers(bond); -	if (bond_ab_arp_inspect(bond, delta_in_ticks)) { -		read_unlock(&bond->lock); -		rtnl_lock(); -		read_lock(&bond->lock); +	if (bond_ab_arp_inspect(bond)) { +		rcu_read_unlock(); -		bond_ab_arp_commit(bond, delta_in_ticks); +		/* Race avoidance with bond_close flush of workqueue */ +		if (!rtnl_trylock()) { +			delta_in_ticks = 1; +			should_notify_peers = false; +			goto re_arm; +		} + +		bond_ab_arp_commit(bond); -		read_unlock(&bond->lock);  		rtnl_unlock(); -		read_lock(&bond->lock); +		rcu_read_lock();  	} -	bond_ab_arp_probe(bond); +	should_notify_rtnl = bond_ab_arp_probe(bond); +	rcu_read_unlock();  re_arm:  	if (bond->params.arp_interval)  		queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); -out: -	read_unlock(&bond->lock); -} - -/*------------------------------ proc/seq_file-------------------------------*/ - -#ifdef CONFIG_PROC_FS - -static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos) -	__acquires(RCU) -	__acquires(&bond->lock) -{ -	struct bonding *bond = seq->private; -	loff_t off = 0; -	struct slave *slave; -	int i; - -	/* make sure the bond won't be taken away */ -	rcu_read_lock(); -	read_lock(&bond->lock); - -	if (*pos == 0) -		return SEQ_START_TOKEN; - -	bond_for_each_slave(bond, slave, i) { -		if (++off == *pos) -			return slave; -	} - -	return NULL; -} - -static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ -	struct bonding *bond = seq->private; -	struct slave *slave = v; - -	++*pos; -	if (v == SEQ_START_TOKEN) -		return bond->first_slave; - -	slave = slave->next; - -	return (slave == bond->first_slave) ? NULL : slave; -} - -static void bond_info_seq_stop(struct seq_file *seq, void *v) -	__releases(&bond->lock) -	__releases(RCU) -{ -	struct bonding *bond = seq->private; - -	read_unlock(&bond->lock); -	rcu_read_unlock(); -} - -static void bond_info_show_master(struct seq_file *seq) -{ -	struct bonding *bond = seq->private; -	struct slave *curr; -	int i; -	read_lock(&bond->curr_slave_lock); -	curr = bond->curr_active_slave; -	read_unlock(&bond->curr_slave_lock); - -	seq_printf(seq, "Bonding Mode: %s", -		   bond_mode_name(bond->params.mode)); - -	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP && -	    bond->params.fail_over_mac) -		seq_printf(seq, " (fail_over_mac %s)", -		   fail_over_mac_tbl[bond->params.fail_over_mac].modename); - -	seq_printf(seq, "\n"); - -	if (bond->params.mode == BOND_MODE_XOR || -		bond->params.mode == BOND_MODE_8023AD) { -		seq_printf(seq, "Transmit Hash Policy: %s (%d)\n", -			xmit_hashtype_tbl[bond->params.xmit_policy].modename, -			bond->params.xmit_policy); -	} - -	if (USES_PRIMARY(bond->params.mode)) { -		seq_printf(seq, "Primary Slave: %s", -			   (bond->primary_slave) ? -			   bond->primary_slave->dev->name : "None"); -		if (bond->primary_slave) -			seq_printf(seq, " (primary_reselect %s)", -		   pri_reselect_tbl[bond->params.primary_reselect].modename); - -		seq_printf(seq, "\nCurrently Active Slave: %s\n", -			   (curr) ? curr->dev->name : "None"); -	} - -	seq_printf(seq, "MII Status: %s\n", netif_carrier_ok(bond->dev) ? -		   "up" : "down"); -	seq_printf(seq, "MII Polling Interval (ms): %d\n", bond->params.miimon); -	seq_printf(seq, "Up Delay (ms): %d\n", -		   bond->params.updelay * bond->params.miimon); -	seq_printf(seq, "Down Delay (ms): %d\n", -		   bond->params.downdelay * bond->params.miimon); - - -	/* ARP information */ -	if (bond->params.arp_interval > 0) { -		int printed = 0; -		seq_printf(seq, "ARP Polling Interval (ms): %d\n", -				bond->params.arp_interval); - -		seq_printf(seq, "ARP IP target/s (n.n.n.n form):"); - -		for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) { -			if (!bond->params.arp_targets[i]) -				break; -			if (printed) -				seq_printf(seq, ","); -			seq_printf(seq, " %pI4", &bond->params.arp_targets[i]); -			printed = 1; -		} -		seq_printf(seq, "\n"); -	} - -	if (bond->params.mode == BOND_MODE_8023AD) { -		struct ad_info ad_info; - -		seq_puts(seq, "\n802.3ad info\n"); -		seq_printf(seq, "LACP rate: %s\n", -			   (bond->params.lacp_fast) ? "fast" : "slow"); -		seq_printf(seq, "Aggregator selection policy (ad_select): %s\n", -			   ad_select_tbl[bond->params.ad_select].modename); - -		if (bond_3ad_get_active_agg_info(bond, &ad_info)) { -			seq_printf(seq, "bond %s has no active aggregator\n", -				   bond->dev->name); -		} else { -			seq_printf(seq, "Active Aggregator Info:\n"); - -			seq_printf(seq, "\tAggregator ID: %d\n", -				   ad_info.aggregator_id); -			seq_printf(seq, "\tNumber of ports: %d\n", -				   ad_info.ports); -			seq_printf(seq, "\tActor Key: %d\n", -				   ad_info.actor_key); -			seq_printf(seq, "\tPartner Key: %d\n", -				   ad_info.partner_key); -			seq_printf(seq, "\tPartner Mac Address: %pM\n", -				   ad_info.partner_system); -		} -	} -} - -static void bond_info_show_slave(struct seq_file *seq, -				 const struct slave *slave) -{ -	struct bonding *bond = seq->private; - -	seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name); -	seq_printf(seq, "MII Status: %s\n", -		   (slave->link == BOND_LINK_UP) ?  "up" : "down"); -	seq_printf(seq, "Speed: %d Mbps\n", slave->speed); -	seq_printf(seq, "Duplex: %s\n", slave->duplex ? "full" : "half"); -	seq_printf(seq, "Link Failure Count: %u\n", -		   slave->link_failure_count); - -	seq_printf(seq, "Permanent HW addr: %pM\n", slave->perm_hwaddr); - -	if (bond->params.mode == BOND_MODE_8023AD) { -		const struct aggregator *agg -			= SLAVE_AD_INFO(slave).port.aggregator; - -		if (agg) -			seq_printf(seq, "Aggregator ID: %d\n", -				   agg->aggregator_identifier); -		else -			seq_puts(seq, "Aggregator ID: N/A\n"); -	} -	seq_printf(seq, "Slave queue ID: %d\n", slave->queue_id); -} - -static int bond_info_seq_show(struct seq_file *seq, void *v) -{ -	if (v == SEQ_START_TOKEN) { -		seq_printf(seq, "%s\n", version); -		bond_info_show_master(seq); -	} else -		bond_info_show_slave(seq, v); - -	return 0; -} - -static const struct seq_operations bond_info_seq_ops = { -	.start = bond_info_seq_start, -	.next  = bond_info_seq_next, -	.stop  = bond_info_seq_stop, -	.show  = bond_info_seq_show, -}; - -static int bond_info_open(struct inode *inode, struct file *file) -{ -	struct seq_file *seq; -	struct proc_dir_entry *proc; -	int res; - -	res = seq_open(file, &bond_info_seq_ops); -	if (!res) { -		/* recover the pointer buried in proc_dir_entry data */ -		seq = file->private_data; -		proc = PDE(inode); -		seq->private = proc->data; -	} - -	return res; -} - -static const struct file_operations bond_info_fops = { -	.owner   = THIS_MODULE, -	.open    = bond_info_open, -	.read    = seq_read, -	.llseek  = seq_lseek, -	.release = seq_release, -}; - -static void bond_create_proc_entry(struct bonding *bond) -{ -	struct net_device *bond_dev = bond->dev; -	struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id); - -	if (bn->proc_dir) { -		bond->proc_entry = proc_create_data(bond_dev->name, -						    S_IRUGO, bn->proc_dir, -						    &bond_info_fops, bond); -		if (bond->proc_entry == NULL) -			pr_warning("Warning: Cannot create /proc/net/%s/%s\n", -				   DRV_NAME, bond_dev->name); -		else -			memcpy(bond->proc_file_name, bond_dev->name, IFNAMSIZ); -	} -} +	if (should_notify_peers || should_notify_rtnl) { +		if (!rtnl_trylock()) +			return; -static void bond_remove_proc_entry(struct bonding *bond) -{ -	struct net_device *bond_dev = bond->dev; -	struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id); +		if (should_notify_peers) +			call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, +						 bond->dev); +		if (should_notify_rtnl) +			bond_slave_state_notify(bond); -	if (bn->proc_dir && bond->proc_entry) { -		remove_proc_entry(bond->proc_file_name, bn->proc_dir); -		memset(bond->proc_file_name, 0, IFNAMSIZ); -		bond->proc_entry = NULL; -	} -} - -/* Create the bonding directory under /proc/net, if doesn't exist yet. - * Caller must hold rtnl_lock. - */ -static void __net_init bond_create_proc_dir(struct bond_net *bn) -{ -	if (!bn->proc_dir) { -		bn->proc_dir = proc_mkdir(DRV_NAME, bn->net->proc_net); -		if (!bn->proc_dir) -			pr_warning("Warning: cannot create /proc/net/%s\n", -				   DRV_NAME); -	} -} - -/* Destroy the bonding directory under /proc/net, if empty. - * Caller must hold rtnl_lock. - */ -static void __net_exit bond_destroy_proc_dir(struct bond_net *bn) -{ -	if (bn->proc_dir) { -		remove_proc_entry(DRV_NAME, bn->net->proc_net); -		bn->proc_dir = NULL; +		rtnl_unlock();  	}  } -#else /* !CONFIG_PROC_FS */ - -static void bond_create_proc_entry(struct bonding *bond) -{ -} - -static void bond_remove_proc_entry(struct bonding *bond) -{ -} - -static inline void bond_create_proc_dir(struct bond_net *bn) -{ -} - -static inline void bond_destroy_proc_dir(struct bond_net *bn) -{ -} - -#endif /* CONFIG_PROC_FS */ - -  /*-------------------------- netdev event handling --------------------------*/  /* @@ -3503,6 +2830,8 @@ static int bond_event_changename(struct bonding *bond)  	bond_remove_proc_entry(bond);  	bond_create_proc_entry(bond); +	bond_debug_reregister(bond); +  	return NOTIFY_DONE;  } @@ -3514,6 +2843,16 @@ static int bond_master_netdev_event(unsigned long event,  	switch (event) {  	case NETDEV_CHANGENAME:  		return bond_event_changename(event_bond); +	case NETDEV_UNREGISTER: +		bond_remove_proc_entry(event_bond); +		break; +	case NETDEV_REGISTER: +		bond_create_proc_entry(event_bond); +		break; +	case NETDEV_NOTIFY_PEERS: +		if (event_bond->send_peer_notif) +			event_bond->send_peer_notif--; +		break;  	default:  		break;  	} @@ -3524,39 +2863,41 @@ static int bond_master_netdev_event(unsigned long event,  static int bond_slave_netdev_event(unsigned long event,  				   struct net_device *slave_dev)  { -	struct net_device *bond_dev = slave_dev->master; -	struct bonding *bond = netdev_priv(bond_dev); +	struct slave *slave = bond_slave_get_rtnl(slave_dev); +	struct bonding *bond; +	struct net_device *bond_dev; +	u32 old_speed; +	u8 old_duplex; + +	/* A netdev event can be generated while enslaving a device +	 * before netdev_rx_handler_register is called in which case +	 * slave will be NULL +	 */ +	if (!slave) +		return NOTIFY_DONE; +	bond_dev = slave->bond->dev; +	bond = slave->bond;  	switch (event) {  	case NETDEV_UNREGISTER: -		if (bond_dev) { -			if (bond->setup_by_slave) -				bond_release_and_destroy(bond_dev, slave_dev); -			else -				bond_release(bond_dev, slave_dev); -		} +		if (bond_dev->type != ARPHRD_ETHER) +			bond_release_and_destroy(bond_dev, slave_dev); +		else +			bond_release(bond_dev, slave_dev);  		break; +	case NETDEV_UP:  	case NETDEV_CHANGE: -		if (bond->params.mode == BOND_MODE_8023AD || bond_is_lb(bond)) { -			struct slave *slave; - -			slave = bond_get_slave_by_dev(bond, slave_dev); -			if (slave) { -				u16 old_speed = slave->speed; -				u16 old_duplex = slave->duplex; - -				bond_update_speed_duplex(slave); +		old_speed = slave->speed; +		old_duplex = slave->duplex; -				if (bond_is_lb(bond)) -					break; +		bond_update_speed_duplex(slave); -				if (old_speed != slave->speed) -					bond_3ad_adapter_speed_changed(slave); -				if (old_duplex != slave->duplex) -					bond_3ad_adapter_duplex_changed(slave); -			} +		if (BOND_MODE(bond) == BOND_MODE_8023AD) { +			if (old_speed != slave->speed) +				bond_3ad_adapter_speed_changed(slave); +			if (old_duplex != slave->duplex) +				bond_3ad_adapter_duplex_changed(slave);  		} -  		break;  	case NETDEV_DOWN:  		/* @@ -3578,13 +2919,38 @@ static int bond_slave_netdev_event(unsigned long event,  		 */  		break;  	case NETDEV_CHANGENAME: -		/* -		 * TODO: handle changing the primary's name -		 */ +		/* we don't care if we don't have primary set */ +		if (!bond_uses_primary(bond) || +		    !bond->params.primary[0]) +			break; + +		if (slave == bond->primary_slave) { +			/* slave's name changed - he's no longer primary */ +			bond->primary_slave = NULL; +		} else if (!strcmp(slave_dev->name, bond->params.primary)) { +			/* we have a new primary slave */ +			bond->primary_slave = slave; +		} else { /* we didn't change primary - exit */ +			break; +		} + +		pr_info("%s: Primary slave changed to %s, reselecting active slave\n", +			bond->dev->name, +			bond->primary_slave ? slave_dev->name : "none"); + +		block_netpoll_tx(); +		write_lock_bh(&bond->curr_slave_lock); +		bond_select_active_slave(bond); +		write_unlock_bh(&bond->curr_slave_lock); +		unblock_netpoll_tx();  		break;  	case NETDEV_FEAT_CHANGE:  		bond_compute_features(bond);  		break; +	case NETDEV_RESEND_IGMP: +		/* Propagate to master device */ +		call_netdevice_notifiers(event, slave->bond->dev); +		break;  	default:  		break;  	} @@ -3603,11 +2969,10 @@ static int bond_slave_netdev_event(unsigned long event,  static int bond_netdev_event(struct notifier_block *this,  			     unsigned long event, void *ptr)  { -	struct net_device *event_dev = (struct net_device *)ptr; +	struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);  	pr_debug("event_dev: %s, event: %lx\n", -		 event_dev ? event_dev->name : "None", -		 event); +		 event_dev ? event_dev->name : "None", event);  	if (!(event_dev->priv_flags & IFF_BONDING))  		return NOTIFY_DONE; @@ -3625,209 +2990,163 @@ static int bond_netdev_event(struct notifier_block *this,  	return NOTIFY_DONE;  } -/* - * bond_inetaddr_event: handle inetaddr notifier chain events. - * - * We keep track of device IPs primarily to use as source addresses in - * ARP monitor probes (rather than spewing out broadcasts all the time). - * - * We track one IP for the main device (if it has one), plus one per VLAN. - */ -static int bond_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) -{ -	struct in_ifaddr *ifa = ptr; -	struct net_device *vlan_dev, *event_dev = ifa->ifa_dev->dev; -	struct bond_net *bn = net_generic(dev_net(event_dev), bond_net_id); -	struct bonding *bond; -	struct vlan_entry *vlan; - -	list_for_each_entry(bond, &bn->dev_list, bond_list) { -		if (bond->dev == event_dev) { -			switch (event) { -			case NETDEV_UP: -				bond->master_ip = ifa->ifa_local; -				return NOTIFY_OK; -			case NETDEV_DOWN: -				bond->master_ip = bond_glean_dev_ip(bond->dev); -				return NOTIFY_OK; -			default: -				return NOTIFY_DONE; -			} -		} - -		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { -			if (!bond->vlgrp) -				continue; -			vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); -			if (vlan_dev == event_dev) { -				switch (event) { -				case NETDEV_UP: -					vlan->vlan_ip = ifa->ifa_local; -					return NOTIFY_OK; -				case NETDEV_DOWN: -					vlan->vlan_ip = -						bond_glean_dev_ip(vlan_dev); -					return NOTIFY_OK; -				default: -					return NOTIFY_DONE; -				} -			} -		} -	} -	return NOTIFY_DONE; -} -  static struct notifier_block bond_netdev_notifier = {  	.notifier_call = bond_netdev_event,  }; -static struct notifier_block bond_inetaddr_notifier = { -	.notifier_call = bond_inetaddr_event, -}; - -/*-------------------------- Packet type handling ---------------------------*/ - -/* register to receive lacpdus on a bond */ -static void bond_register_lacpdu(struct bonding *bond) -{ -	struct packet_type *pk_type = &(BOND_AD_INFO(bond).ad_pkt_type); - -	/* initialize packet type */ -	pk_type->type = PKT_TYPE_LACPDU; -	pk_type->dev = bond->dev; -	pk_type->func = bond_3ad_lacpdu_recv; - -	dev_add_pack(pk_type); -} - -/* unregister to receive lacpdus on a bond */ -static void bond_unregister_lacpdu(struct bonding *bond) -{ -	dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type)); -} +/*---------------------------- Hashing Policies -----------------------------*/ -void bond_register_arp(struct bonding *bond) +/* L2 hash helper */ +static inline u32 bond_eth_hash(struct sk_buff *skb)  { -	struct packet_type *pt = &bond->arp_mon_pt; +	struct ethhdr *data = (struct ethhdr *)skb->data; -	if (pt->type) -		return; +	if (skb_headlen(skb) >= offsetof(struct ethhdr, h_proto)) +		return data->h_dest[5] ^ data->h_source[5]; -	pt->type = htons(ETH_P_ARP); -	pt->dev = bond->dev; -	pt->func = bond_arp_rcv; -	dev_add_pack(pt); +	return 0;  } -void bond_unregister_arp(struct bonding *bond) +/* Extract the appropriate headers based on bond's xmit policy */ +static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, +			      struct flow_keys *fk)  { -	struct packet_type *pt = &bond->arp_mon_pt; +	const struct ipv6hdr *iph6; +	const struct iphdr *iph; +	int noff, proto = -1; -	dev_remove_pack(pt); -	pt->type = 0; -} - -/*---------------------------- Hashing Policies -----------------------------*/ - -/* - * Hash for the output device based upon layer 2 and layer 3 data. If - * the packet is not IP mimic bond_xmit_hash_policy_l2() - */ -static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count) -{ -	struct ethhdr *data = (struct ethhdr *)skb->data; -	struct iphdr *iph = ip_hdr(skb); +	if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23) +		return skb_flow_dissect(skb, fk); +	fk->ports = 0; +	noff = skb_network_offset(skb);  	if (skb->protocol == htons(ETH_P_IP)) { -		return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^ -			(data->h_dest[5] ^ data->h_source[5])) % count; +		if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph)))) +			return false; +		iph = ip_hdr(skb); +		fk->src = iph->saddr; +		fk->dst = iph->daddr; +		noff += iph->ihl << 2; +		if (!ip_is_fragment(iph)) +			proto = iph->protocol; +	} else if (skb->protocol == htons(ETH_P_IPV6)) { +		if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph6)))) +			return false; +		iph6 = ipv6_hdr(skb); +		fk->src = (__force __be32)ipv6_addr_hash(&iph6->saddr); +		fk->dst = (__force __be32)ipv6_addr_hash(&iph6->daddr); +		noff += sizeof(*iph6); +		proto = iph6->nexthdr; +	} else { +		return false;  	} +	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0) +		fk->ports = skb_flow_get_ports(skb, noff, proto); -	return (data->h_dest[5] ^ data->h_source[5]) % count; +	return true;  } -/* - * Hash for the output device based upon layer 3 and layer 4 data. If - * the packet is a frag or not TCP or UDP, just use layer 3 data.  If it is - * altogether not IP, mimic bond_xmit_hash_policy_l2() +/** + * bond_xmit_hash - generate a hash value based on the xmit policy + * @bond: bonding device + * @skb: buffer to use for headers + * + * This function will extract the necessary headers from the skb buffer and use + * them to generate a hash based on the xmit_policy set in the bonding device   */ -static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count) +u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)  { -	struct ethhdr *data = (struct ethhdr *)skb->data; -	struct iphdr *iph = ip_hdr(skb); -	__be16 *layer4hdr = (__be16 *)((u32 *)iph + iph->ihl); -	int layer4_xor = 0; +	struct flow_keys flow; +	u32 hash; -	if (skb->protocol == htons(ETH_P_IP)) { -		if (!(iph->frag_off & htons(IP_MF|IP_OFFSET)) && -		    (iph->protocol == IPPROTO_TCP || -		     iph->protocol == IPPROTO_UDP)) { -			layer4_xor = ntohs((*layer4hdr ^ *(layer4hdr + 1))); -		} -		return (layer4_xor ^ -			((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count; +	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 || +	    !bond_flow_dissect(bond, skb, &flow)) +		return bond_eth_hash(skb); -	} +	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 || +	    bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) +		hash = bond_eth_hash(skb); +	else +		hash = (__force u32)flow.ports; +	hash ^= (__force u32)flow.dst ^ (__force u32)flow.src; +	hash ^= (hash >> 16); +	hash ^= (hash >> 8); -	return (data->h_dest[5] ^ data->h_source[5]) % count; +	return hash;  } -/* - * Hash for the output device based upon layer 2 data - */ -static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count) -{ -	struct ethhdr *data = (struct ethhdr *)skb->data; +/*-------------------------- Device entry points ----------------------------*/ -	return (data->h_dest[5] ^ data->h_source[5]) % count; +static void bond_work_init_all(struct bonding *bond) +{ +	INIT_DELAYED_WORK(&bond->mcast_work, +			  bond_resend_igmp_join_requests_delayed); +	INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor); +	INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor); +	if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) +		INIT_DELAYED_WORK(&bond->arp_work, bond_activebackup_arp_mon); +	else +		INIT_DELAYED_WORK(&bond->arp_work, bond_loadbalance_arp_mon); +	INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);  } -/*-------------------------- Device entry points ----------------------------*/ +static void bond_work_cancel_all(struct bonding *bond) +{ +	cancel_delayed_work_sync(&bond->mii_work); +	cancel_delayed_work_sync(&bond->arp_work); +	cancel_delayed_work_sync(&bond->alb_work); +	cancel_delayed_work_sync(&bond->ad_work); +	cancel_delayed_work_sync(&bond->mcast_work); +}  static int bond_open(struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev); +	struct list_head *iter; +	struct slave *slave; -	bond->kill_timers = 0; +	/* reset slave->backup and slave->inactive */ +	read_lock(&bond->lock); +	if (bond_has_slaves(bond)) { +		read_lock(&bond->curr_slave_lock); +		bond_for_each_slave(bond, slave, iter) { +			if (bond_uses_primary(bond) +				&& (slave != bond->curr_active_slave)) { +				bond_set_slave_inactive_flags(slave, +							      BOND_SLAVE_NOTIFY_NOW); +			} else { +				bond_set_slave_active_flags(slave, +							    BOND_SLAVE_NOTIFY_NOW); +			} +		} +		read_unlock(&bond->curr_slave_lock); +	} +	read_unlock(&bond->lock); -	INIT_DELAYED_WORK(&bond->mcast_work, bond_resend_igmp_join_requests_delayed); +	bond_work_init_all(bond);  	if (bond_is_lb(bond)) {  		/* bond_alb_initialize must be called before the timer  		 * is started.  		 */ -		if (bond_alb_initialize(bond, (bond->params.mode == BOND_MODE_ALB))) { -			/* something went wrong - fail the open operation */ +		if (bond_alb_initialize(bond, (BOND_MODE(bond) == BOND_MODE_ALB)))  			return -ENOMEM; -		} - -		INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor); -		queue_delayed_work(bond->wq, &bond->alb_work, 0); +		if (bond->params.tlb_dynamic_lb) +			queue_delayed_work(bond->wq, &bond->alb_work, 0);  	} -	if (bond->params.miimon) {  /* link check interval, in milliseconds. */ -		INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor); +	if (bond->params.miimon)  /* link check interval, in milliseconds. */  		queue_delayed_work(bond->wq, &bond->mii_work, 0); -	}  	if (bond->params.arp_interval) {  /* arp interval, in milliseconds. */ -		if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) -			INIT_DELAYED_WORK(&bond->arp_work, -					  bond_activebackup_arp_mon); -		else -			INIT_DELAYED_WORK(&bond->arp_work, -					  bond_loadbalance_arp_mon); -  		queue_delayed_work(bond->wq, &bond->arp_work, 0); -		if (bond->params.arp_validate) -			bond_register_arp(bond); +		bond->recv_probe = bond_arp_rcv;  	} -	if (bond->params.mode == BOND_MODE_8023AD) { -		INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler); +	if (BOND_MODE(bond) == BOND_MODE_8023AD) {  		queue_delayed_work(bond->wq, &bond->ad_work, 0);  		/* register to receive LACPDUs */ -		bond_register_lacpdu(bond); +		bond->recv_probe = bond_3ad_lacpdu_recv;  		bond_3ad_initiate_agg_selection(bond, 1);  	} @@ -3838,53 +3157,11 @@ static int bond_close(struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev); -	if (bond->params.mode == BOND_MODE_8023AD) { -		/* Unregister the receive of LACPDUs */ -		bond_unregister_lacpdu(bond); -	} - -	if (bond->params.arp_validate) -		bond_unregister_arp(bond); - -	write_lock_bh(&bond->lock); - -	bond->send_grat_arp = 0; -	bond->send_unsol_na = 0; - -	/* signal timers not to re-arm */ -	bond->kill_timers = 1; - -	write_unlock_bh(&bond->lock); - -	if (bond->params.miimon) {  /* link check interval, in milliseconds. */ -		cancel_delayed_work(&bond->mii_work); -	} - -	if (bond->params.arp_interval) {  /* arp interval, in milliseconds. */ -		cancel_delayed_work(&bond->arp_work); -	} - -	switch (bond->params.mode) { -	case BOND_MODE_8023AD: -		cancel_delayed_work(&bond->ad_work); -		break; -	case BOND_MODE_TLB: -	case BOND_MODE_ALB: -		cancel_delayed_work(&bond->alb_work); -		break; -	default: -		break; -	} - -	if (delayed_work_pending(&bond->mcast_work)) -		cancel_delayed_work(&bond->mcast_work); - -	if (bond_is_lb(bond)) { -		/* Must be called only after all -		 * slaves have been released -		 */ +	bond_work_cancel_all(bond); +	bond->send_peer_notif = 0; +	if (bond_is_lb(bond))  		bond_alb_deinitialize(bond); -	} +	bond->recv_probe = NULL;  	return 0;  } @@ -3894,14 +3171,13 @@ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,  {  	struct bonding *bond = netdev_priv(bond_dev);  	struct rtnl_link_stats64 temp; +	struct list_head *iter;  	struct slave *slave; -	int i;  	memset(stats, 0, sizeof(*stats));  	read_lock_bh(&bond->lock); - -	bond_for_each_slave(bond, slave, i) { +	bond_for_each_slave(bond, slave, iter) {  		const struct rtnl_link_stats64 *sstats =  			dev_get_stats(slave->dev, &temp); @@ -3931,7 +3207,6 @@ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,  		stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors;  		stats->tx_window_errors += sstats->tx_window_errors;  	} -  	read_unlock_bh(&bond->lock);  	return stats; @@ -3939,12 +3214,15 @@ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,  static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd)  { +	struct bonding *bond = netdev_priv(bond_dev);  	struct net_device *slave_dev = NULL;  	struct ifbond k_binfo;  	struct ifbond __user *u_binfo = NULL;  	struct ifslave k_sinfo;  	struct ifslave __user *u_sinfo = NULL;  	struct mii_ioctl_data *mii = NULL; +	struct bond_opt_value newval; +	struct net *net;  	int res = 0;  	pr_debug("bond_ioctl: master=%s, cmd=%d\n", bond_dev->name, cmd); @@ -3968,7 +3246,6 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd  		if (mii->reg_num == 1) { -			struct bonding *bond = netdev_priv(bond_dev);  			mii->val_out = 0;  			read_lock(&bond->lock);  			read_lock(&bond->curr_slave_lock); @@ -4011,130 +3288,132 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd  		break;  	} -	if (!capable(CAP_NET_ADMIN)) +	net = dev_net(bond_dev); + +	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))  		return -EPERM; -	slave_dev = dev_get_by_name(dev_net(bond_dev), ifr->ifr_slave); +	slave_dev = __dev_get_by_name(net, ifr->ifr_slave);  	pr_debug("slave_dev=%p:\n", slave_dev);  	if (!slave_dev) -		res = -ENODEV; -	else { -		pr_debug("slave_dev->name=%s:\n", slave_dev->name); -		switch (cmd) { -		case BOND_ENSLAVE_OLD: -		case SIOCBONDENSLAVE: -			res = bond_enslave(bond_dev, slave_dev); -			break; -		case BOND_RELEASE_OLD: -		case SIOCBONDRELEASE: -			res = bond_release(bond_dev, slave_dev); -			break; -		case BOND_SETHWADDR_OLD: -		case SIOCBONDSETHWADDR: -			res = bond_sethwaddr(bond_dev, slave_dev); -			break; -		case BOND_CHANGE_ACTIVE_OLD: -		case SIOCBONDCHANGEACTIVE: -			res = bond_ioctl_change_active(bond_dev, slave_dev); -			break; -		default: -			res = -EOPNOTSUPP; -		} +		return -ENODEV; -		dev_put(slave_dev); +	pr_debug("slave_dev->name=%s:\n", slave_dev->name); +	switch (cmd) { +	case BOND_ENSLAVE_OLD: +	case SIOCBONDENSLAVE: +		res = bond_enslave(bond_dev, slave_dev); +		break; +	case BOND_RELEASE_OLD: +	case SIOCBONDRELEASE: +		res = bond_release(bond_dev, slave_dev); +		break; +	case BOND_SETHWADDR_OLD: +	case SIOCBONDSETHWADDR: +		bond_set_dev_addr(bond_dev, slave_dev); +		res = 0; +		break; +	case BOND_CHANGE_ACTIVE_OLD: +	case SIOCBONDCHANGEACTIVE: +		bond_opt_initstr(&newval, slave_dev->name); +		res = __bond_opt_set(bond, BOND_OPT_ACTIVE_SLAVE, &newval); +		break; +	default: +		res = -EOPNOTSUPP;  	}  	return res;  } -static bool bond_addr_in_mc_list(unsigned char *addr, -				 struct netdev_hw_addr_list *list, -				 int addrlen) +static void bond_change_rx_flags(struct net_device *bond_dev, int change)  { -	struct netdev_hw_addr *ha; +	struct bonding *bond = netdev_priv(bond_dev); -	netdev_hw_addr_list_for_each(ha, list) -		if (!memcmp(ha->addr, addr, addrlen)) -			return true; +	if (change & IFF_PROMISC) +		bond_set_promiscuity(bond, +				     bond_dev->flags & IFF_PROMISC ? 1 : -1); -	return false; +	if (change & IFF_ALLMULTI) +		bond_set_allmulti(bond, +				  bond_dev->flags & IFF_ALLMULTI ? 1 : -1);  } -static void bond_set_multicast_list(struct net_device *bond_dev) +static void bond_set_rx_mode(struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev); -	struct netdev_hw_addr *ha; -	bool found; - -	/* -	 * Do promisc before checking multicast_mode -	 */ -	if ((bond_dev->flags & IFF_PROMISC) && !(bond->flags & IFF_PROMISC)) -		/* -		 * FIXME: Need to handle the error when one of the multi-slaves -		 * encounters error. -		 */ -		bond_set_promiscuity(bond, 1); - - -	if (!(bond_dev->flags & IFF_PROMISC) && (bond->flags & IFF_PROMISC)) -		bond_set_promiscuity(bond, -1); - - -	/* set allmulti flag to slaves */ -	if ((bond_dev->flags & IFF_ALLMULTI) && !(bond->flags & IFF_ALLMULTI)) -		/* -		 * FIXME: Need to handle the error when one of the multi-slaves -		 * encounters error. -		 */ -		bond_set_allmulti(bond, 1); - +	struct list_head *iter; +	struct slave *slave; -	if (!(bond_dev->flags & IFF_ALLMULTI) && (bond->flags & IFF_ALLMULTI)) -		bond_set_allmulti(bond, -1); +	rcu_read_lock(); +	if (bond_uses_primary(bond)) { +		slave = rcu_dereference(bond->curr_active_slave); +		if (slave) { +			dev_uc_sync(slave->dev, bond_dev); +			dev_mc_sync(slave->dev, bond_dev); +		} +	} else { +		bond_for_each_slave_rcu(bond, slave, iter) { +			dev_uc_sync_multiple(slave->dev, bond_dev); +			dev_mc_sync_multiple(slave->dev, bond_dev); +		} +	} +	rcu_read_unlock(); +} -	read_lock(&bond->lock); +static int bond_neigh_init(struct neighbour *n) +{ +	struct bonding *bond = netdev_priv(n->dev); +	const struct net_device_ops *slave_ops; +	struct neigh_parms parms; +	struct slave *slave; +	int ret; -	bond->flags = bond_dev->flags; +	slave = bond_first_slave(bond); +	if (!slave) +		return 0; +	slave_ops = slave->dev->netdev_ops; +	if (!slave_ops->ndo_neigh_setup) +		return 0; -	/* looking for addresses to add to slaves' mc list */ -	netdev_for_each_mc_addr(ha, bond_dev) { -		found = bond_addr_in_mc_list(ha->addr, &bond->mc_list, -					     bond_dev->addr_len); -		if (!found) -			bond_mc_add(bond, ha->addr); -	} +	parms.neigh_setup = NULL; +	parms.neigh_cleanup = NULL; +	ret = slave_ops->ndo_neigh_setup(slave->dev, &parms); +	if (ret) +		return ret; -	/* looking for addresses to delete from slaves' list */ -	netdev_hw_addr_list_for_each(ha, &bond->mc_list) { -		found = bond_addr_in_mc_list(ha->addr, &bond_dev->mc, -					     bond_dev->addr_len); -		if (!found) -			bond_mc_del(bond, ha->addr); -	} +	/* +	 * Assign slave's neigh_cleanup to neighbour in case cleanup is called +	 * after the last slave has been detached.  Assumes that all slaves +	 * utilize the same neigh_cleanup (true at this writing as only user +	 * is ipoib). +	 */ +	n->parms->neigh_cleanup = parms.neigh_cleanup; -	/* save master's multicast list */ -	__hw_addr_flush(&bond->mc_list); -	__hw_addr_add_multiple(&bond->mc_list, &bond_dev->mc, -			       bond_dev->addr_len, NETDEV_HW_ADDR_T_MULTICAST); +	if (!parms.neigh_setup) +		return 0; -	read_unlock(&bond->lock); +	return parms.neigh_setup(n);  } -static int bond_neigh_setup(struct net_device *dev, struct neigh_parms *parms) +/* + * The bonding ndo_neigh_setup is called at init time beofre any + * slave exists. So we must declare proxy setup function which will + * be used at run time to resolve the actual slave neigh param setup. + * + * It's also called by master devices (such as vlans) to setup their + * underlying devices. In that case - do nothing, we're already set up from + * our init. + */ +static int bond_neigh_setup(struct net_device *dev, +			    struct neigh_parms *parms)  { -	struct bonding *bond = netdev_priv(dev); -	struct slave *slave = bond->first_slave; +	/* modify only our neigh_parms */ +	if (parms->dev == dev) +		parms->neigh_setup = bond_neigh_init; -	if (slave) { -		const struct net_device_ops *slave_ops -			= slave->dev->netdev_ops; -		if (slave_ops->ndo_neigh_setup) -			return slave_ops->ndo_neigh_setup(slave->dev, parms); -	}  	return 0;  } @@ -4144,12 +3423,12 @@ static int bond_neigh_setup(struct net_device *dev, struct neigh_parms *parms)  static int bond_change_mtu(struct net_device *bond_dev, int new_mtu)  {  	struct bonding *bond = netdev_priv(bond_dev); -	struct slave *slave, *stop_at; +	struct slave *slave, *rollback_slave; +	struct list_head *iter;  	int res = 0; -	int i; -	pr_debug("bond=%p, name=%s, new_mtu=%d\n", bond, -		 (bond_dev ? bond_dev->name : "None"), new_mtu); +	pr_debug("bond=%p, name=%s, new_mtu=%d\n", +		 bond, bond_dev ? bond_dev->name : "None", new_mtu);  	/* Can't hold bond->lock with bh disabled here since  	 * some base drivers panic. On the other hand we can't @@ -4166,11 +3445,9 @@ static int bond_change_mtu(struct net_device *bond_dev, int new_mtu)  	 * call to the base driver.  	 */ -	bond_for_each_slave(bond, slave, i) { -		pr_debug("s %p s->p %p c_m %p\n", -			 slave, -			 slave->prev, -			 slave->dev->netdev_ops->ndo_change_mtu); +	bond_for_each_slave(bond, slave, iter) { +		pr_debug("s %p c_m %p\n", +			 slave, slave->dev->netdev_ops->ndo_change_mtu);  		res = dev_set_mtu(slave->dev, new_mtu); @@ -4194,14 +3471,16 @@ static int bond_change_mtu(struct net_device *bond_dev, int new_mtu)  unwind:  	/* unwind from head to the slave that failed */ -	stop_at = slave; -	bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) { +	bond_for_each_slave(bond, rollback_slave, iter) {  		int tmp_res; -		tmp_res = dev_set_mtu(slave->dev, bond_dev->mtu); +		if (rollback_slave == slave) +			break; + +		tmp_res = dev_set_mtu(rollback_slave->dev, bond_dev->mtu);  		if (tmp_res) {  			pr_debug("unwind err %d dev %s\n", -				 tmp_res, slave->dev->name); +				 tmp_res, rollback_slave->dev->name);  		}  	} @@ -4218,23 +3497,23 @@ unwind:  static int bond_set_mac_address(struct net_device *bond_dev, void *addr)  {  	struct bonding *bond = netdev_priv(bond_dev); +	struct slave *slave, *rollback_slave;  	struct sockaddr *sa = addr, tmp_sa; -	struct slave *slave, *stop_at; +	struct list_head *iter;  	int res = 0; -	int i; -	if (bond->params.mode == BOND_MODE_ALB) +	if (BOND_MODE(bond) == BOND_MODE_ALB)  		return bond_alb_set_mac_address(bond_dev, addr);  	pr_debug("bond=%p, name=%s\n",  		 bond, bond_dev ? bond_dev->name : "None"); -	/* -	 * If fail_over_mac is set to active, do nothing and return -	 * success.  Returning an error causes ifenslave to fail. +	/* If fail_over_mac is enabled, do nothing and return success. +	 * Returning an error causes ifenslave to fail.  	 */ -	if (bond->params.fail_over_mac == BOND_FOM_ACTIVE) +	if (bond->params.fail_over_mac && +	    BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP)  		return 0;  	if (!is_valid_ether_addr(sa->sa_data)) @@ -4255,16 +3534,8 @@ static int bond_set_mac_address(struct net_device *bond_dev, void *addr)  	 * call to the base driver.  	 */ -	bond_for_each_slave(bond, slave, i) { -		const struct net_device_ops *slave_ops = slave->dev->netdev_ops; +	bond_for_each_slave(bond, slave, iter) {  		pr_debug("slave %p %s\n", slave, slave->dev->name); - -		if (slave_ops->ndo_set_mac_address == NULL) { -			res = -EOPNOTSUPP; -			pr_debug("EOPNOTSUPP %s\n", slave->dev->name); -			goto unwind; -		} -  		res = dev_set_mac_address(slave->dev, addr);  		if (res) {  			/* TODO: consider downing the slave @@ -4287,82 +3558,122 @@ unwind:  	tmp_sa.sa_family = bond_dev->type;  	/* unwind from head to the slave that failed */ -	stop_at = slave; -	bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) { +	bond_for_each_slave(bond, rollback_slave, iter) {  		int tmp_res; -		tmp_res = dev_set_mac_address(slave->dev, &tmp_sa); +		if (rollback_slave == slave) +			break; + +		tmp_res = dev_set_mac_address(rollback_slave->dev, &tmp_sa);  		if (tmp_res) {  			pr_debug("unwind err %d dev %s\n", -				 tmp_res, slave->dev->name); +				 tmp_res, rollback_slave->dev->name);  		}  	}  	return res;  } +/** + * bond_xmit_slave_id - transmit skb through slave with slave_id + * @bond: bonding device that is transmitting + * @skb: buffer to transmit + * @slave_id: slave id up to slave_cnt-1 through which to transmit + * + * This function tries to transmit through slave with slave_id but in case + * it fails, it tries to find the first available slave for transmission. + * The skb is consumed in all cases, thus the function is void. + */ +static void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id) +{ +	struct list_head *iter; +	struct slave *slave; +	int i = slave_id; + +	/* Here we start from the slave with slave_id */ +	bond_for_each_slave_rcu(bond, slave, iter) { +		if (--i < 0) { +			if (bond_slave_can_tx(slave)) { +				bond_dev_queue_xmit(bond, skb, slave->dev); +				return; +			} +		} +	} + +	/* Here we start from the first slave up to slave_id */ +	i = slave_id; +	bond_for_each_slave_rcu(bond, slave, iter) { +		if (--i < 0) +			break; +		if (bond_slave_can_tx(slave)) { +			bond_dev_queue_xmit(bond, skb, slave->dev); +			return; +		} +	} +	/* no slave that can tx has been found */ +	dev_kfree_skb_any(skb); +} + +/** + * bond_rr_gen_slave_id - generate slave id based on packets_per_slave + * @bond: bonding device to use + * + * Based on the value of the bonding device's packets_per_slave parameter + * this function generates a slave id, which is usually used as the next + * slave to transmit through. + */ +static u32 bond_rr_gen_slave_id(struct bonding *bond) +{ +	u32 slave_id; +	struct reciprocal_value reciprocal_packets_per_slave; +	int packets_per_slave = bond->params.packets_per_slave; + +	switch (packets_per_slave) { +	case 0: +		slave_id = prandom_u32(); +		break; +	case 1: +		slave_id = bond->rr_tx_counter; +		break; +	default: +		reciprocal_packets_per_slave = +			bond->params.reciprocal_packets_per_slave; +		slave_id = reciprocal_divide(bond->rr_tx_counter, +					     reciprocal_packets_per_slave); +		break; +	} +	bond->rr_tx_counter++; + +	return slave_id; +} +  static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev); -	struct slave *slave, *start_at; -	int i, slave_no, res = 1;  	struct iphdr *iph = ip_hdr(skb); +	struct slave *slave; +	u32 slave_id; -	read_lock(&bond->lock); - -	if (!BOND_IS_OK(bond)) -		goto out; -	/* -	 * Start with the curr_active_slave that joined the bond as the +	/* Start with the curr_active_slave that joined the bond as the  	 * default for sending IGMP traffic.  For failover purposes one  	 * needs to maintain some consistency for the interface that will  	 * send the join/membership reports.  The curr_active_slave found  	 * will send all of this type of traffic.  	 */ -	if ((iph->protocol == IPPROTO_IGMP) && -	    (skb->protocol == htons(ETH_P_IP))) { - -		read_lock(&bond->curr_slave_lock); -		slave = bond->curr_active_slave; -		read_unlock(&bond->curr_slave_lock); - -		if (!slave) -			goto out; +	if (iph->protocol == IPPROTO_IGMP && skb->protocol == htons(ETH_P_IP)) { +		slave = rcu_dereference(bond->curr_active_slave); +		if (slave && bond_slave_can_tx(slave)) +			bond_dev_queue_xmit(bond, skb, slave->dev); +		else +			bond_xmit_slave_id(bond, skb, 0);  	} else { -		/* -		 * Concurrent TX may collide on rr_tx_counter; we accept -		 * that as being rare enough not to justify using an -		 * atomic op here. -		 */ -		slave_no = bond->rr_tx_counter++ % bond->slave_cnt; - -		bond_for_each_slave(bond, slave, i) { -			slave_no--; -			if (slave_no < 0) -				break; -		} -	} - -	start_at = slave; -	bond_for_each_slave_from(bond, slave, i, start_at) { -		if (IS_UP(slave->dev) && -		    (slave->link == BOND_LINK_UP) && -		    (slave->state == BOND_STATE_ACTIVE)) { -			res = bond_dev_queue_xmit(bond, skb, slave->dev); -			break; -		} +		slave_id = bond_rr_gen_slave_id(bond); +		bond_xmit_slave_id(bond, skb, slave_id % bond->slave_cnt);  	} -out: -	if (res) { -		/* no suitable interface, frame not sent */ -		dev_kfree_skb(skb); -	} -	read_unlock(&bond->lock);  	return NETDEV_TX_OK;  } -  /*   * in active-backup mode, we know that bond->curr_active_slave is always valid if   * the bond has a usable interface. @@ -4370,213 +3681,123 @@ out:  static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev); -	int res = 1; - -	read_lock(&bond->lock); -	read_lock(&bond->curr_slave_lock); - -	if (!BOND_IS_OK(bond)) -		goto out; - -	if (!bond->curr_active_slave) -		goto out; - -	res = bond_dev_queue_xmit(bond, skb, bond->curr_active_slave->dev); +	struct slave *slave; -out: -	if (res) -		/* no suitable interface, frame not sent */ -		dev_kfree_skb(skb); +	slave = rcu_dereference(bond->curr_active_slave); +	if (slave) +		bond_dev_queue_xmit(bond, skb, slave->dev); +	else +		dev_kfree_skb_any(skb); -	read_unlock(&bond->curr_slave_lock); -	read_unlock(&bond->lock);  	return NETDEV_TX_OK;  } -/* - * In bond_xmit_xor() , we determine the output device by using a pre- +/* In bond_xmit_xor() , we determine the output device by using a pre-   * determined xmit_hash_policy(), If the selected device is not enabled,   * find the next active slave.   */  static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev); -	struct slave *slave, *start_at; -	int slave_no; -	int i; -	int res = 1; - -	read_lock(&bond->lock); - -	if (!BOND_IS_OK(bond)) -		goto out; - -	slave_no = bond->xmit_hash_policy(skb, bond->slave_cnt); - -	bond_for_each_slave(bond, slave, i) { -		slave_no--; -		if (slave_no < 0) -			break; -	} - -	start_at = slave; -	bond_for_each_slave_from(bond, slave, i, start_at) { -		if (IS_UP(slave->dev) && -		    (slave->link == BOND_LINK_UP) && -		    (slave->state == BOND_STATE_ACTIVE)) { -			res = bond_dev_queue_xmit(bond, skb, slave->dev); -			break; -		} -	} +	bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb) % bond->slave_cnt); -out: -	if (res) { -		/* no suitable interface, frame not sent */ -		dev_kfree_skb(skb); -	} -	read_unlock(&bond->lock);  	return NETDEV_TX_OK;  } -/* - * in broadcast mode, we send everything to all usable interfaces. - */ +/* in broadcast mode, we send everything to all usable interfaces. */  static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev); -	struct slave *slave, *start_at; -	struct net_device *tx_dev = NULL; -	int i; -	int res = 1; - -	read_lock(&bond->lock); - -	if (!BOND_IS_OK(bond)) -		goto out; - -	read_lock(&bond->curr_slave_lock); -	start_at = bond->curr_active_slave; -	read_unlock(&bond->curr_slave_lock); - -	if (!start_at) -		goto out; +	struct slave *slave = NULL; +	struct list_head *iter; -	bond_for_each_slave_from(bond, slave, i, start_at) { -		if (IS_UP(slave->dev) && -		    (slave->link == BOND_LINK_UP) && -		    (slave->state == BOND_STATE_ACTIVE)) { -			if (tx_dev) { -				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); -				if (!skb2) { -					pr_err("%s: Error: bond_xmit_broadcast(): skb_clone() failed\n", -					       bond_dev->name); -					continue; -				} +	bond_for_each_slave_rcu(bond, slave, iter) { +		if (bond_is_last_slave(bond, slave)) +			break; +		if (bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) { +			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); -				res = bond_dev_queue_xmit(bond, skb2, tx_dev); -				if (res) { -					dev_kfree_skb(skb2); -					continue; -				} +			if (!skb2) { +				net_err_ratelimited("%s: Error: %s: skb_clone() failed\n", +						    bond_dev->name, __func__); +				continue;  			} -			tx_dev = slave->dev; +			/* bond_dev_queue_xmit always returns 0 */ +			bond_dev_queue_xmit(bond, skb2, slave->dev);  		}  	} +	if (slave && bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) +		bond_dev_queue_xmit(bond, skb, slave->dev); +	else +		dev_kfree_skb_any(skb); -	if (tx_dev) -		res = bond_dev_queue_xmit(bond, skb, tx_dev); - -out: -	if (res) -		/* no suitable interface, frame not sent */ -		dev_kfree_skb(skb); - -	/* frame sent to all suitable interfaces */ -	read_unlock(&bond->lock);  	return NETDEV_TX_OK;  }  /*------------------------- Device initialization ---------------------------*/ -static void bond_set_xmit_hash_policy(struct bonding *bond) -{ -	switch (bond->params.xmit_policy) { -	case BOND_XMIT_POLICY_LAYER23: -		bond->xmit_hash_policy = bond_xmit_hash_policy_l23; -		break; -	case BOND_XMIT_POLICY_LAYER34: -		bond->xmit_hash_policy = bond_xmit_hash_policy_l34; -		break; -	case BOND_XMIT_POLICY_LAYER2: -	default: -		bond->xmit_hash_policy = bond_xmit_hash_policy_l2; -		break; -	} -} -  /*   * Lookup the slave that corresponds to a qid   */  static inline int bond_slave_override(struct bonding *bond,  				      struct sk_buff *skb)  { -	int i, res = 1;  	struct slave *slave = NULL; -	struct slave *check_slave; +	struct list_head *iter; -	read_lock(&bond->lock); - -	if (!BOND_IS_OK(bond) || !skb->queue_mapping) -		goto out; +	if (!skb->queue_mapping) +		return 1;  	/* Find out if any slaves have the same mapping as this skb. */ -	bond_for_each_slave(bond, check_slave, i) { -		if (check_slave->queue_id == skb->queue_mapping) { -			slave = check_slave; +	bond_for_each_slave_rcu(bond, slave, iter) { +		if (slave->queue_id == skb->queue_mapping) { +			if (bond_slave_can_tx(slave)) { +				bond_dev_queue_xmit(bond, skb, slave->dev); +				return 0; +			} +			/* If the slave isn't UP, use default transmit policy. */  			break;  		}  	} -	/* If the slave isn't UP, use default transmit policy. */ -	if (slave && slave->queue_id && IS_UP(slave->dev) && -	    (slave->link == BOND_LINK_UP)) { -		res = bond_dev_queue_xmit(bond, skb, slave->dev); -	} - -out: -	read_unlock(&bond->lock); -	return res; +	return 1;  } -static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb) + +static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb, +			     void *accel_priv, select_queue_fallback_t fallback)  {  	/*  	 * This helper function exists to help dev_pick_tx get the correct -	 * destination queue.  Using a helper function skips the a call to +	 * destination queue.  Using a helper function skips a call to  	 * skb_tx_hash and will put the skbs in the queue we expect on their  	 * way down to the bonding driver.  	 */ -	return skb->queue_mapping; -} - -static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev) -{ -	struct bonding *bond = netdev_priv(dev); +	u16 txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0;  	/* -	 * If we risk deadlock from transmitting this in the -	 * netpoll path, tell netpoll to queue the frame for later tx +	 * Save the original txq to restore before passing to the driver  	 */ -	if (is_netpoll_tx_blocked(dev)) -		return NETDEV_TX_BUSY; +	qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping; -	if (TX_QUEUE_OVERRIDE(bond->params.mode)) { -		if (!bond_slave_override(bond, skb)) -			return NETDEV_TX_OK; +	if (unlikely(txq >= dev->real_num_tx_queues)) { +		do { +			txq -= dev->real_num_tx_queues; +		} while (txq >= dev->real_num_tx_queues);  	} +	return txq; +} -	switch (bond->params.mode) { +static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ +	struct bonding *bond = netdev_priv(dev); + +	if (bond_should_override_tx_queue(bond) && +	    !bond_slave_override(bond, skb)) +		return NETDEV_TX_OK; + +	switch (BOND_MODE(bond)) {  	case BOND_MODE_ROUNDROBIN:  		return bond_xmit_roundrobin(skb, dev);  	case BOND_MODE_ACTIVEBACKUP: @@ -4588,69 +3809,86 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)  	case BOND_MODE_8023AD:  		return bond_3ad_xmit_xor(skb, dev);  	case BOND_MODE_ALB: -	case BOND_MODE_TLB:  		return bond_alb_xmit(skb, dev); +	case BOND_MODE_TLB: +		return bond_tlb_xmit(skb, dev);  	default:  		/* Should never happen, mode already checked */  		pr_err("%s: Error: Unknown bonding mode %d\n", -		       dev->name, bond->params.mode); +		       dev->name, BOND_MODE(bond));  		WARN_ON_ONCE(1); -		dev_kfree_skb(skb); +		dev_kfree_skb_any(skb);  		return NETDEV_TX_OK;  	}  } +static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ +	struct bonding *bond = netdev_priv(dev); +	netdev_tx_t ret = NETDEV_TX_OK; -/* - * set bond mode specific net device operations - */ -void bond_set_mode_ops(struct bonding *bond, int mode) +	/* +	 * If we risk deadlock from transmitting this in the +	 * netpoll path, tell netpoll to queue the frame for later tx +	 */ +	if (unlikely(is_netpoll_tx_blocked(dev))) +		return NETDEV_TX_BUSY; + +	rcu_read_lock(); +	if (bond_has_slaves(bond)) +		ret = __bond_start_xmit(skb, dev); +	else +		dev_kfree_skb_any(skb); +	rcu_read_unlock(); + +	return ret; +} + +static int bond_ethtool_get_settings(struct net_device *bond_dev, +				     struct ethtool_cmd *ecmd)  { -	struct net_device *bond_dev = bond->dev; +	struct bonding *bond = netdev_priv(bond_dev); +	unsigned long speed = 0; +	struct list_head *iter; +	struct slave *slave; -	switch (mode) { -	case BOND_MODE_ROUNDROBIN: -		break; -	case BOND_MODE_ACTIVEBACKUP: -		break; -	case BOND_MODE_XOR: -		bond_set_xmit_hash_policy(bond); -		break; -	case BOND_MODE_BROADCAST: -		break; -	case BOND_MODE_8023AD: -		bond_set_master_3ad_flags(bond); -		bond_set_xmit_hash_policy(bond); -		break; -	case BOND_MODE_ALB: -		bond_set_master_alb_flags(bond); -		/* FALLTHRU */ -	case BOND_MODE_TLB: -		break; -	default: -		/* Should never happen, mode already checked */ -		pr_err("%s: Error: Unknown bonding mode %d\n", -		       bond_dev->name, mode); -		break; +	ecmd->duplex = DUPLEX_UNKNOWN; +	ecmd->port = PORT_OTHER; + +	/* Since bond_slave_can_tx returns false for all inactive or down slaves, we +	 * do not need to check mode.  Though link speed might not represent +	 * the true receive or transmit bandwidth (not all modes are symmetric) +	 * this is an accurate maximum. +	 */ +	read_lock(&bond->lock); +	bond_for_each_slave(bond, slave, iter) { +		if (bond_slave_can_tx(slave)) { +			if (slave->speed != SPEED_UNKNOWN) +				speed += slave->speed; +			if (ecmd->duplex == DUPLEX_UNKNOWN && +			    slave->duplex != DUPLEX_UNKNOWN) +				ecmd->duplex = slave->duplex; +		}  	} +	ethtool_cmd_speed_set(ecmd, speed ? : SPEED_UNKNOWN); +	read_unlock(&bond->lock); + +	return 0;  }  static void bond_ethtool_get_drvinfo(struct net_device *bond_dev, -				    struct ethtool_drvinfo *drvinfo) +				     struct ethtool_drvinfo *drvinfo)  { -	strncpy(drvinfo->driver, DRV_NAME, 32); -	strncpy(drvinfo->version, DRV_VERSION, 32); -	snprintf(drvinfo->fw_version, 32, "%d", BOND_ABI_VERSION); +	strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver)); +	strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version)); +	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d", +		 BOND_ABI_VERSION);  }  static const struct ethtool_ops bond_ethtool_ops = {  	.get_drvinfo		= bond_ethtool_get_drvinfo, +	.get_settings		= bond_ethtool_get_settings,  	.get_link		= ethtool_op_get_link, -	.get_tx_csum		= ethtool_op_get_tx_csum, -	.get_sg			= ethtool_op_get_sg, -	.get_tso		= ethtool_op_get_tso, -	.get_ufo		= ethtool_op_get_ufo, -	.get_flags		= ethtool_op_get_flags,  };  static const struct net_device_ops bond_netdev_ops = { @@ -4662,17 +3900,25 @@ static const struct net_device_ops bond_netdev_ops = {  	.ndo_select_queue	= bond_select_queue,  	.ndo_get_stats64	= bond_get_stats,  	.ndo_do_ioctl		= bond_do_ioctl, -	.ndo_set_multicast_list	= bond_set_multicast_list, +	.ndo_change_rx_flags	= bond_change_rx_flags, +	.ndo_set_rx_mode	= bond_set_rx_mode,  	.ndo_change_mtu		= bond_change_mtu, -	.ndo_set_mac_address 	= bond_set_mac_address, +	.ndo_set_mac_address	= bond_set_mac_address,  	.ndo_neigh_setup	= bond_neigh_setup, -	.ndo_vlan_rx_register	= bond_vlan_rx_register, -	.ndo_vlan_rx_add_vid 	= bond_vlan_rx_add_vid, +	.ndo_vlan_rx_add_vid	= bond_vlan_rx_add_vid,  	.ndo_vlan_rx_kill_vid	= bond_vlan_rx_kill_vid,  #ifdef CONFIG_NET_POLL_CONTROLLER +	.ndo_netpoll_setup	= bond_netpoll_setup,  	.ndo_netpoll_cleanup	= bond_netpoll_cleanup,  	.ndo_poll_controller	= bond_poll_controller,  #endif +	.ndo_add_slave		= bond_enslave, +	.ndo_del_slave		= bond_release, +	.ndo_fix_features	= bond_fix_features, +}; + +static const struct device_type bond_type = { +	.name = "bond",  };  static void bond_destructor(struct net_device *bond_dev) @@ -4683,36 +3929,32 @@ static void bond_destructor(struct net_device *bond_dev)  	free_netdev(bond_dev);  } -static void bond_setup(struct net_device *bond_dev) +void bond_setup(struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev);  	/* initialize rwlocks */  	rwlock_init(&bond->lock);  	rwlock_init(&bond->curr_slave_lock); -  	bond->params = bonding_defaults;  	/* Initialize pointers */  	bond->dev = bond_dev; -	INIT_LIST_HEAD(&bond->vlan_list);  	/* Initialize the device entry points */  	ether_setup(bond_dev);  	bond_dev->netdev_ops = &bond_netdev_ops;  	bond_dev->ethtool_ops = &bond_ethtool_ops; -	bond_set_mode_ops(bond, bond->params.mode);  	bond_dev->destructor = bond_destructor; +	SET_NETDEV_DEVTYPE(bond_dev, &bond_type); +  	/* Initialize the device options */  	bond_dev->tx_queue_len = 0;  	bond_dev->flags |= IFF_MASTER|IFF_MULTICAST; -	bond_dev->priv_flags |= IFF_BONDING; -	bond_dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; - -	if (bond->params.arp_interval) -		bond_dev->priv_flags |= IFF_MASTER_ARPMON; +	bond_dev->priv_flags |= IFF_BONDING | IFF_UNICAST_FLT; +	bond_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);  	/* At first, we block adding VLANs. That's the only way to  	 * prevent problems that occur when adding VLANs over an @@ -4731,38 +3973,18 @@ static void bond_setup(struct net_device *bond_dev)  	 * when there are slaves that are not hw accel  	 * capable  	 */ -	bond_dev->features |= (NETIF_F_HW_VLAN_TX | -			       NETIF_F_HW_VLAN_RX | -			       NETIF_F_HW_VLAN_FILTER); - -	/* By default, we enable GRO on bonding devices. -	 * Actual support requires lowlevel drivers are GRO ready. -	 */ -	bond_dev->features |= NETIF_F_GRO; -} - -static void bond_work_cancel_all(struct bonding *bond) -{ -	write_lock_bh(&bond->lock); -	bond->kill_timers = 1; -	write_unlock_bh(&bond->lock); -	if (bond->params.miimon && delayed_work_pending(&bond->mii_work)) -		cancel_delayed_work(&bond->mii_work); +	/* Don't allow bond devices to change network namespaces. */ +	bond_dev->features |= NETIF_F_NETNS_LOCAL; -	if (bond->params.arp_interval && delayed_work_pending(&bond->arp_work)) -		cancel_delayed_work(&bond->arp_work); +	bond_dev->hw_features = BOND_VLAN_FEATURES | +				NETIF_F_HW_VLAN_CTAG_TX | +				NETIF_F_HW_VLAN_CTAG_RX | +				NETIF_F_HW_VLAN_CTAG_FILTER; -	if (bond->params.mode == BOND_MODE_ALB && -	    delayed_work_pending(&bond->alb_work)) -		cancel_delayed_work(&bond->alb_work); - -	if (bond->params.mode == BOND_MODE_8023AD && -	    delayed_work_pending(&bond->ad_work)) -		cancel_delayed_work(&bond->ad_work); - -	if (delayed_work_pending(&bond->mcast_work)) -		cancel_delayed_work(&bond->mcast_work); +	bond_dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_HW_CSUM); +	bond_dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; +	bond_dev->features |= bond_dev->hw_features;  }  /* @@ -4772,92 +3994,59 @@ static void bond_work_cancel_all(struct bonding *bond)  static void bond_uninit(struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev); -	struct vlan_entry *vlan, *tmp; +	struct list_head *iter; +	struct slave *slave;  	bond_netpoll_cleanup(bond_dev);  	/* Release the bonded slaves */ -	bond_release_all(bond_dev); +	bond_for_each_slave(bond, slave, iter) +		__bond_release_one(bond_dev, slave->dev, true); +	pr_info("%s: Released all slaves\n", bond_dev->name);  	list_del(&bond->bond_list); -	bond_work_cancel_all(bond); - -	bond_remove_proc_entry(bond); - -	__hw_addr_flush(&bond->mc_list); - -	list_for_each_entry_safe(vlan, tmp, &bond->vlan_list, vlan_list) { -		list_del(&vlan->vlan_list); -		kfree(vlan); -	} +	bond_debug_unregister(bond);  }  /*------------------------- Module initialization ---------------------------*/ -/* - * Convert string input module parms.  Accept either the - * number of the mode or its string name.  A bit complicated because - * some mode names are substrings of other names, and calls from sysfs - * may have whitespace in the name (trailing newlines, for example). - */ -int bond_parse_parm(const char *buf, const struct bond_parm_tbl *tbl) -{ -	int modeint = -1, i, rv; -	char *p, modestr[BOND_MAX_MODENAME_LEN + 1] = { 0, }; - -	for (p = (char *)buf; *p; p++) -		if (!(isdigit(*p) || isspace(*p))) -			break; - -	if (*p) -		rv = sscanf(buf, "%20s", modestr); -	else -		rv = sscanf(buf, "%d", &modeint); - -	if (!rv) -		return -1; - -	for (i = 0; tbl[i].modename; i++) { -		if (modeint == tbl[i].mode) -			return tbl[i].mode; -		if (strcmp(modestr, tbl[i].modename) == 0) -			return tbl[i].mode; -	} - -	return -1; -} -  static int bond_check_params(struct bond_params *params)  { -	int arp_validate_value, fail_over_mac_value, primary_reselect_value; +	int arp_validate_value, fail_over_mac_value, primary_reselect_value, i; +	struct bond_opt_value newval; +	const struct bond_opt_value *valptr; +	int arp_all_targets_value;  	/*  	 * Convert string parameters.  	 */  	if (mode) { -		bond_mode = bond_parse_parm(mode, bond_mode_tbl); -		if (bond_mode == -1) { -			pr_err("Error: Invalid bonding mode \"%s\"\n", -			       mode == NULL ? "NULL" : mode); +		bond_opt_initstr(&newval, mode); +		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_MODE), &newval); +		if (!valptr) { +			pr_err("Error: Invalid bonding mode \"%s\"\n", mode);  			return -EINVAL;  		} +		bond_mode = valptr->value;  	}  	if (xmit_hash_policy) {  		if ((bond_mode != BOND_MODE_XOR) && -		    (bond_mode != BOND_MODE_8023AD)) { +		    (bond_mode != BOND_MODE_8023AD) && +		    (bond_mode != BOND_MODE_TLB)) {  			pr_info("xmit_hash_policy param is irrelevant in mode %s\n", -			       bond_mode_name(bond_mode)); +				bond_mode_name(bond_mode));  		} else { -			xmit_hashtype = bond_parse_parm(xmit_hash_policy, -							xmit_hashtype_tbl); -			if (xmit_hashtype == -1) { +			bond_opt_initstr(&newval, xmit_hash_policy); +			valptr = bond_opt_parse(bond_opt_get(BOND_OPT_XMIT_HASH), +						&newval); +			if (!valptr) {  				pr_err("Error: Invalid xmit_hash_policy \"%s\"\n", -				       xmit_hash_policy == NULL ? "NULL" :  				       xmit_hash_policy);  				return -EINVAL;  			} +			xmit_hashtype = valptr->value;  		}  	} @@ -4866,110 +4055,101 @@ static int bond_check_params(struct bond_params *params)  			pr_info("lacp_rate param is irrelevant in mode %s\n",  				bond_mode_name(bond_mode));  		} else { -			lacp_fast = bond_parse_parm(lacp_rate, bond_lacp_tbl); -			if (lacp_fast == -1) { +			bond_opt_initstr(&newval, lacp_rate); +			valptr = bond_opt_parse(bond_opt_get(BOND_OPT_LACP_RATE), +						&newval); +			if (!valptr) {  				pr_err("Error: Invalid lacp rate \"%s\"\n", -				       lacp_rate == NULL ? "NULL" : lacp_rate); +				       lacp_rate);  				return -EINVAL;  			} +			lacp_fast = valptr->value;  		}  	}  	if (ad_select) { -		params->ad_select = bond_parse_parm(ad_select, ad_select_tbl); -		if (params->ad_select == -1) { -			pr_err("Error: Invalid ad_select \"%s\"\n", -			       ad_select == NULL ? "NULL" : ad_select); +		bond_opt_initstr(&newval, ad_select); +		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_AD_SELECT), +					&newval); +		if (!valptr) { +			pr_err("Error: Invalid ad_select \"%s\"\n", ad_select);  			return -EINVAL;  		} - -		if (bond_mode != BOND_MODE_8023AD) { -			pr_warning("ad_select param only affects 802.3ad mode\n"); -		} +		params->ad_select = valptr->value; +		if (bond_mode != BOND_MODE_8023AD) +			pr_warn("ad_select param only affects 802.3ad mode\n");  	} else {  		params->ad_select = BOND_AD_STABLE;  	}  	if (max_bonds < 0) { -		pr_warning("Warning: max_bonds (%d) not in range %d-%d, so it was reset to BOND_DEFAULT_MAX_BONDS (%d)\n", -			   max_bonds, 0, INT_MAX, BOND_DEFAULT_MAX_BONDS); +		pr_warn("Warning: max_bonds (%d) not in range %d-%d, so it was reset to BOND_DEFAULT_MAX_BONDS (%d)\n", +			max_bonds, 0, INT_MAX, BOND_DEFAULT_MAX_BONDS);  		max_bonds = BOND_DEFAULT_MAX_BONDS;  	}  	if (miimon < 0) { -		pr_warning("Warning: miimon module parameter (%d), not in range 0-%d, so it was reset to %d\n", -			   miimon, INT_MAX, BOND_LINK_MON_INTERV); -		miimon = BOND_LINK_MON_INTERV; +		pr_warn("Warning: miimon module parameter (%d), not in range 0-%d, so it was reset to 0\n", +			miimon, INT_MAX); +		miimon = 0;  	}  	if (updelay < 0) { -		pr_warning("Warning: updelay module parameter (%d), not in range 0-%d, so it was reset to 0\n", -			   updelay, INT_MAX); +		pr_warn("Warning: updelay module parameter (%d), not in range 0-%d, so it was reset to 0\n", +			updelay, INT_MAX);  		updelay = 0;  	}  	if (downdelay < 0) { -		pr_warning("Warning: downdelay module parameter (%d), not in range 0-%d, so it was reset to 0\n", -			   downdelay, INT_MAX); +		pr_warn("Warning: downdelay module parameter (%d), not in range 0-%d, so it was reset to 0\n", +			downdelay, INT_MAX);  		downdelay = 0;  	}  	if ((use_carrier != 0) && (use_carrier != 1)) { -		pr_warning("Warning: use_carrier module parameter (%d), not of valid value (0/1), so it was set to 1\n", -			   use_carrier); +		pr_warn("Warning: use_carrier module parameter (%d), not of valid value (0/1), so it was set to 1\n", +			use_carrier);  		use_carrier = 1;  	} -	if (num_grat_arp < 0 || num_grat_arp > 255) { -		pr_warning("Warning: num_grat_arp (%d) not in range 0-255 so it was reset to 1\n", -			   num_grat_arp); -		num_grat_arp = 1; +	if (num_peer_notif < 0 || num_peer_notif > 255) { +		pr_warn("Warning: num_grat_arp/num_unsol_na (%d) not in range 0-255 so it was reset to 1\n", +			num_peer_notif); +		num_peer_notif = 1;  	} -	if (num_unsol_na < 0 || num_unsol_na > 255) { -		pr_warning("Warning: num_unsol_na (%d) not in range 0-255 so it was reset to 1\n", -			   num_unsol_na); -		num_unsol_na = 1; -	} - -	/* reset values for 802.3ad */ -	if (bond_mode == BOND_MODE_8023AD) { +	/* reset values for 802.3ad/TLB/ALB */ +	if (!bond_mode_uses_arp(bond_mode)) {  		if (!miimon) { -			pr_warning("Warning: miimon must be specified, otherwise bonding will not detect link failure, speed and duplex which are essential for 802.3ad operation\n"); -			pr_warning("Forcing miimon to 100msec\n"); -			miimon = 100; +			pr_warn("Warning: miimon must be specified, otherwise bonding will not detect link failure, speed and duplex which are essential for 802.3ad operation\n"); +			pr_warn("Forcing miimon to 100msec\n"); +			miimon = BOND_DEFAULT_MIIMON;  		}  	}  	if (tx_queues < 1 || tx_queues > 255) { -		pr_warning("Warning: tx_queues (%d) should be between " -			   "1 and 255, resetting to %d\n", -			   tx_queues, BOND_DEFAULT_TX_QUEUES); +		pr_warn("Warning: tx_queues (%d) should be between 1 and 255, resetting to %d\n", +			tx_queues, BOND_DEFAULT_TX_QUEUES);  		tx_queues = BOND_DEFAULT_TX_QUEUES;  	}  	if ((all_slaves_active != 0) && (all_slaves_active != 1)) { -		pr_warning("Warning: all_slaves_active module parameter (%d), " -			   "not of valid value (0/1), so it was set to " -			   "0\n", all_slaves_active); +		pr_warn("Warning: all_slaves_active module parameter (%d), not of valid value (0/1), so it was set to 0\n", +			all_slaves_active);  		all_slaves_active = 0;  	}  	if (resend_igmp < 0 || resend_igmp > 255) { -		pr_warning("Warning: resend_igmp (%d) should be between " -			   "0 and 255, resetting to %d\n", -			   resend_igmp, BOND_DEFAULT_RESEND_IGMP); +		pr_warn("Warning: resend_igmp (%d) should be between 0 and 255, resetting to %d\n", +			resend_igmp, BOND_DEFAULT_RESEND_IGMP);  		resend_igmp = BOND_DEFAULT_RESEND_IGMP;  	} -	/* reset values for TLB/ALB */ -	if ((bond_mode == BOND_MODE_TLB) || -	    (bond_mode == BOND_MODE_ALB)) { -		if (!miimon) { -			pr_warning("Warning: miimon must be specified, otherwise bonding will not detect link failure and link speed which are essential for TLB/ALB load balancing\n"); -			pr_warning("Forcing miimon to 100msec\n"); -			miimon = 100; -		} +	bond_opt_initval(&newval, packets_per_slave); +	if (!bond_opt_parse(bond_opt_get(BOND_OPT_PACKETS_PER_SLAVE), &newval)) { +		pr_warn("Warning: packets_per_slave (%d) should be between 0 and %u resetting to 1\n", +			packets_per_slave, USHRT_MAX); +		packets_per_slave = 1;  	}  	if (bond_mode == BOND_MODE_ALB) { @@ -4982,149 +4162,171 @@ static int bond_check_params(struct bond_params *params)  			/* just warn the user the up/down delay will have  			 * no effect since miimon is zero...  			 */ -			pr_warning("Warning: miimon module parameter not set and updelay (%d) or downdelay (%d) module parameter is set; updelay and downdelay have no effect unless miimon is set\n", -				   updelay, downdelay); +			pr_warn("Warning: miimon module parameter not set and updelay (%d) or downdelay (%d) module parameter is set; updelay and downdelay have no effect unless miimon is set\n", +				updelay, downdelay);  		}  	} else {  		/* don't allow arp monitoring */  		if (arp_interval) { -			pr_warning("Warning: miimon (%d) and arp_interval (%d) can't be used simultaneously, disabling ARP monitoring\n", -				   miimon, arp_interval); +			pr_warn("Warning: miimon (%d) and arp_interval (%d) can't be used simultaneously, disabling ARP monitoring\n", +				miimon, arp_interval);  			arp_interval = 0;  		}  		if ((updelay % miimon) != 0) { -			pr_warning("Warning: updelay (%d) is not a multiple of miimon (%d), updelay rounded to %d ms\n", -				   updelay, miimon, -				   (updelay / miimon) * miimon); +			pr_warn("Warning: updelay (%d) is not a multiple of miimon (%d), updelay rounded to %d ms\n", +				updelay, miimon, (updelay / miimon) * miimon);  		}  		updelay /= miimon;  		if ((downdelay % miimon) != 0) { -			pr_warning("Warning: downdelay (%d) is not a multiple of miimon (%d), downdelay rounded to %d ms\n", -				   downdelay, miimon, -				   (downdelay / miimon) * miimon); +			pr_warn("Warning: downdelay (%d) is not a multiple of miimon (%d), downdelay rounded to %d ms\n", +				downdelay, miimon, +				(downdelay / miimon) * miimon);  		}  		downdelay /= miimon;  	}  	if (arp_interval < 0) { -		pr_warning("Warning: arp_interval module parameter (%d) , not in range 0-%d, so it was reset to %d\n", -			   arp_interval, INT_MAX, BOND_LINK_ARP_INTERV); -		arp_interval = BOND_LINK_ARP_INTERV; +		pr_warn("Warning: arp_interval module parameter (%d), not in range 0-%d, so it was reset to 0\n", +			arp_interval, INT_MAX); +		arp_interval = 0;  	} -	for (arp_ip_count = 0; -	     (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[arp_ip_count]; -	     arp_ip_count++) { +	for (arp_ip_count = 0, i = 0; +	     (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[i]; i++) {  		/* not complete check, but should be good enough to  		   catch mistakes */ -		if (!isdigit(arp_ip_target[arp_ip_count][0])) { -			pr_warning("Warning: bad arp_ip_target module parameter (%s), ARP monitoring will not be performed\n", -				   arp_ip_target[arp_ip_count]); +		__be32 ip; +		if (!in4_pton(arp_ip_target[i], -1, (u8 *)&ip, -1, NULL) || +		    !bond_is_ip_target_ok(ip)) { +			pr_warn("Warning: bad arp_ip_target module parameter (%s), ARP monitoring will not be performed\n", +				arp_ip_target[i]);  			arp_interval = 0;  		} else { -			__be32 ip = in_aton(arp_ip_target[arp_ip_count]); -			arp_target[arp_ip_count] = ip; +			if (bond_get_targets_ip(arp_target, ip) == -1) +				arp_target[arp_ip_count++] = ip; +			else +				pr_warn("Warning: duplicate address %pI4 in arp_ip_target, skipping\n", +					&ip);  		}  	}  	if (arp_interval && !arp_ip_count) {  		/* don't allow arping if no arp_ip_target given... */ -		pr_warning("Warning: arp_interval module parameter (%d) specified without providing an arp_ip_target parameter, arp_interval was reset to 0\n", -			   arp_interval); +		pr_warn("Warning: arp_interval module parameter (%d) specified without providing an arp_ip_target parameter, arp_interval was reset to 0\n", +			arp_interval);  		arp_interval = 0;  	}  	if (arp_validate) { -		if (bond_mode != BOND_MODE_ACTIVEBACKUP) { -			pr_err("arp_validate only supported in active-backup mode\n"); -			return -EINVAL; -		}  		if (!arp_interval) {  			pr_err("arp_validate requires arp_interval\n");  			return -EINVAL;  		} -		arp_validate_value = bond_parse_parm(arp_validate, -						     arp_validate_tbl); -		if (arp_validate_value == -1) { +		bond_opt_initstr(&newval, arp_validate); +		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_ARP_VALIDATE), +					&newval); +		if (!valptr) {  			pr_err("Error: invalid arp_validate \"%s\"\n", -			       arp_validate == NULL ? "NULL" : arp_validate); +			       arp_validate);  			return -EINVAL;  		} -	} else +		arp_validate_value = valptr->value; +	} else {  		arp_validate_value = 0; +	} + +	arp_all_targets_value = 0; +	if (arp_all_targets) { +		bond_opt_initstr(&newval, arp_all_targets); +		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_ARP_ALL_TARGETS), +					&newval); +		if (!valptr) { +			pr_err("Error: invalid arp_all_targets_value \"%s\"\n", +			       arp_all_targets); +			arp_all_targets_value = 0; +		} else { +			arp_all_targets_value = valptr->value; +		} +	}  	if (miimon) {  		pr_info("MII link monitoring set to %d ms\n", miimon);  	} else if (arp_interval) { -		int i; - +		valptr = bond_opt_get_val(BOND_OPT_ARP_VALIDATE, +					  arp_validate_value);  		pr_info("ARP monitoring set to %d ms, validate %s, with %d target(s):", -			arp_interval, -			arp_validate_tbl[arp_validate_value].modename, -			arp_ip_count); +			arp_interval, valptr->string, arp_ip_count);  		for (i = 0; i < arp_ip_count; i++) -			pr_info(" %s", arp_ip_target[i]); +			pr_cont(" %s", arp_ip_target[i]); -		pr_info("\n"); +		pr_cont("\n");  	} else if (max_bonds) {  		/* miimon and arp_interval not set, we need one so things  		 * work as expected, see bonding.txt for details  		 */ -		pr_warning("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details.\n"); +		pr_debug("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details\n");  	} -	if (primary && !USES_PRIMARY(bond_mode)) { +	if (primary && !bond_mode_uses_primary(bond_mode)) {  		/* currently, using a primary only makes sense  		 * in active backup, TLB or ALB modes  		 */ -		pr_warning("Warning: %s primary device specified but has no effect in %s mode\n", -			   primary, bond_mode_name(bond_mode)); +		pr_warn("Warning: %s primary device specified but has no effect in %s mode\n", +			primary, bond_mode_name(bond_mode));  		primary = NULL;  	}  	if (primary && primary_reselect) { -		primary_reselect_value = bond_parse_parm(primary_reselect, -							 pri_reselect_tbl); -		if (primary_reselect_value == -1) { +		bond_opt_initstr(&newval, primary_reselect); +		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_PRIMARY_RESELECT), +					&newval); +		if (!valptr) {  			pr_err("Error: Invalid primary_reselect \"%s\"\n", -			       primary_reselect == -					NULL ? "NULL" : primary_reselect); +			       primary_reselect);  			return -EINVAL;  		} +		primary_reselect_value = valptr->value;  	} else {  		primary_reselect_value = BOND_PRI_RESELECT_ALWAYS;  	}  	if (fail_over_mac) { -		fail_over_mac_value = bond_parse_parm(fail_over_mac, -						      fail_over_mac_tbl); -		if (fail_over_mac_value == -1) { +		bond_opt_initstr(&newval, fail_over_mac); +		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_FAIL_OVER_MAC), +					&newval); +		if (!valptr) {  			pr_err("Error: invalid fail_over_mac \"%s\"\n", -			       arp_validate == NULL ? "NULL" : arp_validate); +			       fail_over_mac);  			return -EINVAL;  		} - +		fail_over_mac_value = valptr->value;  		if (bond_mode != BOND_MODE_ACTIVEBACKUP) -			pr_warning("Warning: fail_over_mac only affects active-backup mode.\n"); +			pr_warn("Warning: fail_over_mac only affects active-backup mode\n");  	} else {  		fail_over_mac_value = BOND_FOM_NONE;  	} +	if (lp_interval == 0) { +		pr_warn("Warning: ip_interval must be between 1 and %d, so it was reset to %d\n", +			INT_MAX, BOND_ALB_DEFAULT_LP_INTERVAL); +		lp_interval = BOND_ALB_DEFAULT_LP_INTERVAL; +	} +  	/* fill params struct with the proper values */  	params->mode = bond_mode;  	params->xmit_policy = xmit_hashtype;  	params->miimon = miimon; -	params->num_grat_arp = num_grat_arp; -	params->num_unsol_na = num_unsol_na; +	params->num_peer_notif = num_peer_notif;  	params->arp_interval = arp_interval;  	params->arp_validate = arp_validate_value; +	params->arp_all_targets = arp_all_targets_value;  	params->updelay = updelay;  	params->downdelay = downdelay;  	params->use_carrier = use_carrier; @@ -5135,6 +4337,20 @@ static int bond_check_params(struct bond_params *params)  	params->tx_queues = tx_queues;  	params->all_slaves_active = all_slaves_active;  	params->resend_igmp = resend_igmp; +	params->min_links = min_links; +	params->lp_interval = lp_interval; +	params->packets_per_slave = packets_per_slave; +	params->tlb_dynamic_lb = 1; /* Default value */ +	if (packets_per_slave > 0) { +		params->reciprocal_packets_per_slave = +			reciprocal_value(packets_per_slave); +	} else { +		/* reciprocal_packets_per_slave is unused if +		 * packets_per_slave is 0 or 1, just initialize it +		 */ +		params->reciprocal_packets_per_slave = +			(struct reciprocal_value) { 0 }; +	}  	if (primary) {  		strncpy(params->primary, primary, IFNAMSIZ); @@ -5148,6 +4364,7 @@ static int bond_check_params(struct bond_params *params)  static struct lock_class_key bonding_netdev_xmit_lock_key;  static struct lock_class_key bonding_netdev_addr_lock_key; +static struct lock_class_key bonding_tx_busylock_key;  static void bond_set_lockdep_class_one(struct net_device *dev,  				       struct netdev_queue *txq, @@ -5162,6 +4379,7 @@ static void bond_set_lockdep_class(struct net_device *dev)  	lockdep_set_class(&dev->addr_list_lock,  			  &bonding_netdev_addr_lock_key);  	netdev_for_each_tx_queue(dev, bond_set_lockdep_class_one, NULL); +	dev->qdisc_tx_busylock = &bonding_tx_busylock_key;  }  /* @@ -5171,44 +4389,44 @@ static int bond_init(struct net_device *bond_dev)  {  	struct bonding *bond = netdev_priv(bond_dev);  	struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id); +	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));  	pr_debug("Begin bond_init for %s\n", bond_dev->name); +	/* +	 * Initialize locks that may be required during +	 * en/deslave operations.  All of the bond_open work +	 * (of which this is part) should really be moved to +	 * a phase prior to dev_open +	 */ +	spin_lock_init(&(bond_info->tx_hashtbl_lock)); +	spin_lock_init(&(bond_info->rx_hashtbl_lock)); +  	bond->wq = create_singlethread_workqueue(bond_dev->name);  	if (!bond->wq)  		return -ENOMEM;  	bond_set_lockdep_class(bond_dev); -	netif_carrier_off(bond_dev); - -	bond_create_proc_entry(bond);  	list_add_tail(&bond->bond_list, &bn->dev_list);  	bond_prepare_sysfs_group(bond); -	__hw_addr_init(&bond->mc_list); +	bond_debug_register(bond); + +	/* Ensure valid dev_addr */ +	if (is_zero_ether_addr(bond_dev->dev_addr) && +	    bond_dev->addr_assign_type == NET_ADDR_PERM) +		eth_hw_addr_random(bond_dev); +  	return 0;  } -static int bond_validate(struct nlattr *tb[], struct nlattr *data[]) +unsigned int bond_get_num_tx_queues(void)  { -	if (tb[IFLA_ADDRESS]) { -		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) -			return -EINVAL; -		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) -			return -EADDRNOTAVAIL; -	} -	return 0; +	return tx_queues;  } -static struct rtnl_link_ops bond_link_ops __read_mostly = { -	.kind		= "bond", -	.priv_size	= sizeof(struct bonding), -	.setup		= bond_setup, -	.validate	= bond_validate, -}; -  /* Create a new bond based on the specified name and bonding parameters.   * If name is NULL, obtain a suitable "bond%d" name for us.   * Caller must NOT hold rtnl_lock; we need to release it here before we @@ -5221,8 +4439,9 @@ int bond_create(struct net *net, const char *name)  	rtnl_lock(); -	bond_dev = alloc_netdev_mq(sizeof(struct bonding), name ? name : "", -				bond_setup, tx_queues); +	bond_dev = alloc_netdev_mq(sizeof(struct bonding), +				   name ? name : "bond%d", +				   bond_setup, tx_queues);  	if (!bond_dev) {  		pr_err("%s: eek! can't alloc netdev!\n", name);  		rtnl_unlock(); @@ -5232,24 +4451,10 @@ int bond_create(struct net *net, const char *name)  	dev_net_set(bond_dev, net);  	bond_dev->rtnl_link_ops = &bond_link_ops; -	if (!name) { -		res = dev_alloc_name(bond_dev, "bond%d"); -		if (res < 0) -			goto out; -	} else { -		/* -		 * If we're given a name to register -		 * we need to ensure that its not already -		 * registered -		 */ -		res = -EEXIST; -		if (__dev_get_by_name(net, name) != NULL) -			goto out; -	} -  	res = register_netdevice(bond_dev); -out: +	netif_carrier_off(bond_dev); +  	rtnl_unlock();  	if (res < 0)  		bond_destructor(bond_dev); @@ -5264,15 +4469,26 @@ static int __net_init bond_net_init(struct net *net)  	INIT_LIST_HEAD(&bn->dev_list);  	bond_create_proc_dir(bn); -	 +	bond_create_sysfs(bn); +  	return 0;  }  static void __net_exit bond_net_exit(struct net *net)  {  	struct bond_net *bn = net_generic(net, bond_net_id); +	struct bonding *bond, *tmp_bond; +	LIST_HEAD(list); +	bond_destroy_sysfs(bn);  	bond_destroy_proc_dir(bn); + +	/* Kill off any bonds created after unregistering bond rtnl ops */ +	rtnl_lock(); +	list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list) +		unregister_netdevice_queue(bond->dev, &list); +	unregister_netdevice_many(&list); +	rtnl_unlock();  }  static struct pernet_operations bond_net_ops = { @@ -5287,50 +4503,36 @@ static int __init bonding_init(void)  	int i;  	int res; -	pr_info("%s", version); +	pr_info("%s", bond_version);  	res = bond_check_params(&bonding_defaults);  	if (res)  		goto out; -#ifdef CONFIG_NET_POLL_CONTROLLER -	if (!alloc_cpumask_var(&netpoll_block_tx, GFP_KERNEL)) { -		res = -ENOMEM; -		goto out; -	} -#endif -  	res = register_pernet_subsys(&bond_net_ops);  	if (res)  		goto out; -	res = rtnl_link_register(&bond_link_ops); +	res = bond_netlink_init();  	if (res)  		goto err_link; +	bond_create_debugfs(); +  	for (i = 0; i < max_bonds; i++) {  		res = bond_create(&init_net, NULL);  		if (res)  			goto err;  	} -	res = bond_create_sysfs(); -	if (res) -		goto err; - -  	register_netdevice_notifier(&bond_netdev_notifier); -	register_inetaddr_notifier(&bond_inetaddr_notifier); -	bond_register_ipv6_notifier();  out:  	return res;  err: -	rtnl_link_unregister(&bond_link_ops); +	bond_destroy_debugfs(); +	bond_netlink_fini();  err_link:  	unregister_pernet_subsys(&bond_net_ops); -#ifdef CONFIG_NET_POLL_CONTROLLER -	free_cpumask_var(netpoll_block_tx); -#endif  	goto out;  } @@ -5338,16 +4540,17 @@ err_link:  static void __exit bonding_exit(void)  {  	unregister_netdevice_notifier(&bond_netdev_notifier); -	unregister_inetaddr_notifier(&bond_inetaddr_notifier); -	bond_unregister_ipv6_notifier(); -	bond_destroy_sysfs(); +	bond_destroy_debugfs(); -	rtnl_link_unregister(&bond_link_ops); +	bond_netlink_fini();  	unregister_pernet_subsys(&bond_net_ops);  #ifdef CONFIG_NET_POLL_CONTROLLER -	free_cpumask_var(netpoll_block_tx); +	/* +	 * Make sure we don't have an imbalance on our netpoll blocking +	 */ +	WARN_ON(atomic_read(&netpoll_block_tx));  #endif  } @@ -5357,4 +4560,3 @@ MODULE_LICENSE("GPL");  MODULE_VERSION(DRV_VERSION);  MODULE_DESCRIPTION(DRV_DESCRIPTION ", v" DRV_VERSION);  MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others"); -MODULE_ALIAS_RTNL_LINK("bond");  | 
