diff options
Diffstat (limited to 'drivers/net/bonding/bond_alb.c')
-rw-r--r-- | drivers/net/bonding/bond_alb.c | 1696 |
1 files changed, 1696 insertions, 0 deletions
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c new file mode 100644 index 00000000000..5ce606d9dc0 --- /dev/null +++ b/drivers/net/bonding/bond_alb.c @@ -0,0 +1,1696 @@ +/* + * Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called LICENSE. + * + * + * Changes: + * + * 2003/06/25 - Shmulik Hen <shmulik.hen at intel dot com> + * - Fixed signed/unsigned calculation errors that caused load sharing + * to collapse to one slave under very heavy UDP Tx stress. + * + * 2003/08/06 - Amir Noam <amir.noam at intel dot com> + * - Add support for setting bond's MAC address with special + * handling required for ALB/TLB. + * + * 2003/12/01 - Shmulik Hen <shmulik.hen at intel dot com> + * - Code cleanup and style changes + * + * 2003/12/30 - Amir Noam <amir.noam at intel dot com> + * - Fixed: Cannot remove and re-enslave the original active slave. + * + * 2004/01/14 - Shmulik Hen <shmulik.hen at intel dot com> + * - Add capability to tag self generated packets in ALB/TLB modes. + */ + +//#define BONDING_DEBUG 1 + +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/pkt_sched.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/timer.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/if_arp.h> +#include <linux/if_ether.h> +#include <linux/if_bonding.h> +#include <linux/if_vlan.h> +#include <linux/in.h> +#include <net/ipx.h> +#include <net/arp.h> +#include <asm/byteorder.h> +#include "bonding.h" +#include "bond_alb.h" + + +#define ALB_TIMER_TICKS_PER_SEC 10 /* should be a divisor of HZ */ +#define BOND_TLB_REBALANCE_INTERVAL 10 /* In seconds, periodic re-balancing. + * Used for division - never set + * to zero !!! + */ +#define BOND_ALB_LP_INTERVAL 1 /* In seconds, periodic send of + * learning packets to the switch + */ + +#define BOND_TLB_REBALANCE_TICKS (BOND_TLB_REBALANCE_INTERVAL \ + * ALB_TIMER_TICKS_PER_SEC) + +#define BOND_ALB_LP_TICKS (BOND_ALB_LP_INTERVAL \ + * ALB_TIMER_TICKS_PER_SEC) + +#define TLB_HASH_TABLE_SIZE 256 /* The size of the clients hash table. + * Note that this value MUST NOT be smaller + * because the key hash table is BYTE wide ! + */ + + +#define TLB_NULL_INDEX 0xffffffff +#define MAX_LP_BURST 3 + +/* rlb defs */ +#define RLB_HASH_TABLE_SIZE 256 +#define RLB_NULL_INDEX 0xffffffff +#define RLB_UPDATE_DELAY 2*ALB_TIMER_TICKS_PER_SEC /* 2 seconds */ +#define RLB_ARP_BURST_SIZE 2 +#define RLB_UPDATE_RETRY 3 /* 3-ticks - must be smaller than the rlb + * rebalance interval (5 min). + */ +/* RLB_PROMISC_TIMEOUT = 10 sec equals the time that the current slave is + * promiscuous after failover + */ +#define RLB_PROMISC_TIMEOUT 10*ALB_TIMER_TICKS_PER_SEC + +static const u8 mac_bcast[ETH_ALEN] = {0xff,0xff,0xff,0xff,0xff,0xff}; +static const int alb_delta_in_ticks = HZ / ALB_TIMER_TICKS_PER_SEC; + +#pragma pack(1) +struct learning_pkt { + u8 mac_dst[ETH_ALEN]; + u8 mac_src[ETH_ALEN]; + u16 type; + u8 padding[ETH_ZLEN - ETH_HLEN]; +}; + +struct arp_pkt { + u16 hw_addr_space; + u16 prot_addr_space; + u8 hw_addr_len; + u8 prot_addr_len; + u16 op_code; + u8 mac_src[ETH_ALEN]; /* sender hardware address */ + u32 ip_src; /* sender IP address */ + u8 mac_dst[ETH_ALEN]; /* target hardware address */ + u32 ip_dst; /* target IP address */ +}; +#pragma pack() + +/* Forward declaration */ +static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]); + +static inline u8 _simple_hash(u8 *hash_start, int hash_size) +{ + int i; + u8 hash = 0; + + for (i = 0; i < hash_size; i++) { + hash ^= hash_start[i]; + } + + return hash; +} + +/*********************** tlb specific functions ***************************/ + +static inline void _lock_tx_hashtbl(struct bonding *bond) +{ + spin_lock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); +} + +static inline void _unlock_tx_hashtbl(struct bonding *bond) +{ + spin_unlock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock)); +} + +/* Caller must hold tx_hashtbl lock */ +static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load) +{ + if (save_load) { + entry->load_history = 1 + entry->tx_bytes / + BOND_TLB_REBALANCE_INTERVAL; + entry->tx_bytes = 0; + } + + entry->tx_slave = NULL; + entry->next = TLB_NULL_INDEX; + entry->prev = TLB_NULL_INDEX; +} + +static inline void tlb_init_slave(struct slave *slave) +{ + SLAVE_TLB_INFO(slave).load = 0; + SLAVE_TLB_INFO(slave).head = TLB_NULL_INDEX; +} + +/* Caller must hold bond lock for read */ +static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_load) +{ + struct tlb_client_info *tx_hash_table; + u32 index; + + _lock_tx_hashtbl(bond); + + /* clear slave from tx_hashtbl */ + tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl; + + index = SLAVE_TLB_INFO(slave).head; + while (index != TLB_NULL_INDEX) { + u32 next_index = tx_hash_table[index].next; + tlb_init_table_entry(&tx_hash_table[index], save_load); + index = next_index; + } + + _unlock_tx_hashtbl(bond); + + tlb_init_slave(slave); +} + +/* Must be called before starting the monitor timer */ +static int tlb_initialize(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + int size = TLB_HASH_TABLE_SIZE * sizeof(struct tlb_client_info); + int i; + + spin_lock_init(&(bond_info->tx_hashtbl_lock)); + + _lock_tx_hashtbl(bond); + + bond_info->tx_hashtbl = kmalloc(size, GFP_KERNEL); + if (!bond_info->tx_hashtbl) { + printk(KERN_ERR DRV_NAME + ": Error: %s: Failed to allocate TLB hash table\n", + bond->dev->name); + _unlock_tx_hashtbl(bond); + return -1; + } + + memset(bond_info->tx_hashtbl, 0, size); + + for (i = 0; i < TLB_HASH_TABLE_SIZE; i++) { + tlb_init_table_entry(&bond_info->tx_hashtbl[i], 1); + } + + _unlock_tx_hashtbl(bond); + + return 0; +} + +/* Must be called only after all slaves have been released */ +static void tlb_deinitialize(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + + _lock_tx_hashtbl(bond); + + kfree(bond_info->tx_hashtbl); + bond_info->tx_hashtbl = NULL; + + _unlock_tx_hashtbl(bond); +} + +/* Caller must hold bond lock for read */ +static struct slave *tlb_get_least_loaded_slave(struct bonding *bond) +{ + struct slave *slave, *least_loaded; + s64 max_gap; + int i, found = 0; + + /* Find the first enabled slave */ + bond_for_each_slave(bond, slave, i) { + if (SLAVE_IS_OK(slave)) { + found = 1; + break; + } + } + + if (!found) { + return NULL; + } + + least_loaded = slave; + max_gap = (s64)(slave->speed << 20) - /* Convert to Megabit per sec */ + (s64)(SLAVE_TLB_INFO(slave).load << 3); /* Bytes to bits */ + + /* Find the slave with the largest gap */ + bond_for_each_slave_from(bond, slave, i, least_loaded) { + if (SLAVE_IS_OK(slave)) { + s64 gap = (s64)(slave->speed << 20) - + (s64)(SLAVE_TLB_INFO(slave).load << 3); + if (max_gap < gap) { + least_loaded = slave; + max_gap = gap; + } + } + } + + return least_loaded; +} + +/* Caller must hold bond lock for read */ +static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct tlb_client_info *hash_table; + struct slave *assigned_slave; + + _lock_tx_hashtbl(bond); + + hash_table = bond_info->tx_hashtbl; + assigned_slave = hash_table[hash_index].tx_slave; + if (!assigned_slave) { + assigned_slave = tlb_get_least_loaded_slave(bond); + + if (assigned_slave) { + struct tlb_slave_info *slave_info = + &(SLAVE_TLB_INFO(assigned_slave)); + u32 next_index = slave_info->head; + + hash_table[hash_index].tx_slave = assigned_slave; + hash_table[hash_index].next = next_index; + hash_table[hash_index].prev = TLB_NULL_INDEX; + + if (next_index != TLB_NULL_INDEX) { + hash_table[next_index].prev = hash_index; + } + + slave_info->head = hash_index; + slave_info->load += + hash_table[hash_index].load_history; + } + } + + if (assigned_slave) { + hash_table[hash_index].tx_bytes += skb_len; + } + + _unlock_tx_hashtbl(bond); + + return assigned_slave; +} + +/*********************** rlb specific functions ***************************/ +static inline void _lock_rx_hashtbl(struct bonding *bond) +{ + spin_lock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); +} + +static inline void _unlock_rx_hashtbl(struct bonding *bond) +{ + spin_unlock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock)); +} + +/* when an ARP REPLY is received from a client update its info + * in the rx_hashtbl + */ +static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct rlb_client_info *client_info; + u32 hash_index; + + _lock_rx_hashtbl(bond); + + hash_index = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src)); + client_info = &(bond_info->rx_hashtbl[hash_index]); + + if ((client_info->assigned) && + (client_info->ip_src == arp->ip_dst) && + (client_info->ip_dst == arp->ip_src)) { + /* update the clients MAC address */ + memcpy(client_info->mac_dst, arp->mac_src, ETH_ALEN); + client_info->ntt = 1; + bond_info->rx_ntt = 1; + } + + _unlock_rx_hashtbl(bond); +} + +static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype) +{ + struct bonding *bond = bond_dev->priv; + struct arp_pkt *arp = (struct arp_pkt *)skb->data; + int res = NET_RX_DROP; + + if (!(bond_dev->flags & IFF_MASTER)) { + goto out; + } + + if (!arp) { + dprintk("Packet has no ARP data\n"); + goto out; + } + + if (skb->len < sizeof(struct arp_pkt)) { + dprintk("Packet is too small to be an ARP\n"); + goto out; + } + + if (arp->op_code == htons(ARPOP_REPLY)) { + /* update rx hash table for this ARP */ + rlb_update_entry_from_arp(bond, arp); + dprintk("Server received an ARP Reply from client\n"); + } + + res = NET_RX_SUCCESS; + +out: + dev_kfree_skb(skb); + + return res; +} + +/* Caller must hold bond lock for read */ +static struct slave *rlb_next_rx_slave(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct slave *rx_slave, *slave, *start_at; + int i = 0; + + if (bond_info->next_rx_slave) { + start_at = bond_info->next_rx_slave; + } else { + start_at = bond->first_slave; + } + + rx_slave = NULL; + + bond_for_each_slave_from(bond, slave, i, start_at) { + if (SLAVE_IS_OK(slave)) { + if (!rx_slave) { + rx_slave = slave; + } else if (slave->speed > rx_slave->speed) { + rx_slave = slave; + } + } + } + + if (rx_slave) { + bond_info->next_rx_slave = rx_slave->next; + } + + return rx_slave; +} + +/* teach the switch the mac of a disabled slave + * on the primary for fault tolerance + * + * Caller must hold bond->curr_slave_lock for write or bond lock for write + */ +static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[]) +{ + if (!bond->curr_active_slave) { + return; + } + + if (!bond->alb_info.primary_is_promisc) { + bond->alb_info.primary_is_promisc = 1; + dev_set_promiscuity(bond->curr_active_slave->dev, 1); + } + + bond->alb_info.rlb_promisc_timeout_counter = 0; + + alb_send_learning_packets(bond->curr_active_slave, addr); +} + +/* slave being removed should not be active at this point + * + * Caller must hold bond lock for read + */ +static void rlb_clear_slave(struct bonding *bond, struct slave *slave) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct rlb_client_info *rx_hash_table; + u32 index, next_index; + + /* clear slave from rx_hashtbl */ + _lock_rx_hashtbl(bond); + + rx_hash_table = bond_info->rx_hashtbl; + index = bond_info->rx_hashtbl_head; + for (; index != RLB_NULL_INDEX; index = next_index) { + next_index = rx_hash_table[index].next; + if (rx_hash_table[index].slave == slave) { + struct slave *assigned_slave = rlb_next_rx_slave(bond); + + if (assigned_slave) { + rx_hash_table[index].slave = assigned_slave; + if (memcmp(rx_hash_table[index].mac_dst, + mac_bcast, ETH_ALEN)) { + bond_info->rx_hashtbl[index].ntt = 1; + bond_info->rx_ntt = 1; + /* A slave has been removed from the + * table because it is either disabled + * or being released. We must retry the + * update to avoid clients from not + * being updated & disconnecting when + * there is stress + */ + bond_info->rlb_update_retry_counter = + RLB_UPDATE_RETRY; + } + } else { /* there is no active slave */ + rx_hash_table[index].slave = NULL; + } + } + } + + _unlock_rx_hashtbl(bond); + + write_lock(&bond->curr_slave_lock); + + if (slave != bond->curr_active_slave) { + rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr); + } + + write_unlock(&bond->curr_slave_lock); +} + +static void rlb_update_client(struct rlb_client_info *client_info) +{ + int i; + + if (!client_info->slave) { + return; + } + + for (i = 0; i < RLB_ARP_BURST_SIZE; i++) { + struct sk_buff *skb; + + skb = arp_create(ARPOP_REPLY, ETH_P_ARP, + client_info->ip_dst, + client_info->slave->dev, + client_info->ip_src, + client_info->mac_dst, + client_info->slave->dev->dev_addr, + client_info->mac_dst); + if (!skb) { + printk(KERN_ERR DRV_NAME + ": Error: failed to create an ARP packet\n"); + continue; + } + + skb->dev = client_info->slave->dev; + + if (client_info->tag) { + skb = vlan_put_tag(skb, client_info->vlan_id); + if (!skb) { + printk(KERN_ERR DRV_NAME + ": Error: failed to insert VLAN tag\n"); + continue; + } + } + + arp_xmit(skb); + } +} + +/* sends ARP REPLIES that update the clients that need updating */ +static void rlb_update_rx_clients(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct rlb_client_info *client_info; + u32 hash_index; + + _lock_rx_hashtbl(bond); + + hash_index = bond_info->rx_hashtbl_head; + for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { + client_info = &(bond_info->rx_hashtbl[hash_index]); + if (client_info->ntt) { + rlb_update_client(client_info); + if (bond_info->rlb_update_retry_counter == 0) { + client_info->ntt = 0; + } + } + } + + /* do not update the entries again untill this counter is zero so that + * not to confuse the clients. + */ + bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY; + + _unlock_rx_hashtbl(bond); +} + +/* The slave was assigned a new mac address - update the clients */ +static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *slave) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct rlb_client_info *client_info; + int ntt = 0; + u32 hash_index; + + _lock_rx_hashtbl(bond); + + hash_index = bond_info->rx_hashtbl_head; + for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { + client_info = &(bond_info->rx_hashtbl[hash_index]); + + if ((client_info->slave == slave) && + memcmp(client_info->mac_dst, mac_bcast, ETH_ALEN)) { + client_info->ntt = 1; + ntt = 1; + } + } + + // update the team's flag only after the whole iteration + if (ntt) { + bond_info->rx_ntt = 1; + //fasten the change + bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY; + } + + _unlock_rx_hashtbl(bond); +} + +/* mark all clients using src_ip to be updated */ +static void rlb_req_update_subnet_clients(struct bonding *bond, u32 src_ip) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct rlb_client_info *client_info; + u32 hash_index; + + _lock_rx_hashtbl(bond); + + hash_index = bond_info->rx_hashtbl_head; + for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { + client_info = &(bond_info->rx_hashtbl[hash_index]); + + if (!client_info->slave) { + printk(KERN_ERR DRV_NAME + ": Error: found a client with no channel in " + "the client's hash table\n"); + continue; + } + /*update all clients using this src_ip, that are not assigned + * to the team's address (curr_active_slave) and have a known + * unicast mac address. + */ + if ((client_info->ip_src == src_ip) && + memcmp(client_info->slave->dev->dev_addr, + bond->dev->dev_addr, ETH_ALEN) && + memcmp(client_info->mac_dst, mac_bcast, ETH_ALEN)) { + client_info->ntt = 1; + bond_info->rx_ntt = 1; + } + } + + _unlock_rx_hashtbl(bond); +} + +/* Caller must hold both bond and ptr locks for read */ +static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct arp_pkt *arp = (struct arp_pkt *)skb->nh.raw; + struct slave *assigned_slave; + struct rlb_client_info *client_info; + u32 hash_index = 0; + + _lock_rx_hashtbl(bond); + + hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_src)); + client_info = &(bond_info->rx_hashtbl[hash_index]); + + if (client_info->assigned) { + if ((client_info->ip_src == arp->ip_src) && + (client_info->ip_dst == arp->ip_dst)) { + /* the entry is already assigned to this client */ + if (memcmp(arp->mac_dst, mac_bcast, ETH_ALEN)) { + /* update mac address from arp */ + memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN); + } + + assigned_slave = client_info->slave; + if (assigned_slave) { + _unlock_rx_hashtbl(bond); + return assigned_slave; + } + } else { + /* the entry is already assigned to some other client, + * move the old client to primary (curr_active_slave) so + * that the new client can be assigned to this entry. + */ + if (bond->curr_active_slave && + client_info->slave != bond->curr_active_slave) { + client_info->slave = bond->curr_active_slave; + rlb_update_client(client_info); + } + } + } + /* assign a new slave */ + assigned_slave = rlb_next_rx_slave(bond); + + if (assigned_slave) { + client_info->ip_src = arp->ip_src; + client_info->ip_dst = arp->ip_dst; + /* arp->mac_dst is broadcast for arp reqeusts. + * will be updated with clients actual unicast mac address + * upon receiving an arp reply. + */ + memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN); + client_info->slave = assigned_slave; + + if (memcmp(client_info->mac_dst, mac_bcast, ETH_ALEN)) { + client_info->ntt = 1; + bond->alb_info.rx_ntt = 1; + } else { + client_info->ntt = 0; + } + + if (!list_empty(&bond->vlan_list)) { + unsigned short vlan_id; + int res = vlan_get_tag(skb, &vlan_id); + if (!res) { + client_info->tag = 1; + client_info->vlan_id = vlan_id; + } + } + + if (!client_info->assigned) { + u32 prev_tbl_head = bond_info->rx_hashtbl_head; + bond_info->rx_hashtbl_head = hash_index; + client_info->next = prev_tbl_head; + if (prev_tbl_head != RLB_NULL_INDEX) { + bond_info->rx_hashtbl[prev_tbl_head].prev = + hash_index; + } + client_info->assigned = 1; + } + } + + _unlock_rx_hashtbl(bond); + + return assigned_slave; +} + +/* chooses (and returns) transmit channel for arp reply + * does not choose channel for other arp types since they are + * sent on the curr_active_slave + */ +static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) +{ + struct arp_pkt *arp = (struct arp_pkt *)skb->nh.raw; + struct slave *tx_slave = NULL; + + if (arp->op_code == __constant_htons(ARPOP_REPLY)) { + /* the arp must be sent on the selected + * rx channel + */ + tx_slave = rlb_choose_channel(skb, bond); + if (tx_slave) { + memcpy(arp->mac_src,tx_slave->dev->dev_addr, ETH_ALEN); + } + dprintk("Server sent ARP Reply packet\n"); + } else if (arp->op_code == __constant_htons(ARPOP_REQUEST)) { + /* Create an entry in the rx_hashtbl for this client as a + * place holder. + * When the arp reply is received the entry will be updated + * with the correct unicast address of the client. + */ + rlb_choose_channel(skb, bond); + + /* The ARP relpy packets must be delayed so that + * they can cancel out the influence of the ARP request. + */ + bond->alb_info.rlb_update_delay_counter = RLB_UPDATE_DELAY; + + /* arp requests are broadcast and are sent on the primary + * the arp request will collapse all clients on the subnet to + * the primary slave. We must register these clients to be + * updated with their assigned mac. + */ + rlb_req_update_subnet_clients(bond, arp->ip_src); + dprintk("Server sent ARP Request packet\n"); + } + + return tx_slave; +} + +/* Caller must hold bond lock for read */ +static void rlb_rebalance(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct slave *assigned_slave; + struct rlb_client_info *client_info; + int ntt; + u32 hash_index; + + _lock_rx_hashtbl(bond); + + ntt = 0; + hash_index = bond_info->rx_hashtbl_head; + for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) { + client_info = &(bond_info->rx_hashtbl[hash_index]); + assigned_slave = rlb_next_rx_slave(bond); + if (assigned_slave && (client_info->slave != assigned_slave)) { + client_info->slave = assigned_slave; + client_info->ntt = 1; + ntt = 1; + } + } + + /* update the team's flag only after the whole iteration */ + if (ntt) { + bond_info->rx_ntt = 1; + } + _unlock_rx_hashtbl(bond); +} + +/* Caller must hold rx_hashtbl lock */ +static void rlb_init_table_entry(struct rlb_client_info *entry) +{ + memset(entry, 0, sizeof(struct rlb_client_info)); + entry->next = RLB_NULL_INDEX; + entry->prev = RLB_NULL_INDEX; +} + +static int rlb_initialize(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct packet_type *pk_type = &(BOND_ALB_INFO(bond).rlb_pkt_type); + int size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info); + int i; + + spin_lock_init(&(bond_info->rx_hashtbl_lock)); + + _lock_rx_hashtbl(bond); + + bond_info->rx_hashtbl = kmalloc(size, GFP_KERNEL); + if (!bond_info->rx_hashtbl) { + printk(KERN_ERR DRV_NAME + ": Error: %s: Failed to allocate RLB hash table\n", + bond->dev->name); + _unlock_rx_hashtbl(bond); + return -1; + } + + bond_info->rx_hashtbl_head = RLB_NULL_INDEX; + + for (i = 0; i < RLB_HASH_TABLE_SIZE; i++) { + rlb_init_table_entry(bond_info->rx_hashtbl + i); + } + + _unlock_rx_hashtbl(bond); + + /*initialize packet type*/ + pk_type->type = __constant_htons(ETH_P_ARP); + pk_type->dev = bond->dev; + pk_type->func = rlb_arp_recv; + + /* register to receive ARPs */ + dev_add_pack(pk_type); + + return 0; +} + +static void rlb_deinitialize(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + + dev_remove_pack(&(bond_info->rlb_pkt_type)); + + _lock_rx_hashtbl(bond); + + kfree(bond_info->rx_hashtbl); + bond_info->rx_hashtbl = NULL; + bond_info->rx_hashtbl_head = RLB_NULL_INDEX; + + _unlock_rx_hashtbl(bond); +} + +static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + u32 curr_index; + + _lock_rx_hashtbl(bond); + + curr_index = bond_info->rx_hashtbl_head; + while (curr_index != RLB_NULL_INDEX) { + struct rlb_client_info *curr = &(bond_info->rx_hashtbl[curr_index]); + u32 next_index = bond_info->rx_hashtbl[curr_index].next; + u32 prev_index = bond_info->rx_hashtbl[curr_index].prev; + + if (curr->tag && (curr->vlan_id == vlan_id)) { + if (curr_index == bond_info->rx_hashtbl_head) { + bond_info->rx_hashtbl_head = next_index; + } + if (prev_index != RLB_NULL_INDEX) { + bond_info->rx_hashtbl[prev_index].next = next_index; + } + if (next_index != RLB_NULL_INDEX) { + bond_info->rx_hashtbl[next_index].prev = prev_index; + } + + rlb_init_table_entry(curr); + } + + curr_index = next_index; + } + + _unlock_rx_hashtbl(bond); +} + +/*********************** tlb/rlb shared functions *********************/ + +static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]) +{ + struct bonding *bond = bond_get_bond_by_slave(slave); + struct learning_pkt pkt; + int size = sizeof(struct learning_pkt); + int i; + + memset(&pkt, 0, size); + memcpy(pkt.mac_dst, mac_addr, ETH_ALEN); + memcpy(pkt.mac_src, mac_addr, ETH_ALEN); + pkt.type = __constant_htons(ETH_P_LOOP); + + for (i = 0; i < MAX_LP_BURST; i++) { + struct sk_buff *skb; + char *data; + + skb = dev_alloc_skb(size); + if (!skb) { + return; + } + + data = skb_put(skb, size); + memcpy(data, &pkt, size); + + skb->mac.raw = data; + skb->nh.raw = data + ETH_HLEN; + skb->protocol = pkt.type; + skb->priority = TC_PRIO_CONTROL; + skb->dev = slave->dev; + + if (!list_empty(&bond->vlan_list)) { + struct vlan_entry *vlan; + + vlan = bond_next_vlan(bond, + bond->alb_info.current_alb_vlan); + + bond->alb_info.current_alb_vlan = vlan; + if (!vlan) { + kfree_skb(skb); + continue; + } + + skb = vlan_put_tag(skb, vlan->vlan_id); + if (!skb) { + printk(KERN_ERR DRV_NAME + ": Error: failed to insert VLAN tag\n"); + continue; + } + } + + dev_queue_xmit(skb); + } +} + +/* hw is a boolean parameter that determines whether we should try and + * set the hw address of the device as well as the hw address of the + * net_device + */ +static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[], int hw) +{ + struct net_device *dev = slave->dev; + struct sockaddr s_addr; + + if (!hw) { + memcpy(dev->dev_addr, addr, dev->addr_len); + return 0; + } + + /* for rlb each slave must have a unique hw mac addresses so that */ + /* each slave will receive packets destined to a different mac */ + memcpy(s_addr.sa_data, addr, dev->addr_len); + s_addr.sa_family = dev->type; + if (dev_set_mac_address(dev, &s_addr)) { + printk(KERN_ERR DRV_NAME + ": Error: dev_set_mac_address of dev %s failed! ALB " + "mode requires that the base driver support setting " + "the hw address also when the network device's " + "interface is open\n", + dev->name); + return -EOPNOTSUPP; + } + return 0; +} + +/* Caller must hold bond lock for write or curr_slave_lock for write*/ +static void alb_swap_mac_addr(struct bonding *bond, struct slave *slave1, struct slave *slave2) +{ + struct slave *disabled_slave = NULL; + u8 tmp_mac_addr[ETH_ALEN]; + int slaves_state_differ; + + slaves_state_differ = (SLAVE_IS_OK(slave1) != SLAVE_IS_OK(slave2)); + + memcpy(tmp_mac_addr, slave1->dev->dev_addr, ETH_ALEN); + alb_set_slave_mac_addr(slave1, slave2->dev->dev_addr, bond->alb_info.rlb_enabled); + alb_set_slave_mac_addr(slave2, tmp_mac_addr, bond->alb_info.rlb_enabled); + + /* fasten the change in the switch */ + if (SLAVE_IS_OK(slave1)) { + alb_send_learning_packets(slave1, slave1->dev->dev_addr); + if (bond->alb_info.rlb_enabled) { + /* inform the clients that the mac address + * has changed + */ + rlb_req_update_slave_clients(bond, slave1); + } + } else { + disabled_slave = slave1; + } + + if (SLAVE_IS_OK(slave2)) { + alb_send_learning_packets(slave2, slave2->dev->dev_addr); + if (bond->alb_info.rlb_enabled) { + /* inform the clients that the mac address + * has changed + */ + rlb_req_update_slave_clients(bond, slave2); + } + } else { + disabled_slave = slave2; + } + + if (bond->alb_info.rlb_enabled && slaves_state_differ) { + /* A disabled slave was assigned an active mac addr */ + rlb_teach_disabled_mac_on_primary(bond, + disabled_slave->dev->dev_addr); + } +} + +/** + * alb_change_hw_addr_on_detach + * @bond: bonding we're working on + * @slave: the slave that was just detached + * + * We assume that @slave was already detached from the slave list. + * + * If @slave's permanent hw address is different both from its current + * address and from @bond's address, then somewhere in the bond there's + * a slave that has @slave's permanet address as its current address. + * We'll make sure that that slave no longer uses @slave's permanent address. + * + * Caller must hold bond lock + */ +static void alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *slave) +{ + int perm_curr_diff; + int perm_bond_diff; + + perm_curr_diff = memcmp(slave->perm_hwaddr, + slave->dev->dev_addr, + ETH_ALEN); + perm_bond_diff = memcmp(slave->perm_hwaddr, + bond->dev->dev_addr, + ETH_ALEN); + + if (perm_curr_diff && perm_bond_diff) { + struct slave *tmp_slave; + int i, found = 0; + + bond_for_each_slave(bond, tmp_slave, i) { + if (!memcmp(slave->perm_hwaddr, + tmp_slave->dev->dev_addr, + ETH_ALEN)) { + found = 1; + break; + } + } + + if (found) { + alb_swap_mac_addr(bond, slave, tmp_slave); + } + } +} + +/** + * alb_handle_addr_collision_on_attach + * @bond: bonding we're working on + * @slave: the slave that was just attached + * + * checks uniqueness of slave's mac address and handles the case the + * new slave uses the bonds mac address. + * + * If the permanent hw address of @slave is @bond's hw address, we need to + * find a different hw address to give @slave, that isn't in use by any other + * slave in the bond. This address must be, of course, one of the premanent + * addresses of the other slaves. + * + * We go over the slave list, and for each slave there we compare its + * permanent hw address with the current address of all the other slaves. + * If no match was found, then we've found a slave with a permanent address + * that isn't used by any other slave in the bond, so we can assign it to + * @slave. + * + * assumption: this function is called before @slave is attached to the + * bond slave list. + * + * caller must hold the bond lock for write since the mac addresses are compared + * and may be swapped. + */ +static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slave *slave) +{ + struct slave *tmp_slave1, *tmp_slave2, *free_mac_slave; + struct slave *has_bond_addr = bond->curr_active_slave; + int i, j, found = 0; + + if (bond->slave_cnt == 0) { + /* this is the first slave */ + return 0; + } + + /* if slave's mac address differs from bond's mac address + * check uniqueness of slave's mac address against the other + * slaves in the bond. + */ + if (memcmp(slave->perm_hwaddr, bond->dev->dev_addr, ETH_ALEN)) { + bond_for_each_slave(bond, tmp_slave1, i) { + if (!memcmp(tmp_slave1->dev->dev_addr, slave->dev->dev_addr, + ETH_ALEN)) { + found = 1; + break; + } + } + + if (found) { + /* a slave was found that is using the mac address + * of the new slave + */ + printk(KERN_ERR DRV_NAME + ": Error: the hw address of slave %s is not " + "unique - cannot enslave it!", + slave->dev->name); + return -EINVAL; + } + + return 0; + } + + /* The slave's address is equal to the address of the bond. + * Search for a spare address in the bond for this slave. + */ + free_mac_slave = NULL; + + bond_for_each_slave(bond, tmp_slave1, i) { + found = 0; + bond_for_each_slave(bond, tmp_slave2, j) { + if (!memcmp(tmp_slave1->perm_hwaddr, + tmp_slave2->dev->dev_addr, + ETH_ALEN)) { + found = 1; + break; + } + } + + if (!found) { + /* no slave has tmp_slave1's perm addr + * as its curr addr + */ + free_mac_slave = tmp_slave1; + break; + } + + if (!has_bond_addr) { + if (!memcmp(tmp_slave1->dev->dev_addr, + bond->dev->dev_addr, + ETH_ALEN)) { + + has_bond_addr = tmp_slave1; + } + } + } + + if (free_mac_slave) { + alb_set_slave_mac_addr(slave, free_mac_slave->perm_hwaddr, + bond->alb_info.rlb_enabled); + + printk(KERN_WARNING DRV_NAME + ": Warning: the hw address of slave %s is in use by " + "the bond; giving it the hw address of %s\n", + slave->dev->name, free_mac_slave->dev->name); + + } else if (has_bond_addr) { + printk(KERN_ERR DRV_NAME + ": Error: the hw address of slave %s is in use by the " + "bond; couldn't find a slave with a free hw address to " + "give it (this should not have happened)\n", + slave->dev->name); + return -EFAULT; + } + + return 0; +} + +/** |