Diffstat (limited to 'drivers/net/team/team_mode_loadbalance.c')
-rw-r--r--	drivers/net/team/team_mode_loadbalance.c	678
1 file changed, 678 insertions, 0 deletions
diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c
new file mode 100644
index 00000000000..a58dfebb551
--- /dev/null
+++ b/drivers/net/team/team_mode_loadbalance.c
@@ -0,0 +1,678 @@
+/*
+ * drivers/net/team/team_mode_loadbalance.c - Load-balancing mode for team
+ * Copyright (c) 2012 Jiri Pirko <jpirko@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/filter.h>
+#include <linux/if_team.h>
+
+struct lb_priv;
+
+typedef struct team_port *lb_select_tx_port_func_t(struct team *,
+						   struct lb_priv *,
+						   struct sk_buff *,
+						   unsigned char);
+
+#define LB_TX_HASHTABLE_SIZE 256 /* hash is a char */
+
+struct lb_stats {
+	u64 tx_bytes;
+};
+
+struct lb_pcpu_stats {
+	struct lb_stats hash_stats[LB_TX_HASHTABLE_SIZE];
+	struct u64_stats_sync syncp;
+};
+
+struct lb_stats_info {
+	struct lb_stats stats;
+	struct lb_stats last_stats;
+	struct team_option_inst_info *opt_inst_info;
+};
+
+struct lb_port_mapping {
+	struct team_port __rcu *port;
+	struct team_option_inst_info *opt_inst_info;
+};
+
+struct lb_priv_ex {
+	struct team *team;
+	struct lb_port_mapping tx_hash_to_port_mapping[LB_TX_HASHTABLE_SIZE];
+	struct sock_fprog_kern *orig_fprog;
+	struct {
+		unsigned int refresh_interval; /* in tenths of second */
+		struct delayed_work refresh_dw;
+		struct lb_stats_info info[LB_TX_HASHTABLE_SIZE];
+	} stats;
+};
+
+struct lb_priv {
+	struct sk_filter __rcu *fp;
+	lb_select_tx_port_func_t __rcu *select_tx_port_func;
+	struct lb_pcpu_stats __percpu *pcpu_stats;
+	struct lb_priv_ex *ex; /* priv extension */
+};
+
+static struct lb_priv *get_lb_priv(struct team *team)
+{
+	return (struct lb_priv *) &team->mode_priv;
+}
+
+struct lb_port_priv {
+	struct lb_stats __percpu *pcpu_stats;
+	struct lb_stats_info stats_info;
+};
+
+static struct lb_port_priv *get_lb_port_priv(struct team_port *port)
+{
+	return (struct lb_port_priv *) &port->mode_priv;
+}
+
+#define LB_HTPM_PORT_BY_HASH(lb_priv, hash) \
+	(lb_priv)->ex->tx_hash_to_port_mapping[hash].port
+
+#define LB_HTPM_OPT_INST_INFO_BY_HASH(lb_priv, hash) \
+	(lb_priv)->ex->tx_hash_to_port_mapping[hash].opt_inst_info
+
+static void lb_tx_hash_to_port_mapping_null_port(struct team *team,
+						 struct team_port *port)
+{
+	struct lb_priv *lb_priv = get_lb_priv(team);
+	bool changed = false;
+	int i;
+
+	for (i = 0; i < LB_TX_HASHTABLE_SIZE; i++) {
+		struct lb_port_mapping *pm;
+
+		pm = &lb_priv->ex->tx_hash_to_port_mapping[i];
+		if (rcu_access_pointer(pm->port) == port) {
+			RCU_INIT_POINTER(pm->port, NULL);
+			team_option_inst_set_change(pm->opt_inst_info);
+			changed = true;
+		}
+	}
+	if (changed)
+		team_options_change_check(team);
+}
+
+/* Basic tx selection based solely on hash */
+static struct team_port *lb_hash_select_tx_port(struct team *team,
+						struct lb_priv *lb_priv,
+						struct sk_buff *skb,
+						unsigned char hash)
+{
+	int port_index = team_num_to_port_index(team, hash);
+
+	return team_get_port_by_index_rcu(team, port_index);
+}
+
+/* Hash to port mapping select tx port */
+static struct team_port *lb_htpm_select_tx_port(struct team *team,
+						struct lb_priv *lb_priv,
+						struct sk_buff *skb,
+						unsigned char hash)
+{
+	return rcu_dereference_bh(LB_HTPM_PORT_BY_HASH(lb_priv, hash));
+}
+
+struct lb_select_tx_port {
+	char *name;
+	lb_select_tx_port_func_t *func;
+};
+
+static const struct lb_select_tx_port lb_select_tx_port_list[] = {
+	{
+		.name = "hash",
+		.func = lb_hash_select_tx_port,
+	},
+	{
+		.name = "hash_to_port_mapping",
+		.func = lb_htpm_select_tx_port,
+	},
+};
+#define LB_SELECT_TX_PORT_LIST_COUNT ARRAY_SIZE(lb_select_tx_port_list)
+
+static char *lb_select_tx_port_get_name(lb_select_tx_port_func_t *func)
+{
+	int i;
+
+	for (i = 0; i < LB_SELECT_TX_PORT_LIST_COUNT; i++) {
+		const struct lb_select_tx_port *item;
+
+		item = &lb_select_tx_port_list[i];
+		if (item->func == func)
+			return item->name;
+	}
+	return NULL;
+}
+
+static lb_select_tx_port_func_t *lb_select_tx_port_get_func(const char *name)
+{
+	int i;
+
+	for (i = 0; i < LB_SELECT_TX_PORT_LIST_COUNT; i++) {
+		const struct lb_select_tx_port *item;
+
+		item = &lb_select_tx_port_list[i];
+		if (!strcmp(item->name, name))
+			return item->func;
+	}
+	return NULL;
+}
+
+static unsigned int lb_get_skb_hash(struct lb_priv *lb_priv,
+				    struct sk_buff *skb)
+{
+	struct sk_filter *fp;
+	uint32_t lhash;
+	unsigned char *c;
+
+	fp = rcu_dereference_bh(lb_priv->fp);
+	if (unlikely(!fp))
+		return 0;
+	lhash = SK_RUN_FILTER(fp, skb);
+	c = (unsigned char *) &lhash;
+	return c[0] ^ c[1] ^ c[2] ^ c[3];
+}
+
+static void lb_update_tx_stats(unsigned int tx_bytes, struct lb_priv *lb_priv,
+			       struct lb_port_priv *lb_port_priv,
+			       unsigned char hash)
+{
+	struct lb_pcpu_stats *pcpu_stats;
+	struct lb_stats *port_stats;
+	struct lb_stats *hash_stats;
+
+	pcpu_stats = this_cpu_ptr(lb_priv->pcpu_stats);
+	port_stats = this_cpu_ptr(lb_port_priv->pcpu_stats);
+	hash_stats = &pcpu_stats->hash_stats[hash];
+	u64_stats_update_begin(&pcpu_stats->syncp);
+	port_stats->tx_bytes += tx_bytes;
+	hash_stats->tx_bytes += tx_bytes;
+	u64_stats_update_end(&pcpu_stats->syncp);
+}
+
+static bool lb_transmit(struct team *team, struct sk_buff *skb)
+{
+	struct lb_priv *lb_priv = get_lb_priv(team);
+	lb_select_tx_port_func_t *select_tx_port_func;
+	struct team_port *port;
+	unsigned char hash;
+	unsigned int tx_bytes = skb->len;
+
+	hash = lb_get_skb_hash(lb_priv, skb);
+	select_tx_port_func = rcu_dereference_bh(lb_priv->select_tx_port_func);
+	port = select_tx_port_func(team, lb_priv, skb, hash);
+	if (unlikely(!port))
+		goto drop;
+	if (team_dev_queue_xmit(team, port, skb))
+		return false;
+	lb_update_tx_stats(tx_bytes, lb_priv, get_lb_port_priv(port), hash);
+	return true;
+
+drop:
+	dev_kfree_skb_any(skb);
+	return false;
+}
+
+static int lb_bpf_func_get(struct team *team, struct team_gsetter_ctx *ctx)
+{
+	struct lb_priv *lb_priv = get_lb_priv(team);
+
+	if (!lb_priv->ex->orig_fprog) {
+		ctx->data.bin_val.len = 0;
+		ctx->data.bin_val.ptr = NULL;
+		return 0;
+	}
+	ctx->data.bin_val.len = lb_priv->ex->orig_fprog->len *
+				sizeof(struct sock_filter);
+	ctx->data.bin_val.ptr = lb_priv->ex->orig_fprog->filter;
+	return 0;
+}
+
+static int __fprog_create(struct sock_fprog_kern **pfprog, u32 data_len,
+			  const void *data)
+{
+	struct sock_fprog_kern *fprog;
+	struct sock_filter *filter = (struct sock_filter *) data;
+
+	if (data_len % sizeof(struct sock_filter))
+		return -EINVAL;
+	fprog = kmalloc(sizeof(*fprog), GFP_KERNEL);
+	if (!fprog)
+		return -ENOMEM;
+	fprog->filter = kmemdup(filter, data_len, GFP_KERNEL);
+	if (!fprog->filter) {
+		kfree(fprog);
+		return -ENOMEM;
+	}
+	fprog->len = data_len / sizeof(struct sock_filter);
+	*pfprog = fprog;
+	return 0;
+}
+
+static void __fprog_destroy(struct sock_fprog_kern *fprog)
+{
+	kfree(fprog->filter);
+	kfree(fprog);
+}
+
+static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx)
+{
+	struct lb_priv *lb_priv = get_lb_priv(team);
+	struct sk_filter *fp = NULL;
+	struct sk_filter *orig_fp;
+	struct sock_fprog_kern *fprog = NULL;
+	int err;
+
+	if (ctx->data.bin_val.len) {
+		err = __fprog_create(&fprog, ctx->data.bin_val.len,
+				     ctx->data.bin_val.ptr);
+		if (err)
+			return err;
+		err = sk_unattached_filter_create(&fp, fprog);
+		if (err) {
+			__fprog_destroy(fprog);
+			return err;
+		}
+	}
+
+	if (lb_priv->ex->orig_fprog) {
+		/* Clear old filter data */
+		__fprog_destroy(lb_priv->ex->orig_fprog);
+		orig_fp = rcu_dereference_protected(lb_priv->fp,
+						lockdep_is_held(&team->lock));
+		sk_unattached_filter_destroy(orig_fp);
+	}
+
+	rcu_assign_pointer(lb_priv->fp, fp);
+	lb_priv->ex->orig_fprog = fprog;
+	return 0;
+}
+
+static int lb_tx_method_get(struct team *team, struct team_gsetter_ctx *ctx)
+{
+	struct lb_priv *lb_priv = get_lb_priv(team);
+	lb_select_tx_port_func_t *func;
+	char *name;
+
+	func = rcu_dereference_protected(lb_priv->select_tx_port_func,
+					 lockdep_is_held(&team->lock));
+	name = lb_select_tx_port_get_name(func);
+	BUG_ON(!name);
+	ctx->data.str_val = name;
+	return 0;
+}
+
+static int lb_tx_method_set(struct team *team, struct team_gsetter_ctx *ctx)
+{
+	struct lb_priv *lb_priv = get_lb_priv(team);
+	lb_select_tx_port_func_t *func;
+
+	func = lb_select_tx_port_get_func(ctx->data.str_val);
+	if (!func)
+		return -EINVAL;
+	rcu_assign_pointer(lb_priv->select_tx_port_func, func);
+	return 0;
+}
+
+static int lb_tx_hash_to_port_mapping_init(struct team *team,
+					   struct team_option_inst_info *info)
+{
+	struct lb_priv *lb_priv = get_lb_priv(team);
+	unsigned char hash = info->array_index;
+
+	LB_HTPM_OPT_INST_INFO_BY_HASH(lb_priv, hash) = info;
+	return 0;
+}
+
+static int lb_tx_hash_to_port_mapping_get(struct team *team,
+					  struct team_gsetter_ctx *ctx)
+{
+	struct lb_priv *lb_priv = get_lb_priv(team);
+	struct team_port *port;
+	unsigned char hash = ctx->info->array_index;
+
+	port = LB_HTPM_PORT_BY_HASH(lb_priv, hash);
+	ctx->data.u32_val = port ? port->dev->ifindex : 0;
+	return 0;
+}
+
+static int lb_tx_hash_to_port_mapping_set(struct team *team,
+					  struct team_gsetter_ctx *ctx)
+{
+	struct lb_priv *lb_priv = get_lb_priv(team);
+	struct team_port *port;
+	unsigned char hash = ctx->info->array_index;
+
+	list_for_each_entry(port, &team->port_list, list) {
+		if (ctx->data.u32_val == port->dev->ifindex &&
+		    team_port_enabled(port)) {
+			rcu_assign_pointer(LB_HTPM_PORT_BY_HASH(lb_priv, hash),
+					   port);
+			return 0;
+		}
+	}
+	return -ENODEV;
+}
+
+static int lb_hash_stats_init(struct team *team,
+			      struct team_option_inst_info *info)
+{
+	struct lb_priv *lb_priv = get_lb_priv(team);
+	unsigned char hash = info->array_index;
+
+	lb_priv->ex->stats.info[hash].opt_inst_info = info;
+	return 0;
+}
+
+static int lb_hash_stats_get(struct team *team, struct team_gsetter_ctx *ctx)
+{
+	struct lb_priv *lb_priv = get_lb_priv(team);
+	unsigned char hash = ctx->info->array_index;
+
+	ctx->data.bin_val.ptr = &lb_priv->ex->stats.info[hash].stats;
+	ctx->data.bin_val.len = sizeof(struct lb_stats);
+	return 0;
+}
+
+static int lb_port_stats_init(struct team *team,
+			      struct team_option_inst_info *info)
+{
+	struct team_port *port = info->port;
+	struct lb_port_priv *lb_port_priv = get_lb_port_priv(port);
+
+	lb_port_priv->stats_info.opt_inst_info = info;
+	return 0;
+}
+
+static int lb_port_stats_get(struct team *team, struct team_gsetter_ctx *ctx)
+{
+	struct team_port *port = ctx->info->port;
+	struct lb_port_priv *lb_port_priv = get_lb_port_priv(port);
+
+	ctx->data.bin_val.ptr = &lb_port_priv->stats_info.stats;
+	ctx->data.bin_val.len = sizeof(struct lb_stats);
+	return 0;
+}
+
+static void __lb_stats_info_refresh_prepare(struct lb_stats_info *s_info)
+{
+	memcpy(&s_info->last_stats, &s_info->stats, sizeof(struct lb_stats));
+	memset(&s_info->stats, 0, sizeof(struct lb_stats));
+}
+
+static bool __lb_stats_info_refresh_check(struct lb_stats_info *s_info,
+					  struct team *team)
+{
+	if (memcmp(&s_info->last_stats, &s_info->stats,
+	    sizeof(struct lb_stats))) {
+		team_option_inst_set_change(s_info->opt_inst_info);
+		return true;
+	}
+	return false;
+}
+
+static void __lb_one_cpu_stats_add(struct lb_stats *acc_stats,
+				   struct lb_stats *cpu_stats,
+				   struct u64_stats_sync *syncp)
+{
+	unsigned int start;
+	struct lb_stats tmp;
+
+	do {
+		start = u64_stats_fetch_begin_irq(syncp);
+		tmp.tx_bytes = cpu_stats->tx_bytes;
+	} while (u64_stats_fetch_retry_irq(syncp, start));
+	acc_stats->tx_bytes += tmp.tx_bytes;
+}
+
+static void lb_stats_refresh(struct work_struct *work)
+{
+	struct team *team;
+	struct lb_priv *lb_priv;
+	struct lb_priv_ex *lb_priv_ex;
+	struct lb_pcpu_stats *pcpu_stats;
+	struct lb_stats *stats;
+	struct lb_stats_info *s_info;
+	struct team_port *port;
+	bool changed = false;
+	int i;
+	int j;
+
+	lb_priv_ex = container_of(work, struct lb_priv_ex,
+				  stats.refresh_dw.work);
+
+	team = lb_priv_ex->team;
+	lb_priv = get_lb_priv(team);
+
+	if (!mutex_trylock(&team->lock)) {
+		schedule_delayed_work(&lb_priv_ex->stats.refresh_dw, 0);
+		return;
+	}
+
+	for (j = 0; j < LB_TX_HASHTABLE_SIZE; j++) {
+		s_info = &lb_priv->ex->stats.info[j];
+		__lb_stats_info_refresh_prepare(s_info);
+		for_each_possible_cpu(i) {
+			pcpu_stats = per_cpu_ptr(lb_priv->pcpu_stats, i);
+			stats = &pcpu_stats->hash_stats[j];
+			__lb_one_cpu_stats_add(&s_info->stats, stats,
+					       &pcpu_stats->syncp);
+		}
+		changed |= __lb_stats_info_refresh_check(s_info, team);
+	}
+
+	list_for_each_entry(port, &team->port_list, list) {
+		struct lb_port_priv *lb_port_priv = get_lb_port_priv(port);
+
+		s_info = &lb_port_priv->stats_info;
+		__lb_stats_info_refresh_prepare(s_info);
+		for_each_possible_cpu(i) {
+			pcpu_stats = per_cpu_ptr(lb_priv->pcpu_stats, i);
+			stats = per_cpu_ptr(lb_port_priv->pcpu_stats, i);
+			__lb_one_cpu_stats_add(&s_info->stats, stats,
+					       &pcpu_stats->syncp);
+		}
+		changed |= __lb_stats_info_refresh_check(s_info, team);
+	}
+
+	if (changed)
+		team_options_change_check(team);
+
+	schedule_delayed_work(&lb_priv_ex->stats.refresh_dw,
+			      (lb_priv_ex->stats.refresh_interval * HZ) / 10);
+
+	mutex_unlock(&team->lock);
+}
+
+static int lb_stats_refresh_interval_get(struct team *team,
+					 struct team_gsetter_ctx *ctx)
+{
+	struct lb_priv *lb_priv = get_lb_priv(team);
+
+	ctx->data.u32_val = lb_priv->ex->stats.refresh_interval;
+	return 0;
+}
+
+static int lb_stats_refresh_interval_set(struct team *team,
+					 struct team_gsetter_ctx *ctx)
+{
+	struct lb_priv *lb_priv = get_lb_priv(team);
+	unsigned int interval;
+
+	interval = ctx->data.u32_val;
+	if (lb_priv->ex->stats.refresh_interval == interval)
+		return 0;
+	lb_priv->ex->stats.refresh_interval = interval;
+	if (interval)
+		schedule_delayed_work(&lb_priv->ex->stats.refresh_dw, 0);
+	else
+		cancel_delayed_work(&lb_priv->ex->stats.refresh_dw);
+	return 0;
+}
+
+static const struct team_option lb_options[] = {
+	{
+		.name = "bpf_hash_func",
+		.type = TEAM_OPTION_TYPE_BINARY,
+		.getter = lb_bpf_func_get,
+		.setter = lb_bpf_func_set,
+	},
+	{
+		.name = "lb_tx_method",
+		.type = TEAM_OPTION_TYPE_STRING,
+		.getter = lb_tx_method_get,
+		.setter = lb_tx_method_set,
+	},
+	{
+		.name = "lb_tx_hash_to_port_mapping",
+		.array_size = LB_TX_HASHTABLE_SIZE,
+		.type = TEAM_OPTION_TYPE_U32,
+		.init = lb_tx_hash_to_port_mapping_init,
+		.getter = lb_tx_hash_to_port_mapping_get,
+		.setter = lb_tx_hash_to_port_mapping_set,
+	},
+	{
+		.name = "lb_hash_stats",
+		.array_size = LB_TX_HASHTABLE_SIZE,
+		.type = TEAM_OPTION_TYPE_BINARY,
+		.init = lb_hash_stats_init,
+		.getter = lb_hash_stats_get,
+	},
+	{
+		.name = "lb_port_stats",
+		.per_port = true,
+		.type = TEAM_OPTION_TYPE_BINARY,
+		.init = lb_port_stats_init,
+		.getter = lb_port_stats_get,
+	},
+	{
+		.name = "lb_stats_refresh_interval",
+		.type = TEAM_OPTION_TYPE_U32,
+		.getter = lb_stats_refresh_interval_get,
+		.setter = lb_stats_refresh_interval_set,
+	},
+};
+
+static int lb_init(struct team *team)
+{
+	struct lb_priv *lb_priv = get_lb_priv(team);
+	lb_select_tx_port_func_t *func;
+	int i, err;
+
+	/* set default tx port selector */
+	func = lb_select_tx_port_get_func("hash");
+	BUG_ON(!func);
+	rcu_assign_pointer(lb_priv->select_tx_port_func, func);
+
+	lb_priv->ex = kzalloc(sizeof(*lb_priv->ex), GFP_KERNEL);
+	if (!lb_priv->ex)
+		return -ENOMEM;
+	lb_priv->ex->team = team;
+
+	lb_priv->pcpu_stats = alloc_percpu(struct lb_pcpu_stats);
+	if (!lb_priv->pcpu_stats) {
+		err = -ENOMEM;
+		goto err_alloc_pcpu_stats;
+	}
+
+	for_each_possible_cpu(i) {
+		struct lb_pcpu_stats *team_lb_stats;
+
+		team_lb_stats = per_cpu_ptr(lb_priv->pcpu_stats, i);
+		u64_stats_init(&team_lb_stats->syncp);
+	}
+
+	INIT_DELAYED_WORK(&lb_priv->ex->stats.refresh_dw, lb_stats_refresh);
+
+	err = team_options_register(team, lb_options, ARRAY_SIZE(lb_options));
+	if (err)
+		goto err_options_register;
+	return 0;
+
+err_options_register:
+	free_percpu(lb_priv->pcpu_stats);
+err_alloc_pcpu_stats:
+	kfree(lb_priv->ex);
+	return err;
+}
+
+static void lb_exit(struct team *team)
+{
+	struct lb_priv *lb_priv = get_lb_priv(team);
+
+	team_options_unregister(team, lb_options,
+				ARRAY_SIZE(lb_options));
+	cancel_delayed_work_sync(&lb_priv->ex->stats.refresh_dw);
+	free_percpu(lb_priv->pcpu_stats);
+	kfree(lb_priv->ex);
+}
+
+static int lb_port_enter(struct team *team, struct team_port *port)
+{
+	struct lb_port_priv *lb_port_priv = get_lb_port_priv(port);
+
+	lb_port_priv->pcpu_stats = alloc_percpu(struct lb_stats);
+	if (!lb_port_priv->pcpu_stats)
+		return -ENOMEM;
+	return 0;
+}
+
+static void lb_port_leave(struct team *team, struct team_port *port)
+{
+	struct lb_port_priv *lb_port_priv = get_lb_port_priv(port);
+
+	free_percpu(lb_port_priv->pcpu_stats);
+}
+
+static void lb_port_disabled(struct team *team, struct team_port *port)
+{
+	lb_tx_hash_to_port_mapping_null_port(team, port);
+}
+
+static const struct team_mode_ops lb_mode_ops = {
+	.init			= lb_init,
+	.exit			= lb_exit,
+	.port_enter		= lb_port_enter,
+	.port_leave		= lb_port_leave,
+	.port_disabled		= lb_port_disabled,
+	.transmit		= lb_transmit,
+};
+
+static const struct team_mode lb_mode = {
+	.kind		= "loadbalance",
+	.owner		= THIS_MODULE,
+	.priv_size	= sizeof(struct lb_priv),
+	.port_priv_size	= sizeof(struct lb_port_priv),
+	.ops		= &lb_mode_ops,
+};
+
+static int __init lb_init_module(void)
+{
+	return team_mode_register(&lb_mode);
+}
+
+static void __exit lb_cleanup_module(void)
+{
+	team_mode_unregister(&lb_mode);
+}
+
+module_init(lb_init_module);
+module_exit(lb_cleanup_module);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>");
+MODULE_DESCRIPTION("Load-balancing mode for team");
+MODULE_ALIAS("team-mode-loadbalance");
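
Editor's note: both tx selectors above are driven by the 8-bit value produced by lb_get_skb_hash(), which runs the attached BPF program over the skb and folds the 32-bit result into one byte by XOR-ing its four bytes together, so it can index the 256-entry hash tables. A minimal standalone userspace sketch of just that folding step (fold_hash is our illustrative name, not a kernel symbol):

#include <stdint.h>
#include <stdio.h>

/* Fold a 32-bit hash into the 0-255 range used to index the
 * LB_TX_HASHTABLE_SIZE tables; same XOR trick as lb_get_skb_hash(). */
static unsigned char fold_hash(uint32_t lhash)
{
	unsigned char *c = (unsigned char *) &lhash;

	return c[0] ^ c[1] ^ c[2] ^ c[3];
}

int main(void)
{
	/* example 32-bit value a BPF hash program might return */
	uint32_t lhash = 0xdeadbeef;

	printf("hash byte: %u\n", fold_hash(lhash)); /* prints 34 (0x22) */
	return 0;
}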

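Editor's note: the "bpf_hash_func" option takes the hash program as a binary blob whose length must be a multiple of sizeof(struct sock_filter), as __fprog_create() enforces. Below is a hedged sketch of the kind of classic-BPF program userspace (normally teamd) might hand over; the two-instruction program is an illustrative assumption, and how the bytes reach the option (netlink) is out of scope here.

#include <stdio.h>
#include <linux/filter.h>	/* struct sock_filter, BPF_STMT */

/* Hash on the last byte of the Ethernet source MAC: load one byte at
 * offset 11 into the accumulator, then return the accumulator. The
 * kernel side folds the returned value to 8 bits (lb_get_skb_hash()). */
static struct sock_filter hash_prog[] = {
	BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 11),
	BPF_STMT(BPF_RET | BPF_A, 0),
};

int main(void)
{
	printf("program: %zu instructions, %zu bytes\n",
	       sizeof(hash_prog) / sizeof(hash_prog[0]), sizeof(hash_prog));
	return 0;
}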