Diffstat (limited to 'net/sched/sch_fq.c')
 -rw-r--r--  net/sched/sch_fq.c | 94
 1 file changed, 64 insertions(+), 30 deletions(-)
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index a9dfdda9ed1..ba32c2b005d 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -47,6 +47,7 @@
 #include <linux/rbtree.h>
 #include <linux/hash.h>
 #include <linux/prefetch.h>
+#include <linux/vmalloc.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/sock.h>
@@ -88,7 +89,7 @@ struct fq_sched_data {
 	struct fq_flow	internal;	/* for non classified or high prio packets */
 	u32		quantum;
 	u32		initial_quantum;
-	u32		flow_default_rate;/* rate per flow : bytes per second */
+	u32		flow_refill_delay;
 	u32		flow_max_rate;	/* optional max rate per flow */
 	u32		flow_plimit;	/* max packets per flow */
 	struct rb_root	*fq_root;
@@ -115,6 +116,7 @@ static struct fq_flow detached, throttled;
 static void fq_flow_set_detached(struct fq_flow *f)
 {
 	f->next = &detached;
+	f->age = jiffies;
 }
 
 static bool fq_flow_is_detached(const struct fq_flow *f)
@@ -209,28 +211,22 @@ static void fq_gc(struct fq_sched_data *q,
 	}
 }
 
-static const u8 prio2band[TC_PRIO_MAX + 1] = {
-	1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
-};
-
 static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
 {
 	struct rb_node **p, *parent;
 	struct sock *sk = skb->sk;
 	struct rb_root *root;
 	struct fq_flow *f;
-	int band;
 
 	/* warning: no starvation prevention... */
-	band = prio2band[skb->priority & TC_PRIO_MAX];
-	if (unlikely(band == 0))
+	if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL))
 		return &q->internal;
 
 	if (unlikely(!sk)) {
 		/* By forcing low order bit to 1, we make sure to not
 		 * collide with a local flow (socket pointers are word aligned)
 		 */
-		sk = (struct sock *)(skb_get_rxhash(skb) | 1L);
+		sk = (struct sock *)(skb_get_hash(skb) | 1L);
 	}
 
 	root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)];
@@ -255,6 +251,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
 				     f->socket_hash != sk->sk_hash)) {
 				f->credit = q->initial_quantum;
 				f->socket_hash = sk->sk_hash;
+				f->time_next_packet = 0ULL;
 			}
 			return f;
 		}
@@ -372,17 +369,20 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	}
 
 	f->qlen++;
-	flow_queue_add(f, skb);
 	if (skb_is_retransmit(skb))
 		q->stat_tcp_retrans++;
 	sch->qstats.backlog += qdisc_pkt_len(skb);
 	if (fq_flow_is_detached(f)) {
 		fq_flow_add_tail(&q->new_flows, f);
-		if (q->quantum > f->credit)
-			f->credit = q->quantum;
+		if (time_after(jiffies, f->age + q->flow_refill_delay))
+			f->credit = max_t(u32, f->credit, q->quantum);
 		q->inactive_flows--;
 		qdisc_unthrottled(sch);
 	}
+
+	/* Note: this overwrites f->age */
+	flow_queue_add(f, skb);
+
 	if (unlikely(f == &q->internal)) {
 		q->stat_internal_packets++;
 		qdisc_unthrottled(sch);
@@ -460,7 +460,6 @@ begin:
 			fq_flow_add_tail(&q->old_flows, f);
 		} else {
 			fq_flow_set_detached(f);
-			f->age = jiffies;
 			q->inactive_flows++;
 		}
 		goto begin;
@@ -580,28 +579,53 @@ static void fq_rehash(struct fq_sched_data *q,
 	q->stat_gc_flows += fcnt;
 }
 
-static int fq_resize(struct fq_sched_data *q, u32 log)
+static void *fq_alloc_node(size_t sz, int node)
+{
+	void *ptr;
+
+	ptr = kmalloc_node(sz, GFP_KERNEL | __GFP_REPEAT | __GFP_NOWARN, node);
+	if (!ptr)
+		ptr = vmalloc_node(sz, node);
+	return ptr;
+}
+
+static void fq_free(void *addr)
+{
+	kvfree(addr);
+}
+
+static int fq_resize(struct Qdisc *sch, u32 log)
 {
+	struct fq_sched_data *q = qdisc_priv(sch);
 	struct rb_root *array;
+	void *old_fq_root;
 	u32 idx;
 
 	if (q->fq_root && log == q->fq_trees_log)
 		return 0;
 
-	array = kmalloc(sizeof(struct rb_root) << log, GFP_KERNEL);
+	/* If XPS was setup, we can allocate memory on right NUMA node */
+	array = fq_alloc_node(sizeof(struct rb_root) << log,
+			      netdev_queue_numa_node_read(sch->dev_queue));
 	if (!array)
 		return -ENOMEM;
 
 	for (idx = 0; idx < (1U << log); idx++)
 		array[idx] = RB_ROOT;
 
-	if (q->fq_root) {
-		fq_rehash(q, q->fq_root, q->fq_trees_log, array, log);
-		kfree(q->fq_root);
-	}
+	sch_tree_lock(sch);
+
+	old_fq_root = q->fq_root;
+	if (old_fq_root)
+		fq_rehash(q, old_fq_root, q->fq_trees_log, array, log);
+
 	q->fq_root = array;
 	q->fq_trees_log = log;
 
+	sch_tree_unlock(sch);
+
+	fq_free(old_fq_root);
+
 	return 0;
 }
@@ -614,6 +638,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
 	[TCA_FQ_FLOW_DEFAULT_RATE]	= { .type = NLA_U32 },
 	[TCA_FQ_FLOW_MAX_RATE]		= { .type = NLA_U32 },
 	[TCA_FQ_BUCKETS_LOG]		= { .type = NLA_U32 },
+	[TCA_FQ_FLOW_REFILL_DELAY]	= { .type = NLA_U32 },
 };
 
 static int fq_change(struct Qdisc *sch, struct nlattr *opt)
@@ -655,7 +680,8 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
 		q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
 
 	if (tb[TCA_FQ_FLOW_DEFAULT_RATE])
-		q->flow_default_rate = nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]);
+		pr_warn_ratelimited("sch_fq: defrate %u ignored.\n",
+				    nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]));
 
 	if (tb[TCA_FQ_FLOW_MAX_RATE])
 		q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
@@ -669,9 +695,17 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
 			err = -EINVAL;
 	}
 
-	if (!err)
-		err = fq_resize(q, fq_log);
+	if (tb[TCA_FQ_FLOW_REFILL_DELAY]) {
+		u32 usecs_delay = nla_get_u32(tb[TCA_FQ_FLOW_REFILL_DELAY]) ;
+
+		q->flow_refill_delay = usecs_to_jiffies(usecs_delay);
+	}
+
+	if (!err) {
+		sch_tree_unlock(sch);
+		err = fq_resize(sch, fq_log);
+		sch_tree_lock(sch);
+	}
 	while (sch->q.qlen > sch->limit) {
 		struct sk_buff *skb = fq_dequeue(sch);
@@ -691,7 +725,7 @@ static void fq_destroy(struct Qdisc *sch)
 	struct fq_sched_data *q = qdisc_priv(sch);
 
 	fq_reset(sch);
-	kfree(q->fq_root);
+	fq_free(q->fq_root);
 	qdisc_watchdog_cancel(&q->watchdog);
 }
 
@@ -704,7 +738,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt)
 	q->flow_plimit		= 100;
 	q->quantum		= 2 * psched_mtu(qdisc_dev(sch));
 	q->initial_quantum	= 10 * psched_mtu(qdisc_dev(sch));
-	q->flow_default_rate	= 0;
+	q->flow_refill_delay	= msecs_to_jiffies(40);
 	q->flow_max_rate	= ~0U;
 	q->rate_enable		= 1;
 	q->new_flows.first	= NULL;
@@ -717,7 +751,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt)
 	if (opt)
 		err = fq_change(sch, opt);
 	else
-		err = fq_resize(q, q->fq_trees_log);
+		err = fq_resize(sch, q->fq_trees_log);
 
 	return err;
 }
@@ -731,20 +765,20 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (opts == NULL)
 		goto nla_put_failure;
 
-	/* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore,
-	 * do not bother giving its value
-	 */
+	/* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */
+
 	if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
 	    nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
 	    nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) ||
 	    nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) ||
 	    nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) ||
 	    nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) ||
+	    nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY,
+			jiffies_to_usecs(q->flow_refill_delay)) ||
 	    nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
 		goto nla_put_failure;
 
-	nla_nest_end(skb, opts);
-	return skb->len;
+	return nla_nest_end(skb, opts);
 
 nla_put_failure:
 	return -1;
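
The headline change in this diff replaces the no-longer-used per-flow default rate (flow_default_rate; setting TCA_FQ_FLOW_DEFAULT_RATE now only triggers a ratelimited warning) with a flow refill delay: when a packet arrives for a detached (idle) flow, its credit is topped back up to one quantum only if the flow stayed idle for at least flow_refill_delay, so short sleep/wake cycles cannot be used to earn extra bursts. The delay defaults to 40 ms (msecs_to_jiffies(40) in fq_init()) and travels over netlink in microseconds as TCA_FQ_FLOW_REFILL_DELAY. A minimal userspace sketch of the refill rule follows; the names, types, and millisecond clock are illustrative, not taken from sch_fq.c:

#include <stdint.h>

struct flow {
	int32_t  credit;   /* bytes left this round; may be negative (deficit) */
	uint64_t age_ms;   /* when the flow went idle, cf. f->age = jiffies */
};

/* Refill rule from fq_enqueue() above: bump the credit back to one
 * quantum only if the flow stayed idle long enough.  The unsigned
 * compare mirrors max_t(u32, f->credit, q->quantum): a negative credit
 * wraps to a large u32, so an outstanding deficit is preserved rather
 * than erased by the refill.
 */
static void refill_on_wakeup(struct flow *f, uint64_t now_ms,
			     uint64_t refill_delay_ms, uint32_t quantum)
{
	if (now_ms > f->age_ms + refill_delay_ms &&
	    (uint32_t)f->credit < quantum)
		f->credit = (int32_t)quantum;
}

From userspace, iproute2 exposes this knob as the refill_delay parameter of tc-fq, e.g. "tc qdisc replace dev eth0 root fq refill_delay 40ms" (assuming an iproute2 build recent enough to know the attribute).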
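
Two smaller patterns in the fq_resize() rework are also worth noting. The bucket array is now allocated by fq_alloc_node(), which tries kmalloc_node() and falls back to vmalloc_node(), with fq_free()/kvfree() releasing either kind of allocation; the NUMA node is read from the transmit queue so the table lands on the right node when XPS is configured. More importantly, fq_resize() no longer holds the qdisc tree lock for its slow parts: it allocates first, rehashes and publishes under sch_tree_lock(), and frees the old array only after sch_tree_unlock() (which is why fq_change() now drops the lock around the call). A rough userspace analogue of that locking shape, using pthreads and hypothetical names:

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;
static void **buckets;           /* currently published bucket array */
static unsigned int buckets_log; /* log2 of the array size */

/* Resize without holding the lock across the allocation or the final
 * free; only the rehash of existing entries and the pointer swap
 * happen under the lock, as in the reworked fq_resize() above.
 */
static int resize(unsigned int log)
{
	void **array, **old;

	array = calloc(1UL << log, sizeof(*array));	/* lock not held */
	if (!array)
		return -1;

	pthread_mutex_lock(&tree_lock);
	old = buckets;
	/* ... rehash entries from old[] into array[] here ... */
	buckets = array;
	buckets_log = log;
	pthread_mutex_unlock(&tree_lock);

	free(old);	/* unreachable once unpublished; safe without lock */
	return 0;
}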
