aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@g5.osdl.org>2005-11-07 08:05:11 -0800
committerLinus Torvalds <torvalds@g5.osdl.org>2005-11-07 08:05:11 -0800
commit8e33ba49765484bc6de3a2f8143733713fa93bc1 (patch)
tree2ea080e478e4ee86a893b75db2d5c81ce14cbf10 /net
parent8cde0776ec1e86c270f65bf482f96288e6bf0023 (diff)
parent2d43f1128a4282fbe8442f40b4cbbac05d8f10aa (diff)
Merge master.kernel.org:/pub/scm/linux/kernel/git/acme/net-2.6
Diffstat (limited to 'net')
-rw-r--r--net/core/stream.c12
-rw-r--r--net/dccp/ipv4.c32
-rw-r--r--net/ipv4/inet_connection_sock.c14
-rw-r--r--net/ipv4/netfilter/ip_conntrack_helper_pptp.c4
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netlink.c19
-rw-r--r--net/ipv4/netfilter/ip_nat_core.c6
-rw-r--r--net/ipv4/netfilter/ip_nat_helper_pptp.c2
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_gre.c4
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_unknown.c2
-rw-r--r--net/ipv4/netfilter/ipt_CONNMARK.c1
-rw-r--r--net/ipv4/tcp.c1
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv6/tcp_ipv6.c15
-rw-r--r--net/netfilter/nf_queue.c2
-rw-r--r--net/netfilter/nfnetlink_log.c6
-rw-r--r--net/netfilter/nfnetlink_queue.c6
-rw-r--r--net/sched/sch_gred.c841
-rw-r--r--net/sched/sch_netem.c122
-rw-r--r--net/sched/sch_red.c418
19 files changed, 663 insertions, 846 deletions
diff --git a/net/core/stream.c b/net/core/stream.c
index ac9edfdf874..15bfd03e802 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -52,8 +52,9 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
{
struct task_struct *tsk = current;
DEFINE_WAIT(wait);
+ int done;
- while (1) {
+ do {
if (sk->sk_err)
return sock_error(sk);
if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV))
@@ -65,13 +66,12 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
sk->sk_write_pending++;
- if (sk_wait_event(sk, timeo_p,
- !((1 << sk->sk_state) &
- ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))))
- break;
+ done = sk_wait_event(sk, timeo_p,
+ !((1 << sk->sk_state) &
+ ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)));
finish_wait(sk->sk_sleep, &wait);
sk->sk_write_pending--;
- }
+ } while (!done);
return 0;
}
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 6298cf58ff9..4b9bc81ae1a 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -31,8 +31,6 @@ struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
.lhash_lock = RW_LOCK_UNLOCKED,
.lhash_users = ATOMIC_INIT(0),
.lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
- .portalloc_lock = SPIN_LOCK_UNLOCKED,
- .port_rover = 1024 - 1,
};
EXPORT_SYMBOL_GPL(dccp_hashinfo);
@@ -125,36 +123,15 @@ static int dccp_v4_hash_connect(struct sock *sk)
int ret;
if (snum == 0) {
- int rover;
int low = sysctl_local_port_range[0];
int high = sysctl_local_port_range[1];
int remaining = (high - low) + 1;
+ int rover = net_random() % (high - low) + low;
struct hlist_node *node;
struct inet_timewait_sock *tw = NULL;
local_bh_disable();
-
- /* TODO. Actually it is not so bad idea to remove
- * dccp_hashinfo.portalloc_lock before next submission to
- * Linus.
- * As soon as we touch this place at all it is time to think.
- *
- * Now it protects single _advisory_ variable
- * dccp_hashinfo.port_rover, hence it is mostly useless.
- * Code will work nicely if we just delete it, but
- * I am afraid in contented case it will work not better or
- * even worse: another cpu just will hit the same bucket
- * and spin there.
- * So some cpu salt could remove both contention and
- * memory pingpong. Any ideas how to do this in a nice way?
- */
- spin_lock(&dccp_hashinfo.portalloc_lock);
- rover = dccp_hashinfo.port_rover;
-
do {
- rover++;
- if ((rover < low) || (rover > high))
- rover = low;
head = &dccp_hashinfo.bhash[inet_bhashfn(rover,
dccp_hashinfo.bhash_size)];
spin_lock(&head->lock);
@@ -187,9 +164,9 @@ static int dccp_v4_hash_connect(struct sock *sk)
next_port:
spin_unlock(&head->lock);
+ if (++rover > high)
+ rover = low;
} while (--remaining > 0);
- dccp_hashinfo.port_rover = rover;
- spin_unlock(&dccp_hashinfo.portalloc_lock);
local_bh_enable();
@@ -197,9 +174,6 @@ static int dccp_v4_hash_connect(struct sock *sk)
ok:
/* All locks still held and bhs disabled */
- dccp_hashinfo.port_rover = rover;
- spin_unlock(&dccp_hashinfo.portalloc_lock);
-
inet_bind_hash(sk, tb, rover);
if (sk_unhashed(sk)) {
inet_sk(sk)->sport = htons(rover);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 94468a76c5b..3fe021f1a56 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -78,17 +78,9 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
int low = sysctl_local_port_range[0];
int high = sysctl_local_port_range[1];
int remaining = (high - low) + 1;
- int rover;
+ int rover = net_random() % (high - low) + low;
- spin_lock(&hashinfo->portalloc_lock);
- if (hashinfo->port_rover < low)
- rover = low;
- else
- rover = hashinfo->port_rover;
do {
- rover++;
- if (rover > high)
- rover = low;
head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
spin_lock(&head->lock);
inet_bind_bucket_for_each(tb, node, &head->chain)
@@ -97,9 +89,9 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
break;
next:
spin_unlock(&head->lock);
+ if (++rover > high)
+ rover = low;
} while (--remaining > 0);
- hashinfo->port_rover = rover;
- spin_unlock(&hashinfo->portalloc_lock);
/* Exhausted local port range during search? It is not
* possible for us to be holding one of the bind hash
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index 926a6684643..4108a5e12b3 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -270,14 +270,10 @@ exp_gre(struct ip_conntrack *master,
exp_orig->expectfn = pptp_expectfn;
exp_orig->flags = 0;
- exp_orig->dir = IP_CT_DIR_ORIGINAL;
-
/* both expectations are identical apart from tuple */
memcpy(exp_reply, exp_orig, sizeof(*exp_reply));
memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple));
- exp_reply->dir = !exp_orig->dir;
-
if (ip_nat_pptp_hook_exp_gre)
ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply);
else {
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 166e6069f12..82a65043a8e 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -815,7 +815,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
IPCTNL_MSG_CT_NEW, 1, ct);
ip_conntrack_put(ct);
if (err <= 0)
- goto out;
+ goto free;
err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
if (err < 0)
@@ -824,9 +824,9 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
DEBUGP("leaving\n");
return 0;
+free:
+ kfree_skb(skb2);
out:
- if (skb2)
- kfree_skb(skb2);
return -1;
}
@@ -1322,21 +1322,16 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
1, exp);
if (err <= 0)
- goto out;
+ goto free;
ip_conntrack_expect_put(exp);
- err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
- if (err < 0)
- goto free;
-
- return err;
+ return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+free:
+ kfree_skb(skb2);
out:
ip_conntrack_expect_put(exp);
-free:
- if (skb2)
- kfree_skb(skb2);
return err;
}
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index c5e3abd2467..762f4d93936 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -66,10 +66,8 @@ ip_nat_proto_find_get(u_int8_t protonum)
* removed until we've grabbed the reference */
preempt_disable();
p = __ip_nat_proto_find(protonum);
- if (p) {
- if (!try_module_get(p->me))
- p = &ip_nat_unknown_protocol;
- }
+ if (!try_module_get(p->me))
+ p = &ip_nat_unknown_protocol;
preempt_enable();
return p;
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
index 3cdd0684d30..ee6ab74ad3a 100644
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -216,6 +216,7 @@ pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
expect_orig->saved_proto.gre.key = htons(nat_pptp_info->pac_call_id);
expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
+ expect_orig->dir = IP_CT_DIR_ORIGINAL;
inv_t.src.ip = reply_t->src.ip;
inv_t.dst.ip = reply_t->dst.ip;
inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
@@ -233,6 +234,7 @@ pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id);
expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
+ expect_reply->dir = IP_CT_DIR_REPLY;
inv_t.src.ip = orig_t->src.ip;
inv_t.dst.ip = orig_t->dst.ip;
inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
index 7c128540167..f7cad7cf1ae 100644
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ b/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -139,8 +139,8 @@ gre_manip_pkt(struct sk_buff **pskb,
break;
case GRE_VERSION_PPTP:
DEBUGP("call_id -> 0x%04x\n",
- ntohl(tuple->dst.u.gre.key));
- pgreh->call_id = htons(ntohl(tuple->dst.u.gre.key));
+ ntohs(tuple->dst.u.gre.key));
+ pgreh->call_id = tuple->dst.u.gre.key;
break;
default:
DEBUGP("can't nat unknown GRE version\n");
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c
index 99bbef56f84..f0099a646a0 100644
--- a/net/ipv4/netfilter/ip_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c
@@ -62,7 +62,7 @@ unknown_print_range(char *buffer, const struct ip_nat_range *range)
struct ip_nat_protocol ip_nat_unknown_protocol = {
.name = "unknown",
- .me = THIS_MODULE,
+ /* .me isn't set: getting a ref to this cannot fail. */
.manip_pkt = unknown_manip_pkt,
.in_range = unknown_in_range,
.unique_tuple = unknown_unique_tuple,
diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/ipv4/netfilter/ipt_CONNMARK.c
index 13463802133..05d66ab5942 100644
--- a/net/ipv4/netfilter/ipt_CONNMARK.c
+++ b/net/ipv4/netfilter/ipt_CONNMARK.c
@@ -109,6 +109,7 @@ static struct ipt_target ipt_connmark_reg = {
static int __init init(void)
{
+ need_ip_conntrack();
return ipt_register_target(&ipt_connmark_reg);
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f3f0013a958..72b7c22e1ea 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2112,7 +2112,6 @@ void __init tcp_init(void)
sysctl_tcp_max_orphans >>= (3 - order);
sysctl_max_syn_backlog = 128;
}
- tcp_hashinfo.port_rover = sysctl_local_port_range[0] - 1;
sysctl_tcp_mem[0] = 768 << order;
sysctl_tcp_mem[1] = 1024 << order;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c85819d8474..49d67cd75ed 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -93,8 +93,6 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
.lhash_lock = RW_LOCK_UNLOCKED,
.lhash_users = ATOMIC_INIT(0),
.lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
- .portalloc_lock = SPIN_LOCK_UNLOCKED,
- .port_rover = 1024 - 1,
};
static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d693cb988b7..d746d3b27ef 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -114,16 +114,9 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
int low = sysctl_local_port_range[0];
int high = sysctl_local_port_range[1];
int remaining = (high - low) + 1;
- int rover;
+ int rover = net_random() % (high - low) + low;
- spin_lock(&tcp_hashinfo.portalloc_lock);
- if (tcp_hashinfo.port_rover < low)
- rover = low;
- else
- rover = tcp_hashinfo.port_rover;
- do { rover++;
- if (rover > high)
- rover = low;
+ do {
head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
spin_lock(&head->lock);
inet_bind_bucket_for_each(tb, node, &head->chain)
@@ -132,9 +125,9 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
break;
next:
spin_unlock(&head->lock);
+ if (++rover > high)
+ rover = low;
} while (--remaining > 0);
- tcp_hashinfo.port_rover = rover;
- spin_unlock(&tcp_hashinfo.portalloc_lock);
/* Exhausted local port range during search? It is not
* possible for us to be holding one of the bind hash
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index d10d552d9c4..d3a4f30a7f2 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -117,7 +117,7 @@ int nf_queue(struct sk_buff **skb,
/* QUEUE == DROP if noone is waiting, to be safe. */
read_lock(&queue_handler_lock);
- if (!queue_handler[pf]->outfn) {
+ if (!queue_handler[pf] || !queue_handler[pf]->outfn) {
read_unlock(&queue_handler_lock);
kfree_skb(*skb);
return 1;
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index efcd10f996b..d194676f365 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -146,11 +146,10 @@ instance_create(u_int16_t group_num, int pid)
goto out_unlock;
}
- inst = kmalloc(sizeof(*inst), GFP_ATOMIC);
+ inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
if (!inst)
goto out_unlock;
- memset(inst, 0, sizeof(*inst));
INIT_HLIST_NODE(&inst->hlist);
inst->lock = SPIN_LOCK_UNLOCKED;
/* needs to be two, since we _put() after creation */
@@ -962,10 +961,9 @@ static int nful_open(struct inode *inode, struct file *file)
struct iter_state *is;
int ret;
- is = kmalloc(sizeof(*is), GFP_KERNEL);
+ is = kzalloc(sizeof(*is), GFP_KERNEL);
if (!is)
return -ENOMEM;
- memset(is, 0, sizeof(*is));
ret = seq_open(file, &nful_seq_ops);
if (ret < 0)
goto out_free;
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index eaa44c49567..f065a6c9495 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -136,11 +136,10 @@ instance_create(u_int16_t queue_num, int pid)
goto out_unlock;
}
- inst = kmalloc(sizeof(*inst), GFP_ATOMIC);
+ inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
if (!inst)
goto out_unlock;
- memset(inst, 0, sizeof(*inst));
inst->queue_num = queue_num;
inst->peer_pid = pid;
inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
@@ -1036,10 +1035,9 @@ static int nfqnl_open(struct inode *inode, struct file *file)
struct iter_state *is;
int ret;
- is = kmalloc(sizeof(*is), GFP_KERNEL);
+ is = kzalloc(sizeof(*is), GFP_KERNEL);
if (!is)
return -ENOMEM;
- memset(is, 0, sizeof(*is));
ret = seq_open(file, &nfqnl_seq_ops);
if (ret < 0)
goto out_free;
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 25c171c3271..29a2dd9f302 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -15,247 +15,281 @@
* from Ren Liu
* - More error checks
*
- *
- *
- * For all the glorious comments look at Alexey's sch_red.c
+ * For all the glorious comments look at include/net/red.h
*/
#include <linux/config.h>
#include <linux/module.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/if_ether.h>
-#include <linux/inet.h>
#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/notifier.h>
-#include <net/ip.h>
-#include <net/route.h>
#include <linux/skbuff.h>
-#include <net/sock.h>
#include <net/pkt_sched.h>
+#include <net/red.h>
-#if 1 /* control */
-#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
-#else
-#define DPRINTK(format,args...)
-#endif
-
-#if 0 /* data */
-#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
-#else
-#define D2PRINTK(format,args...)
-#endif
+#define GRED_DEF_PRIO (MAX_DPs / 2)
+#define GRED_VQ_MASK (MAX_DPs - 1)
struct gred_sched_data;
struct gred_sched;
struct gred_sched_data
{
-/* Parameters */
u32 limit; /* HARD maximal queue length */
- u32 qth_min; /* Min average length threshold: A scaled */
- u32 qth_max; /* Max average length threshold: A scaled */
u32 DP; /* the drop pramaters */
- char Wlog; /* log(W) */
- char Plog; /* random number bits */
- u32 Scell_max;
- u32 Rmask;
u32 bytesin; /* bytes seen on virtualQ so far*/
u32 packetsin; /* packets seen on virtualQ so far*/
u32 backlog; /* bytes on the virtualQ */
- u32 forced; /* packets dropped for exceeding limits */
- u32 early; /* packets dropped as a warning */
- u32 other; /* packets dropped by invoking drop() */
- u32 pdrop; /* packets dropped because we exceeded physical queue limits */
- char Scell_log;
- u8 Stab[256];
- u8 prio; /* the prio of this vq */
-
-/* Variables */
- unsigned long qave; /* Average queue length: A scaled */
- int qcount; /* Packets since last random number generation */
- u32 qR; /* Cached random number */
-
- psched_time_t qidlestart; /* Start of idle period */
+ u8 prio; /* the prio of this vq */
+
+ struct red_parms parms;
+ struct red_stats stats;
+};
+
+enum {
+ GRED_WRED_MODE = 1,
+ GRED_RIO_MODE,
};
struct gred_sched
{
struct gred_sched_data *tab[MAX_DPs];
- u32 DPs;
- u32 def;
- u8 initd;
- u8 grio;
- u8 eqp;
+ unsigned long flags;
+ u32 red_flags;
+ u32 DPs;
+ u32 def;
+ struct red_parms wred_set;
};
-static int
-gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static inline int gred_wred_mode(struct gred_sched *table)
{
- psched_time_t now;
- struct gred_sched_data *q=NULL;
- struct gred_sched *t= qdisc_priv(sch);
- unsigned long qave=0;
- int i=0;
+ return test_bit(GRED_WRED_MODE, &table->flags);
+}
+
+static inline void gred_enable_wred_mode(struct gred_sched *table)
+{
+ __set_bit(GRED_WRED_MODE, &table->flags);
+}
+
+static inline void gred_disable_wred_mode(struct gred_sched *table)
+{
+ __clear_bit(GRED_WRED_MODE, &table->flags);
+}
+
+static inline int gred_rio_mode(struct gred_sched *table)
+{
+ return test_bit(GRED_RIO_MODE, &table->flags);
+}
+
+static inline void gred_enable_rio_mode(struct gred_sched *table)
+{
+ __set_bit(GRED_RIO_MODE, &table->flags);
+}
+
+static inline void gred_disable_rio_mode(struct gred_sched *table)
+{
+ __clear_bit(GRED_RIO_MODE, &table->flags);
+}
+
+static inline int gred_wred_mode_check(struct Qdisc *sch)
+{
+ struct gred_sched *table = qdisc_priv(sch);
+ int i;
- if (!t->initd && skb_queue_len(&sch->q) < (sch->dev->tx_queue_len ? : 1)) {
- D2PRINTK("NO GRED Queues setup yet! Enqueued anyway\n");
- goto do_enqueue;
+ /* Really ugly O(n^2) but shouldn't be necessary too frequent. */
+ for (i = 0; i < table->DPs; i++) {
+ struct gred_sched_data *q = table->tab[i];
+ int n;
+
+ if (q == NULL)
+ continue;
+
+ for (n = 0; n < table->DPs; n++)
+ if (table->tab[n] && table->tab[n] != q &&
+ table->tab[n]->prio == q->prio)
+ return 1;
}
+ return 0;
+}
+
+static inline unsigned int gred_backlog(struct gred_sched *table,
+ struct gred_sched_data *q,
+ struct Qdisc *sch)
+{
+ if (gred_wred_mode(table))
+ return sch->qstats.backlog;
+ else
+ return q->backlog;
+}
+
+static inline u16 tc_index_to_dp(struct sk_buff *skb)
+{
+ return skb->tc_index & GRED_VQ_MASK;
+}
+
+static inline void gred_load_wred_set(struct gred_sched *table,
+ struct gred_sched_data *q)
+{
+ q->parms.qavg = table->wred_set.qavg;
+ q->parms.qidlestart = table->wred_set.qidlestart;
+}
+
+static inline void gred_store_wred_set(struct gred_sched *table,
+ struct gred_sched_data *q)
+{
+ table->wred_set.qavg = q->parms.qavg;
+}
+
+static inline int gred_use_ecn(struct gred_sched *t)
+{
+ return t->red_flags & TC_RED_ECN;
+}
- if ( ((skb->tc_index&0xf) > (t->DPs -1)) || !(q=t->tab[skb->tc_index&0xf])) {
- printk("GRED: setting to default (%d)\n ",t->def);
- if (!(q=t->tab[t->def])) {
- DPRINTK("GRED: setting to default FAILED! dropping!! "
- "(%d)\n ", t->def);
- goto drop;
+static inline int gred_use_harddrop(struct gred_sched *t)
+{
+ return t->red_flags & TC_RED_HARDDROP;
+}
+
+static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+ struct gred_sched_data *q=NULL;
+ struct gred_sched *t= qdisc_priv(sch);
+ unsigned long qavg = 0;
+ u16 dp = tc_index_to_dp(skb);
+
+ if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
+ dp = t->def;
+
+ if ((q = t->tab[dp]) == NULL) {
+ /* Pass through packets not assigned to a DP
+ * if no default DP has been configured. This
+ * allows for DP flows to be left untouched.
+ */
+ if (skb_queue_len(&sch->q) < sch->dev->tx_queue_len)
+ return qdisc_enqueue_tail(skb, sch);
+ else
+ goto drop;
}
+
/* fix tc_index? --could be controvesial but needed for
requeueing */
- skb->tc_index=(skb->tc_index&0xfffffff0) | t->def;
+ skb->tc_index = (skb->tc_index & ~GRED_VQ_MASK) | dp;
}
- D2PRINTK("gred_enqueue virtualQ 0x%x classid %x backlog %d "
- "general backlog %d\n",skb->tc_index&0xf,sch->handle,q->backlog,
- sch->qstats.backlog);
- /* sum up all the qaves of prios <= to ours to get the new qave*/
- if (!t->eqp && t->grio) {
- for (i=0;i<t->DPs;i++) {
- if ((!t->tab[i]) || (i==q->DP))
- continue;
-
- if ((t->tab[i]->prio < q->prio) && (PSCHED_IS_PASTPERFECT(t->tab[i]->qidlestart)))
- qave +=t->tab[i]->qave;
+ /* sum up all the qaves of prios <= to ours to get the new qave */
+ if (!gred_wred_mode(t) && gred_rio_mode(t)) {
+ int i;
+
+ for (i = 0; i < t->DPs; i++) {
+ if (t->tab[i] && t->tab[i]->prio < q->prio &&
+ !red_is_idling(&t->tab[i]->parms))
+ qavg +=t->tab[i]->parms.qavg;
}
-
+
}
q->packetsin++;
- q->bytesin+=skb->len;
+ q->bytesin += skb->len;
- if (t->eqp && t->grio) {
- qave=0;
- q->qave=t->tab[t->def]->qave;
- q->qidlestart=t->tab[t->def]->qidlestart;
- }
+ if (gred_wred_mode(t))
+ gred_load_wred_set(t, q);
- if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {
- long us_idle;
- PSCHED_GET_TIME(now);
- us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max);
- PSCHED_SET_PASTPERFECT(q->qidlestart);
+ q->parms.qavg = red_calc_qavg(&q->parms, gred_backlog(t, q, sch));
- q->qave >>= q->Stab[(us_idle>>q->Scell_log)&0xFF];
- } else {
- if (t->eqp) {
- q->qave += sch->qstats.backlog - (q->qave >> q->Wlog);
- } else {
- q->qave += q->backlog - (q->qave >> q->Wlog);
- }
+ if (red_is_idling(&q->parms))
+ red_end_of_idle_period(&q->parms);
- }
-
-
- if (t->eqp && t->grio)
- t->tab[t->def]->qave=q->qave;
-
- if ((q->qave+qave) < q->qth_min) {
- q->qcount = -1;
-enqueue:
- if (q->backlog + skb->len <= q->limit) {
- q->backlog += skb->len;
-do_enqueue:
- __skb_queue_tail(&sch->q, skb);
- sch->qstats.backlog += skb->len;
- sch->bstats.bytes += skb->len;
- sch->bstats.packets++;
- return 0;
- } else {
- q->pdrop++;
- }
+ if (gred_wred_mode(t))
+ gred_store_wred_set(t, q);
-drop:
- kfree_skb(skb);
- sch->qstats.drops++;
- return NET_XMIT_DROP;
- }
- if ((q->qave+qave) >= q->qth_max) {
- q->qcount = -1;
- sch->qstats.overlimits++;
- q->forced++;
- goto drop;
+ switch (red_action(&q->parms, q->parms.qavg + qavg)) {
+ case RED_DONT_MARK:
+ break;
+
+ case RED_PROB_MARK:
+ sch->qstats.overlimits++;
+ if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) {
+ q->stats.prob_drop++;
+ goto congestion_drop;
+ }
+
+ q->stats.prob_mark++;
+ break;
+
+ case RED_HARD_MARK:
+ sch->qstats.overlimits++;
+ if (gred_use_harddrop(t) || !gred_use_ecn(t) ||
+ !INET_ECN_set_ce(skb)) {
+ q->stats.forced_drop++;
+ goto congestion_drop;
+ }
+ q->stats.forced_mark++;
+ break;
}
- if (++q->qcount) {
- if ((((qave+q->qave) - q->qth_min)>>q->Wlog)*q->qcount < q->qR)
- goto enqueue;
- q->qcount = 0;
- q->qR = net_random()&q->Rmask;
- sch->qstats.overlimits++;
- q->early++;
- goto drop;
+
+ if (q->backlog + skb->len <= q->limit) {
+ q->backlog += skb->len;
+ return qdisc_enqueue_tail(skb, sch);
}
- q->qR = net_random()&q->Rmask;
- goto enqueue;
+
+ q->stats.pdrop++;
+drop:
+ return qdisc_drop(skb, sch);
+
+congestion_drop:
+ qdisc_drop(skb, sch);
+ return NET_XMIT_CN;
}
-static int
-gred_requeue(struct sk_buff *skb, struct Qdisc* sch)
+static int gred_requeue(struct sk_buff *skb, struct Qdisc* sch)
{
+ struct gred_sched *t = qdisc_priv(sch);
struct gred_sched_data *q;
- struct gred_sched *t= qdisc_priv(sch);
- q= t->tab[(skb->tc_index&0xf)];
-/* error checking here -- probably unnecessary */
- PSCHED_SET_PASTPERFECT(q->qidlestart);
-
- __skb_queue_head(&sch->q, skb);
- sch->qstats.backlog += skb->len;
- sch->qstats.requeues++;
- q->backlog += skb->len;
- return 0;
+ u16 dp = tc_index_to_dp(skb);
+
+ if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "GRED: Unable to relocate VQ 0x%x "
+ "for requeue, screwing up backlog.\n",
+ tc_index_to_dp(skb));
+ } else {
+ if (red_is_idling(&q->parms))
+ red_end_of_idle_period(&q->parms);
+ q->backlog += skb->len;
+ }
+
+ return qdisc_requeue(skb, sch);
}
-static struct sk_buff *
-gred_dequeue(struct Qdisc* sch)
+static struct sk_buff *gred_dequeue(struct Qdisc* sch)
{
struct sk_buff *skb;
- struct gred_sched_data *q;
- struct gred_sched *t= qdisc_priv(sch);
+ struct gred_sched *t = qdisc_priv(sch);
+
+ skb = qdisc_dequeue_head(sch);
- skb = __skb_dequeue(&sch->q);
if (skb) {
- sch->qstats.backlog -= skb->len;
- q= t->tab[(skb->tc_index&0xf)];
- if (q) {
- q->backlog -= skb->len;
- if (!q->backlog && !t->eqp)
- PSCHED_GET_TIME(q->qidlestart);
+ struct gred_sched_data *q;
+ u16 dp = tc_index_to_dp(skb);
+
+ if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "GRED: Unable to relocate "
+ "VQ 0x%x after dequeue, screwing up "
+ "backlog.\n", tc_index_to_dp(skb));
} else {
- D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf);
+ q->backlog -= skb->len;
+
+ if (!q->backlog && !gred_wred_mode(t))
+ red_start_of_idle_period(&q->parms);
}
+
return skb;
}
- if (t->eqp) {
- q= t->tab[t->def];
- if (!q)
- D2PRINTK("no default VQ set: Results will be "
- "screwed up\n");
- else
- PSCHED_GET_TIME(q->qidlestart);
- }
+ if (gred_wred_mode(t) && !red_is_idling(&t->wred_set))
+ red_start_of_idle_period(&t->wred_set);
return NULL;
}
@@ -263,36 +297,34 @@ gred_dequeue(struct Qdisc* sch)
static unsigned int gred_drop(struct Qdisc* sch)
{
struct sk_buff *skb;
+ struct gred_sched *t = qdisc_priv(sch);
- struct gred_sched_data *q;
- struct gred_sched *t= qdisc_priv(sch);
-
- skb = __skb_dequeue_tail(&sch->q);
+ skb = qdisc_dequeue_tail(sch);
if (skb) {
unsigned int len = skb->len;
- sch->qstats.backlog -= len;
- sch->qstats.drops++;
- q= t->tab[(skb->tc_index&0xf)];
- if (q) {
- q->backlog -= len;
- q->other++;
- if (!q->backlog && !t->eqp)
- PSCHED_GET_TIME(q->qidlestart);
+ struct gred_sched_data *q;
+ u16 dp = tc_index_to_dp(skb);
+
+ if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "GRED: Unable to relocate "
+ "VQ