From bb949fbd1878973c3539d9aecff52f284482a937 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 16:55:56 -0700 Subject: netdev: Create netdev_queue abstraction. A netdev_queue is an entity managed by a qdisc. Currently there is one RX and one TX queue, and a netdev_queue merely contains a backpointer to the net_device. The Qdisc struct is augmented with a netdev_queue pointer as well. Eventually the 'dev' Qdisc member will go away and we will have the resulting hierarchy: net_device --> netdev_queue --> Qdisc Also, qdisc_alloc() and qdisc_create_dflt() now take a netdev_queue pointer argument. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 13afa721439..d9708648089 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -440,7 +440,9 @@ static struct Qdisc_ops pfifo_fast_ops __read_mostly = { .owner = THIS_MODULE, }; -struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops) +struct Qdisc *qdisc_alloc(struct net_device *dev, + struct netdev_queue *dev_queue, + struct Qdisc_ops *ops) { void *p; struct Qdisc *sch; @@ -462,6 +464,7 @@ struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops) sch->ops = ops; sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; + sch->dev_queue = dev_queue; sch->dev = dev; dev_hold(dev); atomic_set(&sch->refcnt, 1); @@ -471,12 +474,14 @@ errout: return ERR_PTR(err); } -struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops, +struct Qdisc * qdisc_create_dflt(struct net_device *dev, + struct netdev_queue *dev_queue, + struct Qdisc_ops *ops, unsigned int parentid) { struct Qdisc *sch; - sch = qdisc_alloc(dev, ops); + sch = qdisc_alloc(dev, dev_queue, ops); if (IS_ERR(sch)) goto errout; sch->stats_lock = &dev->queue_lock; @@ -545,7 +550,8 @@ void dev_activate(struct net_device *dev) if (dev->qdisc_sleeping == &noop_qdisc) { struct Qdisc *qdisc; if (dev->tx_queue_len) { - qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops, + qdisc = qdisc_create_dflt(dev, &dev->tx_queue, + &pfifo_fast_ops, TC_H_ROOT); if (qdisc == NULL) { printk(KERN_INFO "%s: activation failed\n", dev->name); -- cgit v1.2.3-18-g5258 From 5ce2d488fe039ddd86a638496cf704df86c74eeb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 17:06:30 -0700 Subject: pkt_sched: Remove 'dev' member of struct Qdisc. It can be obtained via the netdev_queue. So create a helper routine, qdisc_dev(), to make the transformations nicer looking. Now, qdisc_alloc() now no longer needs a net_device pointer argument. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index d9708648089..b626a4f32b6 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -364,7 +364,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) { struct sk_buff_head *list = prio2list(skb, qdisc); - if (skb_queue_len(list) < qdisc->dev->tx_queue_len) { + if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) { qdisc->q.qlen++; return __qdisc_enqueue_tail(skb, qdisc, list); } @@ -440,8 +440,7 @@ static struct Qdisc_ops pfifo_fast_ops __read_mostly = { .owner = THIS_MODULE, }; -struct Qdisc *qdisc_alloc(struct net_device *dev, - struct netdev_queue *dev_queue, +struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, struct Qdisc_ops *ops) { void *p; @@ -465,8 +464,7 @@ struct Qdisc *qdisc_alloc(struct net_device *dev, sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; sch->dev_queue = dev_queue; - sch->dev = dev; - dev_hold(dev); + dev_hold(qdisc_dev(sch)); atomic_set(&sch->refcnt, 1); return sch; @@ -481,7 +479,7 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, { struct Qdisc *sch; - sch = qdisc_alloc(dev, dev_queue, ops); + sch = qdisc_alloc(dev_queue, ops); if (IS_ERR(sch)) goto errout; sch->stats_lock = &dev->queue_lock; @@ -534,7 +532,7 @@ void qdisc_destroy(struct Qdisc *qdisc) ops->destroy(qdisc); module_put(ops->owner); - dev_put(qdisc->dev); + dev_put(qdisc_dev(qdisc)); call_rcu(&qdisc->q_rcu, __qdisc_destroy); } EXPORT_SYMBOL(qdisc_destroy); -- cgit v1.2.3-18-g5258 From dc2b48475a0a36f8b3bbb2da60d3a006dc5c2c84 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 17:18:23 -0700 Subject: netdev: Move queue_lock into struct netdev_queue. The lock is now an attribute of the device queue. One thing to notice is that "suspicious" places emerge which will need specific training about multiple queue handling. They are so marked with explicit "netdev->rx_queue" and "netdev->tx_queue" references. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index b626a4f32b6..ee8f9f78a09 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -29,31 +29,31 @@ /* Main transmission queue. */ /* Modifications to data participating in scheduling must be protected with - * dev->queue_lock spinlock. + * queue->lock spinlock. * * The idea is the following: * - enqueue, dequeue are serialized via top level device - * spinlock dev->queue_lock. + * spinlock queue->lock. * - ingress filtering is serialized via top level device * spinlock dev->ingress_lock. * - updates to tree and tree walking are only done under the rtnl mutex. */ void qdisc_lock_tree(struct net_device *dev) - __acquires(dev->queue_lock) + __acquires(dev->tx_queue.lock) __acquires(dev->ingress_lock) { - spin_lock_bh(&dev->queue_lock); + spin_lock_bh(&dev->tx_queue.lock); spin_lock(&dev->ingress_lock); } EXPORT_SYMBOL(qdisc_lock_tree); void qdisc_unlock_tree(struct net_device *dev) __releases(dev->ingress_lock) - __releases(dev->queue_lock) + __releases(dev->tx_queue.lock) { spin_unlock(&dev->ingress_lock); - spin_unlock_bh(&dev->queue_lock); + spin_unlock_bh(&dev->tx_queue.lock); } EXPORT_SYMBOL(qdisc_unlock_tree); @@ -118,15 +118,15 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, } /* - * NOTE: Called under dev->queue_lock with locally disabled BH. + * NOTE: Called under queue->lock with locally disabled BH. * * __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this - * device at a time. dev->queue_lock serializes queue accesses for + * device at a time. queue->lock serializes queue accesses for * this device AND dev->qdisc pointer itself. * * netif_tx_lock serializes accesses to device driver. * - * dev->queue_lock and netif_tx_lock are mutually exclusive, + * queue->lock and netif_tx_lock are mutually exclusive, * if one is grabbed, another must be free. * * Note, that this procedure can be called by a watchdog timer @@ -148,14 +148,14 @@ static inline int qdisc_restart(struct net_device *dev) /* And release queue */ - spin_unlock(&dev->queue_lock); + spin_unlock(&q->dev_queue->lock); HARD_TX_LOCK(dev, smp_processor_id()); if (!netif_subqueue_stopped(dev, skb)) ret = dev_hard_start_xmit(skb, dev); HARD_TX_UNLOCK(dev); - spin_lock(&dev->queue_lock); + spin_lock(&q->dev_queue->lock); q = dev->qdisc; switch (ret) { @@ -482,7 +482,7 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, sch = qdisc_alloc(dev_queue, ops); if (IS_ERR(sch)) goto errout; - sch->stats_lock = &dev->queue_lock; + sch->stats_lock = &dev_queue->lock; sch->parent = parentid; if (!ops->init || ops->init(sch, NULL) == 0) @@ -494,7 +494,7 @@ errout: } EXPORT_SYMBOL(qdisc_create_dflt); -/* Under dev->queue_lock and BH! */ +/* Under queue->lock and BH! */ void qdisc_reset(struct Qdisc *qdisc) { @@ -514,7 +514,7 @@ static void __qdisc_destroy(struct rcu_head *head) kfree((char *) qdisc - qdisc->padded); } -/* Under dev->queue_lock and BH! */ +/* Under queue->lock and BH! */ void qdisc_destroy(struct Qdisc *qdisc) { @@ -566,13 +566,13 @@ void dev_activate(struct net_device *dev) /* Delay activation until next carrier-on event */ return; - spin_lock_bh(&dev->queue_lock); + spin_lock_bh(&dev->tx_queue.lock); rcu_assign_pointer(dev->qdisc, dev->qdisc_sleeping); if (dev->qdisc != &noqueue_qdisc) { dev->trans_start = jiffies; dev_watchdog_up(dev); } - spin_unlock_bh(&dev->queue_lock); + spin_unlock_bh(&dev->tx_queue.lock); } void dev_deactivate(struct net_device *dev) @@ -581,7 +581,7 @@ void dev_deactivate(struct net_device *dev) struct sk_buff *skb; int running; - spin_lock_bh(&dev->queue_lock); + spin_lock_bh(&dev->tx_queue.lock); qdisc = dev->qdisc; dev->qdisc = &noop_qdisc; @@ -589,7 +589,7 @@ void dev_deactivate(struct net_device *dev) skb = dev->gso_skb; dev->gso_skb = NULL; - spin_unlock_bh(&dev->queue_lock); + spin_unlock_bh(&dev->tx_queue.lock); kfree_skb(skb); @@ -607,9 +607,9 @@ void dev_deactivate(struct net_device *dev) * Double-check inside queue lock to ensure that all effects * of the queue run are visible when we return. */ - spin_lock_bh(&dev->queue_lock); + spin_lock_bh(&dev->tx_queue.lock); running = test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state); - spin_unlock_bh(&dev->queue_lock); + spin_unlock_bh(&dev->tx_queue.lock); /* * The running flag should never be set at this point because -- cgit v1.2.3-18-g5258 From 555353cfa1aee293de445bfa6de43276138ddd82 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 17:33:13 -0700 Subject: netdev: The ingress_lock member is no longer needed. Every qdisc is assosciated with a queue, and in the case of ingress qdiscs that will now be netdev->rx_queue so using that queue's lock is the thing to do. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index ee8f9f78a09..804d44b0034 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -35,24 +35,24 @@ * - enqueue, dequeue are serialized via top level device * spinlock queue->lock. * - ingress filtering is serialized via top level device - * spinlock dev->ingress_lock. + * spinlock dev->rx_queue.lock. * - updates to tree and tree walking are only done under the rtnl mutex. */ void qdisc_lock_tree(struct net_device *dev) __acquires(dev->tx_queue.lock) - __acquires(dev->ingress_lock) + __acquires(dev->rx_queue.lock) { spin_lock_bh(&dev->tx_queue.lock); - spin_lock(&dev->ingress_lock); + spin_lock(&dev->rx_queue.lock); } EXPORT_SYMBOL(qdisc_lock_tree); void qdisc_unlock_tree(struct net_device *dev) - __releases(dev->ingress_lock) + __releases(dev->rx_queue.lock) __releases(dev->tx_queue.lock) { - spin_unlock(&dev->ingress_lock); + spin_unlock(&dev->rx_queue.lock); spin_unlock_bh(&dev->tx_queue.lock); } EXPORT_SYMBOL(qdisc_unlock_tree); -- cgit v1.2.3-18-g5258 From b0e1e6462df3c5944010b3328a546d8fe5d932cd Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 17:42:10 -0700 Subject: netdev: Move rest of qdisc state into struct netdev_queue Now qdisc, qdisc_sleeping, and qdisc_list also live there. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 90 +++++++++++++++++++++++++++++++------------------ 1 file changed, 57 insertions(+), 33 deletions(-) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 804d44b0034..3223e5ba76a 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -122,7 +122,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, * * __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this * device at a time. queue->lock serializes queue accesses for - * this device AND dev->qdisc pointer itself. + * this device AND txq->qdisc pointer itself. * * netif_tx_lock serializes accesses to device driver. * @@ -138,7 +138,8 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, */ static inline int qdisc_restart(struct net_device *dev) { - struct Qdisc *q = dev->qdisc; + struct netdev_queue *txq = &dev->tx_queue; + struct Qdisc *q = txq->qdisc; struct sk_buff *skb; int ret = NETDEV_TX_BUSY; @@ -148,15 +149,15 @@ static inline int qdisc_restart(struct net_device *dev) /* And release queue */ - spin_unlock(&q->dev_queue->lock); + spin_unlock(&txq->lock); HARD_TX_LOCK(dev, smp_processor_id()); if (!netif_subqueue_stopped(dev, skb)) ret = dev_hard_start_xmit(skb, dev); HARD_TX_UNLOCK(dev); - spin_lock(&q->dev_queue->lock); - q = dev->qdisc; + spin_lock(&txq->lock); + q = txq->qdisc; switch (ret) { case NETDEV_TX_OK: @@ -207,9 +208,10 @@ void __qdisc_run(struct net_device *dev) static void dev_watchdog(unsigned long arg) { struct net_device *dev = (struct net_device *)arg; + struct netdev_queue *txq = &dev->tx_queue; netif_tx_lock(dev); - if (dev->qdisc != &noop_qdisc) { + if (txq->qdisc != &noop_qdisc) { if (netif_device_present(dev) && netif_running(dev) && netif_carrier_ok(dev)) { @@ -539,53 +541,63 @@ EXPORT_SYMBOL(qdisc_destroy); void dev_activate(struct net_device *dev) { + struct netdev_queue *txq = &dev->tx_queue; + /* No queueing discipline is attached to device; create default one i.e. pfifo_fast for devices, which need queueing and noqueue_qdisc for virtual interfaces */ - if (dev->qdisc_sleeping == &noop_qdisc) { + if (txq->qdisc_sleeping == &noop_qdisc) { struct Qdisc *qdisc; if (dev->tx_queue_len) { - qdisc = qdisc_create_dflt(dev, &dev->tx_queue, + qdisc = qdisc_create_dflt(dev, txq, &pfifo_fast_ops, TC_H_ROOT); if (qdisc == NULL) { printk(KERN_INFO "%s: activation failed\n", dev->name); return; } - list_add_tail(&qdisc->list, &dev->qdisc_list); + list_add_tail(&qdisc->list, &txq->qdisc_list); } else { qdisc = &noqueue_qdisc; } - dev->qdisc_sleeping = qdisc; + txq->qdisc_sleeping = qdisc; } if (!netif_carrier_ok(dev)) /* Delay activation until next carrier-on event */ return; - spin_lock_bh(&dev->tx_queue.lock); - rcu_assign_pointer(dev->qdisc, dev->qdisc_sleeping); - if (dev->qdisc != &noqueue_qdisc) { + spin_lock_bh(&txq->lock); + rcu_assign_pointer(txq->qdisc, txq->qdisc_sleeping); + if (txq->qdisc != &noqueue_qdisc) { dev->trans_start = jiffies; dev_watchdog_up(dev); } - spin_unlock_bh(&dev->tx_queue.lock); + spin_unlock_bh(&txq->lock); +} + +static void dev_deactivate_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + struct Qdisc *qdisc_default) +{ + struct Qdisc *qdisc = dev_queue->qdisc; + + if (qdisc) { + dev_queue->qdisc = qdisc_default; + qdisc_reset(qdisc); + } } void dev_deactivate(struct net_device *dev) { - struct Qdisc *qdisc; struct sk_buff *skb; int running; spin_lock_bh(&dev->tx_queue.lock); - qdisc = dev->qdisc; - dev->qdisc = &noop_qdisc; - - qdisc_reset(qdisc); + dev_deactivate_queue(dev, &dev->tx_queue, &noop_qdisc); skb = dev->gso_skb; dev->gso_skb = NULL; @@ -622,32 +634,44 @@ void dev_deactivate(struct net_device *dev) } while (WARN_ON_ONCE(running)); } +static void dev_init_scheduler_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + struct Qdisc *qdisc) +{ + dev_queue->qdisc = qdisc; + dev_queue->qdisc_sleeping = qdisc; + INIT_LIST_HEAD(&dev_queue->qdisc_list); +} + void dev_init_scheduler(struct net_device *dev) { qdisc_lock_tree(dev); - dev->qdisc = &noop_qdisc; - dev->qdisc_sleeping = &noop_qdisc; - INIT_LIST_HEAD(&dev->qdisc_list); + dev_init_scheduler_queue(dev, &dev->tx_queue, &noop_qdisc); + dev_init_scheduler_queue(dev, &dev->rx_queue, NULL); qdisc_unlock_tree(dev); setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev); } -void dev_shutdown(struct net_device *dev) +static void dev_shutdown_scheduler_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + struct Qdisc *qdisc_default) { - struct Qdisc *qdisc; + struct Qdisc *qdisc = dev_queue->qdisc_sleeping; + + if (qdisc) { + dev_queue->qdisc = qdisc_default; + dev_queue->qdisc_sleeping = qdisc_default; - qdisc_lock_tree(dev); - qdisc = dev->qdisc_sleeping; - dev->qdisc = &noop_qdisc; - dev->qdisc_sleeping = &noop_qdisc; - qdisc_destroy(qdisc); -#if defined(CONFIG_NET_SCH_INGRESS) || defined(CONFIG_NET_SCH_INGRESS_MODULE) - if ((qdisc = dev->qdisc_ingress) != NULL) { - dev->qdisc_ingress = NULL; qdisc_destroy(qdisc); } -#endif +} + +void dev_shutdown(struct net_device *dev) +{ + qdisc_lock_tree(dev); + dev_shutdown_scheduler_queue(dev, &dev->tx_queue, &noop_qdisc); + dev_shutdown_scheduler_queue(dev, &dev->rx_queue, NULL); BUG_TRAP(!timer_pending(&dev->watchdog_timer)); qdisc_unlock_tree(dev); } -- cgit v1.2.3-18-g5258 From 68dfb42798e1eb2d42acbf872925cc75f1487d9b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 22:57:31 -0700 Subject: pkt_sched: Kill stats_lock member of struct Qdisc. It is always equal to qdisc->dev_queue->lock Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 3223e5ba76a..dda78ee314e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -484,7 +484,6 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, sch = qdisc_alloc(dev_queue, ops); if (IS_ERR(sch)) goto errout; - sch->stats_lock = &dev_queue->lock; sch->parent = parentid; if (!ops->init || ops->init(sch, NULL) == 0) -- cgit v1.2.3-18-g5258 From 970565bbad0c7b98db0d14131a69e5a0f4445d49 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 23:10:33 -0700 Subject: netdev: Move gso_skb into netdev_queue. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index dda78ee314e..8247a406a40 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -63,10 +63,11 @@ static inline int qdisc_qlen(struct Qdisc *q) } static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev, + struct netdev_queue *dev_queue, struct Qdisc *q) { if (unlikely(skb->next)) - dev->gso_skb = skb; + dev_queue->gso_skb = skb; else q->ops->requeue(skb, q); @@ -75,12 +76,13 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev, } static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev, + struct netdev_queue *dev_queue, struct Qdisc *q) { struct sk_buff *skb; - if ((skb = dev->gso_skb)) - dev->gso_skb = NULL; + if ((skb = dev_queue->gso_skb)) + dev_queue->gso_skb = NULL; else skb = q->dequeue(q); @@ -89,6 +91,7 @@ static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev, static inline int handle_dev_cpu_collision(struct sk_buff *skb, struct net_device *dev, + struct netdev_queue *dev_queue, struct Qdisc *q) { int ret; @@ -111,7 +114,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, * some time. */ __get_cpu_var(netdev_rx_stat).cpu_collision++; - ret = dev_requeue_skb(skb, dev, q); + ret = dev_requeue_skb(skb, dev, dev_queue, q); } return ret; @@ -144,7 +147,7 @@ static inline int qdisc_restart(struct net_device *dev) int ret = NETDEV_TX_BUSY; /* Dequeue packet */ - if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL)) + if (unlikely((skb = dev_dequeue_skb(dev, txq, q)) == NULL)) return 0; @@ -167,7 +170,7 @@ static inline int qdisc_restart(struct net_device *dev) case NETDEV_TX_LOCKED: /* Driver try lock failed */ - ret = handle_dev_cpu_collision(skb, dev, q); + ret = handle_dev_cpu_collision(skb, dev, txq, q); break; default: @@ -176,7 +179,7 @@ static inline int qdisc_restart(struct net_device *dev) printk(KERN_WARNING "BUG %s code %d qlen %d\n", dev->name, ret, q->q.qlen); - ret = dev_requeue_skb(skb, dev, q); + ret = dev_requeue_skb(skb, dev, txq, q); break; } @@ -578,31 +581,32 @@ void dev_activate(struct net_device *dev) spin_unlock_bh(&txq->lock); } -static void dev_deactivate_queue(struct net_device *dev, - struct netdev_queue *dev_queue, +static void dev_deactivate_queue(struct netdev_queue *dev_queue, struct Qdisc *qdisc_default) { - struct Qdisc *qdisc = dev_queue->qdisc; + struct Qdisc *qdisc; + struct sk_buff *skb; + + spin_lock_bh(&dev_queue->lock); + qdisc = dev_queue->qdisc; if (qdisc) { dev_queue->qdisc = qdisc_default; qdisc_reset(qdisc); } + skb = dev_queue->gso_skb; + dev_queue->gso_skb = NULL; + + spin_unlock_bh(&dev_queue->lock); + + kfree_skb(skb); } void dev_deactivate(struct net_device *dev) { - struct sk_buff *skb; int running; - spin_lock_bh(&dev->tx_queue.lock); - dev_deactivate_queue(dev, &dev->tx_queue, &noop_qdisc); - - skb = dev->gso_skb; - dev->gso_skb = NULL; - spin_unlock_bh(&dev->tx_queue.lock); - - kfree_skb(skb); + dev_deactivate_queue(&dev->tx_queue, &noop_qdisc); dev_watchdog_down(dev); -- cgit v1.2.3-18-g5258 From 86d804e10a37cd86f16bf72386c37e843a98a74b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 23:11:25 -0700 Subject: netdev: Make netif_schedule() routines work with netdev_queue objects. Only plain netif_schedule() remains taking a net_device, mostly as a compatability item while we transition the rest of these interfaces. Everything else calls netif_schedule_queue() or __netif_schedule(), both of which take a netdev_queue pointer. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 8247a406a40..407dfdb142a 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -62,7 +62,7 @@ static inline int qdisc_qlen(struct Qdisc *q) return q->q.qlen; } -static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev, +static inline int dev_requeue_skb(struct sk_buff *skb, struct netdev_queue *dev_queue, struct Qdisc *q) { @@ -71,7 +71,7 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev, else q->ops->requeue(skb, q); - netif_schedule(dev); + netif_schedule_queue(dev_queue); return 0; } @@ -114,7 +114,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, * some time. */ __get_cpu_var(netdev_rx_stat).cpu_collision++; - ret = dev_requeue_skb(skb, dev, dev_queue, q); + ret = dev_requeue_skb(skb, dev_queue, q); } return ret; @@ -179,7 +179,7 @@ static inline int qdisc_restart(struct net_device *dev) printk(KERN_WARNING "BUG %s code %d qlen %d\n", dev->name, ret, q->q.qlen); - ret = dev_requeue_skb(skb, dev, txq, q); + ret = dev_requeue_skb(skb, txq, q); break; } @@ -200,7 +200,7 @@ void __qdisc_run(struct net_device *dev) * 2. we've been doing it for too long. */ if (need_resched() || jiffies != start_time) { - netif_schedule(dev); + netif_schedule_queue(&dev->tx_queue); break; } } -- cgit v1.2.3-18-g5258 From eb6aafe3f843cb0e939546c03540a3b4911b6964 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 23:12:38 -0700 Subject: pkt_sched: Make qdisc_run take a netdev_queue. This allows us to use this calling convention all the way down into qdisc_restart(). Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 407dfdb142a..fcc7533f0bc 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -75,9 +75,8 @@ static inline int dev_requeue_skb(struct sk_buff *skb, return 0; } -static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev, - struct netdev_queue *dev_queue, - struct Qdisc *q) +static inline struct sk_buff *dequeue_skb(struct netdev_queue *dev_queue, + struct Qdisc *q) { struct sk_buff *skb; @@ -90,10 +89,10 @@ static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev, } static inline int handle_dev_cpu_collision(struct sk_buff *skb, - struct net_device *dev, struct netdev_queue *dev_queue, struct Qdisc *q) { + struct net_device *dev = dev_queue->dev; int ret; if (unlikely(dev->xmit_lock_owner == smp_processor_id())) { @@ -139,21 +138,23 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, * >0 - queue is not empty. * */ -static inline int qdisc_restart(struct net_device *dev) +static inline int qdisc_restart(struct netdev_queue *txq) { - struct netdev_queue *txq = &dev->tx_queue; struct Qdisc *q = txq->qdisc; - struct sk_buff *skb; int ret = NETDEV_TX_BUSY; + struct net_device *dev; + struct sk_buff *skb; /* Dequeue packet */ - if (unlikely((skb = dev_dequeue_skb(dev, txq, q)) == NULL)) + if (unlikely((skb = dequeue_skb(txq, q)) == NULL)) return 0; /* And release queue */ spin_unlock(&txq->lock); + dev = txq->dev; + HARD_TX_LOCK(dev, smp_processor_id()); if (!netif_subqueue_stopped(dev, skb)) ret = dev_hard_start_xmit(skb, dev); @@ -170,7 +171,7 @@ static inline int qdisc_restart(struct net_device *dev) case NETDEV_TX_LOCKED: /* Driver try lock failed */ - ret = handle_dev_cpu_collision(skb, dev, txq, q); + ret = handle_dev_cpu_collision(skb, txq, q); break; default: @@ -186,11 +187,12 @@ static inline int qdisc_restart(struct net_device *dev) return ret; } -void __qdisc_run(struct net_device *dev) +void __qdisc_run(struct netdev_queue *txq) { + struct net_device *dev = txq->dev; unsigned long start_time = jiffies; - while (qdisc_restart(dev)) { + while (qdisc_restart(txq)) { if (netif_queue_stopped(dev)) break; @@ -200,7 +202,7 @@ void __qdisc_run(struct net_device *dev) * 2. we've been doing it for too long. */ if (need_resched() || jiffies != start_time) { - netif_schedule_queue(&dev->tx_queue); + netif_schedule_queue(txq); break; } } -- cgit v1.2.3-18-g5258 From c773e847ea8f6812804e40f52399c6921a00eab1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 23:13:53 -0700 Subject: netdev: Move _xmit_lock and xmit_lock_owner into netdev_queue. Accesses are mostly structured such that when there are multiple TX queues the code transformations will be a little bit simpler. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index fcc7533f0bc..b6a36d39466 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -92,10 +92,9 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, struct netdev_queue *dev_queue, struct Qdisc *q) { - struct net_device *dev = dev_queue->dev; int ret; - if (unlikely(dev->xmit_lock_owner == smp_processor_id())) { + if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) { /* * Same CPU holding the lock. It may be a transient * configuration error, when hard_start_xmit() recurses. We @@ -105,7 +104,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, kfree_skb(skb); if (net_ratelimit()) printk(KERN_WARNING "Dead loop on netdevice %s, " - "fix it urgently!\n", dev->name); + "fix it urgently!\n", dev_queue->dev->name); ret = qdisc_qlen(q); } else { /* @@ -155,10 +154,10 @@ static inline int qdisc_restart(struct netdev_queue *txq) dev = txq->dev; - HARD_TX_LOCK(dev, smp_processor_id()); + HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_subqueue_stopped(dev, skb)) ret = dev_hard_start_xmit(skb, dev); - HARD_TX_UNLOCK(dev); + HARD_TX_UNLOCK(dev, txq); spin_lock(&txq->lock); q = txq->qdisc; -- cgit v1.2.3-18-g5258 From 79d16385c7f287a33ea771c4dbe60ae43f791b49 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Jul 2008 23:14:46 -0700 Subject: netdev: Move atomic queue state bits into netdev_queue. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index b6a36d39466..243de935b18 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -121,9 +121,9 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, /* * NOTE: Called under queue->lock with locally disabled BH. * - * __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this - * device at a time. queue->lock serializes queue accesses for - * this device AND txq->qdisc pointer itself. + * __QUEUE_STATE_QDISC_RUNNING guarantees only one CPU can process + * this queue at a time. queue->lock serializes queue accesses for + * this queue AND txq->qdisc pointer itself. * * netif_tx_lock serializes accesses to device driver. * @@ -206,7 +206,7 @@ void __qdisc_run(struct netdev_queue *txq) } } - clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state); + clear_bit(__QUEUE_STATE_QDISC_RUNNING, &txq->state); } static void dev_watchdog(unsigned long arg) @@ -605,9 +605,10 @@ static void dev_deactivate_queue(struct netdev_queue *dev_queue, void dev_deactivate(struct net_device *dev) { + struct netdev_queue *dev_queue = &dev->tx_queue; int running; - dev_deactivate_queue(&dev->tx_queue, &noop_qdisc); + dev_deactivate_queue(dev_queue, &noop_qdisc); dev_watchdog_down(dev); @@ -616,16 +617,17 @@ void dev_deactivate(struct net_device *dev) /* Wait for outstanding qdisc_run calls. */ do { - while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state)) + while (test_bit(__QUEUE_STATE_QDISC_RUNNING, &dev_queue->state)) yield(); /* * Double-check inside queue lock to ensure that all effects * of the queue run are visible when we return. */ - spin_lock_bh(&dev->tx_queue.lock); - running = test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state); - spin_unlock_bh(&dev->tx_queue.lock); + spin_lock_bh(&dev_queue->lock); + running = test_bit(__QUEUE_STATE_QDISC_RUNNING, + &dev_queue->state); + spin_unlock_bh(&dev_queue->lock); /* * The running flag should never be set at this point because -- cgit v1.2.3-18-g5258 From e8a0464cc950972824e2e128028ae3db666ec1ed Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 17 Jul 2008 00:34:19 -0700 Subject: netdev: Allocate multiple queues for TX. alloc_netdev_mq() now allocates an array of netdev_queue structures for TX, based upon the queue_count argument. Furthermore, all accesses to the TX queues are now vectored through the netdev_get_tx_queue() and netdev_for_each_tx_queue() interfaces. This makes it easy to grep the tree for all things that want to get to a TX queue of a net device. Problem spots which are not really multiqueue aware yet, and only work with one queue, can easily be spotted by grepping for all netdev_get_tx_queue() calls that pass in a zero index. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 178 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 130 insertions(+), 48 deletions(-) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 243de935b18..4e2b865cbba 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -40,20 +40,30 @@ */ void qdisc_lock_tree(struct net_device *dev) - __acquires(dev->tx_queue.lock) __acquires(dev->rx_queue.lock) { - spin_lock_bh(&dev->tx_queue.lock); + unsigned int i; + + local_bh_disable(); + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + spin_lock(&txq->lock); + } spin_lock(&dev->rx_queue.lock); } EXPORT_SYMBOL(qdisc_lock_tree); void qdisc_unlock_tree(struct net_device *dev) __releases(dev->rx_queue.lock) - __releases(dev->tx_queue.lock) { + unsigned int i; + spin_unlock(&dev->rx_queue.lock); - spin_unlock_bh(&dev->tx_queue.lock); + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + spin_unlock(&txq->lock); + } + local_bh_enable(); } EXPORT_SYMBOL(qdisc_unlock_tree); @@ -212,22 +222,37 @@ void __qdisc_run(struct netdev_queue *txq) static void dev_watchdog(unsigned long arg) { struct net_device *dev = (struct net_device *)arg; - struct netdev_queue *txq = &dev->tx_queue; netif_tx_lock(dev); - if (txq->qdisc != &noop_qdisc) { + if (!qdisc_tx_is_noop(dev)) { if (netif_device_present(dev) && netif_running(dev) && netif_carrier_ok(dev)) { - if (netif_queue_stopped(dev) && - time_after(jiffies, dev->trans_start + dev->watchdog_timeo)) { + int some_queue_stopped = 0; + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq; + + txq = netdev_get_tx_queue(dev, i); + if (netif_tx_queue_stopped(txq)) { + some_queue_stopped = 1; + break; + } + } - printk(KERN_INFO "NETDEV WATCHDOG: %s: transmit timed out\n", + if (some_queue_stopped && + time_after(jiffies, (dev->trans_start + + dev->watchdog_timeo))) { + printk(KERN_INFO "NETDEV WATCHDOG: %s: " + "transmit timed out\n", dev->name); dev->tx_timeout(dev); WARN_ON_ONCE(1); } - if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + dev->watchdog_timeo))) + if (!mod_timer(&dev->watchdog_timer, + round_jiffies(jiffies + + dev->watchdog_timeo))) dev_hold(dev); } } @@ -542,9 +567,55 @@ void qdisc_destroy(struct Qdisc *qdisc) } EXPORT_SYMBOL(qdisc_destroy); +static bool dev_all_qdisc_sleeping_noop(struct net_device *dev) +{ + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + + if (txq->qdisc_sleeping != &noop_qdisc) + return false; + } + return true; +} + +static void attach_one_default_qdisc(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_unused) +{ + struct Qdisc *qdisc; + + if (dev->tx_queue_len) { + qdisc = qdisc_create_dflt(dev, dev_queue, + &pfifo_fast_ops, TC_H_ROOT); + if (!qdisc) { + printk(KERN_INFO "%s: activation failed\n", dev->name); + return; + } + list_add_tail(&qdisc->list, &dev_queue->qdisc_list); + } else { + qdisc = &noqueue_qdisc; + } + dev_queue->qdisc_sleeping = qdisc; +} + +static void transition_one_qdisc(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_need_watchdog) +{ + int *need_watchdog_p = _need_watchdog; + + spin_lock_bh(&dev_queue->lock); + rcu_assign_pointer(dev_queue->qdisc, dev_queue->qdisc_sleeping); + if (dev_queue->qdisc != &noqueue_qdisc) + *need_watchdog_p = 1; + spin_unlock_bh(&dev_queue->lock); +} + void dev_activate(struct net_device *dev) { - struct netdev_queue *txq = &dev->tx_queue; + int need_watchdog; /* No queueing discipline is attached to device; create default one i.e. pfifo_fast for devices, @@ -552,39 +623,27 @@ void dev_activate(struct net_device *dev) virtual interfaces */ - if (txq->qdisc_sleeping == &noop_qdisc) { - struct Qdisc *qdisc; - if (dev->tx_queue_len) { - qdisc = qdisc_create_dflt(dev, txq, - &pfifo_fast_ops, - TC_H_ROOT); - if (qdisc == NULL) { - printk(KERN_INFO "%s: activation failed\n", dev->name); - return; - } - list_add_tail(&qdisc->list, &txq->qdisc_list); - } else { - qdisc = &noqueue_qdisc; - } - txq->qdisc_sleeping = qdisc; - } + if (dev_all_qdisc_sleeping_noop(dev)) + netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); if (!netif_carrier_ok(dev)) /* Delay activation until next carrier-on event */ return; - spin_lock_bh(&txq->lock); - rcu_assign_pointer(txq->qdisc, txq->qdisc_sleeping); - if (txq->qdisc != &noqueue_qdisc) { + need_watchdog = 0; + netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog); + + if (need_watchdog) { dev->trans_start = jiffies; dev_watchdog_up(dev); } - spin_unlock_bh(&txq->lock); } -static void dev_deactivate_queue(struct netdev_queue *dev_queue, - struct Qdisc *qdisc_default) +static void dev_deactivate_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_qdisc_default) { + struct Qdisc *qdisc_default = _qdisc_default; struct Qdisc *qdisc; struct sk_buff *skb; @@ -603,12 +662,35 @@ static void dev_deactivate_queue(struct netdev_queue *dev_queue, kfree_skb(skb); } +static bool some_qdisc_is_running(struct net_device *dev, int lock) +{ + unsigned int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *dev_queue; + int val; + + dev_queue = netdev_get_tx_queue(dev, i); + + if (lock) + spin_lock_bh(&dev_queue->lock); + + val = test_bit(__QUEUE_STATE_QDISC_RUNNING, &dev_queue->state); + + if (lock) + spin_unlock_bh(&dev_queue->lock); + + if (val) + return true; + } + return false; +} + void dev_deactivate(struct net_device *dev) { - struct netdev_queue *dev_queue = &dev->tx_queue; - int running; + bool running; - dev_deactivate_queue(dev_queue, &noop_qdisc); + netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc); dev_watchdog_down(dev); @@ -617,17 +699,14 @@ void dev_deactivate(struct net_device *dev) /* Wait for outstanding qdisc_run calls. */ do { - while (test_bit(__QUEUE_STATE_QDISC_RUNNING, &dev_queue->state)) + while (some_qdisc_is_running(dev, 0)) yield(); /* * Double-check inside queue lock to ensure that all effects * of the queue run are visible when we return. */ - spin_lock_bh(&dev_queue->lock); - running = test_bit(__QUEUE_STATE_QDISC_RUNNING, - &dev_queue->state); - spin_unlock_bh(&dev_queue->lock); + running = some_qdisc_is_running(dev, 1); /* * The running flag should never be set at this point because @@ -642,8 +721,10 @@ void dev_deactivate(struct net_device *dev) static void dev_init_scheduler_queue(struct net_device *dev, struct netdev_queue *dev_queue, - struct Qdisc *qdisc) + void *_qdisc) { + struct Qdisc *qdisc = _qdisc; + dev_queue->qdisc = qdisc; dev_queue->qdisc_sleeping = qdisc; INIT_LIST_HEAD(&dev_queue->qdisc_list); @@ -652,18 +733,19 @@ static void dev_init_scheduler_queue(struct net_device *dev, void dev_init_scheduler(struct net_device *dev) { qdisc_lock_tree(dev); - dev_init_scheduler_queue(dev, &dev->tx_queue, &noop_qdisc); + netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc); dev_init_scheduler_queue(dev, &dev->rx_queue, NULL); qdisc_unlock_tree(dev); setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev); } -static void dev_shutdown_scheduler_queue(struct net_device *dev, - struct netdev_queue *dev_queue, - struct Qdisc *qdisc_default) +static void shutdown_scheduler_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_qdisc_default) { struct Qdisc *qdisc = dev_queue->qdisc_sleeping; + struct Qdisc *qdisc_default = _qdisc_default; if (qdisc) { dev_queue->qdisc = qdisc_default; @@ -676,8 +758,8 @@ static void dev_shutdown_scheduler_queue(struct net_device *dev, void dev_shutdown(struct net_device *dev) { qdisc_lock_tree(dev); - dev_shutdown_scheduler_queue(dev, &dev->tx_queue, &noop_qdisc); - dev_shutdown_scheduler_queue(dev, &dev->rx_queue, NULL); + netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); + shutdown_scheduler_queue(dev, &dev->rx_queue, NULL); BUG_TRAP(!timer_pending(&dev->watchdog_timer)); qdisc_unlock_tree(dev); } -- cgit v1.2.3-18-g5258 From fd2ea0a79faad824258af5dcec1927aa24d81c16 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 17 Jul 2008 01:56:23 -0700 Subject: net: Use queue aware tests throughout. This effectively "flips the switch" by making the core networking and multiqueue-aware drivers use the new TX multiqueue structures. Non-multiqueue drivers need no changes. The interfaces they use such as netif_stop_queue() degenerate into an operation on TX queue zero. So everything "just works" for them. Code that really wants to do "X" to all TX queues now invokes a routine that does so, such as netif_tx_wake_all_queues(), netif_tx_stop_all_queues(), etc. pktgen and netpoll required a little bit more surgery than the others. In particular the pktgen changes, whilst functional, could be largely improved. The initial check in pktgen_xmit() will sometimes check the wrong queue, which is mostly harmless. The thing to do is probably to invoke fill_packet() earlier. The bulk of the netpoll changes is to make the code operate solely on the TX queue indicated by by the SKB queue mapping. Setting of the SKB queue mapping is entirely confined inside of net/core/dev.c:dev_pick_tx(). If we end up needing any kind of special semantics (drops, for example) it will be implemented here. Finally, we now have a "real_num_tx_queues" which is where the driver indicates how many TX queues are actually active. With IGB changes from Jeff Kirsher. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 4e2b865cbba..2f575b9017d 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -166,7 +166,7 @@ static inline int qdisc_restart(struct netdev_queue *txq) HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_subqueue_stopped(dev, skb)) - ret = dev_hard_start_xmit(skb, dev); + ret = dev_hard_start_xmit(skb, dev, txq); HARD_TX_UNLOCK(dev, txq); spin_lock(&txq->lock); @@ -198,11 +198,10 @@ static inline int qdisc_restart(struct netdev_queue *txq) void __qdisc_run(struct netdev_queue *txq) { - struct net_device *dev = txq->dev; unsigned long start_time = jiffies; while (qdisc_restart(txq)) { - if (netif_queue_stopped(dev)) + if (netif_tx_queue_stopped(txq)) break; /* -- cgit v1.2.3-18-g5258 From d3b753db7c4f1f37a98b51974d484fda5d86dab5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 15 Jul 2008 20:14:35 -0700 Subject: pkt_sched: Move gso_skb into Qdisc. We liberate any dangling gso_skb during qdisc destruction. It really only matters for the root qdisc. But when qdiscs can be shared by multiple netdev_queue objects, we can't have the gso_skb in the netdev_queue any more. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 2f575b9017d..2bd75befa06 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -77,7 +77,7 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { if (unlikely(skb->next)) - dev_queue->gso_skb = skb; + q->gso_skb = skb; else q->ops->requeue(skb, q); @@ -85,13 +85,12 @@ static inline int dev_requeue_skb(struct sk_buff *skb, return 0; } -static inline struct sk_buff *dequeue_skb(struct netdev_queue *dev_queue, - struct Qdisc *q) +static inline struct sk_buff *dequeue_skb(struct Qdisc *q) { struct sk_buff *skb; - if ((skb = dev_queue->gso_skb)) - dev_queue->gso_skb = NULL; + if ((skb = q->gso_skb)) + q->gso_skb = NULL; else skb = q->dequeue(q); @@ -155,10 +154,9 @@ static inline int qdisc_restart(struct netdev_queue *txq) struct sk_buff *skb; /* Dequeue packet */ - if (unlikely((skb = dequeue_skb(txq, q)) == NULL)) + if (unlikely((skb = dequeue_skb(q)) == NULL)) return 0; - /* And release queue */ spin_unlock(&txq->lock); @@ -643,8 +641,8 @@ static void dev_deactivate_queue(struct net_device *dev, void *_qdisc_default) { struct Qdisc *qdisc_default = _qdisc_default; + struct sk_buff *skb = NULL; struct Qdisc *qdisc; - struct sk_buff *skb; spin_lock_bh(&dev_queue->lock); @@ -652,9 +650,10 @@ static void dev_deactivate_queue(struct net_device *dev, if (qdisc) { dev_queue->qdisc = qdisc_default; qdisc_reset(qdisc); + + skb = qdisc->gso_skb; + qdisc->gso_skb = NULL; } - skb = dev_queue->gso_skb; - dev_queue->gso_skb = NULL; spin_unlock_bh(&dev_queue->lock); -- cgit v1.2.3-18-g5258 From e2627c8c2241bce45e368e150654d076b58a4595 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 16 Jul 2008 00:56:32 -0700 Subject: pkt_sched: Make QDISC_RUNNING a qdisc state. Currently it is associated with a netdev_queue, but when we have qdisc sharing that no longer makes any sense. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 2bd75befa06..ac208c2b2d1 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -130,8 +130,8 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, /* * NOTE: Called under queue->lock with locally disabled BH. * - * __QUEUE_STATE_QDISC_RUNNING guarantees only one CPU can process - * this queue at a time. queue->lock serializes queue accesses for + * __QDISC_STATE_RUNNING guarantees only one CPU can process + * this qdisc at a time. queue->lock serializes queue accesses for * this queue AND txq->qdisc pointer itself. * * netif_tx_lock serializes accesses to device driver. @@ -146,9 +146,9 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, * >0 - queue is not empty. * */ -static inline int qdisc_restart(struct netdev_queue *txq) +static inline int qdisc_restart(struct netdev_queue *txq, + struct Qdisc *q) { - struct Qdisc *q = txq->qdisc; int ret = NETDEV_TX_BUSY; struct net_device *dev; struct sk_buff *skb; @@ -168,7 +168,6 @@ static inline int qdisc_restart(struct netdev_queue *txq) HARD_TX_UNLOCK(dev, txq); spin_lock(&txq->lock); - q = txq->qdisc; switch (ret) { case NETDEV_TX_OK: @@ -197,8 +196,9 @@ static inline int qdisc_restart(struct netdev_queue *txq) void __qdisc_run(struct netdev_queue *txq) { unsigned long start_time = jiffies; + struct Qdisc *q = txq->qdisc; - while (qdisc_restart(txq)) { + while (qdisc_restart(txq, q)) { if (netif_tx_queue_stopped(txq)) break; @@ -213,7 +213,7 @@ void __qdisc_run(struct netdev_queue *txq) } } - clear_bit(__QUEUE_STATE_QDISC_RUNNING, &txq->state); + clear_bit(__QDISC_STATE_RUNNING, &q->state); } static void dev_watchdog(unsigned long arg) @@ -666,14 +666,16 @@ static bool some_qdisc_is_running(struct net_device *dev, int lock) for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *dev_queue; + struct Qdisc *q; int val; dev_queue = netdev_get_tx_queue(dev, i); + q = dev_queue->qdisc; if (lock) spin_lock_bh(&dev_queue->lock); - val = test_bit(__QUEUE_STATE_QDISC_RUNNING, &dev_queue->state); + val = test_bit(__QDISC_STATE_RUNNING, &q->state); if (lock) spin_unlock_bh(&dev_queue->lock); -- cgit v1.2.3-18-g5258 From 7698b4fcabcd790efc4f226bada1e7b5870653af Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 16 Jul 2008 01:42:40 -0700 Subject: pkt_sched: Add and use qdisc_root() and qdisc_root_lock(). When code wants to lock the qdisc tree state, the logic operation it's doing is locking the top-level qdisc that sits of the root of the netdev_queue. Add qdisc_root_lock() to represent this and convert the easiest cases. In order for this to work out in all cases, we have to hook up the noop_qdisc to a dummy netdev_queue. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index ac208c2b2d1..739a8711ab3 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -151,14 +151,17 @@ static inline int qdisc_restart(struct netdev_queue *txq, { int ret = NETDEV_TX_BUSY; struct net_device *dev; + spinlock_t *root_lock; struct sk_buff *skb; /* Dequeue packet */ if (unlikely((skb = dequeue_skb(q)) == NULL)) return 0; - /* And release queue */ - spin_unlock(&txq->lock); + root_lock = qdisc_root_lock(q); + + /* And release qdisc */ + spin_unlock(root_lock); dev = txq->dev; @@ -167,7 +170,7 @@ static inline int qdisc_restart(struct netdev_queue *txq, ret = dev_hard_start_xmit(skb, dev, txq); HARD_TX_UNLOCK(dev, txq); - spin_lock(&txq->lock); + spin_lock(root_lock); switch (ret) { case NETDEV_TX_OK: @@ -345,12 +348,18 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = { .owner = THIS_MODULE, }; +static struct netdev_queue noop_netdev_queue = { + .lock = __SPIN_LOCK_UNLOCKED(noop_netdev_queue.lock), + .qdisc = &noop_qdisc, +}; + struct Qdisc noop_qdisc = { .enqueue = noop_enqueue, .dequeue = noop_dequeue, .flags = TCQ_F_BUILTIN, .ops = &noop_qdisc_ops, .list = LIST_HEAD_INIT(noop_qdisc.list), + .dev_queue = &noop_netdev_queue, }; EXPORT_SYMBOL(noop_qdisc); @@ -666,19 +675,21 @@ static bool some_qdisc_is_running(struct net_device *dev, int lock) for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *dev_queue; + spinlock_t *root_lock; struct Qdisc *q; int val; dev_queue = netdev_get_tx_queue(dev, i); q = dev_queue->qdisc; + root_lock = qdisc_root_lock(q); if (lock) - spin_lock_bh(&dev_queue->lock); + spin_lock_bh(root_lock); val = test_bit(__QDISC_STATE_RUNNING, &q->state); if (lock) - spin_unlock_bh(&dev_queue->lock); + spin_unlock_bh(root_lock); if (val) return true; -- cgit v1.2.3-18-g5258 From 37437bb2e1ae8af470dfcd5b4ff454110894ccaf Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 16 Jul 2008 02:15:04 -0700 Subject: pkt_sched: Schedule qdiscs instead of netdev_queue. When we have shared qdiscs, packets come out of the qdiscs for multiple transmit queues. Therefore it doesn't make any sense to schedule the transmit queue when logically we cannot know ahead of time the TX queue of the SKB that the qdisc->dequeue() will give us. Just for sanity I added a BUG check to make sure we never get into a state where the noop_qdisc is scheduled. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 739a8711ab3..dd5c4e70abe 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -72,16 +72,14 @@ static inline int qdisc_qlen(struct Qdisc *q) return q->q.qlen; } -static inline int dev_requeue_skb(struct sk_buff *skb, - struct netdev_queue *dev_queue, - struct Qdisc *q) +static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { if (unlikely(skb->next)) q->gso_skb = skb; else q->ops->requeue(skb, q); - netif_schedule_queue(dev_queue); + __netif_schedule(q); return 0; } @@ -121,7 +119,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, * some time. */ __get_cpu_var(netdev_rx_stat).cpu_collision++; - ret = dev_requeue_skb(skb, dev_queue, q); + ret = dev_requeue_skb(skb, q); } return ret; @@ -146,9 +144,9 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, * >0 - queue is not empty. * */ -static inline int qdisc_restart(struct netdev_queue *txq, - struct Qdisc *q) +static inline int qdisc_restart(struct Qdisc *q) { + struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; struct net_device *dev; spinlock_t *root_lock; @@ -163,7 +161,8 @@ static inline int qdisc_restart(struct netdev_queue *txq, /* And release qdisc */ spin_unlock(root_lock); - dev = txq->dev; + dev = qdisc_dev(q); + txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_subqueue_stopped(dev, skb)) @@ -189,29 +188,28 @@ static inline int qdisc_restart(struct netdev_queue *txq, printk(KERN_WARNING "BUG %s code %d qlen %d\n", dev->name, ret, q->q.qlen); - ret = dev_requeue_skb(skb, txq, q); + ret = dev_requeue_skb(skb, q); break; } + if (ret && netif_tx_queue_stopped(txq)) + ret = 0; + return ret; } -void __qdisc_run(struct netdev_queue *txq) +void __qdisc_run(struct Qdisc *q) { unsigned long start_time = jiffies; - struct Qdisc *q = txq->qdisc; - - while (qdisc_restart(txq, q)) { - if (netif_tx_queue_stopped(txq)) - break; + while (qdisc_restart(q)) { /* * Postpone processing if * 1. another process needs the CPU; * 2. we've been doing it for too long. */ if (need_resched() || jiffies != start_time) { - netif_schedule_queue(txq); + __netif_schedule(q); break; } } -- cgit v1.2.3-18-g5258 From 16361127ebed0fb8f9d7cc94c6e137eaf710f676 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 16 Jul 2008 02:23:17 -0700 Subject: pkt_sched: dev_init_scheduler() does not need to lock qdisc tree. We are registering the device, there is no way anyone can get at this object's qdiscs yet in any meaningful way. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index dd5c4e70abe..7e078c59319 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -741,10 +741,8 @@ static void dev_init_scheduler_queue(struct net_device *dev, void dev_init_scheduler(struct net_device *dev) { - qdisc_lock_tree(dev); netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc); dev_init_scheduler_queue(dev, &dev->rx_queue, NULL); - qdisc_unlock_tree(dev); setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev); } -- cgit v1.2.3-18-g5258 From 8a34c5dc3a7c6431f1cd94c0904be81b296e08ca Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 17 Jul 2008 00:47:45 -0700 Subject: pkt_sched: Perform bulk of qdisc destruction in RCU. This allows less strict control of access to the qdisc attached to a netdev_queue. It is even allowed to enqueue into a qdisc which is in the process of being destroyed. The RCU handler will toss out those packets. We will need this to handle sharing of a qdisc amongst multiple TX queues. In such a setup the lock has to be shared, so will be inside of the qdisc itself. At which point the netdev_queue lock cannot be used to hard synchronize access to the ->qdisc pointer. One operation we have to keep inside of qdisc_destroy() is the list deletion. It is the only piece of state visible after the RCU quiesce period, so we have to undo it early and under the appropriate locking. The operations in the RCU handler do not need any looking because the qdisc tree is no longer visible to anything at that point. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 7e078c59319..082db8abe70 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -545,6 +545,17 @@ EXPORT_SYMBOL(qdisc_reset); static void __qdisc_destroy(struct rcu_head *head) { struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu); + const struct Qdisc_ops *ops = qdisc->ops; + + gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); + if (ops->reset) + ops->reset(qdisc); + if (ops->destroy) + ops->destroy(qdisc); + + module_put(ops->owner); + dev_put(qdisc_dev(qdisc)); + kfree((char *) qdisc - qdisc->padded); } @@ -552,21 +563,12 @@ static void __qdisc_destroy(struct rcu_head *head) void qdisc_destroy(struct Qdisc *qdisc) { - const struct Qdisc_ops *ops = qdisc->ops; - if (qdisc->flags & TCQ_F_BUILTIN || !atomic_dec_and_test(&qdisc->refcnt)) return; list_del(&qdisc->list); - gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); - if (ops->reset) - ops->reset(qdisc); - if (ops->destroy) - ops->destroy(qdisc); - module_put(ops->owner); - dev_put(qdisc_dev(qdisc)); call_rcu(&qdisc->q_rcu, __qdisc_destroy); } EXPORT_SYMBOL(qdisc_destroy); -- cgit v1.2.3-18-g5258 From 17715e62a5e5c7224e5f906a4b8f9e5084100118 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 16 Jul 2008 02:36:04 -0700 Subject: pkt_sched: Use per-queue locking in shutdown_scheduler_queue. This eliminates another qdisc_lock_tree user. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 082db8abe70..efa418a1b34 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -757,18 +757,20 @@ static void shutdown_scheduler_queue(struct net_device *dev, struct Qdisc *qdisc_default = _qdisc_default; if (qdisc) { + spinlock_t *root_lock = qdisc_root_lock(qdisc); + dev_queue->qdisc = qdisc_default; dev_queue->qdisc_sleeping = qdisc_default; + spin_lock(root_lock); qdisc_destroy(qdisc); + spin_unlock(root_lock); } } void dev_shutdown(struct net_device *dev) { - qdisc_lock_tree(dev); netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); shutdown_scheduler_queue(dev, &dev->rx_queue, NULL); BUG_TRAP(!timer_pending(&dev->watchdog_timer)); - qdisc_unlock_tree(dev); } -- cgit v1.2.3-18-g5258 From ead81cc5fc6d996db6afb20f211241612610a07a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 17 Jul 2008 00:50:32 -0700 Subject: netdevice: Move qdisc_list back into net_device proper. And give it it's own lock. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index efa418a1b34..8cdf0b4a6a5 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -563,11 +563,15 @@ static void __qdisc_destroy(struct rcu_head *head) void qdisc_destroy(struct Qdisc *qdisc) { + struct net_device *dev = qdisc_dev(qdisc); + if (qdisc->flags & TCQ_F_BUILTIN || !atomic_dec_and_test(&qdisc->refcnt)) return; + spin_lock_bh(&dev->qdisc_list_lock); list_del(&qdisc->list); + spin_unlock_bh(&dev->qdisc_list_lock); call_rcu(&qdisc->q_rcu, __qdisc_destroy); } @@ -599,7 +603,9 @@ static void attach_one_default_qdisc(struct net_device *dev, printk(KERN_INFO "%s: activation failed\n", dev->name); return; } - list_add_tail(&qdisc->list, &dev_queue->qdisc_list); + spin_lock_bh(&dev->qdisc_list_lock); + list_add_tail(&qdisc->list, &dev->qdisc_list); + spin_unlock_bh(&dev->qdisc_list_lock); } else { qdisc = &noqueue_qdisc; } @@ -738,7 +744,6 @@ static void dev_init_scheduler_queue(struct net_device *dev, dev_queue->qdisc = qdisc; dev_queue->qdisc_sleeping = qdisc; - INIT_LIST_HEAD(&dev_queue->qdisc_list); } void dev_init_scheduler(struct net_device *dev) -- cgit v1.2.3-18-g5258 From c7e4f3bbb4ba4e48ab3b529d5016e454cee1ccd6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 16 Jul 2008 03:22:39 -0700 Subject: pkt_sched: Kill qdisc_lock_tree and qdisc_unlock_tree. No longer used. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 36 +++--------------------------------- 1 file changed, 3 insertions(+), 33 deletions(-) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 8cdf0b4a6a5..3d53e92ad9c 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -29,44 +29,14 @@ /* Main transmission queue. */ /* Modifications to data participating in scheduling must be protected with - * queue->lock spinlock. + * qdisc_root_lock(qdisc) spinlock. * * The idea is the following: - * - enqueue, dequeue are serialized via top level device - * spinlock queue->lock. - * - ingress filtering is serialized via top level device - * spinlock dev->rx_queue.lock. + * - enqueue, dequeue are serialized via qdisc root lock + * - ingress filtering is also serialized via qdisc root lock * - updates to tree and tree walking are only done under the rtnl mutex. */ -void qdisc_lock_tree(struct net_device *dev) - __acquires(dev->rx_queue.lock) -{ - unsigned int i; - - local_bh_disable(); - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - spin_lock(&txq->lock); - } - spin_lock(&dev->rx_queue.lock); -} -EXPORT_SYMBOL(qdisc_lock_tree); - -void qdisc_unlock_tree(struct net_device *dev) - __releases(dev->rx_queue.lock) -{ - unsigned int i; - - spin_unlock(&dev->rx_queue.lock); - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - spin_unlock(&txq->lock); - } - local_bh_enable(); -} -EXPORT_SYMBOL(qdisc_unlock_tree); - static inline int qdisc_qlen(struct Qdisc *q) { return q->q.qlen; -- cgit v1.2.3-18-g5258 From 83874000929ed63aef30b44083a9f713135ff040 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 17 Jul 2008 00:53:03 -0700 Subject: pkt_sched: Kill netdev_queue lock. We can simply use the qdisc->q.lock for all of the qdisc tree synchronization. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 3d53e92ad9c..8fc580b3e17 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -96,15 +96,15 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, } /* - * NOTE: Called under queue->lock with locally disabled BH. + * NOTE: Called under qdisc_lock(q) with locally disabled BH. * * __QDISC_STATE_RUNNING guarantees only one CPU can process - * this qdisc at a time. queue->lock serializes queue accesses for - * this queue AND txq->qdisc pointer itself. + * this qdisc at a time. qdisc_lock(q) serializes queue accesses for + * this queue. * * netif_tx_lock serializes accesses to device driver. * - * queue->lock and netif_tx_lock are mutually exclusive, + * qdisc_lock(q) and netif_tx_lock are mutually exclusive, * if one is grabbed, another must be free. * * Note, that this procedure can be called by a watchdog timer @@ -317,7 +317,6 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = { }; static struct netdev_queue noop_netdev_queue = { - .lock = __SPIN_LOCK_UNLOCKED(noop_netdev_queue.lock), .qdisc = &noop_qdisc, }; @@ -327,6 +326,7 @@ struct Qdisc noop_qdisc = { .flags = TCQ_F_BUILTIN, .ops = &noop_qdisc_ops, .list = LIST_HEAD_INIT(noop_qdisc.list), + .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, }; EXPORT_SYMBOL(noop_qdisc); @@ -498,7 +498,7 @@ errout: } EXPORT_SYMBOL(qdisc_create_dflt); -/* Under queue->lock and BH! */ +/* Under qdisc_root_lock(qdisc) and BH! */ void qdisc_reset(struct Qdisc *qdisc) { @@ -526,10 +526,12 @@ static void __qdisc_destroy(struct rcu_head *head) module_put(ops->owner); dev_put(qdisc_dev(qdisc)); + kfree_skb(qdisc->gso_skb); + kfree((char *) qdisc - qdisc->padded); } -/* Under queue->lock and BH! */ +/* Under qdisc_root_lock(qdisc) and BH! */ void qdisc_destroy(struct Qdisc *qdisc) { @@ -586,13 +588,12 @@ static void transition_one_qdisc(struct net_device *dev, struct netdev_queue *dev_queue, void *_need_watchdog) { + struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping; int *need_watchdog_p = _need_watchdog; - spin_lock_bh(&dev_queue->lock); - rcu_assign_pointer(dev_queue->qdisc, dev_queue->qdisc_sleeping); - if (dev_queue->qdisc != &noqueue_qdisc) + rcu_assign_pointer(dev_queue->qdisc, new_qdisc); + if (new_qdisc != &noqueue_qdisc) *need_watchdog_p = 1; - spin_unlock_bh(&dev_queue->lock); } void dev_activate(struct net_device *dev) @@ -629,19 +630,16 @@ static void dev_deactivate_queue(struct net_device *dev, struct sk_buff *skb = NULL; struct Qdisc *qdisc; - spin_lock_bh(&dev_queue->lock); - qdisc = dev_queue->qdisc; if (qdisc) { + spin_lock_bh(qdisc_lock(qdisc)); + dev_queue->qdisc = qdisc_default; qdisc_reset(qdisc); - skb = qdisc->gso_skb; - qdisc->gso_skb = NULL; + spin_unlock_bh(qdisc_lock(qdisc)); } - spin_unlock_bh(&dev_queue->lock); - kfree_skb(skb); } -- cgit v1.2.3-18-g5258 From a0c80b80e0fb48129e4e9d6a9ede914f9ff1850d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 17 Jul 2008 01:46:06 -0700 Subject: pkt_sched: Make default qdisc nonshared-multiqueue safe. Instead of 'pfifo_fast' we have just plain 'fifo_fast'. No priority queues, just a straight FIFO. This is necessary in order to legally have a seperate qdisc per queue in multi-TX-queue setups, and thus get full parallelization. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 99 +++++++++++-------------------------------------- 1 file changed, 22 insertions(+), 77 deletions(-) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 8fc580b3e17..e244c462e6b 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -349,99 +349,44 @@ static struct Qdisc noqueue_qdisc = { }; -static const u8 prio2band[TC_PRIO_MAX+1] = - { 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 }; - -/* 3-band FIFO queue: old style, but should be a bit faster than - generic prio+fifo combination. - */ - -#define PFIFO_FAST_BANDS 3 - -static inline struct sk_buff_head *prio2list(struct sk_buff *skb, - struct Qdisc *qdisc) -{ - struct sk_buff_head *list = qdisc_priv(qdisc); - return list + prio2band[skb->priority & TC_PRIO_MAX]; -} - -static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) +static int fifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) { - struct sk_buff_head *list = prio2list(skb, qdisc); + struct sk_buff_head *list = &qdisc->q; - if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) { - qdisc->q.qlen++; + if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) return __qdisc_enqueue_tail(skb, qdisc, list); - } return qdisc_drop(skb, qdisc); } -static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc) +static struct sk_buff *fifo_fast_dequeue(struct Qdisc* qdisc) { - int prio; - struct sk_buff_head *list = qdisc_priv(qdisc); + struct sk_buff_head *list = &qdisc->q; - for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) { - if (!skb_queue_empty(list + prio)) { - qdisc->q.qlen--; - return __qdisc_dequeue_head(qdisc, list + prio); - } - } + if (!skb_queue_empty(list)) + return __qdisc_dequeue_head(qdisc, list); return NULL; } -static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc) +static int fifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc) { - qdisc->q.qlen++; - return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc)); + return __qdisc_requeue(skb, qdisc, &qdisc->q); } -static void pfifo_fast_reset(struct Qdisc* qdisc) +static void fifo_fast_reset(struct Qdisc* qdisc) { - int prio; - struct sk_buff_head *list = qdisc_priv(qdisc); - - for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) - __qdisc_reset_queue(qdisc, list + prio); - + __qdisc_reset_queue(qdisc, &qdisc->q); qdisc->qstats.backlog = 0; - qdisc->q.qlen = 0; } -static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb) -{ - struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS }; - - memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1); - NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); - return skb->len; - -nla_put_failure: - return -1; -} - -static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt) -{ - int prio; - struct sk_buff_head *list = qdisc_priv(qdisc); - - for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) - skb_queue_head_init(list + prio); - - return 0; -} - -static struct Qdisc_ops pfifo_fast_ops __read_mostly = { - .id = "pfifo_fast", - .priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head), - .enqueue = pfifo_fast_enqueue, - .dequeue = pfifo_fast_dequeue, - .requeue = pfifo_fast_requeue, - .init = pfifo_fast_init, - .reset = pfifo_fast_reset, - .dump = pfifo_fast_dump, +static struct Qdisc_ops fifo_fast_ops __read_mostly = { + .id = "fifo_fast", + .priv_size = 0, + .enqueue = fifo_fast_enqueue, + .dequeue = fifo_fast_dequeue, + .requeue = fifo_fast_requeue, + .reset = fifo_fast_reset, .owner = THIS_MODULE, }; @@ -570,7 +515,7 @@ static void attach_one_default_qdisc(struct net_device *dev, if (dev->tx_queue_len) { qdisc = qdisc_create_dflt(dev, dev_queue, - &pfifo_fast_ops, TC_H_ROOT); + &fifo_fast_ops, TC_H_ROOT); if (!qdisc) { printk(KERN_INFO "%s: activation failed\n", dev->name); return; @@ -601,9 +546,9 @@ void dev_activate(struct net_device *dev) int need_watchdog; /* No queueing discipline is attached to device; - create default one i.e. pfifo_fast for devices, - which need queueing and noqueue_qdisc for - virtual interfaces + * create default one i.e. fifo_fast for devices, + * which need queueing and noqueue_qdisc for + * virtual interfaces. */ if (dev_all_qdisc_sleeping_noop(dev)) -- cgit v1.2.3-18-g5258 From 3072367300aa8c779e3a14ee8e89de079e90f3ad Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 18 Jul 2008 22:50:15 -0700 Subject: pkt_sched: Manage qdisc list inside of root qdisc. Idea is from Patrick McHardy. Instead of managing the list of qdiscs on the device level, manage it in the root qdisc of a netdev_queue. This solves all kinds of visibility issues during qdisc destruction. The way to iterate over all qdiscs of a netdev_queue is to visit the netdev_queue->qdisc, and then traverse it's list. The only special case is to ignore builting qdiscs at the root when dumping or doing a qdisc_lookup(). That was not needed previously because builtin qdiscs were not added to the device's qdisc_list. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index e244c462e6b..14cc443d049 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -480,15 +480,12 @@ static void __qdisc_destroy(struct rcu_head *head) void qdisc_destroy(struct Qdisc *qdisc) { - struct net_device *dev = qdisc_dev(qdisc); - if (qdisc->flags & TCQ_F_BUILTIN || !atomic_dec_and_test(&qdisc->refcnt)) return; - spin_lock_bh(&dev->qdisc_list_lock); - list_del(&qdisc->list); - spin_unlock_bh(&dev->qdisc_list_lock); + if (qdisc->parent) + list_del(&qdisc->list); call_rcu(&qdisc->q_rcu, __qdisc_destroy); } @@ -520,9 +517,6 @@ static void attach_one_default_qdisc(struct net_device *dev, printk(KERN_INFO "%s: activation failed\n", dev->name); return; } - spin_lock_bh(&dev->qdisc_list_lock); - list_add_tail(&qdisc->list, &dev->qdisc_list); - spin_unlock_bh(&dev->qdisc_list_lock); } else { qdisc = &noqueue_qdisc; } -- cgit v1.2.3-18-g5258 From 30ee42be00b7a50929a73cb617f70b1d3219eb69 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 18 Jul 2008 23:00:11 -0700 Subject: pkt_sched: Fix noqueue_qdisc initialization. Like noop_qdisc, it needs a dummy backpointer and explicit qdisc->q.lock initialization. Based upon a report by Stephen Hemminger. Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 14cc443d049..522a41a9f90 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -340,12 +340,19 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = { .owner = THIS_MODULE, }; +static struct Qdisc noqueue_qdisc; +static struct netdev_queue noqueue_netdev_queue = { + .qdisc = &noqueue_qdisc, +}; + static struct Qdisc noqueue_qdisc = { .enqueue = NULL, .dequeue = noop_dequeue, .flags = TCQ_F_BUILTIN, .ops = &noqueue_qdisc_ops, .list = LIST_HEAD_INIT(noqueue_qdisc.list), + .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), + .dev_queue = &noqueue_netdev_queue, }; -- cgit v1.2.3-18-g5258 From 175f9c1bba9b825d22b142d183c9e175488b260c Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sun, 20 Jul 2008 00:08:47 -0700 Subject: net_sched: Add size table for qdiscs Add size table functions for qdiscs and calculate packet size in qdisc_enqueue(). Based on patch by Patrick McHardy http://marc.info/?l=linux-netdev&m=115201979221729&w=2 Signed-off-by: Jussi Kivilinna Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/sched/sch_generic.c') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 522a41a9f90..27a51f04db4 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -469,6 +469,7 @@ static void __qdisc_destroy(struct rcu_head *head) struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu); const struct Qdisc_ops *ops = qdisc->ops; + qdisc_put_stab(qdisc->stab); gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); if (ops->reset) ops->reset(qdisc); -- cgit v1.2.3-18-g5258