diff options
author | Wei Liu <wei.liu2@citrix.com> | 2013-08-26 12:59:38 +0100 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-08-29 01:18:04 -0400 |
commit | b3f980bd827e6e81a050c518d60ed7811a83061d (patch) | |
tree | 2104c030c277c9eab340ab424fa669562fc0189a /drivers/net/xen-netback | |
parent | 43e9d1943278e96150b449ea1fa81f4ae27729d5 (diff) |
xen-netback: switch to NAPI + kthread 1:1 model
This patch implements 1:1 model netback. NAPI and kthread are utilized
to do the weight-lifting job:
- NAPI is used for guest side TX (host side RX)
- kthread is used for guest side RX (host side TX)
Xenvif and xen_netbk are made into one structure to reduce code size.
This model provides better scheduling fairness among vifs. It is also
prerequisite for implementing multiqueue for Xen netback.
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/xen-netback')
-rw-r--r-- | drivers/net/xen-netback/common.h | 132 | ||||
-rw-r--r-- | drivers/net/xen-netback/interface.c | 119 | ||||
-rw-r--r-- | drivers/net/xen-netback/netback.c | 607 |
3 files changed, 351 insertions, 507 deletions
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 8a4d77ee9c5..9c1f15872e1 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -45,31 +45,109 @@ #include <xen/grant_table.h> #include <xen/xenbus.h> -struct xen_netbk; +typedef unsigned int pending_ring_idx_t; +#define INVALID_PENDING_RING_IDX (~0U) + +/* For the head field in pending_tx_info: it is used to indicate + * whether this tx info is the head of one or more coalesced requests. + * + * When head != INVALID_PENDING_RING_IDX, it means the start of a new + * tx requests queue and the end of previous queue. + * + * An example sequence of head fields (I = INVALID_PENDING_RING_IDX): + * + * ...|0 I I I|5 I|9 I I I|... + * -->|<-INUSE---------------- + * + * After consuming the first slot(s) we have: + * + * ...|V V V V|5 I|9 I I I|... + * -----FREE->|<-INUSE-------- + * + * where V stands for "valid pending ring index". Any number other + * than INVALID_PENDING_RING_IDX is OK. These entries are considered + * free and can contain any number other than + * INVALID_PENDING_RING_IDX. In practice we use 0. + * + * The in use non-INVALID_PENDING_RING_IDX (say 0, 5 and 9 in the + * above example) number is the index into pending_tx_info and + * mmap_pages arrays. + */ +struct pending_tx_info { + struct xen_netif_tx_request req; /* coalesced tx request */ + pending_ring_idx_t head; /* head != INVALID_PENDING_RING_IDX + * if it is head of one or more tx + * reqs + */ +}; + +#define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE) +#define XEN_NETIF_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE) + +struct xenvif_rx_meta { + int id; + int size; + int gso_size; +}; + +/* Discriminate from any valid pending_idx value. */ +#define INVALID_PENDING_IDX 0xFFFF + +#define MAX_BUFFER_OFFSET PAGE_SIZE + +#define MAX_PENDING_REQS 256 struct xenvif { /* Unique identifier for this interface. */ domid_t domid; unsigned int handle; - /* Reference to netback processing backend. */ - struct xen_netbk *netbk; + /* Use NAPI for guest TX */ + struct napi_struct napi; + /* When feature-split-event-channels = 0, tx_irq = rx_irq. */ + unsigned int tx_irq; + /* Only used when feature-split-event-channels = 1 */ + char tx_irq_name[IFNAMSIZ+4]; /* DEVNAME-tx */ + struct xen_netif_tx_back_ring tx; + struct sk_buff_head tx_queue; + struct page *mmap_pages[MAX_PENDING_REQS]; + pending_ring_idx_t pending_prod; + pending_ring_idx_t pending_cons; + u16 pending_ring[MAX_PENDING_REQS]; + struct pending_tx_info pending_tx_info[MAX_PENDING_REQS]; + + /* Coalescing tx requests before copying makes number of grant + * copy ops greater or equal to number of slots required. In + * worst case a tx request consumes 2 gnttab_copy. + */ + struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS]; - u8 fe_dev_addr[6]; + /* Use kthread for guest RX */ + struct task_struct *task; + wait_queue_head_t wq; /* When feature-split-event-channels = 0, tx_irq = rx_irq. */ - unsigned int tx_irq; unsigned int rx_irq; /* Only used when feature-split-event-channels = 1 */ - char tx_irq_name[IFNAMSIZ+4]; /* DEVNAME-tx */ char rx_irq_name[IFNAMSIZ+4]; /* DEVNAME-rx */ + struct xen_netif_rx_back_ring rx; + struct sk_buff_head rx_queue; - /* List of frontends to notify after a batch of frames sent. */ - struct list_head notify_list; + /* Allow xenvif_start_xmit() to peek ahead in the rx request + * ring. This is a prediction of what rx_req_cons will be + * once all queued skbs are put on the ring. + */ + RING_IDX rx_req_cons_peek; + + /* Given MAX_BUFFER_OFFSET of 4096 the worst case is that each + * head/fragment page uses 2 copy operations because it + * straddles two buffers in the frontend. + */ + struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE]; + struct xenvif_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE]; - /* The shared rings and indexes. */ - struct xen_netif_tx_back_ring tx; - struct xen_netif_rx_back_ring rx; + + u8 fe_dev_addr[6]; /* Frontend feature information. */ u8 can_sg:1; @@ -80,13 +158,6 @@ struct xenvif { /* Internal feature information. */ u8 can_queue:1; /* can queue packets for receiver? */ - /* - * Allow xenvif_start_xmit() to peek ahead in the rx request - * ring. This is a prediction of what rx_req_cons will be - * once all queued skbs are put on the ring. - */ - RING_IDX rx_req_cons_peek; - /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */ unsigned long credit_bytes; unsigned long credit_usec; @@ -97,11 +168,7 @@ struct xenvif { unsigned long rx_gso_checksum_fixup; /* Miscellaneous private stuff. */ - struct list_head schedule_list; - atomic_t refcnt; struct net_device *dev; - - wait_queue_head_t waiting_to_free; }; static inline struct xenbus_device *xenvif_to_xenbus_device(struct xenvif *vif) @@ -109,9 +176,6 @@ static inline struct xenbus_device *xenvif_to_xenbus_device(struct xenvif *vif) return to_xenbus_device(vif->dev->dev.parent); } -#define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE) -#define XEN_NETIF_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE) - struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, unsigned int handle); @@ -121,9 +185,6 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref, unsigned int rx_evtchn); void xenvif_disconnect(struct xenvif *vif); -void xenvif_get(struct xenvif *vif); -void xenvif_put(struct xenvif *vif); - int xenvif_xenbus_init(void); void xenvif_xenbus_fini(void); @@ -139,18 +200,8 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref); -/* (De)Register a xenvif with the netback backend. */ -void xen_netbk_add_xenvif(struct xenvif *vif); -void xen_netbk_remove_xenvif(struct xenvif *vif); - -/* (De)Schedule backend processing for a xenvif */ -void xen_netbk_schedule_xenvif(struct xenvif *vif); -void xen_netbk_deschedule_xenvif(struct xenvif *vif); - /* Check for SKBs from frontend and schedule backend processing */ void xen_netbk_check_rx_xenvif(struct xenvif *vif); -/* Receive an SKB from the frontend */ -void xenvif_receive_skb(struct xenvif *vif, struct sk_buff *skb); /* Queue an SKB for transmission to the frontend */ void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb); @@ -163,6 +214,11 @@ void xenvif_carrier_off(struct xenvif *vif); /* Returns number of ring slots required to send an skb to the frontend */ unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb); +int xen_netbk_tx_action(struct xenvif *vif, int budget); +void xen_netbk_rx_action(struct xenvif *vif); + +int xen_netbk_kthread(void *data); + extern bool separate_tx_rx_irq; #endif /* __XEN_NETBACK__COMMON_H__ */ diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 087d2db0389..44d6b707c77 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -30,6 +30,7 @@ #include "common.h" +#include <linux/kthread.h> #include <linux/ethtool.h> #include <linux/rtnetlink.h> #include <linux/if_vlan.h> @@ -38,17 +39,7 @@ #include <asm/xen/hypercall.h> #define XENVIF_QUEUE_LENGTH 32 - -void xenvif_get(struct xenvif *vif) -{ - atomic_inc(&vif->refcnt); -} - -void xenvif_put(struct xenvif *vif) -{ - if (atomic_dec_and_test(&vif->refcnt)) - wake_up(&vif->waiting_to_free); -} +#define XENVIF_NAPI_WEIGHT 64 int xenvif_schedulable(struct xenvif *vif) { @@ -64,21 +55,55 @@ static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id) { struct xenvif *vif = dev_id; - if (vif->netbk == NULL) - return IRQ_HANDLED; - - xen_netbk_schedule_xenvif(vif); + if (RING_HAS_UNCONSUMED_REQUESTS(&vif->tx)) + napi_schedule(&vif->napi); return IRQ_HANDLED; } +static int xenvif_poll(struct napi_struct *napi, int budget) +{ + struct xenvif *vif = container_of(napi, struct xenvif, napi); + int work_done; + + work_done = xen_netbk_tx_action(vif, budget); + + if (work_done < budget) { + int more_to_do = 0; + unsigned long flags; + + /* It is necessary to disable IRQ before calling + * RING_HAS_UNCONSUMED_REQUESTS. Otherwise we might + * lose event from the frontend. + * + * Consider: + * RING_HAS_UNCONSUMED_REQUESTS + * <frontend generates event to trigger napi_schedule> + * __napi_complete + * + * This handler is still in scheduled state so the + * event has no effect at all. After __napi_complete + * this handler is descheduled and cannot get + * scheduled again. We lose event in this case and the ring + * will be completely stalled. + */ + + local_irq_save(flags); + + RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do); + if (!more_to_do) + __napi_complete(napi); + + local_irq_restore(flags); + } + + return work_done; +} + static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id) { struct xenvif *vif = dev_id; - if (vif->netbk == NULL) - return IRQ_HANDLED; - if (xenvif_rx_schedulable(vif)) netif_wake_queue(vif->dev); @@ -99,7 +124,8 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) BUG_ON(skb->dev != dev); - if (vif->netbk == NULL) + /* Drop the packet if vif is not ready */ + if (vif->task == NULL) goto drop; /* Drop the packet if the target domain has no receive buffers. */ @@ -108,7 +134,6 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) /* Reserve ring slots for the worst-case number of fragments. */ vif->rx_req_cons_peek += xen_netbk_count_skb_slots(vif, skb); - xenvif_get(vif); if (vif->can_queue && xen_netbk_must_stop_queue(vif)) netif_stop_queue(dev); @@ -123,11 +148,6 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -void xenvif_receive_skb(struct xenvif *vif, struct sk_buff *skb) -{ - netif_rx_ni(skb); -} - void xenvif_notify_tx_completion(struct xenvif *vif) { if (netif_queue_stopped(vif->dev) && xenvif_rx_schedulable(vif)) @@ -142,7 +162,7 @@ static struct net_device_stats *xenvif_get_stats(struct net_device *dev) static void xenvif_up(struct xenvif *vif) { - xen_netbk_add_xenvif(vif); + napi_enable(&vif->napi); enable_irq(vif->tx_irq); if (vif->tx_irq != vif->rx_irq) enable_irq(vif->rx_irq); @@ -151,12 +171,11 @@ static void xenvif_up(struct xenvif *vif) static void xenvif_down(struct xenvif *vif) { + napi_disable(&vif->napi); disable_irq(vif->tx_irq); if (vif->tx_irq != vif->rx_irq) disable_irq(vif->rx_irq); del_timer_sync(&vif->credit_timeout); - xen_netbk_deschedule_xenvif(vif); - xen_netbk_remove_xenvif(vif); } static int xenvif_open(struct net_device *dev) @@ -272,11 +291,12 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, struct net_device *dev; struct xenvif *vif; char name[IFNAMSIZ] = {}; + int i; snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle); dev = alloc_netdev(sizeof(struct xenvif), name, ether_setup); if (dev == NULL) { - pr_warn("Could not allocate netdev\n"); + pr_warn("Could not allocate netdev for %s\n", name); return ERR_PTR(-ENOMEM); } @@ -285,14 +305,9 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, vif = netdev_priv(dev); vif->domid = domid; vif->handle = handle; - vif->netbk = NULL; vif->can_sg = 1; vif->csum = 1; - atomic_set(&vif->refcnt, 1); - init_waitqueue_head(&vif->waiting_to_free); vif->dev = dev; - INIT_LIST_HEAD(&vif->schedule_list); - INIT_LIST_HEAD(&vif->notify_list); vif->credit_bytes = vif->remaining_credit = ~0UL; vif->credit_usec = 0UL; @@ -307,6 +322,16 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, dev->tx_queue_len = XENVIF_QUEUE_LENGTH; + skb_queue_head_init(&vif->rx_queue); + skb_queue_head_init(&vif->tx_queue); + + vif->pending_cons = 0; + vif->pending_prod = MAX_PENDING_REQS; + for (i = 0; i < MAX_PENDING_REQS; i++) + vif->pending_ring[i] = i; + for (i = 0; i < MAX_PENDING_REQS; i++) + vif->mmap_pages[i] = NULL; + /* * Initialise a dummy MAC address. We choose the numerically * largest non-broadcast address to prevent the address getting @@ -316,6 +341,8 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, memset(dev->dev_addr, 0xFF, ETH_ALEN); dev->dev_addr[0] &= ~0x01; + netif_napi_add(dev, &vif->napi, xenvif_poll, XENVIF_NAPI_WEIGHT); + netif_carrier_off(dev); err = register_netdev(dev); @@ -377,7 +404,14 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref, disable_irq(vif->rx_irq); } - xenvif_get(vif); + init_waitqueue_head(&vif->wq); + vif->task = kthread_create(xen_netbk_kthread, + (void *)vif, vif->dev->name); + if (IS_ERR(vif->task)) { + pr_warn("Could not allocate kthread for %s\n", vif->dev->name); + err = PTR_ERR(vif->task); + goto err_rx_unbind; + } rtnl_lock(); if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN) @@ -388,7 +422,13 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref, xenvif_up(vif); rtnl_unlock(); + wake_up_process(vif->task); + return 0; + +err_rx_unbind: + unbind_from_irqhandler(vif->rx_irq, vif); + vif->rx_irq = 0; err_tx_unbind: unbind_from_irqhandler(vif->tx_irq, vif); vif->tx_irq = 0; @@ -408,7 +448,6 @@ void xenvif_carrier_off(struct xenvif *vif) if (netif_running(dev)) xenvif_down(vif); rtnl_unlock(); - xenvif_put(vif); } void xenvif_disconnect(struct xenvif *vif) @@ -422,9 +461,6 @@ void xenvif_disconnect(struct xenvif *vif) if (netif_carrier_ok(vif->dev)) xenvif_carrier_off(vif); - atomic_dec(&vif->refcnt); - wait_event(vif->waiting_to_free, atomic_read(&vif->refcnt) == 0); - if (vif->tx_irq) { if (vif->tx_irq == vif->rx_irq) unbind_from_irqhandler(vif->tx_irq, vif); @@ -438,6 +474,11 @@ void xenvif_disconnect(struct xenvif *vif) need_module_put = 1; } + if (vif->task) + kthread_stop(vif->task); + + netif_napi_del(&vif->napi); + unregister_netdev(vif->dev); xen_netbk_unmap_frontend_rings(vif); diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 91f163d03a4..44ccc674c02 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -70,116 +70,25 @@ module_param(fatal_skb_slots, uint, 0444); */ #define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN -typedef unsigned int pending_ring_idx_t; -#define INVALID_PENDING_RING_IDX (~0U) - -struct pending_tx_info { - struct xen_netif_tx_request req; /* coalesced tx request */ - struct xenvif *vif; - pending_ring_idx_t head; /* head != INVALID_PENDING_RING_IDX - * if it is head of one or more tx - * reqs - */ -}; - -struct netbk_rx_meta { - int id; - int size; - int gso_size; -}; - -#define MAX_PENDING_REQS 256 - -/* Discriminate from any valid pending_idx value. */ -#define INVALID_PENDING_IDX 0xFFFF - -#define MAX_BUFFER_OFFSET PAGE_SIZE - -struct xen_netbk { - wait_queue_head_t wq; - struct task_struct *task; - - struct sk_buff_head rx_queue; - struct sk_buff_head tx_queue; - - struct timer_list net_timer; - - struct page *mmap_pages[MAX_PENDING_REQS]; - - pending_ring_idx_t pending_prod; - pending_ring_idx_t pending_cons; - struct list_head net_schedule_list; - - /* Protect the net_schedule_list in netif. */ - spinlock_t net_schedule_list_lock; - - atomic_t netfront_count; - - struct pending_tx_info pending_tx_info[MAX_PENDING_REQS]; - /* Coalescing tx requests before copying makes number of grant - * copy ops greater or equal to number of slots required. In - * worst case a tx request consumes 2 gnttab_copy. - */ - struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS]; - - u16 pending_ring[MAX_PENDING_REQS]; - - /* - * Given MAX_BUFFER_OFFSET of 4096 the worst case is that each - * head/fragment page uses 2 copy operations because it - * straddles two buffers in the frontend. - */ - struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE]; - struct netbk_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE]; -}; - -static struct xen_netbk *xen_netbk; -static int xen_netbk_group_nr; - /* * If head != INVALID_PENDING_RING_IDX, it means this tx request is head of * one or more merged tx requests, otherwise it is the continuation of * previous tx request. */ -static inline int pending_tx_is_head(struct xen_netbk *netbk, RING_IDX idx) -{ - return netbk->pending_tx_info[idx].head != INVALID_PENDING_RING_IDX; -} - -void xen_netbk_add_xenvif(struct xenvif *vif) -{ - int i; - int min_netfront_count; - int min_group = 0; - struct xen_netbk *netbk; - - min_netfront_count = atomic_read(&xen_netbk[0].netfront_count); - for (i = 0; i < xen_netbk_group_nr; i++) { - int netfront_count = atomic_read(&xen_netbk[i].netfront_count); - if (netfront_count < min_netfront_count) { - min_group = i; - min_netfront_count = netfront_count; - } - } - - netbk = &xen_netbk[min_group]; - - vif->netbk = netbk; - atomic_inc(&netbk->netfront_count); -} - -void xen_netbk_remove_xenvif(struct xenvif *vif) +static inline int pending_tx_is_head(struct xenvif *vif, RING_IDX idx) { - struct xen_netbk *netbk = vif->netbk; - vif->netbk = NULL; - atomic_dec(&netbk->netfront_count); + return vif->pending_tx_info[idx].head != INVALID_PENDING_RING_IDX; } -static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx, +static void xen_netbk_idx_release(struct xenvif *vif, u16 pending_idx, u8 status); static void make_tx_response(struct xenvif *vif, struct xen_netif_tx_request *txp, s8 st); + +static inline int tx_work_todo(struct xenvif *vif); +static inline int rx_work_todo(struct xenvif *vif); + static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif, u16 id, s8 st, @@ -187,16 +96,16 @@ static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif, u16 size, u16 flags); -static inline unsigned long idx_to_pfn(struct xen_netbk *netbk, +static inline unsigned long idx_to_pfn(struct xenvif *vif, u16 idx) { - return page_to_pfn(netbk->mmap_pages[idx]); + return page_to_pfn(vif->mmap_pages[idx]); } -static inline unsigned long idx_to_kaddr(struct xen_netbk *netbk, +static inline unsigned long idx_to_kaddr(struct xenvif *vif, u16 idx) { - return (unsigned long)pfn_to_kaddr(idx_to_pfn(netbk, idx)); + return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx)); } /* @@ -224,15 +133,10 @@ static inline pending_ring_idx_t pending_index(unsigned i) return i & (MAX_PENDING_REQS-1); } -static inline pending_ring_idx_t nr_pending_reqs(struct xen_netbk *netbk) +static inline pending_ring_idx_t nr_pending_reqs(struct xenvif *vif) { return MAX_PENDING_REQS - - netbk->pending_prod + netbk->pending_cons; -} - -static void xen_netbk_kick_thread(struct xen_netbk *netbk) -{ - wake_up(&netbk->wq); + vif->pending_prod + vif->pending_cons; } static int max_required_rx_slots(struct xenvif *vif) @@ -364,15 +268,15 @@ struct netrx_pending_operations { unsigned copy_prod, copy_cons; unsigned meta_prod, meta_cons; struct gnttab_copy *copy; - struct netbk_rx_meta *meta; + struct xenvif_rx_meta *meta; int copy_off; grant_ref_t copy_gref; }; -static struct netbk_rx_meta *get_next_rx_buffer(struct xenvif *vif, - struct netrx_pending_operations *npo) +static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif, + struct netrx_pending_operations *npo) { - struct netbk_rx_meta *meta; + struct xenvif_rx_meta *meta; struct xen_netif_rx_request *req; req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); @@ -398,7 +302,7 @@ static void netbk_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb, unsigned long offset, int *head) { struct gnttab_copy *copy_gop; - struct netbk_rx_meta *meta; + struct xenvif_rx_meta *meta; unsigned long bytes; /* Data must not cross a page boundary. */ @@ -434,15 +338,15 @@ static void netbk_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb, copy_gop = npo->copy + npo->copy_prod++; copy_gop->flags = GNTCOPY_dest_gref; + copy_gop->len = bytes; + copy_gop->source.domid = DOMID_SELF; copy_gop->source.u.gmfn = virt_to_mfn(page_address(page)); - copy_gop->source.offset = offset; - copy_gop->dest.domid = vif->domid; + copy_gop->dest.domid = vif->domid; copy_gop->dest.offset = npo->copy_off; copy_gop->dest.u.ref = npo->copy_gref; - copy_gop->len = bytes; npo->copy_off += bytes; meta->size += bytes; @@ -485,7 +389,7 @@ static int netbk_gop_skb(struct sk_buff *skb, int nr_frags = skb_shinfo(skb)->nr_frags; int i; struct xen_netif_rx_request *req; - struct netbk_rx_meta *meta; + struct xenvif_rx_meta *meta; unsigned char *data; int head = 1; int old_meta_prod; @@ -565,7 +469,7 @@ static int netbk_check_gop(struct xenvif *vif, int nr_meta_slots, } static void netbk_add_frag_responses(struct xenvif *vif, int status, - struct netbk_rx_meta *meta, + struct xenvif_rx_meta *meta, int nr_meta_slots) { int i; @@ -594,9 +498,13 @@ struct skb_cb_overlay { int meta_slots_used; }; -static void xen_netbk_rx_action(struct xen_netbk *netbk) +static void xen_netbk_kick_thread(struct xenvif *vif) +{ + wake_up(&vif->wq); +} + +void xen_netbk_rx_action(struct xenvif *vif) { - struct xenvif *vif = NULL, *tmp; s8 status; u16 flags; struct xen_netif_rx_response *resp; @@ -608,17 +516,18 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk) int count; unsigned long offset; struct skb_cb_overlay *sco; + int need_to_notify = 0; struct netrx_pending_operations npo = { - .copy = netbk->grant_copy_op, - .meta = netbk->meta, + .copy = vif->grant_copy_op, + .meta = vif->meta, }; skb_queue_head_init(&rxq); count = 0; - while ((skb = skb_dequeue(&netbk->rx_queue)) != NULL) { + while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) { vif = netdev_priv(skb->dev); nr_frags = skb_shinfo(skb)->nr_frags; @@ -635,27 +544,27 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk) break; } - BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta)); + BUG_ON(npo.meta_prod > ARRAY_SIZE(vif->meta)); if (!npo.copy_prod) return; - BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op)); - gnttab_batch_copy(netbk->grant_copy_op, npo.copy_prod); + BUG_ON(npo.copy_prod > ARRAY_SIZE(vif->grant_copy_op)); + gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod); while ((skb = __skb_dequeue(&rxq)) != NULL) { sco = (struct skb_cb_overlay *)skb->cb; vif = netdev_priv(skb->dev); - if (netbk->meta[npo.meta_cons].gso_size && vif->gso_prefix) { + if (vif->meta[npo.meta_cons].gso_size && vif->gso_prefix) { resp = RING_GET_RESPONSE(&vif->rx, - vif->rx.rsp_prod_pvt++); + vif->rx.rsp_prod_pvt++); resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data; - resp->offset = netbk->meta[npo.meta_cons].gso_size; - resp->id = netbk->meta[npo.meta_cons].id; + resp->offset = vif->meta[npo.meta_cons].gso_size; + resp->id = vif->meta[npo.meta_cons].id; resp->status = sco->meta_slots_used; npo.meta_cons++; @@ -680,12 +589,12 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk) flags |= XEN_NETRXF_data_validated; offset = 0; - resp = make_rx_response(vif, netbk->meta[npo.meta_cons].id, + resp = make_rx_response(vif, vif->meta[npo.meta_cons].id, status, offset, - netbk->meta[npo.meta_cons].size, + vif->meta[npo.meta_cons].size, flags); - if (netbk->meta[npo.meta_cons].gso_size && !vif->gso_prefix) { + if (vif->meta[npo.meta_cons].gso_size && !vif->gso_prefix) { struct xen_netif_extra_info *gso = (struct xen_netif_extra_info *) RING_GET_RESPONSE(&vif->rx, @@ -693,7 +602,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk) resp->flags |= XEN_NETRXF_extra_info; - gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size; + gso->u.gso.size = vif->meta[npo.meta_cons].gso_size; gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; gso->u.gso.pad = 0; gso->u.gso.features = 0; @@ -703,112 +612,33 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk) } netbk_add_frag_responses(vif, status, - netbk->meta + npo.meta_cons + 1, + vif->meta + npo.meta_cons + 1, sco->meta_slots_used); RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret); + if (ret) + need_to_notify = 1; + xenvif_notify_tx_completion(vif); - if (ret && list_empty(&vif->notify_list)) - list_add_tail(&vif->notify_list, ¬ify); - else - xenvif_put(vif); npo.meta_cons += sco->meta_slots_used; dev_kfree_skb(skb); } - list_for_each_entry_safe(vif, tmp, ¬ify, notify_list) { + if (need_to_notify) notify_remote_via_irq(vif->rx_irq); - list_del_init(&vif->notify_list); - xenvif_put(vif); - } /* More work to do? */ - if (!skb_queue_empty(&netbk->rx_queue) && - !timer_pending(&netbk->net_timer)) - xen_netbk_kick_thread(netbk); + if (!skb_queue_empty(&vif->rx_queue)) + xen_netbk_kick_thread(vif); } void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb) { - struct xen_netbk *netbk = vif->netbk; + skb_queue_tail(&vif->rx_queue, skb); - skb_queue_tail(&netbk->rx_queue, skb); - - xen_netbk_kick_thread(netbk); -} - -static void xen_netbk_alarm(unsigned long data) -{ - struct xen_netbk *netbk = (struct xen_netbk *)data; - xen_netbk_kick_thread(netbk); -} - -static int __on_net_schedule_list(struct xenvif *vif) -{ - return !list_empty(&vif->schedule_list); -} - -/* Must be called with net_schedule_list_lock held */ -static void remove_from_net_schedule_list(struct xenvif *vif) -{ - if (likely(__on_net_schedule_list(vif))) { - list_del_init(&vif->schedule_list); - xenvif_put(vif); - } -} - -static struct xenvif *poll_net_schedule_list(struct xen_netbk *netbk) -{ - struct xenvif *vif = NULL; - - spin_lock_irq(&netbk->net_schedule_list_lock); - if (list_empty(&netbk->net_schedule_list)) - goto out; - - vif = list_first_entry(&netbk->net_schedule_list, - struct xenvif, schedule_list); - if (!vif) - goto out; - - xenvif_get(vif); - - remove_from_net_schedule_list(vif); -out: - spin_unlock_irq(&netbk->net_schedule_list_lock); - return vif; -} - -void xen_netbk_schedule_xenvif(struct xenvif *vif) -{ - unsigned long flags; - struct xen_netbk *netbk = vif->netbk; - - if (__on_net_schedule_list(vif)) - goto kick; - - spin_lock_irqsave(&netbk->net_schedule_list_lock, flags); - if (!__on_net_schedule_list(vif) && - likely(xenvif_schedulable(vif))) { - list_add_tail(&vif->schedule_list, &netbk->net_schedule_list); - xenvif_get(vif); - } - spin_unlock_irqrestore(&netbk->net_schedule_list_lock, flags); - -kick: - smp_mb(); - if ((nr_pending_reqs(netbk) < (MAX_PENDING_REQS/2)) && - !list_empty(&netbk->net_schedule_list)) - xen_netbk_kick_thread(netbk); -} - -void xen_netbk_deschedule_xenvif(struct xenvif *vif) -{ - struct xen_netbk *netbk = vif->netbk; - spin_lock_irq(&netbk->net_schedule_list_lock); - remove_from_net_schedule_list(vif); - spin_unlock_irq(&netbk->net_schedule_list_lock); + xen_netbk_kick_thread(vif); } void xen_netbk_check_rx_xenvif(struct xenvif *vif) @@ -818,7 +648,7 @@ void xen_netbk_check_rx_xenvif(struct xenvif *vif) RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do); if (more_to_do) - xen_netbk_schedule_xenvif(vif); + napi_schedule(&vif->napi); } static void tx_add_credit(struct xenvif *vif) @@ -860,15 +690,12 @@ static void netbk_tx_err(struct xenvif *vif, txp = RING_GET_REQUEST(&vif->tx, cons++); } while (1); vif->tx.req_cons = cons; - xen_netbk_check_rx_xenvif(vif); - xenvif_put(vif); } static void netbk_fatal_tx_err(struct xenvif *vif) { netdev_err(vif->dev, "fatal error; disabling device\n"); xenvif_carrier_off(vif); - xenvif_put(vif); } static int netbk_count_requests(struct xenvif *vif, @@ -969,19 +796,20 @@ static int netbk_count_requests(struct xenvif *vif, return slots; } -static struct page *xen_netbk_alloc_page(struct xen_netbk *netbk, +static struct page *xen_netbk_alloc_page(struct xenvif *vif, u16 pending_idx) { struct page *page; - page = alloc_page(GFP_KERNEL|__GFP_COLD); + + page = alloc_page(GFP_ATOMIC|__GFP_COLD); if (!page) return NULL; - netbk->mmap_pages[pending_idx] = page; + vif->mmap_pages[pending_idx] = page; + return page; } -static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk, - struct xenvif *vif, +static struct gnttab_copy *xen_netbk_get_requests(struct xenvif *vif, struct sk_buff *skb, struct xen_netif_tx_request *txp, struct gnttab_copy *gop) @@ -1012,9 +840,9 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk, for (shinfo->nr_frags = slot = start; slot < nr_slots; shinfo->nr_frags++) { struct pending_tx_info *pending_tx_info = - netbk->pending_tx_info; + vif->pending_tx_info; - page = alloc_page(GFP_KERNEL|__GFP_COLD); + page = alloc_page(GFP_ATOMIC|__GFP_COLD); if (!page) goto err; @@ -1049,21 +877,18 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk, gop->len = txp->size; dst_offset += gop->len; - index = pending_index(netbk->pending_cons++); + index = pending_index(vif->pending_cons++); - pending_idx = netbk->pending_ring[index]; + pending_idx = vif->pending_ring[index]; memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp)); - xenvif_get(vif); - - pending_tx_info[pending_idx].vif = vif; /* Poison these fields, corresponding * fields for head tx req will be set * to correct values after the loop. */ - netbk->mmap_pages[pending_idx] = (void *)(~0UL); + vif->mmap_pages[pending_idx] = (void *)(~0UL); pending_tx_info[pending_idx].head = INVALID_PENDING_RING_IDX; @@ -1083,7 +908,7 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk, first->req.offset = 0; first->req.size = dst_offset; first->head = start_idx; - netbk->mmap_pages[head_idx] = page; + vif->mmap_pages[head_idx] = page; frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx); } @@ -1093,18 +918,18 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk, err: /* Unwind, freeing all pages and sending error responses. */ while (shinfo->nr_frags-- > start) { - xen_netbk_idx_release(netbk, + xen_netbk_idx_release(vif, frag_get_pending_idx(&frags[shinfo->nr_frags]), XEN_NETIF_RSP_ERROR); } /* The head too, if necessary. */ if (start) - xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_ERROR); + xen_netbk_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR); return NULL; } -static int xen_netbk_tx_check_gop(struct xen_netbk *netbk, +static int xen_netbk_tx_check_gop(struct xenvif *vif, struct sk_buff *skb, struct gnttab_copy **gopp) { @@ -1119,7 +944,7 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk, /* Check status of header. */ err = gop->status; if (unlikely(err)) - xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_ERROR); + xen_netbk_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR); /* Skip first skb fragment if it is on same page as header fragment. */ start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); @@ -1129,7 +954,7 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk, pending_ring_idx_t head; pending_idx = frag_get_pending_idx(&shinfo->frags[i]); - tx_info = &netbk->pending_tx_info[pending_idx]; + tx_info = &vif->pending_tx_info[pending_idx]; head = tx_info->head; /* Check error status: if okay then remember grant handle. */ @@ -1137,18 +962,19 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk, newerr = (++gop)->status; if (newerr) break; - peek = netbk->pending_ring[pending_index(++head)]; - } while (!pending_tx_is_head(netbk, peek)); + peek = vif->pending_ring[pending_index(++head)]; + } while (!pending_tx_is_head(vif, peek)); if (likely(!newerr)) { |