aboutsummaryrefslogtreecommitdiff
path: root/drivers/net/xen-netback/netback.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/xen-netback/netback.c')
-rw-r--r--drivers/net/xen-netback/netback.c316
1 files changed, 248 insertions, 68 deletions
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index cd49ba94963..a2865f17c66 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -47,11 +47,25 @@
#include <asm/xen/hypercall.h>
#include <asm/xen/page.h>
+/*
+ * This is the maximum slots a skb can have. If a guest sends a skb
+ * which exceeds this limit it is considered malicious.
+ */
+#define MAX_SKB_SLOTS_DEFAULT 20
+static unsigned int max_skb_slots = MAX_SKB_SLOTS_DEFAULT;
+module_param(max_skb_slots, uint, 0444);
+
+typedef unsigned int pending_ring_idx_t;
+#define INVALID_PENDING_RING_IDX (~0U)
+
struct pending_tx_info {
- struct xen_netif_tx_request req;
+ struct xen_netif_tx_request req; /* coalesced tx request */
struct xenvif *vif;
+ pending_ring_idx_t head; /* head != INVALID_PENDING_RING_IDX
+ * if it is head of one or more tx
+ * reqs
+ */
};
-typedef unsigned int pending_ring_idx_t;
struct netbk_rx_meta {
int id;
@@ -102,7 +116,11 @@ struct xen_netbk {
atomic_t netfront_count;
struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
- struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
+ /* Coalescing tx requests before copying makes number of grant
+ * copy ops greater or equal to number of slots required. In
+ * worst case a tx request consumes 2 gnttab_copy.
+ */
+ struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS];
u16 pending_ring[MAX_PENDING_REQS];
@@ -118,6 +136,16 @@ struct xen_netbk {
static struct xen_netbk *xen_netbk;
static int xen_netbk_group_nr;
+/*
+ * If head != INVALID_PENDING_RING_IDX, it means this tx request is head of
+ * one or more merged tx requests, otherwise it is the continuation of
+ * previous tx request.
+ */
+static inline int pending_tx_is_head(struct xen_netbk *netbk, RING_IDX idx)
+{
+ return netbk->pending_tx_info[idx].head != INVALID_PENDING_RING_IDX;
+}
+
void xen_netbk_add_xenvif(struct xenvif *vif)
{
int i;
@@ -250,6 +278,7 @@ static int max_required_rx_slots(struct xenvif *vif)
{
int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);
+ /* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
if (vif->can_sg || vif->gso || vif->gso_prefix)
max += MAX_SKB_FRAGS + 1; /* extra_info + frags */
@@ -657,6 +686,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
__skb_queue_tail(&rxq, skb);
/* Filled the batch queue? */
+ /* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE)
break;
}
@@ -898,51 +928,91 @@ static void netbk_fatal_tx_err(struct xenvif *vif)
static int netbk_count_requests(struct xenvif *vif,
struct xen_netif_tx_request *first,
+ RING_IDX first_idx,
struct xen_netif_tx_request *txp,
int work_to_do)
{
RING_IDX cons = vif->tx.req_cons;
- int frags = 0;
+ int slots = 0;
+ int drop_err = 0;
if (!(first->flags & XEN_NETTXF_more_data))
return 0;
do {
- if (frags >= work_to_do) {
- netdev_err(vif->dev, "Need more frags\n");
+ if (slots >= work_to_do) {
+ netdev_err(vif->dev,
+ "Asked for %d slots but exceeds this limit\n",
+ work_to_do);
netbk_fatal_tx_err(vif);
return -ENODATA;
}
- if (unlikely(frags >= MAX_SKB_FRAGS)) {
- netdev_err(vif->dev, "Too many frags\n");
+ /* This guest is really using too many slots and
+ * considered malicious.
+ */
+ if (unlikely(slots >= max_skb_slots)) {
+ netdev_err(vif->dev,
+ "Malicious frontend using %d slots, threshold %u\n",
+ slots, max_skb_slots);
netbk_fatal_tx_err(vif);
return -E2BIG;
}
- memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + frags),
+ /* Xen network protocol had implicit dependency on
+ * MAX_SKB_FRAGS. XEN_NETIF_NR_SLOTS_MIN is set to the
+ * historical MAX_SKB_FRAGS value 18 to honor the same
+ * behavior as before. Any packet using more than 18
+ * slots but less than max_skb_slots slots is dropped
+ */
+ if (!drop_err && slots >= XEN_NETIF_NR_SLOTS_MIN) {
+ if (net_ratelimit())
+ netdev_dbg(vif->dev,
+ "Too many slots (%d) exceeding limit (%d), dropping packet\n",
+ slots, XEN_NETIF_NR_SLOTS_MIN);
+ drop_err = -E2BIG;
+ }
+
+ memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots),
sizeof(*txp));
- if (txp->size > first->size) {
- netdev_err(vif->dev, "Frag is bigger than frame.\n");
- netbk_fatal_tx_err(vif);
- return -EIO;
+
+ /* If the guest submitted a frame >= 64 KiB then
+ * first->size overflowed and following slots will
+ * appear to be larger than the frame.
+ *
+ * This cannot be fatal error as there are buggy
+ * frontends that do this.
+ *
+ * Consume all slots and drop the packet.
+ */
+ if (!drop_err && txp->size > first->size) {
+ if (net_ratelimit())
+ netdev_dbg(vif->dev,
+ "Invalid tx request, slot size %u > remaining size %u\n",
+ txp->size, first->size);
+ drop_err = -EIO;
}
first->size -= txp->size;
- frags++;
+ slots++;
if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
- netdev_err(vif->dev, "txp->offset: %x, size: %u\n",
+ netdev_err(vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n",
txp->offset, txp->size);
netbk_fatal_tx_err(vif);
return -EINVAL;
}
} while ((txp++)->flags & XEN_NETTXF_more_data);
- return frags;
+
+ if (drop_err) {
+ netbk_tx_err(vif, first, first_idx + slots);
+ return drop_err;
+ }
+
+ return slots;
}
static struct page *xen_netbk_alloc_page(struct xen_netbk *netbk,
- struct sk_buff *skb,
u16 pending_idx)
{
struct page *page;
@@ -963,48 +1033,114 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
struct skb_shared_info *shinfo = skb_shinfo(skb);
skb_frag_t *frags = shinfo->frags;
u16 pending_idx = *((u16 *)skb->data);
- int i, start;
+ u16 head_idx = 0;
+ int slot, start;
+ struct page *page;
+ pending_ring_idx_t index, start_idx = 0;
+ uint16_t dst_offset;
+ unsigned int nr_slots;
+ struct pending_tx_info *first = NULL;
+
+ /* At this point shinfo->nr_frags is in fact the number of
+ * slots, which can be as large as XEN_NETIF_NR_SLOTS_MIN.
+ */
+ nr_slots = shinfo->nr_frags;
/* Skip first skb fragment if it is on same page as header fragment. */
start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
- for (i = start; i < shinfo->nr_frags; i++, txp++) {
- struct page *page;
- pending_ring_idx_t index;
+ /* Coalesce tx requests, at this point the packet passed in
+ * should be <= 64K. Any packets larger than 64K have been
+ * handled in netbk_count_requests().
+ */
+ for (shinfo->nr_frags = slot = start; slot < nr_slots;
+ shinfo->nr_frags++) {
struct pending_tx_info *pending_tx_info =
netbk->pending_tx_info;
- index = pending_index(netbk->pending_cons++);
- pending_idx = netbk->pending_ring[index];
- page = xen_netbk_alloc_page(netbk, skb, pending_idx);
+ page = alloc_page(GFP_KERNEL|__GFP_COLD);
if (!page)
goto err;
- gop->source.u.ref = txp->gref;
- gop->source.domid = vif->domid;
- gop->source.offset = txp->offset;
-
- gop->dest.u.gmfn = virt_to_mfn(page_address(page));
- gop->dest.domid = DOMID_SELF;
- gop->dest.offset = txp->offset;
-
- gop->len = txp->size;
- gop->flags = GNTCOPY_source_gref;
+ dst_offset = 0;
+ first = NULL;
+ while (dst_offset < PAGE_SIZE && slot < nr_slots) {
+ gop->flags = GNTCOPY_source_gref;
+
+ gop->source.u.ref = txp->gref;
+ gop->source.domid = vif->domid;
+ gop->source.offset = txp->offset;
+
+ gop->dest.domid = DOMID_SELF;
+
+ gop->dest.offset = dst_offset;
+ gop->dest.u.gmfn = virt_to_mfn(page_address(page));
+
+ if (dst_offset + txp->size > PAGE_SIZE) {
+ /* This page can only merge a portion
+ * of tx request. Do not increment any
+ * pointer / counter here. The txp
+ * will be dealt with in future
+ * rounds, eventually hitting the
+ * `else` branch.
+ */
+ gop->len = PAGE_SIZE - dst_offset;
+ txp->offset += gop->len;
+ txp->size -= gop->len;
+ dst_offset += gop->len; /* quit loop */
+ } else {
+ /* This tx request can be merged in the page */
+ gop->len = txp->size;
+ dst_offset += gop->len;
+
+ index = pending_index(netbk->pending_cons++);
+
+ pending_idx = netbk->pending_ring[index];
+
+ memcpy(&pending_tx_info[pending_idx].req, txp,
+ sizeof(*txp));
+ xenvif_get(vif);
+
+ pending_tx_info[pending_idx].vif = vif;
+
+ /* Poison these fields, corresponding
+ * fields for head tx req will be set
+ * to correct values after the loop.
+ */
+ netbk->mmap_pages[pending_idx] = (void *)(~0UL);
+ pending_tx_info[pending_idx].head =
+ INVALID_PENDING_RING_IDX;
+
+ if (!first) {
+ first = &pending_tx_info[pending_idx];
+ start_idx = index;
+ head_idx = pending_idx;
+ }
+
+ txp++;
+ slot++;
+ }
- gop++;
+ gop++;
+ }
- memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
- xenvif_get(vif);
- pending_tx_info[pending_idx].vif = vif;
- frag_set_pending_idx(&frags[i], pending_idx);
+ first->req.offset = 0;
+ first->req.size = dst_offset;
+ first->head = start_idx;
+ set_page_ext(page, netbk, head_idx);
+ netbk->mmap_pages[head_idx] = page;
+ frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx);
}
+ BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS);
+
return gop;
err:
/* Unwind, freeing all pages and sending error responses. */
- while (i-- > start) {
- xen_netbk_idx_release(netbk, frag_get_pending_idx(&frags[i]),
- XEN_NETIF_RSP_ERROR);
+ while (shinfo->nr_frags-- > start) {
+ xen_netbk_idx_release(netbk,
+ frag_get_pending_idx(&frags[shinfo->nr_frags]),
+ XEN_NETIF_RSP_ERROR);
}
/* The head too, if necessary. */
if (start)
@@ -1020,8 +1156,10 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
struct gnttab_copy *gop = *gopp;
u16 pending_idx = *((u16 *)skb->data);
struct skb_shared_info *shinfo = skb_shinfo(skb);
+ struct pending_tx_info *tx_info;
int nr_frags = shinfo->nr_frags;
int i, err, start;
+ u16 peek; /* peek into next tx request */
/* Check status of header. */
err = gop->status;
@@ -1033,11 +1171,20 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
for (i = start; i < nr_frags; i++) {
int j, newerr;
+ pending_ring_idx_t head;
pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
+ tx_info = &netbk->pending_tx_info[pending_idx];
+ head = tx_info->head;
/* Check error status: if okay then remember grant handle. */
- newerr = (++gop)->status;
+ do {
+ newerr = (++gop)->status;
+ if (newerr)
+ break;
+ peek = netbk->pending_ring[pending_index(++head)];
+ } while (!pending_tx_is_head(netbk, peek));
+
if (likely(!newerr)) {
/* Had a previous error? Invalidate this fragment. */
if (unlikely(err))
@@ -1157,7 +1304,6 @@ static int netbk_set_skb_gso(struct xenvif *vif,
static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
{
struct iphdr *iph;
- unsigned char *th;
int err = -EPROTO;
int recalculate_partial_csum = 0;
@@ -1181,27 +1327,26 @@ static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
goto out;
iph = (void *)skb->data;
- th = skb->data + 4 * iph->ihl;
- if (th >= skb_tail_pointer(skb))
- goto out;
-
- skb->csum_start = th - skb->head;
switch (iph->protocol) {
case IPPROTO_TCP:
- skb->csum_offset = offsetof(struct tcphdr, check);
+ if (!skb_partial_csum_set(skb, 4 * iph->ihl,
+ offsetof(struct tcphdr, check)))
+ goto out;
if (recalculate_partial_csum) {
- struct tcphdr *tcph = (struct tcphdr *)th;
+ struct tcphdr *tcph = tcp_hdr(skb);
tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
skb->len - iph->ihl*4,
IPPROTO_TCP, 0);
}
break;
case IPPROTO_UDP:
- skb->csum_offset = offsetof(struct udphdr, check);
+ if (!skb_partial_csum_set(skb, 4 * iph->ihl,
+ offsetof(struct udphdr, check)))
+ goto out;
if (recalculate_partial_csum) {
- struct udphdr *udph = (struct udphdr *)th;
+ struct udphdr *udph = udp_hdr(skb);
udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
skb->len - iph->ihl*4,
IPPROTO_UDP, 0);
@@ -1215,9 +1360,6 @@ static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
goto out;
}
- if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
- goto out;
-
err = 0;
out:
@@ -1262,11 +1404,12 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
struct sk_buff *skb;
int ret;
- while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+ while ((nr_pending_reqs(netbk) + XEN_NETIF_NR_SLOTS_MIN
+ < MAX_PENDING_REQS) &&
!list_empty(&netbk->net_schedule_list)) {
struct xenvif *vif;
struct xen_netif_tx_request txreq;
- struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
+ struct xen_netif_tx_request txfrags[max_skb_slots];
struct page *page;
struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
u16 pending_idx;
@@ -1327,7 +1470,8 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
continue;
}
- ret = netbk_count_requests(vif, &txreq, txfrags, work_to_do);
+ ret = netbk_count_requests(vif, &txreq, idx,
+ txfrags, work_to_do);
if (unlikely(ret < 0))
continue;
@@ -1354,7 +1498,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
pending_idx = netbk->pending_ring[index];
data_len = (txreq.size > PKT_PROT_LEN &&
- ret < MAX_SKB_FRAGS) ?
+ ret < XEN_NETIF_NR_SLOTS_MIN) ?
PKT_PROT_LEN : txreq.size;
skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
@@ -1381,7 +1525,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
}
/* XXX could copy straight to head */
- page = xen_netbk_alloc_page(netbk, skb, pending_idx);
+ page = xen_netbk_alloc_page(netbk, pending_idx);
if (!page) {
kfree_skb(skb);
netbk_tx_err(vif, &txreq, idx);
@@ -1404,6 +1548,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
memcpy(&netbk->pending_tx_info[pending_idx].req,
&txreq, sizeof(txreq));
netbk->pending_tx_info[pending_idx].vif = vif;
+ netbk->pending_tx_info[pending_idx].head = index;
*((u16 *)skb->data) = pending_idx;
__skb_put(skb, data_len);
@@ -1496,6 +1641,7 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
skb->dev = vif->dev;
skb->protocol = eth_type_trans(skb, skb->dev);
+ skb_reset_network_header(skb);
if (checksum_setup(vif, skb)) {
netdev_dbg(vif->dev,
@@ -1504,6 +1650,8 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
continue;
}
+ skb_probe_transport_header(skb, 0);
+
vif->dev->stats.rx_bytes += skb->len;
vif->dev->stats.rx_packets++;
@@ -1531,7 +1679,10 @@ static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,
{
struct xenvif *vif;
struct pending_tx_info *pending_tx_info;
- pending_ring_idx_t index;
+ pending_ring_idx_t head;
+ u16 peek; /* peek into next tx request */
+
+ BUG_ON(netbk->mmap_pages[pending_idx] == (void *)(~0UL));
/* Already complete? */
if (netbk->mmap_pages[pending_idx] == NULL)
@@ -1540,19 +1691,40 @@ static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,
pending_tx_info = &netbk->pending_tx_info[pending_idx];
vif = pending_tx_info->vif;
+ head = pending_tx_info->head;
+
+ BUG_ON(!pending_tx_is_head(netbk, head));
+ BUG_ON(netbk->pending_ring[pending_index(head)] != pending_idx);
+
+ do {
+ pending_ring_idx_t index;
+ pending_ring_idx_t idx = pending_index(head);
+ u16 info_idx = netbk->pending_ring[idx];
- make_tx_response(vif, &pending_tx_info->req, status);
+ pending_tx_info = &netbk->pending_tx_info[info_idx];
+ make_tx_response(vif, &pending_tx_info->req, status);
- index = pending_index(netbk->pending_prod++);
- netbk->pending_ring[index] = pending_idx;
+ /* Setting any number other than
+ * INVALID_PENDING_RING_IDX indicates this slot is
+ * starting a new packet / ending a previous packet.
+ */
+ pending_tx_info->head = 0;
- xenvif_put(vif);
+ index = pending_index(netbk->pending_prod++);
+ netbk->pending_ring[index] = netbk->pending_ring[info_idx];
+
+ xenvif_put(vif);
+
+ peek = netbk->pending_ring[pending_index(++head)];
+
+ } while (!pending_tx_is_head(netbk, peek));
netbk->mmap_pages[pending_idx]->mapping = 0;
put_page(netbk->mmap_pages[pending_idx]);
netbk->mmap_pages[pending_idx] = NULL;
}
+
static void make_tx_response(struct xenvif *vif,
struct xen_netif_tx_request *txp,
s8 st)
@@ -1605,8 +1777,9 @@ static inline int rx_work_todo(struct xen_netbk *netbk)
static inline int tx_work_todo(struct xen_netbk *netbk)
{
- if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
- !list_empty(&netbk->net_schedule_list))
+ if ((nr_pending_reqs(netbk) + XEN_NETIF_NR_SLOTS_MIN
+ < MAX_PENDING_REQS) &&
+ !list_empty(&netbk->net_schedule_list))
return 1;
return 0;
@@ -1689,6 +1862,13 @@ static int __init netback_init(void)
if (!xen_domain())
return -ENODEV;
+ if (max_skb_slots < XEN_NETIF_NR_SLOTS_MIN) {
+ printk(KERN_INFO
+ "xen-netback: max_skb_slots too small (%d), bump it to XEN_NETIF_NR_SLOTS_MIN (%d)\n",
+ max_skb_slots, XEN_NETIF_NR_SLOTS_MIN);
+ max_skb_slots = XEN_NETIF_NR_SLOTS_MIN;
+ }
+
xen_netbk_group_nr = num_online_cpus();
xen_netbk = vzalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr);
if (!xen_netbk)