From af40da894e96d5c826d38be3ea53ee00d9de0367 Mon Sep 17 00:00:00 2001 From: Vladimir Sokolovsky Date: Mon, 14 Jul 2008 23:48:48 -0700 Subject: IPoIB: add LRO support Add an "lro" module parameter to enable LRO and an "lro_max_aggr" module parameter to set the max number of packets to be aggregated. Make LRO controllable and LRO statistics accessible through ethtool. Signed-off-by: Vladimir Sokolovsky Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/Kconfig | 1 + drivers/infiniband/ulp/ipoib/ipoib.h | 11 +++++ drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 46 +++++++++++++++++++++ drivers/infiniband/ulp/ipoib/ipoib_ib.c | 8 +++- drivers/infiniband/ulp/ipoib/ipoib_main.c | 62 ++++++++++++++++++++++++++++ 5 files changed, 127 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig index 1f76bad020f..691525cf394 100644 --- a/drivers/infiniband/ulp/ipoib/Kconfig +++ b/drivers/infiniband/ulp/ipoib/Kconfig @@ -1,6 +1,7 @@ config INFINIBAND_IPOIB tristate "IP-over-InfiniBand" depends on NETDEVICES && INET && (IPV6 || IPV6=n) + select INET_LRO ---help--- Support for the IP-over-InfiniBand protocol (IPoIB). 
This transports IP packets over InfiniBand so you can use your IB diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 8754b364f22..2c522572e3c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -50,6 +50,7 @@ #include #include #include +#include /* constants */ @@ -94,6 +95,9 @@ enum { IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */ IPOIB_MCAST_FLAG_ATTACHED = 3, + IPOIB_MAX_LRO_DESCRIPTORS = 8, + IPOIB_LRO_MAX_AGGR = 64, + MAX_SEND_CQE = 16, IPOIB_CM_COPYBREAK = 256, }; @@ -248,6 +252,11 @@ struct ipoib_ethtool_st { u16 max_coalesced_frames; }; +struct ipoib_lro { + struct net_lro_mgr lro_mgr; + struct net_lro_desc lro_desc[IPOIB_MAX_LRO_DESCRIPTORS]; +}; + /* * Device private locking: tx_lock protects members used in TX fast * path (and we use LLTX so upper layers don't do extra locking). @@ -334,6 +343,8 @@ struct ipoib_dev_priv { int hca_caps; struct ipoib_ethtool_st ethtool; struct timer_list poll_timer; + + struct ipoib_lro lro; }; struct ipoib_ah { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 10279b79c44..66af5c1a76e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -86,11 +86,57 @@ static int ipoib_set_coalesce(struct net_device *dev, return 0; } +static const char ipoib_stats_keys[][ETH_GSTRING_LEN] = { + "LRO aggregated", "LRO flushed", + "LRO avg aggr", "LRO no desc" +}; + +static void ipoib_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +{ + switch (stringset) { + case ETH_SS_STATS: + memcpy(data, *ipoib_stats_keys, sizeof(ipoib_stats_keys)); + break; + } +} + +static int ipoib_get_sset_count(struct net_device *dev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return ARRAY_SIZE(ipoib_stats_keys); + default: + return -EOPNOTSUPP; + } +} + +static void ipoib_get_ethtool_stats(struct net_device *dev, + 
struct ethtool_stats *stats, uint64_t *data) +{ + struct ipoib_dev_priv *priv = netdev_priv(dev); + int index = 0; + + /* Get LRO statistics */ + data[index++] = priv->lro.lro_mgr.stats.aggregated; + data[index++] = priv->lro.lro_mgr.stats.flushed; + if (priv->lro.lro_mgr.stats.flushed) + data[index++] = priv->lro.lro_mgr.stats.aggregated / + priv->lro.lro_mgr.stats.flushed; + else + data[index++] = 0; + data[index++] = priv->lro.lro_mgr.stats.no_desc; +} + static const struct ethtool_ops ipoib_ethtool_ops = { .get_drvinfo = ipoib_get_drvinfo, .get_tso = ethtool_op_get_tso, .get_coalesce = ipoib_get_coalesce, .set_coalesce = ipoib_set_coalesce, + .get_flags = ethtool_op_get_flags, + .set_flags = ethtool_op_set_flags, + .get_strings = ipoib_get_strings, + .get_sset_count = ipoib_get_sset_count, + .get_ethtool_stats = ipoib_get_ethtool_stats, }; void ipoib_set_ethtool_ops(struct net_device *dev) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index eca8518d79a..5d50e5261ee 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -288,7 +288,10 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok)) skb->ip_summed = CHECKSUM_UNNECESSARY; - netif_receive_skb(skb); + if (dev->features & NETIF_F_LRO) + lro_receive_skb(&priv->lro.lro_mgr, skb, NULL); + else + netif_receive_skb(skb); repost: if (unlikely(ipoib_ib_post_receive(dev, wr_id))) @@ -440,6 +443,9 @@ poll_more: } if (done < budget) { + if (dev->features & NETIF_F_LRO) + lro_flush_all(&priv->lro.lro_mgr); + netif_rx_complete(dev, napi); if (unlikely(ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP | diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index bfe1dbf9920..fead88f7fb1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -60,6 
+60,15 @@ MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue"); module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue"); +static int lro; +module_param(lro, bool, 0444); +MODULE_PARM_DESC(lro, "Enable LRO (Large Receive Offload)"); + +static int lro_max_aggr = IPOIB_LRO_MAX_AGGR; +module_param(lro_max_aggr, int, 0644); +MODULE_PARM_DESC(lro_max_aggr, "LRO: Max packets to be aggregated " + "(default = 64)"); + #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG int ipoib_debug_level; @@ -936,6 +945,54 @@ static const struct header_ops ipoib_header_ops = { .create = ipoib_hard_header, }; +static int get_skb_hdr(struct sk_buff *skb, void **iphdr, + void **tcph, u64 *hdr_flags, void *priv) +{ + unsigned int ip_len; + struct iphdr *iph; + + if (unlikely(skb->protocol != htons(ETH_P_IP))) + return -1; + + /* + * In the future we may add an else clause that verifies the + * checksum and allows devices which do not calculate checksum + * to use LRO. 
+ */ + if (unlikely(skb->ip_summed != CHECKSUM_UNNECESSARY)) + return -1; + + /* Check for non-TCP packet */ + skb_reset_network_header(skb); + iph = ip_hdr(skb); + if (iph->protocol != IPPROTO_TCP) + return -1; + + ip_len = ip_hdrlen(skb); + skb_set_transport_header(skb, ip_len); + *tcph = tcp_hdr(skb); + + /* check if IP header and TCP header are complete */ + if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb)) + return -1; + + *hdr_flags = LRO_IPV4 | LRO_TCP; + *iphdr = iph; + + return 0; +} + +static void ipoib_lro_setup(struct ipoib_dev_priv *priv) +{ + priv->lro.lro_mgr.max_aggr = lro_max_aggr; + priv->lro.lro_mgr.max_desc = IPOIB_MAX_LRO_DESCRIPTORS; + priv->lro.lro_mgr.lro_arr = priv->lro.lro_desc; + priv->lro.lro_mgr.get_skb_header = get_skb_hdr; + priv->lro.lro_mgr.features = LRO_F_NAPI; + priv->lro.lro_mgr.dev = priv->dev; + priv->lro.lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY; +} + static void ipoib_setup(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -975,6 +1032,8 @@ static void ipoib_setup(struct net_device *dev) priv->dev = dev; + ipoib_lro_setup(priv); + spin_lock_init(&priv->lock); spin_lock_init(&priv->tx_lock); @@ -1152,6 +1211,9 @@ static struct net_device *ipoib_add_port(const char *format, priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; } + if (lro) + priv->dev->features |= NETIF_F_LRO; + /* * Set the full membership bit, so that we join the right * broadcast group, etc. -- cgit v1.2.3-18-g5258