aboutsummaryrefslogtreecommitdiff
path: root/drivers/net/hyperv
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/hyperv')
-rw-r--r--drivers/net/hyperv/hyperv_net.h372
-rw-r--r--drivers/net/hyperv/netvsc.c620
-rw-r--r--drivers/net/hyperv/netvsc_drv.c561
-rw-r--r--drivers/net/hyperv/rndis_filter.c410
4 files changed, 1473 insertions, 490 deletions
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index e6fe0d80d61..6cc37c15e0b 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -12,8 +12,7 @@
* more details.
*
* You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
+ * this program; if not, see <http://www.gnu.org/licenses/>.
*
* Authors:
* Haiyang Zhang <haiyangz@microsoft.com>
@@ -29,52 +28,122 @@
#include <linux/hyperv.h>
#include <linux/rndis.h>
-/* Fwd declaration */
-struct hv_netvsc_packet;
+/* RSS related */
+#define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203 /* query only */
+#define OID_GEN_RECEIVE_SCALE_PARAMETERS 0x00010204 /* query and set */
-/* Represent the xfer page packet which contains 1 or more netvsc packet */
-struct xferpage_packet {
- struct list_head list_ent;
- u32 status;
+#define NDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88
+#define NDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89
- /* # of netvsc packets this xfer packet contains */
- u32 count;
+#define NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2
+#define NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2
+
+struct ndis_obj_header {
+ u8 type;
+ u8 rev;
+ u16 size;
+} __packed;
+
+/* ndis_recv_scale_cap/cap_flag */
+#define NDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS 0x01000000
+#define NDIS_RSS_CAPS_CLASSIFICATION_AT_ISR 0x02000000
+#define NDIS_RSS_CAPS_CLASSIFICATION_AT_DPC 0x04000000
+#define NDIS_RSS_CAPS_USING_MSI_X 0x08000000
+#define NDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS 0x10000000
+#define NDIS_RSS_CAPS_SUPPORTS_MSI_X 0x20000000
+#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4 0x00000100
+#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6 0x00000200
+#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX 0x00000400
+
+struct ndis_recv_scale_cap { /* NDIS_RECEIVE_SCALE_CAPABILITIES */
+ struct ndis_obj_header hdr;
+ u32 cap_flag;
+ u32 num_int_msg;
+ u32 num_recv_que;
+ u16 num_indirect_tabent;
+} __packed;
+
+
+/* ndis_recv_scale_param flags */
+#define NDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED 0x0001
+#define NDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED 0x0002
+#define NDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED 0x0004
+#define NDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED 0x0008
+#define NDIS_RSS_PARAM_FLAG_DISABLE_RSS 0x0010
+
+/* Hash info bits */
+#define NDIS_HASH_FUNC_TOEPLITZ 0x00000001
+#define NDIS_HASH_IPV4 0x00000100
+#define NDIS_HASH_TCP_IPV4 0x00000200
+#define NDIS_HASH_IPV6 0x00000400
+#define NDIS_HASH_IPV6_EX 0x00000800
+#define NDIS_HASH_TCP_IPV6 0x00001000
+#define NDIS_HASH_TCP_IPV6_EX 0x00002000
+
+#define NDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4)
+#define NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 40
+
+#define ITAB_NUM 128
+#define HASH_KEYLEN NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2
+extern u8 netvsc_hash_key[];
+
+struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */
+ struct ndis_obj_header hdr;
+
+ /* Qualifies the rest of the information */
+ u16 flag;
+
+ /* The base CPU number to do receive processing. not used */
+ u16 base_cpu_number;
+
+ /* This describes the hash function and type being enabled */
+ u32 hashinfo;
+
+ /* The size of indirection table array */
+ u16 indirect_tabsize;
+
+ /* The offset of the indirection table from the beginning of this
+ * structure
+ */
+ u32 indirect_taboffset;
+
+ /* The size of the hash secret key */
+ u16 hashkey_size;
+
+ /* The offset of the secret key from the beginning of this structure */
+ u32 kashkey_offset;
+
+ u32 processor_masks_offset;
+ u32 num_processor_masks;
+ u32 processor_masks_entry_size;
};
+/* Fwd declaration */
+struct ndis_tcp_ip_checksum_info;
+
/*
* Represent netvsc packet which contains 1 RNDIS and 1 ethernet frame
* within the RNDIS
*/
struct hv_netvsc_packet {
/* Bookkeeping stuff */
- struct list_head list_ent;
u32 status;
struct hv_device *device;
bool is_data_pkt;
u16 vlan_tci;
- /*
- * Valid only for receives when we break a xfer page packet
- * into multiple netvsc packets
- */
- struct xferpage_packet *xfer_page_pkt;
+ u16 q_idx;
+ struct vmbus_channel *channel;
- union {
- struct {
- u64 recv_completion_tid;
- void *recv_completion_ctx;
- void (*recv_completion)(void *context);
- } recv;
- struct {
- u64 send_completion_tid;
- void *send_completion_ctx;
- void (*send_completion)(void *context);
- } send;
- } completion;
+ u64 send_completion_tid;
+ void *send_completion_ctx;
+ void (*send_completion)(void *context);
+
+ u32 send_buf_index;
/* This points to the memory after page_buf */
- void *extension;
+ struct rndis_message *rndis_msg;
u32 total_data_buflen;
/* Points to the send/receive buffer where the ethernet frame is */
@@ -118,7 +187,9 @@ int netvsc_send(struct hv_device *device,
void netvsc_linkstatus_callback(struct hv_device *device_obj,
unsigned int status);
int netvsc_recv_callback(struct hv_device *device_obj,
- struct hv_netvsc_packet *packet);
+ struct hv_netvsc_packet *packet,
+ struct ndis_tcp_ip_checksum_info *csum_info);
+void netvsc_channel_cb(void *context);
int rndis_filter_open(struct hv_device *dev);
int rndis_filter_close(struct hv_device *dev);
int rndis_filter_device_add(struct hv_device *dev,
@@ -127,11 +198,6 @@ void rndis_filter_device_remove(struct hv_device *dev);
int rndis_filter_receive(struct hv_device *dev,
struct hv_netvsc_packet *pkt);
-
-
-int rndis_filter_send(struct hv_device *dev,
- struct hv_netvsc_packet *pkt);
-
int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter);
int rndis_filter_set_device_mac(struct hv_device *hdev, char *mac);
@@ -140,6 +206,8 @@ int rndis_filter_set_device_mac(struct hv_device *hdev, char *mac);
#define NVSP_PROTOCOL_VERSION_1 2
#define NVSP_PROTOCOL_VERSION_2 0x30002
+#define NVSP_PROTOCOL_VERSION_4 0x40000
+#define NVSP_PROTOCOL_VERSION_5 0x50000
enum {
NVSP_MSG_TYPE_NONE = 0,
@@ -194,6 +262,23 @@ enum {
NVSP_MSG2_TYPE_ALLOC_CHIMNEY_HANDLE,
NVSP_MSG2_TYPE_ALLOC_CHIMNEY_HANDLE_COMP,
+
+ NVSP_MSG2_MAX = NVSP_MSG2_TYPE_ALLOC_CHIMNEY_HANDLE_COMP,
+
+ /* Version 4 messages */
+ NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION,
+ NVSP_MSG4_TYPE_SWITCH_DATA_PATH,
+ NVSP_MSG4_TYPE_UPLINK_CONNECT_STATE_DEPRECATED,
+
+ NVSP_MSG4_MAX = NVSP_MSG4_TYPE_UPLINK_CONNECT_STATE_DEPRECATED,
+
+ /* Version 5 messages */
+ NVSP_MSG5_TYPE_OID_QUERY_EX,
+ NVSP_MSG5_TYPE_OID_QUERY_EX_COMP,
+ NVSP_MSG5_TYPE_SUBCHANNEL,
+ NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE,
+
+ NVSP_MSG5_MAX = NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE,
};
enum {
@@ -448,10 +533,44 @@ union nvsp_2_message_uber {
struct nvsp_2_free_rxbuf free_rxbuf;
} __packed;
+enum nvsp_subchannel_operation {
+ NVSP_SUBCHANNEL_NONE = 0,
+ NVSP_SUBCHANNEL_ALLOCATE,
+ NVSP_SUBCHANNEL_MAX
+};
+
+struct nvsp_5_subchannel_request {
+ u32 op;
+ u32 num_subchannels;
+} __packed;
+
+struct nvsp_5_subchannel_complete {
+ u32 status;
+ u32 num_subchannels; /* Actual number of subchannels allocated */
+} __packed;
+
+struct nvsp_5_send_indirect_table {
+ /* The number of entries in the send indirection table */
+ u32 count;
+
+ /* The offset of the send indireciton table from top of this struct.
+ * The send indirection table tells which channel to put the send
+ * traffic on. Each entry is a channel number.
+ */
+ u32 offset;
+} __packed;
+
+union nvsp_5_message_uber {
+ struct nvsp_5_subchannel_request subchn_req;
+ struct nvsp_5_subchannel_complete subchn_comp;
+ struct nvsp_5_send_indirect_table send_table;
+} __packed;
+
union nvsp_all_messages {
union nvsp_message_init_uber init_msg;
union nvsp_1_message_uber v1_msg;
union nvsp_2_message_uber v2_msg;
+ union nvsp_5_message_uber v5_msg;
} __packed;
/* ALL Messages */
@@ -463,15 +582,18 @@ struct nvsp_message {
#define NETVSC_MTU 65536
-#define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*2) /* 2MB */
+#define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */
+#define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */
+#define NETVSC_SEND_BUFFER_SIZE (1024 * 1024) /* 1MB */
+#define NETVSC_INVALID_INDEX -1
-#define NETVSC_RECEIVE_BUFFER_ID 0xcafe
-/* Preallocated receive packets */
-#define NETVSC_RECEIVE_PACKETLIST_COUNT 256
+#define NETVSC_RECEIVE_BUFFER_ID 0xcafe
#define NETVSC_PACKET_SIZE 2048
+#define VRSS_SEND_TAB_SIZE 16
+
/* Per netvsc channel-specific */
struct netvsc_device {
struct hv_device *dev;
@@ -482,12 +604,6 @@ struct netvsc_device {
wait_queue_head_t wait_drain;
bool start_remove;
bool destroy;
- /*
- * List of free preallocated hv_netvsc_packet to represent receive
- * packet
- */
- struct list_head recv_pkt_list;
- spinlock_t recv_pkt_list_lock;
/* Receive buffer allocated by us but manages by NetVSP */
void *recv_buf;
@@ -496,6 +612,15 @@ struct netvsc_device {
u32 recv_section_cnt;
struct nvsp_1_receive_buffer_section *recv_section;
+ /* Send buffer allocated by us */
+ void *send_buf;
+ u32 send_buf_size;
+ u32 send_buf_gpadl_handle;
+ u32 send_section_cnt;
+ u32 send_section_size;
+ unsigned long *send_section_map;
+ int map_words;
+
/* Used for NetVSP initialization protocol */
struct completion channel_init_wait;
struct nvsp_message channel_init_pkt;
@@ -505,8 +630,20 @@ struct netvsc_device {
struct net_device *ndev;
+ struct vmbus_channel *chn_table[NR_CPUS];
+ u32 send_table[VRSS_SEND_TAB_SIZE];
+ u32 num_chn;
+ atomic_t queue_sends[NR_CPUS];
+
/* Holds rndis device info */
void *extension;
+
+ int ring_size;
+
+ /* The primary channel callback buffer */
+ unsigned char cb_buffer[NETVSC_PACKET_SIZE];
+ /* The sub channel callback buffer */
+ unsigned char *sub_cb_buf;
};
/* NdisInitialize message */
@@ -654,6 +791,7 @@ enum ndis_per_pkt_info_type {
IEEE_8021Q_INFO,
ORIGINAL_PKTINFO,
PACKET_CANCEL_ID,
+ NBL_HASH_VALUE = PACKET_CANCEL_ID,
ORIGINAL_NET_BUFLIST,
CACHED_NET_BUFLIST,
SHORT_PKT_PADINFO,
@@ -672,9 +810,137 @@ struct ndis_pkt_8021q_info {
};
};
+struct ndis_oject_header {
+ u8 type;
+ u8 revision;
+ u16 size;
+};
+
+#define NDIS_OBJECT_TYPE_DEFAULT 0x80
+#define NDIS_OFFLOAD_PARAMETERS_REVISION_3 3
+#define NDIS_OFFLOAD_PARAMETERS_NO_CHANGE 0
+#define NDIS_OFFLOAD_PARAMETERS_LSOV2_DISABLED 1
+#define NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED 2
+#define NDIS_OFFLOAD_PARAMETERS_LSOV1_ENABLED 2
+#define NDIS_OFFLOAD_PARAMETERS_RSC_DISABLED 1
+#define NDIS_OFFLOAD_PARAMETERS_RSC_ENABLED 2
+#define NDIS_OFFLOAD_PARAMETERS_TX_RX_DISABLED 1
+#define NDIS_OFFLOAD_PARAMETERS_TX_ENABLED_RX_DISABLED 2
+#define NDIS_OFFLOAD_PARAMETERS_RX_ENABLED_TX_DISABLED 3
+#define NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED 4
+
+#define NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE 1
+#define NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4 0
+#define NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6 1
+
+#define VERSION_4_OFFLOAD_SIZE 22
+/*
+ * New offload OIDs for NDIS 6
+ */
+#define OID_TCP_OFFLOAD_CURRENT_CONFIG 0xFC01020B /* query only */
+#define OID_TCP_OFFLOAD_PARAMETERS 0xFC01020C /* set only */
+#define OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020D/* query only */
+#define OID_TCP_CONNECTION_OFFLOAD_CURRENT_CONFIG 0xFC01020E /* query only */
+#define OID_TCP_CONNECTION_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020F /* query */
+#define OID_OFFLOAD_ENCAPSULATION 0x0101010A /* set/query */
+
+struct ndis_offload_params {
+ struct ndis_oject_header header;
+ u8 ip_v4_csum;
+ u8 tcp_ip_v4_csum;
+ u8 udp_ip_v4_csum;
+ u8 tcp_ip_v6_csum;
+ u8 udp_ip_v6_csum;
+ u8 lso_v1;
+ u8 ip_sec_v1;
+ u8 lso_v2_ipv4;
+ u8 lso_v2_ipv6;
+ u8 tcp_connection_ip_v4;
+ u8 tcp_connection_ip_v6;
+ u32 flags;
+ u8 ip_sec_v2;
+ u8 ip_sec_v2_ip_v4;
+ struct {
+ u8 rsc_ip_v4;
+ u8 rsc_ip_v6;
+ };
+ struct {
+ u8 encapsulated_packet_task_offload;
+ u8 encapsulation_types;
+ };
+};
+
+struct ndis_tcp_ip_checksum_info {
+ union {
+ struct {
+ u32 is_ipv4:1;
+ u32 is_ipv6:1;
+ u32 tcp_checksum:1;
+ u32 udp_checksum:1;
+ u32 ip_header_checksum:1;
+ u32 reserved:11;
+ u32 tcp_header_offset:10;
+ } transmit;
+ struct {
+ u32 tcp_checksum_failed:1;
+ u32 udp_checksum_failed:1;
+ u32 ip_checksum_failed:1;
+ u32 tcp_checksum_succeeded:1;
+ u32 udp_checksum_succeeded:1;
+ u32 ip_checksum_succeeded:1;
+ u32 loopback:1;
+ u32 tcp_checksum_value_invalid:1;
+ u32 ip_checksum_value_invalid:1;
+ } receive;
+ u32 value;
+ };
+};
+
+struct ndis_tcp_lso_info {
+ union {
+ struct {
+ u32 unused:30;
+ u32 type:1;
+ u32 reserved2:1;
+ } transmit;
+ struct {
+ u32 mss:20;
+ u32 tcp_header_offset:10;
+ u32 type:1;
+ u32 reserved2:1;
+ } lso_v1_transmit;
+ struct {
+ u32 tcp_payload:30;
+ u32 type:1;
+ u32 reserved2:1;
+ } lso_v1_transmit_complete;
+ struct {
+ u32 mss:20;
+ u32 tcp_header_offset:10;
+ u32 type:1;
+ u32 ip_version:1;
+ } lso_v2_transmit;
+ struct {
+ u32 reserved:30;
+ u32 type:1;
+ u32 reserved2:1;
+ } lso_v2_transmit_complete;
+ u32 value;
+ };
+};
+
#define NDIS_VLAN_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \
sizeof(struct ndis_pkt_8021q_info))
+#define NDIS_CSUM_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \
+ sizeof(struct ndis_tcp_ip_checksum_info))
+
+#define NDIS_LSO_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \
+ sizeof(struct ndis_tcp_lso_info))
+
+#define NDIS_HASH_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \
+ sizeof(u32))
+
/* Format of Information buffer passed in a SetRequest for the OID */
/* OID_GEN_RNDIS_CONFIG_PARAMETER. */
struct rndis_config_parameter_info {
@@ -847,12 +1113,6 @@ struct rndis_message {
};
-struct rndis_filter_packet {
- void *completion_ctx;
- void (*completion)(void *context);
- struct rndis_message msg;
-};
-
/* Handy macros */
/* get the size of an RNDIS message. Pass in the message type, */
@@ -906,6 +1166,16 @@ struct rndis_filter_packet {
#define NDIS_PACKET_TYPE_FUNCTIONAL 0x00000400
#define NDIS_PACKET_TYPE_MAC_FRAME 0x00000800
+#define INFO_IPV4 2
+#define INFO_IPV6 4
+#define INFO_TCP 2
+#define INFO_UDP 4
+
+#define TRANSPORT_INFO_NOT_IP 0
+#define TRANSPORT_INFO_IPV4_TCP ((INFO_IPV4 << 16) | INFO_TCP)
+#define TRANSPORT_INFO_IPV4_UDP ((INFO_IPV4 << 16) | INFO_UDP)
+#define TRANSPORT_INFO_IPV6_TCP ((INFO_IPV6 << 16) | INFO_TCP)
+#define TRANSPORT_INFO_IPV6_UDP ((INFO_IPV6 << 16) | INFO_UDP)
#endif /* _HYPERV_NET_H */
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 2b0480416b3..d97d5f39a04 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -11,8 +11,7 @@
* more details.
*
* You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
+ * this program; if not, see <http://www.gnu.org/licenses/>.
*
* Authors:
* Haiyang Zhang <haiyangz@microsoft.com>
@@ -29,6 +28,7 @@
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
+#include <asm/sync_bitops.h>
#include "hyperv_net.h"
@@ -81,7 +81,7 @@ get_in_err:
}
-static int netvsc_destroy_recv_buf(struct netvsc_device *net_device)
+static int netvsc_destroy_buf(struct netvsc_device *net_device)
{
struct nvsp_message *revoke_packet;
int ret = 0;
@@ -137,8 +137,7 @@ static int netvsc_destroy_recv_buf(struct netvsc_device *net_device)
if (net_device->recv_buf) {
/* Free up the receive buffer */
- free_pages((unsigned long)net_device->recv_buf,
- get_order(net_device->recv_buf_size));
+ vfree(net_device->recv_buf);
net_device->recv_buf = NULL;
}
@@ -148,10 +147,62 @@ static int netvsc_destroy_recv_buf(struct netvsc_device *net_device)
net_device->recv_section = NULL;
}
+ /* Deal with the send buffer we may have setup.
+ * If we got a send section size, it means we received a
+ * SendsendBufferComplete msg (ie sent
+ * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need
+ * to send a revoke msg here
+ */
+ if (net_device->send_section_size) {
+ /* Send the revoke receive buffer */
+ revoke_packet = &net_device->revoke_packet;
+ memset(revoke_packet, 0, sizeof(struct nvsp_message));
+
+ revoke_packet->hdr.msg_type =
+ NVSP_MSG1_TYPE_REVOKE_SEND_BUF;
+ revoke_packet->msg.v1_msg.revoke_recv_buf.id = 0;
+
+ ret = vmbus_sendpacket(net_device->dev->channel,
+ revoke_packet,
+ sizeof(struct nvsp_message),
+ (unsigned long)revoke_packet,
+ VM_PKT_DATA_INBAND, 0);
+ /* If we failed here, we might as well return and
+ * have a leak rather than continue and a bugchk
+ */
+ if (ret != 0) {
+ netdev_err(ndev, "unable to send "
+ "revoke send buffer to netvsp\n");
+ return ret;
+ }
+ }
+ /* Teardown the gpadl on the vsp end */
+ if (net_device->send_buf_gpadl_handle) {
+ ret = vmbus_teardown_gpadl(net_device->dev->channel,
+ net_device->send_buf_gpadl_handle);
+
+ /* If we failed here, we might as well return and have a leak
+ * rather than continue and a bugchk
+ */
+ if (ret != 0) {
+ netdev_err(ndev,
+ "unable to teardown send buffer's gpadl\n");
+ return ret;
+ }
+ net_device->send_buf_gpadl_handle = 0;
+ }
+ if (net_device->send_buf) {
+ /* Free up the receive buffer */
+ free_pages((unsigned long)net_device->send_buf,
+ get_order(net_device->send_buf_size));
+ net_device->send_buf = NULL;
+ }
+ kfree(net_device->send_section_map);
+
return ret;
}
-static int netvsc_init_recv_buf(struct hv_device *device)
+static int netvsc_init_buf(struct hv_device *device)
{
int ret = 0;
int t;
@@ -164,9 +215,7 @@ static int netvsc_init_recv_buf(struct hv_device *device)
return -ENODEV;
ndev = net_device->ndev;
- net_device->recv_buf =
- (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
- get_order(net_device->recv_buf_size));
+ net_device->recv_buf = vzalloc(net_device->recv_buf_size);
if (!net_device->recv_buf) {
netdev_err(ndev, "unable to allocate receive "
"buffer of size %d\n", net_device->recv_buf_size);
@@ -252,10 +301,92 @@ static int netvsc_init_recv_buf(struct hv_device *device)
goto cleanup;
}
+ /* Now setup the send buffer.
+ */
+ net_device->send_buf =
+ (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
+ get_order(net_device->send_buf_size));
+ if (!net_device->send_buf) {
+ netdev_err(ndev, "unable to allocate send "
+ "buffer of size %d\n", net_device->send_buf_size);
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ /* Establish the gpadl handle for this buffer on this
+ * channel. Note: This call uses the vmbus connection rather
+ * than the channel to establish the gpadl handle.
+ */
+ ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
+ net_device->send_buf_size,
+ &net_device->send_buf_gpadl_handle);
+ if (ret != 0) {
+ netdev_err(ndev,
+ "unable to establish send buffer's gpadl\n");
+ goto cleanup;
+ }
+
+ /* Notify the NetVsp of the gpadl handle */
+ init_packet = &net_device->channel_init_pkt;
+ memset(init_packet, 0, sizeof(struct nvsp_message));
+ init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
+ init_packet->msg.v1_msg.send_recv_buf.gpadl_handle =
+ net_device->send_buf_gpadl_handle;
+ init_packet->msg.v1_msg.send_recv_buf.id = 0;
+
+ /* Send the gpadl notification request */
+ ret = vmbus_sendpacket(device->channel, init_packet,
+ sizeof(struct nvsp_message),
+ (unsigned long)init_packet,
+ VM_PKT_DATA_INBAND,
+ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ if (ret != 0) {
+ netdev_err(ndev,
+ "unable to send send buffer's gpadl to netvsp\n");
+ goto cleanup;
+ }
+
+ t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
+ BUG_ON(t == 0);
+
+ /* Check the response */
+ if (init_packet->msg.v1_msg.
+ send_send_buf_complete.status != NVSP_STAT_SUCCESS) {
+ netdev_err(ndev, "Unable to complete send buffer "
+ "initialization with NetVsp - status %d\n",
+ init_packet->msg.v1_msg.
+ send_recv_buf_complete.status);
+ ret = -EINVAL;
+ goto cleanup;
+ }
+
+ /* Parse the response */
+ net_device->send_section_size = init_packet->msg.
+ v1_msg.send_send_buf_complete.section_size;
+
+ /* Section count is simply the size divided by the section size.
+ */
+ net_device->send_section_cnt =
+ net_device->send_buf_size/net_device->send_section_size;
+
+ dev_info(&device->device, "Send section size: %d, Section count:%d\n",
+ net_device->send_section_size, net_device->send_section_cnt);
+
+ /* Setup state for managing the send buffer. */
+ net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt,
+ BITS_PER_LONG);
+
+ net_device->send_section_map =
+ kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL);
+ if (net_device->send_section_map == NULL) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
goto exit;
cleanup:
- netvsc_destroy_recv_buf(net_device);
+ netvsc_destroy_buf(net_device);
exit:
return ret;
@@ -294,7 +425,7 @@ static int negotiate_nvsp_ver(struct hv_device *device,
NVSP_STAT_SUCCESS)
return -EINVAL;
- if (nvsp_ver != NVSP_PROTOCOL_VERSION_2)
+ if (nvsp_ver == NVSP_PROTOCOL_VERSION_1)
return 0;
/* NVSPv2 only: Send NDIS config */
@@ -318,6 +449,9 @@ static int netvsc_connect_vsp(struct hv_device *device)
struct nvsp_message *init_packet;
int ndis_version;
struct net_device *ndev;
+ u32 ver_list[] = { NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
+ NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 };
+ int i, num_ver = 4; /* number of different NVSP versions */
net_device = get_outbound_net_device(device);
if (!net_device)
@@ -327,13 +461,14 @@ static int netvsc_connect_vsp(struct hv_device *device)
init_packet = &net_device->channel_init_pkt;
/* Negotiate the latest NVSP protocol supported */
- if (negotiate_nvsp_ver(device, net_device, init_packet,
- NVSP_PROTOCOL_VERSION_2) == 0) {
- net_device->nvsp_version = NVSP_PROTOCOL_VERSION_2;
- } else if (negotiate_nvsp_ver(device, net_device, init_packet,
- NVSP_PROTOCOL_VERSION_1) == 0) {
- net_device->nvsp_version = NVSP_PROTOCOL_VERSION_1;
- } else {
+ for (i = num_ver - 1; i >= 0; i--)
+ if (negotiate_nvsp_ver(device, net_device, init_packet,
+ ver_list[i]) == 0) {
+ net_device->nvsp_version = ver_list[i];
+ break;
+ }
+
+ if (i < 0) {
ret = -EPROTO;
goto cleanup;
}
@@ -343,7 +478,10 @@ static int netvsc_connect_vsp(struct hv_device *device)
/* Send the ndis version */
memset(init_packet, 0, sizeof(struct nvsp_message));
- ndis_version = 0x00050001;
+ if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_4)
+ ndis_version = 0x00060001;
+ else
+ ndis_version = 0x0006001e;
init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER;
init_packet->msg.v1_msg.
@@ -362,7 +500,13 @@ static int netvsc_connect_vsp(struct hv_device *device)
goto cleanup;
/* Post the big receive buffer to NetVSP */
- ret = netvsc_init_recv_buf(device);
+ if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
+ net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
+ else
+ net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
+ net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
+
+ ret = netvsc_init_buf(device);
cleanup:
return ret;
@@ -370,7 +514,7 @@ cleanup:
static void netvsc_disconnect_vsp(struct netvsc_device *net_device)
{
- netvsc_destroy_recv_buf(net_device);
+ netvsc_destroy_buf(net_device);
}
/*
@@ -379,7 +523,6 @@ static void netvsc_disconnect_vsp(struct netvsc_device *net_device)
int netvsc_device_remove(struct hv_device *device)
{
struct netvsc_device *net_device;
- struct hv_netvsc_packet *netvsc_packet, *pos;
unsigned long flags;
net_device = hv_get_drvdata(device);
@@ -408,11 +551,8 @@ int netvsc_device_remove(struct hv_device *device)
vmbus_close(device->channel);
/* Release all resources */
- list_for_each_entry_safe(netvsc_packet, pos,
- &net_device->recv_pkt_list, list_ent) {
- list_del(&netvsc_packet->list_ent);
- kfree(netvsc_packet);
- }
+ if (net_device->sub_cb_buf)
+ vfree(net_device->sub_cb_buf);
kfree(net_device);
return 0;
@@ -436,17 +576,21 @@ static inline u32 hv_ringbuf_avail_percent(
return avail_write * 100 / ring_info->ring_datasize;
}
-static void netvsc_send_completion(struct hv_device *device,
+static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
+ u32 index)
+{
+ sync_change_bit(index, net_device->send_section_map);
+}
+
+static void netvsc_send_completion(struct netvsc_device *net_device,
+ struct hv_device *device,
struct vmpacket_descriptor *packet)
{
- struct netvsc_device *net_device;
struct nvsp_message *nvsp_packet;
struct hv_netvsc_packet *nvsc_packet;
struct net_device *ndev;
+ u32 send_index;
- net_device = get_inbound_net_device(device);
- if (!net_device)
- return;
ndev = net_device->ndev;
nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
@@ -456,7 +600,9 @@ static void netvsc_send_completion(struct hv_device *device,
(nvsp_packet->hdr.msg_type ==
NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) ||
(nvsp_packet->hdr.msg_type ==
- NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE)) {
+ NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE) ||
+ (nvsp_packet->hdr.msg_type ==
+ NVSP_MSG5_TYPE_SUBCHANNEL)) {
/* Copy the response back */
memcpy(&net_device->channel_init_pkt, nvsp_packet,
sizeof(struct nvsp_message));
@@ -464,28 +610,39 @@ static void netvsc_send_completion(struct hv_device *device,
} else if (nvsp_packet->hdr.msg_type ==
NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) {
int num_outstanding_sends;
+ u16 q_idx = 0;
+ struct vmbus_channel *channel = device->channel;
+ int queue_sends;
/* Get the send context */
nvsc_packet = (struct hv_netvsc_packet *)(unsigned long)
packet->trans_id;
/* Notify the layer above us */
- if (nvsc_packet)
- nvsc_packet->completion.send.send_completion(
- nvsc_packet->completion.send.
- send_completion_ctx);
+ if (nvsc_packet) {
+ send_index = nvsc_packet->send_buf_index;
+ if (send_index != NETVSC_INVALID_INDEX)
+ netvsc_free_send_slot(net_device, send_index);
+ q_idx = nvsc_packet->q_idx;
+ channel = nvsc_packet->channel;
+ nvsc_packet->send_completion(nvsc_packet->
+ send_completion_ctx);
+ }
num_outstanding_sends =
atomic_dec_return(&net_device->num_outstanding_sends);
+ queue_sends = atomic_dec_return(&net_device->
+ queue_sends[q_idx]);
if (net_device->destroy && num_outstanding_sends == 0)
wake_up(&net_device->wait_drain);
- if (netif_queue_stopped(ndev) && !net_device->start_remove &&
- (hv_ringbuf_avail_percent(&device->channel->outbound)
- > RING_AVAIL_PERCENT_HIWATER ||
- num_outstanding_sends < 1))
- netif_wake_queue(ndev);
+ if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
+ !net_device->start_remove &&
+ (hv_ringbuf_avail_percent(&channel->outbound) >
+ RING_AVAIL_PERCENT_HIWATER || queue_sends < 1))
+ netif_tx_wake_queue(netdev_get_tx_queue(
+ ndev, q_idx));
} else {
netdev_err(ndev, "Unknown send completion packet type- "
"%d received!!\n", nvsp_packet->hdr.msg_type);
@@ -493,6 +650,52 @@ static void netvsc_send_completion(struct hv_device *device,
}
+static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
+{
+ unsigned long index;
+ u32 max_words = net_device->map_words;
+ unsigned long *map_addr = (unsigned long *)net_device->send_section_map;
+ u32 section_cnt = net_device->send_section_cnt;
+ int ret_val = NETVSC_INVALID_INDEX;
+ int i;
+ int prev_val;
+
+ for (i = 0; i < max_words; i++) {
+ if (!~(map_addr[i]))
+ continue;
+ index = ffz(map_addr[i]);
+ prev_val = sync_test_and_set_bit(index, &map_addr[i]);
+ if (prev_val)
+ continue;
+ if ((index + (i * BITS_PER_LONG)) >= section_cnt)
+ break;
+ ret_val = (index + (i * BITS_PER_LONG));
+ break;
+ }
+ return ret_val;
+}
+
+u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
+ unsigned int section_index,
+ struct hv_netvsc_packet *packet)
+{
+ char *start = net_device->send_buf;
+ char *dest = (start + (section_index * net_device->send_section_size));
+ int i;
+ u32 msg_size = 0;
+
+ for (i = 0; i < packet->page_buf_cnt; i++) {
+ char *src = phys_to_virt(packet->page_buf[i].pfn << PAGE_SHIFT);
+ u32 offset = packet->page_buf[i].offset;
+ u32 len = packet->page_buf[i].len;
+
+ memcpy(dest, (src + offset), len);
+ msg_size += len;
+ dest += len;
+ }
+ return msg_size;
+}
+
int netvsc_send(struct hv_device *device,
struct hv_netvsc_packet *packet)
{
@@ -500,7 +703,12 @@ int netvsc_send(struct hv_device *device,
int ret = 0;
struct nvsp_message sendMessage;
struct net_device *ndev;
+ struct vmbus_channel *out_channel = NULL;
u64 req_id;
+ unsigned int section_index = NETVSC_INVALID_INDEX;
+ u32 msg_size = 0;
+ struct sk_buff *skb;
+
net_device = get_outbound_net_device(device);
if (!net_device)
@@ -516,25 +724,46 @@ int netvsc_send(struct hv_device *device,
sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 1;
}
- /* Not using send buffer section */
+ /* Attempt to send via sendbuf */
+ if (packet->total_data_buflen < net_device->send_section_size) {
+ section_index = netvsc_get_next_send_section(net_device);
+ if (section_index != NETVSC_INVALID_INDEX) {
+ msg_size = netvsc_copy_to_send_buf(net_device,
+ section_index,
+ packet);
+ skb = (struct sk_buff *)
+ (unsigned long)packet->send_completion_tid;
+ if (skb)
+ dev_kfree_skb_any(skb);
+ packet->page_buf_cnt = 0;
+ }
+ }
+ packet->send_buf_index = section_index;
+
+
sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
- 0xFFFFFFFF;
- sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0;
+ section_index;
+ sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = msg_size;
- if (packet->completion.send.send_completion)
+ if (packet->send_completion)
req_id = (ulong)packet;
else
req_id = 0;
+ out_channel = net_device->chn_table[packet->q_idx];
+ if (out_channel == NULL)
+ out_channel = device->channel;
+ packet->channel = out_channel;
+
if (packet->page_buf_cnt) {
- ret = vmbus_sendpacket_pagebuffer(device->channel,
+ ret = vmbus_sendpacket_pagebuffer(out_channel,
packet->page_buf,
packet->page_buf_cnt,
&sendMessage,
sizeof(struct nvsp_message),
req_id);
} else {
- ret = vmbus_sendpacket(device->channel, &sendMessage,
+ ret = vmbus_sendpacket(out_channel, &sendMessage,
sizeof(struct nvsp_message),
req_id,
VM_PKT_DATA_INBAND,
@@ -543,17 +772,24 @@ int netvsc_send(struct hv_device *device,
if (ret == 0) {
atomic_inc(&net_device->num_outstanding_sends);
- if (hv_ringbuf_avail_percent(&device->channel->outbound) <
+ atomic_inc(&net_device->queue_sends[packet->q_idx]);
+
+ if (hv_ringbuf_avail_percent(&out_channel->outbound) <
RING_AVAIL_PERCENT_LOWATER) {
- netif_stop_queue(ndev);
+ netif_tx_stop_queue(netdev_get_tx_queue(
+ ndev, packet->q_idx));
+
if (atomic_read(&net_device->
- num_outstanding_sends) < 1)
- netif_wake_queue(ndev);
+ queue_sends[packet->q_idx]) < 1)
+ netif_tx_wake_queue(netdev_get_tx_queue(
+ ndev, packet->q_idx));
}
} else if (ret == -EAGAIN) {
- netif_stop_queue(ndev);
- if (atomic_read(&net_device->num_outstanding_sends) < 1) {
- netif_wake_queue(ndev);
+ netif_tx_stop_queue(netdev_get_tx_queue(
+ ndev, packet->q_idx));
+ if (atomic_read(&net_device->queue_sends[packet->q_idx]) < 1) {
+ netif_tx_wake_queue(netdev_get_tx_queue(
+ ndev, packet->q_idx));
ret = -ENOSPC;
}
} else {
@@ -565,13 +801,14 @@ int netvsc_send(struct hv_device *device,
}
static void netvsc_send_recv_completion(struct hv_device *device,
+ struct vmbus_channel *channel,
+ struct netvsc_device *net_device,
u64 transaction_id, u32 status)
{
struct nvsp_message recvcompMessage;
int retries = 0;
int ret;
struct net_device *ndev;
- struct netvsc_device *net_device = hv_get_drvdata(device);
ndev = net_device->ndev;
@@ -582,7 +819,7 @@ static void netvsc_send_recv_completion(struct hv_device *device,
retry_send_cmplt:
/* Send the completion */
- ret = vmbus_sendpacket(device->channel, &recvcompMessage,
+ ret = vmbus_sendpacket(channel, &recvcompMessage,
sizeof(struct nvsp_message), transaction_id,
VM_PKT_COMP, 0);
if (ret == 0) {
@@ -608,78 +845,20 @@ retry_send_cmplt:
}
}
-/* Send a receive completion packet to RNDIS device (ie NetVsp) */
-static void netvsc_receive_completion(void *context)
+static void netvsc_receive(struct netvsc_device *net_device,
+ struct vmbus_channel *channel,
+ struct hv_device *device,
+ struct vmpacket_descriptor *packet)
{
- struct hv_netvsc_packet *packet = context;
- struct hv_device *device = packet->device;
- struct netvsc_device *net_device;
- u64 transaction_id = 0;
- bool fsend_receive_comp = false;
- unsigned long flags;
- struct net_device *ndev;
- u32 status = NVSP_STAT_NONE;
-
- /*
- * Even though it seems logical to do a GetOutboundNetDevice() here to
- * send out receive completion, we are using GetInboundNetDevice()
- * since we may have disable outbound traffic already.
- */
- net_device = get_inbound_net_device(device);
- if (!net_device)
- return;
- ndev = net_device->ndev;
-
- /* Overloading use of the lock. */
- spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags);
-
- if (packet->status != NVSP_STAT_SUCCESS)
- packet->xfer_page_pkt->status = NVSP_STAT_FAIL;
-
- packet->xfer_page_pkt->count--;
-
- /*
- * Last one in the line that represent 1 xfer page packet.
- * Return the xfer page packet itself to the freelist
- */
- if (packet->xfer_page_pkt->count == 0) {
- fsend_receive_comp = true;
- transaction_id = packet->completion.recv.recv_completion_tid;
- status = packet->xfer_page_pkt->status;
- list_add_tail(&packet->xfer_page_pkt->list_ent,
- &net_device->recv_pkt_list);
-
- }
-
- /* Put the packet back */
- list_add_tail(&packet->list_ent, &net_device->recv_pkt_list);
- spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, flags);
-
- /* Send a receive completion for the xfer page packet */
- if (fsend_receive_comp)
- netvsc_send_recv_completion(device, transaction_id, status);
-
-}
-
-static void netvsc_receive(struct hv_device *device,
- struct vmpacket_descriptor *packet)
-{
- struct netvsc_device *net_device;
struct vmtransfer_page_packet_header *vmxferpage_packet;
struct nvsp_message *nvsp_packet;
- struct hv_netvsc_packet *netvsc_packet = NULL;
- /* struct netvsc_driver *netvscDriver; */
- struct xferpage_packet *xferpage_packet = NULL;
+ struct hv_netvsc_packet nv_pkt;
+ struct hv_netvsc_packet *netvsc_packet = &nv_pkt;
+ u32 status = NVSP_STAT_SUCCESS;
int i;
int count = 0;
- unsigned long flags;
struct net_device *ndev;
- LIST_HEAD(listHead);
-
- net_device = get_inbound_net_device(device);
- if (!net_device)
- return;
ndev = net_device->ndev;
/*
@@ -712,77 +891,14 @@ static void netvsc_receive(struct hv_device *device,
return;
}
- /*
- * Grab free packets (range count + 1) to represent this xfer
- * page packet. +1 to represent the xfer page packet itself.
- * We grab it here so that we know exactly how many we can
- * fulfil
- */
- spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags);
- while (!list_empty(&net_device->recv_pkt_list)) {
- list_move_tail(net_device->recv_pkt_list.next, &listHead);
- if (++count == vmxferpage_packet->range_cnt + 1)
- break;
- }
- spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, flags);
-
- /*
- * We need at least 2 netvsc pkts (1 to represent the xfer
- * page and at least 1 for the range) i.e. we can handled
- * some of the xfer page packet ranges...
- */
- if (count < 2) {
- netdev_err(ndev, "Got only %d netvsc pkt...needed "
- "%d pkts. Dropping this xfer page packet completely!\n",
- count, vmxferpage_packet->range_cnt + 1);
-
- /* Return it to the freelist */
- spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags);
- for (i = count; i != 0; i--) {
- list_move_tail(listHead.next,
- &net_device->recv_pkt_list);
- }
- spin_unlock_irqrestore(&net_device->recv_pkt_list_lock,
- flags);
-
- netvsc_send_recv_completion(device,
- vmxferpage_packet->d.trans_id,
- NVSP_STAT_FAIL);
-
- return;
- }
-
- /* Remove the 1st packet to represent the xfer page packet itself */
- xferpage_packet = (struct xferpage_packet *)listHead.next;
- list_del(&xferpage_packet->list_ent);
- xferpage_packet->status = NVSP_STAT_SUCCESS;
-
- /* This is how much we can satisfy */
- xferpage_packet->count = count - 1;
-
- if (xferpage_packet->count != vmxferpage_packet->range_cnt) {
- netdev_err(ndev, "Needed %d netvsc pkts to satisfy "
- "this xfer page...got %d\n",
- vmxferpage_packet->range_cnt, xferpage_packet->count);
- }
+ count = vmxferpage_packet->range_cnt;
+ netvsc_packet->device = device;
+ netvsc_packet->channel = channel;
/* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
- for (i = 0; i < (count - 1); i++) {
- netvsc_packet = (struct hv_netvsc_packet *)listHead.next;
- list_del(&netvsc_packet->list_ent);
-
+ for (i = 0; i < count; i++) {
/* Initialize the netvsc packet */
netvsc_packet->status = NVSP_STAT_SUCCESS;
- netvsc_packet->xfer_page_pkt = xferpage_packet;
- netvsc_packet->completion.recv.recv_completion =
- netvsc_receive_completion;
- netvsc_packet->completion.recv.recv_completion_ctx =
- netvsc_packet;
- netvsc_packet->device = device;
- /* Save this so that we can send it back */
- netvsc_packet->completion.recv.recv_completion_tid =
- vmxferpage_packet->d.trans_id;
-
netvsc_packet->data = (void *)((unsigned long)net_device->
recv_buf + vmxferpage_packet->ranges[i].byte_offset);
netvsc_packet->total_data_buflen =
@@ -791,49 +907,91 @@ static void netvsc_receive(struct hv_device *device,
/* Pass it to the upper layer */
rndis_filter_receive(device, netvsc_packet);
- netvsc_receive_completion(netvsc_packet->
- completion.recv.recv_completion_ctx);
+ if (netvsc_packet->status != NVSP_STAT_SUCCESS)
+ status = NVSP_STAT_FAIL;
+ }
+
+ netvsc_send_recv_completion(device, channel, net_device,
+ vmxferpage_packet->d.trans_id, status);
+}
+
+
+static void netvsc_send_table(struct hv_device *hdev,
+ struct vmpacket_descriptor *vmpkt)
+{
+ struct netvsc_device *nvscdev;
+ struct net_device *ndev;
+ struct nvsp_message *nvmsg;
+ int i;
+ u32 count, *tab;
+
+ nvscdev = get_outbound_net_device(hdev);
+ if (!nvscdev)
+ return;
+ ndev = nvscdev->ndev;
+
+ nvmsg = (struct nvsp_message *)((unsigned long)vmpkt +
+ (vmpkt->offset8 << 3));
+
+ if (nvmsg->hdr.msg_type != NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE)
+ return;
+
+ count = nvmsg->msg.v5_msg.send_table.count;
+ if (count != VRSS_SEND_TAB_SIZE) {
+ netdev_err(ndev, "Received wrong send-table size:%u\n", count);
+ return;
}
+ tab = (u32 *)((unsigned long)&nvmsg->msg.v5_msg.send_table +
+ nvmsg->msg.v5_msg.send_table.offset);
+
+ for (i = 0; i < count; i++)
+ nvscdev->send_table[i] = tab[i];
}
-static void netvsc_channel_cb(void *context)
+void netvsc_channel_cb(void *context)
{
int ret;
- struct hv_device *device = context;
+ struct vmbus_channel *channel = (struct vmbus_channel *)context;
+ struct hv_device *device;
struct netvsc_device *net_device;
u32 bytes_recvd;
u64 request_id;
- unsigned char *packet;
struct vmpacket_descriptor *desc;
unsigned char *buffer;
int bufferlen = NETVSC_PACKET_SIZE;
struct net_device *ndev;
- packet = kzalloc(NETVSC_PACKET_SIZE * sizeof(unsigned char),
- GFP_ATOMIC);
- if (!packet)
- return;
- buffer = packet;
+ if (channel->primary_channel != NULL)
+ device = channel->primary_channel->device_obj;
+ else
+ device = channel->device_obj;
net_device = get_inbound_net_device(device);
if (!net_device)
- goto out;
+ return;
ndev = net_device->ndev;
+ buffer = get_per_channel_state(channel);
do {
- ret = vmbus_recvpacket_raw(device->channel, buffer, bufferlen,
+ ret = vmbus_recvpacket_raw(channel, buffer, bufferlen,
&bytes_recvd, &request_id);
if (ret == 0) {
if (bytes_recvd > 0) {
desc = (struct vmpacket_descriptor *)buffer;
switch (desc->type) {
case VM_PKT_COMP:
- netvsc_send_completion(device, desc);
+ netvsc_send_completion(net_device,
+ device, desc);
break;
case VM_PKT_DATA_USING_XFER_PAGES:
- netvsc_receive(device, desc);
+ netvsc_receive(net_device, channel,
+ device, desc);
+ break;
+
+ case VM_PKT_DATA_INBAND:
+ netvsc_send_table(device, desc);
break;
default:
@@ -845,23 +1003,16 @@ static void netvsc_channel_cb(void *context)
break;
}
- /* reset */
- if (bufferlen > NETVSC_PACKET_SIZE) {
- kfree(buffer);
- buffer = packet;
- bufferlen = NETVSC_PACKET_SIZE;
- }
} else {
- /* reset */
- if (bufferlen > NETVSC_PACKET_SIZE) {
- kfree(buffer);
- buffer = packet;
- bufferlen = NETVSC_PACKET_SIZE;
- }
-
+ /*
+ * We are done for this pass.
+ */
break;
}
+
} else if (ret == -ENOBUFS) {
+ if (bufferlen > NETVSC_PACKET_SIZE)
+ kfree(buffer);
/* Handle large packet */
buffer = kmalloc(bytes_recvd, GFP_ATOMIC);
if (buffer == NULL) {
@@ -876,8 +1027,8 @@ static void netvsc_channel_cb(void *context)
}
} while (1);
-out:
- kfree(buffer);
+ if (bufferlen > NETVSC_PACKET_SIZE)
+ kfree(buffer);
return;
}
@@ -888,11 +1039,9 @@ out:
int netvsc_device_add(struct hv_device *device, void *additional_info)
{
int ret = 0;
- int i;
int ring_size =
((struct netvsc_device_info *)additional_info)->ring_size;
struct netvsc_device *net_device;
- struct hv_netvsc_packet *packet, *pos;
struct net_device *ndev;
net_device = alloc_net_device(device);
@@ -901,6 +1050,8 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
goto cleanup;
}
+ net_device->ring_size = ring_size;
+
/*
* Coming into this function, struct net_device * is
* registered as the driver private data.
@@ -911,25 +1062,14 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
ndev = net_device->ndev;
/* Initialize the NetVSC channel extension */
- net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
- spin_lock_init(&net_device->recv_pkt_list_lock);
-
- INIT_LIST_HEAD(&net_device->recv_pkt_list);
-
- for (i = 0; i < NETVSC_RECEIVE_PACKETLIST_COUNT; i++) {
- packet = kzalloc(sizeof(struct hv_netvsc_packet), GFP_KERNEL);
- if (!packet)
- break;
-
- list_add_tail(&packet->list_ent,
- &net_device->recv_pkt_list);
- }
init_completion(&net_device->channel_init_wait);
+ set_per_channel_state(device->channel, net_device->cb_buffer);
+
/* Open the channel */
ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
ring_size * PAGE_SIZE, NULL, 0,
- netvsc_channel_cb, device);
+ netvsc_channel_cb, device->channel);
if (ret != 0) {
netdev_err(ndev, "unable to open channel: %d\n", ret);
@@ -939,6 +1079,8 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
/* Channel is opened */
pr_info("hv_netvsc channel opened successfully\n");
+ net_device->chn_table[0] = device->channel;
+
/* Connect with the NetVsp */
ret = netvsc_connect_vsp(device);
if (ret != 0) {
@@ -955,16 +1097,8 @@ close:
cleanup:
- if (net_device) {
- list_for_each_entry_safe(packet, pos,
- &net_device->recv_pkt_list,
- list_ent) {
- list_del(&packet->list_ent);
- kfree(packet);
- }
-
+ if (net_device)
kfree(net_device);
- }
return ret;
}
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 524f713f601..4fd71b75e66 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -11,8 +11,7 @@
* more details.
*
* You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
+ * this program; if not, see <http://www.gnu.org/licenses/>.
*
* Authors:
* Haiyang Zhang <haiyangz@microsoft.com>
@@ -89,8 +88,12 @@ static int netvsc_open(struct net_device *net)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
struct hv_device *device_obj = net_device_ctx->device_ctx;
+ struct netvsc_device *nvdev;
+ struct rndis_device *rdev;
int ret = 0;
+ netif_carrier_off(net);
+
/* Open up the device */
ret = rndis_filter_open(device_obj);
if (ret != 0) {
@@ -98,7 +101,12 @@ static int netvsc_open(struct net_device *net)
return ret;
}
- netif_start_queue(net);
+ netif_tx_start_all_queues(net);
+
+ nvdev = hv_get_drvdata(device_obj);
+ rdev = nvdev->extension;
+ if (!rdev->link_state)
+ netif_carrier_on(net);
return ret;
}
@@ -120,35 +128,285 @@ static int netvsc_close(struct net_device *net)
return ret;
}
+static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
+ int pkt_type)
+{
+ struct rndis_packet *rndis_pkt;
+ struct rndis_per_packet_info *ppi;
+
+ rndis_pkt = &msg->msg.pkt;
+ rndis_pkt->data_offset += ppi_size;
+
+ ppi = (struct rndis_per_packet_info *)((void *)rndis_pkt +
+ rndis_pkt->per_pkt_info_offset + rndis_pkt->per_pkt_info_len);
+
+ ppi->size = ppi_size;
+ ppi->type = pkt_type;
+ ppi->ppi_offset = sizeof(struct rndis_per_packet_info);
+
+ rndis_pkt->per_pkt_info_len += ppi_size;
+
+ return ppi;
+}
+
+union sub_key {
+ u64 k;
+ struct {
+ u8 pad[3];
+ u8 kb;
+ u32 ka;
+ };
+};
+
+/* Toeplitz hash function
+ * data: network byte order
+ * return: host byte order
+ */
+static u32 comp_hash(u8 *key, int klen, u8 *data, int dlen)
+{
+ union sub_key subk;
+ int k_next = 4;
+ u8 dt;
+ int i, j;
+ u32 ret = 0;
+
+ subk.k = 0;
+ subk.ka = ntohl(*(u32 *)key);
+
+ for (i = 0; i < dlen; i++) {
+ subk.kb = key[k_next];
+ k_next = (k_next + 1) % klen;
+ dt = data[i];
+ for (j = 0; j < 8; j++) {
+ if (dt & 0x80)
+ ret ^= subk.ka;
+ dt <<= 1;
+ subk.k <<= 1;
+ }
+ }
+
+ return ret;
+}
+
+static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb)
+{
+ struct iphdr *iphdr;
+ int data_len;
+ bool ret = false;
+
+ if (eth_hdr(skb)->h_proto != htons(ETH_P_IP))
+ return false;
+
+ iphdr = ip_hdr(skb);
+
+ if (iphdr->version == 4) {
+ if (iphdr->protocol == IPPROTO_TCP)
+ data_len = 12;
+ else
+ data_len = 8;
+ *hash = comp_hash(netvsc_hash_key, HASH_KEYLEN,
+ (u8 *)&iphdr->saddr, data_len);
+ ret = true;
+ }
+
+ return ret;
+}
+
+static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
+ void *accel_priv, select_queue_fallback_t fallback)
+{
+ struct net_device_context *net_device_ctx = netdev_priv(ndev);
+ struct hv_device *hdev = net_device_ctx->device_ctx;
+ struct netvsc_device *nvsc_dev = hv_get_drvdata(hdev);
+ u32 hash;
+ u16 q_idx = 0;
+
+ if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1)
+ return 0;
+
+ if (netvsc_set_hash(&hash, skb)) {
+ q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] %
+ ndev->real_num_tx_queues;
+ skb_set_hash(skb, hash, PKT_HASH_TYPE_L3);
+ }
+
+ return q_idx;
+}
+
static void netvsc_xmit_completion(void *context)
{
struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
struct sk_buff *skb = (struct sk_buff *)
- (unsigned long)packet->completion.send.send_completion_tid;
+ (unsigned long)packet->send_completion_tid;
+ u32 index = packet->send_buf_index;
kfree(packet);
- if (skb)
+ if (skb && (index == NETVSC_INVALID_INDEX))
dev_kfree_skb_any(skb);
}
+static u32 fill_pg_buf(struct page *page, u32 offset, u32 len,
+ struct hv_page_buffer *pb)
+{
+ int j = 0;
+
+ /* Deal with compund pages by ignoring unused part
+ * of the page.
+ */
+ page += (offset >> PAGE_SHIFT);
+ offset &= ~PAGE_MASK;
+
+ while (len > 0) {
+ unsigned long bytes;
+
+ bytes = PAGE_SIZE - offset;
+ if (bytes > len)
+ bytes = len;
+ pb[j].pfn = page_to_pfn(page);
+ pb[j].offset = offset;
+ pb[j].len = bytes;
+
+ offset += bytes;
+ len -= bytes;
+
+ if (offset == PAGE_SIZE && len) {
+ page++;
+ offset = 0;
+ j++;
+ }
+ }
+
+ return j + 1;
+}
+
+static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb,
+ struct hv_page_buffer *pb)
+{
+ u32 slots_used = 0;
+ char *data = skb->data;
+ int frags = skb_shinfo(skb)->nr_frags;
+ int i;
+
+ /* The packet is laid out thus:
+ * 1. hdr
+ * 2. skb linear data
+ * 3. skb fragment data
+ */
+ if (hdr != NULL)
+ slots_used += fill_pg_buf(virt_to_page(hdr),
+ offset_in_page(hdr),
+ len, &pb[slots_used]);
+
+ slots_used += fill_pg_buf(virt_to_page(data),
+ offset_in_page(data),
+ skb_headlen(skb), &pb[slots_used]);
+
+ for (i = 0; i < frags; i++) {
+ skb_frag_t *frag = skb_shinfo(skb)->frags + i;
+
+ slots_used += fill_pg_buf(skb_frag_page(frag),
+ frag->page_offset,
+ skb_frag_size(frag), &pb[slots_used]);
+ }
+ return slots_used;
+}
+
+static int count_skb_frag_slots(struct sk_buff *skb)
+{
+ int i, frags = skb_shinfo(skb)->nr_frags;
+ int pages = 0;
+
+ for (i = 0; i < frags; i++) {
+ skb_frag_t *frag = skb_shinfo(skb)->frags + i;
+ unsigned long size = skb_frag_size(frag);
+ unsigned long offset = frag->page_offset;
+
+ /* Skip unused frames from start of page */
+ offset &= ~PAGE_MASK;
+ pages += PFN_UP(offset + size);
+ }
+ return pages;
+}
+
+static int netvsc_get_slots(struct sk_buff *skb)
+{
+ char *data = skb->data;
+ unsigned int offset = offset_in_page(data);
+ unsigned int len = skb_headlen(skb);
+ int slots;
+ int frag_slots;
+
+ slots = DIV_ROUND_UP(offset + len, PAGE_SIZE);
+ frag_slots = count_skb_frag_slots(skb);
+ return slots + frag_slots;
+}
+
+static u32 get_net_transport_info(struct sk_buff *skb, u32 *trans_off)
+{
+ u32 ret_val = TRANSPORT_INFO_NOT_IP;
+
+ if ((eth_hdr(skb)->h_proto != htons(ETH_P_IP)) &&
+ (eth_hdr(skb)->h_proto != htons(ETH_P_IPV6))) {
+ goto not_ip;
+ }
+
+ *trans_off = skb_transport_offset(skb);
+
+ if ((eth_hdr(skb)->h_proto == htons(ETH_P_IP))) {
+ struct iphdr *iphdr = ip_hdr(skb);
+
+ if (iphdr->protocol == IPPROTO_TCP)
+ ret_val = TRANSPORT_INFO_IPV4_TCP;
+ else if (iphdr->protocol == IPPROTO_UDP)
+ ret_val = TRANSPORT_INFO_IPV4_UDP;
+ } else {
+ if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
+ ret_val = TRANSPORT_INFO_IPV6_TCP;
+ else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP)
+ ret_val = TRANSPORT_INFO_IPV6_UDP;
+ }
+
+not_ip:
+ return ret_val;
+}
+
static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
struct hv_netvsc_packet *packet;
int ret;
- unsigned int i, num_pages, npg_data;
-
- /* Add multipages for skb->data and additional 2 for RNDIS */
- npg_data = (((unsigned long)skb->data + skb_headlen(skb) - 1)
- >> PAGE_SHIFT) - ((unsigned long)skb->data >> PAGE_SHIFT) + 1;
- num_pages = skb_shinfo(skb)->nr_frags + npg_data + 2;
+ unsigned int num_data_pgs;
+ struct rndis_message *rndis_msg;
+ struct rndis_packet *rndis_pkt;
+ u32 rndis_msg_size;
+ bool isvlan;
+ struct rndis_per_packet_info *ppi;
+ struct ndis_tcp_ip_checksum_info *csum_info;
+ struct ndis_tcp_lso_info *lso_info;
+ int hdr_offset;
+ u32 net_trans_info;
+ u32 hash;
+
+
+ /* We will atmost need two pages to describe the rndis
+ * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
+ * of pages in a single packet.
+ */
+ num_data_pgs = netvsc_get_slots(skb) + 2;
+ if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) {
+ netdev_err(net, "Packet too big: %u\n", skb->len);
+ dev_kfree_skb(skb);
+ net->stats.tx_dropped++;
+ return NETDEV_TX_OK;
+ }
/* Allocate a netvsc packet based on # of frags. */
packet = kzalloc(sizeof(struct hv_netvsc_packet) +
- (num_pages * sizeof(struct hv_page_buffer)) +
- sizeof(struct rndis_filter_packet) +
- NDIS_VLAN_PPI_SIZE, GFP_ATOMIC);
+ (num_data_pgs * sizeof(struct hv_page_buffer)) +
+ sizeof(struct rndis_message) +
+ NDIS_VLAN_PPI_SIZE + NDIS_CSUM_PPI_SIZE +
+ NDIS_LSO_PPI_SIZE + NDIS_HASH_PPI_SIZE, GFP_ATOMIC);
if (!packet) {
/* out of memory, drop packet */
netdev_err(net, "unable to allocate hv_netvsc_packet\n");
@@ -160,53 +418,149 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
packet->vlan_tci = skb->vlan_tci;
- packet->extension = (void *)(unsigned long)packet +
+ packet->q_idx = skb_get_queue_mapping(skb);
+
+ packet->is_data_pkt = true;
+ packet->total_data_buflen = skb->len;
+
+ packet->rndis_msg = (struct rndis_message *)((unsigned long)packet +
sizeof(struct hv_netvsc_packet) +
- (num_pages * sizeof(struct hv_page_buffer));
+ (num_data_pgs * sizeof(struct hv_page_buffer)));
- /* If the rndis msg goes beyond 1 page, we will add 1 later */
- packet->page_buf_cnt = num_pages - 1;
+ /* Set the completion routine */
+ packet->send_completion = netvsc_xmit_completion;
+ packet->send_completion_ctx = packet;
+ packet->send_completion_tid = (unsigned long)skb;
+
+ isvlan = packet->vlan_tci & VLAN_TAG_PRESENT;
+
+ /* Add the rndis header */
+ rndis_msg = packet->rndis_msg;
+ rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET;
+ rndis_msg->msg_len = packet->total_data_buflen;
+ rndis_pkt = &rndis_msg->msg.pkt;
+ rndis_pkt->data_offset = sizeof(struct rndis_packet);
+ rndis_pkt->data_len = packet->total_data_buflen;
+ rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet);
+
+ rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet);
+
+ hash = skb_get_hash_raw(skb);
+ if (hash != 0 && net->real_num_tx_queues > 1) {
+ rndis_msg_size += NDIS_HASH_PPI_SIZE;
+ ppi = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE,
+ NBL_HASH_VALUE);
+ *(u32 *)((void *)ppi + ppi->ppi_offset) = hash;
+ }
- /* Initialize it from the skb */
- packet->total_data_buflen = skb->len;
+ if (isvlan) {
+ struct ndis_pkt_8021q_info *vlan;
+
+ rndis_msg_size += NDIS_VLAN_PPI_SIZE;
+ ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE,
+ IEEE_8021Q_INFO);
+ vlan = (struct ndis_pkt_8021q_info *)((void *)ppi +
+ ppi->ppi_offset);
+ vlan->vlanid = packet->vlan_tci & VLAN_VID_MASK;
+ vlan->pri = (packet->vlan_tci & VLAN_PRIO_MASK) >>
+ VLAN_PRIO_SHIFT;
+ }
+
+ net_trans_info = get_net_transport_info(skb, &hdr_offset);
+ if (net_trans_info == TRANSPORT_INFO_NOT_IP)
+ goto do_send;
+
+ /*
+ * Setup the sendside checksum offload only if this is not a
+ * GSO packet.
+ */
+ if (skb_is_gso(skb))
+ goto do_lso;
+
+ if ((skb->ip_summed == CHECKSUM_NONE) ||
+ (skb->ip_summed == CHECKSUM_UNNECESSARY))
+ goto do_send;
+
+ rndis_msg_size += NDIS_CSUM_PPI_SIZE;
+ ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE,
+ TCPIP_CHKSUM_PKTINFO);
+
+ csum_info = (struct ndis_tcp_ip_checksum_info *)((void *)ppi +
+ ppi->ppi_offset);
- /* Start filling in the page buffers starting after RNDIS buffer. */
- packet->page_buf[1].pfn = virt_to_phys(skb->data) >> PAGE_SHIFT;
- packet->page_buf[1].offset
- = (unsigned long)skb->data & (PAGE_SIZE - 1);
- if (npg_data == 1)
- packet->page_buf[1].len = skb_headlen(skb);
+ if (net_trans_info & (INFO_IPV4 << 16))
+ csum_info->transmit.is_ipv4 = 1;
else
- packet->page_buf[1].len = PAGE_SIZE
- - packet->page_buf[1].offset;
-
- for (i = 2; i <= npg_data; i++) {
- packet->page_buf[i].pfn = virt_to_phys(skb->data
- + PAGE_SIZE * (i-1)) >> PAGE_SHIFT;
- packet->page_buf[i].offset = 0;
- packet->page_buf[i].len = PAGE_SIZE;
+ csum_info->transmit.is_ipv6 = 1;
+
+ if (net_trans_info & INFO_TCP) {
+ csum_info->transmit.tcp_checksum = 1;
+ csum_info->transmit.tcp_header_offset = hdr_offset;
+ } else if (net_trans_info & INFO_UDP) {
+ /* UDP checksum offload is not supported on ws2008r2.
+ * Furthermore, on ws2012 and ws2012r2, there are some
+ * issues with udp checksum offload from Linux guests.
+ * (these are host issues).
+ * For now compute the checksum here.
+ */
+ struct udphdr *uh;
+ u16 udp_len;
+
+ ret = skb_cow_head(skb, 0);
+ if (ret)
+ goto drop;
+
+ uh = udp_hdr(skb);
+ udp_len = ntohs(uh->len);
+ uh->check = 0;
+ uh->check = csum_tcpudp_magic(ip_hdr(skb)->saddr,
+ ip_hdr(skb)->daddr,
+ udp_len, IPPROTO_UDP,
+ csum_partial(uh, udp_len, 0));
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+
+ csum_info->transmit.udp_checksum = 0;
}
- if (npg_data > 1)
- packet->page_buf[npg_data].len = (((unsigned long)skb->data
- + skb_headlen(skb) - 1) & (PAGE_SIZE - 1)) + 1;
-
- /* Additional fragments are after SKB data */
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
- const skb_frag_t *f = &skb_shinfo(skb)->frags[i];
-
- packet->page_buf[i+npg_data+1].pfn =
- page_to_pfn(skb_frag_page(f));
- packet->page_buf[i+npg_data+1].offset = f->page_offset;
- packet->page_buf[i+npg_data+1].len = skb_frag_size(f);
+ goto do_send;
+
+do_lso:
+ rndis_msg_size += NDIS_LSO_PPI_SIZE;
+ ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE,
+ TCP_LARGESEND_PKTINFO);
+
+ lso_info = (struct ndis_tcp_lso_info *)((void *)ppi +
+ ppi->ppi_offset);
+
+ lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
+ if (net_trans_info & (INFO_IPV4 << 16)) {
+ lso_info->lso_v2_transmit.ip_version =
+ NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4;
+ ip_hdr(skb)->tot_len = 0;
+ ip_hdr(skb)->check = 0;
+ tcp_hdr(skb)->check =
+ ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+ ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
+ } else {
+ lso_info->lso_v2_transmit.ip_version =
+ NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6;
+ ipv6_hdr(skb)->payload_len = 0;
+ tcp_hdr(skb)->check =
+ ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
}
+ lso_info->lso_v2_transmit.tcp_header_offset = hdr_offset;
+ lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size;
- /* Set the completion routine */
- packet->completion.send.send_completion = netvsc_xmit_completion;
- packet->completion.send.send_completion_ctx = packet;
- packet->completion.send.send_completion_tid = (unsigned long)skb;
+do_send:
+ /* Start filling in the page buffers with the rndis hdr */
+ rndis_msg->msg_len += rndis_msg_size;
+ packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
+ skb, &packet->page_buf[0]);
- ret = rndis_filter_send(net_device_ctx->device_ctx,
- packet);
+ ret = netvsc_send(net_device_ctx->device_ctx, packet);
+
+drop:
if (ret == 0) {
net->stats.tx_bytes += skb->len;
net->stats.tx_packets++;
@@ -230,23 +584,24 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj,
struct net_device *net;
struct net_device_context *ndev_ctx;
struct netvsc_device *net_device;
+ struct rndis_device *rdev;
net_device = hv_get_drvdata(device_obj);
+ rdev = net_device->extension;
+
+ rdev->link_state = status != 1;
+
net = net_device->ndev;
- if (!net) {
- netdev_err(net, "got link status but net device "
- "not initialized yet\n");
+ if (!net || net->reg_state != NETREG_REGISTERED)
return;
- }
+ ndev_ctx = netdev_priv(net);
if (status == 1) {
- netif_carrier_on(net);
- ndev_ctx = netdev_priv(net);
schedule_delayed_work(&ndev_ctx->dwork, 0);
schedule_delayed_work(&ndev_ctx->dwork, msecs_to_jiffies(20));
} else {
- netif_carrier_off(net);
+ schedule_delayed_work(&ndev_ctx->dwork, 0);
}
}
@@ -255,15 +610,14 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj,
* "wire" on the specified device.
*/
int netvsc_recv_callback(struct hv_device *device_obj,
- struct hv_netvsc_packet *packet)
+ struct hv_netvsc_packet *packet,
+ struct ndis_tcp_ip_checksum_info *csum_info)
{
struct net_device *net;
struct sk_buff *skb;
net = ((struct netvsc_device *)hv_get_drvdata(device_obj))->ndev;
- if (!net) {
- netdev_err(net, "got receive callback but net device"
- " not initialized yet\n");
+ if (!net || net->reg_state != NETREG_REGISTERED) {
packet->status = NVSP_STAT_FAIL;
return 0;
}
@@ -284,11 +638,24 @@ int netvsc_recv_callback(struct hv_device *device_obj,
packet->total_data_buflen);
skb->protocol = eth_type_trans(skb, net);
- skb->ip_summed = CHECKSUM_NONE;
+ if (csum_info) {
+ /* We only look at the IP checksum here.
+ * Should we be dropping the packet if checksum
+ * failed? How do we deal with other checksums - TCP/UDP?
+ */
+ if (csum_info->receive.ip_checksum_succeeded)
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ else
+ skb->ip_summed = CHECKSUM_NONE;
+ }
+
if (packet->vlan_tci & VLAN_TAG_PRESENT)
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
packet->vlan_tci);
+ skb_record_rx_queue(skb, packet->channel->
+ offermsg.offer.sub_channel_index);
+
net->stats.rx_packets++;
net->stats.rx_bytes += packet->total_data_buflen;
@@ -320,14 +687,13 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
if (nvdev == NULL || nvdev->destroy)
return -ENODEV;
- if (nvdev->nvsp_version == NVSP_PROTOCOL_VERSION_2)
+ if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2)
limit = NETVSC_MTU;
if (mtu < 68 || mtu > limit)
return -EINVAL;
nvdev->start_remove = true;
- cancel_delayed_work_sync(&ndevctx->dwork);
cancel_work_sync(&ndevctx->work);
netif_tx_disable(ndev);
rndis_filter_device_remove(hdev);
@@ -338,7 +704,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
hv_set_drvdata(hdev, ndev);
device_info.ring_size = ring_size;
rndis_filter_device_add(hdev, &device_info);
- netif_wake_queue(ndev);
+ netif_tx_wake_all_queues(ndev);
return 0;
}
@@ -384,6 +750,7 @@ static const struct net_device_ops device_ops = {
.ndo_change_mtu = netvsc_change_mtu,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = netvsc_set_mac_addr,
+ .ndo_select_queue = netvsc_select_queue,
};
/*
@@ -392,17 +759,35 @@ static const struct net_device_ops device_ops = {
* current context when receiving RNDIS_STATUS_MEDIA_CONNECT event. So, add
* another netif_notify_peers() into a delayed work, otherwise GARP packet
* will not be sent after quick migration, and cause network disconnection.
+ * Also, we update the carrier status here.
*/
-static void netvsc_send_garp(struct work_struct *w)
+static void netvsc_link_change(struct work_struct *w)
{
struct net_device_context *ndev_ctx;
struct net_device *net;
struct netvsc_device *net_device;
+ struct rndis_device *rdev;
+ bool notify;
+
+ rtnl_lock();
ndev_ctx = container_of(w, struct net_device_context, dwork.work);
net_device = hv_get_drvdata(ndev_ctx->device_ctx);
+ rdev = net_device->extension;
net = net_device->ndev;
- netdev_notify_peers(net);
+
+ if (rdev->link_state) {
+ netif_carrier_off(net);
+ notify = false;
+ } else {
+ netif_carrier_on(net);
+ notify = true;
+ }
+
+ rtnl_unlock();
+
+ if (notify)
+ netdev_notify_peers(net);
}
@@ -412,52 +797,56 @@ static int netvsc_probe(struct hv_device *dev,
struct net_device *net = NULL;
struct net_device_context *net_device_ctx;
struct netvsc_device_info device_info;
+ struct netvsc_device *nvdev;
int ret;
- net = alloc_etherdev(sizeof(struct net_device_context));
+ net = alloc_etherdev_mq(sizeof(struct net_device_context),
+ num_online_cpus());
if (!net)
return -ENOMEM;
- /* Set initial state */
netif_carrier_off(net);
net_device_ctx = netdev_priv(net);
net_device_ctx->device_ctx = dev;
hv_set_drvdata(dev, net);
- INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_send_garp);
+ INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
INIT_WORK(&net_device_ctx->work, do_set_multicast);
net->netdev_ops = &device_ops;
- /* TODO: Add GSO and Checksum offload */
- net->hw_features = 0;
- net->features = NETIF_F_HW_VLAN_CTAG_TX;
+ net->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM |
+ NETIF_F_TSO;
+ net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM |
+ NETIF_F_IP_CSUM | NETIF_F_TSO;
- SET_ETHTOOL_OPS(net, &ethtool_ops);
+ net->ethtool_ops = &ethtool_ops;
SET_NETDEV_DEV(net, &dev->device);
- ret = register_netdev(net);
- if (ret != 0) {
- pr_err("Unable to register netdev.\n");
- free_netdev(net);
- goto out;
- }
-
/* Notify the netvsc driver of the new device */
device_info.ring_size = ring_size;
ret = rndis_filter_device_add(dev, &device_info);
if (ret != 0) {
netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
- unregister_netdev(net);
free_netdev(net);
hv_set_drvdata(dev, NULL);
return ret;
}
memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);
- netif_carrier_on(net);
+ nvdev = hv_get_drvdata(dev);
+ netif_set_real_num_tx_queues(net, nvdev->num_chn);
+ netif_set_real_num_rx_queues(net, nvdev->num_chn);
+
+ ret = register_netdev(net);
+ if (ret != 0) {
+ pr_err("Unable to register netdev.\n");
+ rndis_filter_device_remove(dev);
+ free_netdev(net);
+ } else {
+ schedule_delayed_work(&net_device_ctx->dwork, 0);
+ }
-out:
return ret;
}
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 0775f0aefd1..99c527adae5 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -11,8 +11,7 @@
* more details.
*
* You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
+ * this program; if not, see <http://www.gnu.org/licenses/>.
*
* Authors:
* Haiyang Zhang <haiyangz@microsoft.com>
@@ -32,7 +31,7 @@
#include "hyperv_net.h"
-#define RNDIS_EXT_LEN 100
+#define RNDIS_EXT_LEN PAGE_SIZE
struct rndis_request {
struct list_head list_ent;
struct completion wait_event;
@@ -59,9 +58,6 @@ struct rndis_request {
u8 request_ext[RNDIS_EXT_LEN];
};
-static void rndis_filter_send_completion(void *ctx);
-
-
static struct rndis_device *get_rndis_device(void)
{
struct rndis_device *device;
@@ -98,6 +94,8 @@ static struct rndis_request *get_rndis_request(struct rndis_device *dev,
rndis_msg->ndis_msg_type = msg_type;
rndis_msg->msg_len = msg_len;
+ request->pkt.q_idx = 0;
+
/*
* Set the request id. This field is always after the rndis header for
* request/response packet types so we just used the SetRequest as a
@@ -238,12 +236,28 @@ static int rndis_filter_send_request(struct rndis_device *dev,
packet->page_buf[0].len;
}
- packet->completion.send.send_completion = NULL;
+ packet->send_completion = NULL;
ret = netvsc_send(dev->net_dev->dev, packet);
return ret;
}
+static void rndis_set_link_state(struct rndis_device *rdev,
+ struct rndis_request *request)
+{
+ u32 link_status;
+ struct rndis_query_complete *query_complete;
+
+ query_complete = &request->response_msg.msg.query_complete;
+
+ if (query_complete->status == RNDIS_STATUS_SUCCESS &&
+ query_complete->info_buflen == sizeof(u32)) {
+ memcpy(&link_status, (void *)((unsigned long)query_complete +
+ query_complete->info_buf_offset), sizeof(u32));
+ rdev->link_state = link_status != 0;
+ }
+}
+
static void rndis_filter_receive_response(struct rndis_device *dev,
struct rndis_message *resp)
{
@@ -273,12 +287,16 @@ static void rndis_filter_receive_response(struct rndis_device *dev,
sizeof(struct rndis_message) + RNDIS_EXT_LEN) {
memcpy(&request->response_msg, resp,
resp->msg_len);
+ if (request->request_msg.ndis_msg_type ==
+ RNDIS_MSG_QUERY && request->request_msg.msg.
+ query_req.oid == RNDIS_OID_GEN_MEDIA_CONNECT_STATUS)
+ rndis_set_link_state(dev, request);
} else {
netdev_err(ndev,
"rndis response buffer overflow "
"detected (size %u max %zu)\n",
resp->msg_len,
- sizeof(struct rndis_filter_packet));
+ sizeof(struct rndis_message));
if (resp->ndis_msg_type ==
RNDIS_MSG_RESET_C) {
@@ -354,6 +372,7 @@ static void rndis_filter_receive_data(struct rndis_device *dev,
struct rndis_packet *rndis_pkt;
u32 data_offset;
struct ndis_pkt_8021q_info *vlan;
+ struct ndis_tcp_ip_checksum_info *csum_info;
rndis_pkt = &msg->msg.pkt;
@@ -382,8 +401,6 @@ static void rndis_filter_receive_data(struct rndis_device *dev,
pkt->total_data_buflen = rndis_pkt->data_len;
pkt->data = (void *)((unsigned long)pkt->data + data_offset);
- pkt->is_data_pkt = true;
-
vlan = rndis_get_ppi(rndis_pkt, IEEE_8021Q_INFO);
if (vlan) {
pkt->vlan_tci = VLAN_TAG_PRESENT | vlan->vlanid |
@@ -392,7 +409,8 @@ static void rndis_filter_receive_data(struct rndis_device *dev,
pkt->vlan_tci = 0;
}
- netvsc_recv_callback(dev->net_dev->dev, pkt);
+ csum_info = rndis_get_ppi(rndis_pkt, TCPIP_CHKSUM_PKTINFO);
+ netvsc_recv_callback(dev->net_dev->dev, pkt, csum_info);
}
int rndis_filter_receive(struct hv_device *dev,
@@ -491,6 +509,19 @@ static int rndis_filter_query_device(struct rndis_device *dev, u32 oid,
query->info_buflen = 0;
query->dev_vc_handle = 0;
+ if (oid == OID_GEN_RECEIVE_SCALE_CAPABILITIES) {
+ struct ndis_recv_scale_cap *cap;
+
+ request->request_msg.msg_len +=
+ sizeof(struct ndis_recv_scale_cap);
+ query->info_buflen = sizeof(struct ndis_recv_scale_cap);
+ cap = (struct ndis_recv_scale_cap *)((unsigned long)query +
+ query->info_buf_offset);
+ cap->hdr.type = NDIS_OBJECT_TYPE_RSS_CAPABILITIES;
+ cap->hdr.rev = NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2;
+ cap->hdr.size = sizeof(struct ndis_recv_scale_cap);
+ }
+
ret = rndis_filter_send_request(dev, request);
if (ret != 0)
goto cleanup;
@@ -611,6 +642,154 @@ cleanup:
return ret;
}
+int rndis_filter_set_offload_params(struct hv_device *hdev,
+ struct ndis_offload_params *req_offloads)
+{
+ struct netvsc_device *nvdev = hv_get_drvdata(hdev);
+ struct rndis_device *rdev = nvdev->extension;
+ struct net_device *ndev = nvdev->ndev;
+ struct rndis_request *request;
+ struct rndis_set_request *set;
+ struct ndis_offload_params *offload_params;
+ struct rndis_set_complete *set_complete;
+ u32 extlen = sizeof(struct ndis_offload_params);
+ int ret, t;
+ u32 vsp_version = nvdev->nvsp_version;
+
+ if (vsp_version <= NVSP_PROTOCOL_VERSION_4) {
+ extlen = VERSION_4_OFFLOAD_SIZE;
+ /* On NVSP_PROTOCOL_VERSION_4 and below, we do not support
+ * UDP checksum offload.
+ */
+ req_offloads->udp_ip_v4_csum = 0;
+ req_offloads->udp_ip_v6_csum = 0;
+ }
+
+ request = get_rndis_request(rdev, RNDIS_MSG_SET,
+ RNDIS_MESSAGE_SIZE(struct rndis_set_request) + extlen);
+ if (!request)
+ return -ENOMEM;
+
+ set = &request->request_msg.msg.set_req;
+ set->oid = OID_TCP_OFFLOAD_PARAMETERS;
+ set->info_buflen = extlen;
+ set->info_buf_offset = sizeof(struct rndis_set_request);
+ set->dev_vc_handle = 0;
+
+ offload_params = (struct ndis_offload_params *)((ulong)set +
+ set->info_buf_offset);
+ *offload_params = *req_offloads;
+ offload_params->header.type = NDIS_OBJECT_TYPE_DEFAULT;
+ offload_params->header.revision = NDIS_OFFLOAD_PARAMETERS_REVISION_3;
+ offload_params->header.size = extlen;
+
+ ret = rndis_filter_send_request(rdev, request);
+ if (ret != 0)
+ goto cleanup;
+
+ t = wait_for_completion_timeout(&request->wait_event, 5*HZ);
+ if (t == 0) {
+ netdev_err(ndev, "timeout before we got aOFFLOAD set response...\n");
+ /* can't put_rndis_request, since we may still receive a
+ * send-completion.
+ */
+ return -EBUSY;
+ } else {
+ set_complete = &request->response_msg.msg.set_complete;
+ if (set_complete->status != RNDIS_STATUS_SUCCESS) {
+ netdev_err(ndev, "Fail to set offload on host side:0x%x\n",
+ set_complete->status);
+ ret = -EINVAL;
+ }
+ }
+
+cleanup:
+ put_rndis_request(rdev, request);
+ return ret;
+}
+
+u8 netvsc_hash_key[HASH_KEYLEN] = {
+ 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
+ 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
+ 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
+ 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
+ 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
+};
+
+int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue)
+{
+ struct net_device *ndev = rdev->net_dev->ndev;
+ struct rndis_request *request;
+ struct rndis_set_request *set;
+ struct rndis_set_complete *set_complete;
+ u32 extlen = sizeof(struct ndis_recv_scale_param) +
+ 4*ITAB_NUM + HASH_KEYLEN;
+ struct ndis_recv_scale_param *rssp;
+ u32 *itab;
+ u8 *keyp;
+ int i, t, ret;
+
+ request = get_rndis_request(
+ rdev, RNDIS_MSG_SET,
+ RNDIS_MESSAGE_SIZE(struct rndis_set_request) + extlen);
+ if (!request)
+ return -ENOMEM;
+
+ set = &request->request_msg.msg.set_req;
+ set->oid = OID_GEN_RECEIVE_SCALE_PARAMETERS;
+ set->info_buflen = extlen;
+ set->info_buf_offset = sizeof(struct rndis_set_request);
+ set->dev_vc_handle = 0;
+
+ rssp = (struct ndis_recv_scale_param *)(set + 1);
+ rssp->hdr.type = NDIS_OBJECT_TYPE_RSS_PARAMETERS;
+ rssp->hdr.rev = NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2;
+ rssp->hdr.size = sizeof(struct ndis_recv_scale_param);
+ rssp->flag = 0;
+ rssp->hashinfo = NDIS_HASH_FUNC_TOEPLITZ | NDIS_HASH_IPV4 |
+ NDIS_HASH_TCP_IPV4;
+ rssp->indirect_tabsize = 4*ITAB_NUM;
+ rssp->indirect_taboffset = sizeof(struct ndis_recv_scale_param);
+ rssp->hashkey_size = HASH_KEYLEN;
+ rssp->kashkey_offset = rssp->indirect_taboffset +
+ rssp->indirect_tabsize;
+
+ /* Set indirection table entries */
+ itab = (u32 *)(rssp + 1);
+ for (i = 0; i < ITAB_NUM; i++)
+ itab[i] = i % num_queue;
+
+ /* Set hask key values */
+ keyp = (u8 *)((unsigned long)rssp + rssp->kashkey_offset);
+ for (i = 0; i < HASH_KEYLEN; i++)
+ keyp[i] = netvsc_hash_key[i];
+
+
+ ret = rndis_filter_send_request(rdev, request);
+ if (ret != 0)
+ goto cleanup;
+
+ t = wait_for_completion_timeout(&request->wait_event, 5*HZ);
+ if (t == 0) {
+ netdev_err(ndev, "timeout before we got a set response...\n");
+ /* can't put_rndis_request, since we may still receive a
+ * send-completion.
+ */
+ return -ETIMEDOUT;
+ } else {
+ set_complete = &request->response_msg.msg.set_complete;
+ if (set_complete->status != RNDIS_STATUS_SUCCESS) {
+ netdev_err(ndev, "Fail to set RSS parameters:0x%x\n",
+ set_complete->status);
+ ret = -EINVAL;
+ }
+ }
+
+cleanup:
+ put_rndis_request(rdev, request);
+ return ret;
+}
+
static int rndis_filter_query_device_link_status(struct rndis_device *dev)
{
@@ -621,7 +800,6 @@ static int rndis_filter_query_device_link_status(struct rndis_device *dev)
ret = rndis_filter_query_device(dev,
RNDIS_OID_GEN_MEDIA_CONNECT_STATUS,
&link_status, &size);
- dev->link_state = (link_status != 0) ? true : false;
return ret;
}
@@ -804,6 +982,28 @@ static int rndis_filter_close_device(struct rndis_device *dev)
return ret;
}
+static void netvsc_sc_open(struct vmbus_channel *new_sc)
+{
+ struct netvsc_device *nvscdev;
+ u16 chn_index = new_sc->offermsg.offer.sub_channel_index;
+ int ret;
+
+ nvscdev = hv_get_drvdata(new_sc->primary_channel->device_obj);
+
+ if (chn_index >= nvscdev->num_chn)
+ return;
+
+ set_per_channel_state(new_sc, nvscdev->sub_cb_buf + (chn_index - 1) *
+ NETVSC_PACKET_SIZE);
+
+ ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE,
+ nvscdev->ring_size * PAGE_SIZE, NULL, 0,
+ netvsc_channel_cb, new_sc);
+
+ if (ret == 0)
+ nvscdev->chn_table[chn_index] = new_sc;
+}
+
int rndis_filter_device_add(struct hv_device *dev,
void *additional_info)
{
@@ -811,6 +1011,11 @@ int rndis_filter_device_add(struct hv_device *dev,
struct netvsc_device *net_device;
struct rndis_device *rndis_device;
struct netvsc_device_info *device_info = additional_info;
+ struct ndis_offload_params offloads;
+ struct nvsp_message *init_packet;
+ int t;
+ struct ndis_recv_scale_cap rsscap;
+ u32 rsscap_size = sizeof(struct ndis_recv_scale_cap);
rndis_device = get_rndis_device();
if (!rndis_device)
@@ -830,6 +1035,7 @@ int rndis_filter_device_add(struct hv_device *dev,
/* Initialize the rndis device */
net_device = hv_get_drvdata(dev);
+ net_device->num_chn = 1;
net_device->extension = rndis_device;
rndis_device->net_dev = net_device;
@@ -850,6 +1056,25 @@ int rndis_filter_device_add(struct hv_device *dev,
memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN);
+ /* Turn on the offloads; the host supports all of the relevant
+ * offloads.
+ */
+ memset(&offloads, 0, sizeof(struct ndis_offload_params));
+ /* A value of zero means "no change"; now turn on what we
+ * want.
+ */
+ offloads.ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED;
+ offloads.tcp_ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED;
+ offloads.udp_ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED;
+ offloads.tcp_ip_v6_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED;
+ offloads.udp_ip_v6_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED;
+ offloads.lso_v2_ipv4 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED;
+
+
+ ret = rndis_filter_set_offload_params(dev, &offloads);
+ if (ret)
+ goto err_dev_remv;
+
rndis_filter_query_device_link_status(rndis_device);
device_info->link_state = rndis_device->link_state;
@@ -858,6 +1083,69 @@ int rndis_filter_device_add(struct hv_device *dev,
rndis_device->hw_mac_adr,
device_info->link_state ? "down" : "up");
+ if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5)
+ return 0;
+
+ /* vRSS setup */
+ memset(&rsscap, 0, rsscap_size);
+ ret = rndis_filter_query_device(rndis_device,
+ OID_GEN_RECEIVE_SCALE_CAPABILITIES,
+ &rsscap, &rsscap_size);
+ if (ret || rsscap.num_recv_que < 2)
+ goto out;
+
+ net_device->num_chn = (num_online_cpus() < rsscap.num_recv_que) ?
+ num_online_cpus() : rsscap.num_recv_que;
+ if (net_device->num_chn == 1)
+ goto out;
+
+ net_device->sub_cb_buf = vzalloc((net_device->num_chn - 1) *
+ NETVSC_PACKET_SIZE);
+ if (!net_device->sub_cb_buf) {
+ net_device->num_chn = 1;
+ dev_info(&dev->device, "No memory for subchannels.\n");
+ goto out;
+ }
+
+ vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);
+
+ init_packet = &net_device->channel_init_pkt;
+ memset(init_packet, 0, sizeof(struct nvsp_message));
+ init_packet->hdr.msg_type = NVSP_MSG5_TYPE_SUBCHANNEL;
+ init_packet->msg.v5_msg.subchn_req.op = NVSP_SUBCHANNEL_ALLOCATE;
+ init_packet->msg.v5_msg.subchn_req.num_subchannels =
+ net_device->num_chn - 1;
+ ret = vmbus_sendpacket(dev->channel, init_packet,
+ sizeof(struct nvsp_message),
+ (unsigned long)init_packet,
+ VM_PKT_DATA_INBAND,
+ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ if (ret)
+ goto out;
+ t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
+ if (t == 0) {
+ ret = -ETIMEDOUT;
+ goto out;
+ }
+ if (init_packet->msg.v5_msg.subchn_comp.status !=
+ NVSP_STAT_SUCCESS) {
+ ret = -ENODEV;
+ goto out;
+ }
+ net_device->num_chn = 1 +
+ init_packet->msg.v5_msg.subchn_comp.num_subchannels;
+
+ vmbus_are_subchannels_present(dev->channel);
+
+ ret = rndis_filter_set_rss_param(rndis_device, net_device->num_chn);
+
+out:
+ if (ret)
+ net_device->num_chn = 1;
+ return 0; /* return 0 because primary channel can be used alone */
+
+err_dev_remv:
+ rndis_filter_device_remove(dev);
return ret;
}
@@ -895,101 +1183,3 @@ int rndis_filter_close(struct hv_device *dev)
return rndis_filter_close_device(nvdev->extension);
}
-
-int rndis_filter_send(struct hv_device *dev,
- struct hv_netvsc_packet *pkt)
-{
- int ret;
- struct rndis_filter_packet *filter_pkt;
- struct rndis_message *rndis_msg;
- struct rndis_packet *rndis_pkt;
- u32 rndis_msg_size;
- bool isvlan = pkt->vlan_tci & VLAN_TAG_PRESENT;
-
- /* Add the rndis header */
- filter_pkt = (struct rndis_filter_packet *)pkt->extension;
-
- rndis_msg = &filter_pkt->msg;
- rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet);
- if (isvlan)
- rndis_msg_size += NDIS_VLAN_PPI_SIZE;
-
- rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET;
- rndis_msg->msg_len = pkt->total_data_buflen +
- rndis_msg_size;
-
- rndis_pkt = &rndis_msg->msg.pkt;
- rndis_pkt->data_offset = sizeof(struct rndis_packet);
- if (isvlan)
- rndis_pkt->data_offset += NDIS_VLAN_PPI_SIZE;
- rndis_pkt->data_len = pkt->total_data_buflen;
-
- if (isvlan) {
- struct rndis_per_packet_info *ppi;
- struct ndis_pkt_8021q_info *vlan;
-
- rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet);
- rndis_pkt->per_pkt_info_len = NDIS_VLAN_PPI_SIZE;
-
- ppi = (struct rndis_per_packet_info *)((ulong)rndis_pkt +
- rndis_pkt->per_pkt_info_offset);
- ppi->size = NDIS_VLAN_PPI_SIZE;
- ppi->type = IEEE_8021Q_INFO;
- ppi->ppi_offset = sizeof(struct rndis_per_packet_info);
-
- vlan = (struct ndis_pkt_8021q_info *)((ulong)ppi +
- ppi->ppi_offset);
- vlan->vlanid = pkt->vlan_tci & VLAN_VID_MASK;
- vlan->pri = (pkt->vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
- }
-
- pkt->is_data_pkt = true;
- pkt->page_buf[0].pfn = virt_to_phys(rndis_msg) >> PAGE_SHIFT;
- pkt->page_buf[0].offset =
- (unsigned long)rndis_msg & (PAGE_SIZE-1);
- pkt->page_buf[0].len = rndis_msg_size;
-
- /* Add one page_buf if the rndis msg goes beyond page boundary */
- if (pkt->page_buf[0].offset + rndis_msg_size > PAGE_SIZE) {
- int i;
- for (i = pkt->page_buf_cnt; i > 1; i--)
- pkt->page_buf[i] = pkt->page_buf[i-1];
- pkt->page_buf_cnt++;
- pkt->page_buf[0].len = PAGE_SIZE - pkt->page_buf[0].offset;
- pkt->page_buf[1].pfn = virt_to_phys((void *)((ulong)
- rndis_msg + pkt->page_buf[0].len)) >> PAGE_SHIFT;
- pkt->page_buf[1].offset = 0;
- pkt->page_buf[1].len = rndis_msg_size - pkt->page_buf[0].len;
- }
-
- /* Save the packet send completion and context */
- filter_pkt->completion = pkt->completion.send.send_completion;
- filter_pkt->completion_ctx =
- pkt->completion.send.send_completion_ctx;
-
- /* Use ours */
- pkt->completion.send.send_completion = rndis_filter_send_completion;
- pkt->completion.send.send_completion_ctx = filter_pkt;
-
- ret = netvsc_send(dev, pkt);
- if (ret != 0) {
- /*
- * Reset the completion to originals to allow retries from
- * above
- */
- pkt->completion.send.send_completion =
- filter_pkt->completion;
- pkt->completion.send.send_completion_ctx =
- filter_pkt->completion_ctx;
- }
-
- return ret;
-}
-
-static void rndis_filter_send_completion(void *ctx)
-{
- struct rndis_filter_packet *filter_pkt = ctx;
-
- /* Pass it back to the original handler */
- filter_pkt->completion(filter_pkt->completion_ctx);
-}