diff options
Diffstat (limited to 'drivers/net/hyperv')
| -rw-r--r-- | drivers/net/hyperv/Kconfig | 5 | ||||
| -rw-r--r-- | drivers/net/hyperv/Makefile | 3 | ||||
| -rw-r--r-- | drivers/net/hyperv/hyperv_net.h | 1181 | ||||
| -rw-r--r-- | drivers/net/hyperv/netvsc.c | 1104 | ||||
| -rw-r--r-- | drivers/net/hyperv/netvsc_drv.c | 923 | ||||
| -rw-r--r-- | drivers/net/hyperv/rndis_filter.c | 1185 | 
6 files changed, 4401 insertions, 0 deletions
diff --git a/drivers/net/hyperv/Kconfig b/drivers/net/hyperv/Kconfig new file mode 100644 index 00000000000..936968d2355 --- /dev/null +++ b/drivers/net/hyperv/Kconfig @@ -0,0 +1,5 @@ +config HYPERV_NET +	tristate "Microsoft Hyper-V virtual network driver" +	depends on HYPERV +	help +	  Select this option to enable the Hyper-V virtual network driver. diff --git a/drivers/net/hyperv/Makefile b/drivers/net/hyperv/Makefile new file mode 100644 index 00000000000..c8a66827100 --- /dev/null +++ b/drivers/net/hyperv/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_HYPERV_NET) += hv_netvsc.o + +hv_netvsc-y := netvsc_drv.o netvsc.o rndis_filter.o diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h new file mode 100644 index 00000000000..6cc37c15e0b --- /dev/null +++ b/drivers/net/hyperv/hyperv_net.h @@ -0,0 +1,1181 @@ +/* + * + * Copyright (c) 2011, Microsoft Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, see <http://www.gnu.org/licenses/>. + * + * Authors: + *   Haiyang Zhang <haiyangz@microsoft.com> + *   Hank Janssen  <hjanssen@microsoft.com> + *   K. Y. 
Srinivasan <kys@microsoft.com> + * + */ + +#ifndef _HYPERV_NET_H +#define _HYPERV_NET_H + +#include <linux/list.h> +#include <linux/hyperv.h> +#include <linux/rndis.h> + +/* RSS related */ +#define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203  /* query only */ +#define OID_GEN_RECEIVE_SCALE_PARAMETERS 0x00010204  /* query and set */ + +#define NDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88 +#define NDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89 + +#define NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2 +#define NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2 + +struct ndis_obj_header { +	u8 type; +	u8 rev; +	u16 size; +} __packed; + +/* ndis_recv_scale_cap/cap_flag */ +#define NDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS 0x01000000 +#define NDIS_RSS_CAPS_CLASSIFICATION_AT_ISR       0x02000000 +#define NDIS_RSS_CAPS_CLASSIFICATION_AT_DPC       0x04000000 +#define NDIS_RSS_CAPS_USING_MSI_X                 0x08000000 +#define NDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS      0x10000000 +#define NDIS_RSS_CAPS_SUPPORTS_MSI_X              0x20000000 +#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4          0x00000100 +#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6          0x00000200 +#define NDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX       0x00000400 + +struct ndis_recv_scale_cap { /* NDIS_RECEIVE_SCALE_CAPABILITIES */ +	struct ndis_obj_header hdr; +	u32 cap_flag; +	u32 num_int_msg; +	u32 num_recv_que; +	u16 num_indirect_tabent; +} __packed; + + +/* ndis_recv_scale_param flags */ +#define NDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED     0x0001 +#define NDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED    0x0002 +#define NDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED       0x0004 +#define NDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED     0x0008 +#define NDIS_RSS_PARAM_FLAG_DISABLE_RSS            0x0010 + +/* Hash info bits */ +#define NDIS_HASH_FUNC_TOEPLITZ 0x00000001 +#define NDIS_HASH_IPV4          0x00000100 +#define NDIS_HASH_TCP_IPV4      0x00000200 +#define NDIS_HASH_IPV6          0x00000400 +#define NDIS_HASH_IPV6_EX       0x00000800 +#define 
NDIS_HASH_TCP_IPV6      0x00001000 +#define NDIS_HASH_TCP_IPV6_EX   0x00002000 + +#define NDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4) +#define NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2   40 + +#define ITAB_NUM 128 +#define HASH_KEYLEN NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 +extern u8 netvsc_hash_key[]; + +struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */ +	struct ndis_obj_header hdr; + +	/* Qualifies the rest of the information */ +	u16 flag; + +	/* The base CPU number to do receive processing. not used */ +	u16 base_cpu_number; + +	/* This describes the hash function and type being enabled */ +	u32 hashinfo; + +	/* The size of indirection table array */ +	u16 indirect_tabsize; + +	/* The offset of the indirection table from the beginning of this +	 * structure +	 */ +	u32 indirect_taboffset; + +	/* The size of the hash secret key */ +	u16 hashkey_size; + +	/* The offset of the secret key from the beginning of this structure */ +	u32 kashkey_offset; + +	u32 processor_masks_offset; +	u32 num_processor_masks; +	u32 processor_masks_entry_size; +}; + +/* Fwd declaration */ +struct ndis_tcp_ip_checksum_info; + +/* + * Represent netvsc packet which contains 1 RNDIS and 1 ethernet frame + * within the RNDIS + */ +struct hv_netvsc_packet { +	/* Bookkeeping stuff */ +	u32 status; + +	struct hv_device *device; +	bool is_data_pkt; +	u16 vlan_tci; + +	u16 q_idx; +	struct vmbus_channel *channel; + +	u64 send_completion_tid; +	void *send_completion_ctx; +	void (*send_completion)(void *context); + +	u32 send_buf_index; + +	/* This points to the memory after page_buf */ +	struct rndis_message *rndis_msg; + +	u32 total_data_buflen; +	/* Points to the send/receive buffer where the ethernet frame is */ +	void *data; +	u32 page_buf_cnt; +	struct hv_page_buffer page_buf[0]; +}; + +struct netvsc_device_info { +	unsigned char mac_adr[ETH_ALEN]; +	bool link_state;	/* 0 - link up, 1 - link down */ +	int  ring_size; +}; + +enum rndis_device_state { +	
RNDIS_DEV_UNINITIALIZED = 0, +	RNDIS_DEV_INITIALIZING, +	RNDIS_DEV_INITIALIZED, +	RNDIS_DEV_DATAINITIALIZED, +}; + +struct rndis_device { +	struct netvsc_device *net_dev; + +	enum rndis_device_state state; +	bool link_state; +	atomic_t new_req_id; + +	spinlock_t request_lock; +	struct list_head req_list; + +	unsigned char hw_mac_adr[ETH_ALEN]; +}; + + +/* Interface */ +int netvsc_device_add(struct hv_device *device, void *additional_info); +int netvsc_device_remove(struct hv_device *device); +int netvsc_send(struct hv_device *device, +		struct hv_netvsc_packet *packet); +void netvsc_linkstatus_callback(struct hv_device *device_obj, +				unsigned int status); +int netvsc_recv_callback(struct hv_device *device_obj, +			struct hv_netvsc_packet *packet, +			struct ndis_tcp_ip_checksum_info *csum_info); +void netvsc_channel_cb(void *context); +int rndis_filter_open(struct hv_device *dev); +int rndis_filter_close(struct hv_device *dev); +int rndis_filter_device_add(struct hv_device *dev, +			void *additional_info); +void rndis_filter_device_remove(struct hv_device *dev); +int rndis_filter_receive(struct hv_device *dev, +			struct hv_netvsc_packet *pkt); + +int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter); +int rndis_filter_set_device_mac(struct hv_device *hdev, char *mac); + + +#define NVSP_INVALID_PROTOCOL_VERSION	((u32)0xFFFFFFFF) + +#define NVSP_PROTOCOL_VERSION_1		2 +#define NVSP_PROTOCOL_VERSION_2		0x30002 +#define NVSP_PROTOCOL_VERSION_4		0x40000 +#define NVSP_PROTOCOL_VERSION_5		0x50000 + +enum { +	NVSP_MSG_TYPE_NONE = 0, + +	/* Init Messages */ +	NVSP_MSG_TYPE_INIT			= 1, +	NVSP_MSG_TYPE_INIT_COMPLETE		= 2, + +	NVSP_VERSION_MSG_START			= 100, + +	/* Version 1 Messages */ +	NVSP_MSG1_TYPE_SEND_NDIS_VER		= NVSP_VERSION_MSG_START, + +	NVSP_MSG1_TYPE_SEND_RECV_BUF, +	NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE, +	NVSP_MSG1_TYPE_REVOKE_RECV_BUF, + +	NVSP_MSG1_TYPE_SEND_SEND_BUF, +	NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE, +	
NVSP_MSG1_TYPE_REVOKE_SEND_BUF, + +	NVSP_MSG1_TYPE_SEND_RNDIS_PKT, +	NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE, + +	/* Version 2 messages */ +	NVSP_MSG2_TYPE_SEND_CHIMNEY_DELEGATED_BUF, +	NVSP_MSG2_TYPE_SEND_CHIMNEY_DELEGATED_BUF_COMP, +	NVSP_MSG2_TYPE_REVOKE_CHIMNEY_DELEGATED_BUF, + +	NVSP_MSG2_TYPE_RESUME_CHIMNEY_RX_INDICATION, + +	NVSP_MSG2_TYPE_TERMINATE_CHIMNEY, +	NVSP_MSG2_TYPE_TERMINATE_CHIMNEY_COMP, + +	NVSP_MSG2_TYPE_INDICATE_CHIMNEY_EVENT, + +	NVSP_MSG2_TYPE_SEND_CHIMNEY_PKT, +	NVSP_MSG2_TYPE_SEND_CHIMNEY_PKT_COMP, + +	NVSP_MSG2_TYPE_POST_CHIMNEY_RECV_REQ, +	NVSP_MSG2_TYPE_POST_CHIMNEY_RECV_REQ_COMP, + +	NVSP_MSG2_TYPE_ALLOC_RXBUF, +	NVSP_MSG2_TYPE_ALLOC_RXBUF_COMP, + +	NVSP_MSG2_TYPE_FREE_RXBUF, + +	NVSP_MSG2_TYPE_SEND_VMQ_RNDIS_PKT, +	NVSP_MSG2_TYPE_SEND_VMQ_RNDIS_PKT_COMP, + +	NVSP_MSG2_TYPE_SEND_NDIS_CONFIG, + +	NVSP_MSG2_TYPE_ALLOC_CHIMNEY_HANDLE, +	NVSP_MSG2_TYPE_ALLOC_CHIMNEY_HANDLE_COMP, + +	NVSP_MSG2_MAX = NVSP_MSG2_TYPE_ALLOC_CHIMNEY_HANDLE_COMP, + +	/* Version 4 messages */ +	NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION, +	NVSP_MSG4_TYPE_SWITCH_DATA_PATH, +	NVSP_MSG4_TYPE_UPLINK_CONNECT_STATE_DEPRECATED, + +	NVSP_MSG4_MAX = NVSP_MSG4_TYPE_UPLINK_CONNECT_STATE_DEPRECATED, + +	/* Version 5 messages */ +	NVSP_MSG5_TYPE_OID_QUERY_EX, +	NVSP_MSG5_TYPE_OID_QUERY_EX_COMP, +	NVSP_MSG5_TYPE_SUBCHANNEL, +	NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE, + +	NVSP_MSG5_MAX = NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE, +}; + +enum { +	NVSP_STAT_NONE = 0, +	NVSP_STAT_SUCCESS, +	NVSP_STAT_FAIL, +	NVSP_STAT_PROTOCOL_TOO_NEW, +	NVSP_STAT_PROTOCOL_TOO_OLD, +	NVSP_STAT_INVALID_RNDIS_PKT, +	NVSP_STAT_BUSY, +	NVSP_STAT_PROTOCOL_UNSUPPORTED, +	NVSP_STAT_MAX, +}; + +struct nvsp_message_header { +	u32 msg_type; +}; + +/* Init Messages */ + +/* + * This message is used by the VSC to initialize the channel after the channels + * has been opened. This message should never include anything other then + * versioning (i.e. this message will be the same for ever). 
+ */ +struct nvsp_message_init { +	u32 min_protocol_ver; +	u32 max_protocol_ver; +} __packed; + +/* + * This message is used by the VSP to complete the initialization of the + * channel. This message should never include anything other then versioning + * (i.e. this message will be the same for ever). + */ +struct nvsp_message_init_complete { +	u32 negotiated_protocol_ver; +	u32 max_mdl_chain_len; +	u32 status; +} __packed; + +union nvsp_message_init_uber { +	struct nvsp_message_init init; +	struct nvsp_message_init_complete init_complete; +} __packed; + +/* Version 1 Messages */ + +/* + * This message is used by the VSC to send the NDIS version to the VSP. The VSP + * can use this information when handling OIDs sent by the VSC. + */ +struct nvsp_1_message_send_ndis_version { +	u32 ndis_major_ver; +	u32 ndis_minor_ver; +} __packed; + +/* + * This message is used by the VSC to send a receive buffer to the VSP. The VSP + * can then use the receive buffer to send data to the VSC. + */ +struct nvsp_1_message_send_receive_buffer { +	u32 gpadl_handle; +	u16 id; +} __packed; + +struct nvsp_1_receive_buffer_section { +	u32 offset; +	u32 sub_alloc_size; +	u32 num_sub_allocs; +	u32 end_offset; +} __packed; + +/* + * This message is used by the VSP to acknowledge a receive buffer send by the + * VSC. This message must be sent by the VSP before the VSP uses the receive + * buffer. + */ +struct nvsp_1_message_send_receive_buffer_complete { +	u32 status; +	u32 num_sections; + +	/* +	 * The receive buffer is split into two parts, a large suballocation +	 * section and a small suballocation section. These sections are then +	 * suballocated by a certain size. +	 */ + +	/* +	 * For example, the following break up of the receive buffer has 6 +	 * large suballocations and 10 small suballocations. 
+	 */ + +	/* +	 * |            Large Section          |  |   Small Section   | +	 * ------------------------------------------------------------ +	 * |     |     |     |     |     |     |  | | | | | | | | | | | +	 * |                                      | +	 *  LargeOffset                            SmallOffset +	 */ + +	struct nvsp_1_receive_buffer_section sections[1]; +} __packed; + +/* + * This message is sent by the VSC to revoke the receive buffer.  After the VSP + * completes this transaction, the vsp should never use the receive buffer + * again. + */ +struct nvsp_1_message_revoke_receive_buffer { +	u16 id; +}; + +/* + * This message is used by the VSC to send a send buffer to the VSP. The VSC + * can then use the send buffer to send data to the VSP. + */ +struct nvsp_1_message_send_send_buffer { +	u32 gpadl_handle; +	u16 id; +} __packed; + +/* + * This message is used by the VSP to acknowledge a send buffer sent by the + * VSC. This message must be sent by the VSP before the VSP uses the sent + * buffer. + */ +struct nvsp_1_message_send_send_buffer_complete { +	u32 status; + +	/* +	 * The VSC gets to choose the size of the send buffer and the VSP gets +	 * to choose the sections size of the buffer.  This was done to enable +	 * dynamic reconfigurations when the cost of GPA-direct buffers +	 * decreases. +	 */ +	u32 section_size; +} __packed; + +/* + * This message is sent by the VSC to revoke the send buffer.  After the VSP + * completes this transaction, the vsp should never use the send buffer again. + */ +struct nvsp_1_message_revoke_send_buffer { +	u16 id; +}; + +/* + * This message is used by both the VSP and the VSC to send a RNDIS message to + * the opposite channel endpoint. + */ +struct nvsp_1_message_send_rndis_packet { +	/* +	 * This field is specified by RNIDS. They assume there's two different +	 * channels of communication. However, the Network VSP only has one. +	 * Therefore, the channel travels with the RNDIS packet. 
+	 */ +	u32 channel_type; + +	/* +	 * This field is used to send part or all of the data through a send +	 * buffer. This values specifies an index into the send buffer. If the +	 * index is 0xFFFFFFFF, then the send buffer is not being used and all +	 * of the data was sent through other VMBus mechanisms. +	 */ +	u32 send_buf_section_index; +	u32 send_buf_section_size; +} __packed; + +/* + * This message is used by both the VSP and the VSC to complete a RNDIS message + * to the opposite channel endpoint. At this point, the initiator of this + * message cannot use any resources associated with the original RNDIS packet. + */ +struct nvsp_1_message_send_rndis_packet_complete { +	u32 status; +}; + +union nvsp_1_message_uber { +	struct nvsp_1_message_send_ndis_version send_ndis_ver; + +	struct nvsp_1_message_send_receive_buffer send_recv_buf; +	struct nvsp_1_message_send_receive_buffer_complete +						send_recv_buf_complete; +	struct nvsp_1_message_revoke_receive_buffer revoke_recv_buf; + +	struct nvsp_1_message_send_send_buffer send_send_buf; +	struct nvsp_1_message_send_send_buffer_complete send_send_buf_complete; +	struct nvsp_1_message_revoke_send_buffer revoke_send_buf; + +	struct nvsp_1_message_send_rndis_packet send_rndis_pkt; +	struct nvsp_1_message_send_rndis_packet_complete +						send_rndis_pkt_complete; +} __packed; + + +/* + * Network VSP protocol version 2 messages: + */ +struct nvsp_2_vsc_capability { +	union { +		u64 data; +		struct { +			u64 vmq:1; +			u64 chimney:1; +			u64 sriov:1; +			u64 ieee8021q:1; +			u64 correlation_id:1; +		}; +	}; +} __packed; + +struct nvsp_2_send_ndis_config { +	u32 mtu; +	u32 reserved; +	struct nvsp_2_vsc_capability capability; +} __packed; + +/* Allocate receive buffer */ +struct nvsp_2_alloc_rxbuf { +	/* Allocation ID to match the allocation request and response */ +	u32 alloc_id; + +	/* Length of the VM shared memory receive buffer that needs to +	 * be allocated +	 */ +	u32 len; +} __packed; + +/* Allocate receive 
buffer complete */ +struct nvsp_2_alloc_rxbuf_comp { +	/* The NDIS_STATUS code for buffer allocation */ +	u32 status; + +	u32 alloc_id; + +	/* GPADL handle for the allocated receive buffer */ +	u32 gpadl_handle; + +	/* Receive buffer ID */ +	u64 recv_buf_id; +} __packed; + +struct nvsp_2_free_rxbuf { +	u64 recv_buf_id; +} __packed; + +union nvsp_2_message_uber { +	struct nvsp_2_send_ndis_config send_ndis_config; +	struct nvsp_2_alloc_rxbuf alloc_rxbuf; +	struct nvsp_2_alloc_rxbuf_comp alloc_rxbuf_comp; +	struct nvsp_2_free_rxbuf free_rxbuf; +} __packed; + +enum nvsp_subchannel_operation { +	NVSP_SUBCHANNEL_NONE = 0, +	NVSP_SUBCHANNEL_ALLOCATE, +	NVSP_SUBCHANNEL_MAX +}; + +struct nvsp_5_subchannel_request { +	u32 op; +	u32 num_subchannels; +} __packed; + +struct nvsp_5_subchannel_complete { +	u32 status; +	u32 num_subchannels; /* Actual number of subchannels allocated */ +} __packed; + +struct nvsp_5_send_indirect_table { +	/* The number of entries in the send indirection table */ +	u32 count; + +	/* The offset of the send indireciton table from top of this struct. +	 * The send indirection table tells which channel to put the send +	 * traffic on. Each entry is a channel number. 
+	 */ +	u32 offset; +} __packed; + +union nvsp_5_message_uber { +	struct nvsp_5_subchannel_request subchn_req; +	struct nvsp_5_subchannel_complete subchn_comp; +	struct nvsp_5_send_indirect_table send_table; +} __packed; + +union nvsp_all_messages { +	union nvsp_message_init_uber init_msg; +	union nvsp_1_message_uber v1_msg; +	union nvsp_2_message_uber v2_msg; +	union nvsp_5_message_uber v5_msg; +} __packed; + +/* ALL Messages */ +struct nvsp_message { +	struct nvsp_message_header hdr; +	union nvsp_all_messages msg; +} __packed; + + +#define NETVSC_MTU 65536 + +#define NETVSC_RECEIVE_BUFFER_SIZE		(1024*1024*16)	/* 16MB */ +#define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY	(1024*1024*15)  /* 15MB */ +#define NETVSC_SEND_BUFFER_SIZE			(1024 * 1024)   /* 1MB */ +#define NETVSC_INVALID_INDEX			-1 + + +#define NETVSC_RECEIVE_BUFFER_ID		0xcafe + +#define NETVSC_PACKET_SIZE                      2048 + +#define VRSS_SEND_TAB_SIZE 16 + +/* Per netvsc channel-specific */ +struct netvsc_device { +	struct hv_device *dev; + +	u32 nvsp_version; + +	atomic_t num_outstanding_sends; +	wait_queue_head_t wait_drain; +	bool start_remove; +	bool destroy; + +	/* Receive buffer allocated by us but manages by NetVSP */ +	void *recv_buf; +	u32 recv_buf_size; +	u32 recv_buf_gpadl_handle; +	u32 recv_section_cnt; +	struct nvsp_1_receive_buffer_section *recv_section; + +	/* Send buffer allocated by us */ +	void *send_buf; +	u32 send_buf_size; +	u32 send_buf_gpadl_handle; +	u32 send_section_cnt; +	u32 send_section_size; +	unsigned long *send_section_map; +	int map_words; + +	/* Used for NetVSP initialization protocol */ +	struct completion channel_init_wait; +	struct nvsp_message channel_init_pkt; + +	struct nvsp_message revoke_packet; +	/* unsigned char HwMacAddr[HW_MACADDR_LEN]; */ + +	struct net_device *ndev; + +	struct vmbus_channel *chn_table[NR_CPUS]; +	u32 send_table[VRSS_SEND_TAB_SIZE]; +	u32 num_chn; +	atomic_t queue_sends[NR_CPUS]; + +	/* Holds rndis device info */ +	void *extension; + +	int 
ring_size; + +	/* The primary channel callback buffer */ +	unsigned char cb_buffer[NETVSC_PACKET_SIZE]; +	/* The sub channel callback buffer */ +	unsigned char *sub_cb_buf; +}; + +/* NdisInitialize message */ +struct rndis_initialize_request { +	u32 req_id; +	u32 major_ver; +	u32 minor_ver; +	u32 max_xfer_size; +}; + +/* Response to NdisInitialize */ +struct rndis_initialize_complete { +	u32 req_id; +	u32 status; +	u32 major_ver; +	u32 minor_ver; +	u32 dev_flags; +	u32 medium; +	u32 max_pkt_per_msg; +	u32 max_xfer_size; +	u32 pkt_alignment_factor; +	u32 af_list_offset; +	u32 af_list_size; +}; + +/* Call manager devices only: Information about an address family */ +/* supported by the device is appended to the response to NdisInitialize. */ +struct rndis_co_address_family { +	u32 address_family; +	u32 major_ver; +	u32 minor_ver; +}; + +/* NdisHalt message */ +struct rndis_halt_request { +	u32 req_id; +}; + +/* NdisQueryRequest message */ +struct rndis_query_request { +	u32 req_id; +	u32 oid; +	u32 info_buflen; +	u32 info_buf_offset; +	u32 dev_vc_handle; +}; + +/* Response to NdisQueryRequest */ +struct rndis_query_complete { +	u32 req_id; +	u32 status; +	u32 info_buflen; +	u32 info_buf_offset; +}; + +/* NdisSetRequest message */ +struct rndis_set_request { +	u32 req_id; +	u32 oid; +	u32 info_buflen; +	u32 info_buf_offset; +	u32 dev_vc_handle; +}; + +/* Response to NdisSetRequest */ +struct rndis_set_complete { +	u32 req_id; +	u32 status; +}; + +/* NdisReset message */ +struct rndis_reset_request { +	u32 reserved; +}; + +/* Response to NdisReset */ +struct rndis_reset_complete { +	u32 status; +	u32 addressing_reset; +}; + +/* NdisMIndicateStatus message */ +struct rndis_indicate_status { +	u32 status; +	u32 status_buflen; +	u32 status_buf_offset; +}; + +/* Diagnostic information passed as the status buffer in */ +/* struct rndis_indicate_status messages signifying error conditions. 
 */
struct rndis_diagnostic_info {
	u32 diag_status;
	u32 error_offset;
};

/* NdisKeepAlive message */
struct rndis_keepalive_request {
	u32 req_id;
};

/* Response to NdisKeepAlive */
struct rndis_keepalive_complete {
	u32 req_id;
	u32 status;
};

/*
 * Data message. All Offset fields contain byte offsets from the beginning of
 * struct rndis_packet. All Length fields are in bytes.  VcHandle is set
 * to 0 for connectionless data, otherwise it contains the VC handle.
 */
struct rndis_packet {
	u32 data_offset;
	u32 data_len;
	u32 oob_data_offset;
	u32 oob_data_len;
	u32 num_oob_data_elements;
	u32 per_pkt_info_offset;
	u32 per_pkt_info_len;
	u32 vc_handle;
	u32 reserved;
};

/* Optional Out of Band data associated with a Data message. */
struct rndis_oobd {
	u32 size;
	u32 type;
	u32 class_info_offset;
};

/* Packet extension field contents associated with a Data message. */
struct rndis_per_packet_info {
	u32 size;
	u32 type;
	u32 ppi_offset;
};

/* Per-packet-info type codes carried in struct rndis_per_packet_info.type */
enum ndis_per_pkt_info_type {
	TCPIP_CHKSUM_PKTINFO,
	IPSEC_PKTINFO,
	TCP_LARGESEND_PKTINFO,
	CLASSIFICATION_HANDLE_PKTINFO,
	NDIS_RESERVED,
	SG_LIST_PKTINFO,
	IEEE_8021Q_INFO,
	ORIGINAL_PKTINFO,
	PACKET_CANCEL_ID,
	NBL_HASH_VALUE = PACKET_CANCEL_ID,
	ORIGINAL_NET_BUFLIST,
	CACHED_NET_BUFLIST,
	SHORT_PKT_PADINFO,
	MAX_PER_PKT_INFO
};

struct ndis_pkt_8021q_info {
	union {
		struct {
			u32 pri:3; /* User Priority */
			u32 cfi:1; /* Canonical Format ID */
			u32 vlanid:12; /* VLAN ID */
			u32 reserved:16;
		};
		u32 value;
	};
};

/* NOTE(review): "oject" is a typo for "object"; the name is referenced by
 * users of this header, so renaming would be an interface change.
 */
struct ndis_oject_header {
	u8 type;
	u8 revision;
	u16 size;
};

#define NDIS_OBJECT_TYPE_DEFAULT	0x80
#define NDIS_OFFLOAD_PARAMETERS_REVISION_3 3
#define NDIS_OFFLOAD_PARAMETERS_NO_CHANGE 0
#define NDIS_OFFLOAD_PARAMETERS_LSOV2_DISABLED 1
#define NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED  2
#define NDIS_OFFLOAD_PARAMETERS_LSOV1_ENABLED  2
#define NDIS_OFFLOAD_PARAMETERS_RSC_DISABLED 1
#define NDIS_OFFLOAD_PARAMETERS_RSC_ENABLED 2
#define NDIS_OFFLOAD_PARAMETERS_TX_RX_DISABLED 1
#define NDIS_OFFLOAD_PARAMETERS_TX_ENABLED_RX_DISABLED 2
#define NDIS_OFFLOAD_PARAMETERS_RX_ENABLED_TX_DISABLED 3
#define NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED 4

#define NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE	1
#define NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4	0
#define NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6	1

#define VERSION_4_OFFLOAD_SIZE			22
/*
 * New offload OIDs for NDIS 6
 */
#define OID_TCP_OFFLOAD_CURRENT_CONFIG 0xFC01020B /* query only */
#define OID_TCP_OFFLOAD_PARAMETERS 0xFC01020C		/* set only */
#define OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020D/* query only */
#define OID_TCP_CONNECTION_OFFLOAD_CURRENT_CONFIG 0xFC01020E /* query only */
#define OID_TCP_CONNECTION_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020F /* query */
#define OID_OFFLOAD_ENCAPSULATION 0x0101010A /* set/query */

/* Payload of OID_TCP_OFFLOAD_PARAMETERS; field values use the
 * NDIS_OFFLOAD_PARAMETERS_* codes above.
 */
struct ndis_offload_params {
	struct ndis_oject_header header;
	u8 ip_v4_csum;
	u8 tcp_ip_v4_csum;
	u8 udp_ip_v4_csum;
	u8 tcp_ip_v6_csum;
	u8 udp_ip_v6_csum;
	u8 lso_v1;
	u8 ip_sec_v1;
	u8 lso_v2_ipv4;
	u8 lso_v2_ipv6;
	u8 tcp_connection_ip_v4;
	u8 tcp_connection_ip_v6;
	u32 flags;
	u8 ip_sec_v2;
	u8 ip_sec_v2_ip_v4;
	struct {
		u8 rsc_ip_v4;
		u8 rsc_ip_v6;
	};
	struct {
		u8 encapsulated_packet_task_offload;
		u8 encapsulation_types;
	};
};

/* Checksum-offload per-packet info (TCPIP_CHKSUM_PKTINFO) */
struct ndis_tcp_ip_checksum_info {
	union {
		struct {
			u32 is_ipv4:1;
			u32 is_ipv6:1;
			u32 tcp_checksum:1;
			u32 udp_checksum:1;
			u32 ip_header_checksum:1;
			u32 reserved:11;
			u32 tcp_header_offset:10;
		} transmit;
		struct {
			u32 tcp_checksum_failed:1;
			u32 udp_checksum_failed:1;
			u32 ip_checksum_failed:1;
			u32 tcp_checksum_succeeded:1;
			u32 udp_checksum_succeeded:1;
			u32 ip_checksum_succeeded:1;
			u32 loopback:1;
			u32 tcp_checksum_value_invalid:1;
			u32 ip_checksum_value_invalid:1;
		} receive;
		u32  value;
	};
};

/* Large-send-offload per-packet info (TCP_LARGESEND_PKTINFO) */
struct ndis_tcp_lso_info {
	union {
		struct {
			u32 unused:30;
			u32 type:1;
			u32 reserved2:1;
		} transmit;
		struct {
			u32 mss:20;
			u32 tcp_header_offset:10;
			u32 type:1;
			u32 reserved2:1;
		} lso_v1_transmit;
		struct {
			u32 tcp_payload:30;
			u32 type:1;
			u32 reserved2:1;
		} lso_v1_transmit_complete;
		struct {
			u32 mss:20;
			u32 tcp_header_offset:10;
			u32 type:1;
			u32 ip_version:1;
		} lso_v2_transmit;
		struct {
			u32 reserved:30;
			u32 type:1;
			u32 reserved2:1;
		} lso_v2_transmit_complete;
		u32  value;
	};
};

#define NDIS_VLAN_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \
		sizeof(struct ndis_pkt_8021q_info))

#define NDIS_CSUM_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \
		sizeof(struct ndis_tcp_ip_checksum_info))

#define NDIS_LSO_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \
		sizeof(struct ndis_tcp_lso_info))

#define NDIS_HASH_PPI_SIZE (sizeof(struct rndis_per_packet_info) + \
		sizeof(u32))

/* Format of Information buffer passed in a SetRequest for the OID */
/* OID_GEN_RNDIS_CONFIG_PARAMETER. */
struct rndis_config_parameter_info {
	u32 parameter_name_offset;
	u32 parameter_name_length;
	u32 parameter_type;
	u32 parameter_value_offset;
	u32 parameter_value_length;
};

/* Values for ParameterType in struct rndis_config_parameter_info */
#define RNDIS_CONFIG_PARAM_TYPE_INTEGER     0
#define RNDIS_CONFIG_PARAM_TYPE_STRING      2

/* CONDIS Miniport messages for connection oriented devices */
/* that do not implement a call manager. */

/* CoNdisMiniportCreateVc message */
struct rcondis_mp_create_vc {
	u32 req_id;
	u32 ndis_vc_handle;
};

/* Response to CoNdisMiniportCreateVc */
struct rcondis_mp_create_vc_complete {
	u32 req_id;
	u32 dev_vc_handle;
	u32 status;
};

/* CoNdisMiniportDeleteVc message */
struct rcondis_mp_delete_vc {
	u32 req_id;
	u32 dev_vc_handle;
};

/* Response to CoNdisMiniportDeleteVc */
struct rcondis_mp_delete_vc_complete {
	u32 req_id;
	u32 status;
};

/* CoNdisMiniportQueryRequest message */
struct rcondis_mp_query_request {
	u32 req_id;
	u32 request_type;
	u32 oid;
	u32 dev_vc_handle;
	u32 info_buflen;
	u32 info_buf_offset;
};

/* CoNdisMiniportSetRequest message */
struct rcondis_mp_set_request {
	u32 req_id;
	u32 request_type;
	u32 oid;
	u32 dev_vc_handle;
	u32 info_buflen;
	u32 info_buf_offset;
};

/* CoNdisIndicateStatus message */
struct rcondis_indicate_status {
	u32 ndis_vc_handle;
	u32 status;
	u32 status_buflen;
	u32 status_buf_offset;
};

/* CONDIS Call/VC parameters */
/* NOTE(review): "parameter_lffset" looks like a typo for "parameter_offset";
 * the field name may be referenced elsewhere, so it is left unchanged here.
 */
struct rcondis_specific_parameters {
	u32 parameter_type;
	u32 parameter_length;
	u32 parameter_lffset;
};

struct rcondis_media_parameters {
	u32 flags;
	u32 reserved1;
	u32 reserved2;
	struct rcondis_specific_parameters media_specific;
};

struct rndis_flowspec {
	u32 token_rate;
	u32 token_bucket_size;
	u32 peak_bandwidth;
	u32 latency;
	u32 delay_variation;
	u32 service_type;
	u32 max_sdu_size;
	u32 minimum_policed_size;
};

struct rcondis_call_manager_parameters {
	struct rndis_flowspec transmit;
	struct rndis_flowspec receive;
	struct rcondis_specific_parameters call_mgr_specific;
};

/* CoNdisMiniportActivateVc message */
struct rcondis_mp_activate_vc_request {
	u32 req_id;
	u32 flags;
	u32 dev_vc_handle;
	u32 media_params_offset;
	u32 media_params_length;
	u32 call_mgr_params_offset;
	u32 call_mgr_params_length;
};

/* Response to CoNdisMiniportActivateVc */
struct rcondis_mp_activate_vc_complete {
	u32 req_id;
	u32 status;
};

/* CoNdisMiniportDeactivateVc message */
struct rcondis_mp_deactivate_vc_request {
	u32 req_id;
	u32 flags;
	u32 dev_vc_handle;
};

/* Response to CoNdisMiniportDeactivateVc */
struct rcondis_mp_deactivate_vc_complete {
	u32 req_id;
	u32 status;
};


/* union with all of the RNDIS messages */
union rndis_message_container {
	struct rndis_packet pkt;
	struct rndis_initialize_request init_req;
	struct rndis_halt_request halt_req;
	struct rndis_query_request query_req;
	struct rndis_set_request set_req;
	struct rndis_reset_request reset_req;
	struct rndis_keepalive_request keep_alive_req;
	struct rndis_indicate_status indicate_status;
	struct rndis_initialize_complete init_complete;
	struct rndis_query_complete query_complete;
	struct rndis_set_complete set_complete;
	struct rndis_reset_complete reset_complete;
	struct rndis_keepalive_complete keep_alive_complete;
	struct rcondis_mp_create_vc co_miniport_create_vc;
	struct rcondis_mp_delete_vc co_miniport_delete_vc;
	struct rcondis_indicate_status co_indicate_status;
	struct rcondis_mp_activate_vc_request co_miniport_activate_vc;
	struct rcondis_mp_deactivate_vc_request co_miniport_deactivate_vc;
	struct rcondis_mp_create_vc_complete co_miniport_create_vc_complete;
	struct rcondis_mp_delete_vc_complete co_miniport_delete_vc_complete;
	struct rcondis_mp_activate_vc_complete co_miniport_activate_vc_complete;
	struct rcondis_mp_deactivate_vc_complete
		co_miniport_deactivate_vc_complete;
};

/* Remote NDIS message format */
struct rndis_message {
	u32 ndis_msg_type;

	/* Total length of this message, from the beginning */
	/* of the sruct rndis_message, in bytes. */
	u32 msg_len;

	/* Actual message */
	union rndis_message_container msg;
};


/* Handy macros */

/* get the size of an RNDIS message. Pass in the message type, */
/* struct rndis_set_request, struct rndis_packet for example */
#define RNDIS_MESSAGE_SIZE(msg)				\
	(sizeof(msg) + (sizeof(struct rndis_message) -	\
	 sizeof(union rndis_message_container)))

/* get pointer to info buffer with message pointer */
#define MESSAGE_TO_INFO_BUFFER(msg)				\
	(((unsigned char *)(msg)) + msg->info_buf_offset)

/* get pointer to status buffer with message pointer */
#define MESSAGE_TO_STATUS_BUFFER(msg)			\
	(((unsigned char *)(msg)) + msg->status_buf_offset)

/* get pointer to OOBD buffer with message pointer */
#define MESSAGE_TO_OOBD_BUFFER(msg)				\
	(((unsigned char *)(msg)) + msg->oob_data_offset)

/* get pointer to data buffer with message pointer */
/* NOTE(review): despite the name, this adds per_pkt_info_offset, not
 * data_offset — confirm intent against callers before relying on it.
 */
#define MESSAGE_TO_DATA_BUFFER(msg)				\
	(((unsigned char *)(msg)) + msg->per_pkt_info_offset)

/* get pointer to contained message from NDIS_MESSAGE pointer */
#define RNDIS_MESSAGE_PTR_TO_MESSAGE_PTR(rndis_msg)		\
	((void *) &rndis_msg->msg)

/* get pointer to contained message from NDIS_MESSAGE raw pointer */
#define RNDIS_MESSAGE_RAW_PTR_TO_MESSAGE_PTR(rndis_msg)	\
	((void *) rndis_msg)


/* Expands to nothing; kept for source compatibility with the original
 * Microsoft headers this file derives from.
 */
#define __struct_bcount(x)



#define RNDIS_HEADER_SIZE	(sizeof(struct rndis_message) - \
				 sizeof(union rndis_message_container))

#define NDIS_PACKET_TYPE_DIRECTED	0x00000001
#define NDIS_PACKET_TYPE_MULTICAST	0x00000002
#define NDIS_PACKET_TYPE_ALL_MULTICAST	0x00000004
#define NDIS_PACKET_TYPE_BROADCAST	0x00000008
#define NDIS_PACKET_TYPE_SOURCE_ROUTING	0x00000010
#define NDIS_PACKET_TYPE_PROMISCUOUS	0x00000020
#define NDIS_PACKET_TYPE_SMT		0x00000040
#define NDIS_PACKET_TYPE_ALL_LOCAL	0x00000080
#define NDIS_PACKET_TYPE_GROUP		0x00000100
#define NDIS_PACKET_TYPE_ALL_FUNCTIONAL	0x00000200
#define NDIS_PACKET_TYPE_FUNCTIONAL	0x00000400
#define NDIS_PACKET_TYPE_MAC_FRAME	0x00000800

#define INFO_IPV4       2
#define INFO_IPV6       4
#define INFO_TCP        2
#define INFO_UDP        4

#define TRANSPORT_INFO_NOT_IP   0
#define TRANSPORT_INFO_IPV4_TCP ((INFO_IPV4 << 16) | INFO_TCP)
#define TRANSPORT_INFO_IPV4_UDP ((INFO_IPV4 << 16) | INFO_UDP)
#define TRANSPORT_INFO_IPV6_TCP ((INFO_IPV6 << 16) | INFO_TCP)
#define TRANSPORT_INFO_IPV6_UDP ((INFO_IPV6 << 16) | INFO_UDP)


#endif /* _HYPERV_NET_H */
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
new file mode 100644
index 00000000000..d97d5f39a04
--- /dev/null
+++ b/drivers/net/hyperv/netvsc.c
@@ -0,0 +1,1104 @@
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <asm/sync_bitops.h>

#include "hyperv_net.h"


/* Allocate and initialize the per-device netvsc state and attach it as
 * the hv_device's driver data.  Returns NULL on allocation failure.
 */
static struct netvsc_device *alloc_net_device(struct hv_device *device)
{
	struct netvsc_device *net_device;
	struct net_device *ndev = hv_get_drvdata(device);

	net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
	if (!net_device)
		return NULL;

	init_waitqueue_head(&net_device->wait_drain);
	net_device->start_remove = false;
	net_device->destroy = false;
	net_device->dev = device;
	net_device->ndev = ndev;

	hv_set_drvdata(device, net_device);
	return net_device;
}

/* Get the netvsc state for the outbound (send) path; NULL once teardown
 * has started (destroy is set).
 */
static struct netvsc_device *get_outbound_net_device(struct hv_device *device)
{
	struct netvsc_device *net_device;

	net_device = hv_get_drvdata(device);
	if (net_device && net_device->destroy)
		net_device = NULL;

	return net_device;
}

/* Get the netvsc state for the inbound (receive/completion) path; unlike
 * the outbound variant, the device stays visible during teardown while
 * sends are still outstanding so their completions can drain.
 */
static struct netvsc_device *get_inbound_net_device(struct hv_device *device)
{
	struct netvsc_device *net_device;

	net_device = hv_get_drvdata(device);

	if (!net_device)
		goto get_in_err;

	if (net_device->destroy &&
		atomic_read(&net_device->num_outstanding_sends) == 0)
		net_device = NULL;

get_in_err:
	return net_device;
}


/* Revoke and free the receive and send buffers shared with the host:
 * send revoke messages, tear down the GPADLs, then release the memory.
 * On any failure the remaining resources are intentionally leaked
 * rather than risking a host-visible inconsistency.
 */
static int netvsc_destroy_buf(struct netvsc_device *net_device)
{
	struct nvsp_message *revoke_packet;
	int ret = 0;
	struct net_device *ndev = net_device->ndev;

	/*
	 * If we got a section count, it means we received a
	 * SendReceiveBufferComplete msg (ie sent
	 * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need
	 * to send a revoke msg here
	 */
	if (net_device->recv_section_cnt) {
		/* Send the revoke receive buffer */
		revoke_packet = &net_device->revoke_packet;
		memset(revoke_packet, 0, sizeof(struct nvsp_message));

		revoke_packet->hdr.msg_type =
			NVSP_MSG1_TYPE_REVOKE_RECV_BUF;
		revoke_packet->msg.v1_msg.
		revoke_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;

		ret = vmbus_sendpacket(net_device->dev->channel,
				       revoke_packet,
				       sizeof(struct nvsp_message),
				       (unsigned long)revoke_packet,
				       VM_PKT_DATA_INBAND, 0);
		/*
		 * If we failed here, we might as well return and
		 * have a leak rather than continue and a bugchk
		 */
		if (ret != 0) {
			netdev_err(ndev, "unable to send "
				"revoke receive buffer to netvsp\n");
			return ret;
		}
	}

	/* Teardown the gpadl on the vsp end */
	if (net_device->recv_buf_gpadl_handle) {
		ret = vmbus_teardown_gpadl(net_device->dev->channel,
			   net_device->recv_buf_gpadl_handle);

		/* If we failed here, we might as well return and have a leak
		 * rather than continue and a bugchk
		 */
		if (ret != 0) {
			netdev_err(ndev,
				   "unable to teardown receive buffer's gpadl\n");
			return ret;
		}
		net_device->recv_buf_gpadl_handle = 0;
	}

	if (net_device->recv_buf) {
		/* Free up the receive buffer */
		vfree(net_device->recv_buf);
		net_device->recv_buf = NULL;
	}

	if (net_device->recv_section) {
		net_device->recv_section_cnt = 0;
		kfree(net_device->recv_section);
		net_device->recv_section = NULL;
	}

	/* Deal with the send buffer we may have setup.
	 * If we got a  send section size, it means we received a
	 * SendSendBufferComplete msg (ie sent
	 * NvspMessage1TypeSendSendBuffer msg) therefore, we need
	 * to send a revoke msg here
	 */
	if (net_device->send_section_size) {
		/* Send the revoke send buffer */
		revoke_packet = &net_device->revoke_packet;
		memset(revoke_packet, 0, sizeof(struct nvsp_message));

		revoke_packet->hdr.msg_type =
			NVSP_MSG1_TYPE_REVOKE_SEND_BUF;
		/* NOTE(review): this writes the revoke_recv_buf union member
		 * for a REVOKE_SEND_BUF message; both members start with a
		 * u32 id so the wire format is the same, but revoke_send_buf
		 * would be the consistent choice — confirm against the nvsp
		 * message definitions in hyperv_net.h.
		 */
		revoke_packet->msg.v1_msg.revoke_recv_buf.id = 0;

		ret = vmbus_sendpacket(net_device->dev->channel,
				       revoke_packet,
				       sizeof(struct nvsp_message),
				       (unsigned long)revoke_packet,
				       VM_PKT_DATA_INBAND, 0);
		/* If we failed here, we might as well return and
		 * have a leak rather than continue and a bugchk
		 */
		if (ret != 0) {
			netdev_err(ndev, "unable to send "
				   "revoke send buffer to netvsp\n");
			return ret;
		}
	}
	/* Teardown the gpadl on the vsp end */
	if (net_device->send_buf_gpadl_handle) {
		ret = vmbus_teardown_gpadl(net_device->dev->channel,
					   net_device->send_buf_gpadl_handle);

		/* If we failed here, we might as well return and have a leak
		 * rather than continue and a bugchk
		 */
		if (ret != 0) {
			netdev_err(ndev,
				   "unable to teardown send buffer's gpadl\n");
			return ret;
		}
		net_device->send_buf_gpadl_handle = 0;
	}
	if (net_device->send_buf) {
		/* Free up the send buffer */
		free_pages((unsigned long)net_device->send_buf,
			   get_order(net_device->send_buf_size));
		net_device->send_buf = NULL;
	}
	kfree(net_device->send_section_map);

	return ret;
}

/* Allocate the receive and send buffers, share them with the host via
 * GPADLs, and parse the host's completion responses.  On failure all
 * partially-created resources are torn down via netvsc_destroy_buf().
 */
static int netvsc_init_buf(struct hv_device *device)
{
	int ret = 0;
	int t;
	struct netvsc_device *net_device;
	struct nvsp_message *init_packet;
	struct net_device *ndev;

	net_device = get_outbound_net_device(device);
	if (!net_device)
		return -ENODEV;
	ndev = net_device->ndev;

	net_device->recv_buf =
vzalloc(net_device->recv_buf_size); +	if (!net_device->recv_buf) { +		netdev_err(ndev, "unable to allocate receive " +			"buffer of size %d\n", net_device->recv_buf_size); +		ret = -ENOMEM; +		goto cleanup; +	} + +	/* +	 * Establish the gpadl handle for this buffer on this +	 * channel.  Note: This call uses the vmbus connection rather +	 * than the channel to establish the gpadl handle. +	 */ +	ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf, +				    net_device->recv_buf_size, +				    &net_device->recv_buf_gpadl_handle); +	if (ret != 0) { +		netdev_err(ndev, +			"unable to establish receive buffer's gpadl\n"); +		goto cleanup; +	} + + +	/* Notify the NetVsp of the gpadl handle */ +	init_packet = &net_device->channel_init_pkt; + +	memset(init_packet, 0, sizeof(struct nvsp_message)); + +	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF; +	init_packet->msg.v1_msg.send_recv_buf. +		gpadl_handle = net_device->recv_buf_gpadl_handle; +	init_packet->msg.v1_msg. +		send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID; + +	/* Send the gpadl notification request */ +	ret = vmbus_sendpacket(device->channel, init_packet, +			       sizeof(struct nvsp_message), +			       (unsigned long)init_packet, +			       VM_PKT_DATA_INBAND, +			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); +	if (ret != 0) { +		netdev_err(ndev, +			"unable to send receive buffer's gpadl to netvsp\n"); +		goto cleanup; +	} + +	t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ); +	BUG_ON(t == 0); + + +	/* Check the response */ +	if (init_packet->msg.v1_msg. +	    send_recv_buf_complete.status != NVSP_STAT_SUCCESS) { +		netdev_err(ndev, "Unable to complete receive buffer " +			   "initialization with NetVsp - status %d\n", +			   init_packet->msg.v1_msg. +			   send_recv_buf_complete.status); +		ret = -EINVAL; +		goto cleanup; +	} + +	/* Parse the response */ + +	net_device->recv_section_cnt = init_packet->msg. 
+		v1_msg.send_recv_buf_complete.num_sections; + +	net_device->recv_section = kmemdup( +		init_packet->msg.v1_msg.send_recv_buf_complete.sections, +		net_device->recv_section_cnt * +		sizeof(struct nvsp_1_receive_buffer_section), +		GFP_KERNEL); +	if (net_device->recv_section == NULL) { +		ret = -EINVAL; +		goto cleanup; +	} + +	/* +	 * For 1st release, there should only be 1 section that represents the +	 * entire receive buffer +	 */ +	if (net_device->recv_section_cnt != 1 || +	    net_device->recv_section->offset != 0) { +		ret = -EINVAL; +		goto cleanup; +	} + +	/* Now setup the send buffer. +	 */ +	net_device->send_buf = +		(void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, +					 get_order(net_device->send_buf_size)); +	if (!net_device->send_buf) { +		netdev_err(ndev, "unable to allocate send " +			   "buffer of size %d\n", net_device->send_buf_size); +		ret = -ENOMEM; +		goto cleanup; +	} + +	/* Establish the gpadl handle for this buffer on this +	 * channel.  Note: This call uses the vmbus connection rather +	 * than the channel to establish the gpadl handle. 
+	 */ +	ret = vmbus_establish_gpadl(device->channel, net_device->send_buf, +				    net_device->send_buf_size, +				    &net_device->send_buf_gpadl_handle); +	if (ret != 0) { +		netdev_err(ndev, +			   "unable to establish send buffer's gpadl\n"); +		goto cleanup; +	} + +	/* Notify the NetVsp of the gpadl handle */ +	init_packet = &net_device->channel_init_pkt; +	memset(init_packet, 0, sizeof(struct nvsp_message)); +	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF; +	init_packet->msg.v1_msg.send_recv_buf.gpadl_handle = +		net_device->send_buf_gpadl_handle; +	init_packet->msg.v1_msg.send_recv_buf.id = 0; + +	/* Send the gpadl notification request */ +	ret = vmbus_sendpacket(device->channel, init_packet, +			       sizeof(struct nvsp_message), +			       (unsigned long)init_packet, +			       VM_PKT_DATA_INBAND, +			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); +	if (ret != 0) { +		netdev_err(ndev, +			   "unable to send send buffer's gpadl to netvsp\n"); +		goto cleanup; +	} + +	t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ); +	BUG_ON(t == 0); + +	/* Check the response */ +	if (init_packet->msg.v1_msg. +	    send_send_buf_complete.status != NVSP_STAT_SUCCESS) { +		netdev_err(ndev, "Unable to complete send buffer " +			   "initialization with NetVsp - status %d\n", +			   init_packet->msg.v1_msg. +			   send_recv_buf_complete.status); +		ret = -EINVAL; +		goto cleanup; +	} + +	/* Parse the response */ +	net_device->send_section_size = init_packet->msg. +				v1_msg.send_send_buf_complete.section_size; + +	/* Section count is simply the size divided by the section size. +	 */ +	net_device->send_section_cnt = +		net_device->send_buf_size/net_device->send_section_size; + +	dev_info(&device->device, "Send section size: %d, Section count:%d\n", +		 net_device->send_section_size, net_device->send_section_cnt); + +	/* Setup state for managing the send buffer. 
*/ +	net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt, +					     BITS_PER_LONG); + +	net_device->send_section_map = +		kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL); +	if (net_device->send_section_map == NULL) { +		ret = -ENOMEM; +		goto cleanup; +	} + +	goto exit; + +cleanup: +	netvsc_destroy_buf(net_device); + +exit: +	return ret; +} + + +/* Negotiate NVSP protocol version */ +static int negotiate_nvsp_ver(struct hv_device *device, +			      struct netvsc_device *net_device, +			      struct nvsp_message *init_packet, +			      u32 nvsp_ver) +{ +	int ret, t; + +	memset(init_packet, 0, sizeof(struct nvsp_message)); +	init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT; +	init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver; +	init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver; + +	/* Send the init request */ +	ret = vmbus_sendpacket(device->channel, init_packet, +			       sizeof(struct nvsp_message), +			       (unsigned long)init_packet, +			       VM_PKT_DATA_INBAND, +			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + +	if (ret != 0) +		return ret; + +	t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ); + +	if (t == 0) +		return -ETIMEDOUT; + +	if (init_packet->msg.init_msg.init_complete.status != +	    NVSP_STAT_SUCCESS) +		return -EINVAL; + +	if (nvsp_ver == NVSP_PROTOCOL_VERSION_1) +		return 0; + +	/* NVSPv2 only: Send NDIS config */ +	memset(init_packet, 0, sizeof(struct nvsp_message)); +	init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG; +	init_packet->msg.v2_msg.send_ndis_config.mtu = net_device->ndev->mtu; +	init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1; + +	ret = vmbus_sendpacket(device->channel, init_packet, +				sizeof(struct nvsp_message), +				(unsigned long)init_packet, +				VM_PKT_DATA_INBAND, 0); + +	return ret; +} + +static int netvsc_connect_vsp(struct hv_device *device) +{ +	int ret; +	struct netvsc_device *net_device; +	struct nvsp_message 
*init_packet; +	int ndis_version; +	struct net_device *ndev; +	u32 ver_list[] = { NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2, +		NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 }; +	int i, num_ver = 4; /* number of different NVSP versions */ + +	net_device = get_outbound_net_device(device); +	if (!net_device) +		return -ENODEV; +	ndev = net_device->ndev; + +	init_packet = &net_device->channel_init_pkt; + +	/* Negotiate the latest NVSP protocol supported */ +	for (i = num_ver - 1; i >= 0; i--) +		if (negotiate_nvsp_ver(device, net_device, init_packet, +				       ver_list[i])  == 0) { +			net_device->nvsp_version = ver_list[i]; +			break; +		} + +	if (i < 0) { +		ret = -EPROTO; +		goto cleanup; +	} + +	pr_debug("Negotiated NVSP version:%x\n", net_device->nvsp_version); + +	/* Send the ndis version */ +	memset(init_packet, 0, sizeof(struct nvsp_message)); + +	if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_4) +		ndis_version = 0x00060001; +	else +		ndis_version = 0x0006001e; + +	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER; +	init_packet->msg.v1_msg. +		send_ndis_ver.ndis_major_ver = +				(ndis_version & 0xFFFF0000) >> 16; +	init_packet->msg.v1_msg. 
+		send_ndis_ver.ndis_minor_ver = +				ndis_version & 0xFFFF; + +	/* Send the init request */ +	ret = vmbus_sendpacket(device->channel, init_packet, +				sizeof(struct nvsp_message), +				(unsigned long)init_packet, +				VM_PKT_DATA_INBAND, 0); +	if (ret != 0) +		goto cleanup; + +	/* Post the big receive buffer to NetVSP */ +	if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2) +		net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY; +	else +		net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE; +	net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE; + +	ret = netvsc_init_buf(device); + +cleanup: +	return ret; +} + +static void netvsc_disconnect_vsp(struct netvsc_device *net_device) +{ +	netvsc_destroy_buf(net_device); +} + +/* + * netvsc_device_remove - Callback when the root bus device is removed + */ +int netvsc_device_remove(struct hv_device *device) +{ +	struct netvsc_device *net_device; +	unsigned long flags; + +	net_device = hv_get_drvdata(device); + +	netvsc_disconnect_vsp(net_device); + +	/* +	 * Since we have already drained, we don't need to busy wait +	 * as was done in final_release_stor_device() +	 * Note that we cannot set the ext pointer to NULL until +	 * we have drained - to drain the outgoing packets, we need to +	 * allow incoming packets. +	 */ + +	spin_lock_irqsave(&device->channel->inbound_lock, flags); +	hv_set_drvdata(device, NULL); +	spin_unlock_irqrestore(&device->channel->inbound_lock, flags); + +	/* +	 * At this point, no one should be accessing net_device +	 * except in here +	 */ +	dev_notice(&device->device, "net device safe to remove\n"); + +	/* Now, we can close the channel safely */ +	vmbus_close(device->channel); + +	/* Release all resources */ +	if (net_device->sub_cb_buf) +		vfree(net_device->sub_cb_buf); + +	kfree(net_device); +	return 0; +} + + +#define RING_AVAIL_PERCENT_HIWATER 20 +#define RING_AVAIL_PERCENT_LOWATER 10 + +/* + * Get the percentage of available bytes to write in the ring. 
 * The return value is in range from 0 to 100.
 */
static inline u32 hv_ringbuf_avail_percent(
		struct hv_ring_buffer_info *ring_info)
{
	u32 avail_read, avail_write;

	hv_get_ringbuffer_availbytes(ring_info, &avail_read, &avail_write);

	return avail_write * 100 / ring_info->ring_datasize;
}

/* Mark a send-buffer section free again (atomic bitmap clear; the bit
 * was set by netvsc_get_next_send_section).
 */
static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
					 u32 index)
{
	sync_change_bit(index, net_device->send_section_map);
}

/* Handle a completion packet from the host: channel-init style responses
 * are copied back and wake the initialization waiter; RNDIS send
 * completions release the send-buffer slot, run the caller's completion,
 * update the outstanding counters, and possibly restart the tx queue.
 */
static void netvsc_send_completion(struct netvsc_device *net_device,
				   struct hv_device *device,
				   struct vmpacket_descriptor *packet)
{
	struct nvsp_message *nvsp_packet;
	struct hv_netvsc_packet *nvsc_packet;
	struct net_device *ndev;
	u32 send_index;

	ndev = net_device->ndev;

	/* The nvsp payload starts offset8*8 bytes into the descriptor */
	nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
			(packet->offset8 << 3));

	if ((nvsp_packet->hdr.msg_type == NVSP_MSG_TYPE_INIT_COMPLETE) ||
	    (nvsp_packet->hdr.msg_type ==
	     NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) ||
	    (nvsp_packet->hdr.msg_type ==
	     NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE) ||
	    (nvsp_packet->hdr.msg_type ==
	     NVSP_MSG5_TYPE_SUBCHANNEL)) {
		/* Copy the response back */
		memcpy(&net_device->channel_init_pkt, nvsp_packet,
		       sizeof(struct nvsp_message));
		complete(&net_device->channel_init_wait);
	} else if (nvsp_packet->hdr.msg_type ==
		   NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) {
		int num_outstanding_sends;
		u16 q_idx = 0;
		struct vmbus_channel *channel = device->channel;
		int queue_sends;

		/* Get the send context (trans_id carries the packet pointer
		 * that netvsc_send passed as req_id)
		 */
		nvsc_packet = (struct hv_netvsc_packet *)(unsigned long)
			packet->trans_id;

		/* Notify the layer above us */
		if (nvsc_packet) {
			send_index = nvsc_packet->send_buf_index;
			if (send_index != NETVSC_INVALID_INDEX)
				netvsc_free_send_slot(net_device, send_index);
			q_idx = nvsc_packet->q_idx;
			channel = nvsc_packet->channel;
			nvsc_packet->send_completion(nvsc_packet->
						     send_completion_ctx);
		}

		num_outstanding_sends =
			atomic_dec_return(&net_device->num_outstanding_sends);
		queue_sends = atomic_dec_return(&net_device->
						queue_sends[q_idx]);

		/* Let a pending teardown proceed once fully drained */
		if (net_device->destroy && num_outstanding_sends == 0)
			wake_up(&net_device->wait_drain);

		/* Restart the queue once enough ring space is free again */
		if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
		    !net_device->start_remove &&
		    (hv_ringbuf_avail_percent(&channel->outbound) >
		     RING_AVAIL_PERCENT_HIWATER || queue_sends < 1))
				netif_tx_wake_queue(netdev_get_tx_queue(
						    ndev, q_idx));
	} else {
		netdev_err(ndev, "Unknown send completion packet type- "
			   "%d received!!\n", nvsp_packet->hdr.msg_type);
	}

}

/* Claim a free send-buffer section using an atomic test-and-set over the
 * bitmap.  Returns the section index, or NETVSC_INVALID_INDEX if all
 * sections are busy.
 */
static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
{
	unsigned long index;
	u32 max_words = net_device->map_words;
	unsigned long *map_addr = (unsigned long *)net_device->send_section_map;
	u32 section_cnt = net_device->send_section_cnt;
	int ret_val = NETVSC_INVALID_INDEX;
	int i;
	int prev_val;

	for (i = 0; i < max_words; i++) {
		/* Word fully set: nothing free here */
		if (!~(map_addr[i]))
			continue;
		index = ffz(map_addr[i]);
		prev_val = sync_test_and_set_bit(index, &map_addr[i]);
		if (prev_val)
			continue;
		/* NOTE(review): when the found bit lies beyond section_cnt it
		 * stays set after the break; harmless since such indices are
		 * never handed out, but worth confirming intentional.
		 */
		if ((index + (i * BITS_PER_LONG)) >= section_cnt)
			break;
		ret_val = (index + (i * BITS_PER_LONG));
		break;
	}
	return ret_val;
}

/* Copy a packet's page-buffer fragments contiguously into the given
 * send-buffer section; returns the total number of bytes copied.
 * Caller guarantees the data fits in one section.
 */
u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
			    unsigned int section_index,
			    struct hv_netvsc_packet *packet)
{
	char *start = net_device->send_buf;
	char *dest = (start + (section_index * net_device->send_section_size));
	int i;
	u32 msg_size = 0;

	for (i = 0; i < packet->page_buf_cnt; i++) {
		char *src = phys_to_virt(packet->page_buf[i].pfn << PAGE_SHIFT);
		u32 offset = packet->page_buf[i].offset;
		u32 len = packet->page_buf[i].len;

		memcpy(dest, (src + offset), len);
		msg_size += len;
		dest += len;
	}
	return msg_size;
}

int netvsc_send(struct
hv_device *device,
			struct hv_netvsc_packet *packet)
{
	struct netvsc_device *net_device;
	int ret = 0;
	struct nvsp_message sendMessage;
	struct net_device *ndev;
	struct vmbus_channel *out_channel = NULL;
	u64 req_id;
	unsigned int section_index = NETVSC_INVALID_INDEX;
	u32 msg_size = 0;
	struct sk_buff *skb;


	net_device = get_outbound_net_device(device);
	if (!net_device)
		return -ENODEV;
	ndev = net_device->ndev;

	sendMessage.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
	if (packet->is_data_pkt) {
		/* 0 is RMC_DATA; */
		sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 0;
	} else {
		/* 1 is RMC_CONTROL; */
		sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 1;
	}

	/* Attempt to send via sendbuf: small packets are copied into a
	 * host-shared send-buffer section so no page buffers need to be
	 * passed on the ring.
	 */
	if (packet->total_data_buflen < net_device->send_section_size) {
		section_index = netvsc_get_next_send_section(net_device);
		if (section_index != NETVSC_INVALID_INDEX) {
			msg_size = netvsc_copy_to_send_buf(net_device,
							   section_index,
							   packet);
			/* NOTE(review): the skb is freed here, before the
			 * send has been issued or confirmed — presumably safe
			 * because the data was already copied out, but a send
			 * failure below cannot return the skb; confirm intent.
			 */
			skb = (struct sk_buff *)
			      (unsigned long)packet->send_completion_tid;
			if (skb)
				dev_kfree_skb_any(skb);
			packet->page_buf_cnt = 0;
		}
	}
	packet->send_buf_index = section_index;


	sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
		section_index;
	sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = msg_size;

	/* req_id is echoed back in the completion's trans_id; 0 means no
	 * completion callback is wanted.
	 */
	if (packet->send_completion)
		req_id = (ulong)packet;
	else
		req_id = 0;

	out_channel = net_device->chn_table[packet->q_idx];
	if (out_channel == NULL)
		out_channel = device->channel;
	packet->channel = out_channel;

	if (packet->page_buf_cnt) {
		ret = vmbus_sendpacket_pagebuffer(out_channel,
						  packet->page_buf,
						  packet->page_buf_cnt,
						  &sendMessage,
						  sizeof(struct nvsp_message),
						  req_id);
	} else {
		ret = vmbus_sendpacket(out_channel, &sendMessage,
				sizeof(struct nvsp_message),
				req_id,
				VM_PKT_DATA_INBAND,
				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	}

	if (ret == 0) {
		atomic_inc(&net_device->num_outstanding_sends);
		atomic_inc(&net_device->queue_sends[packet->q_idx]);

		/* Throttle the queue when the ring is nearly full */
		if (hv_ringbuf_avail_percent(&out_channel->outbound) <
			RING_AVAIL_PERCENT_LOWATER) {
			netif_tx_stop_queue(netdev_get_tx_queue(
					    ndev, packet->q_idx));

			/* Nothing left in flight to wake us later — re-wake
			 * immediately to avoid a stalled queue.
			 */
			if (atomic_read(&net_device->
				queue_sends[packet->q_idx]) < 1)
				netif_tx_wake_queue(netdev_get_tx_queue(
						    ndev, packet->q_idx));
		}
	} else if (ret == -EAGAIN) {
		netif_tx_stop_queue(netdev_get_tx_queue(
				    ndev, packet->q_idx));
		if (atomic_read(&net_device->queue_sends[packet->q_idx]) < 1) {
			netif_tx_wake_queue(netdev_get_tx_queue(
					    ndev, packet->q_idx));
			ret = -ENOSPC;
		}
	} else {
		netdev_err(ndev, "Unable to send packet %p ret %d\n",
			   packet, ret);
	}

	return ret;
}

/* Send a receive-completion back to the host for the given transaction,
 * retrying up to 3 times with a short delay if the ring is full.
 */
static void netvsc_send_recv_completion(struct hv_device *device,
					struct vmbus_channel *channel,
					struct netvsc_device *net_device,
					u64 transaction_id, u32 status)
{
	struct nvsp_message recvcompMessage;
	int retries = 0;
	int ret;
	struct net_device *ndev;

	ndev = net_device->ndev;

	recvcompMessage.hdr.msg_type =
				NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE;

	recvcompMessage.msg.v1_msg.send_rndis_pkt_complete.status = status;

retry_send_cmplt:
	/* Send the completion */
	ret = vmbus_sendpacket(channel, &recvcompMessage,
			       sizeof(struct nvsp_message), transaction_id,
			       VM_PKT_COMP, 0);
	if (ret == 0) {
		/* success */
		/* no-op */
	} else if (ret == -EAGAIN) {
		/* no more room...wait a bit and attempt to retry 3 times */
		retries++;
		netdev_err(ndev, "unable to send receive completion pkt"
			" (tid %llx)...retrying %d\n", transaction_id, retries);

		if (retries < 4) {
			udelay(100);
			goto retry_send_cmplt;
		} else {
			netdev_err(ndev, "unable to send receive "
				"completion pkt (tid %llx)...give up
retrying\n", +				transaction_id); +		} +	} else { +		netdev_err(ndev, "unable to send receive " +			"completion pkt - %llx\n", transaction_id); +	} +} + +static void netvsc_receive(struct netvsc_device *net_device, +			struct vmbus_channel *channel, +			struct hv_device *device, +			struct vmpacket_descriptor *packet) +{ +	struct vmtransfer_page_packet_header *vmxferpage_packet; +	struct nvsp_message *nvsp_packet; +	struct hv_netvsc_packet nv_pkt; +	struct hv_netvsc_packet *netvsc_packet = &nv_pkt; +	u32 status = NVSP_STAT_SUCCESS; +	int i; +	int count = 0; +	struct net_device *ndev; + +	ndev = net_device->ndev; + +	/* +	 * All inbound packets other than send completion should be xfer page +	 * packet +	 */ +	if (packet->type != VM_PKT_DATA_USING_XFER_PAGES) { +		netdev_err(ndev, "Unknown packet type received - %d\n", +			   packet->type); +		return; +	} + +	nvsp_packet = (struct nvsp_message *)((unsigned long)packet + +			(packet->offset8 << 3)); + +	/* Make sure this is a valid nvsp packet */ +	if (nvsp_packet->hdr.msg_type != +	    NVSP_MSG1_TYPE_SEND_RNDIS_PKT) { +		netdev_err(ndev, "Unknown nvsp packet type received-" +			" %d\n", nvsp_packet->hdr.msg_type); +		return; +	} + +	vmxferpage_packet = (struct vmtransfer_page_packet_header *)packet; + +	if (vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID) { +		netdev_err(ndev, "Invalid xfer page set id - " +			   "expecting %x got %x\n", NETVSC_RECEIVE_BUFFER_ID, +			   vmxferpage_packet->xfer_pageset_id); +		return; +	} + +	count = vmxferpage_packet->range_cnt; +	netvsc_packet->device = device; +	netvsc_packet->channel = channel; + +	/* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */ +	for (i = 0; i < count; i++) { +		/* Initialize the netvsc packet */ +		netvsc_packet->status = NVSP_STAT_SUCCESS; +		netvsc_packet->data = (void *)((unsigned long)net_device-> +			recv_buf + vmxferpage_packet->ranges[i].byte_offset); +		netvsc_packet->total_data_buflen = +					
vmxferpage_packet->ranges[i].byte_count; + +		/* Pass it to the upper layer */ +		rndis_filter_receive(device, netvsc_packet); + +		if (netvsc_packet->status != NVSP_STAT_SUCCESS) +			status = NVSP_STAT_FAIL; +	} + +	netvsc_send_recv_completion(device, channel, net_device, +				    vmxferpage_packet->d.trans_id, status); +} + + +static void netvsc_send_table(struct hv_device *hdev, +			      struct vmpacket_descriptor *vmpkt) +{ +	struct netvsc_device *nvscdev; +	struct net_device *ndev; +	struct nvsp_message *nvmsg; +	int i; +	u32 count, *tab; + +	nvscdev = get_outbound_net_device(hdev); +	if (!nvscdev) +		return; +	ndev = nvscdev->ndev; + +	nvmsg = (struct nvsp_message *)((unsigned long)vmpkt + +					(vmpkt->offset8 << 3)); + +	if (nvmsg->hdr.msg_type != NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE) +		return; + +	count = nvmsg->msg.v5_msg.send_table.count; +	if (count != VRSS_SEND_TAB_SIZE) { +		netdev_err(ndev, "Received wrong send-table size:%u\n", count); +		return; +	} + +	tab = (u32 *)((unsigned long)&nvmsg->msg.v5_msg.send_table + +		      nvmsg->msg.v5_msg.send_table.offset); + +	for (i = 0; i < count; i++) +		nvscdev->send_table[i] = tab[i]; +} + +void netvsc_channel_cb(void *context) +{ +	int ret; +	struct vmbus_channel *channel = (struct vmbus_channel *)context; +	struct hv_device *device; +	struct netvsc_device *net_device; +	u32 bytes_recvd; +	u64 request_id; +	struct vmpacket_descriptor *desc; +	unsigned char *buffer; +	int bufferlen = NETVSC_PACKET_SIZE; +	struct net_device *ndev; + +	if (channel->primary_channel != NULL) +		device = channel->primary_channel->device_obj; +	else +		device = channel->device_obj; + +	net_device = get_inbound_net_device(device); +	if (!net_device) +		return; +	ndev = net_device->ndev; +	buffer = get_per_channel_state(channel); + +	do { +		ret = vmbus_recvpacket_raw(channel, buffer, bufferlen, +					   &bytes_recvd, &request_id); +		if (ret == 0) { +			if (bytes_recvd > 0) { +				desc = (struct vmpacket_descriptor *)buffer; +		
		switch (desc->type) { +				case VM_PKT_COMP: +					netvsc_send_completion(net_device, +								device, desc); +					break; + +				case VM_PKT_DATA_USING_XFER_PAGES: +					netvsc_receive(net_device, channel, +						       device, desc); +					break; + +				case VM_PKT_DATA_INBAND: +					netvsc_send_table(device, desc); +					break; + +				default: +					netdev_err(ndev, +						   "unhandled packet type %d, " +						   "tid %llx len %d\n", +						   desc->type, request_id, +						   bytes_recvd); +					break; +				} + +			} else { +				/* +				 * We are done for this pass. +				 */ +				break; +			} + +		} else if (ret == -ENOBUFS) { +			if (bufferlen > NETVSC_PACKET_SIZE) +				kfree(buffer); +			/* Handle large packet */ +			buffer = kmalloc(bytes_recvd, GFP_ATOMIC); +			if (buffer == NULL) { +				/* Try again next time around */ +				netdev_err(ndev, +					   "unable to allocate buffer of size " +					   "(%d)!!\n", bytes_recvd); +				break; +			} + +			bufferlen = bytes_recvd; +		} +	} while (1); + +	if (bufferlen > NETVSC_PACKET_SIZE) +		kfree(buffer); +	return; +} + +/* + * netvsc_device_add - Callback when the device belonging to this + * driver is added + */ +int netvsc_device_add(struct hv_device *device, void *additional_info) +{ +	int ret = 0; +	int ring_size = +	((struct netvsc_device_info *)additional_info)->ring_size; +	struct netvsc_device *net_device; +	struct net_device *ndev; + +	net_device = alloc_net_device(device); +	if (!net_device) { +		ret = -ENOMEM; +		goto cleanup; +	} + +	net_device->ring_size = ring_size; + +	/* +	 * Coming into this function, struct net_device * is +	 * registered as the driver private data. +	 * In alloc_net_device(), we register struct netvsc_device * +	 * as the driver private data and stash away struct net_device * +	 * in struct netvsc_device *. 
+	 */ +	ndev = net_device->ndev; + +	/* Initialize the NetVSC channel extension */ +	init_completion(&net_device->channel_init_wait); + +	set_per_channel_state(device->channel, net_device->cb_buffer); + +	/* Open the channel */ +	ret = vmbus_open(device->channel, ring_size * PAGE_SIZE, +			 ring_size * PAGE_SIZE, NULL, 0, +			 netvsc_channel_cb, device->channel); + +	if (ret != 0) { +		netdev_err(ndev, "unable to open channel: %d\n", ret); +		goto cleanup; +	} + +	/* Channel is opened */ +	pr_info("hv_netvsc channel opened successfully\n"); + +	net_device->chn_table[0] = device->channel; + +	/* Connect with the NetVsp */ +	ret = netvsc_connect_vsp(device); +	if (ret != 0) { +		netdev_err(ndev, +			"unable to connect to NetVSP - %d\n", ret); +		goto close; +	} + +	return ret; + +close: +	/* Now, we can close the channel safely */ +	vmbus_close(device->channel); + +cleanup: + +	if (net_device) +		kfree(net_device); + +	return ret; +} diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c new file mode 100644 index 00000000000..4fd71b75e66 --- /dev/null +++ b/drivers/net/hyperv/netvsc_drv.c @@ -0,0 +1,923 @@ +/* + * Copyright (c) 2009, Microsoft Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, see <http://www.gnu.org/licenses/>. 
+ * + * Authors: + *   Haiyang Zhang <haiyangz@microsoft.com> + *   Hank Janssen  <hjanssen@microsoft.com> + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/atomic.h> +#include <linux/module.h> +#include <linux/highmem.h> +#include <linux/device.h> +#include <linux/io.h> +#include <linux/delay.h> +#include <linux/netdevice.h> +#include <linux/inetdevice.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/if_vlan.h> +#include <linux/in.h> +#include <linux/slab.h> +#include <net/arp.h> +#include <net/route.h> +#include <net/sock.h> +#include <net/pkt_sched.h> + +#include "hyperv_net.h" + +struct net_device_context { +	/* point back to our device context */ +	struct hv_device *device_ctx; +	struct delayed_work dwork; +	struct work_struct work; +}; + +#define RING_SIZE_MIN 64 +static int ring_size = 128; +module_param(ring_size, int, S_IRUGO); +MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); + +static void do_set_multicast(struct work_struct *w) +{ +	struct net_device_context *ndevctx = +		container_of(w, struct net_device_context, work); +	struct netvsc_device *nvdev; +	struct rndis_device *rdev; + +	nvdev = hv_get_drvdata(ndevctx->device_ctx); +	if (nvdev == NULL || nvdev->ndev == NULL) +		return; + +	rdev = nvdev->extension; +	if (rdev == NULL) +		return; + +	if (nvdev->ndev->flags & IFF_PROMISC) +		rndis_filter_set_packet_filter(rdev, +			NDIS_PACKET_TYPE_PROMISCUOUS); +	else +		rndis_filter_set_packet_filter(rdev, +			NDIS_PACKET_TYPE_BROADCAST | +			NDIS_PACKET_TYPE_ALL_MULTICAST | +			NDIS_PACKET_TYPE_DIRECTED); +} + +static void netvsc_set_multicast_list(struct net_device *net) +{ +	struct net_device_context *net_device_ctx = netdev_priv(net); + +	schedule_work(&net_device_ctx->work); +} + +static int netvsc_open(struct net_device *net) +{ +	struct net_device_context *net_device_ctx = netdev_priv(net); +	struct hv_device *device_obj = net_device_ctx->device_ctx; +	struct 
netvsc_device *nvdev; +	struct rndis_device *rdev; +	int ret = 0; + +	netif_carrier_off(net); + +	/* Open up the device */ +	ret = rndis_filter_open(device_obj); +	if (ret != 0) { +		netdev_err(net, "unable to open device (ret %d).\n", ret); +		return ret; +	} + +	netif_tx_start_all_queues(net); + +	nvdev = hv_get_drvdata(device_obj); +	rdev = nvdev->extension; +	if (!rdev->link_state) +		netif_carrier_on(net); + +	return ret; +} + +static int netvsc_close(struct net_device *net) +{ +	struct net_device_context *net_device_ctx = netdev_priv(net); +	struct hv_device *device_obj = net_device_ctx->device_ctx; +	int ret; + +	netif_tx_disable(net); + +	/* Make sure netvsc_set_multicast_list doesn't re-enable filter! */ +	cancel_work_sync(&net_device_ctx->work); +	ret = rndis_filter_close(device_obj); +	if (ret != 0) +		netdev_err(net, "unable to close device (ret %d).\n", ret); + +	return ret; +} + +static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size, +				int pkt_type) +{ +	struct rndis_packet *rndis_pkt; +	struct rndis_per_packet_info *ppi; + +	rndis_pkt = &msg->msg.pkt; +	rndis_pkt->data_offset += ppi_size; + +	ppi = (struct rndis_per_packet_info *)((void *)rndis_pkt + +		rndis_pkt->per_pkt_info_offset + rndis_pkt->per_pkt_info_len); + +	ppi->size = ppi_size; +	ppi->type = pkt_type; +	ppi->ppi_offset = sizeof(struct rndis_per_packet_info); + +	rndis_pkt->per_pkt_info_len += ppi_size; + +	return ppi; +} + +union sub_key { +	u64 k; +	struct { +		u8 pad[3]; +		u8 kb; +		u32 ka; +	}; +}; + +/* Toeplitz hash function + * data: network byte order + * return: host byte order + */ +static u32 comp_hash(u8 *key, int klen, u8 *data, int dlen) +{ +	union sub_key subk; +	int k_next = 4; +	u8 dt; +	int i, j; +	u32 ret = 0; + +	subk.k = 0; +	subk.ka = ntohl(*(u32 *)key); + +	for (i = 0; i < dlen; i++) { +		subk.kb = key[k_next]; +		k_next = (k_next + 1) % klen; +		dt = data[i]; +		for (j = 0; j < 8; j++) { +			if (dt & 0x80) +				ret ^= subk.ka; +			dt <<= 1; +			
subk.k <<= 1; +		} +	} + +	return ret; +} + +static bool netvsc_set_hash(u32 *hash, struct sk_buff *skb) +{ +	struct iphdr *iphdr; +	int data_len; +	bool ret = false; + +	if (eth_hdr(skb)->h_proto != htons(ETH_P_IP)) +		return false; + +	iphdr = ip_hdr(skb); + +	if (iphdr->version == 4) { +		if (iphdr->protocol == IPPROTO_TCP) +			data_len = 12; +		else +			data_len = 8; +		*hash = comp_hash(netvsc_hash_key, HASH_KEYLEN, +				  (u8 *)&iphdr->saddr, data_len); +		ret = true; +	} + +	return ret; +} + +static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, +			void *accel_priv, select_queue_fallback_t fallback) +{ +	struct net_device_context *net_device_ctx = netdev_priv(ndev); +	struct hv_device *hdev =  net_device_ctx->device_ctx; +	struct netvsc_device *nvsc_dev = hv_get_drvdata(hdev); +	u32 hash; +	u16 q_idx = 0; + +	if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1) +		return 0; + +	if (netvsc_set_hash(&hash, skb)) { +		q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] % +			ndev->real_num_tx_queues; +		skb_set_hash(skb, hash, PKT_HASH_TYPE_L3); +	} + +	return q_idx; +} + +static void netvsc_xmit_completion(void *context) +{ +	struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context; +	struct sk_buff *skb = (struct sk_buff *) +		(unsigned long)packet->send_completion_tid; +	u32 index = packet->send_buf_index; + +	kfree(packet); + +	if (skb && (index == NETVSC_INVALID_INDEX)) +		dev_kfree_skb_any(skb); +} + +static u32 fill_pg_buf(struct page *page, u32 offset, u32 len, +			struct hv_page_buffer *pb) +{ +	int j = 0; + +	/* Deal with compund pages by ignoring unused part +	 * of the page. 
+	 */ +	page += (offset >> PAGE_SHIFT); +	offset &= ~PAGE_MASK; + +	while (len > 0) { +		unsigned long bytes; + +		bytes = PAGE_SIZE - offset; +		if (bytes > len) +			bytes = len; +		pb[j].pfn = page_to_pfn(page); +		pb[j].offset = offset; +		pb[j].len = bytes; + +		offset += bytes; +		len -= bytes; + +		if (offset == PAGE_SIZE && len) { +			page++; +			offset = 0; +			j++; +		} +	} + +	return j + 1; +} + +static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb, +			   struct hv_page_buffer *pb) +{ +	u32 slots_used = 0; +	char *data = skb->data; +	int frags = skb_shinfo(skb)->nr_frags; +	int i; + +	/* The packet is laid out thus: +	 * 1. hdr +	 * 2. skb linear data +	 * 3. skb fragment data +	 */ +	if (hdr != NULL) +		slots_used += fill_pg_buf(virt_to_page(hdr), +					offset_in_page(hdr), +					len, &pb[slots_used]); + +	slots_used += fill_pg_buf(virt_to_page(data), +				offset_in_page(data), +				skb_headlen(skb), &pb[slots_used]); + +	for (i = 0; i < frags; i++) { +		skb_frag_t *frag = skb_shinfo(skb)->frags + i; + +		slots_used += fill_pg_buf(skb_frag_page(frag), +					frag->page_offset, +					skb_frag_size(frag), &pb[slots_used]); +	} +	return slots_used; +} + +static int count_skb_frag_slots(struct sk_buff *skb) +{ +	int i, frags = skb_shinfo(skb)->nr_frags; +	int pages = 0; + +	for (i = 0; i < frags; i++) { +		skb_frag_t *frag = skb_shinfo(skb)->frags + i; +		unsigned long size = skb_frag_size(frag); +		unsigned long offset = frag->page_offset; + +		/* Skip unused frames from start of page */ +		offset &= ~PAGE_MASK; +		pages += PFN_UP(offset + size); +	} +	return pages; +} + +static int netvsc_get_slots(struct sk_buff *skb) +{ +	char *data = skb->data; +	unsigned int offset = offset_in_page(data); +	unsigned int len = skb_headlen(skb); +	int slots; +	int frag_slots; + +	slots = DIV_ROUND_UP(offset + len, PAGE_SIZE); +	frag_slots = count_skb_frag_slots(skb); +	return slots + frag_slots; +} + +static u32 get_net_transport_info(struct sk_buff *skb, 
u32 *trans_off) +{ +	u32 ret_val = TRANSPORT_INFO_NOT_IP; + +	if ((eth_hdr(skb)->h_proto != htons(ETH_P_IP)) && +		(eth_hdr(skb)->h_proto != htons(ETH_P_IPV6))) { +		goto not_ip; +	} + +	*trans_off = skb_transport_offset(skb); + +	if ((eth_hdr(skb)->h_proto == htons(ETH_P_IP))) { +		struct iphdr *iphdr = ip_hdr(skb); + +		if (iphdr->protocol == IPPROTO_TCP) +			ret_val = TRANSPORT_INFO_IPV4_TCP; +		else if (iphdr->protocol == IPPROTO_UDP) +			ret_val = TRANSPORT_INFO_IPV4_UDP; +	} else { +		if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP) +			ret_val = TRANSPORT_INFO_IPV6_TCP; +		else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP) +			ret_val = TRANSPORT_INFO_IPV6_UDP; +	} + +not_ip: +	return ret_val; +} + +static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) +{ +	struct net_device_context *net_device_ctx = netdev_priv(net); +	struct hv_netvsc_packet *packet; +	int ret; +	unsigned int num_data_pgs; +	struct rndis_message *rndis_msg; +	struct rndis_packet *rndis_pkt; +	u32 rndis_msg_size; +	bool isvlan; +	struct rndis_per_packet_info *ppi; +	struct ndis_tcp_ip_checksum_info *csum_info; +	struct ndis_tcp_lso_info *lso_info; +	int  hdr_offset; +	u32 net_trans_info; +	u32 hash; + + +	/* We will atmost need two pages to describe the rndis +	 * header. We can only transmit MAX_PAGE_BUFFER_COUNT number +	 * of pages in a single packet. +	 */ +	num_data_pgs = netvsc_get_slots(skb) + 2; +	if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) { +		netdev_err(net, "Packet too big: %u\n", skb->len); +		dev_kfree_skb(skb); +		net->stats.tx_dropped++; +		return NETDEV_TX_OK; +	} + +	/* Allocate a netvsc packet based on # of frags. 
*/ +	packet = kzalloc(sizeof(struct hv_netvsc_packet) + +			 (num_data_pgs * sizeof(struct hv_page_buffer)) + +			 sizeof(struct rndis_message) + +			 NDIS_VLAN_PPI_SIZE + NDIS_CSUM_PPI_SIZE + +			 NDIS_LSO_PPI_SIZE + NDIS_HASH_PPI_SIZE, GFP_ATOMIC); +	if (!packet) { +		/* out of memory, drop packet */ +		netdev_err(net, "unable to allocate hv_netvsc_packet\n"); + +		dev_kfree_skb(skb); +		net->stats.tx_dropped++; +		return NETDEV_TX_OK; +	} + +	packet->vlan_tci = skb->vlan_tci; + +	packet->q_idx = skb_get_queue_mapping(skb); + +	packet->is_data_pkt = true; +	packet->total_data_buflen = skb->len; + +	packet->rndis_msg = (struct rndis_message *)((unsigned long)packet + +				sizeof(struct hv_netvsc_packet) + +				(num_data_pgs * sizeof(struct hv_page_buffer))); + +	/* Set the completion routine */ +	packet->send_completion = netvsc_xmit_completion; +	packet->send_completion_ctx = packet; +	packet->send_completion_tid = (unsigned long)skb; + +	isvlan = packet->vlan_tci & VLAN_TAG_PRESENT; + +	/* Add the rndis header */ +	rndis_msg = packet->rndis_msg; +	rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET; +	rndis_msg->msg_len = packet->total_data_buflen; +	rndis_pkt = &rndis_msg->msg.pkt; +	rndis_pkt->data_offset = sizeof(struct rndis_packet); +	rndis_pkt->data_len = packet->total_data_buflen; +	rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet); + +	rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet); + +	hash = skb_get_hash_raw(skb); +	if (hash != 0 && net->real_num_tx_queues > 1) { +		rndis_msg_size += NDIS_HASH_PPI_SIZE; +		ppi = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE, +				    NBL_HASH_VALUE); +		*(u32 *)((void *)ppi + ppi->ppi_offset) = hash; +	} + +	if (isvlan) { +		struct ndis_pkt_8021q_info *vlan; + +		rndis_msg_size += NDIS_VLAN_PPI_SIZE; +		ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE, +					IEEE_8021Q_INFO); +		vlan = (struct ndis_pkt_8021q_info *)((void *)ppi + +						ppi->ppi_offset); +		vlan->vlanid = packet->vlan_tci & VLAN_VID_MASK; +	
	vlan->pri = (packet->vlan_tci & VLAN_PRIO_MASK) >> +				VLAN_PRIO_SHIFT; +	} + +	net_trans_info = get_net_transport_info(skb, &hdr_offset); +	if (net_trans_info == TRANSPORT_INFO_NOT_IP) +		goto do_send; + +	/* +	 * Setup the sendside checksum offload only if this is not a +	 * GSO packet. +	 */ +	if (skb_is_gso(skb)) +		goto do_lso; + +	if ((skb->ip_summed == CHECKSUM_NONE) || +	    (skb->ip_summed == CHECKSUM_UNNECESSARY)) +		goto do_send; + +	rndis_msg_size += NDIS_CSUM_PPI_SIZE; +	ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE, +			    TCPIP_CHKSUM_PKTINFO); + +	csum_info = (struct ndis_tcp_ip_checksum_info *)((void *)ppi + +			ppi->ppi_offset); + +	if (net_trans_info & (INFO_IPV4 << 16)) +		csum_info->transmit.is_ipv4 = 1; +	else +		csum_info->transmit.is_ipv6 = 1; + +	if (net_trans_info & INFO_TCP) { +		csum_info->transmit.tcp_checksum = 1; +		csum_info->transmit.tcp_header_offset = hdr_offset; +	} else if (net_trans_info & INFO_UDP) { +		/* UDP checksum offload is not supported on ws2008r2. +		 * Furthermore, on ws2012 and ws2012r2, there are some +		 * issues with udp checksum offload from Linux guests. +		 * (these are host issues). +		 * For now compute the checksum here. 
+		 */ +		struct udphdr *uh; +		u16 udp_len; + +		ret = skb_cow_head(skb, 0); +		if (ret) +			goto drop; + +		uh = udp_hdr(skb); +		udp_len = ntohs(uh->len); +		uh->check = 0; +		uh->check = csum_tcpudp_magic(ip_hdr(skb)->saddr, +					      ip_hdr(skb)->daddr, +					      udp_len, IPPROTO_UDP, +					      csum_partial(uh, udp_len, 0)); +		if (uh->check == 0) +			uh->check = CSUM_MANGLED_0; + +		csum_info->transmit.udp_checksum = 0; +	} +	goto do_send; + +do_lso: +	rndis_msg_size += NDIS_LSO_PPI_SIZE; +	ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE, +			    TCP_LARGESEND_PKTINFO); + +	lso_info = (struct ndis_tcp_lso_info *)((void *)ppi + +			ppi->ppi_offset); + +	lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; +	if (net_trans_info & (INFO_IPV4 << 16)) { +		lso_info->lso_v2_transmit.ip_version = +			NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4; +		ip_hdr(skb)->tot_len = 0; +		ip_hdr(skb)->check = 0; +		tcp_hdr(skb)->check = +		~csum_tcpudp_magic(ip_hdr(skb)->saddr, +				   ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); +	} else { +		lso_info->lso_v2_transmit.ip_version = +			NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6; +		ipv6_hdr(skb)->payload_len = 0; +		tcp_hdr(skb)->check = +		~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, +				&ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); +	} +	lso_info->lso_v2_transmit.tcp_header_offset = hdr_offset; +	lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size; + +do_send: +	/* Start filling in the page buffers with the rndis hdr */ +	rndis_msg->msg_len += rndis_msg_size; +	packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size, +					skb, &packet->page_buf[0]); + +	ret = netvsc_send(net_device_ctx->device_ctx, packet); + +drop: +	if (ret == 0) { +		net->stats.tx_bytes += skb->len; +		net->stats.tx_packets++; +	} else { +		kfree(packet); +		if (ret != -EAGAIN) { +			dev_kfree_skb_any(skb); +			net->stats.tx_dropped++; +		} +	} + +	return (ret == -EAGAIN) ? 
NETDEV_TX_BUSY : NETDEV_TX_OK; +} + +/* + * netvsc_linkstatus_callback - Link up/down notification + */ +void netvsc_linkstatus_callback(struct hv_device *device_obj, +				       unsigned int status) +{ +	struct net_device *net; +	struct net_device_context *ndev_ctx; +	struct netvsc_device *net_device; +	struct rndis_device *rdev; + +	net_device = hv_get_drvdata(device_obj); +	rdev = net_device->extension; + +	rdev->link_state = status != 1; + +	net = net_device->ndev; + +	if (!net || net->reg_state != NETREG_REGISTERED) +		return; + +	ndev_ctx = netdev_priv(net); +	if (status == 1) { +		schedule_delayed_work(&ndev_ctx->dwork, 0); +		schedule_delayed_work(&ndev_ctx->dwork, msecs_to_jiffies(20)); +	} else { +		schedule_delayed_work(&ndev_ctx->dwork, 0); +	} +} + +/* + * netvsc_recv_callback -  Callback when we receive a packet from the + * "wire" on the specified device. + */ +int netvsc_recv_callback(struct hv_device *device_obj, +				struct hv_netvsc_packet *packet, +				struct ndis_tcp_ip_checksum_info *csum_info) +{ +	struct net_device *net; +	struct sk_buff *skb; + +	net = ((struct netvsc_device *)hv_get_drvdata(device_obj))->ndev; +	if (!net || net->reg_state != NETREG_REGISTERED) { +		packet->status = NVSP_STAT_FAIL; +		return 0; +	} + +	/* Allocate a skb - TODO direct I/O to pages? */ +	skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen); +	if (unlikely(!skb)) { +		++net->stats.rx_dropped; +		packet->status = NVSP_STAT_FAIL; +		return 0; +	} + +	/* +	 * Copy to skb. This copy is needed here since the memory pointed by +	 * hv_netvsc_packet cannot be deallocated +	 */ +	memcpy(skb_put(skb, packet->total_data_buflen), packet->data, +		packet->total_data_buflen); + +	skb->protocol = eth_type_trans(skb, net); +	if (csum_info) { +		/* We only look at the IP checksum here. +		 * Should we be dropping the packet if checksum +		 * failed? How do we deal with other checksums - TCP/UDP? 
+		 */ +		if (csum_info->receive.ip_checksum_succeeded) +			skb->ip_summed = CHECKSUM_UNNECESSARY; +		else +			skb->ip_summed = CHECKSUM_NONE; +	} + +	if (packet->vlan_tci & VLAN_TAG_PRESENT) +		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), +				       packet->vlan_tci); + +	skb_record_rx_queue(skb, packet->channel-> +			    offermsg.offer.sub_channel_index); + +	net->stats.rx_packets++; +	net->stats.rx_bytes += packet->total_data_buflen; + +	/* +	 * Pass the skb back up. Network stack will deallocate the skb when it +	 * is done. +	 * TODO - use NAPI? +	 */ +	netif_rx(skb); + +	return 0; +} + +static void netvsc_get_drvinfo(struct net_device *net, +			       struct ethtool_drvinfo *info) +{ +	strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); +	strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); +} + +static int netvsc_change_mtu(struct net_device *ndev, int mtu) +{ +	struct net_device_context *ndevctx = netdev_priv(ndev); +	struct hv_device *hdev =  ndevctx->device_ctx; +	struct netvsc_device *nvdev = hv_get_drvdata(hdev); +	struct netvsc_device_info device_info; +	int limit = ETH_DATA_LEN; + +	if (nvdev == NULL || nvdev->destroy) +		return -ENODEV; + +	if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2) +		limit = NETVSC_MTU; + +	if (mtu < 68 || mtu > limit) +		return -EINVAL; + +	nvdev->start_remove = true; +	cancel_work_sync(&ndevctx->work); +	netif_tx_disable(ndev); +	rndis_filter_device_remove(hdev); + +	ndev->mtu = mtu; + +	ndevctx->device_ctx = hdev; +	hv_set_drvdata(hdev, ndev); +	device_info.ring_size = ring_size; +	rndis_filter_device_add(hdev, &device_info); +	netif_tx_wake_all_queues(ndev); + +	return 0; +} + + +static int netvsc_set_mac_addr(struct net_device *ndev, void *p) +{ +	struct net_device_context *ndevctx = netdev_priv(ndev); +	struct hv_device *hdev =  ndevctx->device_ctx; +	struct sockaddr *addr = p; +	char save_adr[ETH_ALEN]; +	unsigned char save_aatype; +	int err; + +	memcpy(save_adr, ndev->dev_addr, ETH_ALEN); +	
save_aatype = ndev->addr_assign_type; + +	err = eth_mac_addr(ndev, p); +	if (err != 0) +		return err; + +	err = rndis_filter_set_device_mac(hdev, addr->sa_data); +	if (err != 0) { +		/* roll back to saved MAC */ +		memcpy(ndev->dev_addr, save_adr, ETH_ALEN); +		ndev->addr_assign_type = save_aatype; +	} + +	return err; +} + + +static const struct ethtool_ops ethtool_ops = { +	.get_drvinfo	= netvsc_get_drvinfo, +	.get_link	= ethtool_op_get_link, +}; + +static const struct net_device_ops device_ops = { +	.ndo_open =			netvsc_open, +	.ndo_stop =			netvsc_close, +	.ndo_start_xmit =		netvsc_start_xmit, +	.ndo_set_rx_mode =		netvsc_set_multicast_list, +	.ndo_change_mtu =		netvsc_change_mtu, +	.ndo_validate_addr =		eth_validate_addr, +	.ndo_set_mac_address =		netvsc_set_mac_addr, +	.ndo_select_queue =		netvsc_select_queue, +}; + +/* + * Send GARP packet to network peers after migrations. + * After Quick Migration, the network is not immediately operational in the + * current context when receiving RNDIS_STATUS_MEDIA_CONNECT event. So, add + * another netif_notify_peers() into a delayed work, otherwise GARP packet + * will not be sent after quick migration, and cause network disconnection. + * Also, we update the carrier status here. 
+ */ +static void netvsc_link_change(struct work_struct *w) +{ +	struct net_device_context *ndev_ctx; +	struct net_device *net; +	struct netvsc_device *net_device; +	struct rndis_device *rdev; +	bool notify; + +	rtnl_lock(); + +	ndev_ctx = container_of(w, struct net_device_context, dwork.work); +	net_device = hv_get_drvdata(ndev_ctx->device_ctx); +	rdev = net_device->extension; +	net = net_device->ndev; + +	if (rdev->link_state) { +		netif_carrier_off(net); +		notify = false; +	} else { +		netif_carrier_on(net); +		notify = true; +	} + +	rtnl_unlock(); + +	if (notify) +		netdev_notify_peers(net); +} + + +static int netvsc_probe(struct hv_device *dev, +			const struct hv_vmbus_device_id *dev_id) +{ +	struct net_device *net = NULL; +	struct net_device_context *net_device_ctx; +	struct netvsc_device_info device_info; +	struct netvsc_device *nvdev; +	int ret; + +	net = alloc_etherdev_mq(sizeof(struct net_device_context), +				num_online_cpus()); +	if (!net) +		return -ENOMEM; + +	netif_carrier_off(net); + +	net_device_ctx = netdev_priv(net); +	net_device_ctx->device_ctx = dev; +	hv_set_drvdata(dev, net); +	INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change); +	INIT_WORK(&net_device_ctx->work, do_set_multicast); + +	net->netdev_ops = &device_ops; + +	net->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM | +				NETIF_F_TSO; +	net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM | +			NETIF_F_IP_CSUM | NETIF_F_TSO; + +	net->ethtool_ops = ðtool_ops; +	SET_NETDEV_DEV(net, &dev->device); + +	/* Notify the netvsc driver of the new device */ +	device_info.ring_size = ring_size; +	ret = rndis_filter_device_add(dev, &device_info); +	if (ret != 0) { +		netdev_err(net, "unable to add netvsc device (ret %d)\n", ret); +		free_netdev(net); +		hv_set_drvdata(dev, NULL); +		return ret; +	} +	memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN); + +	nvdev = hv_get_drvdata(dev); +	netif_set_real_num_tx_queues(net, nvdev->num_chn); +	
netif_set_real_num_rx_queues(net, nvdev->num_chn); + +	ret = register_netdev(net); +	if (ret != 0) { +		pr_err("Unable to register netdev.\n"); +		rndis_filter_device_remove(dev); +		free_netdev(net); +	} else { +		schedule_delayed_work(&net_device_ctx->dwork, 0); +	} + +	return ret; +} + +static int netvsc_remove(struct hv_device *dev) +{ +	struct net_device *net; +	struct net_device_context *ndev_ctx; +	struct netvsc_device *net_device; + +	net_device = hv_get_drvdata(dev); +	net = net_device->ndev; + +	if (net == NULL) { +		dev_err(&dev->device, "No net device to remove\n"); +		return 0; +	} + +	net_device->start_remove = true; + +	ndev_ctx = netdev_priv(net); +	cancel_delayed_work_sync(&ndev_ctx->dwork); +	cancel_work_sync(&ndev_ctx->work); + +	/* Stop outbound asap */ +	netif_tx_disable(net); + +	unregister_netdev(net); + +	/* +	 * Call to the vsc driver to let it know that the device is being +	 * removed +	 */ +	rndis_filter_device_remove(dev); + +	free_netdev(net); +	return 0; +} + +static const struct hv_vmbus_device_id id_table[] = { +	/* Network guid */ +	{ HV_NIC_GUID, }, +	{ }, +}; + +MODULE_DEVICE_TABLE(vmbus, id_table); + +/* The one and only one */ +static struct  hv_driver netvsc_drv = { +	.name = KBUILD_MODNAME, +	.id_table = id_table, +	.probe = netvsc_probe, +	.remove = netvsc_remove, +}; + +static void __exit netvsc_drv_exit(void) +{ +	vmbus_driver_unregister(&netvsc_drv); +} + +static int __init netvsc_drv_init(void) +{ +	if (ring_size < RING_SIZE_MIN) { +		ring_size = RING_SIZE_MIN; +		pr_info("Increased ring_size to %d (min allowed)\n", +			ring_size); +	} +	return vmbus_driver_register(&netvsc_drv); +} + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Microsoft Hyper-V network driver"); + +module_init(netvsc_drv_init); +module_exit(netvsc_drv_exit); diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c new file mode 100644 index 00000000000..99c527adae5 --- /dev/null +++ b/drivers/net/hyperv/rndis_filter.c @@ -0,0 
+1,1185 @@ +/* + * Copyright (c) 2009, Microsoft Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, see <http://www.gnu.org/licenses/>. + * + * Authors: + *   Haiyang Zhang <haiyangz@microsoft.com> + *   Hank Janssen  <hjanssen@microsoft.com> + */ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/highmem.h> +#include <linux/slab.h> +#include <linux/io.h> +#include <linux/if_ether.h> +#include <linux/netdevice.h> +#include <linux/if_vlan.h> +#include <linux/nls.h> + +#include "hyperv_net.h" + + +#define RNDIS_EXT_LEN PAGE_SIZE +struct rndis_request { +	struct list_head list_ent; +	struct completion  wait_event; + +	struct rndis_message response_msg; +	/* +	 * The buffer for extended info after the RNDIS response message. It's +	 * referenced based on the data offset in the RNDIS message. Its size +	 * is enough for current needs, and should be sufficient for the near +	 * future. +	 */ +	u8 response_ext[RNDIS_EXT_LEN]; + +	/* Simplify allocation by having a netvsc packet inline */ +	struct hv_netvsc_packet	pkt; +	/* Set 2 pages for rndis requests crossing page boundary */ +	struct hv_page_buffer buf[2]; + +	struct rndis_message request_msg; +	/* +	 * The buffer for the extended info after the RNDIS request message. +	 * It is referenced and sized in a similar way as response_ext. 
+	 */ +	u8 request_ext[RNDIS_EXT_LEN]; +}; + +static struct rndis_device *get_rndis_device(void) +{ +	struct rndis_device *device; + +	device = kzalloc(sizeof(struct rndis_device), GFP_KERNEL); +	if (!device) +		return NULL; + +	spin_lock_init(&device->request_lock); + +	INIT_LIST_HEAD(&device->req_list); + +	device->state = RNDIS_DEV_UNINITIALIZED; + +	return device; +} + +static struct rndis_request *get_rndis_request(struct rndis_device *dev, +					     u32 msg_type, +					     u32 msg_len) +{ +	struct rndis_request *request; +	struct rndis_message *rndis_msg; +	struct rndis_set_request *set; +	unsigned long flags; + +	request = kzalloc(sizeof(struct rndis_request), GFP_KERNEL); +	if (!request) +		return NULL; + +	init_completion(&request->wait_event); + +	rndis_msg = &request->request_msg; +	rndis_msg->ndis_msg_type = msg_type; +	rndis_msg->msg_len = msg_len; + +	request->pkt.q_idx = 0; + +	/* +	 * Set the request id. This field is always after the rndis header for +	 * request/response packet types so we just used the SetRequest as a +	 * template +	 */ +	set = &rndis_msg->msg.set_req; +	set->req_id = atomic_inc_return(&dev->new_req_id); + +	/* Add to the request list */ +	spin_lock_irqsave(&dev->request_lock, flags); +	list_add_tail(&request->list_ent, &dev->req_list); +	spin_unlock_irqrestore(&dev->request_lock, flags); + +	return request; +} + +static void put_rndis_request(struct rndis_device *dev, +			    struct rndis_request *req) +{ +	unsigned long flags; + +	spin_lock_irqsave(&dev->request_lock, flags); +	list_del(&req->list_ent); +	spin_unlock_irqrestore(&dev->request_lock, flags); + +	kfree(req); +} + +static void dump_rndis_message(struct hv_device *hv_dev, +			struct rndis_message *rndis_msg) +{ +	struct net_device *netdev; +	struct netvsc_device *net_device; + +	net_device = hv_get_drvdata(hv_dev); +	netdev = net_device->ndev; + +	switch (rndis_msg->ndis_msg_type) { +	case RNDIS_MSG_PACKET: +		netdev_dbg(netdev, "RNDIS_MSG_PACKET (len %u, " +			   
"data offset %u data len %u, # oob %u, " +			   "oob offset %u, oob len %u, pkt offset %u, " +			   "pkt len %u\n", +			   rndis_msg->msg_len, +			   rndis_msg->msg.pkt.data_offset, +			   rndis_msg->msg.pkt.data_len, +			   rndis_msg->msg.pkt.num_oob_data_elements, +			   rndis_msg->msg.pkt.oob_data_offset, +			   rndis_msg->msg.pkt.oob_data_len, +			   rndis_msg->msg.pkt.per_pkt_info_offset, +			   rndis_msg->msg.pkt.per_pkt_info_len); +		break; + +	case RNDIS_MSG_INIT_C: +		netdev_dbg(netdev, "RNDIS_MSG_INIT_C " +			"(len %u, id 0x%x, status 0x%x, major %d, minor %d, " +			"device flags %d, max xfer size 0x%x, max pkts %u, " +			"pkt aligned %u)\n", +			rndis_msg->msg_len, +			rndis_msg->msg.init_complete.req_id, +			rndis_msg->msg.init_complete.status, +			rndis_msg->msg.init_complete.major_ver, +			rndis_msg->msg.init_complete.minor_ver, +			rndis_msg->msg.init_complete.dev_flags, +			rndis_msg->msg.init_complete.max_xfer_size, +			rndis_msg->msg.init_complete. +			   max_pkt_per_msg, +			rndis_msg->msg.init_complete. +			   pkt_alignment_factor); +		break; + +	case RNDIS_MSG_QUERY_C: +		netdev_dbg(netdev, "RNDIS_MSG_QUERY_C " +			"(len %u, id 0x%x, status 0x%x, buf len %u, " +			"buf offset %u)\n", +			rndis_msg->msg_len, +			rndis_msg->msg.query_complete.req_id, +			rndis_msg->msg.query_complete.status, +			rndis_msg->msg.query_complete. +			   info_buflen, +			rndis_msg->msg.query_complete. 
+			   info_buf_offset); +		break; + +	case RNDIS_MSG_SET_C: +		netdev_dbg(netdev, +			"RNDIS_MSG_SET_C (len %u, id 0x%x, status 0x%x)\n", +			rndis_msg->msg_len, +			rndis_msg->msg.set_complete.req_id, +			rndis_msg->msg.set_complete.status); +		break; + +	case RNDIS_MSG_INDICATE: +		netdev_dbg(netdev, "RNDIS_MSG_INDICATE " +			"(len %u, status 0x%x, buf len %u, buf offset %u)\n", +			rndis_msg->msg_len, +			rndis_msg->msg.indicate_status.status, +			rndis_msg->msg.indicate_status.status_buflen, +			rndis_msg->msg.indicate_status.status_buf_offset); +		break; + +	default: +		netdev_dbg(netdev, "0x%x (len %u)\n", +			rndis_msg->ndis_msg_type, +			rndis_msg->msg_len); +		break; +	} +} + +static int rndis_filter_send_request(struct rndis_device *dev, +				  struct rndis_request *req) +{ +	int ret; +	struct hv_netvsc_packet *packet; + +	/* Setup the packet to send it */ +	packet = &req->pkt; + +	packet->is_data_pkt = false; +	packet->total_data_buflen = req->request_msg.msg_len; +	packet->page_buf_cnt = 1; + +	packet->page_buf[0].pfn = virt_to_phys(&req->request_msg) >> +					PAGE_SHIFT; +	packet->page_buf[0].len = req->request_msg.msg_len; +	packet->page_buf[0].offset = +		(unsigned long)&req->request_msg & (PAGE_SIZE - 1); + +	/* Add one page_buf when request_msg crossing page boundary */ +	if (packet->page_buf[0].offset + packet->page_buf[0].len > PAGE_SIZE) { +		packet->page_buf_cnt++; +		packet->page_buf[0].len = PAGE_SIZE - +			packet->page_buf[0].offset; +		packet->page_buf[1].pfn = virt_to_phys((void *)&req->request_msg +			+ packet->page_buf[0].len) >> PAGE_SHIFT; +		packet->page_buf[1].offset = 0; +		packet->page_buf[1].len = req->request_msg.msg_len - +			packet->page_buf[0].len; +	} + +	packet->send_completion = NULL; + +	ret = netvsc_send(dev->net_dev->dev, packet); +	return ret; +} + +static void rndis_set_link_state(struct rndis_device *rdev, +				 struct rndis_request *request) +{ +	u32 link_status; +	struct rndis_query_complete *query_complete; + +	
query_complete = &request->response_msg.msg.query_complete; + +	if (query_complete->status == RNDIS_STATUS_SUCCESS && +	    query_complete->info_buflen == sizeof(u32)) { +		memcpy(&link_status, (void *)((unsigned long)query_complete + +		       query_complete->info_buf_offset), sizeof(u32)); +		rdev->link_state = link_status != 0; +	} +} + +static void rndis_filter_receive_response(struct rndis_device *dev, +				       struct rndis_message *resp) +{ +	struct rndis_request *request = NULL; +	bool found = false; +	unsigned long flags; +	struct net_device *ndev; + +	ndev = dev->net_dev->ndev; + +	spin_lock_irqsave(&dev->request_lock, flags); +	list_for_each_entry(request, &dev->req_list, list_ent) { +		/* +		 * All request/response message contains RequestId as the 1st +		 * field +		 */ +		if (request->request_msg.msg.init_req.req_id +		    == resp->msg.init_complete.req_id) { +			found = true; +			break; +		} +	} +	spin_unlock_irqrestore(&dev->request_lock, flags); + +	if (found) { +		if (resp->msg_len <= +		    sizeof(struct rndis_message) + RNDIS_EXT_LEN) { +			memcpy(&request->response_msg, resp, +			       resp->msg_len); +			if (request->request_msg.ndis_msg_type == +			    RNDIS_MSG_QUERY && request->request_msg.msg. +			    query_req.oid == RNDIS_OID_GEN_MEDIA_CONNECT_STATUS) +				rndis_set_link_state(dev, request); +		} else { +			netdev_err(ndev, +				"rndis response buffer overflow " +				"detected (size %u max %zu)\n", +				resp->msg_len, +				sizeof(struct rndis_message)); + +			if (resp->ndis_msg_type == +			    RNDIS_MSG_RESET_C) { +				/* does not have a request id field */ +				request->response_msg.msg.reset_complete. +					status = RNDIS_STATUS_BUFFER_OVERFLOW; +			} else { +				request->response_msg.msg. 
+				init_complete.status = +					RNDIS_STATUS_BUFFER_OVERFLOW; +			} +		} + +		complete(&request->wait_event); +	} else { +		netdev_err(ndev, +			"no rndis request found for this response " +			"(id 0x%x res type 0x%x)\n", +			resp->msg.init_complete.req_id, +			resp->ndis_msg_type); +	} +} + +static void rndis_filter_receive_indicate_status(struct rndis_device *dev, +					     struct rndis_message *resp) +{ +	struct rndis_indicate_status *indicate = +			&resp->msg.indicate_status; + +	if (indicate->status == RNDIS_STATUS_MEDIA_CONNECT) { +		netvsc_linkstatus_callback( +			dev->net_dev->dev, 1); +	} else if (indicate->status == RNDIS_STATUS_MEDIA_DISCONNECT) { +		netvsc_linkstatus_callback( +			dev->net_dev->dev, 0); +	} else { +		/* +		 * TODO: +		 */ +	} +} + +/* + * Get the Per-Packet-Info with the specified type + * return NULL if not found. + */ +static inline void *rndis_get_ppi(struct rndis_packet *rpkt, u32 type) +{ +	struct rndis_per_packet_info *ppi; +	int len; + +	if (rpkt->per_pkt_info_offset == 0) +		return NULL; + +	ppi = (struct rndis_per_packet_info *)((ulong)rpkt + +		rpkt->per_pkt_info_offset); +	len = rpkt->per_pkt_info_len; + +	while (len > 0) { +		if (ppi->type == type) +			return (void *)((ulong)ppi + ppi->ppi_offset); +		len -= ppi->size; +		ppi = (struct rndis_per_packet_info *)((ulong)ppi + ppi->size); +	} + +	return NULL; +} + +static void rndis_filter_receive_data(struct rndis_device *dev, +				   struct rndis_message *msg, +				   struct hv_netvsc_packet *pkt) +{ +	struct rndis_packet *rndis_pkt; +	u32 data_offset; +	struct ndis_pkt_8021q_info *vlan; +	struct ndis_tcp_ip_checksum_info *csum_info; + +	rndis_pkt = &msg->msg.pkt; + +	/* Remove the rndis header and pass it back up the stack */ +	data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset; + +	pkt->total_data_buflen -= data_offset; + +	/* +	 * Make sure we got a valid RNDIS message, now total_data_buflen +	 * should be the data packet size plus the trailer padding size +	 */ +	
if (pkt->total_data_buflen < rndis_pkt->data_len) { +		netdev_err(dev->net_dev->ndev, "rndis message buffer " +			   "overflow detected (got %u, min %u)" +			   "...dropping this message!\n", +			   pkt->total_data_buflen, rndis_pkt->data_len); +		return; +	} + +	/* +	 * Remove the rndis trailer padding from rndis packet message +	 * rndis_pkt->data_len tell us the real data length, we only copy +	 * the data packet to the stack, without the rndis trailer padding +	 */ +	pkt->total_data_buflen = rndis_pkt->data_len; +	pkt->data = (void *)((unsigned long)pkt->data + data_offset); + +	vlan = rndis_get_ppi(rndis_pkt, IEEE_8021Q_INFO); +	if (vlan) { +		pkt->vlan_tci = VLAN_TAG_PRESENT | vlan->vlanid | +			(vlan->pri << VLAN_PRIO_SHIFT); +	} else { +		pkt->vlan_tci = 0; +	} + +	csum_info = rndis_get_ppi(rndis_pkt, TCPIP_CHKSUM_PKTINFO); +	netvsc_recv_callback(dev->net_dev->dev, pkt, csum_info); +} + +int rndis_filter_receive(struct hv_device *dev, +				struct hv_netvsc_packet	*pkt) +{ +	struct netvsc_device *net_dev = hv_get_drvdata(dev); +	struct rndis_device *rndis_dev; +	struct rndis_message *rndis_msg; +	struct net_device *ndev; +	int ret = 0; + +	if (!net_dev) { +		ret = -EINVAL; +		goto exit; +	} + +	ndev = net_dev->ndev; + +	/* Make sure the rndis device state is initialized */ +	if (!net_dev->extension) { +		netdev_err(ndev, "got rndis message but no rndis device - " +			  "dropping this message!\n"); +		ret = -ENODEV; +		goto exit; +	} + +	rndis_dev = (struct rndis_device *)net_dev->extension; +	if (rndis_dev->state == RNDIS_DEV_UNINITIALIZED) { +		netdev_err(ndev, "got rndis message but rndis device " +			   "uninitialized...dropping this message!\n"); +		ret = -ENODEV; +		goto exit; +	} + +	rndis_msg = pkt->data; + +	dump_rndis_message(dev, rndis_msg); + +	switch (rndis_msg->ndis_msg_type) { +	case RNDIS_MSG_PACKET: +		/* data msg */ +		rndis_filter_receive_data(rndis_dev, rndis_msg, pkt); +		break; + +	case RNDIS_MSG_INIT_C: +	case RNDIS_MSG_QUERY_C: +	case 
RNDIS_MSG_SET_C: +		/* completion msgs */ +		rndis_filter_receive_response(rndis_dev, rndis_msg); +		break; + +	case RNDIS_MSG_INDICATE: +		/* notification msgs */ +		rndis_filter_receive_indicate_status(rndis_dev, rndis_msg); +		break; +	default: +		netdev_err(ndev, +			"unhandled rndis message (type %u len %u)\n", +			   rndis_msg->ndis_msg_type, +			   rndis_msg->msg_len); +		break; +	} + +exit: +	if (ret != 0) +		pkt->status = NVSP_STAT_FAIL; + +	return ret; +} + +static int rndis_filter_query_device(struct rndis_device *dev, u32 oid, +				  void *result, u32 *result_size) +{ +	struct rndis_request *request; +	u32 inresult_size = *result_size; +	struct rndis_query_request *query; +	struct rndis_query_complete *query_complete; +	int ret = 0; +	int t; + +	if (!result) +		return -EINVAL; + +	*result_size = 0; +	request = get_rndis_request(dev, RNDIS_MSG_QUERY, +			RNDIS_MESSAGE_SIZE(struct rndis_query_request)); +	if (!request) { +		ret = -ENOMEM; +		goto cleanup; +	} + +	/* Setup the rndis query */ +	query = &request->request_msg.msg.query_req; +	query->oid = oid; +	query->info_buf_offset = sizeof(struct rndis_query_request); +	query->info_buflen = 0; +	query->dev_vc_handle = 0; + +	if (oid == OID_GEN_RECEIVE_SCALE_CAPABILITIES) { +		struct ndis_recv_scale_cap *cap; + +		request->request_msg.msg_len += +			sizeof(struct ndis_recv_scale_cap); +		query->info_buflen = sizeof(struct ndis_recv_scale_cap); +		cap = (struct ndis_recv_scale_cap *)((unsigned long)query + +						     query->info_buf_offset); +		cap->hdr.type = NDIS_OBJECT_TYPE_RSS_CAPABILITIES; +		cap->hdr.rev = NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2; +		cap->hdr.size = sizeof(struct ndis_recv_scale_cap); +	} + +	ret = rndis_filter_send_request(dev, request); +	if (ret != 0) +		goto cleanup; + +	t = wait_for_completion_timeout(&request->wait_event, 5*HZ); +	if (t == 0) { +		ret = -ETIMEDOUT; +		goto cleanup; +	} + +	/* Copy the response back */ +	query_complete = 
&request->response_msg.msg.query_complete; + +	if (query_complete->info_buflen > inresult_size) { +		ret = -1; +		goto cleanup; +	} + +	memcpy(result, +	       (void *)((unsigned long)query_complete + +			 query_complete->info_buf_offset), +	       query_complete->info_buflen); + +	*result_size = query_complete->info_buflen; + +cleanup: +	if (request) +		put_rndis_request(dev, request); + +	return ret; +} + +static int rndis_filter_query_device_mac(struct rndis_device *dev) +{ +	u32 size = ETH_ALEN; + +	return rndis_filter_query_device(dev, +				      RNDIS_OID_802_3_PERMANENT_ADDRESS, +				      dev->hw_mac_adr, &size); +} + +#define NWADR_STR "NetworkAddress" +#define NWADR_STRLEN 14 + +int rndis_filter_set_device_mac(struct hv_device *hdev, char *mac) +{ +	struct netvsc_device *nvdev = hv_get_drvdata(hdev); +	struct rndis_device *rdev = nvdev->extension; +	struct net_device *ndev = nvdev->ndev; +	struct rndis_request *request; +	struct rndis_set_request *set; +	struct rndis_config_parameter_info *cpi; +	wchar_t *cfg_nwadr, *cfg_mac; +	struct rndis_set_complete *set_complete; +	char macstr[2*ETH_ALEN+1]; +	u32 extlen = sizeof(struct rndis_config_parameter_info) + +		2*NWADR_STRLEN + 4*ETH_ALEN; +	int ret, t; + +	request = get_rndis_request(rdev, RNDIS_MSG_SET, +		RNDIS_MESSAGE_SIZE(struct rndis_set_request) + extlen); +	if (!request) +		return -ENOMEM; + +	set = &request->request_msg.msg.set_req; +	set->oid = RNDIS_OID_GEN_RNDIS_CONFIG_PARAMETER; +	set->info_buflen = extlen; +	set->info_buf_offset = sizeof(struct rndis_set_request); +	set->dev_vc_handle = 0; + +	cpi = (struct rndis_config_parameter_info *)((ulong)set + +		set->info_buf_offset); +	cpi->parameter_name_offset = +		sizeof(struct rndis_config_parameter_info); +	/* Multiply by 2 because host needs 2 bytes (utf16) for each char */ +	cpi->parameter_name_length = 2*NWADR_STRLEN; +	cpi->parameter_type = RNDIS_CONFIG_PARAM_TYPE_STRING; +	cpi->parameter_value_offset = +		cpi->parameter_name_offset + 
cpi->parameter_name_length; +	/* Multiply by 4 because each MAC byte displayed as 2 utf16 chars */ +	cpi->parameter_value_length = 4*ETH_ALEN; + +	cfg_nwadr = (wchar_t *)((ulong)cpi + cpi->parameter_name_offset); +	cfg_mac = (wchar_t *)((ulong)cpi + cpi->parameter_value_offset); +	ret = utf8s_to_utf16s(NWADR_STR, NWADR_STRLEN, UTF16_HOST_ENDIAN, +			      cfg_nwadr, NWADR_STRLEN); +	if (ret < 0) +		goto cleanup; +	snprintf(macstr, 2*ETH_ALEN+1, "%pm", mac); +	ret = utf8s_to_utf16s(macstr, 2*ETH_ALEN, UTF16_HOST_ENDIAN, +			      cfg_mac, 2*ETH_ALEN); +	if (ret < 0) +		goto cleanup; + +	ret = rndis_filter_send_request(rdev, request); +	if (ret != 0) +		goto cleanup; + +	t = wait_for_completion_timeout(&request->wait_event, 5*HZ); +	if (t == 0) { +		netdev_err(ndev, "timeout before we got a set response...\n"); +		/* +		 * can't put_rndis_request, since we may still receive a +		 * send-completion. +		 */ +		return -EBUSY; +	} else { +		set_complete = &request->response_msg.msg.set_complete; +		if (set_complete->status != RNDIS_STATUS_SUCCESS) { +			netdev_err(ndev, "Fail to set MAC on host side:0x%x\n", +				   set_complete->status); +			ret = -EINVAL; +		} +	} + +cleanup: +	put_rndis_request(rdev, request); +	return ret; +} + +int rndis_filter_set_offload_params(struct hv_device *hdev, +				struct ndis_offload_params *req_offloads) +{ +	struct netvsc_device *nvdev = hv_get_drvdata(hdev); +	struct rndis_device *rdev = nvdev->extension; +	struct net_device *ndev = nvdev->ndev; +	struct rndis_request *request; +	struct rndis_set_request *set; +	struct ndis_offload_params *offload_params; +	struct rndis_set_complete *set_complete; +	u32 extlen = sizeof(struct ndis_offload_params); +	int ret, t; +	u32 vsp_version = nvdev->nvsp_version; + +	if (vsp_version <= NVSP_PROTOCOL_VERSION_4) { +		extlen = VERSION_4_OFFLOAD_SIZE; +		/* On NVSP_PROTOCOL_VERSION_4 and below, we do not support +		 * UDP checksum offload. 
+		 */ +		req_offloads->udp_ip_v4_csum = 0; +		req_offloads->udp_ip_v6_csum = 0; +	} + +	request = get_rndis_request(rdev, RNDIS_MSG_SET, +		RNDIS_MESSAGE_SIZE(struct rndis_set_request) + extlen); +	if (!request) +		return -ENOMEM; + +	set = &request->request_msg.msg.set_req; +	set->oid = OID_TCP_OFFLOAD_PARAMETERS; +	set->info_buflen = extlen; +	set->info_buf_offset = sizeof(struct rndis_set_request); +	set->dev_vc_handle = 0; + +	offload_params = (struct ndis_offload_params *)((ulong)set + +				set->info_buf_offset); +	*offload_params = *req_offloads; +	offload_params->header.type = NDIS_OBJECT_TYPE_DEFAULT; +	offload_params->header.revision = NDIS_OFFLOAD_PARAMETERS_REVISION_3; +	offload_params->header.size = extlen; + +	ret = rndis_filter_send_request(rdev, request); +	if (ret != 0) +		goto cleanup; + +	t = wait_for_completion_timeout(&request->wait_event, 5*HZ); +	if (t == 0) { +		netdev_err(ndev, "timeout before we got aOFFLOAD set response...\n"); +		/* can't put_rndis_request, since we may still receive a +		 * send-completion. 
+		 */ +		return -EBUSY; +	} else { +		set_complete = &request->response_msg.msg.set_complete; +		if (set_complete->status != RNDIS_STATUS_SUCCESS) { +			netdev_err(ndev, "Fail to set offload on host side:0x%x\n", +				   set_complete->status); +			ret = -EINVAL; +		} +	} + +cleanup: +	put_rndis_request(rdev, request); +	return ret; +} + +u8 netvsc_hash_key[HASH_KEYLEN] = { +	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, +	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, +	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, +	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, +	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa +}; + +int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue) +{ +	struct net_device *ndev = rdev->net_dev->ndev; +	struct rndis_request *request; +	struct rndis_set_request *set; +	struct rndis_set_complete *set_complete; +	u32 extlen = sizeof(struct ndis_recv_scale_param) + +		     4*ITAB_NUM + HASH_KEYLEN; +	struct ndis_recv_scale_param *rssp; +	u32 *itab; +	u8 *keyp; +	int i, t, ret; + +	request = get_rndis_request( +			rdev, RNDIS_MSG_SET, +			RNDIS_MESSAGE_SIZE(struct rndis_set_request) + extlen); +	if (!request) +		return -ENOMEM; + +	set = &request->request_msg.msg.set_req; +	set->oid = OID_GEN_RECEIVE_SCALE_PARAMETERS; +	set->info_buflen = extlen; +	set->info_buf_offset = sizeof(struct rndis_set_request); +	set->dev_vc_handle = 0; + +	rssp = (struct ndis_recv_scale_param *)(set + 1); +	rssp->hdr.type = NDIS_OBJECT_TYPE_RSS_PARAMETERS; +	rssp->hdr.rev = NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2; +	rssp->hdr.size = sizeof(struct ndis_recv_scale_param); +	rssp->flag = 0; +	rssp->hashinfo = NDIS_HASH_FUNC_TOEPLITZ | NDIS_HASH_IPV4 | +			 NDIS_HASH_TCP_IPV4; +	rssp->indirect_tabsize = 4*ITAB_NUM; +	rssp->indirect_taboffset = sizeof(struct ndis_recv_scale_param); +	rssp->hashkey_size = HASH_KEYLEN; +	rssp->kashkey_offset = rssp->indirect_taboffset + +			       rssp->indirect_tabsize; + +	/* Set indirection table entries */ +	itab = 
(u32 *)(rssp + 1); +	for (i = 0; i < ITAB_NUM; i++) +		itab[i] = i % num_queue; + +	/* Set hask key values */ +	keyp = (u8 *)((unsigned long)rssp + rssp->kashkey_offset); +	for (i = 0; i < HASH_KEYLEN; i++) +		keyp[i] = netvsc_hash_key[i]; + + +	ret = rndis_filter_send_request(rdev, request); +	if (ret != 0) +		goto cleanup; + +	t = wait_for_completion_timeout(&request->wait_event, 5*HZ); +	if (t == 0) { +		netdev_err(ndev, "timeout before we got a set response...\n"); +		/* can't put_rndis_request, since we may still receive a +		 * send-completion. +		 */ +		return -ETIMEDOUT; +	} else { +		set_complete = &request->response_msg.msg.set_complete; +		if (set_complete->status != RNDIS_STATUS_SUCCESS) { +			netdev_err(ndev, "Fail to set RSS parameters:0x%x\n", +				   set_complete->status); +			ret = -EINVAL; +		} +	} + +cleanup: +	put_rndis_request(rdev, request); +	return ret; +} + + +static int rndis_filter_query_device_link_status(struct rndis_device *dev) +{ +	u32 size = sizeof(u32); +	u32 link_status; +	int ret; + +	ret = rndis_filter_query_device(dev, +				      RNDIS_OID_GEN_MEDIA_CONNECT_STATUS, +				      &link_status, &size); + +	return ret; +} + +int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter) +{ +	struct rndis_request *request; +	struct rndis_set_request *set; +	struct rndis_set_complete *set_complete; +	u32 status; +	int ret, t; +	struct net_device *ndev; + +	ndev = dev->net_dev->ndev; + +	request = get_rndis_request(dev, RNDIS_MSG_SET, +			RNDIS_MESSAGE_SIZE(struct rndis_set_request) + +			sizeof(u32)); +	if (!request) { +		ret = -ENOMEM; +		goto cleanup; +	} + +	/* Setup the rndis set */ +	set = &request->request_msg.msg.set_req; +	set->oid = RNDIS_OID_GEN_CURRENT_PACKET_FILTER; +	set->info_buflen = sizeof(u32); +	set->info_buf_offset = sizeof(struct rndis_set_request); + +	memcpy((void *)(unsigned long)set + sizeof(struct rndis_set_request), +	       &new_filter, sizeof(u32)); + +	ret = rndis_filter_send_request(dev, 
request); +	if (ret != 0) +		goto cleanup; + +	t = wait_for_completion_timeout(&request->wait_event, 5*HZ); + +	if (t == 0) { +		netdev_err(ndev, +			"timeout before we got a set response...\n"); +		ret = -ETIMEDOUT; +		/* +		 * We can't deallocate the request since we may still receive a +		 * send completion for it. +		 */ +		goto exit; +	} else { +		set_complete = &request->response_msg.msg.set_complete; +		status = set_complete->status; +	} + +cleanup: +	if (request) +		put_rndis_request(dev, request); +exit: +	return ret; +} + + +static int rndis_filter_init_device(struct rndis_device *dev) +{ +	struct rndis_request *request; +	struct rndis_initialize_request *init; +	struct rndis_initialize_complete *init_complete; +	u32 status; +	int ret, t; + +	request = get_rndis_request(dev, RNDIS_MSG_INIT, +			RNDIS_MESSAGE_SIZE(struct rndis_initialize_request)); +	if (!request) { +		ret = -ENOMEM; +		goto cleanup; +	} + +	/* Setup the rndis set */ +	init = &request->request_msg.msg.init_req; +	init->major_ver = RNDIS_MAJOR_VERSION; +	init->minor_ver = RNDIS_MINOR_VERSION; +	init->max_xfer_size = 0x4000; + +	dev->state = RNDIS_DEV_INITIALIZING; + +	ret = rndis_filter_send_request(dev, request); +	if (ret != 0) { +		dev->state = RNDIS_DEV_UNINITIALIZED; +		goto cleanup; +	} + + +	t = wait_for_completion_timeout(&request->wait_event, 5*HZ); + +	if (t == 0) { +		ret = -ETIMEDOUT; +		goto cleanup; +	} + +	init_complete = &request->response_msg.msg.init_complete; +	status = init_complete->status; +	if (status == RNDIS_STATUS_SUCCESS) { +		dev->state = RNDIS_DEV_INITIALIZED; +		ret = 0; +	} else { +		dev->state = RNDIS_DEV_UNINITIALIZED; +		ret = -EINVAL; +	} + +cleanup: +	if (request) +		put_rndis_request(dev, request); + +	return ret; +} + +static void rndis_filter_halt_device(struct rndis_device *dev) +{ +	struct rndis_request *request; +	struct rndis_halt_request *halt; +	struct netvsc_device *nvdev = dev->net_dev; +	struct hv_device *hdev = nvdev->dev; +	ulong flags; + +	
/* Attempt to do a rndis device halt */ +	request = get_rndis_request(dev, RNDIS_MSG_HALT, +				RNDIS_MESSAGE_SIZE(struct rndis_halt_request)); +	if (!request) +		goto cleanup; + +	/* Setup the rndis set */ +	halt = &request->request_msg.msg.halt_req; +	halt->req_id = atomic_inc_return(&dev->new_req_id); + +	/* Ignore return since this msg is optional. */ +	rndis_filter_send_request(dev, request); + +	dev->state = RNDIS_DEV_UNINITIALIZED; + +cleanup: +	spin_lock_irqsave(&hdev->channel->inbound_lock, flags); +	nvdev->destroy = true; +	spin_unlock_irqrestore(&hdev->channel->inbound_lock, flags); + +	/* Wait for all send completions */ +	wait_event(nvdev->wait_drain, +		atomic_read(&nvdev->num_outstanding_sends) == 0); + +	if (request) +		put_rndis_request(dev, request); +	return; +} + +static int rndis_filter_open_device(struct rndis_device *dev) +{ +	int ret; + +	if (dev->state != RNDIS_DEV_INITIALIZED) +		return 0; + +	ret = rndis_filter_set_packet_filter(dev, +					 NDIS_PACKET_TYPE_BROADCAST | +					 NDIS_PACKET_TYPE_ALL_MULTICAST | +					 NDIS_PACKET_TYPE_DIRECTED); +	if (ret == 0) +		dev->state = RNDIS_DEV_DATAINITIALIZED; + +	return ret; +} + +static int rndis_filter_close_device(struct rndis_device *dev) +{ +	int ret; + +	if (dev->state != RNDIS_DEV_DATAINITIALIZED) +		return 0; + +	ret = rndis_filter_set_packet_filter(dev, 0); +	if (ret == 0) +		dev->state = RNDIS_DEV_INITIALIZED; + +	return ret; +} + +static void netvsc_sc_open(struct vmbus_channel *new_sc) +{ +	struct netvsc_device *nvscdev; +	u16 chn_index = new_sc->offermsg.offer.sub_channel_index; +	int ret; + +	nvscdev = hv_get_drvdata(new_sc->primary_channel->device_obj); + +	if (chn_index >= nvscdev->num_chn) +		return; + +	set_per_channel_state(new_sc, nvscdev->sub_cb_buf + (chn_index - 1) * +			      NETVSC_PACKET_SIZE); + +	ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE, +			 nvscdev->ring_size * PAGE_SIZE, NULL, 0, +			 netvsc_channel_cb, new_sc); + +	if (ret == 0) +		
nvscdev->chn_table[chn_index] = new_sc; +} + +int rndis_filter_device_add(struct hv_device *dev, +				  void *additional_info) +{ +	int ret; +	struct netvsc_device *net_device; +	struct rndis_device *rndis_device; +	struct netvsc_device_info *device_info = additional_info; +	struct ndis_offload_params offloads; +	struct nvsp_message *init_packet; +	int t; +	struct ndis_recv_scale_cap rsscap; +	u32 rsscap_size = sizeof(struct ndis_recv_scale_cap); + +	rndis_device = get_rndis_device(); +	if (!rndis_device) +		return -ENODEV; + +	/* +	 * Let the inner driver handle this first to create the netvsc channel +	 * NOTE! Once the channel is created, we may get a receive callback +	 * (RndisFilterOnReceive()) before this call is completed +	 */ +	ret = netvsc_device_add(dev, additional_info); +	if (ret != 0) { +		kfree(rndis_device); +		return ret; +	} + + +	/* Initialize the rndis device */ +	net_device = hv_get_drvdata(dev); +	net_device->num_chn = 1; + +	net_device->extension = rndis_device; +	rndis_device->net_dev = net_device; + +	/* Send the rndis initialization message */ +	ret = rndis_filter_init_device(rndis_device); +	if (ret != 0) { +		rndis_filter_device_remove(dev); +		return ret; +	} + +	/* Get the mac address */ +	ret = rndis_filter_query_device_mac(rndis_device); +	if (ret != 0) { +		rndis_filter_device_remove(dev); +		return ret; +	} + +	memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN); + +	/* Turn on the offloads; the host supports all of the relevant +	 * offloads. +	 */ +	memset(&offloads, 0, sizeof(struct ndis_offload_params)); +	/* A value of zero means "no change"; now turn on what we +	 * want. 
+	 */ +	offloads.ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; +	offloads.tcp_ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; +	offloads.udp_ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; +	offloads.tcp_ip_v6_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; +	offloads.udp_ip_v6_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; +	offloads.lso_v2_ipv4 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED; + + +	ret = rndis_filter_set_offload_params(dev, &offloads); +	if (ret) +		goto err_dev_remv; + +	rndis_filter_query_device_link_status(rndis_device); + +	device_info->link_state = rndis_device->link_state; + +	dev_info(&dev->device, "Device MAC %pM link state %s\n", +		 rndis_device->hw_mac_adr, +		 device_info->link_state ? "down" : "up"); + +	if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5) +		return 0; + +	/* vRSS setup */ +	memset(&rsscap, 0, rsscap_size); +	ret = rndis_filter_query_device(rndis_device, +					OID_GEN_RECEIVE_SCALE_CAPABILITIES, +					&rsscap, &rsscap_size); +	if (ret || rsscap.num_recv_que < 2) +		goto out; + +	net_device->num_chn = (num_online_cpus() < rsscap.num_recv_que) ? 
+			       num_online_cpus() : rsscap.num_recv_que; +	if (net_device->num_chn == 1) +		goto out; + +	net_device->sub_cb_buf = vzalloc((net_device->num_chn - 1) * +					 NETVSC_PACKET_SIZE); +	if (!net_device->sub_cb_buf) { +		net_device->num_chn = 1; +		dev_info(&dev->device, "No memory for subchannels.\n"); +		goto out; +	} + +	vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open); + +	init_packet = &net_device->channel_init_pkt; +	memset(init_packet, 0, sizeof(struct nvsp_message)); +	init_packet->hdr.msg_type = NVSP_MSG5_TYPE_SUBCHANNEL; +	init_packet->msg.v5_msg.subchn_req.op = NVSP_SUBCHANNEL_ALLOCATE; +	init_packet->msg.v5_msg.subchn_req.num_subchannels = +						net_device->num_chn - 1; +	ret = vmbus_sendpacket(dev->channel, init_packet, +			       sizeof(struct nvsp_message), +			       (unsigned long)init_packet, +			       VM_PKT_DATA_INBAND, +			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); +	if (ret) +		goto out; +	t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ); +	if (t == 0) { +		ret = -ETIMEDOUT; +		goto out; +	} +	if (init_packet->msg.v5_msg.subchn_comp.status != +	    NVSP_STAT_SUCCESS) { +		ret = -ENODEV; +		goto out; +	} +	net_device->num_chn = 1 + +		init_packet->msg.v5_msg.subchn_comp.num_subchannels; + +	vmbus_are_subchannels_present(dev->channel); + +	ret = rndis_filter_set_rss_param(rndis_device, net_device->num_chn); + +out: +	if (ret) +		net_device->num_chn = 1; +	return 0; /* return 0 because primary channel can be used alone */ + +err_dev_remv: +	rndis_filter_device_remove(dev); +	return ret; +} + +void rndis_filter_device_remove(struct hv_device *dev) +{ +	struct netvsc_device *net_dev = hv_get_drvdata(dev); +	struct rndis_device *rndis_dev = net_dev->extension; + +	/* Halt and release the rndis device */ +	rndis_filter_halt_device(rndis_dev); + +	kfree(rndis_dev); +	net_dev->extension = NULL; + +	netvsc_device_remove(dev); +} + + +int rndis_filter_open(struct hv_device *dev) +{ +	struct netvsc_device 
*net_device = hv_get_drvdata(dev); + +	if (!net_device) +		return -EINVAL; + +	return rndis_filter_open_device(net_device->extension); +} + +int rndis_filter_close(struct hv_device *dev) +{ +	struct netvsc_device *nvdev = hv_get_drvdata(dev); + +	if (!nvdev) +		return -EINVAL; + +	return rndis_filter_close_device(nvdev->extension); +}  | 
