Diffstat (limited to 'net/openvswitch/datapath.c')
-rw-r--r--	net/openvswitch/datapath.c	1601
1 file changed, 626 insertions, 975 deletions
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 2aa13bd7f2b..9db4bf6740d 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2013 Nicira, Inc.
+ * Copyright (c) 2007-2014 Nicira, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -44,32 +44,54 @@
 #include <linux/netfilter_ipv4.h>
 #include <linux/inetdevice.h>
 #include <linux/list.h>
-#include <linux/lockdep.h>
 #include <linux/openvswitch.h>
 #include <linux/rculist.h>
 #include <linux/dmi.h>
-#include <linux/workqueue.h>
+#include <linux/genetlink.h>
+#include <net/genetlink.h>
 #include <net/genetlink.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 
 #include "datapath.h"
 #include "flow.h"
+#include "flow_table.h"
+#include "flow_netlink.h"
 #include "vport-internal_dev.h"
 #include "vport-netdev.h"
 
+int ovs_net_id __read_mostly;
-#define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
-static void rehash_flow_table(struct work_struct *work);
-static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);
+static struct genl_family dp_packet_genl_family;
+static struct genl_family dp_flow_genl_family;
+static struct genl_family dp_datapath_genl_family;
 
-int ovs_net_id __read_mostly;
+static struct genl_multicast_group ovs_dp_flow_multicast_group = {
+	.name = OVS_FLOW_MCGROUP
+};
+
+static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
+	.name = OVS_DATAPATH_MCGROUP
+};
+
+struct genl_multicast_group ovs_dp_vport_multicast_group = {
+	.name = OVS_VPORT_MCGROUP
+};
 
-static void ovs_notify(struct sk_buff *skb, struct genl_info *info,
-		       struct genl_multicast_group *grp)
+/* Check if need to build a reply message.
+ * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
+static bool ovs_must_notify(struct genl_info *info,
+			    const struct genl_multicast_group *grp)
 {
-	genl_notify(skb, genl_info_net(info), info->snd_portid,
-		    grp->id, info->nlhdr, GFP_KERNEL);
+	return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
+		netlink_has_listeners(genl_info_net(info)->genl_sock, 0);
+}
+
+static void ovs_notify(struct genl_family *family,
+		       struct sk_buff *skb, struct genl_info *info)
+{
+	genl_notify(family, skb, genl_info_net(info), info->snd_portid,
+		    0, info->nlhdr, GFP_KERNEL);
 }
 
 /**
@@ -112,10 +134,9 @@ int lockdep_ovsl_is_held(void)
 #endif
 
 static struct vport *new_vport(const struct vport_parms *);
-static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *,
+static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
 			     const struct dp_upcall_info *);
-static int queue_userspace_packet(struct net *, int dp_ifindex,
-				  struct sk_buff *,
+static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
 				  const struct dp_upcall_info *);
 
 /* Must be called with rcu_read_lock or ovs_mutex. */
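Aside: ovs_must_notify() above keys the reply decision off NLM_F_ECHO. For illustration only (standalone userspace C, hypothetical family id, not part of the patch), a requester that wants the applied object echoed back sets that flag on the request header:

	#include <linux/netlink.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		struct nlmsghdr nlh;

		memset(&nlh, 0, sizeof(nlh));
		nlh.nlmsg_len = NLMSG_HDRLEN;
		nlh.nlmsg_type = 0x18;	/* hypothetical genl family id */
		/* NLM_F_ECHO asks for the reply even with no multicast listeners */
		nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_ECHO;
		printf("request flags = %#x\n", (unsigned)nlh.nlmsg_flags);
		return 0;
	}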
@@ -137,7 +158,7 @@ static struct datapath *get_dp(struct net *net, int dp_ifindex)
 }
 
 /* Must be called with rcu_read_lock or ovs_mutex. */
-const char *ovs_dp_name(const struct datapath *dp)
+static const char *ovs_dp_name(const struct datapath *dp)
 {
 	struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
 	return vport->ops->get_name(vport);
@@ -165,7 +186,6 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
 {
 	struct datapath *dp = container_of(rcu, struct datapath, rcu);
 
-	ovs_flow_tbl_destroy((__force struct flow_table *)dp->table, false);
 	free_percpu(dp->stats_percpu);
 	release_net(ovs_dp_get_net(dp));
 	kfree(dp->ports);
@@ -178,6 +198,7 @@ static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
 	return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
 }
 
+/* Called with ovs_mutex or RCU read lock. */
 struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
 {
 	struct vport *vport;
@@ -225,6 +246,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 	struct dp_stats_percpu *stats;
 	struct sw_flow_key key;
 	u64 *stats_counter;
+	u32 n_mask_hit;
 	int error;
 
 	stats = this_cpu_ptr(dp->stats_percpu);
@@ -237,7 +259,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 	}
 
 	/* Look up flow. */
-	flow = ovs_flow_lookup(rcu_dereference(dp->table), &key);
+	flow = ovs_flow_tbl_lookup_stats(&dp->table, &key, &n_mask_hit);
 	if (unlikely(!flow)) {
 		struct dp_upcall_info upcall;
 
@@ -254,32 +276,22 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 	OVS_CB(skb)->flow = flow;
 	OVS_CB(skb)->pkt_key = &key;
 
-	stats_counter = &stats->n_hit;
-	ovs_flow_used(OVS_CB(skb)->flow, skb);
+	ovs_flow_stats_update(OVS_CB(skb)->flow, key.tp.flags, skb);
 	ovs_execute_actions(dp, skb);
+	stats_counter = &stats->n_hit;
 
 out:
 	/* Update datapath statistics. */
-	u64_stats_update_begin(&stats->sync);
+	u64_stats_update_begin(&stats->syncp);
 	(*stats_counter)++;
-	u64_stats_update_end(&stats->sync);
+	stats->n_mask_hit += n_mask_hit;
+	u64_stats_update_end(&stats->syncp);
 }
 
-static struct genl_family dp_packet_genl_family = {
-	.id = GENL_ID_GENERATE,
-	.hdrsize = sizeof(struct ovs_header),
-	.name = OVS_PACKET_FAMILY,
-	.version = OVS_PACKET_VERSION,
-	.maxattr = OVS_PACKET_ATTR_MAX,
-	.netnsok = true,
-	.parallel_ops = true,
-};
-
 int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
 		  const struct dp_upcall_info *upcall_info)
 {
 	struct dp_stats_percpu *stats;
-	int dp_ifindex;
 	int err;
 
 	if (upcall_info->portid == 0) {
@@ -287,16 +299,10 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
 		goto err;
 	}
 
-	dp_ifindex = get_dpifindex(dp);
-	if (!dp_ifindex) {
-		err = -ENODEV;
-		goto err;
-	}
-
 	if (!skb_is_gso(skb))
-		err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
+		err = queue_userspace_packet(dp, skb, upcall_info);
 	else
-		err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
+		err = queue_gso_packets(dp, skb, upcall_info);
 	if (err)
 		goto err;
 
@@ -305,15 +311,14 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
 err:
 	stats = this_cpu_ptr(dp->stats_percpu);
 
-	u64_stats_update_begin(&stats->sync);
+	u64_stats_update_begin(&stats->syncp);
 	stats->n_lost++;
-	u64_stats_update_end(&stats->sync);
+	u64_stats_update_end(&stats->syncp);
 
 	return err;
 }
 
-static int queue_gso_packets(struct net *net, int dp_ifindex,
-			     struct sk_buff *skb,
+static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
 			     const struct dp_upcall_info *upcall_info)
 {
 	unsigned short gso_type = skb_shinfo(skb)->gso_type;
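Aside: the sync-to-syncp rename above is the standard u64_stats_sync pattern; writers bracket updates with u64_stats_update_begin()/_end(), readers retry on a torn seqcount. A minimal sketch of the reader side, shaped like dp_stats_percpu (kernel-style, not a buildable module on its own):

	#include <linux/u64_stats_sync.h>

	struct pcpu_sketch {			/* stand-in for dp_stats_percpu */
		u64 n_hit;
		u64 n_mask_hit;
		struct u64_stats_sync syncp;
	};

	static void read_counters(const struct pcpu_sketch *s, u64 *hit, u64 *mask_hit)
	{
		unsigned int start;

		do {
			start = u64_stats_fetch_begin_irq(&s->syncp);
			*hit = s->n_hit;		/* 64-bit loads can tear on 32-bit */
			*mask_hit = s->n_mask_hit;	/* the seqcount retry catches that */
		} while (u64_stats_fetch_retry_irq(&s->syncp, start));
	}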
@@ -322,14 +327,14 @@ static int queue_gso_packets(struct net *net, int dp_ifindex,
 	struct sk_buff *segs, *nskb;
 	int err;
 
-	segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false);
+	segs = __skb_gso_segment(skb, NETIF_F_SG, false);
 	if (IS_ERR(segs))
 		return PTR_ERR(segs);
 
 	/* Queue all of the segments. */
 	skb = segs;
 	do {
-		err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info);
+		err = queue_userspace_packet(dp, skb, upcall_info);
 		if (err)
 			break;
 
@@ -382,11 +387,11 @@ static size_t key_attr_size(void)
 		+ nla_total_size(28); /* OVS_KEY_ATTR_ND */
 }
 
-static size_t upcall_msg_size(const struct sk_buff *skb,
-			      const struct nlattr *userdata)
+static size_t upcall_msg_size(const struct nlattr *userdata,
+			      unsigned int hdrlen)
 {
 	size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
-		+ nla_total_size(skb->len) /* OVS_PACKET_ATTR_PACKET */
+		+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
 		+ nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */
 
 	/* OVS_PACKET_ATTR_USERDATA */
@@ -396,15 +401,24 @@ static size_t upcall_msg_size(const struct sk_buff *skb,
 	return size;
 }
 
-static int queue_userspace_packet(struct net *net, int dp_ifindex,
-				  struct sk_buff *skb,
+static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
 				  const struct dp_upcall_info *upcall_info)
 {
 	struct ovs_header *upcall;
 	struct sk_buff *nskb = NULL;
 	struct sk_buff *user_skb; /* to be queued to userspace */
 	struct nlattr *nla;
-	int err;
+	struct genl_info info = {
+		.dst_sk = ovs_dp_get_net(dp)->genl_sock,
+		.snd_portid = upcall_info->portid,
+	};
+	size_t len;
+	unsigned int hlen;
+	int err, dp_ifindex;
+
+	dp_ifindex = get_dpifindex(dp);
+	if (!dp_ifindex)
+		return -ENODEV;
 
 	if (vlan_tx_tag_present(skb)) {
 		nskb = skb_clone(skb, GFP_ATOMIC);
@@ -424,7 +438,22 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
 		goto out;
 	}
 
-	user_skb = genlmsg_new(upcall_msg_size(skb, upcall_info->userdata), GFP_ATOMIC);
+	/* Complete checksum if needed */
+	if (skb->ip_summed == CHECKSUM_PARTIAL &&
+	    (err = skb_checksum_help(skb)))
+		goto out;
+
+	/* Older versions of OVS user space enforce alignment of the last
+	 * Netlink attribute to NLA_ALIGNTO which would require extensive
+	 * padding logic. Only perform zerocopy if padding is not required.
+	 */
+	if (dp->user_features & OVS_DP_F_UNALIGNED)
+		hlen = skb_zerocopy_headlen(skb);
+	else
+		hlen = skb->len;
+
+	len = upcall_msg_size(upcall_info->userdata, hlen);
+	user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);
 	if (!user_skb) {
 		err = -ENOMEM;
 		goto out;
@@ -435,7 +464,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
 	upcall->dp_ifindex = dp_ifindex;
 
 	nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
-	ovs_flow_to_nlattrs(upcall_info->key, upcall_info->key, user_skb);
+	ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb);
 	nla_nest_end(user_skb, nla);
 
 	if (upcall_info->userdata)
@@ -443,418 +472,36 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
 			  nla_len(upcall_info->userdata),
 			  nla_data(upcall_info->userdata));
 
-	nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);
-
-	skb_copy_and_csum_dev(skb, nla_data(nla));
-
-	genlmsg_end(user_skb, upcall);
-	err = genlmsg_unicast(net, user_skb, upcall_info->portid);
-
-out:
-	kfree_skb(nskb);
-	return err;
-}
-
-/* Called with ovs_mutex. */
-static int flush_flows(struct datapath *dp)
-{
-	struct flow_table *old_table;
-	struct flow_table *new_table;
-
-	old_table = ovsl_dereference(dp->table);
-	new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
-	if (!new_table)
-		return -ENOMEM;
-
-	rcu_assign_pointer(dp->table, new_table);
-
-	ovs_flow_tbl_destroy(old_table, true);
-	return 0;
-}
-
-static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, int attr_len)
-{
-
-	struct sw_flow_actions *acts;
-	int new_acts_size;
-	int req_size = NLA_ALIGN(attr_len);
-	int next_offset = offsetof(struct sw_flow_actions, actions) +
-					(*sfa)->actions_len;
-
-	if (req_size <= (ksize(*sfa) - next_offset))
+	/* Only reserve room for attribute header, packet data is added
+	 * in skb_zerocopy() */
+	if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
+		err = -ENOBUFS;
 		goto out;
-
-	new_acts_size = ksize(*sfa) * 2;
-
-	if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
-		if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
-			return ERR_PTR(-EMSGSIZE);
-		new_acts_size = MAX_ACTIONS_BUFSIZE;
-	}
-
-	acts = ovs_flow_actions_alloc(new_acts_size);
-	if (IS_ERR(acts))
-		return (void *)acts;
-
-	memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
-	acts->actions_len = (*sfa)->actions_len;
-	kfree(*sfa);
-	*sfa = acts;
-
-out:
-	(*sfa)->actions_len += req_size;
-	return  (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
-}
-
-static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len)
-{
-	struct nlattr *a;
-
-	a = reserve_sfa_size(sfa, nla_attr_size(len));
-	if (IS_ERR(a))
-		return PTR_ERR(a);
-
-	a->nla_type = attrtype;
-	a->nla_len = nla_attr_size(len);
-
-	if (data)
-		memcpy(nla_data(a), data, len);
-	memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
-
-	return 0;
-}
-
-static inline int add_nested_action_start(struct sw_flow_actions **sfa, int attrtype)
-{
-	int used = (*sfa)->actions_len;
-	int err;
-
-	err = add_action(sfa, attrtype, NULL, 0);
-	if (err)
-		return err;
-
-	return used;
-}
-
-static inline void add_nested_action_end(struct sw_flow_actions *sfa, int st_offset)
-{
-	struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + st_offset);
-
-	a->nla_len = sfa->actions_len - st_offset;
-}
-
-static int validate_and_copy_actions(const struct nlattr *attr,
-				     const struct sw_flow_key *key, int depth,
-				     struct sw_flow_actions **sfa);
-
-static int validate_and_copy_sample(const struct nlattr *attr,
-				    const struct sw_flow_key *key, int depth,
-				    struct sw_flow_actions **sfa)
-{
-	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
-	const struct nlattr *probability, *actions;
-	const struct nlattr *a;
-	int rem, start, err, st_acts;
-
-	memset(attrs, 0, sizeof(attrs));
-	nla_for_each_nested(a, attr, rem) {
-		int type = nla_type(a);
-		if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
-			return -EINVAL;
-		attrs[type] = a;
 	}
-	if (rem)
-		return -EINVAL;
+	nla->nla_len = nla_attr_size(skb->len);
 
-	probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
-	if (!probability || nla_len(probability) != sizeof(u32))
-		return -EINVAL;
-
-	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
-	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
-		return -EINVAL;
-
-	/* validation done, copy sample action. */
-	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE);
-	if (start < 0)
-		return start;
-	err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, nla_data(probability), sizeof(u32));
-	if (err)
-		return err;
-	st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS);
-	if (st_acts < 0)
-		return st_acts;
-
-	err = validate_and_copy_actions(actions, key, depth + 1, sfa);
+	err = skb_zerocopy(user_skb, skb, skb->len, hlen);
 	if (err)
-		return err;
-
-	add_nested_action_end(*sfa, st_acts);
-	add_nested_action_end(*sfa, start);
+		goto out;
 
-	return 0;
-}
+	/* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
+	if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
+		size_t plen = NLA_ALIGN(user_skb->len) - user_skb->len;
 
-static int validate_tp_port(const struct sw_flow_key *flow_key)
-{
-	if (flow_key->eth.type == htons(ETH_P_IP)) {
-		if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
-			return 0;
-	} else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
-		if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
-			return 0;
+		if (plen > 0)
+			memset(skb_put(user_skb, plen), 0, plen);
 	}
 
-	return -EINVAL;
-}
-
-static int validate_and_copy_set_tun(const struct nlattr *attr,
-				     struct sw_flow_actions **sfa)
-{
-	struct sw_flow_match match;
-	struct sw_flow_key key;
-	int err, start;
+	((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
 
-	ovs_match_init(&match, &key, NULL);
-	err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &match, false);
+	err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
+out:
 	if (err)
-		return err;
-
-	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
-	if (start < 0)
-		return start;
-
-	err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key,
-			sizeof(match.key->tun_key));
-	add_nested_action_end(*sfa, start);
-
+		skb_tx_error(skb);
+	kfree_skb(nskb);
 	return err;
 }
 
-static int validate_set(const struct nlattr *a,
-			const struct sw_flow_key *flow_key,
-			struct sw_flow_actions **sfa,
-			bool *set_tun)
-{
-	const struct nlattr *ovs_key = nla_data(a);
-	int key_type = nla_type(ovs_key);
-
-	/* There can be only one key in a action */
-	if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
-		return -EINVAL;
-
-	if (key_type > OVS_KEY_ATTR_MAX ||
-	   (ovs_key_lens[key_type] != nla_len(ovs_key) &&
-	    ovs_key_lens[key_type] != -1))
-		return -EINVAL;
-
-	switch (key_type) {
-	const struct ovs_key_ipv4 *ipv4_key;
-	const struct ovs_key_ipv6 *ipv6_key;
-	int err;
-
-	case OVS_KEY_ATTR_PRIORITY:
-	case OVS_KEY_ATTR_SKB_MARK:
-	case OVS_KEY_ATTR_ETHERNET:
-		break;
-
-	case OVS_KEY_ATTR_TUNNEL:
-		*set_tun = true;
-		err = validate_and_copy_set_tun(a, sfa);
-		if (err)
-			return err;
-		break;
-
-	case OVS_KEY_ATTR_IPV4:
-		if (flow_key->eth.type != htons(ETH_P_IP))
-			return -EINVAL;
-
-		if (!flow_key->ip.proto)
-			return -EINVAL;
-
-		ipv4_key = nla_data(ovs_key);
-		if (ipv4_key->ipv4_proto != flow_key->ip.proto)
-			return -EINVAL;
-
-		if (ipv4_key->ipv4_frag != flow_key->ip.frag)
-			return -EINVAL;
-
-		break;
-
-	case OVS_KEY_ATTR_IPV6:
-		if (flow_key->eth.type != htons(ETH_P_IPV6))
-			return -EINVAL;
-
-		if (!flow_key->ip.proto)
-			return -EINVAL;
-
-		ipv6_key = nla_data(ovs_key);
-		if (ipv6_key->ipv6_proto != flow_key->ip.proto)
-			return -EINVAL;
-
-		if (ipv6_key->ipv6_frag != flow_key->ip.frag)
-			return -EINVAL;
-
-		if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
-			return -EINVAL;
-
-		break;
-
-	case OVS_KEY_ATTR_TCP:
-		if (flow_key->ip.proto != IPPROTO_TCP)
-			return -EINVAL;
-
-		return validate_tp_port(flow_key);
-
-	case OVS_KEY_ATTR_UDP:
-		if (flow_key->ip.proto != IPPROTO_UDP)
-			return -EINVAL;
-
-		return validate_tp_port(flow_key);
-
-	case OVS_KEY_ATTR_SCTP:
-		if (flow_key->ip.proto != IPPROTO_SCTP)
-			return -EINVAL;
-
-		return validate_tp_port(flow_key);
-
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int validate_userspace(const struct nlattr *attr)
-{
-	static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] =	{
-		[OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
-		[OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
-	};
-	struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
-	int error;
-
-	error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
-				 attr, userspace_policy);
-	if (error)
-		return error;
-
-	if (!a[OVS_USERSPACE_ATTR_PID] ||
-	    !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
-		return -EINVAL;
-
-	return 0;
-}
-
-static int copy_action(const struct nlattr *from,
-		       struct sw_flow_actions **sfa)
-{
-	int totlen = NLA_ALIGN(from->nla_len);
-	struct nlattr *to;
-
-	to = reserve_sfa_size(sfa, from->nla_len);
-	if (IS_ERR(to))
-		return PTR_ERR(to);
-
-	memcpy(to, from, totlen);
-	return 0;
-}
-
-static int validate_and_copy_actions(const struct nlattr *attr,
-				     const struct sw_flow_key *key,
-				     int depth,
-				     struct sw_flow_actions **sfa)
-{
-	const struct nlattr *a;
-	int rem, err;
-
-	if (depth >= SAMPLE_ACTION_DEPTH)
-		return -EOVERFLOW;
-
-	nla_for_each_nested(a, attr, rem) {
-		/* Expected argument lengths, (u32)-1 for variable length. */
-		static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
-			[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
-			[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
-			[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
-			[OVS_ACTION_ATTR_POP_VLAN] = 0,
-			[OVS_ACTION_ATTR_SET] = (u32)-1,
-			[OVS_ACTION_ATTR_SAMPLE] = (u32)-1
-		};
-		const struct ovs_action_push_vlan *vlan;
-		int type = nla_type(a);
-		bool skip_copy;
-
-		if (type > OVS_ACTION_ATTR_MAX ||
-		    (action_lens[type] != nla_len(a) &&
-		     action_lens[type] != (u32)-1))
-			return -EINVAL;
-
-		skip_copy = false;
-		switch (type) {
-		case OVS_ACTION_ATTR_UNSPEC:
-			return -EINVAL;
-
-		case OVS_ACTION_ATTR_USERSPACE:
-			err = validate_userspace(a);
-			if (err)
-				return err;
-			break;
-
-		case OVS_ACTION_ATTR_OUTPUT:
-			if (nla_get_u32(a) >= DP_MAX_PORTS)
-				return -EINVAL;
-			break;
-
-
-		case OVS_ACTION_ATTR_POP_VLAN:
-			break;
-
-		case OVS_ACTION_ATTR_PUSH_VLAN:
-			vlan = nla_data(a);
-			if (vlan->vlan_tpid != htons(ETH_P_8021Q))
-				return -EINVAL;
-			if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
-				return -EINVAL;
-			break;
-
-		case OVS_ACTION_ATTR_SET:
-			err = validate_set(a, key, sfa, &skip_copy);
-			if (err)
-				return err;
-			break;
-
-		case OVS_ACTION_ATTR_SAMPLE:
-			err = validate_and_copy_sample(a, key, depth, sfa);
-			if (err)
-				return err;
-			skip_copy = true;
-			break;
-
-		default:
-			return -EINVAL;
-		}
-		if (!skip_copy) {
-			err = copy_action(a, sfa);
-			if (err)
-				return err;
-		}
-	}
-
-	if (rem > 0)
-		return -EINVAL;
-
-	return 0;
-}
-
-static void clear_stats(struct sw_flow *flow)
-{
-	flow->used = 0;
-	flow->tcp_flags = 0;
-	flow->packet_count = 0;
-	flow->byte_count = 0;
-}
-
 static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
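Aside: the padding added in queue_userspace_packet() above exists because Netlink attributes are aligned to NLA_ALIGNTO (4 bytes); when the linear copy path is used, the message tail must be padded out, which is what the memset(skb_put(...), 0, plen) does. The arithmetic, as a standalone userspace C sketch using the uapi macros:

	#include <linux/netlink.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned int msg_len = 4093;	/* arbitrary, not 4-byte aligned */
		unsigned int plen = NLA_ALIGN(msg_len) - msg_len;

		/* NLA_ALIGN() rounds up to a multiple of NLA_ALIGNTO (4) */
		printf("pad %u byte(s) to reach %u\n", plen, NLA_ALIGN(msg_len));
		return 0;
	}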
@@ -902,15 +549,16 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 	if (err)
 		goto err_flow_free;
 
-	err = ovs_flow_metadata_from_nlattrs(flow, a[OVS_PACKET_ATTR_KEY]);
+	err = ovs_nla_get_flow_metadata(flow, a[OVS_PACKET_ATTR_KEY]);
 	if (err)
 		goto err_flow_free;
 
-	acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
+	acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
 	err = PTR_ERR(acts);
 	if (IS_ERR(acts))
 		goto err_flow_free;
 
-	err = validate_and_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0, &acts);
+	err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
+				   &flow->key, 0, &acts);
 	rcu_assign_pointer(flow->sf_acts, acts);
 	if (err)
 		goto err_flow_free;
@@ -950,7 +598,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
 	[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
 };
 
-static struct genl_ops dp_packet_genl_ops[] = {
+static const struct genl_ops dp_packet_genl_ops[] = {
 	{ .cmd = OVS_PACKET_CMD_EXECUTE,
 	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
 	  .policy = packet_policy,
@@ -958,15 +606,30 @@ static struct genl_ops dp_packet_genl_ops[] = {
 	}
 };
 
-static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
+static struct genl_family dp_packet_genl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = sizeof(struct ovs_header),
+	.name = OVS_PACKET_FAMILY,
+	.version = OVS_PACKET_VERSION,
+	.maxattr = OVS_PACKET_ATTR_MAX,
+	.netnsok = true,
+	.parallel_ops = true,
+	.ops = dp_packet_genl_ops,
+	.n_ops = ARRAY_SIZE(dp_packet_genl_ops),
+};
+
+static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
+			 struct ovs_dp_megaflow_stats *mega_stats)
 {
-	struct flow_table *table;
 	int i;
 
-	table = rcu_dereference_check(dp->table, lockdep_ovsl_is_held());
-	stats->n_flows = ovs_flow_tbl_count(table);
+	memset(mega_stats, 0, sizeof(*mega_stats));
+
+	stats->n_flows = ovs_flow_tbl_count(&dp->table);
+	mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);
 
 	stats->n_hit = stats->n_missed = stats->n_lost = 0;
+
 	for_each_possible_cpu(i) {
 		const struct dp_stats_percpu *percpu_stats;
 		struct dp_stats_percpu local_stats;
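Aside: moving dp_packet_genl_family below its ops array (and likewise for the flow, datapath and vport families later in this patch) follows the newer genetlink convention where the family carries its ops table and multicast groups, so a single registration call wires everything up. A schematic of the pattern, with made-up names and the handlers elided (kernel-style sketch, not buildable on its own):

	static const struct genl_ops demo_genl_ops[] = {
		{
			.cmd = 1,	/* command number, made up */
			/* .flags, .policy, .doit would go here */
		},
	};

	static struct genl_family demo_genl_family = {
		.id = GENL_ID_GENERATE,		/* kernel assigns a free id */
		.name = "demo",
		.version = 1,
		.ops = demo_genl_ops,
		.n_ops = ARRAY_SIZE(demo_genl_ops),
	};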
@@ -975,130 +638,17 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
 		percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
 
 		do {
-			start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
+			start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
 			local_stats = *percpu_stats;
-		} while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
+		} while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
 
 		stats->n_hit += local_stats.n_hit;
 		stats->n_missed += local_stats.n_missed;
 		stats->n_lost += local_stats.n_lost;
+		mega_stats->n_mask_hit += local_stats.n_mask_hit;
 	}
 }
 
-static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
-	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
-	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
-	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
-};
-
-static struct genl_family dp_flow_genl_family = {
-	.id = GENL_ID_GENERATE,
-	.hdrsize = sizeof(struct ovs_header),
-	.name = OVS_FLOW_FAMILY,
-	.version = OVS_FLOW_VERSION,
-	.maxattr = OVS_FLOW_ATTR_MAX,
-	.netnsok = true,
-	.parallel_ops = true,
-};
-
-static struct genl_multicast_group ovs_dp_flow_multicast_group = {
-	.name = OVS_FLOW_MCGROUP
-};
-
-static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb);
-static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
-{
-	const struct nlattr *a;
-	struct nlattr *start;
-	int err = 0, rem;
-
-	start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
-	if (!start)
-		return -EMSGSIZE;
-
-	nla_for_each_nested(a, attr, rem) {
-		int type = nla_type(a);
-		struct nlattr *st_sample;
-
-		switch (type) {
-		case OVS_SAMPLE_ATTR_PROBABILITY:
-			if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, sizeof(u32), nla_data(a)))
-				return -EMSGSIZE;
-			break;
-		case OVS_SAMPLE_ATTR_ACTIONS:
-			st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
-			if (!st_sample)
-				return -EMSGSIZE;
-			err = actions_to_attr(nla_data(a), nla_len(a), skb);
-			if (err)
-				return err;
-			nla_nest_end(skb, st_sample);
-			break;
-		}
-	}
-
-	nla_nest_end(skb, start);
-	return err;
-}
-
-static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
-{
-	const struct nlattr *ovs_key = nla_data(a);
-	int key_type = nla_type(ovs_key);
-	struct nlattr *start;
-	int err;
-
-	switch (key_type) {
-	case OVS_KEY_ATTR_IPV4_TUNNEL:
-		start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
-		if (!start)
-			return -EMSGSIZE;
-
-		err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key),
-					     nla_data(ovs_key));
-		if (err)
-			return err;
-		nla_nest_end(skb, start);
-		break;
-	default:
-		if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
-			return -EMSGSIZE;
-		break;
-	}
-
-	return 0;
-}
-
-static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb)
-{
-	const struct nlattr *a;
-	int rem, err;
-
-	nla_for_each_attr(a, attr, len, rem) {
-		int type = nla_type(a);
-
-		switch (type) {
-		case OVS_ACTION_ATTR_SET:
-			err = set_action_to_attr(a, skb);
-			if (err)
-				return err;
-			break;
-
-		case OVS_ACTION_ATTR_SAMPLE:
-			err = sample_action_to_attr(a, skb);
-			if (err)
-				return err;
-			break;
-		default:
-			if (nla_put(skb, type, nla_len(a), nla_data(a)))
-				return -EMSGSIZE;
-			break;
-		}
-	}
-
-	return 0;
-}
-
 static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
 {
 	return NLMSG_ALIGN(sizeof(struct ovs_header))
@@ -1110,33 +660,32 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
 		+ nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
 }
 
-/* Called with ovs_mutex. */
-static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
+/* Called with ovs_mutex or RCU read lock. */
+static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
 				  struct sk_buff *skb, u32 portid,
 				  u32 seq, u32 flags, u8 cmd)
 {
 	const int skb_orig_len = skb->len;
 	struct nlattr *start;
 	struct ovs_flow_stats stats;
+	__be16 tcp_flags;
+	unsigned long used;
 	struct ovs_header *ovs_header;
 	struct nlattr *nla;
-	unsigned long used;
-	u8 tcp_flags;
 	int err;
 
 	ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
 	if (!ovs_header)
 		return -EMSGSIZE;
 
-	ovs_header->dp_ifindex = get_dpifindex(dp);
+	ovs_header->dp_ifindex = dp_ifindex;
 
 	/* Fill flow key. */
 	nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
 	if (!nla)
 		goto nla_put_failure;
 
-	err = ovs_flow_to_nlattrs(&flow->unmasked_key,
-			&flow->unmasked_key, skb);
+	err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb);
 	if (err)
 		goto error;
 	nla_nest_end(skb, nla);
@@ -1145,30 +694,24 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
 	if (!nla)
 		goto nla_put_failure;
 
-	err = ovs_flow_to_nlattrs(&flow->key, &flow->mask->key, skb);
+	err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb);
 	if (err)
 		goto error;
 	nla_nest_end(skb, nla);
 
-	spin_lock_bh(&flow->lock);
-	used = flow->used;
-	stats.n_packets = flow->packet_count;
-	stats.n_bytes = flow->byte_count;
-	tcp_flags = flow->tcp_flags;
-	spin_unlock_bh(&flow->lock);
+	ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
 
 	if (used &&
 	    nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
 		goto nla_put_failure;
 
 	if (stats.n_packets &&
-	    nla_put(skb, OVS_FLOW_ATTR_STATS,
-		    sizeof(struct ovs_flow_stats), &stats))
+	    nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats))
 		goto nla_put_failure;
 
-	if (tcp_flags &&
-	    nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags))
+	if ((u8)ntohs(tcp_flags) &&
+	     nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
 		goto nla_put_failure;
 
 	/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
@@ -1185,10 +728,10 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
 	if (start) {
 		const struct sw_flow_actions *sf_acts;
 
-		sf_acts = rcu_dereference_check(flow->sf_acts,
-						lockdep_ovsl_is_held());
+		sf_acts = rcu_dereference_ovsl(flow->sf_acts);
+		err = ovs_nla_put_actions(sf_acts->actions,
+					  sf_acts->actions_len, skb);
 
-		err = actions_to_attr(sf_acts->actions, sf_acts->actions_len, skb);
 		if (!err)
 			nla_nest_end(skb, start);
 		else {
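Aside: the flow now stores TCP flags as a __be16 (wide enough for the NS bit), while OVS_FLOW_ATTR_TCP_FLAGS remains a single octet, hence the (u8)ntohs() above to keep the classic low byte. A quick standalone illustration:

	#include <arpa/inet.h>
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint16_t flags_be = htons(0x0112);	/* 16-bit flags, e.g. NS|ACK|SYN */

		/* keep only the low 8 bits for the one-byte attribute */
		printf("%#x\n", (uint8_t)ntohs(flags_be));	/* prints 0x12 */
		return 0;
	}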
@@ -1209,138 +752,128 @@ error:
 	return err;
 }
 
-static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
+/* May not be called with RCU read lock. */
+static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
+					       struct genl_info *info,
+					       bool always)
 {
-	const struct sw_flow_actions *sf_acts;
+	struct sk_buff *skb;
 
-	sf_acts = ovsl_dereference(flow->sf_acts);
+	if (!always && !ovs_must_notify(info, &ovs_dp_flow_multicast_group))
+		return NULL;
 
-	return genlmsg_new(ovs_flow_cmd_msg_size(sf_acts), GFP_KERNEL);
+	skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	return skb;
 }
 
-static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
-					       struct datapath *dp,
-					       u32 portid, u32 seq, u8 cmd)
+/* Called with ovs_mutex. */
+static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
+					       int dp_ifindex,
+					       struct genl_info *info, u8 cmd,
+					       bool always)
 {
 	struct sk_buff *skb;
 	int retval;
 
-	skb = ovs_flow_cmd_alloc_info(flow);
-	if (!skb)
-		return ERR_PTR(-ENOMEM);
+	skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info,
+				      always);
+	if (!skb || IS_ERR(skb))
+		return skb;
 
-	retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd);
+	retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
					info->snd_portid, info->snd_seq, 0,
+					cmd);
 	BUG_ON(retval < 0);
 	return skb;
 }
 
-static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
+static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nlattr **a = info->attrs;
 	struct ovs_header *ovs_header = info->userhdr;
-	struct sw_flow_key key, masked_key;
-	struct sw_flow *flow = NULL;
+	struct sw_flow *flow, *new_flow;
 	struct sw_flow_mask mask;
 	struct sk_buff *reply;
 	struct datapath *dp;
-	struct flow_table *table;
-	struct sw_flow_actions *acts = NULL;
+	struct sw_flow_actions *acts;
 	struct sw_flow_match match;
 	int error;
 
-	/* Extract key. */
+	/* Must have key and actions. */
 	error = -EINVAL;
 	if (!a[OVS_FLOW_ATTR_KEY])
 		goto error;
+	if (!a[OVS_FLOW_ATTR_ACTIONS])
+		goto error;
 
-	ovs_match_init(&match, &key, &mask);
-	error = ovs_match_from_nlattrs(&match,
-			a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
-	if (error)
+	/* Most of the time we need to allocate a new flow, do it before
+	 * locking.
+	 */
+	new_flow = ovs_flow_alloc();
+	if (IS_ERR(new_flow)) {
+		error = PTR_ERR(new_flow);
 		goto error;
+	}
+
+	/* Extract key. */
+	ovs_match_init(&match, &new_flow->unmasked_key, &mask);
+	error = ovs_nla_get_match(&match,
+				  a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
+	if (error)
+		goto err_kfree_flow;
+
+	ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask);
 
 	/* Validate actions. */
-	if (a[OVS_FLOW_ATTR_ACTIONS]) {
-		acts = ovs_flow_actions_alloc(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
-		error = PTR_ERR(acts);
-		if (IS_ERR(acts))
-			goto error;
+	acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
+	error = PTR_ERR(acts);
+	if (IS_ERR(acts))
+		goto err_kfree_flow;
 
-		ovs_flow_key_mask(&masked_key, &key, &mask);
-		error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS],
-						  &masked_key, 0, &acts);
-		if (error) {
-			OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
-			goto err_kfree;
-		}
-	} else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
-		error = -EINVAL;
-		goto error;
+	error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
+				     0, &acts);
+	if (error) {
+		OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
+		goto err_kfree_acts;
+	}
+
+	reply = ovs_flow_cmd_alloc_info(acts, info, false);
+	if (IS_ERR(reply)) {
+		error = PTR_ERR(reply);
+		goto err_kfree_acts;
 	}
 
 	ovs_lock();
 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
-	error = -ENODEV;
-	if (!dp)
+	if (unlikely(!dp)) {
+		error = -ENODEV;
 		goto err_unlock_ovs;
-
-	table = ovsl_dereference(dp->table);
-
+	}
 	/* Check if this is a duplicate flow */
-	flow = ovs_flow_lookup(table, &key);
-	if (!flow) {
-		struct sw_flow_mask *mask_p;
-		/* Bail out if we're not allowed to create a new flow. */
-		error = -ENOENT;
-		if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
-			goto err_unlock_ovs;
-
-		/* Expand table, if necessary, to make room. */
-		if (ovs_flow_tbl_need_to_expand(table)) {
-			struct flow_table *new_table;
+	flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key);
+	if (likely(!flow)) {
+		rcu_assign_pointer(new_flow->sf_acts, acts);
 
-			new_table = ovs_flow_tbl_expand(table);
-			if (!IS_ERR(new_table)) {
-				rcu_assign_pointer(dp->table, new_table);
-				ovs_flow_tbl_destroy(table, true);
-				table = ovsl_dereference(dp->table);
-			}
-		}
-
-		/* Allocate flow. */
-		flow = ovs_flow_alloc();
-		if (IS_ERR(flow)) {
-			error = PTR_ERR(flow);
+		/* Put flow in bucket. */
+		error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
+		if (unlikely(error)) {
+			acts = NULL;
 			goto err_unlock_ovs;
 		}
-		clear_stats(flow);
-
-		flow->key = masked_key;
-		flow->unmasked_key = key;
-
-		/* Make sure mask is unique in the system */
-		mask_p = ovs_sw_flow_mask_find(table, &mask);
-		if (!mask_p) {
-			/* Allocate a new mask if none exsits. */
-			mask_p = ovs_sw_flow_mask_alloc();
-			if (!mask_p)
-				goto err_flow_free;
-			mask_p->key = mask.key;
-			mask_p->range = mask.range;
-			ovs_sw_flow_mask_insert(table, mask_p);
-		}
-		ovs_sw_flow_mask_add_ref(mask_p);
-		flow->mask = mask_p;
-		rcu_assign_pointer(flow->sf_acts, acts);
-
-		/* Put flow in bucket. */
-		ovs_flow_insert(table, flow);
-
-		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
-						info->snd_seq, OVS_FLOW_CMD_NEW);
+		if (unlikely(reply)) {
+			error = ovs_flow_cmd_fill_info(new_flow,
+						       ovs_header->dp_ifindex,
+						       reply, info->snd_portid,
+						       info->snd_seq, 0,
+						       OVS_FLOW_CMD_NEW);
+			BUG_ON(error < 0);
+		}
+		ovs_unlock();
 	} else {
-		/* We found a matching flow. */
 		struct sw_flow_actions *old_acts;
 
 		/* Bail out if we're not allowed to modify an existing flow.
@@ -1349,47 +882,153 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 		 * request.  We also accept NLM_F_EXCL in case that bug ever
 		 * gets fixed.
 		 */
-		error = -EEXIST;
-		if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
-		    info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
+		if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
+							 | NLM_F_EXCL))) {
+			error = -EEXIST;
 			goto err_unlock_ovs;
-
+		}
 		/* The unmasked key has to be the same for flow updates. */
-		error = -EINVAL;
-		if (!ovs_flow_cmp_unmasked_key(flow, &key, match.range.end)) {
-			OVS_NLERR("Flow modification message rejected, unmasked key does not match.\n");
-			goto err_unlock_ovs;
+		if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) {
+			flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+			if (!flow) {
+				error = -ENOENT;
+				goto err_unlock_ovs;
+			}
 		}
-
 		/* Update actions. */
 		old_acts = ovsl_dereference(flow->sf_acts);
 		rcu_assign_pointer(flow->sf_acts, acts);
-		ovs_flow_deferred_free_acts(old_acts);
 
-		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
-					       info->snd_seq, OVS_FLOW_CMD_NEW);
+		if (unlikely(reply)) {
+			error = ovs_flow_cmd_fill_info(flow,
+						       ovs_header->dp_ifindex,
+						       reply, info->snd_portid,
+						       info->snd_seq, 0,
+						       OVS_FLOW_CMD_NEW);
+			BUG_ON(error < 0);
+		}
+		ovs_unlock();
+
+		ovs_nla_free_flow_actions(old_acts);
+		ovs_flow_free(new_flow, false);
+	}
+
+	if (reply)
+		ovs_notify(&dp_flow_genl_family, reply, info);
+	return 0;
+
+err_unlock_ovs:
+	ovs_unlock();
+	kfree_skb(reply);
+err_kfree_acts:
+	kfree(acts);
+err_kfree_flow:
+	ovs_flow_free(new_flow, false);
+error:
+	return error;
+}
+
+static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct ovs_header *ovs_header = info->userhdr;
+	struct sw_flow_key key, masked_key;
+	struct sw_flow *flow;
+	struct sw_flow_mask mask;
+	struct sk_buff *reply = NULL;
+	struct datapath *dp;
+	struct sw_flow_actions *old_acts = NULL, *acts = NULL;
+	struct sw_flow_match match;
+	int error;
+
+	/* Extract key. */
+	error = -EINVAL;
+	if (!a[OVS_FLOW_ATTR_KEY])
+		goto error;
+
+	ovs_match_init(&match, &key, &mask);
+	error = ovs_nla_get_match(&match,
+				  a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
+	if (error)
+		goto error;
+
+	/* Validate actions. */
+	if (a[OVS_FLOW_ATTR_ACTIONS]) {
+		acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
+		error = PTR_ERR(acts);
+		if (IS_ERR(acts))
+			goto error;
+
+		ovs_flow_mask_key(&masked_key, &key, &mask);
+		error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS],
+					     &masked_key, 0, &acts);
+		if (error) {
+			OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
+			goto err_kfree_acts;
+		}
+	}
+
+	/* Can allocate before locking if have acts. */
+	if (acts) {
+		reply = ovs_flow_cmd_alloc_info(acts, info, false);
+		if (IS_ERR(reply)) {
+			error = PTR_ERR(reply);
+			goto err_kfree_acts;
+		}
+	}
+
+	ovs_lock();
+	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+	if (unlikely(!dp)) {
+		error = -ENODEV;
+		goto err_unlock_ovs;
+	}
+	/* Check that the flow exists. */
+	flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+	if (unlikely(!flow)) {
+		error = -ENOENT;
+		goto err_unlock_ovs;
+	}
+
+	/* Update actions, if present. */
+	if (likely(acts)) {
+		old_acts = ovsl_dereference(flow->sf_acts);
+		rcu_assign_pointer(flow->sf_acts, acts);
 
-		/* Clear stats. */
-		if (a[OVS_FLOW_ATTR_CLEAR]) {
-			spin_lock_bh(&flow->lock);
-			clear_stats(flow);
-			spin_unlock_bh(&flow->lock);
+		if (unlikely(reply)) {
+			error = ovs_flow_cmd_fill_info(flow,
+						       ovs_header->dp_ifindex,
+						       reply, info->snd_portid,
+						       info->snd_seq, 0,
+						       OVS_FLOW_CMD_NEW);
+			BUG_ON(error < 0);
+		}
+	} else {
+		/* Could not alloc without acts before locking. */
+		reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
+						info, OVS_FLOW_CMD_NEW, false);
+		if (unlikely(IS_ERR(reply))) {
+			error = PTR_ERR(reply);
+			goto err_unlock_ovs;
 		}
 	}
 
+	/* Clear stats. */
+	if (a[OVS_FLOW_ATTR_CLEAR])
+		ovs_flow_stats_clear(flow);
 	ovs_unlock();
 
-	if (!IS_ERR(reply))
-		ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
-	else
-		netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
-				ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
+	if (reply)
+		ovs_notify(&dp_flow_genl_family, reply, info);
+	if (old_acts)
+		ovs_nla_free_flow_actions(old_acts);
+
 	return 0;
 
-err_flow_free:
-	ovs_flow_free(flow, false);
 err_unlock_ovs:
 	ovs_unlock();
-err_kfree:
+	kfree_skb(reply);
+err_kfree_acts:
 	kfree(acts);
 error:
 	return error;
 }
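Aside: both update paths above rely on the usual RCU publish-then-reclaim order: the new actions are published with rcu_assign_pointer() under ovs_mutex, and the old ones are handed to ovs_nla_free_flow_actions() only after the reply has been filled in, so readers under rcu_read_lock() always see a consistent pointer. Schematically (condensed from the code above, assuming the free helper defers the actual kfree to an RCU grace period):

	old_acts = ovsl_dereference(flow->sf_acts);	/* ovs_mutex held */
	rcu_assign_pointer(flow->sf_acts, acts);	/* publish new actions */
	ovs_unlock();
	ovs_nla_free_flow_actions(old_acts);		/* reclaim via RCU */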
@@ -1403,7 +1042,6 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	struct sk_buff *reply;
 	struct sw_flow *flow;
 	struct datapath *dp;
-	struct flow_table *table;
 	struct sw_flow_match match;
 	int err;
 
@@ -1413,7 +1051,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	ovs_match_init(&match, &key, NULL);
-	err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL);
+	err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
 	if (err)
 		return err;
 
@@ -1424,15 +1062,14 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
 		goto unlock;
 	}
 
-	table = ovsl_dereference(dp->table);
-	flow = ovs_flow_lookup_unmasked_key(table, &match);
+	flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
 	if (!flow) {
 		err = -ENOENT;
 		goto unlock;
 	}
 
-	reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
-					info->snd_seq, OVS_FLOW_CMD_NEW);
+	reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
+					OVS_FLOW_CMD_NEW, true);
 	if (IS_ERR(reply)) {
 		err = PTR_ERR(reply);
 		goto unlock;
@@ -1453,50 +1090,56 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	struct sk_buff *reply;
 	struct sw_flow *flow;
 	struct datapath *dp;
-	struct flow_table *table;
 	struct sw_flow_match match;
 	int err;
 
+	if (likely(a[OVS_FLOW_ATTR_KEY])) {
+		ovs_match_init(&match, &key, NULL);
+		err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
+		if (unlikely(err))
+			return err;
+	}
+
 	ovs_lock();
 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
-	if (!dp) {
+	if (unlikely(!dp)) {
 		err = -ENODEV;
 		goto unlock;
 	}
 
-	if (!a[OVS_FLOW_ATTR_KEY]) {
-		err = flush_flows(dp);
+	if (unlikely(!a[OVS_FLOW_ATTR_KEY])) {
+		err = ovs_flow_tbl_flush(&dp->table);
 		goto unlock;
 	}
 
-	ovs_match_init(&match, &key, NULL);
-	err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL);
-	if (err)
-		goto unlock;
-
-	table = ovsl_dereference(dp->table);
-	flow = ovs_flow_lookup_unmasked_key(table, &match);
-	if (!flow) {
+	flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+	if (unlikely(!flow)) {
 		err = -ENOENT;
 		goto unlock;
 	}
 
-	reply = ovs_flow_cmd_alloc_info(flow);
-	if (!reply) {
-		err = -ENOMEM;
-		goto unlock;
-	}
-
-	ovs_flow_remove(table, flow);
+	ovs_flow_tbl_remove(&dp->table, flow);
+	ovs_unlock();
 
-	err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
-				     info->snd_seq, 0, OVS_FLOW_CMD_DEL);
-	BUG_ON(err < 0);
+	reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
+					info, false);
+	if (likely(reply)) {
+		if (likely(!IS_ERR(reply))) {
+			rcu_read_lock();	/*To keep RCU checker happy. */
+			err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
+						     reply, info->snd_portid,
+						     info->snd_seq, 0,
+						     OVS_FLOW_CMD_DEL);
+			rcu_read_unlock();
+			BUG_ON(err < 0);
+
+			ovs_notify(&dp_flow_genl_family, reply, info);
+		} else {
+			netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply));
+		}
+	}
 
 	ovs_flow_free(flow, true);
-	ovs_unlock();
-
-	ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
 	return 0;
 unlock:
 	ovs_unlock();
@@ -1506,8 +1149,8 @@ unlock:
 static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
+	struct table_instance *ti;
 	struct datapath *dp;
-	struct flow_table *table;
 
 	rcu_read_lock();
 	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
@@ -1516,18 +1159,18 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		return -ENODEV;
 	}
 
-	table = rcu_dereference(dp->table);
+	ti = rcu_dereference(dp->table.ti);
 	for (;;) {
 		struct sw_flow *flow;
 		u32 bucket, obj;
 
 		bucket = cb->args[0];
 		obj = cb->args[1];
-		flow = ovs_flow_dump_next(table, &bucket, &obj);
+		flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
 		if (!flow)
 			break;
 
-		if (ovs_flow_cmd_fill_info(flow, dp, skb,
+		if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
 					   NETLINK_CB(cb->skb).portid,
 					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
 					   OVS_FLOW_CMD_NEW) < 0)
@@ -1540,11 +1183,17 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
+static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
+	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
+	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
+	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
+};
+
 static struct genl_ops dp_flow_genl_ops[] = {
 	{ .cmd = OVS_FLOW_CMD_NEW,
 	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
 	  .policy = flow_policy,
-	  .doit = ovs_flow_cmd_new_or_set
+	  .doit = ovs_flow_cmd_new
 	},
 	{ .cmd = OVS_FLOW_CMD_DEL,
 	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
@@ -1560,27 +1209,22 @@ static struct genl_ops dp_flow_genl_ops[] = {
 	{ .cmd = OVS_FLOW_CMD_SET,
 	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
 	  .policy = flow_policy,
-	  .doit = ovs_flow_cmd_new_or_set,
+	  .doit = ovs_flow_cmd_set,
 	},
 };
 
-static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
-	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
-	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
-};
-
-static struct genl_family dp_datapath_genl_family = {
+static struct genl_family dp_flow_genl_family = {
 	.id = GENL_ID_GENERATE,
 	.hdrsize = sizeof(struct ovs_header),
-	.name = OVS_DATAPATH_FAMILY,
-	.version = OVS_DATAPATH_VERSION,
-	.maxattr = OVS_DP_ATTR_MAX,
+	.name = OVS_FLOW_FAMILY,
+	.version = OVS_FLOW_VERSION,
+	.maxattr = OVS_FLOW_ATTR_MAX,
 	.netnsok = true,
 	.parallel_ops = true,
-};
-
-static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
-	.name = OVS_DATAPATH_MCGROUP
+	.ops = dp_flow_genl_ops,
+	.n_ops = ARRAY_SIZE(dp_flow_genl_ops),
+	.mcgrps = &ovs_dp_flow_multicast_group,
+	.n_mcgrps = 1,
 };
 
 static size_t ovs_dp_cmd_msg_size(void)
@@ -1589,15 +1233,19 @@ static size_t ovs_dp_cmd_msg_size(void)
 	msgsize += nla_total_size(IFNAMSIZ);
 	msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
+	msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats));
+	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
 
 	return msgsize;
 }
 
+/* Called with ovs_mutex or RCU read lock. */
 static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
 				u32 portid, u32 seq, u32 flags, u8 cmd)
 {
 	struct ovs_header *ovs_header;
 	struct ovs_dp_stats dp_stats;
+	struct ovs_dp_megaflow_stats dp_megaflow_stats;
 	int err;
 
 	ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
@@ -1607,14 +1255,21 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
 
 	ovs_header->dp_ifindex = get_dpifindex(dp);
 
-	rcu_read_lock();
 	err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
-	rcu_read_unlock();
 	if (err)
 		goto nla_put_failure;
 
-	get_dp_stats(dp, &dp_stats);
-	if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats))
+	get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
+	if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
+			&dp_stats))
+		goto nla_put_failure;
+
+	if (nla_put(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
+			sizeof(struct ovs_dp_megaflow_stats),
+			&dp_megaflow_stats))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
 		goto nla_put_failure;
 
 	return genlmsg_end(skb, ovs_header);
@@ -1625,25 +1280,12 @@ error:
 	return -EMSGSIZE;
 }
 
-static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
-					     u32 seq, u8 cmd)
+static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info)
 {
-	struct sk_buff *skb;
-	int retval;
-
-	skb = genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
-	if (!skb)
-		return ERR_PTR(-ENOMEM);
-
-	retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd);
-	if (retval < 0) {
-		kfree_skb(skb);
-		return ERR_PTR(retval);
-	}
-	return skb;
+	return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL);
}
 
-/* Called with ovs_mutex. */
+/* Called with rcu_read_lock or ovs_mutex. */
 static struct datapath *lookup_datapath(struct net *net,
 					struct ovs_header *ovs_header,
 					struct nlattr *a[OVS_DP_ATTR_MAX + 1])
@@ -1655,14 +1297,30 @@ static struct datapath *lookup_datapath(struct net *net,
 	else {
 		struct vport *vport;
 
-		rcu_read_lock();
 		vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
 		dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
-		rcu_read_unlock();
 	}
 	return dp ? dp : ERR_PTR(-ENODEV);
 }
 
+static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info)
+{
+	struct datapath *dp;
+
+	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+	if (IS_ERR(dp))
+		return;
+
+	WARN(dp->user_features, "Dropping previously announced user features\n");
+	dp->user_features = 0;
+}
+
+static void ovs_dp_change(struct datapath *dp, struct nlattr **a)
+{
+	if (a[OVS_DP_ATTR_USER_FEATURES])
+		dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
+}
+
 static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nlattr **a = info->attrs;
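Aside: ovs_dp_change() just latches OVS_DP_ATTR_USER_FEATURES, a plain u32 bitmask that userspace supplies when creating or updating a datapath. For illustration (standalone userspace C, constants from the uapi header; the flag enables the zerocopy upcall path shown earlier):

	#include <linux/openvswitch.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned int user_features = 0;

		/* a daemon that can parse unaligned Netlink attributes opts in */
		user_features |= OVS_DP_F_UNALIGNED;
		printf("OVS_DP_ATTR_USER_FEATURES = %#x\n", user_features);
		return 0;
	}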
@@ -1677,29 +1335,30 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
 		goto err;
 
-	ovs_lock();
+	reply = ovs_dp_cmd_alloc_info(info);
+	if (!reply)
+		return -ENOMEM;
 
 	err = -ENOMEM;
 	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
 	if (dp == NULL)
-		goto err_unlock_ovs;
+		goto err_free_reply;
 
 	ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
 
 	/* Allocate table. */
-	err = -ENOMEM;
-	rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS));
-	if (!dp->table)
+	err = ovs_flow_tbl_init(&dp->table);
+	if (err)
 		goto err_free_dp;
 
-	dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
+	dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
 	if (!dp->stats_percpu) {
 		err = -ENOMEM;
 		goto err_destroy_table;
 	}
 
 	dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
-			GFP_KERNEL);
+			    GFP_KERNEL);
 	if (!dp->ports) {
 		err = -ENOMEM;
 		goto err_destroy_percpu;
@@ -1716,42 +1375,53 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	parms.port_no = OVSP_LOCAL;
 	parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
 
+	ovs_dp_change(dp, a);
+
+	/* So far only local changes have been made, now need the lock. */
+	ovs_lock();
+
 	vport = new_vport(&parms);
 	if (IS_ERR(vport)) {
 		err = PTR_ERR(vport);
 		if (err == -EBUSY)
 			err = -EEXIST;
 
+		if (err == -EEXIST) {
+			/* An outdated user space instance that does not understand
+			 * the concept of user_features has attempted to create a new
+			 * datapath and is likely to reuse it. Drop all user features.
+			 */
+			if (info->genlhdr->version < OVS_DP_VER_FEATURES)
+				ovs_dp_reset_user_features(skb, info);
+		}
+
 		goto err_destroy_ports_array;
 	}
 
-	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
-				      info->snd_seq, OVS_DP_CMD_NEW);
-	err = PTR_ERR(reply);
-	if (IS_ERR(reply))
-		goto err_destroy_local_port;
+	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
+				   info->snd_seq, 0, OVS_DP_CMD_NEW);
+	BUG_ON(err < 0);
 
 	ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
 	list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
 
 	ovs_unlock();
 
-	ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
+	ovs_notify(&dp_datapath_genl_family, reply, info);
 	return 0;
 
-err_destroy_local_port:
-	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
 err_destroy_ports_array:
+	ovs_unlock();
 	kfree(dp->ports);
 err_destroy_percpu:
 	free_percpu(dp->stats_percpu);
 err_destroy_table:
-	ovs_flow_tbl_destroy(ovsl_dereference(dp->table), false);
+	ovs_flow_tbl_destroy(&dp->table, false);
 err_free_dp:
 	release_net(ovs_dp_get_net(dp));
 	kfree(dp);
-err_unlock_ovs:
-	ovs_unlock();
+err_free_reply:
+	kfree_skb(reply);
 err:
 	return err;
 }
@@ -1773,10 +1443,13 @@ static void __dp_destroy(struct datapath *dp)
 	list_del_rcu(&dp->list_node);
 
 	/* OVSP_LOCAL is datapath internal port. We need to make sure that
-	 * all port in datapath are destroyed first before freeing datapath.
+	 * all ports in datapath are destroyed first before freeing datapath.
 	 */
 	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
 
+	/* RCU destroy the flow table */
+	ovs_flow_tbl_destroy(&dp->table, true);
+
 	call_rcu(&dp->rcu, destroy_dp_rcu);
 }
 
@@ -1786,26 +1459,30 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	struct datapath *dp;
 	int err;
 
+	reply = ovs_dp_cmd_alloc_info(info);
+	if (!reply)
+		return -ENOMEM;
+
 	ovs_lock();
 	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
 	err = PTR_ERR(dp);
 	if (IS_ERR(dp))
-		goto unlock;
+		goto err_unlock_free;
 
-	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
-				      info->snd_seq, OVS_DP_CMD_DEL);
-	err = PTR_ERR(reply);
-	if (IS_ERR(reply))
-		goto unlock;
+	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
+				   info->snd_seq, 0, OVS_DP_CMD_DEL);
+	BUG_ON(err < 0);
 
 	__dp_destroy(dp);
 
 	ovs_unlock();
 
-	ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
+	ovs_notify(&dp_datapath_genl_family, reply, info);
 
 	return 0;
-unlock:
+
+err_unlock_free:
 	ovs_unlock();
+	kfree_skb(reply);
 	return err;
 }
 
@@ -1815,28 +1492,30 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	struct datapath *dp;
 	int err;
 
+	reply = ovs_dp_cmd_alloc_info(info);
+	if (!reply)
+		return -ENOMEM;
+
 	ovs_lock();
 	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
 	err = PTR_ERR(dp);
 	if (IS_ERR(dp))
-		goto unlock;
+		goto err_unlock_free;
 
-	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
-				      info->snd_seq, OVS_DP_CMD_NEW);
-	if (IS_ERR(reply)) {
-		err = PTR_ERR(reply);
-		netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
-				ovs_dp_datapath_multicast_group.id, err);
-		err = 0;
-		goto unlock;
-	}
+	ovs_dp_change(dp, info->attrs);
+
+	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
+				   info->snd_seq, 0, OVS_DP_CMD_NEW);
+	BUG_ON(err < 0);
 
 	ovs_unlock();
 
-	ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
+	ovs_notify(&dp_datapath_genl_family, reply, info);
 
 	return 0;
-unlock:
+
+err_unlock_free:
 	ovs_unlock();
+	kfree_skb(reply);
 	return err;
 }
 
@@ -1846,25 +1525,26 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	struct datapath *dp;
 	int err;
 
-	ovs_lock();
+	reply = ovs_dp_cmd_alloc_info(info);
+	if (!reply)
+		return -ENOMEM;
+
+	rcu_read_lock();
 	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
 	if (IS_ERR(dp)) {
 		err = PTR_ERR(dp);
-		goto unlock;
-	}
-
-	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
-				      info->snd_seq, OVS_DP_CMD_NEW);
-	if (IS_ERR(reply)) {
-		err = PTR_ERR(reply);
-		goto unlock;
+		goto err_unlock_free;
 	}
+	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
+				   info->snd_seq, 0, OVS_DP_CMD_NEW);
+	BUG_ON(err < 0);
+	rcu_read_unlock();
 
-	ovs_unlock();
 	return genlmsg_reply(reply, info);
 
-unlock:
-	ovs_unlock();
+err_unlock_free:
+	rcu_read_unlock();
+	kfree_skb(reply);
 	return err;
 }
 
@@ -1891,6 +1571,12 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
+static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
+	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
+	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
+	[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
+};
+
 static struct genl_ops dp_datapath_genl_ops[] = {
 	{ .cmd = OVS_DP_CMD_NEW,
 	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
@@ -1915,27 +1601,18 @@ static struct genl_ops dp_datapath_genl_ops[] = {
 	},
 };
 
-static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
-	[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
-	[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
-	[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
-	[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
-	[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
-	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
-};
-
-static struct genl_family dp_vport_genl_family = {
+static struct genl_family dp_datapath_genl_family = {
 	.id = GENL_ID_GENERATE,
 	.hdrsize = sizeof(struct ovs_header),
-	.name = OVS_VPORT_FAMILY,
-	.version = OVS_VPORT_VERSION,
-	.maxattr = OVS_VPORT_ATTR_MAX,
+	.name = OVS_DATAPATH_FAMILY,
+	.version = OVS_DATAPATH_VERSION,
+	.maxattr = OVS_DP_ATTR_MAX,
 	.netnsok = true,
 	.parallel_ops = true,
-};
-
-struct genl_multicast_group ovs_dp_vport_multicast_group = {
-	.name = OVS_VPORT_MCGROUP
+	.ops = dp_datapath_genl_ops,
+	.n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
+	.mcgrps = &ovs_dp_datapath_multicast_group,
+	.n_mcgrps = 1,
 };
 
 /* Called with ovs_mutex or RCU read lock. */
@@ -1977,7 +1654,12 @@ error:
 	return err;
 }
 
-/* Called with ovs_mutex or RCU read lock. */
+static struct sk_buff *ovs_vport_cmd_alloc_info(void)
+{
+	return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+}
+
+/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
 struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
 					 u32 seq, u8 cmd)
 {
@@ -2039,33 +1721,35 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	u32 port_no;
 	int err;
 
-	err = -EINVAL;
 	if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
 	    !a[OVS_VPORT_ATTR_UPCALL_PID])
-		goto exit;
+		return -EINVAL;
+
+	port_no = a[OVS_VPORT_ATTR_PORT_NO]
+		? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0; +	if (port_no >= DP_MAX_PORTS) +		return -EFBIG; + +	reply = ovs_vport_cmd_alloc_info(); +	if (!reply) +		return -ENOMEM;  	ovs_lock();  	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);  	err = -ENODEV;  	if (!dp) -		goto exit_unlock; - -	if (a[OVS_VPORT_ATTR_PORT_NO]) { -		port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]); - -		err = -EFBIG; -		if (port_no >= DP_MAX_PORTS) -			goto exit_unlock; +		goto exit_unlock_free; +	if (port_no) {  		vport = ovs_vport_ovsl(dp, port_no);  		err = -EBUSY;  		if (vport) -			goto exit_unlock; +			goto exit_unlock_free;  	} else {  		for (port_no = 1; ; port_no++) {  			if (port_no >= DP_MAX_PORTS) {  				err = -EFBIG; -				goto exit_unlock; +				goto exit_unlock_free;  			}  			vport = ovs_vport_ovsl(dp, port_no);  			if (!vport) @@ -2083,22 +1767,19 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)  	vport = new_vport(&parms);  	err = PTR_ERR(vport);  	if (IS_ERR(vport)) -		goto exit_unlock; +		goto exit_unlock_free; -	err = 0; -	reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, -					 OVS_VPORT_CMD_NEW); -	if (IS_ERR(reply)) { -		err = PTR_ERR(reply); -		ovs_dp_detach_port(vport); -		goto exit_unlock; -	} +	err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, +				      info->snd_seq, 0, OVS_VPORT_CMD_NEW); +	BUG_ON(err < 0); +	ovs_unlock(); -	ovs_notify(reply, info, &ovs_dp_vport_multicast_group); +	ovs_notify(&dp_vport_genl_family, reply, info); +	return 0; -exit_unlock: +exit_unlock_free:  	ovs_unlock(); -exit: +	kfree_skb(reply);  	return err;  } @@ -2109,28 +1790,26 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)  	struct vport *vport;  	int err; +	reply = ovs_vport_cmd_alloc_info(); +	if (!reply) +		return -ENOMEM; +  	ovs_lock();  	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);  	err = PTR_ERR(vport);  	if (IS_ERR(vport)) -		goto exit_unlock; +		goto exit_unlock_free;  	if (a[OVS_VPORT_ATTR_TYPE] &&  	    nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {  		err = -EINVAL; -		goto exit_unlock; -	} - -	reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); -	if (!reply) { -		err = -ENOMEM; -		goto exit_unlock; +		goto exit_unlock_free;  	}  	if (a[OVS_VPORT_ATTR_OPTIONS]) {  		err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);  		if (err) -			goto exit_free; +			goto exit_unlock_free;  	}  	if (a[OVS_VPORT_ATTR_UPCALL_PID]) @@ -2141,13 +1820,12 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)  	BUG_ON(err < 0);  	ovs_unlock(); -	ovs_notify(reply, info, &ovs_dp_vport_multicast_group); +	ovs_notify(&dp_vport_genl_family, reply, info);  	return 0; -exit_free: -	kfree_skb(reply); -exit_unlock: +exit_unlock_free:  	ovs_unlock(); +	kfree_skb(reply);  	return err;  } @@ -2158,30 +1836,33 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)  	struct vport *vport;  	int err; +	reply = ovs_vport_cmd_alloc_info(); +	if (!reply) +		return -ENOMEM; +  	ovs_lock();  	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);  	err = PTR_ERR(vport);  	if (IS_ERR(vport)) -		goto exit_unlock; +		goto exit_unlock_free;  	if (vport->port_no == OVSP_LOCAL) {  		err = -EINVAL; -		goto exit_unlock; +		goto exit_unlock_free;  	} -	reply = ovs_vport_cmd_build_info(vport, info->snd_portid, -					 info->snd_seq, OVS_VPORT_CMD_DEL); -	err = PTR_ERR(reply); -	if (IS_ERR(reply)) -		goto exit_unlock; - -	err = 0; +	err = ovs_vport_cmd_fill_info(vport, reply, 
info->snd_portid, +				      info->snd_seq, 0, OVS_VPORT_CMD_DEL); +	BUG_ON(err < 0);  	ovs_dp_detach_port(vport); +	ovs_unlock(); -	ovs_notify(reply, info, &ovs_dp_vport_multicast_group); +	ovs_notify(&dp_vport_genl_family, reply, info); +	return 0; -exit_unlock: +exit_unlock_free:  	ovs_unlock(); +	kfree_skb(reply);  	return err;  } @@ -2193,24 +1874,25 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)  	struct vport *vport;  	int err; +	reply = ovs_vport_cmd_alloc_info(); +	if (!reply) +		return -ENOMEM; +  	rcu_read_lock();  	vport = lookup_vport(sock_net(skb->sk), ovs_header, a);  	err = PTR_ERR(vport);  	if (IS_ERR(vport)) -		goto exit_unlock; - -	reply = ovs_vport_cmd_build_info(vport, info->snd_portid, -					 info->snd_seq, OVS_VPORT_CMD_NEW); -	err = PTR_ERR(reply); -	if (IS_ERR(reply)) -		goto exit_unlock; - +		goto exit_unlock_free; +	err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, +				      info->snd_seq, 0, OVS_VPORT_CMD_NEW); +	BUG_ON(err < 0);  	rcu_read_unlock();  	return genlmsg_reply(reply, info); -exit_unlock: +exit_unlock_free:  	rcu_read_unlock(); +	kfree_skb(reply);  	return err;  } @@ -2221,11 +1903,12 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)  	int bucket = cb->args[0], skip = cb->args[1];  	int i, j = 0; +	rcu_read_lock();  	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); -	if (!dp) +	if (!dp) { +		rcu_read_unlock();  		return -ENODEV; - -	rcu_read_lock(); +	}  	for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {  		struct vport *vport; @@ -2252,6 +1935,15 @@ out:  	return skb->len;  } +static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { +	[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, +	[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) }, +	[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 }, +	[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, +	[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 }, +	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, +}; +  static struct genl_ops dp_vport_genl_ops[] = {  	{ .cmd = OVS_VPORT_CMD_NEW,  	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. 
*/ @@ -2276,26 +1968,25 @@ static struct genl_ops dp_vport_genl_ops[] = {  	},  }; -struct genl_family_and_ops { -	struct genl_family *family; -	struct genl_ops *ops; -	int n_ops; -	struct genl_multicast_group *group; +struct genl_family dp_vport_genl_family = { +	.id = GENL_ID_GENERATE, +	.hdrsize = sizeof(struct ovs_header), +	.name = OVS_VPORT_FAMILY, +	.version = OVS_VPORT_VERSION, +	.maxattr = OVS_VPORT_ATTR_MAX, +	.netnsok = true, +	.parallel_ops = true, +	.ops = dp_vport_genl_ops, +	.n_ops = ARRAY_SIZE(dp_vport_genl_ops), +	.mcgrps = &ovs_dp_vport_multicast_group, +	.n_mcgrps = 1,  }; -static const struct genl_family_and_ops dp_genl_families[] = { -	{ &dp_datapath_genl_family, -	  dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops), -	  &ovs_dp_datapath_multicast_group }, -	{ &dp_vport_genl_family, -	  dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops), -	  &ovs_dp_vport_multicast_group }, -	{ &dp_flow_genl_family, -	  dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops), -	  &ovs_dp_flow_multicast_group }, -	{ &dp_packet_genl_family, -	  dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops), -	  NULL }, +static struct genl_family * const dp_genl_families[] = { +	&dp_datapath_genl_family, +	&dp_vport_genl_family, +	&dp_flow_genl_family, +	&dp_packet_genl_family,  };  static void dp_unregister_genl(int n_families) @@ -2303,65 +1994,28 @@ static void dp_unregister_genl(int n_families)  	int i;  	for (i = 0; i < n_families; i++) -		genl_unregister_family(dp_genl_families[i].family); +		genl_unregister_family(dp_genl_families[i]);  }  static int dp_register_genl(void)  { -	int n_registered;  	int err;  	int i; -	n_registered = 0;  	for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) { -		const struct genl_family_and_ops *f = &dp_genl_families[i]; -		err = genl_register_family_with_ops(f->family, f->ops, -						    f->n_ops); +		err = genl_register_family(dp_genl_families[i]);  		if (err)  			goto error; -		n_registered++; - -		if (f->group) { -			err = genl_register_mc_group(f->family, f->group); -			if (err) -				goto error; -		}  	}  	return 0;  error: -	dp_unregister_genl(n_registered); +	dp_unregister_genl(i);  	return err;  } -static void rehash_flow_table(struct work_struct *work) -{ -	struct datapath *dp; -	struct net *net; - -	ovs_lock(); -	rtnl_lock(); -	for_each_net(net) { -		struct ovs_net *ovs_net = net_generic(net, ovs_net_id); - -		list_for_each_entry(dp, &ovs_net->dps, list_node) { -			struct flow_table *old_table = ovsl_dereference(dp->table); -			struct flow_table *new_table; - -			new_table = ovs_flow_tbl_rehash(old_table); -			if (!IS_ERR(new_table)) { -				rcu_assign_pointer(dp->table, new_table); -				ovs_flow_tbl_destroy(old_table, true); -			} -		} -	} -	rtnl_unlock(); -	ovs_unlock(); -	schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); -} -  static int __net_init ovs_init_net(struct net *net)  {  	struct ovs_net *ovs_net = net_generic(net, ovs_net_id); @@ -2419,8 +2073,6 @@ static int __init dp_init(void)  	if (err < 0)  		goto error_unreg_notifier; -	schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); -  	return 0;  error_unreg_notifier: @@ -2437,7 +2089,6 @@ error:  static void dp_cleanup(void)  { -	cancel_delayed_work_sync(&rehash_flow_wq);  	dp_unregister_genl(ARRAY_SIZE(dp_genl_families));  	unregister_netdevice_notifier(&ovs_dp_device_notifier);  	unregister_pernet_device(&ovs_net_ops);  | 
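
Two idioms recur throughout this diff and are easiest to see condensed. First, every doit handler (ovs_dp_cmd_new/set/del/get and the vport equivalents) is reworked so that the reply skb is allocated before ovs_lock() or rcu_read_lock() is taken: allocation is the only step that can fail or sleep, so hoisting it out shrinks the critical section, and filling a pre-sized skb cannot fail, which is why each fill is followed by BUG_ON(err < 0) and every failure path collapses into a single unlock-and-free label. A condensed sketch of that shape follows; ovs_cmd_example is a hypothetical stand-in for the real handlers, and nlmsg_new() stands in for the small ovs_*_cmd_alloc_info() wrappers the patch introduces.

	/* Sketch of the reworked handler shape; ovs_cmd_example is a
	 * hypothetical stand-in for ovs_dp_cmd_del() and friends. */
	static int ovs_cmd_example(struct sk_buff *skb, struct genl_info *info)
	{
		struct sk_buff *reply;
		struct datapath *dp;
		int err;

		/* GFP_KERNEL allocation happens before the lock is taken. */
		reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
		if (!reply)
			return -ENOMEM;

		ovs_lock();
		dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
		err = PTR_ERR(dp);
		if (IS_ERR(dp))
			goto err_unlock_free;

		/* Filling a pre-sized skb cannot fail, hence the BUG_ON(). */
		err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
					   info->snd_seq, 0, OVS_DP_CMD_NEW);
		BUG_ON(err < 0);
		ovs_unlock();

		ovs_notify(&dp_datapath_genl_family, reply, info);
		return 0;

	err_unlock_free:
		ovs_unlock();
		kfree_skb(reply);	/* reply was never handed to netlink */
		return err;
	}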
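Second, the genl_family_and_ops indirection disappears because, in the genetlink API this tree targets, struct genl_family carries its ops and multicast groups directly (.ops/.n_ops/.mcgrps/.n_mcgrps), so registration reduces to a loop over an array of family pointers; on failure, the loop index is exactly the number of families already registered, which is why the error path can hand it straight to the unregister helper. A minimal sketch of that idiom, assuming two family structs defined elsewhere with their ops filled in (register_families is a hypothetical name; the diff's dp_register_genl() passes the count to dp_unregister_genl() instead of unwinding inline):

	static struct genl_family * const families[] = {
		&dp_datapath_genl_family,
		&dp_vport_genl_family,
	};

	static int register_families(void)
	{
		int err;
		int i;

		for (i = 0; i < ARRAY_SIZE(families); i++) {
			err = genl_register_family(families[i]);
			if (err)
				goto error;
		}
		return 0;

	error:
		/* 'i' families registered successfully; unwind only those. */
		while (i-- > 0)
			genl_unregister_family(families[i]);
		return err;
	}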
