20 files changed, 7937 insertions, 0 deletions
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
new file mode 100644
index 00000000000..6ecf491ad50
--- /dev/null
+++ b/net/openvswitch/Kconfig
@@ -0,0 +1,56 @@
+#
+# Open vSwitch
+#
+
+config OPENVSWITCH
+	tristate "Open vSwitch"
+	select LIBCRC32C
+	---help---
+	  Open vSwitch is a multilayer Ethernet switch targeted at virtualized
+	  environments.  In addition to supporting a variety of features
+	  expected in a traditional hardware switch, it enables fine-grained
+	  programmatic extension and flow-based control of the network.  This
+	  control is useful in a wide variety of applications but is
+	  particularly important in multi-server virtualization deployments,
+	  which are often characterized by highly dynamic endpoints and the
+	  need to maintain logical abstractions for multiple tenants.
+
+	  The Open vSwitch datapath provides an in-kernel fast path for packet
+	  forwarding.  It is complemented by a userspace daemon, ovs-vswitchd,
+	  which is able to accept configuration from a variety of sources and
+	  translate it into packet processing rules.
+
+	  See http://openvswitch.org for more information and userspace
+	  utilities.
+
+	  To compile this code as a module, choose M here: the module will be
+	  called openvswitch.
+
+	  If unsure, say N.
+
+config OPENVSWITCH_GRE
+	bool "Open vSwitch GRE tunneling support"
+	depends on INET
+	depends on OPENVSWITCH
+	depends on NET_IPGRE_DEMUX && !(OPENVSWITCH=y && NET_IPGRE_DEMUX=m)
+	default y
+	---help---
+	  If you say Y here, then the Open vSwitch will be able create GRE
+	  vport.
+
+	  Say N to exclude this support and reduce the binary size.
+
+	  If unsure, say Y.
+
+config OPENVSWITCH_VXLAN
+	bool "Open vSwitch VXLAN tunneling support"
+	depends on INET
+	depends on OPENVSWITCH
+	depends on VXLAN && !(OPENVSWITCH=y && VXLAN=m)
+	default y
+	---help---
+	  If you say Y here, then the Open vSwitch will be able create vxlan vport.
+
+	  Say N to exclude this support and reduce the binary size.
+
+	  If unsure, say Y.
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
new file mode 100644
index 00000000000..3591cb5dae9
--- /dev/null
+++ b/net/openvswitch/Makefile
@@ -0,0 +1,24 @@
+#
+# Makefile for Open vSwitch.
+#
+
+obj-$(CONFIG_OPENVSWITCH) += openvswitch.o
+
+openvswitch-y := \
+	actions.o \
+	datapath.o \
+	dp_notify.o \
+	flow.o \
+	flow_netlink.o \
+	flow_table.o \
+	vport.o \
+	vport-internal_dev.o \
+	vport-netdev.o
+
+ifneq ($(CONFIG_OPENVSWITCH_VXLAN),)
+openvswitch-y += vport-vxlan.o
+endif
+
+ifneq ($(CONFIG_OPENVSWITCH_GRE),)
+openvswitch-y += vport-gre.o
+endif
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
new file mode 100644
index 00000000000..e70d8b18e96
--- /dev/null
+++ b/net/openvswitch/actions.c
@@ -0,0 +1,584 @@
+/*
+ * Copyright (c) 2007-2013 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/openvswitch.h>
+#include <linux/sctp.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/in6.h>
+#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/checksum.h>
+#include <net/dsfield.h>
+#include <net/sctp/checksum.h>
+
+#include "datapath.h"
+#include "vport.h"
+
+static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
+			const struct nlattr *attr, int len, bool keep_skb);
+
+static int make_writable(struct sk_buff *skb, int write_len)
+{
+	if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
+		return 0;
+
+	return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+}
+
+/* remove VLAN header from packet and update csum accordingly. */
+static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
+{
+	struct vlan_hdr *vhdr;
+	int err;
+
+	err = make_writable(skb, VLAN_ETH_HLEN);
+	if (unlikely(err))
+		return err;
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->csum = csum_sub(skb->csum, csum_partial(skb->data
+					+ (2 * ETH_ALEN), VLAN_HLEN, 0));
+
+	vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
+	*current_tci = vhdr->h_vlan_TCI;
+
+	memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
+	__skb_pull(skb, VLAN_HLEN);
+
+	vlan_set_encap_proto(skb, vhdr);
+	skb->mac_header += VLAN_HLEN;
+	skb_reset_mac_len(skb);
+
+	return 0;
+}
+
+static int pop_vlan(struct sk_buff *skb)
+{
+	__be16 tci;
+	int err;
+
+	if (likely(vlan_tx_tag_present(skb))) {
+		skb->vlan_tci = 0;
+	} else {
+		if (unlikely(skb->protocol != htons(ETH_P_8021Q) ||
+			     skb->len < VLAN_ETH_HLEN))
+			return 0;
+
+		err = __pop_vlan_tci(skb, &tci);
+		if (err)
+			return err;
+	}
+	/* move next vlan tag to hw accel tag */
+	if (likely(skb->protocol != htons(ETH_P_8021Q) ||
+		   skb->len < VLAN_ETH_HLEN))
+		return 0;
+
+	err = __pop_vlan_tci(skb, &tci);
+	if (unlikely(err))
+		return err;
+
+	__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(tci));
+	return 0;
+}
+
+static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vlan)
+{
+	if (unlikely(vlan_tx_tag_present(skb))) {
+		u16 current_tag;
+
+		/* push down current VLAN tag */
+		current_tag = vlan_tx_tag_get(skb);
+
+		if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag))
+			return -ENOMEM;
+
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->csum = csum_add(skb->csum, csum_partial(skb->data
+					+ (2 * ETH_ALEN), VLAN_HLEN, 0));
+
+	}
+	__vlan_hwaccel_put_tag(skb, vlan->vlan_tpid, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
+	return 0;
+}
+
+static int set_eth_addr(struct sk_buff *skb,
+			const struct ovs_key_ethernet *eth_key)
+{
+	int err;
+	err = make_writable(skb, ETH_HLEN);
+	if (unlikely(err))
+		return err;
+
+	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
+
+	ether_addr_copy(eth_hdr(skb)->h_source, eth_key->eth_src);
+	ether_addr_copy(eth_hdr(skb)->h_dest, eth_key->eth_dst);
+
+	ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
+
+	return 0;
+}
+
+static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
+				__be32 *addr, __be32 new_addr)
+{
+	int transport_len = skb->len - skb_transport_offset(skb);
+
+	if (nh->protocol == IPPROTO_TCP) {
+		if (likely(transport_len >= sizeof(struct tcphdr)))
+			inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
+						 *addr, new_addr, 1);
+	} else if (nh->protocol == IPPROTO_UDP) {
+		if (likely(transport_len >= sizeof(struct udphdr))) {
+			struct udphdr *uh = udp_hdr(skb);
+
+			if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+				inet_proto_csum_replace4(&uh->check, skb,
+							 *addr, new_addr, 1);
+				if (!uh->check)
+					uh->check = CSUM_MANGLED_0;
+			}
+		}
+	}
+
+	csum_replace4(&nh->check, *addr, new_addr);
+	skb_clear_hash(skb);
+	*addr = new_addr;
+}
+
+static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
+				 __be32 addr[4], const __be32 new_addr[4])
+{
+	int transport_len = skb->len - skb_transport_offset(skb);
+
+	if (l4_proto == IPPROTO_TCP) {
+		if (likely(transport_len >= sizeof(struct tcphdr)))
+			inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb,
+						  addr, new_addr, 1);
+	} else if (l4_proto == IPPROTO_UDP) {
+		if (likely(transport_len >= sizeof(struct udphdr))) {
+			struct udphdr *uh = udp_hdr(skb);
+
+			if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+				inet_proto_csum_replace16(&uh->check, skb,
+							  addr, new_addr, 1);
+				if (!uh->check)
+					uh->check = CSUM_MANGLED_0;
+			}
+		}
+	}
+}
+
+static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
+			  __be32 addr[4], const __be32 new_addr[4],
+			  bool recalculate_csum)
+{
+	if (recalculate_csum)
+		update_ipv6_checksum(skb, l4_proto, addr, new_addr);
+
+	skb_clear_hash(skb);
+	memcpy(addr, new_addr, sizeof(__be32[4]));
+}
+
+static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc)
+{
+	nh->priority = tc >> 4;
+	nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4);
+}
+
+static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl)
+{
+	nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | (fl & 0x000F0000) >> 16;
+	nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8;
+	nh->flow_lbl[2] = fl & 0x000000FF;
+}
+
+static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl)
+{
+	csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
+	nh->ttl = new_ttl;
+}
+
+static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key)
+{
+	struct iphdr *nh;
+	int err;
+
+	err = make_writable(skb, skb_network_offset(skb) +
+				 sizeof(struct iphdr));
+	if (unlikely(err))
+		return err;
+
+	nh = ip_hdr(skb);
+
+	if (ipv4_key->ipv4_src != nh->saddr)
+		set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src);
+
+	if (ipv4_key->ipv4_dst != nh->daddr)
+		set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst);
+
+	if (ipv4_key->ipv4_tos != nh->tos)
+		ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos);
+
+	if (ipv4_key->ipv4_ttl != nh->ttl)
+		set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl);
+
+	return 0;
+}
+
+static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key)
+{
+	struct ipv6hdr *nh;
+	int err;
+	__be32 *saddr;
+	__be32 *daddr;
+
+	err = make_writable(skb, skb_network_offset(skb) +
+			    sizeof(struct ipv6hdr));
+	if (unlikely(err))
+		return err;
+
+	nh = ipv6_hdr(skb);
+	saddr = (__be32 *)&nh->saddr;
+	daddr = (__be32 *)&nh->daddr;
+
+	if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src)))
+		set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr,
+			      ipv6_key->ipv6_src, true);
+
+	if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) {
+		unsigned int offset = 0;
+		int flags = IP6_FH_F_SKIP_RH;
+		bool recalc_csum = true;
+
+		if (ipv6_ext_hdr(nh->nexthdr))
+			recalc_csum = ipv6_find_hdr(skb, &offset,
+						    NEXTHDR_ROUTING, NULL,
+						    &flags) != NEXTHDR_ROUTING;
+
+		set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr,
+			      ipv6_key->ipv6_dst, recalc_csum);
+	}
+
+	set_ipv6_tc(nh, ipv6_key->ipv6_tclass);
+	set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label));
+	nh->hop_limit = ipv6_key->ipv6_hlimit;
+
+	return 0;
+}
+
+/* Must follow make_writable() since that can move the skb data. */
+static void set_tp_port(struct sk_buff *skb, __be16 *port,
+			 __be16 new_port, __sum16 *check)
+{
+	inet_proto_csum_replace2(check, skb, *port, new_port, 0);
+	*port = new_port;
+	skb_clear_hash(skb);
+}
+
+static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port)
+{
+	struct udphdr *uh = udp_hdr(skb);
+
+	if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
+		set_tp_port(skb, port, new_port, &uh->check);
+
+		if (!uh->check)
+			uh->check = CSUM_MANGLED_0;
+	} else {
+		*port = new_port;
+		skb_clear_hash(skb);
+	}
+}
+
+static int set_udp(struct sk_buff *skb, const struct ovs_key_udp *udp_port_key)
+{
+	struct udphdr *uh;
+	int err;
+
+	err = make_writable(skb, skb_transport_offset(skb) +
+				 sizeof(struct udphdr));
+	if (unlikely(err))
+		return err;
+
+	uh = udp_hdr(skb);
+	if (udp_port_key->udp_src != uh->source)
+		set_udp_port(skb, &uh->source, udp_port_key->udp_src);
+
+	if (udp_port_key->udp_dst != uh->dest)
+		set_udp_port(skb, &uh->dest, udp_port_key->udp_dst);
+
+	return 0;
+}
+
+static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key)
+{
+	struct tcphdr *th;
+	int err;
+
+	err = make_writable(skb, skb_transport_offset(skb) +
+				 sizeof(struct tcphdr));
+	if (unlikely(err))
+		return err;
+
+	th = tcp_hdr(skb);
+	if (tcp_port_key->tcp_src != th->source)
+		set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check);
+
+	if (tcp_port_key->tcp_dst != th->dest)
+		set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check);
+
+	return 0;
+}
+
+static int set_sctp(struct sk_buff *skb,
+		     const struct ovs_key_sctp *sctp_port_key)
+{
+	struct sctphdr *sh;
+	int err;
+	unsigned int sctphoff = skb_transport_offset(skb);
+
+	err = make_writable(skb, sctphoff + sizeof(struct sctphdr));
+	if (unlikely(err))
+		return err;
+
+	sh = sctp_hdr(skb);
+	if (sctp_port_key->sctp_src != sh->source ||
+	    sctp_port_key->sctp_dst != sh->dest) {
+		__le32 old_correct_csum, new_csum, old_csum;
+
+		old_csum = sh->checksum;
+		old_correct_csum = sctp_compute_cksum(skb, sctphoff);
+
+		sh->source = sctp_port_key->sctp_src;
+		sh->dest = sctp_port_key->sctp_dst;
+
+		new_csum = sctp_compute_cksum(skb, sctphoff);
+
+		/* Carry any checksum errors through. */
+		sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
+
+		skb_clear_hash(skb);
+	}
+
+	return 0;
+}
+
+static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
+{
+	struct vport *vport;
+
+	if (unlikely(!skb))
+		return -ENOMEM;
+
+	vport = ovs_vport_rcu(dp, out_port);
+	if (unlikely(!vport)) {
+		kfree_skb(skb);
+		return -ENODEV;
+	}
+
+	ovs_vport_send(vport, skb);
+	return 0;
+}
+
+static int output_userspace(struct datapath *dp, struct sk_buff *skb,
+			    const struct nlattr *attr)
+{
+	struct dp_upcall_info upcall;
+	const struct nlattr *a;
+	int rem;
+
+	BUG_ON(!OVS_CB(skb)->pkt_key);
+
+	upcall.cmd = OVS_PACKET_CMD_ACTION;
+	upcall.key = OVS_CB(skb)->pkt_key;
+	upcall.userdata = NULL;
+	upcall.portid = 0;
+
+	for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
+		 a = nla_next(a, &rem)) {
+		switch (nla_type(a)) {
+		case OVS_USERSPACE_ATTR_USERDATA:
+			upcall.userdata = a;
+			break;
+
+		case OVS_USERSPACE_ATTR_PID:
+			upcall.portid = nla_get_u32(a);
+			break;
+		}
+	}
+
+	return ovs_dp_upcall(dp, skb, &upcall);
+}
+
+static int sample(struct datapath *dp, struct sk_buff *skb,
+		  const struct nlattr *attr)
+{
+	const struct nlattr *acts_list = NULL;
+	const struct nlattr *a;
+	int rem;
+
+	for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
+		 a = nla_next(a, &rem)) {
+		switch (nla_type(a)) {
+		case OVS_SAMPLE_ATTR_PROBABILITY:
+			if (prandom_u32() >= nla_get_u32(a))
+				return 0;
+			break;
+
+		case OVS_SAMPLE_ATTR_ACTIONS:
+			acts_list = a;
+			break;
+		}
+	}
+
+	return do_execute_actions(dp, skb, nla_data(acts_list),
+						 nla_len(acts_list), true);
+}
+
+static int execute_set_action(struct sk_buff *skb,
+				 const struct nlattr *nested_attr)
+{
+	int err = 0;
+
+	switch (nla_type(nested_attr)) {
+	case OVS_KEY_ATTR_PRIORITY:
+		skb->priority = nla_get_u32(nested_attr);
+		break;
+
+	case OVS_KEY_ATTR_SKB_MARK:
+		skb->mark = nla_get_u32(nested_attr);
+		break;
+
+	case OVS_KEY_ATTR_IPV4_TUNNEL:
+		OVS_CB(skb)->tun_key = nla_data(nested_attr);
+		break;
+
+	case OVS_KEY_ATTR_ETHERNET:
+		err = set_eth_addr(skb, nla_data(nested_attr));
+		break;
+
+	case OVS_KEY_ATTR_IPV4:
+		err = set_ipv4(skb, nla_data(nested_attr));
+		break;
+
+	case OVS_KEY_ATTR_IPV6:
+		err = set_ipv6(skb, nla_data(nested_attr));
+		break;
+
+	case OVS_KEY_ATTR_TCP:
+		err = set_tcp(skb, nla_data(nested_attr));
+		break;
+
+	case OVS_KEY_ATTR_UDP:
+		err = set_udp(skb, nla_data(nested_attr));
+		break;
+
+	case OVS_KEY_ATTR_SCTP:
+		err = set_sctp(skb, nla_data(nested_attr));
+		break;
+	}
+
+	return err;
+}
+
+/* Execute a list of actions against 'skb'. */
+static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
+			const struct nlattr *attr, int len, bool keep_skb)
+{
+	/* Every output action needs a separate clone of 'skb', but the common
+	 * case is just a single output action, so that doing a clone and
+	 * then freeing the original skbuff is wasteful.  So the following code
+	 * is slightly obscure just to avoid that. */
+	int prev_port = -1;
+	const struct nlattr *a;
+	int rem;
+
+	for (a = attr, rem = len; rem > 0;
+	     a = nla_next(a, &rem)) {
+		int err = 0;
+
+		if (prev_port != -1) {
+			do_output(dp, skb_clone(skb, GFP_ATOMIC), prev_port);
+			prev_port = -1;
+		}
+
+		switch (nla_type(a)) {
+		case OVS_ACTION_ATTR_OUTPUT:
+			prev_port = nla_get_u32(a);
+			break;
+
+		case OVS_ACTION_ATTR_USERSPACE:
+			output_userspace(dp, skb, a);
+			break;
+
+		case OVS_ACTION_ATTR_PUSH_VLAN:
+			err = push_vlan(skb, nla_data(a));
+			if (unlikely(err)) /* skb already freed. */
+				return err;
+			break;
+
+		case OVS_ACTION_ATTR_POP_VLAN:
+			err = pop_vlan(skb);
+			break;
+
+		case OVS_ACTION_ATTR_SET:
+			err = execute_set_action(skb, nla_data(a));
+			break;
+
+		case OVS_ACTION_ATTR_SAMPLE:
+			err = sample(dp, skb, a);
+			if (unlikely(err)) /* skb already freed. */
+				return err;
+			break;
+		}
+
+		if (unlikely(err)) {
+			kfree_skb(skb);
+			return err;
+		}
+	}
+
+	if (prev_port != -1) {
+		if (keep_skb)
+			skb = skb_clone(skb, GFP_ATOMIC);
+
+		do_output(dp, skb, prev_port);
+	} else if (!keep_skb)
+		consume_skb(skb);
+
+	return 0;
+}
+
+/* Execute a list of actions against 'skb'. */
+int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb)
+{
+	struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
+
+	OVS_CB(skb)->tun_key = NULL;
+	return do_execute_actions(dp, skb, acts->actions,
+					 acts->actions_len, false);
+}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
new file mode 100644
index 00000000000..9db4bf6740d
--- /dev/null
+++ b/net/openvswitch/datapath.c
@@ -0,0 +1,2104 @@
+/*
+ * Copyright (c) 2007-2014 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/jhash.h>
+#include <linux/delay.h>
+#include <linux/time.h>
+#include <linux/etherdevice.h>
+#include <linux/genetlink.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/ethtool.h>
+#include <linux/wait.h>
+#include <asm/div64.h>
+#include <linux/highmem.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/inetdevice.h>
+#include <linux/list.h>
+#include <linux/openvswitch.h>
+#include <linux/rculist.h>
+#include <linux/dmi.h>
+#include <linux/genetlink.h>
+#include <net/genetlink.h>
+#include <net/genetlink.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+#include "datapath.h"
+#include "flow.h"
+#include "flow_table.h"
+#include "flow_netlink.h"
+#include "vport-internal_dev.h"
+#include "vport-netdev.h"
+
+int ovs_net_id __read_mostly;
+
+static struct genl_family dp_packet_genl_family;
+static struct genl_family dp_flow_genl_family;
+static struct genl_family dp_datapath_genl_family;
+
+static struct genl_multicast_group ovs_dp_flow_multicast_group = {
+	.name = OVS_FLOW_MCGROUP
+};
+
+static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
+	.name = OVS_DATAPATH_MCGROUP
+};
+
+struct genl_multicast_group ovs_dp_vport_multicast_group = {
+	.name = OVS_VPORT_MCGROUP
+};
+
+/* Check if need to build a reply message.
+ * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
+static bool ovs_must_notify(struct genl_info *info,
+			    const struct genl_multicast_group *grp)
+{
+	return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
+		netlink_has_listeners(genl_info_net(info)->genl_sock, 0);
+}
+
+static void ovs_notify(struct genl_family *family,
+		       struct sk_buff *skb, struct genl_info *info)
+{
+	genl_notify(family, skb, genl_info_net(info), info->snd_portid,
+		    0, info->nlhdr, GFP_KERNEL);
+}
+
+/**
+ * DOC: Locking:
+ *
+ * All writes e.g. Writes to device state (add/remove datapath, port, set
+ * operations on vports, etc.), Writes to other state (flow table
+ * modifications, set miscellaneous datapath parameters, etc.) are protected
+ * by ovs_lock.
+ *
+ * Reads are protected by RCU.
+ *
+ * There are a few special cases (mostly stats) that have their own
+ * synchronization but they nest under all of above and don't interact with
+ * each other.
+ *
+ * The RTNL lock nests inside ovs_mutex.
+ */
+
+static DEFINE_MUTEX(ovs_mutex);
+
+void ovs_lock(void)
+{
+	mutex_lock(&ovs_mutex);
+}
+
+void ovs_unlock(void)
+{
+	mutex_unlock(&ovs_mutex);
+}
+
+#ifdef CONFIG_LOCKDEP
+int lockdep_ovsl_is_held(void)
+{
+	if (debug_locks)
+		return lockdep_is_held(&ovs_mutex);
+	else
+		return 1;
+}
+#endif
+
+static struct vport *new_vport(const struct vport_parms *);
+static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
+			     const struct dp_upcall_info *);
+static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
+				  const struct dp_upcall_info *);
+
+/* Must be called with rcu_read_lock or ovs_mutex. */
+static struct datapath *get_dp(struct net *net, int dp_ifindex)
+{
+	struct datapath *dp = NULL;
+	struct net_device *dev;
+
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(net, dp_ifindex);
+	if (dev) {
+		struct vport *vport = ovs_internal_dev_get_vport(dev);
+		if (vport)
+			dp = vport->dp;
+	}
+	rcu_read_unlock();
+
+	return dp;
+}
+
+/* Must be called with rcu_read_lock or ovs_mutex. */
+static const char *ovs_dp_name(const struct datapath *dp)
+{
+	struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
+	return vport->ops->get_name(vport);
+}
+
+static int get_dpifindex(struct datapath *dp)
+{
+	struct vport *local;
+	int ifindex;
+
+	rcu_read_lock();
+
+	local = ovs_vport_rcu(dp, OVSP_LOCAL);
+	if (local)
+		ifindex = netdev_vport_priv(local)->dev->ifindex;
+	else
+		ifindex = 0;
+
+	rcu_read_unlock();
+
+	return ifindex;
+}
+
+static void destroy_dp_rcu(struct rcu_head *rcu)
+{
+	struct datapath *dp = container_of(rcu, struct datapath, rcu);
+
+	free_percpu(dp->stats_percpu);
+	release_net(ovs_dp_get_net(dp));
+	kfree(dp->ports);
+	kfree(dp);
+}
+
+static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
+					    u16 port_no)
+{
+	return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
+{
+	struct vport *vport;
+	struct hlist_head *head;
+
+	head = vport_hash_bucket(dp, port_no);
+	hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
+		if (vport->port_no == port_no)
+			return vport;
+	}
+	return NULL;
+}
+
+/* Called with ovs_mutex. */
+static struct vport *new_vport(const struct vport_parms *parms)
+{
+	struct vport *vport;
+
+	vport = ovs_vport_add(parms);
+	if (!IS_ERR(vport)) {
+		struct datapath *dp = parms->dp;
+		struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);
+
+		hlist_add_head_rcu(&vport->dp_hash_node, head);
+	}
+	return vport;
+}
+
+void ovs_dp_detach_port(struct vport *p)
+{
+	ASSERT_OVSL();
+
+	/* First drop references to device. */
+	hlist_del_rcu(&p->dp_hash_node);
+
+	/* Then destroy it. */
+	ovs_vport_del(p);
+}
+
+/* Must be called with rcu_read_lock. */
+void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
+{
+	struct datapath *dp = p->dp;
+	struct sw_flow *flow;
+	struct dp_stats_percpu *stats;
+	struct sw_flow_key key;
+	u64 *stats_counter;
+	u32 n_mask_hit;
+	int error;
+
+	stats = this_cpu_ptr(dp->stats_percpu);
+
+	/* Extract flow from 'skb' into 'key'. */
+	error = ovs_flow_extract(skb, p->port_no, &key);
+	if (unlikely(error)) {
+		kfree_skb(skb);
+		return;
+	}
+
+	/* Look up flow. */
+	flow = ovs_flow_tbl_lookup_stats(&dp->table, &key, &n_mask_hit);
+	if (unlikely(!flow)) {
+		struct dp_upcall_info upcall;
+
+		upcall.cmd = OVS_PACKET_CMD_MISS;
+		upcall.key = &key;
+		upcall.userdata = NULL;
+		upcall.portid = p->upcall_portid;
+		ovs_dp_upcall(dp, skb, &upcall);
+		consume_skb(skb);
+		stats_counter = &stats->n_missed;
+		goto out;
+	}
+
+	OVS_CB(skb)->flow = flow;
+	OVS_CB(skb)->pkt_key = &key;
+
+	ovs_flow_stats_update(OVS_CB(skb)->flow, key.tp.flags, skb);
+	ovs_execute_actions(dp, skb);
+	stats_counter = &stats->n_hit;
+
+out:
+	/* Update datapath statistics. */
+	u64_stats_update_begin(&stats->syncp);
+	(*stats_counter)++;
+	stats->n_mask_hit += n_mask_hit;
+	u64_stats_update_end(&stats->syncp);
+}
+
+int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
+		  const struct dp_upcall_info *upcall_info)
+{
+	struct dp_stats_percpu *stats;
+	int err;
+
+	if (upcall_info->portid == 0) {
+		err = -ENOTCONN;
+		goto err;
+	}
+
+	if (!skb_is_gso(skb))
+		err = queue_userspace_packet(dp, skb, upcall_info);
+	else
+		err = queue_gso_packets(dp, skb, upcall_info);
+	if (err)
+		goto err;
+
+	return 0;
+
+err:
+	stats = this_cpu_ptr(dp->stats_percpu);
+
+	u64_stats_update_begin(&stats->syncp);
+	stats->n_lost++;
+	u64_stats_update_end(&stats->syncp);
+
+	return err;
+}
+
+static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
+			     const struct dp_upcall_info *upcall_info)
+{
+	unsigned short gso_type = skb_shinfo(skb)->gso_type;
+	struct dp_upcall_info later_info;
+	struct sw_flow_key later_key;
+	struct sk_buff *segs, *nskb;
+	int err;
+
+	segs = __skb_gso_segment(skb, NETIF_F_SG, false);
+	if (IS_ERR(segs))
+		return PTR_ERR(segs);
+
+	/* Queue all of the segments. */
+	skb = segs;
+	do {
+		err = queue_userspace_packet(dp, skb, upcall_info);
+		if (err)
+			break;
+
+		if (skb == segs && gso_type & SKB_GSO_UDP) {
+			/* The initial flow key extracted by ovs_flow_extract()
+			 * in this case is for a first fragment, so we need to
+			 * properly mark later fragments.
+			 */
+			later_key = *upcall_info->key;
+			later_key.ip.frag = OVS_FRAG_TYPE_LATER;
+
+			later_info = *upcall_info;
+			later_info.key = &later_key;
+			upcall_info = &later_info;
+		}
+	} while ((skb = skb->next));
+
+	/* Free all of the segments. */
+	skb = segs;
+	do {
+		nskb = skb->next;
+		if (err)
+			kfree_skb(skb);
+		else
+			consume_skb(skb);
+	} while ((skb = nskb));
+	return err;
+}
+
+static size_t key_attr_size(void)
+{
+	return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
+		+ nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
+		  + nla_total_size(8)   /* OVS_TUNNEL_KEY_ATTR_ID */
+		  + nla_total_size(4)   /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
+		  + nla_total_size(4)   /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
+		  + nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TOS */
+		  + nla_total_size(1)   /* OVS_TUNNEL_KEY_ATTR_TTL */
+		  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
+		  + nla_total_size(0)   /* OVS_TUNNEL_KEY_ATTR_CSUM */
+		+ nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
+		+ nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
+		+ nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
+		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
+		+ nla_total_size(4)   /* OVS_KEY_ATTR_8021Q */
+		+ nla_total_size(0)   /* OVS_KEY_ATTR_ENCAP */
+		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
+		+ nla_total_size(40)  /* OVS_KEY_ATTR_IPV6 */
+		+ nla_total_size(2)   /* OVS_KEY_ATTR_ICMPV6 */
+		+ nla_total_size(28); /* OVS_KEY_ATTR_ND */
+}
+
+static size_t upcall_msg_size(const struct nlattr *userdata,
+			      unsigned int hdrlen)
+{
+	size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
+		+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
+		+ nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */
+
+	/* OVS_PACKET_ATTR_USERDATA */
+	if (userdata)
+		size += NLA_ALIGN(userdata->nla_len);
+
+	return size;
+}
+
+static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
+				  const struct dp_upcall_info *upcall_info)
+{
+	struct ovs_header *upcall;
+	struct sk_buff *nskb = NULL;
+	struct sk_buff *user_skb; /* to be queued to userspace */
+	struct nlattr *nla;
+	struct genl_info info = {
+		.dst_sk = ovs_dp_get_net(dp)->genl_sock,
+		.snd_portid = upcall_info->portid,
+	};
+	size_t len;
+	unsigned int hlen;
+	int err, dp_ifindex;
+
+	dp_ifindex = get_dpifindex(dp);
+	if (!dp_ifindex)
+		return -ENODEV;
+
+	if (vlan_tx_tag_present(skb)) {
+		nskb = skb_clone(skb, GFP_ATOMIC);
+		if (!nskb)
+			return -ENOMEM;
+
+		nskb = __vlan_put_tag(nskb, nskb->vlan_proto, vlan_tx_tag_get(nskb));
+		if (!nskb)
+			return -ENOMEM;
+
+		nskb->vlan_tci = 0;
+		skb = nskb;
+	}
+
+	if (nla_attr_size(skb->len) > USHRT_MAX) {
+		err = -EFBIG;
+		goto out;
+	}
+
+	/* Complete checksum if needed */
+	if (skb->ip_summed == CHECKSUM_PARTIAL &&
+	    (err = skb_checksum_help(skb)))
+		goto out;
+
+	/* Older versions of OVS user space enforce alignment of the last
+	 * Netlink attribute to NLA_ALIGNTO which would require extensive
+	 * padding logic. Only perform zerocopy if padding is not required.
+	 */
+	if (dp->user_features & OVS_DP_F_UNALIGNED)
+		hlen = skb_zerocopy_headlen(skb);
+	else
+		hlen = skb->len;
+
+	len = upcall_msg_size(upcall_info->userdata, hlen);
+	user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);
+	if (!user_skb) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
+			     0, upcall_info->cmd);
+	upcall->dp_ifindex = dp_ifindex;
+
+	nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
+	ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb);
+	nla_nest_end(user_skb, nla);
+
+	if (upcall_info->userdata)
+		__nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
+			  nla_len(upcall_info->userdata),
+			  nla_data(upcall_info->userdata));
+
+	/* Only reserve room for attribute header, packet data is added
+	 * in skb_zerocopy() */
+	if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
+		err = -ENOBUFS;
+		goto out;
+	}
+	nla->nla_len = nla_attr_size(skb->len);
+
+	err = skb_zerocopy(user_skb, skb, skb->len, hlen);
+	if (err)
+		goto out;
+
+	/* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
+	if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
+		size_t plen = NLA_ALIGN(user_skb->len) - user_skb->len;
+
+		if (plen > 0)
+			memset(skb_put(user_skb, plen), 0, plen);
+	}
+
+	((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
+
+	err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
+out:
+	if (err)
+		skb_tx_error(skb);
+	kfree_skb(nskb);
+	return err;
+}
+
+static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
+{
+	struct ovs_header *ovs_header = info->userhdr;
+	struct nlattr **a = info->attrs;
+	struct sw_flow_actions *acts;
+	struct sk_buff *packet;
+	struct sw_flow *flow;
+	struct datapath *dp;
+	struct ethhdr *eth;
+	int len;
+	int err;
+
+	err = -EINVAL;
+	if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
+	    !a[OVS_PACKET_ATTR_ACTIONS])
+		goto err;
+
+	len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
+	packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
+	err = -ENOMEM;
+	if (!packet)
+		goto err;
+	skb_reserve(packet, NET_IP_ALIGN);
+
+	nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
+
+	skb_reset_mac_header(packet);
+	eth = eth_hdr(packet);
+
+	/* Normally, setting the skb 'protocol' field would be handled by a
+	 * call to eth_type_trans(), but it assumes there's a sending
+	 * device, which we may not have. */
+	if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
+		packet->protocol = eth->h_proto;
+	else
+		packet->protocol = htons(ETH_P_802_2);
+
+	/* Build an sw_flow for sending this packet. */
+	flow = ovs_flow_alloc();
+	err = PTR_ERR(flow);
+	if (IS_ERR(flow))
+		goto err_kfree_skb;
+
+	err = ovs_flow_extract(packet, -1, &flow->key);
+	if (err)
+		goto err_flow_free;
+
+	err = ovs_nla_get_flow_metadata(flow, a[OVS_PACKET_ATTR_KEY]);
+	if (err)
+		goto err_flow_free;
+	acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
+	err = PTR_ERR(acts);
+	if (IS_ERR(acts))
+		goto err_flow_free;
+
+	err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
+				   &flow->key, 0, &acts);
+	rcu_assign_pointer(flow->sf_acts, acts);
+	if (err)
+		goto err_flow_free;
+
+	OVS_CB(packet)->flow = flow;
+	OVS_CB(packet)->pkt_key = &flow->key;
+	packet->priority = flow->key.phy.priority;
+	packet->mark = flow->key.phy.skb_mark;
+
+	rcu_read_lock();
+	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+	err = -ENODEV;
+	if (!dp)
+		goto err_unlock;
+
+	local_bh_disable();
+	err = ovs_execute_actions(dp, packet);
+	local_bh_enable();
+	rcu_read_unlock();
+
+	ovs_flow_free(flow, false);
+	return err;
+
+err_unlock:
+	rcu_read_unlock();
+err_flow_free:
+	ovs_flow_free(flow, false);
+err_kfree_skb:
+	kfree_skb(packet);
+err:
+	return err;
+}
+
+static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
+	[OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
+	[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
+	[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
+};
+
+static const struct genl_ops dp_packet_genl_ops[] = {
+	{ .cmd = OVS_PACKET_CMD_EXECUTE,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = packet_policy,
+	  .doit = ovs_packet_cmd_execute
+	}
+};
+
+static struct genl_family dp_packet_genl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = sizeof(struct ovs_header),
+	.name = OVS_PACKET_FAMILY,
+	.version = OVS_PACKET_VERSION,
+	.maxattr = OVS_PACKET_ATTR_MAX,
+	.netnsok = true,
+	.parallel_ops = true,
+	.ops = dp_packet_genl_ops,
+	.n_ops = ARRAY_SIZE(dp_packet_genl_ops),
+};
+
+static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
+			 struct ovs_dp_megaflow_stats *mega_stats)
+{
+	int i;
+
+	memset(mega_stats, 0, sizeof(*mega_stats));
+
+	stats->n_flows = ovs_flow_tbl_count(&dp->table);
+	mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);
+
+	stats->n_hit = stats->n_missed = stats->n_lost = 0;
+
+	for_each_possible_cpu(i) {
+		const struct dp_stats_percpu *percpu_stats;
+		struct dp_stats_percpu local_stats;
+		unsigned int start;
+
+		percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
+
+		do {
+			start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
+			local_stats = *percpu_stats;
+		} while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
+
+		stats->n_hit += local_stats.n_hit;
+		stats->n_missed += local_stats.n_missed;
+		stats->n_lost += local_stats.n_lost;
+		mega_stats->n_mask_hit += local_stats.n_mask_hit;
+	}
+}
+
+static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
+{
+	return NLMSG_ALIGN(sizeof(struct ovs_header))
+		+ nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
+		+ nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_MASK */
+		+ nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
+		+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
+		+ nla_total_size(8) /* OVS_FLOW_ATTR_USED */
+		+ nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
+				  struct sk_buff *skb, u32 portid,
+				  u32 seq, u32 flags, u8 cmd)
+{
+	const int skb_orig_len = skb->len;
+	struct nlattr *start;
+	struct ovs_flow_stats stats;
+	__be16 tcp_flags;
+	unsigned long used;
+	struct ovs_header *ovs_header;
+	struct nlattr *nla;
+	int err;
+
+	ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
+	if (!ovs_header)
+		return -EMSGSIZE;
+
+	ovs_header->dp_ifindex = dp_ifindex;
+
+	/* Fill flow key. */
+	nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
+	if (!nla)
+		goto nla_put_failure;
+
+	err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb);
+	if (err)
+		goto error;
+	nla_nest_end(skb, nla);
+
+	nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
+	if (!nla)
+		goto nla_put_failure;
+
+	err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb);
+	if (err)
+		goto error;
+
+	nla_nest_end(skb, nla);
+
+	ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
+
+	if (used &&
+	    nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
+		goto nla_put_failure;
+
+	if (stats.n_packets &&
+	    nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats))
+		goto nla_put_failure;
+
+	if ((u8)ntohs(tcp_flags) &&
+	     nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
+		goto nla_put_failure;
+
+	/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
+	 * this is the first flow to be dumped into 'skb'.  This is unusual for
+	 * Netlink but individual action lists can be longer than
+	 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
+	 * The userspace caller can always fetch the actions separately if it
+	 * really wants them.  (Most userspace callers in fact don't care.)
+	 *
+	 * This can only fail for dump operations because the skb is always
+	 * properly sized for single flows.
+	 */
+	start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
+	if (start) {
+		const struct sw_flow_actions *sf_acts;
+
+		sf_acts = rcu_dereference_ovsl(flow->sf_acts);
+		err = ovs_nla_put_actions(sf_acts->actions,
+					  sf_acts->actions_len, skb);
+
+		if (!err)
+			nla_nest_end(skb, start);
+		else {
+			if (skb_orig_len)
+				goto error;
+
+			nla_nest_cancel(skb, start);
+		}
+	} else if (skb_orig_len)
+		goto nla_put_failure;
+
+	return genlmsg_end(skb, ovs_header);
+
+nla_put_failure:
+	err = -EMSGSIZE;
+error:
+	genlmsg_cancel(skb, ovs_header);
+	return err;
+}
+
+/* May not be called with RCU read lock. */
+static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
+					       struct genl_info *info,
+					       bool always)
+{
+	struct sk_buff *skb;
+
+	if (!always && !ovs_must_notify(info, &ovs_dp_flow_multicast_group))
+		return NULL;
+
+	skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	return skb;
+}
+
+/* Called with ovs_mutex. */
+static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
+					       int dp_ifindex,
+					       struct genl_info *info, u8 cmd,
+					       bool always)
+{
+	struct sk_buff *skb;
+	int retval;
+
+	skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info,
+				      always);
+	if (!skb || IS_ERR(skb))
+		return skb;
+
+	retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
+					info->snd_portid, info->snd_seq, 0,
+					cmd);
+	BUG_ON(retval < 0);
+	return skb;
+}
+
+static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct ovs_header *ovs_header = info->userhdr;
+	struct sw_flow *flow, *new_flow;
+	struct sw_flow_mask mask;
+	struct sk_buff *reply;
+	struct datapath *dp;
+	struct sw_flow_actions *acts;
+	struct sw_flow_match match;
+	int error;
+
+	/* Must have key and actions. */
+	error = -EINVAL;
+	if (!a[OVS_FLOW_ATTR_KEY])
+		goto error;
+	if (!a[OVS_FLOW_ATTR_ACTIONS])
+		goto error;
+
+	/* Most of the time we need to allocate a new flow, do it before
+	 * locking.
+	 */
+	new_flow = ovs_flow_alloc();
+	if (IS_ERR(new_flow)) {
+		error = PTR_ERR(new_flow);
+		goto error;
+	}
+
+	/* Extract key. */
+	ovs_match_init(&match, &new_flow->unmasked_key, &mask);
+	error = ovs_nla_get_match(&match,
+				  a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
+	if (error)
+		goto err_kfree_flow;
+
+	ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask);
+
+	/* Validate actions. */
+	acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
+	error = PTR_ERR(acts);
+	if (IS_ERR(acts))
+		goto err_kfree_flow;
+
+	error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
+				     0, &acts);
+	if (error) {
+		OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
+		goto err_kfree_acts;
+	}
+
+	reply = ovs_flow_cmd_alloc_info(acts, info, false);
+	if (IS_ERR(reply)) {
+		error = PTR_ERR(reply);
+		goto err_kfree_acts;
+	}
+
+	ovs_lock();
+	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+	if (unlikely(!dp)) {
+		error = -ENODEV;
+		goto err_unlock_ovs;
+	}
+	/* Check if this is a duplicate flow */
+	flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key);
+	if (likely(!flow)) {
+		rcu_assign_pointer(new_flow->sf_acts, acts);
+
+		/* Put flow in bucket. */
+		error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
+		if (unlikely(error)) {
+			acts = NULL;
+			goto err_unlock_ovs;
+		}
+
+		if (unlikely(reply)) {
+			error = ovs_flow_cmd_fill_info(new_flow,
+						       ovs_header->dp_ifindex,
+						       reply, info->snd_portid,
+						       info->snd_seq, 0,
+						       OVS_FLOW_CMD_NEW);
+			BUG_ON(error < 0);
+		}
+		ovs_unlock();
+	} else {
+		struct sw_flow_actions *old_acts;
+
+		/* Bail out if we're not allowed to modify an existing flow.
+		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
+		 * because Generic Netlink treats the latter as a dump
+		 * request.  We also accept NLM_F_EXCL in case that bug ever
+		 * gets fixed.
+		 */
+		if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
+							 | NLM_F_EXCL))) {
+			error = -EEXIST;
+			goto err_unlock_ovs;
+		}
+		/* The unmasked key has to be the same for flow updates. */
+		if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) {
+			flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+			if (!flow) {
+				error = -ENOENT;
+				goto err_unlock_ovs;
+			}
+		}
+		/* Update actions. */
+		old_acts = ovsl_dereference(flow->sf_acts);
+		rcu_assign_pointer(flow->sf_acts, acts);
+
+		if (unlikely(reply)) {
+			error = ovs_flow_cmd_fill_info(flow,
+						       ovs_header->dp_ifindex,
+						       reply, info->snd_portid,
+						       info->snd_seq, 0,
+						       OVS_FLOW_CMD_NEW);
+			BUG_ON(error < 0);
+		}
+		ovs_unlock();
+
+		ovs_nla_free_flow_actions(old_acts);
+		ovs_flow_free(new_flow, false);
+	}
+
+	if (reply)
+		ovs_notify(&dp_flow_genl_family, reply, info);
+	return 0;
+
+err_unlock_ovs:
+	ovs_unlock();
+	kfree_skb(reply);
+err_kfree_acts:
+	kfree(acts);
+err_kfree_flow:
+	ovs_flow_free(new_flow, false);
+error:
+	return error;
+}
+
+static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct ovs_header *ovs_header = info->userhdr;
+	struct sw_flow_key key, masked_key;
+	struct sw_flow *flow;
+	struct sw_flow_mask mask;
+	struct sk_buff *reply = NULL;
+	struct datapath *dp;
+	struct sw_flow_actions *old_acts = NULL, *acts = NULL;
+	struct sw_flow_match match;
+	int error;
+
+	/* Extract key. */
+	error = -EINVAL;
+	if (!a[OVS_FLOW_ATTR_KEY])
+		goto error;
+
+	ovs_match_init(&match, &key, &mask);
+	error = ovs_nla_get_match(&match,
+				  a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
+	if (error)
+		goto error;
+
+	/* Validate actions. */
+	if (a[OVS_FLOW_ATTR_ACTIONS]) {
+		acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
+		error = PTR_ERR(acts);
+		if (IS_ERR(acts))
+			goto error;
+
+		ovs_flow_mask_key(&masked_key, &key, &mask);
+		error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS],
+					     &masked_key, 0, &acts);
+		if (error) {
+			OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
+			goto err_kfree_acts;
+		}
+	}
+
+	/* Can allocate before locking if have acts. */
+	if (acts) {
+		reply = ovs_flow_cmd_alloc_info(acts, info, false);
+		if (IS_ERR(reply)) {
+			error = PTR_ERR(reply);
+			goto err_kfree_acts;
+		}
+	}
+
+	ovs_lock();
+	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+	if (unlikely(!dp)) {
+		error = -ENODEV;
+		goto err_unlock_ovs;
+	}
+	/* Check that the flow exists. */
+	flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+	if (unlikely(!flow)) {
+		error = -ENOENT;
+		goto err_unlock_ovs;
+	}
+
+	/* Update actions, if present. */
+	if (likely(acts)) {
+		old_acts = ovsl_dereference(flow->sf_acts);
+		rcu_assign_pointer(flow->sf_acts, acts);
+
+		if (unlikely(reply)) {
+			error = ovs_flow_cmd_fill_info(flow,
+						       ovs_header->dp_ifindex,
+						       reply, info->snd_portid,
+						       info->snd_seq, 0,
+						       OVS_FLOW_CMD_NEW);
+			BUG_ON(error < 0);
+		}
+	} else {
+		/* Could not alloc without acts before locking. */
+		reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
+						info, OVS_FLOW_CMD_NEW, false);
+		if (unlikely(IS_ERR(reply))) {
+			error = PTR_ERR(reply);
+			goto err_unlock_ovs;
+		}
+	}
+
+	/* Clear stats. */
+	if (a[OVS_FLOW_ATTR_CLEAR])
+		ovs_flow_stats_clear(flow);
+	ovs_unlock();
+
+	if (reply)
+		ovs_notify(&dp_flow_genl_family, reply, info);
+	if (old_acts)
+		ovs_nla_free_flow_actions(old_acts);
+
+	return 0;
+
+err_unlock_ovs:
+	ovs_unlock();
+	kfree_skb(reply);
+err_kfree_acts:
+	kfree(acts);
+error:
+	return error;
+}
+
+static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct ovs_header *ovs_header = info->userhdr;
+	struct sw_flow_key key;
+	struct sk_buff *reply;
+	struct sw_flow *flow;
+	struct datapath *dp;
+	struct sw_flow_match match;
+	int err;
+
+	if (!a[OVS_FLOW_ATTR_KEY]) {
+		OVS_NLERR("Flow get message rejected, Key attribute missing.\n");
+		return -EINVAL;
+	}
+
+	ovs_match_init(&match, &key, NULL);
+	err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
+	if (err)
+		return err;
+
+	ovs_lock();
+	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+	if (!dp) {
+		err = -ENODEV;
+		goto unlock;
+	}
+
+	flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+	if (!flow) {
+		err = -ENOENT;
+		goto unlock;
+	}
+
+	reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
+					OVS_FLOW_CMD_NEW, true);
+	if (IS_ERR(reply)) {
+		err = PTR_ERR(reply);
+		goto unlock;
+	}
+
+	ovs_unlock();
+	return genlmsg_reply(reply, info);
+unlock:
+	ovs_unlock();
+	return err;
+}
+
+static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct ovs_header *ovs_header = info->userhdr;
+	struct sw_flow_key key;
+	struct sk_buff *reply;
+	struct sw_flow *flow;
+	struct datapath *dp;
+	struct sw_flow_match match;
+	int err;
+
+	if (likely(a[OVS_FLOW_ATTR_KEY])) {
+		ovs_match_init(&match, &key, NULL);
+		err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
+		if (unlikely(err))
+			return err;
+	}
+
+	ovs_lock();
+	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+	if (unlikely(!dp)) {
+		err = -ENODEV;
+		goto unlock;
+	}
+
+	if (unlikely(!a[OVS_FLOW_ATTR_KEY])) {
+		err = ovs_flow_tbl_flush(&dp->table);
+		goto unlock;
+	}
+
+	flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+	if (unlikely(!flow)) {
+		err = -ENOENT;
+		goto unlock;
+	}
+
+	ovs_flow_tbl_remove(&dp->table, flow);
+	ovs_unlock();
+
+	reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
+					info, false);
+	if (likely(reply)) {
+		if (likely(!IS_ERR(reply))) {
+			rcu_read_lock();	/*To keep RCU checker happy. */
+			err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
+						     reply, info->snd_portid,
+						     info->snd_seq, 0,
+						     OVS_FLOW_CMD_DEL);
+			rcu_read_unlock();
+			BUG_ON(err < 0);
+
+			ovs_notify(&dp_flow_genl_family, reply, info);
+		} else {
+			netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply));
+		}
+	}
+
+	ovs_flow_free(flow, true);
+	return 0;
+unlock:
+	ovs_unlock();
+	return err;
+}
+
+static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
+	struct table_instance *ti;
+	struct datapath *dp;
+
+	rcu_read_lock();
+	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+	if (!dp) {
+		rcu_read_unlock();
+		return -ENODEV;
+	}
+
+	ti = rcu_dereference(dp->table.ti);
+	for (;;) {
+		struct sw_flow *flow;
+		u32 bucket, obj;
+
+		bucket = cb->args[0];
+		obj = cb->args[1];
+		flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
+		if (!flow)
+			break;
+
+		if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
+					   NETLINK_CB(cb->skb).portid,
+					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
+					   OVS_FLOW_CMD_NEW) < 0)
+			break;
+
+		cb->args[0] = bucket;
+		cb->args[1] = obj;
+	}
+	rcu_read_unlock();
+	return skb->len;
+}
+
+static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
+	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
+	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
+	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
+};
+
+static struct genl_ops dp_flow_genl_ops[] = {
+	{ .cmd = OVS_FLOW_CMD_NEW,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = flow_policy,
+	  .doit = ovs_flow_cmd_new
+	},
+	{ .cmd = OVS_FLOW_CMD_DEL,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = flow_policy,
+	  .doit = ovs_flow_cmd_del
+	},
+	{ .cmd = OVS_FLOW_CMD_GET,
+	  .flags = 0,		    /* OK for unprivileged users. */
+	  .policy = flow_policy,
+	  .doit = ovs_flow_cmd_get,
+	  .dumpit = ovs_flow_cmd_dump
+	},
+	{ .cmd = OVS_FLOW_CMD_SET,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = flow_policy,
+	  .doit = ovs_flow_cmd_set,
+	},
+};
+
+static struct genl_family dp_flow_genl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = sizeof(struct ovs_header),
+	.name = OVS_FLOW_FAMILY,
+	.version = OVS_FLOW_VERSION,
+	.maxattr = OVS_FLOW_ATTR_MAX,
+	.netnsok = true,
+	.parallel_ops = true,
+	.ops = dp_flow_genl_ops,
+	.n_ops = ARRAY_SIZE(dp_flow_genl_ops),
+	.mcgrps = &ovs_dp_flow_multicast_group,
+	.n_mcgrps = 1,
+};
+
+static size_t ovs_dp_cmd_msg_size(void)
+{
+	size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
+
+	msgsize += nla_total_size(IFNAMSIZ);
+	msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
+	msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats));
+	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
+
+	return msgsize;
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
+				u32 portid, u32 seq, u32 flags, u8 cmd)
+{
+	struct ovs_header *ovs_header;
+	struct ovs_dp_stats dp_stats;
+	struct ovs_dp_megaflow_stats dp_megaflow_stats;
+	int err;
+
+	ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
+				   flags, cmd);
+	if (!ovs_header)
+		goto error;
+
+	ovs_header->dp_ifindex = get_dpifindex(dp);
+
+	err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
+	if (err)
+		goto nla_put_failure;
+
+	get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
+	if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
+			&dp_stats))
+		goto nla_put_failure;
+
+	if (nla_put(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
+			sizeof(struct ovs_dp_megaflow_stats),
+			&dp_megaflow_stats))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
+		goto nla_put_failure;
+
+	return genlmsg_end(skb, ovs_header);
+
+nla_put_failure:
+	genlmsg_cancel(skb, ovs_header);
+error:
+	return -EMSGSIZE;
+}
+
+static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info)
+{
+	return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL);
+}
+
+/* Called with rcu_read_lock or ovs_mutex. */
+static struct datapath *lookup_datapath(struct net *net,
+					struct ovs_header *ovs_header,
+					struct nlattr *a[OVS_DP_ATTR_MAX + 1])
+{
+	struct datapath *dp;
+
+	if (!a[OVS_DP_ATTR_NAME])
+		dp = get_dp(net, ovs_header->dp_ifindex);
+	else {
+		struct vport *vport;
+
+		vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
+		dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
+	}
+	return dp ? dp : ERR_PTR(-ENODEV);
+}
+
+static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info)
+{
+	struct datapath *dp;
+
+	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+	if (IS_ERR(dp))
+		return;
+
+	WARN(dp->user_features, "Dropping previously announced user features\n");
+	dp->user_features = 0;
+}
+
+static void ovs_dp_change(struct datapath *dp, struct nlattr **a)
+{
+	if (a[OVS_DP_ATTR_USER_FEATURES])
+		dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
+}
+
+static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct vport_parms parms;
+	struct sk_buff *reply;
+	struct datapath *dp;
+	struct vport *vport;
+	struct ovs_net *ovs_net;
+	int err, i;
+
+	err = -EINVAL;
+	if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
+		goto err;
+
+	reply = ovs_dp_cmd_alloc_info(info);
+	if (!reply)
+		return -ENOMEM;
+
+	err = -ENOMEM;
+	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
+	if (dp == NULL)
+		goto err_free_reply;
+
+	ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
+
+	/* Allocate table. */
+	err = ovs_flow_tbl_init(&dp->table);
+	if (err)
+		goto err_free_dp;
+
+	dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
+	if (!dp->stats_percpu) {
+		err = -ENOMEM;
+		goto err_destroy_table;
+	}
+
+	dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
+			    GFP_KERNEL);
+	if (!dp->ports) {
+		err = -ENOMEM;
+		goto err_destroy_percpu;
+	}
+
+	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
+		INIT_HLIST_HEAD(&dp->ports[i]);
+
+	/* Set up our datapath device. */
+	parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
+	parms.type = OVS_VPORT_TYPE_INTERNAL;
+	parms.options = NULL;
+	parms.dp = dp;
+	parms.port_no = OVSP_LOCAL;
+	parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
+
+	ovs_dp_change(dp, a);
+
+	/* So far only local changes have been made, now need the lock. */
+	ovs_lock();
+
+	vport = new_vport(&parms);
+	if (IS_ERR(vport)) {
+		err = PTR_ERR(vport);
+		if (err == -EBUSY)
+			err = -EEXIST;
+
+		if (err == -EEXIST) {
+			/* An outdated user space instance that does not understand
+			 * the concept of user_features has attempted to create a new
+			 * datapath and is likely to reuse it. Drop all user features.
+			 */
+			if (info->genlhdr->version < OVS_DP_VER_FEATURES)
+				ovs_dp_reset_user_features(skb, info);
+		}
+
+		goto err_destroy_ports_array;
+	}
+
+	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
+				   info->snd_seq, 0, OVS_DP_CMD_NEW);
+	BUG_ON(err < 0);
+
+	ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
+	list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
+
+	ovs_unlock();
+
+	ovs_notify(&dp_datapath_genl_family, reply, info);
+	return 0;
+
+err_destroy_ports_array:
+	ovs_unlock();
+	kfree(dp->ports);
+err_destroy_percpu:
+	free_percpu(dp->stats_percpu);
+err_destroy_table:
+	ovs_flow_tbl_destroy(&dp->table, false);
+err_free_dp:
+	release_net(ovs_dp_get_net(dp));
+	kfree(dp);
+err_free_reply:
+	kfree_skb(reply);
+err:
+	return err;
+}
+
+/* Called with ovs_mutex. */
+static void __dp_destroy(struct datapath *dp)
+{
+	int i;
+
+	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
+		struct vport *vport;
+		struct hlist_node *n;
+
+		hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
+			if (vport->port_no != OVSP_LOCAL)
+				ovs_dp_detach_port(vport);
+	}
+
+	list_del_rcu(&dp->list_node);
+
+	/* OVSP_LOCAL is datapath internal port. We need to make sure that
+	 * all ports in datapath are destroyed first before freeing datapath.
+	 */
+	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
+
+	/* RCU destroy the flow table */
+	ovs_flow_tbl_destroy(&dp->table, true);
+
+	call_rcu(&dp->rcu, destroy_dp_rcu);
+}
+
+static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *reply;
+	struct datapath *dp;
+	int err;
+
+	reply = ovs_dp_cmd_alloc_info(info);
+	if (!reply)
+		return -ENOMEM;
+
+	ovs_lock();
+	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+	err = PTR_ERR(dp);
+	if (IS_ERR(dp))
+		goto err_unlock_free;
+
+	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
+				   info->snd_seq, 0, OVS_DP_CMD_DEL);
+	BUG_ON(err < 0);
+
+	__dp_destroy(dp);
+	ovs_unlock();
+
+	ovs_notify(&dp_datapath_genl_family, reply, info);
+
+	return 0;
+
+err_unlock_free:
+	ovs_unlock();
+	kfree_skb(reply);
+	return err;
+}
+
+static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *reply;
+	struct datapath *dp;
+	int err;
+
+	reply = ovs_dp_cmd_alloc_info(info);
+	if (!reply)
+		return -ENOMEM;
+
+	ovs_lock();
+	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+	err = PTR_ERR(dp);
+	if (IS_ERR(dp))
+		goto err_unlock_free;
+
+	ovs_dp_change(dp, info->attrs);
+
+	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
+				   info->snd_seq, 0, OVS_DP_CMD_NEW);
+	BUG_ON(err < 0);
+
+	ovs_unlock();
+	ovs_notify(&dp_datapath_genl_family, reply, info);
+
+	return 0;
+
+err_unlock_free:
+	ovs_unlock();
+	kfree_skb(reply);
+	return err;
+}
+
+static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *reply;
+	struct datapath *dp;
+	int err;
+
+	reply = ovs_dp_cmd_alloc_info(info);
+	if (!reply)
+		return -ENOMEM;
+
+	rcu_read_lock();
+	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+	if (IS_ERR(dp)) {
+		err = PTR_ERR(dp);
+		goto err_unlock_free;
+	}
+	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
+				   info->snd_seq, 0, OVS_DP_CMD_NEW);
+	BUG_ON(err < 0);
+	rcu_read_unlock();
+
+	return genlmsg_reply(reply, info);
+
+err_unlock_free:
+	rcu_read_unlock();
+	kfree_skb(reply);
+	return err;
+}
+
+static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
+	struct datapath *dp;
+	int skip = cb->args[0];
+	int i = 0;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(dp, &ovs_net->dps, list_node) {
+		if (i >= skip &&
+		    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
+					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
+					 OVS_DP_CMD_NEW) < 0)
+			break;
+		i++;
+	}
+	rcu_read_unlock();
+
+	cb->args[0] = i;
+
+	return skb->len;
+}
+
+static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
+	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
+	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
+	[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
+};
+
+static struct genl_ops dp_datapath_genl_ops[] = {
+	{ .cmd = OVS_DP_CMD_NEW,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = datapath_policy,
+	  .doit = ovs_dp_cmd_new
+	},
+	{ .cmd = OVS_DP_CMD_DEL,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = datapath_policy,
+	  .doit = ovs_dp_cmd_del
+	},
+	{ .cmd = OVS_DP_CMD_GET,
+	  .flags = 0,		    /* OK for unprivileged users. */
+	  .policy = datapath_policy,
+	  .doit = ovs_dp_cmd_get,
+	  .dumpit = ovs_dp_cmd_dump
+	},
+	{ .cmd = OVS_DP_CMD_SET,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = datapath_policy,
+	  .doit = ovs_dp_cmd_set,
+	},
+};
+
+static struct genl_family dp_datapath_genl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = sizeof(struct ovs_header),
+	.name = OVS_DATAPATH_FAMILY,
+	.version = OVS_DATAPATH_VERSION,
+	.maxattr = OVS_DP_ATTR_MAX,
+	.netnsok = true,
+	.parallel_ops = true,
+	.ops = dp_datapath_genl_ops,
+	.n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
+	.mcgrps = &ovs_dp_datapath_multicast_group,
+	.n_mcgrps = 1,
+};
+
+/* Called with ovs_mutex or RCU read lock. */
+static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
+				   u32 portid, u32 seq, u32 flags, u8 cmd)
+{
+	struct ovs_header *ovs_header;
+	struct ovs_vport_stats vport_stats;
+	int err;
+
+	ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
+				 flags, cmd);
+	if (!ovs_header)
+		return -EMSGSIZE;
+
+	ovs_header->dp_ifindex = get_dpifindex(vport->dp);
+
+	if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
+	    nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
+	    nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) ||
+	    nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid))
+		goto nla_put_failure;
+
+	ovs_vport_get_stats(vport, &vport_stats);
+	if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
+		    &vport_stats))
+		goto nla_put_failure;
+
+	err = ovs_vport_get_options(vport, skb);
+	if (err == -EMSGSIZE)
+		goto error;
+
+	return genlmsg_end(skb, ovs_header);
+
+nla_put_failure:
+	err = -EMSGSIZE;
+error:
+	genlmsg_cancel(skb, ovs_header);
+	return err;
+}
+
+static struct sk_buff *ovs_vport_cmd_alloc_info(void)
+{
+	return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+}
+
+/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
+struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
+					 u32 seq, u8 cmd)
+{
+	struct sk_buff *skb;
+	int retval;
+
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
+	BUG_ON(retval < 0);
+
+	return skb;
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+static struct vport *lookup_vport(struct net *net,
+				  struct ovs_header *ovs_header,
+				  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
+{
+	struct datapath *dp;
+	struct vport *vport;
+
+	if (a[OVS_VPORT_ATTR_NAME]) {
+		vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
+		if (!vport)
+			return ERR_PTR(-ENODEV);
+		if (ovs_header->dp_ifindex &&
+		    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
+			return ERR_PTR(-ENODEV);
+		return vport;
+	} else if (a[OVS_VPORT_ATTR_PORT_NO]) {
+		u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
+
+		if (port_no >= DP_MAX_PORTS)
+			return ERR_PTR(-EFBIG);
+
+		dp = get_dp(net, ovs_header->dp_ifindex);
+		if (!dp)
+			return ERR_PTR(-ENODEV);
+
+		vport = ovs_vport_ovsl_rcu(dp, port_no);
+		if (!vport)
+			return ERR_PTR(-ENODEV);
+		return vport;
+	} else
+		return ERR_PTR(-EINVAL);
+}
+
+static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct ovs_header *ovs_header = info->userhdr;
+	struct vport_parms parms;
+	struct sk_buff *reply;
+	struct vport *vport;
+	struct datapath *dp;
+	u32 port_no;
+	int err;
+
+	if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
+	    !a[OVS_VPORT_ATTR_UPCALL_PID])
+		return -EINVAL;
+
+	port_no = a[OVS_VPORT_ATTR_PORT_NO]
+		? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
+	if (port_no >= DP_MAX_PORTS)
+		return -EFBIG;
+
+	reply = ovs_vport_cmd_alloc_info();
+	if (!reply)
+		return -ENOMEM;
+
+	ovs_lock();
+	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+	err = -ENODEV;
+	if (!dp)
+		goto exit_unlock_free;
+
+	if (port_no) {
+		vport = ovs_vport_ovsl(dp, port_no);
+		err = -EBUSY;
+		if (vport)
+			goto exit_unlock_free;
+	} else {
+		for (port_no = 1; ; port_no++) {
+			if (port_no >= DP_MAX_PORTS) {
+				err = -EFBIG;
+				goto exit_unlock_free;
+			}
+			vport = ovs_vport_ovsl(dp, port_no);
+			if (!vport)
+				break;
+		}
+	}
+
+	parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
+	parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
+	parms.options = a[OVS_VPORT_ATTR_OPTIONS];
+	parms.dp = dp;
+	parms.port_no = port_no;
+	parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
+
+	vport = new_vport(&parms);
+	err = PTR_ERR(vport);
+	if (IS_ERR(vport))
+		goto exit_unlock_free;
+
+	err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
+				      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
+	BUG_ON(err < 0);
+	ovs_unlock();
+
+	ovs_notify(&dp_vport_genl_family, reply, info);
+	return 0;
+
+exit_unlock_free:
+	ovs_unlock();
+	kfree_skb(reply);
+	return err;
+}
+
+static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct sk_buff *reply;
+	struct vport *vport;
+	int err;
+
+	reply = ovs_vport_cmd_alloc_info();
+	if (!reply)
+		return -ENOMEM;
+
+	ovs_lock();
+	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
+	err = PTR_ERR(vport);
+	if (IS_ERR(vport))
+		goto exit_unlock_free;
+
+	if (a[OVS_VPORT_ATTR_TYPE] &&
+	    nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
+		err = -EINVAL;
+		goto exit_unlock_free;
+	}
+
+	if (a[OVS_VPORT_ATTR_OPTIONS]) {
+		err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
+		if (err)
+			goto exit_unlock_free;
+	}
+
+	if (a[OVS_VPORT_ATTR_UPCALL_PID])
+		vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
+
+	err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
+				      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
+	BUG_ON(err < 0);
+
+	ovs_unlock();
+	ovs_notify(&dp_vport_genl_family, reply, info);
+	return 0;
+
+exit_unlock_free:
+	ovs_unlock();
+	kfree_skb(reply);
+	return err;
+}
+
+static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct sk_buff *reply;
+	struct vport *vport;
+	int err;
+
+	reply = ovs_vport_cmd_alloc_info();
+	if (!reply)
+		return -ENOMEM;
+
+	ovs_lock();
+	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
+	err = PTR_ERR(vport);
+	if (IS_ERR(vport))
+		goto exit_unlock_free;
+
+	if (vport->port_no == OVSP_LOCAL) {
+		err = -EINVAL;
+		goto exit_unlock_free;
+	}
+
+	err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
+				      info->snd_seq, 0, OVS_VPORT_CMD_DEL);
+	BUG_ON(err < 0);
+	ovs_dp_detach_port(vport);
+	ovs_unlock();
+
+	ovs_notify(&dp_vport_genl_family, reply, info);
+	return 0;
+
+exit_unlock_free:
+	ovs_unlock();
+	kfree_skb(reply);
+	return err;
+}
+
+static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct ovs_header *ovs_header = info->userhdr;
+	struct sk_buff *reply;
+	struct vport *vport;
+	int err;
+
+	reply = ovs_vport_cmd_alloc_info();
+	if (!reply)
+		return -ENOMEM;
+
+	rcu_read_lock();
+	vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
+	err = PTR_ERR(vport);
+	if (IS_ERR(vport))
+		goto exit_unlock_free;
+	err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
+				      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
+	BUG_ON(err < 0);
+	rcu_read_unlock();
+
+	return genlmsg_reply(reply, info);
+
+exit_unlock_free:
+	rcu_read_unlock();
+	kfree_skb(reply);
+	return err;
+}
+
+static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
+	struct datapath *dp;
+	int bucket = cb->args[0], skip = cb->args[1];
+	int i, j = 0;
+
+	rcu_read_lock();
+	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+	if (!dp) {
+		rcu_read_unlock();
+		return -ENODEV;
+	}
+	for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
+		struct vport *vport;
+
+		j = 0;
+		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
+			if (j >= skip &&
+			    ovs_vport_cmd_fill_info(vport, skb,
+						    NETLINK_CB(cb->skb).portid,
+						    cb->nlh->nlmsg_seq,
+						    NLM_F_MULTI,
+						    OVS_VPORT_CMD_NEW) < 0)
+				goto out;
+
+			j++;
+		}
+		skip = 0;
+	}
+out:
+	rcu_read_unlock();
+
+	cb->args[0] = i;
+	cb->args[1] = j;
+
+	return skb->len;
+}
+
+static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
+	[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
+	[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
+	[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
+	[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
+	[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
+	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
+};
+
+static struct genl_ops dp_vport_genl_ops[] = {
+	{ .cmd = OVS_VPORT_CMD_NEW,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = vport_policy,
+	  .doit = ovs_vport_cmd_new
+	},
+	{ .cmd = OVS_VPORT_CMD_DEL,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = vport_policy,
+	  .doit = ovs_vport_cmd_del
+	},
+	{ .cmd = OVS_VPORT_CMD_GET,
+	  .flags = 0,		    /* OK for unprivileged users. */
+	  .policy = vport_policy,
+	  .doit = ovs_vport_cmd_get,
+	  .dumpit = ovs_vport_cmd_dump
+	},
+	{ .cmd = OVS_VPORT_CMD_SET,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = vport_policy,
+	  .doit = ovs_vport_cmd_set,
+	},
+};
+
+struct genl_family dp_vport_genl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = sizeof(struct ovs_header),
+	.name = OVS_VPORT_FAMILY,
+	.version = OVS_VPORT_VERSION,
+	.maxattr = OVS_VPORT_ATTR_MAX,
+	.netnsok = true,
+	.parallel_ops = true,
+	.ops = dp_vport_genl_ops,
+	.n_ops = ARRAY_SIZE(dp_vport_genl_ops),
+	.mcgrps = &ovs_dp_vport_multicast_group,
+	.n_mcgrps = 1,
+};
+
+static struct genl_family * const dp_genl_families[] = {
+	&dp_datapath_genl_family,
+	&dp_vport_genl_family,
+	&dp_flow_genl_family,
+	&dp_packet_genl_family,
+};
+
+static void dp_unregister_genl(int n_families)
+{
+	int i;
+
+	for (i = 0; i < n_families; i++)
+		genl_unregister_family(dp_genl_families[i]);
+}
+
+static int dp_register_genl(void)
+{
+	int err;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
+
+		err = genl_register_family(dp_genl_families[i]);
+		if (err)
+			goto error;
+	}
+
+	return 0;
+
+error:
+	dp_unregister_genl(i);
+	return err;
+}
+
+static int __net_init ovs_init_net(struct net *net)
+{
+	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+
+	INIT_LIST_HEAD(&ovs_net->dps);
+	INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
+	return 0;
+}
+
+static void __net_exit ovs_exit_net(struct net *net)
+{
+	struct datapath *dp, *dp_next;
+	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+
+	ovs_lock();
+	list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
+		__dp_destroy(dp);
+	ovs_unlock();
+
+	cancel_work_sync(&ovs_net->dp_notify_work);
+}
+
+static struct pernet_operations ovs_net_ops = {
+	.init = ovs_init_net,
+	.exit = ovs_exit_net,
+	.id   = &ovs_net_id,
+	.size = sizeof(struct ovs_net),
+};
+
+static int __init dp_init(void)
+{
+	int err;
+
+	BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
+
+	pr_info("Open vSwitch switching datapath\n");
+
+	err = ovs_flow_init();
+	if (err)
+		goto error;
+
+	err = ovs_vport_init();
+	if (err)
+		goto error_flow_exit;
+
+	err = register_pernet_device(&ovs_net_ops);
+	if (err)
+		goto error_vport_exit;
+
+	err = register_netdevice_notifier(&ovs_dp_device_notifier);
+	if (err)
+		goto error_netns_exit;
+
+	err = dp_register_genl();
+	if (err < 0)
+		goto error_unreg_notifier;
+
+	return 0;
+
+error_unreg_notifier:
+	unregister_netdevice_notifier(&ovs_dp_device_notifier);
+error_netns_exit:
+	unregister_pernet_device(&ovs_net_ops);
+error_vport_exit:
+	ovs_vport_exit();
+error_flow_exit:
+	ovs_flow_exit();
+error:
+	return err;
+}
+
+static void dp_cleanup(void)
+{
+	dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
+	unregister_netdevice_notifier(&ovs_dp_device_notifier);
+	unregister_pernet_device(&ovs_net_ops);
+	rcu_barrier();
+	ovs_vport_exit();
+	ovs_flow_exit();
+}
+
+module_init(dp_init);
+module_exit(dp_cleanup);
+
+MODULE_DESCRIPTION("Open vSwitch switching datapath");
+MODULE_LICENSE("GPL");
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
new file mode 100644
index 00000000000..7ede507500d
--- /dev/null
+++ b/net/openvswitch/datapath.h
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2007-2012 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef DATAPATH_H
+#define DATAPATH_H 1
+
+#include <asm/page.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/u64_stats_sync.h>
+
+#include "flow.h"
+#include "flow_table.h"
+#include "vport.h"
+
+#define DP_MAX_PORTS           USHRT_MAX
+#define DP_VPORT_HASH_BUCKETS  1024
+
+#define SAMPLE_ACTION_DEPTH 3
+
+/**
+ * struct dp_stats_percpu - per-cpu packet processing statistics for a given
+ * datapath.
+ * @n_hit: Number of received packets for which a matching flow was found in
+ * the flow table.
+ * @n_miss: Number of received packets that had no matching flow in the flow
+ * table.  The sum of @n_hit and @n_miss is the number of packets that have
+ * been received by the datapath.
+ * @n_lost: Number of received packets that had no matching flow in the flow
+ * table that could not be sent to userspace (normally due to an overflow in
+ * one of the datapath's queues).
+ * @n_mask_hit: Number of masks looked up for flow match.
+ *   @n_mask_hit / (@n_hit + @n_missed)  will be the average masks looked
+ *   up per packet.
+ */
+struct dp_stats_percpu {
+	u64 n_hit;
+	u64 n_missed;
+	u64 n_lost;
+	u64 n_mask_hit;
+	struct u64_stats_sync syncp;
+};
+
+/**
+ * struct datapath - datapath for flow-based packet switching
+ * @rcu: RCU callback head for deferred destruction.
+ * @list_node: Element in global 'dps' list.
+ * @table: flow table.
+ * @ports: Hash table for ports.  %OVSP_LOCAL port always exists.  Protected by
+ * ovs_mutex and RCU.
+ * @stats_percpu: Per-CPU datapath statistics.
+ * @net: Reference to net namespace.
+ *
+ * Context: See the comment on locking at the top of datapath.c for additional
+ * locking information.
+ */
+struct datapath {
+	struct rcu_head rcu;
+	struct list_head list_node;
+
+	/* Flow table. */
+	struct flow_table table;
+
+	/* Switch ports. */
+	struct hlist_head *ports;
+
+	/* Stats. */
+	struct dp_stats_percpu __percpu *stats_percpu;
+
+#ifdef CONFIG_NET_NS
+	/* Network namespace ref. */
+	struct net *net;
+#endif
+
+	u32 user_features;
+};
+
+/**
+ * struct ovs_skb_cb - OVS data in skb CB
+ * @flow: The flow associated with this packet.  May be %NULL if no flow.
+ * @pkt_key: The flow information extracted from the packet.  Must be nonnull.
+ * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the
+ * packet is not being tunneled.
+ */
+struct ovs_skb_cb {
+	struct sw_flow		*flow;
+	struct sw_flow_key	*pkt_key;
+	struct ovs_key_ipv4_tunnel  *tun_key;
+};
+#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
+
+/**
+ * struct dp_upcall - metadata to include with a packet to send to userspace
+ * @cmd: One of %OVS_PACKET_CMD_*.
+ * @key: Becomes %OVS_PACKET_ATTR_KEY.  Must be nonnull.
+ * @userdata: If nonnull, its variable-length value is passed to userspace as
+ * %OVS_PACKET_ATTR_USERDATA.
+ * @pid: Netlink PID to which packet should be sent.  If @pid is 0 then no
+ * packet is sent and the packet is accounted in the datapath's @n_lost
+ * counter.
+ */
+struct dp_upcall_info {
+	u8 cmd;
+	const struct sw_flow_key *key;
+	const struct nlattr *userdata;
+	u32 portid;
+};
+
+/**
+ * struct ovs_net - Per net-namespace data for ovs.
+ * @dps: List of datapaths to enable dumping them all out.
+ * Protected by genl_mutex.
+ */
+struct ovs_net {
+	struct list_head dps;
+	struct work_struct dp_notify_work;
+	struct vport_net vport_net;
+};
+
+extern int ovs_net_id;
+void ovs_lock(void);
+void ovs_unlock(void);
+
+#ifdef CONFIG_LOCKDEP
+int lockdep_ovsl_is_held(void);
+#else
+#define lockdep_ovsl_is_held()	1
+#endif
+
+#define ASSERT_OVSL()		WARN_ON(unlikely(!lockdep_ovsl_is_held()))
+#define ovsl_dereference(p)					\
+	rcu_dereference_protected(p, lockdep_ovsl_is_held())
+#define rcu_dereference_ovsl(p)					\
+	rcu_dereference_check(p, lockdep_ovsl_is_held())
+
+static inline struct net *ovs_dp_get_net(struct datapath *dp)
+{
+	return read_pnet(&dp->net);
+}
+
+static inline void ovs_dp_set_net(struct datapath *dp, struct net *net)
+{
+	write_pnet(&dp->net, net);
+}
+
+struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no);
+
+static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	return ovs_lookup_vport(dp, port_no);
+}
+
+static inline struct vport *ovs_vport_ovsl_rcu(const struct datapath *dp, int port_no)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
+	return ovs_lookup_vport(dp, port_no);
+}
+
+static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_no)
+{
+	ASSERT_OVSL();
+	return ovs_lookup_vport(dp, port_no);
+}
+
+extern struct notifier_block ovs_dp_device_notifier;
+extern struct genl_family dp_vport_genl_family;
+
+void ovs_dp_process_received_packet(struct vport *, struct sk_buff *);
+void ovs_dp_detach_port(struct vport *);
+int ovs_dp_upcall(struct datapath *, struct sk_buff *,
+		  const struct dp_upcall_info *);
+
+struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
+					 u8 cmd);
+
+int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
+void ovs_dp_notify_wq(struct work_struct *work);
+
+#define OVS_NLERR(fmt, ...)					\
+do {								\
+	if (net_ratelimit())					\
+		pr_info("netlink: " fmt, ##__VA_ARGS__);	\
+} while (0)
+#endif /* datapath.h */
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
new file mode 100644
index 00000000000..2c631fe76be
--- /dev/null
+++ b/net/openvswitch/dp_notify.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2007-2012 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include <linux/netdevice.h>
+#include <net/genetlink.h>
+#include <net/netns/generic.h>
+
+#include "datapath.h"
+#include "vport-internal_dev.h"
+#include "vport-netdev.h"
+
+static void dp_detach_port_notify(struct vport *vport)
+{
+	struct sk_buff *notify;
+	struct datapath *dp;
+
+	dp = vport->dp;
+	notify = ovs_vport_cmd_build_info(vport, 0, 0,
+					  OVS_VPORT_CMD_DEL);
+	ovs_dp_detach_port(vport);
+	if (IS_ERR(notify)) {
+		genl_set_err(&dp_vport_genl_family, ovs_dp_get_net(dp), 0,
+			     0, PTR_ERR(notify));
+		return;
+	}
+
+	genlmsg_multicast_netns(&dp_vport_genl_family,
+				ovs_dp_get_net(dp), notify, 0,
+				0, GFP_KERNEL);
+}
+
+void ovs_dp_notify_wq(struct work_struct *work)
+{
+	struct ovs_net *ovs_net = container_of(work, struct ovs_net, dp_notify_work);
+	struct datapath *dp;
+
+	ovs_lock();
+	list_for_each_entry(dp, &ovs_net->dps, list_node) {
+		int i;
+
+		for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
+			struct vport *vport;
+			struct hlist_node *n;
+
+			hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) {
+				struct netdev_vport *netdev_vport;
+
+				if (vport->ops->type != OVS_VPORT_TYPE_NETDEV)
+					continue;
+
+				netdev_vport = netdev_vport_priv(vport);
+				if (!(netdev_vport->dev->priv_flags & IFF_OVS_DATAPATH))
+					dp_detach_port_notify(vport);
+			}
+		}
+	}
+	ovs_unlock();
+}
+
+static int dp_device_event(struct notifier_block *unused, unsigned long event,
+			   void *ptr)
+{
+	struct ovs_net *ovs_net;
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct vport *vport = NULL;
+
+	if (!ovs_is_internal_dev(dev))
+		vport = ovs_netdev_get_vport(dev);
+
+	if (!vport)
+		return NOTIFY_DONE;
+
+	if (event == NETDEV_UNREGISTER) {
+		/* upper_dev_unlink and decrement promisc immediately */
+		ovs_netdev_detach_dev(vport);
+
+		/* schedule vport destroy, dev_put and genl notification */
+		ovs_net = net_generic(dev_net(dev), ovs_net_id);
+		queue_work(system_wq, &ovs_net->dp_notify_work);
+	}
+
+	return NOTIFY_DONE;
+}
+
+struct notifier_block ovs_dp_device_notifier = {
+	.notifier_call = dp_device_event
+};
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
new file mode 100644
index 00000000000..d07ab538fc9
--- /dev/null
+++ b/net/openvswitch/flow.c
@@ -0,0 +1,615 @@
+/*
+ * Copyright (c) 2007-2013 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include "flow.h"
+#include "datapath.h"
+#include <linux/uaccess.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <net/llc_pdu.h>
+#include <linux/kernel.h>
+#include <linux/jhash.h>
+#include <linux/jiffies.h>
+#include <linux/llc.h>
+#include <linux/module.h>
+#include <linux/in.h>
+#include <linux/rcupdate.h>
+#include <linux/if_arp.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/sctp.h>
+#include <linux/smp.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/rculist.h>
+#include <net/ip.h>
+#include <net/ip_tunnels.h>
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+
+u64 ovs_flow_used_time(unsigned long flow_jiffies)
+{
+	struct timespec cur_ts;
+	u64 cur_ms, idle_ms;
+
+	ktime_get_ts(&cur_ts);
+	idle_ms = jiffies_to_msecs(jiffies - flow_jiffies);
+	cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC +
+		 cur_ts.tv_nsec / NSEC_PER_MSEC;
+
+	return cur_ms - idle_ms;
+}
+
+#define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF))
+
+void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
+			   struct sk_buff *skb)
+{
+	struct flow_stats *stats;
+	int node = numa_node_id();
+
+	stats = rcu_dereference(flow->stats[node]);
+
+	/* Check if already have node-specific stats. */
+	if (likely(stats)) {
+		spin_lock(&stats->lock);
+		/* Mark if we write on the pre-allocated stats. */
+		if (node == 0 && unlikely(flow->stats_last_writer != node))
+			flow->stats_last_writer = node;
+	} else {
+		stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */
+		spin_lock(&stats->lock);
+
+		/* If the current NUMA-node is the only writer on the
+		 * pre-allocated stats keep using them.
+		 */
+		if (unlikely(flow->stats_last_writer != node)) {
+			/* A previous locker may have already allocated the
+			 * stats, so we need to check again.  If node-specific
+			 * stats were already allocated, we update the pre-
+			 * allocated stats as we have already locked them.
+			 */
+			if (likely(flow->stats_last_writer != NUMA_NO_NODE)
+			    && likely(!rcu_dereference(flow->stats[node]))) {
+				/* Try to allocate node-specific stats. */
+				struct flow_stats *new_stats;
+
+				new_stats =
+					kmem_cache_alloc_node(flow_stats_cache,
+							      GFP_THISNODE |
+							      __GFP_NOMEMALLOC,
+							      node);
+				if (likely(new_stats)) {
+					new_stats->used = jiffies;
+					new_stats->packet_count = 1;
+					new_stats->byte_count = skb->len;
+					new_stats->tcp_flags = tcp_flags;
+					spin_lock_init(&new_stats->lock);
+
+					rcu_assign_pointer(flow->stats[node],
+							   new_stats);
+					goto unlock;
+				}
+			}
+			flow->stats_last_writer = node;
+		}
+	}
+
+	stats->used = jiffies;
+	stats->packet_count++;
+	stats->byte_count += skb->len;
+	stats->tcp_flags |= tcp_flags;
+unlock:
+	spin_unlock(&stats->lock);
+}
+
+/* Must be called with rcu_read_lock or ovs_mutex. */
+void ovs_flow_stats_get(const struct sw_flow *flow,
+			struct ovs_flow_stats *ovs_stats,
+			unsigned long *used, __be16 *tcp_flags)
+{
+	int node;
+
+	*used = 0;
+	*tcp_flags = 0;
+	memset(ovs_stats, 0, sizeof(*ovs_stats));
+
+	for_each_node(node) {
+		struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[node]);
+
+		if (stats) {
+			/* Local CPU may write on non-local stats, so we must
+			 * block bottom-halves here.
+			 */
+			spin_lock_bh(&stats->lock);
+			if (!*used || time_after(stats->used, *used))
+				*used = stats->used;
+			*tcp_flags |= stats->tcp_flags;
+			ovs_stats->n_packets += stats->packet_count;
+			ovs_stats->n_bytes += stats->byte_count;
+			spin_unlock_bh(&stats->lock);
+		}
+	}
+}
+
+/* Called with ovs_mutex. */
+void ovs_flow_stats_clear(struct sw_flow *flow)
+{
+	int node;
+
+	for_each_node(node) {
+		struct flow_stats *stats = ovsl_dereference(flow->stats[node]);
+
+		if (stats) {
+			spin_lock_bh(&stats->lock);
+			stats->used = 0;
+			stats->packet_count = 0;
+			stats->byte_count = 0;
+			stats->tcp_flags = 0;
+			spin_unlock_bh(&stats->lock);
+		}
+	}
+}
+
+static int check_header(struct sk_buff *skb, int len)
+{
+	if (unlikely(skb->len < len))
+		return -EINVAL;
+	if (unlikely(!pskb_may_pull(skb, len)))
+		return -ENOMEM;
+	return 0;
+}
+
+static bool arphdr_ok(struct sk_buff *skb)
+{
+	return pskb_may_pull(skb, skb_network_offset(skb) +
+				  sizeof(struct arp_eth_header));
+}
+
+static int check_iphdr(struct sk_buff *skb)
+{
+	unsigned int nh_ofs = skb_network_offset(skb);
+	unsigned int ip_len;
+	int err;
+
+	err = check_header(skb, nh_ofs + sizeof(struct iphdr));
+	if (unlikely(err))
+		return err;
+
+	ip_len = ip_hdrlen(skb);
+	if (unlikely(ip_len < sizeof(struct iphdr) ||
+		     skb->len < nh_ofs + ip_len))
+		return -EINVAL;
+
+	skb_set_transport_header(skb, nh_ofs + ip_len);
+	return 0;
+}
+
+static bool tcphdr_ok(struct sk_buff *skb)
+{
+	int th_ofs = skb_transport_offset(skb);
+	int tcp_len;
+
+	if (unlikely(!pskb_may_pull(skb, th_ofs + sizeof(struct tcphdr))))
+		return false;
+
+	tcp_len = tcp_hdrlen(skb);
+	if (unlikely(tcp_len < sizeof(struct tcphdr) ||
+		     skb->len < th_ofs + tcp_len))
+		return false;
+
+	return true;
+}
+
+static bool udphdr_ok(struct sk_buff *skb)
+{
+	return pskb_may_pull(skb, skb_transport_offset(skb) +
+				  sizeof(struct udphdr));
+}
+
+static bool sctphdr_ok(struct sk_buff *skb)
+{
+	return pskb_may_pull(skb, skb_transport_offset(skb) +
+				  sizeof(struct sctphdr));
+}
+
+static bool icmphdr_ok(struct sk_buff *skb)
+{
+	return pskb_may_pull(skb, skb_transport_offset(skb) +
+				  sizeof(struct icmphdr));
+}
+
+static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
+{
+	unsigned int nh_ofs = skb_network_offset(skb);
+	unsigned int nh_len;
+	int payload_ofs;
+	struct ipv6hdr *nh;
+	uint8_t nexthdr;
+	__be16 frag_off;
+	int err;
+
+	err = check_header(skb, nh_ofs + sizeof(*nh));
+	if (unlikely(err))
+		return err;
+
+	nh = ipv6_hdr(skb);
+	nexthdr = nh->nexthdr;
+	payload_ofs = (u8 *)(nh + 1) - skb->data;
+
+	key->ip.proto = NEXTHDR_NONE;
+	key->ip.tos = ipv6_get_dsfield(nh);
+	key->ip.ttl = nh->hop_limit;
+	key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
+	key->ipv6.addr.src = nh->saddr;
+	key->ipv6.addr.dst = nh->daddr;
+
+	payload_ofs = ipv6_skip_exthdr(skb, payload_ofs, &nexthdr, &frag_off);
+	if (unlikely(payload_ofs < 0))
+		return -EINVAL;
+
+	if (frag_off) {
+		if (frag_off & htons(~0x7))
+			key->ip.frag = OVS_FRAG_TYPE_LATER;
+		else
+			key->ip.frag = OVS_FRAG_TYPE_FIRST;
+	}
+
+	nh_len = payload_ofs - nh_ofs;
+	skb_set_transport_header(skb, nh_ofs + nh_len);
+	key->ip.proto = nexthdr;
+	return nh_len;
+}
+
+static bool icmp6hdr_ok(struct sk_buff *skb)
+{
+	return pskb_may_pull(skb, skb_transport_offset(skb) +
+				  sizeof(struct icmp6hdr));
+}
+
+static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
+{
+	struct qtag_prefix {
+		__be16 eth_type; /* ETH_P_8021Q */
+		__be16 tci;
+	};
+	struct qtag_prefix *qp;
+
+	if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16)))
+		return 0;
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) +
+					 sizeof(__be16))))
+		return -ENOMEM;
+
+	qp = (struct qtag_prefix *) skb->data;
+	key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT);
+	__skb_pull(skb, sizeof(struct qtag_prefix));
+
+	return 0;
+}
+
+static __be16 parse_ethertype(struct sk_buff *skb)
+{
+	struct llc_snap_hdr {
+		u8  dsap;  /* Always 0xAA */
+		u8  ssap;  /* Always 0xAA */
+		u8  ctrl;
+		u8  oui[3];
+		__be16 ethertype;
+	};
+	struct llc_snap_hdr *llc;
+	__be16 proto;
+
+	proto = *(__be16 *) skb->data;
+	__skb_pull(skb, sizeof(__be16));
+
+	if (ntohs(proto) >= ETH_P_802_3_MIN)
+		return proto;
+
+	if (skb->len < sizeof(struct llc_snap_hdr))
+		return htons(ETH_P_802_2);
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(struct llc_snap_hdr))))
+		return htons(0);
+
+	llc = (struct llc_snap_hdr *) skb->data;
+	if (llc->dsap != LLC_SAP_SNAP ||
+	    llc->ssap != LLC_SAP_SNAP ||
+	    (llc->oui[0] | llc->oui[1] | llc->oui[2]) != 0)
+		return htons(ETH_P_802_2);
+
+	__skb_pull(skb, sizeof(struct llc_snap_hdr));
+
+	if (ntohs(llc->ethertype) >= ETH_P_802_3_MIN)
+		return llc->ethertype;
+
+	return htons(ETH_P_802_2);
+}
+
+static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
+			int nh_len)
+{
+	struct icmp6hdr *icmp = icmp6_hdr(skb);
+
+	/* The ICMPv6 type and code fields use the 16-bit transport port
+	 * fields, so we need to store them in 16-bit network byte order.
+	 */
+	key->tp.src = htons(icmp->icmp6_type);
+	key->tp.dst = htons(icmp->icmp6_code);
+
+	if (icmp->icmp6_code == 0 &&
+	    (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
+	     icmp->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT)) {
+		int icmp_len = skb->len - skb_transport_offset(skb);
+		struct nd_msg *nd;
+		int offset;
+
+		/* In order to process neighbor discovery options, we need the
+		 * entire packet.
+		 */
+		if (unlikely(icmp_len < sizeof(*nd)))
+			return 0;
+
+		if (unlikely(skb_linearize(skb)))
+			return -ENOMEM;
+
+		nd = (struct nd_msg *)skb_transport_header(skb);
+		key->ipv6.nd.target = nd->target;
+
+		icmp_len -= sizeof(*nd);
+		offset = 0;
+		while (icmp_len >= 8) {
+			struct nd_opt_hdr *nd_opt =
+				 (struct nd_opt_hdr *)(nd->opt + offset);
+			int opt_len = nd_opt->nd_opt_len * 8;
+
+			if (unlikely(!opt_len || opt_len > icmp_len))
+				return 0;
+
+			/* Store the link layer address if the appropriate
+			 * option is provided.  It is considered an error if
+			 * the same link layer option is specified twice.
+			 */
+			if (nd_opt->nd_opt_type == ND_OPT_SOURCE_LL_ADDR
+			    && opt_len == 8) {
+				if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll)))
+					goto invalid;
+				ether_addr_copy(key->ipv6.nd.sll,
+						&nd->opt[offset+sizeof(*nd_opt)]);
+			} else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR
+				   && opt_len == 8) {
+				if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll)))
+					goto invalid;
+				ether_addr_copy(key->ipv6.nd.tll,
+						&nd->opt[offset+sizeof(*nd_opt)]);
+			}
+
+			icmp_len -= opt_len;
+			offset += opt_len;
+		}
+	}
+
+	return 0;
+
+invalid:
+	memset(&key->ipv6.nd.target, 0, sizeof(key->ipv6.nd.target));
+	memset(key->ipv6.nd.sll, 0, sizeof(key->ipv6.nd.sll));
+	memset(key->ipv6.nd.tll, 0, sizeof(key->ipv6.nd.tll));
+
+	return 0;
+}
+
+/**
+ * ovs_flow_extract - extracts a flow key from an Ethernet frame.
+ * @skb: sk_buff that contains the frame, with skb->data pointing to the
+ * Ethernet header
+ * @in_port: port number on which @skb was received.
+ * @key: output flow key
+ *
+ * The caller must ensure that skb->len >= ETH_HLEN.
+ *
+ * Returns 0 if successful, otherwise a negative errno value.
+ *
+ * Initializes @skb header pointers as follows:
+ *
+ *    - skb->mac_header: the Ethernet header.
+ *
+ *    - skb->network_header: just past the Ethernet header, or just past the
+ *      VLAN header, to the first byte of the Ethernet payload.
+ *
+ *    - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
+ *      on output, then just past the IP header, if one is present and
+ *      of a correct length, otherwise the same as skb->network_header.
+ *      For other key->eth.type values it is left untouched.
+ */
+int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
+{
+	int error;
+	struct ethhdr *eth;
+
+	memset(key, 0, sizeof(*key));
+
+	key->phy.priority = skb->priority;
+	if (OVS_CB(skb)->tun_key)
+		memcpy(&key->tun_key, OVS_CB(skb)->tun_key, sizeof(key->tun_key));
+	key->phy.in_port = in_port;
+	key->phy.skb_mark = skb->mark;
+
+	skb_reset_mac_header(skb);
+
+	/* Link layer.  We are guaranteed to have at least the 14 byte Ethernet
+	 * header in the linear data area.
+	 */
+	eth = eth_hdr(skb);
+	ether_addr_copy(key->eth.src, eth->h_source);
+	ether_addr_copy(key->eth.dst, eth->h_dest);
+
+	__skb_pull(skb, 2 * ETH_ALEN);
+	/* We are going to push all headers that we pull, so no need to
+	 * update skb->csum here.
+	 */
+
+	if (vlan_tx_tag_present(skb))
+		key->eth.tci = htons(skb->vlan_tci);
+	else if (eth->h_proto == htons(ETH_P_8021Q))
+		if (unlikely(parse_vlan(skb, key)))
+			return -ENOMEM;
+
+	key->eth.type = parse_ethertype(skb);
+	if (unlikely(key->eth.type == htons(0)))
+		return -ENOMEM;
+
+	skb_reset_network_header(skb);
+	__skb_push(skb, skb->data - skb_mac_header(skb));
+
+	/* Network layer. */
+	if (key->eth.type == htons(ETH_P_IP)) {
+		struct iphdr *nh;
+		__be16 offset;
+
+		error = check_iphdr(skb);
+		if (unlikely(error)) {
+			if (error == -EINVAL) {
+				skb->transport_header = skb->network_header;
+				error = 0;
+			}
+			return error;
+		}
+
+		nh = ip_hdr(skb);
+		key->ipv4.addr.src = nh->saddr;
+		key->ipv4.addr.dst = nh->daddr;
+
+		key->ip.proto = nh->protocol;
+		key->ip.tos = nh->tos;
+		key->ip.ttl = nh->ttl;
+
+		offset = nh->frag_off & htons(IP_OFFSET);
+		if (offset) {
+			key->ip.frag = OVS_FRAG_TYPE_LATER;
+			return 0;
+		}
+		if (nh->frag_off & htons(IP_MF) ||
+			 skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+			key->ip.frag = OVS_FRAG_TYPE_FIRST;
+
+		/* Transport layer. */
+		if (key->ip.proto == IPPROTO_TCP) {
+			if (tcphdr_ok(skb)) {
+				struct tcphdr *tcp = tcp_hdr(skb);
+				key->tp.src = tcp->source;
+				key->tp.dst = tcp->dest;
+				key->tp.flags = TCP_FLAGS_BE16(tcp);
+			}
+		} else if (key->ip.proto == IPPROTO_UDP) {
+			if (udphdr_ok(skb)) {
+				struct udphdr *udp = udp_hdr(skb);
+				key->tp.src = udp->source;
+				key->tp.dst = udp->dest;
+			}
+		} else if (key->ip.proto == IPPROTO_SCTP) {
+			if (sctphdr_ok(skb)) {
+				struct sctphdr *sctp = sctp_hdr(skb);
+				key->tp.src = sctp->source;
+				key->tp.dst = sctp->dest;
+			}
+		} else if (key->ip.proto == IPPROTO_ICMP) {
+			if (icmphdr_ok(skb)) {
+				struct icmphdr *icmp = icmp_hdr(skb);
+				/* The ICMP type and code fields use the 16-bit
+				 * transport port fields, so we need to store
+				 * them in 16-bit network byte order. */
+				key->tp.src = htons(icmp->type);
+				key->tp.dst = htons(icmp->code);
+			}
+		}
+
+	} else if ((key->eth.type == htons(ETH_P_ARP) ||
+		   key->eth.type == htons(ETH_P_RARP)) && arphdr_ok(skb)) {
+		struct arp_eth_header *arp;
+
+		arp = (struct arp_eth_header *)skb_network_header(skb);
+
+		if (arp->ar_hrd == htons(ARPHRD_ETHER)
+				&& arp->ar_pro == htons(ETH_P_IP)
+				&& arp->ar_hln == ETH_ALEN
+				&& arp->ar_pln == 4) {
+
+			/* We only match on the lower 8 bits of the opcode. */
+			if (ntohs(arp->ar_op) <= 0xff)
+				key->ip.proto = ntohs(arp->ar_op);
+			memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
+			memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
+			ether_addr_copy(key->ipv4.arp.sha, arp->ar_sha);
+			ether_addr_copy(key->ipv4.arp.tha, arp->ar_tha);
+		}
+	} else if (key->eth.type == htons(ETH_P_IPV6)) {
+		int nh_len;             /* IPv6 Header + Extensions */
+
+		nh_len = parse_ipv6hdr(skb, key);
+		if (unlikely(nh_len < 0)) {
+			if (nh_len == -EINVAL) {
+				skb->transport_header = skb->network_header;
+				error = 0;
+			} else {
+				error = nh_len;
+			}
+			return error;
+		}
+
+		if (key->ip.frag == OVS_FRAG_TYPE_LATER)
+			return 0;
+		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+			key->ip.frag = OVS_FRAG_TYPE_FIRST;
+
+		/* Transport layer. */
+		if (key->ip.proto == NEXTHDR_TCP) {
+			if (tcphdr_ok(skb)) {
+				struct tcphdr *tcp = tcp_hdr(skb);
+				key->tp.src = tcp->source;
+				key->tp.dst = tcp->dest;
+				key->tp.flags = TCP_FLAGS_BE16(tcp);
+			}
+		} else if (key->ip.proto == NEXTHDR_UDP) {
+			if (udphdr_ok(skb)) {
+				struct udphdr *udp = udp_hdr(skb);
+				key->tp.src = udp->source;
+				key->tp.dst = udp->dest;
+			}
+		} else if (key->ip.proto == NEXTHDR_SCTP) {
+			if (sctphdr_ok(skb)) {
+				struct sctphdr *sctp = sctp_hdr(skb);
+				key->tp.src = sctp->source;
+				key->tp.dst = sctp->dest;
+			}
+		} else if (key->ip.proto == NEXTHDR_ICMP) {
+			if (icmp6hdr_ok(skb)) {
+				error = parse_icmpv6(skb, key, nh_len);
+				if (error)
+					return error;
+			}
+		}
+	}
+
+	return 0;
+}
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
new file mode 100644
index 00000000000..5e5aaed3a85
--- /dev/null
+++ b/net/openvswitch/flow.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2007-2014 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef FLOW_H
+#define FLOW_H 1
+
+#include <linux/cache.h>
+#include <linux/kernel.h>
+#include <linux/netlink.h>
+#include <linux/openvswitch.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/if_ether.h>
+#include <linux/in6.h>
+#include <linux/jiffies.h>
+#include <linux/time.h>
+#include <linux/flex_array.h>
+#include <net/inet_ecn.h>
+
+struct sk_buff;
+
+/* Used to memset ovs_key_ipv4_tunnel padding. */
+#define OVS_TUNNEL_KEY_SIZE					\
+	(offsetof(struct ovs_key_ipv4_tunnel, ipv4_ttl) +	\
+	FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, ipv4_ttl))
+
+struct ovs_key_ipv4_tunnel {
+	__be64 tun_id;
+	__be32 ipv4_src;
+	__be32 ipv4_dst;
+	__be16 tun_flags;
+	u8   ipv4_tos;
+	u8   ipv4_ttl;
+} __packed __aligned(4); /* Minimize padding. */
+
+static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key,
+					 const struct iphdr *iph, __be64 tun_id,
+					 __be16 tun_flags)
+{
+	tun_key->tun_id = tun_id;
+	tun_key->ipv4_src = iph->saddr;
+	tun_key->ipv4_dst = iph->daddr;
+	tun_key->ipv4_tos = iph->tos;
+	tun_key->ipv4_ttl = iph->ttl;
+	tun_key->tun_flags = tun_flags;
+
+	/* clear struct padding. */
+	memset((unsigned char *) tun_key + OVS_TUNNEL_KEY_SIZE, 0,
+	       sizeof(*tun_key) - OVS_TUNNEL_KEY_SIZE);
+}
+
+struct sw_flow_key {
+	struct ovs_key_ipv4_tunnel tun_key;  /* Encapsulating tunnel key. */
+	struct {
+		u32	priority;	/* Packet QoS priority. */
+		u32	skb_mark;	/* SKB mark. */
+		u16	in_port;	/* Input switch port (or DP_MAX_PORTS). */
+	} __packed phy; /* Safe when right after 'tun_key'. */
+	struct {
+		u8     src[ETH_ALEN];	/* Ethernet source address. */
+		u8     dst[ETH_ALEN];	/* Ethernet destination address. */
+		__be16 tci;		/* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
+		__be16 type;		/* Ethernet frame type. */
+	} eth;
+	struct {
+		u8     proto;		/* IP protocol or lower 8 bits of ARP opcode. */
+		u8     tos;		/* IP ToS. */
+		u8     ttl;		/* IP TTL/hop limit. */
+		u8     frag;		/* One of OVS_FRAG_TYPE_*. */
+	} ip;
+	struct {
+		__be16 src;		/* TCP/UDP/SCTP source port. */
+		__be16 dst;		/* TCP/UDP/SCTP destination port. */
+		__be16 flags;		/* TCP flags. */
+	} tp;
+	union {
+		struct {
+			struct {
+				__be32 src;	/* IP source address. */
+				__be32 dst;	/* IP destination address. */
+			} addr;
+			struct {
+				u8 sha[ETH_ALEN];	/* ARP source hardware address. */
+				u8 tha[ETH_ALEN];	/* ARP target hardware address. */
+			} arp;
+		} ipv4;
+		struct {
+			struct {
+				struct in6_addr src;	/* IPv6 source address. */
+				struct in6_addr dst;	/* IPv6 destination address. */
+			} addr;
+			__be32 label;			/* IPv6 flow label. */
+			struct {
+				struct in6_addr target;	/* ND target address. */
+				u8 sll[ETH_ALEN];	/* ND source link layer address. */
+				u8 tll[ETH_ALEN];	/* ND target link layer address. */
+			} nd;
+		} ipv6;
+	};
+} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
+
+struct sw_flow_key_range {
+	unsigned short int start;
+	unsigned short int end;
+};
+
+struct sw_flow_mask {
+	int ref_count;
+	struct rcu_head rcu;
+	struct list_head list;
+	struct sw_flow_key_range range;
+	struct sw_flow_key key;
+};
+
+struct sw_flow_match {
+	struct sw_flow_key *key;
+	struct sw_flow_key_range range;
+	struct sw_flow_mask *mask;
+};
+
+struct sw_flow_actions {
+	struct rcu_head rcu;
+	u32 actions_len;
+	struct nlattr actions[];
+};
+
+struct flow_stats {
+	u64 packet_count;		/* Number of packets matched. */
+	u64 byte_count;			/* Number of bytes matched. */
+	unsigned long used;		/* Last used time (in jiffies). */
+	spinlock_t lock;		/* Lock for atomic stats update. */
+	__be16 tcp_flags;		/* Union of seen TCP flags. */
+};
+
+struct sw_flow {
+	struct rcu_head rcu;
+	struct hlist_node hash_node[2];
+	u32 hash;
+	int stats_last_writer;		/* NUMA-node id of the last writer on
+					 * 'stats[0]'.
+					 */
+	struct sw_flow_key key;
+	struct sw_flow_key unmasked_key;
+	struct sw_flow_mask *mask;
+	struct sw_flow_actions __rcu *sf_acts;
+	struct flow_stats __rcu *stats[]; /* One for each NUMA node.  First one
+					   * is allocated at flow creation time,
+					   * the rest are allocated on demand
+					   * while holding the 'stats[0].lock'.
+					   */
+};
+
+struct arp_eth_header {
+	__be16      ar_hrd;	/* format of hardware address   */
+	__be16      ar_pro;	/* format of protocol address   */
+	unsigned char   ar_hln;	/* length of hardware address   */
+	unsigned char   ar_pln;	/* length of protocol address   */
+	__be16      ar_op;	/* ARP opcode (command)     */
+
+	/* Ethernet+IPv4 specific members. */
+	unsigned char       ar_sha[ETH_ALEN];	/* sender hardware address  */
+	unsigned char       ar_sip[4];		/* sender IP address        */
+	unsigned char       ar_tha[ETH_ALEN];	/* target hardware address  */
+	unsigned char       ar_tip[4];		/* target IP address        */
+} __packed;
+
+void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags,
+			   struct sk_buff *);
+void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *,
+			unsigned long *used, __be16 *tcp_flags);
+void ovs_flow_stats_clear(struct sw_flow *);
+u64 ovs_flow_used_time(unsigned long flow_jiffies);
+
+int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *);
+
+#endif /* flow.h */
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
new file mode 100644
index 00000000000..d757848da89
--- /dev/null
+++ b/net/openvswitch/flow_netlink.c
@@ -0,0 +1,1576 @@
+/*
+ * Copyright (c) 2007-2013 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include "flow.h"
+#include "datapath.h"
+#include <linux/uaccess.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <net/llc_pdu.h>
+#include <linux/kernel.h>
+#include <linux/jhash.h>
+#include <linux/jiffies.h>
+#include <linux/llc.h>
+#include <linux/module.h>
+#include <linux/in.h>
+#include <linux/rcupdate.h>
+#include <linux/if_arp.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/sctp.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/rculist.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+
+#include "flow_netlink.h"
+
+static void update_range__(struct sw_flow_match *match,
+			   size_t offset, size_t size, bool is_mask)
+{
+	struct sw_flow_key_range *range = NULL;
+	size_t start = rounddown(offset, sizeof(long));
+	size_t end = roundup(offset + size, sizeof(long));
+
+	if (!is_mask)
+		range = &match->range;
+	else if (match->mask)
+		range = &match->mask->range;
+
+	if (!range)
+		return;
+
+	if (range->start == range->end) {
+		range->start = start;
+		range->end = end;
+		return;
+	}
+
+	if (range->start > start)
+		range->start = start;
+
+	if (range->end < end)
+		range->end = end;
+}
+
+#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
+	do { \
+		update_range__(match, offsetof(struct sw_flow_key, field),  \
+				     sizeof((match)->key->field), is_mask); \
+		if (is_mask) {						    \
+			if ((match)->mask)				    \
+				(match)->mask->key.field = value;	    \
+		} else {                                                    \
+			(match)->key->field = value;		            \
+		}                                                           \
+	} while (0)
+
+#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
+	do { \
+		update_range__(match, offsetof(struct sw_flow_key, field),  \
+				len, is_mask);                              \
+		if (is_mask) {						    \
+			if ((match)->mask)				    \
+				memcpy(&(match)->mask->key.field, value_p, len);\
+		} else {                                                    \
+			memcpy(&(match)->key->field, value_p, len);         \
+		}                                                           \
+	} while (0)
+
+static u16 range_n_bytes(const struct sw_flow_key_range *range)
+{
+	return range->end - range->start;
+}
+
+static bool match_validate(const struct sw_flow_match *match,
+			   u64 key_attrs, u64 mask_attrs)
+{
+	u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET;
+	u64 mask_allowed = key_attrs;  /* At most allow all key attributes */
+
+	/* The following mask attributes allowed only if they
+	 * pass the validation tests. */
+	mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4)
+			| (1 << OVS_KEY_ATTR_IPV6)
+			| (1 << OVS_KEY_ATTR_TCP)
+			| (1 << OVS_KEY_ATTR_TCP_FLAGS)
+			| (1 << OVS_KEY_ATTR_UDP)
+			| (1 << OVS_KEY_ATTR_SCTP)
+			| (1 << OVS_KEY_ATTR_ICMP)
+			| (1 << OVS_KEY_ATTR_ICMPV6)
+			| (1 << OVS_KEY_ATTR_ARP)
+			| (1 << OVS_KEY_ATTR_ND));
+
+	/* Always allowed mask fields. */
+	mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
+		       | (1 << OVS_KEY_ATTR_IN_PORT)
+		       | (1 << OVS_KEY_ATTR_ETHERTYPE));
+
+	/* Check key attributes. */
+	if (match->key->eth.type == htons(ETH_P_ARP)
+			|| match->key->eth.type == htons(ETH_P_RARP)) {
+		key_expected |= 1 << OVS_KEY_ATTR_ARP;
+		if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
+			mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
+	}
+
+	if (match->key->eth.type == htons(ETH_P_IP)) {
+		key_expected |= 1 << OVS_KEY_ATTR_IPV4;
+		if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
+			mask_allowed |= 1 << OVS_KEY_ATTR_IPV4;
+
+		if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
+			if (match->key->ip.proto == IPPROTO_UDP) {
+				key_expected |= 1 << OVS_KEY_ATTR_UDP;
+				if (match->mask && (match->mask->key.ip.proto == 0xff))
+					mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
+			}
+
+			if (match->key->ip.proto == IPPROTO_SCTP) {
+				key_expected |= 1 << OVS_KEY_ATTR_SCTP;
+				if (match->mask && (match->mask->key.ip.proto == 0xff))
+					mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
+			}
+
+			if (match->key->ip.proto == IPPROTO_TCP) {
+				key_expected |= 1 << OVS_KEY_ATTR_TCP;
+				key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
+				if (match->mask && (match->mask->key.ip.proto == 0xff)) {
+					mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
+					mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
+				}
+			}
+
+			if (match->key->ip.proto == IPPROTO_ICMP) {
+				key_expected |= 1 << OVS_KEY_ATTR_ICMP;
+				if (match->mask && (match->mask->key.ip.proto == 0xff))
+					mask_allowed |= 1 << OVS_KEY_ATTR_ICMP;
+			}
+		}
+	}
+
+	if (match->key->eth.type == htons(ETH_P_IPV6)) {
+		key_expected |= 1 << OVS_KEY_ATTR_IPV6;
+		if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
+			mask_allowed |= 1 << OVS_KEY_ATTR_IPV6;
+
+		if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
+			if (match->key->ip.proto == IPPROTO_UDP) {
+				key_expected |= 1 << OVS_KEY_ATTR_UDP;
+				if (match->mask && (match->mask->key.ip.proto == 0xff))
+					mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
+			}
+
+			if (match->key->ip.proto == IPPROTO_SCTP) {
+				key_expected |= 1 << OVS_KEY_ATTR_SCTP;
+				if (match->mask && (match->mask->key.ip.proto == 0xff))
+					mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
+			}
+
+			if (match->key->ip.proto == IPPROTO_TCP) {
+				key_expected |= 1 << OVS_KEY_ATTR_TCP;
+				key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
+				if (match->mask && (match->mask->key.ip.proto == 0xff)) {
+					mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
+					mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
+				}
+			}
+
+			if (match->key->ip.proto == IPPROTO_ICMPV6) {
+				key_expected |= 1 << OVS_KEY_ATTR_ICMPV6;
+				if (match->mask && (match->mask->key.ip.proto == 0xff))
+					mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;
+
+				if (match->key->tp.src ==
+						htons(NDISC_NEIGHBOUR_SOLICITATION) ||
+				    match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
+					key_expected |= 1 << OVS_KEY_ATTR_ND;
+					if (match->mask && (match->mask->key.tp.src == htons(0xffff)))
+						mask_allowed |= 1 << OVS_KEY_ATTR_ND;
+				}
+			}
+		}
+	}
+
+	if ((key_attrs & key_expected) != key_expected) {
+		/* Key attributes check failed. */
+		OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n",
+				(unsigned long long)key_attrs, (unsigned long long)key_expected);
+		return false;
+	}
+
+	if ((mask_attrs & mask_allowed) != mask_attrs) {
+		/* Mask attributes check failed. */
+		OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n",
+				(unsigned long long)mask_attrs, (unsigned long long)mask_allowed);
+		return false;
+	}
+
+	return true;
+}
+
+/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
+static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
+	[OVS_KEY_ATTR_ENCAP] = -1,
+	[OVS_KEY_ATTR_PRIORITY] = sizeof(u32),
+	[OVS_KEY_ATTR_IN_PORT] = sizeof(u32),
+	[OVS_KEY_ATTR_SKB_MARK] = sizeof(u32),
+	[OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
+	[OVS_KEY_ATTR_VLAN] = sizeof(__be16),
+	[OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
+	[OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4),
+	[OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
+	[OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
+	[OVS_KEY_ATTR_TCP_FLAGS] = sizeof(__be16),
+	[OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp),
+	[OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp),
+	[OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp),
+	[OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
+	[OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
+	[OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
+	[OVS_KEY_ATTR_TUNNEL] = -1,
+};
+
+static bool is_all_zero(const u8 *fp, size_t size)
+{
+	int i;
+
+	if (!fp)
+		return false;
+
+	for (i = 0; i < size; i++)
+		if (fp[i])
+			return false;
+
+	return true;
+}
+
+static int __parse_flow_nlattrs(const struct nlattr *attr,
+				const struct nlattr *a[],
+				u64 *attrsp, bool nz)
+{
+	const struct nlattr *nla;
+	u64 attrs;
+	int rem;
+
+	attrs = *attrsp;
+	nla_for_each_nested(nla, attr, rem) {
+		u16 type = nla_type(nla);
+		int expected_len;
+
+		if (type > OVS_KEY_ATTR_MAX) {
+			OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n",
+				  type, OVS_KEY_ATTR_MAX);
+			return -EINVAL;
+		}
+
+		if (attrs & (1 << type)) {
+			OVS_NLERR("Duplicate key attribute (type %d).\n", type);
+			return -EINVAL;
+		}
+
+		expected_len = ovs_key_lens[type];
+		if (nla_len(nla) != expected_len && expected_len != -1) {
+			OVS_NLERR("Key attribute has unexpected length (type=%d"
+				  ", length=%d, expected=%d).\n", type,
+				  nla_len(nla), expected_len);
+			return -EINVAL;
+		}
+
+		if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
+			attrs |= 1 << type;
+			a[type] = nla;
+		}
+	}
+	if (rem) {
+		OVS_NLERR("Message has %d unknown bytes.\n", rem);
+		return -EINVAL;
+	}
+
+	*attrsp = attrs;
+	return 0;
+}
+
+static int parse_flow_mask_nlattrs(const struct nlattr *attr,
+				   const struct nlattr *a[], u64 *attrsp)
+{
+	return __parse_flow_nlattrs(attr, a, attrsp, true);
+}
+
+static int parse_flow_nlattrs(const struct nlattr *attr,
+			      const struct nlattr *a[], u64 *attrsp)
+{
+	return __parse_flow_nlattrs(attr, a, attrsp, false);
+}
+
+static int ipv4_tun_from_nlattr(const struct nlattr *attr,
+				struct sw_flow_match *match, bool is_mask)
+{
+	struct nlattr *a;
+	int rem;
+	bool ttl = false;
+	__be16 tun_flags = 0;
+
+	nla_for_each_nested(a, attr, rem) {
+		int type = nla_type(a);
+		static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
+			[OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64),
+			[OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32),
+			[OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32),
+			[OVS_TUNNEL_KEY_ATTR_TOS] = 1,
+			[OVS_TUNNEL_KEY_ATTR_TTL] = 1,
+			[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
+			[OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
+		};
+
+		if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
+			OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n",
+			type, OVS_TUNNEL_KEY_ATTR_MAX);
+			return -EINVAL;
+		}
+
+		if (ovs_tunnel_key_lens[type] != nla_len(a)) {
+			OVS_NLERR("IPv4 tunnel attribute type has unexpected "
+				  " length (type=%d, length=%d, expected=%d).\n",
+				  type, nla_len(a), ovs_tunnel_key_lens[type]);
+			return -EINVAL;
+		}
+
+		switch (type) {
+		case OVS_TUNNEL_KEY_ATTR_ID:
+			SW_FLOW_KEY_PUT(match, tun_key.tun_id,
+					nla_get_be64(a), is_mask);
+			tun_flags |= TUNNEL_KEY;
+			break;
+		case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
+			SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
+					nla_get_be32(a), is_mask);
+			break;
+		case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
+			SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
+					nla_get_be32(a), is_mask);
+			break;
+		case OVS_TUNNEL_KEY_ATTR_TOS:
+			SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
+					nla_get_u8(a), is_mask);
+			break;
+		case OVS_TUNNEL_KEY_ATTR_TTL:
+			SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl,
+					nla_get_u8(a), is_mask);
+			ttl = true;
+			break;
+		case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
+			tun_flags |= TUNNEL_DONT_FRAGMENT;
+			break;
+		case OVS_TUNNEL_KEY_ATTR_CSUM:
+			tun_flags |= TUNNEL_CSUM;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
+
+	if (rem > 0) {
+		OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem);
+		return -EINVAL;
+	}
+
+	if (!is_mask) {
+		if (!match->key->tun_key.ipv4_dst) {
+			OVS_NLERR("IPv4 tunnel destination address is zero.\n");
+			return -EINVAL;
+		}
+
+		if (!ttl) {
+			OVS_NLERR("IPv4 tunnel TTL not specified.\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int ipv4_tun_to_nlattr(struct sk_buff *skb,
+			      const struct ovs_key_ipv4_tunnel *tun_key,
+			      const struct ovs_key_ipv4_tunnel *output)
+{
+	struct nlattr *nla;
+
+	nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
+	if (!nla)
+		return -EMSGSIZE;
+
+	if (output->tun_flags & TUNNEL_KEY &&
+	    nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
+		return -EMSGSIZE;
+	if (output->ipv4_src &&
+		nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
+		return -EMSGSIZE;
+	if (output->ipv4_dst &&
+		nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
+		return -EMSGSIZE;
+	if (output->ipv4_tos &&
+		nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
+		return -EMSGSIZE;
+	if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
+		return -EMSGSIZE;
+	if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
+		nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
+		return -EMSGSIZE;
+	if ((output->tun_flags & TUNNEL_CSUM) &&
+		nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
+		return -EMSGSIZE;
+
+	nla_nest_end(skb, nla);
+	return 0;
+}
+
+
+static int metadata_from_nlattrs(struct sw_flow_match *match,  u64 *attrs,
+				 const struct nlattr **a, bool is_mask)
+{
+	if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
+		SW_FLOW_KEY_PUT(match, phy.priority,
+			  nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
+		*attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
+	}
+
+	if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
+		u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
+
+		if (is_mask)
+			in_port = 0xffffffff; /* Always exact match in_port. */
+		else if (in_port >= DP_MAX_PORTS)
+			return -EINVAL;
+
+		SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
+		*attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
+	} else if (!is_mask) {
+		SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
+	}
+
+	if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
+		uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
+
+		SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
+		*attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
+	}
+	if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
+		if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
+					 is_mask))
+			return -EINVAL;
+		*attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
+	}
+	return 0;
+}
+
+static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
+				const struct nlattr **a, bool is_mask)
+{
+	int err;
+	u64 orig_attrs = attrs;
+
+	err = metadata_from_nlattrs(match, &attrs, a, is_mask);
+	if (err)
+		return err;
+
+	if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) {
+		const struct ovs_key_ethernet *eth_key;
+
+		eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
+		SW_FLOW_KEY_MEMCPY(match, eth.src,
+				eth_key->eth_src, ETH_ALEN, is_mask);
+		SW_FLOW_KEY_MEMCPY(match, eth.dst,
+				eth_key->eth_dst, ETH_ALEN, is_mask);
+		attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
+	}
+
+	if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
+		__be16 tci;
+
+		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+		if (!(tci & htons(VLAN_TAG_PRESENT))) {
+			if (is_mask)
+				OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n");
+			else
+				OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n");
+
+			return -EINVAL;
+		}
+
+		SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
+		attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
+	} else if (!is_mask)
+		SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
+
+	if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
+		__be16 eth_type;
+
+		eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+		if (is_mask) {
+			/* Always exact match EtherType. */
+			eth_type = htons(0xffff);
+		} else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
+			OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n",
+					ntohs(eth_type), ETH_P_802_3_MIN);
+			return -EINVAL;
+		}
+
+		SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
+		attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
+	} else if (!is_mask) {
+		SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
+	}
+
+	if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
+		const struct ovs_key_ipv4 *ipv4_key;
+
+		ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
+		if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
+			OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n",
+				ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
+			return -EINVAL;
+		}
+		SW_FLOW_KEY_PUT(match, ip.proto,
+				ipv4_key->ipv4_proto, is_mask);
+		SW_FLOW_KEY_PUT(match, ip.tos,
+				ipv4_key->ipv4_tos, is_mask);
+		SW_FLOW_KEY_PUT(match, ip.ttl,
+				ipv4_key->ipv4_ttl, is_mask);
+		SW_FLOW_KEY_PUT(match, ip.frag,
+				ipv4_key->ipv4_frag, is_mask);
+		SW_FLOW_KEY_PUT(match, ipv4.addr.src,
+				ipv4_key->ipv4_src, is_mask);
+		SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
+				ipv4_key->ipv4_dst, is_mask);
+		attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
+	}
+
+	if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
+		const struct ovs_key_ipv6 *ipv6_key;
+
+		ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
+		if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
+			OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n",
+				ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
+			return -EINVAL;
+		}
+		SW_FLOW_KEY_PUT(match, ipv6.label,
+				ipv6_key->ipv6_label, is_mask);
+		SW_FLOW_KEY_PUT(match, ip.proto,
+				ipv6_key->ipv6_proto, is_mask);
+		SW_FLOW_KEY_PUT(match, ip.tos,
+				ipv6_key->ipv6_tclass, is_mask);
+		SW_FLOW_KEY_PUT(match, ip.ttl,
+				ipv6_key->ipv6_hlimit, is_mask);
+		SW_FLOW_KEY_PUT(match, ip.frag,
+				ipv6_key->ipv6_frag, is_mask);
+		SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
+				ipv6_key->ipv6_src,
+				sizeof(match->key->ipv6.addr.src),
+				is_mask);
+		SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
+				ipv6_key->ipv6_dst,
+				sizeof(match->key->ipv6.addr.dst),
+				is_mask);
+
+		attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
+	}
+
+	if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
+		const struct ovs_key_arp *arp_key;
+
+		arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
+		if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
+			OVS_NLERR("Unknown ARP opcode (opcode=%d).\n",
+				  arp_key->arp_op);
+			return -EINVAL;
+		}
+
+		SW_FLOW_KEY_PUT(match, ipv4.addr.src,
+				arp_key->arp_sip, is_mask);
+		SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
+			arp_key->arp_tip, is_mask);
+		SW_FLOW_KEY_PUT(match, ip.proto,
+				ntohs(arp_key->arp_op), is_mask);
+		SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
+				arp_key->arp_sha, ETH_ALEN, is_mask);
+		SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
+				arp_key->arp_tha, ETH_ALEN, is_mask);
+
+		attrs &= ~(1 << OVS_KEY_ATTR_ARP);
+	}
+
+	if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
+		const struct ovs_key_tcp *tcp_key;
+
+		tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
+		SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask);
+		SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask);
+		attrs &= ~(1 << OVS_KEY_ATTR_TCP);
+	}
+
+	if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) {
+		if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
+			SW_FLOW_KEY_PUT(match, tp.flags,
+					nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
+					is_mask);
+		} else {
+			SW_FLOW_KEY_PUT(match, tp.flags,
+					nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
+					is_mask);
+		}
+		attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS);
+	}
+
+	if (attrs & (1 << OVS_KEY_ATTR_UDP)) {
+		const struct ovs_key_udp *udp_key;
+
+		udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
+		SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask);
+		SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask);
+		attrs &= ~(1 << OVS_KEY_ATTR_UDP);
+	}
+
+	if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
+		const struct ovs_key_sctp *sctp_key;
+
+		sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
+		SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask);
+		SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask);
+		attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
+	}
+
+	if (attrs & (1 << OVS_KEY_ATTR_ICMP)) {
+		const struct ovs_key_icmp *icmp_key;
+
+		icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
+		SW_FLOW_KEY_PUT(match, tp.src,
+				htons(icmp_key->icmp_type), is_mask);
+		SW_FLOW_KEY_PUT(match, tp.dst,
+				htons(icmp_key->icmp_code), is_mask);
+		attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
+	}
+
+	if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) {
+		const struct ovs_key_icmpv6 *icmpv6_key;
+
+		icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
+		SW_FLOW_KEY_PUT(match, tp.src,
+				htons(icmpv6_key->icmpv6_type), is_mask);
+		SW_FLOW_KEY_PUT(match, tp.dst,
+				htons(icmpv6_key->icmpv6_code), is_mask);
+		attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
+	}
+
+	if (attrs & (1 << OVS_KEY_ATTR_ND)) {
+		const struct ovs_key_nd *nd_key;
+
+		nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
+		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
+			nd_key->nd_target,
+			sizeof(match->key->ipv6.nd.target),
+			is_mask);
+		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
+			nd_key->nd_sll, ETH_ALEN, is_mask);
+		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
+				nd_key->nd_tll, ETH_ALEN, is_mask);
+		attrs &= ~(1 << OVS_KEY_ATTR_ND);
+	}
+
+	if (attrs != 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+static void sw_flow_mask_set(struct sw_flow_mask *mask,
+			     struct sw_flow_key_range *range, u8 val)
+{
+	u8 *m = (u8 *)&mask->key + range->start;
+
+	mask->range = *range;
+	memset(m, val, range_n_bytes(range));
+}
+
+/**
+ * ovs_nla_get_match - parses Netlink attributes into a flow key and
+ * mask. In case the 'mask' is NULL, the flow is treated as exact match
+ * flow. Otherwise, it is treated as a wildcarded flow, except the mask
+ * does not include any don't care bit.
+ * @match: receives the extracted flow match information.
+ * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
+ * sequence. The fields should of the packet that triggered the creation
+ * of this flow.
+ * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
+ * attribute specifies the mask field of the wildcarded flow.
+ */
+int ovs_nla_get_match(struct sw_flow_match *match,
+		      const struct nlattr *key,
+		      const struct nlattr *mask)
+{
+	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
+	const struct nlattr *encap;
+	u64 key_attrs = 0;
+	u64 mask_attrs = 0;
+	bool encap_valid = false;
+	int err;
+
+	err = parse_flow_nlattrs(key, a, &key_attrs);
+	if (err)
+		return err;
+
+	if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
+	    (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
+	    (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
+		__be16 tci;
+
+		if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
+		      (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
+			OVS_NLERR("Invalid Vlan frame.\n");
+			return -EINVAL;
+		}
+
+		key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
+		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+		encap = a[OVS_KEY_ATTR_ENCAP];
+		key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
+		encap_valid = true;
+
+		if (tci & htons(VLAN_TAG_PRESENT)) {
+			err = parse_flow_nlattrs(encap, a, &key_attrs);
+			if (err)
+				return err;
+		} else if (!tci) {
+			/* Corner case for truncated 802.1Q header. */
+			if (nla_len(encap)) {
+				OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n");
+				return -EINVAL;
+			}
+		} else {
+			OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n");
+			return  -EINVAL;
+		}
+	}
+
+	err = ovs_key_from_nlattrs(match, key_attrs, a, false);
+	if (err)
+		return err;
+
+	if (mask) {
+		err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
+		if (err)
+			return err;
+
+		if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP)  {
+			__be16 eth_type = 0;
+			__be16 tci = 0;
+
+			if (!encap_valid) {
+				OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n");
+				return  -EINVAL;
+			}
+
+			mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
+			if (a[OVS_KEY_ATTR_ETHERTYPE])
+				eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+
+			if (eth_type == htons(0xffff)) {
+				mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
+				encap = a[OVS_KEY_ATTR_ENCAP];
+				err = parse_flow_mask_nlattrs(encap, a, &mask_attrs);
+			} else {
+				OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n",
+						ntohs(eth_type));
+				return -EINVAL;
+			}
+
+			if (a[OVS_KEY_ATTR_VLAN])
+				tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+
+			if (!(tci & htons(VLAN_TAG_PRESENT))) {
+				OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci));
+				return -EINVAL;
+			}
+		}
+
+		err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
+		if (err)
+			return err;
+	} else {
+		/* Populate exact match flow's key mask. */
+		if (match->mask)
+			sw_flow_mask_set(match->mask, &match->range, 0xff);
+	}
+
+	if (!match_validate(match, key_attrs, mask_attrs))
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
+ * @flow: Receives extracted in_port, priority, tun_key and skb_mark.
+ * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
+ * sequence.
+ *
+ * This parses a series of Netlink attributes that form a flow key, which must
+ * take the same form accepted by flow_from_nlattrs(), but only enough of it to
+ * get the metadata, that is, the parts of the flow key that cannot be
+ * extracted from the packet itself.
+ */
+
+int ovs_nla_get_flow_metadata(struct sw_flow *flow,
+			      const struct nlattr *attr)
+{
+	struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key;
+	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
+	u64 attrs = 0;
+	int err;
+	struct sw_flow_match match;
+
+	flow->key.phy.in_port = DP_MAX_PORTS;
+	flow->key.phy.priority = 0;
+	flow->key.phy.skb_mark = 0;
+	memset(tun_key, 0, sizeof(flow->key.tun_key));
+
+	err = parse_flow_nlattrs(attr, a, &attrs);
+	if (err)
+		return -EINVAL;
+
+	memset(&match, 0, sizeof(match));
+	match.key = &flow->key;
+
+	err = metadata_from_nlattrs(&match, &attrs, a, false);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+int ovs_nla_put_flow(const struct sw_flow_key *swkey,
+		     const struct sw_flow_key *output, struct sk_buff *skb)
+{
+	struct ovs_key_ethernet *eth_key;
+	struct nlattr *nla, *encap;
+	bool is_mask = (swkey != output);
+
+	if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
+		goto nla_put_failure;
+
+	if ((swkey->tun_key.ipv4_dst || is_mask) &&
+	    ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key))
+		goto nla_put_failure;
+
+	if (swkey->phy.in_port == DP_MAX_PORTS) {
+		if (is_mask && (output->phy.in_port == 0xffff))
+			if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
+				goto nla_put_failure;
+	} else {
+		u16 upper_u16;
+		upper_u16 = !is_mask ? 0 : 0xffff;
+
+		if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
+				(upper_u16 << 16) | output->phy.in_port))
+			goto nla_put_failure;
+	}
+
+	if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
+		goto nla_put_failure;
+
+	nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
+	if (!nla)
+		goto nla_put_failure;
+
+	eth_key = nla_data(nla);
+	ether_addr_copy(eth_key->eth_src, output->eth.src);
+	ether_addr_copy(eth_key->eth_dst, output->eth.dst);
+
+	if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
+		__be16 eth_type;
+		eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
+		if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
+		    nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
+			goto nla_put_failure;
+		encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
+		if (!swkey->eth.tci)
+			goto unencap;
+	} else
+		encap = NULL;
+
+	if (swkey->eth.type == htons(ETH_P_802_2)) {
+		/*
+		 * Ethertype 802.2 is represented in the netlink with omitted
+		 * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
+		 * 0xffff in the mask attribute.  Ethertype can also
+		 * be wildcarded.
+		 */
+		if (is_mask && output->eth.type)
+			if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
+						output->eth.type))
+				goto nla_put_failure;
+		goto unencap;
+	}
+
+	if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
+		goto nla_put_failure;
+
+	if (swkey->eth.type == htons(ETH_P_IP)) {
+		struct ovs_key_ipv4 *ipv4_key;
+
+		nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
+		if (!nla)
+			goto nla_put_failure;
+		ipv4_key = nla_data(nla);
+		ipv4_key->ipv4_src = output->ipv4.addr.src;
+		ipv4_key->ipv4_dst = output->ipv4.addr.dst;
+		ipv4_key->ipv4_proto = output->ip.proto;
+		ipv4_key->ipv4_tos = output->ip.tos;
+		ipv4_key->ipv4_ttl = output->ip.ttl;
+		ipv4_key->ipv4_frag = output->ip.frag;
+	} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+		struct ovs_key_ipv6 *ipv6_key;
+
+		nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
+		if (!nla)
+			goto nla_put_failure;
+		ipv6_key = nla_data(nla);
+		memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
+				sizeof(ipv6_key->ipv6_src));
+		memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
+				sizeof(ipv6_key->ipv6_dst));
+		ipv6_key->ipv6_label = output->ipv6.label;
+		ipv6_key->ipv6_proto = output->ip.proto;
+		ipv6_key->ipv6_tclass = output->ip.tos;
+		ipv6_key->ipv6_hlimit = output->ip.ttl;
+		ipv6_key->ipv6_frag = output->ip.frag;
+	} else if (swkey->eth.type == htons(ETH_P_ARP) ||
+		   swkey->eth.type == htons(ETH_P_RARP)) {
+		struct ovs_key_arp *arp_key;
+
+		nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
+		if (!nla)
+			goto nla_put_failure;
+		arp_key = nla_data(nla);
+		memset(arp_key, 0, sizeof(struct ovs_key_arp));
+		arp_key->arp_sip = output->ipv4.addr.src;
+		arp_key->arp_tip = output->ipv4.addr.dst;
+		arp_key->arp_op = htons(output->ip.proto);
+		ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
+		ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
+	}
+
+	if ((swkey->eth.type == htons(ETH_P_IP) ||
+	     swkey->eth.type == htons(ETH_P_IPV6)) &&
+	     swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
+
+		if (swkey->ip.proto == IPPROTO_TCP) {
+			struct ovs_key_tcp *tcp_key;
+
+			nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
+			if (!nla)
+				goto nla_put_failure;
+			tcp_key = nla_data(nla);
+			tcp_key->tcp_src = output->tp.src;
+			tcp_key->tcp_dst = output->tp.dst;
+			if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
+					 output->tp.flags))
+				goto nla_put_failure;
+		} else if (swkey->ip.proto == IPPROTO_UDP) {
+			struct ovs_key_udp *udp_key;
+
+			nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
+			if (!nla)
+				goto nla_put_failure;
+			udp_key = nla_data(nla);
+			udp_key->udp_src = output->tp.src;
+			udp_key->udp_dst = output->tp.dst;
+		} else if (swkey->ip.proto == IPPROTO_SCTP) {
+			struct ovs_key_sctp *sctp_key;
+
+			nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
+			if (!nla)
+				goto nla_put_failure;
+			sctp_key = nla_data(nla);
+			sctp_key->sctp_src = output->tp.src;
+			sctp_key->sctp_dst = output->tp.dst;
+		} else if (swkey->eth.type == htons(ETH_P_IP) &&
+			   swkey->ip.proto == IPPROTO_ICMP) {
+			struct ovs_key_icmp *icmp_key;
+
+			nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
+			if (!nla)
+				goto nla_put_failure;
+			icmp_key = nla_data(nla);
+			icmp_key->icmp_type = ntohs(output->tp.src);
+			icmp_key->icmp_code = ntohs(output->tp.dst);
+		} else if (swkey->eth.type == htons(ETH_P_IPV6) &&
+			   swkey->ip.proto == IPPROTO_ICMPV6) {
+			struct ovs_key_icmpv6 *icmpv6_key;
+
+			nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
+						sizeof(*icmpv6_key));
+			if (!nla)
+				goto nla_put_failure;
+			icmpv6_key = nla_data(nla);
+			icmpv6_key->icmpv6_type = ntohs(output->tp.src);
+			icmpv6_key->icmpv6_code = ntohs(output->tp.dst);
+
+			if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
+			    icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
+				struct ovs_key_nd *nd_key;
+
+				nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
+				if (!nla)
+					goto nla_put_failure;
+				nd_key = nla_data(nla);
+				memcpy(nd_key->nd_target, &output->ipv6.nd.target,
+							sizeof(nd_key->nd_target));
+				ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll);
+				ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll);
+			}
+		}
+	}
+
+unencap:
+	if (encap)
+		nla_nest_end(skb, encap);
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+#define MAX_ACTIONS_BUFSIZE	(32 * 1024)
+
+struct sw_flow_actions *ovs_nla_alloc_flow_actions(int size)
+{
+	struct sw_flow_actions *sfa;
+
+	if (size > MAX_ACTIONS_BUFSIZE)
+		return ERR_PTR(-EINVAL);
+
+	sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
+	if (!sfa)
+		return ERR_PTR(-ENOMEM);
+
+	sfa->actions_len = 0;
+	return sfa;
+}
+
+/* Schedules 'sf_acts' to be freed after the next RCU grace period.
+ * The caller must hold rcu_read_lock for this to be sensible. */
+void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
+{
+	kfree_rcu(sf_acts, rcu);
+}
+
+static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
+				       int attr_len)
+{
+
+	struct sw_flow_actions *acts;
+	int new_acts_size;
+	int req_size = NLA_ALIGN(attr_len);
+	int next_offset = offsetof(struct sw_flow_actions, actions) +
+					(*sfa)->actions_len;
+
+	if (req_size <= (ksize(*sfa) - next_offset))
+		goto out;
+
+	new_acts_size = ksize(*sfa) * 2;
+
+	if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
+		if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size)
+			return ERR_PTR(-EMSGSIZE);
+		new_acts_size = MAX_ACTIONS_BUFSIZE;
+	}
+
+	acts = ovs_nla_alloc_flow_actions(new_acts_size);
+	if (IS_ERR(acts))
+		return (void *)acts;
+
+	memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
+	acts->actions_len = (*sfa)->actions_len;
+	kfree(*sfa);
+	*sfa = acts;
+
+out:
+	(*sfa)->actions_len += req_size;
+	return  (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
+}
+
+static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len)
+{
+	struct nlattr *a;
+
+	a = reserve_sfa_size(sfa, nla_attr_size(len));
+	if (IS_ERR(a))
+		return PTR_ERR(a);
+
+	a->nla_type = attrtype;
+	a->nla_len = nla_attr_size(len);
+
+	if (data)
+		memcpy(nla_data(a), data, len);
+	memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
+
+	return 0;
+}
+
+static inline int add_nested_action_start(struct sw_flow_actions **sfa,
+					  int attrtype)
+{
+	int used = (*sfa)->actions_len;
+	int err;
+
+	err = add_action(sfa, attrtype, NULL, 0);
+	if (err)
+		return err;
+
+	return used;
+}
+
+static inline void add_nested_action_end(struct sw_flow_actions *sfa,
+					 int st_offset)
+{
+	struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions +
+							       st_offset);
+
+	a->nla_len = sfa->actions_len - st_offset;
+}
+
+static int validate_and_copy_sample(const struct nlattr *attr,
+				    const struct sw_flow_key *key, int depth,
+				    struct sw_flow_actions **sfa)
+{
+	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
+	const struct nlattr *probability, *actions;
+	const struct nlattr *a;
+	int rem, start, err, st_acts;
+
+	memset(attrs, 0, sizeof(attrs));
+	nla_for_each_nested(a, attr, rem) {
+		int type = nla_type(a);
+		if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
+			return -EINVAL;
+		attrs[type] = a;
+	}
+	if (rem)
+		return -EINVAL;
+
+	probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
+	if (!probability || nla_len(probability) != sizeof(u32))
+		return -EINVAL;
+
+	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
+	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
+		return -EINVAL;
+
+	/* validation done, copy sample action. */
+	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE);
+	if (start < 0)
+		return start;
+	err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
+			 nla_data(probability), sizeof(u32));
+	if (err)
+		return err;
+	st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS);
+	if (st_acts < 0)
+		return st_acts;
+
+	err = ovs_nla_copy_actions(actions, key, depth + 1, sfa);
+	if (err)
+		return err;
+
+	add_nested_action_end(*sfa, st_acts);
+	add_nested_action_end(*sfa, start);
+
+	return 0;
+}
+
+static int validate_tp_port(const struct sw_flow_key *flow_key)
+{
+	if ((flow_key->eth.type == htons(ETH_P_IP) ||
+	     flow_key->eth.type == htons(ETH_P_IPV6)) &&
+	    (flow_key->tp.src || flow_key->tp.dst))
+		return 0;
+
+	return -EINVAL;
+}
+
+void ovs_match_init(struct sw_flow_match *match,
+		    struct sw_flow_key *key,
+		    struct sw_flow_mask *mask)
+{
+	memset(match, 0, sizeof(*match));
+	match->key = key;
+	match->mask = mask;
+
+	memset(key, 0, sizeof(*key));
+
+	if (mask) {
+		memset(&mask->key, 0, sizeof(mask->key));
+		mask->range.start = mask->range.end = 0;
+	}
+}
+
+static int validate_and_copy_set_tun(const struct nlattr *attr,
+				     struct sw_flow_actions **sfa)
+{
+	struct sw_flow_match match;
+	struct sw_flow_key key;
+	int err, start;
+
+	ovs_match_init(&match, &key, NULL);
+	err = ipv4_tun_from_nlattr(nla_data(attr), &match, false);
+	if (err)
+		return err;
+
+	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET);
+	if (start < 0)
+		return start;
+
+	err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key,
+			sizeof(match.key->tun_key));
+	add_nested_action_end(*sfa, start);
+
+	return err;
+}
+
+static int validate_set(const struct nlattr *a,
+			const struct sw_flow_key *flow_key,
+			struct sw_flow_actions **sfa,
+			bool *set_tun)
+{
+	const struct nlattr *ovs_key = nla_data(a);
+	int key_type = nla_type(ovs_key);
+
+	/* There can be only one key in a action */
+	if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
+		return -EINVAL;
+
+	if (key_type > OVS_KEY_ATTR_MAX ||
+	    (ovs_key_lens[key_type] != nla_len(ovs_key) &&
+	     ovs_key_lens[key_type] != -1))
+		return -EINVAL;
+
+	switch (key_type) {
+	const struct ovs_key_ipv4 *ipv4_key;
+	const struct ovs_key_ipv6 *ipv6_key;
+	int err;
+
+	case OVS_KEY_ATTR_PRIORITY:
+	case OVS_KEY_ATTR_SKB_MARK:
+	case OVS_KEY_ATTR_ETHERNET:
+		break;
+
+	case OVS_KEY_ATTR_TUNNEL:
+		*set_tun = true;
+		err = validate_and_copy_set_tun(a, sfa);
+		if (err)
+			return err;
+		break;
+
+	case OVS_KEY_ATTR_IPV4:
+		if (flow_key->eth.type != htons(ETH_P_IP))
+			return -EINVAL;
+
+		if (!flow_key->ip.proto)
+			return -EINVAL;
+
+		ipv4_key = nla_data(ovs_key);
+		if (ipv4_key->ipv4_proto != flow_key->ip.proto)
+			return -EINVAL;
+
+		if (ipv4_key->ipv4_frag != flow_key->ip.frag)
+			return -EINVAL;
+
+		break;
+
+	case OVS_KEY_ATTR_IPV6:
+		if (flow_key->eth.type != htons(ETH_P_IPV6))
+			return -EINVAL;
+
+		if (!flow_key->ip.proto)
+			return -EINVAL;
+
+		ipv6_key = nla_data(ovs_key);
+		if (ipv6_key->ipv6_proto != flow_key->ip.proto)
+			return -EINVAL;
+
+		if (ipv6_key->ipv6_frag != flow_key->ip.frag)
+			return -EINVAL;
+
+		if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
+			return -EINVAL;
+
+		break;
+
+	case OVS_KEY_ATTR_TCP:
+		if (flow_key->ip.proto != IPPROTO_TCP)
+			return -EINVAL;
+
+		return validate_tp_port(flow_key);
+
+	case OVS_KEY_ATTR_UDP:
+		if (flow_key->ip.proto != IPPROTO_UDP)
+			return -EINVAL;
+
+		return validate_tp_port(flow_key);
+
+	case OVS_KEY_ATTR_SCTP:
+		if (flow_key->ip.proto != IPPROTO_SCTP)
+			return -EINVAL;
+
+		return validate_tp_port(flow_key);
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int validate_userspace(const struct nlattr *attr)
+{
+	static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
+		[OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
+		[OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
+	};
+	struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
+	int error;
+
+	error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
+				 attr, userspace_policy);
+	if (error)
+		return error;
+
+	if (!a[OVS_USERSPACE_ATTR_PID] ||
+	    !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int copy_action(const struct nlattr *from,
+		       struct sw_flow_actions **sfa)
+{
+	int totlen = NLA_ALIGN(from->nla_len);
+	struct nlattr *to;
+
+	to = reserve_sfa_size(sfa, from->nla_len);
+	if (IS_ERR(to))
+		return PTR_ERR(to);
+
+	memcpy(to, from, totlen);
+	return 0;
+}
+
+int ovs_nla_copy_actions(const struct nlattr *attr,
+			 const struct sw_flow_key *key,
+			 int depth,
+			 struct sw_flow_actions **sfa)
+{
+	const struct nlattr *a;
+	int rem, err;
+
+	if (depth >= SAMPLE_ACTION_DEPTH)
+		return -EOVERFLOW;
+
+	nla_for_each_nested(a, attr, rem) {
+		/* Expected argument lengths, (u32)-1 for variable length. */
+		static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
+			[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
+			[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
+			[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
+			[OVS_ACTION_ATTR_POP_VLAN] = 0,
+			[OVS_ACTION_ATTR_SET] = (u32)-1,
+			[OVS_ACTION_ATTR_SAMPLE] = (u32)-1
+		};
+		const struct ovs_action_push_vlan *vlan;
+		int type = nla_type(a);
+		bool skip_copy;
+
+		if (type > OVS_ACTION_ATTR_MAX ||
+		    (action_lens[type] != nla_len(a) &&
+		     action_lens[type] != (u32)-1))
+			return -EINVAL;
+
+		skip_copy = false;
+		switch (type) {
+		case OVS_ACTION_ATTR_UNSPEC:
+			return -EINVAL;
+
+		case OVS_ACTION_ATTR_USERSPACE:
+			err = validate_userspace(a);
+			if (err)
+				return err;
+			break;
+
+		case OVS_ACTION_ATTR_OUTPUT:
+			if (nla_get_u32(a) >= DP_MAX_PORTS)
+				return -EINVAL;
+			break;
+
+
+		case OVS_ACTION_ATTR_POP_VLAN:
+			break;
+
+		case OVS_ACTION_ATTR_PUSH_VLAN:
+			vlan = nla_data(a);
+			if (vlan->vlan_tpid != htons(ETH_P_8021Q))
+				return -EINVAL;
+			if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
+				return -EINVAL;
+			break;
+
+		case OVS_ACTION_ATTR_SET:
+			err = validate_set(a, key, sfa, &skip_copy);
+			if (err)
+				return err;
+			break;
+
+		case OVS_ACTION_ATTR_SAMPLE:
+			err = validate_and_copy_sample(a, key, depth, sfa);
+			if (err)
+				return err;
+			skip_copy = true;
+			break;
+
+		default:
+			return -EINVAL;
+		}
+		if (!skip_copy) {
+			err = copy_action(a, sfa);
+			if (err)
+				return err;
+		}
+	}
+
+	if (rem > 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
+{
+	const struct nlattr *a;
+	struct nlattr *start;
+	int err = 0, rem;
+
+	start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
+	if (!start)
+		return -EMSGSIZE;
+
+	nla_for_each_nested(a, attr, rem) {
+		int type = nla_type(a);
+		struct nlattr *st_sample;
+
+		switch (type) {
+		case OVS_SAMPLE_ATTR_PROBABILITY:
+			if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY,
+				    sizeof(u32), nla_data(a)))
+				return -EMSGSIZE;
+			break;
+		case OVS_SAMPLE_ATTR_ACTIONS:
+			st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
+			if (!st_sample)
+				return -EMSGSIZE;
+			err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb);
+			if (err)
+				return err;
+			nla_nest_end(skb, st_sample);
+			break;
+		}
+	}
+
+	nla_nest_end(skb, start);
+	return err;
+}
+
+static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
+{
+	const struct nlattr *ovs_key = nla_data(a);
+	int key_type = nla_type(ovs_key);
+	struct nlattr *start;
+	int err;
+
+	switch (key_type) {
+	case OVS_KEY_ATTR_IPV4_TUNNEL:
+		start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
+		if (!start)
+			return -EMSGSIZE;
+
+		err = ipv4_tun_to_nlattr(skb, nla_data(ovs_key),
+					     nla_data(ovs_key));
+		if (err)
+			return err;
+		nla_nest_end(skb, start);
+		break;
+	default:
+		if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
+			return -EMSGSIZE;
+		break;
+	}
+
+	return 0;
+}
+
+int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
+{
+	const struct nlattr *a;
+	int rem, err;
+
+	nla_for_each_attr(a, attr, len, rem) {
+		int type = nla_type(a);
+
+		switch (type) {
+		case OVS_ACTION_ATTR_SET:
+			err = set_action_to_attr(a, skb);
+			if (err)
+				return err;
+			break;
+
+		case OVS_ACTION_ATTR_SAMPLE:
+			err = sample_action_to_attr(a, skb);
+			if (err)
+				return err;
+			break;
+		default:
+			if (nla_put(skb, type, nla_len(a), nla_data(a)))
+				return -EMSGSIZE;
+			break;
+		}
+	}
+
+	return 0;
+}
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
new file mode 100644
index 00000000000..440151045d3
--- /dev/null
+++ b/net/openvswitch/flow_netlink.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2007-2013 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+
+#ifndef FLOW_NETLINK_H
+#define FLOW_NETLINK_H 1
+
+#include <linux/kernel.h>
+#include <linux/netlink.h>
+#include <linux/openvswitch.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/if_ether.h>
+#include <linux/in6.h>
+#include <linux/jiffies.h>
+#include <linux/time.h>
+#include <linux/flex_array.h>
+
+#include <net/inet_ecn.h>
+#include <net/ip_tunnels.h>
+
+#include "flow.h"
+
+void ovs_match_init(struct sw_flow_match *match,
+		    struct sw_flow_key *key, struct sw_flow_mask *mask);
+
+int ovs_nla_put_flow(const struct sw_flow_key *,
+		     const struct sw_flow_key *, struct sk_buff *);
+int ovs_nla_get_flow_metadata(struct sw_flow *flow,
+			      const struct nlattr *attr);
+int ovs_nla_get_match(struct sw_flow_match *match,
+		      const struct nlattr *,
+		      const struct nlattr *);
+
+int ovs_nla_copy_actions(const struct nlattr *attr,
+			 const struct sw_flow_key *key, int depth,
+			 struct sw_flow_actions **sfa);
+int ovs_nla_put_actions(const struct nlattr *attr,
+			int len, struct sk_buff *skb);
+
+struct sw_flow_actions *ovs_nla_alloc_flow_actions(int actions_len);
+void ovs_nla_free_flow_actions(struct sw_flow_actions *);
+
+#endif /* flow_netlink.h */
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
new file mode 100644
index 00000000000..cf2d853646f
--- /dev/null
+++ b/net/openvswitch/flow_table.c
@@ -0,0 +1,647 @@
+/*
+ * Copyright (c) 2007-2013 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include "flow.h"
+#include "datapath.h"
+#include <linux/uaccess.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <net/llc_pdu.h>
+#include <linux/kernel.h>
+#include <linux/hash.h>
+#include <linux/jiffies.h>
+#include <linux/llc.h>
+#include <linux/module.h>
+#include <linux/in.h>
+#include <linux/rcupdate.h>
+#include <linux/if_arp.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/sctp.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/rculist.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+
+#define TBL_MIN_BUCKETS		1024
+#define REHASH_INTERVAL		(10 * 60 * HZ)
+
+static struct kmem_cache *flow_cache;
+struct kmem_cache *flow_stats_cache __read_mostly;
+
+static u16 range_n_bytes(const struct sw_flow_key_range *range)
+{
+	return range->end - range->start;
+}
+
+void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
+		       const struct sw_flow_mask *mask)
+{
+	const long *m = (const long *)((const u8 *)&mask->key +
+				mask->range.start);
+	const long *s = (const long *)((const u8 *)src +
+				mask->range.start);
+	long *d = (long *)((u8 *)dst + mask->range.start);
+	int i;
+
+	/* The memory outside of the 'mask->range' are not set since
+	 * further operations on 'dst' only uses contents within
+	 * 'mask->range'.
+	 */
+	for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long))
+		*d++ = *s++ & *m++;
+}
+
+struct sw_flow *ovs_flow_alloc(void)
+{
+	struct sw_flow *flow;
+	struct flow_stats *stats;
+	int node;
+
+	flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
+	if (!flow)
+		return ERR_PTR(-ENOMEM);
+
+	flow->sf_acts = NULL;
+	flow->mask = NULL;
+	flow->stats_last_writer = NUMA_NO_NODE;
+
+	/* Initialize the default stat node. */
+	stats = kmem_cache_alloc_node(flow_stats_cache,
+				      GFP_KERNEL | __GFP_ZERO, 0);
+	if (!stats)
+		goto err;
+
+	spin_lock_init(&stats->lock);
+
+	RCU_INIT_POINTER(flow->stats[0], stats);
+
+	for_each_node(node)
+		if (node != 0)
+			RCU_INIT_POINTER(flow->stats[node], NULL);
+
+	return flow;
+err:
+	kmem_cache_free(flow_cache, flow);
+	return ERR_PTR(-ENOMEM);
+}
+
+int ovs_flow_tbl_count(struct flow_table *table)
+{
+	return table->count;
+}
+
+static struct flex_array *alloc_buckets(unsigned int n_buckets)
+{
+	struct flex_array *buckets;
+	int i, err;
+
+	buckets = flex_array_alloc(sizeof(struct hlist_head),
+				   n_buckets, GFP_KERNEL);
+	if (!buckets)
+		return NULL;
+
+	err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL);
+	if (err) {
+		flex_array_free(buckets);
+		return NULL;
+	}
+
+	for (i = 0; i < n_buckets; i++)
+		INIT_HLIST_HEAD((struct hlist_head *)
+					flex_array_get(buckets, i));
+
+	return buckets;
+}
+
+static void flow_free(struct sw_flow *flow)
+{
+	int node;
+
+	kfree((struct sw_flow_actions __force *)flow->sf_acts);
+	for_each_node(node)
+		if (flow->stats[node])
+			kmem_cache_free(flow_stats_cache,
+					(struct flow_stats __force *)flow->stats[node]);
+	kmem_cache_free(flow_cache, flow);
+}
+
+static void rcu_free_flow_callback(struct rcu_head *rcu)
+{
+	struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
+
+	flow_free(flow);
+}
+
+void ovs_flow_free(struct sw_flow *flow, bool deferred)
+{
+	if (!flow)
+		return;
+
+	if (deferred)
+		call_rcu(&flow->rcu, rcu_free_flow_callback);
+	else
+		flow_free(flow);
+}
+
+static void free_buckets(struct flex_array *buckets)
+{
+	flex_array_free(buckets);
+}
+
+
+static void __table_instance_destroy(struct table_instance *ti)
+{
+	free_buckets(ti->buckets);
+	kfree(ti);
+}
+
+static struct table_instance *table_instance_alloc(int new_size)
+{
+	struct table_instance *ti = kmalloc(sizeof(*ti), GFP_KERNEL);
+
+	if (!ti)
+		return NULL;
+
+	ti->buckets = alloc_buckets(new_size);
+
+	if (!ti->buckets) {
+		kfree(ti);
+		return NULL;
+	}
+	ti->n_buckets = new_size;
+	ti->node_ver = 0;
+	ti->keep_flows = false;
+	get_random_bytes(&ti->hash_seed, sizeof(u32));
+
+	return ti;
+}
+
+int ovs_flow_tbl_init(struct flow_table *table)
+{
+	struct table_instance *ti;
+
+	ti = table_instance_alloc(TBL_MIN_BUCKETS);
+
+	if (!ti)
+		return -ENOMEM;
+
+	rcu_assign_pointer(table->ti, ti);
+	INIT_LIST_HEAD(&table->mask_list);
+	table->last_rehash = jiffies;
+	table->count = 0;
+	return 0;
+}
+
+static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
+{
+	struct table_instance *ti = container_of(rcu, struct table_instance, rcu);
+
+	__table_instance_destroy(ti);
+}
+
+static void table_instance_destroy(struct table_instance *ti, bool deferred)
+{
+	int i;
+
+	if (!ti)
+		return;
+
+	if (ti->keep_flows)
+		goto skip_flows;
+
+	for (i = 0; i < ti->n_buckets; i++) {
+		struct sw_flow *flow;
+		struct hlist_head *head = flex_array_get(ti->buckets, i);
+		struct hlist_node *n;
+		int ver = ti->node_ver;
+
+		hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
+			hlist_del_rcu(&flow->hash_node[ver]);
+			ovs_flow_free(flow, deferred);
+		}
+	}
+
+skip_flows:
+	if (deferred)
+		call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
+	else
+		__table_instance_destroy(ti);
+}
+
+void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred)
+{
+	struct table_instance *ti = ovsl_dereference(table->ti);
+
+	table_instance_destroy(ti, deferred);
+}
+
+struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
+				       u32 *bucket, u32 *last)
+{
+	struct sw_flow *flow;
+	struct hlist_head *head;
+	int ver;
+	int i;
+
+	ver = ti->node_ver;
+	while (*bucket < ti->n_buckets) {
+		i = 0;
+		head = flex_array_get(ti->buckets, *bucket);
+		hlist_for_each_entry_rcu(flow, head, hash_node[ver]) {
+			if (i < *last) {
+				i++;
+				continue;
+			}
+			*last = i + 1;
+			return flow;
+		}
+		(*bucket)++;
+		*last = 0;
+	}
+
+	return NULL;
+}
+
+static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash)
+{
+	hash = jhash_1word(hash, ti->hash_seed);
+	return flex_array_get(ti->buckets,
+				(hash & (ti->n_buckets - 1)));
+}
+
+static void table_instance_insert(struct table_instance *ti, struct sw_flow *flow)
+{
+	struct hlist_head *head;
+
+	head = find_bucket(ti, flow->hash);
+	hlist_add_head_rcu(&flow->hash_node[ti->node_ver], head);
+}
+
+static void flow_table_copy_flows(struct table_instance *old,
+				  struct table_instance *new)
+{
+	int old_ver;
+	int i;
+
+	old_ver = old->node_ver;
+	new->node_ver = !old_ver;
+
+	/* Insert in new table. */
+	for (i = 0; i < old->n_buckets; i++) {
+		struct sw_flow *flow;
+		struct hlist_head *head;
+
+		head = flex_array_get(old->buckets, i);
+
+		hlist_for_each_entry(flow, head, hash_node[old_ver])
+			table_instance_insert(new, flow);
+	}
+
+	old->keep_flows = true;
+}
+
+static struct table_instance *table_instance_rehash(struct table_instance *ti,
+					    int n_buckets)
+{
+	struct table_instance *new_ti;
+
+	new_ti = table_instance_alloc(n_buckets);
+	if (!new_ti)
+		return NULL;
+
+	flow_table_copy_flows(ti, new_ti);
+
+	return new_ti;
+}
+
+int ovs_flow_tbl_flush(struct flow_table *flow_table)
+{
+	struct table_instance *old_ti;
+	struct table_instance *new_ti;
+
+	old_ti = ovsl_dereference(flow_table->ti);
+	new_ti = table_instance_alloc(TBL_MIN_BUCKETS);
+	if (!new_ti)
+		return -ENOMEM;
+
+	rcu_assign_pointer(flow_table->ti, new_ti);
+	flow_table->last_rehash = jiffies;
+	flow_table->count = 0;
+
+	table_instance_destroy(old_ti, true);
+	return 0;
+}
+
+static u32 flow_hash(const struct sw_flow_key *key, int key_start,
+		     int key_end)
+{
+	const u32 *hash_key = (const u32 *)((const u8 *)key + key_start);
+	int hash_u32s = (key_end - key_start) >> 2;
+
+	/* Make sure number of hash bytes are multiple of u32. */
+	BUILD_BUG_ON(sizeof(long) % sizeof(u32));
+
+	return arch_fast_hash2(hash_key, hash_u32s, 0);
+}
+
+static int flow_key_start(const struct sw_flow_key *key)
+{
+	if (key->tun_key.ipv4_dst)
+		return 0;
+	else
+		return rounddown(offsetof(struct sw_flow_key, phy),
+					  sizeof(long));
+}
+
+static bool cmp_key(const struct sw_flow_key *key1,
+		    const struct sw_flow_key *key2,
+		    int key_start, int key_end)
+{
+	const long *cp1 = (const long *)((const u8 *)key1 + key_start);
+	const long *cp2 = (const long *)((const u8 *)key2 + key_start);
+	long diffs = 0;
+	int i;
+
+	for (i = key_start; i < key_end;  i += sizeof(long))
+		diffs |= *cp1++ ^ *cp2++;
+
+	return diffs == 0;
+}
+
+static bool flow_cmp_masked_key(const struct sw_flow *flow,
+				const struct sw_flow_key *key,
+				int key_start, int key_end)
+{
+	return cmp_key(&flow->key, key, key_start, key_end);
+}
+
+bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
+			       struct sw_flow_match *match)
+{
+	struct sw_flow_key *key = match->key;
+	int key_start = flow_key_start(key);
+	int key_end = match->range.end;
+
+	return cmp_key(&flow->unmasked_key, key, key_start, key_end);
+}
+
+static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
+					  const struct sw_flow_key *unmasked,
+					  struct sw_flow_mask *mask)
+{
+	struct sw_flow *flow;
+	struct hlist_head *head;
+	int key_start = mask->range.start;
+	int key_end = mask->range.end;
+	u32 hash;
+	struct sw_flow_key masked_key;
+
+	ovs_flow_mask_key(&masked_key, unmasked, mask);
+	hash = flow_hash(&masked_key, key_start, key_end);
+	head = find_bucket(ti, hash);
+	hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) {
+		if (flow->mask == mask && flow->hash == hash &&
+		    flow_cmp_masked_key(flow, &masked_key,
+					  key_start, key_end))
+			return flow;
+	}
+	return NULL;
+}
+
+struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
+				    const struct sw_flow_key *key,
+				    u32 *n_mask_hit)
+{
+	struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
+	struct sw_flow_mask *mask;
+	struct sw_flow *flow;
+
+	*n_mask_hit = 0;
+	list_for_each_entry_rcu(mask, &tbl->mask_list, list) {
+		(*n_mask_hit)++;
+		flow = masked_flow_lookup(ti, key, mask);
+		if (flow)  /* Found */
+			return flow;
+	}
+	return NULL;
+}
+
+struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
+				    const struct sw_flow_key *key)
+{
+	u32 __always_unused n_mask_hit;
+
+	return ovs_flow_tbl_lookup_stats(tbl, key, &n_mask_hit);
+}
+
+struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
+					  struct sw_flow_match *match)
+{
+	struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
+	struct sw_flow_mask *mask;
+	struct sw_flow *flow;
+
+	/* Always called under ovs-mutex. */
+	list_for_each_entry(mask, &tbl->mask_list, list) {
+		flow = masked_flow_lookup(ti, match->key, mask);
+		if (flow && ovs_flow_cmp_unmasked_key(flow, match))  /* Found */
+			return flow;
+	}
+	return NULL;
+}
+
+int ovs_flow_tbl_num_masks(const struct flow_table *table)
+{
+	struct sw_flow_mask *mask;
+	int num = 0;
+
+	list_for_each_entry(mask, &table->mask_list, list)
+		num++;
+
+	return num;
+}
+
+static struct table_instance *table_instance_expand(struct table_instance *ti)
+{
+	return table_instance_rehash(ti, ti->n_buckets * 2);
+}
+
+/* Remove 'mask' from the mask list, if it is not needed any more. */
+static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
+{
+	if (mask) {
+		/* ovs-lock is required to protect mask-refcount and
+		 * mask list.
+		 */
+		ASSERT_OVSL();
+		BUG_ON(!mask->ref_count);
+		mask->ref_count--;
+
+		if (!mask->ref_count) {
+			list_del_rcu(&mask->list);
+			kfree_rcu(mask, rcu);
+		}
+	}
+}
+
+/* Must be called with OVS mutex held. */
+void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
+{
+	struct table_instance *ti = ovsl_dereference(table->ti);
+
+	BUG_ON(table->count == 0);
+	hlist_del_rcu(&flow->hash_node[ti->node_ver]);
+	table->count--;
+
+	/* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
+	 * accessible as long as the RCU read lock is held.
+	 */
+	flow_mask_remove(table, flow->mask);
+}
+
+static struct sw_flow_mask *mask_alloc(void)
+{
+	struct sw_flow_mask *mask;
+
+	mask = kmalloc(sizeof(*mask), GFP_KERNEL);
+	if (mask)
+		mask->ref_count = 1;
+
+	return mask;
+}
+
+static bool mask_equal(const struct sw_flow_mask *a,
+		       const struct sw_flow_mask *b)
+{
+	const u8 *a_ = (const u8 *)&a->key + a->range.start;
+	const u8 *b_ = (const u8 *)&b->key + b->range.start;
+
+	return  (a->range.end == b->range.end)
+		&& (a->range.start == b->range.start)
+		&& (memcmp(a_, b_, range_n_bytes(&a->range)) == 0);
+}
+
+static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
+					   const struct sw_flow_mask *mask)
+{
+	struct list_head *ml;
+
+	list_for_each(ml, &tbl->mask_list) {
+		struct sw_flow_mask *m;
+		m = container_of(ml, struct sw_flow_mask, list);
+		if (mask_equal(mask, m))
+			return m;
+	}
+
+	return NULL;
+}
+
+/* Add 'mask' into the mask list, if it is not already there. */
+static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
+			    struct sw_flow_mask *new)
+{
+	struct sw_flow_mask *mask;
+	mask = flow_mask_find(tbl, new);
+	if (!mask) {
+		/* Allocate a new mask if none exsits. */
+		mask = mask_alloc();
+		if (!mask)
+			return -ENOMEM;
+		mask->key = new->key;
+		mask->range = new->range;
+		list_add_rcu(&mask->list, &tbl->mask_list);
+	} else {
+		BUG_ON(!mask->ref_count);
+		mask->ref_count++;
+	}
+
+	flow->mask = mask;
+	return 0;
+}
+
+/* Must be called with OVS mutex held. */
+int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
+			struct sw_flow_mask *mask)
+{
+	struct table_instance *new_ti = NULL;
+	struct table_instance *ti;
+	int err;
+
+	err = flow_mask_insert(table, flow, mask);
+	if (err)
+		return err;
+
+	flow->hash = flow_hash(&flow->key, flow->mask->range.start,
+			flow->mask->range.end);
+	ti = ovsl_dereference(table->ti);
+	table_instance_insert(ti, flow);
+	table->count++;
+
+	/* Expand table, if necessary, to make room. */
+	if (table->count > ti->n_buckets)
+		new_ti = table_instance_expand(ti);
+	else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL))
+		new_ti = table_instance_rehash(ti, ti->n_buckets);
+
+	if (new_ti) {
+		rcu_assign_pointer(table->ti, new_ti);
+		table_instance_destroy(ti, true);
+		table->last_rehash = jiffies;
+	}
+	return 0;
+}
+
+/* Initializes the flow module.
+ * Returns zero if successful or a negative error code. */
+int ovs_flow_init(void)
+{
+	BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
+	BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
+
+	flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
+				       + (num_possible_nodes()
+					  * sizeof(struct flow_stats *)),
+				       0, 0, NULL);
+	if (flow_cache == NULL)
+		return -ENOMEM;
+
+	flow_stats_cache
+		= kmem_cache_create("sw_flow_stats", sizeof(struct flow_stats),
+				    0, SLAB_HWCACHE_ALIGN, NULL);
+	if (flow_stats_cache == NULL) {
+		kmem_cache_destroy(flow_cache);
+		flow_cache = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/* Uninitializes the flow module. */
+void ovs_flow_exit(void)
+{
+	kmem_cache_destroy(flow_stats_cache);
+	kmem_cache_destroy(flow_cache);
+}
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
new file mode 100644
index 00000000000..5918bff7f3f
--- /dev/null
+++ b/net/openvswitch/flow_table.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2007-2013 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef FLOW_TABLE_H
+#define FLOW_TABLE_H 1
+
+#include <linux/kernel.h>
+#include <linux/netlink.h>
+#include <linux/openvswitch.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/if_ether.h>
+#include <linux/in6.h>
+#include <linux/jiffies.h>
+#include <linux/time.h>
+#include <linux/flex_array.h>
+
+#include <net/inet_ecn.h>
+#include <net/ip_tunnels.h>
+
+#include "flow.h"
+
+struct table_instance {
+	struct flex_array *buckets;
+	unsigned int n_buckets;
+	struct rcu_head rcu;
+	int node_ver;
+	u32 hash_seed;
+	bool keep_flows;
+};
+
+struct flow_table {
+	struct table_instance __rcu *ti;
+	struct list_head mask_list;
+	unsigned long last_rehash;
+	unsigned int count;
+};
+
+extern struct kmem_cache *flow_stats_cache;
+
+int ovs_flow_init(void);
+void ovs_flow_exit(void);
+
+struct sw_flow *ovs_flow_alloc(void);
+void ovs_flow_free(struct sw_flow *, bool deferred);
+
+int ovs_flow_tbl_init(struct flow_table *);
+int ovs_flow_tbl_count(struct flow_table *table);
+void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred);
+int ovs_flow_tbl_flush(struct flow_table *flow_table);
+
+int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
+			struct sw_flow_mask *mask);
+void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
+int  ovs_flow_tbl_num_masks(const struct flow_table *table);
+struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
+				       u32 *bucket, u32 *idx);
+struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
+				    const struct sw_flow_key *,
+				    u32 *n_mask_hit);
+struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
+				    const struct sw_flow_key *);
+struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
+					  struct sw_flow_match *match);
+bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
+			       struct sw_flow_match *match);
+
+void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
+		       const struct sw_flow_mask *mask);
+#endif /* flow_table.h */
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
new file mode 100644
index 00000000000..f49148a07da
--- /dev/null
+++ b/net/openvswitch/vport-gre.c
@@ -0,0 +1,287 @@
+/*
+ * Copyright (c) 2007-2013 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/if.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/if_tunnel.h>
+#include <linux/if_vlan.h>
+#include <linux/in.h>
+#include <linux/in_route.h>
+#include <linux/inetdevice.h>
+#include <linux/jhash.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/workqueue.h>
+#include <linux/rculist.h>
+#include <net/route.h>
+#include <net/xfrm.h>
+
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/ip_tunnels.h>
+#include <net/gre.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/protocol.h>
+
+#include "datapath.h"
+#include "vport.h"
+
+/* Returns the least-significant 32 bits of a __be64. */
+static __be32 be64_get_low32(__be64 x)
+{
+#ifdef __BIG_ENDIAN
+	return (__force __be32)x;
+#else
+	return (__force __be32)((__force u64)x >> 32);
+#endif
+}
+
+static __be16 filter_tnl_flags(__be16 flags)
+{
+	return flags & (TUNNEL_CSUM | TUNNEL_KEY);
+}
+
+static struct sk_buff *__build_header(struct sk_buff *skb,
+				      int tunnel_hlen)
+{
+	const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
+	struct tnl_ptk_info tpi;
+
+	skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM));
+	if (IS_ERR(skb))
+		return NULL;
+
+	tpi.flags = filter_tnl_flags(tun_key->tun_flags);
+	tpi.proto = htons(ETH_P_TEB);
+	tpi.key = be64_get_low32(tun_key->tun_id);
+	tpi.seq = 0;
+	gre_build_header(skb, &tpi, tunnel_hlen);
+
+	return skb;
+}
+
+static __be64 key_to_tunnel_id(__be32 key, __be32 seq)
+{
+#ifdef __BIG_ENDIAN
+	return (__force __be64)((__force u64)seq << 32 | (__force u32)key);
+#else
+	return (__force __be64)((__force u64)key << 32 | (__force u32)seq);
+#endif
+}
+
+/* Called with rcu_read_lock and BH disabled. */
+static int gre_rcv(struct sk_buff *skb,
+		   const struct tnl_ptk_info *tpi)
+{
+	struct ovs_key_ipv4_tunnel tun_key;
+	struct ovs_net *ovs_net;
+	struct vport *vport;
+	__be64 key;
+
+	ovs_net = net_generic(dev_net(skb->dev), ovs_net_id);
+	vport = rcu_dereference(ovs_net->vport_net.gre_vport);
+	if (unlikely(!vport))
+		return PACKET_REJECT;
+
+	key = key_to_tunnel_id(tpi->key, tpi->seq);
+	ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key,
+			      filter_tnl_flags(tpi->flags));
+
+	ovs_vport_receive(vport, skb, &tun_key);
+	return PACKET_RCVD;
+}
+
+/* Called with rcu_read_lock and BH disabled. */
+static int gre_err(struct sk_buff *skb, u32 info,
+		   const struct tnl_ptk_info *tpi)
+{
+	struct ovs_net *ovs_net;
+	struct vport *vport;
+
+	ovs_net = net_generic(dev_net(skb->dev), ovs_net_id);
+	vport = rcu_dereference(ovs_net->vport_net.gre_vport);
+
+	if (unlikely(!vport))
+		return PACKET_REJECT;
+	else
+		return PACKET_RCVD;
+}
+
+static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
+{
+	struct net *net = ovs_dp_get_net(vport->dp);
+	struct flowi4 fl;
+	struct rtable *rt;
+	int min_headroom;
+	int tunnel_hlen;
+	__be16 df;
+	int err;
+
+	if (unlikely(!OVS_CB(skb)->tun_key)) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	/* Route lookup */
+	memset(&fl, 0, sizeof(fl));
+	fl.daddr = OVS_CB(skb)->tun_key->ipv4_dst;
+	fl.saddr = OVS_CB(skb)->tun_key->ipv4_src;
+	fl.flowi4_tos = RT_TOS(OVS_CB(skb)->tun_key->ipv4_tos);
+	fl.flowi4_mark = skb->mark;
+	fl.flowi4_proto = IPPROTO_GRE;
+
+	rt = ip_route_output_key(net, &fl);
+	if (IS_ERR(rt))
+		return PTR_ERR(rt);
+
+	tunnel_hlen = ip_gre_calc_hlen(OVS_CB(skb)->tun_key->tun_flags);
+
+	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+			+ tunnel_hlen + sizeof(struct iphdr)
+			+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
+	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
+		int head_delta = SKB_DATA_ALIGN(min_headroom -
+						skb_headroom(skb) +
+						16);
+		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
+					0, GFP_ATOMIC);
+		if (unlikely(err))
+			goto err_free_rt;
+	}
+
+	if (vlan_tx_tag_present(skb)) {
+		if (unlikely(!__vlan_put_tag(skb,
+					     skb->vlan_proto,
+					     vlan_tx_tag_get(skb)))) {
+			err = -ENOMEM;
+			goto err_free_rt;
+		}
+		skb->vlan_tci = 0;
+	}
+
+	/* Push Tunnel header. */
+	skb = __build_header(skb, tunnel_hlen);
+	if (unlikely(!skb)) {
+		err = 0;
+		goto err_free_rt;
+	}
+
+	df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
+		htons(IP_DF) : 0;
+
+	skb->ignore_df = 1;
+
+	return iptunnel_xmit(skb->sk, rt, skb, fl.saddr,
+			     OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE,
+			     OVS_CB(skb)->tun_key->ipv4_tos,
+			     OVS_CB(skb)->tun_key->ipv4_ttl, df, false);
+err_free_rt:
+	ip_rt_put(rt);
+error:
+	return err;
+}
+
+static struct gre_cisco_protocol gre_protocol = {
+	.handler        = gre_rcv,
+	.err_handler    = gre_err,
+	.priority       = 1,
+};
+
+static int gre_ports;
+static int gre_init(void)
+{
+	int err;
+
+	gre_ports++;
+	if (gre_ports > 1)
+		return 0;
+
+	err = gre_cisco_register(&gre_protocol);
+	if (err)
+		pr_warn("cannot register gre protocol handler\n");
+
+	return err;
+}
+
+static void gre_exit(void)
+{
+	gre_ports--;
+	if (gre_ports > 0)
+		return;
+
+	gre_cisco_unregister(&gre_protocol);
+}
+
+static const char *gre_get_name(const struct vport *vport)
+{
+	return vport_priv(vport);
+}
+
+static struct vport *gre_create(const struct vport_parms *parms)
+{
+	struct net *net = ovs_dp_get_net(parms->dp);
+	struct ovs_net *ovs_net;
+	struct vport *vport;
+	int err;
+
+	err = gre_init();
+	if (err)
+		return ERR_PTR(err);
+
+	ovs_net = net_generic(net, ovs_net_id);
+	if (ovsl_dereference(ovs_net->vport_net.gre_vport)) {
+		vport = ERR_PTR(-EEXIST);
+		goto error;
+	}
+
+	vport = ovs_vport_alloc(IFNAMSIZ, &ovs_gre_vport_ops, parms);
+	if (IS_ERR(vport))
+		goto error;
+
+	strncpy(vport_priv(vport), parms->name, IFNAMSIZ);
+	rcu_assign_pointer(ovs_net->vport_net.gre_vport, vport);
+	return vport;
+
+error:
+	gre_exit();
+	return vport;
+}
+
+static void gre_tnl_destroy(struct vport *vport)
+{
+	struct net *net = ovs_dp_get_net(vport->dp);
+	struct ovs_net *ovs_net;
+
+	ovs_net = net_generic(net, ovs_net_id);
+
+	RCU_INIT_POINTER(ovs_net->vport_net.gre_vport, NULL);
+	ovs_vport_deferred_free(vport);
+	gre_exit();
+}
+
+const struct vport_ops ovs_gre_vport_ops = {
+	.type		= OVS_VPORT_TYPE_GRE,
+	.create		= gre_create,
+	.destroy	= gre_tnl_destroy,
+	.get_name	= gre_get_name,
+	.send		= gre_tnl_send,
+};
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
new file mode 100644
index 00000000000..789af9280e7
--- /dev/null
+++ b/net/openvswitch/vport-internal_dev.c
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2007-2012 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include <linux/hardirq.h>
+#include <linux/if_vlan.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/skbuff.h>
+
+#include <net/dst.h>
+#include <net/xfrm.h>
+
+#include "datapath.h"
+#include "vport-internal_dev.h"
+#include "vport-netdev.h"
+
+struct internal_dev {
+	struct vport *vport;
+};
+
+static struct internal_dev *internal_dev_priv(struct net_device *netdev)
+{
+	return netdev_priv(netdev);
+}
+
+/* This function is only called by the kernel network layer.*/
+static struct rtnl_link_stats64 *internal_dev_get_stats(struct net_device *netdev,
+							struct rtnl_link_stats64 *stats)
+{
+	struct vport *vport = ovs_internal_dev_get_vport(netdev);
+	struct ovs_vport_stats vport_stats;
+
+	ovs_vport_get_stats(vport, &vport_stats);
+
+	/* The tx and rx stats need to be swapped because the
+	 * switch and host OS have opposite perspectives. */
+	stats->rx_packets	= vport_stats.tx_packets;
+	stats->tx_packets	= vport_stats.rx_packets;
+	stats->rx_bytes		= vport_stats.tx_bytes;
+	stats->tx_bytes		= vport_stats.rx_bytes;
+	stats->rx_errors	= vport_stats.tx_errors;
+	stats->tx_errors	= vport_stats.rx_errors;
+	stats->rx_dropped	= vport_stats.tx_dropped;
+	stats->tx_dropped	= vport_stats.rx_dropped;
+
+	return stats;
+}
+
+/* Called with rcu_read_lock_bh. */
+static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+	rcu_read_lock();
+	ovs_vport_receive(internal_dev_priv(netdev)->vport, skb, NULL);
+	rcu_read_unlock();
+	return 0;
+}
+
+static int internal_dev_open(struct net_device *netdev)
+{
+	netif_start_queue(netdev);
+	return 0;
+}
+
+static int internal_dev_stop(struct net_device *netdev)
+{
+	netif_stop_queue(netdev);
+	return 0;
+}
+
+static void internal_dev_getinfo(struct net_device *netdev,
+				 struct ethtool_drvinfo *info)
+{
+	strlcpy(info->driver, "openvswitch", sizeof(info->driver));
+}
+
+static const struct ethtool_ops internal_dev_ethtool_ops = {
+	.get_drvinfo	= internal_dev_getinfo,
+	.get_link	= ethtool_op_get_link,
+};
+
+static int internal_dev_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	if (new_mtu < 68)
+		return -EINVAL;
+
+	netdev->mtu = new_mtu;
+	return 0;
+}
+
+static void internal_dev_destructor(struct net_device *dev)
+{
+	struct vport *vport = ovs_internal_dev_get_vport(dev);
+
+	ovs_vport_free(vport);
+	free_netdev(dev);
+}
+
+static const struct net_device_ops internal_dev_netdev_ops = {
+	.ndo_open = internal_dev_open,
+	.ndo_stop = internal_dev_stop,
+	.ndo_start_xmit = internal_dev_xmit,
+	.ndo_set_mac_address = eth_mac_addr,
+	.ndo_change_mtu = internal_dev_change_mtu,
+	.ndo_get_stats64 = internal_dev_get_stats,
+};
+
+static void do_setup(struct net_device *netdev)
+{
+	ether_setup(netdev);
+
+	netdev->netdev_ops = &internal_dev_netdev_ops;
+
+	netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
+	netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+	netdev->destructor = internal_dev_destructor;
+	netdev->ethtool_ops = &internal_dev_ethtool_ops;
+	netdev->tx_queue_len = 0;
+
+	netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |
+			   NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE;
+
+	netdev->vlan_features = netdev->features;
+	netdev->features |= NETIF_F_HW_VLAN_CTAG_TX;
+	netdev->hw_features = netdev->features & ~NETIF_F_LLTX;
+	eth_hw_addr_random(netdev);
+}
+
+static struct vport *internal_dev_create(const struct vport_parms *parms)
+{
+	struct vport *vport;
+	struct netdev_vport *netdev_vport;
+	struct internal_dev *internal_dev;
+	int err;
+
+	vport = ovs_vport_alloc(sizeof(struct netdev_vport),
+				&ovs_internal_vport_ops, parms);
+	if (IS_ERR(vport)) {
+		err = PTR_ERR(vport);
+		goto error;
+	}
+
+	netdev_vport = netdev_vport_priv(vport);
+
+	netdev_vport->dev = alloc_netdev(sizeof(struct internal_dev),
+					 parms->name, do_setup);
+	if (!netdev_vport->dev) {
+		err = -ENOMEM;
+		goto error_free_vport;
+	}
+
+	dev_net_set(netdev_vport->dev, ovs_dp_get_net(vport->dp));
+	internal_dev = internal_dev_priv(netdev_vport->dev);
+	internal_dev->vport = vport;
+
+	/* Restrict bridge port to current netns. */
+	if (vport->port_no == OVSP_LOCAL)
+		netdev_vport->dev->features |= NETIF_F_NETNS_LOCAL;
+
+	rtnl_lock();
+	err = register_netdevice(netdev_vport->dev);
+	if (err)
+		goto error_free_netdev;
+
+	dev_set_promiscuity(netdev_vport->dev, 1);
+	rtnl_unlock();
+	netif_start_queue(netdev_vport->dev);
+
+	return vport;
+
+error_free_netdev:
+	rtnl_unlock();
+	free_netdev(netdev_vport->dev);
+error_free_vport:
+	ovs_vport_free(vport);
+error:
+	return ERR_PTR(err);
+}
+
+static void internal_dev_destroy(struct vport *vport)
+{
+	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+
+	netif_stop_queue(netdev_vport->dev);
+	rtnl_lock();
+	dev_set_promiscuity(netdev_vport->dev, -1);
+
+	/* unregister_netdevice() waits for an RCU grace period. */
+	unregister_netdevice(netdev_vport->dev);
+
+	rtnl_unlock();
+}
+
+static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
+{
+	struct net_device *netdev = netdev_vport_priv(vport)->dev;
+	int len;
+
+	len = skb->len;
+
+	skb_dst_drop(skb);
+	nf_reset(skb);
+	secpath_reset(skb);
+
+	skb->dev = netdev;
+	skb->pkt_type = PACKET_HOST;
+	skb->protocol = eth_type_trans(skb, netdev);
+	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+
+	netif_rx(skb);
+
+	return len;
+}
+
+const struct vport_ops ovs_internal_vport_ops = {
+	.type		= OVS_VPORT_TYPE_INTERNAL,
+	.create		= internal_dev_create,
+	.destroy	= internal_dev_destroy,
+	.get_name	= ovs_netdev_get_name,
+	.send		= internal_dev_recv,
+};
+
+int ovs_is_internal_dev(const struct net_device *netdev)
+{
+	return netdev->netdev_ops == &internal_dev_netdev_ops;
+}
+
+struct vport *ovs_internal_dev_get_vport(struct net_device *netdev)
+{
+	if (!ovs_is_internal_dev(netdev))
+		return NULL;
+
+	return internal_dev_priv(netdev)->vport;
+}
diff --git a/net/openvswitch/vport-internal_dev.h b/net/openvswitch/vport-internal_dev.h
new file mode 100644
index 00000000000..9a7d30ecc6a
--- /dev/null
+++ b/net/openvswitch/vport-internal_dev.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2007-2011 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef VPORT_INTERNAL_DEV_H
+#define VPORT_INTERNAL_DEV_H 1
+
+#include "datapath.h"
+#include "vport.h"
+
+int ovs_is_internal_dev(const struct net_device *);
+struct vport *ovs_internal_dev_get_vport(struct net_device *);
+
+#endif /* vport-internal_dev.h */
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
new file mode 100644
index 00000000000..d21f77d875b
--- /dev/null
+++ b/net/openvswitch/vport-netdev.c
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2007-2012 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/if_arp.h>
+#include <linux/if_bridge.h>
+#include <linux/if_vlan.h>
+#include <linux/kernel.h>
+#include <linux/llc.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+#include <linux/openvswitch.h>
+
+#include <net/llc.h>
+
+#include "datapath.h"
+#include "vport-internal_dev.h"
+#include "vport-netdev.h"
+
+/* Must be called with rcu_read_lock. */
+static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
+{
+	if (unlikely(!vport))
+		goto error;
+
+	if (unlikely(skb_warn_if_lro(skb)))
+		goto error;
+
+	/* Make our own copy of the packet.  Otherwise we will mangle the
+	 * packet for anyone who came before us (e.g. tcpdump via AF_PACKET).
+	 */
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (unlikely(!skb))
+		return;
+
+	skb_push(skb, ETH_HLEN);
+	ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
+
+	ovs_vport_receive(vport, skb, NULL);
+	return;
+
+error:
+	kfree_skb(skb);
+}
+
+/* Called with rcu_read_lock and bottom-halves disabled. */
+static rx_handler_result_t netdev_frame_hook(struct sk_buff **pskb)
+{
+	struct sk_buff *skb = *pskb;
+	struct vport *vport;
+
+	if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
+		return RX_HANDLER_PASS;
+
+	vport = ovs_netdev_get_vport(skb->dev);
+
+	netdev_port_receive(vport, skb);
+
+	return RX_HANDLER_CONSUMED;
+}
+
+static struct net_device *get_dpdev(struct datapath *dp)
+{
+	struct vport *local;
+
+	local = ovs_vport_ovsl(dp, OVSP_LOCAL);
+	BUG_ON(!local);
+	return netdev_vport_priv(local)->dev;
+}
+
+static struct vport *netdev_create(const struct vport_parms *parms)
+{
+	struct vport *vport;
+	struct netdev_vport *netdev_vport;
+	int err;
+
+	vport = ovs_vport_alloc(sizeof(struct netdev_vport),
+				&ovs_netdev_vport_ops, parms);
+	if (IS_ERR(vport)) {
+		err = PTR_ERR(vport);
+		goto error;
+	}
+
+	netdev_vport = netdev_vport_priv(vport);
+
+	netdev_vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), parms->name);
+	if (!netdev_vport->dev) {
+		err = -ENODEV;
+		goto error_free_vport;
+	}
+
+	if (netdev_vport->dev->flags & IFF_LOOPBACK ||
+	    netdev_vport->dev->type != ARPHRD_ETHER ||
+	    ovs_is_internal_dev(netdev_vport->dev)) {
+		err = -EINVAL;
+		goto error_put;
+	}
+
+	rtnl_lock();
+	err = netdev_master_upper_dev_link(netdev_vport->dev,
+					   get_dpdev(vport->dp));
+	if (err)
+		goto error_unlock;
+
+	err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook,
+					 vport);
+	if (err)
+		goto error_master_upper_dev_unlink;
+
+	dev_set_promiscuity(netdev_vport->dev, 1);
+	netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
+	rtnl_unlock();
+
+	return vport;
+
+error_master_upper_dev_unlink:
+	netdev_upper_dev_unlink(netdev_vport->dev, get_dpdev(vport->dp));
+error_unlock:
+	rtnl_unlock();
+error_put:
+	dev_put(netdev_vport->dev);
+error_free_vport:
+	ovs_vport_free(vport);
+error:
+	return ERR_PTR(err);
+}
+
+static void free_port_rcu(struct rcu_head *rcu)
+{
+	struct netdev_vport *netdev_vport = container_of(rcu,
+					struct netdev_vport, rcu);
+
+	dev_put(netdev_vport->dev);
+	ovs_vport_free(vport_from_priv(netdev_vport));
+}
+
+void ovs_netdev_detach_dev(struct vport *vport)
+{
+	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+
+	ASSERT_RTNL();
+	netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
+	netdev_rx_handler_unregister(netdev_vport->dev);
+	netdev_upper_dev_unlink(netdev_vport->dev,
+				netdev_master_upper_dev_get(netdev_vport->dev));
+	dev_set_promiscuity(netdev_vport->dev, -1);
+}
+
+static void netdev_destroy(struct vport *vport)
+{
+	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+
+	rtnl_lock();
+	if (netdev_vport->dev->priv_flags & IFF_OVS_DATAPATH)
+		ovs_netdev_detach_dev(vport);
+	rtnl_unlock();
+
+	call_rcu(&netdev_vport->rcu, free_port_rcu);
+}
+
+const char *ovs_netdev_get_name(const struct vport *vport)
+{
+	const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+	return netdev_vport->dev->name;
+}
+
+static unsigned int packet_length(const struct sk_buff *skb)
+{
+	unsigned int length = skb->len - ETH_HLEN;
+
+	if (skb->protocol == htons(ETH_P_8021Q))
+		length -= VLAN_HLEN;
+
+	return length;
+}
+
+static int netdev_send(struct vport *vport, struct sk_buff *skb)
+{
+	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+	int mtu = netdev_vport->dev->mtu;
+	int len;
+
+	if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
+		net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
+				     netdev_vport->dev->name,
+				     packet_length(skb), mtu);
+		goto drop;
+	}
+
+	skb->dev = netdev_vport->dev;
+	len = skb->len;
+	dev_queue_xmit(skb);
+
+	return len;
+
+drop:
+	kfree_skb(skb);
+	return 0;
+}
+
+/* Returns null if this device is not attached to a datapath. */
+struct vport *ovs_netdev_get_vport(struct net_device *dev)
+{
+	if (likely(dev->priv_flags & IFF_OVS_DATAPATH))
+		return (struct vport *)
+			rcu_dereference_rtnl(dev->rx_handler_data);
+	else
+		return NULL;
+}
+
+const struct vport_ops ovs_netdev_vport_ops = {
+	.type		= OVS_VPORT_TYPE_NETDEV,
+	.create		= netdev_create,
+	.destroy	= netdev_destroy,
+	.get_name	= ovs_netdev_get_name,
+	.send		= netdev_send,
+};
diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h
new file mode 100644
index 00000000000..8df01c1127e
--- /dev/null
+++ b/net/openvswitch/vport-netdev.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2007-2011 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef VPORT_NETDEV_H
+#define VPORT_NETDEV_H 1
+
+#include <linux/netdevice.h>
+#include <linux/rcupdate.h>
+
+#include "vport.h"
+
+struct vport *ovs_netdev_get_vport(struct net_device *dev);
+
+struct netdev_vport {
+	struct rcu_head rcu;
+
+	struct net_device *dev;
+};
+
+static inline struct netdev_vport *
+netdev_vport_priv(const struct vport *vport)
+{
+	return vport_priv(vport);
+}
+
+const char *ovs_netdev_get_name(const struct vport *);
+void ovs_netdev_detach_dev(struct vport *);
+
+#endif /* vport_netdev.h */
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
new file mode 100644
index 00000000000..0edbd95c60e
--- /dev/null
+++ b/net/openvswitch/vport-vxlan.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2013 Nicira, Inc.
+ * Copyright (c) 2013 Cisco Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/net.h>
+#include <linux/rculist.h>
+#include <linux/udp.h>
+
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/udp.h>
+#include <net/ip_tunnels.h>
+#include <net/rtnetlink.h>
+#include <net/route.h>
+#include <net/dsfield.h>
+#include <net/inet_ecn.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/vxlan.h>
+
+#include "datapath.h"
+#include "vport.h"
+
+/**
+ * struct vxlan_port - Keeps track of open UDP ports
+ * @vs: vxlan_sock created for the port.
+ * @name: vport name.
+ */
+struct vxlan_port {
+	struct vxlan_sock *vs;
+	char name[IFNAMSIZ];
+};
+
+static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
+{
+	return vport_priv(vport);
+}
+
+/* Called with rcu_read_lock and BH disabled. */
+static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
+{
+	struct ovs_key_ipv4_tunnel tun_key;
+	struct vport *vport = vs->data;
+	struct iphdr *iph;
+	__be64 key;
+
+	/* Save outer tunnel values */
+	iph = ip_hdr(skb);
+	key = cpu_to_be64(ntohl(vx_vni) >> 8);
+	ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY);
+
+	ovs_vport_receive(vport, skb, &tun_key);
+}
+
+static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
+{
+	struct vxlan_port *vxlan_port = vxlan_vport(vport);
+	__be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
+
+	if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
+		return -EMSGSIZE;
+	return 0;
+}
+
+static void vxlan_tnl_destroy(struct vport *vport)
+{
+	struct vxlan_port *vxlan_port = vxlan_vport(vport);
+
+	vxlan_sock_release(vxlan_port->vs);
+
+	ovs_vport_deferred_free(vport);
+}
+
+static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
+{
+	struct net *net = ovs_dp_get_net(parms->dp);
+	struct nlattr *options = parms->options;
+	struct vxlan_port *vxlan_port;
+	struct vxlan_sock *vs;
+	struct vport *vport;
+	struct nlattr *a;
+	u16 dst_port;
+	int err;
+
+	if (!options) {
+		err = -EINVAL;
+		goto error;
+	}
+	a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
+	if (a && nla_len(a) == sizeof(u16)) {
+		dst_port = nla_get_u16(a);
+	} else {
+		/* Require destination port from userspace. */
+		err = -EINVAL;
+		goto error;
+	}
+
+	vport = ovs_vport_alloc(sizeof(struct vxlan_port),
+				&ovs_vxlan_vport_ops, parms);
+	if (IS_ERR(vport))
+		return vport;
+
+	vxlan_port = vxlan_vport(vport);
+	strncpy(vxlan_port->name, parms->name, IFNAMSIZ);
+
+	vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 0);
+	if (IS_ERR(vs)) {
+		ovs_vport_free(vport);
+		return (void *)vs;
+	}
+	vxlan_port->vs = vs;
+
+	return vport;
+
+error:
+	return ERR_PTR(err);
+}
+
+static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
+{
+	struct net *net = ovs_dp_get_net(vport->dp);
+	struct vxlan_port *vxlan_port = vxlan_vport(vport);
+	__be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
+	struct rtable *rt;
+	struct flowi4 fl;
+	__be16 src_port;
+	int port_min;
+	int port_max;
+	__be16 df;
+	int err;
+
+	if (unlikely(!OVS_CB(skb)->tun_key)) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	/* Route lookup */
+	memset(&fl, 0, sizeof(fl));
+	fl.daddr = OVS_CB(skb)->tun_key->ipv4_dst;
+	fl.saddr = OVS_CB(skb)->tun_key->ipv4_src;
+	fl.flowi4_tos = RT_TOS(OVS_CB(skb)->tun_key->ipv4_tos);
+	fl.flowi4_mark = skb->mark;
+	fl.flowi4_proto = IPPROTO_UDP;
+
+	rt = ip_route_output_key(net, &fl);
+	if (IS_ERR(rt)) {
+		err = PTR_ERR(rt);
+		goto error;
+	}
+
+	df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
+		htons(IP_DF) : 0;
+
+	skb->ignore_df = 1;
+
+	inet_get_local_port_range(net, &port_min, &port_max);
+	src_port = vxlan_src_port(port_min, port_max, skb);
+
+	err = vxlan_xmit_skb(vxlan_port->vs, rt, skb,
+			     fl.saddr, OVS_CB(skb)->tun_key->ipv4_dst,
+			     OVS_CB(skb)->tun_key->ipv4_tos,
+			     OVS_CB(skb)->tun_key->ipv4_ttl, df,
+			     src_port, dst_port,
+			     htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8),
+			     false);
+	if (err < 0)
+		ip_rt_put(rt);
+error:
+	return err;
+}
+
+static const char *vxlan_get_name(const struct vport *vport)
+{
+	struct vxlan_port *vxlan_port = vxlan_vport(vport);
+	return vxlan_port->name;
+}
+
+const struct vport_ops ovs_vxlan_vport_ops = {
+	.type		= OVS_VPORT_TYPE_VXLAN,
+	.create		= vxlan_tnl_create,
+	.destroy	= vxlan_tnl_destroy,
+	.get_name	= vxlan_get_name,
+	.get_options	= vxlan_get_options,
+	.send		= vxlan_tnl_send,
+};
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
new file mode 100644
index 00000000000..42c0f4a0b78
--- /dev/null
+++ b/net/openvswitch/vport.c
@@ -0,0 +1,433 @@
+/*
+ * Copyright (c) 2007-2012 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/if.h>
+#include <linux/if_vlan.h>
+#include <linux/jhash.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/rtnetlink.h>
+#include <linux/compat.h>
+#include <net/net_namespace.h>
+
+#include "datapath.h"
+#include "vport.h"
+#include "vport-internal_dev.h"
+
+static void ovs_vport_record_error(struct vport *,
+				   enum vport_err_type err_type);
+
+/* List of statically compiled vport implementations.  Don't forget to also
+ * add yours to the list at the bottom of vport.h. */
+static const struct vport_ops *vport_ops_list[] = {
+	&ovs_netdev_vport_ops,
+	&ovs_internal_vport_ops,
+
+#ifdef CONFIG_OPENVSWITCH_GRE
+	&ovs_gre_vport_ops,
+#endif
+#ifdef CONFIG_OPENVSWITCH_VXLAN
+	&ovs_vxlan_vport_ops,
+#endif
+};
+
+/* Protected by RCU read lock for reading, ovs_mutex for writing. */
+static struct hlist_head *dev_table;
+#define VPORT_HASH_BUCKETS 1024
+
+/**
+ *	ovs_vport_init - initialize vport subsystem
+ *
+ * Called at module load time to initialize the vport subsystem.
+ */
+int ovs_vport_init(void)
+{
+	dev_table = kzalloc(VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
+			    GFP_KERNEL);
+	if (!dev_table)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ *	ovs_vport_exit - shutdown vport subsystem
+ *
+ * Called at module exit time to shutdown the vport subsystem.
+ */
+void ovs_vport_exit(void)
+{
+	kfree(dev_table);
+}
+
+static struct hlist_head *hash_bucket(struct net *net, const char *name)
+{
+	unsigned int hash = jhash(name, strlen(name), (unsigned long) net);
+	return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)];
+}
+
+/**
+ *	ovs_vport_locate - find a port that has already been created
+ *
+ * @name: name of port to find
+ *
+ * Must be called with ovs or RCU read lock.
+ */
+struct vport *ovs_vport_locate(struct net *net, const char *name)
+{
+	struct hlist_head *bucket = hash_bucket(net, name);
+	struct vport *vport;
+
+	hlist_for_each_entry_rcu(vport, bucket, hash_node)
+		if (!strcmp(name, vport->ops->get_name(vport)) &&
+		    net_eq(ovs_dp_get_net(vport->dp), net))
+			return vport;
+
+	return NULL;
+}
+
+/**
+ *	ovs_vport_alloc - allocate and initialize new vport
+ *
+ * @priv_size: Size of private data area to allocate.
+ * @ops: vport device ops
+ *
+ * Allocate and initialize a new vport defined by @ops.  The vport will contain
+ * a private data area of size @priv_size that can be accessed using
+ * vport_priv().  vports that are no longer needed should be released with
+ * vport_free().
+ */
+struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
+			  const struct vport_parms *parms)
+{
+	struct vport *vport;
+	size_t alloc_size;
+
+	alloc_size = sizeof(struct vport);
+	if (priv_size) {
+		alloc_size = ALIGN(alloc_size, VPORT_ALIGN);
+		alloc_size += priv_size;
+	}
+
+	vport = kzalloc(alloc_size, GFP_KERNEL);
+	if (!vport)
+		return ERR_PTR(-ENOMEM);
+
+	vport->dp = parms->dp;
+	vport->port_no = parms->port_no;
+	vport->upcall_portid = parms->upcall_portid;
+	vport->ops = ops;
+	INIT_HLIST_NODE(&vport->dp_hash_node);
+
+	vport->percpu_stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+	if (!vport->percpu_stats) {
+		kfree(vport);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	spin_lock_init(&vport->stats_lock);
+
+	return vport;
+}
+
+/**
+ *	ovs_vport_free - uninitialize and free vport
+ *
+ * @vport: vport to free
+ *
+ * Frees a vport allocated with vport_alloc() when it is no longer needed.
+ *
+ * The caller must ensure that an RCU grace period has passed since the last
+ * time @vport was in a datapath.
+ */
+void ovs_vport_free(struct vport *vport)
+{
+	free_percpu(vport->percpu_stats);
+	kfree(vport);
+}
+
+/**
+ *	ovs_vport_add - add vport device (for kernel callers)
+ *
+ * @parms: Information about new vport.
+ *
+ * Creates a new vport with the specified configuration (which is dependent on
+ * device type).  ovs_mutex must be held.
+ */
+struct vport *ovs_vport_add(const struct vport_parms *parms)
+{
+	struct vport *vport;
+	int err = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) {
+		if (vport_ops_list[i]->type == parms->type) {
+			struct hlist_head *bucket;
+
+			vport = vport_ops_list[i]->create(parms);
+			if (IS_ERR(vport)) {
+				err = PTR_ERR(vport);
+				goto out;
+			}
+
+			bucket = hash_bucket(ovs_dp_get_net(vport->dp),
+					     vport->ops->get_name(vport));
+			hlist_add_head_rcu(&vport->hash_node, bucket);
+			return vport;
+		}
+	}
+
+	err = -EAFNOSUPPORT;
+
+out:
+	return ERR_PTR(err);
+}
+
+/**
+ *	ovs_vport_set_options - modify existing vport device (for kernel callers)
+ *
+ * @vport: vport to modify.
+ * @options: New configuration.
+ *
+ * Modifies an existing device with the specified configuration (which is
+ * dependent on device type).  ovs_mutex must be held.
+ */
+int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
+{
+	if (!vport->ops->set_options)
+		return -EOPNOTSUPP;
+	return vport->ops->set_options(vport, options);
+}
+
+/**
+ *	ovs_vport_del - delete existing vport device
+ *
+ * @vport: vport to delete.
+ *
+ * Detaches @vport from its datapath and destroys it.  It is possible to fail
+ * for reasons such as lack of memory.  ovs_mutex must be held.
+ */
+void ovs_vport_del(struct vport *vport)
+{
+	ASSERT_OVSL();
+
+	hlist_del_rcu(&vport->hash_node);
+
+	vport->ops->destroy(vport);
+}
+
+/**
+ *	ovs_vport_get_stats - retrieve device stats
+ *
+ * @vport: vport from which to retrieve the stats
+ * @stats: location to store stats
+ *
+ * Retrieves transmit, receive, and error stats for the given device.
+ *
+ * Must be called with ovs_mutex or rcu_read_lock.
+ */
+void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
+{
+	int i;
+
+	memset(stats, 0, sizeof(*stats));
+
+	/* We potentially have 2 sources of stats that need to be combined:
+	 * those we have collected (split into err_stats and percpu_stats) from
+	 * set_stats() and device error stats from netdev->get_stats() (for
+	 * errors that happen  downstream and therefore aren't reported through
+	 * our vport_record_error() function).
+	 * Stats from first source are reported by ovs (OVS_VPORT_ATTR_STATS).
+	 * netdev-stats can be directly read over netlink-ioctl.
+	 */
+
+	spin_lock_bh(&vport->stats_lock);
+
+	stats->rx_errors	= vport->err_stats.rx_errors;
+	stats->tx_errors	= vport->err_stats.tx_errors;
+	stats->tx_dropped	= vport->err_stats.tx_dropped;
+	stats->rx_dropped	= vport->err_stats.rx_dropped;
+
+	spin_unlock_bh(&vport->stats_lock);
+
+	for_each_possible_cpu(i) {
+		const struct pcpu_sw_netstats *percpu_stats;
+		struct pcpu_sw_netstats local_stats;
+		unsigned int start;
+
+		percpu_stats = per_cpu_ptr(vport->percpu_stats, i);
+
+		do {
+			start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
+			local_stats = *percpu_stats;
+		} while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
+
+		stats->rx_bytes		+= local_stats.rx_bytes;
+		stats->rx_packets	+= local_stats.rx_packets;
+		stats->tx_bytes		+= local_stats.tx_bytes;
+		stats->tx_packets	+= local_stats.tx_packets;
+	}
+}
+
+/**
+ *	ovs_vport_get_options - retrieve device options
+ *
+ * @vport: vport from which to retrieve the options.
+ * @skb: sk_buff where options should be appended.
+ *
+ * Retrieves the configuration of the given device, appending an
+ * %OVS_VPORT_ATTR_OPTIONS attribute that in turn contains nested
+ * vport-specific attributes to @skb.
+ *
+ * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room, or another
+ * negative error code if a real error occurred.  If an error occurs, @skb is
+ * left unmodified.
+ *
+ * Must be called with ovs_mutex or rcu_read_lock.
+ */
+int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
+{
+	struct nlattr *nla;
+	int err;
+
+	if (!vport->ops->get_options)
+		return 0;
+
+	nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS);
+	if (!nla)
+		return -EMSGSIZE;
+
+	err = vport->ops->get_options(vport, skb);
+	if (err) {
+		nla_nest_cancel(skb, nla);
+		return err;
+	}
+
+	nla_nest_end(skb, nla);
+	return 0;
+}
+
+/**
+ *	ovs_vport_receive - pass up received packet to the datapath for processing
+ *
+ * @vport: vport that received the packet
+ * @skb: skb that was received
+ * @tun_key: tunnel (if any) that carried packet
+ *
+ * Must be called with rcu_read_lock.  The packet cannot be shared and
+ * skb->data should point to the Ethernet header.
+ */
+void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
+		       struct ovs_key_ipv4_tunnel *tun_key)
+{
+	struct pcpu_sw_netstats *stats;
+
+	stats = this_cpu_ptr(vport->percpu_stats);
+	u64_stats_update_begin(&stats->syncp);
+	stats->rx_packets++;
+	stats->rx_bytes += skb->len;
+	u64_stats_update_end(&stats->syncp);
+
+	OVS_CB(skb)->tun_key = tun_key;
+	ovs_dp_process_received_packet(vport, skb);
+}
+
+/**
+ *	ovs_vport_send - send a packet on a device
+ *
+ * @vport: vport on which to send the packet
+ * @skb: skb to send
+ *
+ * Sends the given packet and returns the length of data sent.  Either ovs
+ * lock or rcu_read_lock must be held.
+ */
+int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
+{
+	int sent = vport->ops->send(vport, skb);
+
+	if (likely(sent > 0)) {
+		struct pcpu_sw_netstats *stats;
+
+		stats = this_cpu_ptr(vport->percpu_stats);
+
+		u64_stats_update_begin(&stats->syncp);
+		stats->tx_packets++;
+		stats->tx_bytes += sent;
+		u64_stats_update_end(&stats->syncp);
+	} else if (sent < 0) {
+		ovs_vport_record_error(vport, VPORT_E_TX_ERROR);
+		kfree_skb(skb);
+	} else
+		ovs_vport_record_error(vport, VPORT_E_TX_DROPPED);
+
+	return sent;
+}
+
+/**
+ *	ovs_vport_record_error - indicate device error to generic stats layer
+ *
+ * @vport: vport that encountered the error
+ * @err_type: one of enum vport_err_type types to indicate the error type
+ *
+ * If using the vport generic stats layer indicate that an error of the given
+ * type has occurred.
+ */
+static void ovs_vport_record_error(struct vport *vport,
+				   enum vport_err_type err_type)
+{
+	spin_lock(&vport->stats_lock);
+
+	switch (err_type) {
+	case VPORT_E_RX_DROPPED:
+		vport->err_stats.rx_dropped++;
+		break;
+
+	case VPORT_E_RX_ERROR:
+		vport->err_stats.rx_errors++;
+		break;
+
+	case VPORT_E_TX_DROPPED:
+		vport->err_stats.tx_dropped++;
+		break;
+
+	case VPORT_E_TX_ERROR:
+		vport->err_stats.tx_errors++;
+		break;
+	}
+
+	spin_unlock(&vport->stats_lock);
+}
+
+static void free_vport_rcu(struct rcu_head *rcu)
+{
+	struct vport *vport = container_of(rcu, struct vport, rcu);
+
+	ovs_vport_free(vport);
+}
+
+void ovs_vport_deferred_free(struct vport *vport)
+{
+	if (!vport)
+		return;
+
+	call_rcu(&vport->rcu, free_vport_rcu);
+}
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
new file mode 100644
index 00000000000..8d721e62f38
--- /dev/null
+++ b/net/openvswitch/vport.h
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2007-2012 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef VPORT_H
+#define VPORT_H 1
+
+#include <linux/if_tunnel.h>
+#include <linux/list.h>
+#include <linux/netlink.h>
+#include <linux/openvswitch.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/u64_stats_sync.h>
+
+#include "datapath.h"
+
+struct vport;
+struct vport_parms;
+
+/* The following definitions are for users of the vport subsytem: */
+
+/* The following definitions are for users of the vport subsytem: */
+struct vport_net {
+	struct vport __rcu *gre_vport;
+};
+
+int ovs_vport_init(void);
+void ovs_vport_exit(void);
+
+struct vport *ovs_vport_add(const struct vport_parms *);
+void ovs_vport_del(struct vport *);
+
+struct vport *ovs_vport_locate(struct net *net, const char *name);
+
+void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *);
+
+int ovs_vport_set_options(struct vport *, struct nlattr *options);
+int ovs_vport_get_options(const struct vport *, struct sk_buff *);
+
+int ovs_vport_send(struct vport *, struct sk_buff *);
+
+/* The following definitions are for implementers of vport devices: */
+
+struct vport_err_stats {
+	u64 rx_dropped;
+	u64 rx_errors;
+	u64 tx_dropped;
+	u64 tx_errors;
+};
+
+/**
+ * struct vport - one port within a datapath
+ * @rcu: RCU callback head for deferred destruction.
+ * @dp: Datapath to which this port belongs.
+ * @upcall_portid: The Netlink port to use for packets received on this port that
+ * miss the flow table.
+ * @port_no: Index into @dp's @ports array.
+ * @hash_node: Element in @dev_table hash table in vport.c.
+ * @dp_hash_node: Element in @datapath->ports hash table in datapath.c.
+ * @ops: Class structure.
+ * @percpu_stats: Points to per-CPU statistics used and maintained by vport
+ * @stats_lock: Protects @err_stats;
+ * @err_stats: Points to error statistics used and maintained by vport
+ */
+struct vport {
+	struct rcu_head rcu;
+	struct datapath	*dp;
+	u32 upcall_portid;
+	u16 port_no;
+
+	struct hlist_node hash_node;
+	struct hlist_node dp_hash_node;
+	const struct vport_ops *ops;
+
+	struct pcpu_sw_netstats __percpu *percpu_stats;
+
+	spinlock_t stats_lock;
+	struct vport_err_stats err_stats;
+};
+
+/**
+ * struct vport_parms - parameters for creating a new vport
+ *
+ * @name: New vport's name.
+ * @type: New vport's type.
+ * @options: %OVS_VPORT_ATTR_OPTIONS attribute from Netlink message, %NULL if
+ * none was supplied.
+ * @dp: New vport's datapath.
+ * @port_no: New vport's port number.
+ */
+struct vport_parms {
+	const char *name;
+	enum ovs_vport_type type;
+	struct nlattr *options;
+
+	/* For ovs_vport_alloc(). */
+	struct datapath *dp;
+	u16 port_no;
+	u32 upcall_portid;
+};
+
+/**
+ * struct vport_ops - definition of a type of virtual port
+ *
+ * @type: %OVS_VPORT_TYPE_* value for this type of virtual port.
+ * @create: Create a new vport configured as specified.  On success returns
+ * a new vport allocated with ovs_vport_alloc(), otherwise an ERR_PTR() value.
+ * @destroy: Destroys a vport.  Must call vport_free() on the vport but not
+ * before an RCU grace period has elapsed.
+ * @set_options: Modify the configuration of an existing vport.  May be %NULL
+ * if modification is not supported.
+ * @get_options: Appends vport-specific attributes for the configuration of an
+ * existing vport to a &struct sk_buff.  May be %NULL for a vport that does not
+ * have any configuration.
+ * @get_name: Get the device's name.
+ * @send: Send a packet on the device.  Returns the length of the packet sent,
+ * zero for dropped packets or negative for error.
+ */
+struct vport_ops {
+	enum ovs_vport_type type;
+
+	/* Called with ovs_mutex. */
+	struct vport *(*create)(const struct vport_parms *);
+	void (*destroy)(struct vport *);
+
+	int (*set_options)(struct vport *, struct nlattr *);
+	int (*get_options)(const struct vport *, struct sk_buff *);
+
+	/* Called with rcu_read_lock or ovs_mutex. */
+	const char *(*get_name)(const struct vport *);
+
+	int (*send)(struct vport *, struct sk_buff *);
+};
+
+enum vport_err_type {
+	VPORT_E_RX_DROPPED,
+	VPORT_E_RX_ERROR,
+	VPORT_E_TX_DROPPED,
+	VPORT_E_TX_ERROR,
+};
+
+struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *,
+			      const struct vport_parms *);
+void ovs_vport_free(struct vport *);
+void ovs_vport_deferred_free(struct vport *vport);
+
+#define VPORT_ALIGN 8
+
+/**
+ *	vport_priv - access private data area of vport
+ *
+ * @vport: vport to access
+ *
+ * If a nonzero size was passed in priv_size of vport_alloc() a private data
+ * area was allocated on creation.  This allows that area to be accessed and
+ * used for any purpose needed by the vport implementer.
+ */
+static inline void *vport_priv(const struct vport *vport)
+{
+	return (u8 *)(uintptr_t)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN);
+}
+
+/**
+ *	vport_from_priv - lookup vport from private data pointer
+ *
+ * @priv: Start of private data area.
+ *
+ * It is sometimes useful to translate from a pointer to the private data
+ * area to the vport, such as in the case where the private data pointer is
+ * the result of a hash table lookup.  @priv must point to the start of the
+ * private data area.
+ */
+static inline struct vport *vport_from_priv(void *priv)
+{
+	return (struct vport *)((u8 *)priv - ALIGN(sizeof(struct vport), VPORT_ALIGN));
+}
+
+void ovs_vport_receive(struct vport *, struct sk_buff *,
+		       struct ovs_key_ipv4_tunnel *);
+
+/* List of statically compiled vport implementations.  Don't forget to also
+ * add yours to the list at the top of vport.c. */
+extern const struct vport_ops ovs_netdev_vport_ops;
+extern const struct vport_ops ovs_internal_vport_ops;
+extern const struct vport_ops ovs_gre_vport_ops;
+extern const struct vport_ops ovs_vxlan_vport_ops;
+
+static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
+				      const void *start, unsigned int len)
+{
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->csum = csum_add(skb->csum, csum_partial(start, len, 0));
+}
+
+#endif /* vport.h */