#include <linux/etherdevice.h>
#include <linux/if_macvlan.h>
#include <linux/interrupt.h>
#include <linux/nsproxy.h>
#include <linux/compat.h>
#include <linux/if_tun.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/cache.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/wait.h>
#include <linux/cdev.h>
#include <linux/idr.h>
#include <linux/fs.h>
#include <net/net_namespace.h>
#include <net/rtnetlink.h>
#include <net/sock.h>
#include <linux/virtio_net.h>
/*
 * A macvtap queue is the central object of this driver; it connects
 * an open character device to a macvlan interface. There can be
 * multiple queues on one interface, which map back to queues
 * implemented in hardware on the underlying device.
 *
 * macvtap_proto is used to allocate queues through the sock allocation
 * mechanism.
 *
 * TODO: multiqueue support is currently not implemented, even though
 * macvtap is basically prepared for that. We will need to add this
 * here as well as in virtio-net and qemu to get line rate on 10gbit
 * adapters from a guest.
 */
struct macvtap_queue {
/* Embedded sock; references on the queue are taken with sock_hold(&q->sk). */
struct sock sk;
/* Socket and wait-queue state embedded in the queue; users are outside this excerpt. */
struct socket sock;
struct socket_wq wq;
/* NOTE(review): presumably the virtio-net header size in bytes - confirm at the use site. */
int vnet_hdr_sz;
/*
 * RCU-protected pointer to the macvlan device this queue is attached to.
 * Set in macvtap_set_queue() and cleared in macvtap_put_queue().
 */
struct macvlan_dev __rcu *vlan;
/* The open file this queue was attached for (set in macvtap_set_queue()). */
struct file *file;
/* NOTE(review): meaning of the flag bits is not visible in this excerpt. */
unsigned int flags;
};
/*
 * Protocol descriptor used to allocate queues through the sock
 * allocation mechanism; obj_size is the size of a whole
 * struct macvtap_queue, not just the struct sock embedded in it.
 */
static struct proto macvtap_proto = {
	.owner		= THIS_MODULE,
	.name		= "macvtap",
	.obj_size	= sizeof(struct macvtap_queue),
};
/*
 * Variables for dealing with macvtap's device numbers.
 */
/* Held as a full dev_t rather than a bare major number. */
static dev_t macvtap_major;
/* Number of distinct values representable in MINORBITS bits. */
#define MACVTAP_NUM_DEVS (1U << MINORBITS)
/* NOTE(review): minor_lock presumably guards minor_idr; the users are outside this excerpt. */
static DEFINE_MUTEX(minor_lock);
static DEFINE_IDR(minor_idr);
/* NOTE(review): looks like a copy-length threshold in bytes - confirm at the use site. */
#define GOODCOPY_LEN 128
static struct class *macvtap_class;
static struct cdev macvtap_cdev;
/* Declared here without an initializer; none appears in this excerpt. */
static const struct proto_ops macvtap_socket_ops;
/*
 * RCU usage:
 * The macvtap_queue and the macvlan_dev are loosely coupled; the
 * pointers from one to the other can only be read while rcu_read_lock
 * or macvtap_lock is held.
 *
 * Both the file and the macvlan_dev hold a reference on the macvtap_queue
 * through sock_hold(&q->sk). When the macvlan_dev goes away first,
 * q->vlan becomes inaccessible. When the file gets closed,
 * macvtap_get_queue() fails.
 *
 * There may still be references to the struct sock inside of the
 * queue from outbound SKBs, but these never reference back to the
 * file or the dev. The data structure is freed through __sk_free
 * when both our references and any pending SKBs are gone.
 *
 * macvtap_lock is taken around the updates of vlan->taps[],
 * vlan->numvtaps and q->vlan in macvtap_set_queue() and
 * macvtap_put_queue().
 */
static DEFINE_SPINLOCK(macvtap_lock);
/*
* get_slot: return a [unused/occupied] slot in vlan->taps[]:
* - if 'q' is NULL, return the first empty slot;
* - otherwise, return the slot this pointer occupies.
*/
static int get_slot(struct macvlan_dev *vlan, struct macvtap_queue *q)
{
int i;
for (i = 0; i < MAX_MACVTAP_QUEUES; i++) {
if (rcu_dereference(vlan->taps[i]) == q)
return i;
}
/* Should never happen */
BUG_ON(1);
}
/*
 * Attach queue 'q' to the macvlan device behind 'dev' on behalf of 'file'.
 *
 * On success the queue is published in a free vlan->taps[] slot, an extra
 * reference is taken on q->sk, and 'file' and 'q' are pointed at each
 * other.  All of this happens under macvtap_lock.
 *
 * Returns 0 on success, or -EBUSY when the device already has
 * MAX_MACVTAP_QUEUES queues attached.
 */
static int macvtap_set_queue(struct net_device *dev, struct file *file,
			     struct macvtap_queue *q)
{
	struct macvlan_dev *vlan = netdev_priv(dev);
	int ret = 0;

	spin_lock(&macvtap_lock);
	if (vlan->numvtaps != MAX_MACVTAP_QUEUES) {
		int slot = get_slot(vlan, NULL);

		/* Link queue -> device first, then make it visible via taps[]. */
		rcu_assign_pointer(q->vlan, vlan);
		rcu_assign_pointer(vlan->taps[slot], q);
		sock_hold(&q->sk);

		q->file = file;
		file->private_data = q;

		vlan->numvtaps++;
	} else {
		ret = -EBUSY;
	}
	spin_unlock(&macvtap_lock);

	return ret;
}
/*
 * The file owning the queue got closed: give up the reference that
 * the file holds and, if the queue is still attached to a macvlan_dev,
 * the reference held on behalf of that device as well.
 *
 * The detach is done under macvtap_lock so that the queue cannot be
 * reached again after it has been destroyed.
 */
static void macvtap_put_queue(struct macvtap_queue *q)
{
	struct macvlan_dev *owner;

	spin_lock(&macvtap_lock);
	owner = rcu_dereference_protected(q->vlan,
					  lockdep_is_held(&macvtap_lock));
	if (owner != NULL) {
		int slot = get_slot(owner, q);

		/* Unlink in both directions, then drop the device's reference. */
		RCU_INIT_POINTER(owner->taps[slot], NULL);
		RCU_INIT_POINTER(q->vlan, NULL);
		sock_put(&q->sk);
		owner->numvtaps--;
	}
	spin_unlock(&macvtap_lock);

	/* Wait for an RCU grace period before dropping the file's reference. */
	synchronize_rcu();
	sock_put(&q->sk);
}
/*
* Select a queue based on the rxq of the device on which this packet
* arrived. If the incoming device is not mq, calc