Diffstat (limited to 'virt/kvm/assigned-dev.c')
-rw-r--r--	virt/kvm/assigned-dev.c	496
1 file changed, 350 insertions(+), 146 deletions(-)
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index 7c98928b09d..bf06577fea5 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -17,6 +17,8 @@
 #include <linux/pci.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
+#include <linux/namei.h>
+#include <linux/fs.h>
 #include "irq.h"
 
 static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
@@ -47,99 +49,161 @@ static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
 			index = i;
 			break;
 		}
-	if (index < 0) {
+	if (index < 0)
 		printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
-		return 0;
-	}
 
 	return index;
 }
 
-static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
+static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
 {
-	struct kvm_assigned_dev_kernel *assigned_dev;
-	int i;
+	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
+	int ret;
+
+	spin_lock(&assigned_dev->intx_lock);
+	if (pci_check_and_mask_intx(assigned_dev->dev)) {
+		assigned_dev->host_irq_disabled = true;
+		ret = IRQ_WAKE_THREAD;
+	} else
+		ret = IRQ_NONE;
+	spin_unlock(&assigned_dev->intx_lock);
 
-	assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
-				    interrupt_work);
+	return ret;
+}
 
-	spin_lock_irq(&assigned_dev->assigned_dev_lock);
-	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
-		struct kvm_guest_msix_entry *guest_entries =
-			assigned_dev->guest_msix_entries;
-		for (i = 0; i < assigned_dev->entries_nr; i++) {
-			if (!(guest_entries[i].flags &
-					KVM_ASSIGNED_MSIX_PENDING))
-				continue;
-			guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING;
+static void
+kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
+				 int vector)
+{
+	if (unlikely(assigned_dev->irq_requested_type &
+		     KVM_DEV_IRQ_GUEST_INTX)) {
+		spin_lock(&assigned_dev->intx_mask_lock);
+		if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
 			kvm_set_irq(assigned_dev->kvm,
-				    assigned_dev->irq_source_id,
-				    guest_entries[i].vector, 1);
-		}
+				    assigned_dev->irq_source_id, vector, 1,
+				    false);
+		spin_unlock(&assigned_dev->intx_mask_lock);
 	} else
 		kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
-			    assigned_dev->guest_irq, 1);
+			    vector, 1, false);
+}
+
+static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
+{
+	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
 
-	spin_unlock_irq(&assigned_dev->assigned_dev_lock);
+	if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
+		spin_lock_irq(&assigned_dev->intx_lock);
+		disable_irq_nosync(irq);
+		assigned_dev->host_irq_disabled = true;
+		spin_unlock_irq(&assigned_dev->intx_lock);
+	}
+
+	kvm_assigned_dev_raise_guest_irq(assigned_dev,
+					 assigned_dev->guest_irq);
+
+	return IRQ_HANDLED;
 }
 
-static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
+#ifdef __KVM_HAVE_MSI
+static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
 {
-	unsigned long flags;
-	struct kvm_assigned_dev_kernel *assigned_dev =
-		(struct kvm_assigned_dev_kernel *) dev_id;
+	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
+	int ret = kvm_set_irq_inatomic(assigned_dev->kvm,
+				       assigned_dev->irq_source_id,
+				       assigned_dev->guest_irq, 1);
+	return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
+}
 
-	spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags);
-	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
-		int index = find_index_from_host_irq(assigned_dev, irq);
-		if (index < 0)
-			goto out;
-		assigned_dev->guest_msix_entries[index].flags |=
-			KVM_ASSIGNED_MSIX_PENDING;
+static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
+{
+	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
+
+	kvm_assigned_dev_raise_guest_irq(assigned_dev,
+					 assigned_dev->guest_irq);
+
+	return IRQ_HANDLED;
+}
+#endif
+
+#ifdef __KVM_HAVE_MSIX
+static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
+{
+	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
+	int index = find_index_from_host_irq(assigned_dev, irq);
+	u32 vector;
+	int ret = 0;
+
+	if (index >= 0) {
+		vector = assigned_dev->guest_msix_entries[index].vector;
+		ret = kvm_set_irq_inatomic(assigned_dev->kvm,
+					   assigned_dev->irq_source_id,
+					   vector, 1);
 	}
-	schedule_work(&assigned_dev->interrupt_work);
 
-	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
-		disable_irq_nosync(irq);
-		assigned_dev->host_irq_disabled = true;
+	return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
+}
+
+static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
+{
+	struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
+	int index = find_index_from_host_irq(assigned_dev, irq);
+	u32 vector;
+
+	if (index >= 0) {
+		vector = assigned_dev->guest_msix_entries[index].vector;
+		kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
 	}
 
-out:
-	spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags);
 	return IRQ_HANDLED;
 }
+#endif
 
 /* Ack the irq line for an assigned device */
 static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 {
-	struct kvm_assigned_dev_kernel *dev;
-	unsigned long flags;
-
-	if (kian->gsi == -1)
-		return;
-
-	dev = container_of(kian, struct kvm_assigned_dev_kernel,
-			   ack_notifier);
-
-	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);
+	struct kvm_assigned_dev_kernel *dev =
+		container_of(kian, struct kvm_assigned_dev_kernel,
+			     ack_notifier);
+
+	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false);
+
+	spin_lock(&dev->intx_mask_lock);
+
+	if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
+		bool reassert = false;
+
+		spin_lock_irq(&dev->intx_lock);
+		/*
+		 * The guest IRQ may be shared so this ack can come from an
+		 * IRQ for another guest device.
+		 */
+		if (dev->host_irq_disabled) {
+			if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
+				enable_irq(dev->host_irq);
+			else if (!pci_check_and_unmask_intx(dev->dev))
+				reassert = true;
+			dev->host_irq_disabled = reassert;
+		}
+		spin_unlock_irq(&dev->intx_lock);
 
-	/* The guest irq may be shared so this ack may be
-	 * from another device.
-	 */
-	spin_lock_irqsave(&dev->assigned_dev_lock, flags);
-	if (dev->host_irq_disabled) {
-		enable_irq(dev->host_irq);
-		dev->host_irq_disabled = false;
+		if (reassert)
+			kvm_set_irq(dev->kvm, dev->irq_source_id,
+				    dev->guest_irq, 1, false);
 	}
-	spin_unlock_irqrestore(&dev->assigned_dev_lock, flags);
+
+	spin_unlock(&dev->intx_mask_lock);
 }
 
 static void deassign_guest_irq(struct kvm *kvm,
 			       struct kvm_assigned_dev_kernel *assigned_dev)
 {
-	kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
-	assigned_dev->ack_notifier.gsi = -1;
+	if (assigned_dev->ack_notifier.gsi != -1)
+		kvm_unregister_irq_ack_notifier(kvm,
+						&assigned_dev->ack_notifier);
+
+	kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
+		    assigned_dev->guest_irq, 0, false);
 
 	if (assigned_dev->irq_source_id != -1)
 		kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
@@ -152,32 +216,23 @@ static void deassign_host_irq(struct kvm *kvm,
 			      struct kvm_assigned_dev_kernel *assigned_dev)
 {
 	/*
-	 * In kvm_free_device_irq, cancel_work_sync return true if:
-	 * 1. work is scheduled, and then cancelled.
-	 * 2. work callback is executed.
-	 *
-	 * The first one ensured that the irq is disabled and no more events
-	 * would happen. But for the second one, the irq may be enabled (e.g.
-	 * for MSI). So we disable irq here to prevent further events.
+	 * We disable irq here to prevent further events.
 	 *
 	 * Notice this maybe result in nested disable if the interrupt type is
 	 * INTx, but it's OK for we are going to free it.
 	 *
 	 * If this function is a part of VM destroy, please ensure that till
 	 * now, the kvm state is still legal for probably we also have to wait
-	 * interrupt_work done.
+	 * on a currently running IRQ handler.
 	 */
 	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		int i;
 		for (i = 0; i < assigned_dev->entries_nr; i++)
-			disable_irq_nosync(assigned_dev->
-					   host_msix_entries[i].vector);
-
-		cancel_work_sync(&assigned_dev->interrupt_work);
+			disable_irq(assigned_dev->host_msix_entries[i].vector);
 
 		for (i = 0; i < assigned_dev->entries_nr; i++)
 			free_irq(assigned_dev->host_msix_entries[i].vector,
-				 (void *)assigned_dev);
+				 assigned_dev);
 
 		assigned_dev->entries_nr = 0;
 		kfree(assigned_dev->host_msix_entries);
@@ -185,10 +240,17 @@ static void deassign_host_irq(struct kvm *kvm,
 		pci_disable_msix(assigned_dev->dev);
 	} else {
 		/* Deal with MSI and INTx */
-		disable_irq_nosync(assigned_dev->host_irq);
-		cancel_work_sync(&assigned_dev->interrupt_work);
-
-		free_irq(assigned_dev->host_irq, (void *)assigned_dev);
+		if ((assigned_dev->irq_requested_type &
+		     KVM_DEV_IRQ_HOST_INTX) &&
+		    (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
+			spin_lock_irq(&assigned_dev->intx_lock);
+			pci_intx(assigned_dev->dev, false);
+			spin_unlock_irq(&assigned_dev->intx_lock);
+			synchronize_irq(assigned_dev->host_irq);
+		} else
+			disable_irq(assigned_dev->host_irq);
+
+		free_irq(assigned_dev->host_irq, assigned_dev);
 
 		if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
 			pci_disable_msi(assigned_dev->dev);
@@ -233,6 +295,14 @@ static void kvm_free_assigned_device(struct kvm *kvm,
 	kvm_free_assigned_irq(kvm, assigned_dev);
 
 	pci_reset_function(assigned_dev->dev);
+	if (pci_load_and_free_saved_state(assigned_dev->dev,
+					  &assigned_dev->pci_saved_state))
+		printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
+		       __func__, dev_name(&assigned_dev->dev->dev));
+	else
+		pci_restore_state(assigned_dev->dev);
+
+	assigned_dev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
 
 	pci_release_regions(assigned_dev->dev);
 	pci_disable_device(assigned_dev->dev);
@@ -259,15 +329,34 @@ void kvm_free_all_assigned_devices(struct kvm *kvm)
 static int assigned_device_enable_host_intx(struct kvm *kvm,
 					    struct kvm_assigned_dev_kernel *dev)
 {
+	irq_handler_t irq_handler;
+	unsigned long flags;
+
 	dev->host_irq = dev->dev->irq;
-	/* Even though this is PCI, we don't want to use shared
-	 * interrupts. Sharing host devices with guest-assigned devices
-	 * on the same interrupt line is not a happy situation: there
-	 * are going to be long delays in accepting, acking, etc.
+
+	/*
+	 * We can only share the IRQ line with other host devices if we are
+	 * able to disable the IRQ source at device-level - independently of
+	 * the guest driver. Otherwise host devices may suffer from unbounded
+	 * IRQ latencies when the guest keeps the line asserted.
 	 */
-	if (request_irq(dev->host_irq, kvm_assigned_dev_intr,
-			0, "kvm_assigned_intx_device", (void *)dev))
+	if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
+		irq_handler = kvm_assigned_dev_intx;
+		flags = IRQF_SHARED;
+	} else {
+		irq_handler = NULL;
+		flags = IRQF_ONESHOT;
+	}
+	if (request_threaded_irq(dev->host_irq, irq_handler,
+				 kvm_assigned_dev_thread_intx, flags,
+				 dev->irq_name, dev))
 		return -EIO;
+
+	if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
+		spin_lock_irq(&dev->intx_lock);
+		pci_intx(dev->dev, true);
+		spin_unlock_irq(&dev->intx_lock);
+	}
 	return 0;
 }
 
@@ -284,8 +373,9 @@ static int assigned_device_enable_host_msi(struct kvm *kvm,
 	}
 
 	dev->host_irq = dev->dev->irq;
-	if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0,
-			"kvm_assigned_msi_device", (void *)dev)) {
+	if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi,
+				 kvm_assigned_dev_thread_msi, 0,
				 dev->irq_name, dev)) {
 		pci_disable_msi(dev->dev);
 		return -EIO;
 	}
@@ -305,15 +395,16 @@ static int assigned_device_enable_host_msix(struct kvm *kvm,
 	if (dev->entries_nr == 0)
 		return r;
 
-	r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr);
+	r = pci_enable_msix_exact(dev->dev,
+				  dev->host_msix_entries, dev->entries_nr);
 	if (r)
 		return r;
 
 	for (i = 0; i < dev->entries_nr; i++) {
-		r = request_irq(dev->host_msix_entries[i].vector,
-				kvm_assigned_dev_intr, 0,
-				"kvm_assigned_msix_device",
-				(void *)dev);
+		r = request_threaded_irq(dev->host_msix_entries[i].vector,
+					 kvm_assigned_dev_msix,
+					 kvm_assigned_dev_thread_msix,
+					 0, dev->irq_name, dev);
 		if (r)
 			goto err;
 	}
@@ -321,7 +412,7 @@ static int assigned_device_enable_host_msix(struct kvm *kvm,
 	return 0;
 err:
 	for (i -= 1; i >= 0; i--)
-		free_irq(dev->host_msix_entries[i].vector, (void *)dev);
+		free_irq(dev->host_msix_entries[i].vector, dev);
 	pci_disable_msix(dev->dev);
 	return r;
 }
@@ -344,7 +435,6 @@ static int assigned_device_enable_guest_msi(struct kvm *kvm,
 {
 	dev->guest_irq = irq->guest_irq;
 	dev->ack_notifier.gsi = -1;
-	dev->host_irq_disabled = false;
 	return 0;
 }
 #endif
@@ -356,7 +446,6 @@ static int assigned_device_enable_guest_msix(struct kvm *kvm,
 {
 	dev->guest_irq = irq->guest_irq;
 	dev->ack_notifier.gsi = -1;
-	dev->host_irq_disabled = false;
 	return 0;
 }
 #endif
@@ -370,6 +459,9 @@ static int assign_host_irq(struct kvm *kvm,
 	if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
 		return r;
 
+	snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
+		 pci_name(dev->dev));
+
 	switch (host_irq_type) {
 	case KVM_DEV_IRQ_HOST_INTX:
 		r = assigned_device_enable_host_intx(kvm, dev);
@@ -387,6 +479,7 @@ static int assign_host_irq(struct kvm *kvm,
 	default:
 		r = -EINVAL;
 	}
+	dev->host_irq_disabled = false;
 
 	if (!r)
 		dev->irq_requested_type |= host_irq_type;
@@ -431,7 +524,8 @@ static int assign_guest_irq(struct kvm *kvm,
 
 	if (!r) {
 		dev->irq_requested_type |= guest_irq_type;
-		kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
+		if (dev->ack_notifier.gsi != -1)
+			kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
 	} else
 		kvm_free_irq_source_id(kvm, dev->irq_source_id);
 
@@ -487,6 +581,7 @@ static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
 {
 	int r = -ENODEV;
 	struct kvm_assigned_dev_kernel *match;
+	unsigned long irq_type;
 
 	mutex_lock(&kvm->lock);
 
@@ -495,12 +590,74 @@ static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
 	if (!match)
 		goto out;
 
-	r = kvm_deassign_irq(kvm, match, assigned_irq->flags);
+	irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
+					  KVM_DEV_IRQ_GUEST_MASK);
+	r = kvm_deassign_irq(kvm, match, irq_type);
 out:
 	mutex_unlock(&kvm->lock);
 	return r;
 }
 
+/*
+ * We want to test whether the caller has been granted permissions to
+ * use this device.  To be able to configure and control the device,
+ * the user needs access to PCI configuration space and BAR resources.
+ * These are accessed through PCI sysfs.  PCI config space is often
+ * passed to the process calling this ioctl via file descriptor, so we
+ * can't rely on access to that file.  We can check for permissions
+ * on each of the BAR resource files, which is a pretty clear
+ * indicator that the user has been granted access to the device.
+ */
+static int probe_sysfs_permissions(struct pci_dev *dev)
+{
+#ifdef CONFIG_SYSFS
+	int i;
+	bool bar_found = false;
+
+	for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
+		char *kpath, *syspath;
+		struct path path;
+		struct inode *inode;
+		int r;
+
+		if (!pci_resource_len(dev, i))
+			continue;
+
+		kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
+		if (!kpath)
+			return -ENOMEM;
+
+		/* Per sysfs-rules, sysfs is always at /sys */
+		syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
+		kfree(kpath);
+		if (!syspath)
+			return -ENOMEM;
+
+		r = kern_path(syspath, LOOKUP_FOLLOW, &path);
+		kfree(syspath);
+		if (r)
+			return r;
+
+		inode = path.dentry->d_inode;
+
+		r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
+		path_put(&path);
+		if (r)
+			return r;
+
+		bar_found = true;
+	}
+
+	/* If no resources, probably something special */
+	if (!bar_found)
+		return -EPERM;
+
+	return 0;
+#else
+	return -EINVAL; /* No way to control the device without sysfs */
+#endif
+}
+
 static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 				      struct kvm_assigned_pci_dev *assigned_dev)
 {
@@ -508,6 +665,9 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 	struct kvm_assigned_dev_kernel *match;
 	struct pci_dev *dev;
 
+	if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
+		return -EINVAL;
+
 	mutex_lock(&kvm->lock);
 	idx = srcu_read_lock(&kvm->srcu);
 
@@ -534,6 +694,17 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 		r = -EINVAL;
 		goto out_free;
 	}
+
+	/* Don't allow bridges to be assigned */
+	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
+		r = -EPERM;
+		goto out_put;
+	}
+
+	r = probe_sysfs_permissions(dev);
+	if (r)
+		goto out_put;
+
 	if (pci_enable_device(dev)) {
 		printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
 		r = -EBUSY;
@@ -547,6 +718,14 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 	}
 
 	pci_reset_function(dev);
+	pci_save_state(dev);
+	match->pci_saved_state = pci_store_saved_state(dev);
+	if (!match->pci_saved_state)
+		printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
+		       __func__, dev_name(&dev->dev));
+
+	if (!pci_intx_mask_supported(dev))
+		assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;
 
 	match->assigned_dev_id = assigned_dev->assigned_dev_id;
 	match->host_segnr = assigned_dev->segnr;
@@ -554,31 +733,31 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 	match->host_devfn = assigned_dev->devfn;
 	match->flags = assigned_dev->flags;
 	match->dev = dev;
-	spin_lock_init(&match->assigned_dev_lock);
+	spin_lock_init(&match->intx_lock);
+	spin_lock_init(&match->intx_mask_lock);
 	match->irq_source_id = -1;
 	match->kvm = kvm;
 	match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
-	INIT_WORK(&match->interrupt_work,
-		  kvm_assigned_dev_interrupt_work_handler);
 
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
 
-	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
-		if (!kvm->arch.iommu_domain) {
-			r = kvm_iommu_map_guest(kvm);
-			if (r)
-				goto out_list_del;
-		}
-		r = kvm_assign_device(kvm, match);
+	if (!kvm->arch.iommu_domain) {
+		r = kvm_iommu_map_guest(kvm);
 		if (r)
 			goto out_list_del;
 	}
+	r = kvm_assign_device(kvm, match);
+	if (r)
+		goto out_list_del;
 
 out:
 	srcu_read_unlock(&kvm->srcu, idx);
 	mutex_unlock(&kvm->lock);
 	return r;
 out_list_del:
+	if (pci_load_and_free_saved_state(dev, &match->pci_saved_state))
+		printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
+		       __func__, dev_name(&dev->dev));
 	list_del(&match->list);
 	pci_release_regions(dev);
 out_disable:
@@ -609,8 +788,7 @@ static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
 		goto out;
 	}
 
-	if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)
-		kvm_deassign_device(kvm, match);
+	kvm_deassign_device(kvm, match);
 
 	kvm_free_assigned_device(kvm, match);
 
@@ -639,7 +817,7 @@ static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
 	if (adev->entries_nr == 0) {
 		adev->entries_nr = entry_nr->entry_nr;
 		if (adev->entries_nr == 0 ||
-		    adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) {
+		    adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
 			r = -EINVAL;
 			goto msix_nr_out;
 		}
@@ -651,9 +829,9 @@ static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
 			r = -ENOMEM;
 			goto msix_nr_out;
 		}
-		adev->guest_msix_entries = kzalloc(
-				sizeof(struct kvm_guest_msix_entry) *
-				entry_nr->entry_nr, GFP_KERNEL);
+		adev->guest_msix_entries =
+			kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
+				GFP_KERNEL);
 		if (!adev->guest_msix_entries) {
 			kfree(adev->host_msix_entries);
 			r = -ENOMEM;
@@ -702,11 +880,60 @@ msix_entry_out:
 }
 #endif
 
+static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
+		struct kvm_assigned_pci_dev *assigned_dev)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *match;
+
+	mutex_lock(&kvm->lock);
+
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_dev->assigned_dev_id);
+	if (!match) {
+		r = -ENODEV;
+		goto out;
+	}
+
+	spin_lock(&match->intx_mask_lock);
+
+	match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
+	match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;
+
+	if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
+		if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
+			kvm_set_irq(match->kvm, match->irq_source_id,
+				    match->guest_irq, 0, false);
+			/*
+			 * Masking at hardware-level is performed on demand,
+			 * i.e. when an IRQ actually arrives at the host.
+			 */
+		} else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
+			/*
+			 * Unmask the IRQ line if required. Unmasking at
+			 * device level will be performed by user space.
+			 */
+			spin_lock_irq(&match->intx_lock);
+			if (match->host_irq_disabled) {
+				enable_irq(match->host_irq);
+				match->host_irq_disabled = false;
+			}
+			spin_unlock_irq(&match->intx_lock);
+		}
+	}
+
+	spin_unlock(&match->intx_mask_lock);
+
+out:
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
 long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
 				  unsigned long arg)
 {
 	void __user *argp = (void __user *)arg;
-	int r = -ENOTTY;
+	int r;
 
 	switch (ioctl) {
 	case KVM_ASSIGN_PCI_DEVICE: {
@@ -724,7 +951,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
 		r = -EOPNOTSUPP;
 		break;
 	}
-#ifdef KVM_CAP_ASSIGN_DEV_IRQ
 	case KVM_ASSIGN_DEV_IRQ: {
 		struct kvm_assigned_irq assigned_irq;
 
@@ -747,8 +973,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
 			goto out;
 		break;
 	}
-#endif
-#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
 	case KVM_DEASSIGN_PCI_DEVICE: {
 		struct kvm_assigned_pci_dev assigned_dev;
 
@@ -760,37 +984,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
 			goto out;
 		break;
 	}
-#endif
-#ifdef KVM_CAP_IRQ_ROUTING
-	case KVM_SET_GSI_ROUTING: {
-		struct kvm_irq_routing routing;
-		struct kvm_irq_routing __user *urouting;
-		struct kvm_irq_routing_entry *entries;
-
-		r = -EFAULT;
-		if (copy_from_user(&routing, argp, sizeof(routing)))
-			goto out;
-		r = -EINVAL;
-		if (routing.nr >= KVM_MAX_IRQ_ROUTES)
-			goto out;
-		if (routing.flags)
-			goto out;
-		r = -ENOMEM;
-		entries = vmalloc(routing.nr * sizeof(*entries));
-		if (!entries)
-			goto out;
-		r = -EFAULT;
-		urouting = argp;
-		if (copy_from_user(entries, urouting->entries,
-				   routing.nr * sizeof(*entries)))
-			goto out_free_irq_routing;
-		r = kvm_set_irq_routing(kvm, entries, routing.nr,
-					routing.flags);
-	out_free_irq_routing:
-		vfree(entries);
-		break;
-	}
-#endif /* KVM_CAP_IRQ_ROUTING */
 #ifdef __KVM_HAVE_MSIX
 	case KVM_ASSIGN_SET_MSIX_NR: {
 		struct kvm_assigned_msix_nr entry_nr;
@@ -813,8 +1006,19 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
 		break;
 	}
 #endif
+	case KVM_ASSIGN_SET_INTX_MASK: {
+		struct kvm_assigned_pci_dev assigned_dev;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
+			goto out;
+		r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
+		break;
+	}
+	default:
+		r = -ENOTTY;
+		break;
 	}
 out:
 	return r;
 }
-
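Note: for context, here is a minimal, hypothetical userspace sketch (not part of this commit) of how the reworked interface is driven. KVM_DEV_ASSIGN_ENABLE_IOMMU is now mandatory, KVM_DEV_ASSIGN_PCI_2_3 opts in to shared, host-maskable INTx, and the new KVM_ASSIGN_SET_INTX_MASK ioctl is handled by kvm_vm_ioctl_set_pci_irq_mask() above. The vm_fd, bus/slot numbers, and assigned_dev_id are placeholders; error handling is elided.

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical example: assign a host PCI device, then mask its INTx. */
static int assign_and_mask(int vm_fd)
{
	struct kvm_assigned_pci_dev dev = {
		.assigned_dev_id = 1,		/* arbitrary user-chosen handle */
		.segnr = 0,			/* PCI domain (placeholder) */
		.busnr = 1,			/* placeholder bus */
		.devfn = (2 << 3) | 0,		/* slot 2, function 0 (placeholder) */
		.flags = KVM_DEV_ASSIGN_ENABLE_IOMMU |	/* required by this code */
			 KVM_DEV_ASSIGN_PCI_2_3,	/* request INTx masking */
	};

	if (ioctl(vm_fd, KVM_ASSIGN_PCI_DEVICE, &dev) < 0)
		return -1;

	/*
	 * Later, e.g. while emulating the device's config space, user space
	 * can mask the assigned device's INTx; a second call with the flag
	 * cleared unmasks it again.
	 */
	dev.flags = KVM_DEV_ASSIGN_MASK_INTX;
	return ioctl(vm_fd, KVM_ASSIGN_SET_INTX_MASK, &dev);
}

If the device does not support PCI 2.3 INTx masking, the kernel silently clears KVM_DEV_ASSIGN_PCI_2_3 during assignment (see pci_intx_mask_supported() above), falling back to disabling the exclusive host IRQ line.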
