Diffstat (limited to 'drivers/iommu/amd_iommu_v2.c')
-rw-r--r--  drivers/iommu/amd_iommu_v2.c  196
1 file changed, 85 insertions, 111 deletions
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index 5208828792e..499b4366a98 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -45,17 +45,22 @@ struct pri_queue {
 struct pasid_state {
 	struct list_head list;			/* For global state-list */
 	atomic_t count;				/* Reference count */
+	unsigned mmu_notifier_count;		/* Counting nested mmu_notifier
+						   calls */
 	struct task_struct *task;		/* Task bound to this PASID */
 	struct mm_struct *mm;			/* mm_struct for the faults */
 	struct mmu_notifier mn;                 /* mmu_otifier handle */
 	struct pri_queue pri[PRI_QUEUE_SIZE];	/* PRI tag states */
 	struct device_state *device_state;	/* Link to our device_state */
 	int pasid;				/* PASID index */
-	spinlock_t lock;			/* Protect pri_queues */
+	spinlock_t lock;			/* Protect pri_queues and
+						   mmu_notifer_count */
 	wait_queue_head_t wq;			/* To wait for count == 0 */
 };
 
 struct device_state {
+	struct list_head list;
+	u16 devid;
 	atomic_t count;
 	struct pci_dev *pdev;
 	struct pasid_state **states;
@@ -81,13 +86,9 @@ struct fault {
 	u16 flags;
 };
 
-static struct device_state **state_table;
+static LIST_HEAD(state_list);
 static spinlock_t state_lock;
 
-/* List and lock for all pasid_states */
-static LIST_HEAD(pasid_state_list);
-static DEFINE_SPINLOCK(ps_lock);
-
 static struct workqueue_struct *iommu_wq;
 
 /*
@@ -99,7 +100,6 @@ static u64 *empty_page_table;
 
 static void free_pasid_states(struct device_state *dev_state);
 static void unbind_pasid(struct device_state *dev_state, int pasid);
-static int task_exit(struct notifier_block *nb, unsigned long e, void *data);
 
 static u16 device_id(struct pci_dev *pdev)
 {
@@ -111,13 +111,25 @@ static u16 device_id(struct pci_dev *pdev)
 	return devid;
 }
 
+static struct device_state *__get_device_state(u16 devid)
+{
+	struct device_state *dev_state;
+
+	list_for_each_entry(dev_state, &state_list, list) {
+		if (dev_state->devid == devid)
+			return dev_state;
+	}
+
+	return NULL;
+}
+
 static struct device_state *get_device_state(u16 devid)
 {
 	struct device_state *dev_state;
 	unsigned long flags;
 
 	spin_lock_irqsave(&state_lock, flags);
-	dev_state = state_table[devid];
+	dev_state = __get_device_state(devid);
 	if (dev_state != NULL)
 		atomic_inc(&dev_state->count);
 	spin_unlock_irqrestore(&state_lock, flags);
@@ -158,29 +170,6 @@ static void put_device_state_wait(struct device_state *dev_state)
 	free_device_state(dev_state);
 }
 
-static struct notifier_block profile_nb = {
-	.notifier_call = task_exit,
-};
-
-static void link_pasid_state(struct pasid_state *pasid_state)
-{
-	spin_lock(&ps_lock);
-	list_add_tail(&pasid_state->list, &pasid_state_list);
-	spin_unlock(&ps_lock);
-}
-
-static void __unlink_pasid_state(struct pasid_state *pasid_state)
-{
-	list_del(&pasid_state->list);
-}
-
-static void unlink_pasid_state(struct pasid_state *pasid_state)
-{
-	spin_lock(&ps_lock);
-	__unlink_pasid_state(pasid_state);
-	spin_unlock(&ps_lock);
-}
-
 /* Must be called under dev_state->lock */
 static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state,
 						  int pasid, bool alloc)
@@ -337,7 +326,6 @@ static void unbind_pasid(struct device_state *dev_state, int pasid)
 	if (pasid_state == NULL)
 		return;
 
-	unlink_pasid_state(pasid_state);
 	__unbind_pasid(pasid_state);
 	put_pasid_state_wait(pasid_state); /* Reference taken in this function */
 }
@@ -379,7 +367,12 @@ static void free_pasid_states(struct device_state *dev_state)
 			continue;
 
 		put_pasid_state(pasid_state);
-		unbind_pasid(dev_state, i);
+
+		/*
+		 * This will call the mn_release function and
+		 * unbind the PASID
+		 */
+		mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
 	}
 
 	if (dev_state->pasid_levels == 2)
@@ -439,12 +432,19 @@ static void mn_invalidate_range_start(struct mmu_notifier *mn,
 {
 	struct pasid_state *pasid_state;
 	struct device_state *dev_state;
+	unsigned long flags;
 
 	pasid_state = mn_to_state(mn);
 	dev_state   = pasid_state->device_state;
 
-	amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid,
-				  __pa(empty_page_table));
+	spin_lock_irqsave(&pasid_state->lock, flags);
+	if (pasid_state->mmu_notifier_count == 0) {
+		amd_iommu_domain_set_gcr3(dev_state->domain,
+					  pasid_state->pasid,
+					  __pa(empty_page_table));
+	}
+	pasid_state->mmu_notifier_count += 1;
+	spin_unlock_irqrestore(&pasid_state->lock, flags);
 }
 
 static void mn_invalidate_range_end(struct mmu_notifier *mn,
@@ -453,15 +453,39 @@ static void mn_invalidate_range_end(struct mmu_notifier *mn,
 {
 	struct pasid_state *pasid_state;
 	struct device_state *dev_state;
+	unsigned long flags;
 
 	pasid_state = mn_to_state(mn);
 	dev_state   = pasid_state->device_state;
 
-	amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid,
-				  __pa(pasid_state->mm->pgd));
+	spin_lock_irqsave(&pasid_state->lock, flags);
+	pasid_state->mmu_notifier_count -= 1;
+	if (pasid_state->mmu_notifier_count == 0) {
+		amd_iommu_domain_set_gcr3(dev_state->domain,
+					  pasid_state->pasid,
+					  __pa(pasid_state->mm->pgd));
+	}
+	spin_unlock_irqrestore(&pasid_state->lock, flags);
+}
+
+static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm)
+{
+	struct pasid_state *pasid_state;
+	struct device_state *dev_state;
+
+	might_sleep();
+
+	pasid_state = mn_to_state(mn);
+	dev_state   = pasid_state->device_state;
+
+	if (pasid_state->device_state->inv_ctx_cb)
+		dev_state->inv_ctx_cb(dev_state->pdev, pasid_state->pasid);
+
+	unbind_pasid(dev_state, pasid_state->pasid);
 }
 
 static struct mmu_notifier_ops iommu_mn = {
+	.release		= mn_release,
 	.clear_flush_young      = mn_clear_flush_young,
 	.change_pte             = mn_change_pte,
 	.invalidate_page        = mn_invalidate_page,
@@ -504,8 +528,10 @@ static void do_fault(struct work_struct *work)
 
 	write = !!(fault->flags & PPR_FAULT_WRITE);
 
+	down_read(&fault->state->mm->mmap_sem);
 	npages = get_user_pages(fault->state->task, fault->state->mm,
 				fault->address, 1, write, 0, &page, NULL);
+	up_read(&fault->state->mm->mmap_sem);
 
 	if (npages == 1) {
 		put_page(page);
@@ -604,53 +630,6 @@ static struct notifier_block ppr_nb = {
 	.notifier_call = ppr_notifier,
 };
 
-static int task_exit(struct notifier_block *nb, unsigned long e, void *data)
-{
-	struct pasid_state *pasid_state;
-	struct task_struct *task;
-
-	task = data;
-
-	/*
-	 * Using this notifier is a hack - but there is no other choice
-	 * at the moment. What I really want is a sleeping notifier that
-	 * is called when an MM goes down. But such a notifier doesn't
-	 * exist yet. The notifier needs to sleep because it has to make
-	 * sure that the device does not use the PASID and the address
-	 * space anymore before it is destroyed. This includes waiting
-	 * for pending PRI requests to pass the workqueue. The
-	 * MMU-Notifiers would be a good fit, but they use RCU and so
-	 * they are not allowed to sleep. Lets see how we can solve this
-	 * in a more intelligent way in the future.
-	 */
-again:
-	spin_lock(&ps_lock);
-	list_for_each_entry(pasid_state, &pasid_state_list, list) {
-		struct device_state *dev_state;
-		int pasid;
-
-		if (pasid_state->task != task)
-			continue;
-
-		/* Drop Lock and unbind */
-		spin_unlock(&ps_lock);
-
-		dev_state = pasid_state->device_state;
-		pasid     = pasid_state->pasid;
-
-		if (pasid_state->device_state->inv_ctx_cb)
-			dev_state->inv_ctx_cb(dev_state->pdev, pasid);
-
-		unbind_pasid(dev_state, pasid);
-
-		/* Task may be in the list multiple times */
-		goto again;
-	}
-	spin_unlock(&ps_lock);
-
-	return NOTIFY_OK;
-}
-
 int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
 			 struct task_struct *task)
 {
@@ -703,8 +682,6 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
 	if (ret)
 		goto out_clear_state;
 
-	link_pasid_state(pasid_state);
-
 	return 0;
 
 out_clear_state:
@@ -725,6 +702,7 @@ EXPORT_SYMBOL(amd_iommu_bind_pasid);
 
 void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid)
 {
+	struct pasid_state *pasid_state;
 	struct device_state *dev_state;
 	u16 devid;
 
@@ -741,7 +719,17 @@ void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid)
 	if (pasid < 0 || pasid >= dev_state->max_pasids)
 		goto out;
 
-	unbind_pasid(dev_state, pasid);
+	pasid_state = get_pasid_state(dev_state, pasid);
+	if (pasid_state == NULL)
+		goto out;
+	/*
+	 * Drop reference taken here. We are safe because we still hold
+	 * the reference taken in the amd_iommu_bind_pasid function.
+	 */
+	put_pasid_state(pasid_state);
+
+	/* This will call the mn_release function and unbind the PASID */
+	mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
 
 out:
 	put_device_state(dev_state);
@@ -771,7 +759,8 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
 	spin_lock_init(&dev_state->lock);
 	init_waitqueue_head(&dev_state->wq);
 
-	dev_state->pdev = pdev;
+	dev_state->pdev  = pdev;
+	dev_state->devid = devid;
 
 	tmp = pasids;
 	for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9)
@@ -801,13 +790,13 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
 
 	spin_lock_irqsave(&state_lock, flags);
 
-	if (state_table[devid] != NULL) {
+	if (__get_device_state(devid) != NULL) {
 		spin_unlock_irqrestore(&state_lock, flags);
 		ret = -EBUSY;
 		goto out_free_domain;
 	}
 
-	state_table[devid] = dev_state;
+	list_add_tail(&dev_state->list, &state_list);
 
 	spin_unlock_irqrestore(&state_lock, flags);
 
@@ -839,13 +828,13 @@ void amd_iommu_free_device(struct pci_dev *pdev)
 
 	spin_lock_irqsave(&state_lock, flags);
 
-	dev_state = state_table[devid];
+	dev_state = __get_device_state(devid);
 	if (dev_state == NULL) {
 		spin_unlock_irqrestore(&state_lock, flags);
 		return;
 	}
 
-	state_table[devid] = NULL;
+	list_del(&dev_state->list);
 
 	spin_unlock_irqrestore(&state_lock, flags);
 
@@ -872,7 +861,7 @@ int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,
 	spin_lock_irqsave(&state_lock, flags);
 
 	ret = -EINVAL;
-	dev_state = state_table[devid];
+	dev_state = __get_device_state(devid);
 	if (dev_state == NULL)
 		goto out_unlock;
 
@@ -903,7 +892,7 @@ int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev,
 	spin_lock_irqsave(&state_lock, flags);
 
 	ret = -EINVAL;
-	dev_state = state_table[devid];
+	dev_state = __get_device_state(devid);
 	if (dev_state == NULL)
 		goto out_unlock;
 
@@ -920,7 +909,6 @@ EXPORT_SYMBOL(amd_iommu_set_invalidate_ctx_cb);
 
 static int __init amd_iommu_v2_init(void)
 {
-	size_t state_table_size;
 	int ret;
 
 	pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>\n");
@@ -936,16 +924,10 @@ static int __init amd_iommu_v2_init(void)
 
 	spin_lock_init(&state_lock);
 
-	state_table_size = MAX_DEVICES * sizeof(struct device_state *);
-	state_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-					       get_order(state_table_size));
-	if (state_table == NULL)
-		return -ENOMEM;
-
 	ret = -ENOMEM;
 	iommu_wq = create_workqueue("amd_iommu_v2");
 	if (iommu_wq == NULL)
-		goto out_free;
+		goto out;
 
 	ret = -ENOMEM;
 	empty_page_table = (u64 *)get_zeroed_page(GFP_KERNEL);
@@ -953,29 +935,24 @@ static int __init amd_iommu_v2_init(void)
 		goto out_destroy_wq;
 
 	amd_iommu_register_ppr_notifier(&ppr_nb);
-	profile_event_register(PROFILE_TASK_EXIT, &profile_nb);
 
 	return 0;
 
 out_destroy_wq:
 	destroy_workqueue(iommu_wq);
 
-out_free:
-	free_pages((unsigned long)state_table, get_order(state_table_size));
-
+out:
 	return ret;
 }
 
 static void __exit amd_iommu_v2_exit(void)
 {
	struct device_state *dev_state;
-	size_t state_table_size;
 	int i;
 
 	if (!amd_iommu_v2_supported())
 		return;
 
-	profile_event_unregister(PROFILE_TASK_EXIT, &profile_nb);
 	amd_iommu_unregister_ppr_notifier(&ppr_nb);
 
 	flush_workqueue(iommu_wq);
@@ -998,9 +975,6 @@ static void __exit amd_iommu_v2_exit(void)
 
 	destroy_workqueue(iommu_wq);
 
-	state_table_size = MAX_DEVICES * sizeof(struct device_state *);
-	free_pages((unsigned long)state_table, get_order(state_table_size));
-
 	free_page((unsigned long)empty_page_table);
 }
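
Two independent changes are visible above: the global state_table array becomes a devid-keyed list (so the driver no longer burns pages on a mostly empty MAX_DEVICES table), and the PROFILE_TASK_EXIT hack is replaced by the mmu_notifier release callback. The following is a minimal userspace sketch of the first change, for illustration only: a hand-rolled singly linked list stands in for the kernel's struct list_head, and locking is omitted (in the driver, __get_device_state() runs under state_lock).

/*
 * Userspace model of the state_table -> state_list conversion:
 * a linear walk keyed by devid replaces the O(1) pointer array.
 */
#include <stdint.h>
#include <stdio.h>

struct device_state {
	struct device_state *next;
	uint16_t devid;
};

static struct device_state *state_list;	/* was: struct device_state **state_table */

/* In the driver, the caller must hold state_lock. */
static struct device_state *__get_device_state(uint16_t devid)
{
	struct device_state *dev_state;

	for (dev_state = state_list; dev_state; dev_state = dev_state->next) {
		if (dev_state->devid == devid)
			return dev_state;
	}

	return NULL;
}

int main(void)
{
	struct device_state ds = { .next = NULL, .devid = 0x0021 };

	state_list = &ds;
	printf("devid 0x0021: %s\n", __get_device_state(0x0021) ? "found" : "missing");
	printf("devid 0x0042: %s\n", __get_device_state(0x0042) ? "found" : "missing");
	return 0;
}

The linear walk is acceptable here because only a handful of devices ever bind PASIDs, and lookups are not on a hot path.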
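The mmu_notifier_count logic deserves a closer look: invalidate_range_start/end pairs may nest, so only the outermost start swaps in empty_page_table and only the matching outermost end restores mm->pgd. Below is a minimal userspace model of that counting pattern, not driver code: pthread_mutex_t stands in for the pasid_state spinlock and set_gcr3() is a stub for amd_iommu_domain_set_gcr3(). Build with -lpthread.

#include <pthread.h>
#include <stdio.h>

struct pasid_state {
	pthread_mutex_t lock;		/* protects mmu_notifier_count */
	unsigned int mmu_notifier_count;
};

static void set_gcr3(const char *table)
{
	printf("gcr3 -> %s\n", table);
}

static void mn_invalidate_range_start(struct pasid_state *ps)
{
	pthread_mutex_lock(&ps->lock);
	if (ps->mmu_notifier_count == 0)
		set_gcr3("empty_page_table");	/* outermost start: block translations */
	ps->mmu_notifier_count += 1;
	pthread_mutex_unlock(&ps->lock);
}

static void mn_invalidate_range_end(struct pasid_state *ps)
{
	pthread_mutex_lock(&ps->lock);
	ps->mmu_notifier_count -= 1;
	if (ps->mmu_notifier_count == 0)
		set_gcr3("mm->pgd");		/* outermost end: re-enable translations */
	pthread_mutex_unlock(&ps->lock);
}

int main(void)
{
	struct pasid_state ps = { PTHREAD_MUTEX_INITIALIZER, 0 };

	mn_invalidate_range_start(&ps);	/* swaps in the empty table */
	mn_invalidate_range_start(&ps);	/* nested: count only */
	mn_invalidate_range_end(&ps);	/* nested: count only */
	mn_invalidate_range_end(&ps);	/* restores mm->pgd */
	return 0;
}

Without the counter, a nested end call would re-enable translations while an outer invalidation is still in progress, which is exactly the race the patch guards against.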
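Finally, the reason mn_release can replace the task-exit notifier (the "hack" lamented in the removed comment) is that the mmu_notifier .release callback fires when the address space itself goes away and is allowed to sleep, hence the might_sleep() annotation. The toy model below captures just that lifecycle; struct mm, mm_drop() and the single-notifier wiring are invented for illustration, while the real interfaces are mmu_notifier_register()/mmu_notifier_unregister() and the .release member of mmu_notifier_ops.

#include <stdio.h>

struct mm;

struct mmu_notifier_ops_model {
	void (*release)(struct mm *mm);	/* may sleep in the kernel */
};

struct mm {
	int users;
	const struct mmu_notifier_ops_model *ops;	/* one notifier, for brevity */
};

/* Called when a user of the address space drops its reference. */
static void mm_drop(struct mm *mm)
{
	if (--mm->users == 0 && mm->ops && mm->ops->release)
		mm->ops->release(mm);	/* last chance to unbind the PASID */
}

static void mn_release(struct mm *mm)
{
	/* The driver calls inv_ctx_cb() and unbind_pasid() at this point. */
	printf("mm %p going down, unbinding PASID\n", (void *)mm);
}

static const struct mmu_notifier_ops_model iommu_mn = {
	.release = mn_release,
};

int main(void)
{
	struct mm mm = { .users = 2, .ops = &iommu_mn };

	mm_drop(&mm);	/* still referenced: nothing happens */
	mm_drop(&mm);	/* last reference: release fires exactly once */
	return 0;
}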
