Diffstat (limited to 'mm/truncate.c')
-rw-r--r--	mm/truncate.c	163
1 file changed, 139 insertions(+), 24 deletions(-)
diff --git a/mm/truncate.c b/mm/truncate.c
index 353b683afd6..eda24730716 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -22,6 +22,45 @@
 #include <linux/cleancache.h>
 #include "internal.h"
 
+static void clear_exceptional_entry(struct address_space *mapping,
+				    pgoff_t index, void *entry)
+{
+	struct radix_tree_node *node;
+	void **slot;
+
+	/* Handled by shmem itself */
+	if (shmem_mapping(mapping))
+		return;
+
+	spin_lock_irq(&mapping->tree_lock);
+	/*
+	 * Regular page slots are stabilized by the page lock even
+	 * without the tree itself locked.  These unlocked entries
+	 * need verification under the tree lock.
+	 */
+	if (!__radix_tree_lookup(&mapping->page_tree, index, &node, &slot))
+		goto unlock;
+	if (*slot != entry)
+		goto unlock;
+	radix_tree_replace_slot(slot, NULL);
+	mapping->nrshadows--;
+	if (!node)
+		goto unlock;
+	workingset_node_shadows_dec(node);
+	/*
+	 * Don't track node without shadow entries.
+	 *
+	 * Avoid acquiring the list_lru lock if already untracked.
+	 * The list_empty() test is safe as node->private_list is
+	 * protected by mapping->tree_lock.
+	 */
+	if (!workingset_node_shadows(node) &&
+	    !list_empty(&node->private_list))
+		list_lru_del(&workingset_shadow_nodes, &node->private_list);
+	__radix_tree_delete_node(&mapping->page_tree, node);
+unlock:
+	spin_unlock_irq(&mapping->tree_lock);
+}
 
 /**
  * do_invalidatepage - invalidate part or all of a page
@@ -208,11 +247,12 @@ void truncate_inode_pages_range(struct address_space *mapping,
 	unsigned int	partial_start;	/* inclusive */
 	unsigned int	partial_end;	/* exclusive */
 	struct pagevec	pvec;
+	pgoff_t		indices[PAGEVEC_SIZE];
 	pgoff_t		index;
 	int		i;
 
 	cleancache_invalidate_inode(mapping);
-	if (mapping->nrpages == 0)
+	if (mapping->nrpages == 0 && mapping->nrshadows == 0)
 		return;
 
 	/* Offsets within partial pages */
@@ -238,17 +278,23 @@ void truncate_inode_pages_range(struct address_space *mapping,
 
 	pagevec_init(&pvec, 0);
 	index = start;
-	while (index < end && pagevec_lookup(&pvec, mapping, index,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE))) {
+	while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE),
+			indices)) {
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
 			/* We rely upon deletion not changing page->index */
-			index = page->index;
+			index = indices[i];
 			if (index >= end)
 				break;
 
+			if (radix_tree_exceptional_entry(page)) {
+				clear_exceptional_entry(mapping, index, page);
+				continue;
+			}
+
 			if (!trylock_page(page))
 				continue;
 			WARN_ON(page->index != index);
@@ -259,6 +305,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			truncate_inode_page(mapping, page);
 			unlock_page(page);
 		}
+		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
@@ -307,14 +354,18 @@ void truncate_inode_pages_range(struct address_space *mapping,
 	index = start;
 	for ( ; ; ) {
 		cond_resched();
-		if (!pagevec_lookup(&pvec, mapping, index,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE))) {
+		if (!pagevec_lookup_entries(&pvec, mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE), indices)) {
+			/* If all gone from start onwards, we're done */
 			if (index == start)
 				break;
+			/* Otherwise restart to make sure all gone */
 			index = start;
 			continue;
 		}
-		if (index == start && pvec.pages[0]->index >= end) {
+		if (index == start && indices[0] >= end) {
+			/* All gone out of hole to be punched, we're done */
+			pagevec_remove_exceptionals(&pvec);
 			pagevec_release(&pvec);
 			break;
 		}
@@ -323,9 +374,17 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			struct page *page = pvec.pages[i];
 
 			/* We rely upon deletion not changing page->index */
-			index = page->index;
-			if (index >= end)
+			index = indices[i];
+			if (index >= end) {
+				/* Restart punch to make sure all gone */
+				index = start - 1;
 				break;
+			}
+
+			if (radix_tree_exceptional_entry(page)) {
+				clear_exceptional_entry(mapping, index, page);
+				continue;
+			}
 
 			lock_page(page);
 			WARN_ON(page->index != index);
@@ -333,6 +392,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			truncate_inode_page(mapping, page);
 			unlock_page(page);
 		}
+		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		index++;
@@ -360,6 +420,53 @@ void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
 EXPORT_SYMBOL(truncate_inode_pages);
 
 /**
+ * truncate_inode_pages_final - truncate *all* pages before inode dies
+ * @mapping: mapping to truncate
+ *
+ * Called under (and serialized by) inode->i_mutex.
+ *
+ * Filesystems have to use this in the .evict_inode path to inform the
+ * VM that this is the final truncate and the inode is going away.
+ */
+void truncate_inode_pages_final(struct address_space *mapping)
+{
+	unsigned long nrshadows;
+	unsigned long nrpages;
+
+	/*
+	 * Page reclaim can not participate in regular inode lifetime
+	 * management (can't call iput()) and thus can race with the
+	 * inode teardown.  Tell it when the address space is exiting,
+	 * so that it does not install eviction information after the
+	 * final truncate has begun.
+	 */
+	mapping_set_exiting(mapping);
+
+	/*
+	 * When reclaim installs eviction entries, it increases
+	 * nrshadows first, then decreases nrpages.  Make sure we see
+	 * this in the right order or we might miss an entry.
+	 */
+	nrpages = mapping->nrpages;
+	smp_rmb();
+	nrshadows = mapping->nrshadows;
+
+	if (nrpages || nrshadows) {
+		/*
+		 * As truncation uses a lockless tree lookup, cycle
+		 * the tree lock to make sure any ongoing tree
+		 * modification that does not see AS_EXITING is
+		 * completed before starting the final truncate.
+		 */
+		spin_lock_irq(&mapping->tree_lock);
+		spin_unlock_irq(&mapping->tree_lock);
+
+		truncate_inode_pages(mapping, 0);
+	}
+}
+EXPORT_SYMBOL(truncate_inode_pages_final);
+
+/**
  * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
  * @mapping: the address_space which holds the pages to invalidate
  * @start: the offset 'from' which to invalidate
@@ -375,32 +482,31 @@ EXPORT_SYMBOL(truncate_inode_pages);
 unsigned long invalidate_mapping_pages(struct address_space *mapping,
 		pgoff_t start, pgoff_t end)
 {
+	pgoff_t indices[PAGEVEC_SIZE];
 	struct pagevec pvec;
 	pgoff_t index = start;
 	unsigned long ret;
 	unsigned long count = 0;
 	int i;
 
-	/*
-	 * Note: this function may get called on a shmem/tmpfs mapping:
-	 * pagevec_lookup() might then return 0 prematurely (because it
-	 * got a gangful of swap entries); but it's hardly worth worrying
-	 * about - it can rarely have anything to free from such a mapping
-	 * (most pages are dirty), and already skips over any difficulties.
-	 */
-
 	pagevec_init(&pvec, 0);
-	while (index <= end && pagevec_lookup(&pvec, mapping, index,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+	while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
+			indices)) {
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
 			/* We rely upon deletion not changing page->index */
-			index = page->index;
+			index = indices[i];
 			if (index > end)
 				break;
 
+			if (radix_tree_exceptional_entry(page)) {
+				clear_exceptional_entry(mapping, index, page);
+				continue;
+			}
+
 			if (!trylock_page(page))
 				continue;
 			WARN_ON(page->index != index);
@@ -414,6 +520,7 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 				deactivate_page(page);
 			count += ret;
 		}
+		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
@@ -444,7 +551,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
 		goto failed;
 
 	BUG_ON(page_has_private(page));
-	__delete_from_page_cache(page);
+	__delete_from_page_cache(page, NULL);
 	spin_unlock_irq(&mapping->tree_lock);
 	mem_cgroup_uncharge_cache_page(page);
 
@@ -481,6 +588,7 @@ static int do_launder_page(struct address_space *mapping, struct page *page)
 int invalidate_inode_pages2_range(struct address_space *mapping,
 				  pgoff_t start, pgoff_t end)
 {
+	pgoff_t indices[PAGEVEC_SIZE];
 	struct pagevec pvec;
 	pgoff_t index;
 	int i;
@@ -491,17 +599,23 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 	cleancache_invalidate_inode(mapping);
 	pagevec_init(&pvec, 0);
 	index = start;
-	while (index <= end && pagevec_lookup(&pvec, mapping, index,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+	while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
+			indices)) {
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
 			/* We rely upon deletion not changing page->index */
-			index = page->index;
+			index = indices[i];
 			if (index > end)
 				break;
 
+			if (radix_tree_exceptional_entry(page)) {
+				clear_exceptional_entry(mapping, index, page);
+				continue;
+			}
+
 			lock_page(page);
 			WARN_ON(page->index != index);
 			if (page->mapping != mapping) {
@@ -539,6 +653,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 				ret = ret2;
 			unlock_page(page);
 		}
+		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
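The kernel-doc added above says filesystems must call truncate_inode_pages_final() from their .evict_inode path. As a rough illustration only, not taken from this patch, the sketch below shows how a hypothetical filesystem "foo" would wire that up; foo_evict_inode() and foo_sops are made-up names, while truncate_inode_pages_final(), clear_inode() and the super_operations hook are the real kernel interfaces.

/*
 * Illustrative sketch only -- not part of this patch.  A hypothetical
 * filesystem "foo" calling truncate_inode_pages_final() from its
 * ->evict_inode() handler, as the kernel-doc above prescribes.
 */
#include <linux/fs.h>
#include <linux/mm.h>

static void foo_evict_inode(struct inode *inode)
{
	/*
	 * Final truncate: marks the mapping as exiting so reclaim stops
	 * installing shadow entries, then drops any remaining pages and
	 * shadow entries.
	 */
	truncate_inode_pages_final(&inode->i_data);

	/* ... filesystem-specific teardown of on-disk state ... */

	clear_inode(inode);
}

static const struct super_operations foo_sops = {
	.evict_inode	= foo_evict_inode,
	/* ... */
};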

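For readers unfamiliar with what radix_tree_exceptional_entry() tests in the loops above: shadow entries are distinguished from page pointers by a tag bit in the low bits of the slot value, which is free because struct page pointers are always word-aligned. The stand-alone user-space sketch below only illustrates that tagging idea; the constants mirror what the kernel's RADIX_TREE_EXCEPTIONAL_* definitions were around this time, but the demo itself is not kernel code.

/*
 * User-space illustration (not kernel code) of the pointer-tagging
 * scheme behind radix_tree_exceptional_entry(): bit 1 of a slot value
 * marks an exceptional/shadow entry, while real page pointers are
 * aligned and therefore have that bit clear.
 */
#include <stdio.h>

#define EXCEPTIONAL_ENTRY	2UL	/* mirrors RADIX_TREE_EXCEPTIONAL_ENTRY (assumed) */
#define EXCEPTIONAL_SHIFT	2	/* mirrors RADIX_TREE_EXCEPTIONAL_SHIFT (assumed) */

static int is_exceptional(void *entry)
{
	return ((unsigned long)entry & EXCEPTIONAL_ENTRY) != 0;
}

int main(void)
{
	long fake_page;				/* stand-in for a struct page */
	void *page_entry = &fake_page;		/* aligned pointer: bit 1 clear */
	void *shadow_entry = (void *)((123UL << EXCEPTIONAL_SHIFT) |
				      EXCEPTIONAL_ENTRY);	/* payload 123, tag bit set */

	printf("page entry exceptional?   %d\n", is_exceptional(page_entry));
	printf("shadow entry exceptional? %d\n", is_exceptional(shadow_entry));
	return 0;
}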