diff options
Diffstat (limited to 'mm/shmem.c')
-rw-r--r-- | mm/shmem.c | 148 |
1 files changed, 83 insertions, 65 deletions
diff --git a/mm/shmem.c b/mm/shmem.c index 8fa27e4e582..ba4ad28b7db 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -852,7 +852,7 @@ static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page) { - struct inode *inode; + struct address_space *mapping; unsigned long idx; unsigned long size; unsigned long limit; @@ -875,8 +875,10 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s if (size > SHMEM_NR_DIRECT) size = SHMEM_NR_DIRECT; offset = shmem_find_swp(entry, ptr, ptr+size); - if (offset >= 0) + if (offset >= 0) { + shmem_swp_balance_unmap(); goto found; + } if (!info->i_indirect) goto lost2; @@ -917,6 +919,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s shmem_swp_unmap(ptr); if (offset >= 0) { shmem_dir_unmap(dir); + ptr = shmem_swp_map(subdir); goto found; } } @@ -928,8 +931,7 @@ lost2: return 0; found: idx += offset; - inode = igrab(&info->vfs_inode); - spin_unlock(&info->lock); + ptr += offset; /* * Move _head_ to start search for next from here. @@ -940,37 +942,18 @@ found: */ if (shmem_swaplist.next != &info->swaplist) list_move_tail(&shmem_swaplist, &info->swaplist); - mutex_unlock(&shmem_swaplist_mutex); - error = 1; - if (!inode) - goto out; /* - * Charge page using GFP_KERNEL while we can wait. - * Charged back to the user(not to caller) when swap account is used. - * add_to_page_cache() will be called with GFP_NOWAIT. + * We rely on shmem_swaplist_mutex, not only to protect the swaplist, + * but also to hold up shmem_evict_inode(): so inode cannot be freed + * beneath us (pagelock doesn't help until the page is in pagecache). */ - error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); - if (error) - goto out; - error = radix_tree_preload(GFP_KERNEL); - if (error) { - mem_cgroup_uncharge_cache_page(page); - goto out; - } - error = 1; - - spin_lock(&info->lock); - ptr = shmem_swp_entry(info, idx, NULL); - if (ptr && ptr->val == entry.val) { - error = add_to_page_cache_locked(page, inode->i_mapping, - idx, GFP_NOWAIT); - /* does mem_cgroup_uncharge_cache_page on error */ - } else /* we must compensate for our precharge above */ - mem_cgroup_uncharge_cache_page(page); + mapping = info->vfs_inode.i_mapping; + error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT); + /* which does mem_cgroup_uncharge_cache_page on error */ if (error == -EEXIST) { - struct page *filepage = find_get_page(inode->i_mapping, idx); + struct page *filepage = find_get_page(mapping, idx); error = 1; if (filepage) { /* @@ -990,14 +973,8 @@ found: swap_free(entry); error = 1; /* not an error, but entry was found */ } - if (ptr) - shmem_swp_unmap(ptr); + shmem_swp_unmap(ptr); spin_unlock(&info->lock); - radix_tree_preload_end(); -out: - unlock_page(page); - page_cache_release(page); - iput(inode); /* allows for NULL */ return error; } @@ -1009,6 +986,26 @@ int shmem_unuse(swp_entry_t entry, struct page *page) struct list_head *p, *next; struct shmem_inode_info *info; int found = 0; + int error; + + /* + * Charge page using GFP_KERNEL while we can wait, before taking + * the shmem_swaplist_mutex which might hold up shmem_writepage(). + * Charged back to the user (not to caller) when swap account is used. + * add_to_page_cache() will be called with GFP_NOWAIT. + */ + error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); + if (error) + goto out; + /* + * Try to preload while we can wait, to not make a habit of + * draining atomic reserves; but don't latch on to this cpu, + * it's okay if sometimes we get rescheduled after this. + */ + error = radix_tree_preload(GFP_KERNEL); + if (error) + goto uncharge; + radix_tree_preload_end(); mutex_lock(&shmem_swaplist_mutex); list_for_each_safe(p, next, &shmem_swaplist) { @@ -1016,17 +1013,19 @@ int shmem_unuse(swp_entry_t entry, struct page *page) found = shmem_unuse_inode(info, entry, page); cond_resched(); if (found) - goto out; + break; } mutex_unlock(&shmem_swaplist_mutex); - /* - * Can some race bring us here? We've been holding page lock, - * so I think not; but would rather try again later than BUG() - */ + +uncharge: + if (!found) + mem_cgroup_uncharge_cache_page(page); + if (found < 0) + error = found; +out: unlock_page(page); page_cache_release(page); -out: - return (found < 0) ? found : 0; + return error; } /* @@ -1064,7 +1063,25 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) else swap.val = 0; + /* + * Add inode to shmem_unuse()'s list of swapped-out inodes, + * if it's not already there. Do it now because we cannot take + * mutex while holding spinlock, and must do so before the page + * is moved to swap cache, when its pagelock no longer protects + * the inode from eviction. But don't unlock the mutex until + * we've taken the spinlock, because shmem_unuse_inode() will + * prune a !swapped inode from the swaplist under both locks. + */ + if (swap.val) { + mutex_lock(&shmem_swaplist_mutex); + if (list_empty(&info->swaplist)) + list_add_tail(&info->swaplist, &shmem_swaplist); + } + spin_lock(&info->lock); + if (swap.val) + mutex_unlock(&shmem_swaplist_mutex); + if (index >= info->next_index) { BUG_ON(!(info->flags & SHMEM_TRUNCATE)); goto unlock; @@ -1084,21 +1101,10 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) delete_from_page_cache(page); shmem_swp_set(info, entry, swap.val); shmem_swp_unmap(entry); - if (list_empty(&info->swaplist)) - inode = igrab(inode); - else - inode = NULL; spin_unlock(&info->lock); swap_shmem_alloc(swap); BUG_ON(page_mapped(page)); swap_writepage(page, wbc); - if (inode) { - mutex_lock(&shmem_swaplist_mutex); - /* move instead of add in case we're racing */ - list_move_tail(&info->swaplist, &shmem_swaplist); - mutex_unlock(&shmem_swaplist_mutex); - iput(inode); - } return 0; } @@ -1400,20 +1406,14 @@ repeat: if (sbinfo->max_blocks) { if (percpu_counter_compare(&sbinfo->used_blocks, sbinfo->max_blocks) >= 0 || - shmem_acct_block(info->flags)) { - spin_unlock(&info->lock); - error = -ENOSPC; - goto failed; - } + shmem_acct_block(info->flags)) + goto nospace; percpu_counter_inc(&sbinfo->used_blocks); spin_lock(&inode->i_lock); inode->i_blocks += BLOCKS_PER_PAGE; spin_unlock(&inode->i_lock); - } else if (shmem_acct_block(info->flags)) { - spin_unlock(&info->lock); - error = -ENOSPC; - goto failed; - } + } else if (shmem_acct_block(info->flags)) + goto nospace; if (!filepage) { int ret; @@ -1493,6 +1493,24 @@ done: error = 0; goto out; +nospace: + /* + * Perhaps the page was brought in from swap between find_lock_page + * and taking info->lock? We allow for that at add_to_page_cache_lru, + * but must also avoid reporting a spurious ENOSPC while working on a + * full tmpfs. (When filepage has been passed in to shmem_getpage, it + * is already in page cache, which prevents this race from occurring.) + */ + if (!filepage) { + struct page *page = find_get_page(mapping, idx); + if (page) { + spin_unlock(&info->lock); + page_cache_release(page); + goto repeat; + } + } + spin_unlock(&info->lock); + error = -ENOSPC; failed: if (*pagep != filepage) { unlock_page(filepage); |