diff options
-rw-r--r-- | Documentation/feature-removal-schedule.txt | 27 | ||||
-rw-r--r-- | Documentation/filesystems/Locking | 2 | ||||
-rw-r--r-- | fs/gfs2/ops_address.c | 2 | ||||
-rw-r--r-- | fs/gfs2/ops_file.c | 2 | ||||
-rw-r--r-- | fs/gfs2/ops_vm.c | 36 | ||||
-rw-r--r-- | fs/ncpfs/mmap.c | 23 | ||||
-rw-r--r-- | fs/ocfs2/aops.c | 2 | ||||
-rw-r--r-- | fs/ocfs2/mmap.c | 17 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_file.c | 23 | ||||
-rw-r--r-- | include/linux/mm.h | 41 | ||||
-rw-r--r-- | ipc/shm.c | 9 | ||||
-rw-r--r-- | mm/filemap.c | 94 | ||||
-rw-r--r-- | mm/filemap_xip.c | 54 | ||||
-rw-r--r-- | mm/fremap.c | 103 | ||||
-rw-r--r-- | mm/memory.c | 132 | ||||
-rw-r--r-- | mm/mmap.c | 8 | ||||
-rw-r--r-- | mm/nommu.c | 3 | ||||
-rw-r--r-- | mm/rmap.c | 4 | ||||
-rw-r--r-- | mm/shmem.c | 82 | ||||
-rw-r--r-- | mm/truncate.c | 2 |
20 files changed, 394 insertions, 272 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 66c8b4b165c..716568afdff 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -135,6 +135,33 @@ Who: Greg Kroah-Hartman <gregkh@suse.de> --------------------------- +What: filemap_nopage, filemap_populate +When: April 2007 +Why: These legacy interfaces no longer have any callers in the kernel and + any functionality provided can be provided with filemap_fault. The + removal schedule is short because they are a big maintainence burden + and have some bugs. +Who: Nick Piggin <npiggin@suse.de> + +--------------------------- + +What: vm_ops.populate, install_page +When: April 2007 +Why: These legacy interfaces no longer have any callers in the kernel and + any functionality provided can be provided with vm_ops.fault. +Who: Nick Piggin <npiggin@suse.de> + +--------------------------- + +What: vm_ops.nopage +When: February 2008, provided in-kernel callers have been converted +Why: This interface is replaced by vm_ops.fault, but it has been around + forever, is used by a lot of drivers, and doesn't cost much to + maintain. +Who: Nick Piggin <npiggin@suse.de> + +--------------------------- + What: Interrupt only SA_* flags When: September 2007 Why: The interrupt related SA_* flags are replaced by IRQF_* to move them diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index d866551be03..970c8ec1a05 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -510,12 +510,14 @@ More details about quota locking can be found in fs/dquot.c. prototypes: void (*open)(struct vm_area_struct*); void (*close)(struct vm_area_struct*); + struct page *(*fault)(struct vm_area_struct*, struct fault_data *); struct page *(*nopage)(struct vm_area_struct*, unsigned long, int *); locking rules: BKL mmap_sem open: no yes close: no yes +fault: no yes nopage: no yes ================================================================================ diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 26c888890c2..ce90032c010 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -251,7 +251,7 @@ static int gfs2_readpage(struct file *file, struct page *page) if (file) { gf = file->private_data; if (test_bit(GFF_EXLOCK, &gf->f_flags)) - /* gfs2_sharewrite_nopage has grabbed the ip->i_gl already */ + /* gfs2_sharewrite_fault has grabbed the ip->i_gl already */ goto skip_lock; } gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh); diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index bad0b24cb77..581ac11b265 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -364,7 +364,7 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) else vma->vm_ops = &gfs2_vm_ops_private; - vma->vm_flags |= VM_CAN_INVALIDATE; + vma->vm_flags |= VM_CAN_INVALIDATE|VM_CAN_NONLINEAR; gfs2_glock_dq_uninit(&i_gh); diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c index d5a98cbfebd..e9fe6eb74e7 100644 --- a/fs/gfs2/ops_vm.c +++ b/fs/gfs2/ops_vm.c @@ -27,13 +27,13 @@ #include "trans.h" #include "util.h" -static struct page *gfs2_private_nopage(struct vm_area_struct *area, - unsigned long address, int *type) +static struct page *gfs2_private_fault(struct vm_area_struct *vma, + struct fault_data *fdata) { - struct gfs2_inode *ip = GFS2_I(area->vm_file->f_mapping->host); + struct gfs2_inode *ip = GFS2_I(vma->vm_file->f_mapping->host); set_bit(GIF_PAGED, &ip->i_flags); - return filemap_nopage(area, address, type); + return filemap_fault(vma, fdata); } static int alloc_page_backing(struct gfs2_inode *ip, struct page *page) @@ -104,16 +104,14 @@ out: return error; } -static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area, - unsigned long address, int *type) +static struct page *gfs2_sharewrite_fault(struct vm_area_struct *vma, + struct fault_data *fdata) { - struct file *file = area->vm_file; + struct file *file = vma->vm_file; struct gfs2_file *gf = file->private_data; struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); struct gfs2_holder i_gh; struct page *result = NULL; - unsigned long index = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + - area->vm_pgoff; int alloc_required; int error; @@ -124,23 +122,27 @@ static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area, set_bit(GIF_PAGED, &ip->i_flags); set_bit(GIF_SW_PAGED, &ip->i_flags); - error = gfs2_write_alloc_required(ip, (u64)index << PAGE_CACHE_SHIFT, - PAGE_CACHE_SIZE, &alloc_required); - if (error) + error = gfs2_write_alloc_required(ip, + (u64)fdata->pgoff << PAGE_CACHE_SHIFT, + PAGE_CACHE_SIZE, &alloc_required); + if (error) { + fdata->type = VM_FAULT_OOM; /* XXX: are these right? */ goto out; + } set_bit(GFF_EXLOCK, &gf->f_flags); - result = filemap_nopage(area, address, type); + result = filemap_fault(vma, fdata); clear_bit(GFF_EXLOCK, &gf->f_flags); - if (!result || result == NOPAGE_OOM) + if (!result) goto out; if (alloc_required) { error = alloc_page_backing(ip, result); if (error) { - if (area->vm_flags & VM_CAN_INVALIDATE) + if (vma->vm_flags & VM_CAN_INVALIDATE) unlock_page(result); page_cache_release(result); + fdata->type = VM_FAULT_OOM; result = NULL; goto out; } @@ -154,10 +156,10 @@ out: } struct vm_operations_struct gfs2_vm_ops_private = { - .nopage = gfs2_private_nopage, + .fault = gfs2_private_fault, }; struct vm_operations_struct gfs2_vm_ops_sharewrite = { - .nopage = gfs2_sharewrite_nopage, + .fault = gfs2_sharewrite_fault, }; diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c index 5416673418b..af48b792ca0 100644 --- a/fs/ncpfs/mmap.c +++ b/fs/ncpfs/mmap.c @@ -25,8 +25,8 @@ /* * Fill in the supplied page for mmap */ -static struct page* ncp_file_mmap_nopage(struct vm_area_struct *area, - unsigned long address, int *type) +static struct page* ncp_file_mmap_fault(struct vm_area_struct *area, + struct fault_data *fdata) { struct file *file = area->vm_file; struct dentry *dentry = file->f_path.dentry; @@ -40,15 +40,17 @@ static struct page* ncp_file_mmap_nopage(struct vm_area_struct *area, page = alloc_page(GFP_HIGHUSER); /* ncpfs has nothing against high pages as long as recvmsg and memset works on it */ - if (!page) - return page; + if (!page) { + fdata->type = VM_FAULT_OOM; + return NULL; + } pg_addr = kmap(page); - address &= PAGE_MASK; - pos = address - area->vm_start + (area->vm_pgoff << PAGE_SHIFT); + pos = fdata->pgoff << PAGE_SHIFT; count = PAGE_SIZE; - if (address + PAGE_SIZE > area->vm_end) { - count = area->vm_end - address; + if (fdata->address + PAGE_SIZE > area->vm_end) { + WARN_ON(1); /* shouldn't happen? */ + count = area->vm_end - fdata->address; } /* what we can read in one go */ bufsize = NCP_SERVER(inode)->buffer_size; @@ -91,15 +93,14 @@ static struct page* ncp_file_mmap_nopage(struct vm_area_struct *area, * fetches from the network, here the analogue of disk. * -- wli */ - if (type) - *type = VM_FAULT_MAJOR; + fdata->type = VM_FAULT_MAJOR; count_vm_event(PGMAJFAULT); return page; } static struct vm_operations_struct ncp_file_mmap = { - .nopage = ncp_file_mmap_nopage, + .fault = ncp_file_mmap_fault, }; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 84bf6e79de2..460d440310f 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -232,7 +232,7 @@ static int ocfs2_readpage(struct file *file, struct page *page) * might now be discovering a truncate that hit on another node. * block_read_full_page->get_block freaks out if it is asked to read * beyond the end of a file, so we check here. Callers - * (generic_file_read, fault->nopage) are clever enough to check i_size + * (generic_file_read, vm_ops->fault) are clever enough to check i_size * and notice that the page they just read isn't needed. * * XXX sys_readahead() seems to get that wrong? diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 904f39ff534..cd75508b1c8 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -60,24 +60,23 @@ static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset) return sigprocmask(SIG_SETMASK, oldset, NULL); } -static struct page *ocfs2_nopage(struct vm_area_struct * area, - unsigned long address, - int *type) +static struct page *ocfs2_fault(struct vm_area_struct *area, + struct fault_data *fdata) { - struct page *page = NOPAGE_SIGBUS; + struct page *page = NULL; sigset_t blocked, oldset; int ret; - mlog_entry("(area=%p, address=%lu, type=%p)\n", area, address, - type); + mlog_entry("(area=%p, page offset=%lu)\n", area, fdata->pgoff); ret = ocfs2_vm_op_block_sigs(&blocked, &oldset); if (ret < 0) { + fdata->type = VM_FAULT_SIGBUS; mlog_errno(ret); goto out; } - page = filemap_nopage(area, address, type); + page = filemap_fault(area, fdata); ret = ocfs2_vm_op_unblock_sigs(&oldset); if (ret < 0) @@ -209,7 +208,7 @@ out: } static struct vm_operations_struct ocfs2_file_vm_ops = { - .nopage = ocfs2_nopage, + .fault = ocfs2_fault, .page_mkwrite = ocfs2_page_mkwrite, }; @@ -226,7 +225,7 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) ocfs2_meta_unlock(file->f_dentry->d_inode, lock_level); out: vma->vm_ops = &ocfs2_file_vm_ops; - vma->vm_flags |= VM_CAN_INVALIDATE; + vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR; return 0; } diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 92b2f225712..f12e80a69c6 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -213,18 +213,19 @@ xfs_file_fsync( #ifdef CONFIG_XFS_DMAPI STATIC struct page * -xfs_vm_nopage( - struct vm_area_struct *area, - unsigned long address, - int *type) +xfs_vm_fault( + struct vm_area_struct *vma, + struct fault_data *fdata) { - struct inode *inode = area->vm_file->f_path.dentry->d_inode; + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; bhv_vnode_t *vp = vn_from_inode(inode); ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI); - if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), area, 0)) + if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), vma, 0)) { + fdata->type = VM_FAULT_SIGBUS; return NULL; - return filemap_nopage(area, address, type); + } + return filemap_fault(vma, fdata); } #endif /* CONFIG_XFS_DMAPI */ @@ -310,7 +311,7 @@ xfs_file_mmap( struct vm_area_struct *vma) { vma->vm_ops = &xfs_file_vm_ops; - vma->vm_flags |= VM_CAN_INVALIDATE; + vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR; #ifdef CONFIG_XFS_DMAPI if (vn_from_inode(filp->f_path.dentry->d_inode)->v_vfsp->vfs_flag & VFS_DMI) @@ -465,14 +466,12 @@ const struct file_operations xfs_dir_file_operations = { }; static struct vm_operations_struct xfs_file_vm_ops = { - .nopage = filemap_nopage, - .populate = filemap_populate, + .fault = filemap_fault, }; #ifdef CONFIG_XFS_DMAPI static struct vm_operations_struct xfs_dmapi_file_vm_ops = { - .nopage = xfs_vm_nopage, - .populate = filemap_populate, + .fault = xfs_vm_fault, #ifdef HAVE_VMOP_MPROTECT .mprotect = xfs_vm_mprotect, #endif diff --git a/include/linux/mm.h b/include/linux/mm.h index ca9536a348c..f28a1b3e63a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -173,6 +173,7 @@ extern unsigned int kobjsize(const void *objp); * In this case, do_no_page must * return with the page locked. */ +#define VM_CAN_NONLINEAR 0x10000000 /* Has ->fault & does nonlinear pages */ #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS @@ -196,6 +197,25 @@ extern unsigned int kobjsize(const void *objp); */ extern pgprot_t protection_map[16]; +#define FAULT_FLAG_WRITE 0x01 +#define FAULT_FLAG_NONLINEAR 0x02 + +/* + * fault_data is filled in the the pagefault handler and passed to the + * vma's ->fault function. That function is responsible for filling in + * 'type', which is the type of fault if a page is returned, or the type + * of error if NULL is returned. + * + * pgoff should be used in favour of address, if possible. If pgoff is + * used, one may set VM_CAN_NONLINEAR in the vma->vm_flags to get + * nonlinear mapping support. + */ +struct fault_data { + unsigned long address; + pgoff_t pgoff; + unsigned int flags; + int type; +}; /* * These are the virtual MM functions - opening of an area, closing and @@ -205,9 +225,15 @@ extern pgprot_t protection_map[16]; struct vm_operations_struct { void (*open)(struct vm_area_struct * area); void (*close)(struct vm_area_struct * area); - struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type); - unsigned long (*nopfn)(struct vm_area_struct * area, unsigned long address); - int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock); + struct page *(*fault)(struct vm_area_struct *vma, + struct fault_data *fdata); + struct page *(*nopage)(struct vm_area_struct *area, + unsigned long address, int *type); + unsigned long (*nopfn)(struct vm_area_struct *area, + unsigned long address); + int (*populate)(struct vm_area_struct *area, unsigned long address, + unsigned long len, pgprot_t prot, unsigned long pgoff, + int nonblock); /* notification that a previously read-only page is about to become * writable, if an error is returned it will cause a SIGBUS */ @@ -661,7 +687,6 @@ static inline int page_mapped(struct page *page) */ #define NOPAGE_SIGBUS (NULL) #define NOPAGE_OOM ((struct page *) (-1)) -#define NOPAGE_REFAULT ((struct page *) (-2)) /* Return to userspace, rerun */ /* * Error return values for the *_nopfn functions @@ -1110,9 +1135,11 @@ extern void truncate_inode_pages_range(struct address_space *, loff_t lstart, loff_t lend); /* generic vm_area_ops exported for stackable file systems */ -extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int *); -extern int filemap_populate(struct vm_area_struct *, unsigned long, - unsigned long, pgprot_t, unsigned long, int); +extern struct page *filemap_fault(struct vm_area_struct *, struct fault_data *); +extern struct page * __deprecated_for_modules +filemap_nopage(struct vm_area_struct *, unsigned long, int *); +extern int __deprecated_for_modules filemap_populate(struct vm_area_struct *, + unsigned long, unsigned long, pgprot_t, unsigned long, int); /* mm/page-writeback.c */ int write_one_page(struct page *page, int wait); diff --git a/ipc/shm.c b/ipc/shm.c index 242c3f66493..e2d090348b1 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -224,13 +224,13 @@ static void shm_close(struct vm_area_struct *vma) mutex_unlock(&shm_ids(ns).mutex); } -static struct page *shm_nopage(struct vm_area_struct *vma, - unsigned long address, int *type) +static struct page *shm_fault(struct vm_area_struct *vma, + struct fault_data *fdata) { struct file *file = vma->vm_file; struct shm_file_data *sfd = shm_file_data(file); - return sfd->vm_ops->nopage(vma, address, type); + return sfd->vm_ops->fault(vma, fdata); } #ifdef CONFIG_NUMA @@ -269,6 +269,7 @@ static int shm_mmap(struct file * file, struct vm_area_struct * vma) if (ret != 0) return ret; sfd->vm_ops = vma->vm_ops; + BUG_ON(!sfd->vm_ops->fault); vma->vm_ops = &shm_vm_ops; shm_open(vma); @@ -327,7 +328,7 @@ static const struct file_operations shm_file_operations = { static struct vm_operations_struct shm_vm_ops = { .open = shm_open, /* callback for a new vm-area open */ .close = shm_close, /* callback for when the vm-area is released */ - .nopage = shm_nopage, + .fault = shm_fault, #if defined(CONFIG_NUMA) .set_policy = shm_set_policy, .get_policy = shm_get_policy, diff --git a/mm/filemap.c b/mm/filemap.c index 462cda58a18..26b992d169e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1301,40 +1301,38 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset) #define MMAP_LOTSAMISS (100) /** - * filemap_nopage - read in file data for page fault handling - * @area: the applicable vm_area - * @address: target address to read in - * @type: returned with VM_FAULT_{MINOR,MAJOR} if not %NULL + * filemap_fault - read in file data for page fault handling + * @vma: user vma (not used) + * @fdata: the applicable fault_data * - * filemap_nopage() is invoked via the vma operations vector for a + * filemap_fault() is invoked via the vma operations vector for a * mapped memory region to read in file data during a page fault. * * The goto's are kind of ugly, but this streamlines the normal case of having * it in the page cache, and handles the special cases reasonably without * having a lot of duplicated code. */ -struct page *filemap_nopage(struct vm_area_struct *area, - unsigned long address, int *type) +struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata) { int error; - struct file *file = area->vm_file; + struct file *file = vma->vm_file; struct address_space *mapping = file->f_mapping; struct file_ra_state *ra = &file->f_ra; struct inode *inode = mapping->host; struct page *page; - unsigned long size, pgoff; - int did_readaround = 0, majmin = VM_FAULT_MINOR; + unsigned long size; + int did_readaround = 0; - BUG_ON(!(area->vm_flags & VM_CAN_INVALIDATE)); + fdata->type = VM_FAULT_MINOR; - pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff; + BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE)); size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (pgoff >= size) + if (fdata->pgoff >= size) goto outside_data_content; /* If we don't want any read-ahead, don't bother */ - if (VM_RandomReadHint(area)) + if (VM_RandomReadHint(vma)) goto no_cached_page; /* @@ -1343,19 +1341,19 @@ struct page *filemap_nopage(struct vm_area_struct *area, * * For sequential accesses, we use the generic readahead logic. */ - if (VM_SequentialReadHint(area)) - page_cache_readahead(mapping, ra, file, pgoff, 1); + if (VM_SequentialReadHint(vma)) + page_cache_readahead(mapping, ra, file, fdata->pgoff, 1); /* * Do we have something in the page cache already? */ retry_find: - page = find_lock_page(mapping, pgoff); + page = find_lock_page(mapping, fdata->pgoff); if (!page) { unsigned long ra_pages; - if (VM_SequentialReadHint(area)) { - handle_ra_miss(mapping, ra, pgoff); + if (VM_SequentialReadHint(vma)) { + handle_ra_miss(mapping, ra, fdata->pgoff); goto no_cached_page; } ra->mmap_miss++; @@ -1372,7 +1370,7 @@ retry_find: * check did_readaround, as this is an inner loop. */ if (!did_readaround) { - majmin = VM_FAULT_MAJOR; + fdata->type = VM_FAULT_MAJOR; count_vm_event(PGMAJFAULT); } did_readaround = 1; @@ -1380,11 +1378,11 @@ retry_find: if (ra_pages) { pgoff_t start = 0; - if (pgoff > ra_pages / 2) - start = pgoff - ra_pages / 2; + if (fdata->pgoff > ra_pages / 2) + start = fdata->pgoff - ra_pages / 2; do_page_cache_readahead(mapping, file, start, ra_pages); } - page = find_lock_page(mapping, pgoff); + page = find_lock_page(mapping, fdata->pgoff); if (!page) goto no_cached_page; } @@ -1401,7 +1399,7 @@ retry_find: /* Must recheck i_size under page lock */ size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (unlikely(pgoff >= size)) { + if (unlikely(fdata->pgoff >= size)) { unlock_page(page); goto outside_data_content; } @@ -1410,8 +1408,6 @@ retry_find: * Found the page and have a reference on it. */ mark_page_accessed(page); - if (type) - *type = majmin; return page; outside_data_content: @@ -1419,15 +1415,17 @@ outside_data_content: * An external ptracer can access pages that normally aren't * accessible.. */ - if (area->vm_mm == current->mm) - return NOPAGE_SIGBUS; + if (vma->vm_mm == current->mm) { + fdata->type = VM_FAULT_SIGBUS; + return NULL; + } /* Fall through to the non-read-ahead case */ no_cached_page: /* * We're only likely to ever get here if MADV_RANDOM is in * effect. */ - error = page_cache_read(file, pgoff); + error = page_cache_read(file, fdata->pgoff); /* * The page we want has now been added to the page cache. @@ -1443,13 +1441,15 @@ no_cached_page: * to schedule I/O. */ if (error == -ENOMEM) - return NOPAGE_OOM; - return NOPAGE_SIGBUS; + fdata->type = VM_FAULT_OOM; + else + fdata->type = VM_FAULT_SIGBUS; + return NULL; page_not_uptodate: /* IO error path */ if (!did_readaround) { - majmin = VM_FAULT_MAJOR; + fdata->type = VM_FAULT_MAJOR; count_vm_event(PGMAJFAULT); } @@ -1468,7 +1468,30 @@ page_not_uptodate: /* Things didn't work out. Return zero to tell the mm layer so. */ shrink_readahead_size_eio(file, ra); - return NOPAGE_SIGBUS; + fdata->type = VM_FAULT_SIGBUS; + return NULL; +} +EXPORT_SYMBOL(filemap_fault); + +/* + * filemap_nopage and filemap_populate are legacy exports that are not used + * in tree. Scheduled for removal. + */ +struct page *filemap_nopage(struct vm_area_struct *area, + unsigned long address, int *type) +{ + struct page *page; + struct fault_data fdata; + fdata.address = address; + fdata.pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + + area->vm_pgoff; + fdata.flags = 0; + + page = filemap_fault(area, &fdata); + if (type) + *type = fdata.type; + + return page; } EXPORT_SYMBOL(filemap_nopage); @@ -1646,8 +1669,7 @@ repeat: EXPORT_SYMBOL(filemap_populate); struct vm_operations_struct generic_file_vm_ops = { - .nopage = filemap_nopage, - .populate = filemap_populate, + .fault = filemap_fault, }; /* This is used for a general mmap of a disk file */ @@ -1660,7 +1682,7 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma) return -ENOEXEC; file_accessed(file); vma->vm_ops = &generic_file_vm_ops; - vma->vm_flags |= VM_CAN_INVALIDATE; + vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR; return 0; } diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 65ffc321f0c..82f4b8e9834 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -205,62 +205,67 @@ __xip_unmap (struct address_space * mapping, } /* - * xip_nopage() is invoked via the vma operations vector for a + * xip_fault() is invoked via the vma operations vector for a * mapped memory region to read in file data during a page fault. * - * This function is derived from filemap_nopage, but used for execute in place + * This function is derived from filemap_fault, but used for execute in place */ -static struct page * -xip_file_nopage(struct vm_area_struct * area, - unsigned long address, - int *type) +static struct page *xip_file_fault(struct vm_area_struct *area, + struct fault_data *fdata) { struct file *file = area->vm_file; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; struct page *page; - unsigned long size, pgoff, endoff; + pgoff_t size; - pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) - + area->vm_pgoff; - endoff = ((area->vm_end - area->vm_start) >> PAGE_CACHE_SHIFT) - + area->vm_pgoff; + /* XXX: are VM_FAULT_ codes OK? */ size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (pgoff >= size) - return NOPAGE_SIGBUS; + if (fdata->pgoff >= size) { + fdata->type = VM_FAULT_SIGBUS; + return NULL; + } - page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0); + page = mapping->a_ops->get_xip_page(mapping, + fdata->pgoff*(PAGE_SIZE/512), 0); if (!IS_ERR(page)) goto out; - if (PTR_ERR(page) != -ENODATA) - return NOPAGE_SIGBUS; + if (PTR_ERR(page) != -ENODATA) { + fdata->type = VM_FAULT_OOM; + return NULL; + } /* sparse block */ if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) && (area->vm_flags & (VM_SHARED| VM_MAYSHARE)) && (!(mapping->host->i_sb->s_flags & MS_RDONLY))) { /* maybe shared writable, allocate new block */ - page = mapping->a_ops->get_xip_page (mapping, - pgoff*(PAGE_SIZE/512), 1); - if (IS_ERR(page)) - return NOPAGE_SIGBUS; + page = mapping->a_ops->get_xip_page(mapping, + fdata->pgoff*(PAGE_SIZE/512), 1); + if (IS_ERR(page)) { + fdata->type = VM_FAULT_SIGBUS; + return NULL; + } /* unmap page at pgoff from all other vmas */ - __xip_unmap(mapping, pgoff); + __xip_unmap(mapping, fdata->pgoff); } else { /* not shared and writable, use xip_sparse_page() */ page = xip_sparse_page(); - if (!page) - return NOPAGE_OOM; + if (!page) { + fdata->type = VM_FAULT_OOM; + return NULL; + } } out: + fdata->type = VM_FAULT_MINOR; page_cache_get(page); return page; } static struct vm_operations_struct xip_file_vm_ops = { - .nopage = xip_file_nopage, + .fault = xip_file_fault, }; int xip_file_mmap(struct file * file, struct vm_area_struct * vma) @@ -269,6 +274,7 @@ int xip_file_mmap(struct file * file, struct vm_area_struct * vma) file_accessed(file); vma->vm_ops = &xip_file_vm_ops; + vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } EXPORT_SYMBOL_GPL(xip_file_mmap); diff --git a/mm/fremap.c b/mm/fremap.c index 4e3f53dd5fd..01e51f01b84 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -126,6 +126,25 @@ out: return err; } +static int populate_range(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, unsigned long size, pgoff_t pgoff) +{ + int err; + + do { + err = install_file_pte(mm, vma, addr, pgoff, vma->vm_page_prot); + if (err) + return err; + + size -= PAGE_SIZE; + addr += PAGE_SIZE; + pgoff++; + } while (size); + + return 0; + +} + /*** * sys_remap_file_pages - remap arbitrary pages of a shared backing store * file within an existing vma. @@ -183,41 +202,63 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, * the single existing vma. vm_private_data is used as a * swapout cursor in a VM_NONLINEAR vma. */ - if (vma && (vma->vm_flags & VM_SHARED) && - (!vma->vm_private_data || (vma->vm_flags & VM_NONLINEAR)) && - vma->vm_ops && vma->vm_ops->populate && - end > start && start >= vma->vm_start && - end <= vma->vm_end) { - - /* Must set VM_NONLINEAR before any pages are populated. */ - if (pgoff != linear_page_index(vma, start) && - !(vma->vm_flags & VM_NONLINEAR)) { - if (!has_write_lock) { - up_read(&mm->mmap_sem); - down_write(&mm->mmap_sem); - has_write_lock = 1; - goto retry; + if (!vma || !(vma->vm_flags & VM_SHARED)) + goto out; + + if (vma->vm_private_data && !(vma->vm_flags & VM_NONLINEAR)) + goto out; + + if ((!vma->vm_ops || !vma->vm_ops->populate) && + !(vma->vm_flags & VM_CAN_NONLINEAR)) + goto out; + + if (end <= start || start < vma->vm_start || end > vma->vm_end) + goto out; + + /* Must set VM_NONLINEAR before any pages are populated. */ + if (!(vma->vm_flags & VM_NONLINEAR)) { + /* Don't need a nonlinear mapping, exit success */ + if (pgoff == linear_page_index(vma, start)) { + err = 0; + goto out; + } + + if (!has_write_lock) { + up_read(&mm->mmap_sem); + down_write(&mm->mmap_sem); + has_write_lock = 1; + goto retry; + } + mapping = vma->vm_file->f_mapping; + spin_lock(&mapping->i_mmap_lock); + flush_dcache_mmap_lock(mapping); + vma->vm_flags |= VM_NONLINEAR; + vma_prio_tree_remove(vma, &mapping->i_mmap); + vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear); + flush_dcache_mmap_unlock(mapping); + spin_unlock(&mapping->i_mmap_lock); + } + + if (vma->vm_flags & VM_CAN_NONLINEAR) { + err = populate_range(mm, vma, start, size, pgoff); + if (!err && !(flags & MAP_NONBLOCK)) { + if (unlikely(has_write_lock)) { + downgrade_write(&mm->mmap_sem); + has_write_lock = 0; } - mapping = vma->vm_file->f_mapping; - spin_lock(&mapping->i_mmap_lock); - flush_dcache_mmap_lock(mapping); - vma->vm_flags |= VM_NONLINEAR; - vma_prio_tree_remove(vma, &mapping->i_mmap); - vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear); - flush_dcache_mmap_unlock(mapping); - spin_unlock(&mapping->i_mmap_lock); + make_pages_present(start, start+size); } + } else + err = vma->vm_ops->populate(vma, start, size, vma->vm_page_prot, + pgoff, flags & MAP_NONBLOCK); - err = vma->vm_ops->populate(vma, start, size, - vma->vm_page_prot, - pgoff, flags & MAP_NONBLOCK); + /* + * We can't clear VM_NONLINEAR because we'd have to do + * it after ->populate completes, and that would prevent + * downgrading the lock. (Locks can't be upgraded). + */ - /* - * We can't clear VM_NONLINEAR because we'd have to do - * it after ->populate completes, and that would prevent - * downgrading the lock. (Locks can't be upgraded). - */ - } +out: if (likely(!has_write_lock)) up_read(&mm->mmap_sem); else diff --git a/mm/memory.c b/mm/memory.c index e6c99f6b564..eee7fec3ab5 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1047,7 +1047,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, if (pages) foll_flags |= FOLL_GET; if (!write && !(vma->vm_flags & VM_LOCKED) && - (!vma->vm_ops || !vma->vm_ops->nopage)) + (!vma->vm_ops || (!vma->vm_ops->nopage && + !vma->vm_ops-> |