diff options
Diffstat (limited to 'ipc/shm.c')
| -rw-r--r-- | ipc/shm.c | 1290 |
1 files changed, 875 insertions, 415 deletions
diff --git a/ipc/shm.c b/ipc/shm.c index dca90489e3b..89fc354156c 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -13,9 +13,17 @@ * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com> * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com> * + * support for audit of ipc object properties and permission changes + * Dustin Kirkland <dustin.kirkland@us.ibm.com> + * + * namespaces support + * OpenVZ, SWsoft Inc. + * Pavel Emelianov <xemul@openvz.org> + * + * Better ipc lock (kern_ipc_perm.lock) handling + * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013. */ -#include <linux/config.h> #include <linux/slab.h> #include <linux/mm.h> #include <linux/hugetlb.h> @@ -27,169 +35,469 @@ #include <linux/security.h> #include <linux/syscalls.h> #include <linux/audit.h> +#include <linux/capability.h> #include <linux/ptrace.h> #include <linux/seq_file.h> +#include <linux/rwsem.h> +#include <linux/nsproxy.h> +#include <linux/mount.h> +#include <linux/ipc_namespace.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include "util.h" -#define shm_flags shm_perm.mode +struct shm_file_data { + int id; + struct ipc_namespace *ns; + struct file *file; + const struct vm_operations_struct *vm_ops; +}; + +#define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data)) -static struct file_operations shm_file_operations; -static struct vm_operations_struct shm_vm_ops; +static const struct file_operations shm_file_operations; +static const struct vm_operations_struct shm_vm_ops; -static struct ipc_ids shm_ids; +#define shm_ids(ns) ((ns)->ids[IPC_SHM_IDS]) -#define shm_lock(id) ((struct shmid_kernel*)ipc_lock(&shm_ids,id)) -#define shm_unlock(shp) ipc_unlock(&(shp)->shm_perm) -#define shm_get(id) ((struct shmid_kernel*)ipc_get(&shm_ids,id)) -#define shm_buildid(id, seq) \ - ipc_buildid(&shm_ids, id, seq) +#define shm_unlock(shp) \ + ipc_unlock(&(shp)->shm_perm) -static int newseg (key_t key, int shmflg, size_t size); -static void shm_open (struct vm_area_struct *shmd); -static void shm_close (struct vm_area_struct *shmd); +static int newseg(struct ipc_namespace *, struct ipc_params *); +static void shm_open(struct vm_area_struct *vma); +static void shm_close(struct vm_area_struct *vma); +static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp); #ifdef CONFIG_PROC_FS static int sysvipc_shm_proc_show(struct seq_file *s, void *it); #endif -size_t shm_ctlmax = SHMMAX; -size_t shm_ctlall = SHMALL; -int shm_ctlmni = SHMMNI; +void shm_init_ns(struct ipc_namespace *ns) +{ + ns->shm_ctlmax = SHMMAX; + ns->shm_ctlall = SHMALL; + ns->shm_ctlmni = SHMMNI; + ns->shm_rmid_forced = 0; + ns->shm_tot = 0; + ipc_init_ids(&shm_ids(ns)); +} -static int shm_tot; /* total number of shared memory pages */ +/* + * Called with shm_ids.rwsem (writer) and the shp structure locked. + * Only shm_ids.rwsem remains locked on exit. + */ +static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) +{ + struct shmid_kernel *shp; + shp = container_of(ipcp, struct shmid_kernel, shm_perm); -void __init shm_init (void) + if (shp->shm_nattch) { + shp->shm_perm.mode |= SHM_DEST; + /* Do not find it any more */ + shp->shm_perm.key = IPC_PRIVATE; + shm_unlock(shp); + } else + shm_destroy(ns, shp); +} + +#ifdef CONFIG_IPC_NS +void shm_exit_ns(struct ipc_namespace *ns) { - ipc_init_ids(&shm_ids, 1); - ipc_init_proc_interface("sysvipc/shm", - " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime\n", - &shm_ids, - sysvipc_shm_proc_show); + free_ipcs(ns, &shm_ids(ns), do_shm_rmid); + idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr); } +#endif -static inline int shm_checkid(struct shmid_kernel *s, int id) +static int __init ipc_ns_init(void) { - if (ipc_checkid(&shm_ids,&s->shm_perm,id)) - return -EIDRM; + shm_init_ns(&init_ipc_ns); return 0; } -static inline struct shmid_kernel *shm_rmid(int id) +pure_initcall(ipc_ns_init); + +void __init shm_init(void) { - return (struct shmid_kernel *)ipc_rmid(&shm_ids,id); + ipc_init_proc_interface("sysvipc/shm", +#if BITS_PER_LONG <= 32 + " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n", +#else + " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n", +#endif + IPC_SHM_IDS, sysvipc_shm_proc_show); } -static inline int shm_addid(struct shmid_kernel *shp) +static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id) { - return ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni); + struct kern_ipc_perm *ipcp = ipc_obtain_object(&shm_ids(ns), id); + + if (IS_ERR(ipcp)) + return ERR_CAST(ipcp); + + return container_of(ipcp, struct shmid_kernel, shm_perm); } +static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id) +{ + struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id); + if (IS_ERR(ipcp)) + return ERR_CAST(ipcp); -static inline void shm_inc (int id) { - struct shmid_kernel *shp; + return container_of(ipcp, struct shmid_kernel, shm_perm); +} - if(!(shp = shm_lock(id))) - BUG(); - shp->shm_atim = get_seconds(); - shp->shm_lprid = current->tgid; - shp->shm_nattch++; - shm_unlock(shp); +/* + * shm_lock_(check_) routines are called in the paths where the rwsem + * is not necessarily held. + */ +static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) +{ + struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id); + + if (IS_ERR(ipcp)) + return (struct shmid_kernel *)ipcp; + + return container_of(ipcp, struct shmid_kernel, shm_perm); +} + +static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp) +{ + rcu_read_lock(); + ipc_lock_object(&ipcp->shm_perm); } +static void shm_rcu_free(struct rcu_head *head) +{ + struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); + struct shmid_kernel *shp = ipc_rcu_to_struct(p); + + security_shm_free(shp); + ipc_rcu_free(head); +} + +static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) +{ + ipc_rmid(&shm_ids(ns), &s->shm_perm); +} + + /* This is called by fork, once for every shm attach. */ -static void shm_open (struct vm_area_struct *shmd) +static void shm_open(struct vm_area_struct *vma) { - shm_inc (shmd->vm_file->f_dentry->d_inode->i_ino); + struct file *file = vma->vm_file; + struct shm_file_data *sfd = shm_file_data(file); + struct shmid_kernel *shp; + + shp = shm_lock(sfd->ns, sfd->id); + BUG_ON(IS_ERR(shp)); + shp->shm_atim = get_seconds(); + shp->shm_lprid = task_tgid_vnr(current); + shp->shm_nattch++; + shm_unlock(shp); } /* * shm_destroy - free the struct shmid_kernel * + * @ns: namespace * @shp: struct to free * - * It has to be called with shp and shm_ids.sem locked, + * It has to be called with shp and shm_ids.rwsem (writer) locked, * but returns with shp unlocked and freed. */ -static void shm_destroy (struct shmid_kernel *shp) +static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) { - shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; - shm_rmid (shp->id); + struct file *shm_file; + + shm_file = shp->shm_file; + shp->shm_file = NULL; + ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; + shm_rmid(ns, shp); shm_unlock(shp); - if (!is_file_hugepages(shp->shm_file)) - shmem_lock(shp->shm_file, 0, shp->mlock_user); - else - user_shm_unlock(shp->shm_file->f_dentry->d_inode->i_size, - shp->mlock_user); - fput (shp->shm_file); - security_shm_free(shp); - ipc_rcu_putref(shp); + if (!is_file_hugepages(shm_file)) + shmem_lock(shm_file, 0, shp->mlock_user); + else if (shp->mlock_user) + user_shm_unlock(file_inode(shm_file)->i_size, shp->mlock_user); + fput(shm_file); + ipc_rcu_putref(shp, shm_rcu_free); } /* - * remove the attach descriptor shmd. + * shm_may_destroy - identifies whether shm segment should be destroyed now + * + * Returns true if and only if there are no active users of the segment and + * one of the following is true: + * + * 1) shmctl(id, IPC_RMID, NULL) was called for this shp + * + * 2) sysctl kernel.shm_rmid_forced is set to 1. + */ +static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) +{ + return (shp->shm_nattch == 0) && + (ns->shm_rmid_forced || + (shp->shm_perm.mode & SHM_DEST)); +} + +/* + * remove the attach descriptor vma. * free memory for segment if it is marked destroyed. * The descriptor has already been removed from the current->mm->mmap list * and will later be kfree()d. */ -static void shm_close (struct vm_area_struct *shmd) +static void shm_close(struct vm_area_struct *vma) { - struct file * file = shmd->vm_file; - int id = file->f_dentry->d_inode->i_ino; + struct file *file = vma->vm_file; + struct shm_file_data *sfd = shm_file_data(file); struct shmid_kernel *shp; + struct ipc_namespace *ns = sfd->ns; - down (&shm_ids.sem); + down_write(&shm_ids(ns).rwsem); /* remove from the list of attaches of the shm segment */ - if(!(shp = shm_lock(id))) - BUG(); - shp->shm_lprid = current->tgid; + shp = shm_lock(ns, sfd->id); + BUG_ON(IS_ERR(shp)); + shp->shm_lprid = task_tgid_vnr(current); shp->shm_dtim = get_seconds(); shp->shm_nattch--; - if(shp->shm_nattch == 0 && - shp->shm_flags & SHM_DEST) - shm_destroy (shp); + if (shm_may_destroy(ns, shp)) + shm_destroy(ns, shp); else shm_unlock(shp); - up (&shm_ids.sem); + up_write(&shm_ids(ns).rwsem); +} + +/* Called with ns->shm_ids(ns).rwsem locked */ +static int shm_try_destroy_current(int id, void *p, void *data) +{ + struct ipc_namespace *ns = data; + struct kern_ipc_perm *ipcp = p; + struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm); + + if (shp->shm_creator != current) + return 0; + + /* + * Mark it as orphaned to destroy the segment when + * kernel.shm_rmid_forced is changed. + * It is noop if the following shm_may_destroy() returns true. + */ + shp->shm_creator = NULL; + + /* + * Don't even try to destroy it. If shm_rmid_forced=0 and IPC_RMID + * is not set, it shouldn't be deleted here. + */ + if (!ns->shm_rmid_forced) + return 0; + + if (shm_may_destroy(ns, shp)) { + shm_lock_by_ptr(shp); + shm_destroy(ns, shp); + } + return 0; +} + +/* Called with ns->shm_ids(ns).rwsem locked */ +static int shm_try_destroy_orphaned(int id, void *p, void *data) +{ + struct ipc_namespace *ns = data; + struct kern_ipc_perm *ipcp = p; + struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm); + + /* + * We want to destroy segments without users and with already + * exit'ed originating process. + * + * As shp->* are changed under rwsem, it's safe to skip shp locking. + */ + if (shp->shm_creator != NULL) + return 0; + + if (shm_may_destroy(ns, shp)) { + shm_lock_by_ptr(shp); + shm_destroy(ns, shp); + } + return 0; +} + +void shm_destroy_orphaned(struct ipc_namespace *ns) +{ + down_write(&shm_ids(ns).rwsem); + if (shm_ids(ns).in_use) + idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns); + up_write(&shm_ids(ns).rwsem); +} + + +void exit_shm(struct task_struct *task) +{ + struct ipc_namespace *ns = task->nsproxy->ipc_ns; + + if (shm_ids(ns).in_use == 0) + return; + + /* Destroy all already created segments, but not mapped yet */ + down_write(&shm_ids(ns).rwsem); + if (shm_ids(ns).in_use) + idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns); + up_write(&shm_ids(ns).rwsem); +} + +static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct file *file = vma->vm_file; + struct shm_file_data *sfd = shm_file_data(file); + + return sfd->vm_ops->fault(vma, vmf); +} + +#ifdef CONFIG_NUMA +static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new) +{ + struct file *file = vma->vm_file; + struct shm_file_data *sfd = shm_file_data(file); + int err = 0; + if (sfd->vm_ops->set_policy) + err = sfd->vm_ops->set_policy(vma, new); + return err; } -static int shm_mmap(struct file * file, struct vm_area_struct * vma) +static struct mempolicy *shm_get_policy(struct vm_area_struct *vma, + unsigned long addr) { - file_accessed(file); + struct file *file = vma->vm_file; + struct shm_file_data *sfd = shm_file_data(file); + struct mempolicy *pol = NULL; + + if (sfd->vm_ops->get_policy) + pol = sfd->vm_ops->get_policy(vma, addr); + else if (vma->vm_policy) + pol = vma->vm_policy; + + return pol; +} +#endif + +static int shm_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct shm_file_data *sfd = shm_file_data(file); + int ret; + + ret = sfd->file->f_op->mmap(sfd->file, vma); + if (ret != 0) + return ret; + sfd->vm_ops = vma->vm_ops; +#ifdef CONFIG_MMU + BUG_ON(!sfd->vm_ops->fault); +#endif vma->vm_ops = &shm_vm_ops; - shm_inc(file->f_dentry->d_inode->i_ino); + shm_open(vma); + + return ret; +} + +static int shm_release(struct inode *ino, struct file *file) +{ + struct shm_file_data *sfd = shm_file_data(file); + + put_ipc_ns(sfd->ns); + shm_file_data(file) = NULL; + kfree(sfd); return 0; } -static struct file_operations shm_file_operations = { - .mmap = shm_mmap +static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync) +{ + struct shm_file_data *sfd = shm_file_data(file); + + if (!sfd->file->f_op->fsync) + return -EINVAL; + return sfd->file->f_op->fsync(sfd->file, start, end, datasync); +} + +static long shm_fallocate(struct file *file, int mode, loff_t offset, + loff_t len) +{ + struct shm_file_data *sfd = shm_file_data(file); + + if (!sfd->file->f_op->fallocate) + return -EOPNOTSUPP; + return sfd->file->f_op->fallocate(file, mode, offset, len); +} + +static unsigned long shm_get_unmapped_area(struct file *file, + unsigned long addr, unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + struct shm_file_data *sfd = shm_file_data(file); + return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len, + pgoff, flags); +} + +static const struct file_operations shm_file_operations = { + .mmap = shm_mmap, + .fsync = shm_fsync, + .release = shm_release, +#ifndef CONFIG_MMU + .get_unmapped_area = shm_get_unmapped_area, +#endif + .llseek = noop_llseek, + .fallocate = shm_fallocate, +}; + +static const struct file_operations shm_file_operations_huge = { + .mmap = shm_mmap, + .fsync = shm_fsync, + .release = shm_release, + .get_unmapped_area = shm_get_unmapped_area, + .llseek = noop_llseek, + .fallocate = shm_fallocate, }; -static struct vm_operations_struct shm_vm_ops = { +int is_file_shm_hugepages(struct file *file) +{ + return file->f_op == &shm_file_operations_huge; +} + +static const struct vm_operations_struct shm_vm_ops = { .open = shm_open, /* callback for a new vm-area open */ .close = shm_close, /* callback for when the vm-area is released */ - .nopage = shmem_nopage, -#if defined(CONFIG_NUMA) && defined(CONFIG_SHMEM) - .set_policy = shmem_set_policy, - .get_policy = shmem_get_policy, + .fault = shm_fault, +#if defined(CONFIG_NUMA) + .set_policy = shm_set_policy, + .get_policy = shm_get_policy, #endif }; -static int newseg (key_t key, int shmflg, size_t size) +/** + * newseg - Create a new shared memory segment + * @ns: namespace + * @params: ptr to the structure that contains key, size and shmflg + * + * Called with shm_ids.rwsem held as a writer. + */ +static int newseg(struct ipc_namespace *ns, struct ipc_params *params) { + key_t key = params->key; + int shmflg = params->flg; + size_t size = params->u.size; int error; struct shmid_kernel *shp; - int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT; - struct file * file; + size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; + struct file *file; char name[13]; int id; + vm_flags_t acctflag = 0; - if (size < SHMMIN || size > shm_ctlmax) + if (size < SHMMIN || size > ns->shm_ctlmax) return -EINVAL; - if (shm_tot + numpages >= shm_ctlall) + if (numpages << PAGE_SHIFT < size) + return -ENOSPC; + + if (ns->shm_tot + numpages < ns->shm_tot || + ns->shm_tot + numpages > ns->shm_ctlall) return -ENOSPC; shp = ipc_rcu_alloc(sizeof(*shp)); @@ -197,103 +505,140 @@ static int newseg (key_t key, int shmflg, size_t size) return -ENOMEM; shp->shm_perm.key = key; - shp->shm_flags = (shmflg & S_IRWXUGO); + shp->shm_perm.mode = (shmflg & S_IRWXUGO); shp->mlock_user = NULL; shp->shm_perm.security = NULL; error = security_shm_alloc(shp); if (error) { - ipc_rcu_putref(shp); + ipc_rcu_putref(shp, ipc_rcu_free); return error; } + sprintf(name, "SYSV%08x", key); if (shmflg & SHM_HUGETLB) { - /* hugetlb_zero_setup takes care of mlock user accounting */ - file = hugetlb_zero_setup(size); - shp->mlock_user = current->user; + struct hstate *hs; + size_t hugesize; + + hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK); + if (!hs) { + error = -EINVAL; + goto no_file; + } + hugesize = ALIGN(size, huge_page_size(hs)); + + /* hugetlb_file_setup applies strict accounting */ + if (shmflg & SHM_NORESERVE) + acctflag = VM_NORESERVE; + file = hugetlb_file_setup(name, hugesize, acctflag, + &shp->mlock_user, HUGETLB_SHMFS_INODE, + (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK); } else { - sprintf (name, "SYSV%08x", key); - file = shmem_file_setup(name, size, VM_ACCOUNT); + /* + * Do not allow no accounting for OVERCOMMIT_NEVER, even + * if it's asked for. + */ + if ((shmflg & SHM_NORESERVE) && + sysctl_overcommit_memory != OVERCOMMIT_NEVER) + acctflag = VM_NORESERVE; + file = shmem_file_setup(name, size, acctflag); } error = PTR_ERR(file); if (IS_ERR(file)) goto no_file; - error = -ENOSPC; - id = shm_addid(shp); - if(id == -1) + id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); + if (id < 0) { + error = id; goto no_id; + } - shp->shm_cprid = current->tgid; + shp->shm_cprid = task_tgid_vnr(current); shp->shm_lprid = 0; shp->shm_atim = shp->shm_dtim = 0; shp->shm_ctim = get_seconds(); shp->shm_segsz = size; shp->shm_nattch = 0; - shp->id = shm_buildid(id,shp->shm_perm.seq); shp->shm_file = file; - file->f_dentry->d_inode->i_ino = shp->id; - if (shmflg & SHM_HUGETLB) - set_file_hugepages(file); - else - file->f_op = &shm_file_operations; - shm_tot += numpages; - shm_unlock(shp); - return shp->id; + shp->shm_creator = current; + + /* + * shmid gets reported as "inode#" in /proc/pid/maps. + * proc-ps tools use this. Changing this will break them. + */ + file_inode(file)->i_ino = shp->shm_perm.id; + + ns->shm_tot += numpages; + error = shp->shm_perm.id; + + ipc_unlock_object(&shp->shm_perm); + rcu_read_unlock(); + return error; no_id: + if (is_file_hugepages(file) && shp->mlock_user) + user_shm_unlock(size, shp->mlock_user); fput(file); no_file: - security_shm_free(shp); - ipc_rcu_putref(shp); + ipc_rcu_putref(shp, shm_rcu_free); return error; } -asmlinkage long sys_shmget (key_t key, size_t size, int shmflg) +/* + * Called with shm_ids.rwsem and ipcp locked. + */ +static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg) { struct shmid_kernel *shp; - int err, id = 0; - - down(&shm_ids.sem); - if (key == IPC_PRIVATE) { - err = newseg(key, shmflg, size); - } else if ((id = ipc_findkey(&shm_ids, key)) == -1) { - if (!(shmflg & IPC_CREAT)) - err = -ENOENT; - else - err = newseg(key, shmflg, size); - } else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) { - err = -EEXIST; - } else { - shp = shm_lock(id); - if(shp==NULL) - BUG(); - if (shp->shm_segsz < size) - err = -EINVAL; - else if (ipcperms(&shp->shm_perm, shmflg)) - err = -EACCES; - else { - int shmid = shm_buildid(id, shp->shm_perm.seq); - err = security_shm_associate(shp, shmflg); - if (!err) - err = shmid; - } - shm_unlock(shp); - } - up(&shm_ids.sem); - return err; + shp = container_of(ipcp, struct shmid_kernel, shm_perm); + return security_shm_associate(shp, shmflg); +} + +/* + * Called with shm_ids.rwsem and ipcp locked. + */ +static inline int shm_more_checks(struct kern_ipc_perm *ipcp, + struct ipc_params *params) +{ + struct shmid_kernel *shp; + + shp = container_of(ipcp, struct shmid_kernel, shm_perm); + if (shp->shm_segsz < params->u.size) + return -EINVAL; + + return 0; +} + +SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg) +{ + struct ipc_namespace *ns; + static const struct ipc_ops shm_ops = { + .getnew = newseg, + .associate = shm_security, + .more_checks = shm_more_checks, + }; + struct ipc_params shm_params; + + ns = current->nsproxy->ipc_ns; + + shm_params.key = key; + shm_params.flg = shmflg; + shm_params.u.size = size; + + return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params); } static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version) { - switch(version) { + switch (version) { case IPC_64: return copy_to_user(buf, in, sizeof(*in)); case IPC_OLD: { struct shmid_ds out; + memset(&out, 0, sizeof(out)); ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm); out.shm_segsz = in->shm_segsz; out.shm_atime = in->shm_atime; @@ -310,28 +655,14 @@ static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ } } -struct shm_setbuf { - uid_t uid; - gid_t gid; - mode_t mode; -}; - -static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void __user *buf, int version) +static inline unsigned long +copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version) { - switch(version) { + switch (version) { case IPC_64: - { - struct shmid64_ds tbuf; - - if (copy_from_user(&tbuf, buf, sizeof(tbuf))) + if (copy_from_user(out, buf, sizeof(*out))) return -EFAULT; - - out->uid = tbuf.shm_perm.uid; - out->gid = tbuf.shm_perm.gid; - out->mode = tbuf.shm_flags; - return 0; - } case IPC_OLD: { struct shmid_ds tbuf_old; @@ -339,9 +670,9 @@ static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void __ if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) return -EFAULT; - out->uid = tbuf_old.shm_perm.uid; - out->gid = tbuf_old.shm_perm.gid; - out->mode = tbuf_old.shm_flags; + out->shm_perm.uid = tbuf_old.shm_perm.uid; + out->shm_perm.gid = tbuf_old.shm_perm.gid; + out->shm_perm.mode = tbuf_old.shm_perm.mode; return 0; } @@ -352,14 +683,14 @@ static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void __ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version) { - switch(version) { + switch (version) { case IPC_64: return copy_to_user(buf, in, sizeof(*in)); case IPC_OLD: { struct shminfo out; - if(in->shmmax > INT_MAX) + if (in->shmmax > INT_MAX) out.shmmax = INT_MAX; else out.shmmax = (int)in->shmmax; @@ -367,7 +698,7 @@ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminf out.shmmin = in->shmmin; out.shmmni = in->shmmni; out.shmseg = in->shmseg; - out.shmall = in->shmall; + out.shmall = in->shmall; return copy_to_user(buf, &out, sizeof(out)); } @@ -376,69 +707,156 @@ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminf } } -static void shm_get_stat(unsigned long *rss, unsigned long *swp) +/* + * Calculate and add used RSS and swap pages of a shm. + * Called with shm_ids.rwsem held as a reader + */ +static void shm_add_rss_swap(struct shmid_kernel *shp, + unsigned long *rss_add, unsigned long *swp_add) +{ + struct inode *inode; + + inode = file_inode(shp->shm_file); + + if (is_file_hugepages(shp->shm_file)) { + struct address_space *mapping = inode->i_mapping; + struct hstate *h = hstate_file(shp->shm_file); + *rss_add += pages_per_huge_page(h) * mapping->nrpages; + } else { +#ifdef CONFIG_SHMEM + struct shmem_inode_info *info = SHMEM_I(inode); + spin_lock(&info->lock); + *rss_add += inode->i_mapping->nrpages; + *swp_add += info->swapped; + spin_unlock(&info->lock); +#else + *rss_add += inode->i_mapping->nrpages; +#endif + } +} + +/* + * Called with shm_ids.rwsem held as a reader + */ +static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, + unsigned long *swp) { - int i; + int next_id; + int total, in_use; *rss = 0; *swp = 0; - for (i = 0; i <= shm_ids.max_id; i++) { + in_use = shm_ids(ns).in_use; + + for (total = 0, next_id = 0; total < in_use; next_id++) { + struct kern_ipc_perm *ipc; struct shmid_kernel *shp; - struct inode *inode; - shp = shm_get(i); - if(!shp) + ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id); + if (ipc == NULL) continue; + shp = container_of(ipc, struct shmid_kernel, shm_perm); - inode = shp->shm_file->f_dentry->d_inode; + shm_add_rss_swap(shp, rss, swp); - if (is_file_hugepages(shp->shm_file)) { - struct address_space *mapping = inode->i_mapping; - *rss += (HPAGE_SIZE/PAGE_SIZE)*mapping->nrpages; - } else { - struct shmem_inode_info *info = SHMEM_I(inode); - spin_lock(&info->lock); - *rss += inode->i_mapping->nrpages; - *swp += info->swapped; - spin_unlock(&info->lock); - } + total++; } } -asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf) +/* + * This function handles some shmctl commands which require the rwsem + * to be held in write mode. + * NOTE: no locks must be held, the rwsem is taken inside this function. + */ +static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, + struct shmid_ds __user *buf, int version) { - struct shm_setbuf setbuf; + struct kern_ipc_perm *ipcp; + struct shmid64_ds shmid64; struct shmid_kernel *shp; - int err, version; + int err; + + if (cmd == IPC_SET) { + if (copy_shmid_from_user(&shmid64, buf, version)) + return -EFAULT; + } + + down_write(&shm_ids(ns).rwsem); + rcu_read_lock(); + + ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd, + &shmid64.shm_perm, 0); + if (IS_ERR(ipcp)) { + err = PTR_ERR(ipcp); + goto out_unlock1; + } - if (cmd < 0 || shmid < 0) { + shp = container_of(ipcp, struct shmid_kernel, shm_perm); + + err = security_shm_shmctl(shp, cmd); + if (err) + goto out_unlock1; + + switch (cmd) { + case IPC_RMID: + ipc_lock_object(&shp->shm_perm); + /* do_shm_rmid unlocks the ipc object and rcu */ + do_shm_rmid(ns, ipcp); + goto out_up; + case IPC_SET: + ipc_lock_object(&shp->shm_perm); + err = ipc_update_perm(&shmid64.shm_perm, ipcp); + if (err) + goto out_unlock0; + shp->shm_ctim = get_seconds(); + break; + default: err = -EINVAL; - goto out; + goto out_unlock1; } - version = ipc_parse_version(&cmd); +out_unlock0: + ipc_unlock_object(&shp->shm_perm); +out_unlock1: + rcu_read_unlock(); +out_up: + up_write(&shm_ids(ns).rwsem); + return err; +} - switch (cmd) { /* replace with proc interface ? */ - case IPC_INFO: - { - struct shminfo64 shminfo; +static int shmctl_nolock(struct ipc_namespace *ns, int shmid, + int cmd, int version, void __user *buf) +{ + int err; + struct shmid_kernel *shp; + /* preliminary security checks for *_INFO */ + if (cmd == IPC_INFO || cmd == SHM_INFO) { err = security_shm_shmctl(NULL, cmd); if (err) return err; + } - memset(&shminfo,0,sizeof(shminfo)); - shminfo.shmmni = shminfo.shmseg = shm_ctlmni; - shminfo.shmmax = shm_ctlmax; - shminfo.shmall = shm_ctlall; + switch (cmd) { + case IPC_INFO: + { + struct shminfo64 shminfo; + + memset(&shminfo, 0, sizeof(shminfo)); + shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni; + shminfo.shmmax = ns->shm_ctlmax; + shminfo.shmall = ns->shm_ctlall; shminfo.shmmin = SHMMIN; - if(copy_shminfo_to_user (buf, &shminfo, version)) + if (copy_shminfo_to_user(buf, &shminfo, version)) return -EFAULT; - /* reading a integer is always atomic */ - err= shm_ids.max_id; - if(err<0) + + down_read(&shm_ids(ns).rwsem); + err = ipc_get_maxid(&shm_ids(ns)); + up_read(&shm_ids(ns).rwsem); + + if (err < 0) err = 0; goto out; } @@ -446,20 +864,16 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf) { struct shm_info shm_info; - err = security_shm_shmctl(NULL, cmd); - if (err) - return err; - - memset(&shm_info,0,sizeof(shm_info)); - down(&shm_ids.sem); - shm_info.used_ids = shm_ids.in_use; - shm_get_stat (&shm_info.shm_rss, &shm_info.shm_swp); - shm_info.shm_tot = shm_tot; + memset(&shm_info, 0, sizeof(shm_info)); + down_read(&shm_ids(ns).rwsem); + shm_info.used_ids = shm_ids(ns).in_use; + shm_get_stat(ns, &shm_info.shm_rss, &shm_info.shm_swp); + shm_info.shm_tot = ns->shm_tot; shm_info.swap_attempts = 0; shm_info.swap_successes = 0; - err = shm_ids.max_id; - up(&shm_ids.sem); - if(copy_to_user (buf, &shm_info, sizeof(shm_info))) { + err = ipc_get_maxid(&shm_ids(ns)); + up_read(&shm_ids(ns).rwsem); + if (copy_to_user(buf, &shm_info, sizeof(shm_info))) { err = -EFAULT; goto out; } @@ -472,28 +886,33 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf) { struct shmid64_ds tbuf; int result; - memset(&tbuf, 0, sizeof(tbuf)); - shp = shm_lock(shmid); - if(shp==NULL) { - err = -EINVAL; - goto out; - } else if(cmd==SHM_STAT) { - err = -EINVAL; - if (shmid > shm_ids.max_id) + + rcu_read_lock(); + if (cmd == SHM_STAT) { + shp = shm_obtain_object(ns, shmid); + if (IS_ERR(shp)) { + err = PTR_ERR(shp); goto out_unlock; - result = shm_buildid(shmid, shp->shm_perm.seq); + } + result = shp->shm_perm.id; } else { - err = shm_checkid(shp,shmid); - if(err) + shp = shm_obtain_object_check(ns, shmid); + if (IS_ERR(shp)) { + err = PTR_ERR(shp); goto out_unlock; + } result = 0; } - err=-EACCES; - if (ipcperms (&shp->shm_perm, S_IRUGO)) + + err = -EACCES; + if (ipcperms(ns, &shp->shm_perm, S_IRUGO)) goto out_unlock; + err = security_shm_shmctl(shp, cmd); if (err) goto out_unlock; + + memset(&tbuf, 0, sizeof(tbuf)); kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm); tbuf.shm_segsz = shp->shm_segsz; tbuf.shm_atime = shp->shm_atim; @@ -501,152 +920,120 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf) tbuf.shm_ctime = shp->shm_ctim; tbuf.shm_cpid = shp->shm_cprid; tbuf.shm_lpid = shp->shm_lprid; - if (!is_file_hugepages(shp->shm_file)) - tbuf.shm_nattch = shp->shm_nattch; - else - tbuf.shm_nattch = file_count(shp->shm_file) - 1; - shm_unlock(shp); - if(copy_shmid_to_user (buf, &tbuf, version)) + tbuf.shm_nattch = shp->shm_nattch; + rcu_read_unlock(); + + if (copy_shmid_to_user(buf, &tbuf, version)) err = -EFAULT; else err = result; goto out; } + default: + return -EINVAL; + } + +out_unlock: + rcu_read_unlock(); +out: + return err; +} + +SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) +{ + struct shmid_kernel *shp; + int err, version; + struct ipc_namespace *ns; + + if (cmd < 0 || shmid < 0) + return -EINVAL; + + version = ipc_parse_version(&cmd); + ns = current->nsproxy->ipc_ns; + + switch (cmd) { + case IPC_INFO: + case SHM_INFO: + case SHM_STAT: + case IPC_STAT: + return shmctl_nolock(ns, shmid, cmd, version, buf); + case IPC_RMID: + case IPC_SET: + return shmctl_down(ns, shmid, cmd, buf, version); case SHM_LOCK: case SHM_UNLOCK: { - shp = shm_lock(shmid); - if(shp==NULL) { - err = -EINVAL; - goto out; - } - err = shm_checkid(shp,shmid); - if(err) - goto out_unlock; + struct file *shm_file; - if (!capable(CAP_IPC_LOCK)) { - err = -EPERM; - if (current->euid != shp->shm_perm.uid && - current->euid != shp->shm_perm.cuid) - goto out_unlock; - if (cmd == SHM_LOCK && - !current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur) - goto out_unlock; + rcu_read_lock(); + shp = shm_obtain_object_check(ns, shmid); + if (IS_ERR(shp)) { + err = PTR_ERR(shp); + goto out_unlock1; } + audit_ipc_obj(&(shp->shm_perm)); err = security_shm_shmctl(shp, cmd); if (err) - goto out_unlock; - - if(cmd==SHM_LOCK) { - struct user_struct * user = current->user; - if (!is_file_hugepages(shp->shm_file)) { - err = shmem_lock(shp->shm_file, 1, user); - if (!err) { - shp->shm_flags |= SHM_LOCKED; - shp->mlock_user = user; - } - } - } else if (!is_file_hugepages(shp->shm_file)) { - shmem_lock(shp->shm_file, 0, shp->mlock_user); - shp->shm_flags &= ~SHM_LOCKED; - shp->mlock_user = NULL; + goto out_unlock1; + + ipc_lock_object(&shp->shm_perm); + + /* check if shm_destroy() is tearing down shp */ + if (!ipc_valid_object(&shp->shm_perm)) { + err = -EIDRM; + goto out_unlock0; } - shm_unlock(shp); - goto out; - } - case IPC_RMID: - { - /* - * We cannot simply remove the file. The SVID states - * that the block remains until the last person - * detaches from it, then is deleted. A shmat() on - * an RMID segment is legal in older Linux and if - * we change it apps break... - * - * Instead we set a destroyed flag, and then blow - * the name away when the usage hits zero. - */ - down(&shm_ids.sem); - shp = shm_lock(shmid); - err = -EINVAL; - if (shp == NULL) - goto out_up; - err = shm_checkid(shp, shmid); - if(err) - goto out_unlock_up; - - if (current->euid != shp->shm_perm.uid && - current->euid != shp->shm_perm.cuid && - !capable(CAP_SYS_ADMIN)) { - err=-EPERM; - goto out_unlock_up; + + if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) { + kuid_t euid = current_euid(); + if (!uid_eq(euid, shp->shm_perm.uid) && + !uid_eq(euid, shp->shm_perm.cuid)) { + err = -EPERM; + goto out_unlock0; + } + if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) { + err = -EPERM; + goto out_unlock0; + } } - err = security_shm_shmctl(shp, cmd); - if (err) - goto out_unlock_up; - - if (shp->shm_nattch){ - shp->shm_flags |= SHM_DEST; - /* Do not find it any more */ - shp->shm_perm.key = IPC_PRIVATE; - shm_unlock(shp); - } else - shm_destroy (shp); - up(&shm_ids.sem); - goto out; - } + shm_file = shp->shm_file; + if (is_file_hugepages(shm_file)) + goto out_unlock0; - case IPC_SET: - { - if (copy_shmid_from_user (&setbuf, buf, version)) { - err = -EFAULT; - goto out; - } - if ((err = audit_ipc_perms(0, setbuf.uid, setbuf.gid, setbuf.mode))) - return err; - down(&shm_ids.sem); - shp = shm_lock(shmid); - err=-EINVAL; - if(shp==NULL) - goto out_up; - err = shm_checkid(shp,shmid); - if(err) - goto out_unlock_up; - err=-EPERM; - if (current->euid != shp->shm_perm.uid && - current->euid != shp->shm_perm.cuid && - !capable(CAP_SYS_ADMIN)) { - goto out_unlock_up; + if (cmd == SHM_LOCK) { + struct user_struct *user = current_user(); + err = shmem_lock(shm_file, 1, user); + if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) { + shp->shm_perm.mode |= SHM_LOCKED; + shp->mlock_user = user; + } + goto out_unlock0; } - err = security_shm_shmctl(shp, cmd); - if (err) - goto out_unlock_up; - - shp->shm_perm.uid = setbuf.uid; - shp->shm_perm.gid = setbuf.gid; - shp->shm_flags = (shp->shm_flags & ~S_IRWXUGO) - | (setbuf.mode & S_IRWXUGO); - shp->shm_ctim = get_seconds(); - break; + /* SHM_UNLOCK */ + if (!(shp->shm_perm.mode & SHM_LOCKED)) + goto out_unlock0; + shmem_lock(shm_file, 0, shp->mlock_user); + shp->shm_perm.mode &= ~SHM_LOCKED; + shp->mlock_user = NULL; + get_file(shm_file); + ipc_unlock_object(&shp->shm_perm); + rcu_read_unlock(); + shmem_unlock_mapping(shm_file->f_mapping); + + fput(shm_file); + return err; } - default: - err = -EINVAL; - goto out; + return -EINVAL; } - err = 0; -out_unlock_up: - shm_unlock(shp); -out_up: - up(&shm_ids.sem); - goto out; -out_unlock: - shm_unlock(shp); -out: +out_unlock0: + ipc_unlock_object(&shp->shm_perm); +out_unlock1: + rcu_read_unlock(); return err; } @@ -657,48 +1044,52 @@ out: * "raddr" thing points to kernel space, and there has to be a wrapper around * this. */ -long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) +long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, + unsigned long shmlba) { struct shmid_kernel *shp; unsigned long addr; unsigned long size; - struct file * file; + struct file *file; int err; unsigned long flags; unsigned long prot; - unsigned long o_flags; int acc_mode; - void *user_addr; - - if (shmid < 0) { - err = -EINVAL; + struct ipc_namespace *ns; + struct shm_file_data *sfd; + struct path path; + fmode_t f_mode; + unsigned long populate = 0; + + err = -EINVAL; + if (shmid < 0) goto out; - } else if ((addr = (ulong)shmaddr)) { - if (addr & (SHMLBA-1)) { + else if ((addr = (ulong)shmaddr)) { + if (addr & (shmlba - 1)) { if (shmflg & SHM_RND) - addr &= ~(SHMLBA-1); /* round down */ + addr &= ~(shmlba - 1); /* round down */ else #ifndef __ARCH_FORCE_SHMLBA if (addr & ~PAGE_MASK) #endif - return -EINVAL; + goto out; } flags = MAP_SHARED | MAP_FIXED; } else { if ((shmflg & SHM_REMAP)) - return -EINVAL; + goto out; flags = MAP_SHARED; } if (shmflg & SHM_RDONLY) { prot = PROT_READ; - o_flags = O_RDONLY; acc_mode = S_IRUGO; + f_mode = FMODE_READ; } else { prot = PROT_READ | PROT_WRITE; - o_flags = O_RDWR; acc_mode = S_IRUGO | S_IWUGO; + f_mode = FMODE_READ | FMODE_WRITE; } if (shmflg & SHM_EXEC) { prot |= PROT_EXEC; @@ -709,36 +1100,73 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) * We cannot rely on the fs check since SYSV IPC does have an * additional creator id... */ - shp = shm_lock(shmid); - if(shp == NULL) { - err = -EINVAL; - goto out; - } - err = shm_checkid(shp,shmid); - if (err) { - shm_unlock(shp); - goto out; - } - if (ipcperms(&shp->shm_perm, acc_mode)) { - shm_unlock(shp); - err = -EACCES; - goto out; + ns = current->nsproxy->ipc_ns; + rcu_read_lock(); + shp = shm_obtain_object_check(ns, shmid); + if (IS_ERR(shp)) { + err = PTR_ERR(shp); + goto out_unlock; } + err = -EACCES; + if (ipcperms(ns, &shp->shm_perm, acc_mode)) + goto out_unlock; + err = security_shm_shmat(shp, shmaddr, shmflg); - if (err) { - shm_unlock(shp); - return err; + if (err) + goto out_unlock; + + ipc_lock_object(&shp->shm_perm); + + /* check if shm_destroy() is tearing down shp */ + if (!ipc_valid_object(&shp->shm_perm)) { + ipc_unlock_object(&shp->shm_perm); + err = -EIDRM; + goto out_unlock; } - - file = shp->shm_file; - size = i_size_read(file->f_dentry->d_inode); + + path = shp->shm_file->f_path; + path_get(&path); shp->shm_nattch++; - shm_unlock(shp); + size = i_size_read(path.dentry->d_inode); + ipc_unlock_object(&shp->shm_perm); + rcu_read_unlock(); + + err = -ENOMEM; + sfd = kzalloc(sizeof(*sfd), GFP_KERNEL); + if (!sfd) { + path_put(&path); + goto out_nattch; + } + + file = alloc_file(&path, f_mode, + is_file_hugepages(shp->shm_file) ? + &shm_file_operations_huge : + &shm_file_operations); + err = PTR_ERR(file); + if (IS_ERR(file)) { + kfree(sfd); + path_put(&path); + goto out_nattch; + } + + file->private_data = sfd; + file->f_mapping = shp->shm_file->f_mapping; + sfd->id = shp->shm_perm.id; + sfd->ns = get_ipc_ns(ns); + sfd->file = shp->shm_file; + sfd->vm_ops = NULL; + + err = security_mmap_file(file, prot, flags); + if (err) + goto out_fput; down_write(¤t->mm->mmap_sem); if (addr && !(shmflg & SHM_REMAP)) { - user_addr = ERR_PTR(-EINVAL); + err = -EINVAL; + if (addr + size < addr) + goto invalid; + if (find_vma_intersection(current->mm, addr, addr + size)) goto invalid; /* @@ -749,37 +1177,44 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) addr > current->mm->start_stack - size - PAGE_SIZE * 5) goto invalid; } - - user_addr = (void*) do_mmap (file, addr, size, prot, flags, 0); + addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate); + *raddr = addr; + err = 0; + if (IS_ERR_VALUE(addr)) + err = (long)addr; invalid: up_write(¤t->mm->mmap_sem); + if (populate) + mm_populate(addr, populate); - down (&shm_ids.sem); - if(!(shp = shm_lock(shmid))) - BUG(); +out_fput: + fput(file); + +out_nattch: + down_write(&shm_ids(ns).rwsem); + shp = shm_lock(ns, shmid); + BUG_ON(IS_ERR(shp)); shp->shm_nattch--; - if(shp->shm_nattch == 0 && - shp->shm_flags & SHM_DEST) - shm_destroy (shp); + if (shm_may_destroy(ns, shp)) + shm_destroy(ns, shp); else shm_unlock(shp); - up (&shm_ids.sem); + up_write(&shm_ids(ns).rwsem); + return err; - *raddr = (unsigned long) user_addr; - err = 0; - if (IS_ERR(user_addr)) - err = PTR_ERR(user_addr); +out_unlock: + rcu_read_unlock(); out: return err; } -asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg) +SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg) { unsigned long ret; long err; - err = do_shmat(shmid, shmaddr, shmflg, &ret); + err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA); if (err) return err; force_successful_syscall_return(); @@ -790,13 +1225,19 @@ asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg) * detach and kill segment if marked destroyed. * The work is done in shm_close. */ -asmlinkage long sys_shmdt(char __user *shmaddr) +SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) { struct mm_struct *mm = current->mm; - struct vm_area_struct *vma, *next; + struct vm_area_struct *vma; unsigned long addr = (unsigned long)shmaddr; - loff_t size = 0; int retval = -EINVAL; +#ifdef CONFIG_MMU + loff_t size = 0; + struct vm_area_struct *next; +#endif + + if (addr & ~PAGE_MASK) + return retval; down_write(&mm->mmap_sem); @@ -822,6 +1263,7 @@ asmlinkage long sys_shmdt(char __user *shmaddr) */ vma = find_vma(mm, addr); +#ifdef CONFIG_MMU while (vma) { next = vma->vm_next; @@ -830,11 +1272,11 @@ asmlinkage long sys_shmdt(char __user *shmaddr) * a fragment created by mprotect() and/or munmap(), or it * otherwise it starts at this address with no hassles. */ - if ((vma->vm_ops == &shm_vm_ops || is_vm_hugetlb_page(vma)) && + if ((vma->vm_ops == &shm_vm_ops) && (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) { - size = vma->vm_file->f_dentry->d_inode->i_size; + size = file_inode(vma->vm_file)->i_size; do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); /* * We discovered the size of the shm segment, so @@ -852,19 +1294,30 @@ asmlinkage long sys_shmdt(char __user *shmaddr) /* * We need look no further than the maximum address a fragment * could possibly have landed at. Also cast things to loff_t to - * prevent overflows and make comparisions vs. equal-width types. + * prevent overflows and make comparisons vs. equal-width types. */ + size = PAGE_ALIGN(size); while (vma && (loff_t)(vma->vm_end - addr) <= size) { next = vma->vm_next; /* finding a matching vma now does not alter retval */ - if ((vma->vm_ops == &shm_vm_ops || is_vm_hugetlb_page(vma)) && + if ((vma->vm_ops == &shm_vm_ops) && (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); vma = next; } +#else /* CONFIG_MMU */ + /* under NOMMU conditions, the exact address to be destroyed must be + * given */ + if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) { + do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); + retval = 0; + } + +#endif + up_write(&mm->mmap_sem); return retval; } @@ -872,30 +1325,37 @@ asmlinkage long sys_shmdt(char __user *shmaddr) #ifdef CONFIG_PROC_FS static int sysvipc_shm_proc_show(struct seq_file *s, void *it) { + struct user_namespace *user_ns = seq_user_ns(s); struct shmid_kernel *shp = it; - char *format; + unsigned long rss = 0, swp = 0; -#define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n" -#define BIG_STRING "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n" + shm_add_rss_swap(shp, &rss, &swp); - if (sizeof(size_t) <= sizeof(int)) - format = SMALL_STRING; - else - format = BIG_STRING; - return seq_printf(s, format, +#if BITS_PER_LONG <= 32 +#define SIZE_SPEC "%10lu" +#else +#define SIZE_SPEC "%21lu" +#endif + + return seq_printf(s, + "%10d %10d %4o " SIZE_SPEC " %5u %5u " + "%5lu %5u %5u %5u %5u %10lu %10lu %10lu " + SIZE_SPEC " " SIZE_SPEC "\n", shp->shm_perm.key, - shp->id, - shp->shm_flags, + shp->shm_perm.id, + shp->shm_perm.mode, shp->shm_segsz, shp->shm_cprid, shp->shm_lprid, - is_file_hugepages(shp->shm_file) ? (file_count(shp->shm_file) - 1) : shp->shm_nattch, - shp->shm_perm.uid, - shp->shm_perm.gid, - shp->shm_perm.cuid, - shp->shm_perm.cgid, + shp->shm_nattch, + from_kuid_munged(user_ns, shp->shm_perm.uid), + from_kgid_munged(user_ns, shp->shm_perm.gid), + from_kuid_munged(user_ns, shp->shm_perm.cuid), + from_kgid_munged(user_ns, shp->shm_perm.cgid), shp->shm_atim, shp->shm_dtim, - shp->shm_ctim); + shp->shm_ctim, + rss * PAGE_SIZE, + swp * PAGE_SIZE); } #endif |
