Diffstat (limited to 'ipc')
-rw-r--r--  ipc/compat.c      |  60
-rw-r--r--  ipc/compat_mq.c   |  53
-rw-r--r--  ipc/ipc_sysctl.c  |  48
-rw-r--r--  ipc/mq_sysctl.c   |  28
-rw-r--r--  ipc/mqueue.c      |  32
-rw-r--r--  ipc/msg.c         | 258
-rw-r--r--  ipc/msgutil.c     |  20
-rw-r--r--  ipc/sem.c         | 571
-rw-r--r--  ipc/shm.c         | 110
-rw-r--r--  ipc/util.c        | 367
-rw-r--r--  ipc/util.h        |  52
11 files changed, 879 insertions(+), 720 deletions(-)
diff --git a/ipc/compat.c b/ipc/compat.c index 892f6585dd6..b5ef4f7946d 100644 --- a/ipc/compat.c +++ b/ipc/compat.c @@ -30,7 +30,7 @@ #include <linux/ptrace.h> #include <linux/mutex.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include "util.h" @@ -113,9 +113,6 @@ struct compat_shm_info { compat_ulong_t swap_attempts, swap_successes; }; -extern int sem_ctls[]; -#define sc_semopm (sem_ctls[2]) - static inline int compat_ipc_parse_version(int *cmd) { #ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION @@ -197,7 +194,7 @@ static inline int __put_compat_ipc_perm(struct ipc64_perm *p, static inline int get_compat_semid64_ds(struct semid64_ds *s64, struct compat_semid64_ds __user *up64) { - if (!access_ok (VERIFY_READ, up64, sizeof(*up64))) + if (!access_ok(VERIFY_READ, up64, sizeof(*up64))) return -EFAULT; return __get_compat_ipc64_perm(&s64->sem_perm, &up64->sem_perm); } @@ -205,7 +202,7 @@ static inline int get_compat_semid64_ds(struct semid64_ds *s64, static inline int get_compat_semid_ds(struct semid64_ds *s, struct compat_semid_ds __user *up) { - if (!access_ok (VERIFY_READ, up, sizeof(*up))) + if (!access_ok(VERIFY_READ, up, sizeof(*up))) return -EFAULT; return __get_compat_ipc_perm(&s->sem_perm, &up->sem_perm); } @@ -215,7 +212,7 @@ static inline int put_compat_semid64_ds(struct semid64_ds *s64, { int err; - if (!access_ok (VERIFY_WRITE, up64, sizeof(*up64))) + if (!access_ok(VERIFY_WRITE, up64, sizeof(*up64))) return -EFAULT; err = __put_compat_ipc64_perm(&s64->sem_perm, &up64->sem_perm); err |= __put_user(s64->sem_otime, &up64->sem_otime); @@ -229,7 +226,7 @@ static inline int put_compat_semid_ds(struct semid64_ds *s, { int err; - if (!access_ok (VERIFY_WRITE, up, sizeof(*up))) + if (!access_ok(VERIFY_WRITE, up, sizeof(*up))) return -EFAULT; err = __put_compat_ipc_perm(&s->sem_perm, &up->sem_perm); err |= __put_user(s->sem_otime, &up->sem_otime); @@ -288,11 +285,11 @@ static long do_compat_semctl(int first, int second, int third, u32 pad) break; case IPC_SET: - if (version == IPC_64) { + if (version == IPC_64) err = get_compat_semid64_ds(&s64, compat_ptr(pad)); - } else { + else err = get_compat_semid_ds(&s64, compat_ptr(pad)); - } + up64 = compat_alloc_user_space(sizeof(s64)); if (copy_to_user(up64, &s64, sizeof(s64))) err = -EFAULT; @@ -376,12 +373,12 @@ COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second, struct compat_ipc_kludge ipck; if (!uptr) return -EINVAL; - if (copy_from_user (&ipck, uptr, sizeof(ipck))) + if (copy_from_user(&ipck, uptr, sizeof(ipck))) return -EFAULT; uptr = compat_ptr(ipck.msgp); fifth = ipck.msgtyp; } - return do_msgrcv(first, uptr, second, fifth, third, + return do_msgrcv(first, uptr, second, (s32)fifth, third, compat_do_msg_fill); } case MSGGET: @@ -430,9 +427,9 @@ COMPAT_SYSCALL_DEFINE4(msgsnd, int, msqid, compat_uptr_t, msgp, } COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp, - compat_ssize_t, msgsz, long, msgtyp, int, msgflg) + compat_ssize_t, msgsz, compat_long_t, msgtyp, int, msgflg) { - return do_msgrcv(msqid, compat_ptr(msgp), (ssize_t)msgsz, msgtyp, + return do_msgrcv(msqid, compat_ptr(msgp), (ssize_t)msgsz, (long)msgtyp, msgflg, compat_do_msg_fill); } @@ -498,7 +495,7 @@ static inline int put_compat_msqid_ds(struct msqid64_ds *m, return err; } -long compat_sys_msgctl(int first, int second, void __user *uptr) +COMPAT_SYSCALL_DEFINE3(msgctl, int, first, int, second, void __user *, uptr) { int err, err2; struct msqid64_ds m64; @@ -515,11 +512,11 @@ long compat_sys_msgctl(int first, int second, void __user *uptr) 
break; case IPC_SET: - if (version == IPC_64) { + if (version == IPC_64) err = get_compat_msqid64(&m64, uptr); - } else { + else err = get_compat_msqid(&m64, uptr); - } + if (err) break; p = compat_alloc_user_space(sizeof(m64)); @@ -668,7 +665,7 @@ static inline int put_compat_shm_info(struct shm_info __user *ip, return err; } -long compat_sys_shmctl(int first, int second, void __user *uptr) +COMPAT_SYSCALL_DEFINE3(shmctl, int, first, int, second, void __user *, uptr) { void __user *p; struct shmid64_ds s64; @@ -702,11 +699,11 @@ long compat_sys_shmctl(int first, int second, void __user *uptr) case IPC_SET: - if (version == IPC_64) { + if (version == IPC_64) err = get_compat_shmid64_ds(&s64, uptr); - } else { + else err = get_compat_shmid_ds(&s64, uptr); - } + if (err) break; p = compat_alloc_user_space(sizeof(s64)); @@ -749,17 +746,12 @@ long compat_sys_shmctl(int first, int second, void __user *uptr) return err; } -long compat_sys_semtimedop(int semid, struct sembuf __user *tsems, - unsigned nsops, const struct compat_timespec __user *timeout) +COMPAT_SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsems, + unsigned, nsops, + const struct compat_timespec __user *, timeout) { - struct timespec __user *ts64 = NULL; - if (timeout) { - struct timespec ts; - ts64 = compat_alloc_user_space(sizeof(*ts64)); - if (get_compat_timespec(&ts, timeout)) - return -EFAULT; - if (copy_to_user(ts64, &ts, sizeof(ts))) - return -EFAULT; - } + struct timespec __user *ts64; + if (compat_convert_timespec(&ts64, timeout)) + return -EFAULT; return sys_semtimedop(semid, tsems, nsops, ts64); } diff --git a/ipc/compat_mq.c b/ipc/compat_mq.c index 380ea4fe08e..ef6f91cc449 100644 --- a/ipc/compat_mq.c +++ b/ipc/compat_mq.c @@ -12,7 +12,7 @@ #include <linux/mqueue.h> #include <linux/syscalls.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> struct compat_mq_attr { compat_long_t mq_flags; /* message queue flags */ @@ -46,9 +46,9 @@ static inline int put_compat_mq_attr(const struct mq_attr *attr, | __put_user(attr->mq_curmsgs, &uattr->mq_curmsgs); } -asmlinkage long compat_sys_mq_open(const char __user *u_name, - int oflag, compat_mode_t mode, - struct compat_mq_attr __user *u_attr) +COMPAT_SYSCALL_DEFINE4(mq_open, const char __user *, u_name, + int, oflag, compat_mode_t, mode, + struct compat_mq_attr __user *, u_attr) { void __user *p = NULL; if (u_attr && oflag & O_CREAT) { @@ -64,49 +64,36 @@ asmlinkage long compat_sys_mq_open(const char __user *u_name, return sys_mq_open(u_name, oflag, mode, p); } -static int compat_prepare_timeout(struct timespec __user * *p, - const struct compat_timespec __user *u) -{ - struct timespec ts; - if (!u) { - *p = NULL; - return 0; - } - *p = compat_alloc_user_space(sizeof(ts)); - if (get_compat_timespec(&ts, u) || copy_to_user(*p, &ts, sizeof(ts))) - return -EFAULT; - return 0; -} - -asmlinkage long compat_sys_mq_timedsend(mqd_t mqdes, - const char __user *u_msg_ptr, - size_t msg_len, unsigned int msg_prio, - const struct compat_timespec __user *u_abs_timeout) +COMPAT_SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, + const char __user *, u_msg_ptr, + compat_size_t, msg_len, unsigned int, msg_prio, + const struct compat_timespec __user *, u_abs_timeout) { struct timespec __user *u_ts; - if (compat_prepare_timeout(&u_ts, u_abs_timeout)) + if (compat_convert_timespec(&u_ts, u_abs_timeout)) return -EFAULT; return sys_mq_timedsend(mqdes, u_msg_ptr, msg_len, msg_prio, u_ts); } -asmlinkage ssize_t compat_sys_mq_timedreceive(mqd_t mqdes, - char __user *u_msg_ptr, - size_t 
msg_len, unsigned int __user *u_msg_prio, - const struct compat_timespec __user *u_abs_timeout) +COMPAT_SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, + char __user *, u_msg_ptr, + compat_size_t, msg_len, unsigned int __user *, u_msg_prio, + const struct compat_timespec __user *, u_abs_timeout) { struct timespec __user *u_ts; - if (compat_prepare_timeout(&u_ts, u_abs_timeout)) + + if (compat_convert_timespec(&u_ts, u_abs_timeout)) return -EFAULT; return sys_mq_timedreceive(mqdes, u_msg_ptr, msg_len, u_msg_prio, u_ts); } -asmlinkage long compat_sys_mq_notify(mqd_t mqdes, - const struct compat_sigevent __user *u_notification) +COMPAT_SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes, + const struct compat_sigevent __user *, u_notification) { struct sigevent __user *p = NULL; if (u_notification) { @@ -122,9 +109,9 @@ asmlinkage long compat_sys_mq_notify(mqd_t mqdes, return sys_mq_notify(mqdes, p); } -asmlinkage long compat_sys_mq_getsetattr(mqd_t mqdes, - const struct compat_mq_attr __user *u_mqstat, - struct compat_mq_attr __user *u_omqstat) +COMPAT_SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes, + const struct compat_mq_attr __user *, u_mqstat, + struct compat_mq_attr __user *, u_omqstat) { struct mq_attr mqstat; struct mq_attr __user *p = compat_alloc_user_space(2 * sizeof(*p)); diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 130dfece27a..c3f0326e98d 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -18,7 +18,7 @@ #include <linux/msg.h> #include "util.h" -static void *get_ipc(ctl_table *table) +static void *get_ipc(struct ctl_table *table) { char *which = table->data; struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; @@ -27,7 +27,7 @@ static void *get_ipc(ctl_table *table) } #ifdef CONFIG_PROC_SYSCTL -static int proc_ipc_dointvec(ctl_table *table, int write, +static int proc_ipc_dointvec(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; @@ -38,7 +38,7 @@ static int proc_ipc_dointvec(ctl_table *table, int write, return proc_dointvec(&ipc_table, write, buffer, lenp, ppos); } -static int proc_ipc_dointvec_minmax(ctl_table *table, int write, +static int proc_ipc_dointvec_minmax(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; @@ -49,7 +49,7 @@ static int proc_ipc_dointvec_minmax(ctl_table *table, int write, return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); } -static int proc_ipc_dointvec_minmax_orphans(ctl_table *table, int write, +static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ipc_namespace *ns = current->nsproxy->ipc_ns; @@ -62,7 +62,7 @@ static int proc_ipc_dointvec_minmax_orphans(ctl_table *table, int write, return err; } -static int proc_ipc_callback_dointvec(ctl_table *table, int write, +static int proc_ipc_callback_dointvec_minmax(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; @@ -72,7 +72,7 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write, memcpy(&ipc_table, table, sizeof(ipc_table)); ipc_table.data = get_ipc(table); - rc = proc_dointvec(&ipc_table, write, buffer, lenp, ppos); + rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); if (write && !rc && lenp_bef == *lenp) /* @@ -85,7 +85,7 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write, return rc; } -static int proc_ipc_doulongvec_minmax(ctl_table *table, int write, +static 
int proc_ipc_doulongvec_minmax(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; @@ -119,7 +119,7 @@ static void ipc_auto_callback(int val) } } -static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, +static int proc_ipcauto_dointvec_minmax(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; @@ -152,35 +152,33 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, #define proc_ipc_dointvec NULL #define proc_ipc_dointvec_minmax NULL #define proc_ipc_dointvec_minmax_orphans NULL -#define proc_ipc_callback_dointvec NULL +#define proc_ipc_callback_dointvec_minmax NULL #define proc_ipcauto_dointvec_minmax NULL #endif static int zero; static int one = 1; -#ifdef CONFIG_CHECKPOINT_RESTORE static int int_max = INT_MAX; -#endif static struct ctl_table ipc_kern_table[] = { { .procname = "shmmax", .data = &init_ipc_ns.shm_ctlmax, - .maxlen = sizeof (init_ipc_ns.shm_ctlmax), + .maxlen = sizeof(init_ipc_ns.shm_ctlmax), .mode = 0644, .proc_handler = proc_ipc_doulongvec_minmax, }, { .procname = "shmall", .data = &init_ipc_ns.shm_ctlall, - .maxlen = sizeof (init_ipc_ns.shm_ctlall), + .maxlen = sizeof(init_ipc_ns.shm_ctlall), .mode = 0644, .proc_handler = proc_ipc_doulongvec_minmax, }, { .procname = "shmmni", .data = &init_ipc_ns.shm_ctlmni, - .maxlen = sizeof (init_ipc_ns.shm_ctlmni), + .maxlen = sizeof(init_ipc_ns.shm_ctlmni), .mode = 0644, .proc_handler = proc_ipc_dointvec, }, @@ -196,28 +194,34 @@ static struct ctl_table ipc_kern_table[] = { { .procname = "msgmax", .data = &init_ipc_ns.msg_ctlmax, - .maxlen = sizeof (init_ipc_ns.msg_ctlmax), + .maxlen = sizeof(init_ipc_ns.msg_ctlmax), .mode = 0644, - .proc_handler = proc_ipc_dointvec, + .proc_handler = proc_ipc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &int_max, }, { .procname = "msgmni", .data = &init_ipc_ns.msg_ctlmni, - .maxlen = sizeof (init_ipc_ns.msg_ctlmni), + .maxlen = sizeof(init_ipc_ns.msg_ctlmni), .mode = 0644, - .proc_handler = proc_ipc_callback_dointvec, + .proc_handler = proc_ipc_callback_dointvec_minmax, + .extra1 = &zero, + .extra2 = &int_max, }, { .procname = "msgmnb", .data = &init_ipc_ns.msg_ctlmnb, - .maxlen = sizeof (init_ipc_ns.msg_ctlmnb), + .maxlen = sizeof(init_ipc_ns.msg_ctlmnb), .mode = 0644, - .proc_handler = proc_ipc_dointvec, + .proc_handler = proc_ipc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &int_max, }, { .procname = "sem", .data = &init_ipc_ns.sem_ctls, - .maxlen = 4*sizeof (int), + .maxlen = 4*sizeof(int), .mode = 0644, .proc_handler = proc_ipc_dointvec, }, @@ -277,4 +281,4 @@ static int __init ipc_sysctl_init(void) return 0; } -__initcall(ipc_sysctl_init); +device_initcall(ipc_sysctl_init); diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c index 383d638340b..68d4e953762 100644 --- a/ipc/mq_sysctl.c +++ b/ipc/mq_sysctl.c @@ -14,7 +14,7 @@ #include <linux/sysctl.h> #ifdef CONFIG_PROC_SYSCTL -static void *get_mq(ctl_table *table) +static void *get_mq(struct ctl_table *table) { char *which = table->data; struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; @@ -22,7 +22,17 @@ static void *get_mq(ctl_table *table) return which; } -static int proc_mq_dointvec_minmax(ctl_table *table, int write, +static int proc_mq_dointvec(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table mq_table; + memcpy(&mq_table, table, sizeof(mq_table)); + mq_table.data = get_mq(table); + + return proc_dointvec(&mq_table, 
write, buffer, lenp, ppos); +} + +static int proc_mq_dointvec_minmax(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table mq_table; @@ -33,27 +43,23 @@ static int proc_mq_dointvec_minmax(ctl_table *table, int write, lenp, ppos); } #else +#define proc_mq_dointvec NULL #define proc_mq_dointvec_minmax NULL #endif -static int msg_queues_limit_min = MIN_QUEUESMAX; -static int msg_queues_limit_max = HARD_QUEUESMAX; - static int msg_max_limit_min = MIN_MSGMAX; static int msg_max_limit_max = HARD_MSGMAX; static int msg_maxsize_limit_min = MIN_MSGSIZEMAX; static int msg_maxsize_limit_max = HARD_MSGSIZEMAX; -static ctl_table mq_sysctls[] = { +static struct ctl_table mq_sysctls[] = { { .procname = "queues_max", .data = &init_ipc_ns.mq_queues_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_mq_dointvec_minmax, - .extra1 = &msg_queues_limit_min, - .extra2 = &msg_queues_limit_max, + .proc_handler = proc_mq_dointvec, }, { .procname = "msg_max", @@ -94,7 +100,7 @@ static ctl_table mq_sysctls[] = { {} }; -static ctl_table mq_sysctl_dir[] = { +static struct ctl_table mq_sysctl_dir[] = { { .procname = "mqueue", .mode = 0555, @@ -103,7 +109,7 @@ static ctl_table mq_sysctl_dir[] = { {} }; -static ctl_table mq_sysctl_root[] = { +static struct ctl_table mq_sysctl_root[] = { { .procname = "fs", .mode = 0555, diff --git a/ipc/mqueue.c b/ipc/mqueue.c index ae1996d3c53..4fcf39af177 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -6,7 +6,7 @@ * * Spinlocks: Mohamed Abbas (abbas.mohamed@intel.com) * Lockless receive & send, fd based notify: - * Manfred Spraul (manfred@colorfullife.com) + * Manfred Spraul (manfred@colorfullife.com) * * Audit: George Wilson (ltcgcw@us.ibm.com) * @@ -73,7 +73,7 @@ struct mqueue_inode_info { struct mq_attr attr; struct sigevent notify; - struct pid* notify_owner; + struct pid *notify_owner; struct user_namespace *notify_user_ns; struct user_struct *user; /* user who created, for accounting */ struct sock *notify_sock; @@ -92,7 +92,7 @@ static void remove_notification(struct mqueue_inode_info *info); static struct kmem_cache *mqueue_inode_cachep; -static struct ctl_table_header * mq_sysctl_table; +static struct ctl_table_header *mq_sysctl_table; static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode) { @@ -433,9 +433,9 @@ static int mqueue_create(struct inode *dir, struct dentry *dentry, error = -EACCES; goto out_unlock; } - if (ipc_ns->mq_queues_count >= HARD_QUEUESMAX || - (ipc_ns->mq_queues_count >= ipc_ns->mq_queues_max && - !capable(CAP_SYS_RESOURCE))) { + + if (ipc_ns->mq_queues_count >= ipc_ns->mq_queues_max && + !capable(CAP_SYS_RESOURCE)) { error = -ENOSPC; goto out_unlock; } @@ -466,13 +466,13 @@ out_unlock: static int mqueue_unlink(struct inode *dir, struct dentry *dentry) { - struct inode *inode = dentry->d_inode; + struct inode *inode = dentry->d_inode; dir->i_ctime = dir->i_mtime = dir->i_atime = CURRENT_TIME; dir->i_size -= DIRENT_SIZE; - drop_nlink(inode); - dput(dentry); - return 0; + drop_nlink(inode); + dput(dentry); + return 0; } /* @@ -622,7 +622,7 @@ static struct ext_wait_queue *wq_get_first_waiter( static inline void set_cookie(struct sk_buff *skb, char code) { - ((char*)skb->data)[NOTIFY_COOKIE_LEN-1] = code; + ((char *)skb->data)[NOTIFY_COOKIE_LEN-1] = code; } /* @@ -886,7 +886,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) err = -ENOENT; } else { ihold(inode); - err = vfs_unlink(dentry->d_parent->d_inode, dentry); + err = vfs_unlink(dentry->d_parent->d_inode, dentry, 
NULL); } dput(dentry); @@ -1303,11 +1303,11 @@ retry: out_fput: fdput(f); out: - if (sock) { + if (sock) netlink_detachskb(sock, nc); - } else if (nc) { + else if (nc) dev_kfree_skb(nc); - } + return ret; } @@ -1459,4 +1459,4 @@ out_sysctl: return error; } -__initcall(init_mqueue_fs); +device_initcall(init_mqueue_fs); diff --git a/ipc/msg.c b/ipc/msg.c index b0d541d4267..c5d8e374998 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -39,12 +39,10 @@ #include <linux/ipc_namespace.h> #include <asm/current.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include "util.h" -/* - * one msg_receiver structure for each sleeping receiver: - */ +/* one msg_receiver structure for each sleeping receiver */ struct msg_receiver { struct list_head r_list; struct task_struct *r_tsk; @@ -53,6 +51,12 @@ struct msg_receiver { long r_msgtype; long r_maxsize; + /* + * Mark r_msg volatile so that the compiler + * does not try to get smart and optimize + * it. We rely on this for the lockless + * receive algorithm. + */ struct msg_msg *volatile r_msg; }; @@ -70,75 +74,6 @@ struct msg_sender { #define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS]) -static void freeque(struct ipc_namespace *, struct kern_ipc_perm *); -static int newque(struct ipc_namespace *, struct ipc_params *); -#ifdef CONFIG_PROC_FS -static int sysvipc_msg_proc_show(struct seq_file *s, void *it); -#endif - -/* - * Scale msgmni with the available lowmem size: the memory dedicated to msg - * queues should occupy at most 1/MSG_MEM_SCALE of lowmem. - * Also take into account the number of nsproxies created so far. - * This should be done staying within the (MSGMNI , IPCMNI/nr_ipc_ns) range. - */ -void recompute_msgmni(struct ipc_namespace *ns) -{ - struct sysinfo i; - unsigned long allowed; - int nb_ns; - - si_meminfo(&i); - allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit) - / MSGMNB; - nb_ns = atomic_read(&nr_ipc_ns); - allowed /= nb_ns; - - if (allowed < MSGMNI) { - ns->msg_ctlmni = MSGMNI; - return; - } - - if (allowed > IPCMNI / nb_ns) { - ns->msg_ctlmni = IPCMNI / nb_ns; - return; - } - - ns->msg_ctlmni = allowed; -} - -void msg_init_ns(struct ipc_namespace *ns) -{ - ns->msg_ctlmax = MSGMAX; - ns->msg_ctlmnb = MSGMNB; - - recompute_msgmni(ns); - - atomic_set(&ns->msg_bytes, 0); - atomic_set(&ns->msg_hdrs, 0); - ipc_init_ids(&ns->ids[IPC_MSG_IDS]); -} - -#ifdef CONFIG_IPC_NS -void msg_exit_ns(struct ipc_namespace *ns) -{ - free_ipcs(ns, &msg_ids(ns), freeque); - idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr); -} -#endif - -void __init msg_init(void) -{ - msg_init_ns(&init_ipc_ns); - - printk(KERN_INFO "msgmni has been set to %d\n", - init_ipc_ns.msg_ctlmni); - - ipc_init_proc_interface("sysvipc/msg", - " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n", - IPC_MSG_IDS, sysvipc_msg_proc_show); -} - static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id) { struct kern_ipc_perm *ipcp = ipc_obtain_object(&msg_ids(ns), id); @@ -165,6 +100,15 @@ static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s) ipc_rmid(&msg_ids(ns), &s->q_perm); } +static void msg_rcu_free(struct rcu_head *head) +{ + struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); + struct msg_queue *msq = ipc_rcu_to_struct(p); + + security_msg_queue_free(msq); + ipc_rcu_free(head); +} + /** * newque - Create a new msg queue * @ns: namespace @@ -189,15 +133,14 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) msq->q_perm.security = NULL; retval = 
security_msg_queue_alloc(msq); if (retval) { - ipc_rcu_putref(msq); + ipc_rcu_putref(msq, ipc_rcu_free); return retval; } /* ipc_addid() locks msq upon success. */ id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); if (id < 0) { - security_msg_queue_free(msq); - ipc_rcu_putref(msq); + ipc_rcu_putref(msq, msg_rcu_free); return id; } @@ -219,7 +162,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss) { mss->tsk = current; - current->state = TASK_INTERRUPTIBLE; + __set_current_state(TASK_INTERRUPTIBLE); list_add_tail(&mss->list, &msq->q_senders); } @@ -245,8 +188,14 @@ static void expunge_all(struct msg_queue *msq, int res) struct msg_receiver *msr, *t; list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { - msr->r_msg = NULL; + msr->r_msg = NULL; /* initialize expunge ordering */ wake_up_process(msr->r_tsk); + /* + * Ensure that the wakeup is visible before setting r_msg as + * the receiving end depends on it: either spinning on a nil, + * or dealing with -EAGAIN cases. See lockless receive part 1 + * and 2 in do_msgrcv(). + */ smp_mb(); msr->r_msg = ERR_PTR(res); } @@ -276,8 +225,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) free_msg(msg); } atomic_sub(msq->q_cbytes, &ns->msg_bytes); - security_msg_queue_free(msq); - ipc_rcu_putref(msq); + ipc_rcu_putref(msq, msg_rcu_free); } /* @@ -293,15 +241,14 @@ static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg) SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg) { struct ipc_namespace *ns; - struct ipc_ops msg_ops; + static const struct ipc_ops msg_ops = { + .getnew = newque, + .associate = msg_security, + }; struct ipc_params msg_params; ns = current->nsproxy->ipc_ns; - msg_ops.getnew = newque; - msg_ops.associate = msg_security; - msg_ops.more_checks = NULL; - msg_params.key = key; msg_params.flg = msgflg; @@ -311,7 +258,7 @@ SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg) static inline unsigned long copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version) { - switch(version) { + switch (version) { case IPC_64: return copy_to_user(buf, in, sizeof(*in)); case IPC_OLD: @@ -356,7 +303,7 @@ copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version) static inline unsigned long copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version) { - switch(version) { + switch (version) { case IPC_64: if (copy_from_user(out, buf, sizeof(*out))) return -EFAULT; @@ -368,9 +315,9 @@ copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version) if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) return -EFAULT; - out->msg_perm.uid = tbuf_old.msg_perm.uid; - out->msg_perm.gid = tbuf_old.msg_perm.gid; - out->msg_perm.mode = tbuf_old.msg_perm.mode; + out->msg_perm.uid = tbuf_old.msg_perm.uid; + out->msg_perm.gid = tbuf_old.msg_perm.gid; + out->msg_perm.mode = tbuf_old.msg_perm.mode; if (tbuf_old.msg_qbytes == 0) out->msg_qbytes = tbuf_old.msg_lqbytes; @@ -599,23 +546,22 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) static int testmsg(struct msg_msg *msg, long type, int mode) { - switch(mode) - { - case SEARCH_ANY: - case SEARCH_NUMBER: + switch (mode) { + case SEARCH_ANY: + case SEARCH_NUMBER: + return 1; + case SEARCH_LESSEQUAL: + if (msg->m_type <= type) return 1; - case SEARCH_LESSEQUAL: - if (msg->m_type <=type) - return 1; - break; - case SEARCH_EQUAL: - if (msg->m_type == type) - return 1; - break; - case 
SEARCH_NOTEQUAL: - if (msg->m_type != type) - return 1; - break; + break; + case SEARCH_EQUAL: + if (msg->m_type == type) + return 1; + break; + case SEARCH_NOTEQUAL: + if (msg->m_type != type) + return 1; + break; } return 0; } @@ -631,15 +577,22 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg) list_del(&msr->r_list); if (msr->r_maxsize < msg->m_ts) { + /* initialize pipelined send ordering */ msr->r_msg = NULL; wake_up_process(msr->r_tsk); - smp_mb(); + smp_mb(); /* see barrier comment below */ msr->r_msg = ERR_PTR(-E2BIG); } else { msr->r_msg = NULL; msq->q_lrpid = task_pid_vnr(msr->r_tsk); msq->q_rtime = get_seconds(); wake_up_process(msr->r_tsk); + /* + * Ensure that the wakeup is visible before + * setting r_msg, as the receiving end depends + * on it. See lockless receive part 1 and 2 in + * do_msgrcv(). + */ smp_mb(); msr->r_msg = msg; @@ -647,6 +600,7 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg) } } } + return 0; } @@ -688,6 +642,12 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, if (ipcperms(ns, &msq->q_perm, S_IWUGO)) goto out_unlock0; + /* raced with RMID? */ + if (!ipc_valid_object(&msq->q_perm)) { + err = -EIDRM; + goto out_unlock0; + } + err = security_msg_queue_msgsnd(msq, msg, msgflg); if (err) goto out_unlock0; @@ -703,6 +663,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, goto out_unlock0; } + /* enqueue the sender and prepare to block */ ss_add(msq, &s); if (!ipc_rcu_getref(msq)) { @@ -717,8 +678,9 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, rcu_read_lock(); ipc_lock_object(&msq->q_perm); - ipc_rcu_putref(msq); - if (msq->q_perm.deleted) { + ipc_rcu_putref(msq, ipc_rcu_free); + /* raced with RMID? */ + if (!ipc_valid_object(&msq->q_perm)) { err = -EIDRM; goto out_unlock0; } @@ -872,6 +834,8 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl return -EINVAL; if (msgflg & MSG_COPY) { + if ((msgflg & MSG_EXCEPT) || !(msgflg & IPC_NOWAIT)) + return -EINVAL; copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax)); if (IS_ERR(copy)) return PTR_ERR(copy); @@ -894,6 +858,13 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl goto out_unlock1; ipc_lock_object(&msq->q_perm); + + /* raced with RMID? */ + if (!ipc_valid_object(&msq->q_perm)) { + msg = ERR_PTR(-EIDRM); + goto out_unlock0; + } + msg = find_msg(msq, &msgtyp, mode); if (!IS_ERR(msg)) { /* @@ -940,7 +911,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl else msr_d.r_maxsize = bufsz; msr_d.r_msg = ERR_PTR(-EAGAIN); - current->state = TASK_INTERRUPTIBLE; + __set_current_state(TASK_INTERRUPTIBLE); ipc_unlock_object(&msq->q_perm); rcu_read_unlock(); @@ -963,7 +934,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl * wake_up_process(). There is a race with exit(), see * ipc/mqueue.c for the details. */ - msg = (struct msg_msg*)msr_d.r_msg; + msg = (struct msg_msg *)msr_d.r_msg; while (msg == NULL) { cpu_relax(); msg = (struct msg_msg *)msr_d.r_msg; @@ -984,7 +955,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl /* Lockless receive, part 4: * Repeat test after acquiring the spinlock. 
*/ - msg = (struct msg_msg*)msr_d.r_msg; + msg = (struct msg_msg *)msr_d.r_msg; if (msg != ERR_PTR(-EAGAIN)) goto out_unlock0; @@ -1018,6 +989,57 @@ SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill); } +/* + * Scale msgmni with the available lowmem size: the memory dedicated to msg + * queues should occupy at most 1/MSG_MEM_SCALE of lowmem. + * Also take into account the number of nsproxies created so far. + * This should be done staying within the (MSGMNI , IPCMNI/nr_ipc_ns) range. + */ +void recompute_msgmni(struct ipc_namespace *ns) +{ + struct sysinfo i; + unsigned long allowed; + int nb_ns; + + si_meminfo(&i); + allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit) + / MSGMNB; + nb_ns = atomic_read(&nr_ipc_ns); + allowed /= nb_ns; + + if (allowed < MSGMNI) { + ns->msg_ctlmni = MSGMNI; + return; + } + + if (allowed > IPCMNI / nb_ns) { + ns->msg_ctlmni = IPCMNI / nb_ns; + return; + } + + ns->msg_ctlmni = allowed; +} + +void msg_init_ns(struct ipc_namespace *ns) +{ + ns->msg_ctlmax = MSGMAX; + ns->msg_ctlmnb = MSGMNB; + + recompute_msgmni(ns); + + atomic_set(&ns->msg_bytes, 0); + atomic_set(&ns->msg_hdrs, 0); + ipc_init_ids(&ns->ids[IPC_MSG_IDS]); +} + +#ifdef CONFIG_IPC_NS +void msg_exit_ns(struct ipc_namespace *ns) +{ + free_ipcs(ns, &msg_ids(ns), freeque); + idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr); +} +#endif + #ifdef CONFIG_PROC_FS static int sysvipc_msg_proc_show(struct seq_file *s, void *it) { @@ -1042,3 +1064,15 @@ static int sysvipc_msg_proc_show(struct seq_file *s, void *it) msq->q_ctime); } #endif + +void __init msg_init(void) +{ + msg_init_ns(&init_ipc_ns); + + printk(KERN_INFO "msgmni has been set to %d\n", + init_ipc_ns.msg_ctlmni); + + ipc_init_proc_interface("sysvipc/msg", + " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n", + IPC_MSG_IDS, sysvipc_msg_proc_show); +} diff --git a/ipc/msgutil.c b/ipc/msgutil.c index 491e71f2a1b..7e7095974d5 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c @@ -41,15 +41,15 @@ struct msg_msgseg { /* the next part of the message follows immediately */ }; -#define DATALEN_MSG (int)(PAGE_SIZE-sizeof(struct msg_msg)) -#define DATALEN_SEG (int)(PAGE_SIZE-sizeof(struct msg_msgseg)) +#define DATALEN_MSG ((size_t)PAGE_SIZE-sizeof(struct msg_msg)) +#define DATALEN_SEG ((size_t)PAGE_SIZE-sizeof(struct msg_msgseg)) -static struct msg_msg *alloc_msg(int len) +static struct msg_msg *alloc_msg(size_t len) { struct msg_msg *msg; struct msg_msgseg **pseg; - int alen; + size_t alen; alen = min(len, DATALEN_MSG); msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL); @@ -80,12 +80,12 @@ out_err: return NULL; } -struct msg_msg *load_msg(const void __user *src, int len) +struct msg_msg *load_msg(const void __user *src, size_t len) { struct msg_msg *msg; struct msg_msgseg *seg; int err = -EFAULT; - int alen; + size_t alen; msg = alloc_msg(len); if (msg == NULL) @@ -117,8 +117,8 @@ out_err: struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst) { struct msg_msgseg *dst_pseg, *src_pseg; - int len = src->m_ts; - int alen; + size_t len = src->m_ts; + size_t alen; BUG_ON(dst == NULL); if (src->m_ts > dst->m_ts) @@ -147,9 +147,9 @@ struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst) return ERR_PTR(-ENOSYS); } #endif -int store_msg(void __user *dest, struct msg_msg *msg, int len) +int store_msg(void __user *dest, struct msg_msg *msg, size_t len) { - int alen; + size_t alen; struct msg_msgseg *seg; alen = 
min(len, DATALEN_MSG); diff --git a/ipc/sem.c b/ipc/sem.c index 69b6a21f384..454f6c6020a 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -47,8 +47,7 @@ * Thus: Perfect SMP scaling between independent semaphore arrays. * If multiple semaphores in one array are used, then cache line * trashing on the semaphore array spinlock will limit the scaling. - * - semncnt and semzcnt are calculated on demand in count_semncnt() and - * count_semzcnt() + * - semncnt and semzcnt are calculated on demand in count_semcnt() * - the task that performs a successful semop() scans the list of all * sleeping tasks and completes any pending operations that can be fulfilled. * Semaphores are actively given to waiting tasks (necessary for FIFO). @@ -87,7 +86,7 @@ #include <linux/nsproxy.h> #include <linux/ipc_namespace.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include "util.h" /* One semaphore structure for each semaphore in the system. */ @@ -110,6 +109,7 @@ struct sem_queue { int pid; /* process id of requesting process */ int status; /* completion status of operation */ struct sembuf *sops; /* array of pending operations */ + struct sembuf *blocking; /* the operation that blocked */ int nsops; /* number of operations */ int alter; /* does *sops alter the array? */ }; @@ -160,7 +160,7 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it); * sem_array.pending{_alter,_cont}, * sem_array.sem_undo: global sem_lock() for read/write * sem_undo.proc_next: only "current" is allowed to read/write that field. - * + * * sem_array.sem_base[i].pending_{const,alter}: * global or semaphore sem_lock() for read/write */ @@ -188,7 +188,7 @@ void sem_exit_ns(struct ipc_namespace *ns) } #endif -void __init sem_init (void) +void __init sem_init(void) { sem_init_ns(&init_ipc_ns); ipc_init_proc_interface("sysvipc/sem", @@ -225,7 +225,7 @@ static void unmerge_queues(struct sem_array *sma) } /** - * merge_queues - Merge single semop queues into global queue + * merge_queues - merge single semop queues into global queue * @sma: semaphore array * * This function merges all per-semaphore queues into the global queue. @@ -243,71 +243,122 @@ static void merge_queues(struct sem_array *sma) } } +static void sem_rcu_free(struct rcu_head *head) +{ + struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); + struct sem_array *sma = ipc_rcu_to_struct(p); + + security_sem_free(sma); + ipc_rcu_free(head); +} + +/* + * Wait until all currently ongoing simple ops have completed. + * Caller must own sem_perm.lock. + * New simple ops cannot start, because simple ops first check + * that sem_perm.lock is free. + * that a) sem_perm.lock is free and b) complex_count is 0. + */ +static void sem_wait_array(struct sem_array *sma) +{ + int i; + struct sem *sem; + + if (sma->complex_count) { + /* The thread that increased sma->complex_count waited on + * all sem->lock locks. Thus we don't need to wait again. + */ + return; + } + + for (i = 0; i < sma->sem_nsems; i++) { + sem = sma->sem_base + i; + spin_unlock_wait(&sem->lock); + } +} + /* * If the request contains only one semaphore operation, and there are * no complex transactions pending, lock only the semaphore involved. * Otherwise, lock the entire semaphore array, since we either have * multiple semaphores in our own semops, or we need to look at * semaphores from other pending complex operations. - * - * Carefully guard against sma->complex_count changing between zero - * and non-zero while we are spinning for the lock. 
The value of - * sma->complex_count cannot change while we are holding the lock, - * so sem_unlock should be fine. - * - * The global lock path checks that all the local locks have been released, - * checking each local lock once. This means that the local lock paths - * cannot start their critical sections while the global lock is held. */ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, int nsops) { - int locknum; - again: - if (nsops == 1 && !sma->complex_count) { - struct sem *sem = sma->sem_base + sops->sem_num; + struct sem *sem; - /* Lock just the semaphore we are interested in. */ - spin_lock(&sem->lock); + if (nsops != 1) { + /* Complex operation - acquire a full lock */ + ipc_lock_object(&sma->sem_perm); - /* - * If sma->complex_count was set while we were spinning, - * we may need to look at things we did not lock here. + /* And wait until all simple ops that are processed + * right now have dropped their locks. */ - if (unlikely(sma->complex_count)) { - spin_unlock(&sem->lock); - goto lock_array; - } + sem_wait_array(sma); + return -1; + } + /* + * Only one semaphore affected - try to optimize locking. + * The rules are: + * - optimized locking is possible if no complex operation + * is either enqueued or processed right now. + * - The test for enqueued complex ops is simple: + * sma->complex_count != 0 + * - Testing for complex ops that are processed right now is + * a bit more difficult. Complex ops acquire the full lock + * and first wait that the running simple ops have completed. + * (see above) + * Thus: If we own a simple lock and the global lock is free + * and complex_count is now 0, then it will stay 0 and + * thus just locking sem->lock is sufficient. + */ + sem = sma->sem_base + sops->sem_num; + + if (sma->complex_count == 0) { /* - * Another process is holding the global lock on the - * sem_array; we cannot enter our critical section, - * but have to wait for the global lock to be released. + * It appears that no complex operation is around. + * Acquire the per-semaphore lock. */ - if (unlikely(spin_is_locked(&sma->sem_perm.lock))) { - spin_unlock(&sem->lock); - spin_unlock_wait(&sma->sem_perm.lock); - goto again; + spin_lock(&sem->lock); + + /* Then check that the global lock is free */ + if (!spin_is_locked(&sma->sem_perm.lock)) { + /* spin_is_locked() is not a memory barrier */ + smp_mb(); + + /* Now repeat the test of complex_count: + * It can't change anymore until we drop sem->lock. + * Thus: if is now 0, then it will stay 0. + */ + if (sma->complex_count == 0) { + /* fast path successful! */ + return sops->sem_num; + } } + spin_unlock(&sem->lock); + } - locknum = sops->sem_num; + /* slow path: acquire the full lock */ + ipc_lock_object(&sma->sem_perm); + + if (sma->complex_count == 0) { + /* False alarm: + * There is no complex operation, thus we can switch + * back to the fast path. + */ + spin_lock(&sem->lock); + ipc_unlock_object(&sma->sem_perm); + return sops->sem_num; } else { - int i; - /* - * Lock the semaphore array, and wait for all of the - * individual semaphore locks to go away. The code - * above ensures no new single-lock holders will enter - * their critical section while the array lock is held. + /* Not a false alarm, thus complete the sequence for a + * full lock. 
*/ - lock_array: - ipc_lock_object(&sma->sem_perm); - for (i = 0; i < sma->sem_nsems; i++) { - struct sem *sem = sma->sem_base + i; - spin_unlock_wait(&sem->lock); - } - locknum = -1; + sem_wait_array(sma); + return -1; } - return locknum; } static inline void sem_unlock(struct sem_array *sma, int locknum) @@ -343,7 +394,7 @@ static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns, /* ipc_rmid() may have already freed the ID while sem_lock * was spinning: verify that the structure is still valid */ - if (!ipcp->deleted) + if (ipc_valid_object(ipcp)) return container_of(ipcp, struct sem_array, sem_perm); sem_unlock(sma, *locknum); @@ -374,12 +425,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns static inline void sem_lock_and_putref(struct sem_array *sma) { sem_lock(sma, NULL, -1); - ipc_rcu_putref(sma); -} - -static inline void sem_putref(struct sem_array *sma) -{ - ipc_rcu_putref(sma); + ipc_rcu_putref(sma, ipc_rcu_free); } static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) @@ -399,11 +445,11 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) * * call wake_up_process * * set queue.status to the final value. * - the previously blocked thread checks queue.status: - * * if it's IN_WAKEUP, then it must wait until the value changes - * * if it's not -EINTR, then the operation was completed by - * update_queue. semtimedop can return queue.status without - * performing any operation on the sem array. - * * otherwise it must acquire the spinlock and check what's up. + * * if it's IN_WAKEUP, then it must wait until the value changes + * * if it's not -EINTR, then the operation was completed by + * update_queue. semtimedop can return queue.status without + * performing any operation on the sem array. + * * otherwise it must acquire the spinlock and check what's up. 
* * The two-stage algorithm is necessary to protect against the following * races: @@ -428,7 +474,6 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) * * Called with sem_ids.rwsem held (as a writer) */ - static int newary(struct ipc_namespace *ns, struct ipc_params *params) { int id; @@ -445,12 +490,12 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) if (ns->used_sems + nsems > ns->sc_semmns) return -ENOSPC; - size = sizeof (*sma) + nsems * sizeof (struct sem); + size = sizeof(*sma) + nsems * sizeof(struct sem); sma = ipc_rcu_alloc(size); - if (!sma) { + if (!sma) return -ENOMEM; - } - memset (sma, 0, size); + + memset(sma, 0, size); sma->sem_perm.mode = (semflg & S_IRWXUGO); sma->sem_perm.key = key; @@ -458,14 +503,13 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) sma->sem_perm.security = NULL; retval = security_sem_alloc(sma); if (retval) { - ipc_rcu_putref(sma); + ipc_rcu_putref(sma, ipc_rcu_free); return retval; } id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); if (id < 0) { - security_sem_free(sma); - ipc_rcu_putref(sma); + ipc_rcu_putref(sma, sem_rcu_free); return id; } ns->used_sems += nsems; @@ -520,7 +564,11 @@ static inline int sem_more_checks(struct kern_ipc_perm *ipcp, SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg) { struct ipc_namespace *ns; - struct ipc_ops sem_ops; + static const struct ipc_ops sem_ops = { + .getnew = newary, + .associate = sem_security, + .more_checks = sem_more_checks, + }; struct ipc_params sem_params; ns = current->nsproxy->ipc_ns; @@ -528,10 +576,6 @@ SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg) if (nsems < 0 || nsems > ns->sc_semmsl) return -EINVAL; - sem_ops.getnew = newary; - sem_ops.associate = sem_security; - sem_ops.more_checks = sem_more_checks; - sem_params.key = key; sem_params.flg = semflg; sem_params.u.nsems = nsems; @@ -539,30 +583,32 @@ SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg) return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); } -/** perform_atomic_semop - Perform (if possible) a semaphore operation +/** + * perform_atomic_semop - Perform (if possible) a semaphore operation * @sma: semaphore array - * @sops: array with operations that should be checked - * @nsems: number of sops - * @un: undo array - * @pid: pid that did the change + * @q: struct sem_queue that describes the operation * * Returns 0 if the operation was possible. * Returns 1 if the operation is impossible, the caller must sleep. * Negative values are error codes. */ - -static int perform_atomic_semop(struct sem_array *sma, struct sembuf *sops, - int nsops, struct sem_undo *un, int pid) +static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q) { - int result, sem_op; + int result, sem_op, nsops, pid; struct sembuf *sop; - struct sem * curr; + struct sem *curr; + struct sembuf *sops; + struct sem_undo *un; + + sops = q->sops; + nsops = q->nsops; + un = q->undo; for (sop = sops; sop < sops + nsops; sop++) { curr = sma->sem_base + sop->sem_num; sem_op = sop->sem_op; result = curr->semval; - + if (!sem_op && result) goto would_block; @@ -571,25 +617,25 @@ static int perform_atomic_semop(struct sem_array *sma, struct sembuf *sops, goto would_block; if (result > SEMVMX) goto out_of_range; + if (sop->sem_flg & SEM_UNDO) { int undo = un->semadj[sop->sem_num] - sem_op; - /* - * Exceeding the undo range is an error. - */ + /* Exceeding the undo range is an error. 
*/ if (undo < (-SEMAEM - 1) || undo > SEMAEM) goto out_of_range; + un->semadj[sop->sem_num] = undo; } + curr->semval = result; } sop--; + pid = q->pid; while (sop >= sops) { sma->sem_base[sop->sem_num].sempid = pid; - if (sop->sem_flg & SEM_UNDO) - un->semadj[sop->sem_num] -= sop->sem_op; sop--; } - + return 0; out_of_range: @@ -597,6 +643,8 @@ out_of_range: goto undo; would_block: + q->blocking = sop; + if (sop->sem_flg & IPC_NOWAIT) result = -EAGAIN; else @@ -605,7 +653,10 @@ would_block: undo: sop--; while (sop >= sops) { - sma->sem_base[sop->sem_num].semval -= sop->sem_op; + sem_op = sop->sem_op; + sma->sem_base[sop->sem_num].semval -= sem_op; + if (sop->sem_flg & SEM_UNDO) + un->semadj[sop->sem_num] += sem_op; sop--; } @@ -635,7 +686,7 @@ static void wake_up_sem_queue_prepare(struct list_head *pt, } /** - * wake_up_sem_queue_do(pt) - do the actual wake-up + * wake_up_sem_queue_do - do the actual wake-up * @pt: list of tasks to be woken up * * Do the actual wake-up. @@ -701,7 +752,7 @@ static int check_restart(struct sem_array *sma, struct sem_queue *q) } /** - * wake_const_ops(sma, semnum, pt) - Wake up non-alter tasks + * wake_const_ops - wake up non-alter tasks * @sma: semaphore array. * @semnum: semaphore that was modified. * @pt: list head for the tasks that must be woken up. @@ -734,8 +785,7 @@ static int wake_const_ops(struct sem_array *sma, int semnum, q = container_of(walk, struct sem_queue, list); walk = walk->next; - error = perform_atomic_semop(sma, q->sops, q->nsops, - q->undo, q->pid); + error = perform_atomic_semop(sma, q); if (error <= 0) { /* operation completed, remove from queue & wakeup */ @@ -751,15 +801,14 @@ static int wake_const_ops(struct sem_array *sma, int semnum, } /** - * do_smart_wakeup_zero(sma, sops, nsops, pt) - wakeup all wait for zero tasks + * do_smart_wakeup_zero - wakeup all wait for zero tasks * @sma: semaphore array * @sops: operations that were performed * @nsops: number of operations * @pt: list head of the tasks that must be woken up. * - * do_smart_wakeup_zero() checks all required queue for wait-for-zero - * operations, based on the actual changes that were performed on the - * semaphore array. + * Checks all required queue for wait-for-zero operations, based + * on the actual changes that were performed on the semaphore array. * The function returns 1 if at least one operation was completed successfully. */ static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops, @@ -803,7 +852,7 @@ static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops, /** - * update_queue(sma, semnum): Look for tasks that can be completed. + * update_queue - look for tasks that can be completed. * @sma: semaphore array. * @semnum: semaphore that was modified. * @pt: list head for the tasks that must be woken up. @@ -848,8 +897,7 @@ again: if (semnum != -1 && sma->sem_base[semnum].semval == 0) break; - error = perform_atomic_semop(sma, q->sops, q->nsops, - q->undo, q->pid); + error = perform_atomic_semop(sma, q); /* Does q->sleeper still need to sleep? */ if (error > 0) @@ -873,7 +921,25 @@ again: } /** - * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue + * set_semotime - set sem_otime + * @sma: semaphore array + * @sops: operations that modified the array, may be NULL + * + * sem_otime is replicated to avoid cache line trashing. + * This function sets one instance to the current time. 
+ */ +static void set_semotime(struct sem_array *sma, struct sembuf *sops) +{ + if (sops == NULL) { + sma->sem_base[0].sem_otime = get_seconds(); + } else { + sma->sem_base[sops[0].sem_num].sem_otime = + get_seconds(); + } +} + +/** + * do_smart_update - optimized update_queue * @sma: semaphore array * @sops: operations that were performed * @nsops: number of operations @@ -922,76 +988,78 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop } } } - if (otime) { - if (sops == NULL) { - sma->sem_base[0].sem_otime = get_seconds(); - } else { - sma->sem_base[sops[0].sem_num].sem_otime = - get_seconds(); - } - } + if (otime) + set_semotime(sma, sops); } +/* + * check_qop: Test if a queued operation sleeps on the semaphore semnum + */ +static int check_qop(struct sem_array *sma, int semnum, struct sem_queue *q, + bool count_zero) +{ + struct sembuf *sop = q->blocking; + + /* + * Linux always (since 0.99.10) reported a task as sleeping on all + * semaphores. This violates SUS, therefore it was changed to the + * standard compliant behavior. + * Give the administrators a chance to notice that an application + * might misbehave because it relies on the Linux behavior. + */ + pr_info_once("semctl(GETNCNT/GETZCNT) is since 3.16 Single Unix Specification compliant.\n" + "The task %s (%d) triggered the difference, watch for misbehavior.\n", + current->comm, task_pid_nr(current)); + + if (sop->sem_num != semnum) + return 0; + + if (count_zero && sop->sem_op == 0) + return 1; + if (!count_zero && sop->sem_op < 0) + return 1; + + return 0; +} /* The following counts are associated to each semaphore: * semncnt number of tasks waiting on semval being nonzero * semzcnt number of tasks waiting on semval being zero - * This model assumes that a task waits on exactly one semaphore. - * Since semaphore operations are to be performed atomically, tasks actually - * wait on a whole sequence of semaphores simultaneously. - * The counts we return here are a rough approximation, but still - * warrant that semncnt+semzcnt>0 if the task is on the pending queue. + * + * Per definition, a task waits only on the semaphore of the first semop + * that cannot proceed, even if additional operation would block, too. */ -static int count_semncnt (struct sem_array * sma, ushort semnum) +static int count_semcnt(struct sem_array *sma, ushort semnum, + bool count_zero) { - int semncnt; - struct sem_queue * q; + struct list_head *l; + struct sem_queue *q; + int semcnt; - semncnt = 0; - list_for_each_entry(q, &sma->sem_base[semnum].pending_alter, list) { - struct sembuf * sops = q->sops; - BUG_ON(sops->sem_num != semnum); - if ((sops->sem_op < 0) && !(sops->sem_flg & IPC_NOWAIT)) - semncnt++; - } + semcnt = 0; + /* First: check the simple operations. 
They are easy to evaluate */ + if (count_zero) + l = &sma->sem_base[semnum].pending_const; + else + l = &sma->sem_base[semnum].pending_alter; - list_for_each_entry(q, &sma->pending_alter, list) { - struct sembuf * sops = q->sops; - int nsops = q->nsops; - int i; - for (i = 0; i < nsops; i++) - if (sops[i].sem_num == semnum - && (sops[i].sem_op < 0) - && !(sops[i].sem_flg & IPC_NOWAIT)) - semncnt++; + list_for_each_entry(q, l, list) { + /* all task on a per-semaphore list sleep on exactly + * that semaphore + */ + semcnt++; } - return semncnt; -} -static int count_semzcnt (struct sem_array * sma, ushort semnum) -{ - int semzcnt; - struct sem_queue * q; - - semzcnt = 0; - list_for_each_entry(q, &sma->sem_base[semnum].pending_const, list) { - struct sembuf * sops = q->sops; - BUG_ON(sops->sem_num != semnum); - if ((sops->sem_op == 0) && !(sops->sem_flg & IPC_NOWAIT)) - semzcnt++; + /* Then: check the complex operations. */ + list_for_each_entry(q, &sma->pending_alter, list) { + semcnt += check_qop(sma, semnum, q, count_zero); } - - list_for_each_entry(q, &sma->pending_const, list) { - struct sembuf * sops = q->sops; - int nsops = q->nsops; - int i; - for (i = 0; i < nsops; i++) - if (sops[i].sem_num == semnum - && (sops[i].sem_op == 0) - && !(sops[i].sem_flg & IPC_NOWAIT)) - semzcnt++; + if (count_zero) { + list_for_each_entry(q, &sma->pending_const, list) { + semcnt += check_qop(sma, semnum, q, count_zero); + } } - return semzcnt; + return semcnt; } /* Free a semaphore set. freeary() is called with sem_ids.rwsem locked @@ -1047,13 +1115,12 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) wake_up_sem_queue_do(&tasks); ns->used_sems -= sma->sem_nsems; - security_sem_free(sma); - ipc_rcu_putref(sma); + ipc_rcu_putref(sma, sem_rcu_free); } static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version) { - switch(version) { + switch (version) { case IPC_64: return copy_to_user(buf, in, sizeof(*in)); case IPC_OLD: @@ -1096,7 +1163,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, int err; struct sem_array *sma; - switch(cmd) { + switch (cmd) { case IPC_INFO: case SEM_INFO: { @@ -1106,8 +1173,8 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, err = security_sem_semctl(NULL, cmd); if (err) return err; - - memset(&seminfo,0,sizeof(seminfo)); + + memset(&seminfo, 0, sizeof(seminfo)); seminfo.semmni = ns->sc_semmni; seminfo.semmns = ns->sc_semmns; seminfo.semmsl = ns->sc_semmsl; @@ -1126,9 +1193,9 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, } max_id = ipc_get_maxid(&sem_ids(ns)); up_read(&sem_ids(ns).rwsem); - if (copy_to_user(p, &seminfo, sizeof(struct seminfo))) + if (copy_to_user(p, &seminfo, sizeof(struct seminfo))) return -EFAULT; - return (max_id < 0) ? 0: max_id; + return (max_id < 0) ? 
0 : max_id; } case IPC_STAT: case SEM_STAT: @@ -1184,7 +1251,7 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, { struct sem_undo *un; struct sem_array *sma; - struct sem* curr; + struct sem *curr; int err; struct list_head tasks; int val; @@ -1227,6 +1294,12 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, sem_lock(sma, NULL, -1); + if (!ipc_valid_object(&sma->sem_perm)) { + sem_unlock(sma, -1); + rcu_read_unlock(); + return -EIDRM; + } + curr = &sma->sem_base[semnum]; ipc_assert_locked_object(&sma->sem_perm); @@ -1248,10 +1321,10 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, int cmd, void __user *p) { struct sem_array *sma; - struct sem* curr; + struct sem *curr; int err, nsems; ushort fast_sem_io[SEMMSL_FAST]; - ushort* sem_io = fast_sem_io; + ushort *sem_io = fast_sem_io; struct list_head tasks; INIT_LIST_HEAD(&tasks); @@ -1281,28 +1354,28 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, int i; sem_lock(sma, NULL, -1); - if(nsems > SEMMSL_FAST) { + if (!ipc_valid_object(&sma->sem_perm)) { + err = -EIDRM; + goto out_unlock; + } + if (nsems > SEMMSL_FAST) { if (!ipc_rcu_getref(sma)) { - sem_unlock(sma, -1); - rcu_read_unlock(); err = -EIDRM; - goto out_free; + goto out_unlock; } sem_unlock(sma, -1); rcu_read_unlock(); sem_io = ipc_alloc(sizeof(ushort)*nsems); - if(sem_io == NULL) { - sem_putref(sma); + if (sem_io == NULL) { + ipc_rcu_putref(sma, ipc_rcu_free); return -ENOMEM; } rcu_read_lock(); sem_lock_and_putref(sma); - if (sma->sem_perm.deleted) { - sem_unlock(sma, -1); - rcu_read_unlock(); + if (!ipc_valid_object(&sma->sem_perm)) { err = -EIDRM; - goto out_free; + goto out_unlock; } } for (i = 0; i < sma->sem_nsems; i++) @@ -1310,7 +1383,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, sem_unlock(sma, -1); rcu_read_unlock(); err = 0; - if(copy_to_user(array, sem_io, nsems*sizeof(ushort))) + if (copy_to_user(array, sem_io, nsems*sizeof(ushort))) err = -EFAULT; goto out_free; } @@ -1320,39 +1393,37 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, struct sem_undo *un; if (!ipc_rcu_getref(sma)) { - rcu_read_unlock(); - return -EIDRM; + err = -EIDRM; + goto out_rcu_wakeup; } rcu_read_unlock(); - if(nsems > SEMMSL_FAST) { + if (nsems > SEMMSL_FAST) { sem_io = ipc_alloc(sizeof(ushort)*nsems); - if(sem_io == NULL) { - sem_putref(sma); + if (sem_io == NULL) { + ipc_rcu_putref(sma, ipc_rcu_free); return -ENOMEM; } } - if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) { - sem_putref(sma); + if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) { + ipc_rcu_putref(sma, ipc_rcu_free); err = -EFAULT; goto out_free; } for (i = 0; i < nsems; i++) { if (sem_io[i] > SEMVMX) { - sem_putref(sma); + ipc_rcu_putref(sma, ipc_rcu_free); err = -ERANGE; goto out_free; } } rcu_read_lock(); sem_lock_and_putref(sma); - if (sma->sem_perm.deleted) { - sem_unlock(sma, -1); - rcu_read_unlock(); + if (!ipc_valid_object(&sma->sem_perm)) { err = -EIDRM; - goto out_free; + goto out_unlock; } for (i = 0; i < nsems; i++) @@ -1376,6 +1447,10 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, goto out_rcu_wakeup; sem_lock(sma, NULL, -1); + if (!ipc_valid_object(&sma->sem_perm)) { + err = -EIDRM; + goto out_unlock; + } curr = &sma->sem_base[semnum]; switch (cmd) { @@ -1386,10 +1461,10 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, err = curr->sempid; goto out_unlock; case GETNCNT: - err = 
@@ -1320,39 +1393,37 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		struct sem_undo *un;
 
 		if (!ipc_rcu_getref(sma)) {
-			rcu_read_unlock();
-			return -EIDRM;
+			err = -EIDRM;
+			goto out_rcu_wakeup;
 		}
 		rcu_read_unlock();
 
-		if(nsems > SEMMSL_FAST) {
+		if (nsems > SEMMSL_FAST) {
 			sem_io = ipc_alloc(sizeof(ushort)*nsems);
-			if(sem_io == NULL) {
-				sem_putref(sma);
+			if (sem_io == NULL) {
+				ipc_rcu_putref(sma, ipc_rcu_free);
 				return -ENOMEM;
 			}
 		}
 
-		if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) {
-			sem_putref(sma);
+		if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
+			ipc_rcu_putref(sma, ipc_rcu_free);
 			err = -EFAULT;
 			goto out_free;
 		}
 
 		for (i = 0; i < nsems; i++) {
 			if (sem_io[i] > SEMVMX) {
-				sem_putref(sma);
+				ipc_rcu_putref(sma, ipc_rcu_free);
 				err = -ERANGE;
 				goto out_free;
 			}
 		}
 		rcu_read_lock();
 		sem_lock_and_putref(sma);
-		if (sma->sem_perm.deleted) {
-			sem_unlock(sma, -1);
-			rcu_read_unlock();
+		if (!ipc_valid_object(&sma->sem_perm)) {
 			err = -EIDRM;
-			goto out_free;
+			goto out_unlock;
 		}
 
 		for (i = 0; i < nsems; i++)
@@ -1376,6 +1447,10 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		goto out_rcu_wakeup;
 
 	sem_lock(sma, NULL, -1);
+	if (!ipc_valid_object(&sma->sem_perm)) {
+		err = -EIDRM;
+		goto out_unlock;
+	}
 	curr = &sma->sem_base[semnum];
 
 	switch (cmd) {
@@ -1386,10 +1461,10 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		err = curr->sempid;
 		goto out_unlock;
 	case GETNCNT:
-		err = count_semncnt(sma,semnum);
+		err = count_semcnt(sma, semnum, 0);
 		goto out_unlock;
 	case GETZCNT:
-		err = count_semzcnt(sma,semnum);
+		err = count_semcnt(sma, semnum, 1);
 		goto out_unlock;
 	}
 
@@ -1399,7 +1474,7 @@ out_rcu_wakeup:
 	rcu_read_unlock();
 	wake_up_sem_queue_do(&tasks);
 out_free:
-	if(sem_io != fast_sem_io)
+	if (sem_io != fast_sem_io)
 		ipc_free(sem_io, sizeof(ushort)*nsems);
 	return err;
 }
@@ -1407,7 +1482,7 @@ out_free:
 static inline unsigned long
 copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version)
 {
-	switch(version) {
+	switch (version) {
 	case IPC_64:
 		if (copy_from_user(out, buf, sizeof(*out)))
 			return -EFAULT;
@@ -1416,7 +1491,7 @@ copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version)
 	{
 		struct semid_ds tbuf_old;
 
-		if(copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
+		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
 			return -EFAULT;
 
 		out->sem_perm.uid	= tbuf_old.sem_perm.uid;
@@ -1443,7 +1518,7 @@ static int semctl_down(struct ipc_namespace *ns, int semid,
 	struct semid64_ds semid64;
 	struct kern_ipc_perm *ipcp;
 
-	if(cmd == IPC_SET) {
+	if (cmd == IPC_SET) {
 		if (copy_semid_from_user(&semid64, p, version))
 			return -EFAULT;
 	}
@@ -1503,7 +1578,7 @@ SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
 	version = ipc_parse_version(&cmd);
 	ns = current->nsproxy->ipc_ns;
 
-	switch(cmd) {
+	switch (cmd) {
 	case IPC_INFO:
 	case SEM_INFO:
 	case IPC_STAT:
@@ -1571,7 +1646,7 @@ static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
 {
 	struct sem_undo *un;
 
-	assert_spin_locked(&ulp->lock); 
+	assert_spin_locked(&ulp->lock);
 
 	un = __lookup_undo(ulp, semid);
 	if (un) {
@@ -1582,7 +1657,7 @@ static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
 }
 
 /**
- * find_alloc_undo - Lookup (and if not present create) undo array
+ * find_alloc_undo - lookup (and if not present create) undo array
 * @ns: namespace
 * @semid: semaphore array id
 *
@@ -1607,7 +1682,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 	spin_lock(&ulp->lock);
 	un = lookup_undo(ulp, semid);
 	spin_unlock(&ulp->lock);
-	if (likely(un!=NULL))
+	if (likely(un != NULL))
 		goto out;
 
 	/* no undo structure around - allocate one. */
@@ -1629,14 +1704,14 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 	/* step 2: allocate new undo structure */
 	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
 	if (!new) {
-		sem_putref(sma);
+		ipc_rcu_putref(sma, ipc_rcu_free);
 		return ERR_PTR(-ENOMEM);
 	}
 
 	/* step 3: Acquire the lock on semaphore array */
 	rcu_read_lock();
 	sem_lock_and_putref(sma);
-	if (sma->sem_perm.deleted) {
+	if (!ipc_valid_object(&sma->sem_perm)) {
 		sem_unlock(sma, -1);
 		rcu_read_unlock();
 		kfree(new);
@@ -1672,7 +1747,7 @@ out:
 
 
 /**
- * get_queue_result - Retrieve the result code from sem_queue
+ * get_queue_result - retrieve the result code from sem_queue
 * @q: Pointer to queue structure
 *
 * Retrieve the return code from the pending queue. If IN_WAKEUP is found in
@@ -1702,7 +1777,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	int error = -EINVAL;
 	struct sem_array *sma;
 	struct sembuf fast_sops[SEMOPM_FAST];
-	struct sembuf* sops = fast_sops, *sop;
+	struct sembuf *sops = fast_sops, *sop;
 	struct sem_undo *un;
 	int undos = 0, alter = 0, max, locknum;
 	struct sem_queue queue;
@@ -1716,13 +1791,13 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 		return -EINVAL;
 	if (nsops > ns->sc_semopm)
 		return -E2BIG;
-	if(nsops > SEMOPM_FAST) {
-		sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL);
-		if(sops==NULL)
+	if (nsops > SEMOPM_FAST) {
+		sops = kmalloc(sizeof(*sops)*nsops, GFP_KERNEL);
+		if (sops == NULL)
 			return -ENOMEM;
 	}
-	if (copy_from_user (sops, tsops, nsops * sizeof(*tsops))) {
-		error=-EFAULT;
+	if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) {
+		error = -EFAULT;
 		goto out_free;
 	}
 	if (timeout) {
@@ -1781,6 +1856,18 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	if (error)
 		goto out_rcu_wakeup;
 
+	error = -EIDRM;
+	locknum = sem_lock(sma, sops, nsops);
+	/*
+	 * We eventually might perform the following check in a lockless
+	 * fashion, considering ipc_valid_object() locking constraints.
+	 * If nsops == 1 and there is no contention for sem_perm.lock, then
+	 * only a per-semaphore lock is held and it's OK to proceed with the
+	 * check below. More details on the fine grained locking scheme
+	 * entangled here and why it's RMID race safe on comments at sem_lock()
+	 */
+	if (!ipc_valid_object(&sma->sem_perm))
+		goto out_unlock_free;
 	/*
 	 * semid identifiers are not unique - find_alloc_undo may have
 	 * allocated an undo structure, it was invalidated by an RMID
@@ -1788,29 +1875,31 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	 * This case can be detected checking un->semid. The existence of
 	 * "un" itself is guaranteed by rcu.
 	 */
-	error = -EIDRM;
-	locknum = sem_lock(sma, sops, nsops);
 	if (un && un->semid == -1)
 		goto out_unlock_free;
 
-	error = perform_atomic_semop(sma, sops, nsops, un,
-					task_tgid_vnr(current));
-	if (error <= 0) {
-		if (alter && error == 0)
-			do_smart_update(sma, sops, nsops, 1, &tasks);
+	queue.sops = sops;
+	queue.nsops = nsops;
+	queue.undo = un;
+	queue.pid = task_tgid_vnr(current);
+	queue.alter = alter;
 
-		goto out_unlock_free;
+	error = perform_atomic_semop(sma, &queue);
+	if (error == 0) {
+		/* If the operation was successful, then do
+		 * the required updates.
+		 */
+		if (alter)
+			do_smart_update(sma, sops, nsops, 1, &tasks);
+		else
+			set_semotime(sma, sops);
 	}
+	if (error <= 0)
+		goto out_unlock_free;
 
 	/* We need to sleep on this operation, so we put the current
 	 * task into the pending queue and go to sleep.
 	 */
-
-	queue.sops = sops;
-	queue.nsops = nsops;
-	queue.undo = un;
-	queue.pid = task_tgid_vnr(current);
-	queue.alter = alter;
 
 	if (nsops == 1) {
 		struct sem *curr;
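The reordering above takes sem_lock() before the undo-structure check and packs the request into a struct sem_queue before calling perform_atomic_semop(), so the sleeping and non-sleeping paths share one description of the operation. The syscall being reshuffled is, from userspace, simply a semop() with a timeout; a small sketch (not part of the patch; _GNU_SOURCE is needed for the glibc semtimedop() prototype):

    #define _GNU_SOURCE
    #include <errno.h>
    #include <stdio.h>
    #include <sys/ipc.h>
    #include <sys/sem.h>
    #include <time.h>

    int main(void)
    {
            int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
            struct sembuf op = { .sem_num = 0, .sem_op = -1, .sem_flg = 0 };
            struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };

            if (id < 0)
                    return 1;
            /* semval is 0, so the decrement blocks and times out. */
            if (semtimedop(id, &op, 1, &ts) < 0 && errno == EAGAIN)
                    printf("timed out as expected\n");
            semctl(id, 0, IPC_RMID);
            return 0;
    }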
@@ -1889,10 +1978,8 @@ sleep_again:
 	 * If queue.status != -EINTR we are woken up by another process.
 	 * Leave without unlink_queue(), but with sem_unlock().
 	 */
-
-	if (error != -EINTR) {
+	if (error != -EINTR)
 		goto out_unlock_free;
-	}
 
 	/*
 	 * If an interrupt occurred we have to clean up the queue
@@ -1914,7 +2001,7 @@ out_rcu_wakeup:
 	rcu_read_unlock();
 	wake_up_sem_queue_do(&tasks);
 out_free:
-	if(sops != fast_sops)
+	if (sops != fast_sops)
 		kfree(sops);
 	return error;
 }
@@ -1940,7 +2027,7 @@ int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
 			return error;
 		atomic_inc(&undo_list->refcnt);
 		tsk->sysvsem.undo_list = undo_list;
-	} else 
+	} else
 		tsk->sysvsem.undo_list = NULL;
 
 	return 0;
@@ -1997,6 +2084,12 @@ void exit_sem(struct task_struct *tsk)
 		}
 
 		sem_lock(sma, NULL, -1);
+		/* exit_sem raced with IPC_RMID, nothing to do */
+		if (!ipc_valid_object(&sma->sem_perm)) {
+			sem_unlock(sma, -1);
+			rcu_read_unlock();
+			continue;
+		}
 		un = __lookup_undo(ulp, semid);
 		if (un == NULL) {
 			/* exit_sem raced with IPC_RMID+semget() that created
@@ -2017,7 +2110,7 @@ void exit_sem(struct task_struct *tsk)
 
 		/* perform adjustments registered in un */
 		for (i = 0; i < sma->sem_nsems; i++) {
-			struct sem * semaphore = &sma->sem_base[i];
+			struct sem *semaphore = &sma->sem_base[i];
 			if (un->semadj[i]) {
 				semaphore->semval += un->semadj[i];
 				/*
@@ -2031,7 +2124,7 @@ void exit_sem(struct task_struct *tsk)
 				 * Linux caps the semaphore value, both at 0
 				 * and at SEMVMX.
 				 *
-				 * Manfred <manfred@colorfullife.com> 
+				 * Manfred <manfred@colorfullife.com>
 				 */
 				if (semaphore->semval < 0)
 					semaphore->semval = 0;
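exit_sem(), patched above to bail out cleanly when it races with IPC_RMID, is the consumer of the undo lists built up by operations that pass SEM_UNDO. The effect is easy to demonstrate from userspace (a sketch, not part of the patch):

    #include <stdio.h>
    #include <sys/ipc.h>
    #include <sys/sem.h>
    #include <sys/wait.h>
    #include <unistd.h>

    int main(void)
    {
            int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
            struct sembuf up = { .sem_num = 0, .sem_op = 1, .sem_flg = SEM_UNDO };

            if (id < 0)
                    return 1;
            if (fork() == 0) {
                    semop(id, &up, 1);      /* child raises the value ... */
                    _exit(0);               /* ... and exit_sem() undoes it */
            }
            wait(NULL);
            printf("semval=%d\n", semctl(id, 0, GETVAL));   /* prints 0 */
            semctl(id, 0, IPC_RMID);
            return 0;
    }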
@@ -2059,6 +2152,14 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
 	struct sem_array *sma = it;
 	time_t sem_otime;
 
+	/*
+	 * The proc interface isn't aware of sem_lock(), it calls
+	 * ipc_lock_object() directly (in sysvipc_find_ipc).
+	 * In order to stay compatible with sem_lock(), we must wait until
+	 * all simple semop() calls have left their critical regions.
+	 */
+	sem_wait_array(sma);
+
 	sem_otime = get_semotime(sma);
 
 	return seq_printf(s,
diff --git a/ipc/shm.c b/ipc/shm.c
index 2821cdf93ad..89fc354156c 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -43,7 +43,7 @@
 #include <linux/mount.h>
 #include <linux/ipc_namespace.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include "util.h"
 
@@ -67,7 +67,7 @@ static const struct vm_operations_struct shm_vm_ops;
 static int newseg(struct ipc_namespace *, struct ipc_params *);
 static void shm_open(struct vm_area_struct *vma);
 static void shm_close(struct vm_area_struct *vma);
-static void shm_destroy (struct ipc_namespace *ns, struct shmid_kernel *shp);
+static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
 #endif
@@ -91,7 +91,7 @@ static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 	struct shmid_kernel *shp;
 
 	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
-	if (shp->shm_nattch){
+	if (shp->shm_nattch) {
 		shp->shm_perm.mode |= SHM_DEST;
 		/* Do not find it any more */
 		shp->shm_perm.key = IPC_PRIVATE;
@@ -116,7 +116,7 @@ static int __init ipc_ns_init(void)
 
 pure_initcall(ipc_ns_init);
 
-void __init shm_init (void)
+void __init shm_init(void)
 {
 	ipc_init_proc_interface("sysvipc/shm",
 #if BITS_PER_LONG <= 32
@@ -167,6 +167,15 @@ static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
 	ipc_lock_object(&ipcp->shm_perm);
 }
 
+static void shm_rcu_free(struct rcu_head *head)
+{
+	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+	struct shmid_kernel *shp = ipc_rcu_to_struct(p);
+
+	security_shm_free(shp);
+	ipc_rcu_free(head);
+}
+
 static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
 {
 	ipc_rmid(&shm_ids(ns), &s->shm_perm);
@@ -199,17 +208,19 @@ static void shm_open(struct vm_area_struct *vma)
 */
 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 {
+	struct file *shm_file;
+
+	shm_file = shp->shm_file;
+	shp->shm_file = NULL;
 	ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	shm_rmid(ns, shp);
 	shm_unlock(shp);
-	if (!is_file_hugepages(shp->shm_file))
-		shmem_lock(shp->shm_file, 0, shp->mlock_user);
+	if (!is_file_hugepages(shm_file))
+		shmem_lock(shm_file, 0, shp->mlock_user);
 	else if (shp->mlock_user)
-		user_shm_unlock(file_inode(shp->shm_file)->i_size,
-						shp->mlock_user);
-	fput (shp->shm_file);
-	security_shm_free(shp);
-	ipc_rcu_putref(shp);
+		user_shm_unlock(file_inode(shm_file)->i_size, shp->mlock_user);
+	fput(shm_file);
+	ipc_rcu_putref(shp, shm_rcu_free);
 }
 
 /*
@@ -237,7 +248,7 @@ static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 */
 static void shm_close(struct vm_area_struct *vma)
 {
-	struct file * file = vma->vm_file;
+	struct file *file = vma->vm_file;
 	struct shm_file_data *sfd = shm_file_data(file);
 	struct shmid_kernel *shp;
 	struct ipc_namespace *ns = sfd->ns;
@@ -368,7 +379,7 @@ static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
 }
 #endif
 
-static int shm_mmap(struct file * file, struct vm_area_struct * vma)
+static int shm_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct shm_file_data *sfd = shm_file_data(file);
 	int ret;
@@ -466,7 +477,6 @@ static const struct vm_operations_struct shm_vm_ops = {
 *
 * Called with shm_ids.rwsem held as a writer.
 */
-
 static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 {
 	key_t key = params->key;
@@ -475,7 +485,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	int error;
 	struct shmid_kernel *shp;
 	size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	struct file * file;
+	struct file *file;
 	char name[13];
 	int id;
 	vm_flags_t acctflag = 0;
@@ -483,7 +493,11 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	if (size < SHMMIN || size > ns->shm_ctlmax)
 		return -EINVAL;
 
-	if (ns->shm_tot + numpages > ns->shm_ctlall)
+	if (numpages << PAGE_SHIFT < size)
+		return -ENOSPC;
+
+	if (ns->shm_tot + numpages < ns->shm_tot ||
+			ns->shm_tot + numpages > ns->shm_ctlall)
 		return -ENOSPC;
 
 	shp = ipc_rcu_alloc(sizeof(*shp));
@@ -497,11 +511,11 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	shp->shm_perm.security = NULL;
 	error = security_shm_alloc(shp);
 	if (error) {
-		ipc_rcu_putref(shp);
+		ipc_rcu_putref(shp, ipc_rcu_free);
 		return error;
 	}
 
-	sprintf (name, "SYSV%08x", key);
+	sprintf(name, "SYSV%08x", key);
 	if (shmflg & SHM_HUGETLB) {
 		struct hstate *hs;
 		size_t hugesize;
@@ -522,7 +536,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	} else {
 		/*
 		 * Do not allow no accounting for OVERCOMMIT_NEVER, even
-		 * if it's asked for. 
+		 * if it's asked for.
		 */
 		if  ((shmflg & SHM_NORESERVE) &&
 				sysctl_overcommit_memory != OVERCOMMIT_NEVER)
@@ -566,8 +580,7 @@ no_id:
 		user_shm_unlock(size, shp->mlock_user);
 	fput(file);
no_file:
-	security_shm_free(shp);
-	ipc_rcu_putref(shp);
+	ipc_rcu_putref(shp, shm_rcu_free);
 	return error;
 }
 
@@ -600,15 +613,15 @@ static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
 {
 	struct ipc_namespace *ns;
-	struct ipc_ops shm_ops;
+	static const struct ipc_ops shm_ops = {
+		.getnew = newseg,
+		.associate = shm_security,
+		.more_checks = shm_more_checks,
+	};
 	struct ipc_params shm_params;
 
 	ns = current->nsproxy->ipc_ns;
 
-	shm_ops.getnew = newseg;
-	shm_ops.associate = shm_security;
-	shm_ops.more_checks = shm_more_checks;
-
 	shm_params.key = key;
 	shm_params.flg = shmflg;
 	shm_params.u.size = size;
@@ -618,7 +631,7 @@ SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
 
static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
 {
-	switch(version) {
+	switch (version) {
 	case IPC_64:
 		return copy_to_user(buf, in, sizeof(*in));
 	case IPC_OLD:
@@ -645,7 +658,7 @@ static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_
static inline unsigned long
copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
 {
-	switch(version) {
+	switch (version) {
 	case IPC_64:
 		if (copy_from_user(out, buf, sizeof(*out)))
 			return -EFAULT;
@@ -670,14 +683,14 @@ copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
 
static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
 {
-	switch(version) {
+	switch (version) {
 	case IPC_64:
 		return copy_to_user(buf, in, sizeof(*in));
 	case IPC_OLD:
 	    {
 		struct shminfo out;
 
-		if(in->shmmax > INT_MAX)
+		if (in->shmmax > INT_MAX)
 			out.shmmax = INT_MAX;
 		else
 			out.shmmax = (int)in->shmmax;
@@ -685,7 +698,7 @@ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminf
 		out.shmmin	= in->shmmin;
 		out.shmmni	= in->shmmni;
 		out.shmseg	= in->shmseg;
-		out.shmall	= in->shmall; 
+		out.shmall	= in->shmall;
 
 		return copy_to_user(buf, &out, sizeof(out));
 	    }
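Two of the newseg() hunks above are overflow hardening: the requested size is rounded up to numpages whole pages, and both the rounding and the ns->shm_tot accumulation are now checked for wrap-around before comparing against shm_ctlall. Seen from userspace (a sketch, not part of the patch; the exact errno for the oversized request depends on the configured limits):

    #include <errno.h>
    #include <stdio.h>
    #include <sys/ipc.h>
    #include <sys/shm.h>

    int main(void)
    {
            int id = shmget(IPC_PRIVATE, 1, IPC_CREAT | 0600); /* one byte */
            struct shmid_ds ds;

            if (id < 0 || shmctl(id, IPC_STAT, &ds) < 0)
                    return 1;
            /* shm_segsz keeps the requested size; a full page is allocated. */
            printf("shm_segsz=%zu\n", (size_t)ds.shm_segsz);
            shmctl(id, IPC_RMID, NULL);

            if (shmget(IPC_PRIVATE, (size_t)-1, IPC_CREAT | 0600) < 0)
                    printf("absurd size rejected, errno=%d\n", errno);
            return 0;
    }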
@@ -836,14 +849,14 @@ static int shmctl_nolock(struct ipc_namespace *ns, int shmid,
 		shminfo.shmall = ns->shm_ctlall;
 
 		shminfo.shmmin = SHMMIN;
-		if(copy_shminfo_to_user (buf, &shminfo, version))
+		if (copy_shminfo_to_user(buf, &shminfo, version))
 			return -EFAULT;
 
 		down_read(&shm_ids(ns).rwsem);
 		err = ipc_get_maxid(&shm_ids(ns));
 		up_read(&shm_ids(ns).rwsem);
 
-		if(err<0)
+		if (err < 0)
 			err = 0;
 		goto out;
 	}
@@ -854,7 +867,7 @@ static int shmctl_nolock(struct ipc_namespace *ns, int shmid,
 		memset(&shm_info, 0, sizeof(shm_info));
 		down_read(&shm_ids(ns).rwsem);
 		shm_info.used_ids = shm_ids(ns).in_use;
-		shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp);
+		shm_get_stat(ns, &shm_info.shm_rss, &shm_info.shm_swp);
 		shm_info.shm_tot = ns->shm_tot;
 		shm_info.swap_attempts = 0;
 		shm_info.swap_successes = 0;
@@ -965,14 +978,24 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 			goto out_unlock1;
 
 		ipc_lock_object(&shp->shm_perm);
+
+		/* check if shm_destroy() is tearing down shp */
+		if (!ipc_valid_object(&shp->shm_perm)) {
+			err = -EIDRM;
+			goto out_unlock0;
+		}
+
 		if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
 			kuid_t euid = current_euid();
-			err = -EPERM;
 			if (!uid_eq(euid, shp->shm_perm.uid) &&
-			    !uid_eq(euid, shp->shm_perm.cuid))
+			    !uid_eq(euid, shp->shm_perm.cuid)) {
+				err = -EPERM;
 				goto out_unlock0;
-			if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK))
+			}
+			if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) {
+				err = -EPERM;
 				goto out_unlock0;
+			}
 		}
 
 		shm_file = shp->shm_file;
@@ -1027,7 +1050,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
 	struct shmid_kernel *shp;
 	unsigned long addr;
 	unsigned long size;
-	struct file * file;
+	struct file *file;
 	int    err;
 	unsigned long flags;
 	unsigned long prot;
@@ -1094,6 +1117,14 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
 		goto out_unlock;
 
 	ipc_lock_object(&shp->shm_perm);
+
+	/* check if shm_destroy() is tearing down shp */
+	if (!ipc_valid_object(&shp->shm_perm)) {
+		ipc_unlock_object(&shp->shm_perm);
+		err = -EIDRM;
+		goto out_unlock;
+	}
+
 	path = shp->shm_file->f_path;
 	path_get(&path);
 	shp->shm_nattch++;
@@ -1133,6 +1164,9 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
 	down_write(&current->mm->mmap_sem);
 	if (addr && !(shmflg & SHM_REMAP)) {
 		err = -EINVAL;
+		if (addr + size < addr)
+			goto invalid;
+
 		if (find_vma_intersection(current->mm, addr, addr + size))
 			goto invalid;
 		/*
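do_shmat(), hardened above against both a racing shm_destroy() and an addr + size wrap-around, backs the familiar attach/detach sequence; a minimal userspace sketch (not part of the patch):

    #include <stdio.h>
    #include <string.h>
    #include <sys/ipc.h>
    #include <sys/shm.h>

    int main(void)
    {
            int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
            char *p;

            if (id < 0)
                    return 1;
            p = shmat(id, NULL, 0);         /* let the kernel pick the address */
            if (p == (void *)-1)
                    return 1;
            strcpy(p, "hello");
            printf("%s\n", p);
            shmdt(p);
            shmctl(id, IPC_RMID, NULL);
            return 0;
    }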
diff --git a/ipc/util.c b/ipc/util.c
index e829da9ed01..27d74e69fd5 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -17,12 +17,27 @@
 *  Pavel Emelianov <xemul@openvz.org>
 *
 * General sysv ipc locking scheme:
- *	when doing ipc id lookups, take the ids->rwsem
- *	rcu_read_lock()
- *		obtain the ipc object (kern_ipc_perm)
- *		perform security, capabilities, auditing and permission checks, etc.
- *		acquire the ipc lock (kern_ipc_perm.lock) throught ipc_lock_object()
- *		perform data updates (ie: SET, RMID, LOCK/UNLOCK commands)
+ *	rcu_read_lock()
+ *		obtain the ipc object (kern_ipc_perm) by looking up the id in an idr
+ *		tree.
+ *		- perform initial checks (capabilities, auditing and permission,
+ *		  etc).
+ *		- perform read-only operations, such as STAT, INFO commands.
+ *		acquire the ipc lock (kern_ipc_perm.lock) through
+ *		ipc_lock_object()
+ *		- perform data updates, such as SET, RMID commands and
+ *		  mechanism-specific operations (semop/semtimedop,
+ *		  msgsnd/msgrcv, shmat/shmdt).
+ *		drop the ipc lock, through ipc_unlock_object().
+ *	rcu_read_unlock()
+ *
+ * The ids->rwsem must be taken when:
+ *	- creating, removing and iterating the existing entries in ipc
+ *	  identifier sets.
+ *	- iterating through files under /proc/sysvipc/
+ *
+ * Note that sems have a special fast path that avoids kern_ipc_perm.lock -
+ * see sem_lock().
 */
 
 #include <linux/mm.h>
@@ -75,10 +90,8 @@ static int ipc_memory_callback(struct notifier_block *self,
		 * In order not to keep the lock on the hotplug memory chain
		 * for too long, queue a work item that will, when waken up,
		 * activate the ipcns notification chain.
-		 * No need to keep several ipc work items on the queue.
		 */
-		if (!work_pending(&ipc_memory_wq))
-			schedule_work(&ipc_memory_wq);
+		schedule_work(&ipc_memory_wq);
 		break;
 	case MEM_GOING_ONLINE:
 	case MEM_GOING_OFFLINE:
@@ -97,15 +110,15 @@ static struct notifier_block ipc_memory_nb = {
 };
 
 /**
- * ipc_init - initialise IPC subsystem
+ * ipc_init - initialise ipc subsystem
+ *
+ * The various sysv ipc resources (semaphores, messages and shared
+ * memory) are initialised.
 *
- *	The various system5 IPC resources (semaphores, messages and shared
- *	memory) are initialised
- *	A callback routine is registered into the memory hotplug notifier
- *	chain: since msgmni scales to lowmem this callback routine will be
- *	called upon successful memory add / remove to recompute msmgni.
+ * A callback routine is registered into the memory hotplug notifier
+ * chain: since msgmni scales to lowmem this callback routine will be
+ * called upon successful memory add / remove to recompute msgmni.
 */
-
 static int __init ipc_init(void)
 {
 	sem_init();
@@ -115,42 +128,32 @@ static int __init ipc_init(void)
 	register_ipcns_notifier(&init_ipc_ns);
 	return 0;
 }
-__initcall(ipc_init);
+device_initcall(ipc_init);
 
 /**
- * ipc_init_ids		-	initialise IPC identifiers
- * @ids: Identifier set
+ * ipc_init_ids	- initialise ipc identifiers
+ * @ids: ipc identifier set
 *
- *	Set up the sequence range to use for the ipc identifier range (limited
- *	below IPCMNI) then initialise the ids idr.
+ * Set up the sequence range to use for the ipc identifier range (limited
+ * below IPCMNI) then initialise the ids idr.
 */
-
 void ipc_init_ids(struct ipc_ids *ids)
 {
-	init_rwsem(&ids->rwsem);
-
 	ids->in_use = 0;
 	ids->seq = 0;
 	ids->next_id = -1;
-	{
-		int seq_limit = INT_MAX/SEQ_MULTIPLIER;
-		if (seq_limit > USHRT_MAX)
-			ids->seq_max = USHRT_MAX;
-		 else
-			ids->seq_max = seq_limit;
-	}
-
+	init_rwsem(&ids->rwsem);
 	idr_init(&ids->ipcs_idr);
 }
 
 #ifdef CONFIG_PROC_FS
 static const struct file_operations sysvipc_proc_fops;
 /**
- * ipc_init_proc_interface	-  Create a proc interface for sysipc types using a seq_file interface.
- * @path: Path in procfs
- * @header: Banner to be printed at the beginning of the file.
- * @ids: ipc id table to iterate.
- * @show: show routine.
+ * ipc_init_proc_interface - create a proc interface for sysipc types using a seq_file interface.
+ * @path: Path in procfs
+ * @header: Banner to be printed at the beginning of the file.
+ * @ids: ipc id table to iterate.
+ * @show: show routine.
 */
 void __init ipc_init_proc_interface(const char *path, const char *header,
 		int ids, int (*show)(struct seq_file *, void *))
@@ -171,23 +174,21 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
 			       NULL,           /* parent dir */
 			       &sysvipc_proc_fops,
 			       iface);
-	if (!pde) {
+	if (!pde)
 		kfree(iface);
-	}
 }
 #endif
 
 /**
- * ipc_findkey	-	find a key in an ipc identifier set
- * @ids: Identifier set
- * @key: The key to find
- *
- *	Requires ipc_ids.rwsem locked.
- *	Returns the LOCKED pointer to the ipc structure if found or NULL
- *	if not.
- *	If key is found ipc points to the owning ipc structure
+ * ipc_findkey	- find a key in an ipc identifier set
+ * @ids: ipc identifier set
+ * @key: key to find
+ *
+ * Returns the locked pointer to the ipc structure if found or NULL
+ * otherwise. If key is found ipc points to the owning ipc structure
+ *
+ * Called with ipc_ids.rwsem held.
 */
-
 static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
 {
 	struct kern_ipc_perm *ipc;
@@ -214,12 +215,11 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
 }
 
 /**
- * ipc_get_maxid 	-	get the last assigned id
- * @ids: IPC identifier set
+ * ipc_get_maxid - get the last assigned id
+ * @ids: ipc identifier set
 *
- *	Called with ipc_ids.rwsem held.
+ * Called with ipc_ids.rwsem held.
 */
-
 int ipc_get_maxid(struct ipc_ids *ids)
 {
 	struct kern_ipc_perm *ipc;
@@ -245,19 +245,19 @@ int ipc_get_maxid(struct ipc_ids *ids)
 }
 
 /**
- * ipc_addid 	-	add an IPC identifier
- * @ids: IPC identifier set
- * @new: new IPC permission set
- * @size: limit for the number of used ids
+ * ipc_addid - add an ipc identifier
+ * @ids: ipc identifier set
+ * @new: new ipc permission set
+ * @size: limit for the number of used ids
 *
- *	Add an entry 'new' to the IPC ids idr. The permissions object is
- *	initialised and the first free entry is set up and the id assigned
- *	is returned. The 'new' entry is returned in a locked state on success.
- *	On failure the entry is not locked and a negative err-code is returned.
+ * Add an entry 'new' to the ipc ids idr. The permissions object is
+ * initialised and the first free entry is set up and the id assigned
+ * is returned. The 'new' entry is returned in a locked state on success.
+ * On failure the entry is not locked and a negative err-code is returned.
 *
- *	Called with writer ipc_ids.rwsem held.
+ * Called with writer ipc_ids.rwsem held.
 */
-int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
+int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size)
 {
 	kuid_t euid;
 	kgid_t egid;
@@ -273,7 +273,7 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size)
 	idr_preload(GFP_KERNEL);
 
 	spin_lock_init(&new->lock);
-	new->deleted = 0;
+	new->deleted = false;
 	rcu_read_lock();
 	spin_lock(&new->lock);
 
@@ -295,7 +295,7 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size)
 
 	if (next_id < 0) {
 		new->seq = ids->seq++;
-		if (ids->seq > ids->seq_max)
+		if (ids->seq > IPCID_SEQ_MAX)
 			ids->seq = 0;
 	} else {
 		new->seq = ipcid_to_seqx(next_id);
@@ -307,17 +307,17 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size)
 }
 
 /**
- * ipcget_new	-	create a new ipc object
- * @ns: namespace
- * @ids: IPC identifer set
- * @ops: the actual creation routine to call
- * @params: its parameters
- *
- *	This routine is called by sys_msgget, sys_semget() and sys_shmget()
- *	when the key is IPC_PRIVATE.
+ * ipcget_new - create a new ipc object
+ * @ns: ipc namespace
+ * @ids: ipc identifier set
+ * @ops: the actual creation routine to call
+ * @params: its parameters
+ *
+ * This routine is called by sys_msgget, sys_semget() and sys_shmget()
+ * when the key is IPC_PRIVATE.
 */
 static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids,
-		struct ipc_ops *ops, struct ipc_params *params)
+		const struct ipc_ops *ops, struct ipc_params *params)
 {
 	int err;
 
@@ -328,23 +328,23 @@ static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids,
 }
 
 /**
- * ipc_check_perms	-	check security and permissions for an IPC
- * @ns: IPC namespace
- * @ipcp: ipc permission set
- * @ops: the actual security routine to call
- * @params: its parameters
+ * ipc_check_perms - check security and permissions for an ipc object
+ * @ns: ipc namespace
+ * @ipcp: ipc permission set
+ * @ops: the actual security routine to call
+ * @params: its parameters
 *
- *	This routine is called by sys_msgget(), sys_semget() and sys_shmget()
- *	when the key is not IPC_PRIVATE and that key already exists in the
- *	ids IDR.
+ * This routine is called by sys_msgget(), sys_semget() and sys_shmget()
+ * when the key is not IPC_PRIVATE and that key already exists in the
+ * ids IDR.
 *
- *	On success, the IPC id is returned.
+ * On success, the ipc id is returned.
 *
- *	It is called with ipc_ids.rwsem and ipcp->lock held.
+ * It is called with ipc_ids.rwsem and ipcp->lock held.
 */
 static int ipc_check_perms(struct ipc_namespace *ns,
 			   struct kern_ipc_perm *ipcp,
-			   struct ipc_ops *ops,
+			   const struct ipc_ops *ops,
			   struct ipc_params *params)
 {
 	int err;
@@ -361,21 +361,21 @@ static int ipc_check_perms(struct ipc_namespace *ns,
 }
 
 /**
- * ipcget_public	-	get an ipc object or create a new one
- * @ns: namespace
- * @ids: IPC identifer set
- * @ops: the actual creation routine to call
- * @params: its parameters
- *
- *	This routine is called by sys_msgget, sys_semget() and sys_shmget()
- *	when the key is not IPC_PRIVATE.
- *	It adds a new entry if the key is not found and does some permission
- *	/ security checkings if the key is found.
- *
- *	On success, the ipc id is returned.
+ * ipcget_public - get an ipc object or create a new one
+ * @ns: ipc namespace
+ * @ids: ipc identifier set
+ * @ops: the actual creation routine to call
+ * @params: its parameters
+ *
+ * This routine is called by sys_msgget, sys_semget() and sys_shmget()
+ * when the key is not IPC_PRIVATE.
+ * It adds a new entry if the key is not found and does some permission
+ * / security checks if the key is found.
+ *
+ * On success, the ipc id is returned.
 */
 static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids,
-		struct ipc_ops *ops, struct ipc_params *params)
+		const struct ipc_ops *ops, struct ipc_params *params)
 {
 	struct kern_ipc_perm *ipcp;
 	int flg = params->flg;
@@ -418,39 +418,33 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids,
 
 
 /**
- * ipc_rmid	-	remove an IPC identifier
- * @ids: IPC identifier set
- * @ipcp: ipc perm structure containing the identifier to remove
+ * ipc_rmid - remove an ipc identifier
+ * @ids: ipc identifier set
+ * @ipcp: ipc perm structure containing the identifier to remove
 *
- *	ipc_ids.rwsem (as a writer) and the spinlock for this ID are held
- *	before this function is called, and remain locked on the exit.
+ * ipc_ids.rwsem (as a writer) and the spinlock for this ID are held
+ * before this function is called, and remain locked on the exit.
 */
-
 void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
 {
 	int lid = ipcid_to_idx(ipcp->id);
 
 	idr_remove(&ids->ipcs_idr, lid);
-
 	ids->in_use--;
-
-	ipcp->deleted = 1;
-
-	return;
+	ipcp->deleted = true;
 }
 
 /**
- * ipc_alloc	-	allocate ipc space
- * @size: size desired
+ * ipc_alloc - allocate ipc space
+ * @size: size desired
 *
- *	Allocate memory from the appropriate pools and return a pointer to it.
- *	NULL is returned if the allocation fails
+ * Allocate memory from the appropriate pools and return a pointer to it.
+ * NULL is returned if the allocation fails
 */
-
 void *ipc_alloc(int size)
 {
 	void *out;
-	if(size > PAGE_SIZE)
+	if (size > PAGE_SIZE)
 		out = vmalloc(size);
 	else
 		out = kmalloc(size, GFP_KERNEL);
@@ -458,33 +452,27 @@ void *ipc_alloc(int size)
 }
 
 /**
- * ipc_free     -       free ipc space
- * @ptr: pointer returned by ipc_alloc
- * @size: size of block
+ * ipc_free - free ipc space
+ * @ptr: pointer returned by ipc_alloc
+ * @size: size of block
 *
- *	Free a block created with ipc_alloc(). The caller must know the size
- *	used in the allocation call.
+ * Free a block created with ipc_alloc(). The caller must know the size
+ * used in the allocation call.
 */
-
-void ipc_free(void* ptr, int size)
+void ipc_free(void *ptr, int size)
 {
-	if(size > PAGE_SIZE)
+	if (size > PAGE_SIZE)
 		vfree(ptr);
 	else
 		kfree(ptr);
 }
 
-struct ipc_rcu {
-	struct rcu_head rcu;
-	atomic_t refcount;
-} ____cacheline_aligned_in_smp;
-
 /**
- * ipc_rcu_alloc	-	allocate ipc and rcu space 
- * @size: size desired
+ * ipc_rcu_alloc - allocate ipc and rcu space
+ * @size: size desired
 *
- *	Allocate memory for the rcu header structure +  the object.
- *	Returns the pointer to the object or NULL upon failure.
+ * Allocate memory for the rcu header structure + the object.
+ * Returns the pointer to the object or NULL upon failure.
 */
 void *ipc_rcu_alloc(int size)
 {
@@ -505,41 +493,37 @@ int ipc_rcu_getref(void *ptr)
 	return atomic_inc_not_zero(&p->refcount);
 }
 
-/**
- * ipc_schedule_free	- free ipc + rcu space
- * @head: RCU callback structure for queued work
- */
-static void ipc_schedule_free(struct rcu_head *head)
-{
-	vfree(container_of(head, struct ipc_rcu, rcu));
-}
-
-void ipc_rcu_putref(void *ptr)
+void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head))
 {
 	struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1;
 
 	if (!atomic_dec_and_test(&p->refcount))
 		return;
 
-	if (is_vmalloc_addr(ptr)) {
-		call_rcu(&p->rcu, ipc_schedule_free);
-	} else {
-		kfree_rcu(p, rcu);
-	}
+	call_rcu(&p->rcu, func);
+}
+
+void ipc_rcu_free(struct rcu_head *head)
+{
+	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+
+	if (is_vmalloc_addr(p))
+		vfree(p);
+	else
+		kfree(p);
 }
 
 /**
- * ipcperms	-	check IPC permissions
- * @ns: IPC namespace
- * @ipcp: IPC permission set
- * @flag: desired permission set.
+ * ipcperms - check ipc permissions
+ * @ns: ipc namespace
+ * @ipcp: ipc permission set
+ * @flag: desired permission set
 *
- *	Check user, group, other permissions for access
- *	to ipc resources. return 0 if allowed
+ * Check user, group, other permissions for access
+ * to ipc resources. return 0 if allowed
 *
- * @flag will most probably be 0 or S_...UGO from <linux/stat.h> 
+ * @flag will most probably be 0 or S_...UGO from <linux/stat.h>
 */
-
 int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flag)
 {
 	kuid_t euid = current_euid();
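The change above replaces the bare ipc_rcu_putref() with a variant that takes the RCU callback as an argument, so the task dropping the last reference frees the object through one type-specific destructor (sem_rcu_free(), shm_rcu_free(), or plain ipc_rcu_free()) instead of every caller open-coding the security hook teardown. A userspace analogue of the refcount-plus-destructor shape (illustration only, not kernel code, and without the RCU grace period that call_rcu() provides):

    #include <stdio.h>
    #include <stdlib.h>
    #include <stdatomic.h>

    struct obj {
            atomic_int refcount;
    };

    static void obj_free(struct obj *o)
    {
            printf("destructor runs exactly once\n");
            free(o);
    }

    static void obj_put(struct obj *o, void (*free_fn)(struct obj *))
    {
            /* Only the final put frees, and the caller picks the
             * destructor, mirroring ipc_rcu_putref(ptr, func). */
            if (atomic_fetch_sub(&o->refcount, 1) == 1)
                    free_fn(o);
    }

    int main(void)
    {
            struct obj *o = malloc(sizeof(*o));

            if (!o)
                    return 1;
            atomic_init(&o->refcount, 2);
            obj_put(o, obj_free);   /* 2 -> 1: nothing happens */
            obj_put(o, obj_free);   /* 1 -> 0: destructor runs */
            return 0;
    }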
@@ -554,7 +538,7 @@ int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flag)
 	else if (in_group_p(ipcp->cgid) || in_group_p(ipcp->gid))
 		granted_mode >>= 3;
 	/* is there some bit set in requested_mode but not in granted_mode? */
-	if ((requested_mode & ~granted_mode & 0007) && 
+	if ((requested_mode & ~granted_mode & 0007) &&
 	    !ns_capable(ns->user_ns, CAP_IPC_OWNER))
 		return -1;
 
@@ -567,16 +551,14 @@ int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flag)
 */
 
 /**
- * kernel_to_ipc64_perm	-	convert kernel ipc permissions to user
- * @in: kernel permissions
- * @out: new style IPC permissions
+ * kernel_to_ipc64_perm	- convert kernel ipc permissions to user
+ * @in: kernel permissions
+ * @out: new style ipc permissions
 *
- *	Turn the kernel object @in into a set of permissions descriptions
- *	for returning to userspace (@out).
+ * Turn the kernel object @in into a set of permissions descriptions
+ * for returning to userspace (@out).
 */
-
-
-void kernel_to_ipc64_perm (struct kern_ipc_perm *in, struct ipc64_perm *out)
+void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out)
 {
 	out->key	= in->key;
 	out->uid	= from_kuid_munged(current_user_ns(), in->uid);
@@ -588,15 +570,14 @@ void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out)
 }
 
 /**
- * ipc64_perm_to_ipc_perm	-	convert new ipc permissions to old
- * @in: new style IPC permissions
- * @out: old style IPC permissions
+ * ipc64_perm_to_ipc_perm - convert new ipc permissions to old
+ * @in: new style ipc permissions
+ * @out: old style ipc permissions
 *
- *	Turn the new style permissions object @in into a compatibility
- *	object and store it into the @out pointer.
+ * Turn the new style permissions object @in into a compatibility
+ * object and store it into the @out pointer.
 */
-
-void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out)
+void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out)
 {
 	out->key	= in->key;
 	SET_UID(out->uid, in->uid);
@@ -630,8 +611,8 @@ struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id)
 }
 
 /**
- * ipc_lock - Lock an ipc structure without rwsem held
- * @ids: IPC identifier set
+ * ipc_lock - lock an ipc structure without rwsem held
+ * @ids: ipc identifier set
 * @id: ipc id to look for
 *
 * Look for an id in the ipc ids idr and lock the associated ipc object.
@@ -652,7 +633,7 @@ struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
 	/* ipc_rmid() may have already freed the ID while ipc_lock
	 * was spinning: here verify that the structure is still valid
	 */
-	if (!out->deleted)
+	if (ipc_valid_object(out))
 		return out;
 
 	spin_unlock(&out->lock);
@@ -688,16 +669,16 @@ out:
 
 /**
 * ipcget - Common sys_*get() code
- * @ns : namsepace
- * @ids : IPC identifier set
- * @ops : operations to be called on ipc object creation, permission checks
- *        and further checks
- * @params : the parameters needed by the previous operations.
+ * @ns: namespace
+ * @ids: ipc identifier set
+ * @ops: operations to be called on ipc object creation, permission checks
+ *       and further checks
+ * @params: the parameters needed by the previous operations.
 *
 * Common routine called by sys_msgget(), sys_semget() and sys_shmget().
 */
int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
-			struct ipc_ops *ops, struct ipc_params *params)
+			const struct ipc_ops *ops, struct ipc_params *params)
 {
 	if (params->key == IPC_PRIVATE)
 		return ipcget_new(ns, ids, ops, params);
@@ -706,7 +687,7 @@ int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
 }
 
 /**
- * ipc_update_perm - update the permissions of an IPC.
+ * ipc_update_perm - update the permissions of an ipc object
 * @in:  the permission given as input.
 * @out: the permission of the ipc to set.
 */
@@ -727,7 +708,7 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out)
 
 /**
 * ipcctl_pre_down_nolock - retrieve an ipc and check permissions for some IPC_XXX cmd
- * @ns:  the ipc namespace
+ * @ns: ipc namespace
 * @ids:  the table of ids where to look for the ipc
 * @id:   the id of the ipc to retrieve
 * @cmd:  the cmd to check
@@ -774,15 +755,14 @@ err:
 
 
 /**
- * ipc_parse_version	-	IPC call version
- * @cmd: pointer to command
+ * ipc_parse_version - ipc call version
+ * @cmd: pointer to command
 *
- *	Return IPC_64 for new style IPC and IPC_OLD for old style IPC. 
- *	The @cmd value is turned from an encoding command and version into
- *	just the command code.
+ * Return IPC_64 for new style IPC and IPC_OLD for old style IPC.
+ * The @cmd value is turned from an encoding command and version into
+ * just the command code.
 */
-
-int ipc_parse_version (int *cmd)
+int ipc_parse_version(int *cmd)
 {
 	if (*cmd & IPC_64) {
 		*cmd ^= IPC_64;
@@ -819,7 +799,7 @@ static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos,
 	if (total >= ids->in_use)
 		return NULL;
 
-	for ( ; pos < IPCMNI; pos++) {
+	for (; pos < IPCMNI; pos++) {
 		ipc = idr_find(&ids->ipcs_idr, pos);
 		if (ipc != NULL) {
 			*new_pos = pos + 1;
@@ -922,8 +902,10 @@ static int sysvipc_proc_open(struct inode *inode, struct file *file)
 		goto out;
 
 	ret = seq_open(file, &sysvipc_proc_seqops);
-	if (ret)
-		goto out_kfree;
+	if (ret) {
+		kfree(iter);
+		goto out;
+	}
 
 	seq = file->private_data;
 	seq->private = iter;
@@ -932,9 +914,6 @@ static int sysvipc_proc_open(struct inode *inode, struct file *file)
 	iter->ns    = get_ipc_ns(current->nsproxy->ipc_ns);
out:
 	return ret;
-out_kfree:
-	kfree(iter);
-	goto out;
 }
 
 static int sysvipc_proc_release(struct inode *inode, struct file *file)
diff --git a/ipc/util.h b/ipc/util.h
index c5f3338ba1f..1a5a0fcd099 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -15,9 +15,9 @@
 
 #define SEQ_MULTIPLIER	(IPCMNI)
 
-void sem_init (void);
-void msg_init (void);
-void shm_init (void);
+void sem_init(void);
+void msg_init(void);
+void shm_init(void);
 
 struct ipc_namespace;
 
@@ -47,6 +47,13 @@ static inline void msg_exit_ns(struct ipc_namespace *ns) { }
static inline void shm_exit_ns(struct ipc_namespace *ns) { }
 #endif
 
+struct ipc_rcu {
+	struct rcu_head rcu;
+	atomic_t refcount;
+} ____cacheline_aligned_in_smp;
+
+#define ipc_rcu_to_struct(p)  ((void *)(p+1))
+
 /*
 * Structure that holds the parameters needed by the ipc operations
 * (see after)
 */
@@ -71,9 +78,9 @@ struct ipc_params {
 *	. routine to call for an extra check if needed
 */
struct ipc_ops {
-	int (*getnew) (struct ipc_namespace *, struct ipc_params *);
-	int (*associate) (struct kern_ipc_perm *, int);
-	int (*more_checks) (struct kern_ipc_perm *, struct ipc_params *);
+	int (*getnew)(struct ipc_namespace *, struct ipc_params *);
+	int (*associate)(struct kern_ipc_perm *, int);
+	int (*more_checks)(struct kern_ipc_perm *, struct ipc_params *);
};
 
struct seq_file;
@@ -93,6 +100,7 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
 
 #define ipcid_to_idx(id)  ((id) % SEQ_MULTIPLIER)
 #define ipcid_to_seqx(id) ((id) / SEQ_MULTIPLIER)
+#define IPCID_SEQ_MAX	  min_t(int, INT_MAX/SEQ_MULTIPLIER, USHRT_MAX)
 
 /* must be called with ids->rwsem acquired for writing */
int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int);
@@ -109,8 +117,8 @@ int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg);
 /* for rare, potentially huge allocations.
 * both function can sleep
 */
-void* ipc_alloc(int size);
-void ipc_free(void* ptr, int size);
+void *ipc_alloc(int size);
+void ipc_free(void *ptr, int size);
 
 /*
 * For allocation that need to be freed by RCU.
@@ -118,9 +126,10 @@ void ipc_free(void *ptr, int size);
 * getref increases the refcount, the putref call that reduces the recount
 * to 0 schedules the rcu destruction. Caller must guarantee locking.
 */
-void* ipc_rcu_alloc(int size);
+void *ipc_rcu_alloc(int size);
int ipc_rcu_getref(void *ptr);
-void ipc_rcu_putref(void *ptr);
+void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head));
+void ipc_rcu_free(struct rcu_head *head);
 
struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id);
@@ -133,16 +142,16 @@ struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns,
 					     struct ipc64_perm *perm, int extra_perm);
 
 #ifndef CONFIG_ARCH_WANT_IPC_PARSE_VERSION
-  /* On IA-64, we always use the "64-bit version" of the IPC structures.  */ 
+/* On IA-64, we always use the "64-bit version" of the IPC structures.  */
 # define ipc_parse_version(cmd)	IPC_64
 #else
-int ipc_parse_version (int *cmd);
+int ipc_parse_version(int *cmd);
 #endif
 
extern void free_msg(struct msg_msg *msg);
-extern struct msg_msg *load_msg(const void __user *src, int len);
+extern struct msg_msg *load_msg(const void __user *src, size_t len);
extern struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst);
-extern int store_msg(void __user *dest, struct msg_msg *msg, int len);
+extern int store_msg(void __user *dest, struct msg_msg *msg, size_t len);
 
extern void recompute_msgmni(struct ipc_namespace *);
@@ -177,9 +186,22 @@ static inline void ipc_unlock(struct kern_ipc_perm *perm)
 	rcu_read_unlock();
 }
 
+/*
+ * ipc_valid_object() - helper to sort out IPC_RMID races for codepaths
+ * where the respective ipc_ids.rwsem is not being held down.
+ * Checks whether the ipc object is still around or if it's gone already, as
+ * ipc_rmid() may have already freed the ID while the ipc lock was spinning.
+ * Needs to be called with kern_ipc_perm.lock held -- exception made for one
+ * checkpoint case at sys_semtimedop() as noted in code commentary.
+ */
+static inline bool ipc_valid_object(struct kern_ipc_perm *perm)
+{
+	return !perm->deleted;
+}
+
struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id);
int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
-			struct ipc_ops *ops, struct ipc_params *params);
+			const struct ipc_ops *ops, struct ipc_params *params);
void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
		void (*free)(struct ipc_namespace *, struct kern_ipc_perm *));
 #endif
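The new ipc_valid_object() helper names a pattern used throughout the hunks above: take the per-object lock first, then re-check whether a concurrent IPC_RMID already marked the object deleted. A userspace analogue with a mutex standing in for kern_ipc_perm.lock (illustration only, not kernel code):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct obj {
            pthread_mutex_t lock;
            bool deleted;           /* set by the removal path */
            int value;
    };

    static int obj_update(struct obj *o, int v)
    {
            pthread_mutex_lock(&o->lock);
            if (o->deleted) {       /* raced with removal: bail out */
                    pthread_mutex_unlock(&o->lock);
                    return -1;      /* the kernel returns -EIDRM here */
            }
            o->value = v;
            pthread_mutex_unlock(&o->lock);
            return 0;
    }

    int main(void)
    {
            struct obj o = { PTHREAD_MUTEX_INITIALIZER, false, 0 };

            printf("update: %d\n", obj_update(&o, 42));
            o.deleted = true;       /* simulate IPC_RMID */
            printf("after delete: %d\n", obj_update(&o, 7));
            return 0;
    }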
