Diffstat (limited to 'ipc/msg.c')
-rw-r--r--  ipc/msg.c  258
1 file changed, 146 insertions(+), 112 deletions(-)
diff --git a/ipc/msg.c b/ipc/msg.c
index b0d541d4267..c5d8e374998 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -39,12 +39,10 @@
#include <linux/ipc_namespace.h>
#include <asm/current.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include "util.h"
-/*
- * one msg_receiver structure for each sleeping receiver:
- */
+/* one msg_receiver structure for each sleeping receiver */
struct msg_receiver {
struct list_head r_list;
struct task_struct *r_tsk;
@@ -53,6 +51,12 @@ struct msg_receiver {
long r_msgtype;
long r_maxsize;
+ /*
+ * Mark r_msg volatile so that the compiler
+ * does not try to get smart and optimize
+ * it. We rely on this for the lockless
+ * receive algorithm.
+ */
struct msg_msg *volatile r_msg;
};
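The volatile qualifier matters because the lockless receive path in do_msgrcv() (later in this patch) re-reads r_msg in a bare retry loop with no lock held; without it the compiler could legally hoist the load out of the loop and spin forever on a stale register value. A compilable userspace model of that hazard, with hypothetical names (an illustration only, not the kernel code):

struct message { long mtype; };

/* mirrors r_msg: written by the waker, polled by the sleeper */
static struct message *volatile slot;

static void publish(struct message *m)
{
	slot = m;		/* waker's final store */
}

static void poll_for_message(void)
{
	/* volatile forces a fresh load of slot on every iteration */
	while (slot == (struct message *)0)
		;
}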
@@ -70,75 +74,6 @@ struct msg_sender {
#define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS])
-static void freeque(struct ipc_namespace *, struct kern_ipc_perm *);
-static int newque(struct ipc_namespace *, struct ipc_params *);
-#ifdef CONFIG_PROC_FS
-static int sysvipc_msg_proc_show(struct seq_file *s, void *it);
-#endif
-
-/*
- * Scale msgmni with the available lowmem size: the memory dedicated to msg
- * queues should occupy at most 1/MSG_MEM_SCALE of lowmem.
- * Also take into account the number of nsproxies created so far.
- * This should be done staying within the (MSGMNI , IPCMNI/nr_ipc_ns) range.
- */
-void recompute_msgmni(struct ipc_namespace *ns)
-{
- struct sysinfo i;
- unsigned long allowed;
- int nb_ns;
-
- si_meminfo(&i);
- allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit)
- / MSGMNB;
- nb_ns = atomic_read(&nr_ipc_ns);
- allowed /= nb_ns;
-
- if (allowed < MSGMNI) {
- ns->msg_ctlmni = MSGMNI;
- return;
- }
-
- if (allowed > IPCMNI / nb_ns) {
- ns->msg_ctlmni = IPCMNI / nb_ns;
- return;
- }
-
- ns->msg_ctlmni = allowed;
-}
-
-void msg_init_ns(struct ipc_namespace *ns)
-{
- ns->msg_ctlmax = MSGMAX;
- ns->msg_ctlmnb = MSGMNB;
-
- recompute_msgmni(ns);
-
- atomic_set(&ns->msg_bytes, 0);
- atomic_set(&ns->msg_hdrs, 0);
- ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
-}
-
-#ifdef CONFIG_IPC_NS
-void msg_exit_ns(struct ipc_namespace *ns)
-{
- free_ipcs(ns, &msg_ids(ns), freeque);
- idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
-}
-#endif
-
-void __init msg_init(void)
-{
- msg_init_ns(&init_ipc_ns);
-
- printk(KERN_INFO "msgmni has been set to %d\n",
- init_ipc_ns.msg_ctlmni);
-
- ipc_init_proc_interface("sysvipc/msg",
- " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n",
- IPC_MSG_IDS, sysvipc_msg_proc_show);
-}
-
static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id)
{
struct kern_ipc_perm *ipcp = ipc_obtain_object(&msg_ids(ns), id);
@@ -165,6 +100,15 @@ static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
ipc_rmid(&msg_ids(ns), &s->q_perm);
}
+static void msg_rcu_free(struct rcu_head *head)
+{
+ struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+ struct msg_queue *msq = ipc_rcu_to_struct(p);
+
+ security_msg_queue_free(msq);
+ ipc_rcu_free(head);
+}
+
/**
* newque - Create a new msg queue
* @ns: namespace
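The new msg_rcu_free() callback splits teardown in two: the security blob may only be released once no RCU reader can still be traversing the queue. This is the standard call_rcu() destructor shape; a minimal sketch with hypothetical names (the kernel's ipc_rcu layer adds reference counting on top of this):

#include <linux/slab.h>
#include <linux/rcupdate.h>

struct foo {
	struct rcu_head rcu;
	/* payload ... */
};

static void foo_rcu_free(struct rcu_head *head)
{
	struct foo *f = container_of(head, struct foo, rcu);

	kfree(f);	/* safe: the grace period has elapsed */
}

static void foo_release(struct foo *f)
{
	/* readers that already hold the pointer keep it valid until
	 * they leave their rcu_read_lock() section */
	call_rcu(&f->rcu, foo_rcu_free);
}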
@@ -189,15 +133,14 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
msq->q_perm.security = NULL;
retval = security_msg_queue_alloc(msq);
if (retval) {
- ipc_rcu_putref(msq);
+ ipc_rcu_putref(msq, ipc_rcu_free);
return retval;
}
/* ipc_addid() locks msq upon success. */
id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
if (id < 0) {
- security_msg_queue_free(msq);
- ipc_rcu_putref(msq);
+ ipc_rcu_putref(msq, msg_rcu_free);
return id;
}
@@ -219,7 +162,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss)
{
mss->tsk = current;
- current->state = TASK_INTERRUPTIBLE;
+ __set_current_state(TASK_INTERRUPTIBLE);
list_add_tail(&mss->list, &msq->q_senders);
}
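Writing current->state directly open-codes what __set_current_state() does; the helper is the sanctioned interface (and with scheduler debugging enabled it also records where the state was set). For context, a sketch of the canonical wait loop this call belongs to, assuming a caller-held spinlock `lock` and a wakeup condition `cond` (both hypothetical):

for (;;) {
	__set_current_state(TASK_INTERRUPTIBLE);
	if (cond)
		break;			/* woken with the condition true */
	spin_unlock(&lock);
	schedule();			/* actually sleep */
	spin_lock(&lock);
}
__set_current_state(TASK_RUNNING);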
@@ -245,8 +188,14 @@ static void expunge_all(struct msg_queue *msq, int res)
struct msg_receiver *msr, *t;
list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
- msr->r_msg = NULL;
+ msr->r_msg = NULL; /* initialize expunge ordering */
wake_up_process(msr->r_tsk);
+ /*
+ * Ensure that the wakeup is visible before setting r_msg as
+ * the receiving end depends on it: either spinning on a nil,
+ * or dealing with -EAGAIN cases. See lockless receive part 1
+ * and 2 in do_msgrcv().
+ */
smp_mb();
msr->r_msg = ERR_PTR(res);
}
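The ordering is subtle enough to lay out side by side (condensed from this function and from the lockless receive loop in do_msgrcv() below): the final r_msg store must not become visible before the wakeup, because a receiver that sees a non-nil, non-EAGAIN r_msg proceeds without ever taking the queue lock.

/* waker (expunge_all)              sleeper (do_msgrcv, sketch)
 *
 * msr->r_msg = NULL;               msg = msr_d.r_msg;
 * wake_up_process(msr->r_tsk);     while (msg == NULL) {
 * smp_mb();                                cpu_relax();
 * msr->r_msg = ERR_PTR(res);               msg = msr_d.r_msg;
 *                                  }
 */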
@@ -276,8 +225,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
free_msg(msg);
}
atomic_sub(msq->q_cbytes, &ns->msg_bytes);
- security_msg_queue_free(msq);
- ipc_rcu_putref(msq);
+ ipc_rcu_putref(msq, msg_rcu_free);
}
/*
@@ -293,15 +241,14 @@ static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg)
SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg)
{
struct ipc_namespace *ns;
- struct ipc_ops msg_ops;
+ static const struct ipc_ops msg_ops = {
+ .getnew = newque,
+ .associate = msg_security,
+ };
struct ipc_params msg_params;
ns = current->nsproxy->ipc_ns;
- msg_ops.getnew = newque;
- msg_ops.associate = msg_security;
- msg_ops.more_checks = NULL;
-
msg_params.key = key;
msg_params.flg = msgflg;
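Making the ops table static const, rather than filling a struct on the stack on every call, moves it into read-only data and lets the designated initializer zero the unused .more_checks hook implicitly. The same idiom in a self-contained sketch with hypothetical names:

struct ops {
	int (*getnew)(void);
	int (*associate)(int flg);
	int (*more_checks)(int flg);	/* implicitly NULL below */
};

static int demo_getnew(void)
{
	return 0;
}

static int demo_assoc(int flg)
{
	(void)flg;
	return 0;
}

/* static const: built at compile time, lives in .rodata */
static const struct ops demo_ops = {
	.getnew		= demo_getnew,
	.associate	= demo_assoc,
};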
@@ -311,7 +258,7 @@ SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg)
static inline unsigned long
copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
{
- switch(version) {
+ switch (version) {
case IPC_64:
return copy_to_user(buf, in, sizeof(*in));
case IPC_OLD:
@@ -356,7 +303,7 @@ copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
static inline unsigned long
copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)
{
- switch(version) {
+ switch (version) {
case IPC_64:
if (copy_from_user(out, buf, sizeof(*out)))
return -EFAULT;
@@ -368,9 +315,9 @@ copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)
if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
return -EFAULT;
- out->msg_perm.uid = tbuf_old.msg_perm.uid;
- out->msg_perm.gid = tbuf_old.msg_perm.gid;
- out->msg_perm.mode = tbuf_old.msg_perm.mode;
+ out->msg_perm.uid = tbuf_old.msg_perm.uid;
+ out->msg_perm.gid = tbuf_old.msg_perm.gid;
+ out->msg_perm.mode = tbuf_old.msg_perm.mode;
if (tbuf_old.msg_qbytes == 0)
out->msg_qbytes = tbuf_old.msg_lqbytes;
@@ -599,23 +546,22 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
static int testmsg(struct msg_msg *msg, long type, int mode)
{
- switch(mode)
- {
- case SEARCH_ANY:
- case SEARCH_NUMBER:
+ switch (mode) {
+ case SEARCH_ANY:
+ case SEARCH_NUMBER:
+ return 1;
+ case SEARCH_LESSEQUAL:
+ if (msg->m_type <= type)
return 1;
- case SEARCH_LESSEQUAL:
- if (msg->m_type <=type)
- return 1;
- break;
- case SEARCH_EQUAL:
- if (msg->m_type == type)
- return 1;
- break;
- case SEARCH_NOTEQUAL:
- if (msg->m_type != type)
- return 1;
- break;
+ break;
+ case SEARCH_EQUAL:
+ if (msg->m_type == type)
+ return 1;
+ break;
+ case SEARCH_NOTEQUAL:
+ if (msg->m_type != type)
+ return 1;
+ break;
}
return 0;
}
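These search modes are what the msgrcv(2) msgtyp argument selects from userspace: 0 means SEARCH_ANY, a positive type means SEARCH_EQUAL (or SEARCH_NOTEQUAL with MSG_EXCEPT), a negative type means SEARCH_LESSEQUAL on the absolute value, and SEARCH_NUMBER backs the MSG_COPY position lookup. A compilable illustration (error handling omitted):

#define _GNU_SOURCE		/* for MSG_EXCEPT */
#include <sys/msg.h>

struct demo_msg {
	long mtype;
	char mtext[64];
};

static void receive_modes(int msqid)
{
	struct demo_msg m;

	/* msgtyp == 0: first message of any type (SEARCH_ANY) */
	msgrcv(msqid, &m, sizeof(m.mtext), 0, IPC_NOWAIT);

	/* msgtyp > 0: first message of exactly this type (SEARCH_EQUAL) */
	msgrcv(msqid, &m, sizeof(m.mtext), 7, IPC_NOWAIT);

	/* msgtyp > 0 plus MSG_EXCEPT: first message of any other
	 * type (SEARCH_NOTEQUAL) */
	msgrcv(msqid, &m, sizeof(m.mtext), 7, MSG_EXCEPT | IPC_NOWAIT);

	/* msgtyp < 0: first message with mtype <= 7 (SEARCH_LESSEQUAL) */
	msgrcv(msqid, &m, sizeof(m.mtext), -7, IPC_NOWAIT);
}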
@@ -631,15 +577,22 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
list_del(&msr->r_list);
if (msr->r_maxsize < msg->m_ts) {
+ /* initialize pipelined send ordering */
msr->r_msg = NULL;
wake_up_process(msr->r_tsk);
- smp_mb();
+ smp_mb(); /* see barrier comment below */
msr->r_msg = ERR_PTR(-E2BIG);
} else {
msr->r_msg = NULL;
msq->q_lrpid = task_pid_vnr(msr->r_tsk);
msq->q_rtime = get_seconds();
wake_up_process(msr->r_tsk);
+ /*
+ * Ensure that the wakeup is visible before
+ * setting r_msg, as the receiving end depends
+ * on it. See lockless receive part 1 and 2 in
+ * do_msgrcv().
+ */
smp_mb();
msr->r_msg = msg;
@@ -647,6 +600,7 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
}
}
}
+
return 0;
}
@@ -688,6 +642,12 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
if (ipcperms(ns, &msq->q_perm, S_IWUGO))
goto out_unlock0;
+ /* raced with RMID? */
+ if (!ipc_valid_object(&msq->q_perm)) {
+ err = -EIDRM;
+ goto out_unlock0;
+ }
+
err = security_msg_queue_msgsnd(msq, msg, msgflg);
if (err)
goto out_unlock0;
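The ipc_valid_object() check closes the window between the RCU lookup and taking the object lock: the queue may have been removed by msgctl(IPC_RMID) in between, and the lookup alone no longer guarantees liveness. To the best of my knowledge the helper these conversions rely on is just a read of the deleted flag, roughly:

/* sketch of the helper (see ipc/util.h); call with the object
 * lock held so the answer cannot change under us */
static inline bool ipc_valid_object(struct kern_ipc_perm *perm)
{
	return !perm->deleted;
}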
@@ -703,6 +663,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
goto out_unlock0;
}
+ /* enqueue the sender and prepare to block */
ss_add(msq, &s);
if (!ipc_rcu_getref(msq)) {
@@ -717,8 +678,9 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
rcu_read_lock();
ipc_lock_object(&msq->q_perm);
- ipc_rcu_putref(msq);
- if (msq->q_perm.deleted) {
+ ipc_rcu_putref(msq, ipc_rcu_free);
+ /* raced with RMID? */
+ if (!ipc_valid_object(&msq->q_perm)) {
err = -EIDRM;
goto out_unlock0;
}
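This hunk is the other half of the RMID defense: before sleeping, the sender pins the queue with ipc_rcu_getref() so the structure cannot be freed while unlocked, and after waking it relocks, drops the pin, and must re-check validity, since only the memory, not the queue's liveness, was guaranteed across the sleep. The shape of that dance, condensed from do_msgsnd() as a sketch:

ipc_rcu_getref(msq);			/* pin: memory stays valid */
ipc_unlock_object(&msq->q_perm);
rcu_read_unlock();

schedule();				/* sleep until a receiver wakes us */

rcu_read_lock();
ipc_lock_object(&msq->q_perm);
ipc_rcu_putref(msq, ipc_rcu_free);	/* unpin */
if (!ipc_valid_object(&msq->q_perm)) {	/* RMID ran while we slept */
	err = -EIDRM;
	goto out_unlock0;
}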
@@ -872,6 +834,8 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl
return -EINVAL;
if (msgflg & MSG_COPY) {
+ if ((msgflg & MSG_EXCEPT) || !(msgflg & IPC_NOWAIT))
+ return -EINVAL;
copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax));
if (IS_ERR(copy))
return PTR_ERR(copy);
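MSG_COPY (used by checkpoint/restore tooling to peek at queue contents) treats msgtyp as a position rather than a type and must never block, since the copy buffer is prepared up front; the new test rejects the two flag combinations the copy path cannot service instead of misbehaving later. A compilable userspace illustration (hypothetical helper name):

#define _GNU_SOURCE
#include <sys/msg.h>

#ifndef MSG_COPY
#define MSG_COPY 040000		/* value from <linux/msg.h> */
#endif

struct demo_msg {
	long mtype;
	char mtext[64];
};

/* Peek at the message at queue position `pos` without dequeueing it.
 * Needs CONFIG_CHECKPOINT_RESTORE; after this patch, leaving out
 * IPC_NOWAIT (or adding MSG_EXCEPT) fails fast with EINVAL.
 */
static ssize_t peek_at(int msqid, long pos, struct demo_msg *m)
{
	return msgrcv(msqid, m, sizeof(m->mtext), pos,
		      MSG_COPY | IPC_NOWAIT);
}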
@@ -894,6 +858,13 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl
goto out_unlock1;
ipc_lock_object(&msq->q_perm);
+
+ /* raced with RMID? */
+ if (!ipc_valid_object(&msq->q_perm)) {
+ msg = ERR_PTR(-EIDRM);
+ goto out_unlock0;
+ }
+
msg = find_msg(msq, &msgtyp, mode);
if (!IS_ERR(msg)) {
/*
@@ -940,7 +911,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl
else
msr_d.r_maxsize = bufsz;
msr_d.r_msg = ERR_PTR(-EAGAIN);
- current->state = TASK_INTERRUPTIBLE;
+ __set_current_state(TASK_INTERRUPTIBLE);
ipc_unlock_object(&msq->q_perm);
rcu_read_unlock();
@@ -963,7 +934,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl
* wake_up_process(). There is a race with exit(), see
* ipc/mqueue.c for the details.
*/
- msg = (struct msg_msg*)msr_d.r_msg;
+ msg = (struct msg_msg *)msr_d.r_msg;
while (msg == NULL) {
cpu_relax();
msg = (struct msg_msg *)msr_d.r_msg;
@@ -984,7 +955,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl
/* Lockless receive, part 4:
* Repeat test after acquiring the spinlock.
*/
- msg = (struct msg_msg*)msr_d.r_msg;
+ msg = (struct msg_msg *)msr_d.r_msg;
if (msg != ERR_PTR(-EAGAIN))
goto out_unlock0;
@@ -1018,6 +989,57 @@ SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill);
}
+/*
+ * Scale msgmni with the available lowmem size: the memory dedicated to msg
+ * queues should occupy at most 1/MSG_MEM_SCALE of lowmem.
+ * Also take into account the number of nsproxies created so far.
+ * This should be done staying within the (MSGMNI , IPCMNI/nr_ipc_ns) range.
+ */
+void recompute_msgmni(struct ipc_namespace *ns)
+{
+ struct sysinfo i;
+ unsigned long allowed;
+ int nb_ns;
+
+ si_meminfo(&i);
+ allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit)
+ / MSGMNB;
+ nb_ns = atomic_read(&nr_ipc_ns);
+ allowed /= nb_ns;
+
+ if (allowed < MSGMNI) {
+ ns->msg_ctlmni = MSGMNI;
+ return;
+ }
+
+ if (allowed > IPCMNI / nb_ns) {
+ ns->msg_ctlmni = IPCMNI / nb_ns;
+ return;
+ }
+
+ ns->msg_ctlmni = allowed;
+}
+
+void msg_init_ns(struct ipc_namespace *ns)
+{
+ ns->msg_ctlmax = MSGMAX;
+ ns->msg_ctlmnb = MSGMNB;
+
+ recompute_msgmni(ns);
+
+ atomic_set(&ns->msg_bytes, 0);
+ atomic_set(&ns->msg_hdrs, 0);
+ ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
+}
+
+#ifdef CONFIG_IPC_NS
+void msg_exit_ns(struct ipc_namespace *ns)
+{
+ free_ipcs(ns, &msg_ids(ns), freeque);
+ idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
+}
+#endif
+
#ifdef CONFIG_PROC_FS
static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
{
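To make the scaling concrete, a worked example assuming the constants of that era (MSG_MEM_SCALE = 32, MSGMNB = 16384, MSGMNI = 32, IPCMNI = 32768): on a machine with 2 GiB of lowmem and a single ipc namespace,

	allowed = (2^31 / 32) / 16384 = 4096

which already lies inside [MSGMNI, IPCMNI/1] = [32, 32768], so msg_ctlmni becomes 4096. With 64 namespaces the per-namespace share 4096/64 = 64 would still clear the MSGMNI floor; on small-memory systems the floor keeps the minimum usable.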
@@ -1042,3 +1064,15 @@ static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
msq->q_ctime);
}
#endif
+
+void __init msg_init(void)
+{
+ msg_init_ns(&init_ipc_ns);
+
+ printk(KERN_INFO "msgmni has been set to %d\n",
+ init_ipc_ns.msg_ctlmni);
+
+ ipc_init_proc_interface("sysvipc/msg",
+ " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n",
+ IPC_MSG_IDS, sysvipc_msg_proc_show);
+}