diff options
Diffstat (limited to 'ipc/msg.c')
| -rw-r--r-- | ipc/msg.c | 258 | 
1 file changed, 146 insertions, 112 deletions
diff --git a/ipc/msg.c b/ipc/msg.c index b0d541d4267..c5d8e374998 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -39,12 +39,10 @@  #include <linux/ipc_namespace.h>  #include <asm/current.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h>  #include "util.h" -/* - * one msg_receiver structure for each sleeping receiver: - */ +/* one msg_receiver structure for each sleeping receiver */  struct msg_receiver {  	struct list_head	r_list;  	struct task_struct	*r_tsk; @@ -53,6 +51,12 @@ struct msg_receiver {  	long			r_msgtype;  	long			r_maxsize; +	/* +	 * Mark r_msg volatile so that the compiler +	 * does not try to get smart and optimize +	 * it. We rely on this for the lockless +	 * receive algorithm. +	 */  	struct msg_msg		*volatile r_msg;  }; @@ -70,75 +74,6 @@ struct msg_sender {  #define msg_ids(ns)	((ns)->ids[IPC_MSG_IDS]) -static void freeque(struct ipc_namespace *, struct kern_ipc_perm *); -static int newque(struct ipc_namespace *, struct ipc_params *); -#ifdef CONFIG_PROC_FS -static int sysvipc_msg_proc_show(struct seq_file *s, void *it); -#endif - -/* - * Scale msgmni with the available lowmem size: the memory dedicated to msg - * queues should occupy at most 1/MSG_MEM_SCALE of lowmem. - * Also take into account the number of nsproxies created so far. - * This should be done staying within the (MSGMNI , IPCMNI/nr_ipc_ns) range. 
- */ -void recompute_msgmni(struct ipc_namespace *ns) -{ -	struct sysinfo i; -	unsigned long allowed; -	int nb_ns; - -	si_meminfo(&i); -	allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit) -		/ MSGMNB; -	nb_ns = atomic_read(&nr_ipc_ns); -	allowed /= nb_ns; - -	if (allowed < MSGMNI) { -		ns->msg_ctlmni = MSGMNI; -		return; -	} - -	if (allowed > IPCMNI / nb_ns) { -		ns->msg_ctlmni = IPCMNI / nb_ns; -		return; -	} - -	ns->msg_ctlmni = allowed; -} - -void msg_init_ns(struct ipc_namespace *ns) -{ -	ns->msg_ctlmax = MSGMAX; -	ns->msg_ctlmnb = MSGMNB; - -	recompute_msgmni(ns); - -	atomic_set(&ns->msg_bytes, 0); -	atomic_set(&ns->msg_hdrs, 0); -	ipc_init_ids(&ns->ids[IPC_MSG_IDS]); -} - -#ifdef CONFIG_IPC_NS -void msg_exit_ns(struct ipc_namespace *ns) -{ -	free_ipcs(ns, &msg_ids(ns), freeque); -	idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr); -} -#endif - -void __init msg_init(void) -{ -	msg_init_ns(&init_ipc_ns); - -	printk(KERN_INFO "msgmni has been set to %d\n", -		init_ipc_ns.msg_ctlmni); - -	ipc_init_proc_interface("sysvipc/msg", -				"       key      msqid perms      cbytes       qnum lspid lrpid   uid   gid  cuid  cgid      stime      rtime      ctime\n", -				IPC_MSG_IDS, sysvipc_msg_proc_show); -} -  static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id)  {  	struct kern_ipc_perm *ipcp = ipc_obtain_object(&msg_ids(ns), id); @@ -165,6 +100,15 @@ static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)  	ipc_rmid(&msg_ids(ns), &s->q_perm);  } +static void msg_rcu_free(struct rcu_head *head) +{ +	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); +	struct msg_queue *msq = ipc_rcu_to_struct(p); + +	security_msg_queue_free(msq); +	ipc_rcu_free(head); +} +  /**   * newque - Create a new msg queue   * @ns: namespace @@ -189,15 +133,14 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)  	msq->q_perm.security = NULL;  	retval = security_msg_queue_alloc(msq);  	if (retval) { -		
ipc_rcu_putref(msq); +		ipc_rcu_putref(msq, ipc_rcu_free);  		return retval;  	}  	/* ipc_addid() locks msq upon success. */  	id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);  	if (id < 0) { -		security_msg_queue_free(msq); -		ipc_rcu_putref(msq); +		ipc_rcu_putref(msq, msg_rcu_free);  		return id;  	} @@ -219,7 +162,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)  static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss)  {  	mss->tsk = current; -	current->state = TASK_INTERRUPTIBLE; +	__set_current_state(TASK_INTERRUPTIBLE);  	list_add_tail(&mss->list, &msq->q_senders);  } @@ -245,8 +188,14 @@ static void expunge_all(struct msg_queue *msq, int res)  	struct msg_receiver *msr, *t;  	list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { -		msr->r_msg = NULL; +		msr->r_msg = NULL; /* initialize expunge ordering */  		wake_up_process(msr->r_tsk); +		/* +		 * Ensure that the wakeup is visible before setting r_msg as +		 * the receiving end depends on it: either spinning on a nil, +		 * or dealing with -EAGAIN cases. See lockless receive part 1 +		 * and 2 in do_msgrcv(). 
+		 */  		smp_mb();  		msr->r_msg = ERR_PTR(res);  	} @@ -276,8 +225,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)  		free_msg(msg);  	}  	atomic_sub(msq->q_cbytes, &ns->msg_bytes); -	security_msg_queue_free(msq); -	ipc_rcu_putref(msq); +	ipc_rcu_putref(msq, msg_rcu_free);  }  /* @@ -293,15 +241,14 @@ static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg)  SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg)  {  	struct ipc_namespace *ns; -	struct ipc_ops msg_ops; +	static const struct ipc_ops msg_ops = { +		.getnew = newque, +		.associate = msg_security, +	};  	struct ipc_params msg_params;  	ns = current->nsproxy->ipc_ns; -	msg_ops.getnew = newque; -	msg_ops.associate = msg_security; -	msg_ops.more_checks = NULL; -  	msg_params.key = key;  	msg_params.flg = msgflg; @@ -311,7 +258,7 @@ SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg)  static inline unsigned long  copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)  { -	switch(version) { +	switch (version) {  	case IPC_64:  		return copy_to_user(buf, in, sizeof(*in));  	case IPC_OLD: @@ -356,7 +303,7 @@ copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)  static inline unsigned long  copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)  { -	switch(version) { +	switch (version) {  	case IPC_64:  		if (copy_from_user(out, buf, sizeof(*out)))  			return -EFAULT; @@ -368,9 +315,9 @@ copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)  		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))  			return -EFAULT; -		out->msg_perm.uid      	= tbuf_old.msg_perm.uid; -		out->msg_perm.gid      	= tbuf_old.msg_perm.gid; -		out->msg_perm.mode     	= tbuf_old.msg_perm.mode; +		out->msg_perm.uid	= tbuf_old.msg_perm.uid; +		out->msg_perm.gid	= tbuf_old.msg_perm.gid; +		out->msg_perm.mode	= tbuf_old.msg_perm.mode;  		if (tbuf_old.msg_qbytes == 0)  			out->msg_qbytes	= tbuf_old.msg_lqbytes; @@ -599,23 
+546,22 @@ SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)  static int testmsg(struct msg_msg *msg, long type, int mode)  { -	switch(mode) -	{ -		case SEARCH_ANY: -		case SEARCH_NUMBER: +	switch (mode) { +	case SEARCH_ANY: +	case SEARCH_NUMBER: +		return 1; +	case SEARCH_LESSEQUAL: +		if (msg->m_type <= type)  			return 1; -		case SEARCH_LESSEQUAL: -			if (msg->m_type <=type) -				return 1; -			break; -		case SEARCH_EQUAL: -			if (msg->m_type == type) -				return 1; -			break; -		case SEARCH_NOTEQUAL: -			if (msg->m_type != type) -				return 1; -			break; +		break; +	case SEARCH_EQUAL: +		if (msg->m_type == type) +			return 1; +		break; +	case SEARCH_NOTEQUAL: +		if (msg->m_type != type) +			return 1; +		break;  	}  	return 0;  } @@ -631,15 +577,22 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)  			list_del(&msr->r_list);  			if (msr->r_maxsize < msg->m_ts) { +				/* initialize pipelined send ordering */  				msr->r_msg = NULL;  				wake_up_process(msr->r_tsk); -				smp_mb(); +				smp_mb(); /* see barrier comment below */  				msr->r_msg = ERR_PTR(-E2BIG);  			} else {  				msr->r_msg = NULL;  				msq->q_lrpid = task_pid_vnr(msr->r_tsk);  				msq->q_rtime = get_seconds();  				wake_up_process(msr->r_tsk); +				/* +				 * Ensure that the wakeup is visible before +				 * setting r_msg, as the receiving end depends +				 * on it. See lockless receive part 1 and 2 in +				 * do_msgrcv(). +				 */  				smp_mb();  				msr->r_msg = msg; @@ -647,6 +600,7 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)  			}  		}  	} +  	return 0;  } @@ -688,6 +642,12 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,  		if (ipcperms(ns, &msq->q_perm, S_IWUGO))  			goto out_unlock0; +		/* raced with RMID? 
*/ +		if (!ipc_valid_object(&msq->q_perm)) { +			err = -EIDRM; +			goto out_unlock0; +		} +  		err = security_msg_queue_msgsnd(msq, msg, msgflg);  		if (err)  			goto out_unlock0; @@ -703,6 +663,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,  			goto out_unlock0;  		} +		/* enqueue the sender and prepare to block */  		ss_add(msq, &s);  		if (!ipc_rcu_getref(msq)) { @@ -717,8 +678,9 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,  		rcu_read_lock();  		ipc_lock_object(&msq->q_perm); -		ipc_rcu_putref(msq); -		if (msq->q_perm.deleted) { +		ipc_rcu_putref(msq, ipc_rcu_free); +		/* raced with RMID? */ +		if (!ipc_valid_object(&msq->q_perm)) {  			err = -EIDRM;  			goto out_unlock0;  		} @@ -872,6 +834,8 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl  		return -EINVAL;  	if (msgflg & MSG_COPY) { +		if ((msgflg & MSG_EXCEPT) || !(msgflg & IPC_NOWAIT)) +			return -EINVAL;  		copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax));  		if (IS_ERR(copy))  			return PTR_ERR(copy); @@ -894,6 +858,13 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl  			goto out_unlock1;  		ipc_lock_object(&msq->q_perm); + +		/* raced with RMID? */ +		if (!ipc_valid_object(&msq->q_perm)) { +			msg = ERR_PTR(-EIDRM); +			goto out_unlock0; +		} +  		msg = find_msg(msq, &msgtyp, mode);  		if (!IS_ERR(msg)) {  			/* @@ -940,7 +911,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl  		else  			msr_d.r_maxsize = bufsz;  		msr_d.r_msg = ERR_PTR(-EAGAIN); -		current->state = TASK_INTERRUPTIBLE; +		__set_current_state(TASK_INTERRUPTIBLE);  		ipc_unlock_object(&msq->q_perm);  		rcu_read_unlock(); @@ -963,7 +934,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl  		 * wake_up_process(). There is a race with exit(), see  		 * ipc/mqueue.c for the details.  		 
*/ -		msg = (struct msg_msg*)msr_d.r_msg; +		msg = (struct msg_msg *)msr_d.r_msg;  		while (msg == NULL) {  			cpu_relax();  			msg = (struct msg_msg *)msr_d.r_msg; @@ -984,7 +955,7 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl  		/* Lockless receive, part 4:  		 * Repeat test after acquiring the spinlock.  		 */ -		msg = (struct msg_msg*)msr_d.r_msg; +		msg = (struct msg_msg *)msr_d.r_msg;  		if (msg != ERR_PTR(-EAGAIN))  			goto out_unlock0; @@ -1018,6 +989,57 @@ SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,  	return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill);  } +/* + * Scale msgmni with the available lowmem size: the memory dedicated to msg + * queues should occupy at most 1/MSG_MEM_SCALE of lowmem. + * Also take into account the number of nsproxies created so far. + * This should be done staying within the (MSGMNI , IPCMNI/nr_ipc_ns) range. + */ +void recompute_msgmni(struct ipc_namespace *ns) +{ +	struct sysinfo i; +	unsigned long allowed; +	int nb_ns; + +	si_meminfo(&i); +	allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit) +		/ MSGMNB; +	nb_ns = atomic_read(&nr_ipc_ns); +	allowed /= nb_ns; + +	if (allowed < MSGMNI) { +		ns->msg_ctlmni = MSGMNI; +		return; +	} + +	if (allowed > IPCMNI / nb_ns) { +		ns->msg_ctlmni = IPCMNI / nb_ns; +		return; +	} + +	ns->msg_ctlmni = allowed; +} + +void msg_init_ns(struct ipc_namespace *ns) +{ +	ns->msg_ctlmax = MSGMAX; +	ns->msg_ctlmnb = MSGMNB; + +	recompute_msgmni(ns); + +	atomic_set(&ns->msg_bytes, 0); +	atomic_set(&ns->msg_hdrs, 0); +	ipc_init_ids(&ns->ids[IPC_MSG_IDS]); +} + +#ifdef CONFIG_IPC_NS +void msg_exit_ns(struct ipc_namespace *ns) +{ +	free_ipcs(ns, &msg_ids(ns), freeque); +	idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr); +} +#endif +  #ifdef CONFIG_PROC_FS  static int sysvipc_msg_proc_show(struct seq_file *s, void *it)  { @@ -1042,3 +1064,15 @@ static int sysvipc_msg_proc_show(struct seq_file *s, void *it)  	
		msq->q_ctime);  }  #endif + +void __init msg_init(void) +{ +	msg_init_ns(&init_ipc_ns); + +	printk(KERN_INFO "msgmni has been set to %d\n", +		init_ipc_ns.msg_ctlmni); + +	ipc_init_proc_interface("sysvipc/msg", +				"       key      msqid perms      cbytes       qnum lspid lrpid   uid   gid  cuid  cgid      stime      rtime      ctime\n", +				IPC_MSG_IDS, sysvipc_msg_proc_show); +}  | 
