diff options
Diffstat (limited to 'kernel/locking/rtmutex.c')
| -rw-r--r-- | kernel/locking/rtmutex.c | 1373 | 
1 files changed, 1373 insertions, 0 deletions
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c new file mode 100644 index 00000000000..fc605941b9b --- /dev/null +++ b/kernel/locking/rtmutex.c @@ -0,0 +1,1373 @@ +/* + * RT-Mutexes: simple blocking mutual exclusion locks with PI support + * + * started by Ingo Molnar and Thomas Gleixner. + * + *  Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + *  Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> + *  Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt + *  Copyright (C) 2006 Esben Nielsen + * + *  See Documentation/rt-mutex-design.txt for details. + */ +#include <linux/spinlock.h> +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/sched/rt.h> +#include <linux/sched/deadline.h> +#include <linux/timer.h> + +#include "rtmutex_common.h" + +/* + * lock->owner state tracking: + * + * lock->owner holds the task_struct pointer of the owner. Bit 0 + * is used to keep track of the "lock has waiters" state. + * + * owner	bit0 + * NULL		0	lock is free (fast acquire possible) + * NULL		1	lock is free and has waiters and the top waiter + *				is going to take the lock* + * taskpointer	0	lock is held (fast release possible) + * taskpointer	1	lock is held and has waiters** + * + * The fast atomic compare exchange based acquire and release is only + * possible when bit 0 of lock->owner is 0. + * + * (*) It also can be a transitional state when grabbing the lock + * with ->wait_lock is held. To prevent any fast path cmpxchg to the lock, + * we need to set the bit0 before looking at the lock, and the owner may be + * NULL in this small time, hence this can be a transitional state. + * + * (**) There is a small time when bit 0 is set but there are no + * waiters. This can happen when grabbing the lock in the slow path. + * To prevent a cmpxchg of the owner releasing the lock, we need to + * set this bit before looking at the lock. + */ + +static void +rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner) +{ +	unsigned long val = (unsigned long)owner; + +	if (rt_mutex_has_waiters(lock)) +		val |= RT_MUTEX_HAS_WAITERS; + +	lock->owner = (struct task_struct *)val; +} + +static inline void clear_rt_mutex_waiters(struct rt_mutex *lock) +{ +	lock->owner = (struct task_struct *) +			((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); +} + +static void fixup_rt_mutex_waiters(struct rt_mutex *lock) +{ +	if (!rt_mutex_has_waiters(lock)) +		clear_rt_mutex_waiters(lock); +} + +/* + * We can speed up the acquire/release, if the architecture + * supports cmpxchg and if there's no debugging state to be set up + */ +#if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES) +# define rt_mutex_cmpxchg(l,c,n)	(cmpxchg(&l->owner, c, n) == c) +static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) +{ +	unsigned long owner, *p = (unsigned long *) &lock->owner; + +	do { +		owner = *p; +	} while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner); +} + +/* + * Safe fastpath aware unlock: + * 1) Clear the waiters bit + * 2) Drop lock->wait_lock + * 3) Try to unlock the lock with cmpxchg + */ +static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock) +	__releases(lock->wait_lock) +{ +	struct task_struct *owner = rt_mutex_owner(lock); + +	clear_rt_mutex_waiters(lock); +	raw_spin_unlock(&lock->wait_lock); +	/* +	 * If a new waiter comes in between the unlock and the cmpxchg +	 * we have two situations: +	 * +	 * unlock(wait_lock); +	 *					lock(wait_lock); +	 * cmpxchg(p, owner, 0) == owner +	 *					mark_rt_mutex_waiters(lock); +	 *					acquire(lock); +	 * or: +	 * +	 * unlock(wait_lock); +	 *					lock(wait_lock); +	 *					mark_rt_mutex_waiters(lock); +	 * +	 * cmpxchg(p, owner, 0) != owner +	 *					enqueue_waiter(); +	 *					unlock(wait_lock); +	 * lock(wait_lock); +	 * wake waiter(); +	 * unlock(wait_lock); +	 *					lock(wait_lock); +	 *					acquire(lock); +	 */ +	return rt_mutex_cmpxchg(lock, owner, NULL); +} + +#else +# define rt_mutex_cmpxchg(l,c,n)	(0) +static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) +{ +	lock->owner = (struct task_struct *) +			((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS); +} + +/* + * Simple slow path only version: lock->owner is protected by lock->wait_lock. + */ +static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock) +	__releases(lock->wait_lock) +{ +	lock->owner = NULL; +	raw_spin_unlock(&lock->wait_lock); +	return true; +} +#endif + +static inline int +rt_mutex_waiter_less(struct rt_mutex_waiter *left, +		     struct rt_mutex_waiter *right) +{ +	if (left->prio < right->prio) +		return 1; + +	/* +	 * If both waiters have dl_prio(), we check the deadlines of the +	 * associated tasks. +	 * If left waiter has a dl_prio(), and we didn't return 1 above, +	 * then right waiter has a dl_prio() too. +	 */ +	if (dl_prio(left->prio)) +		return (left->task->dl.deadline < right->task->dl.deadline); + +	return 0; +} + +static void +rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) +{ +	struct rb_node **link = &lock->waiters.rb_node; +	struct rb_node *parent = NULL; +	struct rt_mutex_waiter *entry; +	int leftmost = 1; + +	while (*link) { +		parent = *link; +		entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry); +		if (rt_mutex_waiter_less(waiter, entry)) { +			link = &parent->rb_left; +		} else { +			link = &parent->rb_right; +			leftmost = 0; +		} +	} + +	if (leftmost) +		lock->waiters_leftmost = &waiter->tree_entry; + +	rb_link_node(&waiter->tree_entry, parent, link); +	rb_insert_color(&waiter->tree_entry, &lock->waiters); +} + +static void +rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) +{ +	if (RB_EMPTY_NODE(&waiter->tree_entry)) +		return; + +	if (lock->waiters_leftmost == &waiter->tree_entry) +		lock->waiters_leftmost = rb_next(&waiter->tree_entry); + +	rb_erase(&waiter->tree_entry, &lock->waiters); +	RB_CLEAR_NODE(&waiter->tree_entry); +} + +static void +rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) +{ +	struct rb_node **link = &task->pi_waiters.rb_node; +	struct rb_node *parent = NULL; +	struct rt_mutex_waiter *entry; +	int leftmost = 1; + +	while (*link) { +		parent = *link; +		entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry); +		if (rt_mutex_waiter_less(waiter, entry)) { +			link = &parent->rb_left; +		} else { +			link = &parent->rb_right; +			leftmost = 0; +		} +	} + +	if (leftmost) +		task->pi_waiters_leftmost = &waiter->pi_tree_entry; + +	rb_link_node(&waiter->pi_tree_entry, parent, link); +	rb_insert_color(&waiter->pi_tree_entry, &task->pi_waiters); +} + +static void +rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) +{ +	if (RB_EMPTY_NODE(&waiter->pi_tree_entry)) +		return; + +	if (task->pi_waiters_leftmost == &waiter->pi_tree_entry) +		task->pi_waiters_leftmost = rb_next(&waiter->pi_tree_entry); + +	rb_erase(&waiter->pi_tree_entry, &task->pi_waiters); +	RB_CLEAR_NODE(&waiter->pi_tree_entry); +} + +/* + * Calculate task priority from the waiter tree priority + * + * Return task->normal_prio when the waiter tree is empty or when + * the waiter is not allowed to do priority boosting + */ +int rt_mutex_getprio(struct task_struct *task) +{ +	if (likely(!task_has_pi_waiters(task))) +		return task->normal_prio; + +	return min(task_top_pi_waiter(task)->prio, +		   task->normal_prio); +} + +struct task_struct *rt_mutex_get_top_task(struct task_struct *task) +{ +	if (likely(!task_has_pi_waiters(task))) +		return NULL; + +	return task_top_pi_waiter(task)->task; +} + +/* + * Called by sched_setscheduler() to check whether the priority change + * is overruled by a possible priority boosting. + */ +int rt_mutex_check_prio(struct task_struct *task, int newprio) +{ +	if (!task_has_pi_waiters(task)) +		return 0; + +	return task_top_pi_waiter(task)->task->prio <= newprio; +} + +/* + * Adjust the priority of a task, after its pi_waiters got modified. + * + * This can be both boosting and unboosting. task->pi_lock must be held. + */ +static void __rt_mutex_adjust_prio(struct task_struct *task) +{ +	int prio = rt_mutex_getprio(task); + +	if (task->prio != prio || dl_prio(prio)) +		rt_mutex_setprio(task, prio); +} + +/* + * Adjust task priority (undo boosting). Called from the exit path of + * rt_mutex_slowunlock() and rt_mutex_slowlock(). + * + * (Note: We do this outside of the protection of lock->wait_lock to + * allow the lock to be taken while or before we readjust the priority + * of task. We do not use the spin_xx_mutex() variants here as we are + * outside of the debug path.) + */ +static void rt_mutex_adjust_prio(struct task_struct *task) +{ +	unsigned long flags; + +	raw_spin_lock_irqsave(&task->pi_lock, flags); +	__rt_mutex_adjust_prio(task); +	raw_spin_unlock_irqrestore(&task->pi_lock, flags); +} + +/* + * Max number of times we'll walk the boosting chain: + */ +int max_lock_depth = 1024; + +static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) +{ +	return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL; +} + +/* + * Adjust the priority chain. Also used for deadlock detection. + * Decreases task's usage by one - may thus free the task. + * + * @task:	the task owning the mutex (owner) for which a chain walk is + *		probably needed + * @deadlock_detect: do we have to carry out deadlock detection? + * @orig_lock:	the mutex (can be NULL if we are walking the chain to recheck + *		things for a task that has just got its priority adjusted, and + *		is waiting on a mutex) + * @next_lock:	the mutex on which the owner of @orig_lock was blocked before + *		we dropped its pi_lock. Is never dereferenced, only used for + *		comparison to detect lock chain changes. + * @orig_waiter: rt_mutex_waiter struct for the task that has just donated + *		its priority to the mutex owner (can be NULL in the case + *		depicted above or if the top waiter is gone away and we are + *		actually deboosting the owner) + * @top_task:	the current top waiter + * + * Returns 0 or -EDEADLK. + */ +static int rt_mutex_adjust_prio_chain(struct task_struct *task, +				      int deadlock_detect, +				      struct rt_mutex *orig_lock, +				      struct rt_mutex *next_lock, +				      struct rt_mutex_waiter *orig_waiter, +				      struct task_struct *top_task) +{ +	struct rt_mutex *lock; +	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; +	int detect_deadlock, ret = 0, depth = 0; +	unsigned long flags; + +	detect_deadlock = debug_rt_mutex_detect_deadlock(orig_waiter, +							 deadlock_detect); + +	/* +	 * The (de)boosting is a step by step approach with a lot of +	 * pitfalls. We want this to be preemptible and we want hold a +	 * maximum of two locks per step. So we have to check +	 * carefully whether things change under us. +	 */ + again: +	if (++depth > max_lock_depth) { +		static int prev_max; + +		/* +		 * Print this only once. If the admin changes the limit, +		 * print a new message when reaching the limit again. +		 */ +		if (prev_max != max_lock_depth) { +			prev_max = max_lock_depth; +			printk(KERN_WARNING "Maximum lock depth %d reached " +			       "task: %s (%d)\n", max_lock_depth, +			       top_task->comm, task_pid_nr(top_task)); +		} +		put_task_struct(task); + +		return -EDEADLK; +	} + retry: +	/* +	 * Task can not go away as we did a get_task() before ! +	 */ +	raw_spin_lock_irqsave(&task->pi_lock, flags); + +	waiter = task->pi_blocked_on; +	/* +	 * Check whether the end of the boosting chain has been +	 * reached or the state of the chain has changed while we +	 * dropped the locks. +	 */ +	if (!waiter) +		goto out_unlock_pi; + +	/* +	 * Check the orig_waiter state. After we dropped the locks, +	 * the previous owner of the lock might have released the lock. +	 */ +	if (orig_waiter && !rt_mutex_owner(orig_lock)) +		goto out_unlock_pi; + +	/* +	 * We dropped all locks after taking a refcount on @task, so +	 * the task might have moved on in the lock chain or even left +	 * the chain completely and blocks now on an unrelated lock or +	 * on @orig_lock. +	 * +	 * We stored the lock on which @task was blocked in @next_lock, +	 * so we can detect the chain change. +	 */ +	if (next_lock != waiter->lock) +		goto out_unlock_pi; + +	/* +	 * Drop out, when the task has no waiters. Note, +	 * top_waiter can be NULL, when we are in the deboosting +	 * mode! +	 */ +	if (top_waiter) { +		if (!task_has_pi_waiters(task)) +			goto out_unlock_pi; +		/* +		 * If deadlock detection is off, we stop here if we +		 * are not the top pi waiter of the task. +		 */ +		if (!detect_deadlock && top_waiter != task_top_pi_waiter(task)) +			goto out_unlock_pi; +	} + +	/* +	 * When deadlock detection is off then we check, if further +	 * priority adjustment is necessary. +	 */ +	if (!detect_deadlock && waiter->prio == task->prio) +		goto out_unlock_pi; + +	lock = waiter->lock; +	if (!raw_spin_trylock(&lock->wait_lock)) { +		raw_spin_unlock_irqrestore(&task->pi_lock, flags); +		cpu_relax(); +		goto retry; +	} + +	/* +	 * Deadlock detection. If the lock is the same as the original +	 * lock which caused us to walk the lock chain or if the +	 * current lock is owned by the task which initiated the chain +	 * walk, we detected a deadlock. +	 */ +	if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { +		debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock); +		raw_spin_unlock(&lock->wait_lock); +		ret = -EDEADLK; +		goto out_unlock_pi; +	} + +	top_waiter = rt_mutex_top_waiter(lock); + +	/* Requeue the waiter */ +	rt_mutex_dequeue(lock, waiter); +	waiter->prio = task->prio; +	rt_mutex_enqueue(lock, waiter); + +	/* Release the task */ +	raw_spin_unlock_irqrestore(&task->pi_lock, flags); +	if (!rt_mutex_owner(lock)) { +		/* +		 * If the requeue above changed the top waiter, then we need +		 * to wake the new top waiter up to try to get the lock. +		 */ + +		if (top_waiter != rt_mutex_top_waiter(lock)) +			wake_up_process(rt_mutex_top_waiter(lock)->task); +		raw_spin_unlock(&lock->wait_lock); +		goto out_put_task; +	} +	put_task_struct(task); + +	/* Grab the next task */ +	task = rt_mutex_owner(lock); +	get_task_struct(task); +	raw_spin_lock_irqsave(&task->pi_lock, flags); + +	if (waiter == rt_mutex_top_waiter(lock)) { +		/* Boost the owner */ +		rt_mutex_dequeue_pi(task, top_waiter); +		rt_mutex_enqueue_pi(task, waiter); +		__rt_mutex_adjust_prio(task); + +	} else if (top_waiter == waiter) { +		/* Deboost the owner */ +		rt_mutex_dequeue_pi(task, waiter); +		waiter = rt_mutex_top_waiter(lock); +		rt_mutex_enqueue_pi(task, waiter); +		__rt_mutex_adjust_prio(task); +	} + +	/* +	 * Check whether the task which owns the current lock is pi +	 * blocked itself. If yes we store a pointer to the lock for +	 * the lock chain change detection above. After we dropped +	 * task->pi_lock next_lock cannot be dereferenced anymore. +	 */ +	next_lock = task_blocked_on_lock(task); + +	raw_spin_unlock_irqrestore(&task->pi_lock, flags); + +	top_waiter = rt_mutex_top_waiter(lock); +	raw_spin_unlock(&lock->wait_lock); + +	/* +	 * We reached the end of the lock chain. Stop right here. No +	 * point to go back just to figure that out. +	 */ +	if (!next_lock) +		goto out_put_task; + +	if (!detect_deadlock && waiter != top_waiter) +		goto out_put_task; + +	goto again; + + out_unlock_pi: +	raw_spin_unlock_irqrestore(&task->pi_lock, flags); + out_put_task: +	put_task_struct(task); + +	return ret; +} + +/* + * Try to take an rt-mutex + * + * Must be called with lock->wait_lock held. + * + * @lock:   the lock to be acquired. + * @task:   the task which wants to acquire the lock + * @waiter: the waiter that is queued to the lock's wait list. (could be NULL) + */ +static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, +		struct rt_mutex_waiter *waiter) +{ +	/* +	 * We have to be careful here if the atomic speedups are +	 * enabled, such that, when +	 *  - no other waiter is on the lock +	 *  - the lock has been released since we did the cmpxchg +	 * the lock can be released or taken while we are doing the +	 * checks and marking the lock with RT_MUTEX_HAS_WAITERS. +	 * +	 * The atomic acquire/release aware variant of +	 * mark_rt_mutex_waiters uses a cmpxchg loop. After setting +	 * the WAITERS bit, the atomic release / acquire can not +	 * happen anymore and lock->wait_lock protects us from the +	 * non-atomic case. +	 * +	 * Note, that this might set lock->owner = +	 * RT_MUTEX_HAS_WAITERS in the case the lock is not contended +	 * any more. This is fixed up when we take the ownership. +	 * This is the transitional state explained at the top of this file. +	 */ +	mark_rt_mutex_waiters(lock); + +	if (rt_mutex_owner(lock)) +		return 0; + +	/* +	 * It will get the lock because of one of these conditions: +	 * 1) there is no waiter +	 * 2) higher priority than waiters +	 * 3) it is top waiter +	 */ +	if (rt_mutex_has_waiters(lock)) { +		if (task->prio >= rt_mutex_top_waiter(lock)->prio) { +			if (!waiter || waiter != rt_mutex_top_waiter(lock)) +				return 0; +		} +	} + +	if (waiter || rt_mutex_has_waiters(lock)) { +		unsigned long flags; +		struct rt_mutex_waiter *top; + +		raw_spin_lock_irqsave(&task->pi_lock, flags); + +		/* remove the queued waiter. */ +		if (waiter) { +			rt_mutex_dequeue(lock, waiter); +			task->pi_blocked_on = NULL; +		} + +		/* +		 * We have to enqueue the top waiter(if it exists) into +		 * task->pi_waiters list. +		 */ +		if (rt_mutex_has_waiters(lock)) { +			top = rt_mutex_top_waiter(lock); +			rt_mutex_enqueue_pi(task, top); +		} +		raw_spin_unlock_irqrestore(&task->pi_lock, flags); +	} + +	/* We got the lock. */ +	debug_rt_mutex_lock(lock); + +	rt_mutex_set_owner(lock, task); + +	rt_mutex_deadlock_account_lock(lock, task); + +	return 1; +} + +/* + * Task blocks on lock. + * + * Prepare waiter and propagate pi chain + * + * This must be called with lock->wait_lock held. + */ +static int task_blocks_on_rt_mutex(struct rt_mutex *lock, +				   struct rt_mutex_waiter *waiter, +				   struct task_struct *task, +				   int detect_deadlock) +{ +	struct task_struct *owner = rt_mutex_owner(lock); +	struct rt_mutex_waiter *top_waiter = waiter; +	struct rt_mutex *next_lock; +	int chain_walk = 0, res; +	unsigned long flags; + +	/* +	 * Early deadlock detection. We really don't want the task to +	 * enqueue on itself just to untangle the mess later. It's not +	 * only an optimization. We drop the locks, so another waiter +	 * can come in before the chain walk detects the deadlock. So +	 * the other will detect the deadlock and return -EDEADLOCK, +	 * which is wrong, as the other waiter is not in a deadlock +	 * situation. +	 */ +	if (owner == task) +		return -EDEADLK; + +	raw_spin_lock_irqsave(&task->pi_lock, flags); +	__rt_mutex_adjust_prio(task); +	waiter->task = task; +	waiter->lock = lock; +	waiter->prio = task->prio; + +	/* Get the top priority waiter on the lock */ +	if (rt_mutex_has_waiters(lock)) +		top_waiter = rt_mutex_top_waiter(lock); +	rt_mutex_enqueue(lock, waiter); + +	task->pi_blocked_on = waiter; + +	raw_spin_unlock_irqrestore(&task->pi_lock, flags); + +	if (!owner) +		return 0; + +	raw_spin_lock_irqsave(&owner->pi_lock, flags); +	if (waiter == rt_mutex_top_waiter(lock)) { +		rt_mutex_dequeue_pi(owner, top_waiter); +		rt_mutex_enqueue_pi(owner, waiter); + +		__rt_mutex_adjust_prio(owner); +		if (owner->pi_blocked_on) +			chain_walk = 1; +	} else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) { +		chain_walk = 1; +	} + +	/* Store the lock on which owner is blocked or NULL */ +	next_lock = task_blocked_on_lock(owner); + +	raw_spin_unlock_irqrestore(&owner->pi_lock, flags); +	/* +	 * Even if full deadlock detection is on, if the owner is not +	 * blocked itself, we can avoid finding this out in the chain +	 * walk. +	 */ +	if (!chain_walk || !next_lock) +		return 0; + +	/* +	 * The owner can't disappear while holding a lock, +	 * so the owner struct is protected by wait_lock. +	 * Gets dropped in rt_mutex_adjust_prio_chain()! +	 */ +	get_task_struct(owner); + +	raw_spin_unlock(&lock->wait_lock); + +	res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, +					 next_lock, waiter, task); + +	raw_spin_lock(&lock->wait_lock); + +	return res; +} + +/* + * Wake up the next waiter on the lock. + * + * Remove the top waiter from the current tasks pi waiter list and + * wake it up. + * + * Called with lock->wait_lock held. + */ +static void wakeup_next_waiter(struct rt_mutex *lock) +{ +	struct rt_mutex_waiter *waiter; +	unsigned long flags; + +	raw_spin_lock_irqsave(¤t->pi_lock, flags); + +	waiter = rt_mutex_top_waiter(lock); + +	/* +	 * Remove it from current->pi_waiters. We do not adjust a +	 * possible priority boost right now. We execute wakeup in the +	 * boosted mode and go back to normal after releasing +	 * lock->wait_lock. +	 */ +	rt_mutex_dequeue_pi(current, waiter); + +	/* +	 * As we are waking up the top waiter, and the waiter stays +	 * queued on the lock until it gets the lock, this lock +	 * obviously has waiters. Just set the bit here and this has +	 * the added benefit of forcing all new tasks into the +	 * slow path making sure no task of lower priority than +	 * the top waiter can steal this lock. +	 */ +	lock->owner = (void *) RT_MUTEX_HAS_WAITERS; + +	raw_spin_unlock_irqrestore(¤t->pi_lock, flags); + +	/* +	 * It's safe to dereference waiter as it cannot go away as +	 * long as we hold lock->wait_lock. The waiter task needs to +	 * acquire it in order to dequeue the waiter. +	 */ +	wake_up_process(waiter->task); +} + +/* + * Remove a waiter from a lock and give up + * + * Must be called with lock->wait_lock held and + * have just failed to try_to_take_rt_mutex(). + */ +static void remove_waiter(struct rt_mutex *lock, +			  struct rt_mutex_waiter *waiter) +{ +	int first = (waiter == rt_mutex_top_waiter(lock)); +	struct task_struct *owner = rt_mutex_owner(lock); +	struct rt_mutex *next_lock = NULL; +	unsigned long flags; + +	raw_spin_lock_irqsave(¤t->pi_lock, flags); +	rt_mutex_dequeue(lock, waiter); +	current->pi_blocked_on = NULL; +	raw_spin_unlock_irqrestore(¤t->pi_lock, flags); + +	if (!owner) +		return; + +	if (first) { + +		raw_spin_lock_irqsave(&owner->pi_lock, flags); + +		rt_mutex_dequeue_pi(owner, waiter); + +		if (rt_mutex_has_waiters(lock)) { +			struct rt_mutex_waiter *next; + +			next = rt_mutex_top_waiter(lock); +			rt_mutex_enqueue_pi(owner, next); +		} +		__rt_mutex_adjust_prio(owner); + +		/* Store the lock on which owner is blocked or NULL */ +		next_lock = task_blocked_on_lock(owner); + +		raw_spin_unlock_irqrestore(&owner->pi_lock, flags); +	} + +	if (!next_lock) +		return; + +	/* gets dropped in rt_mutex_adjust_prio_chain()! */ +	get_task_struct(owner); + +	raw_spin_unlock(&lock->wait_lock); + +	rt_mutex_adjust_prio_chain(owner, 0, lock, next_lock, NULL, current); + +	raw_spin_lock(&lock->wait_lock); +} + +/* + * Recheck the pi chain, in case we got a priority setting + * + * Called from sched_setscheduler + */ +void rt_mutex_adjust_pi(struct task_struct *task) +{ +	struct rt_mutex_waiter *waiter; +	struct rt_mutex *next_lock; +	unsigned long flags; + +	raw_spin_lock_irqsave(&task->pi_lock, flags); + +	waiter = task->pi_blocked_on; +	if (!waiter || (waiter->prio == task->prio && +			!dl_prio(task->prio))) { +		raw_spin_unlock_irqrestore(&task->pi_lock, flags); +		return; +	} +	next_lock = waiter->lock; +	raw_spin_unlock_irqrestore(&task->pi_lock, flags); + +	/* gets dropped in rt_mutex_adjust_prio_chain()! */ +	get_task_struct(task); + +	rt_mutex_adjust_prio_chain(task, 0, NULL, next_lock, NULL, task); +} + +/** + * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop + * @lock:		 the rt_mutex to take + * @state:		 the state the task should block in (TASK_INTERRUPTIBLE + * 			 or TASK_UNINTERRUPTIBLE) + * @timeout:		 the pre-initialized and started timer, or NULL for none + * @waiter:		 the pre-initialized rt_mutex_waiter + * + * lock->wait_lock must be held by the caller. + */ +static int __sched +__rt_mutex_slowlock(struct rt_mutex *lock, int state, +		    struct hrtimer_sleeper *timeout, +		    struct rt_mutex_waiter *waiter) +{ +	int ret = 0; + +	for (;;) { +		/* Try to acquire the lock: */ +		if (try_to_take_rt_mutex(lock, current, waiter)) +			break; + +		/* +		 * TASK_INTERRUPTIBLE checks for signals and +		 * timeout. Ignored otherwise. +		 */ +		if (unlikely(state == TASK_INTERRUPTIBLE)) { +			/* Signal pending? */ +			if (signal_pending(current)) +				ret = -EINTR; +			if (timeout && !timeout->task) +				ret = -ETIMEDOUT; +			if (ret) +				break; +		} + +		raw_spin_unlock(&lock->wait_lock); + +		debug_rt_mutex_print_deadlock(waiter); + +		schedule_rt_mutex(lock); + +		raw_spin_lock(&lock->wait_lock); +		set_current_state(state); +	} + +	return ret; +} + +static void rt_mutex_handle_deadlock(int res, int detect_deadlock, +				     struct rt_mutex_waiter *w) +{ +	/* +	 * If the result is not -EDEADLOCK or the caller requested +	 * deadlock detection, nothing to do here. +	 */ +	if (res != -EDEADLOCK || detect_deadlock) +		return; + +	/* +	 * Yell lowdly and stop the task right here. +	 */ +	rt_mutex_print_deadlock(w); +	while (1) { +		set_current_state(TASK_INTERRUPTIBLE); +		schedule(); +	} +} + +/* + * Slow path lock function: + */ +static int __sched +rt_mutex_slowlock(struct rt_mutex *lock, int state, +		  struct hrtimer_sleeper *timeout, +		  int detect_deadlock) +{ +	struct rt_mutex_waiter waiter; +	int ret = 0; + +	debug_rt_mutex_init_waiter(&waiter); +	RB_CLEAR_NODE(&waiter.pi_tree_entry); +	RB_CLEAR_NODE(&waiter.tree_entry); + +	raw_spin_lock(&lock->wait_lock); + +	/* Try to acquire the lock again: */ +	if (try_to_take_rt_mutex(lock, current, NULL)) { +		raw_spin_unlock(&lock->wait_lock); +		return 0; +	} + +	set_current_state(state); + +	/* Setup the timer, when timeout != NULL */ +	if (unlikely(timeout)) { +		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); +		if (!hrtimer_active(&timeout->timer)) +			timeout->task = NULL; +	} + +	ret = task_blocks_on_rt_mutex(lock, &waiter, current, detect_deadlock); + +	if (likely(!ret)) +		ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); + +	set_current_state(TASK_RUNNING); + +	if (unlikely(ret)) { +		remove_waiter(lock, &waiter); +		rt_mutex_handle_deadlock(ret, detect_deadlock, &waiter); +	} + +	/* +	 * try_to_take_rt_mutex() sets the waiter bit +	 * unconditionally. We might have to fix that up. +	 */ +	fixup_rt_mutex_waiters(lock); + +	raw_spin_unlock(&lock->wait_lock); + +	/* Remove pending timer: */ +	if (unlikely(timeout)) +		hrtimer_cancel(&timeout->timer); + +	debug_rt_mutex_free_waiter(&waiter); + +	return ret; +} + +/* + * Slow path try-lock function: + */ +static inline int +rt_mutex_slowtrylock(struct rt_mutex *lock) +{ +	int ret = 0; + +	raw_spin_lock(&lock->wait_lock); + +	if (likely(rt_mutex_owner(lock) != current)) { + +		ret = try_to_take_rt_mutex(lock, current, NULL); +		/* +		 * try_to_take_rt_mutex() sets the lock waiters +		 * bit unconditionally. Clean this up. +		 */ +		fixup_rt_mutex_waiters(lock); +	} + +	raw_spin_unlock(&lock->wait_lock); + +	return ret; +} + +/* + * Slow path to release a rt-mutex: + */ +static void __sched +rt_mutex_slowunlock(struct rt_mutex *lock) +{ +	raw_spin_lock(&lock->wait_lock); + +	debug_rt_mutex_unlock(lock); + +	rt_mutex_deadlock_account_unlock(current); + +	/* +	 * We must be careful here if the fast path is enabled. If we +	 * have no waiters queued we cannot set owner to NULL here +	 * because of: +	 * +	 * foo->lock->owner = NULL; +	 *			rtmutex_lock(foo->lock);   <- fast path +	 *			free = atomic_dec_and_test(foo->refcnt); +	 *			rtmutex_unlock(foo->lock); <- fast path +	 *			if (free) +	 *				kfree(foo); +	 * raw_spin_unlock(foo->lock->wait_lock); +	 * +	 * So for the fastpath enabled kernel: +	 * +	 * Nothing can set the waiters bit as long as we hold +	 * lock->wait_lock. So we do the following sequence: +	 * +	 *	owner = rt_mutex_owner(lock); +	 *	clear_rt_mutex_waiters(lock); +	 *	raw_spin_unlock(&lock->wait_lock); +	 *	if (cmpxchg(&lock->owner, owner, 0) == owner) +	 *		return; +	 *	goto retry; +	 * +	 * The fastpath disabled variant is simple as all access to +	 * lock->owner is serialized by lock->wait_lock: +	 * +	 *	lock->owner = NULL; +	 *	raw_spin_unlock(&lock->wait_lock); +	 */ +	while (!rt_mutex_has_waiters(lock)) { +		/* Drops lock->wait_lock ! */ +		if (unlock_rt_mutex_safe(lock) == true) +			return; +		/* Relock the rtmutex and try again */ +		raw_spin_lock(&lock->wait_lock); +	} + +	/* +	 * The wakeup next waiter path does not suffer from the above +	 * race. See the comments there. +	 */ +	wakeup_next_waiter(lock); + +	raw_spin_unlock(&lock->wait_lock); + +	/* Undo pi boosting if necessary: */ +	rt_mutex_adjust_prio(current); +} + +/* + * debug aware fast / slowpath lock,trylock,unlock + * + * The atomic acquire/release ops are compiled away, when either the + * architecture does not support cmpxchg or when debugging is enabled. + */ +static inline int +rt_mutex_fastlock(struct rt_mutex *lock, int state, +		  int detect_deadlock, +		  int (*slowfn)(struct rt_mutex *lock, int state, +				struct hrtimer_sleeper *timeout, +				int detect_deadlock)) +{ +	if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { +		rt_mutex_deadlock_account_lock(lock, current); +		return 0; +	} else +		return slowfn(lock, state, NULL, detect_deadlock); +} + +static inline int +rt_mutex_timed_fastlock(struct rt_mutex *lock, int state, +			struct hrtimer_sleeper *timeout, int detect_deadlock, +			int (*slowfn)(struct rt_mutex *lock, int state, +				      struct hrtimer_sleeper *timeout, +				      int detect_deadlock)) +{ +	if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { +		rt_mutex_deadlock_account_lock(lock, current); +		return 0; +	} else +		return slowfn(lock, state, timeout, detect_deadlock); +} + +static inline int +rt_mutex_fasttrylock(struct rt_mutex *lock, +		     int (*slowfn)(struct rt_mutex *lock)) +{ +	if (likely(rt_mutex_cmpxchg(lock, NULL, current))) { +		rt_mutex_deadlock_account_lock(lock, current); +		return 1; +	} +	return slowfn(lock); +} + +static inline void +rt_mutex_fastunlock(struct rt_mutex *lock, +		    void (*slowfn)(struct rt_mutex *lock)) +{ +	if (likely(rt_mutex_cmpxchg(lock, current, NULL))) +		rt_mutex_deadlock_account_unlock(current); +	else +		slowfn(lock); +} + +/** + * rt_mutex_lock - lock a rt_mutex + * + * @lock: the rt_mutex to be locked + */ +void __sched rt_mutex_lock(struct rt_mutex *lock) +{ +	might_sleep(); + +	rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, 0, rt_mutex_slowlock); +} +EXPORT_SYMBOL_GPL(rt_mutex_lock); + +/** + * rt_mutex_lock_interruptible - lock a rt_mutex interruptible + * + * @lock: 		the rt_mutex to be locked + * @detect_deadlock:	deadlock detection on/off + * + * Returns: + *  0 		on success + * -EINTR 	when interrupted by a signal + * -EDEADLK	when the lock would deadlock (when deadlock detection is on) + */ +int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock, +						 int detect_deadlock) +{ +	might_sleep(); + +	return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, +				 detect_deadlock, rt_mutex_slowlock); +} +EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); + +/** + * rt_mutex_timed_lock - lock a rt_mutex interruptible + *			the timeout structure is provided + *			by the caller + * + * @lock: 		the rt_mutex to be locked + * @timeout:		timeout structure or NULL (no timeout) + * @detect_deadlock:	deadlock detection on/off + * + * Returns: + *  0 		on success + * -EINTR 	when interrupted by a signal + * -ETIMEDOUT	when the timeout expired + * -EDEADLK	when the lock would deadlock (when deadlock detection is on) + */ +int +rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout, +		    int detect_deadlock) +{ +	might_sleep(); + +	return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, +				       detect_deadlock, rt_mutex_slowlock); +} +EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); + +/** + * rt_mutex_trylock - try to lock a rt_mutex + * + * @lock:	the rt_mutex to be locked + * + * Returns 1 on success and 0 on contention + */ +int __sched rt_mutex_trylock(struct rt_mutex *lock) +{ +	return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); +} +EXPORT_SYMBOL_GPL(rt_mutex_trylock); + +/** + * rt_mutex_unlock - unlock a rt_mutex + * + * @lock: the rt_mutex to be unlocked + */ +void __sched rt_mutex_unlock(struct rt_mutex *lock) +{ +	rt_mutex_fastunlock(lock, rt_mutex_slowunlock); +} +EXPORT_SYMBOL_GPL(rt_mutex_unlock); + +/** + * rt_mutex_destroy - mark a mutex unusable + * @lock: the mutex to be destroyed + * + * This function marks the mutex uninitialized, and any subsequent + * use of the mutex is forbidden. The mutex must not be locked when + * this function is called. + */ +void rt_mutex_destroy(struct rt_mutex *lock) +{ +	WARN_ON(rt_mutex_is_locked(lock)); +#ifdef CONFIG_DEBUG_RT_MUTEXES +	lock->magic = NULL; +#endif +} + +EXPORT_SYMBOL_GPL(rt_mutex_destroy); + +/** + * __rt_mutex_init - initialize the rt lock + * + * @lock: the rt lock to be initialized + * + * Initialize the rt lock to unlocked state. + * + * Initializing of a locked rt lock is not allowed + */ +void __rt_mutex_init(struct rt_mutex *lock, const char *name) +{ +	lock->owner = NULL; +	raw_spin_lock_init(&lock->wait_lock); +	lock->waiters = RB_ROOT; +	lock->waiters_leftmost = NULL; + +	debug_rt_mutex_init(lock, name); +} +EXPORT_SYMBOL_GPL(__rt_mutex_init); + +/** + * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a + *				proxy owner + * + * @lock: 	the rt_mutex to be locked + * @proxy_owner:the task to set as owner + * + * No locking. Caller has to do serializing itself + * Special API call for PI-futex support + */ +void rt_mutex_init_proxy_locked(struct rt_mutex *lock, +				struct task_struct *proxy_owner) +{ +	__rt_mutex_init(lock, NULL); +	debug_rt_mutex_proxy_lock(lock, proxy_owner); +	rt_mutex_set_owner(lock, proxy_owner); +	rt_mutex_deadlock_account_lock(lock, proxy_owner); +} + +/** + * rt_mutex_proxy_unlock - release a lock on behalf of owner + * + * @lock: 	the rt_mutex to be locked + * + * No locking. Caller has to do serializing itself + * Special API call for PI-futex support + */ +void rt_mutex_proxy_unlock(struct rt_mutex *lock, +			   struct task_struct *proxy_owner) +{ +	debug_rt_mutex_proxy_unlock(lock); +	rt_mutex_set_owner(lock, NULL); +	rt_mutex_deadlock_account_unlock(proxy_owner); +} + +/** + * rt_mutex_start_proxy_lock() - Start lock acquisition for another task + * @lock:		the rt_mutex to take + * @waiter:		the pre-initialized rt_mutex_waiter + * @task:		the task to prepare + * @detect_deadlock:	perform deadlock detection (1) or not (0) + * + * Returns: + *  0 - task blocked on lock + *  1 - acquired the lock for task, caller should wake it up + * <0 - error + * + * Special API call for FUTEX_REQUEUE_PI support. + */ +int rt_mutex_start_proxy_lock(struct rt_mutex *lock, +			      struct rt_mutex_waiter *waiter, +			      struct task_struct *task, int detect_deadlock) +{ +	int ret; + +	raw_spin_lock(&lock->wait_lock); + +	if (try_to_take_rt_mutex(lock, task, NULL)) { +		raw_spin_unlock(&lock->wait_lock); +		return 1; +	} + +	/* We enforce deadlock detection for futexes */ +	ret = task_blocks_on_rt_mutex(lock, waiter, task, 1); + +	if (ret && !rt_mutex_owner(lock)) { +		/* +		 * Reset the return value. We might have +		 * returned with -EDEADLK and the owner +		 * released the lock while we were walking the +		 * pi chain.  Let the waiter sort it out. +		 */ +		ret = 0; +	} + +	if (unlikely(ret)) +		remove_waiter(lock, waiter); + +	raw_spin_unlock(&lock->wait_lock); + +	debug_rt_mutex_print_deadlock(waiter); + +	return ret; +} + +/** + * rt_mutex_next_owner - return the next owner of the lock + * + * @lock: the rt lock query + * + * Returns the next owner of the lock or NULL + * + * Caller has to serialize against other accessors to the lock + * itself. + * + * Special API call for PI-futex support + */ +struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock) +{ +	if (!rt_mutex_has_waiters(lock)) +		return NULL; + +	return rt_mutex_top_waiter(lock)->task; +} + +/** + * rt_mutex_finish_proxy_lock() - Complete lock acquisition + * @lock:		the rt_mutex we were woken on + * @to:			the timeout, null if none. hrtimer should already have + * 			been started. + * @waiter:		the pre-initialized rt_mutex_waiter + * @detect_deadlock:	perform deadlock detection (1) or not (0) + * + * Complete the lock acquisition started our behalf by another thread. + * + * Returns: + *  0 - success + * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK + * + * Special API call for PI-futex requeue support + */ +int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, +			       struct hrtimer_sleeper *to, +			       struct rt_mutex_waiter *waiter, +			       int detect_deadlock) +{ +	int ret; + +	raw_spin_lock(&lock->wait_lock); + +	set_current_state(TASK_INTERRUPTIBLE); + +	ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); + +	set_current_state(TASK_RUNNING); + +	if (unlikely(ret)) +		remove_waiter(lock, waiter); + +	/* +	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might +	 * have to fix that up. +	 */ +	fixup_rt_mutex_waiters(lock); + +	raw_spin_unlock(&lock->wait_lock); + +	return ret; +}  | 
