aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorMatthew Wilcox <matthew@wil.cx>2008-03-07 21:55:58 -0500
committerMatthew Wilcox <willy@linux.intel.com>2008-04-17 10:42:34 -0400
commit64ac24e738823161693bf791f87adc802cf529ff (patch)
tree19c0b0cf314d4394ca580c05b86cdf874ce0a167 /kernel
parente48b3deee475134585eed03e7afebe4bf9e0dba9 (diff)
Generic semaphore implementation
Semaphores are no longer performance-critical, so a generic C implementation is better for maintainability, debuggability and extensibility. Thanks to Peter Zijlstra for fixing the lockdep warning. Thanks to Harvey Harrison for pointing out that the unlikely() was unnecessary. Signed-off-by: Matthew Wilcox <willy@linux.intel.com> Acked-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/semaphore.c187
2 files changed, 188 insertions, 1 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 6c584c55a6e..f45c69e6968 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -8,7 +8,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
signal.o sys.o kmod.o workqueue.o pid.o \
rcupdate.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
- hrtimer.o rwsem.o nsproxy.o srcu.o \
+ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
notifier.o ksysfs.o pm_qos_params.o
obj-$(CONFIG_SYSCTL) += sysctl_check.o
diff --git a/kernel/semaphore.c b/kernel/semaphore.c
new file mode 100644
index 00000000000..d5a72702f26
--- /dev/null
+++ b/kernel/semaphore.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2008 Intel Corporation
+ * Author: Matthew Wilcox <willy@linux.intel.com>
+ *
+ * Distributed under the terms of the GNU GPL, version 2
+ */
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/spinlock.h>
+
+/*
+ * Some notes on the implementation:
+ *
+ * down_trylock() and up() can be called from interrupt context.
+ * So we have to disable interrupts when taking the lock.
+ *
+ * The ->count variable, if positive, defines how many more tasks can
+ * acquire the semaphore. If negative, it represents how many tasks are
+ * waiting on the semaphore (*). If zero, no tasks are waiting, and no more
+ * tasks can acquire the semaphore.
+ *
+ * (*) Except for the window between one task calling up() and the task
+ * sleeping in a __down_common() waking up. In order to avoid a third task
+ * coming in and stealing the second task's wakeup, we leave the ->count
+ * negative. If we have a more complex situation, the ->count may become
+ * zero or negative (eg a semaphore with count = 2, three tasks attempt to
+ * acquire it, one sleeps, two finish and call up(), the second task to call
+ * up() notices that the list is empty and just increments count).
+ */
+
+static noinline void __down(struct semaphore *sem);
+static noinline int __down_interruptible(struct semaphore *sem);
+static noinline void __up(struct semaphore *sem);
+
+void down(struct semaphore *sem)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&sem->lock, flags);
+ if (unlikely(sem->count-- <= 0))
+ __down(sem);
+ spin_unlock_irqrestore(&sem->lock, flags);
+}
+EXPORT_SYMBOL(down);
+
+int down_interruptible(struct semaphore *sem)
+{
+ unsigned long flags;
+ int result = 0;
+
+ spin_lock_irqsave(&sem->lock, flags);
+ if (unlikely(sem->count-- <= 0))
+ result = __down_interruptible(sem);
+ spin_unlock_irqrestore(&sem->lock, flags);
+
+ return result;
+}
+EXPORT_SYMBOL(down_interruptible);
+
+/**
+ * down_trylock - try to acquire the semaphore, without waiting
+ * @sem: the semaphore to be acquired
+ *
+ * Try to acquire the semaphore atomically. Returns 0 if the mutex has
+ * been acquired successfully and 1 if it is contended.
+ *
+ * NOTE: This return value is inverted from both spin_trylock and
+ * mutex_trylock! Be careful about this when converting code.
+ *
+ * Unlike mutex_trylock, this function can be used from interrupt context,
+ * and the semaphore can be released by any task or interrupt.
+ */
+int down_trylock(struct semaphore *sem)
+{
+ unsigned long flags;
+ int count;
+
+ spin_lock_irqsave(&sem->lock, flags);
+ count = sem->count - 1;
+ if (likely(count >= 0))
+ sem->count = count;
+ spin_unlock_irqrestore(&sem->lock, flags);
+
+ return (count < 0);
+}
+EXPORT_SYMBOL(down_trylock);
+
+void up(struct semaphore *sem)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&sem->lock, flags);
+ if (likely(sem->count >= 0))
+ sem->count++;
+ else
+ __up(sem);
+ spin_unlock_irqrestore(&sem->lock, flags);
+}
+EXPORT_SYMBOL(up);
+
+/* Functions for the contended case */
+
+struct semaphore_waiter {
+ struct list_head list;
+ struct task_struct *task;
+ int up;
+};
+
+/*
+ * Wake up a process waiting on a semaphore. We need to call this from both
+ * __up and __down_common as it's possible to race a task into the semaphore
+ * if it comes in at just the right time between two tasks calling up() and
+ * a third task waking up. This function assumes the wait_list is already
+ * checked for being non-empty.
+ */
+static noinline void __sched __up_down_common(struct semaphore *sem)
+{
+ struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
+ struct semaphore_waiter, list);
+ list_del(&waiter->list);
+ waiter->up = 1;
+ wake_up_process(waiter->task);
+}
+
+/*
+ * Because this function is inlined, the 'state' parameter will be constant,
+ * and thus optimised away by the compiler.
+ */
+static inline int __sched __down_common(struct semaphore *sem, long state)
+{
+ int result = 0;
+ struct task_struct *task = current;
+ struct semaphore_waiter waiter;
+
+ list_add_tail(&waiter.list, &sem->wait_list);
+ waiter.task = task;
+ waiter.up = 0;
+
+ for (;;) {
+ if (state == TASK_INTERRUPTIBLE && signal_pending(task))
+ goto interrupted;
+ __set_task_state(task, state);
+ spin_unlock_irq(&sem->lock);
+ schedule();
+ spin_lock_irq(&sem->lock);
+ if (waiter.up)
+ goto woken;
+ }
+
+ interrupted:
+ list_del(&waiter.list);
+ result = -EINTR;
+ woken:
+ /*
+ * Account for the process which woke us up. For the case where
+ * we're interrupted, we need to increment the count on our own
+ * behalf. I don't believe we can hit the case where the
+ * sem->count hits zero, *and* there's a second task sleeping,
+ * but it doesn't hurt, that's not a commonly exercised path and
+ * it's not a performance path either.
+ */
+ if (unlikely((++sem->count >= 0) && !list_empty(&sem->wait_list)))
+ __up_down_common(sem);
+ return result;
+}
+
+static noinline void __sched __down(struct semaphore *sem)
+{
+ __down_common(sem, TASK_UNINTERRUPTIBLE);
+}
+
+static noinline int __sched __down_interruptible(struct semaphore *sem)
+{
+ return __down_common(sem, TASK_INTERRUPTIBLE);
+}
+
+static noinline void __sched __up(struct semaphore *sem)
+{
+ if (unlikely(list_empty(&sem->wait_list)))
+ sem->count++;
+ else
+ __up_down_common(sem);
+}