/*
* Fast Userspace Mutexes (which I call "Futexes!").
* (C) Rusty Russell, IBM 2002
*
* Generalized futexes, futex requeueing, misc fixes by Ingo Molnar
* (C) Copyright 2003 Red Hat Inc, All Rights Reserved
*
* Removed page pinning, fix privately mapped COW pages and other cleanups
* (C) Copyright 2003, 2004 Jamie Lokier
*
* Robust futex support started by Ingo Molnar
* (C) Copyright 2006 Red Hat Inc, All Rights Reserved
* Thanks to Thomas Gleixner for suggestions, analysis and fixes.
*
* Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
* enough at me, Linus for the original (flawed) idea, Matthew
* Kirkwood for proof-of-concept implementation.
*
* "The futexes are also cursed."
* "But they come in a choice of three flavours!"
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/futex.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/signal.h>
#include <asm/futex.h>
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
/*
* Futexes are matched on equal values of this key.
* The key type depends on whether it's a shared or private mapping.
* Don't rearrange members without looking at hash_futex().
*
* offset is aligned to a multiple of sizeof(u32) (== 4) by definition.
* We set bit 0 to indicate if it's an inode-based key.
*/
union futex_key {
struct {
unsigned long pgoff;
struct inode *inode;
int offset;
} shared;
struct {
unsigned long uaddr;
struct mm_struct *mm;
int offset;
} private;
struct {
unsigned long word;
void *ptr;
int offset;
} both;
};
/*
* We use this hashed waitqueue instead of a normal wait_queue_t, so
* we can wake only the relevant ones (hashed queues may be shared).
*
* A futex_q has a woken state, just like tasks have TASK_RUNNING.
* It is considered woken when list_empty(&q->list) || q->lock_ptr == 0.
* The order of wakup is always to make the first condition true, then
* wake up q->waiters, then make the second condition true.
*/
struct futex_q {
struct list_head list;
wait_queue_head_t waiters;
/* Which hash list lock to use. */
spinlock_t *lock_ptr;
/* Key which the futex is hashed on. */
union futex_key key;
/* For fd, sigio sent using these. */
int fd;
struct file *filp;
};
/*
* Split the global futex_lock into every hash list lock.
*/
struct futex_hash_bucket {
spinlock_t lock;
struct list_head chain;
};
static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
/* Futex-fs vfsmount entry: */
static struct vfsmount *futex_mnt;
/*
* We hash on the keys returned from get_futex_key (see below).
*/
static struct futex_hash_bucket *hash_futex(union futex_key *key)
{
u32 hash = jhash2((u32*)&key->both.word,
(sizeof(key->both.word)+sizeof(key->both.ptr))/4,
key->both.offset);
return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)];
}
/*
* Return 1 if two futex_keys are equal, 0 otherwise.
*/
static inline int match_futex(union futex_key *key1, union futex_key *key2)
{
return (key1->both.word == key2->both.word
&& key1->both.ptr == key2->both.ptr
&& key1->both.offset == key2->both.offset);
}
/*
* Get parameters which are the keys for a futex.
*
* For shared mappings, it's (page->index, vma->vm_file->f_dentry->d_inode,
* offset_within_page). For private mappings, it's (uaddr, current->mm).
* We can usually work out the index without swapping in the page.
*
* Returns: 0, or negative error code.
* The key words are stored in *key on success.
*
* Should be called with ¤t->mm->mmap_sem but NOT any spinlocks.
*/
static int get_futex_key(unsigned long uaddr, union futex_key *key)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct page *page;
int err;
/*
* The futex address must be "naturally" aligned.
*/
key->both.offset = uaddr % PAGE_SIZE;
if (unlikely((key->both.offset % sizeof(u32)) != 0))
return -EINVAL;
uaddr -= key->both.offset;
/*
* The futex is hashed differently depending on whether
* it's in a shared or private mapping. So check vma first.
*/
vma = find_extend_vma(mm, uaddr);
if (unlikely(!vma))
return -EFAULT;
/*
* Permissions.
*/
if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ))
return (vma->vm_flags & VM_IO) ? -EPERM