From cf475ad28ac35cc9ba612d67158f29b73b38b05d Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Tue, 29 Apr 2008 01:00:16 -0700 Subject: cgroups: add an owner to the mm_struct Remove the mem_cgroup member from mm_struct and instead adds an owner. This approach was suggested by Paul Menage. The advantage of this approach is that, once the mm->owner is known, using the subsystem id, the cgroup can be determined. It also allows several control groups that are virtually grouped by mm_struct, to exist independent of the memory controller i.e., without adding mem_cgroup's for each controller, to mm_struct. A new config option CONFIG_MM_OWNER is added and the memory resource controller selects this config option. This patch also adds cgroup callbacks to notify subsystems when mm->owner changes. The mm_cgroup_changed callback is called with the task_lock() of the new task held and is called just prior to changing the mm->owner. I am indebted to Paul Menage for the several reviews of this patchset and helping me make it lighter and simpler. This patch was tested on a powerpc box, it was compiled with both the MM_OWNER config turned on and off. After the thread group leader exits, it's moved to init_css_state by cgroup_exit(), thus all future charges from runnings threads would be redirected to the init_css_set's subsystem. Signed-off-by: Balbir Singh Cc: Pavel Emelianov Cc: Hugh Dickins Cc: Sudhir Kumar Cc: YAMAMOTO Takashi Cc: Hirokazu Takahashi Cc: David Rientjes , Cc: Balbir Singh Acked-by: KAMEZAWA Hiroyuki Acked-by: Pekka Enberg Reviewed-by: Paul Menage Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 2a9d98c641a..ae0f2c4e452 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -557,6 +557,88 @@ void exit_fs(struct task_struct *tsk) EXPORT_SYMBOL_GPL(exit_fs); +#ifdef CONFIG_MM_OWNER +/* + * Task p is exiting and it owned mm, lets find a new owner for it + */ +static inline int +mm_need_new_owner(struct mm_struct *mm, struct task_struct *p) +{ + /* + * If there are other users of the mm and the owner (us) is exiting + * we need to find a new owner to take on the responsibility. + */ + if (!mm) + return 0; + if (atomic_read(&mm->mm_users) <= 1) + return 0; + if (mm->owner != p) + return 0; + return 1; +} + +void mm_update_next_owner(struct mm_struct *mm) +{ + struct task_struct *c, *g, *p = current; + +retry: + if (!mm_need_new_owner(mm, p)) + return; + + read_lock(&tasklist_lock); + /* + * Search in the children + */ + list_for_each_entry(c, &p->children, sibling) { + if (c->mm == mm) + goto assign_new_owner; + } + + /* + * Search in the siblings + */ + list_for_each_entry(c, &p->parent->children, sibling) { + if (c->mm == mm) + goto assign_new_owner; + } + + /* + * Search through everything else. We should not get + * here often + */ + do_each_thread(g, c) { + if (c->mm == mm) + goto assign_new_owner; + } while_each_thread(g, c); + + read_unlock(&tasklist_lock); + return; + +assign_new_owner: + BUG_ON(c == p); + get_task_struct(c); + /* + * The task_lock protects c->mm from changing. + * We always want mm->owner->mm == mm + */ + task_lock(c); + /* + * Delay read_unlock() till we have the task_lock() + * to ensure that c does not slip away underneath us + */ + read_unlock(&tasklist_lock); + if (c->mm != mm) { + task_unlock(c); + put_task_struct(c); + goto retry; + } + cgroup_mm_owner_callbacks(mm->owner, c); + mm->owner = c; + task_unlock(c); + put_task_struct(c); +} +#endif /* CONFIG_MM_OWNER */ + /* * Turn us into a lazy TLB process if we * aren't already.. @@ -596,6 +678,7 @@ static void exit_mm(struct task_struct * tsk) /* We don't want this task to be frozen prematurely */ clear_freeze_flag(tsk); task_unlock(tsk); + mm_update_next_owner(mm); mmput(mm); } -- cgit v1.2.3-70-g09d2 From bfc4b0890af566940de6e7aeb4b5faf46d3c3513 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 30 Apr 2008 00:52:36 -0700 Subject: signals: do_group_exit(): use signal_group_exit() more consistently do_group_exit() checks SIGNAL_GROUP_EXIT to avoid taking sighand->siglock. Since ed5d2cac114202fe2978a9cbcab8f5032796d538 exec() doesn't set this flag, we should use signal_group_exit(). This is not needed for correctness, but can speedup the multithreaded exec and makes the code more consistent. Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index ae0f2c4e452..6d019aa8522 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1115,12 +1115,13 @@ asmlinkage long sys_exit(int error_code) NORET_TYPE void do_group_exit(int exit_code) { + struct signal_struct *sig = current->signal; + BUG_ON(exit_code & 0x80); /* core dumps don't get here */ - if (current->signal->flags & SIGNAL_GROUP_EXIT) - exit_code = current->signal->group_exit_code; + if (signal_group_exit(sig)) + exit_code = sig->group_exit_code; else if (!thread_group_empty(current)) { - struct signal_struct *const sig = current->signal; struct sighand_struct *const sighand = current->sighand; spin_lock_irq(&sighand->siglock); if (signal_group_exit(sig)) -- cgit v1.2.3-70-g09d2 From d839fd4d2e95a5fbc4d50aa9d17eed6a5f2094e6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 30 Apr 2008 00:53:11 -0700 Subject: ptrace: introduce task_detached() helper exit.c has numerous "->exit_signal == -1" comparisons, this check is subtle and deserves a helper. Imho makes the code more parseable for humans. At least it's surely more greppable. Also, a couple of whitespace cleanups. No functional changes. Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 45 +++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 20 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 6d019aa8522..4035d391a0d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -52,6 +52,11 @@ static void exit_mm(struct task_struct * tsk); +static inline int task_detached(struct task_struct *p) +{ + return p->exit_signal == -1; +} + static void __unhash_process(struct task_struct *p) { nr_threads--; @@ -160,7 +165,7 @@ repeat: zap_leader = 0; leader = p->group_leader; if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) { - BUG_ON(leader->exit_signal == -1); + BUG_ON(task_detached(leader)); do_notify_parent(leader, leader->exit_signal); /* * If we were the last child thread and the leader has @@ -170,7 +175,7 @@ repeat: * do_notify_parent() will have marked it self-reaping in * that case. */ - zap_leader = (leader->exit_signal == -1); + zap_leader = task_detached(leader); } write_unlock_irq(&tasklist_lock); @@ -721,14 +726,14 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) return; /* We don't want people slaying init. */ - if (p->exit_signal != -1) + if (!task_detached(p)) p->exit_signal = SIGCHLD; /* If we'd notified the old parent about this child's death, * also notify the new parent. */ if (!traced && p->exit_state == EXIT_ZOMBIE && - p->exit_signal != -1 && thread_group_empty(p)) + !task_detached(p) && thread_group_empty(p)) do_notify_parent(p, p->exit_signal); kill_orphaned_pgrp(p, father); @@ -781,18 +786,18 @@ static void forget_original_parent(struct task_struct *father) } else { /* reparent ptraced task to its real parent */ __ptrace_unlink (p); - if (p->exit_state == EXIT_ZOMBIE && p->exit_signal != -1 && + if (p->exit_state == EXIT_ZOMBIE && !task_detached(p) && thread_group_empty(p)) do_notify_parent(p, p->exit_signal); } /* - * if the ptraced child is a zombie with exit_signal == -1 - * we must collect it before we exit, or it will remain - * zombie forever since we prevented it from self-reap itself - * while it was being traced by us, to be able to see it in wait4. + * if the ptraced child is a detached zombie we must collect + * it before we exit, or it will remain zombie forever since + * we prevented it from self-reap itself while it was being + * traced by us, to be able to see it in wait4. */ - if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && p->exit_signal == -1)) + if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && task_detached(p))) list_add(&p->ptrace_list, &ptrace_dead); } @@ -849,26 +854,26 @@ static void exit_notify(struct task_struct *tsk, int group_dead) * we have changed execution domain as these two values started * the same after a fork. */ - if (tsk->exit_signal != SIGCHLD && tsk->exit_signal != -1 && + if (tsk->exit_signal != SIGCHLD && !task_detached(tsk) && (tsk->parent_exec_id != tsk->real_parent->self_exec_id || - tsk->self_exec_id != tsk->parent_exec_id) - && !capable(CAP_KILL)) + tsk->self_exec_id != tsk->parent_exec_id) && + !capable(CAP_KILL)) tsk->exit_signal = SIGCHLD; - /* If something other than our normal parent is ptracing us, then * send it a SIGCHLD instead of honoring exit_signal. exit_signal * only has special meaning to our real parent. */ - if (tsk->exit_signal != -1 && thread_group_empty(tsk)) { - int signal = tsk->parent == tsk->real_parent ? tsk->exit_signal : SIGCHLD; + if (!task_detached(tsk) && thread_group_empty(tsk)) { + int signal = (tsk->parent == tsk->real_parent) + ? tsk->exit_signal : SIGCHLD; do_notify_parent(tsk, signal); } else if (tsk->ptrace) { do_notify_parent(tsk, SIGCHLD); } state = EXIT_ZOMBIE; - if (tsk->exit_signal == -1 && likely(!tsk->ptrace)) + if (task_detached(tsk) && likely(!tsk->ptrace)) state = EXIT_DEAD; tsk->exit_state = state; @@ -1173,7 +1178,7 @@ static int eligible_child(enum pid_type type, struct pid *pid, int options, * Do not consider detached threads that are * not ptraced: */ - if (p->exit_signal == -1 && !p->ptrace) + if (task_detached(p) && !p->ptrace) return 0; /* Wait for all children (clone and not) if __WALL is set; @@ -1365,9 +1370,9 @@ static int wait_task_zombie(struct task_struct *p, int noreap, * If it's still not detached after that, don't release * it now. */ - if (p->exit_signal != -1) { + if (!task_detached(p)) { do_notify_parent(p, p->exit_signal); - if (p->exit_signal != -1) { + if (!task_detached(p)) { p->exit_state = EXIT_ZOMBIE; p = NULL; } -- cgit v1.2.3-70-g09d2 From 376e1d2531860358c8a79fecf5f4f42994d03c4d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 30 Apr 2008 00:53:12 -0700 Subject: reparent_thread: use same_thread_group() Trivial, use same_thread_group() in reparent_thread(). Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 4035d391a0d..413c81ec858 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -722,7 +722,7 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) /* If this is a threaded reparent there is no need to * notify anyone anything has happened. */ - if (p->real_parent->group_leader == father->group_leader) + if (same_thread_group(p->real_parent, father)) return; /* We don't want people slaying init. */ -- cgit v1.2.3-70-g09d2 From 2800d8d19e51414403df8144eaa214bb03400b87 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 30 Apr 2008 00:53:12 -0700 Subject: document de_thread() with exit_notify() connection Add a couple of small comments, it is not easy to see what this code does. Signed-off-by: Oleg Nesterov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- kernel/exit.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/exit.c') diff --git a/fs/exec.c b/fs/exec.c index 8fccc276d40..9f9f931ef94 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -798,7 +798,7 @@ static int de_thread(struct task_struct *tsk) if (!thread_group_leader(tsk)) { leader = tsk->group_leader; - sig->notify_count = -1; + sig->notify_count = -1; /* for exit_notify() */ for (;;) { write_lock_irq(&tasklist_lock); if (likely(leader->exit_state)) diff --git a/kernel/exit.c b/kernel/exit.c index 413c81ec858..879ed6e1c88 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -877,6 +877,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead) state = EXIT_DEAD; tsk->exit_state = state; + /* mt-exec, de_thread() is waiting for us */ if (thread_group_leader(tsk) && tsk->signal->notify_count < 0 && tsk->signal->group_exit_task) -- cgit v1.2.3-70-g09d2 From 53b6f9fbd3b63af14b4f6268e8b5b80d178d05bc Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 30 Apr 2008 00:53:13 -0700 Subject: ptrace: introduce ptrace_reparented() helper Add another trivial helper for the sake of grep. It also auto-documents the fact that ->parent != real_parent implies ->ptrace. No functional changes. Signed-off-by: Oleg Nesterov Acked-by: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 4 ++++ kernel/exit.c | 9 ++++----- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'kernel/exit.c') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index ebe0c17039c..f98501ba557 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -98,6 +98,10 @@ extern void ptrace_untrace(struct task_struct *child); extern int ptrace_may_attach(struct task_struct *task); extern int __ptrace_may_attach(struct task_struct *task); +static inline int ptrace_reparented(struct task_struct *child) +{ + return child->real_parent != child->parent; +} static inline void ptrace_link(struct task_struct *child, struct task_struct *new_parent) { diff --git a/kernel/exit.c b/kernel/exit.c index 879ed6e1c88..0da2921b1e7 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -698,7 +698,7 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) if (unlikely(traced)) { /* Preserve ptrace links if someone else is tracing this child. */ list_del_init(&p->ptrace_list); - if (p->parent != p->real_parent) + if (ptrace_reparented(p)) list_add(&p->ptrace_list, &p->real_parent->ptrace_children); } else { /* If this child is being traced, then we're the one tracing it @@ -865,8 +865,8 @@ static void exit_notify(struct task_struct *tsk, int group_dead) * only has special meaning to our real parent. */ if (!task_detached(tsk) && thread_group_empty(tsk)) { - int signal = (tsk->parent == tsk->real_parent) - ? tsk->exit_signal : SIGCHLD; + int signal = ptrace_reparented(tsk) ? + SIGCHLD : tsk->exit_signal; do_notify_parent(tsk, signal); } else if (tsk->ptrace) { do_notify_parent(tsk, SIGCHLD); @@ -1269,8 +1269,7 @@ static int wait_task_zombie(struct task_struct *p, int noreap, return 0; } - /* traced means p->ptrace, but not vice versa */ - traced = (p->real_parent != p->parent); + traced = ptrace_reparented(p); if (likely(!traced)) { struct signal_struct *psig; -- cgit v1.2.3-70-g09d2 From 7d8da0962eaee30b4a380ded177349bfbdd6ac46 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 30 Apr 2008 00:54:27 -0700 Subject: pids: __set_special_pids: use change_pid() helper Use change_pid() instead of detach_pid() + attach_pid() in __set_special_pids(). This way task_session() is not NULL in between. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 0da2921b1e7..d3ad54677f9 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -334,13 +334,11 @@ void __set_special_pids(struct pid *pid) pid_t nr = pid_nr(pid); if (task_session(curr) != pid) { - detach_pid(curr, PIDTYPE_SID); - attach_pid(curr, PIDTYPE_SID, pid); + change_pid(curr, PIDTYPE_SID, pid); set_task_session(curr, nr); } if (task_pgrp(curr) != pid) { - detach_pid(curr, PIDTYPE_PGID); - attach_pid(curr, PIDTYPE_PGID, pid); + change_pid(curr, PIDTYPE_PGID, pid); set_task_pgrp(curr, nr); } } -- cgit v1.2.3-70-g09d2 From 9f3acc3140444a900ab280de942291959f0f615d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 24 Apr 2008 07:44:08 -0400 Subject: [PATCH] split linux/file.h Initial splitoff of the low-level stuff; taken to fdtable.h Signed-off-by: Al Viro --- arch/mips/kernel/kspd.c | 1 + arch/powerpc/platforms/cell/spufs/coredump.c | 1 + drivers/char/tty_audit.c | 1 + drivers/char/tty_io.c | 1 + fs/compat.c | 1 + fs/dnotify.c | 2 +- fs/exec.c | 1 + fs/fcntl.c | 1 + fs/file.c | 1 + fs/file_table.c | 1 + fs/locks.c | 1 + fs/open.c | 1 + fs/proc/array.c | 1 + fs/proc/base.c | 1 + fs/select.c | 1 + include/linux/fdtable.h | 99 ++++++++++++++++++++++++++++ include/linux/file.h | 86 +----------------------- include/linux/init_task.h | 2 +- kernel/exit.c | 1 + kernel/fork.c | 1 + kernel/kmod.c | 1 + security/selinux/hooks.c | 1 + 22 files changed, 121 insertions(+), 86 deletions(-) create mode 100644 include/linux/fdtable.h (limited to 'kernel/exit.c') diff --git a/arch/mips/kernel/kspd.c b/arch/mips/kernel/kspd.c index 998c4efcce8..ceb62dce1c9 100644 --- a/arch/mips/kernel/kspd.c +++ b/arch/mips/kernel/kspd.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index b962c3ab470..af116aadba1 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include diff --git a/drivers/char/tty_audit.c b/drivers/char/tty_audit.c index 6342b0534f4..3582f43345a 100644 --- a/drivers/char/tty_audit.c +++ b/drivers/char/tty_audit.c @@ -11,6 +11,7 @@ #include #include +#include #include struct tty_audit_buf { diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 1d298c2cf93..49c1a2267a5 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -78,6 +78,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/compat.c b/fs/compat.c index 139dc93c092..332a869d2c5 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/dnotify.c b/fs/dnotify.c index eaecc4cfe54..676073b8dda 100644 --- a/fs/dnotify.c +++ b/fs/dnotify.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include int dir_notify_enable __read_mostly = 1; diff --git a/fs/exec.c b/fs/exec.c index 9f9f931ef94..aeaa9791d8b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include diff --git a/fs/fcntl.c b/fs/fcntl.c index 3f3ac630ccd..bfd776509a7 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/file.c b/fs/file.c index 5110acb1c9e..f6fbcb49faf 100644 --- a/fs/file.c +++ b/fs/file.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/file_table.c b/fs/file_table.c index 7a0a9b87225..83084225b4c 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/locks.c b/fs/locks.c index 44d9a6a7ec5..663c069b59b 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -116,6 +116,7 @@ #include #include +#include #include #include #include diff --git a/fs/open.c b/fs/open.c index 7af1f05d597..a1450086e92 100644 --- a/fs/open.c +++ b/fs/open.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/array.c b/fs/proc/array.c index c135cbdd912..dca997a93bf 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/base.c b/fs/proc/base.c index fcf02f2deeb..808cbdc193d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/select.c b/fs/select.c index 80d70387e79..8dda969614a 100644 --- a/fs/select.c +++ b/fs/select.c @@ -21,6 +21,7 @@ #include #include /* for STICKY_TIMEOUTS */ #include +#include #include #include diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h new file mode 100644 index 00000000000..a118f3c0b24 --- /dev/null +++ b/include/linux/fdtable.h @@ -0,0 +1,99 @@ +/* + * descriptor table internals; you almost certainly want file.h instead. + */ + +#ifndef __LINUX_FDTABLE_H +#define __LINUX_FDTABLE_H + +#include +#include +#include +#include +#include +#include + +/* + * The default fd array needs to be at least BITS_PER_LONG, + * as this is the granularity returned by copy_fdset(). + */ +#define NR_OPEN_DEFAULT BITS_PER_LONG + +/* + * The embedded_fd_set is a small fd_set, + * suitable for most tasks (which open <= BITS_PER_LONG files) + */ +struct embedded_fd_set { + unsigned long fds_bits[1]; +}; + +struct fdtable { + unsigned int max_fds; + struct file ** fd; /* current fd array */ + fd_set *close_on_exec; + fd_set *open_fds; + struct rcu_head rcu; + struct fdtable *next; +}; + +/* + * Open file table structure + */ +struct files_struct { + /* + * read mostly part + */ + atomic_t count; + struct fdtable *fdt; + struct fdtable fdtab; + /* + * written part on a separate cache line in SMP + */ + spinlock_t file_lock ____cacheline_aligned_in_smp; + int next_fd; + struct embedded_fd_set close_on_exec_init; + struct embedded_fd_set open_fds_init; + struct file * fd_array[NR_OPEN_DEFAULT]; +}; + +#define files_fdtable(files) (rcu_dereference((files)->fdt)) + +extern struct kmem_cache *filp_cachep; + +struct file_operations; +struct vfsmount; +struct dentry; + +extern int expand_files(struct files_struct *, int nr); +extern void free_fdtable_rcu(struct rcu_head *rcu); +extern void __init files_defer_init(void); + +static inline void free_fdtable(struct fdtable *fdt) +{ + call_rcu(&fdt->rcu, free_fdtable_rcu); +} + +static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd) +{ + struct file * file = NULL; + struct fdtable *fdt = files_fdtable(files); + + if (fd < fdt->max_fds) + file = rcu_dereference(fdt->fd[fd]); + return file; +} + +/* + * Check whether the specified fd has an open file. + */ +#define fcheck(fd) fcheck_files(current->files, fd) + +struct task_struct; + +struct files_struct *get_files_struct(struct task_struct *); +void put_files_struct(struct files_struct *fs); +void reset_files_struct(struct files_struct *); +int unshare_files(struct files_struct **); + +extern struct kmem_cache *files_cachep; + +#endif /* __LINUX_FDTABLE_H */ diff --git a/include/linux/file.h b/include/linux/file.h index 69baf5a4f0a..27c64bdc68c 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -5,59 +5,11 @@ #ifndef __LINUX_FILE_H #define __LINUX_FILE_H -#include -#include #include -#include -#include #include +#include -/* - * The default fd array needs to be at least BITS_PER_LONG, - * as this is the granularity returned by copy_fdset(). - */ -#define NR_OPEN_DEFAULT BITS_PER_LONG - -/* - * The embedded_fd_set is a small fd_set, - * suitable for most tasks (which open <= BITS_PER_LONG files) - */ -struct embedded_fd_set { - unsigned long fds_bits[1]; -}; - -struct fdtable { - unsigned int max_fds; - struct file ** fd; /* current fd array */ - fd_set *close_on_exec; - fd_set *open_fds; - struct rcu_head rcu; - struct fdtable *next; -}; - -/* - * Open file table structure - */ -struct files_struct { - /* - * read mostly part - */ - atomic_t count; - struct fdtable *fdt; - struct fdtable fdtab; - /* - * written part on a separate cache line in SMP - */ - spinlock_t file_lock ____cacheline_aligned_in_smp; - int next_fd; - struct embedded_fd_set close_on_exec_init; - struct embedded_fd_set open_fds_init; - struct file * fd_array[NR_OPEN_DEFAULT]; -}; - -#define files_fdtable(files) (rcu_dereference((files)->fdt)) - -extern struct kmem_cache *filp_cachep; +struct file; extern void __fput(struct file *); extern void fput(struct file *); @@ -85,41 +37,7 @@ extern void put_filp(struct file *); extern int get_unused_fd(void); extern int get_unused_fd_flags(int flags); extern void put_unused_fd(unsigned int fd); -struct kmem_cache; - -extern int expand_files(struct files_struct *, int nr); -extern void free_fdtable_rcu(struct rcu_head *rcu); -extern void __init files_defer_init(void); - -static inline void free_fdtable(struct fdtable *fdt) -{ - call_rcu(&fdt->rcu, free_fdtable_rcu); -} - -static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd) -{ - struct file * file = NULL; - struct fdtable *fdt = files_fdtable(files); - - if (fd < fdt->max_fds) - file = rcu_dereference(fdt->fd[fd]); - return file; -} - -/* - * Check whether the specified fd has an open file. - */ -#define fcheck(fd) fcheck_files(current->files, fd) extern void fd_install(unsigned int fd, struct file *file); -struct task_struct; - -struct files_struct *get_files_struct(struct task_struct *); -void put_files_struct(struct files_struct *fs); -void reset_files_struct(struct files_struct *); -int unshare_files(struct files_struct **); - -extern struct kmem_cache *files_cachep; - #endif /* __LINUX_FILE_H */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index bf6b8a61f8d..b24c2875aa0 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -1,7 +1,7 @@ #ifndef _LINUX__INIT_TASK_H #define _LINUX__INIT_TASK_H -#include +#include #include #include #include diff --git a/kernel/exit.c b/kernel/exit.c index d3ad54677f9..1510f78a0ff 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index 2bb675af4de..933e60ebcca 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/kmod.c b/kernel/kmod.c index e2764047ec0..8df97d3dfda 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 1b50a6ebc55..1c864c0efe2 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3-70-g09d2