about | summary | refs | log | tree | commit | diff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Kconfig.hz20
-rw-r--r--kernel/acct.c29
-rw-r--r--kernel/audit.c1
-rw-r--r--kernel/auditfilter.c3
-rw-r--r--kernel/auditsc.c13
-rw-r--r--kernel/compat.c33
-rw-r--r--kernel/configs.c2
-rw-r--r--kernel/cpu.c8
-rw-r--r--kernel/cpuset.c126
-rw-r--r--kernel/delayacct.c19
-rw-r--r--kernel/dma.c2
-rw-r--r--kernel/exit.c83
-rw-r--r--kernel/fork.c132
-rw-r--r--kernel/futex.c62
-rw-r--r--kernel/irq/chip.c2
-rw-r--r--kernel/irq/handle.c6
-rw-r--r--kernel/irq/manage.c9
-rw-r--r--kernel/irq/proc.c3
-rw-r--r--kernel/kallsyms.c33
-rw-r--r--kernel/kexec.c60
-rw-r--r--kernel/kmod.c26
-rw-r--r--kernel/kprobes.c117
-rw-r--r--kernel/kthread.c13
-rw-r--r--kernel/latency.c1
-rw-r--r--kernel/lockdep.c244
-rw-r--r--kernel/lockdep_internals.h2
-rw-r--r--kernel/lockdep_proc.c6
-rw-r--r--kernel/module.c75
-rw-r--r--kernel/mutex-debug.c3
-rw-r--r--kernel/mutex.c9
-rw-r--r--kernel/nsproxy.c38
-rw-r--r--kernel/pid.c77
-rw-r--r--kernel/posix-timers.c2
-rw-r--r--kernel/power/Kconfig11
-rw-r--r--kernel/power/disk.c101
-rw-r--r--kernel/power/main.c14
-rw-r--r--kernel/power/power.h32
-rw-r--r--kernel/power/poweroff.c4
-rw-r--r--kernel/power/process.c143
-rw-r--r--kernel/power/snapshot.c860
-rw-r--r--kernel/power/swap.c347
-rw-r--r--kernel/power/swsusp.c98
-rw-r--r--kernel/power/user.c102
-rw-r--r--kernel/printk.c45
-rw-r--r--kernel/profile.c47
-rw-r--r--kernel/rcupdate.c4
-rw-r--r--kernel/rcutorture.c4
-rw-r--r--kernel/relay.c20
-rw-r--r--kernel/resource.c6
-rw-r--r--kernel/rtmutex-tester.c1
-rw-r--r--kernel/sched.c554
-rw-r--r--kernel/signal.c38
-rw-r--r--kernel/softirq.c2
-rw-r--r--kernel/spinlock.c21
-rw-r--r--kernel/sys.c31
-rw-r--r--kernel/sys_ni.c1
-rw-r--r--kernel/sysctl.c446
-rw-r--r--kernel/taskstats.c193
-rw-r--r--kernel/time/clocksource.c8
-rw-r--r--kernel/timer.c162
-rw-r--r--kernel/tsacct.c19
-rw-r--r--kernel/unwind.c212
-rw-r--r--kernel/user.c15
-rw-r--r--kernel/workqueue.c214
64 files changed, 3336 insertions, 1678 deletions
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 248e1c396f8..4af15802ccd 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -7,7 +7,7 @@ choice
default HZ_250
help
Allows the configuration of the timer frequency. It is customary
- to have the timer interrupt run at 1000 HZ but 100 HZ may be more
+ to have the timer interrupt run at 1000 Hz but 100 Hz may be more
beneficial for servers and NUMA systems that do not need to have
a fast response for user interaction and that may experience bus
contention and cacheline bounces as a result of timer interrupts.
@@ -19,21 +19,30 @@ choice
config HZ_100
bool "100 HZ"
help
- 100 HZ is a typical choice for servers, SMP and NUMA systems
+ 100 Hz is a typical choice for servers, SMP and NUMA systems
with lots of processors that may show reduced performance if
too many timer interrupts are occurring.
config HZ_250
bool "250 HZ"
help
- 250 HZ is a good compromise choice allowing server performance
+ 250 Hz is a good compromise choice allowing server performance
while also showing good interactive responsiveness even
- on SMP and NUMA systems.
+ on SMP and NUMA systems. If you are going to be using NTSC video
+ or multimedia, select 300Hz instead.
+
+ config HZ_300
+ bool "300 HZ"
+ help
+ 300 Hz is a good compromise choice allowing server performance
+ while also showing good interactive responsiveness even
+ on SMP and NUMA systems and exactly dividing by both PAL and
+ NTSC frame rates for video and multimedia work.
config HZ_1000
bool "1000 HZ"
help
- 1000 HZ is the preferred choice for desktop systems and other
+ 1000 Hz is the preferred choice for desktop systems and other
systems requiring fast interactive responses to events.
endchoice
@@ -42,5 +51,6 @@ config HZ
int
default 100 if HZ_100
default 250 if HZ_250
+ default 300 if HZ_300
default 1000 if HZ_1000
diff --git a/kernel/acct.c b/kernel/acct.c
index 0aad5ca36a8..70d0d88e555 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -89,7 +89,8 @@ struct acct_glbs {
struct timer_list timer;
};
-static struct acct_glbs acct_globals __cacheline_aligned = {SPIN_LOCK_UNLOCKED};
+static struct acct_glbs acct_globals __cacheline_aligned =
+ {__SPIN_LOCK_UNLOCKED(acct_globals.lock)};
/*
* Called whenever the timer says to check the free space.
@@ -117,7 +118,7 @@ static int check_free_space(struct file *file)
spin_unlock(&acct_globals.lock);
/* May block */
- if (vfs_statfs(file->f_dentry, &sbuf))
+ if (vfs_statfs(file->f_path.dentry, &sbuf))
return res;
suspend = sbuf.f_blocks * SUSPEND;
resume = sbuf.f_blocks * RESUME;
@@ -193,7 +194,7 @@ static void acct_file_reopen(struct file *file)
add_timer(&acct_globals.timer);
}
if (old_acct) {
- mnt_unpin(old_acct->f_vfsmnt);
+ mnt_unpin(old_acct->f_path.mnt);
spin_unlock(&acct_globals.lock);
do_acct_process(old_acct);
filp_close(old_acct, NULL);
@@ -211,7 +212,7 @@ static int acct_on(char *name)
if (IS_ERR(file))
return PTR_ERR(file);
- if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
+ if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) {
filp_close(file, NULL);
return -EACCES;
}
@@ -228,11 +229,11 @@ static int acct_on(char *name)
}
spin_lock(&acct_globals.lock);
- mnt_pin(file->f_vfsmnt);
+ mnt_pin(file->f_path.mnt);
acct_file_reopen(file);
spin_unlock(&acct_globals.lock);
- mntput(file->f_vfsmnt); /* it's pinned, now give up active reference */
+ mntput(file->f_path.mnt); /* it's pinned, now give up active reference */
return 0;
}
@@ -282,7 +283,7 @@ asmlinkage long sys_acct(const char __user *name)
void acct_auto_close_mnt(struct vfsmount *m)
{
spin_lock(&acct_globals.lock);
- if (acct_globals.file && acct_globals.file->f_vfsmnt == m)
+ if (acct_globals.file && acct_globals.file->f_path.mnt == m)
acct_file_reopen(NULL);
spin_unlock(&acct_globals.lock);
}
@@ -298,7 +299,7 @@ void acct_auto_close(struct super_block *sb)
{
spin_lock(&acct_globals.lock);
if (acct_globals.file &&
- acct_globals.file->f_vfsmnt->mnt_sb == sb) {
+ acct_globals.file->f_path.mnt->mnt_sb == sb) {
acct_file_reopen(NULL);
}
spin_unlock(&acct_globals.lock);
@@ -427,6 +428,7 @@ static void do_acct_process(struct file *file)
u64 elapsed;
u64 run_time;
struct timespec uptime;
+ struct tty_struct *tty;
/*
* First check to see if there is enough free_space to continue
@@ -483,16 +485,9 @@ static void do_acct_process(struct file *file)
ac.ac_ppid = current->parent->tgid;
#endif
- mutex_lock(&tty_mutex);
- /* FIXME: Whoever is responsible for current->signal locking needs
- to use the same locking all over the kernel and document it */
- read_lock(&tasklist_lock);
- ac.ac_tty = current->signal->tty ?
- old_encode_dev(tty_devnum(current->signal->tty)) : 0;
- read_unlock(&tasklist_lock);
- mutex_unlock(&tty_mutex);
-
spin_lock_irq(&current->sighand->siglock);
+ tty = current->signal->tty;
+ ac.ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0;
ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime)));
ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime)));
ac.ac_flag = pacct->ac_flag;
diff --git a/kernel/audit.c b/kernel/audit.c
index 98106f6078b..d9b690ac684 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -57,6 +57,7 @@
#include <linux/netlink.h>
#include <linux/selinux.h>
#include <linux/inotify.h>
+#include <linux/freezer.h>
#include "audit.h"
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 4f40d923af8..2e896f8ae29 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -636,10 +636,9 @@ static struct audit_rule *audit_krule_to_rule(struct audit_krule *krule)
struct audit_rule *rule;
int i;
- rule = kmalloc(sizeof(*rule), GFP_KERNEL);
+ rule = kzalloc(sizeof(*rule), GFP_KERNEL);
if (unlikely(!rule))
return NULL;
- memset(rule, 0, sizeof(*rule));
rule->flags = krule->flags | krule->listnr;
rule->action = krule->action;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 42f2f117971..298897559ca 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -64,6 +64,7 @@
#include <linux/tty.h>
#include <linux/selinux.h>
#include <linux/binfmts.h>
+#include <linux/highmem.h>
#include <linux/syscalls.h>
#include "audit.h"
@@ -730,7 +731,7 @@ static inline void audit_free_context(struct audit_context *context)
printk(KERN_ERR "audit: freed %d contexts\n", count);
}
-static void audit_log_task_context(struct audit_buffer *ab)
+void audit_log_task_context(struct audit_buffer *ab)
{
char *ctx = NULL;
ssize_t len = 0;
@@ -759,6 +760,8 @@ error_path:
return;
}
+EXPORT_SYMBOL(audit_log_task_context);
+
static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk)
{
char name[sizeof(tsk->comm)];
@@ -778,8 +781,8 @@ static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk
if ((vma->vm_flags & VM_EXECUTABLE) &&
vma->vm_file) {
audit_log_d_path(ab, "exe=",
- vma->vm_file->f_dentry,
- vma->vm_file->f_vfsmnt);
+ vma->vm_file->f_path.dentry,
+ vma->vm_file->f_path.mnt);
break;
}
vma = vma->vm_next;
@@ -823,10 +826,12 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
context->return_code);
mutex_lock(&tty_mutex);
+ read_lock(&tasklist_lock);
if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name)
tty = tsk->signal->tty->name;
else
tty = "(none)";
+ read_unlock(&tasklist_lock);
audit_log_format(ab,
" a0=%lx a1=%lx a2=%lx a3=%lx items=%d"
" ppid=%d pid=%d auid=%u uid=%u gid=%u"
@@ -1487,6 +1492,8 @@ uid_t audit_get_loginuid(struct audit_context *ctx)
return ctx ? ctx->loginuid : -1;
}
+EXPORT_SYMBOL(audit_get_loginuid);
+
/**
* __audit_mq_open - record audit data for a POSIX MQ open
* @oflag: open flag
diff --git a/kernel/compat.c b/kernel/compat.c
index d4898aad6cf..6952dd05730 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -982,4 +982,37 @@ asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_pages,
}
return sys_move_pages(pid, nr_pages, pages, nodes, status, flags);
}
+
+asmlinkage long compat_sys_migrate_pages(compat_pid_t pid,
+ compat_ulong_t maxnode,
+ const compat_ulong_t __user *old_nodes,
+ const compat_ulong_t __user *new_nodes)
+{ /* Compat entry point for sys_migrate_pages(): stages each supplied node mask through tmp_mask into a user-space buffer of unsigned longs, returning -EFAULT if any copy fails. */
+ unsigned long __user *old = NULL; /* stays NULL when old_nodes is NULL */
+ unsigned long __user *new = NULL; /* stays NULL when new_nodes is NULL */
+ nodemask_t tmp_mask; /* kernel-side staging area, reused for both masks */
+ unsigned long nr_bits;
+ unsigned long size;
+
+ nr_bits = min_t(unsigned long, maxnode - 1, MAX_NUMNODES); /* cap the bit count at MAX_NUMNODES */
+ size = ALIGN(nr_bits, BITS_PER_LONG) / 8; /* bitmap size in bytes, rounded up to whole longs */
+ if (old_nodes) {
+ if (compat_get_bitmap(nodes_addr(tmp_mask), old_nodes, nr_bits)) /* presumably converts the compat-layout bitmap to native longs -- confirm against the helper */
+ return -EFAULT;
+ old = compat_alloc_user_space(new_nodes ? size * 2 : size); /* one allocation sized for both masks when new_nodes is also given */
+ if (new_nodes)
+ new = old + size / sizeof(unsigned long); /* second mask starts right after the first; offset is counted in longs */
+ if (copy_to_user(old, nodes_addr(tmp_mask), size))
+ return -EFAULT;
+ }
+ if (new_nodes) {
+ if (compat_get_bitmap(nodes_addr(tmp_mask), new_nodes, nr_bits)) /* tmp_mask is overwritten; the old mask was already copied out above */
+ return -EFAULT;
+ if (new == NULL) /* only reached when old_nodes was NULL, so nothing was allocated yet */
+ new = compat_alloc_user_space(size);
+ if (copy_to_user(new, nodes_addr(tmp_mask), size))
+ return -EFAULT;
+ }
+ return sys_migrate_pages(pid, nr_bits + 1, old, new); /* +1 undoes the "maxnode - 1" taken when computing nr_bits */
+}
#endif
diff --git a/kernel/configs.c b/kernel/configs.c
index f9e31974f4a..8fa1fb28f8a 100644
--- a/kernel/configs.c
+++ b/kernel/configs.c
@@ -75,7 +75,7 @@ ikconfig_read_current(struct file *file, char __user *buf,
return count;
}
-static struct file_operations ikconfig_file_ops = {
+static const struct file_operations ikconfig_file_ops = {
.owner = THIS_MODULE,
.read = ikconfig_read_current,
};
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 663c920b223..9124669f458 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -58,8 +58,8 @@ void unlock_cpu_hotplug(void)
recursive_depth--;
return;
}
- mutex_unlock(&cpu_bitmask_lock);
recursive = NULL;
+ mutex_unlock(&cpu_bitmask_lock);
}
EXPORT_SYMBOL_GPL(unlock_cpu_hotplug);
@@ -270,11 +270,7 @@ int disable_nonboot_cpus(void)
goto out;
}
}
- error = set_cpus_allowed(current, cpumask_of_cpu(first_cpu));
- if (error) {
- printk(KERN_ERR "Could not run on CPU%d\n", first_cpu);
- goto out;
- }
+
/* We take down all of the non-boot CPUs in one shot to avoid races
* with the userspace trying to use the CPU hotplug at the same time
*/
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 6313c38c930..232aed2b10f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -413,8 +413,8 @@ static struct file_system_type cpuset_fs_type = {
*
*
* When reading/writing to a file:
- * - the cpuset to use in file->f_dentry->d_parent->d_fsdata
- * - the 'cftype' of the file is file->f_dentry->d_fsdata
+ * - the cpuset to use in file->f_path.dentry->d_parent->d_fsdata
+ * - the 'cftype' of the file is file->f_path.dentry->d_fsdata
*/
struct cftype {
@@ -729,9 +729,11 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
}
/* Remaining checks don't apply to root cpuset */
- if ((par = cur->parent) == NULL)
+ if (cur == &top_cpuset)
return 0;
+ par = cur->parent;
+
/* We must be a subset of our parent cpuset */
if (!is_cpuset_subset(trial, par))
return -EACCES;
@@ -1060,10 +1062,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
cpu_exclusive_changed =
(is_cpu_exclusive(cs) != is_cpu_exclusive(&trialcs));
mutex_lock(&callback_mutex);
- if (turning_on)
- set_bit(bit, &cs->flags);
- else
- clear_bit(bit, &cs->flags);
+ cs->flags = trialcs.flags;
mutex_unlock(&callback_mutex);
if (cpu_exclusive_changed)
@@ -1281,18 +1280,19 @@ typedef enum {
FILE_TASKLIST,
} cpuset_filetype_t;
-static ssize_t cpuset_common_file_write(struct file *file, const char __user *userbuf,
+static ssize_t cpuset_common_file_write(struct file *file,
+ const char __user *userbuf,
size_t nbytes, loff_t *unused_ppos)
{
- struct cpuset *cs = __d_cs(file->f_dentry->d_parent);
- struct cftype *cft = __d_cft(file->f_dentry);
+ struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
+ struct cftype *cft = __d_cft(file->f_path.dentry);
cpuset_filetype_t type = cft->private;
char *buffer;
char *pathbuf = NULL;
int retval = 0;
/* Crude upper limit on largest legitimate cpulist user might write. */
- if (nbytes > 100 + 6 * NR_CPUS)
+ if (nbytes > 100 + 6 * max(NR_CPUS, MAX_NUMNODES))
return -E2BIG;
/* +1 for nul-terminator */
@@ -1367,7 +1367,7 @@ static ssize_t cpuset_file_write(struct file *file, const char __user *buf,
size_t nbytes, loff_t *ppos)
{
ssize_t retval = 0;
- struct cftype *cft = __d_cft(file->f_dentry);
+ struct cftype *cft = __d_cft(file->f_path.dentry);
if (!cft)
return -ENODEV;
@@ -1417,8 +1417,8 @@ static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
size_t nbytes, loff_t *ppos)
{
- struct cftype *cft = __d_cft(file->f_dentry);
- struct cpuset *cs = __d_cs(file->f_dentry->d_parent);
+ struct cftype *cft = __d_cft(file->f_path.dentry);
+ struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
cpuset_filetype_t type = cft->private;
char *page;
ssize_t retval = 0;
@@ -1476,7 +1476,7 @@ static ssize_t cpuset_file_read(struct file *file, char __user *buf, size_t nbyt
loff_t *ppos)
{
ssize_t retval = 0;
- struct cftype *cft = __d_cft(file->f_dentry);
+ struct cftype *cft = __d_cft(file->f_path.dentry);
if (!cft)
return -ENODEV;
@@ -1498,7 +1498,7 @@ static int cpuset_file_open(struct inode *inode, struct file *file)
if (err)
return err;
- cft = __d_cft(file->f_dentry);
+ cft = __d_cft(file->f_path.dentry);
if (!cft)
return -ENODEV;
if (cft->open)
@@ -1511,7 +1511,7 @@ static int cpuset_file_open(struct inode *inode, struct file *file)
static int cpuset_file_release(struct inode *inode, struct file *file)
{
- struct cftype *cft = __d_cft(file->f_dentry);
+ struct cftype *cft = __d_cft(file->f_path.dentry);
if (cft->release)
return cft->release(inode, file);
return 0;
@@ -1532,7 +1532,7 @@ static int cpuset_rename(struct inode *old_dir, struct dentry *old_dentry,
return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
}
-static struct file_operations cpuset_file_operations = {
+static const struct file_operations cpuset_file_operations = {
.read = cpuset_file_read,
.write = cpuset_file_write,
.llseek = generic_file_llseek,
@@ -1700,7 +1700,7 @@ static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
*/
static int cpuset_tasks_open(struct inode *unused, struct file *file)
{
- struct cpuset *cs = __d_cs(file->f_dentry->d_parent);
+ struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
struct ctr_struct *ctr;
pid_t *pidarray;
int npids;
@@ -2045,7 +2045,6 @@ out:
return err;
}
-#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG)
/*
* If common_cpu_mem_hotplug_unplug(), below, unplugs any CPUs
* or memory nodes, we need to walk over the cpuset hierarchy,
@@ -2109,9 +2108,7 @@ static void common_cpu_mem_hotplug_unplug(void)
mutex_unlock(&callback_mutex);
mutex_unlock(&manage_mutex);
}
-#endif
-#ifdef CONFIG_HOTPLUG_CPU
/*
* The top_cpuset tracks what CPUs and Memory Nodes are online,
* period. This is necessary in order to make cpusets transparent
@@ -2128,7 +2125,6 @@ static int cpuset_handle_cpuhp(struct notifier_block *nb,
common_cpu_mem_hotplug_unplug();
return 0;
}
-#endif
#ifdef CONFIG_MEMORY_HOTPLUG
/*
@@ -2346,32 +2342,48 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
}
/**
- * cpuset_zone_allowed - Can we allocate memory on zone z's memory node?
+ * cpuset_zone_allowed_softwall - Can we allocate on zone z's memory node?
* @z: is this zone on an allowed node?
- * @gfp_mask: memory allocation flags (we use __GFP_HARDWALL)
+ * @gfp_mask: memory allocation flags
*
- * If we're in interrupt, yes, we can always allocate. If zone
+ * If we're in interrupt, yes, we can always allocate. If
+ * __GFP_THISNODE is set, yes, we can always allocate. If zone
* z's node is in our tasks mems_allowed, yes. If it's not a
* __GFP_HARDWALL request and this zone's nodes is in the nearest
* mem_exclusive cpuset ancestor to this tasks cpuset, yes.
* Otherwise, no.
*
+ * If __GFP_HARDWALL is set, cpuset_zone_allowed_softwall()
+ * reduces to cpuset_zone_allowed_hardwall(). Otherwise,
+ * cpuset_zone_allowed_softwall() might sleep, and might allow a zone
+ * from an enclosing cpuset.
+ *
+ * cpuset_zone_allowed_hardwall() only handles the simpler case of
+ * hardwall cpusets, and never sleeps.
+ *
+ * The __GFP_THISNODE placement logic is really handled elsewhere,
+ * by forcibly using a zonelist starting at a specified node, and by
+ * (in get_page_from_freelist()) refusing to consider the zones for
+ * any node on the zonelist except the first. By the time any such
+ * calls get to this routine, we should just shut up and say 'yes'.
+ *
* GFP_USER allocations are marked with the __GFP_HARDWALL bit,
* and do not allow allocations outside the current tasks cpuset.
* GFP_KERNEL allocations are not so marked, so can escape to the
- * nearest mem_exclusive ancestor cpuset.
+ * nearest enclosing mem_exclusive ancestor cpuset.
*
- * Scanning up parent cpusets requires callback_mutex. The __alloc_pages()
- * routine only calls here with __GFP_HARDWALL bit _not_ set if
- * it's a GFP_KERNEL allocation, and all nodes in the current tasks
- * mems_allowed came up empty on the first pass over the