aboutsummaryrefslogtreecommitdiff
path: root/init
diff options
context:
space:
mode:
Diffstat (limited to 'init')
-rw-r--r--init/Kconfig181
-rw-r--r--init/do_mounts.c1
-rw-r--r--init/do_mounts_initrd.c11
-rw-r--r--init/do_mounts_rd.c1
-rw-r--r--init/initramfs.c15
-rw-r--r--init/main.c137
6 files changed, 182 insertions, 164 deletions
diff --git a/init/Kconfig b/init/Kconfig
index 313506d8be6..2de5b1cbadd 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -320,13 +320,17 @@ config AUDITSYSCALL
help
Enable low-overhead system-call auditing infrastructure that
can be used independently or with another kernel subsystem,
- such as SELinux. To use audit's filesystem watch feature, please
- ensure that INOTIFY is configured.
+ such as SELinux.
+
+config AUDIT_WATCH
+ def_bool y
+ depends on AUDITSYSCALL
+ select FSNOTIFY
config AUDIT_TREE
def_bool y
depends on AUDITSYSCALL
- select INOTIFY
+ select FSNOTIFY
menu "RCU Subsystem"
@@ -404,6 +408,22 @@ config RCU_FANOUT_EXACT
Say N if unsure.
+config RCU_FAST_NO_HZ
+ bool "Accelerate last non-dyntick-idle CPU's grace periods"
+ depends on TREE_RCU && NO_HZ && SMP
+ default n
+ help
+ This option causes RCU to attempt to accelerate grace periods
+ in order to allow the final CPU to enter dynticks-idle state
+ more quickly. On the other hand, this option increases the
+ overhead of the dynticks-idle checking, particularly on systems
+ with large numbers of CPUs.
+
+ Say Y if energy efficiency is critically important, particularly
+ if you have relatively few CPUs.
+
+ Say N if you are unsure.
+
config TREE_RCU_TRACE
def_bool RCU_TRACE && ( TREE_RCU || TREE_PREEMPT_RCU )
select DEBUG_FS
@@ -453,59 +473,9 @@ config LOG_BUF_SHIFT
config HAVE_UNSTABLE_SCHED_CLOCK
bool
-config GROUP_SCHED
- bool "Group CPU scheduler"
- depends on EXPERIMENTAL
- default n
- help
- This feature lets CPU scheduler recognize task groups and control CPU
- bandwidth allocation to such task groups.
- In order to create a group from arbitrary set of processes, use
- CONFIG_CGROUPS. (See Control Group support.)
-
-config FAIR_GROUP_SCHED
- bool "Group scheduling for SCHED_OTHER"
- depends on GROUP_SCHED
- default GROUP_SCHED
-
-config RT_GROUP_SCHED
- bool "Group scheduling for SCHED_RR/FIFO"
- depends on EXPERIMENTAL
- depends on GROUP_SCHED
- default n
- help
- This feature lets you explicitly allocate real CPU bandwidth
- to users or control groups (depending on the "Basis for grouping tasks"
- setting below. If enabled, it will also make it impossible to
- schedule realtime tasks for non-root users until you allocate
- realtime bandwidth for them.
- See Documentation/scheduler/sched-rt-group.txt for more information.
-
-choice
- depends on GROUP_SCHED
- prompt "Basis for grouping tasks"
- default USER_SCHED
-
-config USER_SCHED
- bool "user id"
- help
- This option will choose userid as the basis for grouping
- tasks, thus providing equal CPU bandwidth to each user.
-
-config CGROUP_SCHED
- bool "Control groups"
- depends on CGROUPS
- help
- This option allows you to create arbitrary task groups
- using the "cgroup" pseudo filesystem and control
- the cpu bandwidth allocated to each such task group.
- Refer to Documentation/cgroups/cgroups.txt for more
- information on "cgroup" pseudo filesystem.
-
-endchoice
-
menuconfig CGROUPS
boolean "Control Group support"
+ depends on EVENTFD
help
This option adds support for grouping sets of processes together, for
use with process control subsystems such as Cpusets, CFS, memory
@@ -607,8 +577,8 @@ config CGROUP_MEM_RES_CTLR
could in turn add some fork/exit overhead.
config CGROUP_MEM_RES_CTLR_SWAP
- bool "Memory Resource Controller Swap Extension(EXPERIMENTAL)"
- depends on CGROUP_MEM_RES_CTLR && SWAP && EXPERIMENTAL
+ bool "Memory Resource Controller Swap Extension"
+ depends on CGROUP_MEM_RES_CTLR && SWAP
help
Add swap management feature to memory resource controller. When you
enable this, you can limit mem+swap usage per cgroup. In other words,
@@ -624,6 +594,62 @@ config CGROUP_MEM_RES_CTLR_SWAP
Now, memory usage of swap_cgroup is 2 bytes per entry. If swap page
size is 4096bytes, 512k per 1Gbytes of swap.
+menuconfig CGROUP_SCHED
+ bool "Group CPU scheduler"
+ depends on EXPERIMENTAL && CGROUPS
+ default n
+ help
+ This feature lets CPU scheduler recognize task groups and control CPU
+ bandwidth allocation to such task groups. It uses cgroups to group
+ tasks.
+
+if CGROUP_SCHED
+config FAIR_GROUP_SCHED
+ bool "Group scheduling for SCHED_OTHER"
+ depends on CGROUP_SCHED
+ default CGROUP_SCHED
+
+config RT_GROUP_SCHED
+ bool "Group scheduling for SCHED_RR/FIFO"
+ depends on EXPERIMENTAL
+ depends on CGROUP_SCHED
+ default n
+ help
+ This feature lets you explicitly allocate real CPU bandwidth
+ to task groups. If enabled, it will also make it impossible to
+ schedule realtime tasks for non-root users until you allocate
+ realtime bandwidth for them.
+ See Documentation/scheduler/sched-rt-group.txt for more information.
+
+endif #CGROUP_SCHED
+
+config BLK_CGROUP
+ tristate "Block IO controller"
+ depends on CGROUPS && BLOCK
+ default n
+ ---help---
+ Generic block IO controller cgroup interface. This is the common
+ cgroup interface which should be used by various IO controlling
+ policies.
+
+ Currently, CFQ IO scheduler uses it to recognize task groups and
+ control disk bandwidth allocation (proportional time slice allocation)
+ to such task groups.
+
+ This option only enables generic Block IO controller infrastructure.
+ One needs to also enable actual IO controlling logic in CFQ for it
+ to take effect. (CONFIG_CFQ_GROUP_IOSCHED=y).
+
+ See Documentation/cgroups/blkio-controller.txt for more information.
+
+config DEBUG_BLK_CGROUP
+ bool "Enable Block IO controller debugging"
+ depends on BLK_CGROUP
+ default n
+ ---help---
+ Enable some debugging help. Currently it exports additional stat
+ files in a cgroup which can be useful for debugging.
+
endif # CGROUPS
config MM_OWNER
@@ -984,19 +1010,6 @@ config PERF_EVENTS
Say Y if unsure.
-config EVENT_PROFILE
- bool "Tracepoint profiling sources"
- depends on PERF_EVENTS && EVENT_TRACING
- default y
- help
- Allow the use of tracepoints as software performance events.
-
- When this is enabled, you can create perf events based on
- tracepoints using PERF_TYPE_TRACEPOINT and the tracepoint ID
- found in debugfs://tracing/events/*/*/id. (The -e/--events
- option to the perf tool can parse and interpret symbolic
- tracepoints, in the subsystem:tracepoint_name format.)
-
config PERF_COUNTERS
bool "Kernel performance counters (old config option)"
depends on HAVE_PERF_EVENTS
@@ -1120,7 +1133,7 @@ config MMAP_ALLOW_UNINITIALIZED
See Documentation/nommu-mmap.txt for more information.
config PROFILING
- bool "Profiling support (EXPERIMENTAL)"
+ bool "Profiling support"
help
Say Y here to enable the extended profiling support mechanisms used
by profilers such as OProfile.
@@ -1134,30 +1147,6 @@ config TRACEPOINTS
source "arch/Kconfig"
-config SLOW_WORK
- default n
- bool
- help
- The slow work thread pool provides a number of dynamically allocated
- threads that can be used by the kernel to perform operations that
- take a relatively long time.
-
- An example of this would be CacheFiles doing a path lookup followed
- by a series of mkdirs and a create call, all of which have to touch
- disk.
-
- See Documentation/slow-work.txt.
-
-config SLOW_WORK_DEBUG
- bool "Slow work debugging through debugfs"
- default n
- depends on SLOW_WORK && DEBUG_FS
- help
- Display the contents of the slow work run queue through debugfs,
- including items currently executing.
-
- See Documentation/slow-work.txt.
-
endmenu # General setup
config HAVE_GENERIC_DMA_COHERENT
@@ -1270,4 +1259,8 @@ source "block/Kconfig"
config PREEMPT_NOTIFIERS
bool
+config PADATA
+ depends on SMP
+ bool
+
source "kernel/Kconfig.locks"
diff --git a/init/do_mounts.c b/init/do_mounts.c
index bb008d064c1..02e3ca4fc52 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -15,6 +15,7 @@
#include <linux/initrd.h>
#include <linux/async.h>
#include <linux/fs_struct.h>
+#include <linux/slab.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_fs_sb.h>
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index 614241b5200..3098a38f3ae 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -24,17 +24,14 @@ static int __init no_initrd(char *str)
__setup("noinitrd", no_initrd);
-static int __init do_linuxrc(void * shell)
+static int __init do_linuxrc(void *_shell)
{
- static char *argv[] = { "linuxrc", NULL, };
- extern char * envp_init[];
+ static const char *argv[] = { "linuxrc", NULL, };
+ extern const char *envp_init[];
+ const char *shell = _shell;
sys_close(old_fd);sys_close(root_fd);
- sys_close(0);sys_close(1);sys_close(2);
sys_setsid();
- (void) sys_open("/dev/console",O_RDWR,0);
- (void) sys_dup(0);
- (void) sys_dup(0);
return kernel_execve(shell, argv, envp_init);
}
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index 027a402708d..bf3ef667bf3 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -7,6 +7,7 @@
#include <linux/cramfs_fs.h>
#include <linux/initrd.h>
#include <linux/string.h>
+#include <linux/slab.h>
#include "do_mounts.h"
#include "../fs/squashfs/squashfs_fs.h"
diff --git a/init/initramfs.c b/init/initramfs.c
index b37d34beb90..4b9c2020509 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -457,7 +457,8 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len)
compress_name);
message = msg_buf;
}
- }
+ } else
+ error("junk in compressed archive");
if (state != Reset)
error("junk in compressed archive");
this_header = saved_offset + my_inptr;
@@ -525,7 +526,7 @@ static void __init clean_rootfs(void)
int fd;
void *buf;
struct linux_dirent64 *dirp;
- int count;
+ int num;
fd = sys_open("/", O_RDONLY, 0);
WARN_ON(fd < 0);
@@ -539,9 +540,9 @@ static void __init clean_rootfs(void)
}
dirp = buf;
- count = sys_getdents64(fd, dirp, BUF_SIZE);
- while (count > 0) {
- while (count > 0) {
+ num = sys_getdents64(fd, dirp, BUF_SIZE);
+ while (num > 0) {
+ while (num > 0) {
struct stat st;
int ret;
@@ -554,12 +555,12 @@ static void __init clean_rootfs(void)
sys_unlink(dirp->d_name);
}
- count -= dirp->d_reclen;
+ num -= dirp->d_reclen;
dirp = (void *)dirp + dirp->d_reclen;
}
dirp = buf;
memset(buf, 0, BUF_SIZE);
- count = sys_getdents64(fd, dirp, BUF_SIZE);
+ num = sys_getdents64(fd, dirp, BUF_SIZE);
}
sys_close(fd);
diff --git a/init/main.c b/init/main.c
index dac44a9356a..94ab488039a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -25,7 +25,6 @@
#include <linux/bootmem.h>
#include <linux/acpi.h>
#include <linux/tty.h>
-#include <linux/gfp.h>
#include <linux/percpu.h>
#include <linux/kmod.h>
#include <linux/vmalloc.h>
@@ -33,7 +32,6 @@
#include <linux/start_kernel.h>
#include <linux/security.h>
#include <linux/smp.h>
-#include <linux/workqueue.h>
#include <linux/profile.h>
#include <linux/rcupdate.h>
#include <linux/moduleparam.h>
@@ -63,13 +61,13 @@
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/idr.h>
+#include <linux/kgdb.h>
#include <linux/ftrace.h>
#include <linux/async.h>
#include <linux/kmemcheck.h>
-#include <linux/kmemtrace.h>
#include <linux/sfi.h>
#include <linux/shmem_fs.h>
-#include <trace/boot.h>
+#include <linux/slab.h>
#include <asm/io.h>
#include <asm/bugs.h>
@@ -124,7 +122,9 @@ static char *ramdisk_execute_command;
#ifdef CONFIG_SMP
/* Setup configured maximum number of CPUs to activate */
-unsigned int __initdata setup_max_cpus = NR_CPUS;
+unsigned int setup_max_cpus = NR_CPUS;
+EXPORT_SYMBOL(setup_max_cpus);
+
/*
* Setup routine for controlling SMP activation
@@ -149,6 +149,20 @@ static int __init nosmp(char *str)
early_param("nosmp", nosmp);
+/* this is hard limit */
+static int __init nrcpus(char *str)
+{
+ int nr_cpus;
+
+ get_option(&str, &nr_cpus);
+ if (nr_cpus > 0 && nr_cpus < nr_cpu_ids)
+ nr_cpu_ids = nr_cpus;
+
+ return 0;
+}
+
+early_param("nr_cpus", nrcpus);
+
static int __init maxcpus(char *str)
{
get_option(&str, &setup_max_cpus);
@@ -160,7 +174,7 @@ static int __init maxcpus(char *str)
early_param("maxcpus", maxcpus);
#else
-const unsigned int setup_max_cpus = NR_CPUS;
+static const unsigned int setup_max_cpus = NR_CPUS;
#endif
/*
@@ -183,15 +197,15 @@ static int __init set_reset_devices(char *str)
__setup("reset_devices", set_reset_devices);
-static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
-char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
+static const char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
+const char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
static const char *panic_later, *panic_param;
-extern struct obs_kernel_param __setup_start[], __setup_end[];
+extern const struct obs_kernel_param __setup_start[], __setup_end[];
static int __init obsolete_checksetup(char *line)
{
- struct obs_kernel_param *p;
+ const struct obs_kernel_param *p;
int had_early_param = 0;
p = __setup_start;
@@ -407,17 +421,26 @@ static void __init setup_command_line(char *command_line)
* gcc-3.4 accidentally inlines this function, so use noinline.
*/
+static __initdata DECLARE_COMPLETION(kthreadd_done);
+
static noinline void __init_refok rest_init(void)
__releases(kernel_lock)
{
int pid;
rcu_scheduler_starting();
+ /*
+ * We need to spawn init first so that it obtains pid 1, however
+ * the init task will end up wanting to create kthreads, which, if
+ * we schedule it before we create kthreadd, will OOPS.
+ */
kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND);
numa_default_policy();
pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
+ rcu_read_lock();
kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
- unlock_kernel();
+ rcu_read_unlock();
+ complete(&kthreadd_done);
/*
* The boot idle thread must execute schedule()
@@ -435,7 +458,7 @@ static noinline void __init_refok rest_init(void)
/* Check for early params. */
static int __init do_early_param(char *param, char *val)
{
- struct obs_kernel_param *p;
+ const struct obs_kernel_param *p;
for (p = __setup_start; p < __setup_end; p++) {
if ((p->early && strcmp(param, p->str) == 0) ||
@@ -505,6 +528,7 @@ static void __init mm_init(void)
page_cgroup_init_flatmem();
mem_init();
kmem_cache_init();
+ percpu_init_late();
pgtable_cache_init();
vmalloc_init();
}
@@ -512,7 +536,7 @@ static void __init mm_init(void)
asmlinkage void __init start_kernel(void)
{
char * command_line;
- extern struct kernel_param __start___param[], __stop___param[];
+ extern const struct kernel_param __start___param[], __stop___param[];
smp_setup_processor_id();
@@ -538,7 +562,6 @@ asmlinkage void __init start_kernel(void)
* Interrupts are still disabled. Do necessary setups, then
* enable them
*/
- lock_kernel();
tick_init();
boot_cpu_init();
page_address_init();
@@ -550,7 +573,7 @@ asmlinkage void __init start_kernel(void)
setup_per_cpu_areas();
smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
- build_all_zonelists();
+ build_all_zonelists(NULL);
page_alloc_init();
printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line);
@@ -584,6 +607,7 @@ asmlinkage void __init start_kernel(void)
local_irq_disable();
}
rcu_init();
+ radix_tree_init();
/* init some links before init_ISA_irqs() */
early_irq_init();
init_IRQ();
@@ -601,7 +625,7 @@ asmlinkage void __init start_kernel(void)
local_irq_enable();
/* Interrupts are enabled now so all GFP allocations are safe. */
- set_gfp_allowed_mask(__GFP_BITS_MASK);
+ gfp_allowed_mask = __GFP_BITS_MASK;
kmem_cache_init_late();
@@ -635,7 +659,6 @@ asmlinkage void __init start_kernel(void)
#endif
page_cgroup_init();
enable_debug_pagealloc();
- kmemtrace_init();
kmemleak_init();
debug_objects_mem_init();
idr_init_cache();
@@ -658,8 +681,8 @@ asmlinkage void __init start_kernel(void)
buffer_init();
key_init();
security_init();
+ dbg_late_init();
vfs_caches_init(totalram_pages);
- radix_tree_init();
signals_init();
/* rootfs populating might need page-writeback */
page_writeback_init();
@@ -697,38 +720,39 @@ int initcall_debug;
core_param(initcall_debug, initcall_debug, bool, 0644);
static char msgbuf[64];
-static struct boot_trace_call call;
-static struct boot_trace_ret ret;
-int do_one_initcall(initcall_t fn)
+static int __init_or_module do_one_initcall_debug(initcall_t fn)
{
- int count = preempt_count();
ktime_t calltime, delta, rettime;
+ unsigned long long duration;
+ int ret;
+
+ printk(KERN_DEBUG "calling %pF @ %i\n", fn, task_pid_nr(current));
+ calltime = ktime_get();
+ ret = fn();
+ rettime = ktime_get();
+ delta = ktime_sub(rettime, calltime);
+ duration = (unsigned long long) ktime_to_ns(delta) >> 10;
+ printk(KERN_DEBUG "initcall %pF returned %d after %lld usecs\n", fn,
+ ret, duration);
+
+ return ret;
+}
- if (initcall_debug) {
- call.caller = task_pid_nr(current);
- printk("calling %pF @ %i\n", fn, call.caller);
- calltime = ktime_get();
- trace_boot_call(&call, fn);
- enable_boot_trace();
- }
-
- ret.result = fn();
+int __init_or_module do_one_initcall(initcall_t fn)
+{
+ int count = preempt_count();
+ int ret;
- if (initcall_debug) {
- disable_boot_trace();
- rettime = ktime_get();
- delta = ktime_sub(rettime, calltime);
- ret.duration = (unsigned long long) ktime_to_ns(delta) >> 10;
- trace_boot_ret(&ret, fn);
- printk("initcall %pF returned %d after %Ld usecs\n", fn,
- ret.result, ret.duration);
- }
+ if (initcall_debug)
+ ret = do_one_initcall_debug(fn);
+ else
+ ret = fn();
msgbuf[0] = 0;
- if (ret.result && ret.result != -ENODEV && initcall_debug)
- sprintf(msgbuf, "error code %d ", ret.result);
+ if (ret && ret != -ENODEV && initcall_debug)
+ sprintf(msgbuf, "error code %d ", ret);
if (preempt_count() != count) {
strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf));
@@ -742,7 +766,7 @@ int do_one_initcall(initcall_t fn)
printk("initcall %pF returned with %s\n", fn, msgbuf);
}
- return ret.result;
+ return ret;
}
@@ -768,7 +792,6 @@ static void __init do_initcalls(void)
*/
static void __init do_basic_setup(void)
{
- init_workqueues();
cpuset_init_smp();
usermodehelper_init();
init_tmpfs();
@@ -786,7 +809,7 @@ static void __init do_pre_smp_initcalls(void)
do_one_initcall(*fn);
}
-static void run_init_process(char *init_filename)
+static void run_init_process(const char *init_filename)
{
argv_init[0] = init_filename;
kernel_execve(init_filename, argv_init, envp_init);
@@ -801,16 +824,10 @@ static noinline int init_post(void)
/* need to finish all async __init code before freeing the memory */
async_synchronize_full();
free_initmem();
- unlock_kernel();
mark_rodata_ro();
system_state = SYSTEM_RUNNING;
numa_default_policy();
- if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
- printk(KERN_WARNING "Warning: unable to open an initial console.\n");
-
- (void) sys_dup(0);
- (void) sys_dup(0);
current->signal->flags |= SIGNAL_UNKILLABLE;
@@ -836,17 +853,20 @@ static noinline int init_post(void)
run_init_process("/bin/init");
run_init_process("/bin/sh");
- panic("No init found. Try passing init= option to kernel.");
+ panic("No init found. Try passing init= option to kernel. "
+ "See Linux Documentation/init.txt for guidance.");
}
static int __init kernel_init(void * unused)
{
- lock_kernel();
-
+ /*
+ * Wait until kthreadd is all set-up.
+ */
+ wait_for_completion(&kthreadd_done);
/*
* init can allocate pages on any node
*/
- set_mems_allowed(node_possible_map);
+ set_mems_allowed(node_states[N_HIGH_MEMORY]);
/*
* init can run on any cpu.
*/
@@ -866,13 +886,18 @@ static int __init kernel_init(void * unused)
smp_prepare_cpus(setup_max_cpus);
do_pre_smp_initcalls();
- start_boot_trace();
smp_init();
sched_init_smp();
do_basic_setup();
+ /* Open the /dev/console on the rootfs, this should never fail */
+ if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
+ printk(KERN_WARNING "Warning: unable to open an initial console.\n");
+
+ (void) sys_dup(0);
+ (void) sys_dup(0);
/*
* check if there is an early userspace init. If yes, let it do all
* the work