Diffstat (limited to 'init/main.c')
-rw-r--r--	init/main.c	825
1 file changed, 500 insertions, 325 deletions
diff --git a/init/main.c b/init/main.c
index 99ce94930b0..e8ae1fef090 100644
--- a/init/main.c
+++ b/init/main.c
@@ -9,29 +9,30 @@
* Simplified starting of init: Michael A. Griffith <grif@acm.org>
*/
+#define DEBUG /* Enable initcall_debug */
+
#include <linux/types.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/syscalls.h>
+#include <linux/stackprotector.h>
#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/delay.h>
-#include <linux/utsname.h>
#include <linux/ioport.h>
#include <linux/init.h>
-#include <linux/smp_lock.h>
#include <linux/initrd.h>
-#include <linux/hdreg.h>
#include <linux/bootmem.h>
+#include <linux/acpi.h>
#include <linux/tty.h>
-#include <linux/gfp.h>
#include <linux/percpu.h>
#include <linux/kmod.h>
+#include <linux/vmalloc.h>
#include <linux/kernel_stat.h>
#include <linux/start_kernel.h>
#include <linux/security.h>
-#include <linux/workqueue.h>
+#include <linux/smp.h>
#include <linux/profile.h>
#include <linux/rcupdate.h>
#include <linux/moduleparam.h>
@@ -49,15 +50,34 @@
#include <linux/rmap.h>
#include <linux/mempolicy.h>
#include <linux/key.h>
-#include <linux/unwind.h>
#include <linux/buffer_head.h>
+#include <linux/page_cgroup.h>
#include <linux/debug_locks.h>
+#include <linux/debugobjects.h>
#include <linux/lockdep.h>
+#include <linux/kmemleak.h>
#include <linux/pid_namespace.h>
#include <linux/device.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/signal.h>
+#include <linux/idr.h>
+#include <linux/kgdb.h>
+#include <linux/ftrace.h>
+#include <linux/async.h>
+#include <linux/kmemcheck.h>
+#include <linux/sfi.h>
+#include <linux/shmem_fs.h>
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+#include <linux/file.h>
+#include <linux/ptrace.h>
+#include <linux/blkdev.h>
+#include <linux/elevator.h>
+#include <linux/sched_clock.h>
+#include <linux/context_tracking.h>
+#include <linux/random.h>
+#include <linux/list.h>
#include <asm/io.h>
#include <asm/bugs.h>
@@ -69,40 +89,25 @@
#include <asm/smp.h>
#endif
-/*
- * This is one of the first .c files built. Error out early if we have compiler
- * trouble.
- */
-
-#if __GNUC__ == 4 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ == 0
-#warning gcc-4.1.0 is known to miscompile the kernel. A different compiler version is recommended.
-#endif
-
static int kernel_init(void *);
extern void init_IRQ(void);
extern void fork_init(unsigned long);
-extern void mca_init(void);
-extern void sbus_init(void);
-extern void pidhash_init(void);
-extern void pidmap_init(void);
-extern void prio_tree_init(void);
extern void radix_tree_init(void);
-extern void free_initmem(void);
-#ifdef CONFIG_ACPI
-extern void acpi_early_init(void);
-#else
-static inline void acpi_early_init(void) { }
-#endif
#ifndef CONFIG_DEBUG_RODATA
static inline void mark_rodata_ro(void) { }
#endif
-#ifdef CONFIG_TC
-extern void tc_init(void);
-#endif
+/*
+ * Debug helper: via this flag we know that we are in 'early bootup code'
+ * where only the boot processor is running with IRQ disabled. This means
+ * two things - IRQ must not be enabled before the flag is cleared and some
+ * operations which are not allowed with IRQ disabled are allowed while the
+ * flag is set.
+ */
+bool early_boot_irqs_disabled __read_mostly;
-enum system_states system_state;
+enum system_states system_state __read_mostly;
EXPORT_SYMBOL(system_state);
/*
@@ -113,8 +118,7 @@ EXPORT_SYMBOL(system_state);
extern void time_init(void);
/* Default late time init is NULL. archs can override this later. */
-void (*late_time_init)(void);
-extern void softirq_init(void);
+void (*__initdata late_time_init)(void);
/* Untouched command line saved by arch-specific code. */
char __initdata boot_command_line[COMMAND_LINE_SIZE];
@@ -122,50 +126,18 @@ char __initdata boot_command_line[COMMAND_LINE_SIZE];
char *saved_command_line;
/* Command line for parameter parsing */
static char *static_command_line;
+/* Command line for per-initcall parameter parsing */
+static char *initcall_command_line;
static char *execute_command;
static char *ramdisk_execute_command;
-#ifdef CONFIG_SMP
-/* Setup configured maximum number of CPUs to activate */
-unsigned int __initdata setup_max_cpus = NR_CPUS;
-
/*
- * Setup routine for controlling SMP activation
- *
- * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
- * activation entirely (the MPS table probe still happens, though).
- *
- * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
- * greater than 0, limits the maximum number of CPUs activated in
- * SMP mode to <NUM>.
+ * Used to generate warnings if static_key manipulation functions are used
+ * before jump_label_init is called.
*/
-#ifndef CONFIG_X86_IO_APIC
-static inline void disable_ioapic_setup(void) {};
-#endif
-
-static int __init nosmp(char *str)
-{
- setup_max_cpus = 0;
- disable_ioapic_setup();
- return 0;
-}
-
-early_param("nosmp", nosmp);
-
-static int __init maxcpus(char *str)
-{
- get_option(&str, &setup_max_cpus);
- if (setup_max_cpus == 0)
- disable_ioapic_setup();
-
- return 0;
-}
-
-early_param("maxcpus", maxcpus);
-#else
-#define setup_max_cpus NR_CPUS
-#endif
+bool static_key_initialized __read_mostly = false;
+EXPORT_SYMBOL_GPL(static_key_initialized);
/*
* If set, this is an indication to the drivers that reset the underlying
@@ -187,21 +159,21 @@ static int __init set_reset_devices(char *str)
__setup("reset_devices", set_reset_devices);
-static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
-char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
+static const char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
+const char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
static const char *panic_later, *panic_param;
-extern struct obs_kernel_param __setup_start[], __setup_end[];
+extern const struct obs_kernel_param __setup_start[], __setup_end[];
static int __init obsolete_checksetup(char *line)
{
- struct obs_kernel_param *p;
+ const struct obs_kernel_param *p;
int had_early_param = 0;
p = __setup_start;
do {
int n = strlen(p->str);
- if (!strncmp(line, p->str, n)) {
+ if (parameqn(line, p->str, n)) {
if (p->early) {
/* Already done in parse_early_param?
* (Needs exact match on param part).
@@ -210,8 +182,8 @@ static int __init obsolete_checksetup(char *line)
if (line[n] == '\0' || line[n] == '=')
had_early_param = 1;
} else if (!p->setup_func) {
- printk(KERN_WARNING "Parameter %s is obsolete,"
- " ignored\n", p->str);
+ pr_warn("Parameter %s is obsolete, ignored\n",
+ p->str);
return 1;
} else if (p->setup_func(line + n))
return 1;
@@ -232,13 +204,13 @@ EXPORT_SYMBOL(loops_per_jiffy);
static int __init debug_kernel(char *str)
{
- console_loglevel = 10;
+ console_loglevel = CONSOLE_LOGLEVEL_DEBUG;
return 0;
}
static int __init quiet_kernel(char *str)
{
- console_loglevel = 4;
+ console_loglevel = CONSOLE_LOGLEVEL_QUIET;
return 0;
}
@@ -247,19 +219,26 @@ early_param("quiet", quiet_kernel);
static int __init loglevel(char *str)
{
- get_option(&str, &console_loglevel);
- return 0;
+ int newlevel;
+
+ /*
+ * Only update loglevel value when a correct setting was passed,
+ * to prevent blind crashes (when loglevel being set to 0) that
+ * are quite hard to debug
+ */
+ if (get_option(&str, &newlevel)) {
+ console_loglevel = newlevel;
+ return 0;
+ }
+
+ return -EINVAL;
}
early_param("loglevel", loglevel);
-/*
- * Unknown boot options get handed to init, unless they look like
- * failed parameters
- */
-static int __init unknown_bootoption(char *param, char *val)
+/* Change NUL term back to "=", to make "param" the whole string. */
+static int __init repair_env_string(char *param, char *val, const char *unused)
{
- /* Change NUL term back to "=", to make "param" the whole string. */
if (val) {
/* param=val or param="val"? */
if (val == param+strlen(param)+1)
@@ -271,19 +250,45 @@ static int __init unknown_bootoption(char *param, char *val)
} else
BUG();
}
+ return 0;
+}
+
+/* Anything after -- gets handed straight to init. */
+static int __init set_init_arg(char *param, char *val, const char *unused)
+{
+ unsigned int i;
+
+ if (panic_later)
+ return 0;
+
+ repair_env_string(param, val, unused);
+
+ for (i = 0; argv_init[i]; i++) {
+ if (i == MAX_INIT_ARGS) {
+ panic_later = "init";
+ panic_param = param;
+ return 0;
+ }
+ }
+ argv_init[i] = param;
+ return 0;
+}
+
+/*
+ * Unknown boot options get handed to init, unless they look like
+ * unused parameters (modprobe will find them in /proc/cmdline).
+ */
+static int __init unknown_bootoption(char *param, char *val, const char *unused)
+{
+ repair_env_string(param, val, unused);
/* Handle obsolete-style parameters */
if (obsolete_checksetup(param))
return 0;
- /*
- * Preemptive maintenance for "why didn't my misspelled command
- * line work?"
- */
- if (strchr(param, '.') && (!val || strchr(param, '.') < val)) {
- printk(KERN_ERR "Unknown boot option `%s': ignoring\n", param);
+ /* Unused module parameter. */
+ if (strchr(param, '.') && (!val || strchr(param, '.') < val))
return 0;
- }
if (panic_later)
return 0;
@@ -293,7 +298,7 @@ static int __init unknown_bootoption(char *param, char *val)
unsigned int i;
for (i = 0; envp_init[i]; i++) {
if (i == MAX_INIT_ENVS) {
- panic_later = "Too many boot env vars at `%s'";
+ panic_later = "env";
panic_param = param;
}
if (!strncmp(param, envp_init[i], val - param))
@@ -305,7 +310,7 @@ static int __init unknown_bootoption(char *param, char *val)
unsigned int i;
for (i = 0; argv_init[i]; i++) {
if (i == MAX_INIT_ARGS) {
- panic_later = "Too many boot init vars at `%s'";
+ panic_later = "init";
panic_param = param;
}
}
@@ -314,10 +319,6 @@ static int __init unknown_bootoption(char *param, char *val)
return 0;
}
-#ifdef CONFIG_DEBUG_PAGEALLOC
-int __read_mostly debug_pagealloc_enabled = 0;
-#endif
-
static int __init init_setup(char *str)
{
unsigned int i;
@@ -348,7 +349,7 @@ static int __init rdinit_setup(char *str)
__setup("rdinit=", rdinit_setup);
#ifndef CONFIG_SMP
-
+static const unsigned int setup_max_cpus = NR_CPUS;
#ifdef CONFIG_X86_LOCAL_APIC
static void __init smp_init(void)
{
@@ -358,52 +359,8 @@ static void __init smp_init(void)
#define smp_init() do { } while (0)
#endif
-static inline void setup_per_cpu_areas(void) { }
+static inline void setup_nr_cpu_ids(void) { }
static inline void smp_prepare_cpus(unsigned int maxcpus) { }
-
-#else
-
-#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
-unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
-
-EXPORT_SYMBOL(__per_cpu_offset);
-
-static void __init setup_per_cpu_areas(void)
-{
- unsigned long size, i;
- char *ptr;
- unsigned long nr_possible_cpus = num_possible_cpus();
-
- /* Copy section for each CPU (we discard the original) */
- size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
- ptr = alloc_bootmem_pages(size * nr_possible_cpus);
-
- for_each_possible_cpu(i) {
- __per_cpu_offset[i] = ptr - __per_cpu_start;
- memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
- ptr += size;
- }
-}
-#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */
-
-/* Called by boot processor to activate the rest. */
-static void __init smp_init(void)
-{
- unsigned int cpu;
-
- /* FIXME: This should be done in userspace --RR */
- for_each_present_cpu(cpu) {
- if (num_online_cpus() >= setup_max_cpus)
- break;
- if (!cpu_online(cpu))
- cpu_up(cpu);
- }
-
- /* Any cleanup work */
- printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus());
- smp_cpus_done(setup_max_cpus);
-}
-
#endif
/*
@@ -414,8 +371,11 @@ static void __init smp_init(void)
*/
static void __init setup_command_line(char *command_line)
{
- saved_command_line = alloc_bootmem(strlen (boot_command_line)+1);
- static_command_line = alloc_bootmem(strlen (command_line)+1);
+ saved_command_line =
+ memblock_virt_alloc(strlen(boot_command_line) + 1, 0);
+ initcall_command_line =
+ memblock_virt_alloc(strlen(boot_command_line) + 1, 0);
+ static_command_line = memblock_virt_alloc(strlen(command_line) + 1, 0);
strcpy (saved_command_line, boot_command_line);
strcpy (static_command_line, command_line);
}
@@ -429,49 +389,59 @@ static void __init setup_command_line(char *command_line)
* gcc-3.4 accidentally inlines this function, so use noinline.
*/
-static void noinline __init_refok rest_init(void)
- __releases(kernel_lock)
+static __initdata DECLARE_COMPLETION(kthreadd_done);
+
+static noinline void __init_refok rest_init(void)
{
int pid;
- kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND);
+ rcu_scheduler_starting();
+ /*
+ * We need to spawn init first so that it obtains pid 1, however
+ * the init task will end up wanting to create kthreads, which, if
+ * we schedule it before we create kthreadd, will OOPS.
+ */
+ kernel_thread(kernel_init, NULL, CLONE_FS);
numa_default_policy();
pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
- kthreadd_task = find_task_by_pid(pid);
- unlock_kernel();
+ rcu_read_lock();
+ kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
+ rcu_read_unlock();
+ complete(&kthreadd_done);
/*
* The boot idle thread must execute schedule()
* at least once to get things moving:
*/
init_idle_bootup_task(current);
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
-
+ schedule_preempt_disabled();
/* Call into cpu_idle with preempt disabled */
- cpu_idle();
+ cpu_startup_entry(CPUHP_ONLINE);
}
/* Check for early params. */
-static int __init do_early_param(char *param, char *val)
+static int __init do_early_param(char *param, char *val, const char *unused)
{
- struct obs_kernel_param *p;
+ const struct obs_kernel_param *p;
for (p = __setup_start; p < __setup_end; p++) {
- if ((p->early && strcmp(param, p->str) == 0) ||
+ if ((p->early && parameq(param, p->str)) ||
(strcmp(param, "console") == 0 &&
strcmp(p->str, "earlycon") == 0)
) {
if (p->setup_func(val) != 0)
- printk(KERN_WARNING
- "Malformed early option '%s'\n", param);
+ pr_warn("Malformed early option '%s'\n", param);
}
}
/* We accept everything at this stage. */
return 0;
}
+void __init parse_early_options(char *cmdline)
+{
+ parse_args("early options", cmdline, NULL, 0, 0, 0, do_early_param);
+}
+
/* Arch code calls this early on, or if not, just before other parsing. */
void __init parse_early_param(void)
{
@@ -483,7 +453,7 @@ void __init parse_early_param(void)
/* All fall through to do_early_param. */
strlcpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE);
- parse_args("early options", tmp_cmdline, NULL, 0, do_early_param);
+ parse_early_options(tmp_cmdline);
done = 1;
}
@@ -495,50 +465,102 @@ static void __init boot_cpu_init(void)
{
int cpu = smp_processor_id();
/* Mark the boot cpu "present", "online" etc for SMP and UP case */
- cpu_set(cpu, cpu_online_map);
- cpu_set(cpu, cpu_present_map);
- cpu_set(cpu, cpu_possible_map);
+ set_cpu_online(cpu, true);
+ set_cpu_active(cpu, true);
+ set_cpu_present(cpu, true);
+ set_cpu_possible(cpu, true);
}
-void __init __attribute__((weak)) smp_setup_processor_id(void)
+void __init __weak smp_setup_processor_id(void)
{
}
-asmlinkage void __init start_kernel(void)
+# if THREAD_SIZE >= PAGE_SIZE
+void __init __weak thread_info_cache_init(void)
{
- char * command_line;
- extern struct kernel_param __start___param[], __stop___param[];
+}
+#endif
- smp_setup_processor_id();
+/*
+ * Set up kernel memory allocators
+ */
+static void __init mm_init(void)
+{
+ /*
+ * page_cgroup requires contiguous pages,
+ * bigger than MAX_ORDER unless SPARSEMEM.
+ */
+ page_cgroup_init_flatmem();
+ mem_init();
+ kmem_cache_init();
+ percpu_init_late();
+ pgtable_init();
+ vmalloc_init();
+}
+
+asmlinkage __visible void __init start_kernel(void)
+{
+ char * command_line, *after_dashes;
+ extern const struct kernel_param __start___param[], __stop___param[];
/*
* Need to run as early as possible, to initialize the
* lockdep hash:
*/
- unwind_init();
lockdep_init();
+ smp_setup_processor_id();
+ debug_objects_early_init();
+
+ /*
+	 * Set up the initial canary ASAP:
+ */
+ boot_init_stack_canary();
+
cgroup_init_early();
local_irq_disable();
- early_boot_irqs_off();
- early_init_irq_lock_class();
+ early_boot_irqs_disabled = true;
/*
* Interrupts are still disabled. Do necessary setups, then
* enable them
*/
- lock_kernel();
- tick_init();
boot_cpu_init();
page_address_init();
- printk(KERN_NOTICE);
- printk(linux_banner);
+ pr_notice("%s", linux_banner);
setup_arch(&command_line);
+ mm_init_cpumask(&init_mm);
setup_command_line(command_line);
- unwind_setup();
+ setup_nr_cpu_ids();
setup_per_cpu_areas();
smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
+ build_all_zonelists(NULL, NULL);
+ page_alloc_init();
+
+ pr_notice("Kernel command line: %s\n", boot_command_line);
+ parse_early_param();
+ after_dashes = parse_args("Booting kernel",
+ static_command_line, __start___param,
+ __stop___param - __start___param,
+ -1, -1, &unknown_bootoption);
+ if (after_dashes)
+ parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
+ set_init_arg);
+
+ jump_label_init();
+
+ /*
+ * These use large bootmem allocations and must precede
+ * kmem_cache_init()
+ */
+ setup_log_buf(0);
+ pidhash_init();
+ vfs_caches_init_early();
+ sort_main_extable();
+ trap_init();
+ mm_init();
+
/*
* Set up the scheduler prior starting any interrupts (such as the
* timer interrupt). Full topology setup happens at smp_init()
@@ -550,34 +572,32 @@ asmlinkage void __init start_kernel(void)
* fragile until we cpu_idle() for the first time.
*/
preempt_disable();
- build_all_zonelists();
- page_alloc_init();
- printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line);
- parse_early_param();
- parse_args("Booting kernel", static_command_line, __start___param,
- __stop___param - __start___param,
- &unknown_bootoption);
- if (!irqs_disabled()) {
- printk(KERN_WARNING "start_kernel(): bug: interrupts were "
- "enabled *very* early, fixing it\n");
+ if (WARN(!irqs_disabled(), "Interrupts were enabled *very* early, fixing it\n"))
local_irq_disable();
- }
- sort_main_extable();
- trap_init();
+ idr_init_cache();
rcu_init();
+ tick_nohz_init();
+ context_tracking_init();
+ radix_tree_init();
+ /* init some links before init_ISA_irqs() */
+ early_irq_init();
init_IRQ();
- pidhash_init();
+ tick_init();
init_timers();
hrtimers_init();
softirq_init();
timekeeping_init();
time_init();
+ sched_clock_postinit();
+ perf_event_init();
profile_init();
- if (!irqs_disabled())
- printk("start_kernel(): bug: interrupts were enabled early\n");
- early_boot_irqs_on();
+ call_function_init();
+ WARN(!irqs_disabled(), "Interrupts were enabled early\n");
+ early_boot_irqs_disabled = false;
local_irq_enable();
+ kmem_cache_init_late();
+
/*
* HACK ALERT! This is early. We're enabling the console before
* we've done PCI setups etc, and console_init() must be aware of
@@ -585,7 +605,8 @@ asmlinkage void __init start_kernel(void)
*/
console_init();
if (panic_later)
- panic(panic_later, panic_param);
+ panic("Too many boot %s vars at `%s'", panic_later,
+ panic_param);
lockdep_info();
@@ -598,45 +619,46 @@ asmlinkage void __init start_kernel(void)
#ifdef CONFIG_BLK_DEV_INITRD
if (initrd_start && !initrd_below_start_ok &&
- initrd_start < min_low_pfn << PAGE_SHIFT) {
- printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - "
- "disabling it.\n",initrd_start,min_low_pfn << PAGE_SHIFT);
+ page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
+ pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n",
+ page_to_pfn(virt_to_page((void *)initrd_start)),
+ min_low_pfn);
initrd_start = 0;
}
#endif
- vfs_caches_init_early();
- cpuset_init_early();
- mem_init();
- enable_debug_pagealloc();
- cpu_hotplug_init();
- kmem_cache_init();
+ page_cgroup_init();
+ debug_objects_mem_init();
+ kmemleak_init();
setup_per_cpu_pageset();
numa_policy_init();
if (late_time_init)
late_time_init();
+ sched_clock_init();
calibrate_delay();
pidmap_init();
- pgtable_cache_init();
- prio_tree_init();
anon_vma_init();
+ acpi_early_init();
#ifdef CONFIG_X86
- if (efi_enabled)
+ if (efi_enabled(EFI_RUNTIME_SERVICES))
efi_enter_virtual_mode();
#endif
- fork_init(num_physpages);
+#ifdef CONFIG_X86_ESPFIX64
+ /* Should be run before the first non-init thread is created */
+ init_espfix_bsp();
+#endif
+ thread_info_cache_init();
+ cred_init();
+ fork_init(totalram_pages);
proc_caches_init();
buffer_init();
- unnamed_dev_init();
key_init();
security_init();
- vfs_caches_init(num_physpages);
- radix_tree_init();
+ dbg_late_init();
+ vfs_caches_init(totalram_pages);
signals_init();
/* rootfs populating might need page-writeback */
page_writeback_init();
-#ifdef CONFIG_PROC_FS
proc_root_init();
-#endif
cgroup_init();
cpuset_init();
taskstats_init_early();
@@ -644,81 +666,202 @@ asmlinkage void __init start_kernel(void)
check_bugs();
- acpi_early_init(); /* before LAPIC and SMP init */
+ sfi_init_late();
+
+ if (efi_enabled(EFI_RUNTIME_SERVICES)) {
+ efi_late_init();
+ efi_free_boot_services();
+ }
+
+ ftrace_init();
/* Do the rest non-__init'ed, we're now alive */
rest_init();
}
-static int __initdata initcall_debug;
-
-static int __init initcall_debug_setup(char *str)
+/* Call all constructor functions linked into the kernel. */
+static void __init do_ctors(void)
{
- initcall_debug = 1;
- return 1;
+#ifdef CONFIG_CONSTRUCTORS
+ ctor_fn_t *fn = (ctor_fn_t *) __ctors_start;
+
+ for (; fn < (ctor_fn_t *) __ctors_end; fn++)
+ (*fn)();
+#endif
}
-__setup("initcall_debug", initcall_debug_setup);
-extern initcall_t __initcall_start[], __initcall_end[];
+bool initcall_debug;
+core_param(initcall_debug, initcall_debug, bool, 0644);
-static void __init do_initcalls(void)
+#ifdef CONFIG_KALLSYMS
+struct blacklist_entry {
+ struct list_head next;
+ char *buf;
+};
+
+static __initdata_or_module LIST_HEAD(blacklisted_initcalls);
+
+static int __init initcall_blacklist(char *str)
{
- initcall_t *call;
- int count = preempt_count();
+ char *str_entry;
+ struct blacklist_entry *entry;
- for (call = __initcall_start; call < __initcall_end; call++) {
- ktime_t t0, t1, delta;
- char *msg = NULL;
- char msgbuf[40];
- int result;
-
- if (initcall_debug) {
- printk("Calling initcall 0x%p", *call);
- print_fn_descriptor_symbol(": %s()",
- (unsigned long) *call);
- printk("\n");
- t0 = ktime_get();
+ /* str argument is a comma-separated list of functions */
+ do {
+ str_entry = strsep(&str, ",");
+ if (str_entry) {
+ pr_debug("blacklisting initcall %s\n", str_entry);
+ entry = alloc_bootmem(sizeof(*entry));
+ entry->buf = alloc_bootmem(strlen(str_entry) + 1);
+ strcpy(entry->buf, str_entry);
+ list_add(&entry->next, &blacklisted_initcalls);
}
+ } while (str_entry);
+
+ return 0;
+}
- result = (*call)();
+static bool __init_or_module initcall_blacklisted(initcall_t fn)
+{
+ struct list_head *tmp;
+ struct blacklist_entry *entry;
+ char *fn_name;
+
+ fn_name = kasprintf(GFP_KERNEL, "%pf", fn);
+ if (!fn_name)
+ return false;
+
+ list_for_each(tmp, &blacklisted_initcalls) {
+ entry = list_entry(tmp, struct blacklist_entry, next);
+ if (!strcmp(fn_name, entry->buf)) {
+ pr_debug("initcall %s blacklisted\n", fn_name);
+ kfree(fn_name);
+ return true;
+ }
+ }
- if (initcall_debug) {
- t1 = ktime_get();
- delta = ktime_sub(t1, t0);
+ kfree(fn_name);
+ return false;
+}
+#else
+static int __init initcall_blacklist(char *str)
+{
+ pr_warn("initcall_blacklist requires CONFIG_KALLSYMS\n");
+ return 0;
+}
- printk("initcall 0x%p", *call);
- print_fn_descriptor_symbol(": %s()",
- (unsigned long) *call);
- printk(" returned %d.\n", result);
+static bool __init_or_module initcall_blacklisted(initcall_t fn)
+{
+ return false;
+}
+#endif
+__setup("initcall_blacklist=", initcall_blacklist);
- printk("initcall 0x%p ran for %Ld msecs: ",
- *call, (unsigned long long)delta.tv64 >> 20);
- print_fn_descriptor_symbol("%s()\n",
- (unsigned long) *call);
- }
+static int __init_or_module do_one_initcall_debug(initcall_t fn)
+{
+ ktime_t calltime, delta, rettime;
+ unsigned long long duration;
+ int ret;
+
+ printk(KERN_DEBUG "calling %pF @ %i\n", fn, task_pid_nr(current));
+ calltime = ktime_get();
+ ret = fn();
+ rettime = ktime_get();
+ delta = ktime_sub(rettime, calltime);
+ duration = (unsigned long long) ktime_to_ns(delta) >> 10;
+ printk(KERN_DEBUG "initcall %pF returned %d after %lld usecs\n",
+ fn, ret, duration);
+
+ return ret;
+}
- if (result && result != -ENODEV && initcall_debug) {
- sprintf(msgbuf, "error code %d", result);
- msg = msgbuf;
- }
- if (preempt_count() != count) {
- msg = "preemption imbalance";
- preempt_count() = count;
- }
- if (irqs_disabled()) {
- msg = "disabled interrupts";
- local_irq_enable();
- }
- if (msg) {
- printk(KERN_WARNING "initcall at 0x%p", *call);
- print_fn_descriptor_symbol(": %s()",
- (unsigned long) *call);
- printk(": returned with %s\n", msg);
- }
+int __init_or_module do_one_initcall(initcall_t fn)
+{
+ int count = preempt_count();
+ int ret;
+ char msgbuf[64];
+
+ if (initcall_blacklisted(fn))
+ return -EPERM;
+
+ if (initcall_debug)
+ ret = do_one_initcall_debug(fn);
+ else
+ ret = fn();
+
+ msgbuf[0] = 0;
+
+ if (preempt_count() != count) {
+ sprintf(msgbuf, "preemption imbalance ");
+ preempt_count_set(count);
}
+ if (irqs_disabled()) {
+ strlcat(msgbuf, "disabled interrupts ", sizeof(msgbuf));
+ local_irq_enable();
+ }
+ WARN(msgbuf[0], "initcall %pF returned with %s\n", fn, msgbuf);
+
+ return ret;
+}
+
+
+extern initcall_t __initcall_start[];
+extern initcall_t __initcall0_start[];
+extern initcall_t __initcall1_start[];
+extern initcall_t __initcall2_start[];
+extern initcall_t __initcall3_start[];
+extern initcall_t __initcall4_start[];
+extern initcall_t __initcall5_start[];
+extern initcall_t __initcall6_start[];
+extern initcall_t __initcall7_start[];
+extern initcall_t __initcall_end[];
+
+static initcall_t *initcall_levels[] __initdata = {
+ __initcall0_start,
+ __initcall1_start,
+ __initcall2_start,
+ __initcall3_start,
+ __initcall4_start,
+ __initcall5_start,
+ __initcall6_start,
+ __initcall7_start,
+ __initcall_end,
+};
+
+/* Keep these in sync with initcalls in include/linux/init.h */
+static char *initcall_level_names[] __initdata = {
+ "early",
+ "core",
+ "postcore",
+ "arch",
+ "subsys",
+ "fs",
+ "device",
+ "late",
+};
+
+static void __init do_initcall_level(int level)
+{
+ extern const struct kernel_param __start___param[], __stop___param[];
+ initcall_t *fn;
+
+ strcpy(initcall_command_line, saved_command_line);
+ parse_args(initcall_level_names[level],
+ initcall_command_line, __start___param,
+ __stop___param - __start___param,
+ level, level,
+ &repair_env_string);
+
+ for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++)
+ do_one_initcall(*fn);
+}
- /* Make sure there is no pending stuff from the initcall sequence */
- flush_scheduled_work();
+static void __init do_initcalls(void)
+{
+ int level;
+
+ for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++)
+ do_initcall_level(level);
}
/*
@@ -730,60 +873,80 @@ static void __init do_initcalls(void)
*/
static void __init do_basic_setup(void)
{
- /* drivers will send hotplug events */
- init_workqueues();
+ cpuset_init_smp();
usermodehelper_init();
+ shmem_init();
driver_init();
init_irq_proc();
+ do_ctors();
+ usermodehelper_enable();
do_initcalls();
+ random_int_secret_init();
}
-static int __initdata nosoftlockup;
-
-static int __init nosoftlockup_setup(char *str)
+static void __init do_pre_smp_initcalls(void)
{
- nosoftlockup = 1;
- return 1;
+ initcall_t *fn;
+
+ for (fn = __initcall_start; fn < __initcall0_start; fn++)
+ do_one_initcall(*fn);
}
-__setup("nosoftlockup", nosoftlockup_setup);
-static void __init do_pre_smp_initcalls(void)
+/*
+ * This function requests modules which should be loaded by default and is
+ * called twice right after initrd is mounted and right before init is
+ * exec'd. If such modules are on either initrd or rootfs, they will be
+ * loaded before control is passed to userland.
+ */
+void __init load_default_modules(void)
{
- extern int spawn_ksoftirqd(void);
-
- migration_init();
- spawn_ksoftirqd();
- if (!nosoftlockup)
- spawn_softlockup_task();
+ load_default_elevator_module();
}
-static void run_init_process(char *init_filename)
+static int run_init_process(const char *init_filename)
{
argv_init[0] = init_filename;
- kernel_execve(init_filename, argv_init, envp_init);
+ return do_execve(getname_kernel(init_filename),
+ (const char __user *const __user *)argv_init,
+ (const char __user *const __user *)envp_init);
}
-/* This is a non __init function. Force it to be noinline otherwise gcc
- * makes it inline to init() and it becomes part of init.text section
- */
-static int noinline init_post(void)
+static int try_to_run_init_process(const char *init_filename)
+{
+ int ret;
+
+ ret = run_init_process(init_filename);
+
+ if (ret && ret != -ENOENT) {
+ pr_err("Starting init: %s exists but couldn't execute it (error %d)\n",
+ init_filename, ret);
+ }
+
+ return ret;
+}
+
+static noinline void __init kernel_init_freeable(void);
+
+static int __ref kernel_init(void *unused)
{
+ int ret;
+
+ kernel_init_freeable();
+ /* need to finish all async __init code before freeing the memory */
+ async_synchronize_full();
free_initmem();
- unlock_kernel();
mark_rodata_ro();
system_state = SYSTEM_RUNNING;
numa_default_policy();
- if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
- printk(KERN_WARNING "Warning: unable to open an initial console.\n");
-
- (void) sys_dup(0);
- (void) sys_dup(0);
+ flush_delayed_fput();
if (ramdisk_execute_command) {
- run_init_process(ramdisk_execute_command);
- printk(KERN_WARNING "Failed to execute %s\n",
- ramdisk_execute_command);
+ ret = run_init_process(ramdisk_execute_command);
+ if (!ret)
+ return 0;
+ pr_err("Failed to execute %s (error %d)\n",
+ ramdisk_execute_command, ret);
}
/*
@@ -793,48 +956,59 @@ static int noinline init_post(void)
* trying to recover a really broken machine.
*/
if (execute_command) {
- run_init_process(execute_command);
- printk(KERN_WARNING "Failed to execute %s. Attempting "
- "defaults...\n", execute_command);
+ ret = run_init_process(execute_command);
+ if (!ret)
+ return 0;
+ pr_err("Failed to execute %s (error %d). Attempting defaults...\n",
+ execute_command, ret);
}
- run_init_process("/sbin/init");
- run_init_process("/etc/init");
- run_init_process("/bin/init");
- run_init_process("/bin/sh");
+ if (!try_to_run_init_process("/sbin/init") ||
+ !try_to_run_init_process("/etc/init") ||
+ !try_to_run_init_process("/bin/init") ||
+ !try_to_run_init_process("/bin/sh"))
+ return 0;
- panic("No init found. Try passing init= option to kernel.");
+ panic("No working init found. Try passing init= option to kernel. "
+ "See Linux Documentation/init.txt for guidance.");
}
-static int __init kernel_init(void * unused)
+static noinline void __init kernel_init_freeable(void)
{
- lock_kernel();
/*
- * init can run on any cpu.
+ * Wait until kthreadd is all set-up.
*/
- set_cpus_allowed(current, CPU_MASK_ALL);
+ wait_for_completion(&kthreadd_done);
+
+ /* Now the scheduler is fully set up and can do blocking allocations */
+ gfp_allowed_mask = __GFP_BITS_MASK;
+
/*
- * Tell the world that we're going to be the grim
- * reaper of innocent orphaned children.
- *
- * We don't want people to have to make incorrect
- * assumptions about where in the task array this
- * can be found.
+ * init can allocate pages on any node
*/
- init_pid_ns.child_reaper = current;
+ set_mems_allowed(node_states[N_MEMORY]);
+ /*
+ * init can run on any cpu.
+ */
+ set_cpus_allowed_ptr(current, cpu_all_mask);
cad_pid = task_pid(current);
smp_prepare_cpus(setup_max_cpus);
do_pre_smp_initcalls();
+ lockup_detector_init();
smp_init();
sched_init_smp();
- cpuset_init_smp();
-
do_basic_setup();
+ /* Open the /dev/console on the rootfs, this should never fail */
+ if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
+ pr_err("Warning: unable to open an initial console.\n");
+
+ (void) sys_dup(0);
+ (void) sys_dup(0);
/*
* check if there is an early userspace init. If yes, let it do all
* the work
@@ -853,6 +1027,7 @@ static int __init kernel_init(void * unused)
* we're essentially up and running. Get rid of the
* initmem segments and start the user-mode stuff..
*/
- init_post();
- return 0;
+
+ /* rootfs is available now, try loading default modules */
+ load_default_modules();
}