diff options
Diffstat (limited to 'init')
| -rw-r--r-- | init/Kconfig | 262 | ||||
| -rw-r--r-- | init/calibrate.c | 13 | ||||
| -rw-r--r-- | init/do_mounts.c | 53 | ||||
| -rw-r--r-- | init/do_mounts_rd.c | 14 | ||||
| -rw-r--r-- | init/initramfs.c | 3 | ||||
| -rw-r--r-- | init/main.c | 210 |
6 files changed, 420 insertions, 135 deletions
diff --git a/init/Kconfig b/init/Kconfig index 2d9b83104dc..9d76b99af1b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -53,6 +53,20 @@ config CROSS_COMPILE need to set this unless you want the configured kernel build directory to select the cross-compiler automatically. +config COMPILE_TEST + bool "Compile also drivers which will not load" + default n + help + Some drivers can be compiled on a different platform than they are + intended to be run on. Despite they cannot be loaded there (or even + when they load they cannot be used due to missing HW support), + developers still, opposing to distributors, might want to build such + drivers to compile-test them. + + If you are a developer and want to build everything available, say Y + here. If you are a user/distributor, say N here to exclude useless + drivers to be distributed. + config LOCALVERSION string "Local version - append to kernel release" help @@ -98,10 +112,13 @@ config HAVE_KERNEL_XZ config HAVE_KERNEL_LZO bool +config HAVE_KERNEL_LZ4 + bool + choice prompt "Kernel compression mode" default KERNEL_GZIP - depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO + depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4 help The linux kernel is a kind of self-extracting executable. Several compression algorithms are available, which differ @@ -168,6 +185,18 @@ config KERNEL_LZO size is about 10% bigger than gzip; however its speed (both compression and decompression) is the fastest. +config KERNEL_LZ4 + bool "LZ4" + depends on HAVE_KERNEL_LZ4 + help + LZ4 is an LZ77-type compressor with a fixed, byte-oriented encoding. + A preliminary version of LZ4 de/compression tool is available at + <https://code.google.com/p/lz4/>. + + Its compression ratio is worse than LZO. The size of the kernel + is about 8% bigger than LZO. But the decompression speed is + faster than LZO. + endchoice config DEFAULT_HOSTNAME @@ -232,6 +261,16 @@ config POSIX_MQUEUE_SYSCTL depends on SYSCTL default y +config CROSS_MEMORY_ATTACH + bool "Enable process_vm_readv/writev syscalls" + depends on MMU + default y + help + Enabling this option adds the system calls process_vm_readv and + process_vm_writev which allow a process with the correct privileges + to directly read from or write to to another process's address space. + See the man page for more details. + config FHANDLE bool "open by fhandle syscalls" select EXPORTFS @@ -244,6 +283,16 @@ config FHANDLE get renamed. Enables open_by_handle_at(2) and name_to_handle_at(2) syscalls. +config USELIB + bool "uselib syscall" + default y + help + This option enables the uselib syscall, a system call used in the + dynamic linker from libc5 and earlier. glibc does not use this + system call. If you intend to run programs built on libc5 or + earlier, you may need to enable this syscall. Current systems + running glibc can safely disable this. + config AUDIT bool "Auditing support" depends on NET @@ -253,9 +302,12 @@ config AUDIT logging of avc messages output). Does not do system-call auditing without CONFIG_AUDITSYSCALL. +config HAVE_ARCH_AUDITSYSCALL + bool + config AUDITSYSCALL bool "Enable system-call auditing support" - depends on AUDIT && (X86 || PPC || S390 || IA64 || UML || SPARC64 || SUPERH || (ARM && AEABI && !OABI_COMPAT)) + depends on AUDIT && HAVE_ARCH_AUDITSYSCALL default y if SECURITY_SELINUX help Enable low-overhead system-call auditing infrastructure that @@ -272,20 +324,6 @@ config AUDIT_TREE depends on AUDITSYSCALL select FSNOTIFY -config AUDIT_LOGINUID_IMMUTABLE - bool "Make audit loginuid immutable" - depends on AUDIT - help - The config option toggles if a task setting its loginuid requires - CAP_SYS_AUDITCONTROL or if that task should require no special permissions - but should instead only allow setting its loginuid if it was never - previously set. On systems which use systemd or a similar central - process to restart login services this should be set to true. On older - systems in which an admin would typically have to directly stop and - start processes this should be set to false. Setting this to true allows - one to drop potentially dangerous capabilites from the login tasks, - but may not be backwards compatible with older init systems. - source "kernel/irq/Kconfig" source "kernel/time/Kconfig" @@ -325,7 +363,8 @@ config VIRT_CPU_ACCOUNTING_NATIVE config VIRT_CPU_ACCOUNTING_GEN bool "Full dynticks CPU time accounting" - depends on HAVE_CONTEXT_TRACKING && 64BIT + depends on HAVE_CONTEXT_TRACKING + depends on HAVE_VIRT_CPU_ACCOUNTING_GEN select VIRT_CPU_ACCOUNTING select CONTEXT_TRACKING help @@ -441,6 +480,7 @@ config TREE_RCU config TREE_PREEMPT_RCU bool "Preemptible tree-based hierarchical RCU" depends on PREEMPT + select IRQ_WORK help This option selects the RCU implementation that is designed for very large SMP systems with hundreds or @@ -459,18 +499,10 @@ config TINY_RCU is not required. This option greatly reduces the memory footprint of RCU. -config TINY_PREEMPT_RCU - bool "Preemptible UP-only small-memory-footprint RCU" - depends on PREEMPT && !SMP - help - This option selects the RCU implementation that is designed - for real-time UP systems. This option greatly reduces the - memory footprint of RCU. - endchoice config PREEMPT_RCU - def_bool ( TREE_PREEMPT_RCU || TINY_PREEMPT_RCU ) + def_bool TREE_PREEMPT_RCU help This option enables preemptible-RCU code that is common between the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations. @@ -506,13 +538,29 @@ config RCU_USER_QS config CONTEXT_TRACKING_FORCE bool "Force context tracking" depends on CONTEXT_TRACKING - default CONTEXT_TRACKING + default y if !NO_HZ_FULL help - Probe on user/kernel boundaries by default in order to - test the features that rely on it such as userspace RCU extended - quiescent states. - This test is there for debugging until we have a real user like the - full dynticks mode. + The major pre-requirement for full dynticks to work is to + support the context tracking subsystem. But there are also + other dependencies to provide in order to make the full + dynticks working. + + This option stands for testing when an arch implements the + context tracking backend but doesn't yet fullfill all the + requirements to make the full dynticks feature working. + Without the full dynticks, there is no way to test the support + for context tracking and the subsystems that rely on it: RCU + userspace extended quiescent state and tickless cputime + accounting. This option copes with the absence of the full + dynticks subsystem by forcing the context tracking on all + CPUs in the system. + + Say Y only if you're working on the development of an + architecture backend for the context tracking. + + Say N otherwise, this option brings an overhead that you + don't want in production. + config RCU_FANOUT int "Tree-based hierarchical RCU fanout value" @@ -656,7 +704,7 @@ config RCU_BOOST_DELAY Accept the default if unsure. config RCU_NOCB_CPU - bool "Offload RCU callback processing from boot-selected CPUs (EXPERIMENTAL" + bool "Offload RCU callback processing from boot-selected CPUs" depends on TREE_RCU || TREE_PREEMPT_RCU default n help @@ -682,9 +730,10 @@ choice prompt "Build-forced no-CBs CPUs" default RCU_NOCB_CPU_NONE help - This option allows no-CBs CPUs to be specified at build time. - Additional no-CBs CPUs may be specified by the rcu_nocbs= - boot parameter. + This option allows no-CBs CPUs (whose RCU callbacks are invoked + from kthreads rather than from softirq context) to be specified + at build time. Additional no-CBs CPUs may be specified by + the rcu_nocbs= boot parameter. config RCU_NOCB_CPU_NONE bool "No build_forced no-CBs CPUs" @@ -692,25 +741,40 @@ config RCU_NOCB_CPU_NONE help This option does not force any of the CPUs to be no-CBs CPUs. Only CPUs designated by the rcu_nocbs= boot parameter will be - no-CBs CPUs. + no-CBs CPUs, whose RCU callbacks will be invoked by per-CPU + kthreads whose names begin with "rcuo". All other CPUs will + invoke their own RCU callbacks in softirq context. + + Select this option if you want to choose no-CBs CPUs at + boot time, for example, to allow testing of different no-CBs + configurations without having to rebuild the kernel each time. config RCU_NOCB_CPU_ZERO bool "CPU 0 is a build_forced no-CBs CPU" depends on RCU_NOCB_CPU && !NO_HZ_FULL help - This option forces CPU 0 to be a no-CBs CPU. Additional CPUs - may be designated as no-CBs CPUs using the rcu_nocbs= boot - parameter will be no-CBs CPUs. + This option forces CPU 0 to be a no-CBs CPU, so that its RCU + callbacks are invoked by a per-CPU kthread whose name begins + with "rcuo". Additional CPUs may be designated as no-CBs + CPUs using the rcu_nocbs= boot parameter will be no-CBs CPUs. + All other CPUs will invoke their own RCU callbacks in softirq + context. Select this if CPU 0 needs to be a no-CBs CPU for real-time - or energy-efficiency reasons. + or energy-efficiency reasons, but the real reason it exists + is to ensure that randconfig testing covers mixed systems. config RCU_NOCB_CPU_ALL bool "All CPUs are build_forced no-CBs CPUs" depends on RCU_NOCB_CPU help This option forces all CPUs to be no-CBs CPUs. The rcu_nocbs= - boot parameter will be ignored. + boot parameter will be ignored. All CPUs' RCU callbacks will + be executed in the context of per-CPU rcuo kthreads created for + this purpose. Assuming that the kthreads whose names start with + "rcuo" are bound to "housekeeping" CPUs, this reduces OS jitter + on the remaining CPUs, but might decrease memory locality during + RCU-callback invocation, thus potentially degrading throughput. Select this if all CPUs need to be no-CBs CPUs for real-time or energy-efficiency reasons. @@ -758,6 +822,9 @@ config LOG_BUF_SHIFT config HAVE_UNSTABLE_SCHED_CLOCK bool +config GENERIC_SCHED_CLOCK + bool + # # For architectures that want to enable the support for NUMA-affine scheduler # balancing logic: @@ -765,6 +832,12 @@ config HAVE_UNSTABLE_SCHED_CLOCK config ARCH_SUPPORTS_NUMA_BALANCING bool +# +# For architectures that know their GCC __int128 support is sound +# +config ARCH_SUPPORTS_INT128 + bool + # For architectures that (ab)use NUMA to represent different memory regions # all cpu-local but of different latencies, such as SuperH. # @@ -787,7 +860,7 @@ config NUMA_BALANCING_DEFAULT_ENABLED default y depends on NUMA_BALANCING help - If set, autonumic NUMA balancing will be enabled if running on a NUMA + If set, automatic NUMA balancing will be enabled if running on a NUMA machine. config NUMA_BALANCING @@ -798,13 +871,13 @@ config NUMA_BALANCING help This option adds support for automatic NUMA aware memory/task placement. The mechanism is quite primitive and is based on migrating memory when - it is references to the node the task is running on. + it has references to the node the task is running on. This system will be inactive on UMA systems. menuconfig CGROUPS boolean "Control Group support" - depends on EVENTFD + select KERNFS help This option adds support for grouping sets of processes together, for use with process control subsystems such as Cpusets, CFS, memory @@ -870,14 +943,14 @@ config RESOURCE_COUNTERS config MEMCG bool "Memory Resource Controller for Control Groups" depends on RESOURCE_COUNTERS - select MM_OWNER + select EVENTFD help Provides a memory resource controller that manages both anonymous memory and page cache. (See Documentation/cgroups/memory.txt) Note that setting this option increases fixed memory overhead associated with each page of memory in the system. By this, - 20(40)bytes/PAGE_SIZE on 32(64)bit system will be occupied by memory + 8(16)bytes/PAGE_SIZE on 32(64)bit system will be occupied by memory usage tracking struct at boot. Total amount of this is printed out at boot. @@ -887,9 +960,6 @@ config MEMCG disable memory resource controller and you can avoid overheads. (and lose benefits of memory resource controller) - This config option also selects MM_OWNER config option, which - could in turn add some fork/exit overhead. - config MEMCG_SWAP bool "Memory Resource Controller Swap Extension" depends on MEMCG && SWAP @@ -915,7 +985,7 @@ config MEMCG_SWAP_ENABLED Memory Resource Controller Swap Extension comes with its price in a bigger memory consumption. General purpose distribution kernels which want to enable the feature but keep it disabled by default - and let the user enable it by swapaccount boot command line + and let the user enable it by swapaccount=1 boot command line parameter should have this option unselected. For those who want to have the feature enabled by default should select this option (if, for some reason, they need to disable it @@ -932,6 +1002,12 @@ config MEMCG_KMEM the kmem extension can use it to guarantee that no group of processes will ever exhaust kernel resources alone. + WARNING: Current implementation lacks reclaim support. That means + allocation attempts will fail when close to the limit even if there + are plenty of kmem available for reclaim. That makes this option + unusable in real life so DO NOT SELECT IT unless for development + purposes. + config CGROUP_HUGETLB bool "HugeTLB Resource Controller for Control Groups" depends on RESOURCE_COUNTERS && HUGETLB_PAGE @@ -1066,9 +1142,6 @@ config IPC_NS config USER_NS bool "User namespace" - depends on UIDGID_CONVERTED - select UIDGID_STRICT_TYPE_CHECKS - default n help This allows containers, i.e. vservers, to use user namespaces @@ -1100,30 +1173,8 @@ config NET_NS endif # NAMESPACES -config UIDGID_CONVERTED - # True if all of the selected software conmponents are known - # to have uid_t and gid_t converted to kuid_t and kgid_t - # where appropriate and are otherwise safe to use with - # the user namespace. - bool - default y - - # Filesystems - depends on XFS_FS = n - -config UIDGID_STRICT_TYPE_CHECKS - bool "Require conversions between uid/gids and their internal representation" - depends on UIDGID_CONVERTED - default n - help - While the nececessary conversions are being added to all subsystems this option allows - the code to continue to build for unconverted subsystems. - - Say Y here if you want the strict type checking enabled - config SCHED_AUTOGROUP bool "Automatic process group scheduling" - select EVENTFD select CGROUPS select CGROUP_SCHED select FAIR_GROUP_SCHED @@ -1134,9 +1185,6 @@ config SCHED_AUTOGROUP desktop applications. Task group autogeneration is currently based upon task session. -config MM_OWNER - bool - config SYSFS_DEPRECATED bool "Enable deprecated sysfs features to support old userspace tools" depends on SYSFS @@ -1245,9 +1293,6 @@ config SYSCTL_ARCH_UNALIGN_ALLOW the unaligned access emulation. see arch/parisc/kernel/unaligned.c for reference -config HOTPLUG - def_bool y - config HAVE_PCSPKR_PLATFORM bool @@ -1268,6 +1313,26 @@ config UID16 help This enables the legacy 16-bit UID syscall wrappers. +config SGETMASK_SYSCALL + bool "sgetmask/ssetmask syscalls support" if EXPERT + def_bool PARISC || MN10300 || BLACKFIN || M68K || PPC || MIPS || X86 || SPARC || CRIS || MICROBLAZE || SUPERH + ---help--- + sys_sgetmask and sys_ssetmask are obsolete system calls + no longer supported in libc but still enabled by default in some + architectures. + + If unsure, leave the default option here. + +config SYSFS_SYSCALL + bool "Sysfs syscall support" if EXPERT + default y + ---help--- + sys_sysfs is an obsolete system call no longer supported in libc. + Note that disabling this option is more secure but might break + compatibility with some systems. + + If unsure say Y here. + config SYSCTL_SYSCALL bool "Sysctl syscall support" if EXPERT depends on PROC_SYSCTL @@ -1365,6 +1430,13 @@ config FUTEX support for "fast userspace mutexes". The resulting kernel may not run glibc-based applications correctly. +config HAVE_FUTEX_CMPXCHG + bool + help + Architectures should select this if futex_atomic_cmpxchg_inatomic() + is implemented and always working. This removes a couple of runtime + checks. + config EPOLL bool "Enable eventpoll support" if EXPERT default y @@ -1433,6 +1505,7 @@ config PCI_QUIRKS config EMBEDDED bool "Embedded system" + option allnoconfig_y select EXPERT help This option should be enabled if compiling the kernel for @@ -1559,6 +1632,17 @@ config SLOB endchoice +config SLUB_CPU_PARTIAL + default y + depends on SLUB && SMP + bool "SLUB per cpu partial cache" + help + Per cpu partial caches accellerate objects allocation and freeing + that is local to a processor at the price of more indeterminism + in the latency of the free. On overflow these caches will be cleared + which requires the taking of locks that may cause latency spikes. + Typically one would choose no for a realtime system. + config MMAP_ALLOW_UNINITIALIZED bool "Allow mmapped anonymous memory to be uninitialized" depends on EXPERT && !MMU @@ -1581,6 +1665,18 @@ config MMAP_ALLOW_UNINITIALIZED See Documentation/nommu-mmap.txt for more information. +config SYSTEM_TRUSTED_KEYRING + bool "Provide system-wide ring of trusted keys" + depends on KEYS + help + Provide a system keyring to which trusted keys can be added. Keys in + the keyring are considered to be trusted. Keys may be added at will + by the kernel from compiled-in data and from hardware key stores, but + userspace may only add extra keys if those keys can be verified by + keys already in the keyring. + + Keys in this keyring are used by module signature checking. + config PROFILING bool "Profiling support" help @@ -1618,6 +1714,7 @@ config BASE_SMALL menuconfig MODULES bool "Enable loadable module support" + option modules help Kernel modules are small pieces of compiled code which can be inserted in the running kernel, rather than being @@ -1688,6 +1785,7 @@ config MODULE_SRCVERSION_ALL config MODULE_SIG bool "Module signature verification" depends on MODULES + select SYSTEM_TRUSTED_KEYRING select KEYS select CRYPTO select ASYMMETRIC_KEY_TYPE diff --git a/init/calibrate.c b/init/calibrate.c index fda0a7b0f06..520702db9ac 100644 --- a/init/calibrate.c +++ b/init/calibrate.c @@ -31,7 +31,7 @@ __setup("lpj=", lpj_setup); #define DELAY_CALIBRATION_TICKS ((HZ < 100) ? 1 : (HZ/100)) #define MAX_DIRECT_CALIBRATION_RETRIES 5 -static unsigned long __cpuinit calibrate_delay_direct(void) +static unsigned long calibrate_delay_direct(void) { unsigned long pre_start, start, post_start; unsigned long pre_end, end, post_end; @@ -166,7 +166,10 @@ static unsigned long __cpuinit calibrate_delay_direct(void) return 0; } #else -static unsigned long __cpuinit calibrate_delay_direct(void) {return 0;} +static unsigned long calibrate_delay_direct(void) +{ + return 0; +} #endif /* @@ -180,7 +183,7 @@ static unsigned long __cpuinit calibrate_delay_direct(void) {return 0;} */ #define LPS_PREC 8 -static unsigned long __cpuinit calibrate_delay_converge(void) +static unsigned long calibrate_delay_converge(void) { /* First stage - slowly accelerate to find initial bounds */ unsigned long lpj, lpj_base, ticks, loopadd, loopadd_base, chop_limit; @@ -254,12 +257,12 @@ static DEFINE_PER_CPU(unsigned long, cpu_loops_per_jiffy) = { 0 }; * Architectures should override this function if a faster calibration * method is available. */ -unsigned long __attribute__((weak)) __cpuinit calibrate_delay_is_known(void) +unsigned long __attribute__((weak)) calibrate_delay_is_known(void) { return 0; } -void __cpuinit calibrate_delay(void) +void calibrate_delay(void) { unsigned long lpj; static bool printed; diff --git a/init/do_mounts.c b/init/do_mounts.c index a2b49f2c1bd..82f22885c87 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -26,6 +26,8 @@ #include <linux/async.h> #include <linux/fs_struct.h> #include <linux/slab.h> +#include <linux/ramfs.h> +#include <linux/shmem_fs.h> #include <linux/nfs_fs.h> #include <linux/nfs_fs_sb.h> @@ -100,13 +102,13 @@ no_match: /** * devt_from_partuuid - looks up the dev_t of a partition by its UUID - * @uuid: char array containing ascii UUID + * @uuid_str: char array containing ascii UUID * * The function will return the first partition which contains a matching * UUID value in its partition_meta_info struct. This does not search * by filesystem UUIDs. * - * If @uuid is followed by a "/PARTNROFF=%d", then the number will be + * If @uuid_str is followed by a "/PARTNROFF=%d", then the number will be * extracted and used as an offset from the partition identified by the UUID. * * Returns the matching dev_t on success or 0 on failure. @@ -195,6 +197,8 @@ done: * is a zero-filled hex representation of the 1-based partition number. * 7) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to * a partition with a known unique id. + * 8) <major>:<minor> major and minor number of the device separated by + * a colon. * * If name doesn't have fall into the categories above, we return (0,0). * block_class is used to check if something is a disk name. If the disk @@ -536,7 +540,7 @@ void __init prepare_namespace(void) int is_floppy; if (root_delay) { - printk(KERN_INFO "Waiting %dsec before mounting root device...\n", + printk(KERN_INFO "Waiting %d sec before mounting root device...\n", root_delay); ssleep(root_delay); } @@ -588,3 +592,46 @@ out: sys_mount(".", "/", NULL, MS_MOVE, NULL); sys_chroot("."); } + +static bool is_tmpfs; +static struct dentry *rootfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + static unsigned long once; + void *fill = ramfs_fill_super; + + if (test_and_set_bit(0, &once)) + return ERR_PTR(-ENODEV); + + if (IS_ENABLED(CONFIG_TMPFS) && is_tmpfs) + fill = shmem_fill_super; + + return mount_nodev(fs_type, flags, data, fill); +} + +static struct file_system_type rootfs_fs_type = { + .name = "rootfs", + .mount = rootfs_mount, + .kill_sb = kill_litter_super, +}; + +int __init init_rootfs(void) +{ + int err = register_filesystem(&rootfs_fs_type); + + if (err) + return err; + + if (IS_ENABLED(CONFIG_TMPFS) && !saved_root_name[0] && + (!root_fs_names || strstr(root_fs_names, "tmpfs"))) { + err = shmem_init(); + is_tmpfs = true; + } else { + err = init_ramfs_fs(); + } + + if (err) + unregister_filesystem(&rootfs_fs_type); + + return err; +} diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index 6be2879cca6..a8227022e3a 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -13,7 +13,7 @@ #include <linux/minix_fs.h> #include <linux/ext2_fs.h> #include <linux/romfs_fs.h> -#include <linux/cramfs_fs.h> +#include <uapi/linux/cramfs_fs.h> #include <linux/initrd.h> #include <linux/string.h> #include <linux/slab.h> @@ -57,6 +57,11 @@ static int __init crd_load(int in_fd, int out_fd, decompress_fn deco); * cramfs * squashfs * gzip + * bzip2 + * lzma + * xz + * lzo + * lz4 */ static int __init identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) @@ -342,6 +347,13 @@ static int __init crd_load(int in_fd, int out_fd, decompress_fn deco) int result; crd_infd = in_fd; crd_outfd = out_fd; + + if (!deco) { + pr_emerg("Invalid ramdisk decompression routine. " + "Select appropriate config option.\n"); + panic("Could not decompress initial ramdisk image."); + } + result = deco(NULL, 0, compr_fill, compr_flush, NULL, NULL, error); if (decompress_error) result = 1; diff --git a/init/initramfs.c b/init/initramfs.c index a67ef9dbda9..a8497fab1c3 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -455,6 +455,7 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len) } this_header = 0; decompress = decompress_method(buf, len, &compress_name); + pr_debug("Detected %s compressed data\n", compress_name); if (decompress) { res = decompress(buf, len, NULL, flush_buffer, NULL, &my_inptr, error); @@ -583,7 +584,7 @@ static int __init populate_rootfs(void) { char *err = unpack_to_rootfs(__initramfs_start, __initramfs_size); if (err) - panic(err); /* Failed to decompress INTERNAL initramfs */ + panic("%s", err); /* Failed to decompress INTERNAL initramfs */ if (initrd_start) { #ifdef CONFIG_BLK_DEV_RAM int fd; diff --git a/init/main.c b/init/main.c index 9484f4ba88d..e8ae1fef090 100644 --- a/init/main.c +++ b/init/main.c @@ -74,6 +74,10 @@ #include <linux/ptrace.h> #include <linux/blkdev.h> #include <linux/elevator.h> +#include <linux/sched_clock.h> +#include <linux/context_tracking.h> +#include <linux/random.h> +#include <linux/list.h> #include <asm/io.h> #include <asm/bugs.h> @@ -89,17 +93,11 @@ static int kernel_init(void *); extern void init_IRQ(void); extern void fork_init(unsigned long); -extern void mca_init(void); -extern void sbus_init(void); extern void radix_tree_init(void); #ifndef CONFIG_DEBUG_RODATA static inline void mark_rodata_ro(void) { } #endif -#ifdef CONFIG_TC -extern void tc_init(void); -#endif - /* * Debug helper: via this flag we know that we are in 'early bootup code' * where only the boot processor is running with IRQ disabled. This means @@ -121,7 +119,6 @@ EXPORT_SYMBOL(system_state); extern void time_init(void); /* Default late time init is NULL. archs can override this later. */ void (*__initdata late_time_init)(void); -extern void softirq_init(void); /* Untouched command line saved by arch-specific code. */ char __initdata boot_command_line[COMMAND_LINE_SIZE]; @@ -129,11 +126,20 @@ char __initdata boot_command_line[COMMAND_LINE_SIZE]; char *saved_command_line; /* Command line for parameter parsing */ static char *static_command_line; +/* Command line for per-initcall parameter parsing */ +static char *initcall_command_line; static char *execute_command; static char *ramdisk_execute_command; /* + * Used to generate warnings if static_key manipulation functions are used + * before jump_label_init is called. + */ +bool static_key_initialized __read_mostly = false; +EXPORT_SYMBOL_GPL(static_key_initialized); + +/* * If set, this is an indication to the drivers that reset the underlying * device before going ahead with the initialization otherwise driver might * rely on the BIOS and skip the reset operation. @@ -198,13 +204,13 @@ EXPORT_SYMBOL(loops_per_jiffy); static int __init debug_kernel(char *str) { - console_loglevel = 10; + console_loglevel = CONSOLE_LOGLEVEL_DEBUG; return 0; } static int __init quiet_kernel(char *str) { - console_loglevel = 4; + console_loglevel = CONSOLE_LOGLEVEL_QUIET; return 0; } @@ -247,6 +253,27 @@ static int __init repair_env_string(char *param, char *val, const char *unused) return 0; } +/* Anything after -- gets handed straight to init. */ +static int __init set_init_arg(char *param, char *val, const char *unused) +{ + unsigned int i; + + if (panic_later) + return 0; + + repair_env_string(param, val, unused); + + for (i = 0; argv_init[i]; i++) { + if (i == MAX_INIT_ARGS) { + panic_later = "init"; + panic_param = param; + return 0; + } + } + argv_init[i] = param; + return 0; +} + /* * Unknown boot options get handed to init, unless they look like * unused parameters (modprobe will find them in /proc/cmdline). @@ -271,7 +298,7 @@ static int __init unknown_bootoption(char *param, char *val, const char *unused) unsigned int i; for (i = 0; envp_init[i]; i++) { if (i == MAX_INIT_ENVS) { - panic_later = "Too many boot env vars at `%s'"; + panic_later = "env"; panic_param = param; } if (!strncmp(param, envp_init[i], val - param)) @@ -283,7 +310,7 @@ static int __init unknown_bootoption(char *param, char *val, const char *unused) unsigned int i; for (i = 0; argv_init[i]; i++) { if (i == MAX_INIT_ARGS) { - panic_later = "Too many boot init vars at `%s'"; + panic_later = "init"; panic_param = param; } } @@ -344,8 +371,11 @@ static inline void smp_prepare_cpus(unsigned int maxcpus) { } */ static void __init setup_command_line(char *command_line) { - saved_command_line = alloc_bootmem(strlen (boot_command_line)+1); - static_command_line = alloc_bootmem(strlen (command_line)+1); + saved_command_line = + memblock_virt_alloc(strlen(boot_command_line) + 1, 0); + initcall_command_line = + memblock_virt_alloc(strlen(boot_command_line) + 1, 0); + static_command_line = memblock_virt_alloc(strlen(command_line) + 1, 0); strcpy (saved_command_line, boot_command_line); strcpy (static_command_line, command_line); } @@ -371,7 +401,7 @@ static noinline void __init_refok rest_init(void) * the init task will end up wanting to create kthreads, which, if * we schedule it before we create kthreadd, will OOPS. */ - kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND); + kernel_thread(kernel_init, NULL, CLONE_FS); numa_default_policy(); pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES); rcu_read_lock(); @@ -464,13 +494,13 @@ static void __init mm_init(void) mem_init(); kmem_cache_init(); percpu_init_late(); - pgtable_cache_init(); + pgtable_init(); vmalloc_init(); } -asmlinkage void __init start_kernel(void) +asmlinkage __visible void __init start_kernel(void) { - char * command_line; + char * command_line, *after_dashes; extern const struct kernel_param __start___param[], __stop___param[]; /* @@ -499,7 +529,6 @@ asmlinkage void __init start_kernel(void) page_address_init(); pr_notice("%s", linux_banner); setup_arch(&command_line); - mm_init_owner(&init_mm, &init_task); mm_init_cpumask(&init_mm); setup_command_line(command_line); setup_nr_cpu_ids(); @@ -511,9 +540,13 @@ asmlinkage void __init start_kernel(void) pr_notice("Kernel command line: %s\n", boot_command_line); parse_early_param(); - parse_args("Booting kernel", static_command_line, __start___param, - __stop___param - __start___param, - -1, -1, &unknown_bootoption); + after_dashes = parse_args("Booting kernel", + static_command_line, __start___param, + __stop___param - __start___param, + -1, -1, &unknown_bootoption); + if (after_dashes) + parse_args("Setting init args", after_dashes, NULL, 0, -1, -1, + set_init_arg); jump_label_init(); @@ -542,9 +575,9 @@ asmlinkage void __init start_kernel(void) if (WARN(!irqs_disabled(), "Interrupts were enabled *very* early, fixing it\n")) local_irq_disable(); idr_init_cache(); - perf_event_init(); rcu_init(); tick_nohz_init(); + context_tracking_init(); radix_tree_init(); /* init some links before init_ISA_irqs() */ early_irq_init(); @@ -555,6 +588,8 @@ asmlinkage void __init start_kernel(void) softirq_init(); timekeeping_init(); time_init(); + sched_clock_postinit(); + perf_event_init(); profile_init(); call_function_init(); WARN(!irqs_disabled(), "Interrupts were enabled early\n"); @@ -570,7 +605,8 @@ asmlinkage void __init start_kernel(void) */ console_init(); if (panic_later) - panic(panic_later, panic_param); + panic("Too many boot %s vars at `%s'", panic_later, + panic_param); lockdep_info(); @@ -601,10 +637,15 @@ asmlinkage void __init start_kernel(void) calibrate_delay(); pidmap_init(); anon_vma_init(); + acpi_early_init(); #ifdef CONFIG_X86 if (efi_enabled(EFI_RUNTIME_SERVICES)) efi_enter_virtual_mode(); #endif +#ifdef CONFIG_X86_ESPFIX64 + /* Should be run before the first non-init thread is created */ + init_espfix_bsp(); +#endif thread_info_cache_init(); cred_init(); fork_init(totalram_pages); @@ -617,9 +658,7 @@ asmlinkage void __init start_kernel(void) signals_init(); /* rootfs populating might need page-writeback */ page_writeback_init(); -#ifdef CONFIG_PROC_FS proc_root_init(); -#endif cgroup_init(); cpuset_init(); taskstats_init_early(); @@ -627,7 +666,6 @@ asmlinkage void __init start_kernel(void) check_bugs(); - acpi_early_init(); /* before LAPIC and SMP init */ sfi_init_late(); if (efi_enabled(EFI_RUNTIME_SERVICES)) { @@ -655,7 +693,69 @@ static void __init do_ctors(void) bool initcall_debug; core_param(initcall_debug, initcall_debug, bool, 0644); -static char msgbuf[64]; +#ifdef CONFIG_KALLSYMS +struct blacklist_entry { + struct list_head next; + char *buf; +}; + +static __initdata_or_module LIST_HEAD(blacklisted_initcalls); + +static int __init initcall_blacklist(char *str) +{ + char *str_entry; + struct blacklist_entry *entry; + + /* str argument is a comma-separated list of functions */ + do { + str_entry = strsep(&str, ","); + if (str_entry) { + pr_debug("blacklisting initcall %s\n", str_entry); + entry = alloc_bootmem(sizeof(*entry)); + entry->buf = alloc_bootmem(strlen(str_entry) + 1); + strcpy(entry->buf, str_entry); + list_add(&entry->next, &blacklisted_initcalls); + } + } while (str_entry); + + return 0; +} + +static bool __init_or_module initcall_blacklisted(initcall_t fn) +{ + struct list_head *tmp; + struct blacklist_entry *entry; + char *fn_name; + + fn_name = kasprintf(GFP_KERNEL, "%pf", fn); + if (!fn_name) + return false; + + list_for_each(tmp, &blacklisted_initcalls) { + entry = list_entry(tmp, struct blacklist_entry, next); + if (!strcmp(fn_name, entry->buf)) { + pr_debug("initcall %s blacklisted\n", fn_name); + kfree(fn_name); + return true; + } + } + + kfree(fn_name); + return false; +} +#else +static int __init initcall_blacklist(char *str) +{ + pr_warn("initcall_blacklist requires CONFIG_KALLSYMS\n"); + return 0; +} + +static bool __init_or_module initcall_blacklisted(initcall_t fn) +{ + return false; +} +#endif +__setup("initcall_blacklist=", initcall_blacklist); static int __init_or_module do_one_initcall_debug(initcall_t fn) { @@ -663,13 +763,13 @@ static int __init_or_module do_one_initcall_debug(initcall_t fn) unsigned long long duration; int ret; - pr_debug("calling %pF @ %i\n", fn, task_pid_nr(current)); + printk(KERN_DEBUG "calling %pF @ %i\n", fn, task_pid_nr(current)); calltime = ktime_get(); ret = fn(); rettime = ktime_get(); delta = ktime_sub(rettime, calltime); duration = (unsigned long long) ktime_to_ns(delta) >> 10; - pr_debug("initcall %pF returned %d after %lld usecs\n", + printk(KERN_DEBUG "initcall %pF returned %d after %lld usecs\n", fn, ret, duration); return ret; @@ -679,6 +779,10 @@ int __init_or_module do_one_initcall(initcall_t fn) { int count = preempt_count(); int ret; + char msgbuf[64]; + + if (initcall_blacklisted(fn)) + return -EPERM; if (initcall_debug) ret = do_one_initcall_debug(fn); @@ -689,7 +793,7 @@ int __init_or_module do_one_initcall(initcall_t fn) if (preempt_count() != count) { sprintf(msgbuf, "preemption imbalance "); - preempt_count() = count; + preempt_count_set(count); } if (irqs_disabled()) { strlcat(msgbuf, "disabled interrupts ", sizeof(msgbuf)); @@ -741,9 +845,9 @@ static void __init do_initcall_level(int level) extern const struct kernel_param __start___param[], __stop___param[]; initcall_t *fn; - strcpy(static_command_line, saved_command_line); + strcpy(initcall_command_line, saved_command_line); parse_args(initcall_level_names[level], - static_command_line, __start___param, + initcall_command_line, __start___param, __stop___param - __start___param, level, level, &repair_env_string); @@ -777,6 +881,7 @@ static void __init do_basic_setup(void) do_ctors(); usermodehelper_enable(); do_initcalls(); + random_int_secret_init(); } static void __init do_pre_smp_initcalls(void) @@ -801,15 +906,31 @@ void __init load_default_modules(void) static int run_init_process(const char *init_filename) { argv_init[0] = init_filename; - return do_execve(init_filename, + return do_execve(getname_kernel(init_filename), (const char __user *const __user *)argv_init, (const char __user *const __user *)envp_init); } +static int try_to_run_init_process(const char *init_filename) +{ + int ret; + + ret = run_init_process(init_filename); + + if (ret && ret != -ENOENT) { + pr_err("Starting init: %s exists but couldn't execute it (error %d)\n", + init_filename, ret); + } + + return ret; +} + static noinline void __init kernel_init_freeable(void); static int __ref kernel_init(void *unused) { + int ret; + kernel_init_freeable(); /* need to finish all async __init code before freeing the memory */ async_synchronize_full(); @@ -821,9 +942,11 @@ static int __ref kernel_init(void *unused) flush_delayed_fput(); if (ramdisk_execute_command) { - if (!run_init_process(ramdisk_execute_command)) + ret = run_init_process(ramdisk_execute_command); + if (!ret) return 0; - pr_err("Failed to execute %s\n", ramdisk_execute_command); + pr_err("Failed to execute %s (error %d)\n", + ramdisk_execute_command, ret); } /* @@ -833,18 +956,19 @@ static int __ref kernel_init(void *unused) * trying to recover a really broken machine. */ if (execute_command) { - if (!run_init_process(execute_command)) + ret = run_init_process(execute_command); + if (!ret) return 0; - pr_err("Failed to execute %s. Attempting defaults...\n", - execute_command); + pr_err("Failed to execute %s (error %d). Attempting defaults...\n", + execute_command, ret); } - if (!run_init_process("/sbin/init") || - !run_init_process("/etc/init") || - !run_init_process("/bin/init") || - !run_init_process("/bin/sh")) + if (!try_to_run_init_process("/sbin/init") || + !try_to_run_init_process("/etc/init") || + !try_to_run_init_process("/bin/init") || + !try_to_run_init_process("/bin/sh")) return 0; - panic("No init found. Try passing init= option to kernel. " + panic("No working init found. Try passing init= option to kernel. " "See Linux Documentation/init.txt for guidance."); } |
