aboutsummaryrefslogtreecommitdiff
path: root/init
diff options
context:
space:
mode:
Diffstat (limited to 'init')
-rw-r--r--init/Kconfig585
-rw-r--r--init/calibrate.c13
-rw-r--r--init/do_mounts.c114
-rw-r--r--init/do_mounts_initrd.c15
-rw-r--r--init/do_mounts_rd.c14
-rw-r--r--init/init_task.c2
-rw-r--r--init/initramfs.c11
-rw-r--r--init/main.c281
-rw-r--r--init/version.c2
9 files changed, 750 insertions, 287 deletions
diff --git a/init/Kconfig b/init/Kconfig
index 6fdd6e33932..9d76b99af1b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -20,49 +20,14 @@ config CONSTRUCTORS
bool
depends on !UML
-config HAVE_IRQ_WORK
- bool
-
config IRQ_WORK
bool
- depends on HAVE_IRQ_WORK
config BUILDTIME_EXTABLE_SORT
bool
menu "General setup"
-config EXPERIMENTAL
- bool "Prompt for development and/or incomplete code/drivers"
- ---help---
- Some of the various things that Linux supports (such as network
- drivers, file systems, network protocols, etc.) can be in a state
- of development where the functionality, stability, or the level of
- testing is not yet high enough for general use. This is usually
- known as the "alpha-test" phase among developers. If a feature is
- currently in alpha-test, then the developers usually discourage
- uninformed widespread use of this feature by the general public to
- avoid "Why doesn't this work?" type mail messages. However, active
- testing and use of these systems is welcomed. Just be aware that it
- may not meet the normal level of reliability or it may fail to work
- in some special cases. Detailed bug reports from people familiar
- with the kernel internals are usually welcomed by the developers
- (before submitting bug reports, please read the documents
- <file:README>, <file:MAINTAINERS>, <file:REPORTING-BUGS>,
- <file:Documentation/BUG-HUNTING>, and
- <file:Documentation/oops-tracing.txt> in the kernel source).
-
- This option will also make obsoleted drivers available. These are
- drivers that have been replaced by something else, and/or are
- scheduled to be removed in a future kernel release.
-
- Unless you intend to help test and develop a feature or driver that
- falls into this category, or you have a situation that requires
- using these features, you should probably say N here, which will
- cause the configurator to present you with fewer choices. If
- you say Y here, you will be offered the choice of using features or
- drivers that are currently considered to be in the alpha-test phase.
-
config BROKEN
bool
@@ -88,6 +53,20 @@ config CROSS_COMPILE
need to set this unless you want the configured kernel build
directory to select the cross-compiler automatically.
+config COMPILE_TEST
+ bool "Compile also drivers which will not load"
+ default n
+ help
+ Some drivers can be compiled on a different platform than they are
+ intended to be run on. Despite they cannot be loaded there (or even
+ when they load they cannot be used due to missing HW support),
+ developers still, opposing to distributors, might want to build such
+ drivers to compile-test them.
+
+ If you are a developer and want to build everything available, say Y
+ here. If you are a user/distributor, say N here to exclude useless
+ drivers to be distributed.
+
config LOCALVERSION
string "Local version - append to kernel release"
help
@@ -133,10 +112,13 @@ config HAVE_KERNEL_XZ
config HAVE_KERNEL_LZO
bool
+config HAVE_KERNEL_LZ4
+ bool
+
choice
prompt "Kernel compression mode"
default KERNEL_GZIP
- depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO
+ depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4
help
The linux kernel is a kind of self-extracting executable.
Several compression algorithms are available, which differ
@@ -203,6 +185,18 @@ config KERNEL_LZO
size is about 10% bigger than gzip; however its speed
(both compression and decompression) is the fastest.
+config KERNEL_LZ4
+ bool "LZ4"
+ depends on HAVE_KERNEL_LZ4
+ help
+ LZ4 is an LZ77-type compressor with a fixed, byte-oriented encoding.
+ A preliminary version of LZ4 de/compression tool is available at
+ <https://code.google.com/p/lz4/>.
+
+ Its compression ratio is worse than LZO. The size of the kernel
+ is about 8% bigger than LZO. But the decompression speed is
+ faster than LZO.
+
endchoice
config DEFAULT_HOSTNAME
@@ -247,7 +241,7 @@ config SYSVIPC_SYSCTL
config POSIX_MQUEUE
bool "POSIX Message Queues"
- depends on NET && EXPERIMENTAL
+ depends on NET
---help---
POSIX variant of message queues is a part of IPC. In POSIX message
queues every message has a priority which decides about succession
@@ -267,6 +261,16 @@ config POSIX_MQUEUE_SYSCTL
depends on SYSCTL
default y
+config CROSS_MEMORY_ATTACH
+ bool "Enable process_vm_readv/writev syscalls"
+ depends on MMU
+ default y
+ help
+ Enabling this option adds the system calls process_vm_readv and
+ process_vm_writev which allow a process with the correct privileges
+ to directly read from or write to to another process's address space.
+ See the man page for more details.
+
config FHANDLE
bool "open by fhandle syscalls"
select EXPORTFS
@@ -279,6 +283,16 @@ config FHANDLE
get renamed. Enables open_by_handle_at(2) and name_to_handle_at(2)
syscalls.
+config USELIB
+ bool "uselib syscall"
+ default y
+ help
+ This option enables the uselib syscall, a system call used in the
+ dynamic linker from libc5 and earlier. glibc does not use this
+ system call. If you intend to run programs built on libc5 or
+ earlier, you may need to enable this syscall. Current systems
+ running glibc can safely disable this.
+
config AUDIT
bool "Auditing support"
depends on NET
@@ -288,9 +302,12 @@ config AUDIT
logging of avc messages output). Does not do system-call
auditing without CONFIG_AUDITSYSCALL.
+config HAVE_ARCH_AUDITSYSCALL
+ bool
+
config AUDITSYSCALL
bool "Enable system-call auditing support"
- depends on AUDIT && (X86 || PPC || S390 || IA64 || UML || SPARC64 || SUPERH || (ARM && AEABI && !OABI_COMPAT))
+ depends on AUDIT && HAVE_ARCH_AUDITSYSCALL
default y if SECURITY_SELINUX
help
Enable low-overhead system-call auditing infrastructure that
@@ -307,34 +324,23 @@ config AUDIT_TREE
depends on AUDITSYSCALL
select FSNOTIFY
-config AUDIT_LOGINUID_IMMUTABLE
- bool "Make audit loginuid immutable"
- depends on AUDIT
- help
- The config option toggles if a task setting its loginuid requires
- CAP_SYS_AUDITCONTROL or if that task should require no special permissions
- but should instead only allow setting its loginuid if it was never
- previously set. On systems which use systemd or a similar central
- process to restart login services this should be set to true. On older
- systems in which an admin would typically have to directly stop and
- start processes this should be set to false. Setting this to true allows
- one to drop potentially dangerous capabilites from the login tasks,
- but may not be backwards compatible with older init systems.
-
source "kernel/irq/Kconfig"
source "kernel/time/Kconfig"
menu "CPU/Task time and stats accounting"
+config VIRT_CPU_ACCOUNTING
+ bool
+
choice
prompt "Cputime accounting"
default TICK_CPU_ACCOUNTING if !PPC64
- default VIRT_CPU_ACCOUNTING if PPC64
+ default VIRT_CPU_ACCOUNTING_NATIVE if PPC64
# Kind of a stub config for the pure tick based cputime accounting
config TICK_CPU_ACCOUNTING
bool "Simple tick based cputime accounting"
- depends on !S390
+ depends on !S390 && !NO_HZ_FULL
help
This is the basic tick based cputime accounting that maintains
statistics about user, system and idle time spent on per jiffies
@@ -342,9 +348,10 @@ config TICK_CPU_ACCOUNTING
If unsure, say Y.
-config VIRT_CPU_ACCOUNTING
+config VIRT_CPU_ACCOUNTING_NATIVE
bool "Deterministic task and CPU time accounting"
- depends on HAVE_VIRT_CPU_ACCOUNTING
+ depends on HAVE_VIRT_CPU_ACCOUNTING && !NO_HZ_FULL
+ select VIRT_CPU_ACCOUNTING
help
Select this option to enable more accurate task and CPU time
accounting. This is done by reading a CPU counter on each
@@ -354,9 +361,27 @@ config VIRT_CPU_ACCOUNTING
this also enables accounting of stolen time on logically-partitioned
systems.
+config VIRT_CPU_ACCOUNTING_GEN
+ bool "Full dynticks CPU time accounting"
+ depends on HAVE_CONTEXT_TRACKING
+ depends on HAVE_VIRT_CPU_ACCOUNTING_GEN
+ select VIRT_CPU_ACCOUNTING
+ select CONTEXT_TRACKING
+ help
+ Select this option to enable task and CPU time accounting on full
+ dynticks systems. This accounting is implemented by watching every
+ kernel-user boundaries using the context tracking subsystem.
+ The accounting is thus performed at the expense of some significant
+ overhead.
+
+ For now this is only useful if you are working on the full
+ dynticks subsystem development.
+
+ If unsure, say N.
+
config IRQ_TIME_ACCOUNTING
bool "Fine granularity task level IRQ time accounting"
- depends on HAVE_IRQ_TIME_ACCOUNTING
+ depends on HAVE_IRQ_TIME_ACCOUNTING && !NO_HZ_FULL
help
Select this option to enable fine granularity task irq time
accounting. This is done by reading a timestamp on each
@@ -393,7 +418,7 @@ config BSD_PROCESS_ACCT_V3
at <http://www.gnu.org/software/acct/>.
config TASKSTATS
- bool "Export task/process statistics through netlink (EXPERIMENTAL)"
+ bool "Export task/process statistics through netlink"
depends on NET
default n
help
@@ -406,7 +431,7 @@ config TASKSTATS
Say N if unsure.
config TASK_DELAY_ACCT
- bool "Enable per-task delay accounting (EXPERIMENTAL)"
+ bool "Enable per-task delay accounting"
depends on TASKSTATS
help
Collect information on time spent by a task waiting for system
@@ -417,7 +442,7 @@ config TASK_DELAY_ACCT
Say N if unsure.
config TASK_XACCT
- bool "Enable extended accounting over taskstats (EXPERIMENTAL)"
+ bool "Enable extended accounting over taskstats"
depends on TASKSTATS
help
Collect extended task accounting data and send the data
@@ -426,7 +451,7 @@ config TASK_XACCT
Say N if unsure.
config TASK_IO_ACCOUNTING
- bool "Enable per-task storage I/O accounting (EXPERIMENTAL)"
+ bool "Enable per-task storage I/O accounting"
depends on TASK_XACCT
help
Collect information on the number of bytes of storage I/O which this
@@ -445,6 +470,7 @@ choice
config TREE_RCU
bool "Tree-based hierarchical RCU"
depends on !PREEMPT && SMP
+ select IRQ_WORK
help
This option selects the RCU implementation that is
designed for very large SMP system with hundreds or
@@ -453,7 +479,8 @@ config TREE_RCU
config TREE_PREEMPT_RCU
bool "Preemptible tree-based hierarchical RCU"
- depends on PREEMPT && SMP
+ depends on PREEMPT
+ select IRQ_WORK
help
This option selects the RCU implementation that is
designed for very large SMP systems with hundreds or
@@ -461,6 +488,8 @@ config TREE_PREEMPT_RCU
is also required. It also scales down nicely to
smaller systems.
+ Select this option if you are unsure.
+
config TINY_RCU
bool "UP-only small-memory-footprint RCU"
depends on !PREEMPT && !SMP
@@ -470,51 +499,68 @@ config TINY_RCU
is not required. This option greatly reduces the
memory footprint of RCU.
-config TINY_PREEMPT_RCU
- bool "Preemptible UP-only small-memory-footprint RCU"
- depends on PREEMPT && !SMP
- help
- This option selects the RCU implementation that is designed
- for real-time UP systems. This option greatly reduces the
- memory footprint of RCU.
-
endchoice
config PREEMPT_RCU
- def_bool ( TREE_PREEMPT_RCU || TINY_PREEMPT_RCU )
+ def_bool TREE_PREEMPT_RCU
help
This option enables preemptible-RCU code that is common between
the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
+config RCU_STALL_COMMON
+ def_bool ( TREE_RCU || TREE_PREEMPT_RCU || RCU_TRACE )
+ help
+ This option enables RCU CPU stall code that is common between
+ the TINY and TREE variants of RCU. The purpose is to allow
+ the tiny variants to disable RCU CPU stall warnings, while
+ making these warnings mandatory for the tree variants.
+
+config CONTEXT_TRACKING
+ bool
+
config RCU_USER_QS
bool "Consider userspace as in RCU extended quiescent state"
- depends on HAVE_RCU_USER_QS && SMP
+ depends on HAVE_CONTEXT_TRACKING && SMP
+ select CONTEXT_TRACKING
help
This option sets hooks on kernel / userspace boundaries and
puts RCU in extended quiescent state when the CPU runs in
userspace. It means that when a CPU runs in userspace, it is
excluded from the global RCU state machine and thus doesn't
- to keep the timer tick on for RCU.
+ try to keep the timer tick on for RCU.
Unless you want to hack and help the development of the full
- tickless feature, you shouldn't enable this option. It adds
- unnecessary overhead.
+ dynticks mode, you shouldn't enable this option. It also
+ adds unnecessary overhead.
If unsure say N
-config RCU_USER_QS_FORCE
- bool "Force userspace extended QS by default"
- depends on RCU_USER_QS
+config CONTEXT_TRACKING_FORCE
+ bool "Force context tracking"
+ depends on CONTEXT_TRACKING
+ default y if !NO_HZ_FULL
help
- Set the hooks in user/kernel boundaries by default in order to
- test this feature that treats userspace as an extended quiescent
- state until we have a real user like a full adaptive nohz option.
+ The major pre-requirement for full dynticks to work is to
+ support the context tracking subsystem. But there are also
+ other dependencies to provide in order to make the full
+ dynticks working.
- Unless you want to hack and help the development of the full
- tickless feature, you shouldn't enable this option. It adds
- unnecessary overhead.
+ This option stands for testing when an arch implements the
+ context tracking backend but doesn't yet fullfill all the
+ requirements to make the full dynticks feature working.
+ Without the full dynticks, there is no way to test the support
+ for context tracking and the subsystems that rely on it: RCU
+ userspace extended quiescent state and tickless cputime
+ accounting. This option copes with the absence of the full
+ dynticks subsystem by forcing the context tracking on all
+ CPUs in the system.
+
+ Say Y only if you're working on the development of an
+ architecture backend for the context tracking.
+
+ Say N otherwise, this option brings an overhead that you
+ don't want in production.
- If unsure say N
config RCU_FANOUT
int "Tree-based hierarchical RCU fanout value"
@@ -579,17 +625,19 @@ config RCU_FANOUT_EXACT
config RCU_FAST_NO_HZ
bool "Accelerate last non-dyntick-idle CPU's grace periods"
- depends on NO_HZ && SMP
+ depends on NO_HZ_COMMON && SMP
default n
help
- This option causes RCU to attempt to accelerate grace periods
- in order to allow CPUs to enter dynticks-idle state more
- quickly. On the other hand, this option increases the overhead
- of the dynticks-idle checking, particularly on systems with
- large numbers of CPUs.
+ This option permits CPUs to enter dynticks-idle state even if
+ they have RCU callbacks queued, and prevents RCU from waking
+ these CPUs up more than roughly once every four jiffies (by
+ default, you can adjust this using the rcutree.rcu_idle_gp_delay
+ parameter), thus improving energy efficiency. On the other
+ hand, this option increases the duration of RCU grace periods,
+ for example, slowing down synchronize_rcu().
- Say Y if energy efficiency is critically important, particularly
- if you have relatively few CPUs.
+ Say Y if energy efficiency is critically important, and you
+ don't care about increased grace-period durations.
Say N if you are unsure.
@@ -655,6 +703,84 @@ config RCU_BOOST_DELAY
Accept the default if unsure.
+config RCU_NOCB_CPU
+ bool "Offload RCU callback processing from boot-selected CPUs"
+ depends on TREE_RCU || TREE_PREEMPT_RCU
+ default n
+ help
+ Use this option to reduce OS jitter for aggressive HPC or
+ real-time workloads. It can also be used to offload RCU
+ callback invocation to energy-efficient CPUs in battery-powered
+ asymmetric multiprocessors.
+
+ This option offloads callback invocation from the set of
+ CPUs specified at boot time by the rcu_nocbs parameter.
+ For each such CPU, a kthread ("rcuox/N") will be created to
+ invoke callbacks, where the "N" is the CPU being offloaded,
+ and where the "x" is "b" for RCU-bh, "p" for RCU-preempt, and
+ "s" for RCU-sched. Nothing prevents this kthread from running
+ on the specified CPUs, but (1) the kthreads may be preempted
+ between each callback, and (2) affinity or cgroups can be used
+ to force the kthreads to run on whatever set of CPUs is desired.
+
+ Say Y here if you want to help to debug reduced OS jitter.
+ Say N here if you are unsure.
+
+choice
+ prompt "Build-forced no-CBs CPUs"
+ default RCU_NOCB_CPU_NONE
+ help
+ This option allows no-CBs CPUs (whose RCU callbacks are invoked
+ from kthreads rather than from softirq context) to be specified
+ at build time. Additional no-CBs CPUs may be specified by
+ the rcu_nocbs= boot parameter.
+
+config RCU_NOCB_CPU_NONE
+ bool "No build_forced no-CBs CPUs"
+ depends on RCU_NOCB_CPU && !NO_HZ_FULL
+ help
+ This option does not force any of the CPUs to be no-CBs CPUs.
+ Only CPUs designated by the rcu_nocbs= boot parameter will be
+ no-CBs CPUs, whose RCU callbacks will be invoked by per-CPU
+ kthreads whose names begin with "rcuo". All other CPUs will
+ invoke their own RCU callbacks in softirq context.
+
+ Select this option if you want to choose no-CBs CPUs at
+ boot time, for example, to allow testing of different no-CBs
+ configurations without having to rebuild the kernel each time.
+
+config RCU_NOCB_CPU_ZERO
+ bool "CPU 0 is a build_forced no-CBs CPU"
+ depends on RCU_NOCB_CPU && !NO_HZ_FULL
+ help
+ This option forces CPU 0 to be a no-CBs CPU, so that its RCU
+ callbacks are invoked by a per-CPU kthread whose name begins
+ with "rcuo". Additional CPUs may be designated as no-CBs
+ CPUs using the rcu_nocbs= boot parameter will be no-CBs CPUs.
+ All other CPUs will invoke their own RCU callbacks in softirq
+ context.
+
+ Select this if CPU 0 needs to be a no-CBs CPU for real-time
+ or energy-efficiency reasons, but the real reason it exists
+ is to ensure that randconfig testing covers mixed systems.
+
+config RCU_NOCB_CPU_ALL
+ bool "All CPUs are build_forced no-CBs CPUs"
+ depends on RCU_NOCB_CPU
+ help
+ This option forces all CPUs to be no-CBs CPUs. The rcu_nocbs=
+ boot parameter will be ignored. All CPUs' RCU callbacks will
+ be executed in the context of per-CPU rcuo kthreads created for
+ this purpose. Assuming that the kthreads whose names start with
+ "rcuo" are bound to "housekeeping" CPUs, this reduces OS jitter
+ on the remaining CPUs, but might decrease memory locality during
+ RCU-callback invocation, thus potentially degrading throughput.
+
+ Select this if all CPUs need to be no-CBs CPUs for real-time
+ or energy-efficiency reasons.
+
+endchoice
+
endmenu # "RCU Subsystem"
config IKCONFIG
@@ -696,9 +822,62 @@ config LOG_BUF_SHIFT
config HAVE_UNSTABLE_SCHED_CLOCK
bool
+config GENERIC_SCHED_CLOCK
+ bool
+
+#
+# For architectures that want to enable the support for NUMA-affine scheduler
+# balancing logic:
+#
+config ARCH_SUPPORTS_NUMA_BALANCING
+ bool
+
+#
+# For architectures that know their GCC __int128 support is sound
+#
+config ARCH_SUPPORTS_INT128
+ bool
+
+# For architectures that (ab)use NUMA to represent different memory regions
+# all cpu-local but of different latencies, such as SuperH.
+#
+config ARCH_WANT_NUMA_VARIABLE_LOCALITY
+ bool
+
+#
+# For architectures that are willing to define _PAGE_NUMA as _PAGE_PROTNONE
+config ARCH_WANTS_PROT_NUMA_PROT_NONE
+ bool
+
+config ARCH_USES_NUMA_PROT_NONE
+ bool
+ default y
+ depends on ARCH_WANTS_PROT_NUMA_PROT_NONE
+ depends on NUMA_BALANCING
+
+config NUMA_BALANCING_DEFAULT_ENABLED
+ bool "Automatically enable NUMA aware memory/task placement"
+ default y
+ depends on NUMA_BALANCING
+ help
+ If set, automatic NUMA balancing will be enabled if running on a NUMA
+ machine.
+
+config NUMA_BALANCING
+ bool "Memory placement aware NUMA scheduler"
+ depends on ARCH_SUPPORTS_NUMA_BALANCING
+ depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY
+ depends on SMP && NUMA && MIGRATION
+ help
+ This option adds support for automatic NUMA aware memory/task placement.
+ The mechanism is quite primitive and is based on migrating memory when
+ it has references to the node the task is running on.
+
+ This system will be inactive on UMA systems.
+
menuconfig CGROUPS
boolean "Control Group support"
- depends on EVENTFD
+ select KERNFS
help
This option adds support for grouping sets of processes together, for
use with process control subsystems such as Cpusets, CFS, memory
@@ -764,14 +943,14 @@ config RESOURCE_COUNTERS
config MEMCG
bool "Memory Resource Controller for Control Groups"
depends on RESOURCE_COUNTERS
- select MM_OWNER
+ select EVENTFD
help
Provides a memory resource controller that manages both anonymous
memory and page cache. (See Documentation/cgroups/memory.txt)
Note that setting this option increases fixed memory overhead
associated with each page of memory in the system. By this,
- 20(40)bytes/PAGE_SIZE on 32(64)bit system will be occupied by memory
+ 8(16)bytes/PAGE_SIZE on 32(64)bit system will be occupied by memory
usage tracking struct at boot. Total amount of this is printed out
at boot.
@@ -781,9 +960,6 @@ config MEMCG
disable memory resource controller and you can avoid overheads.
(and lose benefits of memory resource controller)
- This config option also selects MM_OWNER config option, which
- could in turn add some fork/exit overhead.
-
config MEMCG_SWAP
bool "Memory Resource Controller Swap Extension"
depends on MEMCG && SWAP
@@ -809,15 +985,15 @@ config MEMCG_SWAP_ENABLED
Memory Resource Controller Swap Extension comes with its price in
a bigger memory consumption. General purpose distribution kernels
which want to enable the feature but keep it disabled by default
- and let the user enable it by swapaccount boot command line
+ and let the user enable it by swapaccount=1 boot command line
parameter should have this option unselected.
For those who want to have the feature enabled by default should
select this option (if, for some reason, they need to disable it
then swapaccount=0 does the trick).
config MEMCG_KMEM
- bool "Memory Resource Controller Kernel Memory accounting (EXPERIMENTAL)"
- depends on MEMCG && EXPERIMENTAL
- default n
+ bool "Memory Resource Controller Kernel Memory accounting"
+ depends on MEMCG
+ depends on SLUB || SLAB
help
The Kernel Memory extension for Memory Resource Controller can limit
the amount of memory used by kernel objects in the system. Those are
@@ -826,9 +1002,15 @@ config MEMCG_KMEM
the kmem extension can use it to guarantee that no group of processes
will ever exhaust kernel resources alone.
+ WARNING: Current implementation lacks reclaim support. That means
+ allocation attempts will fail when close to the limit even if there
+ are plenty of kmem available for reclaim. That makes this option
+ unusable in real life so DO NOT SELECT IT unless for development
+ purposes.
+
config CGROUP_HUGETLB
bool "HugeTLB Resource Controller for Control Groups"
- depends on RESOURCE_COUNTERS && HUGETLB_PAGE && EXPERIMENTAL
+ depends on RESOURCE_COUNTERS && HUGETLB_PAGE
default n
help
Provides a cgroup Resource Controller for HugeTLB pages.
@@ -867,7 +1049,6 @@ config FAIR_GROUP_SCHED
config CFS_BANDWIDTH
bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED"
- depends on EXPERIMENTAL
depends on FAIR_GROUP_SCHED
default n
help
@@ -879,7 +1060,6 @@ config CFS_BANDWIDTH
config RT_GROUP_SCHED
bool "Group scheduling for SCHED_RR/FIFO"
- depends on EXPERIMENTAL
depends on CGROUP_SCHED
default n
help
@@ -961,15 +1141,18 @@ config IPC_NS
different IPC objects in different namespaces.
config USER_NS
- bool "User namespace (EXPERIMENTAL)"
- depends on EXPERIMENTAL
- depends on UIDGID_CONVERTED
- select UIDGID_STRICT_TYPE_CHECKS
-
+ bool "User namespace"
default n
help
This allows containers, i.e. vservers, to use user namespaces
to provide different user info for different servers.
+
+ When user namespaces are enabled in the kernel it is
+ recommended that the MEMCG and MEMCG_KMEM options also be
+ enabled and that user-space use the memory control groups to
+ limit the amount of memory a memory unprivileged users can
+ use.
+
If unsure, say N.
config PID_NS
@@ -990,45 +1173,8 @@ config NET_NS
endif # NAMESPACES
-config UIDGID_CONVERTED
- # True if all of the selected software conmponents are known
- # to have uid_t and gid_t converted to kuid_t and kgid_t
- # where appropriate and are otherwise safe to use with
- # the user namespace.
- bool
- default y
-
- # Networking
- depends on NET_9P = n
-
- # Filesystems
- depends on 9P_FS = n
- depends on AFS_FS = n
- depends on AUTOFS4_FS = n
- depends on CEPH_FS = n
- depends on CIFS = n
- depends on CODA_FS = n
- depends on FUSE_FS = n
- depends on GFS2_FS = n
- depends on NCP_FS = n
- depends on NFSD = n
- depends on NFS_FS = n
- depends on OCFS2_FS = n
- depends on XFS_FS = n
-
-config UIDGID_STRICT_TYPE_CHECKS
- bool "Require conversions between uid/gids and their internal representation"
- depends on UIDGID_CONVERTED
- default n
- help
- While the nececessary conversions are being added to all subsystems this option allows
- the code to continue to build for unconverted subsystems.
-
- Say Y here if you want the strict type checking enabled
-
config SCHED_AUTOGROUP
bool "Automatic process group scheduling"
- select EVENTFD
select CGROUPS
select CGROUP_SCHED
select FAIR_GROUP_SCHED
@@ -1039,9 +1185,6 @@ config SCHED_AUTOGROUP
desktop applications. Task group autogeneration is currently based
upon task session.
-config MM_OWNER
- bool
-
config SYSFS_DEPRECATED
bool "Enable deprecated sysfs features to support old userspace tools"
depends on SYSFS
@@ -1119,7 +1262,7 @@ config CC_OPTIMIZE_FOR_SIZE
Enabling this option will pass "-Os" instead of "-O2" to gcc
resulting in a smaller kernel.
- If unsure, say Y.
+ If unsure, say N.
config SYSCTL
bool
@@ -1127,6 +1270,32 @@ config SYSCTL
config ANON_INODES
bool
+config HAVE_UID16
+ bool
+
+config SYSCTL_EXCEPTION_TRACE
+ bool
+ help
+ Enable support for /proc/sys/debug/exception-trace.
+
+config SYSCTL_ARCH_UNALIGN_NO_WARN
+ bool
+ help
+ Enable support for /proc/sys/kernel/ignore-unaligned-usertrap
+ Allows arch to define/use @no_unaligned_warning to possibly warn
+ about unaligned access emulation going on under the hood.
+
+config SYSCTL_ARCH_UNALIGN_ALLOW
+ bool
+ help
+ Enable support for /proc/sys/kernel/unaligned-trap
+ Allows arches to define/use @unaligned_enabled to runtime toggle
+ the unaligned access emulation.
+ see arch/parisc/kernel/unaligned.c for reference
+
+config HAVE_PCSPKR_PLATFORM
+ bool
+
menuconfig EXPERT
bool "Configure standard kernel features (expert users)"
# Unhide debug options, to make the on-by-default options visible
@@ -1137,9 +1306,6 @@ menuconfig EXPERT
environments which can tolerate a "non-standard" kernel.
Only use this if you really know what you are doing.
-config HAVE_UID16
- bool
-
config UID16
bool "Enable 16-bit UID system calls" if EXPERT
depends on HAVE_UID16
@@ -1147,6 +1313,26 @@ config UID16
help
This enables the legacy 16-bit UID syscall wrappers.
+config SGETMASK_SYSCALL
+ bool "sgetmask/ssetmask syscalls support" if EXPERT
+ def_bool PARISC || MN10300 || BLACKFIN || M68K || PPC || MIPS || X86 || SPARC || CRIS || MICROBLAZE || SUPERH
+ ---help---
+ sys_sgetmask and sys_ssetmask are obsolete system calls
+ no longer supported in libc but still enabled by default in some
+ architectures.
+
+ If unsure, leave the default option here.
+
+config SYSFS_SYSCALL
+ bool "Sysfs syscall support" if EXPERT
+ default y
+ ---help---
+ sys_sysfs is an obsolete system call no longer supported in libc.
+ Note that disabling this option is more secure but might break
+ compatibility with some systems.
+
+ If unsure say Y here.
+
config SYSCTL_SYSCALL
bool "Sysctl syscall support" if EXPERT
depends on PROC_SYSCTL
@@ -1164,11 +1350,6 @@ config SYSCTL_SYSCALL
If unsure say N here.
-config SYSCTL_EXCEPTION_TRACE
- bool
- help
- Enable support for /proc/sys/debug/exception-trace.
-
config KALLSYMS
bool "Load all symbols for debugging/ksymoops" if EXPERT
default y
@@ -1194,12 +1375,10 @@ config KALLSYMS_ALL
Say N unless you really need all symbols.
-config HOTPLUG
- def_bool y
-
config PRINTK
default y
bool "Enable support for printk" if EXPERT
+ select IRQ_WORK
help
This option enables normal printk support. Removing it
eliminates most of the message strings from the kernel image
@@ -1234,9 +1413,6 @@ config PCSPKR_PLATFORM
This option allows to disable the internal PC-Speaker
support, saving some memory.
-config HAVE_PCSPKR_PLATFORM
- bool
-
config BASE_FULL
default y
bool "Enable full-sized data structures for core" if EXPERT
@@ -1254,6 +1430,13 @@ config FUTEX
support for "fast userspace mutexes". The resulting kernel may not
run glibc-based applications correctly.
+config HAVE_FUTEX_CMPXCHG
+ bool
+ help
+ Architectures should select this if futex_atomic_cmpxchg_inatomic()
+ is implemented and always working. This removes a couple of runtime
+ checks.
+
config EPOLL
bool "Enable eventpoll support" if EXPERT
default y
@@ -1308,11 +1491,21 @@ config AIO
default y
help
This option enables POSIX asynchronous I/O which may by used
- by some high performance threaded applications. Disabling
- this option saves about 7k.
+ by some high performance threaded applications. Disabling
+ this option saves about 7k.
+
+config PCI_QUIRKS
+ default y
+ bool "Enable PCI quirk workarounds" if EXPERT
+ depends on PCI
+ help
+ This enables workarounds for various PCI chipset
+ bugs/quirks. Disable this only if your target machine is
+ unaffected by PCI quirks.
config EMBEDDED
bool "Embedded system"
+ option allnoconfig_y
select EXPERT
help
This option should be enabled if compiling the kernel for
@@ -1384,15 +1577,6 @@ config VM_EVENT_COUNTERS
on EXPERT systems. /proc/vmstat will only show page counts
if VM event counters are disabled.
-config PCI_QUIRKS
- default y
- bool "Enable PCI quirk workarounds" if EXPERT
- depends on PCI
- help
- This enables workarounds for various PCI chipset
- bugs/quirks. Disable this only if your target machine is
- unaffected by PCI quirks.
-
config SLUB_DEBUG
default y
bool "Enable SLUB debugging support" if EXPERT
@@ -1448,6 +1632,17 @@ config SLOB
endchoice
+config SLUB_CPU_PARTIAL
+ default y
+ depends on SLUB && SMP
+ bool "SLUB per cpu partial cache"
+ help
+ Per cpu partial caches accellerate objects allocation and freeing
+ that is local to a processor at the price of more indeterminism
+ in the latency of the free. On overflow these caches will be cleared
+ which requires the taking of locks that may cause latency spikes.
+ Typically one would choose no for a realtime system.
+
config MMAP_ALLOW_UNINITIALIZED
bool "Allow mmapped anonymous memory to be uninitialized"
depends on EXPERT && !MMU
@@ -1470,6 +1665,18 @@ config MMAP_ALLOW_UNINITIALIZED
See Documentation/nommu-mmap.txt for more information.
+config SYSTEM_TRUSTED_KEYRING
+ bool "Provide system-wide ring of trusted keys"
+ depends on KEYS
+ help
+ Provide a system keyring to which trusted keys can be added. Keys in
+ the keyring are considered to be trusted. Keys may be added at will
+ by the kernel from compiled-in data and from hardware key stores, but
+ userspace may only add extra keys if those keys can be verified by
+ keys already in the keyring.
+
+ Keys in this keyring are used by module signature checking.
+
config PROFILING
bool "Profiling support"
help
@@ -1507,6 +1714,7 @@ config BASE_SMALL
menuconfig MODULES
bool "Enable loadable module support"
+ option modules
help
Kernel modules are small pieces of compiled code which can
be inserted in the running kernel, rather than being
@@ -1545,7 +1753,7 @@ config MODULE_UNLOAD
config MODULE_FORCE_UNLOAD
bool "Forced module unloading"
- depends on MODULE_UNLOAD && EXPERIMENTAL
+ depends on MODULE_UNLOAD
help
This option allows you to force a module to unload, even if the
kernel believes it is unsafe: the kernel will remove the module
@@ -1577,6 +1785,7 @@ config MODULE_SRCVERSION_ALL
config MODULE_SIG
bool "Module signature verification"
depends on MODULES
+ select SYSTEM_TRUSTED_KEYRING
select KEYS
select CRYPTO
select ASYMMETRIC_KEY_TYPE
@@ -1602,6 +1811,17 @@ config MODULE_SIG_FORCE
Reject unsigned modules or signed modules for which we don't have a
key. Without this, such modules will simply taint the kernel.
+config MODULE_SIG_ALL
+ bool "Automatically sign all modules"
+ default y
+ depends on MODULE_SIG
+ help
+ Sign all modules during make modules_install. Without this option,
+ modules must be signed manually, using the scripts/sign-file tool.
+
+comment "Do not forget to sign required modules with scripts/sign-file"
+ depends on MODULE_SIG_FORCE && !MODULE_SIG_ALL
+
choice
prompt "Which hash algorithm should modules be signed with?"
depends on MODULE_SIG
@@ -1634,6 +1854,15 @@ config MODULE_SIG_SHA512
endchoice
+config MODULE_SIG_HASH
+ string
+ depends on MODULE_SIG
+ default "sha1" if MODULE_SIG_SHA1
+ default "sha224" if MODULE_SIG_SHA224
+ default "sha256" if MODULE_SIG_SHA256
+ default "sha384" if MODULE_SIG_SHA384
+ default "sha512" if MODULE_SIG_SHA512
+
endif # MODULES
config INIT_ALL_POSSIBLE
diff --git a/init/calibrate.c b/init/calibrate.c
index fda0a7b0f06..520702db9ac 100644
--- a/init/calibrate.c
+++ b/init/calibrate.c
@@ -31,7 +31,7 @@ __setup("lpj=", lpj_setup);
#define DELAY_CALIBRATION_TICKS ((HZ < 100) ? 1 : (HZ/100))
#define MAX_DIRECT_CALIBRATION_RETRIES 5
-static unsigned long __cpuinit calibrate_delay_direct(void)
+static unsigned long calibrate_delay_direct(void)
{
unsigned long pre_start, start, post_start;
unsigned long pre_end, end, post_end;
@@ -166,7 +166,10 @@ static unsigned long __cpuinit calibrate_delay_direct(void)
return 0;
}
#else
-static unsigned long __cpuinit calibrate_delay_direct(void) {return 0;}
+static unsigned long calibrate_delay_direct(void)
+{
+ return 0;
+}
#endif
/*
@@ -180,7 +183,7 @@ static unsigned long __cpuinit calibrate_delay_direct(void) {return 0;}
*/
#define LPS_PREC 8
-static unsigned long __cpuinit calibrate_delay_converge(void)
+static unsigned long calibrate_delay_converge(void)
{
/* First stage - slowly accelerate to find initial bounds */
unsigned long lpj, lpj_base, ticks, loopadd, loopadd_base, chop_limit;
@@ -254,12 +257,12 @@ static DEFINE_PER_CPU(unsigned long, cpu_loops_per_jiffy) = { 0 };
* Architectures should override this function if a faster calibration
* method is available.
*/
-unsigned long __attribute__((weak)) __cpuinit calibrate_delay_is_known(void)
+unsigned long __attribute__((weak)) calibrate_delay_is_known(void)
{
return 0;
}
-void __cpuinit calibrate_delay(void)
+void calibrate_delay(void)
{
unsigned long lpj;
static bool printed;
diff --git a/init/do_mounts.c b/init/do_mounts.c
index f8a66424360..82f22885c87 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -26,6 +26,8 @@
#include <linux/async.h>
#include <linux/fs_struct.h>
#include <linux/slab.h>
+#include <linux/ramfs.h>
+#include <linux/shmem_fs.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_fs_sb.h>
@@ -69,23 +71,28 @@ __setup("ro", readonly);
__setup("rw", readwrite);
#ifdef CONFIG_BLOCK
+struct uuidcmp {
+ const char *uuid;
+ int len;
+};
+
/**
* match_dev_by_uuid - callback for finding a partition using its uuid
* @dev: device passed in by the caller
- * @data: opaque pointer to a 36 byte char array with a UUID
+ * @data: opaque pointer to the desired struct uuidcmp to match
*
* Returns 1 if the device matches, and 0 otherwise.
*/
-static int match_dev_by_uuid(struct device *dev, void *data)
+static int match_dev_by_uuid(struct device *dev, const void *data)
{
- u8 *uuid = data;
+ const struct uuidcmp *cmp = data;
struct hd_struct *part = dev_to_part(dev);
if (!part->info)
goto no_match;
- if (memcmp(uuid, part->info->uuid, sizeof(part->info->uuid)))
- goto no_match;
+ if (strncasecmp(cmp->uuid, part->info->uuid, cmp->len))
+ goto no_match;
return 1;
no_match:
@@ -95,49 +102,52 @@ no_match:
/**
* devt_from_partuuid - looks up the dev_t of a partition by its UUID
- * @uuid: min 36 byte char array containing a hex ascii UUID
+ * @uuid_str: char array containing ascii UUID
*
* The function will return the first partition which contains a matching
* UUID value in its partition_meta_info struct. This does not search
* by filesystem UUIDs.
*
- * If @uuid is followed by a "/PARTNROFF=%d", then the number will be
+ * If @uuid_str is followed by a "/PARTNROFF=%d", then the number will be
* extracted and used as an offset from the partition identified by the UUID.
*
* Returns the matching dev_t on success or 0 on failure.
*/
-static dev_t devt_from_partuuid(char *uuid_str)
+static dev_t devt_from_partuuid(const char *uuid_str)
{
dev_t res = 0;
+ struct uuidcmp cmp;
struct device *dev = NULL;
- u8 uuid[16];
struct gendisk *disk;
struct hd_struct *part;
int offset = 0;
+ bool clear_root_wait = false;
+ char *slash;
- if (strlen(uuid_str) < 36)
- goto done;
+ cmp.uuid = uuid_str;
+ slash = strchr(uuid_str, '/');
/* Check for optional partition number offset attributes. */
- if (uuid_str[36]) {
+ if (slash) {
char c = 0;
/* Explicitly fail on poor PARTUUID syntax. */
- if (sscanf(&uuid_str[36],
- "/PARTNROFF=%d%c", &offset, &c) != 1) {
- printk(KERN_ERR "VFS: PARTUUID= is invalid.\n"
- "Expected PARTUUID=<valid-uuid-id>[/PARTNROFF=%%d]\n");
- if (root_wait)
- printk(KERN_ERR
- "Disabling rootwait; root= is invalid.\n");
- root_wait = 0;
+ if (sscanf(slash + 1,
+ "PARTNROFF=%d%c", &offset, &c) != 1) {
+ clear_root_wait = true;
goto done;
}
+ cmp.len = slash - uuid_str;
+ } else {
+ cmp.len = strlen(uuid_str);
}
- /* Pack the requested UUID in the expected format. */
- part_pack_uuid(uuid_str, uuid);
+ if (!cmp.len) {
+ clear_root_wait = true;
+ goto done;
+ }
- dev = class_find_device(&block_class, NULL, uuid, &match_dev_by_uuid);
+ dev = class_find_device(&block_class, NULL, &cmp,
+ &match_dev_by_uuid);
if (!dev)
goto done;
@@ -158,6 +168,13 @@ static dev_t devt_from_partuuid(char *uuid_str)
no_offset:
put_device(dev);
done:
+ if (clear_root_wait) {
+ pr_err("VFS: PARTUUID= is invalid.\n"
+ "Expected PARTUUID=<valid-uuid-id>[/PARTNROFF=%%d]\n");
+ if (root_wait)
+ pr_err("Disabling rootwait; root= is invalid.\n");
+ root_wait = 0;
+ }
return res;
}
#endif
@@ -174,8 +191,14 @@ done:
* used when disk name of partitioned disk ends on a digit.
* 6) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the
* unique id of a partition if the partition table provides it.
+ * The UUID may be either an EFI/GPT UUID, or refer to an MSDOS
+ * partition using the format SSSSSSSS-PP, where SSSSSSSS is a zero-
+ * filled hex representation of the 32-bit "NT disk signature", and PP
+ * is a zero-filled hex representation of the 1-based partition number.
* 7) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to
* a partition with a known unique id.
+ * 8) <major>:<minor> major and minor number of the device separated by
+ * a colon.
*
* If name doesn't have fall into the categories above, we return (0,0).
* block_class is used to check if something is a disk name. If the disk
@@ -517,7 +540,7 @@ void __init prepare_namespace(void)
int is_floppy;
if (root_delay) {
- printk(KERN_INFO "Waiting %dsec before mounting root device...\n",
+ printk(KERN_INFO "Waiting %d sec before mounting root device...\n",
root_delay);
ssleep(root_delay);
}
@@ -569,3 +592,46 @@ out:
sys_mount(".", "/", NULL, MS_MOVE, NULL);
sys_chroot(".");
}
+
+static bool is_tmpfs;
+static struct dentry *rootfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
+{
+ static unsigned long once;
+ void *fill = ramfs_fill_super;
+
+ if (test_and_set_bit(0, &once))
+ return ERR_PTR(-ENODEV);
+
+ if (IS_ENABLED(CONFIG_TMPFS) && is_tmpfs)
+ fill = shmem_fill_super;
+
+ return mount_nodev(fs_type, flags, data, fill);
+}
+
+static struct file_system_type rootfs_fs_type = {
+ .name = "rootfs",
+ .mount = rootfs_mount,
+ .kill_sb = kill_litter_super,
+};
+
+int __init init_rootfs(void)
+{
+ int err = register_filesystem(&rootfs_fs_type);
+
+ if (err)
+ return err;
+
+ if (IS_ENABLED(CONFIG_TMPFS) && !saved_root_name[0] &&
+ (!root_fs_names || strstr(root_fs_names, "tmpfs"))) {
+ err = shmem_init();
+ is_tmpfs = true;
+ } else {
+ err = init_ramfs_fs();
+ }
+
+ if (err)
+ unregister_filesystem(&rootfs_fs_type);
+
+ return err;
+}
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index 5e4ded51788..3e0878e8a80 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -36,6 +36,10 @@ __setup("noinitrd", no_initrd);
static int init_linuxrc(struct subprocess_info *info, struct cred *new)
{
sys_unshare(CLONE_FS | CLONE_FILES);
+ /* stdin/stdout/stderr for /linuxrc */
+ sys_open("/dev/console", O_RDWR, 0);
+ sys_dup(0);
+ sys_dup(0);
/* move initrd over / and chdir/chroot in initrd root */
sys_chdir("/root");
sys_mount(".", "/", NULL, MS_MOVE, NULL);
@@ -46,6 +50,7 @@ static int init_linuxrc(struct subprocess_info *info, struct cred *new)
static void __init handle_initrd(void)
{
+ struct subprocess_info *info;
static char *argv[] = { "linuxrc", NULL, };
extern char *envp_init[];
int error;
@@ -57,14 +62,20 @@ static void __init handle_initrd(void)
sys_mkdir("/old", 0700);
sys_chdir("/old");
+ /* try loading default modules from initrd */
+ load_default_modules();
+
/*
* In case that a resume from disk is carried out by linuxrc or one of
* its children, we need to tell the freezer not to wait for us.
*/
current->flags |= PF_FREEZER_SKIP;
- call_usermodehelper_fns("/linuxrc", argv, envp_init, UMH_WAIT_PROC,
- init_linuxrc, NULL, NULL);
+ info = call_usermodehelper_setup("/linuxrc", argv, envp_init,
+ GFP_KERNEL, init_linuxrc, NULL, NULL);
+ if (!info)
+ return;
+ call_usermodehelper_exec(info, UMH_WAIT_PROC);
current->flags &= ~PF_FREEZER_SKIP;
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index 6be2879cca6..a8227022e3a 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -13,7 +13,7 @@
#include <linux/minix_fs.h>
#include <linux/ext2_fs.h>
#include <linux/romfs_fs.h>
-#include <linux/cramfs_fs.h>
+#include <uapi/linux/cramfs_fs.h>
#include <linux/initrd.h>
#include <linux/string.h>
#include <linux/slab.h>
@@ -57,6 +57,11 @@ static int __init crd_load(int in_fd, int out_fd, decompress_fn deco);
* cramfs
* squashfs
* gzip
+ * bzip2
+ * lzma
+ * xz
+ * lzo
+ * lz4
*/
static int __init
identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor)
@@ -342,6 +347,13 @@ static int __init crd_load(int in_fd, int out_fd, decompress_fn deco)
int result;
crd_infd = in_fd;
crd_outfd = out_fd;
+
+ if (!deco) {
+ pr_emerg("Invalid ramdisk decompression routine. "
+ "Select appropriate config option.\n");
+ panic("Could not decompress initial ramdisk image.");
+ }
+
result = deco(NULL, 0, compr_fill, compr_flush, NULL, NULL, error);
if (decompress_error)
result = 1;
diff --git a/init/init_task.c b/init/init_task.c
index 8b2f3996b03..ba0a7f362d9 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -2,6 +2,8 @@
#include <linux/export.h>
#include <linux/mqueue.h>
#include <linux/sched.h>
+#include <linux/sched/sysctl.h>
+#include <linux/sched/rt.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
diff --git a/init/initramfs.c b/init/initramfs.c
index 84c6bf11130..a8497fab1c3 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -455,6 +455,7 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len)
}
this_header = 0;
decompress = decompress_method(buf, len, &compress_name);
+ pr_debug("Detected %s compressed data\n", compress_name);
if (decompress) {
res = decompress(buf, len, NULL, flush_buffer, NULL,
&my_inptr, error);
@@ -583,7 +584,7 @@ static int __init populate_rootfs(void)
{
char *err = unpack_to_rootfs(__initramfs_start, __initramfs_size);
if (err)
- panic(err); /* Failed to decompress INTERNAL initramfs */
+ panic("%s", err); /* Failed to decompress INTERNAL initramfs */
if (initrd_start) {
#ifdef CONFIG_BLK_DEV_RAM
int fd;
@@ -592,7 +593,7 @@ static int __init populate_rootfs(void)
initrd_end - initrd_start);
if (!err) {
free_initrd();
- return 0;
+ goto done;
} else {
clean_rootfs();
unpack_to_rootfs(__initramfs_start, __initramfs_size);
@@ -607,6 +608,7 @@ static int __init populate_rootfs(void)
sys_close(fd);
free_initrd();
}
+ done:
#else
printk(KERN_INFO "Unpacking initramfs...\n");
err = unpack_to_rootfs((char *)initrd_start,
@@ -615,6 +617,11 @@ static int __init populate_rootfs(void)
printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err);
free_initrd();
#endif
+ /*
+ * Try loading default modules from initramfs. This gives
+ * us a chance to load before device_initcalls.
+ */
+ load_default_modules();
}
return 0;
}
diff --git a/init/main.c b/init/main.c
index e33e09df3cb..e8ae1fef090 100644
--- a/init/main.c
+++ b/init/main.c
@@ -9,6 +9,8 @@
* Simplified starting of init: Michael A. Griffith <grif@acm.org>
*/
+#define DEBUG /* Enable initcall_debug */
+
#include <linux/types.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
@@ -70,6 +72,12 @@
#include <linux/perf_event.h>
#include <linux/file.h>
#include <linux/ptrace.h>
+#include <linux/blkdev.h>
+#include <linux/elevator.h>
+#include <linux/sched_clock.h>
+#include <linux/context_tracking.h>
+#include <linux/random.h>
+#include <linux/list.h>
#include <asm/io.h>
#include <asm/bugs.h>
@@ -85,17 +93,11 @@ static int kernel_init(void *);
extern void init_IRQ(void);
extern void fork_init(unsigned long);
-extern void mca_init(void);
-extern void sbus_init(void);
extern void radix_tree_init(void);
#ifndef CONFIG_DEBUG_RODATA
static inline void mark_rodata_ro(void) { }
#endif
-#ifdef CONFIG_TC
-extern void tc_init(void);
-#endif
-
/*
* Debug helper: via this flag we know that we are in 'early bootup code'
* where only the boot processor is running with IRQ disabled. This means
@@ -117,7 +119,6 @@ EXPORT_SYMBOL(system_state);
extern void time_init(void);
/* Default late time init is NULL. archs can override this later. */
void (*__initdata late_time_init)(void);
-extern void softirq_init(void);
/* Untouched command line saved by arch-specific code. */
char __initdata boot_command_line[COMMAND_LINE_SIZE];
@@ -125,11 +126,20 @@ char __initdata boot_command_line[COMMAND_LINE_SIZE];
char *saved_command_line;
/* Command line for parameter parsing */
static char *static_command_line;
+/* Command line for per-initcall parameter parsing */
+static char *initcall_command_line;
static char *execute_command;
static char *ramdisk_execute_command;
/*
+ * Used to generate warnings if static_key manipulation functions are used
+ * before jump_label_init is called.
+ */
+bool static_key_initialized __read_mostly = false;
+EXPORT_SYMBOL_GPL(static_key_initialized);
+
+/*
* If set, this is an indication to the drivers that reset the underlying
* device before going ahead with the initialization otherwise driver might
* rely on the BIOS and skip the reset operation.
@@ -172,8 +182,8 @@ static int __init obsolete_checksetup(char *line)
if (line[n] == '\0' || line[n] == '=')
had_early_param = 1;
} else if (!p->setup_func) {
- printk(KERN_WARNING "Parameter %s is obsolete,"
- " ignored\n", p->str);
+ pr_warn("Parameter %s is obsolete, ignored\n",
+ p->str);
return 1;
} else if (p->setup_func(line + n))
return 1;
@@ -194,13 +204,13 @@ EXPORT_SYMBOL(loops_per_jiffy);
static int __init debug_kernel(char *str)
{
- console_loglevel = 10;
+ console_loglevel = CONSOLE_LOGLEVEL_DEBUG;
return 0;
}
static int __init quiet_kernel(char *str)
{
- console_loglevel = 4;
+ console_loglevel = CONSOLE_LOGLEVEL_QUIET;
return 0;
}
@@ -243,6 +253,27 @@ static int __init repair_env_string(char *param, char *val, const char *unused)
return 0;
}
+/* Anything after -- gets handed straight to init. */
+static int __init set_init_arg(char *param, char *val, const char *unused)
+{
+ unsigned int i;
+
+ if (panic_later)
+ return 0;
+
+ repair_env_string(param, val, unused);
+
+ for (i = 0; argv_init[i]; i++) {
+ if (i == MAX_INIT_ARGS) {
+ panic_later = "init";
+ panic_param = param;
+ return 0;
+ }
+ }
+ argv_init[i] = param;
+ return 0;
+}
+
/*
* Unknown boot options get handed to init, unless they look like
* unused parameters (modprobe will find them in /proc/cmdline).
@@ -267,7 +298,7 @@ static int __init unknown_bootoption(char *param, char *val, const char *unused)
unsigned int i;
for (i = 0; envp_init[i]; i++) {
if (i == MAX_INIT_ENVS) {
- panic_later = "Too many boot env vars at `%s'";
+ panic_later = "env";
panic_param = param;
}
if (!strncmp(param, envp_init[i], val - param))
@@ -279,7 +310,7 @@ static int __init unknown_bootoption(char *param, char *val, const char *unused)
unsigned int i;
for (i = 0; argv_init[i]; i++) {
if (i == MAX_INIT_ARGS) {
- panic_later = "Too many boot init vars at `%s'";
+ panic_later = "init";
panic_param = param;
}
}
@@ -340,8 +371,11 @@ static inline void smp_prepare_cpus(unsigned int maxcpus) { }
*/
static void __init setup_command_line(char *command_line)
{
- saved_command_line = alloc_bootmem(strlen (boot_command_line)+1);
- static_command_line = alloc_bootmem(strlen (command_line)+1);
+ saved_command_line =
+ memblock_virt_alloc(strlen(boot_command_line) + 1, 0);
+ initcall_command_line =
+ memblock_virt_alloc(strlen(boot_command_line) + 1, 0);
+ static_command_line = memblock_virt_alloc(strlen(command_line) + 1, 0);
strcpy (saved_command_line, boot_command_line);
strcpy (static_command_line, command_line);
}
@@ -367,7 +401,7 @@ static noinline void __init_refok rest_init(void)
* the init task will end up wanting to create kthreads, which, if
* we schedule it before we create kthreadd, will OOPS.
*/
- kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND);
+ kernel_thread(kernel_init, NULL, CLONE_FS);
numa_default_policy();
pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
rcu_read_lock();
@@ -382,7 +416,7 @@ static noinline void __init_refok rest_init(void)
init_idle_bootup_task(current);
schedule_preempt_disabled();
/* Call into cpu_idle with preempt disabled */
- cpu_idle();
+ cpu_startup_entry(CPUHP_ONLINE);
}
/* Check for early params. */
@@ -396,8 +430,7 @@ static int __init do_early_param(char *param, char *val, const char *unused)
strcmp(p->str, "earlycon") == 0)
) {
if (p->setup_func(val) != 0)
- printk(KERN_WARNING
- "Malformed early option '%s'\n", param);
+ pr_warn("Malformed early option '%s'\n", param);
}
}
/* We accept everything at this stage. */
@@ -461,13 +494,13 @@ static void __init mm_init(void)
mem_init();
kmem_cache_init();
percpu_init_late();
- pgtable_cache_init();
+ pgtable_init();
vmalloc_init();
}
-asmlinkage void __init start_kernel(void)
+asmlinkage __visible void __init start_kernel(void)
{
- char * command_line;
+ char * command_line, *after_dashes;
extern const struct kernel_param __start___param[], __stop___param[];
/*
@@ -492,12 +525,10 @@ asmlinkage void __init start_kernel(void)
* Interrupts are still disabled. Do necessary setups, then
* enable them
*/
- tick_init();
boot_cpu_init();
page_address_init();
- printk(KERN_NOTICE "%s", linux_banner);
+ pr_notice("%s", linux_banner);
setup_arch(&command_line);
- mm_init_owner(&init_mm, &init_task);
mm_init_cpumask(&init_mm);
setup_command_line(command_line);
setup_nr_cpu_ids();
@@ -507,11 +538,15 @@ asmlinkage void __init start_kernel(void)
build_all_zonelists(NULL, NULL);
page_alloc_init();
- printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line);
+ pr_notice("Kernel command line: %s\n", boot_command_line);
parse_early_param();
- parse_args("Booting kernel", static_command_line, __start___param,
- __stop___param - __start___param,
- -1, -1, &unknown_bootoption);
+ after_dashes = parse_args("Booting kernel",
+ static_command_line, __start___param,
+ __stop___param - __start___param,
+ -1, -1, &unknown_bootoption);
+ if (after_dashes)
+ parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
+ set_init_arg);
jump_label_init();
@@ -537,28 +572,27 @@ asmlinkage void __init start_kernel(void)
* fragile until we cpu_idle() for the first time.
*/
preempt_disable();
- if (!irqs_disabled()) {
- printk(KERN_WARNING "start_kernel(): bug: interrupts were "
- "enabled *very* early, fixing it\n");
+ if (WARN(!irqs_disabled(), "Interrupts were enabled *very* early, fixing it\n"))
local_irq_disable();
- }
idr_init_cache();
- perf_event_init();
rcu_init();
+ tick_nohz_init();
+ context_tracking_init();
radix_tree_init();
/* init some links before init_ISA_irqs() */
early_irq_init();
init_IRQ();
+ tick_init();
init_timers();
hrtimers_init();
softirq_init();
timekeeping_init();
time_init();
+ sched_clock_postinit();
+ perf_event_init();
profile_init();
call_function_init();
- if (!irqs_disabled())
- printk(KERN_CRIT "start_kernel(): bug: interrupts were "
- "enabled early\n");
+ WARN(!irqs_disabled(), "Interrupts were enabled early\n");
early_boot_irqs_disabled = false;
local_irq_enable();
@@ -571,7 +605,8 @@ asmlinkage void __init start_kernel(void)
*/
console_init();
if (panic_later)
- panic(panic_later, panic_param);
+ panic("Too many boot %s vars at `%s'", panic_later,
+ panic_param);
lockdep_info();
@@ -585,8 +620,7 @@ asmlinkage void __init start_kernel(void)
#ifdef CONFIG_BLK_DEV_INITRD
if (initrd_start && !initrd_below_start_ok &&
page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
- printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - "
- "disabling it.\n",
+ pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n",
page_to_pfn(virt_to_page((void *)initrd_start)),
min_low_pfn);
initrd_start = 0;
@@ -603,10 +637,15 @@ asmlinkage void __init start_kernel(void)
calibrate_delay();
pidmap_init();
anon_vma_init();
+ acpi_early_init();
#ifdef CONFIG_X86
- if (efi_enabled)
+ if (efi_enabled(EFI_RUNTIME_SERVICES))
efi_enter_virtual_mode();
#endif
+#ifdef CONFIG_X86_ESPFIX64
+ /* Should be run before the first non-init thread is created */
+ init_espfix_bsp();
+#endif
thread_info_cache_init();
cred_init();
fork_init(totalram_pages);
@@ -619,9 +658,7 @@ asmlinkage void __init start_kernel(void)
signals_init();
/* rootfs populating might need page-writeback */
page_writeback_init();
-#ifdef CONFIG_PROC_FS
proc_root_init();
-#endif
cgroup_init();
cpuset_init();
taskstats_init_early();
@@ -629,10 +666,9 @@ asmlinkage void __init start_kernel(void)
check_bugs();
- acpi_early_init(); /* before LAPIC and SMP init */
sfi_init_late();
- if (efi_enabled) {
+ if (efi_enabled(EFI_RUNTIME_SERVICES)) {
efi_late_init();
efi_free_boot_services();
}
@@ -657,7 +693,69 @@ static void __init do_ctors(void)
bool initcall_debug;
core_param(initcall_debug, initcall_debug, bool, 0644);
-static char msgbuf[64];
+#ifdef CONFIG_KALLSYMS
+struct blacklist_entry {
+ struct list_head next;
+ char *buf;
+};
+
+static __initdata_or_module LIST_HEAD(blacklisted_initcalls);
+
+static int __init initcall_blacklist(char *str)
+{
+ char *str_entry;
+ struct blacklist_entry *entry;
+
+ /* str argument is a comma-separated list of functions */
+ do {
+ str_entry = strsep(&str, ",");
+ if (str_entry) {
+ pr_debug("blacklisting initcall %s\n", str_entry);
+ entry = alloc_bootmem(sizeof(*entry));
+ entry->buf = alloc_bootmem(strlen(str_entry) + 1);
+ strcpy(entry->buf, str_entry);
+ list_add(&entry->next, &blacklisted_initcalls);
+ }
+ } while (str_entry);
+
+ return 0;
+}
+
+static bool __init_or_module initcall_blacklisted(initcall_t fn)
+{
+ struct list_head *tmp;
+ struct blacklist_entry *entry;
+ char *fn_name;
+
+ fn_name = kasprintf(GFP_KERNEL, "%pf", fn);
+ if (!fn_name)
+ return false;
+
+ list_for_each(tmp, &blacklisted_initcalls) {
+ entry = list_entry(tmp, struct blacklist_entry, next);
+ if (!strcmp(fn_name, entry->buf)) {
+ pr_debug("initcall %s blacklisted\n", fn_name);
+ kfree(fn_name);
+ return true;
+ }
+ }
+
+ kfree(fn_name);
+ return false;
+}
+#else
+static int __init initcall_blacklist(char *str)
+{
+ pr_warn("initcall_blacklist requires CONFIG_KALLSYMS\n");
+ return 0;
+}
+
+static bool __init_or_module initcall_blacklisted(initcall_t fn)
+{
+ return false;
+}
+#endif
+__setup("initcall_blacklist=", initcall_blacklist);
static int __init_or_module do_one_initcall_debug(initcall_t fn)
{
@@ -671,8 +769,8 @@ static int __init_or_module do_one_initcall_debug(initcall_t fn)
rettime = ktime_get();
delta = ktime_sub(rettime, calltime);
duration = (unsigned long long) ktime_to_ns(delta) >> 10;
- printk(KERN_DEBUG "initcall %pF returned %d after %lld usecs\n", fn,
- ret, duration);
+ printk(KERN_DEBUG "initcall %pF returned %d after %lld usecs\n",
+ fn, ret, duration);
return ret;
}
@@ -681,6 +779,10 @@ int __init_or_module do_one_initcall(initcall_t fn)
{
int count = preempt_count();
int ret;
+ char msgbuf[64];
+
+ if (initcall_blacklisted(fn))
+ return -EPERM;
if (initcall_debug)
ret = do_one_initcall_debug(fn);
@@ -689,20 +791,15 @@ int __init_or_module do_one_initcall(initcall_t fn)
msgbuf[0] = 0;
- if (ret && ret != -ENODEV && initcall_debug)
- sprintf(msgbuf, "error code %d ", ret);
-
if (preempt_count() != count) {
- strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf));
- preempt_count() = count;
+ sprintf(msgbuf, "preemption imbalance ");
+ preempt_count_set(count);
}
if (irqs_disabled()) {
strlcat(msgbuf, "disabled interrupts ", sizeof(msgbuf));
local_irq_enable();
}
- if (msgbuf[0]) {
- printk("initcall %pF returned with %s\n", fn, msgbuf);
- }
+ WARN(msgbuf[0], "initcall %pF returned with %s\n", fn, msgbuf);
return ret;
}
@@ -748,9 +845,9 @@ static void __init do_initcall_level(int level)
extern const struct kernel_param __start___param[], __stop___param[];
initcall_t *fn;
- strcpy(static_command_line, saved_command_line);
+ strcpy(initcall_command_line, saved_command_line);
parse_args(initcall_level_names[level],
- static_command_line, __start___param,
+ initcall_command_line, __start___param,
__stop___param - __start___param,
level, level,
&repair_env_string);
@@ -784,6 +881,7 @@ static void __init do_basic_setup(void)
do_ctors();
usermodehelper_enable();
do_initcalls();
+ random_int_secret_init();
}
static void __init do_pre_smp_initcalls(void)
@@ -794,16 +892,45 @@ static void __init do_pre_smp_initcalls(void)
do_one_initcall(*fn);
}
+/*
+ * This function requests modules which should be loaded by default and is
+ * called twice right after initrd is mounted and right before init is
+ * exec'd. If such modules are on either initrd or rootfs, they will be
+ * loaded before control is passed to userland.
+ */
+void __init load_default_modules(void)
+{
+ load_default_elevator_module();
+}
+
static int run_init_process(const char *init_filename)
{
argv_init[0] = init_filename;
- return kernel_execve(init_filename, argv_init, envp_init);
+ return do_execve(getname_kernel(init_filename),
+ (const char __user *const __user *)argv_init,
+ (const char __user *const __user *)envp_init);
+}
+
+static int try_to_run_init_process(const char *init_filename)
+{
+ int ret;
+
+ ret = run_init_process(init_filename);
+
+ if (ret && ret != -ENOENT) {
+ pr_err("Starting init: %s exists but couldn't execute it (error %d)\n",
+ init_filename, ret);
+ }
+
+ return ret;
}
-static void __init kernel_init_freeable(void);
+static noinline void __init kernel_init_freeable(void);
static int __ref kernel_init(void *unused)
{
+ int ret;
+
kernel_init_freeable();
/* need to finish all async __init code before freeing the memory */
async_synchronize_full();
@@ -812,14 +939,14 @@ static int __ref kernel_init(void *unused)
system_state = SYSTEM_RUNNING;
numa_default_policy();
- current->signal->flags |= SIGNAL_UNKILLABLE;
flush_delayed_fput();
if (ramdisk_execute_command) {
- if (!run_init_process(ramdisk_execute_command))
+ ret = run_init_process(ramdisk_execute_command);
+ if (!ret)
return 0;
- printk(KERN_WARNING "Failed to execute %s\n",
- ramdisk_execute_command);
+ pr_err("Failed to execute %s (error %d)\n",
+ ramdisk_execute_command, ret);
}
/*
@@ -829,22 +956,23 @@ static int __ref kernel_init(void *unused)
* trying to recover a really broken machine.
*/
if (execute_command) {
- if (!run_init_process(execute_command))
+ ret = run_init_process(execute_command);
+ if (!ret)
return 0;
- printk(KERN_WARNING "Failed to execute %s. Attempting "
- "defaults...\n", execute_command);
+ pr_err("Failed to execute %s (error %d). Attempting defaults...\n",
+ execute_command, ret);
}
- if (!run_init_process("/sbin/init") ||
- !run_init_process("/etc/init") ||
- !run_init_process("/bin/init") ||
- !run_init_process("/bin/sh"))
+ if (!try_to_run_init_process("/sbin/init") ||
+ !try_to_run_init_process("/etc/init") ||
+ !try_to_run_init_process("/bin/init") ||
+ !try_to_run_init_process("/bin/sh"))
return 0;
- panic("No init found. Try passing init= option to kernel. "
+ panic("No working init found. Try passing init= option to kernel. "
"See Linux Documentation/init.txt for guidance.");
}
-static void __init kernel_init_freeable(void)
+static noinline void __init kernel_init_freeable(void)
{
/*
* Wait until kthreadd is all set-up.
@@ -857,7 +985,7 @@ static void __init kernel_init_freeable(void)
/*
* init can allocate pages on any node
*/
- set_mems_allowed(node_states[N_HIGH_MEMORY]);
+ set_mems_allowed(node_states[N_MEMORY]);
/*
* init can run on any cpu.
*/
@@ -877,7 +1005,7 @@ static void __init kernel_init_freeable(void)
/* Open the /dev/console on the rootfs, this should never fail */
if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
- printk(KERN_WARNING "Warning: unable to open an initial console.\n");
+ pr_err("Warning: unable to open an initial console.\n");
(void) sys_dup(0);
(void) sys_dup(0);
@@ -899,4 +1027,7 @@ static void __init kernel_init_freeable(void)
* we're essentially up and running. Get rid of the
* initmem segments and start the user-mode stuff..
*/
+
+ /* rootfs is available now, try loading default modules */
+ load_default_modules();
}
diff --git a/init/version.c b/init/version.c
index 86fe0ccb997..1a4718e500f 100644
--- a/init/version.c
+++ b/init/version.c
@@ -12,6 +12,7 @@
#include <linux/utsname.h>
#include <generated/utsrelease.h>
#include <linux/version.h>
+#include <linux/proc_ns.h>
#ifndef CONFIG_KALLSYMS
#define version(a) Version_ ## a
@@ -34,6 +35,7 @@ struct uts_namespace init_uts_ns = {
.domainname = UTS_DOMAINNAME,
},
.user_ns = &init_user_ns,
+ .proc_inum = PROC_UTS_INIT_INO,
};
EXPORT_SYMBOL_GPL(init_uts_ns);