diff options
Diffstat (limited to 'kernel/sysctl.c')
| -rw-r--r-- | kernel/sysctl.c | 1797 |
1 files changed, 943 insertions, 854 deletions
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8a68b244846..75b22e22a72 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -23,10 +23,14 @@ #include <linux/swap.h> #include <linux/slab.h> #include <linux/sysctl.h> +#include <linux/bitmap.h> +#include <linux/signal.h> +#include <linux/printk.h> #include <linux/proc_fs.h> #include <linux/security.h> #include <linux/ctype.h> #include <linux/kmemcheck.h> +#include <linux/kmemleak.h> #include <linux/fs.h> #include <linux/init.h> #include <linux/kernel.h> @@ -36,20 +40,29 @@ #include <linux/highuid.h> #include <linux/writeback.h> #include <linux/ratelimit.h> +#include <linux/compaction.h> #include <linux/hugetlb.h> #include <linux/initrd.h> #include <linux/key.h> #include <linux/times.h> #include <linux/limits.h> #include <linux/dcache.h> +#include <linux/dnotify.h> #include <linux/syscalls.h> #include <linux/vmstat.h> #include <linux/nfs_fs.h> #include <linux/acpi.h> #include <linux/reboot.h> #include <linux/ftrace.h> -#include <linux/slow-work.h> #include <linux/perf_event.h> +#include <linux/kprobes.h> +#include <linux/pipe_fs_i.h> +#include <linux/oom.h> +#include <linux/kmod.h> +#include <linux/capability.h> +#include <linux/binfmts.h> +#include <linux/sched/sysctl.h> +#include <linux/kexec.h> #include <asm/uaccess.h> #include <asm/processor.h> @@ -59,27 +72,39 @@ #include <asm/stacktrace.h> #include <asm/io.h> #endif +#ifdef CONFIG_SPARC +#include <asm/setup.h> +#endif +#ifdef CONFIG_BSD_PROCESS_ACCT +#include <linux/acct.h> +#endif +#ifdef CONFIG_RT_MUTEXES +#include <linux/rtmutex.h> +#endif +#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT) +#include <linux/lockdep.h> +#endif +#ifdef CONFIG_CHR_DEV_SG +#include <scsi/sg.h> +#endif + +#ifdef CONFIG_LOCKUP_DETECTOR +#include <linux/nmi.h> +#endif #if defined(CONFIG_SYSCTL) /* External variables not in a header file. */ -extern int C_A_D; -extern int print_fatal_signals; -extern int sysctl_overcommit_memory; -extern int sysctl_overcommit_ratio; -extern int sysctl_panic_on_oom; -extern int sysctl_oom_kill_allocating_task; -extern int sysctl_oom_dump_tasks; extern int max_threads; -extern int core_uses_pid; extern int suid_dumpable; +#ifdef CONFIG_COREDUMP +extern int core_uses_pid; extern char core_pattern[]; extern unsigned int core_pipe_limit; +#endif extern int pid_max; -extern int min_free_kbytes; extern int pid_max_min, pid_max_max; -extern int sysctl_drop_caches; extern int percpu_pagelist_fraction; extern int compat_log; extern int latencytop_enabled; @@ -87,22 +112,18 @@ extern int sysctl_nr_open_min, sysctl_nr_open_max; #ifndef CONFIG_MMU extern int sysctl_nr_trim_pages; #endif -#ifdef CONFIG_RCU_TORTURE_TEST -extern int rcutorture_runnable; -#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ -#ifdef CONFIG_BLOCK -extern int blk_iopoll_enabled; -#endif /* Constants used for minimum and maximum */ -#ifdef CONFIG_DETECT_SOFTLOCKUP +#ifdef CONFIG_LOCKUP_DETECTOR static int sixty = 60; -static int neg_one = -1; #endif +static int __maybe_unused neg_one = -1; + static int zero; static int __maybe_unused one = 1; static int __maybe_unused two = 2; +static int __maybe_unused four = 4; static unsigned long one_ul = 1; static int one_hundred = 100; #ifdef CONFIG_PRINTK @@ -115,74 +136,84 @@ static unsigned long dirty_bytes_min = 2 * PAGE_SIZE; /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ static int maxolduid = 65535; static int minolduid; -static int min_percpu_pagelist_fract = 8; static int ngroups_max = NGROUPS_MAX; +static const int cap_last_cap = CAP_LAST_CAP; -#ifdef CONFIG_MODULES -extern char modprobe_path[]; -extern int modules_disabled; -#endif -#ifdef CONFIG_CHR_DEV_SG -extern int sg_big_buff; +/*this is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs */ +#ifdef CONFIG_DETECT_HUNG_TASK +static unsigned long hung_task_timeout_max = (LONG_MAX/HZ); #endif -#ifdef CONFIG_SPARC -#include <asm/system.h> +#ifdef CONFIG_INOTIFY_USER +#include <linux/inotify.h> #endif - -#ifdef CONFIG_SPARC64 -extern int sysctl_tsb_ratio; +#ifdef CONFIG_SPARC #endif #ifdef __hppa__ extern int pwrsw_enabled; -extern int unaligned_enabled; -#endif - -#ifdef CONFIG_S390 -#ifdef CONFIG_MATHEMU -extern int sysctl_ieee_emulation_warnings; -#endif -extern int sysctl_userprocess_debug; -extern int spin_retry; #endif -#ifdef CONFIG_BSD_PROCESS_ACCT -extern int acct_parm[]; +#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW +extern int unaligned_enabled; #endif #ifdef CONFIG_IA64 -extern int no_unaligned_warning; extern int unaligned_dump_stack; #endif -extern struct ratelimit_state printk_ratelimit_state; - -#ifdef CONFIG_RT_MUTEXES -extern int max_lock_depth; +#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN +extern int no_unaligned_warning; #endif #ifdef CONFIG_PROC_SYSCTL + +#define SYSCTL_WRITES_LEGACY -1 +#define SYSCTL_WRITES_WARN 0 +#define SYSCTL_WRITES_STRICT 1 + +static int sysctl_writes_strict = SYSCTL_WRITES_WARN; + static int proc_do_cad_pid(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); static int proc_taint(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); #endif -static struct ctl_table root_table[]; -static struct ctl_table_root sysctl_table_root; -static struct ctl_table_header root_table_header = { - .count = 1, - .ctl_table = root_table, - .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list), - .root = &sysctl_table_root, - .set = &sysctl_table_root.default_set, -}; -static struct ctl_table_root sysctl_table_root = { - .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list), - .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry), -}; +#ifdef CONFIG_PRINTK +static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); +#endif + +static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); +#ifdef CONFIG_COREDUMP +static int proc_dostring_coredump(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); +#endif + +#ifdef CONFIG_MAGIC_SYSRQ +/* Note: sysrq code uses it's own private copy */ +static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE; + +static int sysrq_sysctl_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int error; + + error = proc_dointvec(table, write, buffer, lenp, ppos); + if (error) + return error; + + if (write) + sysrq_toggle_support(__sysrq_enabled); + + return 0; +} + +#endif static struct ctl_table kern_table[]; static struct ctl_table vm_table[]; @@ -190,9 +221,6 @@ static struct ctl_table fs_table[]; static struct ctl_table debug_table[]; static struct ctl_table dev_table[]; extern struct ctl_table random_table[]; -#ifdef CONFIG_INOTIFY_USER -extern struct ctl_table inotify_table[]; -#endif #ifdef CONFIG_EPOLL extern struct ctl_table epoll_table[]; #endif @@ -201,12 +229,9 @@ extern struct ctl_table epoll_table[]; int sysctl_legacy_va_layout; #endif -extern int prove_locking; -extern int lock_stat; - /* The default sysctl tables: */ -static struct ctl_table root_table[] = { +static struct ctl_table sysctl_base_table[] = { { .procname = "kernel", .mode = 0555, @@ -232,10 +257,6 @@ static struct ctl_table root_table[] = { .mode = 0555, .child = dev_table, }, -/* - * NOTE: do not add new entries to this table unless you have read - * Documentation/sysctl/ctl_unnumbered.txt - */ { } }; @@ -244,10 +265,15 @@ static int min_sched_granularity_ns = 100000; /* 100 usecs */ static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ static int min_wakeup_granularity_ns; /* 0 usecs */ static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ +#ifdef CONFIG_SMP static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1; -static int min_sched_shares_ratelimit = 100000; /* 100 usec */ -static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */ +#endif /* CONFIG_SMP */ +#endif /* CONFIG_SCHED_DEBUG */ + +#ifdef CONFIG_COMPACTION +static int min_extfrag_threshold; +static int max_extfrag_threshold = 1000; #endif static struct ctl_table kern_table[] = { @@ -286,15 +312,7 @@ static struct ctl_table kern_table[] = { .extra1 = &min_wakeup_granularity_ns, .extra2 = &max_wakeup_granularity_ns, }, - { - .procname = "sched_shares_ratelimit", - .data = &sysctl_sched_shares_ratelimit, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = sched_proc_update_handler, - .extra1 = &min_sched_shares_ratelimit, - .extra2 = &max_sched_shares_ratelimit, - }, +#ifdef CONFIG_SMP { .procname = "sched_tunable_scaling", .data = &sysctl_sched_tunable_scaling, @@ -305,15 +323,7 @@ static struct ctl_table kern_table[] = { .extra2 = &max_sched_tunable_scaling, }, { - .procname = "sched_shares_thresh", - .data = &sysctl_sched_shares_thresh, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - }, - { - .procname = "sched_migration_cost", + .procname = "sched_migration_cost_ns", .data = &sysctl_sched_migration_cost, .maxlen = sizeof(unsigned int), .mode = 0644, @@ -327,13 +337,20 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, { - .procname = "sched_time_avg", + .procname = "sched_time_avg_ms", .data = &sysctl_sched_time_avg, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, { + .procname = "sched_shares_window_ns", + .data = &sysctl_sched_shares_window, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { .procname = "timer_migration", .data = &sysctl_timer_migration, .maxlen = sizeof(unsigned int), @@ -342,7 +359,47 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &one, }, -#endif +#endif /* CONFIG_SMP */ +#ifdef CONFIG_NUMA_BALANCING + { + .procname = "numa_balancing_scan_delay_ms", + .data = &sysctl_numa_balancing_scan_delay, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "numa_balancing_scan_period_min_ms", + .data = &sysctl_numa_balancing_scan_period_min, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "numa_balancing_scan_period_max_ms", + .data = &sysctl_numa_balancing_scan_period_max, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "numa_balancing_scan_size_mb", + .data = &sysctl_numa_balancing_scan_size, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "numa_balancing", + .data = NULL, /* filled in by handler */ + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sysctl_numa_balancing, + .extra1 = &zero, + .extra2 = &one, + }, +#endif /* CONFIG_NUMA_BALANCING */ +#endif /* CONFIG_SCHED_DEBUG */ { .procname = "sched_rt_period_us", .data = &sysctl_sched_rt_period, @@ -358,12 +415,33 @@ static struct ctl_table kern_table[] = { .proc_handler = sched_rt_handler, }, { - .procname = "sched_compat_yield", - .data = &sysctl_sched_compat_yield, + .procname = "sched_rr_timeslice_ms", + .data = &sched_rr_timeslice, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = sched_rr_handler, + }, +#ifdef CONFIG_SCHED_AUTOGROUP + { + .procname = "sched_autogroup_enabled", + .data = &sysctl_sched_autogroup_enabled, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, }, +#endif +#ifdef CONFIG_CFS_BANDWIDTH + { + .procname = "sched_cfs_bandwidth_slice_us", + .data = &sysctl_sched_cfs_bandwidth_slice, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + }, +#endif #ifdef CONFIG_PROVE_LOCKING { .procname = "prove_locking", @@ -389,6 +467,7 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_COREDUMP { .procname = "core_uses_pid", .data = &core_uses_pid, @@ -401,7 +480,7 @@ static struct ctl_table kern_table[] = { .data = core_pattern, .maxlen = CORENAME_MAX_SIZE, .mode = 0644, - .proc_handler = proc_dostring, + .proc_handler = proc_dostring_coredump, }, { .procname = "core_pipe_limit", @@ -410,6 +489,7 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#endif #ifdef CONFIG_PROC_SYSCTL { .procname = "tainted", @@ -417,6 +497,15 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_taint, }, + { + .procname = "sysctl_writes_strict", + .data = &sysctl_writes_strict, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &neg_one, + .extra2 = &one, + }, #endif #ifdef CONFIG_LATENCYTOP { @@ -483,6 +572,8 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#endif +#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW { .procname = "unaligned-trap", .data = &unaligned_enabled, @@ -524,6 +615,25 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "traceoff_on_warning", + .data = &__disable_trace_on_warning, + .maxlen = sizeof(__disable_trace_on_warning), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif +#ifdef CONFIG_KEXEC + { + .procname = "kexec_load_disabled", + .data = &kexec_load_disabled, + .maxlen = sizeof(int), + .mode = 0644, + /* only handle a transition from default "0" to "1" */ + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &one, + }, #endif #ifdef CONFIG_MODULES { @@ -544,7 +654,7 @@ static struct ctl_table kern_table[] = { .extra2 = &one, }, #endif -#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET) +#ifdef CONFIG_UEVENT_HELPER { .procname = "hotplug", .data = &uevent_helper, @@ -577,7 +687,7 @@ static struct ctl_table kern_table[] = { .data = &__sysrq_enabled, .maxlen = sizeof (int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = sysrq_sysctl_handler, }, #endif #ifdef CONFIG_PROC_SYSCTL @@ -602,6 +712,11 @@ static struct ctl_table kern_table[] = { .child = random_table, }, { + .procname = "usermodehelper", + .mode = 0555, + .child = usermodehelper_table, + }, + { .procname = "overflowuid", .data = &overflowuid, .maxlen = sizeof(int), @@ -631,7 +746,7 @@ static struct ctl_table kern_table[] = { #endif { .procname = "userprocess_debug", - .data = &sysctl_userprocess_debug, + .data = &show_unhandled_signals, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, @@ -684,6 +799,24 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &ten_thousand, }, + { + .procname = "dmesg_restrict", + .data = &dmesg_restrict, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax_sysadmin, + .extra1 = &zero, + .extra2 = &one, + }, + { + .procname = "kptr_restrict", + .data = &kptr_restrict, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax_sysadmin, + .extra1 = &zero, + .extra2 = &two, + }, #endif { .procname = "ngroups_max", @@ -692,20 +825,69 @@ static struct ctl_table kern_table[] = { .mode = 0444, .proc_handler = proc_dointvec, }, -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) { - .procname = "unknown_nmi_panic", - .data = &unknown_nmi_panic, + .procname = "cap_last_cap", + .data = (void *)&cap_last_cap, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_dointvec, + }, +#if defined(CONFIG_LOCKUP_DETECTOR) + { + .procname = "watchdog", + .data = &watchdog_user_enabled, .maxlen = sizeof (int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dowatchdog, + .extra1 = &zero, + .extra2 = &one, + }, + { + .procname = "watchdog_thresh", + .data = &watchdog_thresh, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dowatchdog, + .extra1 = &zero, + .extra2 = &sixty, }, { + .procname = "softlockup_panic", + .data = &softlockup_panic, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, +#ifdef CONFIG_SMP + { + .procname = "softlockup_all_cpu_backtrace", + .data = &sysctl_softlockup_all_cpu_backtrace, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, +#endif /* CONFIG_SMP */ + { .procname = "nmi_watchdog", - .data = &nmi_watchdog_enabled, + .data = &watchdog_user_enabled, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = proc_dowatchdog, + .extra1 = &zero, + .extra2 = &one, + }, +#endif +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) + { + .procname = "unknown_nmi_panic", + .data = &unknown_nmi_panic, .maxlen = sizeof (int), .mode = 0644, - .proc_handler = proc_nmi_enabled, + .proc_handler = proc_dointvec, }, #endif #if defined(CONFIG_X86) @@ -723,6 +905,15 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_DEBUG_STACKOVERFLOW + { + .procname = "panic_on_stackoverflow", + .data = &sysctl_panic_on_stackoverflow, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif { .procname = "bootloader_type", .data = &bootloader_type, @@ -779,7 +970,7 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_doulongvec_minmax, }, #endif -#ifdef CONFIG_IA64 +#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN { .procname = "ignore-unaligned-usertrap", .data = &no_unaligned_warning, @@ -787,6 +978,8 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#endif +#ifdef CONFIG_IA64 { .procname = "unaligned-dump-stack", .data = &unaligned_dump_stack, @@ -795,26 +988,6 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif -#ifdef CONFIG_DETECT_SOFTLOCKUP - { - .procname = "softlockup_panic", - .data = &softlockup_panic, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, - }, - { - .procname = "softlockup_thresh", - .data = &softlockup_thresh, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dosoftlockup_thresh, - .extra1 = &neg_one, - .extra2 = &sixty, - }, -#endif #ifdef CONFIG_DETECT_HUNG_TASK { .procname = "hung_task_panic", @@ -828,9 +1001,10 @@ static struct ctl_table kern_table[] = { { .procname = "hung_task_check_count", .data = &sysctl_hung_task_check_count, - .maxlen = sizeof(unsigned long), + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_doulongvec_minmax, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, { .procname = "hung_task_timeout_secs", @@ -838,13 +1012,15 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_dohung_task_timeout_secs, + .extra2 = &hung_task_timeout_max, }, { .procname = "hung_task_warnings", .data = &sysctl_hung_task_warnings, - .maxlen = sizeof(unsigned long), + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_doulongvec_minmax, + .proc_handler = proc_dointvec_minmax, + .extra1 = &neg_one, }, #endif #ifdef CONFIG_COMPAT @@ -888,14 +1064,13 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif -#ifdef CONFIG_SLOW_WORK - { - .procname = "slow-work", - .mode = 0555, - .child = slow_work_sysctls, - }, -#endif #ifdef CONFIG_PERF_EVENTS + /* + * User-space scripts rely on the existence of this file + * as a feature check for perf_events being enabled. + * + * So it's an ABI, do not remove! + */ { .procname = "perf_event_paranoid", .data = &sysctl_perf_event_paranoid, @@ -915,31 +1090,28 @@ static struct ctl_table kern_table[] = { .data = &sysctl_perf_event_sample_rate, .maxlen = sizeof(sysctl_perf_event_sample_rate), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = perf_proc_update_handler, + .extra1 = &one, }, -#endif -#ifdef CONFIG_KMEMCHECK { - .procname = "kmemcheck", - .data = &kmemcheck_enabled, - .maxlen = sizeof(int), + .procname = "perf_cpu_time_max_percent", + .data = &sysctl_perf_cpu_time_max_percent, + .maxlen = sizeof(sysctl_perf_cpu_time_max_percent), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = perf_cpu_time_max_percent_handler, + .extra1 = &zero, + .extra2 = &one_hundred, }, #endif -#ifdef CONFIG_BLOCK +#ifdef CONFIG_KMEMCHECK { - .procname = "blk_iopoll", - .data = &blk_iopoll_enabled, + .procname = "kmemcheck", + .data = &kmemcheck_enabled, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, #endif -/* - * NOTE: do not add new entries to this table unless you have read - * Documentation/sysctl/ctl_unnumbered.txt - */ { } }; @@ -949,14 +1121,18 @@ static struct ctl_table vm_table[] = { .data = &sysctl_overcommit_memory, .maxlen = sizeof(sysctl_overcommit_memory), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &two, }, { .procname = "panic_on_oom", .data = &sysctl_panic_on_oom, .maxlen = sizeof(sysctl_panic_on_oom), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &two, }, { .procname = "oom_kill_allocating_task", @@ -977,14 +1153,22 @@ static struct ctl_table vm_table[] = { .data = &sysctl_overcommit_ratio, .maxlen = sizeof(sysctl_overcommit_ratio), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = overcommit_ratio_handler, + }, + { + .procname = "overcommit_kbytes", + .data = &sysctl_overcommit_kbytes, + .maxlen = sizeof(sysctl_overcommit_kbytes), + .mode = 0644, + .proc_handler = overcommit_kbytes_handler, }, { .procname = "page-cluster", .data = &page_cluster, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, { .procname = "dirty_background_ratio", @@ -1032,14 +1216,13 @@ static struct ctl_table vm_table[] = { .data = &dirty_expire_interval, .maxlen = sizeof(dirty_expire_interval), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, { - .procname = "nr_pdflush_threads", - .data = &nr_pdflush_threads, - .maxlen = sizeof nr_pdflush_threads, - .mode = 0444 /* read-only*/, - .proc_handler = proc_dointvec, + .procname = "nr_pdflush_threads", + .mode = 0444 /* read-only */, + .proc_handler = pdflush_proc_obsolete, }, { .procname = "swappiness", @@ -1083,7 +1266,7 @@ static struct ctl_table vm_table[] = { .data = &hugepages_treat_as_movable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = hugetlb_treat_movable_handler, + .proc_handler = proc_dointvec, }, { .procname = "nr_overcommit_hugepages", @@ -1108,7 +1291,28 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = drop_caches_sysctl_handler, + .extra1 = &one, + .extra2 = &four, + }, +#ifdef CONFIG_COMPACTION + { + .procname = "compact_memory", + .data = &sysctl_compact_memory, + .maxlen = sizeof(int), + .mode = 0200, + .proc_handler = sysctl_compaction_handler, + }, + { + .procname = "extfrag_threshold", + .data = &sysctl_extfrag_threshold, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = sysctl_extfrag_handler, + .extra1 = &min_extfrag_threshold, + .extra2 = &max_extfrag_threshold, }, + +#endif /* CONFIG_COMPACTION */ { .procname = "min_free_kbytes", .data = &min_free_kbytes, @@ -1123,7 +1327,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(percpu_pagelist_fraction), .mode = 0644, .proc_handler = percpu_pagelist_fraction_sysctl_handler, - .extra1 = &min_percpu_pagelist_fract, + .extra1 = &zero, }, #ifdef CONFIG_MMU { @@ -1236,8 +1440,13 @@ static struct ctl_table vm_table[] = { (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL)) { .procname = "vdso_enabled", +#ifdef CONFIG_X86_32 + .data = &vdso32_enabled, + .maxlen = sizeof(vdso32_enabled), +#else .data = &vdso_enabled, .maxlen = sizeof(vdso_enabled), +#endif .mode = 0644, .proc_handler = proc_dointvec, .extra1 = &zero, @@ -1281,11 +1490,20 @@ static struct ctl_table vm_table[] = { .extra2 = &one, }, #endif - -/* - * NOTE: do not add new entries to this table unless you have read - * Documentation/sysctl/ctl_unnumbered.txt - */ + { + .procname = "user_reserve_kbytes", + .data = &sysctl_user_reserve_kbytes, + .maxlen = sizeof(sysctl_user_reserve_kbytes), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, + { + .procname = "admin_reserve_kbytes", + .data = &sysctl_admin_reserve_kbytes, + .maxlen = sizeof(sysctl_admin_reserve_kbytes), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, { } }; @@ -1299,30 +1517,30 @@ static struct ctl_table fs_table[] = { { .procname = "inode-nr", .data = &inodes_stat, - .maxlen = 2*sizeof(int), + .maxlen = 2*sizeof(long), .mode = 0444, - .proc_handler = proc_dointvec, + .proc_handler = proc_nr_inodes, }, { .procname = "inode-state", .data = &inodes_stat, - .maxlen = 7*sizeof(int), + .maxlen = 7*sizeof(long), .mode = 0444, - .proc_handler = proc_dointvec, + .proc_handler = proc_nr_inodes, }, { .procname = "file-nr", .data = &files_stat, - .maxlen = 3*sizeof(int), + .maxlen = sizeof(files_stat), .mode = 0444, .proc_handler = proc_nr_files, }, { .procname = "file-max", .data = &files_stat.max_files, - .maxlen = sizeof(int), + .maxlen = sizeof(files_stat.max_files), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_doulongvec_minmax, }, { .procname = "nr_open", @@ -1336,9 +1554,9 @@ static struct ctl_table fs_table[] = { { .procname = "dentry-state", .data = &dentry_stat, - .maxlen = 6*sizeof(int), + .maxlen = 6*sizeof(long), .mode = 0444, - .proc_handler = proc_dointvec, + .proc_handler = proc_nr_dentry, }, { .procname = "overflowuid", @@ -1418,11 +1636,29 @@ static struct ctl_table fs_table[] = { #endif #endif { + .procname = "protected_symlinks", + .data = &sysctl_protected_symlinks, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, + { + .procname = "protected_hardlinks", + .data = &sysctl_protected_hardlinks, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, + { .procname = "suid_dumpable", .data = &suid_dumpable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax_coredump, .extra1 = &zero, .extra2 = &two, }, @@ -1433,15 +1669,19 @@ static struct ctl_table fs_table[] = { .child = binfmt_misc_table, }, #endif -/* - * NOTE: do not add new entries to this table unless you have read - * Documentation/sysctl/ctl_unnumbered.txt - */ + { + .procname = "pipe-max-size", + .data = &pipe_max_size, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &pipe_proc_fn, + .extra1 = &pipe_min_size, + }, { } }; static struct ctl_table debug_table[] = { -#if defined(CONFIG_X86) || defined(CONFIG_PPC) +#ifdef CONFIG_SYSCTL_EXCEPTION_TRACE { .procname = "exception-trace", .data = &show_unhandled_signals, @@ -1450,6 +1690,17 @@ static struct ctl_table debug_table[] = { .proc_handler = proc_dointvec }, #endif +#if defined(CONFIG_OPTPROBES) + { + .procname = "kprobes-optimization", + .data = &sysctl_kprobes_optimization, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_kprobes_optimization_handler, + .extra1 = &zero, + .extra2 = &one, + }, +#endif { } }; @@ -1457,498 +1708,15 @@ static struct ctl_table dev_table[] = { { } }; -static DEFINE_SPINLOCK(sysctl_lock); - -/* called under sysctl_lock */ -static int use_table(struct ctl_table_header *p) -{ - if (unlikely(p->unregistering)) - return 0; - p->used++; - return 1; -} - -/* called under sysctl_lock */ -static void unuse_table(struct ctl_table_header *p) -{ - if (!--p->used) - if (unlikely(p->unregistering)) - complete(p->unregistering); -} - -/* called under sysctl_lock, will reacquire if has to wait */ -static void start_unregistering(struct ctl_table_header *p) -{ - /* - * if p->used is 0, nobody will ever touch that entry again; - * we'll eliminate all paths to it before dropping sysctl_lock - */ - if (unlikely(p->used)) { - struct completion wait; - init_completion(&wait); - p->unregistering = &wait; - spin_unlock(&sysctl_lock); - wait_for_completion(&wait); - spin_lock(&sysctl_lock); - } else { - /* anything non-NULL; we'll never dereference it */ - p->unregistering = ERR_PTR(-EINVAL); - } - /* - * do not remove from the list until nobody holds it; walking the - * list in do_sysctl() relies on that. - */ - list_del_init(&p->ctl_entry); -} - -void sysctl_head_get(struct ctl_table_header *head) -{ - spin_lock(&sysctl_lock); - head->count++; - spin_unlock(&sysctl_lock); -} - -void sysctl_head_put(struct ctl_table_header *head) -{ - spin_lock(&sysctl_lock); - if (!--head->count) - kfree(head); - spin_unlock(&sysctl_lock); -} - -struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) -{ - if (!head) - BUG(); - spin_lock(&sysctl_lock); - if (!use_table(head)) - head = ERR_PTR(-ENOENT); - spin_unlock(&sysctl_lock); - return head; -} - -void sysctl_head_finish(struct ctl_table_header *head) -{ - if (!head) - return; - spin_lock(&sysctl_lock); - unuse_table(head); - spin_unlock(&sysctl_lock); -} - -static struct ctl_table_set * -lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) -{ - struct ctl_table_set *set = &root->default_set; - if (root->lookup) - set = root->lookup(root, namespaces); - return set; -} - -static struct list_head * -lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces) -{ - struct ctl_table_set *set = lookup_header_set(root, namespaces); - return &set->list; -} - -struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, - struct ctl_table_header *prev) -{ - struct ctl_table_root *root; - struct list_head *header_list; - struct ctl_table_header *head; - struct list_head *tmp; - - spin_lock(&sysctl_lock); - if (prev) { - head = prev; - tmp = &prev->ctl_entry; - unuse_table(prev); - goto next; - } - tmp = &root_table_header.ctl_entry; - for (;;) { - head = list_entry(tmp, struct ctl_table_header, ctl_entry); - - if (!use_table(head)) - goto next; - spin_unlock(&sysctl_lock); - return head; - next: - root = head->root; - tmp = tmp->next; - header_list = lookup_header_list(root, namespaces); - if (tmp != header_list) - continue; - - do { - root = list_entry(root->root_list.next, - struct ctl_table_root, root_list); - if (root == &sysctl_table_root) - goto out; - header_list = lookup_header_list(root, namespaces); - } while (list_empty(header_list)); - tmp = header_list->next; - } -out: - spin_unlock(&sysctl_lock); - return NULL; -} - -struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev) -{ - return __sysctl_head_next(current->nsproxy, prev); -} - -void register_sysctl_root(struct ctl_table_root *root) -{ - spin_lock(&sysctl_lock); - list_add_tail(&root->root_list, &sysctl_table_root.root_list); - spin_unlock(&sysctl_lock); -} - -/* - * sysctl_perm does NOT grant the superuser all rights automatically, because - * some sysctl variables are readonly even to root. - */ - -static int test_perm(int mode, int op) +int __init sysctl_init(void) { - if (!current_euid()) - mode >>= 6; - else if (in_egroup_p(0)) - mode >>= 3; - if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0) - return 0; - return -EACCES; -} - -int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) -{ - int error; - int mode; - - error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC)); - if (error) - return error; - - if (root->permissions) - mode = root->permissions(root, current->nsproxy, table); - else - mode = table->mode; - - return test_perm(mode, op); -} + struct ctl_table_header *hdr; -static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) -{ - for (; table->procname; table++) { - table->parent = parent; - if (table->child) - sysctl_set_parent(table, table->child); - } -} - -static __init int sysctl_init(void) -{ - sysctl_set_parent(NULL, root_table); -#ifdef CONFIG_SYSCTL_SYSCALL_CHECK - { - int err; - err = sysctl_check_table(current->nsproxy, root_table); - } -#endif + hdr = register_sysctl_table(sysctl_base_table); + kmemleak_not_leak(hdr); return 0; } -core_initcall(sysctl_init); - -static struct ctl_table *is_branch_in(struct ctl_table *branch, - struct ctl_table *table) -{ - struct ctl_table *p; - const char *s = branch->procname; - - /* branch should have named subdirectory as its first element */ - if (!s || !branch->child) - return NULL; - - /* ... and nothing else */ - if (branch[1].procname) - return NULL; - - /* table should contain subdirectory with the same name */ - for (p = table; p->procname; p++) { - if (!p->child) - continue; - if (p->procname && strcmp(p->procname, s) == 0) - return p; - } - return NULL; -} - -/* see if attaching q to p would be an improvement */ -static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) -{ - struct ctl_table *to = p->ctl_table, *by = q->ctl_table; - struct ctl_table *next; - int is_better = 0; - int not_in_parent = !p->attached_by; - - while ((next = is_branch_in(by, to)) != NULL) { - if (by == q->attached_by) - is_better = 1; - if (to == p->attached_by) - not_in_parent = 1; - by = by->child; - to = next->child; - } - - if (is_better && not_in_parent) { - q->attached_by = by; - q->attached_to = to; - q->parent = p; - } -} - -/** - * __register_sysctl_paths - register a sysctl hierarchy - * @root: List of sysctl headers to register on - * @namespaces: Data to compute which lists of sysctl entries are visible - * @path: The path to the directory the sysctl table is in. - * @table: the top-level table structure - * - * Register a sysctl table hierarchy. @table should be a filled in ctl_table - * array. A completely 0 filled entry terminates the table. - * - * The members of the &struct ctl_table structure are used as follows: - * - * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not - * enter a sysctl file - * - * data - a pointer to data for use by proc_handler - * - * maxlen - the maximum size in bytes of the data - * - * mode - the file permissions for the /proc/sys file, and for sysctl(2) - * - * child - a pointer to the child sysctl table if this entry is a directory, or - * %NULL. - * - * proc_handler - the text handler routine (described below) - * - * de - for internal use by the sysctl routines - * - * extra1, extra2 - extra pointers usable by the proc handler routines - * - * Leaf nodes in the sysctl tree will be represented by a single file - * under /proc; non-leaf nodes will be represented by directories. - * - * sysctl(2) can automatically manage read and write requests through - * the sysctl table. The data and maxlen fields of the ctl_table - * struct enable minimal validation of the values being written to be - * performed, and the mode field allows minimal authentication. - * - * There must be a proc_handler routine for any terminal nodes - * mirrored under /proc/sys (non-terminals are handled by a built-in - * directory handler). Several default handlers are available to - * cover common cases - - * - * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), - * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), - * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax() - * - * It is the handler's job to read the input buffer from user memory - * and process it. The handler should return 0 on success. - * - * This routine returns %NULL on a failure to register, and a pointer - * to the table header on success. - */ -struct ctl_table_header *__register_sysctl_paths( - struct ctl_table_root *root, - struct nsproxy *namespaces, - const struct ctl_path *path, struct ctl_table *table) -{ - struct ctl_table_header *header; - struct ctl_table *new, **prevp; - unsigned int n, npath; - struct ctl_table_set *set; - - /* Count the path components */ - for (npath = 0; path[npath].procname; ++npath) - ; - - /* - * For each path component, allocate a 2-element ctl_table array. - * The first array element will be filled with the sysctl entry - * for this, the second will be the sentinel (procname == 0). - * - * We allocate everything in one go so that we don't have to - * worry about freeing additional memory in unregister_sysctl_table. - */ - header = kzalloc(sizeof(struct ctl_table_header) + - (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL); - if (!header) - return NULL; - - new = (struct ctl_table *) (header + 1); - - /* Now connect the dots */ - prevp = &header->ctl_table; - for (n = 0; n < npath; ++n, ++path) { - /* Copy the procname */ - new->procname = path->procname; - new->mode = 0555; - - *prevp = new; - prevp = &new->child; - - new += 2; - } - *prevp = table; - header->ctl_table_arg = table; - - INIT_LIST_HEAD(&header->ctl_entry); - header->used = 0; - header->unregistering = NULL; - header->root = root; - sysctl_set_parent(NULL, header->ctl_table); - header->count = 1; -#ifdef CONFIG_SYSCTL_SYSCALL_CHECK - if (sysctl_check_table(namespaces, header->ctl_table)) { - kfree(header); - return NULL; - } -#endif - spin_lock(&sysctl_lock); - header->set = lookup_header_set(root, namespaces); - header->attached_by = header->ctl_table; - header->attached_to = root_table; - header->parent = &root_table_header; - for (set = header->set; set; set = set->parent) { - struct ctl_table_header *p; - list_for_each_entry(p, &set->list, ctl_entry) { - if (p->unregistering) - continue; - try_attach(p, header); - } - } - header->parent->count++; - list_add_tail(&header->ctl_entry, &header->set->list); - spin_unlock(&sysctl_lock); - - return header; -} - -/** - * register_sysctl_table_path - register a sysctl table hierarchy - * @path: The path to the directory the sysctl table is in. - * @table: the top-level table structure - * - * Register a sysctl table hierarchy. @table should be a filled in ctl_table - * array. A completely 0 filled entry terminates the table. - * - * See __register_sysctl_paths for more details. - */ -struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, - struct ctl_table *table) -{ - return __register_sysctl_paths(&sysctl_table_root, current->nsproxy, - path, table); -} - -/** - * register_sysctl_table - register a sysctl table hierarchy - * @table: the top-level table structure - * - * Register a sysctl table hierarchy. @table should be a filled in ctl_table - * array. A completely 0 filled entry terminates the table. - * - * See register_sysctl_paths for more details. - */ -struct ctl_table_header *register_sysctl_table(struct ctl_table *table) -{ - static const struct ctl_path null_path[] = { {} }; - - return register_sysctl_paths(null_path, table); -} - -/** - * unregister_sysctl_table - unregister a sysctl table hierarchy - * @header: the header returned from register_sysctl_table - * - * Unregisters the sysctl table and all children. proc entries may not - * actually be removed until they are no longer used by anyone. - */ -void unregister_sysctl_table(struct ctl_table_header * header) -{ - might_sleep(); - - if (header == NULL) - return; - - spin_lock(&sysctl_lock); - start_unregistering(header); - if (!--header->parent->count) { - WARN_ON(1); - kfree(header->parent); - } - if (!--header->count) - kfree(header); - spin_unlock(&sysctl_lock); -} - -int sysctl_is_seen(struct ctl_table_header *p) -{ - struct ctl_table_set *set = p->set; - int res; - spin_lock(&sysctl_lock); - if (p->unregistering) - res = 0; - else if (!set->is_seen) - res = 1; - else - res = set->is_seen(set); - spin_unlock(&sysctl_lock); - return res; -} - -void setup_sysctl_set(struct ctl_table_set *p, - struct ctl_table_set *parent, - int (*is_seen)(struct ctl_table_set *)) -{ - INIT_LIST_HEAD(&p->list); - p->parent = parent ? parent : &sysctl_table_root.default_set; - p->is_seen = is_seen; -} - -#else /* !CONFIG_SYSCTL */ -struct ctl_table_header *register_sysctl_table(struct ctl_table * table) -{ - return NULL; -} - -struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, - struct ctl_table *table) -{ - return NULL; -} - -void unregister_sysctl_table(struct ctl_table_header * table) -{ -} - -void setup_sysctl_set(struct ctl_table_set *p, - struct ctl_table_set *parent, - int (*is_seen)(struct ctl_table_set *)) -{ -} - -void sysctl_head_put(struct ctl_table_header *head) -{ -} - #endif /* CONFIG_SYSCTL */ /* @@ -1957,8 +1725,8 @@ void sysctl_head_put(struct ctl_table_header *head) #ifdef CONFIG_PROC_SYSCTL -static int _proc_do_string(void* data, int maxlen, int write, - void __user *buffer, +static int _proc_do_string(char *data, int maxlen, int write, + char __user *buffer, size_t *lenp, loff_t *ppos) { size_t len; @@ -1971,21 +1739,30 @@ static int _proc_do_string(void* data, int maxlen, int write, } if (write) { - len = 0; + if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) { + /* Only continue writes not past the end of buffer. */ + len = strlen(data); + if (len > maxlen - 1) + len = maxlen - 1; + + if (*ppos > len) + return 0; + len = *ppos; + } else { + /* Start writing from beginning of buffer. */ + len = 0; + } + + *ppos += *lenp; p = buffer; - while (len < *lenp) { + while ((p - buffer) < *lenp && len < maxlen - 1) { if (get_user(c, p++)) return -EFAULT; if (c == 0 || c == '\n') break; - len++; + data[len++] = c; } - if (len >= maxlen) - len = maxlen-1; - if(copy_from_user(data, buffer, len)) - return -EFAULT; - ((char *) data)[len] = 0; - *ppos += *lenp; + data[len] = 0; } else { len = strlen(data); if (len > maxlen) @@ -2002,10 +1779,10 @@ static int _proc_do_string(void* data, int maxlen, int write, if (len > *lenp) len = *lenp; if (len) - if(copy_to_user(buffer, data, len)) + if (copy_to_user(buffer, data, len)) return -EFAULT; if (len < *lenp) { - if(put_user('\n', ((char __user *) buffer) + len)) + if (put_user('\n', buffer + len)) return -EFAULT; len++; } @@ -2015,6 +1792,14 @@ static int _proc_do_string(void* data, int maxlen, int write, return 0; } +static void warn_sysctl_write(struct ctl_table *table) +{ + pr_warn_once("%s wrote to %s when file position was not 0!\n" + "This will not be supported in the future. To silence this\n" + "warning, set kernel.sysctl_writes_strict = -1\n", + current->comm, table->procname); +} + /** * proc_dostring - read a string sysctl * @table: the sysctl table @@ -2035,12 +1820,139 @@ static int _proc_do_string(void* data, int maxlen, int write, int proc_dostring(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - return _proc_do_string(table->data, table->maxlen, write, - buffer, lenp, ppos); + if (write && *ppos && sysctl_writes_strict == SYSCTL_WRITES_WARN) + warn_sysctl_write(table); + + return _proc_do_string((char *)(table->data), table->maxlen, write, + (char __user *)buffer, lenp, ppos); +} + +static size_t proc_skip_spaces(char **buf) +{ + size_t ret; + char *tmp = skip_spaces(*buf); + ret = tmp - *buf; + *buf = tmp; + return ret; +} + +static void proc_skip_char(char **buf, size_t *size, const char v) +{ + while (*size) { + if (**buf != v) + break; + (*size)--; + (*buf)++; + } +} + +#define TMPBUFLEN 22 +/** + * proc_get_long - reads an ASCII formatted integer from a user buffer + * + * @buf: a kernel buffer + * @size: size of the kernel buffer + * @val: this is where the number will be stored + * @neg: set to %TRUE if number is negative + * @perm_tr: a vector which contains the allowed trailers + * @perm_tr_len: size of the perm_tr vector + * @tr: pointer to store the trailer character + * + * In case of success %0 is returned and @buf and @size are updated with + * the amount of bytes read. If @tr is non-NULL and a trailing + * character exists (size is non-zero after returning from this + * function), @tr is updated with the trailing character. + */ +static int proc_get_long(char **buf, size_t *size, + unsigned long *val, bool *neg, + const char *perm_tr, unsigned perm_tr_len, char *tr) +{ + int len; + char *p, tmp[TMPBUFLEN]; + + if (!*size) + return -EINVAL; + + len = *size; + if (len > TMPBUFLEN - 1) + len = TMPBUFLEN - 1; + + memcpy(tmp, *buf, len); + + tmp[len] = 0; + p = tmp; + if (*p == '-' && *size > 1) { + *neg = true; + p++; + } else + *neg = false; + if (!isdigit(*p)) + return -EINVAL; + + *val = simple_strtoul(p, &p, 0); + + len = p - tmp; + + /* We don't know if the next char is whitespace thus we may accept + * invalid integers (e.g. 1234...a) or two integers instead of one + * (e.g. 123...1). So lets not allow such large numbers. */ + if (len == TMPBUFLEN - 1) + return -EINVAL; + + if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len)) + return -EINVAL; + + if (tr && (len < *size)) + *tr = *p; + + *buf += len; + *size -= len; + + return 0; +} + +/** + * proc_put_long - converts an integer to a decimal ASCII formatted string + * + * @buf: the user buffer + * @size: the size of the user buffer + * @val: the integer to be converted + * @neg: sign of the number, %TRUE for negative + * + * In case of success %0 is returned and @buf and @size are updated with + * the amount of bytes written. + */ +static int proc_put_long(void __user **buf, size_t *size, unsigned long val, + bool neg) +{ + int len; + char tmp[TMPBUFLEN], *p = tmp; + + sprintf(p, "%s%lu", neg ? "-" : "", val); + len = strlen(tmp); + if (len > *size) + len = *size; + if (copy_to_user(*buf, tmp, len)) + return -EFAULT; + *size -= len; + *buf += len; + return 0; } +#undef TMPBUFLEN +static int proc_put_char(void __user **buf, size_t *size, char c) +{ + if (*size) { + char __user **buffer = (char __user **)buf; + if (put_user(c, *buffer)) + return -EFAULT; + (*size)--, (*buffer)++; + *buf = *buffer; + } + return 0; +} -static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, +static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp, int *valp, int write, void *data) { @@ -2049,33 +1961,31 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, } else { int val = *valp; if (val < 0) { - *negp = -1; + *negp = true; *lvalp = (unsigned long)-val; } else { - *negp = 0; + *negp = false; *lvalp = (unsigned long)val; } } return 0; } +static const char proc_wspace_sep[] = { ' ', '\t', '\n' }; + static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos, - int (*conv)(int *negp, unsigned long *lvalp, int *valp, + int (*conv)(bool *negp, unsigned long *lvalp, int *valp, int write, void *data), void *data) { -#define TMPBUFLEN 21 - int *i, vleft, first = 1, neg; - unsigned long lval; - size_t left, len; + int *i, vleft, first = 1, err = 0; + unsigned long page = 0; + size_t left; + char *kbuf; - char buf[TMPBUFLEN], *p; - char __user *s = buffer; - - if (!tbl_data || !table->maxlen || !*lenp || - (*ppos && !write)) { + if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) { *lenp = 0; return 0; } @@ -2087,89 +1997,84 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, if (!conv) conv = do_proc_dointvec_conv; - for (; left && vleft--; i++, first=0) { - if (write) { - while (left) { - char c; - if (get_user(c, s)) - return -EFAULT; - if (!isspace(c)) - break; - left--; - s++; - } - if (!left) + if (write) { + if (*ppos) { + switch (sysctl_writes_strict) { + case SYSCTL_WRITES_STRICT: + goto out; + case SYSCTL_WRITES_WARN: + warn_sysctl_write(table); break; - neg = 0; - len = left; - if (len > sizeof(buf) - 1) - len = sizeof(buf) - 1; - if (copy_from_user(buf, s, len)) - return -EFAULT; - buf[len] = 0; - p = buf; - if (*p == '-' && left > 1) { - neg = 1; - p++; - } - if (*p < '0' || *p > '9') + default: break; + } + } - lval = simple_strtoul(p, &p, 0); + if (left > PAGE_SIZE - 1) + left = PAGE_SIZE - 1; + page = __get_free_page(GFP_TEMPORARY); + kbuf = (char *) page; + if (!kbuf) + return -ENOMEM; + if (copy_from_user(kbuf, buffer, left)) { + err = -EFAULT; + goto free; + } + kbuf[left] = 0; + } - len = p-buf; - if ((len < left) && *p && !isspace(*p)) - break; - s += len; - left -= len; + for (; left && vleft--; i++, first=0) { + unsigned long lval; + bool neg; + + if (write) { + left -= proc_skip_spaces(&kbuf); - if (conv(&neg, &lval, i, 1, data)) + if (!left) + break; + err = proc_get_long(&kbuf, &left, &lval, &neg, + proc_wspace_sep, + sizeof(proc_wspace_sep), NULL); + if (err) + break; + if (conv(&neg, &lval, i, 1, data)) { + err = -EINVAL; break; + } } else { - p = buf; + if (conv(&neg, &lval, i, 0, data)) { + err = -EINVAL; + break; + } if (!first) - *p++ = '\t'; - - if (conv(&neg, &lval, i, 0, data)) + err = proc_put_char(&buffer, &left, '\t'); + if (err) + break; + err = proc_put_long(&buffer, &left, lval, neg); + if (err) break; - - sprintf(p, "%s%lu", neg ? "-" : "", lval); - len = strlen(buf); - if (len > left) - len = left; - if(copy_to_user(s, buf, len)) - return -EFAULT; - left -= len; - s += len; } } - if (!write && !first && left) { - if(put_user('\n', s)) - return -EFAULT; - left--, s++; - } + if (!write && !first && left && !err) + err = proc_put_char(&buffer, &left, '\n'); + if (write && !err && left) + left -= proc_skip_spaces(&kbuf); +free: if (write) { - while (left) { - char c; - if (get_user(c, s++)) - return -EFAULT; - if (!isspace(c)) - break; - left--; - } + free_page(page); + if (first) + return err ? : -EINVAL; } - if (write && first) - return -EINVAL; *lenp -= left; +out: *ppos += *lenp; - return 0; -#undef TMPBUFLEN + return err; } static int do_proc_dointvec(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos, - int (*conv)(int *negp, unsigned long *lvalp, int *valp, + int (*conv)(bool *negp, unsigned long *lvalp, int *valp, int write, void *data), void *data) { @@ -2225,20 +2130,31 @@ static int proc_taint(struct ctl_table *table, int write, int i; for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) { if ((tmptaint >> i) & 1) - add_taint(i); + add_taint(i, LOCKDEP_STILL_OK); } } return err; } +#ifdef CONFIG_PRINTK +static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + return proc_dointvec_minmax(table, write, buffer, lenp, ppos); +} +#endif + struct do_proc_dointvec_minmax_conv_param { int *min; int *max; }; -static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp, - int *valp, +static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp, + int *valp, int write, void *data) { struct do_proc_dointvec_minmax_conv_param *param = data; @@ -2251,10 +2167,10 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp, } else { int val = *valp; if (val < 0) { - *negp = -1; + *negp = true; *lvalp = (unsigned long)-val; } else { - *negp = 0; + *negp = false; *lvalp = (unsigned long)val; } } @@ -2288,108 +2204,132 @@ int proc_dointvec_minmax(struct ctl_table *table, int write, do_proc_dointvec_minmax_conv, ¶m); } +static void validate_coredump_safety(void) +{ +#ifdef CONFIG_COREDUMP + if (suid_dumpable == SUID_DUMP_ROOT && + core_pattern[0] != '/' && core_pattern[0] != '|') { + printk(KERN_WARNING "Unsafe core_pattern used with "\ + "suid_dumpable=2. Pipe handler or fully qualified "\ + "core dump path required.\n"); + } +#endif +} + +static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (!error) + validate_coredump_safety(); + return error; +} + +#ifdef CONFIG_COREDUMP +static int proc_dostring_coredump(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int error = proc_dostring(table, write, buffer, lenp, ppos); + if (!error) + validate_coredump_safety(); + return error; +} +#endif + static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul, unsigned long convdiv) { -#define TMPBUFLEN 21 - unsigned long *i, *min, *max, val; - int vleft, first=1, neg; - size_t len, left; - char buf[TMPBUFLEN], *p; - char __user *s = buffer; - - if (!data || !table->maxlen || !*lenp || - (*ppos && !write)) { + unsigned long *i, *min, *max; + int vleft, first = 1, err = 0; + unsigned long page = 0; + size_t left; + char *kbuf; + + if (!data || !table->maxlen || !*lenp || (*ppos && !write)) { *lenp = 0; return 0; } - + i = (unsigned long *) data; min = (unsigned long *) table->extra1; max = (unsigned long *) table->extra2; vleft = table->maxlen / sizeof(unsigned long); left = *lenp; - - for (; left && vleft--; i++, min++, max++, first=0) { - if (write) { - while (left) { - char c; - if (get_user(c, s)) - return -EFAULT; - if (!isspace(c)) - break; - left--; - s++; - } - if (!left) + + if (write) { + if (*ppos) { + switch (sysctl_writes_strict) { + case SYSCTL_WRITES_STRICT: + goto out; + case SYSCTL_WRITES_WARN: + warn_sysctl_write(table); break; - neg = 0; - len = left; - if (len > TMPBUFLEN-1) - len = TMPBUFLEN-1; - if (copy_from_user(buf, s, len)) - return -EFAULT; - buf[len] = 0; - p = buf; - if (*p == '-' && left > 1) { - neg = 1; - p++; - } - if (*p < '0' || *p > '9') + default: break; - val = simple_strtoul(p, &p, 0) * convmul / convdiv ; - len = p-buf; - if ((len < left) && *p && !isspace(*p)) + } + } + + if (left > PAGE_SIZE - 1) + left = PAGE_SIZE - 1; + page = __get_free_page(GFP_TEMPORARY); + kbuf = (char *) page; + if (!kbuf) + return -ENOMEM; + if (copy_from_user(kbuf, buffer, left)) { + err = -EFAULT; + goto free; + } + kbuf[left] = 0; + } + + for (; left && vleft--; i++, first = 0) { + unsigned long val; + + if (write) { + bool neg; + + left -= proc_skip_spaces(&kbuf); + + err = proc_get_long(&kbuf, &left, &val, &neg, + proc_wspace_sep, + sizeof(proc_wspace_sep), NULL); + if (err) break; if (neg) - val = -val; - s += len; - left -= len; - - if(neg) continue; if ((min && val < *min) || (max && val > *max)) continue; *i = val; } else { - p = buf; - if (!first) - *p++ = '\t'; - sprintf(p, "%lu", convdiv * (*i) / convmul); - len = strlen(buf); - if (len > left) - len = left; - if(copy_to_user(s, buf, len)) - return -EFAULT; - left -= len; - s += len; + val = convdiv * (*i) / convmul; + if (!first) { + err = proc_put_char(&buffer, &left, '\t'); + if (err) + break; + } + err = proc_put_long(&buffer, &left, val, false); + if (err) + break; } } - if (!write && !first && left) { - if(put_user('\n', s)) - return -EFAULT; - left--, s++; - } + if (!write && !first && left && !err) + err = proc_put_char(&buffer, &left, '\n'); + if (write && !err) + left -= proc_skip_spaces(&kbuf); +free: if (write) { - while (left) { - char c; - if (get_user(c, s++)) - return -EFAULT; - if (!isspace(c)) - break; - left--; - } + free_page(page); + if (first) + return err ? : -EINVAL; } - if (write && first) - return -EINVAL; *lenp -= left; +out: *ppos += *lenp; - return 0; -#undef TMPBUFLEN + return err; } static int do_proc_doulongvec_minmax(struct ctl_table *table, int write, @@ -2450,7 +2390,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, } -static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp, +static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp, int *valp, int write, void *data) { @@ -2462,10 +2402,10 @@ static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp, int val = *valp; unsigned long lval; if (val < 0) { - *negp = -1; + *negp = true; lval = (unsigned long)-val; } else { - *negp = 0; + *negp = false; lval = (unsigned long)val; } *lvalp = lval / HZ; @@ -2473,7 +2413,7 @@ static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp, return 0; } -static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp, +static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp, int *valp, int write, void *data) { @@ -2485,10 +2425,10 @@ static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp, int val = *valp; unsigned long lval; if (val < 0) { - *negp = -1; + *negp = true; lval = (unsigned long)-val; } else { - *negp = 0; + *negp = false; lval = (unsigned long)val; } *lvalp = jiffies_to_clock_t(lval); @@ -2496,20 +2436,24 @@ static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp, return 0; } -static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp, +static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp, int *valp, int write, void *data) { if (write) { - *valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp); + unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp); + + if (jif > INT_MAX) + return 1; + *valp = (int)jif; } else { int val = *valp; unsigned long lval; if (val < 0) { - *negp = -1; + *negp = true; lval = (unsigned long)-val; } else { - *negp = 0; + *negp = false; lval = (unsigned long)val; } *lvalp = jiffies_to_msecs(lval); @@ -2606,7 +2550,155 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, return 0; } -#else /* CONFIG_PROC_FS */ +/** + * proc_do_large_bitmap - read/write from/to a large bitmap + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @buffer: the user buffer + * @lenp: the size of the user buffer + * @ppos: file position + * + * The bitmap is stored at table->data and the bitmap length (in bits) + * in table->maxlen. + * + * We use a range comma separated format (e.g. 1,3-4,10-10) so that + * large bitmaps may be represented in a compact manner. Writing into + * the file will clear the bitmap then update it with the given input. + * + * Returns 0 on success. + */ +int proc_do_large_bitmap(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int err = 0; + bool first = 1; + size_t left = *lenp; + unsigned long bitmap_len = table->maxlen; + unsigned long *bitmap = *(unsigned long **) table->data; + unsigned long *tmp_bitmap = NULL; + char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c; + + if (!bitmap || !bitmap_len || !left || (*ppos && !write)) { + *lenp = 0; + return 0; + } + + if (write) { + unsigned long page = 0; + char *kbuf; + + if (left > PAGE_SIZE - 1) + left = PAGE_SIZE - 1; + + page = __get_free_page(GFP_TEMPORARY); + kbuf = (char *) page; + if (!kbuf) + return -ENOMEM; + if (copy_from_user(kbuf, buffer, left)) { + free_page(page); + return -EFAULT; + } + kbuf[left] = 0; + + tmp_bitmap = kzalloc(BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long), + GFP_KERNEL); + if (!tmp_bitmap) { + free_page(page); + return -ENOMEM; + } + proc_skip_char(&kbuf, &left, '\n'); + while (!err && left) { + unsigned long val_a, val_b; + bool neg; + + err = proc_get_long(&kbuf, &left, &val_a, &neg, tr_a, + sizeof(tr_a), &c); + if (err) + break; + if (val_a >= bitmap_len || neg) { + err = -EINVAL; + break; + } + + val_b = val_a; + if (left) { + kbuf++; + left--; + } + + if (c == '-') { + err = proc_get_long(&kbuf, &left, &val_b, + &neg, tr_b, sizeof(tr_b), + &c); + if (err) + break; + if (val_b >= bitmap_len || neg || + val_a > val_b) { + err = -EINVAL; + break; + } + if (left) { + kbuf++; + left--; + } + } + + bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1); + first = 0; + proc_skip_char(&kbuf, &left, '\n'); + } + free_page(page); + } else { + unsigned long bit_a, bit_b = 0; + + while (left) { + bit_a = find_next_bit(bitmap, bitmap_len, bit_b); + if (bit_a >= bitmap_len) + break; + bit_b = find_next_zero_bit(bitmap, bitmap_len, + bit_a + 1) - 1; + + if (!first) { + err = proc_put_char(&buffer, &left, ','); + if (err) + break; + } + err = proc_put_long(&buffer, &left, bit_a, false); + if (err) + break; + if (bit_a != bit_b) { + err = proc_put_char(&buffer, &left, '-'); + if (err) + break; + err = proc_put_long(&buffer, &left, bit_b, false); + if (err) + break; + } + + first = 0; bit_b++; + } + if (!err) + err = proc_put_char(&buffer, &left, '\n'); + } + + if (!err) { + if (write) { + if (*ppos) + bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len); + else + bitmap_copy(bitmap, tmp_bitmap, bitmap_len); + } + kfree(tmp_bitmap); + *lenp -= left; + *ppos += *lenp; + return 0; + } else { + kfree(tmp_bitmap); + return err; + } +} + +#else /* CONFIG_PROC_SYSCTL */ int proc_dostring(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -2658,7 +2750,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, } -#endif /* CONFIG_PROC_FS */ +#endif /* CONFIG_PROC_SYSCTL */ /* * No sense putting this after each symbol definition, twice, @@ -2672,6 +2764,3 @@ EXPORT_SYMBOL(proc_dointvec_ms_jiffies); EXPORT_SYMBOL(proc_dostring); EXPORT_SYMBOL(proc_doulongvec_minmax); EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax); -EXPORT_SYMBOL(register_sysctl_table); -EXPORT_SYMBOL(register_sysctl_paths); -EXPORT_SYMBOL(unregister_sysctl_table); |
