diff options
Diffstat (limited to 'fs/proc')
| -rw-r--r-- | fs/proc/Kconfig | 4 | ||||
| -rw-r--r-- | fs/proc/Makefile | 1 | ||||
| -rw-r--r-- | fs/proc/array.c | 24 | ||||
| -rw-r--r-- | fs/proc/base.c | 139 | ||||
| -rw-r--r-- | fs/proc/cmdline.c | 2 | ||||
| -rw-r--r-- | fs/proc/consoles.c | 12 | ||||
| -rw-r--r-- | fs/proc/cpuinfo.c | 2 | ||||
| -rw-r--r-- | fs/proc/devices.c | 2 | ||||
| -rw-r--r-- | fs/proc/fd.c | 10 | ||||
| -rw-r--r-- | fs/proc/generic.c | 23 | ||||
| -rw-r--r-- | fs/proc/inode.c | 32 | ||||
| -rw-r--r-- | fs/proc/internal.h | 7 | ||||
| -rw-r--r-- | fs/proc/interrupts.c | 2 | ||||
| -rw-r--r-- | fs/proc/kcore.c | 5 | ||||
| -rw-r--r-- | fs/proc/kmsg.c | 2 | ||||
| -rw-r--r-- | fs/proc/loadavg.c | 2 | ||||
| -rw-r--r-- | fs/proc/meminfo.c | 52 | ||||
| -rw-r--r-- | fs/proc/namespaces.c | 22 | ||||
| -rw-r--r-- | fs/proc/nommu.c | 14 | ||||
| -rw-r--r-- | fs/proc/page.c | 9 | ||||
| -rw-r--r-- | fs/proc/proc_devtree.c | 243 | ||||
| -rw-r--r-- | fs/proc/root.c | 15 | ||||
| -rw-r--r-- | fs/proc/self.c | 12 | ||||
| -rw-r--r-- | fs/proc/softirqs.c | 2 | ||||
| -rw-r--r-- | fs/proc/stat.c | 26 | ||||
| -rw-r--r-- | fs/proc/task_mmu.c | 155 | ||||
| -rw-r--r-- | fs/proc/task_nommu.c | 19 | ||||
| -rw-r--r-- | fs/proc/uptime.c | 4 | ||||
| -rw-r--r-- | fs/proc/version.c | 2 | ||||
| -rw-r--r-- | fs/proc/vmcore.c | 181 |
30 files changed, 460 insertions, 565 deletions
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 15af6222f8a..2183fcf41d5 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -31,6 +31,10 @@ config PROC_FS config PROC_KCORE bool "/proc/kcore support" if !ARM depends on PROC_FS && MMU + help + Provides a virtual ELF core file of the live kernel. This can + be read with gdb and other ELF tools. No modifications can be + made using this mechanism. config PROC_VMCORE bool "/proc/vmcore support" diff --git a/fs/proc/Makefile b/fs/proc/Makefile index ab30716584f..239493ec718 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -27,6 +27,5 @@ proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o proc-$(CONFIG_NET) += proc_net.o proc-$(CONFIG_PROC_KCORE) += kcore.o proc-$(CONFIG_PROC_VMCORE) += vmcore.o -proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o proc-$(CONFIG_PRINTK) += kmsg.o proc-$(CONFIG_PROC_PAGE_MONITOR) += page.o diff --git a/fs/proc/array.c b/fs/proc/array.c index cbd0f1b324b..64db2bceac5 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -138,26 +138,17 @@ static const char * const task_state_array[] = { "D (disk sleep)", /* 2 */ "T (stopped)", /* 4 */ "t (tracing stop)", /* 8 */ - "Z (zombie)", /* 16 */ - "X (dead)", /* 32 */ - "x (dead)", /* 64 */ - "K (wakekill)", /* 128 */ - "W (waking)", /* 256 */ - "P (parked)", /* 512 */ + "X (dead)", /* 16 */ + "Z (zombie)", /* 32 */ }; static inline const char *get_task_state(struct task_struct *tsk) { - unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state; - const char * const *p = &task_state_array[0]; + unsigned int state = (tsk->state | tsk->exit_state) & TASK_REPORT; - BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array)); + BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != ARRAY_SIZE(task_state_array)-1); - while (state) { - p++; - state >>= 1; - } - return *p; + return task_state_array[fls(state)]; } static inline void task_state(struct seq_file *m, struct pid_namespace *ns, @@ -183,6 +174,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, seq_printf(m, "State:\t%s\n" "Tgid:\t%d\n" + "Ngid:\t%d\n" "Pid:\t%d\n" "PPid:\t%d\n" "TracerPid:\t%d\n" @@ -190,6 +182,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, "Gid:\t%d\t%d\t%d\t%d\n", get_task_state(p), task_tgid_nr_ns(p, ns), + task_numa_group_id(p), pid_nr_ns(pid, ns), ppid, tpid, from_kuid_munged(user_ns, cred->uid), @@ -451,8 +444,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, min_flt += t->min_flt; maj_flt += t->maj_flt; gtime += task_gtime(t); - t = next_thread(t); - } while (t != task); + } while_each_thread(task, t); min_flt += sig->min_flt; maj_flt += sig->maj_flt; diff --git a/fs/proc/base.c b/fs/proc/base.c index 1485e38daaa..2d696b0c93b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -200,41 +200,9 @@ static int proc_root_link(struct dentry *dentry, struct path *path) return result; } -static int proc_pid_cmdline(struct task_struct *task, char * buffer) +static int proc_pid_cmdline(struct task_struct *task, char *buffer) { - int res = 0; - unsigned int len; - struct mm_struct *mm = get_task_mm(task); - if (!mm) - goto out; - if (!mm->arg_end) - goto out_mm; /* Shh! No looking before we're done */ - - len = mm->arg_end - mm->arg_start; - - if (len > PAGE_SIZE) - len = PAGE_SIZE; - - res = access_process_vm(task, mm->arg_start, buffer, len, 0); - - // If the nul at the end of args has been overwritten, then - // assume application is using setproctitle(3). - if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) { - len = strnlen(buffer, res); - if (len < res) { - res = len; - } else { - len = mm->env_end - mm->env_start; - if (len > PAGE_SIZE - res) - len = PAGE_SIZE - res; - res += access_process_vm(task, mm->env_start, buffer+res, len, 0); - res = strnlen(buffer, res); - } - } -out_mm: - mmput(mm); -out: - return res; + return get_cmdline(task, buffer, PAGE_SIZE); } static int proc_pid_auxv(struct task_struct *task, char *buffer) @@ -1151,10 +1119,16 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, goto out_free_page; } - kloginuid = make_kuid(file->f_cred->user_ns, loginuid); - if (!uid_valid(kloginuid)) { - length = -EINVAL; - goto out_free_page; + + /* is userspace tring to explicitly UNSET the loginuid? */ + if (loginuid == AUDIT_UID_UNSET) { + kloginuid = INVALID_UID; + } else { + kloginuid = make_kuid(file->f_cred->user_ns, loginuid); + if (!uid_valid(kloginuid)) { + length = -EINVAL; + goto out_free_page; + } } length = audit_set_loginuid(kloginuid); @@ -1230,6 +1204,9 @@ static ssize_t proc_fault_inject_write(struct file * file, make_it_fail = simple_strtol(strstrip(buffer), &end, 0); if (*end) return -EINVAL; + if (make_it_fail < 0 || make_it_fail > 1) + return -EINVAL; + task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; @@ -1652,13 +1629,18 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags) return 0; } +static inline bool proc_inode_is_dead(struct inode *inode) +{ + return !proc_pid(inode)->tasks[PIDTYPE_PID].first; +} + int pid_delete_dentry(const struct dentry *dentry) { /* Is the task we represent dead? * If so, then don't put the dentry on the lru list, * kill it immediately. */ - return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; + return proc_inode_is_dead(dentry->d_inode); } const struct dentry_operations pid_dentry_operations = @@ -1813,6 +1795,7 @@ static int proc_map_files_get_link(struct dentry *dentry, struct path *path) if (rc) goto out_mmput; + rc = -ENOENT; down_read(&mm->mmap_sem); vma = find_exact_vma(mm, vm_start, vm_end); if (vma && vma->vm_file) { @@ -2576,7 +2559,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("environ", S_IRUSR, proc_environ_operations), INF("auxv", S_IRUSR, proc_pid_auxv), ONE("status", S_IRUGO, proc_pid_status), - ONE("personality", S_IRUGO, proc_pid_personality), + ONE("personality", S_IRUSR, proc_pid_personality), INF("limits", S_IRUGO, proc_pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), @@ -2586,7 +2569,7 @@ static const struct pid_entry tgid_base_stuff[] = { #endif REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), #ifdef CONFIG_HAVE_ARCH_TRACEHOOK - INF("syscall", S_IRUGO, proc_pid_syscall), + INF("syscall", S_IRUSR, proc_pid_syscall), #endif INF("cmdline", S_IRUGO, proc_pid_cmdline), ONE("stat", S_IRUGO, proc_tgid_stat), @@ -2605,7 +2588,7 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, proc_clear_refs_operations), REG("smaps", S_IRUGO, proc_pid_smaps_operations), - REG("pagemap", S_IRUGO, proc_pagemap_operations), + REG("pagemap", S_IRUSR, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), @@ -2614,7 +2597,7 @@ static const struct pid_entry tgid_base_stuff[] = { INF("wchan", S_IRUGO, proc_pid_wchan), #endif #ifdef CONFIG_STACKTRACE - ONE("stack", S_IRUGO, proc_pid_stack), + ONE("stack", S_IRUSR, proc_pid_stack), #endif #ifdef CONFIG_SCHEDSTATS INF("schedstat", S_IRUGO, proc_pid_schedstat), @@ -2915,14 +2898,14 @@ static const struct pid_entry tid_base_stuff[] = { REG("environ", S_IRUSR, proc_environ_operations), INF("auxv", S_IRUSR, proc_pid_auxv), ONE("status", S_IRUGO, proc_pid_status), - ONE("personality", S_IRUGO, proc_pid_personality), + ONE("personality", S_IRUSR, proc_pid_personality), INF("limits", S_IRUGO, proc_pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), #ifdef CONFIG_HAVE_ARCH_TRACEHOOK - INF("syscall", S_IRUGO, proc_pid_syscall), + INF("syscall", S_IRUSR, proc_pid_syscall), #endif INF("cmdline", S_IRUGO, proc_pid_cmdline), ONE("stat", S_IRUGO, proc_tid_stat), @@ -2943,7 +2926,7 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, proc_clear_refs_operations), REG("smaps", S_IRUGO, proc_tid_smaps_operations), - REG("pagemap", S_IRUGO, proc_pagemap_operations), + REG("pagemap", S_IRUSR, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), @@ -2952,7 +2935,7 @@ static const struct pid_entry tid_base_stuff[] = { INF("wchan", S_IRUGO, proc_pid_wchan), #endif #ifdef CONFIG_STACKTRACE - ONE("stack", S_IRUGO, proc_pid_stack), + ONE("stack", S_IRUSR, proc_pid_stack), #endif #ifdef CONFIG_SCHEDSTATS INF("schedstat", S_IRUGO, proc_pid_schedstat), @@ -3086,34 +3069,42 @@ out_no_task: * In the case of a seek we start with the leader and walk nr * threads past it. */ -static struct task_struct *first_tid(struct task_struct *leader, - int tid, int nr, struct pid_namespace *ns) +static struct task_struct *first_tid(struct pid *pid, int tid, loff_t f_pos, + struct pid_namespace *ns) { - struct task_struct *pos; + struct task_struct *pos, *task; + unsigned long nr = f_pos; + + if (nr != f_pos) /* 32bit overflow? */ + return NULL; rcu_read_lock(); - /* Attempt to start with the pid of a thread */ - if (tid && (nr > 0)) { + task = pid_task(pid, PIDTYPE_PID); + if (!task) + goto fail; + + /* Attempt to start with the tid of a thread */ + if (tid && nr) { pos = find_task_by_pid_ns(tid, ns); - if (pos && (pos->group_leader == leader)) + if (pos && same_thread_group(pos, task)) goto found; } /* If nr exceeds the number of threads there is nothing todo */ - pos = NULL; - if (nr && nr >= get_nr_threads(leader)) - goto out; + if (nr >= get_nr_threads(task)) + goto fail; /* If we haven't found our starting place yet start * with the leader and walk nr threads forward. */ - for (pos = leader; nr > 0; --nr) { - pos = next_thread(pos); - if (pos == leader) { - pos = NULL; - goto out; - } - } + pos = task = task->group_leader; + do { + if (!nr--) + goto found; + } while_each_thread(task, pos); +fail: + pos = NULL; + goto out; found: get_task_struct(pos); out: @@ -3146,25 +3137,16 @@ static struct task_struct *next_tid(struct task_struct *start) /* for the /proc/TGID/task/ directories */ static int proc_task_readdir(struct file *file, struct dir_context *ctx) { - struct task_struct *leader = NULL; - struct task_struct *task = get_proc_task(file_inode(file)); + struct inode *inode = file_inode(file); + struct task_struct *task; struct pid_namespace *ns; int tid; - if (!task) - return -ENOENT; - rcu_read_lock(); - if (pid_alive(task)) { - leader = task->group_leader; - get_task_struct(leader); - } - rcu_read_unlock(); - put_task_struct(task); - if (!leader) + if (proc_inode_is_dead(inode)) return -ENOENT; if (!dir_emit_dots(file, ctx)) - goto out; + return 0; /* f_version caches the tgid value that the last readdir call couldn't * return. lseek aka telldir automagically resets f_version to 0. @@ -3172,7 +3154,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) ns = file->f_dentry->d_sb->s_fs_info; tid = (int)file->f_version; file->f_version = 0; - for (task = first_tid(leader, tid, ctx->pos - 2, ns); + for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns); task; task = next_tid(task), ctx->pos++) { char name[PROC_NUMBUF]; @@ -3188,8 +3170,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) break; } } -out: - put_task_struct(leader); + return 0; } diff --git a/fs/proc/cmdline.c b/fs/proc/cmdline.c index 82676e3fcd1..cbd82dff7e8 100644 --- a/fs/proc/cmdline.c +++ b/fs/proc/cmdline.c @@ -26,4 +26,4 @@ static int __init proc_cmdline_init(void) proc_create("cmdline", 0, NULL, &cmdline_proc_fops); return 0; } -module_init(proc_cmdline_init); +fs_initcall(proc_cmdline_init); diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c index b701eaa482b..290ba85cb90 100644 --- a/fs/proc/consoles.c +++ b/fs/proc/consoles.c @@ -29,7 +29,6 @@ static int show_console_dev(struct seq_file *m, void *v) char flags[ARRAY_SIZE(con_flags) + 1]; struct console *con = v; unsigned int a; - int len; dev_t dev = 0; if (con->device) { @@ -47,11 +46,10 @@ static int show_console_dev(struct seq_file *m, void *v) con_flags[a].name : ' '; flags[a] = 0; - seq_printf(m, "%s%d%n", con->name, con->index, &len); - len = 21 - len; - if (len < 1) - len = 1; - seq_printf(m, "%*c%c%c%c (%s)", len, ' ', con->read ? 'R' : '-', + seq_setwidth(m, 21 - 1); + seq_printf(m, "%s%d", con->name, con->index); + seq_pad(m, ' '); + seq_printf(m, "%c%c%c (%s)", con->read ? 'R' : '-', con->write ? 'W' : '-', con->unblank ? 'U' : '-', flags); if (dev) @@ -111,4 +109,4 @@ static int __init proc_consoles_init(void) proc_create("consoles", 0, NULL, &proc_consoles_operations); return 0; } -module_init(proc_consoles_init); +fs_initcall(proc_consoles_init); diff --git a/fs/proc/cpuinfo.c b/fs/proc/cpuinfo.c index 5a1e539a234..06f4d31e039 100644 --- a/fs/proc/cpuinfo.c +++ b/fs/proc/cpuinfo.c @@ -21,4 +21,4 @@ static int __init proc_cpuinfo_init(void) proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations); return 0; } -module_init(proc_cpuinfo_init); +fs_initcall(proc_cpuinfo_init); diff --git a/fs/proc/devices.c b/fs/proc/devices.c index b14347167c3..50493edc30e 100644 --- a/fs/proc/devices.c +++ b/fs/proc/devices.c @@ -67,4 +67,4 @@ static int __init proc_devices_init(void) proc_create("devices", 0, NULL, &proc_devinfo_operations); return 0; } -module_init(proc_devices_init); +fs_initcall(proc_devices_init); diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 75f2890abbd..0788d093f5d 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -11,6 +11,7 @@ #include <linux/proc_fs.h> +#include "../mount.h" #include "internal.h" #include "fd.h" @@ -48,8 +49,9 @@ static int seq_show(struct seq_file *m, void *v) } if (!ret) { - seq_printf(m, "pos:\t%lli\nflags:\t0%o\n", - (long long)file->f_pos, f_flags); + seq_printf(m, "pos:\t%lli\nflags:\t0%o\nmnt_id:\t%i\n", + (long long)file->f_pos, f_flags, + real_mount(file->f_path.mnt)->mnt_id); if (file->f_op->show_fdinfo) ret = file->f_op->show_fdinfo(m, file); fput(file); @@ -230,8 +232,6 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx, if (!dir_emit_dots(file, ctx)) goto out; - if (!dir_emit_dots(file, ctx)) - goto out; files = get_files_struct(p); if (!files) goto out; @@ -288,7 +288,7 @@ int proc_fd_permission(struct inode *inode, int mask) int rv = generic_permission(inode, mask); if (rv == 0) return 0; - if (task_pid(current) == proc_pid(inode)) + if (task_tgid(current) == proc_pid(inode)) rv = 0; return rv; } diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 94441a40733..b7f268eb5f4 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -49,8 +49,7 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) setattr_copy(inode, iattr); mark_inode_dirty(inode); - de->uid = inode->i_uid; - de->gid = inode->i_gid; + proc_set_user(de, inode->i_uid, inode->i_gid); de->mode = inode->i_mode; return 0; } @@ -175,22 +174,6 @@ static const struct inode_operations proc_link_inode_operations = { }; /* - * As some entries in /proc are volatile, we want to - * get rid of unused dentries. This could be made - * smarter: we could keep a "volatile" flag in the - * inode to indicate which ones to keep. - */ -static int proc_delete_dentry(const struct dentry * dentry) -{ - return 1; -} - -static const struct dentry_operations proc_dentry_operations = -{ - .d_delete = proc_delete_dentry, -}; - -/* * Don't create negative dentries here, return -ENOENT by hand * instead. */ @@ -209,7 +192,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, inode = proc_get_inode(dir->i_sb, de); if (!inode) return ERR_PTR(-ENOMEM); - d_set_d_op(dentry, &proc_dentry_operations); + d_set_d_op(dentry, &simple_dentry_operations); d_add(dentry, inode); return NULL; } @@ -271,7 +254,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file, de = next; } while (de); spin_unlock(&proc_subdir_lock); - return 0; + return 1; } int proc_readdir(struct file *file, struct dir_context *ctx) diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 073aea60cf8..0adbc02d60e 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -35,7 +35,7 @@ static void proc_evict_inode(struct inode *inode) const struct proc_ns_operations *ns_ops; void *ns; - truncate_inode_pages(&inode->i_data, 0); + truncate_inode_pages_final(&inode->i_data); clear_inode(inode); /* Stop tracking associated processes */ @@ -47,7 +47,7 @@ static void proc_evict_inode(struct inode *inode) pde_put(de); head = PROC_I(inode)->sysctl; if (head) { - rcu_assign_pointer(PROC_I(inode)->sysctl, NULL); + RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL); sysctl_head_put(head); } /* Release any associated namespace */ @@ -285,6 +285,32 @@ static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma) return rv; } +static unsigned long +proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr, + unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + struct proc_dir_entry *pde = PDE(file_inode(file)); + unsigned long rv = -EIO; + + if (use_pde(pde)) { + typeof(proc_reg_get_unmapped_area) *get_area; + + get_area = pde->proc_fops->get_unmapped_area; +#ifdef CONFIG_MMU + if (!get_area) + get_area = current->mm->get_unmapped_area; +#endif + + if (get_area) + rv = get_area(file, orig_addr, len, pgoff, flags); + else + rv = orig_addr; + unuse_pde(pde); + } + return rv; +} + static int proc_reg_open(struct inode *inode, struct file *file) { struct proc_dir_entry *pde = PDE(inode); @@ -356,6 +382,7 @@ static const struct file_operations proc_reg_file_ops = { .compat_ioctl = proc_reg_compat_ioctl, #endif .mmap = proc_reg_mmap, + .get_unmapped_area = proc_reg_get_unmapped_area, .open = proc_reg_open, .release = proc_reg_release, }; @@ -368,6 +395,7 @@ static const struct file_operations proc_reg_file_ops_no_compat = { .poll = proc_reg_poll, .unlocked_ioctl = proc_reg_unlocked_ioctl, .mmap = proc_reg_mmap, + .get_unmapped_area = proc_reg_get_unmapped_area, .open = proc_reg_open, .release = proc_reg_release, }; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 651d09a11dd..3ab6d14e71c 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -211,13 +211,6 @@ extern int proc_fill_super(struct super_block *); extern void proc_entry_rundown(struct proc_dir_entry *); /* - * proc_devtree.c - */ -#ifdef CONFIG_PROC_DEVICETREE -extern void proc_device_tree_init(void); -#endif - -/* * proc_namespaces.c */ extern const struct inode_operations proc_ns_dir_inode_operations; diff --git a/fs/proc/interrupts.c b/fs/proc/interrupts.c index 05029c0e2f2..a352d5703b4 100644 --- a/fs/proc/interrupts.c +++ b/fs/proc/interrupts.c @@ -50,4 +50,4 @@ static int __init proc_interrupts_init(void) proc_create("interrupts", 0, NULL, &proc_interrupts_operations); return 0; } -module_init(proc_interrupts_init); +fs_initcall(proc_interrupts_init); diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 06ea155e1a5..39e6ef32f0b 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -255,8 +255,7 @@ static int kcore_update_ram(void) end_pfn = 0; for_each_node_state(nid, N_MEMORY) { unsigned long node_end; - node_end = NODE_DATA(nid)->node_start_pfn + - NODE_DATA(nid)->node_spanned_pages; + node_end = node_end_pfn(nid); if (end_pfn < node_end) end_pfn = node_end; } @@ -640,4 +639,4 @@ static int __init proc_kcore_init(void) return 0; } -module_init(proc_kcore_init); +fs_initcall(proc_kcore_init); diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c index bdfabdaefdc..05f8dcdb086 100644 --- a/fs/proc/kmsg.c +++ b/fs/proc/kmsg.c @@ -61,4 +61,4 @@ static int __init proc_kmsg_init(void) proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations); return 0; } -module_init(proc_kmsg_init); +fs_initcall(proc_kmsg_init); diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c index 1afa4dd4cae..aec66e6c206 100644 --- a/fs/proc/loadavg.c +++ b/fs/proc/loadavg.c @@ -42,4 +42,4 @@ static int __init proc_loadavg_init(void) proc_create("loadavg", 0, NULL, &loadavg_proc_fops); return 0; } -module_init(proc_loadavg_init); +fs_initcall(proc_loadavg_init); diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 5aa847a603c..7445af0b1aa 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -1,8 +1,8 @@ #include <linux/fs.h> -#include <linux/hugetlb.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/hugetlb.h> #include <linux/mman.h> #include <linux/mmzone.h> #include <linux/proc_fs.h> @@ -24,10 +24,13 @@ static int meminfo_proc_show(struct seq_file *m, void *v) { struct sysinfo i; unsigned long committed; - unsigned long allowed; struct vmalloc_info vmi; long cached; + long available; + unsigned long pagecache; + unsigned long wmark_low = 0; unsigned long pages[NR_LRU_LISTS]; + struct zone *zone; int lru; /* @@ -37,8 +40,6 @@ static int meminfo_proc_show(struct seq_file *m, void *v) si_meminfo(&i); si_swapinfo(&i); committed = percpu_counter_read_positive(&vm_committed_as); - allowed = ((totalram_pages - hugetlb_total_pages()) - * sysctl_overcommit_ratio / 100) + total_swap_pages; cached = global_page_state(NR_FILE_PAGES) - total_swapcache_pages() - i.bufferram; @@ -50,12 +51,44 @@ static int meminfo_proc_show(struct seq_file *m, void *v) for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++) pages[lru] = global_page_state(NR_LRU_BASE + lru); + for_each_zone(zone) + wmark_low += zone->watermark[WMARK_LOW]; + + /* + * Estimate the amount of memory available for userspace allocations, + * without causing swapping. + * + * Free memory cannot be taken below the low watermark, before the + * system starts swapping. + */ + available = i.freeram - wmark_low; + + /* + * Not all the page cache can be freed, otherwise the system will + * start swapping. Assume at least half of the page cache, or the + * low watermark worth of cache, needs to stay. + */ + pagecache = pages[LRU_ACTIVE_FILE] + pages[LRU_INACTIVE_FILE]; + pagecache -= min(pagecache / 2, wmark_low); + available += pagecache; + + /* + * Part of the reclaimable slab consists of items that are in use, + * and cannot be freed. Cap this estimate at the low watermark. + */ + available += global_page_state(NR_SLAB_RECLAIMABLE) - + min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low); + + if (available < 0) + available = 0; + /* * Tagged format, for easy grepping and expansion. */ seq_printf(m, "MemTotal: %8lu kB\n" "MemFree: %8lu kB\n" + "MemAvailable: %8lu kB\n" "Buffers: %8lu kB\n" "Cached: %8lu kB\n" "SwapCached: %8lu kB\n" @@ -108,6 +141,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) , K(i.totalram), K(i.freeram), + K(available), K(i.bufferram), K(cached), K(total_swapcache_pages()), @@ -132,13 +166,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) K(i.freeswap), K(global_page_state(NR_FILE_DIRTY)), K(global_page_state(NR_WRITEBACK)), -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - K(global_page_state(NR_ANON_PAGES) - + global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) * - HPAGE_PMD_NR), -#else K(global_page_state(NR_ANON_PAGES)), -#endif K(global_page_state(NR_FILE_MAPPED)), K(global_page_state(NR_SHMEM)), K(global_page_state(NR_SLAB_RECLAIMABLE) + @@ -153,7 +181,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) K(global_page_state(NR_UNSTABLE_NFS)), K(global_page_state(NR_BOUNCE)), K(global_page_state(NR_WRITEBACK_TEMP)), - K(allowed), + K(vm_commit_limit()), K(committed), (unsigned long)VMALLOC_TOTAL >> 10, vmi.used >> 10, @@ -192,4 +220,4 @@ static int __init proc_meminfo_init(void) proc_create("meminfo", 0, NULL, &meminfo_proc_fops); return 0; } -module_init(proc_meminfo_init); +fs_initcall(proc_meminfo_init); diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 49a7fff2e83..89026095f2b 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -42,12 +42,6 @@ static const struct inode_operations ns_inode_operations = { .setattr = proc_setattr, }; -static int ns_delete_dentry(const struct dentry *dentry) -{ - /* Don't cache namespace inodes when not in use */ - return 1; -} - static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) { struct inode *inode = dentry->d_inode; @@ -59,7 +53,7 @@ static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) const struct dentry_operations ns_dentry_operations = { - .d_delete = ns_delete_dentry, + .d_delete = always_delete_dentry, .d_dname = ns_dname, }; @@ -152,7 +146,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl struct task_struct *task; void *ns; char name[50]; - int len = -EACCES; + int res = -EACCES; task = get_proc_task(inode); if (!task) @@ -161,24 +155,18 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl if (!ptrace_may_access(task, PTRACE_MODE_READ)) goto out_put_task; - len = -ENOENT; + res = -ENOENT; ns = ns_ops->get(task); if (!ns) goto out_put_task; snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns)); - len = strlen(name); - - if (len > buflen) - len = buflen; - if (copy_to_user(buffer, name, len)) - len = -EFAULT; - + res = readlink_copy(buffer, buflen, name); ns_ops->put(ns); out_put_task: put_task_struct(task); out: - return len; + return res; } static const struct inode_operations proc_ns_link_inode_operations = { diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index ccfd99bd1c5..d4a35746cab 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -39,7 +39,7 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region) unsigned long ino = 0; struct file *file; dev_t dev = 0; - int flags, len; + int flags; flags = region->vm_flags; file = region->vm_file; @@ -50,8 +50,9 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region) ino = inode->i_ino; } + seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); seq_printf(m, - "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", + "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ", region->vm_start, region->vm_end, flags & VM_READ ? 'r' : '-', @@ -59,13 +60,10 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region) flags & VM_EXEC ? 'x' : '-', flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', ((loff_t)region->vm_pgoff) << PAGE_SHIFT, - MAJOR(dev), MINOR(dev), ino, &len); + MAJOR(dev), MINOR(dev), ino); if (file) { - len = 25 + sizeof(void *) * 6 - len; - if (len < 1) - len = 1; - seq_printf(m, "%*c", len, ' '); + seq_pad(m, ' '); seq_path(m, &file->f_path, ""); } @@ -133,4 +131,4 @@ static int __init proc_nommu_init(void) return 0; } -module_init(proc_nommu_init); +fs_initcall(proc_nommu_init); diff --git a/fs/proc/page.c b/fs/proc/page.c index b8730d9ebae..e647c55275d 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -118,10 +118,11 @@ u64 stable_page_flags(struct page *page) /* * PageTransCompound can be true for non-huge compound pages (slab * pages or pages allocated by drivers with __GFP_COMP) because it - * just checks PG_head/PG_tail, so we need to check PageLRU to make - * sure a given page is a thp, not a non-huge compound page. + * just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon + * to make sure a given page is a thp, not a non-huge compound page. */ - else if (PageTransCompound(page) && PageLRU(compound_trans_head(page))) + else if (PageTransCompound(page) && (PageLRU(compound_head(page)) || + PageAnon(compound_head(page)))) u |= 1 << KPF_THP; /* @@ -217,4 +218,4 @@ static int __init proc_page_init(void) proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations); return 0; } -module_init(proc_page_init); +fs_initcall(proc_page_init); diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c deleted file mode 100644 index 106a8357063..00000000000 --- a/fs/proc/proc_devtree.c +++ /dev/null @@ -1,243 +0,0 @@ -/* - * proc_devtree.c - handles /proc/device-tree - * - * Copyright 1997 Paul Mackerras - */ -#include <linux/errno.h> -#include <linux/init.h> -#include <linux/time.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/printk.h> -#include <linux/stat.h> -#include <linux/string.h> -#include <linux/of.h> -#include <linux/export.h> -#include <linux/slab.h> -#include <asm/prom.h> -#include <asm/uaccess.h> -#include "internal.h" - -static inline void set_node_proc_entry(struct device_node *np, - struct proc_dir_entry *de) -{ -#ifdef HAVE_ARCH_DEVTREE_FIXUPS - np->pde = de; -#endif -} - -static struct proc_dir_entry *proc_device_tree; - -/* - * Supply data on a read from /proc/device-tree/node/property. - */ -static int property_proc_show(struct seq_file *m, void *v) -{ - struct property *pp = m->private; - - seq_write(m, pp->value, pp->length); - return 0; -} - -static int property_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, property_proc_show, __PDE_DATA(inode)); -} - -static const struct file_operations property_proc_fops = { - .owner = THIS_MODULE, - .open = property_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -/* - * For a node with a name like "gc@10", we make symlinks called "gc" - * and "@10" to it. - */ - -/* - * Add a property to a node - */ -static struct proc_dir_entry * -__proc_device_tree_add_prop(struct proc_dir_entry *de, struct property *pp, - const char *name) -{ - struct proc_dir_entry *ent; - - /* - * Unfortunately proc_register puts each new entry - * at the beginning of the list. So we rearrange them. - */ - ent = proc_create_data(name, - strncmp(name, "security-", 9) ? S_IRUGO : S_IRUSR, - de, &property_proc_fops, pp); - if (ent == NULL) - return NULL; - - if (!strncmp(name, "security-", 9)) - ent->size = 0; /* don't leak number of password chars */ - else - ent->size = pp->length; - - return ent; -} - - -void proc_device_tree_add_prop(struct proc_dir_entry *pde, struct property *prop) -{ - __proc_device_tree_add_prop(pde, prop, prop->name); -} - -void proc_device_tree_remove_prop(struct proc_dir_entry *pde, - struct property *prop) -{ - remove_proc_entry(prop->name, pde); -} - -void proc_device_tree_update_prop(struct proc_dir_entry *pde, - struct property *newprop, - struct property *oldprop) -{ - struct proc_dir_entry *ent; - - if (!oldprop) { - proc_device_tree_add_prop(pde, newprop); - return; - } - - for (ent = pde->subdir; ent != NULL; ent = ent->next) - if (ent->data == oldprop) - break; - if (ent == NULL) { - pr_warn("device-tree: property \"%s\" does not exist\n", - oldprop->name); - } else { - ent->data = newprop; - ent->size = newprop->length; - } -} - -/* - * Various dodgy firmware might give us nodes and/or properties with - * conflicting names. That's generally ok, except for exporting via /proc, - * so munge names here to ensure they're unique. - */ - -static int duplicate_name(struct proc_dir_entry *de, const char *name) -{ - struct proc_dir_entry *ent; - int found = 0; - - spin_lock(&proc_subdir_lock); - - for (ent = de->subdir; ent != NULL; ent = ent->next) { - if (strcmp(ent->name, name) == 0) { - found = 1; - break; - } - } - - spin_unlock(&proc_subdir_lock); - - return found; -} - -static const char *fixup_name(struct device_node *np, struct proc_dir_entry *de, - const char *name) -{ - char *fixed_name; - int fixup_len = strlen(name) + 2 + 1; /* name + #x + \0 */ - int i = 1, size; - -realloc: - fixed_name = kmalloc(fixup_len, GFP_KERNEL); - if (fixed_name == NULL) { - pr_err("device-tree: Out of memory trying to fixup " - "name \"%s\"\n", name); - return name; - } - -retry: - size = snprintf(fixed_name, fixup_len, "%s#%d", name, i); - size++; /* account for NULL */ - - if (size > fixup_len) { - /* We ran out of space, free and reallocate. */ - kfree(fixed_name); - fixup_len = size; - goto realloc; - } - - if (duplicate_name(de, fixed_name)) { - /* Multiple duplicates. Retry with a different offset. */ - i++; - goto retry; - } - - pr_warn("device-tree: Duplicate name in %s, renamed to \"%s\"\n", - np->full_name, fixed_name); - - return fixed_name; -} - -/* - * Process a node, adding entries for its children and its properties. - */ -void proc_device_tree_add_node(struct device_node *np, - struct proc_dir_entry *de) -{ - struct property *pp; - struct proc_dir_entry *ent; - struct device_node *child; - const char *p; - - set_node_proc_entry(np, de); - for (child = NULL; (child = of_get_next_child(np, child));) { - /* Use everything after the last slash, or the full name */ - p = kbasename(child->full_name); - - if (duplicate_name(de, p)) - p = fixup_name(np, de, p); - - ent = proc_mkdir(p, de); - if (ent == NULL) - break; - proc_device_tree_add_node(child, ent); - } - of_node_put(child); - - for (pp = np->properties; pp != NULL; pp = pp->next) { - p = pp->name; - - if (strchr(p, '/')) - continue; - - if (duplicate_name(de, p)) - p = fixup_name(np, de, p); - - ent = __proc_device_tree_add_prop(de, pp, p); - if (ent == NULL) - break; - } -} - -/* - * Called on initialization to set up the /proc/device-tree subtree - */ -void __init proc_device_tree_init(void) -{ - struct device_node *root; - - proc_device_tree = proc_mkdir("device-tree", NULL); - if (proc_device_tree == NULL) - return; - root = of_find_node_by_path("/"); - if (root == NULL) { - pr_debug("/proc/device-tree: can't find root\n"); - return; - } - proc_device_tree_add_node(root, proc_device_tree); - of_node_put(root); -} diff --git a/fs/proc/root.c b/fs/proc/root.c index 229e366598d..5dbadecb234 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -92,6 +92,8 @@ static int proc_parse_options(char *options, struct pid_namespace *pid) int proc_remount(struct super_block *sb, int *flags, char *data) { struct pid_namespace *pid = sb->s_fs_info; + + sync_filesystem(sb); return !proc_parse_options(data, pid); } @@ -110,7 +112,11 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, ns = task_active_pid_ns(current); options = data; - if (!current_user_ns()->may_mount_proc) + if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type)) + return ERR_PTR(-EPERM); + + /* Does the mounter have privilege over the pid namespace? */ + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); } @@ -179,9 +185,6 @@ void __init proc_root_init(void) proc_mkdir("openprom", NULL); #endif proc_tty_init(); -#ifdef CONFIG_PROC_DEVICETREE - proc_device_tree_init(); -#endif proc_mkdir("bus", NULL); proc_sys_init(); } @@ -205,7 +208,9 @@ static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentr static int proc_root_readdir(struct file *file, struct dir_context *ctx) { if (ctx->pos < FIRST_PROCESS_ENTRY) { - proc_readdir(file, ctx); + int error = proc_readdir(file, ctx); + if (unlikely(error <= 0)) + return error; ctx->pos = FIRST_PROCESS_ENTRY; } diff --git a/fs/proc/self.c b/fs/proc/self.c index 6b6a993b5c2..4348bb8907c 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -16,7 +16,7 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer, if (!tgid) return -ENOENT; sprintf(tmp, "%d", tgid); - return vfs_readlink(dentry,buffer,buflen,tmp); + return readlink_copy(buffer, buflen, tmp); } static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) @@ -36,18 +36,10 @@ static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) return NULL; } -static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd, - void *cookie) -{ - char *s = nd_get_link(nd); - if (!IS_ERR(s)) - kfree(s); -} - static const struct inode_operations proc_self_inode_operations = { .readlink = proc_self_readlink, .follow_link = proc_self_follow_link, - .put_link = proc_self_put_link, + .put_link = kfree_put_link, }; static unsigned self_inum; diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c index 62604be9f58..ad8a77f94be 100644 --- a/fs/proc/softirqs.c +++ b/fs/proc/softirqs.c @@ -41,4 +41,4 @@ static int __init proc_softirqs_init(void) proc_create("softirqs", 0, NULL, &proc_softirqs_operations); return 0; } -module_init(proc_softirqs_init); +fs_initcall(proc_softirqs_init); diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 1cf86c0e868..bf2d03f8fd3 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -9,7 +9,7 @@ #include <linux/slab.h> #include <linux/time.h> #include <linux/irqnr.h> -#include <asm/cputime.h> +#include <linux/cputime.h> #include <linux/tick.h> #ifndef arch_irq_stat_cpu @@ -184,29 +184,11 @@ static int show_stat(struct seq_file *p, void *v) static int stat_open(struct inode *inode, struct file *file) { - size_t size = 1024 + 128 * num_possible_cpus(); - char *buf; - struct seq_file *m; - int res; + size_t size = 1024 + 128 * num_online_cpus(); /* minimum size to display an interrupt count : 2 bytes */ size += 2 * nr_irqs; - - /* don't ask for more than the kmalloc() max size */ - if (size > KMALLOC_MAX_SIZE) - size = KMALLOC_MAX_SIZE; - buf = kmalloc(size, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - res = single_open(file, show_stat, NULL); - if (!res) { - m = file->private_data; - m->buf = buf; - m->size = ksize(buf); - } else - kfree(buf); - return res; + return single_open_size(file, show_stat, NULL, size); } static const struct file_operations proc_stat_operations = { @@ -221,4 +203,4 @@ static int __init proc_stat_init(void) proc_create("stat", 0, NULL, &proc_stat_operations); return 0; } -module_init(proc_stat_init); +fs_initcall(proc_stat_init); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index dbf61f6174f..cfa63ee92c9 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1,4 +1,5 @@ #include <linux/mm.h> +#include <linux/vmacache.h> #include <linux/hugetlb.h> #include <linux/huge_mm.h> #include <linux/mount.h> @@ -62,7 +63,8 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) total_rss << (PAGE_SHIFT-10), data << (PAGE_SHIFT-10), mm->stack_vm << (PAGE_SHIFT-10), text, lib, - (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10, + (PTRS_PER_PTE * sizeof(pte_t) * + atomic_long_read(&mm->nr_ptes)) >> 10, swap << (PAGE_SHIFT-10)); } @@ -83,14 +85,6 @@ unsigned long task_statm(struct mm_struct *mm, return mm->total_vm; } -static void pad_len_spaces(struct seq_file *m, int len) -{ - len = 25 + sizeof(void*) * 6 - len; - if (len < 1) - len = 1; - seq_printf(m, "%*c", len, ' '); -} - #ifdef CONFIG_NUMA /* * These functions are for numa_maps but called in generic **maps seq_file @@ -159,7 +153,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) /* * We remember last_addr rather than next_addr to hit with - * mmap_cache most of the time. We have zero last_addr at + * vmacache most of the time. We have zero last_addr at * the beginning and also after lseek. We will have -1 last_addr * after the end of the vmas. */ @@ -268,7 +262,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) unsigned long long pgoff = 0; unsigned long start, end; dev_t dev = 0; - int len; const char *name = NULL; if (file) { @@ -286,7 +279,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) if (stack_guard_page_end(vma, end)) end -= PAGE_SIZE; - seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", + seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); + seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ", start, end, flags & VM_READ ? 'r' : '-', @@ -294,18 +288,24 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) flags & VM_EXEC ? 'x' : '-', flags & VM_MAYSHARE ? 's' : 'p', pgoff, - MAJOR(dev), MINOR(dev), ino, &len); + MAJOR(dev), MINOR(dev), ino); /* * Print the dentry name for named mappings, and a * special [heap] marker for the heap: */ if (file) { - pad_len_spaces(m, len); + seq_pad(m, ' '); seq_path(m, &file->f_path, "\n"); goto done; } + if (vma->vm_ops && vma->vm_ops->name) { + name = vma->vm_ops->name(vma); + if (name) + goto done; + } + name = arch_vma_name(vma); if (!name) { pid_t tid; @@ -333,7 +333,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) name = "[stack]"; } else { /* Thread stack in /proc/PID/maps */ - pad_len_spaces(m, len); + seq_pad(m, ' '); seq_printf(m, "[stack:%d]", tid); } } @@ -341,7 +341,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) done: if (name) { - pad_len_spaces(m, len); + seq_pad(m, ' '); seq_puts(m, name); } seq_putc(m, '\n'); @@ -505,9 +505,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, pte_t *pte; spinlock_t *ptl; - if (pmd_trans_huge_lock(pmd, vma) == 1) { + if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk); - spin_unlock(&walk->mm->page_table_lock); + spin_unlock(ptl); mss->anonymous_thp += HPAGE_PMD_SIZE; return 0; } @@ -561,6 +561,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) [ilog2(VM_NONLINEAR)] = "nl", [ilog2(VM_ARCH_1)] = "ar", [ilog2(VM_DONTDUMP)] = "dd", +#ifdef CONFIG_MEM_SOFT_DIRTY + [ilog2(VM_SOFTDIRTY)] = "sd", +#endif [ilog2(VM_MIXEDMAP)] = "mm", [ilog2(VM_HUGEPAGE)] = "hg", [ilog2(VM_NOHUGEPAGE)] = "nh", @@ -730,8 +733,16 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, * of how soft-dirty works. */ pte_t ptent = *pte; - ptent = pte_wrprotect(ptent); - ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY); + + if (pte_present(ptent)) { + ptent = pte_wrprotect(ptent); + ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY); + } else if (is_swap_pte(ptent)) { + ptent = pte_swp_clear_soft_dirty(ptent); + } else if (pte_file(ptent)) { + ptent = pte_file_clear_soft_dirty(ptent); + } + set_pte_at(vma->vm_mm, addr, pte, ptent); #endif } @@ -752,14 +763,15 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); for (; addr != end; pte++, addr += PAGE_SIZE) { ptent = *pte; - if (!pte_present(ptent)) - continue; if (cp->type == CLEAR_REFS_SOFT_DIRTY) { clear_soft_dirty(vma, addr, pte); continue; } + if (!pte_present(ptent)) + continue; + page = vm_normal_page(vma, addr, ptent); if (!page) continue; @@ -798,8 +810,9 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, if (type == CLEAR_REFS_SOFT_DIRTY) { soft_dirty_cleared = true; - pr_warn_once("The pagemap bits 55-60 has changed their meaning! " - "See the linux/Documentation/vm/pagemap.txt for details.\n"); + pr_warn_once("The pagemap bits 55-60 has changed their meaning!" + " See the linux/Documentation/vm/pagemap.txt for " + "details.\n"); } task = get_proc_task(file_inode(file)); @@ -830,11 +843,17 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, * * Writing 3 to /proc/pid/clear_refs only affects file * mapped pages. + * + * Writing 4 to /proc/pid/clear_refs affects all pages. */ if (type == CLEAR_REFS_ANON && vma->vm_file) continue; if (type == CLEAR_REFS_MAPPED && !vma->vm_file) continue; + if (type == CLEAR_REFS_SOFT_DIRTY) { + if (vma->vm_flags & VM_SOFTDIRTY) + vma->vm_flags &= ~VM_SOFTDIRTY; + } walk_page_range(vma->vm_start, vma->vm_end, &clear_refs_walk); } @@ -859,7 +878,7 @@ typedef struct { } pagemap_entry_t; struct pagemapread { - int pos, len; + int pos, len; /* units: PM_ENTRY_BYTES, not bytes */ pagemap_entry_t *buffer; bool v2; }; @@ -867,7 +886,7 @@ struct pagemapread { #define PAGEMAP_WALK_SIZE (PMD_SIZE) #define PAGEMAP_WALK_MASK (PMD_MASK) -#define PM_ENTRY_BYTES sizeof(u64) +#define PM_ENTRY_BYTES sizeof(pagemap_entry_t) #define PM_STATUS_BITS 3 #define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) #define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET) @@ -929,22 +948,28 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, frame = pte_pfn(pte); flags = PM_PRESENT; page = vm_normal_page(vma, addr, pte); + if (pte_soft_dirty(pte)) + flags2 |= __PM_SOFT_DIRTY; } else if (is_swap_pte(pte)) { - swp_entry_t entry = pte_to_swp_entry(pte); - + swp_entry_t entry; + if (pte_swp_soft_dirty(pte)) + flags2 |= __PM_SOFT_DIRTY; + entry = pte_to_swp_entry(pte); frame = swp_type(entry) | (swp_offset(entry) << MAX_SWAPFILES_SHIFT); flags = PM_SWAP; if (is_migration_entry(entry)) page = migration_entry_to_page(entry); } else { - *pme = make_pme(PM_NOT_PRESENT(pm->v2)); + if (vma->vm_flags & VM_SOFTDIRTY) + flags2 |= __PM_SOFT_DIRTY; + *pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2)); return; } if (page && !PageAnon(page)) flags |= PM_FILE; - if (pte_soft_dirty(pte)) + if ((vma->vm_flags & VM_SOFTDIRTY)) flags2 |= __PM_SOFT_DIRTY; *pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags); @@ -963,7 +988,7 @@ static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *p *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset) | PM_STATUS2(pm->v2, pmd_flags2) | PM_PRESENT); else - *pme = make_pme(PM_NOT_PRESENT(pm->v2)); + *pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, pmd_flags2)); } #else static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, @@ -977,16 +1002,21 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, { struct vm_area_struct *vma; struct pagemapread *pm = walk->private; + spinlock_t *ptl; pte_t *pte; int err = 0; pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); /* find the first VMA at or above 'addr' */ vma = find_vma(walk->mm, addr); - if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { + if (vma && pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { int pmd_flags2; - pmd_flags2 = (pmd_soft_dirty(*pmd) ? __PM_SOFT_DIRTY : 0); + if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd)) + pmd_flags2 = __PM_SOFT_DIRTY; + else + pmd_flags2 = 0; + for (; addr != end; addr += PAGE_SIZE) { unsigned long offset; @@ -997,19 +1027,24 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, if (err) break; } - spin_unlock(&walk->mm->page_table_lock); + spin_unlock(ptl); return err; } if (pmd_trans_unstable(pmd)) return 0; for (; addr != end; addr += PAGE_SIZE) { + int flags2; /* check to see if we've left 'vma' behind * and need a new, higher one */ if (vma && (addr >= vma->vm_end)) { vma = find_vma(walk->mm, addr); - pme = make_pme(PM_NOT_PRESENT(pm->v2)); + if (vma && (vma->vm_flags & VM_SOFTDIRTY)) + flags2 = __PM_SOFT_DIRTY; + else + flags2 = 0; + pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2)); } /* check that 'vma' actually covers this address, @@ -1033,13 +1068,15 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, #ifdef CONFIG_HUGETLB_PAGE static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, - pte_t pte, int offset) + pte_t pte, int offset, int flags2) { if (pte_present(pte)) - *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) - | PM_STATUS2(pm->v2, 0) | PM_PRESENT); + *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) | + PM_STATUS2(pm->v2, flags2) | + PM_PRESENT); else - *pme = make_pme(PM_NOT_PRESENT(pm->v2)); + *pme = make_pme(PM_NOT_PRESENT(pm->v2) | + PM_STATUS2(pm->v2, flags2)); } /* This function walks within one hugetlb entry in the single call */ @@ -1048,12 +1085,22 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, struct mm_walk *walk) { struct pagemapread *pm = walk->private; + struct vm_area_struct *vma; int err = 0; + int flags2; pagemap_entry_t pme; + vma = find_vma(walk->mm, addr); + WARN_ON_ONCE(!vma); + + if (vma && (vma->vm_flags & VM_SOFTDIRTY)) + flags2 = __PM_SOFT_DIRTY; + else + flags2 = 0; + for (; addr != end; addr += PAGE_SIZE) { int offset = (addr & ~hmask) >> PAGE_SHIFT; - huge_pte_to_pagemap_entry(&pme, pm, *pte, offset); + huge_pte_to_pagemap_entry(&pme, pm, *pte, offset, flags2); err = add_to_pagemap(addr, &pme, pm); if (err) return err; @@ -1116,8 +1163,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, goto out_task; pm.v2 = soft_dirty_cleared; - pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); - pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); + pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); + pm.buffer = kmalloc(pm.len * PM_ENTRY_BYTES, GFP_TEMPORARY); ret = -ENOMEM; if (!pm.buffer) goto out_task; @@ -1282,7 +1329,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, md = walk->private; - if (pmd_trans_huge_lock(pmd, md->vma) == 1) { + if (pmd_trans_huge_lock(pmd, md->vma, &ptl) == 1) { pte_t huge_pte = *(pte_t *)pmd; struct page *page; @@ -1290,7 +1337,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, if (page) gather_stats(page, md, pte_dirty(huge_pte), HPAGE_PMD_SIZE/PAGE_SIZE); - spin_unlock(&walk->mm->page_table_lock); + spin_unlock(ptl); return 0; } @@ -1314,7 +1361,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, struct numa_maps *md; struct page *page; - if (pte_none(*pte)) + if (!pte_present(*pte)) return 0; page = pte_page(*pte); @@ -1348,8 +1395,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) struct mm_struct *mm = vma->vm_mm; struct mm_walk walk = {}; struct mempolicy *pol; - int n; - char buffer[50]; + char buffer[64]; + int nid; if (!mm) return 0; @@ -1371,10 +1418,10 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) seq_printf(m, "%08lx %s", vma->vm_start, buffer); if (file) { - seq_printf(m, " file="); + seq_puts(m, " file="); seq_path(m, &file->f_path, "\n\t= "); } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { - seq_printf(m, " heap"); + seq_puts(m, " heap"); } else { pid_t tid = vm_is_stack(task, vma, is_pid); if (tid != 0) { @@ -1384,14 +1431,14 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) */ if (!is_pid || (vma->vm_start <= mm->start_stack && vma->vm_end >= mm->start_stack)) - seq_printf(m, " stack"); + seq_puts(m, " stack"); else seq_printf(m, " stack:%d", tid); } } if (is_vm_hugetlb_page(vma)) - seq_printf(m, " huge"); + seq_puts(m, " huge"); walk_page_range(vma->vm_start, vma->vm_end, &walk); @@ -1419,9 +1466,9 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) if (md->writeback) seq_printf(m, " writeback=%lu", md->writeback); - for_each_node_state(n, N_MEMORY) - if (md->node[n]) - seq_printf(m, " N%d=%lu", n, md->node[n]); + for_each_node_state(nid, N_MEMORY) + if (md->node[nid]) + seq_printf(m, " N%d=%lu", nid, md->node[nid]); out: seq_putc(m, '\n'); diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 56123a6f462..678455d2d68 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -123,14 +123,6 @@ unsigned long task_statm(struct mm_struct *mm, return size; } -static void pad_len_spaces(struct seq_file *m, int len) -{ - len = 25 + sizeof(void*) * 6 - len; - if (len < 1) - len = 1; - seq_printf(m, "%*c", len, ' '); -} - /* * display a single VMA to a sequenced file */ @@ -142,7 +134,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, unsigned long ino = 0; struct file *file; dev_t dev = 0; - int flags, len; + int flags; unsigned long long pgoff = 0; flags = vma->vm_flags; @@ -155,8 +147,9 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT; } + seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); seq_printf(m, - "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", + "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ", vma->vm_start, vma->vm_end, flags & VM_READ ? 'r' : '-', @@ -164,16 +157,16 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, flags & VM_EXEC ? 'x' : '-', flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', pgoff, - MAJOR(dev), MINOR(dev), ino, &len); + MAJOR(dev), MINOR(dev), ino); if (file) { - pad_len_spaces(m, len); + seq_pad(m, ' '); seq_path(m, &file->f_path, ""); } else if (mm) { pid_t tid = vm_is_stack(priv->task, vma, is_pid); if (tid != 0) { - pad_len_spaces(m, len); + seq_pad(m, ' '); /* * Thread stack in /proc/PID/task/TID/maps or * the main process stack. diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 06189462590..33de567c25a 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c @@ -5,7 +5,7 @@ #include <linux/seq_file.h> #include <linux/time.h> #include <linux/kernel_stat.h> -#include <asm/cputime.h> +#include <linux/cputime.h> static int uptime_proc_show(struct seq_file *m, void *v) { @@ -49,4 +49,4 @@ static int __init proc_uptime_init(void) proc_create("uptime", 0, NULL, &uptime_proc_fops); return 0; } -module_init(proc_uptime_init); +fs_initcall(proc_uptime_init); diff --git a/fs/proc/version.c b/fs/proc/version.c index 76817a60678..d2154eb6d78 100644 --- a/fs/proc/version.c +++ b/fs/proc/version.c @@ -31,4 +31,4 @@ static int __init proc_version_init(void) proc_create("version", 0, NULL, &version_proc_fops); return 0; } -module_init(proc_version_init); +fs_initcall(proc_version_init); diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 28503172f2e..382aa890e22 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -21,6 +21,7 @@ #include <linux/crash_dump.h> #include <linux/list.h> #include <linux/vmalloc.h> +#include <linux/pagemap.h> #include <asm/uaccess.h> #include <asm/io.h> #include "internal.h" @@ -41,7 +42,7 @@ static size_t elfnotes_sz; /* Total size of vmcore file. */ static u64 vmcore_size; -static struct proc_dir_entry *proc_vmcore = NULL; +static struct proc_dir_entry *proc_vmcore; /* * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error @@ -123,11 +124,65 @@ static ssize_t read_from_oldmem(char *buf, size_t count, return read; } +/* + * Architectures may override this function to allocate ELF header in 2nd kernel + */ +int __weak elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) +{ + return 0; +} + +/* + * Architectures may override this function to free header + */ +void __weak elfcorehdr_free(unsigned long long addr) +{} + +/* + * Architectures may override this function to read from ELF header + */ +ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos) +{ + return read_from_oldmem(buf, count, ppos, 0); +} + +/* + * Architectures may override this function to read from notes sections + */ +ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos) +{ + return read_from_oldmem(buf, count, ppos, 0); +} + +/* + * Architectures may override this function to map oldmem + */ +int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma, + unsigned long from, unsigned long pfn, + unsigned long size, pgprot_t prot) +{ + return remap_pfn_range(vma, from, pfn, size, prot); +} + +/* + * Copy to either kernel or user space + */ +static int copy_to(void *target, void *src, size_t size, int userbuf) +{ + if (userbuf) { + if (copy_to_user((char __user *) target, src, size)) + return -EFAULT; + } else { + memcpy(target, src, size); + } + return 0; +} + /* Read from the ELF header and then the crash dump. On error, negative value is * returned otherwise number of bytes read are returned. */ -static ssize_t read_vmcore(struct file *file, char __user *buffer, - size_t buflen, loff_t *fpos) +static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos, + int userbuf) { ssize_t acc = 0, tmp; size_t tsz; @@ -144,7 +199,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, /* Read ELF core header */ if (*fpos < elfcorebuf_sz) { tsz = min(elfcorebuf_sz - (size_t)*fpos, buflen); - if (copy_to_user(buffer, elfcorebuf + *fpos, tsz)) + if (copy_to(buffer, elfcorebuf + *fpos, tsz, userbuf)) return -EFAULT; buflen -= tsz; *fpos += tsz; @@ -162,7 +217,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos, buflen); kaddr = elfnotes_buf + *fpos - elfcorebuf_sz; - if (copy_to_user(buffer, kaddr, tsz)) + if (copy_to(buffer, kaddr, tsz, userbuf)) return -EFAULT; buflen -= tsz; *fpos += tsz; @@ -178,7 +233,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, if (*fpos < m->offset + m->size) { tsz = min_t(size_t, m->offset + m->size - *fpos, buflen); start = m->paddr + *fpos - m->offset; - tmp = read_from_oldmem(buffer, tsz, &start, 1); + tmp = read_from_oldmem(buffer, tsz, &start, userbuf); if (tmp < 0) return tmp; buflen -= tsz; @@ -195,6 +250,55 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, return acc; } +static ssize_t read_vmcore(struct file *file, char __user *buffer, + size_t buflen, loff_t *fpos) +{ + return __read_vmcore((__force char *) buffer, buflen, fpos, 1); +} + +/* + * The vmcore fault handler uses the page cache and fills data using the + * standard __vmcore_read() function. + * + * On s390 the fault handler is used for memory regions that can't be mapped + * directly with remap_pfn_range(). + */ +static int mmap_vmcore_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ +#ifdef CONFIG_S390 + struct address_space *mapping = vma->vm_file->f_mapping; + pgoff_t index = vmf->pgoff; + struct page *page; + loff_t offset; + char *buf; + int rc; + + page = find_or_create_page(mapping, index, GFP_KERNEL); + if (!page) + return VM_FAULT_OOM; + if (!PageUptodate(page)) { + offset = (loff_t) index << PAGE_CACHE_SHIFT; + buf = __va((page_to_pfn(page) << PAGE_SHIFT)); + rc = __read_vmcore(buf, PAGE_SIZE, &offset, 0); + if (rc < 0) { + unlock_page(page); + page_cache_release(page); + return (rc == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS; + } + SetPageUptodate(page); + } + unlock_page(page); + vmf->page = page; + return 0; +#else + return VM_FAULT_SIGBUS; +#endif +} + +static const struct vm_operations_struct vmcore_mmap_ops = { + .fault = mmap_vmcore_fault, +}; + /** * alloc_elfnotes_buf - allocate buffer for ELF note segment in * vmalloc memory @@ -241,6 +345,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC); vma->vm_flags |= VM_MIXEDMAP; + vma->vm_ops = &vmcore_mmap_ops; len = 0; @@ -282,9 +387,9 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) tsz = min_t(size_t, m->offset + m->size - start, size); paddr = m->paddr + start - m->offset; - if (remap_pfn_range(vma, vma->vm_start + len, - paddr >> PAGE_SHIFT, tsz, - vma->vm_page_prot)) + if (remap_oldmem_pfn_range(vma, vma->vm_start + len, + paddr >> PAGE_SHIFT, tsz, + vma->vm_page_prot)) goto fail; size -= tsz; start += tsz; @@ -357,23 +462,29 @@ static int __init update_note_header_size_elf64(const Elf64_Ehdr *ehdr_ptr) notes_section = kmalloc(max_sz, GFP_KERNEL); if (!notes_section) return -ENOMEM; - rc = read_from_oldmem(notes_section, max_sz, &offset, 0); + rc = elfcorehdr_read_notes(notes_section, max_sz, &offset); if (rc < 0) { kfree(notes_section); return rc; } nhdr_ptr = notes_section; - while (real_sz < max_sz) { - if (nhdr_ptr->n_namesz == 0) - break; + while (nhdr_ptr->n_namesz != 0) { sz = sizeof(Elf64_Nhdr) + ((nhdr_ptr->n_namesz + 3) & ~3) + ((nhdr_ptr->n_descsz + 3) & ~3); + if ((real_sz + sz) > max_sz) { + pr_warn("Warning: Exceeded p_memsz, dropping PT_NOTE entry n_namesz=0x%x, n_descsz=0x%x\n", + nhdr_ptr->n_namesz, nhdr_ptr->n_descsz); + break; + } real_sz += sz; nhdr_ptr = (Elf64_Nhdr*)((char*)nhdr_ptr + sz); } kfree(notes_section); phdr_ptr->p_memsz = real_sz; + if (real_sz == 0) { + pr_warn("Warning: Zero PT_NOTE entries found\n"); + } } return 0; @@ -444,7 +555,8 @@ static int __init copy_notes_elf64(const Elf64_Ehdr *ehdr_ptr, char *notes_buf) if (phdr_ptr->p_type != PT_NOTE) continue; offset = phdr_ptr->p_offset; - rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0); + rc = elfcorehdr_read_notes(notes_buf, phdr_ptr->p_memsz, + &offset); if (rc < 0) return rc; notes_buf += phdr_ptr->p_memsz; @@ -536,23 +648,29 @@ static int __init update_note_header_size_elf32(const Elf32_Ehdr *ehdr_ptr) notes_section = kmalloc(max_sz, GFP_KERNEL); if (!notes_section) return -ENOMEM; - rc = read_from_oldmem(notes_section, max_sz, &offset, 0); + rc = elfcorehdr_read_notes(notes_section, max_sz, &offset); if (rc < 0) { kfree(notes_section); return rc; } nhdr_ptr = notes_section; - while (real_sz < max_sz) { - if (nhdr_ptr->n_namesz == 0) - break; + while (nhdr_ptr->n_namesz != 0) { sz = sizeof(Elf32_Nhdr) + ((nhdr_ptr->n_namesz + 3) & ~3) + ((nhdr_ptr->n_descsz + 3) & ~3); + if ((real_sz + sz) > max_sz) { + pr_warn("Warning: Exceeded p_memsz, dropping PT_NOTE entry n_namesz=0x%x, n_descsz=0x%x\n", + nhdr_ptr->n_namesz, nhdr_ptr->n_descsz); + break; + } real_sz += sz; nhdr_ptr = (Elf32_Nhdr*)((char*)nhdr_ptr + sz); } kfree(notes_section); phdr_ptr->p_memsz = real_sz; + if (real_sz == 0) { + pr_warn("Warning: Zero PT_NOTE entries found\n"); + } } return 0; @@ -623,7 +741,8 @@ static int __init copy_notes_elf32(const Elf32_Ehdr *ehdr_ptr, char *notes_buf) if (phdr_ptr->p_type != PT_NOTE) continue; offset = phdr_ptr->p_offset; - rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0); + rc = elfcorehdr_read_notes(notes_buf, phdr_ptr->p_memsz, + &offset); if (rc < 0) return rc; notes_buf += phdr_ptr->p_memsz; @@ -810,7 +929,7 @@ static int __init parse_crash_elf64_headers(void) addr = elfcorehdr_addr; /* Read Elf header */ - rc = read_from_oldmem((char*)&ehdr, sizeof(Elf64_Ehdr), &addr, 0); + rc = elfcorehdr_read((char *)&ehdr, sizeof(Elf64_Ehdr), &addr); if (rc < 0) return rc; @@ -837,7 +956,7 @@ static int __init parse_crash_elf64_headers(void) if (!elfcorebuf) return -ENOMEM; addr = elfcorehdr_addr; - rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0); + rc = elfcorehdr_read(elfcorebuf, elfcorebuf_sz_orig, &addr); if (rc < 0) goto fail; @@ -866,7 +985,7 @@ static int __init parse_crash_elf32_headers(void) addr = elfcorehdr_addr; /* Read Elf header */ - rc = read_from_oldmem((char*)&ehdr, sizeof(Elf32_Ehdr), &addr, 0); + rc = elfcorehdr_read((char *)&ehdr, sizeof(Elf32_Ehdr), &addr); if (rc < 0) return rc; @@ -892,7 +1011,7 @@ static int __init parse_crash_elf32_headers(void) if (!elfcorebuf) return -ENOMEM; addr = elfcorehdr_addr; - rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0); + rc = elfcorehdr_read(elfcorebuf, elfcorebuf_sz_orig, &addr); if (rc < 0) goto fail; @@ -919,7 +1038,7 @@ static int __init parse_crash_elf_headers(void) int rc=0; addr = elfcorehdr_addr; - rc = read_from_oldmem(e_ident, EI_NIDENT, &addr, 0); + rc = elfcorehdr_read(e_ident, EI_NIDENT, &addr); if (rc < 0) return rc; if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) { @@ -952,7 +1071,14 @@ static int __init vmcore_init(void) { int rc = 0; - /* If elfcorehdr= has been passed in cmdline, then capture the dump.*/ + /* Allow architectures to allocate ELF header in 2nd kernel */ + rc = elfcorehdr_alloc(&elfcorehdr_addr, &elfcorehdr_size); + if (rc) + return rc; + /* + * If elfcorehdr= has been passed in cmdline or created in 2nd kernel, + * then capture the dump. + */ if (!(is_vmcore_usable())) return rc; rc = parse_crash_elf_headers(); @@ -960,13 +1086,15 @@ static int __init vmcore_init(void) pr_warn("Kdump: vmcore not initialized\n"); return rc; } + elfcorehdr_free(elfcorehdr_addr); + elfcorehdr_addr = ELFCORE_ADDR_ERR; proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations); if (proc_vmcore) proc_vmcore->size = vmcore_size; return 0; } -module_init(vmcore_init) +fs_initcall(vmcore_init); /* Cleanup function for vmcore module. */ void vmcore_cleanup(void) @@ -988,4 +1116,3 @@ void vmcore_cleanup(void) } free_elfcorebuf(); } -EXPORT_SYMBOL_GPL(vmcore_cleanup); |
