aboutsummaryrefslogtreecommitdiff
path: root/fs/proc
diff options
context:
space:
mode:
Diffstat (limited to 'fs/proc')
-rw-r--r--fs/proc/Kconfig4
-rw-r--r--fs/proc/Makefile1
-rw-r--r--fs/proc/array.c24
-rw-r--r--fs/proc/base.c139
-rw-r--r--fs/proc/cmdline.c2
-rw-r--r--fs/proc/consoles.c12
-rw-r--r--fs/proc/cpuinfo.c2
-rw-r--r--fs/proc/devices.c2
-rw-r--r--fs/proc/fd.c10
-rw-r--r--fs/proc/generic.c23
-rw-r--r--fs/proc/inode.c32
-rw-r--r--fs/proc/internal.h7
-rw-r--r--fs/proc/interrupts.c2
-rw-r--r--fs/proc/kcore.c5
-rw-r--r--fs/proc/kmsg.c2
-rw-r--r--fs/proc/loadavg.c2
-rw-r--r--fs/proc/meminfo.c52
-rw-r--r--fs/proc/namespaces.c22
-rw-r--r--fs/proc/nommu.c14
-rw-r--r--fs/proc/page.c9
-rw-r--r--fs/proc/proc_devtree.c243
-rw-r--r--fs/proc/root.c15
-rw-r--r--fs/proc/self.c12
-rw-r--r--fs/proc/softirqs.c2
-rw-r--r--fs/proc/stat.c26
-rw-r--r--fs/proc/task_mmu.c155
-rw-r--r--fs/proc/task_nommu.c19
-rw-r--r--fs/proc/uptime.c4
-rw-r--r--fs/proc/version.c2
-rw-r--r--fs/proc/vmcore.c183
30 files changed, 461 insertions, 566 deletions
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index 15af6222f8a..2183fcf41d5 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -31,6 +31,10 @@ config PROC_FS
config PROC_KCORE
bool "/proc/kcore support" if !ARM
depends on PROC_FS && MMU
+ help
+ Provides a virtual ELF core file of the live kernel. This can
+ be read with gdb and other ELF tools. No modifications can be
+ made using this mechanism.
config PROC_VMCORE
bool "/proc/vmcore support"
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index ab30716584f..239493ec718 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -27,6 +27,5 @@ proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
proc-$(CONFIG_NET) += proc_net.o
proc-$(CONFIG_PROC_KCORE) += kcore.o
proc-$(CONFIG_PROC_VMCORE) += vmcore.o
-proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o
proc-$(CONFIG_PRINTK) += kmsg.o
proc-$(CONFIG_PROC_PAGE_MONITOR) += page.o
diff --git a/fs/proc/array.c b/fs/proc/array.c
index cbd0f1b324b..64db2bceac5 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -138,26 +138,17 @@ static const char * const task_state_array[] = {
"D (disk sleep)", /* 2 */
"T (stopped)", /* 4 */
"t (tracing stop)", /* 8 */
- "Z (zombie)", /* 16 */
- "X (dead)", /* 32 */
- "x (dead)", /* 64 */
- "K (wakekill)", /* 128 */
- "W (waking)", /* 256 */
- "P (parked)", /* 512 */
+ "X (dead)", /* 16 */
+ "Z (zombie)", /* 32 */
};
static inline const char *get_task_state(struct task_struct *tsk)
{
- unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state;
- const char * const *p = &task_state_array[0];
+ unsigned int state = (tsk->state | tsk->exit_state) & TASK_REPORT;
- BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array));
+ BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != ARRAY_SIZE(task_state_array)-1);
- while (state) {
- p++;
- state >>= 1;
- }
- return *p;
+ return task_state_array[fls(state)];
}
static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
@@ -183,6 +174,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
seq_printf(m,
"State:\t%s\n"
"Tgid:\t%d\n"
+ "Ngid:\t%d\n"
"Pid:\t%d\n"
"PPid:\t%d\n"
"TracerPid:\t%d\n"
@@ -190,6 +182,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
"Gid:\t%d\t%d\t%d\t%d\n",
get_task_state(p),
task_tgid_nr_ns(p, ns),
+ task_numa_group_id(p),
pid_nr_ns(pid, ns),
ppid, tpid,
from_kuid_munged(user_ns, cred->uid),
@@ -451,8 +444,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
min_flt += t->min_flt;
maj_flt += t->maj_flt;
gtime += task_gtime(t);
- t = next_thread(t);
- } while (t != task);
+ } while_each_thread(task, t);
min_flt += sig->min_flt;
maj_flt += sig->maj_flt;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1485e38daaa..2d696b0c93b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -200,41 +200,9 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
return result;
}
-static int proc_pid_cmdline(struct task_struct *task, char * buffer)
+static int proc_pid_cmdline(struct task_struct *task, char *buffer)
{
- int res = 0;
- unsigned int len;
- struct mm_struct *mm = get_task_mm(task);
- if (!mm)
- goto out;
- if (!mm->arg_end)
- goto out_mm; /* Shh! No looking before we're done */
-
- len = mm->arg_end - mm->arg_start;
-
- if (len > PAGE_SIZE)
- len = PAGE_SIZE;
-
- res = access_process_vm(task, mm->arg_start, buffer, len, 0);
-
- // If the nul at the end of args has been overwritten, then
- // assume application is using setproctitle(3).
- if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) {
- len = strnlen(buffer, res);
- if (len < res) {
- res = len;
- } else {
- len = mm->env_end - mm->env_start;
- if (len > PAGE_SIZE - res)
- len = PAGE_SIZE - res;
- res += access_process_vm(task, mm->env_start, buffer+res, len, 0);
- res = strnlen(buffer, res);
- }
- }
-out_mm:
- mmput(mm);
-out:
- return res;
+ return get_cmdline(task, buffer, PAGE_SIZE);
}
static int proc_pid_auxv(struct task_struct *task, char *buffer)
@@ -1151,10 +1119,16 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
goto out_free_page;
}
- kloginuid = make_kuid(file->f_cred->user_ns, loginuid);
- if (!uid_valid(kloginuid)) {
- length = -EINVAL;
- goto out_free_page;
+
+ /* is userspace tring to explicitly UNSET the loginuid? */
+ if (loginuid == AUDIT_UID_UNSET) {
+ kloginuid = INVALID_UID;
+ } else {
+ kloginuid = make_kuid(file->f_cred->user_ns, loginuid);
+ if (!uid_valid(kloginuid)) {
+ length = -EINVAL;
+ goto out_free_page;
+ }
}
length = audit_set_loginuid(kloginuid);
@@ -1230,6 +1204,9 @@ static ssize_t proc_fault_inject_write(struct file * file,
make_it_fail = simple_strtol(strstrip(buffer), &end, 0);
if (*end)
return -EINVAL;
+ if (make_it_fail < 0 || make_it_fail > 1)
+ return -EINVAL;
+
task = get_proc_task(file_inode(file));
if (!task)
return -ESRCH;
@@ -1652,13 +1629,18 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags)
return 0;
}
+static inline bool proc_inode_is_dead(struct inode *inode)
+{
+ return !proc_pid(inode)->tasks[PIDTYPE_PID].first;
+}
+
int pid_delete_dentry(const struct dentry *dentry)
{
/* Is the task we represent dead?
* If so, then don't put the dentry on the lru list,
* kill it immediately.
*/
- return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
+ return proc_inode_is_dead(dentry->d_inode);
}
const struct dentry_operations pid_dentry_operations =
@@ -1813,6 +1795,7 @@ static int proc_map_files_get_link(struct dentry *dentry, struct path *path)
if (rc)
goto out_mmput;
+ rc = -ENOENT;
down_read(&mm->mmap_sem);
vma = find_exact_vma(mm, vm_start, vm_end);
if (vma && vma->vm_file) {
@@ -2576,7 +2559,7 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("environ", S_IRUSR, proc_environ_operations),
INF("auxv", S_IRUSR, proc_pid_auxv),
ONE("status", S_IRUGO, proc_pid_status),
- ONE("personality", S_IRUGO, proc_pid_personality),
+ ONE("personality", S_IRUSR, proc_pid_personality),
INF("limits", S_IRUGO, proc_pid_limits),
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
@@ -2586,7 +2569,7 @@ static const struct pid_entry tgid_base_stuff[] = {
#endif
REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
- INF("syscall", S_IRUGO, proc_pid_syscall),
+ INF("syscall", S_IRUSR, proc_pid_syscall),
#endif
INF("cmdline", S_IRUGO, proc_pid_cmdline),
ONE("stat", S_IRUGO, proc_tgid_stat),
@@ -2605,7 +2588,7 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_PROC_PAGE_MONITOR
REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
REG("smaps", S_IRUGO, proc_pid_smaps_operations),
- REG("pagemap", S_IRUGO, proc_pagemap_operations),
+ REG("pagemap", S_IRUSR, proc_pagemap_operations),
#endif
#ifdef CONFIG_SECURITY
DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
@@ -2614,7 +2597,7 @@ static const struct pid_entry tgid_base_stuff[] = {
INF("wchan", S_IRUGO, proc_pid_wchan),
#endif
#ifdef CONFIG_STACKTRACE
- ONE("stack", S_IRUGO, proc_pid_stack),
+ ONE("stack", S_IRUSR, proc_pid_stack),
#endif
#ifdef CONFIG_SCHEDSTATS
INF("schedstat", S_IRUGO, proc_pid_schedstat),
@@ -2915,14 +2898,14 @@ static const struct pid_entry tid_base_stuff[] = {
REG("environ", S_IRUSR, proc_environ_operations),
INF("auxv", S_IRUSR, proc_pid_auxv),
ONE("status", S_IRUGO, proc_pid_status),
- ONE("personality", S_IRUGO, proc_pid_personality),
+ ONE("personality", S_IRUSR, proc_pid_personality),
INF("limits", S_IRUGO, proc_pid_limits),
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif
REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
- INF("syscall", S_IRUGO, proc_pid_syscall),
+ INF("syscall", S_IRUSR, proc_pid_syscall),
#endif
INF("cmdline", S_IRUGO, proc_pid_cmdline),
ONE("stat", S_IRUGO, proc_tid_stat),
@@ -2943,7 +2926,7 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_PROC_PAGE_MONITOR
REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
REG("smaps", S_IRUGO, proc_tid_smaps_operations),
- REG("pagemap", S_IRUGO, proc_pagemap_operations),
+ REG("pagemap", S_IRUSR, proc_pagemap_operations),
#endif
#ifdef CONFIG_SECURITY
DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
@@ -2952,7 +2935,7 @@ static const struct pid_entry tid_base_stuff[] = {
INF("wchan", S_IRUGO, proc_pid_wchan),
#endif
#ifdef CONFIG_STACKTRACE
- ONE("stack", S_IRUGO, proc_pid_stack),
+ ONE("stack", S_IRUSR, proc_pid_stack),
#endif
#ifdef CONFIG_SCHEDSTATS
INF("schedstat", S_IRUGO, proc_pid_schedstat),
@@ -3086,34 +3069,42 @@ out_no_task:
* In the case of a seek we start with the leader and walk nr
* threads past it.
*/
-static struct task_struct *first_tid(struct task_struct *leader,
- int tid, int nr, struct pid_namespace *ns)
+static struct task_struct *first_tid(struct pid *pid, int tid, loff_t f_pos,
+ struct pid_namespace *ns)
{
- struct task_struct *pos;
+ struct task_struct *pos, *task;
+ unsigned long nr = f_pos;
+
+ if (nr != f_pos) /* 32bit overflow? */
+ return NULL;
rcu_read_lock();
- /* Attempt to start with the pid of a thread */
- if (tid && (nr > 0)) {
+ task = pid_task(pid, PIDTYPE_PID);
+ if (!task)
+ goto fail;
+
+ /* Attempt to start with the tid of a thread */
+ if (tid && nr) {
pos = find_task_by_pid_ns(tid, ns);
- if (pos && (pos->group_leader == leader))
+ if (pos && same_thread_group(pos, task))
goto found;
}
/* If nr exceeds the number of threads there is nothing todo */
- pos = NULL;
- if (nr && nr >= get_nr_threads(leader))
- goto out;
+ if (nr >= get_nr_threads(task))
+ goto fail;
/* If we haven't found our starting place yet start
* with the leader and walk nr threads forward.
*/
- for (pos = leader; nr > 0; --nr) {
- pos = next_thread(pos);
- if (pos == leader) {
- pos = NULL;
- goto out;
- }
- }
+ pos = task = task->group_leader;
+ do {
+ if (!nr--)
+ goto found;
+ } while_each_thread(task, pos);
+fail:
+ pos = NULL;
+ goto out;
found:
get_task_struct(pos);
out:
@@ -3146,25 +3137,16 @@ static struct task_struct *next_tid(struct task_struct *start)
/* for the /proc/TGID/task/ directories */
static int proc_task_readdir(struct file *file, struct dir_context *ctx)
{
- struct task_struct *leader = NULL;
- struct task_struct *task = get_proc_task(file_inode(file));
+ struct inode *inode = file_inode(file);
+ struct task_struct *task;
struct pid_namespace *ns;
int tid;
- if (!task)
- return -ENOENT;
- rcu_read_lock();
- if (pid_alive(task)) {
- leader = task->group_leader;
- get_task_struct(leader);
- }
- rcu_read_unlock();
- put_task_struct(task);
- if (!leader)
+ if (proc_inode_is_dead(inode))
return -ENOENT;
if (!dir_emit_dots(file, ctx))
- goto out;
+ return 0;
/* f_version caches the tgid value that the last readdir call couldn't
* return. lseek aka telldir automagically resets f_version to 0.
@@ -3172,7 +3154,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
ns = file->f_dentry->d_sb->s_fs_info;
tid = (int)file->f_version;
file->f_version = 0;
- for (task = first_tid(leader, tid, ctx->pos - 2, ns);
+ for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);
task;
task = next_tid(task), ctx->pos++) {
char name[PROC_NUMBUF];
@@ -3188,8 +3170,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
break;
}
}
-out:
- put_task_struct(leader);
+
return 0;
}
diff --git a/fs/proc/cmdline.c b/fs/proc/cmdline.c
index 82676e3fcd1..cbd82dff7e8 100644
--- a/fs/proc/cmdline.c
+++ b/fs/proc/cmdline.c
@@ -26,4 +26,4 @@ static int __init proc_cmdline_init(void)
proc_create("cmdline", 0, NULL, &cmdline_proc_fops);
return 0;
}
-module_init(proc_cmdline_init);
+fs_initcall(proc_cmdline_init);
diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c
index b701eaa482b..290ba85cb90 100644
--- a/fs/proc/consoles.c
+++ b/fs/proc/consoles.c
@@ -29,7 +29,6 @@ static int show_console_dev(struct seq_file *m, void *v)
char flags[ARRAY_SIZE(con_flags) + 1];
struct console *con = v;
unsigned int a;
- int len;
dev_t dev = 0;
if (con->device) {
@@ -47,11 +46,10 @@ static int show_console_dev(struct seq_file *m, void *v)
con_flags[a].name : ' ';
flags[a] = 0;
- seq_printf(m, "%s%d%n", con->name, con->index, &len);
- len = 21 - len;
- if (len < 1)
- len = 1;
- seq_printf(m, "%*c%c%c%c (%s)", len, ' ', con->read ? 'R' : '-',
+ seq_setwidth(m, 21 - 1);
+ seq_printf(m, "%s%d", con->name, con->index);
+ seq_pad(m, ' ');
+ seq_printf(m, "%c%c%c (%s)", con->read ? 'R' : '-',
con->write ? 'W' : '-', con->unblank ? 'U' : '-',
flags);
if (dev)
@@ -111,4 +109,4 @@ static int __init proc_consoles_init(void)
proc_create("consoles", 0, NULL, &proc_consoles_operations);
return 0;
}
-module_init(proc_consoles_init);
+fs_initcall(proc_consoles_init);
diff --git a/fs/proc/cpuinfo.c b/fs/proc/cpuinfo.c
index 5a1e539a234..06f4d31e039 100644
--- a/fs/proc/cpuinfo.c
+++ b/fs/proc/cpuinfo.c
@@ -21,4 +21,4 @@ static int __init proc_cpuinfo_init(void)
proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations);
return 0;
}
-module_init(proc_cpuinfo_init);
+fs_initcall(proc_cpuinfo_init);
diff --git a/fs/proc/devices.c b/fs/proc/devices.c
index b14347167c3..50493edc30e 100644
--- a/fs/proc/devices.c
+++ b/fs/proc/devices.c
@@ -67,4 +67,4 @@ static int __init proc_devices_init(void)
proc_create("devices", 0, NULL, &proc_devinfo_operations);
return 0;
}
-module_init(proc_devices_init);
+fs_initcall(proc_devices_init);
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 75f2890abbd..0788d093f5d 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -11,6 +11,7 @@
#include <linux/proc_fs.h>
+#include "../mount.h"
#include "internal.h"
#include "fd.h"
@@ -48,8 +49,9 @@ static int seq_show(struct seq_file *m, void *v)
}
if (!ret) {
- seq_printf(m, "pos:\t%lli\nflags:\t0%o\n",
- (long long)file->f_pos, f_flags);
+ seq_printf(m, "pos:\t%lli\nflags:\t0%o\nmnt_id:\t%i\n",
+ (long long)file->f_pos, f_flags,
+ real_mount(file->f_path.mnt)->mnt_id);
if (file->f_op->show_fdinfo)
ret = file->f_op->show_fdinfo(m, file);
fput(file);
@@ -230,8 +232,6 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx,
if (!dir_emit_dots(file, ctx))
goto out;
- if (!dir_emit_dots(file, ctx))
- goto out;
files = get_files_struct(p);
if (!files)
goto out;
@@ -288,7 +288,7 @@ int proc_fd_permission(struct inode *inode, int mask)
int rv = generic_permission(inode, mask);
if (rv == 0)
return 0;
- if (task_pid(current) == proc_pid(inode))
+ if (task_tgid(current) == proc_pid(inode))
rv = 0;
return rv;
}
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 94441a40733..b7f268eb5f4 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -49,8 +49,7 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
setattr_copy(inode, iattr);
mark_inode_dirty(inode);
- de->uid = inode->i_uid;
- de->gid = inode->i_gid;
+ proc_set_user(de, inode->i_uid, inode->i_gid);
de->mode = inode->i_mode;
return 0;
}
@@ -175,22 +174,6 @@ static const struct inode_operations proc_link_inode_operations = {
};
/*
- * As some entries in /proc are volatile, we want to
- * get rid of unused dentries. This could be made
- * smarter: we could keep a "volatile" flag in the
- * inode to indicate which ones to keep.
- */
-static int proc_delete_dentry(const struct dentry * dentry)
-{
- return 1;
-}
-
-static const struct dentry_operations proc_dentry_operations =
-{
- .d_delete = proc_delete_dentry,
-};
-
-/*
* Don't create negative dentries here, return -ENOENT by hand
* instead.
*/
@@ -209,7 +192,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
inode = proc_get_inode(dir->i_sb, de);
if (!inode)
return ERR_PTR(-ENOMEM);
- d_set_d_op(dentry, &proc_dentry_operations);
+ d_set_d_op(dentry, &simple_dentry_operations);
d_add(dentry, inode);
return NULL;
}
@@ -271,7 +254,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file,
de = next;
} while (de);
spin_unlock(&proc_subdir_lock);
- return 0;
+ return 1;
}
int proc_readdir(struct file *file, struct dir_context *ctx)
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 073aea60cf8..0adbc02d60e 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -35,7 +35,7 @@ static void proc_evict_inode(struct inode *inode)
const struct proc_ns_operations *ns_ops;
void *ns;
- truncate_inode_pages(&inode->i_data, 0);
+ truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
/* Stop tracking associated processes */
@@ -47,7 +47,7 @@ static void proc_evict_inode(struct inode *inode)
pde_put(de);
head = PROC_I(inode)->sysctl;
if (head) {
- rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
+ RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
sysctl_head_put(head);
}
/* Release any associated namespace */
@@ -285,6 +285,32 @@ static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
return rv;
}
+static unsigned long
+proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
+{
+ struct proc_dir_entry *pde = PDE(file_inode(file));
+ unsigned long rv = -EIO;
+
+ if (use_pde(pde)) {
+ typeof(proc_reg_get_unmapped_area) *get_area;
+
+ get_area = pde->proc_fops->get_unmapped_area;
+#ifdef CONFIG_MMU
+ if (!get_area)
+ get_area = current->mm->get_unmapped_area;
+#endif
+
+ if (get_area)
+ rv = get_area(file, orig_addr, len, pgoff, flags);
+ else
+ rv = orig_addr;
+ unuse_pde(pde);
+ }
+ return rv;
+}
+
static int proc_reg_open(struct inode *inode, struct file *file)
{
struct proc_dir_entry *pde = PDE(inode);
@@ -356,6 +382,7 @@ static const struct file_operations proc_reg_file_ops = {
.compat_ioctl = proc_reg_compat_ioctl,
#endif
.mmap = proc_reg_mmap,
+ .get_unmapped_area = proc_reg_get_unmapped_area,
.open = proc_reg_open,
.release = proc_reg_release,
};
@@ -368,6 +395,7 @@ static const struct file_operations proc_reg_file_ops_no_compat = {
.poll = proc_reg_poll,
.unlocked_ioctl = proc_reg_unlocked_ioctl,
.mmap = proc_reg_mmap,
+ .get_unmapped_area = proc_reg_get_unmapped_area,
.open = proc_reg_open,
.release = proc_reg_release,
};
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 651d09a11dd..3ab6d14e71c 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -211,13 +211,6 @@ extern int proc_fill_super(struct super_block *);
extern void proc_entry_rundown(struct proc_dir_entry *);
/*
- * proc_devtree.c
- */
-#ifdef CONFIG_PROC_DEVICETREE
-extern void proc_device_tree_init(void);
-#endif
-
-/*
* proc_namespaces.c
*/
extern const struct inode_operations proc_ns_dir_inode_operations;
diff --git a/fs/proc/interrupts.c b/fs/proc/interrupts.c
index 05029c0e2f2..a352d5703b4 100644
--- a/fs/proc/interrupts.c
+++ b/fs/proc/interrupts.c
@@ -50,4 +50,4 @@ static int __init proc_interrupts_init(void)
proc_create("interrupts", 0, NULL, &proc_interrupts_operations);
return 0;
}
-module_init(proc_interrupts_init);
+fs_initcall(proc_interrupts_init);
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 06ea155e1a5..39e6ef32f0b 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -255,8 +255,7 @@ static int kcore_update_ram(void)
end_pfn = 0;
for_each_node_state(nid, N_MEMORY) {
unsigned long node_end;
- node_end = NODE_DATA(nid)->node_start_pfn +
- NODE_DATA(nid)->node_spanned_pages;
+ node_end = node_end_pfn(nid);
if (end_pfn < node_end)
end_pfn = node_end;
}
@@ -640,4 +639,4 @@ static int __init proc_kcore_init(void)
return 0;
}
-module_init(proc_kcore_init);
+fs_initcall(proc_kcore_init);
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index bdfabdaefdc..05f8dcdb086 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -61,4 +61,4 @@ static int __init proc_kmsg_init(void)
proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations);
return 0;
}
-module_init(proc_kmsg_init);
+fs_initcall(proc_kmsg_init);
diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c
index 1afa4dd4cae..aec66e6c206 100644
--- a/fs/proc/loadavg.c
+++ b/fs/proc/loadavg.c
@@ -42,4 +42,4 @@ static int __init proc_loadavg_init(void)
proc_create("loadavg", 0, NULL, &loadavg_proc_fops);
return 0;
}
-module_init(proc_loadavg_init);
+fs_initcall(proc_loadavg_init);
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 5aa847a603c..7445af0b1aa 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -1,8 +1,8 @@
#include <linux/fs.h>
-#include <linux/hugetlb.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
+#include <linux/hugetlb.h>
#include <linux/mman.h>
#include <linux/mmzone.h>
#include <linux/proc_fs.h>
@@ -24,10 +24,13 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
{
struct sysinfo i;
unsigned long committed;
- unsigned long allowed;
struct vmalloc_info vmi;
long cached;
+ long available;
+ unsigned long pagecache;
+ unsigned long wmark_low = 0;
unsigned long pages[NR_LRU_LISTS];
+ struct zone *zone;
int lru;
/*
@@ -37,8 +40,6 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
si_meminfo(&i);
si_swapinfo(&i);
committed = percpu_counter_read_positive(&vm_committed_as);
- allowed = ((totalram_pages - hugetlb_total_pages())
- * sysctl_overcommit_ratio / 100) + total_swap_pages;
cached = global_page_state(NR_FILE_PAGES) -
total_swapcache_pages() - i.bufferram;
@@ -50,12 +51,44 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
pages[lru] = global_page_state(NR_LRU_BASE + lru);
+ for_each_zone(zone)
+ wmark_low += zone->watermark[WMARK_LOW];
+
+ /*
+ * Estimate the amount of memory available for userspace allocations,
+ * without causing swapping.
+ *
+ * Free memory cannot be taken below the low watermark, before the
+ * system starts swapping.
+ */
+ available = i.freeram - wmark_low;
+
+ /*
+ * Not all the page cache can be freed, otherwise the system will
+ * start swapping. Assume at least half of the page cache, or the
+ * low watermark worth of cache, needs to stay.
+ */
+ pagecache = pages[LRU_ACTIVE_FILE] + pages[LRU_INACTIVE_FILE];
+ pagecache -= min(pagecache / 2, wmark_low);
+ available += pagecache;
+
+ /*
+ * Part of the reclaimable slab consists of items that are in use,
+ * and cannot be freed. Cap this estimate at the low watermark.
+ */
+ available += global_page_state(NR_SLAB_RECLAIMABLE) -
+ min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low);
+
+ if (available < 0)
+ available = 0;
+
/*
* Tagged format, for easy grepping and expansion.
*/
seq_printf(m,
"MemTotal: %8lu kB\n"
"MemFree: %8lu kB\n"
+ "MemAvailable: %8lu kB\n"
"Buffers: %8lu kB\n"
"Cached: %8lu kB\n"
"SwapCached: %8lu kB\n"
@@ -108,6 +141,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
,
K(i.totalram),
K(i.freeram),
+ K(available),
K(i.bufferram),
K(cached),
K(total_swapcache_pages()),
@@ -132,13 +166,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
K(i.freeswap),
K(global_page_state(NR_FILE_DIRTY)),
K(global_page_state(NR_WRITEBACK)),
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- K(global_page_state(NR_ANON_PAGES)
- + global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
- HPAGE_PMD_NR),
-#else
K(global_page_state(NR_ANON_PAGES)),
-#endif
K(global_page_state(NR_FILE_MAPPED)),
K(global_page_state(NR_SHMEM)),
K(global_page_state(NR_SLAB_RECLAIMABLE) +
@@ -153,7 +181,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
K(global_page_state(NR_UNSTABLE_NFS)),
K(global_page_state(NR_BOUNCE)),
K(global_page_state(NR_WRITEBACK_TEMP)),
- K(allowed),
+ K(vm_commit_limit()),
K(committed),
(unsigned long)VMALLOC_TOTAL >> 10,
vmi.used >> 10,
@@ -192,4 +220,4 @@ static int __init proc_meminfo_init(void)
proc_create("meminfo", 0, NULL, &meminfo_proc_fops);
return 0;
}
-module_init(proc_meminfo_init);
+fs_initcall(proc_meminfo_init);
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 49a7fff2e83..89026095f2b 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -42,12 +42,6 @@ static const struct inode_operations ns_inode_operations = {
.setattr = proc_setattr,
};
-static int ns_delete_dentry(const struct dentry *dentry)
-{
- /* Don't cache namespace inodes when not in use */
- return 1;
-}
-
static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
{
struct inode *inode = dentry->d_inode;
@@ -59,7 +53,7 @@ static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
const struct dentry_operations ns_dentry_operations =
{
- .d_delete = ns_delete_dentry,
+ .d_delete = always_delete_dentry,
.d_dname = ns_dname,
};
@@ -152,7 +146,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl
struct task_struct *task;
void *ns;
char name[50];
- int len = -EACCES;
+ int res = -EACCES;
task = get_proc_task(inode);
if (!task)
@@ -161,24 +155,18 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl
if (!ptrace_may_access(task, PTRACE_MODE_READ))
goto out_put_task;
- len = -ENOENT;
+ res = -ENOENT;
ns = ns_ops->get(task);
if (!ns)
goto out_put_task;
snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns));
- len = strlen(name);
-
- if (len > buflen)
- len = buflen;
- if (copy_to_user(buffer, name, len))
- len = -EFAULT;
-
+ res = readlink_copy(buffer, buflen, name);
ns_ops->put(ns);
out_put_task:
put_task_struct(task);
out:
- return len;
+ return res;
}
static const struct inode_operations proc_ns_link_inode_operations = {
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index ccfd99bd1c5..d4a35746cab 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -39,7 +39,7 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region)
unsigned long ino = 0;
struct file *file;
dev_t dev = 0;
- int flags, len;
+ int flags;
flags = region->vm_flags;
file = region->vm_file;
@@ -50,8 +50,9 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region)
ino = inode->i_ino;
}
+ seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
seq_printf(m,
- "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
+ "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
region->vm_start,
region->vm_end,
flags & VM_READ ? 'r' : '-',
@@ -59,13 +60,10 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region)
flags & VM_EXEC ? 'x' : '-',
flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
((loff_t)region->vm_pgoff) << PAGE_SHIFT,
- MAJOR(dev), MINOR(dev), ino, &len);
+ MAJOR(dev), MINOR(dev), ino);
if (file) {
- len = 25 + sizeof(void *) * 6 - len;
- if (len < 1)
- len = 1;
- seq_printf(m, "%*c", len, ' ');
+ seq_pad(m, ' ');
seq_path(m, &file->f_path, "");
}
@@ -133,4 +131,4 @@ static int __init proc_nommu_init(void)
return 0;
}
-module_init(proc_nommu_init);
+fs_initcall(proc_nommu_init);
diff --git a/fs/proc/page.c b/fs/proc/page.c
index b8730d9ebae..e647c55275d 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -118,10 +118,11 @@ u64 stable_page_flags(struct page *page)
/*
* PageTransCompound can be true for non-huge compound pages (slab
* pages or pages allocated by drivers with __GFP_COMP) because it
- * just checks PG_head/PG_tail, so we need to check PageLRU to make
- * sure a given page is a thp, not a non-huge compound page.
+ * just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon
+ * to make sure a given page is a thp, not a non-huge compound page.
*/
- else if (PageTransCompound(page) && PageLRU(compound_trans_head(page)))
+ else if (PageTransCompound(page) && (PageLRU(compound_head(page)) ||
+ PageAnon(compound_head(page))))
u |= 1 << KPF_THP;
/*
@@ -217,4 +218,4 @@ static int __init proc_page_init(void)
proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations);
return 0;
}
-module_init(proc_page_init);
+fs_initcall(proc_page_init);
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
deleted file mode 100644
index 106a8357063..00000000000
--- a/fs/proc/proc_devtree.c
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
- * proc_devtree.c - handles /proc/device-tree
- *
- * Copyright 1997 Paul Mackerras
- */
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/time.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/printk.h>
-#include <linux/stat.h>
-#include <linux/string.h>
-#include <linux/of.h>
-#include <linux/export.h>
-#include <linux/slab.h>
-#include <asm/prom.h>
-#include <asm/uaccess.h>
-#include "internal.h"
-
-static inline void set_node_proc_entry(struct device_node *np,
- struct proc_dir_entry *de)
-{
-#ifdef HAVE_ARCH_DEVTREE_FIXUPS
- np->pde = de;
-#endif
-}
-
-static struct proc_dir_entry *proc_device_tree;
-
-/*
- * Supply data on a read from /proc/device-tree/node/property.
- */
-static int property_proc_show(struct seq_file *m, void *v)
-{
- struct property *pp = m->private;
-
- seq_write(m, pp->value, pp->length);
- return 0;
-}
-
-static int property_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, property_proc_show, __PDE_DATA(inode));
-}
-
-static const struct file_operations property_proc_fops = {
- .owner = THIS_MODULE,
- .open = property_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-/*
- * For a node with a name like "gc@10", we make symlinks called "gc"
- * and "@10" to it.
- */
-
-/*
- * Add a property to a node
- */
-static struct proc_dir_entry *
-__proc_device_tree_add_prop(struct proc_dir_entry *de, struct property *pp,
- const char *name)
-{
- struct proc_dir_entry *ent;
-
- /*
- * Unfortunately proc_register puts each new entry
- * at the beginning of the list. So we rearrange them.
- */
- ent = proc_create_data(name,
- strncmp(name, "security-", 9) ? S_IRUGO : S_IRUSR,
- de, &property_proc_fops, pp);
- if (ent == NULL)
- return NULL;
-
- if (!strncmp(name, "security-", 9))
- ent->size = 0; /* don't leak number of password chars */
- else
- ent->size = pp->length;
-
- return ent;
-}
-
-
-void proc_device_tree_add_prop(struct proc_dir_entry *pde, struct property *prop)
-{
- __proc_device_tree_add_prop(pde, prop, prop->name);
-}
-
-void proc_device_tree_remove_prop(struct proc_dir_entry *pde,
- struct property *prop)
-{
- remove_proc_entry(prop->name, pde);
-}
-
-void proc_device_tree_update_prop(struct proc_dir_entry *pde,
- struct property *newprop,
- struct property *oldprop)
-{
- struct proc_dir_entry *ent;
-
- if (!oldprop) {
- proc_device_tree_add_prop(pde, newprop);
- return;
- }
-
- for (ent = pde->subdir; ent != NULL; ent = ent->next)
- if (ent->data == oldprop)
- break;
- if (ent == NULL) {
- pr_warn("device-tree: property \"%s\" does not exist\n",
- oldprop->name);
- } else {
- ent->data = newprop;
- ent->size = newprop->length;
- }
-}
-
-/*
- * Various dodgy firmware might give us nodes and/or properties with
- * conflicting names. That's generally ok, except for exporting via /proc,
- * so munge names here to ensure they're unique.
- */
-
-static int duplicate_name(struct proc_dir_entry *de, const char *name)
-{
- struct proc_dir_entry *ent;
- int found = 0;
-
- spin_lock(&proc_subdir_lock);
-
- for (ent = de->subdir; ent != NULL; ent = ent->next) {
- if (strcmp(ent->name, name) == 0) {
- found = 1;
- break;
- }
- }
-
- spin_unlock(&proc_subdir_lock);
-
- return found;
-}
-
-static const char *fixup_name(struct device_node *np, struct proc_dir_entry *de,
- const char *name)
-{
- char *fixed_name;
- int fixup_len = strlen(name) + 2 + 1; /* name + #x + \0 */
- int i = 1, size;
-
-realloc:
- fixed_name = kmalloc(fixup_len, GFP_KERNEL);
- if (fixed_name == NULL) {
- pr_err("device-tree: Out of memory trying to fixup "
- "name \"%s\"\n", name);
- return name;
- }
-
-retry:
- size = snprintf(fixed_name, fixup_len, "%s#%d", name, i);
- size++; /* account for NULL */
-
- if (size > fixup_len) {
- /* We ran out of space, free and reallocate. */
- kfree(fixed_name);
- fixup_len = size;
- goto realloc;
- }
-
- if (duplicate_name(de, fixed_name)) {
- /* Multiple duplicates. Retry with a different offset. */
- i++;
- goto retry;
- }
-
- pr_warn("device-tree: Duplicate name in %s, renamed to \"%s\"\n",
- np->full_name, fixed_name);
-
- return fixed_name;
-}
-
-/*
- * Process a node, adding entries for its children and its properties.
- */
-void proc_device_tree_add_node(struct device_node *np,
- struct proc_dir_entry *de)
-{
- struct property *pp;
- struct proc_dir_entry *ent;
- struct device_node *child;
- const char *p;
-
- set_node_proc_entry(np, de);
- for (child = NULL; (child = of_get_next_child(np, child));) {
- /* Use everything after the last slash, or the full name */
- p = kbasename(child->full_name);
-
- if (duplicate_name(de, p))
- p = fixup_name(np, de, p);
-
- ent = proc_mkdir(p, de);
- if (ent == NULL)
- break;
- proc_device_tree_add_node(child, ent);
- }
- of_node_put(child);
-
- for (pp = np->properties; pp != NULL; pp = pp->next) {
- p = pp->name;
-
- if (strchr(p, '/'))
- continue;
-
- if (duplicate_name(de, p))
- p = fixup_name(np, de, p);
-
- ent = __proc_device_tree_add_prop(de, pp, p);
- if (ent == NULL)
- break;
- }
-}
-
-/*
- * Called on initialization to set up the /proc/device-tree subtree
- */
-void __init proc_device_tree_init(void)
-{
- struct device_node *root;
-
- proc_device_tree = proc_mkdir("device-tree", NULL);
- if (proc_device_tree == NULL)
- return;
- root = of_find_node_by_path("/");
- if (root == NULL) {
- pr_debug("/proc/device-tree: can't find root\n");
- return;
- }
- proc_device_tree_add_node(root, proc_device_tree);
- of_node_put(root);
-}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 229e366598d..5dbadecb234 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -92,6 +92,8 @@ static int proc_parse_options(char *options, struct pid_namespace *pid)
int proc_remount(struct super_block *sb, int *flags, char *data)
{
struct pid_namespace *pid = sb->s_fs_info;
+
+ sync_filesystem(sb);
return !proc_parse_options(data, pid);
}
@@ -110,7 +112,11 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
ns = task_active_pid_ns(current);
options = data;
- if (!current_user_ns()->may_mount_proc)
+ if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
+ return ERR_PTR(-EPERM);
+
+ /* Does the mounter have privilege over the pid namespace? */
+ if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
}
@@ -179,9 +185,6 @@ void __init proc_root_init(void)
proc_mkdir("openprom", NULL);
#endif
proc_tty_init();
-#ifdef CONFIG_PROC_DEVICETREE
- proc_device_tree_init();
-#endif
proc_mkdir("bus", NULL);
proc_sys_init();
}
@@ -205,7 +208,9 @@ static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentr
static int proc_root_readdir(struct file *file, struct dir_context *ctx)
{
if (ctx->pos < FIRST_PROCESS_ENTRY) {
- proc_readdir(file, ctx);
+ int error = proc_readdir(file, ctx);
+ if (unlikely(error <= 0))
+ return error;
ctx->pos = FIRST_PROCESS_ENTRY;
}
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 6b6a993b5c2..4348bb8907c 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -16,7 +16,7 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
if (!tgid)
return -ENOENT;
sprintf(tmp, "%d", tgid);
- return vfs_readlink(dentry,buffer,buflen,tmp);
+ return readlink_copy(buffer, buflen, tmp);
}
static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
@@ -36,18 +36,10 @@ static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
return NULL;
}
-static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
- void *cookie)
-{
- char *s = nd_get_link(nd);
- if (!IS_ERR(s))
- kfree(s);
-}
-
static const struct inode_operations proc_self_inode_operations = {
.readlink = proc_self_readlink,
.follow_link = proc_self_follow_link,
- .put_link = proc_self_put_link,
+ .put_link = kfree_put_link,
};
static unsigned self_inum;
diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c
index 62604be9f58..ad8a77f94be 100644
--- a/fs/proc/softirqs.c
+++ b/fs/proc/softirqs.c
@@ -41,4 +41,4 @@ static int __init proc_softirqs_init(void)
proc_create("softirqs", 0, NULL, &proc_softirqs_operations);
return 0;
}
-module_init(proc_softirqs_init);
+fs_initcall(proc_softirqs_init);
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 1cf86c0e868..bf2d03f8fd3 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -9,7 +9,7 @@
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/irqnr.h>
-#include <asm/cputime.h>
+#include <linux/cputime.h>
#include <linux/tick.h>
#ifndef arch_irq_stat_cpu
@@ -184,29 +184,11 @@ static int show_stat(struct seq_file *p, void *v)
static int stat_open(struct inode *inode, struct file *file)
{
- size_t size = 1024 + 128 * num_possible_cpus();
- char *buf;
- struct seq_file *m;
- int res;
+ size_t size = 1024 + 128 * num_online_cpus();
/* minimum size to display an interrupt count : 2 bytes */
size += 2 * nr_irqs;
-
- /* don't ask for more than the kmalloc() max size */
- if (size > KMALLOC_MAX_SIZE)
- size = KMALLOC_MAX_SIZE;
- buf = kmalloc(size, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- res = single_open(file, show_stat, NULL);
- if (!res) {
- m = file->private_data;
- m->buf = buf;
- m->size = ksize(buf);
- } else
- kfree(buf);
- return res;
+ return single_open_size(file, show_stat, NULL, size);
}
static const struct file_operations proc_stat_operations = {
@@ -221,4 +203,4 @@ static int __init proc_stat_init(void)
proc_create("stat", 0, NULL, &proc_stat_operations);
return 0;
}
-module_init(proc_stat_init);
+fs_initcall(proc_stat_init);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index dbf61f6174f..cfa63ee92c9 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1,4 +1,5 @@
#include <linux/mm.h>
+#include <linux/vmacache.h>
#include <linux/hugetlb.h>
#include <linux/huge_mm.h>
#include <linux/mount.h>
@@ -62,7 +63,8 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
total_rss << (PAGE_SHIFT-10),
data << (PAGE_SHIFT-10),
mm->stack_vm << (PAGE_SHIFT-10), text, lib,
- (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10,
+ (PTRS_PER_PTE * sizeof(pte_t) *
+ atomic_long_read(&mm->nr_ptes)) >> 10,
swap << (PAGE_SHIFT-10));
}
@@ -83,14 +85,6 @@ unsigned long task_statm(struct mm_struct *mm,
return mm->total_vm;
}
-static void pad_len_spaces(struct seq_file *m, int len)
-{
- len = 25 + sizeof(void*) * 6 - len;
- if (len < 1)
- len = 1;
- seq_printf(m, "%*c", len, ' ');
-}
-
#ifdef CONFIG_NUMA
/*
* These functions are for numa_maps but called in generic **maps seq_file
@@ -159,7 +153,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
/*
* We remember last_addr rather than next_addr to hit with
- * mmap_cache most of the time. We have zero last_addr at
+ * vmacache most of the time. We have zero last_addr at
* the beginning and also after lseek. We will have -1 last_addr
* after the end of the vmas.
*/
@@ -268,7 +262,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
unsigned long long pgoff = 0;
unsigned long start, end;
dev_t dev = 0;
- int len;
const char *name = NULL;
if (file) {
@@ -286,7 +279,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
if (stack_guard_page_end(vma, end))
end -= PAGE_SIZE;
- seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
+ seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
+ seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
start,
end,
flags & VM_READ ? 'r' : '-',
@@ -294,18 +288,24 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
flags & VM_EXEC ? 'x' : '-',
flags & VM_MAYSHARE ? 's' : 'p',
pgoff,
- MAJOR(dev), MINOR(dev), ino, &len);
+ MAJOR(dev), MINOR(dev), ino);
/*
* Print the dentry name for named mappings, and a
* special [heap] marker for the heap:
*/
if (file) {
- pad_len_spaces(m, len);
+ seq_pad(m, ' ');
seq_path(m, &file->f_path, "\n");
goto done;
}
+ if (vma->vm_ops && vma->vm_ops->name) {
+ name = vma->vm_ops->name(vma);
+ if (name)
+ goto done;
+ }
+
name = arch_vma_name(vma);
if (!name) {
pid_t tid;
@@ -333,7 +333,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
name = "[stack]";
} else {
/* Thread stack in /proc/PID/maps */
- pad_len_spaces(m, len);
+ seq_pad(m, ' ');
seq_printf(m, "[stack:%d]", tid);
}
}
@@ -341,7 +341,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
done:
if (name) {
- pad_len_spaces(m, len);
+ seq_pad(m, ' ');
seq_puts(m, name);
}
seq_putc(m, '\n');
@@ -505,9 +505,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
pte_t *pte;
spinlock_t *ptl;
- if (pmd_trans_huge_lock(pmd, vma) == 1) {
+ if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk);
- spin_unlock(&walk->mm->page_table_lock);
+ spin_unlock(ptl);
mss->anonymous_thp += HPAGE_PMD_SIZE;
return 0;
}
@@ -561,6 +561,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
[ilog2(VM_NONLINEAR)] = "nl",
[ilog2(VM_ARCH_1)] = "ar",
[ilog2(VM_DONTDUMP)] = "dd",
+#ifdef CONFIG_MEM_SOFT_DIRTY
+ [ilog2(VM_SOFTDIRTY)] = "sd",
+#endif
[ilog2(VM_MIXEDMAP)] = "mm",
[ilog2(VM_HUGEPAGE)] = "hg",
[ilog2(VM_NOHUGEPAGE)] = "nh",
@@ -730,8 +733,16 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
* of how soft-dirty works.
*/
pte_t ptent = *pte;
- ptent = pte_wrprotect(ptent);
- ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
+
+ if (pte_present(ptent)) {
+ ptent = pte_wrprotect(ptent);
+ ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
+ } else if (is_swap_pte(ptent)) {
+ ptent = pte_swp_clear_soft_dirty(ptent);
+ } else if (pte_file(ptent)) {
+ ptent = pte_file_clear_soft_dirty(ptent);
+ }
+
set_pte_at(vma->vm_mm, addr, pte, ptent);
#endif
}
@@ -752,14 +763,15 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
for (; addr != end; pte++, addr += PAGE_SIZE) {
ptent = *pte;
- if (!pte_present(ptent))
- continue;
if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
clear_soft_dirty(vma, addr, pte);
continue;
}
+ if (!pte_present(ptent))
+ continue;
+
page = vm_normal_page(vma, addr, ptent);
if (!page)
continue;
@@ -798,8 +810,9 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
if (type == CLEAR_REFS_SOFT_DIRTY) {
soft_dirty_cleared = true;
- pr_warn_once("The pagemap bits 55-60 has changed their meaning! "
- "See the linux/Documentation/vm/pagemap.txt for details.\n");
+ pr_warn_once("The pagemap bits 55-60 has changed their meaning!"
+ " See the linux/Documentation/vm/pagemap.txt for "
+ "details.\n");
}
task = get_proc_task(file_inode(file));
@@ -830,11 +843,17 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
*
* Writing 3 to /proc/pid/clear_refs only affects file
* mapped pages.
+ *
+ * Writing 4 to /proc/pid/clear_refs affects all pages.
*/
if (type == CLEAR_REFS_ANON && vma->vm_file)
continue;
if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
continue;
+ if (type == CLEAR_REFS_SOFT_DIRTY) {
+ if (vma->vm_flags & VM_SOFTDIRTY)
+ vma->vm_flags &= ~VM_SOFTDIRTY;
+ }
walk_page_range(vma->vm_start, vma->vm_end,
&clear_refs_walk);
}
@@ -859,7 +878,7 @@ typedef struct {
} pagemap_entry_t;
struct pagemapread {
- int pos, len;
+ int pos, len; /* units: PM_ENTRY_BYTES, not bytes */
pagemap_entry_t *buffer;
bool v2;
};
@@ -867,7 +886,7 @@ struct pagemapread {
#define PAGEMAP_WALK_SIZE (PMD_SIZE)
#define PAGEMAP_WALK_MASK (PMD_MASK)
-#define PM_ENTRY_BYTES sizeof(u64)
+#define PM_ENTRY_BYTES sizeof(pagemap_entry_t)
#define PM_STATUS_BITS 3
#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
@@ -929,22 +948,28 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
frame = pte_pfn(pte);
flags = PM_PRESENT;
page = vm_normal_page(vma, addr, pte);
+ if (pte_soft_dirty(pte))
+ flags2 |= __PM_SOFT_DIRTY;
} else if (is_swap_pte(pte)) {
- swp_entry_t entry = pte_to_swp_entry(pte);
-
+ swp_entry_t entry;
+ if (pte_swp_soft_dirty(pte))
+ flags2 |= __PM_SOFT_DIRTY;
+ entry = pte_to_swp_entry(pte);
frame = swp_type(entry) |
(swp_offset(entry) << MAX_SWAPFILES_SHIFT);
flags = PM_SWAP;
if (is_migration_entry(entry))
page = migration_entry_to_page(entry);
} else {
- *pme = make_pme(PM_NOT_PRESENT(pm->v2));
+ if (vma->vm_flags & VM_SOFTDIRTY)
+ flags2 |= __PM_SOFT_DIRTY;
+ *pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2));
return;
}
if (page && !PageAnon(page))
flags |= PM_FILE;
- if (pte_soft_dirty(pte))
+ if ((vma->vm_flags & VM_SOFTDIRTY))
flags2 |= __PM_SOFT_DIRTY;
*pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags);
@@ -963,7 +988,7 @@ static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *p
*pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset)
| PM_STATUS2(pm->v2, pmd_flags2) | PM_PRESENT);
else
- *pme = make_pme(PM_NOT_PRESENT(pm->v2));
+ *pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, pmd_flags2));
}
#else
static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
@@ -977,16 +1002,21 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
{
struct vm_area_struct *vma;
struct pagemapread *pm = walk->private;
+ spinlock_t *ptl;
pte_t *pte;
int err = 0;
pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
/* find the first VMA at or above 'addr' */
vma = find_vma(walk->mm, addr);
- if (vma && pmd_trans_huge_lock(pmd, vma) == 1) {
+ if (vma && pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
int pmd_flags2;
- pmd_flags2 = (pmd_soft_dirty(*pmd) ? __PM_SOFT_DIRTY : 0);
+ if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd))
+ pmd_flags2 = __PM_SOFT_DIRTY;
+ else
+ pmd_flags2 = 0;
+
for (; addr != end; addr += PAGE_SIZE) {
unsigned long offset;
@@ -997,19 +1027,24 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
if (err)
break;
}
- spin_unlock(&walk->mm->page_table_lock);
+ spin_unlock(ptl);
return err;
}
if (pmd_trans_unstable(pmd))
return 0;
for (; addr != end; addr += PAGE_SIZE) {
+ int flags2;
/* check to see if we've left 'vma' behind
* and need a new, higher one */
if (vma && (addr >= vma->vm_end)) {
vma = find_vma(walk->mm, addr);
- pme = make_pme(PM_NOT_PRESENT(pm->v2));
+ if (vma && (vma->vm_flags & VM_SOFTDIRTY))
+ flags2 = __PM_SOFT_DIRTY;
+ else
+ flags2 = 0;
+ pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2));
}
/* check that 'vma' actually covers this address,
@@ -1033,13 +1068,15 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
#ifdef CONFIG_HUGETLB_PAGE
static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
- pte_t pte, int offset)
+ pte_t pte, int offset, int flags2)
{
if (pte_present(pte))
- *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset)
- | PM_STATUS2(pm->v2, 0) | PM_PRESENT);
+ *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) |
+ PM_STATUS2(pm->v2, flags2) |
+ PM_PRESENT);
else
- *pme = make_pme(PM_NOT_PRESENT(pm->v2));
+ *pme = make_pme(PM_NOT_PRESENT(pm->v2) |
+ PM_STATUS2(pm->v2, flags2));
}
/* This function walks within one hugetlb entry in the single call */
@@ -1048,12 +1085,22 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
struct mm_walk *walk)
{
struct pagemapread *pm = walk->private;
+ struct vm_area_struct *vma;
int err = 0;
+ int flags2;
pagemap_entry_t pme;
+ vma = find_vma(walk->mm, addr);
+ WARN_ON_ONCE(!vma);
+
+ if (vma && (vma->vm_flags & VM_SOFTDIRTY))
+ flags2 = __PM_SOFT_DIRTY;
+ else
+ flags2 = 0;
+
for (; addr != end; addr += PAGE_SIZE) {
int offset = (addr & ~hmask) >> PAGE_SHIFT;
- huge_pte_to_pagemap_entry(&pme, pm, *pte, offset);
+ huge_pte_to_pagemap_entry(&pme, pm, *pte, offset, flags2);
err = add_to_pagemap(addr, &pme, pm);
if (err)
return err;
@@ -1116,8 +1163,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
goto out_task;
pm.v2 = soft_dirty_cleared;
- pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
- pm.buffer = kmalloc(pm.len, GFP_TEMPORARY);
+ pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
+ pm.buffer = kmalloc(pm.len * PM_ENTRY_BYTES, GFP_TEMPORARY);
ret = -ENOMEM;
if (!pm.buffer)
goto out_task;
@@ -1282,7 +1329,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
md = walk->private;
- if (pmd_trans_huge_lock(pmd, md->vma) == 1) {
+ if (pmd_trans_huge_lock(pmd, md->vma, &ptl) == 1) {
pte_t huge_pte = *(pte_t *)pmd;
struct page *page;
@@ -1290,7 +1337,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
if (page)
gather_stats(page, md, pte_dirty(huge_pte),
HPAGE_PMD_SIZE/PAGE_SIZE);
- spin_unlock(&walk->mm->page_table_lock);
+ spin_unlock(ptl);
return 0;
}
@@ -1314,7 +1361,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
struct numa_maps *md;
struct page *page;
- if (pte_none(*pte))
+ if (!pte_present(*pte))
return 0;
page = pte_page(*pte);
@@ -1348,8 +1395,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
struct mm_struct *mm = vma->vm_mm;
struct mm_walk walk = {};
struct mempolicy *pol;
- int n;
- char buffer[50];
+ char buffer[64];
+ int nid;
if (!mm)
return 0;
@@ -1371,10 +1418,10 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
seq_printf(m, "%08lx %s", vma->vm_start, buffer);
if (file) {
- seq_printf(m, " file=");
+ seq_puts(m, " file=");
seq_path(m, &file->f_path, "\n\t= ");
} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
- seq_printf(m, " heap");
+ seq_puts(m, " heap");
} else {
pid_t tid = vm_is_stack(task, vma, is_pid);
if (tid != 0) {
@@ -1384,14 +1431,14 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
*/
if (!is_pid || (vma->vm_start <= mm->start_stack &&
vma->vm_end >= mm->start_stack))
- seq_printf(m, " stack");
+ seq_puts(m, " stack");
else
seq_printf(m, " stack:%d", tid);
}
}
if (is_vm_hugetlb_page(vma))
- seq_printf(m, " huge");
+ seq_puts(m, " huge");
walk_page_range(vma->vm_start, vma->vm_end, &walk);
@@ -1419,9 +1466,9 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
if (md->writeback)
seq_printf(m, " writeback=%lu", md->writeback);
- for_each_node_state(n, N_MEMORY)
- if (md->node[n])
- seq_printf(m, " N%d=%lu", n, md->node[n]);
+ for_each_node_state(nid, N_MEMORY)
+ if (md->node[nid])
+ seq_printf(m, " N%d=%lu", nid, md->node[nid]);
out:
seq_putc(m, '\n');
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 56123a6f462..678455d2d68 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -123,14 +123,6 @@ unsigned long task_statm(struct mm_struct *mm,
return size;
}
-static void pad_len_spaces(struct seq_file *m, int len)
-{
- len = 25 + sizeof(void*) * 6 - len;
- if (len < 1)
- len = 1;
- seq_printf(m, "%*c", len, ' ');
-}
-
/*
* display a single VMA to a sequenced file
*/
@@ -142,7 +134,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
unsigned long ino = 0;
struct file *file;
dev_t dev = 0;
- int flags, len;
+ int flags;
unsigned long long pgoff = 0;
flags = vma->vm_flags;
@@ -155,8 +147,9 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
}
+ seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
seq_printf(m,
- "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
+ "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
vma->vm_start,
vma->vm_end,
flags & VM_READ ? 'r' : '-',
@@ -164,16 +157,16 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
flags & VM_EXEC ? 'x' : '-',
flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
pgoff,
- MAJOR(dev), MINOR(dev), ino, &len);
+ MAJOR(dev), MINOR(dev), ino);
if (file) {
- pad_len_spaces(m, len);
+ seq_pad(m, ' ');
seq_path(m, &file->f_path, "");
} else if (mm) {
pid_t tid = vm_is_stack(priv->task, vma, is_pid);
if (tid != 0) {
- pad_len_spaces(m, len);
+ seq_pad(m, ' ');
/*
* Thread stack in /proc/PID/task/TID/maps or
* the main process stack.
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 06189462590..33de567c25a 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -5,7 +5,7 @@
#include <linux/seq_file.h>
#include <linux/time.h>
#include <linux/kernel_stat.h>
-#include <asm/cputime.h>
+#include <linux/cputime.h>
static int uptime_proc_show(struct seq_file *m, void *v)
{
@@ -49,4 +49,4 @@ static int __init proc_uptime_init(void)
proc_create("uptime", 0, NULL, &uptime_proc_fops);
return 0;
}
-module_init(proc_uptime_init);
+fs_initcall(proc_uptime_init);
diff --git a/fs/proc/version.c b/fs/proc/version.c
index 76817a60678..d2154eb6d78 100644
--- a/fs/proc/version.c
+++ b/fs/proc/version.c
@@ -31,4 +31,4 @@ static int __init proc_version_init(void)
proc_create("version", 0, NULL, &version_proc_fops);
return 0;
}
-module_init(proc_version_init);
+fs_initcall(proc_version_init);
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index a1a16eb97c7..382aa890e22 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -21,6 +21,7 @@
#include <linux/crash_dump.h>
#include <linux/list.h>
#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include "internal.h"
@@ -41,7 +42,7 @@ static size_t elfnotes_sz;
/* Total size of vmcore file. */
static u64 vmcore_size;
-static struct proc_dir_entry *proc_vmcore = NULL;
+static struct proc_dir_entry *proc_vmcore;
/*
* Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error
@@ -123,11 +124,65 @@ static ssize_t read_from_oldmem(char *buf, size_t count,
return read;
}
+/*
+ * Architectures may override this function to allocate ELF header in 2nd kernel
+ */
+int __weak elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
+{
+ return 0;
+}
+
+/*
+ * Architectures may override this function to free header
+ */
+void __weak elfcorehdr_free(unsigned long long addr)
+{}
+
+/*
+ * Architectures may override this function to read from ELF header
+ */
+ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
+{
+ return read_from_oldmem(buf, count, ppos, 0);
+}
+
+/*
+ * Architectures may override this function to read from notes sections
+ */
+ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
+{
+ return read_from_oldmem(buf, count, ppos, 0);
+}
+
+/*
+ * Architectures may override this function to map oldmem
+ */
+int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma,
+ unsigned long from, unsigned long pfn,
+ unsigned long size, pgprot_t prot)
+{
+ return remap_pfn_range(vma, from, pfn, size, prot);
+}
+
+/*
+ * Copy to either kernel or user space
+ */
+static int copy_to(void *target, void *src, size_t size, int userbuf)
+{
+ if (userbuf) {
+ if (copy_to_user((char __user *) target, src, size))
+ return -EFAULT;
+ } else {
+ memcpy(target, src, size);
+ }
+ return 0;
+}
+
/* Read from the ELF header and then the crash dump. On error, negative value is
* returned otherwise number of bytes read are returned.
*/
-static ssize_t read_vmcore(struct file *file, char __user *buffer,
- size_t buflen, loff_t *fpos)
+static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos,
+ int userbuf)
{
ssize_t acc = 0, tmp;
size_t tsz;
@@ -144,7 +199,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
/* Read ELF core header */
if (*fpos < elfcorebuf_sz) {
tsz = min(elfcorebuf_sz - (size_t)*fpos, buflen);
- if (copy_to_user(buffer, elfcorebuf + *fpos, tsz))
+ if (copy_to(buffer, elfcorebuf + *fpos, tsz, userbuf))
return -EFAULT;
buflen -= tsz;
*fpos += tsz;
@@ -162,7 +217,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos, buflen);
kaddr = elfnotes_buf + *fpos - elfcorebuf_sz;
- if (copy_to_user(buffer, kaddr, tsz))
+ if (copy_to(buffer, kaddr, tsz, userbuf))
return -EFAULT;
buflen -= tsz;
*fpos += tsz;
@@ -178,7 +233,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
if (*fpos < m->offset + m->size) {
tsz = min_t(size_t, m->offset + m->size - *fpos, buflen);
start = m->paddr + *fpos - m->offset;
- tmp = read_from_oldmem(buffer, tsz, &start, 1);
+ tmp = read_from_oldmem(buffer, tsz, &start, userbuf);
if (tmp < 0)
return tmp;
buflen -= tsz;
@@ -195,6 +250,55 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
return acc;
}
+static ssize_t read_vmcore(struct file *file, char __user *buffer,
+ size_t buflen, loff_t *fpos)
+{
+ return __read_vmcore((__force char *) buffer, buflen, fpos, 1);
+}
+
+/*
+ * The vmcore fault handler uses the page cache and fills data using the
+ * standard __vmcore_read() function.
+ *
+ * On s390 the fault handler is used for memory regions that can't be mapped
+ * directly with remap_pfn_range().
+ */
+static int mmap_vmcore_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+#ifdef CONFIG_S390
+ struct address_space *mapping = vma->vm_file->f_mapping;
+ pgoff_t index = vmf->pgoff;
+ struct page *page;
+ loff_t offset;
+ char *buf;
+ int rc;
+
+ page = find_or_create_page(mapping, index, GFP_KERNEL);
+ if (!page)
+ return VM_FAULT_OOM;
+ if (!PageUptodate(page)) {
+ offset = (loff_t) index << PAGE_CACHE_SHIFT;
+ buf = __va((page_to_pfn(page) << PAGE_SHIFT));
+ rc = __read_vmcore(buf, PAGE_SIZE, &offset, 0);
+ if (rc < 0) {
+ unlock_page(page);
+ page_cache_release(page);
+ return (rc == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS;
+ }
+ SetPageUptodate(page);
+ }
+ unlock_page(page);
+ vmf->page = page;
+ return 0;
+#else
+ return VM_FAULT_SIGBUS;
+#endif
+}
+
+static const struct vm_operations_struct vmcore_mmap_ops = {
+ .fault = mmap_vmcore_fault,
+};
+
/**
* alloc_elfnotes_buf - allocate buffer for ELF note segment in
* vmalloc memory
@@ -223,7 +327,7 @@ static inline char *alloc_elfnotes_buf(size_t notes_sz)
* regions in the 1st kernel pointed to by PT_LOAD entries) into
* virtually contiguous user-space in ELF layout.
*/
-#if defined(CONFIG_MMU) && !defined(CONFIG_S390)
+#ifdef CONFIG_MMU
static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
{
size_t size = vma->vm_end - vma->vm_start;
@@ -241,6 +345,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC);
vma->vm_flags |= VM_MIXEDMAP;
+ vma->vm_ops = &vmcore_mmap_ops;
len = 0;
@@ -282,9 +387,9 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
tsz = min_t(size_t, m->offset + m->size - start, size);
paddr = m->paddr + start - m->offset;
- if (remap_pfn_range(vma, vma->vm_start + len,
- paddr >> PAGE_SHIFT, tsz,
- vma->vm_page_prot))
+ if (remap_oldmem_pfn_range(vma, vma->vm_start + len,
+ paddr >> PAGE_SHIFT, tsz,
+ vma->vm_page_prot))
goto fail;
size -= tsz;
start += tsz;
@@ -357,23 +462,29 @@ static int __init update_note_header_size_elf64(const Elf64_Ehdr *ehdr_ptr)
notes_section = kmalloc(max_sz, GFP_KERNEL);
if (!notes_section)
return -ENOMEM;
- rc = read_from_oldmem(notes_section, max_sz, &offset, 0);
+ rc = elfcorehdr_read_notes(notes_section, max_sz, &offset);
if (rc < 0) {
kfree(notes_section);
return rc;
}
nhdr_ptr = notes_section;
- while (real_sz < max_sz) {
- if (nhdr_ptr->n_namesz == 0)
- break;
+ while (nhdr_ptr->n_namesz != 0) {
sz = sizeof(Elf64_Nhdr) +
((nhdr_ptr->n_namesz + 3) & ~3) +
((nhdr_ptr->n_descsz + 3) & ~3);
+ if ((real_sz + sz) > max_sz) {
+ pr_warn("Warning: Exceeded p_memsz, dropping PT_NOTE entry n_namesz=0x%x, n_descsz=0x%x\n",
+ nhdr_ptr->n_namesz, nhdr_ptr->n_descsz);
+ break;
+ }
real_sz += sz;
nhdr_ptr = (Elf64_Nhdr*)((char*)nhdr_ptr + sz);
}
kfree(notes_section);
phdr_ptr->p_memsz = real_sz;
+ if (real_sz == 0) {
+ pr_warn("Warning: Zero PT_NOTE entries found\n");
+ }
}
return 0;
@@ -444,7 +555,8 @@ static int __init copy_notes_elf64(const Elf64_Ehdr *ehdr_ptr, char *notes_buf)
if (phdr_ptr->p_type != PT_NOTE)
continue;
offset = phdr_ptr->p_offset;
- rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0);
+ rc = elfcorehdr_read_notes(notes_buf, phdr_ptr->p_memsz,
+ &offset);
if (rc < 0)
return rc;
notes_buf += phdr_ptr->p_memsz;
@@ -536,23 +648,29 @@ static int __init update_note_header_size_elf32(const Elf32_Ehdr *ehdr_ptr)
notes_section = kmalloc(max_sz, GFP_KERNEL);
if (!notes_section)
return -ENOMEM;
- rc = read_from_oldmem(notes_section, max_sz, &offset, 0);
+ rc = elfcorehdr_read_notes(notes_section, max_sz, &offset);
if (rc < 0) {
kfree(notes_section);
return rc;
}
nhdr_ptr = notes_section;
- while (real_sz < max_sz) {
- if (nhdr_ptr->n_namesz == 0)
- break;
+ while (nhdr_ptr->n_namesz != 0) {
sz = sizeof(Elf32_Nhdr) +
((nhdr_ptr->n_namesz + 3) & ~3) +
((nhdr_ptr->n_descsz + 3) & ~3);
+ if ((real_sz + sz) > max_sz) {
+ pr_warn("Warning: Exceeded p_memsz, dropping PT_NOTE entry n_namesz=0x%x, n_descsz=0x%x\n",
+ nhdr_ptr->n_namesz, nhdr_ptr->n_descsz);
+ break;
+ }
real_sz += sz;
nhdr_ptr = (Elf32_Nhdr*)((char*)nhdr_ptr + sz);
}
kfree(notes_section);
phdr_ptr->p_memsz = real_sz;
+ if (real_sz == 0) {
+ pr_warn("Warning: Zero PT_NOTE entries found\n");
+ }
}
return 0;
@@ -623,7 +741,8 @@ static int __init copy_notes_elf32(const Elf32_Ehdr *ehdr_ptr, char *notes_buf)
if (phdr_ptr->p_type != PT_NOTE)
continue;
offset = phdr_ptr->p_offset;
- rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0);
+ rc = elfcorehdr_read_notes(notes_buf, phdr_ptr->p_memsz,
+ &offset);
if (rc < 0)
return rc;
notes_buf += phdr_ptr->p_memsz;
@@ -810,7 +929,7 @@ static int __init parse_crash_elf64_headers(void)
addr = elfcorehdr_addr;
/* Read Elf header */
- rc = read_from_oldmem((char*)&ehdr, sizeof(Elf64_Ehdr), &addr, 0);
+ rc = elfcorehdr_read((char *)&ehdr, sizeof(Elf64_Ehdr), &addr);
if (rc < 0)
return rc;
@@ -837,7 +956,7 @@ static int __init parse_crash_elf64_headers(void)
if (!elfcorebuf)
return -ENOMEM;
addr = elfcorehdr_addr;
- rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0);
+ rc = elfcorehdr_read(elfcorebuf, elfcorebuf_sz_orig, &addr);
if (rc < 0)
goto fail;
@@ -866,7 +985,7 @@ static int __init parse_crash_elf32_headers(void)
addr = elfcorehdr_addr;
/* Read Elf header */
- rc = read_from_oldmem((char*)&ehdr, sizeof(Elf32_Ehdr), &addr, 0);
+ rc = elfcorehdr_read((char *)&ehdr, sizeof(Elf32_Ehdr), &addr);
if (rc < 0)
return rc;
@@ -892,7 +1011,7 @@ static int __init parse_crash_elf32_headers(void)
if (!elfcorebuf)
return -ENOMEM;
addr = elfcorehdr_addr;
- rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0);
+ rc = elfcorehdr_read(elfcorebuf, elfcorebuf_sz_orig, &addr);
if (rc < 0)
goto fail;
@@ -919,7 +1038,7 @@ static int __init parse_crash_elf_headers(void)
int rc=0;
addr = elfcorehdr_addr;
- rc = read_from_oldmem(e_ident, EI_NIDENT, &addr, 0);
+ rc = elfcorehdr_read(e_ident, EI_NIDENT, &addr);
if (rc < 0)
return rc;
if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) {
@@ -952,7 +1071,14 @@ static int __init vmcore_init(void)
{
int rc = 0;
- /* If elfcorehdr= has been passed in cmdline, then capture the dump.*/
+ /* Allow architectures to allocate ELF header in 2nd kernel */
+ rc = elfcorehdr_alloc(&elfcorehdr_addr, &elfcorehdr_size);
+ if (rc)
+ return rc;
+ /*
+ * If elfcorehdr= has been passed in cmdline or created in 2nd kernel,
+ * then capture the dump.
+ */
if (!(is_vmcore_usable()))
return rc;
rc = parse_crash_elf_headers();
@@ -960,13 +1086,15 @@ static int __init vmcore_init(void)
pr_warn("Kdump: vmcore not initialized\n");
return rc;
}
+ elfcorehdr_free(elfcorehdr_addr);
+ elfcorehdr_addr = ELFCORE_ADDR_ERR;
proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations);
if (proc_vmcore)
proc_vmcore->size = vmcore_size;
return 0;
}
-module_init(vmcore_init)
+fs_initcall(vmcore_init);
/* Cleanup function for vmcore module. */
void vmcore_cleanup(void)
@@ -988,4 +1116,3 @@ void vmcore_cleanup(void)
}
free_elfcorebuf();
}
-EXPORT_SYMBOL_GPL(vmcore_cleanup);