aboutsummaryrefslogtreecommitdiff
path: root/fs/exec.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/exec.c')
-rw-r--r--fs/exec.c1306
1 files changed, 414 insertions, 892 deletions
diff --git a/fs/exec.c b/fs/exec.c
index 99d33a1371e..a3d33fe592d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -26,6 +26,7 @@
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/mm.h>
+#include <linux/vmacache.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/swap.h>
@@ -42,7 +43,6 @@
#include <linux/pid_namespace.h>
#include <linux/module.h>
#include <linux/namei.h>
-#include <linux/proc_fs.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/syscalls.h>
@@ -55,37 +55,31 @@
#include <linux/fs_struct.h>
#include <linux/pipe_fs_i.h>
#include <linux/oom.h>
+#include <linux/compat.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/tlb.h>
-#include "internal.h"
-int core_uses_pid;
-char core_pattern[CORENAME_MAX_SIZE] = "core";
-unsigned int core_pipe_limit;
-int suid_dumpable = 0;
+#include <trace/events/task.h>
+#include "internal.h"
-struct core_name {
- char *corename;
- int used, size;
-};
-static atomic_t call_count = ATOMIC_INIT(1);
+#include <trace/events/sched.h>
-/* The maximal length of core_pattern is also specified in sysctl.c */
+int suid_dumpable = 0;
static LIST_HEAD(formats);
static DEFINE_RWLOCK(binfmt_lock);
-int __register_binfmt(struct linux_binfmt * fmt, int insert)
+void __register_binfmt(struct linux_binfmt * fmt, int insert)
{
- if (!fmt)
- return -EINVAL;
+ BUG_ON(!fmt);
+ if (WARN_ON(!fmt->load_binary))
+ return;
write_lock(&binfmt_lock);
insert ? list_add(&fmt->lh, &formats) :
list_add_tail(&fmt->lh, &formats);
write_unlock(&binfmt_lock);
- return 0;
}
EXPORT_SYMBOL(__register_binfmt);
@@ -104,6 +98,7 @@ static inline void put_binfmt(struct linux_binfmt * fmt)
module_put(fmt->module);
}
+#ifdef CONFIG_USELIB
/*
* Note that a shared library must be both readable and executable due to
* security reasons.
@@ -112,23 +107,28 @@ static inline void put_binfmt(struct linux_binfmt * fmt)
*/
SYSCALL_DEFINE1(uselib, const char __user *, library)
{
+ struct linux_binfmt *fmt;
struct file *file;
- char *tmp = getname(library);
+ struct filename *tmp = getname(library);
int error = PTR_ERR(tmp);
+ static const struct open_flags uselib_flags = {
+ .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
+ .acc_mode = MAY_READ | MAY_EXEC | MAY_OPEN,
+ .intent = LOOKUP_OPEN,
+ .lookup_flags = LOOKUP_FOLLOW,
+ };
if (IS_ERR(tmp))
goto out;
- file = do_filp_open(AT_FDCWD, tmp,
- O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0,
- MAY_READ | MAY_EXEC | MAY_OPEN);
+ file = do_filp_open(AT_FDCWD, tmp, &uselib_flags);
putname(tmp);
error = PTR_ERR(file);
if (IS_ERR(file))
goto out;
error = -EINVAL;
- if (!S_ISREG(file->f_path.dentry->d_inode->i_mode))
+ if (!S_ISREG(file_inode(file)->i_mode))
goto exit;
error = -EACCES;
@@ -138,31 +138,46 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
fsnotify_open(file);
error = -ENOEXEC;
- if(file->f_op) {
- struct linux_binfmt * fmt;
- read_lock(&binfmt_lock);
- list_for_each_entry(fmt, &formats, lh) {
- if (!fmt->load_shlib)
- continue;
- if (!try_module_get(fmt->module))
- continue;
- read_unlock(&binfmt_lock);
- error = fmt->load_shlib(file);
- read_lock(&binfmt_lock);
- put_binfmt(fmt);
- if (error != -ENOEXEC)
- break;
- }
+ read_lock(&binfmt_lock);
+ list_for_each_entry(fmt, &formats, lh) {
+ if (!fmt->load_shlib)
+ continue;
+ if (!try_module_get(fmt->module))
+ continue;
read_unlock(&binfmt_lock);
+ error = fmt->load_shlib(file);
+ read_lock(&binfmt_lock);
+ put_binfmt(fmt);
+ if (error != -ENOEXEC)
+ break;
}
+ read_unlock(&binfmt_lock);
exit:
fput(file);
out:
return error;
}
+#endif /* #ifdef CONFIG_USELIB */
#ifdef CONFIG_MMU
+/*
+ * The nascent bprm->mm is not visible until exec_mmap() but it can
+ * use a lot of memory, account these pages in current->mm temporary
+ * for oom_badness()->get_mm_rss(). Once exec succeeds or fails, we
+ * change the counter back via acct_arg_size(0).
+ */
+static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
+{
+ struct mm_struct *mm = current->mm;
+ long diff = (long)(pages - bprm->vma_pages);
+
+ if (!mm || !diff)
+ return;
+
+ bprm->vma_pages = pages;
+ add_mm_counter(mm, MM_ANONPAGES, diff);
+}
static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
int write)
@@ -172,7 +187,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
#ifdef CONFIG_STACK_GROWSUP
if (write) {
- ret = expand_stack_downwards(bprm->vma, pos);
+ ret = expand_downwards(bprm->vma, pos);
if (ret < 0)
return NULL;
}
@@ -186,6 +201,8 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
struct rlimit *rlim;
+ acct_arg_size(bprm, size / PAGE_SIZE);
+
/*
* We've historically supported up to 32 pages (ARG_MAX)
* of argument strings even with small stacks
@@ -248,12 +265,13 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
* use STACK_TOP because that can depend on attributes which aren't
* configured yet.
*/
- BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
+ BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
vma->vm_end = STACK_TOP_MAX;
vma->vm_start = vma->vm_end - PAGE_SIZE;
- vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
+ vma->vm_flags = VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
INIT_LIST_HEAD(&vma->anon_vma_chain);
+
err = insert_vm_struct(mm, vma);
if (err)
goto err;
@@ -276,6 +294,10 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len)
#else
+static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
+{
+}
+
static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
int write)
{
@@ -336,7 +358,7 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len)
* flags, permissions, and offset, so we use temporary values. We'll update
* them later in setup_arg_pages().
*/
-int bprm_mm_init(struct linux_binprm *bprm)
+static int bprm_mm_init(struct linux_binprm *bprm)
{
int err;
struct mm_struct *mm = NULL;
@@ -365,24 +387,59 @@ err:
return err;
}
+struct user_arg_ptr {
+#ifdef CONFIG_COMPAT
+ bool is_compat;
+#endif
+ union {
+ const char __user *const __user *native;
+#ifdef CONFIG_COMPAT
+ const compat_uptr_t __user *compat;
+#endif
+ } ptr;
+};
+
+static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr)
+{
+ const char __user *native;
+
+#ifdef CONFIG_COMPAT
+ if (unlikely(argv.is_compat)) {
+ compat_uptr_t compat;
+
+ if (get_user(compat, argv.ptr.compat + nr))
+ return ERR_PTR(-EFAULT);
+
+ return compat_ptr(compat);
+ }
+#endif
+
+ if (get_user(native, argv.ptr.native + nr))
+ return ERR_PTR(-EFAULT);
+
+ return native;
+}
+
/*
* count() counts the number of strings in array ARGV.
*/
-static int count(const char __user * const __user * argv, int max)
+static int count(struct user_arg_ptr argv, int max)
{
int i = 0;
- if (argv != NULL) {
+ if (argv.ptr.native != NULL) {
for (;;) {
- const char __user * p;
+ const char __user *p = get_user_arg_ptr(argv, i);
- if (get_user(p, argv))
- return -EFAULT;
if (!p)
break;
- argv++;
- if (i++ >= max)
+
+ if (IS_ERR(p))
+ return -EFAULT;
+
+ if (i >= max)
return -E2BIG;
+ ++i;
if (fatal_signal_pending(current))
return -ERESTARTNOHAND;
@@ -397,7 +454,7 @@ static int count(const char __user * const __user * argv, int max)
* processes's memory to the new process's stack. The call to get_user_pages()
* ensures the destination page is created and not swapped out.
*/
-static int copy_strings(int argc, const char __user *const __user *argv,
+static int copy_strings(int argc, struct user_arg_ptr argv,
struct linux_binprm *bprm)
{
struct page *kmapped_page = NULL;
@@ -410,16 +467,18 @@ static int copy_strings(int argc, const char __user *const __user *argv,
int len;
unsigned long pos;
- if (get_user(str, argv+argc) ||
- !(len = strnlen_user(str, MAX_ARG_STRLEN))) {
- ret = -EFAULT;
+ ret = -EFAULT;
+ str = get_user_arg_ptr(argv, argc);
+ if (IS_ERR(str))
goto out;
- }
- if (!valid_arg_len(bprm, len)) {
- ret = -E2BIG;
+ len = strnlen_user(str, MAX_ARG_STRLEN);
+ if (!len)
+ goto out;
+
+ ret = -E2BIG;
+ if (!valid_arg_len(bprm, len))
goto out;
- }
/* We're going to work our way backwords. */
pos = bprm->p;
@@ -486,14 +545,19 @@ out:
/*
* Like copy_strings, but get argv and its values from kernel memory.
*/
-int copy_strings_kernel(int argc, const char *const *argv,
+int copy_strings_kernel(int argc, const char *const *__argv,
struct linux_binprm *bprm)
{
int r;
mm_segment_t oldfs = get_fs();
+ struct user_arg_ptr argv = {
+ .ptr.native = (const char __user *const __user *)__argv,
+ };
+
set_fs(KERNEL_DS);
- r = copy_strings(argc, (const char __user *const __user *)argv, bprm);
+ r = copy_strings(argc, argv, bprm);
set_fs(oldfs);
+
return r;
}
EXPORT_SYMBOL(copy_strings_kernel);
@@ -520,7 +584,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
unsigned long length = old_end - old_start;
unsigned long new_start = old_start - shift;
unsigned long new_end = old_end - shift;
- struct mmu_gather *tlb;
+ struct mmu_gather tlb;
BUG_ON(new_start > new_end);
@@ -542,17 +606,17 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
* process cleanup to remove whatever mess we made.
*/
if (length != move_page_tables(vma, old_start,
- vma, new_start, length))
+ vma, new_start, length, false))
return -ENOMEM;
lru_add_drain();
- tlb = tlb_gather_mmu(mm, 0);
+ tlb_gather_mmu(&tlb, mm, old_start, old_end);
if (new_end > old_start) {
/*
* when the old and new regions overlap clear from new_end.
*/
- free_pgd_range(tlb, new_end, old_end, new_end,
- vma->vm_next ? vma->vm_next->vm_start : 0);
+ free_pgd_range(&tlb, new_end, old_end, new_end,
+ vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
} else {
/*
* otherwise, clean from old_start; this is done to not touch
@@ -560,10 +624,10 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
* have constraints on va-space that make this illegal (IA64) -
* for the others its just a little faster.
*/
- free_pgd_range(tlb, old_start, old_end, new_end,
- vma->vm_next ? vma->vm_next->vm_start : 0);
+ free_pgd_range(&tlb, old_start, old_end, new_end,
+ vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
}
- tlb_finish_mmu(tlb, new_end, old_end);
+ tlb_finish_mmu(&tlb, old_start, old_end);
/*
* Shrink the vma to just the new range. Always succeeds.
@@ -593,10 +657,10 @@ int setup_arg_pages(struct linux_binprm *bprm,
unsigned long rlim_stack;
#ifdef CONFIG_STACK_GROWSUP
- /* Limit stack size to 1GB */
+ /* Limit stack size */
stack_base = rlimit_max(RLIMIT_STACK);
- if (stack_base > (1 << 30))
- stack_base = 1 << 30;
+ if (stack_base > STACK_SIZE_MAX)
+ stack_base = STACK_SIZE_MAX;
/* Make sure we didn't let the argument array grow too large. */
if (vma->vm_end - vma->vm_start > stack_base)
@@ -687,19 +751,23 @@ EXPORT_SYMBOL(setup_arg_pages);
#endif /* CONFIG_MMU */
-struct file *open_exec(const char *name)
+static struct file *do_open_exec(struct filename *name)
{
struct file *file;
int err;
+ static const struct open_flags open_exec_flags = {
+ .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
+ .acc_mode = MAY_EXEC | MAY_OPEN,
+ .intent = LOOKUP_OPEN,
+ .lookup_flags = LOOKUP_FOLLOW,
+ };
- file = do_filp_open(AT_FDCWD, name,
- O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0,
- MAY_EXEC | MAY_OPEN);
+ file = do_filp_open(AT_FDCWD, name, &open_exec_flags);
if (IS_ERR(file))
goto out;
err = -EACCES;
- if (!S_ISREG(file->f_path.dentry->d_inode->i_mode))
+ if (!S_ISREG(file_inode(file)->i_mode))
goto exit;
if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
@@ -718,6 +786,12 @@ exit:
fput(file);
return ERR_PTR(err);
}
+
+struct file *open_exec(const char *name)
+{
+ struct filename tmp = { .name = name };
+ return do_open_exec(&tmp);
+}
EXPORT_SYMBOL(open_exec);
int kernel_read(struct file *file, loff_t offset,
@@ -737,18 +811,27 @@ int kernel_read(struct file *file, loff_t offset,
EXPORT_SYMBOL(kernel_read);
+ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
+{
+ ssize_t res = vfs_read(file, (void __user *)addr, len, &pos);
+ if (res > 0)
+ flush_icache_range(addr, addr + len);
+ return res;
+}
+EXPORT_SYMBOL(read_code);
+
static int exec_mmap(struct mm_struct *mm)
{
struct task_struct *tsk;
- struct mm_struct * old_mm, *active_mm;
+ struct mm_struct *old_mm, *active_mm;
/* Notify parent that we're no longer interested in the old VM */
tsk = current;
old_mm = current->mm;
- sync_mm_rss(tsk, old_mm);
mm_release(tsk, old_mm);
if (old_mm) {
+ sync_mm_rss(old_mm);
/*
* Make sure that if there is a core dump in progress
* for the old mm, we get out and die instead of going
@@ -766,15 +849,13 @@ static int exec_mmap(struct mm_struct *mm)
tsk->mm = mm;
tsk->active_mm = mm;
activate_mm(active_mm, mm);
- if (old_mm && tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
- atomic_dec(&old_mm->oom_disable_count);
- atomic_inc(&tsk->mm->oom_disable_count);
- }
+ tsk->mm->vmacache_seqnum = 0;
+ vmacache_flush(tsk);
task_unlock(tsk);
- arch_pick_mmap_layout(mm);
if (old_mm) {
up_read(&old_mm->mmap_sem);
BUG_ON(active_mm != old_mm);
+ setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm);
mm_update_next_owner(old_mm);
mmput(old_mm);
return 0;
@@ -817,9 +898,11 @@ static int de_thread(struct task_struct *tsk)
sig->notify_count--;
while (sig->notify_count) {
- __set_current_state(TASK_UNINTERRUPTIBLE);
+ __set_current_state(TASK_KILLABLE);
spin_unlock_irq(lock);
schedule();
+ if (unlikely(__fatal_signal_pending(tsk)))
+ goto killed;
spin_lock_irq(lock);
}
spin_unlock_irq(lock);
@@ -834,12 +917,16 @@ static int de_thread(struct task_struct *tsk)
sig->notify_count = -1; /* for exit_notify() */
for (;;) {
+ threadgroup_change_begin(tsk);
write_lock_irq(&tasklist_lock);
if (likely(leader->exit_state))
break;
- __set_current_state(TASK_UNINTERRUPTIBLE);
+ __set_current_state(TASK_KILLABLE);
write_unlock_irq(&tasklist_lock);
+ threadgroup_change_end(tsk);
schedule();
+ if (unlikely(__fatal_signal_pending(tsk)))
+ goto killed;
}
/*
@@ -853,6 +940,7 @@ static int de_thread(struct task_struct *tsk)
* also take its birthdate (always earlier than our own).
*/
tsk->start_time = leader->start_time;
+ tsk->real_start_time = leader->real_start_time;
BUG_ON(!same_thread_group(leader, tsk));
BUG_ON(has_group_leader_pid(tsk));
@@ -868,9 +956,8 @@ static int de_thread(struct task_struct *tsk)
* Note: The old leader also uses this pid until release_task
* is called. Odd but simple and correct.
*/
- detach_pid(tsk, PIDTYPE_PID);
tsk->pid = leader->pid;
- attach_pid(tsk, PIDTYPE_PID, task_pid(leader));
+ change_pid(tsk, PIDTYPE_PID, task_pid(leader));
transfer_pid(leader, tsk, PIDTYPE_PGID);
transfer_pid(leader, tsk, PIDTYPE_SID);
@@ -881,10 +968,20 @@ static int de_thread(struct task_struct *tsk)
leader->group_leader = tsk;
tsk->exit_signal = SIGCHLD;
+ leader->exit_signal = -1;
BUG_ON(leader->exit_state != EXIT_ZOMBIE);
leader->exit_state = EXIT_DEAD;
+
+ /*
+ * We are going to release_task()->ptrace_unlink() silently,
+ * the tracer can sleep in do_wait(). EXIT_DEAD guarantees
+ * the tracer wont't block again waiting for this thread.
+ */
+ if (unlikely(leader->ptrace))
+ __wake_up_parent(leader, leader->parent);
write_unlock_irq(&tasklist_lock);
+ threadgroup_change_end(tsk);
release_task(leader);
}
@@ -893,8 +990,8 @@ static int de_thread(struct task_struct *tsk)
sig->notify_count = 0;
no_thread_group:
- if (current->mm)
- setmax_mm_hiwater_rss(&sig->maxrss, current->mm);
+ /* we have changed execution domain */
+ tsk->exit_signal = SIGCHLD;
exit_itimers(sig);
flush_itimer_signals();
@@ -924,40 +1021,14 @@ no_thread_group:
BUG_ON(!thread_group_leader(tsk));
return 0;
-}
-/*
- * These functions flushes out all traces of the currently running executable
- * so that a new one can be started
- */
-static void flush_old_files(struct files_struct * files)
-{
- long j = -1;
- struct fdtable *fdt;
-
- spin_lock(&files->file_lock);
- for (;;) {
- unsigned long set, i;
-
- j++;
- i = j * __NFDBITS;
- fdt = files_fdtable(files);
- if (i >= fdt->max_fds)
- break;
- set = fdt->close_on_exec->fds_bits[j];
- if (!set)
- continue;
- fdt->close_on_exec->fds_bits[j] = 0;
- spin_unlock(&files->file_lock);
- for ( ; set ; i++,set >>= 1) {
- if (set & 1) {
- sys_close(i);
- }
- }
- spin_lock(&files->file_lock);
-
- }
- spin_unlock(&files->file_lock);
+killed:
+ /* protects against exit_notify() and __exit_signal() */
+ read_lock(&tasklist_lock);
+ sig->group_exit_task = NULL;
+ sig->notify_count = 0;
+ read_unlock(&tasklist_lock);
+ return -EAGAIN;
}
char *get_task_comm(char *buf, struct task_struct *tsk)
@@ -968,22 +1039,20 @@ char *get_task_comm(char *buf, struct task_struct *tsk)
task_unlock(tsk);
return buf;
}
+EXPORT_SYMBOL_GPL(get_task_comm);
-void set_task_comm(struct task_struct *tsk, char *buf)
+/*
+ * These functions flushes out all traces of the currently running executable
+ * so that a new one can be started
+ */
+
+void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
{
task_lock(tsk);
-
- /*
- * Threads may access current->comm without holding
- * the task lock, so write the string carefully.
- * Readers without a lock may see incomplete new
- * names but are safe from non-terminating string reads.
- */
- memset(tsk->comm, 0, TASK_COMM_LEN);
- wmb();
+ trace_task_rename(tsk, buf);
strlcpy(tsk->comm, buf, sizeof(tsk->comm));
task_unlock(tsk);
- perf_event_comm(tsk);
+ perf_event_comm(tsk, exec);
}
int flush_old_exec(struct linux_binprm * bprm)
@@ -999,17 +1068,19 @@ int flush_old_exec(struct linux_binprm * bprm)
goto out;
set_mm_exe_file(bprm->mm, bprm->file);
-
/*
* Release all of the old mmap stuff
*/
+ acct_arg_size(bprm, 0);
retval = exec_mmap(bprm->mm);
if (retval)
goto out;
bprm->mm = NULL; /* We're using it now */
- current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD);
+ set_fs(USER_DS);
+ current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD |
+ PF_NOFREEZE | PF_NO_SETAFFINITY);
flush_thread();
current->personality &= ~bprm->per_clear;
@@ -1020,34 +1091,27 @@ out:
}
EXPORT_SYMBOL(flush_old_exec);
-void setup_new_exec(struct linux_binprm * bprm)
+void would_dump(struct linux_binprm *bprm, struct file *file)
{
- int i, ch;
- const char *name;
- char tcomm[sizeof(current->comm)];
+ if (inode_permission(file_inode(file), MAY_READ) < 0)
+ bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
+}
+EXPORT_SYMBOL(would_dump);
+void setup_new_exec(struct linux_binprm * bprm)
+{
arch_pick_mmap_layout(current->mm);
/* This is the point of no return */
current->sas_ss_sp = current->sas_ss_size = 0;
- if (current_euid() == current_uid() && current_egid() == current_gid())
- set_dumpable(current->mm, 1);
+ if (uid_eq(current_euid(), current_uid()) && gid_eq(current_egid(), current_gid()))
+ set_dumpable(current->mm, SUID_DUMP_USER);
else
set_dumpable(current->mm, suid_dumpable);
- name = bprm->filename;
-
- /* Copies the binary name from after last slash */
- for (i=0; (ch = *(name++)) != '\0';) {
- if (ch == '/')
- i = 0; /* overwrite what we wrote */
- else
- if (i < (sizeof(tcomm) - 1))
- tcomm[i++] = ch;
- }
- tcomm[i] = '\0';
- set_task_comm(current, tcomm);
+ perf_event_exec();
+ __set_task_comm(current, kbasename(bprm->filename), true);
/* Set the new mm task size. We have to do that late because it may
* depend on TIF_32BIT which is only updated in flush_thread() on
@@ -1056,28 +1120,20 @@ void setup_new_exec(struct linux_binprm * bprm)
current->mm->task_size = TASK_SIZE;
/* install the new credentials */
- if (bprm->cred->uid != current_euid() ||
- bprm->cred->gid != current_egid()) {
+ if (!uid_eq(bprm->cred->uid, current_euid()) ||
+ !gid_eq(bprm->cred->gid, current_egid())) {
current->pdeath_signal = 0;
- } else if (file_permission(bprm->file, MAY_READ) ||
- bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP) {
- set_dumpable(current->mm, suid_dumpable);
+ } else {
+ would_dump(bprm, bprm->file);
+ if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)
+ set_dumpable(current->mm, suid_dumpable);
}
- /*
- * Flush performance counters when crossing a
- * security domain:
- */
- if (!get_dumpable(current->mm))
- perf_event_exit_task(current);
-
/* An exec changes our domain. We are no longer part of the thread
group */
-
current->self_exec_id++;
-
flush_signal_handlers(current, 0);
- flush_old_files(current->files);
+ do_close_on_exec(current->files);
}
EXPORT_SYMBOL(setup_new_exec);
@@ -1100,16 +1156,35 @@ int prepare_bprm_creds(struct linux_binprm *bprm)
return -ENOMEM;
}
-void free_bprm(struct linux_binprm *bprm)
+static void free_bprm(struct linux_binprm *bprm)
{
free_arg_pages(bprm);
if (bprm->cred) {
mutex_unlock(&current->signal->cred_guard_mutex);
abort_creds(bprm->cred);
}
+ if (bprm->file) {
+ allow_write_access(bprm->file);
+ fput(bprm->file);
+ }
+ /* If a binfmt changed the interp, free it. */
+ if (bprm->interp != bprm->filename)
+ kfree(bprm->interp);
kfree(bprm);
}
+int bprm_change_interp(char *interp, struct linux_binprm *bprm)
+{
+ /* If a binfmt changed the interp, free it first. */
+ if (bprm->interp != bprm->filename)
+ kfree(bprm->interp);
+ bprm->interp = kstrdup(interp, GFP_KERNEL);
+ if (!bprm->interp)
+ return -ENOMEM;
+ return 0;
+}
+EXPORT_SYMBOL(bprm_change_interp);
+
/*
* install the new credentials for this executable
*/
@@ -1119,6 +1194,15 @@ void install_exec_creds(struct linux_binprm *bprm)
commit_creds(bprm->cred);
bprm->cred = NULL;
+
+ /*
+ * Disable monitoring for regular users
+ * when executing setuid binaries. Must
+ * wait until new credentials are committed
+ * by commit_creds() above
+ */
+ if (get_dumpable(current->mm) != SUID_DUMP_USER)
+ perf_event_exit_task(current);
/*
* cred_guard_mutex must be held at least to this point to prevent
* ptrace_attach() from altering our determination of the task's
@@ -1134,58 +1218,63 @@ EXPORT_SYMBOL(install_exec_creds);
* - the caller must hold ->cred_guard_mutex to protect against
* PTRACE_ATTACH
*/
-int check_unsafe_exec(struct linux_binprm *bprm)
+static void check_unsafe_exec(struct linux_binprm *bprm)
{
struct task_struct *p = current, *t;
unsigned n_fs;
- int res = 0;
- bprm->unsafe = tracehook_unsafe_exec(p);
+ if (p->ptrace) {
+ if (p->ptrace & PT_PTRACE_CAP)
+ bprm->unsafe |= LSM_UNSAFE_PTRACE_CAP;
+ else
+ bprm->unsafe |= LSM_UNSAFE_PTRACE;
+ }
+
+ /*
+ * This isn't strictly necessary, but it makes it harder for LSMs to
+ * mess up.
+ */
+ if (current->no_new_privs)
+ bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
+ t = p;
n_fs = 1;
spin_lock(&p->fs->lock);
rcu_read_lock();
- for (t = next_thread(p); t != p; t = next_thread(t)) {
+ while_each_thread(p, t) {
if (t->fs == p->fs)
n_fs++;
}
rcu_read_unlock();
- if (p->fs->users > n_fs) {
+ if (p->fs->users > n_fs)
bprm->unsafe |= LSM_UNSAFE_SHARE;
- } else {
- res = -EAGAIN;
- if (!p->fs->in_exec) {
- p->fs->in_exec = 1;
- res = 1;
- }
- }
+ else
+ p->fs->in_exec = 1;
spin_unlock(&p->fs->lock);
-
- return res;
}
-/*
- * Fill the binprm structure from the inode.
+/*
+ * Fill the binprm structure from the inode.
* Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
*
* This may be called multiple times for binary chains (scripts for example).
*/
int prepare_binprm(struct linux_binprm *bprm)
{
- umode_t mode;
- struct inode * inode = bprm->file->f_path.dentry->d_inode;
+ struct inode *inode = file_inode(bprm->file);
+ umode_t mode = inode->i_mode;
int retval;
- mode = inode->i_mode;
- if (bprm->file->f_op == NULL)
- return -EACCES;
/* clear any previous set[ug]id data from a previous binary */
bprm->cred->euid = current_euid();
bprm->cred->egid = current_egid();
- if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) {
+ if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
+ !current->no_new_privs &&
+ kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
+ kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
/* Set-uid? */
if (mode & S_ISUID) {
bprm->per_clear |= PER_CLEAR_ON_SETID;
@@ -1238,13 +1327,13 @@ int remove_arg_zero(struct linux_binprm *bprm)
ret = -EFAULT;
goto out;
}
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = kmap_atomic(page);
for (; offset < PAGE_SIZE && kaddr[offset];
offset++, bprm->p++)
;
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
put_arg_page(page);
if (offset == PAGE_SIZE)
@@ -1260,99 +1349,111 @@ out:
}
EXPORT_SYMBOL(remove_arg_zero);
+#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
/*
* cycle the list of binary formats handler, until one recognizes the image
*/
-int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
+int search_binary_handler(struct linux_binprm *bprm)
{
- unsigned int depth = bprm->recursion_depth;
- int try,retval;
+ bool need_retry = IS_ENABLED(CONFIG_MODULES);
struct linux_binfmt *fmt;
+ int retval;
- retval = security_bprm_check(bprm);
- if (retval)
- return retval;
-
- /* kernel module loader fixup */
- /* so we don't try to load run modprobe in kernel space. */
- set_fs(USER_DS);
+ /* This allows 4 levels of binfmt rewrites before failing hard. */
+ if (bprm->recursion_depth > 5)
+ return -ELOOP;
- retval = audit_bprm(bprm);
+ retval = security_bprm_check(bprm);
if (retval)
return retval;
retval = -ENOENT;
- for (try=0; try<2; try++) {
- read_lock(&binfmt_lock);
- list_for_each_entry(fmt, &formats, lh) {
- int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
- if (!fn)
- continue;
- if (!try_module_get(fmt->module))
- continue;
- read_unlock(&binfmt_lock);
- retval = fn(bprm, regs);
- /*
- * Restore the depth counter to its starting value
- * in this call, so we don't have to rely on every
- * load_binary function to restore it on return.
- */
- bprm->recursion_depth = depth;
- if (retval >= 0) {
- if (depth == 0)
- tracehook_report_exec(fmt, bprm, regs);
- put_binfmt(fmt);
- allow_write_access(bprm->file);
- if (bprm->file)
- fput(bprm->file);
- bprm->file = NULL;
- current->did_exec = 1;
- proc_exec_connector(current);
- return retval;
- }
- read_lock(&binfmt_lock);
- put_binfmt(fmt);
- if (retval != -ENOEXEC || bprm->mm == NULL)
- break;
- if (!bprm->file) {
- read_unlock(&binfmt_lock);
- return retval;
- }
- }
+ retry:
+ read_lock(&binfmt_lock);
+ list_for_each_entry(fmt, &formats, lh) {
+ if (!try_module_get(fmt->module))
+ continue;
read_unlock(&binfmt_lock);
- if (retval != -ENOEXEC || bprm->mm == NULL) {
- break;
-#ifdef CONFIG_MODULES
- } else {
-#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
- if (printable(bprm->buf[0]) &&
- printable(bprm->buf[1]) &&
- printable(bprm->buf[2]) &&
- printable(bprm->buf[3]))
- break; /* -ENOEXEC */
- request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
-#endif
+ bprm->recursion_depth++;
+ retval = fmt->load_binary(bprm);
+ bprm->recursion_depth--;
+ if (retval >= 0 || retval != -ENOEXEC ||
+ bprm->mm == NULL || bprm->file == NULL) {
+ put_binfmt(fmt);
+ return retval;
}
+ read_lock(&binfmt_lock);
+ put_binfmt(fmt);
+ }
+ read_unlock(&binfmt_lock);
+
+ if (need_retry && retval == -ENOEXEC) {
+ if (printable(bprm->buf[0]) && printable(bprm->buf[1]) &&
+ printable(bprm->buf[2]) && printable(bprm->buf[3]))
+ return retval;
+ if (request_module("binfmt-%04x", *(ushort *)(bprm->buf + 2)) < 0)
+ return retval;
+ need_retry = false;
+ goto retry;
}
+
return retval;
}
-
EXPORT_SYMBOL(search_binary_handler);
+static int exec_binprm(struct linux_binprm *bprm)
+{
+ pid_t old_pid, old_vpid;
+ int ret;
+
+ /* Need to fetch pid before load_binary changes it */
+ old_pid = current->pid;
+ rcu_read_lock();
+ old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent));
+ rcu_read_unlock();
+
+ ret = search_binary_handler(bprm);
+ if (ret >= 0) {
+ audit_bprm(bprm);
+ trace_sched_process_exec(current, old_pid, bprm);
+ ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
+ proc_exec_connector(current);
+ }
+
+ return ret;
+}
+
/*
* sys_execve() executes a new program.
*/
-int do_execve(const char * filename,
- const char __user *const __user *argv,
- const char __user *const __user *envp,
- struct pt_regs * regs)
+static int do_execve_common(struct filename *filename,
+ struct user_arg_ptr argv,
+ struct user_arg_ptr envp)
{
struct linux_binprm *bprm;
struct file *file;
struct files_struct *displaced;
- bool clear_in_exec;
int retval;
+ if (IS_ERR(filename))
+ return PTR_ERR(filename);
+
+ /*
+ * We move the actual failure in case of RLIMIT_NPROC excess from
+ * set*uid() to execve() because too many poorly written programs
+ * don't check setuid() return code. Here we additionally recheck
+ * whether NPROC limit is still exceeded.
+ */
+ if ((current->flags & PF_NPROC_EXCEEDED) &&
+ atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) {
+ retval = -EAGAIN;
+ goto out_ret;
+ }
+
+ /* We're below the limit (still or again), so we don't want to make
+ * further execve() calls fail. */
+ current->flags &= ~PF_NPROC_EXCEEDED;
+
retval = unshare_files(&displaced);
if (retval)
goto out_ret;
@@ -1366,13 +1467,10 @@ int do_execve(const char * filename,
if (retval)
goto out_free;
- retval = check_unsafe_exec(bprm);
- if (retval < 0)
- goto out_free;
- clear_in_exec = retval;
+ check_unsafe_exec(bprm);
current->in_execve = 1;
- file = open_exec(filename);
+ file = do_open_exec(filename);
retval = PTR_ERR(file);
if (IS_ERR(file))
goto out_unmark;
@@ -1380,12 +1478,11 @@ int do_execve(const char * filename,
sched_exec();
bprm->file = file;
- bprm->filename = filename;
- bprm->interp = filename;
+ bprm->filename = bprm->interp = filename->name;
retval = bprm_mm_init(bprm);
if (retval)
- goto out_file;
+ goto out_unmark;
bprm->argc = count(argv, MAX_ARG_STRINGS);
if ((retval = bprm->argc) < 0)
@@ -1412,7 +1509,7 @@ int do_execve(const char * filename,
if (retval < 0)
goto out;
- retval = search_binary_handler(bprm,regs);
+ retval = exec_binprm(bprm);
if (retval < 0)
goto out;
@@ -1420,24 +1517,21 @@ int do_execve(const char * filename,
current->fs->in_exec = 0;
current->in_execve = 0;
acct_update_integrals(current);
+ task_numa_free(current);
free_bprm(bprm);
+ putname(filename);
if (displaced)
put_files_struct(displaced);
return retval;
out:
- if (bprm->mm)
- mmput (bprm->mm);
-
-out_file:
- if (bprm->file) {
- allow_write_access(bprm->file);
- fput(bprm->file);
+ if (bprm->mm) {
+ acct_arg_size(bprm, 0);
+ mmput(bprm->mm);
}
out_unmark:
- if (clear_in_exec)
- current->fs->in_exec = 0;
+ current->fs->in_exec = 0;
current->in_execve = 0;
out_free:
@@ -1447,9 +1541,36 @@ out_files:
if (displaced)
reset_files_struct(displaced);
out_ret:
+ putname(filename);
return retval;
}
+int do_execve(struct filename *filename,
+ const char __user *const __user *__argv,
+ const char __user *const __user *__envp)
+{
+ struct user_arg_ptr argv = { .ptr.native = __argv };
+ struct user_arg_ptr envp = { .ptr.native = __envp };
+ return do_execve_common(filename, argv, envp);
+}
+
+#ifdef CONFIG_COMPAT
+static int compat_do_execve(struct filename *filename,
+ const compat_uptr_t __user *__argv,
+ const compat_uptr_t __user *__envp)
+{
+ struct user_arg_ptr argv = {
+ .is_compat = true,
+ .ptr.compat = __argv,
+ };
+ struct user_arg_ptr envp = {
+ .is_compat = true,
+ .ptr.compat = __envp,
+ };
+ return do_execve_common(filename, argv, envp);
+}
+#endif
+
void set_binfmt(struct linux_binfmt *new)
{
struct mm_struct *mm = current->mm;
@@ -1461,635 +1582,36 @@ void set_binfmt(struct linux_binfmt *new)
if (new)
__module_get(new->module);
}
-
EXPORT_SYMBOL(set_binfmt);
-static int expand_corename(struct core_name *cn)
-{
- char *old_corename = cn->corename;
-
- cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
- cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
-
- if (!cn->corename) {
- kfree(old_corename);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-static int cn_printf(struct core_name *cn, const char *fmt, ...)
-{
- char *cur;
- int need;
- int ret;
- va_list arg;
-
- va_start(arg, fmt);
- need = vsnprintf(NULL, 0, fmt, arg);
- va_end(arg);
-
- if (likely(need < cn->size - cn->used - 1))
- goto out_printf;
-
- ret = expand_corename(cn);
- if (ret)
- goto expand_fail;
-
-out_printf:
- cur = cn->corename + cn->used;
- va_start(arg, fmt);
- vsnprintf(cur, need + 1, fmt, arg);
- va_end(arg);
- cn->used += need;
- return 0;
-
-expand_fail:
- return ret;
-}
-
-/* format_corename will inspect the pattern parameter, and output a
- * name into corename, which must have space for at least
- * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
- */
-static int format_corename(struct core_name *cn, long signr)
-{
- const struct cred *cred = current_cred();
- const char *pat_ptr = core_pattern;
- int ispipe = (*pat_ptr == '|');
- int pid_in_pattern = 0;
- int err = 0;
-
- cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
- cn->corename = kmalloc(cn->size, GFP_KERNEL);
- cn->used = 0;
-
- if (!cn->corename)
- return -ENOMEM;
-
- /* Repeat as long as we have more pattern to process and more output
- space */
- while (*pat_ptr) {
- if (*pat_ptr != '%') {
- if (*pat_ptr == 0)
- goto out;
- err = cn_printf(cn, "%c", *pat_ptr++);
- } else {
- switch (*++pat_ptr) {
- /* single % at the end, drop that */
- case 0:
- goto out;
- /* Double percent, output one percent */
- case '%':
- err = cn_printf(cn, "%c", '%');
- break;
- /* pid */
- case 'p':
- pid_in_pattern = 1;
- err = cn_printf(cn, "%d",
- task_tgid_vnr(current));
- break;
- /* uid */
- case 'u':
- err = cn_printf(cn, "%d", cred->uid);
- break;
- /* gid */
- case 'g':
- err = cn_printf(cn, "%d", cred->gid);
- break;
- /* signal that caused the coredump */
- case 's':
- err = cn_printf(cn, "%ld", signr);
- break;
- /* UNIX time of coredump */
- case 't': {
- struct timeval tv;
- do_gettimeofday(&tv);
- err = cn_printf(cn, "%lu", tv.tv_sec);
- break;
- }
- /* hostname */
- case 'h':
- down_read(&uts_sem);
- err = cn_printf(cn, "%s",
- utsname()->nodename);
- up_read(&uts_sem);
- break;
- /* executable */
- case 'e':
- err = cn_printf(cn, "%s", current->comm);
- break;
- /* core limit size */
- case 'c':
- err = cn_printf(cn, "%lu",
- rlimit(RLIMIT_CORE));
- break;
- default:
- break;
- }
- ++pat_ptr;
- }
-
- if (err)
- return err;
- }
-
- /* Backward compatibility with core_uses_pid:
- *
- * If core_pattern does not include a %p (as is the default)
- * and core_uses_pid is set, then .%pid will be appended to
- * the filename. Do not do this for piped commands. */
- if (!ispipe && !pid_in_pattern && core_uses_pid) {
- err = cn_printf(cn, ".%d", task_tgid_vnr(current));
- if (err)
- return err;
- }
-out:
- return ispipe;
-}
-
-static int zap_process(struct task_struct *start, int exit_code)
-{
- struct task_struct *t;
- int nr = 0;
-
- start->signal->flags = SIGNAL_GROUP_EXIT;
- start->signal->group_exit_code = exit_code;
- start->signal->group_stop_count = 0;
-
- t = start;
- do {
- if (t != current && t->mm) {
- sigaddset(&t->pending.signal, SIGKILL);
- signal_wake_up(t, 1);
- nr++;
- }
- } while_each_thread(start, t);
-
- return nr;
-}
-
-static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
- struct core_state *core_state, int exit_code)
-{
- struct task_struct *g, *p;
- unsigned long flags;
- int nr = -EAGAIN;
-
- spin_lock_irq(&tsk->sighand->siglock);
- if (!signal_group_exit(tsk->signal)) {
- mm->core_state = core_state;
- nr = zap_process(tsk, exit_code);
- }
- spin_unlock_irq(&tsk->sighand->siglock);
- if (unlikely(nr < 0))
- return nr;
-
- if (atomic_read(&mm->mm_users) == nr + 1)
- goto done;
- /*
- * We should find and kill all tasks which use this mm, and we should
- * count them correctly into ->nr_threads. We don't take tasklist
- * lock, but this is safe wrt:
- *
- * fork:
- * None of sub-threads can fork after zap_process(leader). All
- * processes which were created before this point should be
- * visible to zap_threads() because copy_process() adds the new
- * process to the tail of init_task.tasks list, and lock/unlock
- * of ->siglock provides a memory barrier.
- *
- * do_exit:
- * The caller holds mm->mmap_sem. This means that the task which
- * uses this mm can't pass exit_mm(), so it can't exit or clear
- * its ->mm.
- *
- * de_thread:
- * It does list_replace_rcu(&leader->tasks, &current->tasks),
- * we must see either old or new leader, this does not matter.
- * However, it can change p->sighand, so lock_task_sighand(p)
- * must be used. Since p->mm != NULL and we hold ->mmap_sem
- * it can't fail.
- *
- * Note also that "g" can be the old leader with ->mm == NULL
- * and already unhashed and thus removed from ->thread_group.
- * This is OK, __unhash_process()->list_del_rcu() does not
- * clear the ->next pointer, we will find the new leader via
- * next_thread().
- */
- rcu_read_lock();
- for_each_process(g) {
- if (g == tsk->group_leader)
- continue;
- if (g->flags & PF_KTHREAD)
- continue;
- p = g;
- do {
- if (p->mm) {
- if (unlikely(p->mm == mm)) {
- lock_task_sighand(p, &flags);
- nr += zap_process(p, exit_code);
- unlock_task_sighand(p, &flags);
- }
- break;
- }
- } while_each_thread(g, p);
- }
- rcu_read_unlock();
-done:
- atomic_set(&core_state->nr_threads, nr);
- return nr;
-}
-
-static int coredump_wait(int exit_code, struct core_state *core_state)
-{
- struct task_struct *tsk = current;
- struct mm_struct *mm = tsk->mm;
- struct completion *vfork_done;
- int core_waiters = -EBUSY;
-
- init_completion(&core_state->startup);
- core_state->dumper.task = tsk;
- core_state->dumper.next = NULL;
-
- down_write(&mm->mmap_sem);
- if (!mm->core_state)
- core_waiters = zap_threads(tsk, mm, core_state, exit_code);
- up_write(&mm->mmap_sem);
-
- if (unlikely(core_waiters < 0))
- goto fail;
-
- /*
- * Make sure nobody is waiting for us to release the VM,
- * otherwise we can deadlock when we wait on each other
- */
- vfork_done = tsk->vfork_done;
- if (vfork_done) {
- tsk->vfork_done = NULL;
- complete(vfork_done);
- }
-
- if (core_waiters)
- wait_for_completion(&core_state->startup);
-fail:
- return core_waiters;
-}
-
-static void coredump_finish(struct mm_struct *mm)
-{
- struct core_thread *curr, *next;
- struct task_struct *task;
-
- next = mm->core_state->dumper.next;
- while ((curr = next) != NULL) {
- next = curr->next;
- task = curr->task;
- /*
- * see exit_mm(), curr->task must not see
- * ->task == NULL before we read ->next.
- */
- smp_mb();
- curr->task = NULL;
- wake_up_process(task);
- }
-
- mm->core_state = NULL;
-}
-
/*
- * set_dumpable converts traditional three-value dumpable to two flags and
- * stores them into mm->flags. It modifies lower two bits of mm->flags, but
- * these bits are not changed atomically. So get_dumpable can observe the
- * intermediate state. To avoid doing unexpected behavior, get get_dumpable
- * return either old dumpable or new one by paying attention to the order of
- * modifying the bits.
- *
- * dumpable | mm->flags (binary)
- * old new | initial interim final
- * ---------+-----------------------
- * 0 1 | 00 01 01
- * 0 2 | 00 10(*) 11
- * 1 0 | 01 00 00
- * 1 2 | 01 11 11
- * 2 0 | 11 10(*) 00
- * 2 1 | 11 11 01
- *
- * (*) get_dumpable regards interim value of 10 as 11.
+ * set_dumpable stores three-value SUID_DUMP_* into mm->flags.
*/
void set_dumpable(struct mm_struct *mm, int value)
{
- switch (value) {
- case 0:
- clear_bit(MMF_DUMPABLE, &mm->flags);
- smp_wmb();
- clear_bit(MMF_DUMP_SECURELY, &mm->flags);
- break;
- case 1:
- set_bit(MMF_DUMPABLE, &mm->flags);
- smp_wmb();
- clear_bit(MMF_DUMP_SECURELY, &mm->flags);
- break;
- case 2:
- set_bit(MMF_DUMP_SECURELY, &mm->flags);
- smp_wmb();
- set_bit(MMF_DUMPABLE, &mm->flags);
- break;
- }
-}
-
-static int __get_dumpable(unsigned long mm_flags)
-{
- int ret;
-
- ret = mm_flags & MMF_DUMPABLE_MASK;
- return (ret >= 2) ? 2 : ret;
-}
-
-int get_dumpable(struct mm_struct *mm)
-{
- return __get_dumpable(mm->flags);
-}
-
-static void wait_for_dump_helpers(struct file *file)
-{
- struct pipe_inode_info *pipe;
-
- pipe = file->f_path.dentry->d_inode->i_pipe;
-
- pipe_lock(pipe);
- pipe->readers++;
- pipe->writers--;
-
- while ((pipe->readers > 1) && (!signal_pending(current))) {
- wake_up_interruptible_sync(&pipe->wait);
- kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
- pipe_wait(pipe);
- }
+ unsigned long old, new;
- pipe->readers--;
- pipe->writers++;
- pipe_unlock(pipe);
+ if (WARN_ON((unsigned)value > SUID_DUMP_ROOT))
+ return;
+ do {
+ old = ACCESS_ONCE(mm->flags);
+ new = (old & ~MMF_DUMPABLE_MASK) | value;
+ } while (cmpxchg(&mm->flags, old, new) != old);
}
-
-/*
- * uhm_pipe_setup
- * helper function to customize the process used
- * to collect the core in userspace. Specifically
- * it sets up a pipe and installs it as fd 0 (stdin)
- * for the process. Returns 0 on success, or
- * PTR_ERR on failure.
- * Note that it also sets the core limit to 1. This
- * is a special value that we use to trap recursive
- * core dumps
- */
-static int umh_pipe_setup(struct subprocess_info *info)
-{
- struct file *rp, *wp;
- struct fdtable *fdt;
- struct coredump_params *cp = (struct coredump_params *)info->data;
- struct files_struct *cf = current->files;
-
- wp = create_write_pipe(0);
- if (IS_ERR(wp))
- return PTR_ERR(wp);
-
- rp = create_read_pipe(wp, 0);
- if (IS_ERR(rp)) {
- free_write_pipe(wp);
- return PTR_ERR(rp);
- }
-
- cp->file = wp;
-
- sys_close(0);
- fd_install(0, rp);
- spin_lock(&cf->file_lock);
- fdt = files_fdtable(cf);
- FD_SET(0, fdt->open_fds);
- FD_CLR(0, fdt->close_on_exec);
- spin_unlock(&cf->file_lock);
-
- /* and disallow core files too */
- current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
-
- return 0;
-}
-
-void do_coredump(long signr, int exit_code, struct pt_regs *regs)
-{
- struct core_state core_state;
- struct core_name cn;
- struct mm_struct *mm = current->mm;
- struct linux_binfmt * binfmt;
- const struct cred *old_cred;
- struct cred *cred;
- int retval = 0;
- int flag = 0;
- int ispipe;
- static atomic_t core_dump_count = ATOMIC_INIT(0);
- struct coredump_params cprm = {
- .signr = signr,
- .regs = regs,
- .limit = rlimit(RLIMIT_CORE),
- /*
- * We must use the same mm->flags while dumping core to avoid
- * inconsistency of bit flags, since this flag is not protected
- * by any locks.
- */
- .mm_flags = mm->flags,
- };
-
- audit_core_dumps(signr);
-
- binfmt = mm->binfmt;
- if (!binfmt || !binfmt->core_dump)
- goto fail;
- if (!__get_dumpable(cprm.mm_flags))
- goto fail;
-
- cred = prepare_creds();
- if (!cred)
- goto fail;
- /*
- * We cannot trust fsuid as being the "true" uid of the
- * process nor do we know its entire history. We only know it
- * was tainted so we dump it as root in mode 2.
- */
- if (__get_dumpable(cprm.mm_flags) == 2) {
- /* Setuid core dump mode */
- flag = O_EXCL; /* Stop rewrite attacks */
- cred->fsuid = 0; /* Dump root private */
- }
-
- retval = coredump_wait(exit_code, &core_state);
- if (retval < 0)
- goto fail_creds;
-
- old_cred = override_creds(cred);
-
- /*
- * Clear any false indication of pending signals that might
- * be seen by the filesystem code called to write the core file.
- */
- clear_thread_flag(TIF_SIGPENDING);
-
- ispipe = format_corename(&cn, signr);
-
- if (ispipe == -ENOMEM) {
- printk(KERN_WARNING "format_corename failed\n");
- printk(KERN_WARNING "Aborting core\n");
- goto fail_corename;
- }
-
- if (ispipe) {
- int dump_count;
- char **helper_argv;
-
- if (cprm.limit == 1) {
- /*
- * Normally core limits are irrelevant to pipes, since
- * we're not writing to the file system, but we use
- * cprm.limit of 1 here as a speacial value. Any
- * non-1 limit gets set to RLIM_INFINITY below, but
- * a limit of 0 skips the dump. This is a consistent
- * way to catch recursive crashes. We can still crash
- * if the core_pattern binary sets RLIM_CORE = !1
- * but it runs as root, and can do lots of stupid things
- * Note that we use task_tgid_vnr here to grab the pid
- * of the process group leader. That way we get the
- * right pid if a thread in a multi-threaded
- * core_pattern process dies.
- */
- printk(KERN_WARNING
- "Process %d(%s) has RLIMIT_CORE set to 1\n",
- task_tgid_vnr(current), current->comm);
- printk(KERN_WARNING "Aborting core\n");
- goto fail_unlock;
- }
- cprm.limit = RLIM_INFINITY;
-
- dump_count = atomic_inc_return(&core_dump_count);
- if (core_pipe_limit && (core_pipe_limit < dump_count)) {
- printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
- task_tgid_vnr(current), current->comm);
- printk(KERN_WARNING "Skipping core dump\n");
- goto fail_dropcount;
- }
-
- helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
- if (!helper_argv) {
- printk(KERN_WARNING "%s failed to allocate memory\n",
- __func__);
- goto fail_dropcount;
- }
-
- retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
- NULL, UMH_WAIT_EXEC, umh_pipe_setup,
- NULL, &cprm);
- argv_free(helper_argv);
- if (retval) {
- printk(KERN_INFO "Core dump to %s pipe failed\n",
- cn.corename);
- goto close_fail;
- }
- } else {
- struct inode *inode;
-
- if (cprm.limit < binfmt->min_coredump)
- goto fail_unlock;
-
- cprm.file = filp_open(cn.corename,
- O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
- 0600);
- if (IS_ERR(cprm.file))
- goto fail_unlock;
-
- inode = cprm.file->f_path.dentry->d_inode;
- if (inode->i_nlink > 1)
- goto close_fail;
- if (d_unhashed(cprm.file->f_path.dentry))
- goto close_fail;
- /*
- * AK: actually i see no reason to not allow this for named
- * pipes etc, but keep the previous behaviour for now.
- */
- if (!S_ISREG(inode->i_mode))
- goto close_fail;
- /*
- * Dont allow local users get cute and trick others to coredump
- * into their pre-created files.
- */
- if (inode->i_uid != current_fsuid())
- goto close_fail;
- if (!cprm.file->f_op || !cprm.file->f_op->write)
- goto close_fail;
- if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
- goto close_fail;
- }
-
- retval = binfmt->core_dump(&cprm);
- if (retval)
- current->signal->group_exit_code |= 0x80;
-
- if (ispipe && core_pipe_limit)
- wait_for_dump_helpers(cprm.file);
-close_fail:
- if (cprm.file)
- filp_close(cprm.file, NULL);
-fail_dropcount:
- if (ispipe)
- atomic_dec(&core_dump_count);
-fail_unlock:
- kfree(cn.corename);
-fail_corename:
- coredump_finish(mm);
- revert_creds(old_cred);
-fail_creds:
- put_cred(cred);
-fail:
- return;
-}
-
-/*
- * Core dumping helper functions. These are the only things you should
- * do on a core-file: use only these functions to write out all the
- * necessary info.
- */
-int dump_write(struct file *file, const void *addr, int nr)
+SYSCALL_DEFINE3(execve,
+ const char __user *, filename,
+ const char __user *const __user *, argv,
+ const char __user *const __user *, envp)
{
- return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
+ return do_execve(getname(filename), argv, envp);
}
-EXPORT_SYMBOL(dump_write);
-
-int dump_seek(struct file *file, loff_t off)
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
+ const compat_uptr_t __user *, argv,
+ const compat_uptr_t __user *, envp)
{
- int ret = 1;
-
- if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
- if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
- return 0;
- } else {
- char *buf = (char *)get_zeroed_page(GFP_KERNEL);
-
- if (!buf)
- return 0;
- while (off > 0) {
- unsigned long n = off;
-
- if (n > PAGE_SIZE)
- n = PAGE_SIZE;
- if (!dump_write(file, buf, n)) {
- ret = 0;
- break;
- }
- off -= n;
- }
- free_page((unsigned long)buf);
- }
- return ret;
+ return compat_do_execve(getname(filename), argv, envp);
}
-EXPORT_SYMBOL(dump_seek);
+#endif