diff options
Diffstat (limited to 'kernel/acct.c')
| -rw-r--r-- | kernel/acct.c | 308 |
1 files changed, 176 insertions, 132 deletions
diff --git a/kernel/acct.c b/kernel/acct.c index 91e1cfd734d..808a86ff229 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -55,7 +55,7 @@ #include <linux/times.h> #include <linux/syscalls.h> #include <linux/mount.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/div64.h> #include <linux/blkdev.h> /* sector_div */ #include <linux/pid_namespace.h> @@ -75,58 +75,50 @@ int acct_parm[3] = {4, 2, 30}; /* * External references and all of the globals. */ -static void do_acct_process(struct pid_namespace *ns, struct file *); +static void do_acct_process(struct bsd_acct_struct *acct, + struct pid_namespace *ns, struct file *); /* * This structure is used so that all the data protected by lock * can be placed in the same cache line as the lock. This primes * the cache line to have the data after getting the lock. */ -struct acct_glbs { - spinlock_t lock; - volatile int active; - volatile int needcheck; +struct bsd_acct_struct { + int active; + unsigned long needcheck; struct file *file; struct pid_namespace *ns; - struct timer_list timer; + struct list_head list; }; -static struct acct_glbs acct_globals __cacheline_aligned = - {__SPIN_LOCK_UNLOCKED(acct_globals.lock)}; - -/* - * Called whenever the timer says to check the free space. - */ -static void acct_timeout(unsigned long unused) -{ - acct_globals.needcheck = 1; -} +static DEFINE_SPINLOCK(acct_lock); +static LIST_HEAD(acct_list); /* * Check the amount of free space and suspend/resume accordingly. 
*/ -static int check_free_space(struct file *file) +static int check_free_space(struct bsd_acct_struct *acct, struct file *file) { struct kstatfs sbuf; int res; int act; - sector_t resume; - sector_t suspend; + u64 resume; + u64 suspend; - spin_lock(&acct_globals.lock); - res = acct_globals.active; - if (!file || !acct_globals.needcheck) + spin_lock(&acct_lock); + res = acct->active; + if (!file || time_is_before_jiffies(acct->needcheck)) goto out; - spin_unlock(&acct_globals.lock); + spin_unlock(&acct_lock); /* May block */ - if (vfs_statfs(file->f_path.dentry, &sbuf)) + if (vfs_statfs(&file->f_path, &sbuf)) return res; suspend = sbuf.f_blocks * SUSPEND; resume = sbuf.f_blocks * RESUME; - sector_div(suspend, 100); - sector_div(resume, 100); + do_div(suspend, 100); + do_div(resume, 100); if (sbuf.f_bavail <= suspend) act = -1; @@ -136,35 +128,32 @@ static int check_free_space(struct file *file) act = 0; /* - * If some joker switched acct_globals.file under us we'ld better be + * If some joker switched acct->file under us we'ld better be * silent and _not_ touch anything. 
*/ - spin_lock(&acct_globals.lock); - if (file != acct_globals.file) { + spin_lock(&acct_lock); + if (file != acct->file) { if (act) - res = act>0; + res = act > 0; goto out; } - if (acct_globals.active) { + if (acct->active) { if (act < 0) { - acct_globals.active = 0; + acct->active = 0; printk(KERN_INFO "Process accounting paused\n"); } } else { if (act > 0) { - acct_globals.active = 1; + acct->active = 1; printk(KERN_INFO "Process accounting resumed\n"); } } - del_timer(&acct_globals.timer); - acct_globals.needcheck = 0; - acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ; - add_timer(&acct_globals.timer); - res = acct_globals.active; + acct->needcheck = jiffies + ACCT_TIMEOUT*HZ; + res = acct->active; out: - spin_unlock(&acct_globals.lock); + spin_unlock(&acct_lock); return res; } @@ -172,53 +161,51 @@ out: * Close the old accounting file (if currently open) and then replace * it with file (if non-NULL). * - * NOTE: acct_globals.lock MUST be held on entry and exit. + * NOTE: acct_lock MUST be held on entry and exit. 
*/ -static void acct_file_reopen(struct file *file) +static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file, + struct pid_namespace *ns) { struct file *old_acct = NULL; struct pid_namespace *old_ns = NULL; - if (acct_globals.file) { - old_acct = acct_globals.file; - old_ns = acct_globals.ns; - del_timer(&acct_globals.timer); - acct_globals.active = 0; - acct_globals.needcheck = 0; - acct_globals.file = NULL; + if (acct->file) { + old_acct = acct->file; + old_ns = acct->ns; + acct->active = 0; + acct->file = NULL; + acct->ns = NULL; + list_del(&acct->list); } if (file) { - acct_globals.file = file; - acct_globals.ns = get_pid_ns(task_active_pid_ns(current)); - acct_globals.needcheck = 0; - acct_globals.active = 1; - /* It's been deleted if it was used before so this is safe */ - init_timer(&acct_globals.timer); - acct_globals.timer.function = acct_timeout; - acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ; - add_timer(&acct_globals.timer); + acct->file = file; + acct->ns = ns; + acct->needcheck = jiffies + ACCT_TIMEOUT*HZ; + acct->active = 1; + list_add(&acct->list, &acct_list); } if (old_acct) { mnt_unpin(old_acct->f_path.mnt); - spin_unlock(&acct_globals.lock); - do_acct_process(old_ns, old_acct); + spin_unlock(&acct_lock); + do_acct_process(acct, old_ns, old_acct); filp_close(old_acct, NULL); - put_pid_ns(old_ns); - spin_lock(&acct_globals.lock); + spin_lock(&acct_lock); } } -static int acct_on(char *name) +static int acct_on(struct filename *pathname) { struct file *file; - int error; + struct vfsmount *mnt; + struct pid_namespace *ns; + struct bsd_acct_struct *acct = NULL; /* Difference from BSD - they don't do O_APPEND */ - file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0); + file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0); if (IS_ERR(file)) return PTR_ERR(file); - if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) { + if (!S_ISREG(file_inode(file)->i_mode)) { filp_close(file, NULL); return -EACCES; } @@ 
-228,18 +215,28 @@ static int acct_on(char *name) return -EIO; } - error = security_acct(file); - if (error) { - filp_close(file, NULL); - return error; + ns = task_active_pid_ns(current); + if (ns->bacct == NULL) { + acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL); + if (acct == NULL) { + filp_close(file, NULL); + return -ENOMEM; + } + } + + spin_lock(&acct_lock); + if (ns->bacct == NULL) { + ns->bacct = acct; + acct = NULL; } - spin_lock(&acct_globals.lock); - mnt_pin(file->f_path.mnt); - acct_file_reopen(file); - spin_unlock(&acct_globals.lock); + mnt = file->f_path.mnt; + mnt_pin(mnt); + acct_file_reopen(ns->bacct, file, ns); + spin_unlock(&acct_lock); - mntput(file->f_path.mnt); /* it's pinned, now give up active reference */ + mntput(mnt); /* it's pinned, now give up active reference */ + kfree(acct); return 0; } @@ -255,27 +252,31 @@ static int acct_on(char *name) * should be written. If the filename is NULL, accounting will be * shutdown. */ -asmlinkage long sys_acct(const char __user *name) +SYSCALL_DEFINE1(acct, const char __user *, name) { - int error; + int error = 0; if (!capable(CAP_SYS_PACCT)) return -EPERM; if (name) { - char *tmp = getname(name); + struct filename *tmp = getname(name); if (IS_ERR(tmp)) - return (PTR_ERR(tmp)); + return PTR_ERR(tmp); error = acct_on(tmp); putname(tmp); } else { - error = security_acct(NULL); - if (!error) { - spin_lock(&acct_globals.lock); - acct_file_reopen(NULL); - spin_unlock(&acct_globals.lock); - } + struct bsd_acct_struct *acct; + + acct = task_active_pid_ns(current)->bacct; + if (acct == NULL) + return 0; + + spin_lock(&acct_lock); + acct_file_reopen(acct, NULL, NULL); + spin_unlock(&acct_lock); } + return error; } @@ -288,10 +289,16 @@ asmlinkage long sys_acct(const char __user *name) */ void acct_auto_close_mnt(struct vfsmount *m) { - spin_lock(&acct_globals.lock); - if (acct_globals.file && acct_globals.file->f_path.mnt == m) - acct_file_reopen(NULL); - spin_unlock(&acct_globals.lock); + struct 
bsd_acct_struct *acct; + + spin_lock(&acct_lock); +restart: + list_for_each_entry(acct, &acct_list, list) + if (acct->file && acct->file->f_path.mnt == m) { + acct_file_reopen(acct, NULL, NULL); + goto restart; + } + spin_unlock(&acct_lock); } /** @@ -303,12 +310,31 @@ void acct_auto_close_mnt(struct vfsmount *m) */ void acct_auto_close(struct super_block *sb) { - spin_lock(&acct_globals.lock); - if (acct_globals.file && - acct_globals.file->f_path.mnt->mnt_sb == sb) { - acct_file_reopen(NULL); - } - spin_unlock(&acct_globals.lock); + struct bsd_acct_struct *acct; + + spin_lock(&acct_lock); +restart: + list_for_each_entry(acct, &acct_list, list) + if (acct->file && acct->file->f_path.dentry->d_sb == sb) { + acct_file_reopen(acct, NULL, NULL); + goto restart; + } + spin_unlock(&acct_lock); +} + +void acct_exit_ns(struct pid_namespace *ns) +{ + struct bsd_acct_struct *acct = ns->bacct; + + if (acct == NULL) + return; + + spin_lock(&acct_lock); + if (acct->file != NULL) + acct_file_reopen(acct, NULL, NULL); + spin_unlock(&acct_lock); + + kfree(acct); } /* @@ -425,7 +451,8 @@ static u32 encode_float(u64 value) /* * do_acct_process does all actual work. Caller holds the reference to file. */ -static void do_acct_process(struct pid_namespace *ns, struct file *file) +static void do_acct_process(struct bsd_acct_struct *acct, + struct pid_namespace *ns, struct file *file) { struct pacct_struct *pacct = &current->signal->pacct; acct_t ac; @@ -435,19 +462,23 @@ static void do_acct_process(struct pid_namespace *ns, struct file *file) u64 run_time; struct timespec uptime; struct tty_struct *tty; + const struct cred *orig_cred; + + /* Perform file operations on behalf of whoever enabled accounting */ + orig_cred = override_creds(file->f_cred); /* * First check to see if there is enough free_space to continue * the process accounting system. 
*/ - if (!check_free_space(file)) - return; + if (!check_free_space(acct, file)) + goto out; /* * Fill the accounting struct with the needed info as recorded * by the different kernel functions. */ - memset((caddr_t)&ac, 0, sizeof(acct_t)); + memset(&ac, 0, sizeof(acct_t)); ac.ac_version = ACCT_VERSION | ACCT_BYTEORDER; strlcpy(ac.ac_comm, current->comm, sizeof(ac.ac_comm)); @@ -476,15 +507,15 @@ static void do_acct_process(struct pid_namespace *ns, struct file *file) do_div(elapsed, AHZ); ac.ac_btime = get_seconds() - elapsed; /* we really need to bite the bullet and change layout */ - ac.ac_uid = current->uid; - ac.ac_gid = current->gid; + ac.ac_uid = from_kuid_munged(file->f_cred->user_ns, orig_cred->uid); + ac.ac_gid = from_kgid_munged(file->f_cred->user_ns, orig_cred->gid); #if ACCT_VERSION==2 ac.ac_ahz = AHZ; #endif #if ACCT_VERSION==1 || ACCT_VERSION==2 /* backward-compatible 16 bit fields */ - ac.ac_uid16 = current->uid; - ac.ac_gid16 = current->gid; + ac.ac_uid16 = ac.ac_uid; + ac.ac_gid16 = ac.ac_gid; #endif #if ACCT_VERSION==3 ac.ac_pid = task_tgid_nr_ns(current, ns); @@ -494,7 +525,7 @@ static void do_acct_process(struct pid_namespace *ns, struct file *file) #endif spin_lock_irq(&current->sighand->siglock); - tty = current->signal->tty; + tty = current->signal->tty; /* Safe as we hold the siglock */ ac.ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0; ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime))); ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime))); @@ -509,6 +540,12 @@ static void do_acct_process(struct pid_namespace *ns, struct file *file) ac.ac_swaps = encode_comp_t(0); /* + * Get freeze protection. If the fs is frozen, just skip the write + * as we could deadlock the system otherwise. + */ + if (!file_start_write_trylock(file)) + goto out; + /* * Kernel segment override to datasegment and write it * to the accounting file. 
*/ @@ -523,16 +560,9 @@ static void do_acct_process(struct pid_namespace *ns, struct file *file) sizeof(acct_t), &file->f_pos); current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim; set_fs(fs); -} - -/** - * acct_init_pacct - initialize a new pacct_struct - * @pacct: per-process accounting info struct to initialize - */ -void acct_init_pacct(struct pacct_struct *pacct) -{ - memset(pacct, 0, sizeof(struct pacct_struct)); - pacct->ac_utime = pacct->ac_stime = cputime_zero; + file_end_write(file); +out: + revert_creds(orig_cred); } /** @@ -543,6 +573,7 @@ void acct_init_pacct(struct pacct_struct *pacct) void acct_collect(long exitcode, int group_dead) { struct pacct_struct *pacct = &current->signal->pacct; + cputime_t utime, stime; unsigned long vsize = 0; if (group_dead && current->mm) { @@ -570,41 +601,54 @@ void acct_collect(long exitcode, int group_dead) pacct->ac_flag |= ACORE; if (current->flags & PF_SIGNALED) pacct->ac_flag |= AXSIG; - pacct->ac_utime = cputime_add(pacct->ac_utime, current->utime); - pacct->ac_stime = cputime_add(pacct->ac_stime, current->stime); + task_cputime(current, &utime, &stime); + pacct->ac_utime += utime; + pacct->ac_stime += stime; pacct->ac_minflt += current->min_flt; pacct->ac_majflt += current->maj_flt; spin_unlock_irq(&current->sighand->siglock); } -/** - * acct_process - now just a wrapper around do_acct_process - * @exitcode: task exit code - * - * handles process accounting for an exiting task - */ -void acct_process(void) +static void acct_process_in_ns(struct pid_namespace *ns) { struct file *file = NULL; - struct pid_namespace *ns; + struct bsd_acct_struct *acct; + acct = ns->bacct; /* * accelerate the common fastpath: */ - if (!acct_globals.file) + if (!acct || !acct->file) return; - spin_lock(&acct_globals.lock); - file = acct_globals.file; + spin_lock(&acct_lock); + file = acct->file; if (unlikely(!file)) { - spin_unlock(&acct_globals.lock); + spin_unlock(&acct_lock); return; } get_file(file); - ns = get_pid_ns(acct_globals.ns); - 
spin_unlock(&acct_globals.lock); + spin_unlock(&acct_lock); - do_acct_process(ns, file); + do_acct_process(acct, ns, file); fput(file); - put_pid_ns(ns); +} + +/** + * acct_process - now just a wrapper around acct_process_in_ns, + * which in turn is a wrapper around do_acct_process. + * + * handles process accounting for an exiting task + */ +void acct_process(void) +{ + struct pid_namespace *ns; + + /* + * This loop is safe lockless, since current is still + * alive and holds its namespace, which in turn holds + * its parent. + */ + for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent) + acct_process_in_ns(ns); } |
