aboutsummaryrefslogtreecommitdiff
path: root/kernel/acct.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/acct.c')
-rw-r--r--kernel/acct.c308
1 files changed, 176 insertions, 132 deletions
diff --git a/kernel/acct.c b/kernel/acct.c
index 91e1cfd734d..808a86ff229 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -55,7 +55,7 @@
#include <linux/times.h>
#include <linux/syscalls.h>
#include <linux/mount.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/div64.h>
#include <linux/blkdev.h> /* sector_div */
#include <linux/pid_namespace.h>
@@ -75,58 +75,50 @@ int acct_parm[3] = {4, 2, 30};
/*
* External references and all of the globals.
*/
-static void do_acct_process(struct pid_namespace *ns, struct file *);
+static void do_acct_process(struct bsd_acct_struct *acct,
+ struct pid_namespace *ns, struct file *);
/*
* This structure is used so that all the data protected by lock
* can be placed in the same cache line as the lock. This primes
* the cache line to have the data after getting the lock.
*/
-struct acct_glbs {
- spinlock_t lock;
- volatile int active;
- volatile int needcheck;
+struct bsd_acct_struct {
+ int active;
+ unsigned long needcheck;
struct file *file;
struct pid_namespace *ns;
- struct timer_list timer;
+ struct list_head list;
};
-static struct acct_glbs acct_globals __cacheline_aligned =
- {__SPIN_LOCK_UNLOCKED(acct_globals.lock)};
-
-/*
- * Called whenever the timer says to check the free space.
- */
-static void acct_timeout(unsigned long unused)
-{
- acct_globals.needcheck = 1;
-}
+static DEFINE_SPINLOCK(acct_lock);
+static LIST_HEAD(acct_list);
/*
* Check the amount of free space and suspend/resume accordingly.
*/
-static int check_free_space(struct file *file)
+static int check_free_space(struct bsd_acct_struct *acct, struct file *file)
{
struct kstatfs sbuf;
int res;
int act;
- sector_t resume;
- sector_t suspend;
+ u64 resume;
+ u64 suspend;
- spin_lock(&acct_globals.lock);
- res = acct_globals.active;
- if (!file || !acct_globals.needcheck)
+ spin_lock(&acct_lock);
+ res = acct->active;
+ if (!file || time_is_before_jiffies(acct->needcheck))
goto out;
- spin_unlock(&acct_globals.lock);
+ spin_unlock(&acct_lock);
/* May block */
- if (vfs_statfs(file->f_path.dentry, &sbuf))
+ if (vfs_statfs(&file->f_path, &sbuf))
return res;
suspend = sbuf.f_blocks * SUSPEND;
resume = sbuf.f_blocks * RESUME;
- sector_div(suspend, 100);
- sector_div(resume, 100);
+ do_div(suspend, 100);
+ do_div(resume, 100);
if (sbuf.f_bavail <= suspend)
act = -1;
@@ -136,35 +128,32 @@ static int check_free_space(struct file *file)
act = 0;
/*
- * If some joker switched acct_globals.file under us we'ld better be
+ * If some joker switched acct->file under us we'ld better be
* silent and _not_ touch anything.
*/
- spin_lock(&acct_globals.lock);
- if (file != acct_globals.file) {
+ spin_lock(&acct_lock);
+ if (file != acct->file) {
if (act)
- res = act>0;
+ res = act > 0;
goto out;
}
- if (acct_globals.active) {
+ if (acct->active) {
if (act < 0) {
- acct_globals.active = 0;
+ acct->active = 0;
printk(KERN_INFO "Process accounting paused\n");
}
} else {
if (act > 0) {
- acct_globals.active = 1;
+ acct->active = 1;
printk(KERN_INFO "Process accounting resumed\n");
}
}
- del_timer(&acct_globals.timer);
- acct_globals.needcheck = 0;
- acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
- add_timer(&acct_globals.timer);
- res = acct_globals.active;
+ acct->needcheck = jiffies + ACCT_TIMEOUT*HZ;
+ res = acct->active;
out:
- spin_unlock(&acct_globals.lock);
+ spin_unlock(&acct_lock);
return res;
}
@@ -172,53 +161,51 @@ out:
* Close the old accounting file (if currently open) and then replace
* it with file (if non-NULL).
*
- * NOTE: acct_globals.lock MUST be held on entry and exit.
+ * NOTE: acct_lock MUST be held on entry and exit.
*/
-static void acct_file_reopen(struct file *file)
+static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file,
+ struct pid_namespace *ns)
{
struct file *old_acct = NULL;
struct pid_namespace *old_ns = NULL;
- if (acct_globals.file) {
- old_acct = acct_globals.file;
- old_ns = acct_globals.ns;
- del_timer(&acct_globals.timer);
- acct_globals.active = 0;
- acct_globals.needcheck = 0;
- acct_globals.file = NULL;
+ if (acct->file) {
+ old_acct = acct->file;
+ old_ns = acct->ns;
+ acct->active = 0;
+ acct->file = NULL;
+ acct->ns = NULL;
+ list_del(&acct->list);
}
if (file) {
- acct_globals.file = file;
- acct_globals.ns = get_pid_ns(task_active_pid_ns(current));
- acct_globals.needcheck = 0;
- acct_globals.active = 1;
- /* It's been deleted if it was used before so this is safe */
- init_timer(&acct_globals.timer);
- acct_globals.timer.function = acct_timeout;
- acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
- add_timer(&acct_globals.timer);
+ acct->file = file;
+ acct->ns = ns;
+ acct->needcheck = jiffies + ACCT_TIMEOUT*HZ;
+ acct->active = 1;
+ list_add(&acct->list, &acct_list);
}
if (old_acct) {
mnt_unpin(old_acct->f_path.mnt);
- spin_unlock(&acct_globals.lock);
- do_acct_process(old_ns, old_acct);
+ spin_unlock(&acct_lock);
+ do_acct_process(acct, old_ns, old_acct);
filp_close(old_acct, NULL);
- put_pid_ns(old_ns);
- spin_lock(&acct_globals.lock);
+ spin_lock(&acct_lock);
}
}
-static int acct_on(char *name)
+static int acct_on(struct filename *pathname)
{
struct file *file;
- int error;
+ struct vfsmount *mnt;
+ struct pid_namespace *ns;
+ struct bsd_acct_struct *acct = NULL;
/* Difference from BSD - they don't do O_APPEND */
- file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
+ file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
if (IS_ERR(file))
return PTR_ERR(file);
- if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) {
+ if (!S_ISREG(file_inode(file)->i_mode)) {
filp_close(file, NULL);
return -EACCES;
}
@@ -228,18 +215,28 @@ static int acct_on(char *name)
return -EIO;
}
- error = security_acct(file);
- if (error) {
- filp_close(file, NULL);
- return error;
+ ns = task_active_pid_ns(current);
+ if (ns->bacct == NULL) {
+ acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
+ if (acct == NULL) {
+ filp_close(file, NULL);
+ return -ENOMEM;
+ }
+ }
+
+ spin_lock(&acct_lock);
+ if (ns->bacct == NULL) {
+ ns->bacct = acct;
+ acct = NULL;
}
- spin_lock(&acct_globals.lock);
- mnt_pin(file->f_path.mnt);
- acct_file_reopen(file);
- spin_unlock(&acct_globals.lock);
+ mnt = file->f_path.mnt;
+ mnt_pin(mnt);
+ acct_file_reopen(ns->bacct, file, ns);
+ spin_unlock(&acct_lock);
- mntput(file->f_path.mnt); /* it's pinned, now give up active reference */
+ mntput(mnt); /* it's pinned, now give up active reference */
+ kfree(acct);
return 0;
}
@@ -255,27 +252,31 @@ static int acct_on(char *name)
* should be written. If the filename is NULL, accounting will be
* shutdown.
*/
-asmlinkage long sys_acct(const char __user *name)
+SYSCALL_DEFINE1(acct, const char __user *, name)
{
- int error;
+ int error = 0;
if (!capable(CAP_SYS_PACCT))
return -EPERM;
if (name) {
- char *tmp = getname(name);
+ struct filename *tmp = getname(name);
if (IS_ERR(tmp))
- return (PTR_ERR(tmp));
+ return PTR_ERR(tmp);
error = acct_on(tmp);
putname(tmp);
} else {
- error = security_acct(NULL);
- if (!error) {
- spin_lock(&acct_globals.lock);
- acct_file_reopen(NULL);
- spin_unlock(&acct_globals.lock);
- }
+ struct bsd_acct_struct *acct;
+
+ acct = task_active_pid_ns(current)->bacct;
+ if (acct == NULL)
+ return 0;
+
+ spin_lock(&acct_lock);
+ acct_file_reopen(acct, NULL, NULL);
+ spin_unlock(&acct_lock);
}
+
return error;
}
@@ -288,10 +289,16 @@ asmlinkage long sys_acct(const char __user *name)
*/
void acct_auto_close_mnt(struct vfsmount *m)
{
- spin_lock(&acct_globals.lock);
- if (acct_globals.file && acct_globals.file->f_path.mnt == m)
- acct_file_reopen(NULL);
- spin_unlock(&acct_globals.lock);
+ struct bsd_acct_struct *acct;
+
+ spin_lock(&acct_lock);
+restart:
+ list_for_each_entry(acct, &acct_list, list)
+ if (acct->file && acct->file->f_path.mnt == m) {
+ acct_file_reopen(acct, NULL, NULL);
+ goto restart;
+ }
+ spin_unlock(&acct_lock);
}
/**
@@ -303,12 +310,31 @@ void acct_auto_close_mnt(struct vfsmount *m)
*/
void acct_auto_close(struct super_block *sb)
{
- spin_lock(&acct_globals.lock);
- if (acct_globals.file &&
- acct_globals.file->f_path.mnt->mnt_sb == sb) {
- acct_file_reopen(NULL);
- }
- spin_unlock(&acct_globals.lock);
+ struct bsd_acct_struct *acct;
+
+ spin_lock(&acct_lock);
+restart:
+ list_for_each_entry(acct, &acct_list, list)
+ if (acct->file && acct->file->f_path.dentry->d_sb == sb) {
+ acct_file_reopen(acct, NULL, NULL);
+ goto restart;
+ }
+ spin_unlock(&acct_lock);
+}
+
+void acct_exit_ns(struct pid_namespace *ns)
+{
+ struct bsd_acct_struct *acct = ns->bacct;
+
+ if (acct == NULL)
+ return;
+
+ spin_lock(&acct_lock);
+ if (acct->file != NULL)
+ acct_file_reopen(acct, NULL, NULL);
+ spin_unlock(&acct_lock);
+
+ kfree(acct);
}
/*
@@ -425,7 +451,8 @@ static u32 encode_float(u64 value)
/*
* do_acct_process does all actual work. Caller holds the reference to file.
*/
-static void do_acct_process(struct pid_namespace *ns, struct file *file)
+static void do_acct_process(struct bsd_acct_struct *acct,
+ struct pid_namespace *ns, struct file *file)
{
struct pacct_struct *pacct = &current->signal->pacct;
acct_t ac;
@@ -435,19 +462,23 @@ static void do_acct_process(struct pid_namespace *ns, struct file *file)
u64 run_time;
struct timespec uptime;
struct tty_struct *tty;
+ const struct cred *orig_cred;
+
+ /* Perform file operations on behalf of whoever enabled accounting */
+ orig_cred = override_creds(file->f_cred);
/*
* First check to see if there is enough free_space to continue
* the process accounting system.
*/
- if (!check_free_space(file))
- return;
+ if (!check_free_space(acct, file))
+ goto out;
/*
* Fill the accounting struct with the needed info as recorded
* by the different kernel functions.
*/
- memset((caddr_t)&ac, 0, sizeof(acct_t));
+ memset(&ac, 0, sizeof(acct_t));
ac.ac_version = ACCT_VERSION | ACCT_BYTEORDER;
strlcpy(ac.ac_comm, current->comm, sizeof(ac.ac_comm));
@@ -476,15 +507,15 @@ static void do_acct_process(struct pid_namespace *ns, struct file *file)
do_div(elapsed, AHZ);
ac.ac_btime = get_seconds() - elapsed;
/* we really need to bite the bullet and change layout */
- ac.ac_uid = current->uid;
- ac.ac_gid = current->gid;
+ ac.ac_uid = from_kuid_munged(file->f_cred->user_ns, orig_cred->uid);
+ ac.ac_gid = from_kgid_munged(file->f_cred->user_ns, orig_cred->gid);
#if ACCT_VERSION==2
ac.ac_ahz = AHZ;
#endif
#if ACCT_VERSION==1 || ACCT_VERSION==2
/* backward-compatible 16 bit fields */
- ac.ac_uid16 = current->uid;
- ac.ac_gid16 = current->gid;
+ ac.ac_uid16 = ac.ac_uid;
+ ac.ac_gid16 = ac.ac_gid;
#endif
#if ACCT_VERSION==3
ac.ac_pid = task_tgid_nr_ns(current, ns);
@@ -494,7 +525,7 @@ static void do_acct_process(struct pid_namespace *ns, struct file *file)
#endif
spin_lock_irq(&current->sighand->siglock);
- tty = current->signal->tty;
+ tty = current->signal->tty; /* Safe as we hold the siglock */
ac.ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0;
ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime)));
ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime)));
@@ -509,6 +540,12 @@ static void do_acct_process(struct pid_namespace *ns, struct file *file)
ac.ac_swaps = encode_comp_t(0);
/*
+ * Get freeze protection. If the fs is frozen, just skip the write
+ * as we could deadlock the system otherwise.
+ */
+ if (!file_start_write_trylock(file))
+ goto out;
+ /*
* Kernel segment override to datasegment and write it
* to the accounting file.
*/
@@ -523,16 +560,9 @@ static void do_acct_process(struct pid_namespace *ns, struct file *file)
sizeof(acct_t), &file->f_pos);
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
set_fs(fs);
-}
-
-/**
- * acct_init_pacct - initialize a new pacct_struct
- * @pacct: per-process accounting info struct to initialize
- */
-void acct_init_pacct(struct pacct_struct *pacct)
-{
- memset(pacct, 0, sizeof(struct pacct_struct));
- pacct->ac_utime = pacct->ac_stime = cputime_zero;
+ file_end_write(file);
+out:
+ revert_creds(orig_cred);
}
/**
@@ -543,6 +573,7 @@ void acct_init_pacct(struct pacct_struct *pacct)
void acct_collect(long exitcode, int group_dead)
{
struct pacct_struct *pacct = &current->signal->pacct;
+ cputime_t utime, stime;
unsigned long vsize = 0;
if (group_dead && current->mm) {
@@ -570,41 +601,54 @@ void acct_collect(long exitcode, int group_dead)
pacct->ac_flag |= ACORE;
if (current->flags & PF_SIGNALED)
pacct->ac_flag |= AXSIG;
- pacct->ac_utime = cputime_add(pacct->ac_utime, current->utime);
- pacct->ac_stime = cputime_add(pacct->ac_stime, current->stime);
+ task_cputime(current, &utime, &stime);
+ pacct->ac_utime += utime;
+ pacct->ac_stime += stime;
pacct->ac_minflt += current->min_flt;
pacct->ac_majflt += current->maj_flt;
spin_unlock_irq(&current->sighand->siglock);
}
-/**
- * acct_process - now just a wrapper around do_acct_process
- * @exitcode: task exit code
- *
- * handles process accounting for an exiting task
- */
-void acct_process(void)
+static void acct_process_in_ns(struct pid_namespace *ns)
{
struct file *file = NULL;
- struct pid_namespace *ns;
+ struct bsd_acct_struct *acct;
+ acct = ns->bacct;
/*
* accelerate the common fastpath:
*/
- if (!acct_globals.file)
+ if (!acct || !acct->file)
return;
- spin_lock(&acct_globals.lock);
- file = acct_globals.file;
+ spin_lock(&acct_lock);
+ file = acct->file;
if (unlikely(!file)) {
- spin_unlock(&acct_globals.lock);
+ spin_unlock(&acct_lock);
return;
}
get_file(file);
- ns = get_pid_ns(acct_globals.ns);
- spin_unlock(&acct_globals.lock);
+ spin_unlock(&acct_lock);
- do_acct_process(ns, file);
+ do_acct_process(acct, ns, file);
fput(file);
- put_pid_ns(ns);
+}
+
+/**
+ * acct_process - now just a wrapper around acct_process_in_ns,
+ * which in turn is a wrapper around do_acct_process.
+ *
+ * handles process accounting for an exiting task
+ */
+void acct_process(void)
+{
+ struct pid_namespace *ns;
+
+ /*
+ * This loop is safe lockless, since current is still
+ * alive and holds its namespace, which in turn holds
+ * its parent.
+ */
+ for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent)
+ acct_process_in_ns(ns);
}