From 5f45f1a78fbac3cc859ec10c5366e97d20d40fa2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Thu, 23 Jun 2005 00:09:12 -0700
Subject: [PATCH] remove duplicate get_dentry functions in various places

Various filesystem drivers have grown a get_dentry() function that's a
duplicate of lookup_one_len, except that it doesn't take a maximum length
argument and doesn't check for \0 or / in the passed in filename.

Switch all these places to use lookup_one_len.

Signed-off-by: Christoph Hellwig
Cc: Greg KH
Cc: Paul Jackson
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 kernel/cpuset.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'kernel/cpuset.c')

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 00e8f257551..79dd929f408 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -228,13 +228,7 @@ static struct dentry_operations cpuset_dops = {
 
 static struct dentry *cpuset_get_dentry(struct dentry *parent, const char *name)
 {
-	struct qstr qstr;
-	struct dentry *d;
-
-	qstr.name = name;
-	qstr.len = strlen(name);
-	qstr.hash = full_name_hash(name, qstr.len);
-	d = lookup_hash(&qstr, parent);
+	struct dentry *d = lookup_one_len(name, parent, strlen(name));
 	if (!IS_ERR(d))
 		d->d_op = &cpuset_dops;
 	return d;
--
cgit v1.2.3-18-g5258
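What the open-coded get_dentry() variants were missing is exactly what lookup_one_len() adds on top of hashing the name: a bounded length and a check that the component contains no '/' and no embedded '\0'. As a rough, standalone illustration of that validation step (a userspace sketch only; validate_component() is a made-up name, not a kernel function):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

/*
 * Sketch of the checks a single path component needs before a directory
 * lookup: bounded length, no embedded '\0', no '/'.  This mirrors the
 * kind of validation lookup_one_len() performs for callers that used to
 * open-code the qstr + lookup_hash() sequence.
 */
static bool validate_component(const char *name, size_t len, size_t maxlen)
{
	if (len == 0 || len > maxlen)
		return false;
	if (memchr(name, '\0', len) || memchr(name, '/', len))
		return false;
	return true;
}

int main(void)
{
	printf("%d\n", validate_component("cpus", strlen("cpus"), 255));         /* 1 */
	printf("%d\n", validate_component("bad/name", strlen("bad/name"), 255)); /* 0 */
	return 0;
}
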
From 85d7b94981e2e919697bc235aad7367b33c3864b Mon Sep 17 00:00:00 2001
From: Dinakar Guniguntala
Date: Sat, 25 Jun 2005 14:57:34 -0700
Subject: [PATCH] Dynamic sched domains: cpuset changes

Adds the core update_cpu_domains code and updated cpusets documentation

Signed-off-by: Dinakar Guniguntala
Acked-by: Paul Jackson
Acked-by: Nick Piggin
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 kernel/cpuset.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 76 insertions(+), 13 deletions(-)

(limited to 'kernel/cpuset.c')

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 79dd929f408..984c0bf3807 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -595,10 +595,62 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
 	return 0;
 }
 
+/*
+ * For a given cpuset cur, partition the system as follows
+ * a. All cpus in the parent cpuset's cpus_allowed that are not part of any
+ *    exclusive child cpusets
+ * b. All cpus in the current cpuset's cpus_allowed that are not part of any
+ *    exclusive child cpusets
+ * Build these two partitions by calling partition_sched_domains
+ *
+ * Call with cpuset_sem held.  May nest a call to the
+ * lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
+ */
+static void update_cpu_domains(struct cpuset *cur)
+{
+	struct cpuset *c, *par = cur->parent;
+	cpumask_t pspan, cspan;
+
+	if (par == NULL || cpus_empty(cur->cpus_allowed))
+		return;
+
+	/*
+	 * Get all cpus from parent's cpus_allowed not part of exclusive
+	 * children
+	 */
+	pspan = par->cpus_allowed;
+	list_for_each_entry(c, &par->children, sibling) {
+		if (is_cpu_exclusive(c))
+			cpus_andnot(pspan, pspan, c->cpus_allowed);
+	}
+	if (is_removed(cur) || !is_cpu_exclusive(cur)) {
+		cpus_or(pspan, pspan, cur->cpus_allowed);
+		if (cpus_equal(pspan, cur->cpus_allowed))
+			return;
+		cspan = CPU_MASK_NONE;
+	} else {
+		if (cpus_empty(pspan))
+			return;
+		cspan = cur->cpus_allowed;
+		/*
+		 * Get all cpus from current cpuset's cpus_allowed not part
+		 * of exclusive children
+		 */
+		list_for_each_entry(c, &cur->children, sibling) {
+			if (is_cpu_exclusive(c))
+				cpus_andnot(cspan, cspan, c->cpus_allowed);
+		}
+	}
+
+	lock_cpu_hotplug();
+	partition_sched_domains(&pspan, &cspan);
+	unlock_cpu_hotplug();
+}
+
 static int update_cpumask(struct cpuset *cs, char *buf)
 {
 	struct cpuset trialcs;
-	int retval;
+	int retval, cpus_unchanged;
 
 	trialcs = *cs;
 	retval = cpulist_parse(buf, trialcs.cpus_allowed);
@@ -608,9 +660,13 @@ static int update_cpumask(struct cpuset *cs, char *buf)
 	if (cpus_empty(trialcs.cpus_allowed))
 		return -ENOSPC;
 	retval = validate_change(cs, &trialcs);
-	if (retval == 0)
-		cs->cpus_allowed = trialcs.cpus_allowed;
-	return retval;
+	if (retval < 0)
+		return retval;
+	cpus_unchanged = cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed);
+	cs->cpus_allowed = trialcs.cpus_allowed;
+	if (is_cpu_exclusive(cs) && !cpus_unchanged)
+		update_cpu_domains(cs);
+	return 0;
 }
 
 static int update_nodemask(struct cpuset *cs, char *buf)
@@ -646,7 +702,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
 {
 	int turning_on;
 	struct cpuset trialcs;
-	int err;
+	int err, cpu_exclusive_changed;
 
 	turning_on = (simple_strtoul(buf, NULL, 10) != 0);
 
@@ -657,13 +713,18 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
 		clear_bit(bit, &trialcs.flags);
 
 	err = validate_change(cs, &trialcs);
-	if (err == 0) {
-		if (turning_on)
-			set_bit(bit, &cs->flags);
-		else
-			clear_bit(bit, &cs->flags);
-	}
-	return err;
+	if (err < 0)
+		return err;
+	cpu_exclusive_changed =
+		(is_cpu_exclusive(cs) != is_cpu_exclusive(&trialcs));
+	if (turning_on)
+		set_bit(bit, &cs->flags);
+	else
+		clear_bit(bit, &cs->flags);
+
+	if (cpu_exclusive_changed)
+		update_cpu_domains(cs);
+	return 0;
 }
 
 static int attach_task(struct cpuset *cs, char *buf)
@@ -1309,12 +1370,14 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 		up(&cpuset_sem);
 		return -EBUSY;
 	}
-	spin_lock(&cs->dentry->d_lock);
 	parent = cs->parent;
 	set_bit(CS_REMOVED, &cs->flags);
+	if (is_cpu_exclusive(cs))
+		update_cpu_domains(cs);
 	list_del(&cs->sibling);	/* delete my sibling from parent->children */
 	if (list_empty(&parent->children))
		check_for_release(parent);
+	spin_lock(&cs->dentry->d_lock);
 	d = dget(cs->dentry);
 	cs->dentry = NULL;
 	spin_unlock(&d->d_lock);
--
cgit v1.2.3-18-g5258
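The new update_cpu_domains() computes two CPU spans: the parent's cpus_allowed minus every exclusive child's cpus_allowed, and the current cpuset's cpus_allowed minus its own exclusive children, and hands both to partition_sched_domains(). The set arithmetic can be restated in a few lines of plain C; the sketch below uses unsigned long bitmasks in place of cpumask_t and a made-up span_minus_exclusive() helper, so it illustrates only the mask operations, not the kernel code itself:

#include <stdio.h>

/* Toy stand-in for struct cpuset: one bit per CPU instead of cpumask_t. */
struct toy_cpuset {
	unsigned long cpus_allowed;
	int cpu_exclusive;
};

/* Remove every exclusive child's CPUs from a starting span. */
static unsigned long span_minus_exclusive(unsigned long span,
					  const struct toy_cpuset *children,
					  int nr_children)
{
	int i;

	for (i = 0; i < nr_children; i++)
		if (children[i].cpu_exclusive)
			span &= ~children[i].cpus_allowed;
	return span;
}

int main(void)
{
	struct toy_cpuset parent_children[] = { { 0x0f, 1 } };	/* cur: CPUs 0-3, exclusive */
	struct toy_cpuset cur_children[]    = { { 0x03, 1 } };	/* child: CPUs 0-1, exclusive */
	unsigned long parent_cpus = 0xff;			/* CPUs 0-7 */
	unsigned long cur_cpus = 0x0f;				/* CPUs 0-3 */
	unsigned long pspan, cspan;

	/* a. parent's CPUs not claimed by any exclusive child */
	pspan = span_minus_exclusive(parent_cpus, parent_children, 1);
	/* b. current cpuset's CPUs not claimed by its exclusive children */
	cspan = span_minus_exclusive(cur_cpus, cur_children, 1);

	printf("pspan=0x%lx cspan=0x%lx\n", pspan, cspan);	/* 0xf0 and 0xc */
	return 0;
}
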
From d9fd8a6d443b509147280f058d4e59f0b796a323 Mon Sep 17 00:00:00 2001
From: Randy Dunlap
Date: Wed, 27 Jul 2005 11:45:11 -0700
Subject: [PATCH] kernel/cpuset.c: add kerneldoc, fix typos

Add kerneldoc to kernel/cpuset.c
Fix cpuset typos in init/Kconfig

Signed-off-by: Randy Dunlap
Acked-by: Paul Jackson
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 kernel/cpuset.c | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

(limited to 'kernel/cpuset.c')

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 984c0bf3807..805fb909731 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1440,10 +1440,10 @@ void __init cpuset_init_smp(void)
 
 /**
 * cpuset_fork - attach newly forked task to its parents cpuset.
- * @p: pointer to task_struct of forking parent process.
+ * @tsk: pointer to task_struct of forking parent process.
 *
 * Description: By default, on fork, a task inherits its
- * parents cpuset.  The pointer to the shared cpuset is
+ * parent's cpuset.  The pointer to the shared cpuset is
 * automatically copied in fork.c by dup_task_struct().
 * This cpuset_fork() routine need only increment the usage
 * counter in that cpuset.
@@ -1471,7 +1471,6 @@ void cpuset_fork(struct task_struct *tsk)
 * by the cpuset_sem semaphore.  If you don't hold cpuset_sem,
 * then a zero cpuset use count is a license to any other task to
 * nuke the cpuset immediately.
- *
 **/
 
 void cpuset_exit(struct task_struct *tsk)
@@ -1521,7 +1520,9 @@ void cpuset_init_current_mems_allowed(void)
 	current->mems_allowed = NODE_MASK_ALL;
 }
 
-/*
+/**
+ * cpuset_update_current_mems_allowed - update mems parameters to new values
+ *
 * If the current tasks cpusets mems_allowed changed behind our backs,
 * update current->mems_allowed and mems_generation to the new value.
 * Do not call this routine if in_interrupt().
@@ -1540,13 +1541,20 @@ void cpuset_update_current_mems_allowed(void)
 	}
 }
 
+/**
+ * cpuset_restrict_to_mems_allowed - limit nodes to current mems_allowed
+ * @nodes: pointer to a node bitmap that is and-ed with mems_allowed
+ */
 void cpuset_restrict_to_mems_allowed(unsigned long *nodes)
 {
 	bitmap_and(nodes, nodes, nodes_addr(current->mems_allowed),
							MAX_NUMNODES);
 }
 
-/*
+/**
+ * cpuset_zonelist_valid_mems_allowed - check zonelist vs. curremt mems_allowed
+ * @zl: the zonelist to be checked
+ *
 * Are any of the nodes on zonelist zl allowed in current->mems_allowed?
 */
 int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
@@ -1562,8 +1570,12 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
 	return 0;
 }
 
-/*
- * Is 'current' valid, and is zone z allowed in current->mems_allowed?
+/**
+ * cpuset_zone_allowed - is zone z allowed in current->mems_allowed
+ * @z: zone in question
+ *
+ * Is zone z allowed in current->mems_allowed, or is
+ * the CPU in interrupt context? (zone is always allowed in this case)
 */
 int cpuset_zone_allowed(struct zone *z)
 {
--
cgit v1.2.3-18-g5258
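The conversions above all move existing comments into kerneldoc form, which the kernel's documentation tooling can extract: an opening '/**', a 'name - one-line summary' line, one '@param: description' line per parameter, a lone ' *' separator, then the body text. A minimal example of the shape (the function itself is invented purely to carry the comment):

/**
 * toy_area - compute the area of a width x height rectangle
 * @width: rectangle width, in arbitrary units
 * @height: rectangle height, in the same units
 *
 * Description: Returns width * height.  Only the comment layout matters
 * here; the function exists so the kerneldoc block has something to document.
 */
static inline unsigned long toy_area(unsigned long width, unsigned long height)
{
	return width * height;
}
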
From 3077a260e9f316b611436b1506eec9cc5c4f8aa6 Mon Sep 17 00:00:00 2001
From: Paul Jackson
Date: Tue, 9 Aug 2005 10:07:59 -0700
Subject: [PATCH] cpuset release ABBA deadlock fix

Fix possible cpuset_sem ABBA deadlock if 'notify_on_release' set.

For a particular usage pattern, creating and destroying cpusets fairly
frequently using notify_on_release, on a very large system, this deadlock
can be seen every few days.  If you are not using the cpuset
notify_on_release feature, you will never see this deadlock.

The existing code, on task exit (or cpuset deletion) did:

  get cpuset_sem
  if cpuset marked notify_on_release and is ready to release:
    compute cpuset path relative to /dev/cpuset mount point
    call_usermodehelper() forks /sbin/cpuset_release_agent with path
  drop cpuset_sem

Unfortunately, the fork in call_usermodehelper can allocate memory, and
allocating memory can require cpuset_sem, if the mems_generation values
changed in the interim.  This results in an ABBA deadlock, trying to
obtain cpuset_sem when it is already held by the current task.

To fix this, I put the cpuset path (which must be computed while holding
cpuset_sem) in a temporary buffer, to be used in the call_usermodehelper
call of /sbin/cpuset_release_agent only _after_ dropping cpuset_sem.

So the new logic is:

  get cpuset_sem
  if cpuset marked notify_on_release and is ready to release:
    compute cpuset path relative to /dev/cpuset mount point
    stash path in kmalloc'd buffer
  drop cpuset_sem
  call_usermodehelper() forks /sbin/cpuset_release_agent with path
  free path

The sharp-eyed reader might notice that this patch does not contain any
calls to kmalloc.  The existing code in the check_for_release() routine
was already kmalloc'ing a buffer to hold the cpuset path.  In the old
code, it just held the buffer for a few lines, over the
cpuset_release_agent() call that in turn invoked call_usermodehelper().
In the new code, with the application of this patch, it returns that
buffer via the new char **ppathbuf parameter, for later use and freeing
in cpuset_release_agent(), which is called after cpuset_sem is dropped.

Whereas the old code has just one call to cpuset_release_agent(), right
in the check_for_release() routine, the new code has three calls to
cpuset_release_agent(), from the various places that a cpuset can be
released.

This patch has been built and booted on SN2, and passed a stress test
that previously hit the deadlock within a few seconds.

Signed-off-by: Paul Jackson
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
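The fix is an instance of a common pattern: while holding the lock, do only the work that genuinely needs the lock and stash the result; run anything that might re-enter the lock (here, the fork inside call_usermodehelper()) only after the lock has been dropped. A minimal userspace analogue with a pthread mutex, in which do_notify() and the strdup'd path stand in for call_usermodehelper() and the kmalloc'd cpuset path (an illustration only, not the kernel code):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static pthread_mutex_t state_sem = PTHREAD_MUTEX_INITIALIZER;
static int ready_to_release = 1;	/* stands in for the notify_on_release checks */

/* Stand-in for call_usermodehelper(): must not run with state_sem held. */
static void do_notify(const char *path)
{
	if (!path)
		return;
	printf("would exec /sbin/cpuset_release_agent %s\n", path);
}

static void release_cpuset(void)
{
	char *pathbuf = NULL;

	pthread_mutex_lock(&state_sem);
	if (ready_to_release)
		pathbuf = strdup("/example");	/* compute the path under the lock */
	pthread_mutex_unlock(&state_sem);

	do_notify(pathbuf);			/* potentially blocking work, lock dropped */
	free(pathbuf);
}

int main(void)
{
	release_cpuset();
	return 0;
}
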
---
 kernel/cpuset.c | 68 ++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 48 insertions(+), 20 deletions(-)

(limited to 'kernel/cpuset.c')

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 805fb909731..21a4e3b2cbd 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -398,21 +398,31 @@ static int cpuset_path(const struct cpuset *cs, char *buf, int buflen)
 * to continue to serve a useful existence.  Next time it's released,
 * we will get notified again, if it still has 'notify_on_release' set.
 *
- * Note final arg to call_usermodehelper() is 0 - that means
- * don't wait.  Since we are holding the global cpuset_sem here,
- * and we are asking another thread (started from keventd) to rmdir a
- * cpuset, we can't wait - or we'd deadlock with the removing thread
- * on cpuset_sem.
+ * The final arg to call_usermodehelper() is 0, which means don't
+ * wait.  The separate /sbin/cpuset_release_agent task is forked by
+ * call_usermodehelper(), then control in this thread returns here,
+ * without waiting for the release agent task.  We don't bother to
+ * wait because the caller of this routine has no use for the exit
+ * status of the /sbin/cpuset_release_agent task, so no sense holding
+ * our caller up for that.
+ *
+ * The simple act of forking that task might require more memory,
+ * which might need cpuset_sem.  So this routine must be called while
+ * cpuset_sem is not held, to avoid a possible deadlock.  See also
+ * comments for check_for_release(), below.
 */
-static int cpuset_release_agent(char *cpuset_str)
+static void cpuset_release_agent(const char *pathbuf)
 {
 	char *argv[3], *envp[3];
 	int i;
 
+	if (!pathbuf)
+		return;
+
 	i = 0;
 	argv[i++] = "/sbin/cpuset_release_agent";
-	argv[i++] = cpuset_str;
+	argv[i++] = (char *)pathbuf;
 	argv[i] = NULL;
 
 	i = 0;
@@ -421,17 +431,29 @@
 	envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
 	envp[i] = NULL;
 
-	return call_usermodehelper(argv[0], argv, envp, 0);
+	call_usermodehelper(argv[0], argv, envp, 0);
+	kfree(pathbuf);
 }
 
 /*
 * Either cs->count of using tasks transitioned to zero, or the
 * cs->children list of child cpusets just became empty.  If this
 * cs is notify_on_release() and now both the user count is zero and
- * the list of children is empty, send notice to user land.
+ * the list of children is empty, prepare cpuset path in a kmalloc'd
+ * buffer, to be returned via ppathbuf, so that the caller can invoke
+ * cpuset_release_agent() with it later on, once cpuset_sem is dropped.
+ * Call here with cpuset_sem held.
+ *
+ * This check_for_release() routine is responsible for kmalloc'ing
+ * pathbuf.  The above cpuset_release_agent() is responsible for
+ * kfree'ing pathbuf.  The caller of these routines is responsible
+ * for providing a pathbuf pointer, initialized to NULL, then
+ * calling check_for_release() with cpuset_sem held and the address
+ * of the pathbuf pointer, then dropping cpuset_sem, then calling
+ * cpuset_release_agent() with pathbuf, as set by check_for_release().
 */
-static void check_for_release(struct cpuset *cs)
+static void check_for_release(struct cpuset *cs, char **ppathbuf)
 {
 	if (notify_on_release(cs) && atomic_read(&cs->count) == 0 &&
	    list_empty(&cs->children)) {
@@ -441,10 +463,9 @@ static void check_for_release(struct cpuset *cs)
 		if (!buf)
 			return;
 		if (cpuset_path(cs, buf, PAGE_SIZE) < 0)
-			goto out;
-		cpuset_release_agent(buf);
-out:
-		kfree(buf);
+			kfree(buf);
+		else
+			*ppathbuf = buf;
 	}
 }
 
@@ -727,14 +748,14 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
 	return 0;
 }
 
-static int attach_task(struct cpuset *cs, char *buf)
+static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
 {
 	pid_t pid;
 	struct task_struct *tsk;
 	struct cpuset *oldcs;
 	cpumask_t cpus;
 
-	if (sscanf(buf, "%d", &pid) != 1)
+	if (sscanf(pidbuf, "%d", &pid) != 1)
 		return -EIO;
 	if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
 		return -ENOSPC;
@@ -777,7 +798,7 @@ static int attach_task(struct cpuset *cs, char *buf)
 	put_task_struct(tsk);
 	if (atomic_dec_and_test(&oldcs->count))
-		check_for_release(oldcs);
+		check_for_release(oldcs, ppathbuf);
 	return 0;
 }
 
@@ -801,6 +822,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
 	struct cftype *cft = __d_cft(file->f_dentry);
 	cpuset_filetype_t type = cft->private;
 	char *buffer;
+	char *pathbuf = NULL;
 	int retval = 0;
 
 	/* Crude upper limit on largest legitimate cpulist user might write. */
@@ -841,7 +863,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
 		retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer);
 		break;
 	case FILE_TASKLIST:
-		retval = attach_task(cs, buffer);
+		retval = attach_task(cs, buffer, &pathbuf);
 		break;
 	default:
 		retval = -EINVAL;
@@ -852,6 +874,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
 		retval = nbytes;
 out2:
 	up(&cpuset_sem);
+	cpuset_release_agent(pathbuf);
 out1:
 	kfree(buffer);
 	return retval;
@@ -1357,6 +1380,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	struct cpuset *cs = dentry->d_fsdata;
 	struct dentry *d;
 	struct cpuset *parent;
+	char *pathbuf = NULL;
 
 	/* the vfs holds both inode->i_sem already */
@@ -1376,7 +1400,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 		update_cpu_domains(cs);
 	list_del(&cs->sibling);	/* delete my sibling from parent->children */
 	if (list_empty(&parent->children))
-		check_for_release(parent);
+		check_for_release(parent, &pathbuf);
 	spin_lock(&cs->dentry->d_lock);
 	d = dget(cs->dentry);
 	cs->dentry = NULL;
@@ -1384,6 +1408,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	cpuset_d_remove_dir(d);
 	dput(d);
 	up(&cpuset_sem);
+	cpuset_release_agent(pathbuf);
 	return 0;
 }
 
@@ -1483,10 +1508,13 @@ void cpuset_exit(struct task_struct *tsk)
 	task_unlock(tsk);
 
 	if (notify_on_release(cs)) {
+		char *pathbuf = NULL;
+
 		down(&cpuset_sem);
 		if (atomic_dec_and_test(&cs->count))
-			check_for_release(cs);
+			check_for_release(cs, &pathbuf);
 		up(&cpuset_sem);
+		cpuset_release_agent(pathbuf);
 	} else {
 		atomic_dec(&cs->count);
 	}
--
cgit v1.2.3-18-g5258
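For context on the user-space side of notify_on_release: the kernel execs /sbin/cpuset_release_agent with a single argument, the path of the released cpuset relative to the cpuset filesystem mount point (it becomes argv[1] in the patch above). A typical agent simply removes that now-empty cpuset directory. A minimal sketch, assuming the conventional /dev/cpuset mount point (this helper is illustrative and not part of the kernel tree):

#include <stdio.h>
#include <unistd.h>

/*
 * Minimal release-agent sketch: argv[1] is the cpuset path relative to the
 * cpuset mount point, e.g. "/myset/subset".  Removing the directory destroys
 * the already-empty, unused cpuset.
 */
int main(int argc, char *argv[])
{
	char path[4096];

	if (argc != 2)
		return 1;
	snprintf(path, sizeof(path), "/dev/cpuset%s", argv[1]);
	if (rmdir(path) != 0) {
		perror(path);
		return 1;
	}
	return 0;
}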