diff options
Diffstat (limited to 'kernel')
36 files changed, 2574 insertions, 891 deletions
diff --git a/kernel/audit.c b/kernel/audit.c index b782b046543..a7b16086d36 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -21,7 +21,7 @@ * * Written by Rickard E. (Rik) Faith <faith@redhat.com> * - * Goals: 1) Integrate fully with SELinux. + * Goals: 1) Integrate fully with Security Modules. * 2) Minimal run-time overhead: * a) Minimal when syscall auditing is disabled (audit_enable=0). * b) Small when syscall auditing is enabled and no audit record @@ -55,7 +55,6 @@ #include <net/netlink.h> #include <linux/skbuff.h> #include <linux/netlink.h> -#include <linux/selinux.h> #include <linux/inotify.h> #include <linux/freezer.h> #include <linux/tty.h> @@ -265,13 +264,13 @@ static int audit_log_config_change(char *function_name, int new, int old, char *ctx = NULL; u32 len; - rc = selinux_sid_to_string(sid, &ctx, &len); + rc = security_secid_to_secctx(sid, &ctx, &len); if (rc) { audit_log_format(ab, " sid=%u", sid); allow_changes = 0; /* Something weird, deny request */ } else { audit_log_format(ab, " subj=%s", ctx); - kfree(ctx); + security_release_secctx(ctx, len); } } audit_log_format(ab, " res=%d", allow_changes); @@ -550,12 +549,13 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type, audit_log_format(*ab, "user pid=%d uid=%u auid=%u", pid, uid, auid); if (sid) { - rc = selinux_sid_to_string(sid, &ctx, &len); + rc = security_secid_to_secctx(sid, &ctx, &len); if (rc) audit_log_format(*ab, " ssid=%u", sid); - else + else { audit_log_format(*ab, " subj=%s", ctx); - kfree(ctx); + security_release_secctx(ctx, len); + } } return rc; @@ -758,18 +758,18 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) break; } case AUDIT_SIGNAL_INFO: - err = selinux_sid_to_string(audit_sig_sid, &ctx, &len); + err = security_secid_to_secctx(audit_sig_sid, &ctx, &len); if (err) return err; sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL); if (!sig_data) { - kfree(ctx); + security_release_secctx(ctx, len); return -ENOMEM; } sig_data->uid = audit_sig_uid; sig_data->pid = audit_sig_pid; memcpy(sig_data->ctx, ctx, len); - kfree(ctx); + security_release_secctx(ctx, len); audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_SIGNAL_INFO, 0, 0, sig_data, sizeof(*sig_data) + len); kfree(sig_data); @@ -881,10 +881,6 @@ static int __init audit_init(void) audit_enabled = audit_default; audit_ever_enabled |= !!audit_default; - /* Register the callback with selinux. This callback will be invoked - * when a new policy is loaded. */ - selinux_audit_set_callback(&selinux_audit_rule_update); - audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized"); #ifdef CONFIG_AUDITSYSCALL diff --git a/kernel/audit.h b/kernel/audit.h index 2554bd524fd..3cfc54ee3e1 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -65,34 +65,9 @@ struct audit_watch { struct list_head rules; /* associated rules */ }; -struct audit_field { - u32 type; - u32 val; - u32 op; - char *se_str; - struct selinux_audit_rule *se_rule; -}; - struct audit_tree; struct audit_chunk; -struct audit_krule { - int vers_ops; - u32 flags; - u32 listnr; - u32 action; - u32 mask[AUDIT_BITMASK_SIZE]; - u32 buflen; /* for data alloc on list rules */ - u32 field_count; - char *filterkey; /* ties events to rules */ - struct audit_field *fields; - struct audit_field *arch_f; /* quick access to arch field */ - struct audit_field *inode_f; /* quick access to an inode field */ - struct audit_watch *watch; /* associated watch */ - struct audit_tree *tree; /* associated watched tree */ - struct list_head rlist; /* entry in audit_{watch,tree}.rules list */ -}; - struct audit_entry { struct list_head list; struct rcu_head rcu; diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 2f2914b7cc3..28fef6bf853 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -28,7 +28,7 @@ #include <linux/netlink.h> #include <linux/sched.h> #include <linux/inotify.h> -#include <linux/selinux.h> +#include <linux/security.h> #include "audit.h" /* @@ -38,7 +38,7 @@ * Synchronizes writes and blocking reads of audit's filterlist * data. Rcu is used to traverse the filterlist and access * contents of structs audit_entry, audit_watch and opaque - * selinux rules during filtering. If modified, these structures + * LSM rules during filtering. If modified, these structures * must be copied and replace their counterparts in the filterlist. * An audit_parent struct is not accessed during filtering, so may * be written directly provided audit_filter_mutex is held. @@ -139,8 +139,8 @@ static inline void audit_free_rule(struct audit_entry *e) if (e->rule.fields) for (i = 0; i < e->rule.field_count; i++) { struct audit_field *f = &e->rule.fields[i]; - kfree(f->se_str); - selinux_audit_rule_free(f->se_rule); + kfree(f->lsm_str); + security_audit_rule_free(f->lsm_rule); } kfree(e->rule.fields); kfree(e->rule.filterkey); @@ -554,8 +554,8 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, f->op = data->fieldflags[i] & AUDIT_OPERATORS; f->type = data->fields[i]; f->val = data->values[i]; - f->se_str = NULL; - f->se_rule = NULL; + f->lsm_str = NULL; + f->lsm_rule = NULL; switch(f->type) { case AUDIT_PID: case AUDIT_UID: @@ -597,12 +597,12 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, goto exit_free; entry->rule.buflen += f->val; - err = selinux_audit_rule_init(f->type, f->op, str, - &f->se_rule); + err = security_audit_rule_init(f->type, f->op, str, + (void **)&f->lsm_rule); /* Keep currently invalid fields around in case they * become valid after a policy reload. */ if (err == -EINVAL) { - printk(KERN_WARNING "audit rule for selinux " + printk(KERN_WARNING "audit rule for LSM " "\'%s\' is invalid\n", str); err = 0; } @@ -610,7 +610,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, kfree(str); goto exit_free; } else - f->se_str = str; + f->lsm_str = str; break; case AUDIT_WATCH: str = audit_unpack_string(&bufp, &remain, f->val); @@ -754,7 +754,7 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule) case AUDIT_OBJ_LEV_LOW: case AUDIT_OBJ_LEV_HIGH: data->buflen += data->values[i] = - audit_pack_string(&bufp, f->se_str); + audit_pack_string(&bufp, f->lsm_str); break; case AUDIT_WATCH: data->buflen += data->values[i] = @@ -806,7 +806,7 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b) case AUDIT_OBJ_TYPE: case AUDIT_OBJ_LEV_LOW: case AUDIT_OBJ_LEV_HIGH: - if (strcmp(a->fields[i].se_str, b->fields[i].se_str)) + if (strcmp(a->fields[i].lsm_str, b->fields[i].lsm_str)) return 1; break; case AUDIT_WATCH: @@ -862,28 +862,28 @@ out: return new; } -/* Duplicate selinux field information. The se_rule is opaque, so must be +/* Duplicate LSM field information. The lsm_rule is opaque, so must be * re-initialized. */ -static inline int audit_dupe_selinux_field(struct audit_field *df, +static inline int audit_dupe_lsm_field(struct audit_field *df, struct audit_field *sf) { int ret = 0; - char *se_str; + char *lsm_str; - /* our own copy of se_str */ - se_str = kstrdup(sf->se_str, GFP_KERNEL); - if (unlikely(!se_str)) + /* our own copy of lsm_str */ + lsm_str = kstrdup(sf->lsm_str, GFP_KERNEL); + if (unlikely(!lsm_str)) return -ENOMEM; - df->se_str = se_str; + df->lsm_str = lsm_str; - /* our own (refreshed) copy of se_rule */ - ret = selinux_audit_rule_init(df->type, df->op, df->se_str, - &df->se_rule); + /* our own (refreshed) copy of lsm_rule */ + ret = security_audit_rule_init(df->type, df->op, df->lsm_str, + (void **)&df->lsm_rule); /* Keep currently invalid fields around in case they * become valid after a policy reload. */ if (ret == -EINVAL) { - printk(KERN_WARNING "audit rule for selinux \'%s\' is " - "invalid\n", df->se_str); + printk(KERN_WARNING "audit rule for LSM \'%s\' is " + "invalid\n", df->lsm_str); ret = 0; } @@ -891,7 +891,7 @@ static inline int audit_dupe_selinux_field(struct audit_field *df, } /* Duplicate an audit rule. This will be a deep copy with the exception - * of the watch - that pointer is carried over. The selinux specific fields + * of the watch - that pointer is carried over. The LSM specific fields * will be updated in the copy. The point is to be able to replace the old * rule with the new rule in the filterlist, then free the old rule. * The rlist element is undefined; list manipulations are handled apart from @@ -930,7 +930,7 @@ static struct audit_entry *audit_dupe_rule(struct audit_krule *old, new->tree = old->tree; memcpy(new->fields, old->fields, sizeof(struct audit_field) * fcount); - /* deep copy this information, updating the se_rule fields, because + /* deep copy this information, updating the lsm_rule fields, because * the originals will all be freed when the old rule is freed. */ for (i = 0; i < fcount; i++) { switch (new->fields[i].type) { @@ -944,7 +944,7 @@ static struct audit_entry *audit_dupe_rule(struct audit_krule *old, case AUDIT_OBJ_TYPE: case AUDIT_OBJ_LEV_LOW: case AUDIT_OBJ_LEV_HIGH: - err = audit_dupe_selinux_field(&new->fields[i], + err = audit_dupe_lsm_field(&new->fields[i], &old->fields[i]); break; case AUDIT_FILTERKEY: @@ -1515,11 +1515,12 @@ static void audit_log_rule_change(uid_t loginuid, u32 sid, char *action, if (sid) { char *ctx = NULL; u32 len; - if (selinux_sid_to_string(sid, &ctx, &len)) + if (security_secid_to_secctx(sid, &ctx, &len)) audit_log_format(ab, " ssid=%u", sid); - else + else { audit_log_format(ab, " subj=%s", ctx); - kfree(ctx); + security_release_secctx(ctx, len); + } } audit_log_format(ab, " op=%s rule key=", action); if (rule->filterkey) @@ -1761,38 +1762,12 @@ unlock_and_return: return result; } -/* Check to see if the rule contains any selinux fields. Returns 1 if there - are selinux fields specified in the rule, 0 otherwise. */ -static inline int audit_rule_has_selinux(struct audit_krule *rule) -{ - int i; - - for (i = 0; i < rule->field_count; i++) { - struct audit_field *f = &rule->fields[i]; - switch (f->type) { - case AUDIT_SUBJ_USER: - case AUDIT_SUBJ_ROLE: - case AUDIT_SUBJ_TYPE: - case AUDIT_SUBJ_SEN: - case AUDIT_SUBJ_CLR: - case AUDIT_OBJ_USER: - case AUDIT_OBJ_ROLE: - case AUDIT_OBJ_TYPE: - case AUDIT_OBJ_LEV_LOW: - case AUDIT_OBJ_LEV_HIGH: - return 1; - } - } - - return 0; -} - -/* This function will re-initialize the se_rule field of all applicable rules. - * It will traverse the filter lists serarching for rules that contain selinux +/* This function will re-initialize the lsm_rule field of all applicable rules. + * It will traverse the filter lists serarching for rules that contain LSM * specific filter fields. When such a rule is found, it is copied, the - * selinux field is re-initialized, and the old rule is replaced with the + * LSM field is re-initialized, and the old rule is replaced with the * updated rule. */ -int selinux_audit_rule_update(void) +int audit_update_lsm_rules(void) { struct audit_entry *entry, *n, *nentry; struct audit_watch *watch; @@ -1804,7 +1779,7 @@ int selinux_audit_rule_update(void) for (i = 0; i < AUDIT_NR_FILTERS; i++) { list_for_each_entry_safe(entry, n, &audit_filter_list[i], list) { - if (!audit_rule_has_selinux(&entry->rule)) + if (!security_audit_rule_known(&entry->rule)) continue; watch = entry->rule.watch; @@ -1815,7 +1790,7 @@ int selinux_audit_rule_update(void) * return value */ if (!err) err = PTR_ERR(nentry); - audit_panic("error updating selinux filters"); + audit_panic("error updating LSM filters"); if (watch) list_del(&entry->rule.rlist); list_del_rcu(&entry->list); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 782262e4107..56e56ed594a 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -61,7 +61,6 @@ #include <linux/security.h> #include <linux/list.h> #include <linux/tty.h> -#include <linux/selinux.h> #include <linux/binfmts.h> #include <linux/highmem.h> #include <linux/syscalls.h> @@ -528,14 +527,14 @@ static int audit_filter_rules(struct task_struct *tsk, match for now to avoid losing information that may be wanted. An error message will also be logged upon error */ - if (f->se_rule) { + if (f->lsm_rule) { if (need_sid) { - selinux_get_task_sid(tsk, &sid); + security_task_getsecid(tsk, &sid); need_sid = 0; } - result = selinux_audit_rule_match(sid, f->type, + result = security_audit_rule_match(sid, f->type, f->op, - f->se_rule, + f->lsm_rule, ctx); } break; @@ -546,18 +545,18 @@ static int audit_filter_rules(struct task_struct *tsk, case AUDIT_OBJ_LEV_HIGH: /* The above note for AUDIT_SUBJ_USER...AUDIT_SUBJ_CLR also applies here */ - if (f->se_rule) { + if (f->lsm_rule) { /* Find files that match */ if (name) { - result = selinux_audit_rule_match( + result = security_audit_rule_match( name->osid, f->type, f->op, - f->se_rule, ctx); + f->lsm_rule, ctx); } else if (ctx) { for (j = 0; j < ctx->name_count; j++) { - if (selinux_audit_rule_match( + if (security_audit_rule_match( ctx->names[j].osid, f->type, f->op, - f->se_rule, ctx)) { + f->lsm_rule, ctx)) { ++result; break; } @@ -570,7 +569,7 @@ static int audit_filter_rules(struct task_struct *tsk, aux = aux->next) { if (aux->type == AUDIT_IPC) { struct audit_aux_data_ipcctl *axi = (void *)aux; - if (selinux_audit_rule_match(axi->osid, f->type, f->op, f->se_rule, ctx)) { + if (security_audit_rule_match(axi->osid, f->type, f->op, f->lsm_rule, ctx)) { ++result; break; } @@ -885,11 +884,11 @@ void audit_log_task_context(struct audit_buffer *ab) int error; u32 sid; - selinux_get_task_sid(current, &sid); + security_task_getsecid(current, &sid); if (!sid) return; - error = selinux_sid_to_string(sid, &ctx, &len); + error = security_secid_to_secctx(sid, &ctx, &len); if (error) { if (error != -EINVAL) goto error_path; @@ -897,7 +896,7 @@ void audit_log_task_context(struct audit_buffer *ab) } audit_log_format(ab, " subj=%s", ctx); - kfree(ctx); + security_release_secctx(ctx, len); return; error_path: @@ -941,7 +940,7 @@ static int audit_log_pid_context(struct audit_context *context, pid_t pid, u32 sid, char *comm) { struct audit_buffer *ab; - char *s = NULL; + char *ctx = NULL; u32 len; int rc = 0; @@ -951,15 +950,16 @@ static int audit_log_pid_context(struct audit_context *context, pid_t pid, audit_log_format(ab, "opid=%d oauid=%d ouid=%d oses=%d", pid, auid, uid, sessionid); - if (selinux_sid_to_string(sid, &s, &len)) { + if (security_secid_to_secctx(sid, &ctx, &len)) { audit_log_format(ab, " obj=(none)"); rc = 1; - } else - audit_log_format(ab, " obj=%s", s); + } else { + audit_log_format(ab, " obj=%s", ctx); + security_release_secctx(ctx, len); + } audit_log_format(ab, " ocomm="); audit_log_untrustedstring(ab, comm); audit_log_end(ab); - kfree(s); return rc; } @@ -1271,14 +1271,15 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts if (axi->osid != 0) { char *ctx = NULL; u32 len; - if (selinux_sid_to_string( + if (security_secid_to_secctx( axi->osid, &ctx, &len)) { audit_log_format(ab, " osid=%u", axi->osid); call_panic = 1; - } else + } else { audit_log_format(ab, " obj=%s", ctx); - kfree(ctx); + security_release_secctx(ctx, len); + } } break; } @@ -1392,13 +1393,14 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts if (n->osid != 0) { char *ctx = NULL; u32 len; - if (selinux_sid_to_string( + if (security_secid_to_secctx( n->osid, &ctx, &len)) { audit_log_format(ab, " osid=%u", n->osid); call_panic = 2; - } else + } else { audit_log_format(ab, " obj=%s", ctx); - kfree(ctx); + security_release_secctx(ctx, len); + } } audit_log_end(ab); @@ -1775,7 +1777,7 @@ static void audit_copy_inode(struct audit_names *name, const struct inode *inode name->uid = inode->i_uid; name->gid = inode->i_gid; name->rdev = inode->i_rdev; - selinux_get_inode_sid(inode, &name->osid); + security_inode_getsecid(inode, &name->osid); } /** @@ -2190,8 +2192,7 @@ int __audit_ipc_obj(struct kern_ipc_perm *ipcp) ax->uid = ipcp->uid; ax->gid = ipcp->gid; ax->mode = ipcp->mode; - selinux_get_ipc_sid(ipcp, &ax->osid); - + security_ipc_getsecid(ipcp, &ax->osid); ax->d.type = AUDIT_IPC; ax->d.next = context->aux; context->aux = (void *)ax; @@ -2343,7 +2344,7 @@ void __audit_ptrace(struct task_struct *t) context->target_auid = audit_get_loginuid(t); context->target_uid = t->uid; context->target_sessionid = audit_get_sessionid(t); - selinux_get_task_sid(t, &context->target_sid); + security_task_getsecid(t, &context->target_sid); memcpy(context->target_comm, t->comm, TASK_COMM_LEN); } @@ -2371,7 +2372,7 @@ int __audit_signal_info(int sig, struct task_struct *t) audit_sig_uid = tsk->loginuid; else audit_sig_uid = tsk->uid; - selinux_get_task_sid(tsk, &audit_sig_sid); + security_task_getsecid(tsk, &audit_sig_sid); } if (!audit_signals || audit_dummy_context()) return 0; @@ -2384,7 +2385,7 @@ int __audit_signal_info(int sig, struct task_struct *t) ctx->target_auid = audit_get_loginuid(t); ctx->target_uid = t->uid; ctx->target_sessionid = audit_get_sessionid(t); - selinux_get_task_sid(t, &ctx->target_sid); + security_task_getsecid(t, &ctx->target_sid); memcpy(ctx->target_comm, t->comm, TASK_COMM_LEN); return 0; } @@ -2405,7 +2406,7 @@ int __audit_signal_info(int sig, struct task_struct *t) axp->target_auid[axp->pid_count] = audit_get_loginuid(t); axp->target_uid[axp->pid_count] = t->uid; axp->target_sessionid[axp->pid_count] = audit_get_sessionid(t); - selinux_get_task_sid(t, &axp->target_sid[axp->pid_count]); + security_task_getsecid(t, &axp->target_sid[axp->pid_count]); memcpy(axp->target_comm[axp->pid_count], t->comm, TASK_COMM_LEN); axp->pid_count++; @@ -2435,16 +2436,17 @@ void audit_core_dumps(long signr) ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND); audit_log_format(ab, "auid=%u uid=%u gid=%u ses=%u", auid, current->uid, current->gid, sessionid); - selinux_get_task_sid(current, &sid); + security_task_getsecid(current, &sid); if (sid) { char *ctx = NULL; u32 len; - if (selinux_sid_to_string(sid, &ctx, &len)) + if (security_secid_to_secctx(sid, &ctx, &len)) audit_log_format(ab, " ssid=%u", sid); - else + else { audit_log_format(ab, " subj=%s", ctx); - kfree(ctx); + security_release_secctx(ctx, len); + } } audit_log_format(ab, " pid=%d comm=", current->pid); audit_log_untrustedstring(ab, current->comm); diff --git a/kernel/compat.c b/kernel/compat.c index 9c48abfcd4a..e1ef04870c2 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -445,7 +445,7 @@ asmlinkage long compat_sys_sched_setaffinity(compat_pid_t pid, if (retval) return retval; - return sched_setaffinity(pid, new_mask); + return sched_setaffinity(pid, &new_mask); } asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid, unsigned int len, diff --git a/kernel/cpu.c b/kernel/cpu.c index 2eff3f63abe..2011ad8d269 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -232,9 +232,9 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen) /* Ensure that we are not runnable on dying cpu */ old_allowed = current->cpus_allowed; - tmp = CPU_MASK_ALL; + cpus_setall(tmp); cpu_clear(cpu, tmp); - set_cpus_allowed(current, tmp); + set_cpus_allowed_ptr(current, &tmp); p = __stop_machine_run(take_cpu_down, &tcd_param, cpu); @@ -268,7 +268,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen) out_thread: err = kthread_stop(p); out_allowed: - set_cpus_allowed(current, old_allowed); + set_cpus_allowed_ptr(current, &old_allowed); out_release: cpu_hotplug_done(); return err; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index a1b61f41422..8b35fbd8292 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -98,6 +98,9 @@ struct cpuset { /* partition number for rebuild_sched_domains() */ int pn; + /* for custom sched domain */ + int relax_domain_level; + /* used for walking a cpuset heirarchy */ struct list_head stack_list; }; @@ -478,6 +481,16 @@ static int cpusets_overlap(struct cpuset *a, struct cpuset *b) return cpus_intersects(a->cpus_allowed, b->cpus_allowed); } +static void +update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c) +{ + if (!dattr) + return; + if (dattr->relax_domain_level < c->relax_domain_level) + dattr->relax_domain_level = c->relax_domain_level; + return; +} + /* * rebuild_sched_domains() * @@ -553,12 +566,14 @@ static void rebuild_sched_domains(void) int csn; /* how many cpuset ptrs in csa so far */ int i, j, k; /* indices for partition finding loops */ cpumask_t *doms; /* resulting partition; i.e. sched domains */ + struct sched_domain_attr *dattr; /* attributes for custom domains */ int ndoms; /* number of sched domains in result */ int nslot; /* next empty doms[] cpumask_t slot */ q = NULL; csa = NULL; doms = NULL; + dattr = NULL; /* Special case for the 99% of systems with one, full, sched domain */ if (is_sched_load_balance(&top_cpuset)) { @@ -566,6 +581,11 @@ static void rebuild_sched_domains(void) doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL); if (!doms) goto rebuild; + dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL); + if (dattr) { + *dattr = SD_ATTR_INIT; + update_domain_attr(dattr, &top_cpuset); + } *doms = top_cpuset.cpus_allowed; goto rebuild; } @@ -622,6 +642,7 @@ restart: doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL); if (!doms) goto rebuild; + dattr = kmalloc(ndoms * sizeof(struct sched_domain_attr), GFP_KERNEL); for (nslot = 0, i = 0; i < csn; i++) { struct cpuset *a = csa[i]; @@ -644,12 +665,15 @@ restart: } cpus_clear(*dp); + if (dattr) + *(dattr + nslot) = SD_ATTR_INIT; for (j = i; j < csn; j++) { struct cpuset *b = csa[j]; if (apn == b->pn) { cpus_or(*dp, *dp, b->cpus_allowed); b->pn = -1; + update_domain_attr(dattr, b); } } nslot++; @@ -660,7 +684,7 @@ restart: rebuild: /* Have scheduler rebuild sched domains */ get_online_cpus(); - partition_sched_domains(ndoms, doms); + partition_sched_domains(ndoms, doms, dattr); put_online_cpus(); done: @@ -668,6 +692,7 @@ done: kfifo_free(q); kfree(csa); /* Don't kfree(doms) -- partition_sched_domains() does that. */ + /* Don't kfree(dattr) -- partition_sched_domains() does that. */ } static inline int started_after_time(struct task_struct *t1, @@ -729,7 +754,7 @@ int cpuset_test_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan) */ void cpuset_change_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan) { - set_cpus_allowed(tsk, (cgroup_cs(scan->cg))->cpus_allowed); + set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed)); } /** @@ -1011,6 +1036,21 @@ static int update_memory_pressure_enabled(struct cpuset *cs, char *buf) return 0; } +static int update_relax_domain_level(struct cpuset *cs, char *buf) +{ + int val = simple_strtol(buf, NULL, 10); + + if (val < 0) + val = -1; + + if (val != cs->relax_domain_level) { + cs->relax_domain_level = val; + rebuild_sched_domains(); + } + + return 0; +} + /* * update_flag - read a 0 or a 1 in a file and update associated flag * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, @@ -1178,7 +1218,7 @@ static void cpuset_attach(struct cgroup_subsys *ss, mutex_lock(&callback_mutex); guarantee_online_cpus(cs, &cpus); - set_cpus_allowed(tsk, cpus); + set_cpus_allowed_ptr(tsk, &cpus); mutex_unlock(&callback_mutex); from = oldcs->mems_allowed; @@ -1202,6 +1242,7 @@ typedef enum { FILE_CPU_EXCLUSIVE, FILE_MEM_EXCLUSIVE, FILE_SCHED_LOAD_BALANCE, + FILE_SCHED_RELAX_DOMAIN_LEVEL, FILE_MEMORY_PRESSURE_ENABLED, FILE_MEMORY_PRESSURE, FILE_SPREAD_PAGE, |