diff options
Diffstat (limited to 'arch/powerpc/platforms/cell/spufs/sched.c')
| -rw-r--r-- | arch/powerpc/platforms/cell/spufs/sched.c | 566 |
1 files changed, 382 insertions, 184 deletions
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 5bebe7fbe05..4a0a64fe25d 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -22,11 +22,12 @@ #undef DEBUG -#include <linux/module.h> #include <linux/errno.h> #include <linux/sched.h> +#include <linux/sched/rt.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/slab.h> #include <linux/completion.h> #include <linux/vmalloc.h> #include <linux/smp.h> @@ -46,6 +47,8 @@ #include <asm/spu_csa.h> #include <asm/spu_priv1.h> #include "spufs.h" +#define CREATE_TRACE_POINTS +#include "sputrace.h" struct spu_prio_array { DECLARE_BITMAP(bitmap, MAX_PRIO); @@ -58,6 +61,7 @@ static unsigned long spu_avenrun[3]; static struct spu_prio_array *spu_prio; static struct task_struct *spusched_task; static struct timer_list spusched_timer; +static struct timer_list spuloadavg_timer; /* * Priority of a normal, non-rt, non-niced'd process (aka nice level 0). @@ -79,7 +83,6 @@ static struct timer_list spusched_timer; #define MIN_SPU_TIMESLICE max(5 * HZ / (1000 * SPUSCHED_TICK), 1) #define DEF_SPU_TIMESLICE (100 * HZ / (1000 * SPUSCHED_TICK)) -#define MAX_USER_PRIO (MAX_PRIO - MAX_RT_PRIO) #define SCALE_PRIO(x, prio) \ max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_SPU_TIMESLICE) @@ -105,15 +108,21 @@ void spu_set_timeslice(struct spu_context *ctx) void __spu_update_sched_info(struct spu_context *ctx) { /* - * 32-Bit assignment are atomic on powerpc, and we don't care about - * memory ordering here because retriving the controlling thread is - * per defintion racy. + * assert that the context is not on the runqueue, so it is safe + * to change its scheduling parameters. + */ + BUG_ON(!list_empty(&ctx->rq)); + + /* + * 32-Bit assignments are atomic on powerpc, and we don't care about + * memory ordering here because retrieving the controlling thread is + * per definition racy. */ ctx->tid = current->pid; /* * We do our own priority calculations, so we normally want - * ->static_prio to start with. Unfortunately thies field + * ->static_prio to start with. Unfortunately this field * contains junk for threads with a realtime scheduling * policy so we have to look at ->prio in this case. */ @@ -124,31 +133,43 @@ void __spu_update_sched_info(struct spu_context *ctx) ctx->policy = current->policy; /* - * A lot of places that don't hold list_mutex poke into - * cpus_allowed, including grab_runnable_context which - * already holds the runq_lock. So abuse runq_lock - * to protect this field aswell. + * TO DO: the context may be loaded, so we may need to activate + * it again on a different node. But it shouldn't hurt anything + * to update its parameters, because we know that the scheduler + * is not actively looking at this field, since it is not on the + * runqueue. The context will be rescheduled on the proper node + * if it is timesliced or preempted. */ - spin_lock(&spu_prio->runq_lock); - ctx->cpus_allowed = current->cpus_allowed; - spin_unlock(&spu_prio->runq_lock); + cpumask_copy(&ctx->cpus_allowed, tsk_cpus_allowed(current)); + + /* Save the current cpu id for spu interrupt routing. */ + ctx->last_ran = raw_smp_processor_id(); } void spu_update_sched_info(struct spu_context *ctx) { - int node = ctx->spu->node; + int node; - mutex_lock(&cbe_spu_info[node].list_mutex); - __spu_update_sched_info(ctx); - mutex_unlock(&cbe_spu_info[node].list_mutex); + if (ctx->state == SPU_STATE_RUNNABLE) { + node = ctx->spu->node; + + /* + * Take list_mutex to sync with find_victim(). + */ + mutex_lock(&cbe_spu_info[node].list_mutex); + __spu_update_sched_info(ctx); + mutex_unlock(&cbe_spu_info[node].list_mutex); + } else { + __spu_update_sched_info(ctx); + } } static int __node_allowed(struct spu_context *ctx, int node) { if (nr_cpus_node(node)) { - cpumask_t mask = node_to_cpumask(node); + const struct cpumask *mask = cpumask_of_node(node); - if (cpus_intersects(mask, ctx->cpus_allowed)) + if (cpumask_intersects(mask, &ctx->cpus_allowed)) return 1; } @@ -166,15 +187,7 @@ static int node_allowed(struct spu_context *ctx, int node) return rval; } -static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier); - -void spu_switch_notify(struct spu *spu, struct spu_context *ctx) -{ - blocking_notifier_call_chain(&spu_switch_notifier, - ctx ? ctx->object_id : 0, spu); -} - -static void notify_spus_active(void) +void do_notify_spus_active(void) { int node; @@ -182,7 +195,7 @@ static void notify_spus_active(void) * Wake up the active spu_contexts. * * When the awakened processes see their "notify_active" flag is set, - * they will call spu_switch_notify(); + * they will call spu_switch_notify(). */ for_each_online_node(node) { struct spu *spu; @@ -201,22 +214,6 @@ static void notify_spus_active(void) } } -int spu_switch_event_register(struct notifier_block * n) -{ - int ret; - ret = blocking_notifier_chain_register(&spu_switch_notifier, n); - if (!ret) - notify_spus_active(); - return ret; -} -EXPORT_SYMBOL_GPL(spu_switch_event_register); - -int spu_switch_event_unregister(struct notifier_block * n) -{ - return blocking_notifier_chain_unregister(&spu_switch_notifier, n); -} -EXPORT_SYMBOL_GPL(spu_switch_event_unregister); - /** * spu_bind_context - bind spu context to physical spu * @spu: physical spu to bind to @@ -224,39 +221,40 @@ EXPORT_SYMBOL_GPL(spu_switch_event_unregister); */ static void spu_bind_context(struct spu *spu, struct spu_context *ctx) { - pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid, - spu->number, spu->node); + spu_context_trace(spu_bind_context__enter, ctx, spu); + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); if (ctx->flags & SPU_CREATE_NOSCHED) atomic_inc(&cbe_spu_info[spu->node].reserved_spus); - if (!list_empty(&ctx->aff_list)) - atomic_inc(&ctx->gang->aff_sched_count); ctx->stats.slb_flt_base = spu->stats.slb_flt; ctx->stats.class2_intr_base = spu->stats.class2_intr; + spu_associate_mm(spu, ctx->owner); + + spin_lock_irq(&spu->register_lock); spu->ctx = ctx; spu->flags = 0; ctx->spu = spu; ctx->ops = &spu_hw_ops; spu->pid = current->pid; spu->tgid = current->tgid; - spu_associate_mm(spu, ctx->owner); spu->ibox_callback = spufs_ibox_callback; spu->wbox_callback = spufs_wbox_callback; spu->stop_callback = spufs_stop_callback; spu->mfc_callback = spufs_mfc_callback; - spu->dma_callback = spufs_dma_callback; - mb(); + spin_unlock_irq(&spu->register_lock); + spu_unmap_mappings(ctx); + + spu_switch_log_notify(spu, ctx, SWITCH_LOG_START, 0); spu_restore(&ctx->csa, spu); spu->timestamp = jiffies; - spu_cpu_affinity_set(spu, raw_smp_processor_id()); spu_switch_notify(spu, ctx); ctx->state = SPU_STATE_RUNNABLE; - spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); + spuctx_switch_state(ctx, SPU_UTIL_USER); } /* @@ -315,11 +313,35 @@ static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff, */ node = cpu_to_node(raw_smp_processor_id()); for (n = 0; n < MAX_NUMNODES; n++, node++) { + /* + * "available_spus" counts how many spus are not potentially + * going to be used by other affinity gangs whose reference + * context is already in place. Although this code seeks to + * avoid having affinity gangs with a summed amount of + * contexts bigger than the amount of spus in the node, + * this may happen sporadically. In this case, available_spus + * becomes negative, which is harmless. + */ + int available_spus; + node = (node < MAX_NUMNODES) ? node : 0; if (!node_allowed(ctx, node)) continue; + + available_spus = 0; mutex_lock(&cbe_spu_info[node].list_mutex); list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if (spu->ctx && spu->ctx->gang && !spu->ctx->aff_offset + && spu->ctx->gang->aff_ref_spu) + available_spus -= spu->ctx->gang->contexts; + available_spus++; + } + if (available_spus < ctx->gang->contexts) { + mutex_unlock(&cbe_spu_info[node].list_mutex); + continue; + } + + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { if ((!mem_aff || spu->has_mem_affinity) && sched_spu(spu)) { mutex_unlock(&cbe_spu_info[node].list_mutex); @@ -392,7 +414,9 @@ static int has_affinity(struct spu_context *ctx) if (list_empty(&ctx->aff_list)) return 0; - mutex_lock(&gang->aff_mutex); + if (atomic_read(&ctx->gang->aff_sched_count) == 0) + ctx->gang->aff_ref_spu = NULL; + if (!gang->aff_ref_spu) { if (!(gang->aff_flags & AFF_MERGED)) aff_merge_remaining_ctxs(gang); @@ -400,7 +424,6 @@ static int has_affinity(struct spu_context *ctx) aff_set_offsets(gang); aff_set_ref_point_location(gang); } - mutex_unlock(&gang->aff_mutex); return gang->aff_ref_spu != NULL; } @@ -412,31 +435,43 @@ static int has_affinity(struct spu_context *ctx) */ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) { - pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__, - spu->pid, spu->number, spu->node); + u32 status; + + spu_context_trace(spu_unbind_context__enter, ctx, spu); + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); if (spu->ctx->flags & SPU_CREATE_NOSCHED) atomic_dec(&cbe_spu_info[spu->node].reserved_spus); - if (!list_empty(&ctx->aff_list)) - if (atomic_dec_and_test(&ctx->gang->aff_sched_count)) - ctx->gang->aff_ref_spu = NULL; + + if (ctx->gang) + /* + * If ctx->gang->aff_sched_count is positive, SPU affinity is + * being considered in this gang. Using atomic_dec_if_positive + * allow us to skip an explicit check for affinity in this gang + */ + atomic_dec_if_positive(&ctx->gang->aff_sched_count); + spu_switch_notify(spu, NULL); spu_unmap_mappings(ctx); spu_save(&ctx->csa, spu); + spu_switch_log_notify(spu, ctx, SWITCH_LOG_STOP, 0); + + spin_lock_irq(&spu->register_lock); spu->timestamp = jiffies; ctx->state = SPU_STATE_SAVED; spu->ibox_callback = NULL; spu->wbox_callback = NULL; spu->stop_callback = NULL; spu->mfc_callback = NULL; - spu->dma_callback = NULL; - spu_associate_mm(spu, NULL); spu->pid = 0; spu->tgid = 0; ctx->ops = &spu_backing_ops; spu->flags = 0; spu->ctx = NULL; + spin_unlock_irq(&spu->register_lock); + + spu_associate_mm(spu, NULL); ctx->stats.slb_flt += (spu->stats.slb_flt - ctx->stats.slb_flt_base); @@ -446,6 +481,9 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) /* This maps the underlying spu state to idle */ spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); ctx->spu = NULL; + + if (spu_stopped(ctx, &status)) + wake_up_all(&ctx->stop_wq); } /** @@ -471,10 +509,17 @@ static void __spu_add_to_rq(struct spu_context *ctx) list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]); set_bit(ctx->prio, spu_prio->bitmap); if (!spu_prio->nr_waiting++) - __mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); + mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); } } +static void spu_add_to_rq(struct spu_context *ctx) +{ + spin_lock(&spu_prio->runq_lock); + __spu_add_to_rq(ctx); + spin_unlock(&spu_prio->runq_lock); +} + static void __spu_del_from_rq(struct spu_context *ctx) { int prio = ctx->prio; @@ -489,10 +534,24 @@ static void __spu_del_from_rq(struct spu_context *ctx) } } +void spu_del_from_rq(struct spu_context *ctx) +{ + spin_lock(&spu_prio->runq_lock); + __spu_del_from_rq(ctx); + spin_unlock(&spu_prio->runq_lock); +} + static void spu_prio_wait(struct spu_context *ctx) { DEFINE_WAIT(wait); + /* + * The caller must explicitly wait for a context to be loaded + * if the nosched flag is set. If NOSCHED is not set, the caller + * queues the context and waits for an spu event or error. + */ + BUG_ON(!(ctx->flags & SPU_CREATE_NOSCHED)); + spin_lock(&spu_prio->runq_lock); prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE); if (!signal_pending(current)) { @@ -511,20 +570,30 @@ static void spu_prio_wait(struct spu_context *ctx) static struct spu *spu_get_idle(struct spu_context *ctx) { - struct spu *spu; + struct spu *spu, *aff_ref_spu; int node, n; - if (has_affinity(ctx)) { - node = ctx->gang->aff_ref_spu->node; + spu_context_nospu_trace(spu_get_idle__enter, ctx); - mutex_lock(&cbe_spu_info[node].list_mutex); - spu = ctx_location(ctx->gang->aff_ref_spu, ctx->aff_offset, node); - if (spu && spu->alloc_state == SPU_FREE) - goto found; - mutex_unlock(&cbe_spu_info[node].list_mutex); - return NULL; - } + if (ctx->gang) { + mutex_lock(&ctx->gang->aff_mutex); + if (has_affinity(ctx)) { + aff_ref_spu = ctx->gang->aff_ref_spu; + atomic_inc(&ctx->gang->aff_sched_count); + mutex_unlock(&ctx->gang->aff_mutex); + node = aff_ref_spu->node; + mutex_lock(&cbe_spu_info[node].list_mutex); + spu = ctx_location(aff_ref_spu, ctx->aff_offset, node); + if (spu && spu->alloc_state == SPU_FREE) + goto found; + mutex_unlock(&cbe_spu_info[node].list_mutex); + + atomic_dec(&ctx->gang->aff_sched_count); + goto not_found; + } + mutex_unlock(&ctx->gang->aff_mutex); + } node = cpu_to_node(raw_smp_processor_id()); for (n = 0; n < MAX_NUMNODES; n++, node++) { node = (node < MAX_NUMNODES) ? node : 0; @@ -539,12 +608,14 @@ static struct spu *spu_get_idle(struct spu_context *ctx) mutex_unlock(&cbe_spu_info[node].list_mutex); } + not_found: + spu_context_nospu_trace(spu_get_idle__not_found, ctx); return NULL; found: spu->alloc_state = SPU_USED; mutex_unlock(&cbe_spu_info[node].list_mutex); - pr_debug("Got SPU %d %d\n", spu->number, spu->node); + spu_context_trace(spu_get_idle__found, ctx, spu); spu_init_channels(spu); return spu; } @@ -561,10 +632,12 @@ static struct spu *find_victim(struct spu_context *ctx) struct spu *spu; int node, n; + spu_context_nospu_trace(spu_find_victim__enter, ctx); + /* * Look for a possible preemption candidate on the local node first. * If there is no candidate look at the other nodes. This isn't - * exactly fair, but so far the whole spu schedule tries to keep + * exactly fair, but so far the whole spu scheduler tries to keep * a strong node affinity. We might want to fine-tune this in * the future. */ @@ -580,9 +653,13 @@ static struct spu *find_victim(struct spu_context *ctx) struct spu_context *tmp = spu->ctx; if (tmp && tmp->prio > ctx->prio && - (!victim || tmp->prio > victim->prio)) + !(tmp->flags & SPU_CREATE_NOSCHED) && + (!victim || tmp->prio > victim->prio)) { victim = spu->ctx; + } } + if (victim) + get_spu_context(victim); mutex_unlock(&cbe_spu_info[node].list_mutex); if (victim) { @@ -591,24 +668,32 @@ static struct spu *find_victim(struct spu_context *ctx) * higher priority contexts before lower priority * ones, so this is safe until we introduce * priority inheritance schemes. + * + * XXX if the highest priority context is locked, + * this can loop a long time. Might be better to + * look at another context or give up after X retries. */ if (!mutex_trylock(&victim->state_mutex)) { + put_spu_context(victim); victim = NULL; goto restart; } spu = victim->spu; - if (!spu) { + if (!spu || victim->prio <= ctx->prio) { /* * This race can happen because we've dropped - * the active list mutex. No a problem, just + * the active list mutex. Not a problem, just * restart the search. */ mutex_unlock(&victim->state_mutex); + put_spu_context(victim); victim = NULL; goto restart; } + spu_context_trace(__spu_deactivate__unload, ctx, spu); + mutex_lock(&cbe_spu_info[node].list_mutex); cbe_spu_info[node].nr_active--; spu_unbind_context(spu, victim); @@ -616,13 +701,12 @@ static struct spu *find_victim(struct spu_context *ctx) victim->stats.invol_ctx_switch++; spu->stats.invol_ctx_switch++; + if (test_bit(SPU_SCHED_SPU_RUN, &victim->sched_flags)) + spu_add_to_rq(victim); + mutex_unlock(&victim->state_mutex); - /* - * We need to break out of the wait loop in spu_run - * manually to ensure this context gets put on the - * runqueue again ASAP. - */ - wake_up(&victim->stop_wq); + put_spu_context(victim); + return spu; } } @@ -630,6 +714,66 @@ static struct spu *find_victim(struct spu_context *ctx) return NULL; } +static void __spu_schedule(struct spu *spu, struct spu_context *ctx) +{ + int node = spu->node; + int success = 0; + + spu_set_timeslice(ctx); + + mutex_lock(&cbe_spu_info[node].list_mutex); + if (spu->ctx == NULL) { + spu_bind_context(spu, ctx); + cbe_spu_info[node].nr_active++; + spu->alloc_state = SPU_USED; + success = 1; + } + mutex_unlock(&cbe_spu_info[node].list_mutex); + + if (success) + wake_up_all(&ctx->run_wq); + else + spu_add_to_rq(ctx); +} + +static void spu_schedule(struct spu *spu, struct spu_context *ctx) +{ + /* not a candidate for interruptible because it's called either + from the scheduler thread or from spu_deactivate */ + mutex_lock(&ctx->state_mutex); + if (ctx->state == SPU_STATE_SAVED) + __spu_schedule(spu, ctx); + spu_release(ctx); +} + +/** + * spu_unschedule - remove a context from a spu, and possibly release it. + * @spu: The SPU to unschedule from + * @ctx: The context currently scheduled on the SPU + * @free_spu Whether to free the SPU for other contexts + * + * Unbinds the context @ctx from the SPU @spu. If @free_spu is non-zero, the + * SPU is made available for other contexts (ie, may be returned by + * spu_get_idle). If this is zero, the caller is expected to schedule another + * context to this spu. + * + * Should be called with ctx->state_mutex held. + */ +static void spu_unschedule(struct spu *spu, struct spu_context *ctx, + int free_spu) +{ + int node = spu->node; + + mutex_lock(&cbe_spu_info[node].list_mutex); + cbe_spu_info[node].nr_active--; + if (free_spu) + spu->alloc_state = SPU_FREE; + spu_unbind_context(spu, ctx); + ctx->stats.invol_ctx_switch++; + spu->stats.invol_ctx_switch++; + mutex_unlock(&cbe_spu_info[node].list_mutex); +} + /** * spu_activate - find a free spu for a context and execute it * @ctx: spu context to schedule @@ -641,39 +785,47 @@ static struct spu *find_victim(struct spu_context *ctx) */ int spu_activate(struct spu_context *ctx, unsigned long flags) { - do { - struct spu *spu; + struct spu *spu; - /* - * If there are multiple threads waiting for a single context - * only one actually binds the context while the others will - * only be able to acquire the state_mutex once the context - * already is in runnable state. - */ - if (ctx->spu) - return 0; + /* + * If there are multiple threads waiting for a single context + * only one actually binds the context while the others will + * only be able to acquire the state_mutex once the context + * already is in runnable state. + */ + if (ctx->spu) + return 0; - spu = spu_get_idle(ctx); - /* - * If this is a realtime thread we try to get it running by - * preempting a lower priority thread. - */ - if (!spu && rt_prio(ctx->prio)) - spu = find_victim(ctx); - if (spu) { - int node = spu->node; +spu_activate_top: + if (signal_pending(current)) + return -ERESTARTSYS; - mutex_lock(&cbe_spu_info[node].list_mutex); - spu_bind_context(spu, ctx); - cbe_spu_info[node].nr_active++; - mutex_unlock(&cbe_spu_info[node].list_mutex); - return 0; - } + spu = spu_get_idle(ctx); + /* + * If this is a realtime thread we try to get it running by + * preempting a lower priority thread. + */ + if (!spu && rt_prio(ctx->prio)) + spu = find_victim(ctx); + if (spu) { + unsigned long runcntl; + + runcntl = ctx->ops->runcntl_read(ctx); + __spu_schedule(spu, ctx); + if (runcntl & SPU_RUNCNTL_RUNNABLE) + spuctx_switch_state(ctx, SPU_UTIL_USER); + return 0; + } + + if (ctx->flags & SPU_CREATE_NOSCHED) { spu_prio_wait(ctx); - } while (!signal_pending(current)); + goto spu_activate_top; + } + + spu_add_to_rq(ctx); - return -ERESTARTSYS; + return 0; } /** @@ -693,7 +845,7 @@ static struct spu_context *grab_runnable_context(int prio, int node) struct list_head *rq = &spu_prio->runq[best]; list_for_each_entry(ctx, rq, rq) { - /* XXX(hch): check for affinity here aswell */ + /* XXX(hch): check for affinity here as well */ if (__node_allowed(ctx, node)) { __spu_del_from_rq(ctx); goto found; @@ -715,21 +867,19 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) if (spu) { new = grab_runnable_context(max_prio, spu->node); if (new || force) { - int node = spu->node; - - mutex_lock(&cbe_spu_info[node].list_mutex); - spu_unbind_context(spu, ctx); - spu->alloc_state = SPU_FREE; - cbe_spu_info[node].nr_active--; - mutex_unlock(&cbe_spu_info[node].list_mutex); - - ctx->stats.vol_ctx_switch++; - spu->stats.vol_ctx_switch++; - - if (new) - wake_up(&new->stop_wq); + spu_unschedule(spu, ctx, new == NULL); + if (new) { + if (new->flags & SPU_CREATE_NOSCHED) + wake_up(&new->stop_wq); + else { + spu_release(ctx); + spu_schedule(spu, new); + /* this one can't easily be made + interruptible */ + mutex_lock(&ctx->state_mutex); + } + } } - } return new != NULL; @@ -744,6 +894,7 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) */ void spu_deactivate(struct spu_context *ctx) { + spu_context_nospu_trace(spu_deactivate__enter, ctx); __spu_deactivate(ctx, 1, MAX_PRIO); } @@ -757,6 +908,7 @@ void spu_deactivate(struct spu_context *ctx) */ void spu_yield(struct spu_context *ctx) { + spu_context_nospu_trace(spu_yield__enter, ctx); if (!(ctx->flags & SPU_CREATE_NOSCHED)) { mutex_lock(&ctx->state_mutex); __spu_deactivate(ctx, 0, MAX_PRIO); @@ -766,43 +918,41 @@ void spu_yield(struct spu_context *ctx) static noinline void spusched_tick(struct spu_context *ctx) { + struct spu_context *new = NULL; + struct spu *spu = NULL; + + if (spu_acquire(ctx)) + BUG(); /* a kernel thread never has signals pending */ + + if (ctx->state != SPU_STATE_RUNNABLE) + goto out; if (ctx->flags & SPU_CREATE_NOSCHED) - return; + goto out; if (ctx->policy == SCHED_FIFO) - return; + goto out; - if (--ctx->time_slice) - return; + if (--ctx->time_slice && test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags)) + goto out; - /* - * Unfortunately list_mutex ranks outside of state_mutex, so - * we have to trylock here. If we fail give the context another - * tick and try again. - */ - if (mutex_trylock(&ctx->state_mutex)) { - struct spu *spu = ctx->spu; - struct spu_context *new; - - new = grab_runnable_context(ctx->prio + 1, spu->node); - if (new) { - spu_unbind_context(spu, ctx); - ctx->stats.invol_ctx_switch++; - spu->stats.invol_ctx_switch++; - spu->alloc_state = SPU_FREE; - cbe_spu_info[spu->node].nr_active--; - wake_up(&new->stop_wq); - /* - * We need to break out of the wait loop in - * spu_run manually to ensure this context - * gets put on the runqueue again ASAP. - */ - wake_up(&ctx->stop_wq); - } - spu_set_timeslice(ctx); - mutex_unlock(&ctx->state_mutex); + spu = ctx->spu; + + spu_context_trace(spusched_tick__preempt, ctx, spu); + + new = grab_runnable_context(ctx->prio + 1, spu->node); + if (new) { + spu_unschedule(spu, ctx, 0); + if (test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags)) + spu_add_to_rq(ctx); } else { - ctx->time_slice++; + spu_context_nospu_trace(spusched_tick__newslice, ctx); + if (!ctx->time_slice) + ctx->time_slice++; } +out: + spu_release(ctx); + + if (new) + spu_schedule(spu, new); } /** @@ -826,35 +976,31 @@ static unsigned long count_active_contexts(void) } /** - * spu_calc_load - given tick count, update the avenrun load estimates. - * @tick: tick count + * spu_calc_load - update the avenrun load estimates. * * No locking against reading these values from userspace, as for * the CPU loadavg code. */ -static void spu_calc_load(unsigned long ticks) +static void spu_calc_load(void) { unsigned long active_tasks; /* fixed-point */ - static int count = LOAD_FREQ; - - count -= ticks; - - if (unlikely(count < 0)) { - active_tasks = count_active_contexts() * FIXED_1; - do { - CALC_LOAD(spu_avenrun[0], EXP_1, active_tasks); - CALC_LOAD(spu_avenrun[1], EXP_5, active_tasks); - CALC_LOAD(spu_avenrun[2], EXP_15, active_tasks); - count += LOAD_FREQ; - } while (count < 0); - } + + active_tasks = count_active_contexts() * FIXED_1; + CALC_LOAD(spu_avenrun[0], EXP_1, active_tasks); + CALC_LOAD(spu_avenrun[1], EXP_5, active_tasks); + CALC_LOAD(spu_avenrun[2], EXP_15, active_tasks); } static void spusched_wake(unsigned long data) { mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); wake_up_process(spusched_task); - spu_calc_load(SPUSCHED_TICK); +} + +static void spuloadavg_wake(unsigned long data) +{ + mod_timer(&spuloadavg_timer, jiffies + LOAD_FREQ); + spu_calc_load(); } static int spusched_thread(void *unused) @@ -866,17 +1012,66 @@ static int spusched_thread(void *unused) set_current_state(TASK_INTERRUPTIBLE); schedule(); for (node = 0; node < MAX_NUMNODES; node++) { - mutex_lock(&cbe_spu_info[node].list_mutex); - list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) - if (spu->ctx) - spusched_tick(spu->ctx); - mutex_unlock(&cbe_spu_info[node].list_mutex); + struct mutex *mtx = &cbe_spu_info[node].list_mutex; + + mutex_lock(mtx); + list_for_each_entry(spu, &cbe_spu_info[node].spus, + cbe_list) { + struct spu_context *ctx = spu->ctx; + + if (ctx) { + get_spu_context(ctx); + mutex_unlock(mtx); + spusched_tick(ctx); + mutex_lock(mtx); + put_spu_context(ctx); + } + } + mutex_unlock(mtx); } } return 0; } +void spuctx_switch_state(struct spu_context *ctx, + enum spu_utilization_state new_state) +{ + unsigned long long curtime; + signed long long delta; + struct timespec ts; + struct spu *spu; + enum spu_utilization_state old_state; + int node; + + ktime_get_ts(&ts); + curtime = timespec_to_ns(&ts); + delta = curtime - ctx->stats.tstamp; + + WARN_ON(!mutex_is_locked(&ctx->state_mutex)); + WARN_ON(delta < 0); + + spu = ctx->spu; + old_state = ctx->stats.util_state; + ctx->stats.util_state = new_state; + ctx->stats.tstamp = curtime; + + /* + * Update the physical SPU utilization statistics. + */ + if (spu) { + ctx->stats.times[old_state] += delta; + spu->stats.times[old_state] += delta; + spu->stats.util_state = new_state; + spu->stats.tstamp = curtime; + node = spu->node; + if (old_state == SPU_UTIL_USER) + atomic_dec(&cbe_spu_info[node].busy_spus); + if (new_state == SPU_UTIL_USER) + atomic_inc(&cbe_spu_info[node].busy_spus); + } +} + #define LOAD_INT(x) ((x) >> FSHIFT) #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) @@ -890,7 +1085,7 @@ static int show_spu_loadavg(struct seq_file *s, void *private) /* * Note that last_pid doesn't really make much sense for the - * SPU loadavg (it even seems very odd on the CPU side..), + * SPU loadavg (it even seems very odd on the CPU side...), * but we include it here to have a 100% compatible interface. */ seq_printf(s, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n", @@ -899,7 +1094,7 @@ static int show_spu_loadavg(struct seq_file *s, void *private) LOAD_INT(c), LOAD_FRAC(c), count_active_contexts(), atomic_read(&nr_spu_contexts), - current->nsproxy->pid_ns->last_pid); + task_active_pid_ns(current)->last_pid); return 0; } @@ -931,6 +1126,7 @@ int __init spu_sched_init(void) spin_lock_init(&spu_prio->runq_lock); setup_timer(&spusched_timer, spusched_wake, 0); + setup_timer(&spuloadavg_timer, spuloadavg_wake, 0); spusched_task = kthread_run(spusched_thread, NULL, "spusched"); if (IS_ERR(spusched_task)) { @@ -938,10 +1134,11 @@ int __init spu_sched_init(void) goto out_free_spu_prio; } - entry = create_proc_entry("spu_loadavg", 0, NULL); + mod_timer(&spuloadavg_timer, 0); + + entry = proc_create("spu_loadavg", 0, NULL, &spu_loadavg_fops); if (!entry) goto out_stop_kthread; - entry->proc_fops = &spu_loadavg_fops; pr_debug("spusched: tick: %d, min ticks: %d, default ticks: %d\n", SPUSCHED_TICK, MIN_SPU_TIMESLICE, DEF_SPU_TIMESLICE); @@ -963,6 +1160,7 @@ void spu_sched_exit(void) remove_proc_entry("spu_loadavg", NULL); del_timer_sync(&spusched_timer); + del_timer_sync(&spuloadavg_timer); kthread_stop(spusched_task); for (node = 0; node < MAX_NUMNODES; node++) { |
