aboutsummaryrefslogtreecommitdiff
path: root/drivers/misc/sgi-gru/grukservices.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/sgi-gru/grukservices.c')
-rw-r--r--drivers/misc/sgi-gru/grukservices.c699
1 files changed, 575 insertions, 124 deletions
diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c
index d8bd7d84a7c..913de07e577 100644
--- a/drivers/misc/sgi-gru/grukservices.c
+++ b/drivers/misc/sgi-gru/grukservices.c
@@ -24,13 +24,15 @@
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
-#include <linux/smp_lock.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/miscdevice.h>
#include <linux/proc_fs.h>
#include <linux/interrupt.h>
#include <linux/uaccess.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <asm/io_apic.h>
#include "gru.h"
#include "grulib.h"
#include "grutables.h"
@@ -45,18 +47,63 @@
* resources. This will likely be replaced when we better understand the
* kernel/user requirements.
*
- * At boot time, the kernel permanently reserves a fixed number of
- * CBRs/DSRs for each cpu to use. The resources are all taken from
- * the GRU chiplet 1 on the blade. This leaves the full set of resources
- * of chiplet 0 available to be allocated to a single user.
+ * Blade percpu resources reserved for kernel use. These resources are
+ * reserved whenever the the kernel context for the blade is loaded. Note
+ * that the kernel context is not guaranteed to be always available. It is
+ * loaded on demand & can be stolen by a user if the user demand exceeds the
+ * kernel demand. The kernel can always reload the kernel context but
+ * a SLEEP may be required!!!.
+ *
+ * Async Overview:
+ *
+ * Each blade has one "kernel context" that owns GRU kernel resources
+ * located on the blade. Kernel drivers use GRU resources in this context
+ * for sending messages, zeroing memory, etc.
+ *
+ * The kernel context is dynamically loaded on demand. If it is not in
+ * use by the kernel, the kernel context can be unloaded & given to a user.
+ * The kernel context will be reloaded when needed. This may require that
+ * a context be stolen from a user.
+ * NOTE: frequent unloading/reloading of the kernel context is
+ * expensive. We are depending on batch schedulers, cpusets, sane
+ * drivers or some other mechanism to prevent the need for frequent
+ * stealing/reloading.
+ *
+ * The kernel context consists of two parts:
+ * - 1 CB & a few DSRs that are reserved for each cpu on the blade.
+ * Each cpu has it's own private resources & does not share them
+ * with other cpus. These resources are used serially, ie,
+ * locked, used & unlocked on each call to a function in
+ * grukservices.
+ * (Now that we have dynamic loading of kernel contexts, I
+ * may rethink this & allow sharing between cpus....)
+ *
+ * - Additional resources can be reserved long term & used directly
+ * by UV drivers located in the kernel. Drivers using these GRU
+ * resources can use asynchronous GRU instructions that send
+ * interrupts on completion.
+ * - these resources must be explicitly locked/unlocked
+ * - locked resources prevent (obviously) the kernel
+ * context from being unloaded.
+ * - drivers using these resource directly issue their own
+ * GRU instruction and must wait/check completion.
+ *
+ * When these resources are reserved, the caller can optionally
+ * associate a wait_queue with the resources and use asynchronous
+ * GRU instructions. When an async GRU instruction completes, the
+ * driver will do a wakeup on the event.
+ *
*/
-/* Blade percpu resources PERMANENTLY reserved for kernel use */
+
+#define ASYNC_HAN_TO_BID(h) ((h) - 1)
+#define ASYNC_BID_TO_HAN(b) ((b) + 1)
+#define ASYNC_HAN_TO_BS(h) gru_base[ASYNC_HAN_TO_BID(h)]
+
#define GRU_NUM_KERNEL_CBR 1
#define GRU_NUM_KERNEL_DSR_BYTES 256
#define GRU_NUM_KERNEL_DSR_CL (GRU_NUM_KERNEL_DSR_BYTES / \
GRU_CACHE_LINE_BYTES)
-#define KERNEL_CTXNUM 15
/* GRU instruction attributes for all instructions */
#define IMA IMA_CB_DELAY
@@ -98,6 +145,120 @@ struct message_header {
#define HSTATUS(mq, h) ((mq) + offsetof(struct message_queue, hstatus[h]))
+/*
+ * Reload the blade's kernel context into a GRU chiplet. Called holding
+ * the bs_kgts_sema for READ. Will steal user contexts if necessary.
+ */
+static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id)
+{
+ struct gru_state *gru;
+ struct gru_thread_state *kgts;
+ void *vaddr;
+ int ctxnum, ncpus;
+
+ up_read(&bs->bs_kgts_sema);
+ down_write(&bs->bs_kgts_sema);
+
+ if (!bs->bs_kgts) {
+ bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0);
+ bs->bs_kgts->ts_user_blade_id = blade_id;
+ }
+ kgts = bs->bs_kgts;
+
+ if (!kgts->ts_gru) {
+ STAT(load_kernel_context);
+ ncpus = uv_blade_nr_possible_cpus(blade_id);
+ kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU(
+ GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs);
+ kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU(
+ GRU_NUM_KERNEL_DSR_BYTES * ncpus +
+ bs->bs_async_dsr_bytes);
+ while (!gru_assign_gru_context(kgts)) {
+ msleep(1);
+ gru_steal_context(kgts);
+ }
+ gru_load_context(kgts);
+ gru = bs->bs_kgts->ts_gru;
+ vaddr = gru->gs_gru_base_vaddr;
+ ctxnum = kgts->ts_ctxnum;
+ bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0);
+ bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0);
+ }
+ downgrade_write(&bs->bs_kgts_sema);
+}
+
+/*
+ * Free all kernel contexts that are not currently in use.
+ * Returns 0 if all freed, else number of inuse context.
+ */
+static int gru_free_kernel_contexts(void)
+{
+ struct gru_blade_state *bs;
+ struct gru_thread_state *kgts;
+ int bid, ret = 0;
+
+ for (bid = 0; bid < GRU_MAX_BLADES; bid++) {
+ bs = gru_base[bid];
+ if (!bs)
+ continue;
+
+ /* Ignore busy contexts. Don't want to block here. */
+ if (down_write_trylock(&bs->bs_kgts_sema)) {
+ kgts = bs->bs_kgts;
+ if (kgts && kgts->ts_gru)
+ gru_unload_context(kgts, 0);
+ bs->bs_kgts = NULL;
+ up_write(&bs->bs_kgts_sema);
+ kfree(kgts);
+ } else {
+ ret++;
+ }
+ }
+ return ret;
+}
+
+/*
+ * Lock & load the kernel context for the specified blade.
+ */
+static struct gru_blade_state *gru_lock_kernel_context(int blade_id)
+{
+ struct gru_blade_state *bs;
+ int bid;
+
+ STAT(lock_kernel_context);
+again:
+ bid = blade_id < 0 ? uv_numa_blade_id() : blade_id;
+ bs = gru_base[bid];
+
+ /* Handle the case where migration occurred while waiting for the sema */
+ down_read(&bs->bs_kgts_sema);
+ if (blade_id < 0 && bid != uv_numa_blade_id()) {
+ up_read(&bs->bs_kgts_sema);
+ goto again;
+ }
+ if (!bs->bs_kgts || !bs->bs_kgts->ts_gru)
+ gru_load_kernel_context(bs, bid);
+ return bs;
+
+}
+
+/*
+ * Unlock the kernel context for the specified blade. Context is not
+ * unloaded but may be stolen before next use.
+ */
+static void gru_unlock_kernel_context(int blade_id)
+{
+ struct gru_blade_state *bs;
+
+ bs = gru_base[blade_id];
+ up_read(&bs->bs_kgts_sema);
+ STAT(unlock_kernel_context);
+}
+
+/*
+ * Reserve & get pointers to the DSR/CBRs reserved for the current cpu.
+ * - returns with preemption disabled
+ */
static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
{
struct gru_blade_state *bs;
@@ -105,30 +266,166 @@ static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES);
preempt_disable();
- bs = gru_base[uv_numa_blade_id()];
+ bs = gru_lock_kernel_context(-1);
lcpu = uv_blade_processor_id();
*cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE;
*dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES;
return 0;
}
+/*
+ * Free the current cpus reserved DSR/CBR resources.
+ */
static void gru_free_cpu_resources(void *cb, void *dsr)
{
+ gru_unlock_kernel_context(uv_numa_blade_id());
preempt_enable();
}
+/*
+ * Reserve GRU resources to be used asynchronously.
+ * Note: currently supports only 1 reservation per blade.
+ *
+ * input:
+ * blade_id - blade on which resources should be reserved
+ * cbrs - number of CBRs
+ * dsr_bytes - number of DSR bytes needed
+ * output:
+ * handle to identify resource
+ * (0 = async resources already reserved)
+ */
+unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes,
+ struct completion *cmp)
+{
+ struct gru_blade_state *bs;
+ struct gru_thread_state *kgts;
+ int ret = 0;
+
+ bs = gru_base[blade_id];
+
+ down_write(&bs->bs_kgts_sema);
+
+ /* Verify no resources already reserved */
+ if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs)
+ goto done;
+ bs->bs_async_dsr_bytes = dsr_bytes;
+ bs->bs_async_cbrs = cbrs;
+ bs->bs_async_wq = cmp;
+ kgts = bs->bs_kgts;
+
+ /* Resources changed. Unload context if already loaded */
+ if (kgts && kgts->ts_gru)
+ gru_unload_context(kgts, 0);
+ ret = ASYNC_BID_TO_HAN(blade_id);
+
+done:
+ up_write(&bs->bs_kgts_sema);
+ return ret;
+}
+
+/*
+ * Release async resources previously reserved.
+ *
+ * input:
+ * han - handle to identify resources
+ */
+void gru_release_async_resources(unsigned long han)
+{
+ struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
+
+ down_write(&bs->bs_kgts_sema);
+ bs->bs_async_dsr_bytes = 0;
+ bs->bs_async_cbrs = 0;
+ bs->bs_async_wq = NULL;
+ up_write(&bs->bs_kgts_sema);
+}
+
+/*
+ * Wait for async GRU instructions to complete.
+ *
+ * input:
+ * han - handle to identify resources
+ */
+void gru_wait_async_cbr(unsigned long han)
+{
+ struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
+
+ wait_for_completion(bs->bs_async_wq);
+ mb();
+}
+
+/*
+ * Lock previous reserved async GRU resources
+ *
+ * input:
+ * han - handle to identify resources
+ * output:
+ * cb - pointer to first CBR
+ * dsr - pointer to first DSR
+ */
+void gru_lock_async_resource(unsigned long han, void **cb, void **dsr)
+{
+ struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
+ int blade_id = ASYNC_HAN_TO_BID(han);
+ int ncpus;
+
+ gru_lock_kernel_context(blade_id);
+ ncpus = uv_blade_nr_possible_cpus(blade_id);
+ if (cb)
+ *cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE;
+ if (dsr)
+ *dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES;
+}
+
+/*
+ * Unlock previous reserved async GRU resources
+ *
+ * input:
+ * han - handle to identify resources
+ */
+void gru_unlock_async_resource(unsigned long han)
+{
+ int blade_id = ASYNC_HAN_TO_BID(han);
+
+ gru_unlock_kernel_context(blade_id);
+}
+
+/*----------------------------------------------------------------------*/
int gru_get_cb_exception_detail(void *cb,
struct control_block_extended_exc_detail *excdet)
{
struct gru_control_block_extended *cbe;
+ struct gru_thread_state *kgts = NULL;
+ unsigned long off;
+ int cbrnum, bid;
- cbe = get_cbe(GRUBASE(cb), get_cb_number(cb));
- prefetchw(cbe); /* Harmless on hardware, required for emulator */
+ /*
+ * Locate kgts for cb. This algorithm is SLOW but
+ * this function is rarely called (ie., almost never).
+ * Performance does not matter.
+ */
+ for_each_possible_blade(bid) {
+ if (!gru_base[bid])
+ break;
+ kgts = gru_base[bid]->bs_kgts;
+ if (!kgts || !kgts->ts_gru)
+ continue;
+ off = cb - kgts->ts_gru->gs_gru_base_vaddr;
+ if (off < GRU_SIZE)
+ break;
+ kgts = NULL;
+ }
+ BUG_ON(!kgts);
+ cbrnum = thread_cbr_number(kgts, get_cb_number(cb));
+ cbe = get_cbe(GRUBASE(cb), cbrnum);
+ gru_flush_cache(cbe); /* CBE not coherent */
+ sync_core();
excdet->opc = cbe->opccpy;
excdet->exopc = cbe->exopccpy;
excdet->ecause = cbe->ecause;
excdet->exceptdet0 = cbe->idef1upd;
excdet->exceptdet1 = cbe->idef3upd;
+ gru_flush_cache(cbe);
return 0;
}
@@ -141,8 +438,8 @@ char *gru_get_cb_exception_detail_str(int ret, void *cb,
if (ret > 0 && gen->istatus == CBS_EXCEPTION) {
gru_get_cb_exception_detail(cb, &excdet);
snprintf(buf, size,
- "GRU exception: cb %p, opc %d, exopc %d, ecause 0x%x,"
- "excdet0 0x%lx, excdet1 0x%x",
+ "GRU:%d exception: cb %p, opc %d, exopc %d, ecause 0x%x,"
+ "excdet0 0x%lx, excdet1 0x%x", smp_processor_id(),
gen, excdet.opc, excdet.exopc, excdet.ecause,
excdet.exceptdet0, excdet.exceptdet1);
} else {
@@ -167,13 +464,13 @@ static int gru_retry_exception(void *cb)
int retry = EXCEPTION_RETRY_LIMIT;
while (1) {
- if (gru_get_cb_message_queue_substatus(cb))
- break;
if (gru_wait_idle_or_exception(gen) == CBS_IDLE)
return CBS_IDLE;
-
+ if (gru_get_cb_message_queue_substatus(cb))
+ return CBS_EXCEPTION;
gru_get_cb_exception_detail(cb, &excdet);
- if (excdet.ecause & ~EXCEPTION_RETRY_BITS)
+ if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) ||
+ (excdet.cbrexecstatus & CBR_EXS_ABORT_OCC))
break;
if (retry-- == 0)
break;
@@ -189,9 +486,10 @@ int gru_check_status_proc(void *cb)
int ret;
ret = gen->istatus;
- if (ret != CBS_EXCEPTION)
- return ret;
- return gru_retry_exception(cb);
+ if (ret == CBS_EXCEPTION)
+ ret = gru_retry_exception(cb);
+ rmb();
+ return ret;
}
@@ -203,7 +501,7 @@ int gru_wait_proc(void *cb)
ret = gru_wait_idle_or_exception(gen);
if (ret == CBS_EXCEPTION)
ret = gru_retry_exception(cb);
-
+ rmb();
return ret;
}
@@ -270,7 +568,7 @@ int gru_create_message_queue(struct gru_message_queue_desc *mqd,
mqd->mq = mq;
mqd->mq_gpa = uv_gpa(mq);
mqd->qlines = qlines;
- mqd->interrupt_pnode = UV_NASID_TO_PNODE(nasid);
+ mqd->interrupt_pnode = nasid >> 1;
mqd->interrupt_vector = vector;
mqd->interrupt_apicid = apicid;
return 0;
@@ -330,6 +628,8 @@ static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd,
ret = MQE_UNEXPECTED_CB_ERR;
break;
case CBSS_PAGE_OVERFLOW:
+ STAT(mesq_noop_page_overflow);
+ /* fallthru */
default:
BUG();
}
@@ -405,17 +705,48 @@ cberr:
}
/*
- * Send a cross-partition interrupt to the SSI that contains the target
- * message queue. Normally, the interrupt is automatically delivered by hardware
- * but some error conditions require explicit delivery.
+ * Handle a PUT failure. Note: if message was a 2-line message, one of the
+ * lines might have successfully have been written. Before sending the
+ * message, "present" must be cleared in BOTH lines to prevent the receiver
+ * from prematurely seeing the full message.
*/
-static void send_message_queue_interrupt(struct gru_message_queue_desc *mqd)
+static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd,
+ void *mesg, int lines)
{
- if (mqd->interrupt_vector)
- uv_hub_send_ipi(mqd->interrupt_pnode, mqd->interrupt_apicid,
- mqd->interrupt_vector);
-}
+ unsigned long m, *val = mesg, gpa, save;
+ int ret;
+
+ m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
+ if (lines == 2) {
+ gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA);
+ if (gru_wait(cb) != CBS_IDLE)
+ return MQE_UNEXPECTED_CB_ERR;
+ }
+ gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
+ if (gru_wait(cb) != CBS_IDLE)
+ return MQE_UNEXPECTED_CB_ERR;
+
+ if (!mqd->interrupt_vector)
+ return MQE_OK;
+ /*
+ * Send a cross-partition interrupt to the SSI that contains the target
+ * message queue. Normally, the interrupt is automatically delivered by
+ * hardware but some error conditions require explicit delivery.
+ * Use the GRU to deliver the interrupt. Otherwise partition failures
+ * could cause unrecovered errors.
+ */
+ gpa = uv_global_gru_mmr_address(mqd->interrupt_pnode, UVH_IPI_INT);
+ save = *val;
+ *val = uv_hub_ipi_value(mqd->interrupt_apicid, mqd->interrupt_vector,
+ dest_Fixed);
+ gru_vstore_phys(cb, gpa, gru_get_tri(mesg), IAA_REGISTER, IMA);
+ ret = gru_wait(cb);
+ *val = save;
+ if (ret != CBS_IDLE)
+ return MQE_UNEXPECTED_CB_ERR;
+ return MQE_OK;
+}
/*
* Handle a gru_mesq failure. Some of these failures are software recoverable
@@ -425,7 +756,6 @@ static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd,
void *mesg, int lines)
{
int substatus, ret = 0;
- unsigned long m;
substatus = gru_get_cb_message_queue_substatus(cb);
switch (substatus) {
@@ -447,15 +777,11 @@ static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd,
break;
case CBSS_PUT_NACKED:
STAT(mesq_send_put_nacked);
- m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
- gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
- if (gru_wait(cb) == CBS_IDLE) {
- ret = MQE_OK;
- send_message_queue_interrupt(mqd);
- } else {
- ret = MQE_UNEXPECTED_CB_ERR;
- }
+ ret = send_message_put_nacked(cb, mqd, mesg, lines);
break;
+ case CBSS_PAGE_OVERFLOW:
+ STAT(mesq_page_overflow);
+ /* fallthru */
default:
BUG();
}
@@ -548,7 +874,6 @@ void *gru_get_next_message(struct gru_message_queue_desc *mqd)
int present = mhdr->present;
/* skip NOOP messages */
- STAT(mesq_receive);
while (present == MQS_NOOP) {
gru_free_message(mqd, mhdr);
mhdr = mq->next;
@@ -568,6 +893,7 @@ void *gru_get_next_message(struct gru_message_queue_desc *mqd)
if (mhdr->lines == 2)
restore_present2(mhdr, mhdr->present2);
+ STAT(mesq_receive);
return mhdr;
}
EXPORT_SYMBOL_GPL(gru_get_next_message);
@@ -575,6 +901,29 @@ EXPORT_SYMBOL_GPL(gru_get_next_message);
/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/
/*
+ * Load a DW from a global GPA. The GPA can be a memory or MMR address.
+ */
+int gru_read_gpa(unsigned long *value, unsigned long gpa)
+{
+ void *cb;
+ void *dsr;
+ int ret, iaa;
+
+ STAT(read_gpa);
+ if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
+ return MQE_BUG_NO_RESOURCES;
+ iaa = gpa >> 62;
+ gru_vload_phys(cb, gpa, gru_get_tri(dsr), iaa, IMA);
+ ret = gru_wait(cb);
+ if (ret == CBS_IDLE)
+ *value = *(unsigned long *)dsr;
+ gru_free_cpu_resources(cb, dsr);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(gru_read_gpa);
+
+
+/*
* Copy a block of data using the GRU resources
*/
int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
@@ -597,115 +946,217 @@ EXPORT_SYMBOL_GPL(gru_copy_gpa);
/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/
/* Temp - will delete after we gain confidence in the GRU */
-static __cacheline_aligned unsigned long word0;
-static __cacheline_aligned unsigned long word1;
-static int quicktest(struct gru_state *gru)
+static int quicktest0(unsigned long arg)
{
+ unsigned long word0;
+ unsigned long word1;
void *cb;
- void *ds;
+ void *dsr;
unsigned long *p;
+ int ret = -EIO;
- cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0);
- ds = get_gseg_base_address_ds(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0);
- p = ds;
+ if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr))
+ return MQE_BUG_NO_RESOURCES;
+ p = dsr;
word0 = MAGIC;
+ word1 = 0;
- gru_vload(cb, uv_gpa(&word0), 0, XTYPE_DW, 1, 1, IMA);
- if (gru_wait(cb) != CBS_IDLE)
- BUG();
+ gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
+ if (gru_wait(cb) != CBS_IDLE) {
+ printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 1\n", smp_processor_id());
+ goto done;
+ }
- if (*(unsigned long *)ds != MAGIC)
- BUG();
- gru_vstore(cb, uv_gpa(&word1), 0, XTYPE_DW, 1, 1, IMA);
- if (gru_wait(cb) != CBS_IDLE)
- BUG();
+ if (*p != MAGIC) {
+ printk(KERN_DEBUG "GRU:%d quicktest0 bad magic 0x%lx\n", smp_processor_id(), *p);
+ goto done;
+ }
+ gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
+ if (gru_wait(cb) != CBS_IDLE) {
+ printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 2\n", smp_processor_id());
+ goto done;
+ }
- if (word0 != word1 || word0 != MAGIC) {
- printk
- ("GRU quicktest err: gid %d, found 0x%lx, expected 0x%lx\n",
- gru->gs_gid, word1, MAGIC);
- BUG(); /* ZZZ should not be fatal */
+ if (word0 != word1 || word1 != MAGIC) {
+ printk(KERN_DEBUG
+ "GRU:%d quicktest0 err: found 0x%lx, expected 0x%lx\n",
+ smp_processor_id(), word1, MAGIC);
+ goto done;
}
+ ret = 0;
- return 0;
+done:
+ gru_free_cpu_resources(cb, dsr);
+ return ret;
}
+#define ALIGNUP(p, q) ((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1)))
-int gru_kservices_init(struct gru_state *gru)
+static int quicktest1(unsigned long arg)
{
- struct gru_blade_state *bs;
- struct gru_context_configuration_handle *cch;
- unsigned long cbr_map, dsr_map;
- int err, num, cpus_possible;
+ struct gru_message_queue_desc mqd;
+ void *p, *mq;
+ unsigned long *dw;
+ int i, ret = -EIO;
+ char mes[GRU_CACHE_LINE_BYTES], *m;
+
+ /* Need 1K cacheline aligned that does not cross page boundary */
+ p = kmalloc(4096, 0);
+ if (p == NULL)
+ return -ENOMEM;
+ mq = ALIGNUP(p, 1024);
+ memset(mes, 0xee, sizeof(mes));
+ dw = mq;
+
+ gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0);
+ for (i = 0; i < 6; i++) {
+ mes[8] = i;
+ do {
+ ret = gru_send_message_gpa(&mqd, mes, sizeof(mes));
+ } while (ret == MQE_CONGESTION);
+ if (ret)
+ break;
+ }
+ if (ret != MQE_QUEUE_FULL || i != 4) {
+ printk(KERN_DEBUG "GRU:%d quicktest1: unexpect status %d, i %d\n",
+ smp_processor_id(), ret, i);
+ goto done;
+ }
- /*
- * Currently, resources are reserved ONLY on the second chiplet
- * on each blade. This leaves ALL resources on chiplet 0 available
- * for user code.
- */
- bs = gru->gs_blade;
- if (gru != &bs->bs_grus[1])
- return 0;
-
- cpus_possible = uv_blade_nr_possible_cpus(gru->gs_blade_id);
-
- num = GRU_NUM_KERNEL_CBR * cpus_possible;
- cbr_map = gru_reserve_cb_resources(gru, GRU_CB_COUNT_TO_AU(num), NULL);
- gru->gs_reserved_cbrs += num;
-
- num = GRU_NUM_KERNEL_DSR_BYTES * cpus_possible;
- dsr_map = gru_reserve_ds_resources(gru, GRU_DS_BYTES_TO_AU(num), NULL);
- gru->gs_reserved_dsr_bytes += num;
-
- gru->gs_active_contexts++;
- __set_bit(KERNEL_CTXNUM, &gru->gs_context_map);
- cch = get_cch(gru->gs_gru_base_vaddr, KERNEL_CTXNUM);
-
- bs->kernel_cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr,
- KERNEL_CTXNUM, 0);
- bs->kernel_dsr = get_gseg_base_address_ds(gru->gs_gru_base_vaddr,
- KERNEL_CTXNUM, 0);
-
- lock_cch_handle(cch);
- cch->tfm_fault_bit_enable = 0;
- cch->tlb_int_enable = 0;
- cch->tfm_done_bit_enable = 0;
- cch->unmap_enable = 1;
- err = cch_allocate(cch, 0, 0, cbr_map, dsr_map);
- if (err) {
- gru_dbg(grudev,
- "Unable to allocate kernel CCH: gid %d, err %d\n",
- gru->gs_gid, err);
- BUG();
+ for (i = 0; i < 6; i++) {
+ m = gru_get_next_message(&mqd);
+ if (!m || m[8] != i)
+ break;
+ gru_free_message(&mqd, m);
}
- if (cch_start(cch)) {
- gru_dbg(grudev, "Unable to start kernel CCH: gid %d, err %d\n",
- gru->gs_gid, err);
- BUG();
+ if (i != 4) {
+ printk(KERN_DEBUG "GRU:%d quicktest2: bad message, i %d, m %p, m8 %d\n",
+ smp_processor_id(), i, m, m ? m[8] : -1);
+ goto done;
}
- unlock_cch_handle(cch);
+ ret = 0;
- if (gru_options & GRU_QUICKLOOK)
- quicktest(gru);
- return 0;
+done:
+ kfree(p);
+ return ret;
}
-void gru_kservices_exit(struct gru_state *gru)
+static int quicktest2(unsigned long arg)
{
- struct gru_context_configuration_handle *cch;
- struct gru_blade_state *bs;
+ static DECLARE_COMPLETION(cmp);
+ unsigned long han;
+ int blade_id = 0;
+ int numcb = 4;
+ int ret = 0;
+ unsigned long *buf;
+ void *cb0, *cb;
+ struct gru_control_block_status *gen;
+ int i, k, istatus, bytes;
+
+ bytes = numcb * 4 * 8;
+ buf = kmalloc(bytes, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ ret = -EBUSY;
+ han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp);
+ if (!han)
+ goto done;
+
+ gru_lock_async_resource(han, &cb0, NULL);
+ memset(buf, 0xee, bytes);
+ for (i = 0; i < numcb; i++)
+ gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0,
+ XTYPE_DW, 4, 1, IMA_INTERRUPT);
+
+ ret = 0;
+ k = numcb;
+ do {
+ gru_wait_async_cbr(han);
+ for (i = 0; i < numcb; i++) {
+ cb = cb0 + i * GRU_HANDLE_STRIDE;
+ istatus = gru_check_status(cb);
+ if (istatus != CBS_ACTIVE && istatus != CBS_CALL_OS)
+ break;
+ }
+ if (i == numcb)
+ continue;
+ if (istatus != CBS_IDLE) {
+ printk(KERN_DEBUG "GRU:%d quicktest2: cb %d, exception\n", smp_processor_id(), i);
+ ret = -EFAULT;
+ } else if (buf[4 * i] || buf[4 * i + 1] || buf[4 * i + 2] ||
+ buf[4 * i + 3]) {
+ printk(KERN_DEBUG "GRU:%d quicktest2:cb %d, buf 0x%lx, 0x%lx, 0x%lx, 0x%lx\n",
+ smp_processor_id(), i, buf[4 * i], buf[4 * i + 1], buf[4 * i + 2], buf[4 * i + 3]);
+ ret = -EIO;
+ }
+ k--;
+ gen = cb;
+ gen->istatus = CBS_CALL_OS; /* don't handle this CBR again */
+ } while (k);
+ BUG_ON(cmp.done);
+
+ gru_unlock_async_resource(han);
+ gru_release_async_resources(han);
+done:
+ kfree(buf);
+ return ret;
+}
+
+#define BUFSIZE 200
+static int quicktest3(unsigned long arg)
+{
+ char buf1[BUFSIZE], buf2[BUFSIZE];
+ int ret = 0;
+
+ memset(buf2, 0, sizeof(buf2));
+ memset(buf1, get_cycles() & 255, sizeof(buf1));
+ gru_copy_gpa(uv_gpa(buf2), uv_gpa(buf1), BUFSIZE);
+ if (memcmp(buf1, buf2, BUFSIZE)) {
+ printk(KERN_DEBUG "GRU:%d quicktest3 error\n", smp_processor_id());
+ ret = -EIO;
+ }
+ return ret;
+}
- bs = gru->gs_blade;
- if (gru != &bs->bs_grus[1])
- return;
+/*
+ * Debugging only. User hook for various kernel tests
+ * of driver & gru.
+ */
+int gru_ktest(unsigned long arg)
+{
+ int ret = -EINVAL;
- cch = get_cch(gru->gs_gru_base_vaddr, KERNEL_CTXNUM);
- lock_cch_handle(cch);
- if (cch_interrupt_sync(cch))
- BUG();
- if (cch_deallocate(cch))
+ switch (arg & 0xff) {
+ case 0:
+ ret = quicktest0(arg);
+ break;
+ case 1:
+ ret = quicktest1(arg);
+ break;
+ case 2:
+ ret = quicktest2(arg);
+ break;
+ case 3:
+ ret = quicktest3(arg);
+ break;
+ case 99:
+ ret = gru_free_kernel_contexts();
+ break;
+ }
+ return ret;
+
+}
+
+int gru_kservices_init(void)
+{
+ return 0;
+}
+
+void gru_kservices_exit(void)
+{
+ if (gru_free_kernel_contexts())
BUG();
- unlock_cch_handle(cch);
}