aboutsummaryrefslogtreecommitdiff
path: root/drivers/misc/sgi-gru
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/sgi-gru')
-rw-r--r--drivers/misc/sgi-gru/Makefile6
-rw-r--r--drivers/misc/sgi-gru/gru.h11
-rw-r--r--drivers/misc/sgi-gru/gru_instructions.h211
-rw-r--r--drivers/misc/sgi-gru/grufault.c411
-rw-r--r--drivers/misc/sgi-gru/grufile.c381
-rw-r--r--drivers/misc/sgi-gru/gruhandles.c81
-rw-r--r--drivers/misc/sgi-gru/gruhandles.h67
-rw-r--r--drivers/misc/sgi-gru/grukdump.c234
-rw-r--r--drivers/misc/sgi-gru/grukservices.c699
-rw-r--r--drivers/misc/sgi-gru/grukservices.h65
-rw-r--r--drivers/misc/sgi-gru/grulib.h64
-rw-r--r--drivers/misc/sgi-gru/grumain.c374
-rw-r--r--drivers/misc/sgi-gru/gruprocfs.c71
-rw-r--r--drivers/misc/sgi-gru/grutables.h142
-rw-r--r--drivers/misc/sgi-gru/grutlbpurge.c17
15 files changed, 2144 insertions, 690 deletions
diff --git a/drivers/misc/sgi-gru/Makefile b/drivers/misc/sgi-gru/Makefile
index bcd8136d2f9..0003a1d56f7 100644
--- a/drivers/misc/sgi-gru/Makefile
+++ b/drivers/misc/sgi-gru/Makefile
@@ -1,7 +1,5 @@
-ifdef CONFIG_SGI_GRU_DEBUG
- EXTRA_CFLAGS += -DDEBUG
-endif
+ccflags-$(CONFIG_SGI_GRU_DEBUG) := -DDEBUG
obj-$(CONFIG_SGI_GRU) := gru.o
-gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o gruhandles.o
+gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o gruhandles.o grukdump.o
diff --git a/drivers/misc/sgi-gru/gru.h b/drivers/misc/sgi-gru/gru.h
index f93f03a9e6e..3ad76cd18b4 100644
--- a/drivers/misc/sgi-gru/gru.h
+++ b/drivers/misc/sgi-gru/gru.h
@@ -53,6 +53,17 @@ struct gru_chiplet_info {
int free_user_cbr;
};
+/*
+ * Statictics kept for each context.
+ */
+struct gru_gseg_statistics {
+ unsigned long fmm_tlbmiss;
+ unsigned long upm_tlbmiss;
+ unsigned long tlbdropin;
+ unsigned long context_stolen;
+ unsigned long reserved[10];
+};
+
/* Flags for GRU options on the gru_create_context() call */
/* Select one of the follow 4 options to specify how TLB misses are handled */
#define GRU_OPT_MISS_DEFAULT 0x0000 /* Use default mode */
diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h
index 3fde33c1e8f..04d5170ac14 100644
--- a/drivers/misc/sgi-gru/gru_instructions.h
+++ b/drivers/misc/sgi-gru/gru_instructions.h
@@ -34,17 +34,18 @@ extern void gru_wait_abort_proc(void *cb);
#include <asm/intrinsics.h>
#define __flush_cache(p) ia64_fc((unsigned long)p)
/* Use volatile on IA64 to ensure ordering via st4.rel */
-#define gru_ordered_store_int(p, v) \
+#define gru_ordered_store_ulong(p, v) \
do { \
barrier(); \
- *((volatile int *)(p)) = v; /* force st.rel */ \
+ *((volatile unsigned long *)(p)) = v; /* force st.rel */ \
} while (0)
#elif defined(CONFIG_X86_64)
+#include <asm/cacheflush.h>
#define __flush_cache(p) clflush(p)
-#define gru_ordered_store_int(p, v) \
+#define gru_ordered_store_ulong(p, v) \
do { \
barrier(); \
- *(int *)p = v; \
+ *(unsigned long *)p = v; \
} while (0)
#else
#error "Unsupported architecture"
@@ -81,6 +82,8 @@ struct control_block_extended_exc_detail {
int exopc;
long exceptdet0;
int exceptdet1;
+ int cbrstate;
+ int cbrexecstatus;
};
/*
@@ -107,7 +110,8 @@ struct gru_instruction_bits {
unsigned char reserved2: 2;
unsigned char istatus: 2;
unsigned char isubstatus:4;
- unsigned char reserved3: 2;
+ unsigned char reserved3: 1;
+ unsigned char tlb_fault_color: 1;
/* DW 1 */
unsigned long idef4; /* 42 bits: TRi1, BufSize */
/* DW 2-6 */
@@ -126,8 +130,13 @@ struct gru_instruction_bits {
*/
struct gru_instruction {
/* DW 0 */
- unsigned int op32; /* icmd,xtype,iaa0,ima,opc */
- unsigned int tri0;
+ union {
+ unsigned long op64; /* icmd,xtype,iaa0,ima,opc,tri0 */
+ struct {
+ unsigned int op32;
+ unsigned int tri0;
+ };
+ };
unsigned long tri1_bufsize; /* DW 1 */
unsigned long baddr0; /* DW 2 */
unsigned long nelem; /* DW 3 */
@@ -137,7 +146,7 @@ struct gru_instruction {
unsigned long avalue; /* DW 7 */
};
-/* Some shifts and masks for the low 32 bits of a GRU command */
+/* Some shifts and masks for the low 64 bits of a GRU command */
#define GRU_CB_ICMD_SHFT 0
#define GRU_CB_ICMD_MASK 0x1
#define GRU_CB_XTYPE_SHFT 8
@@ -152,6 +161,10 @@ struct gru_instruction {
#define GRU_CB_OPC_MASK 0xff
#define GRU_CB_EXOPC_SHFT 24
#define GRU_CB_EXOPC_MASK 0xff
+#define GRU_IDEF2_SHFT 32
+#define GRU_IDEF2_MASK 0x3ffff
+#define GRU_ISTATUS_SHFT 56
+#define GRU_ISTATUS_MASK 0x3
/* GRU instruction opcodes (opc field) */
#define OP_NOP 0x00
@@ -250,17 +263,40 @@ struct gru_instruction {
#define CBE_CAUSE_HA_RESPONSE_FATAL (1 << 13)
#define CBE_CAUSE_HA_RESPONSE_NON_FATAL (1 << 14)
#define CBE_CAUSE_ADDRESS_SPACE_DECODE_ERROR (1 << 15)
-#define CBE_CAUSE_RESPONSE_DATA_ERROR (1 << 16)
-#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << 17)
+#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << 16)
+#define CBE_CAUSE_RA_RESPONSE_DATA_ERROR (1 << 17)
+#define CBE_CAUSE_HA_RESPONSE_DATA_ERROR (1 << 18)
+#define CBE_CAUSE_FORCED_ERROR (1 << 19)
+
+/* CBE cbrexecstatus bits */
+#define CBR_EXS_ABORT_OCC_BIT 0
+#define CBR_EXS_INT_OCC_BIT 1
+#define CBR_EXS_PENDING_BIT 2
+#define CBR_EXS_QUEUED_BIT 3
+#define CBR_EXS_TLB_INVAL_BIT 4
+#define CBR_EXS_EXCEPTION_BIT 5
+#define CBR_EXS_CB_INT_PENDING_BIT 6
+
+#define CBR_EXS_ABORT_OCC (1 << CBR_EXS_ABORT_OCC_BIT)
+#define CBR_EXS_INT_OCC (1 << CBR_EXS_INT_OCC_BIT)
+#define CBR_EXS_PENDING (1 << CBR_EXS_PENDING_BIT)
+#define CBR_EXS_QUEUED (1 << CBR_EXS_QUEUED_BIT)
+#define CBR_EXS_TLB_INVAL (1 << CBR_EXS_TLB_INVAL_BIT)
+#define CBR_EXS_EXCEPTION (1 << CBR_EXS_EXCEPTION_BIT)
+#define CBR_EXS_CB_INT_PENDING (1 << CBR_EXS_CB_INT_PENDING_BIT)
/*
* Exceptions are retried for the following cases. If any OTHER bits are set
* in ecause, the exception is not retryable.
*/
-#define EXCEPTION_RETRY_BITS (CBE_CAUSE_RESPONSE_DATA_ERROR | \
- CBE_CAUSE_RA_REQUEST_TIMEOUT | \
+#define EXCEPTION_RETRY_BITS (CBE_CAUSE_EXECUTION_HW_ERROR | \
CBE_CAUSE_TLBHW_ERROR | \
- CBE_CAUSE_HA_REQUEST_TIMEOUT)
+ CBE_CAUSE_RA_REQUEST_TIMEOUT | \
+ CBE_CAUSE_RA_RESPONSE_NON_FATAL | \
+ CBE_CAUSE_HA_RESPONSE_NON_FATAL | \
+ CBE_CAUSE_RA_RESPONSE_DATA_ERROR | \
+ CBE_CAUSE_HA_RESPONSE_DATA_ERROR \
+ )
/* Message queue head structure */
union gru_mesqhead {
@@ -273,12 +309,14 @@ union gru_mesqhead {
/* Generate the low word of a GRU instruction */
-static inline unsigned int
-__opword(unsigned char opcode, unsigned char exopc, unsigned char xtype,
+static inline unsigned long
+__opdword(unsigned char opcode, unsigned char exopc, unsigned char xtype,
unsigned char iaa0, unsigned char iaa1,
- unsigned char ima)
+ unsigned long idef2, unsigned char ima)
{
return (1 << GRU_CB_ICMD_SHFT) |
+ ((unsigned long)CBS_ACTIVE << GRU_ISTATUS_SHFT) |
+ (idef2<< GRU_IDEF2_SHFT) |
(iaa0 << GRU_CB_IAA0_SHFT) |
(iaa1 << GRU_CB_IAA1_SHFT) |
(ima << GRU_CB_IMA_SHFT) |
@@ -296,12 +334,13 @@ static inline void gru_flush_cache(void *p)
}
/*
- * Store the lower 32 bits of the command including the "start" bit. Then
+ * Store the lower 64 bits of the command including the "start" bit. Then
* start the instruction executing.
*/
-static inline void gru_start_instruction(struct gru_instruction *ins, int op32)
+static inline void gru_start_instruction(struct gru_instruction *ins, unsigned long op64)
{
- gru_ordered_store_int(ins, op32);
+ gru_ordered_store_ulong(ins, op64);
+ mb();
gru_flush_cache(ins);
}
@@ -317,6 +356,30 @@ static inline void gru_start_instruction(struct gru_instruction *ins, int op32)
* - nelem and stride are in elements
* - tri0/tri1 is in bytes for the beginning of the data segment.
*/
+static inline void gru_vload_phys(void *cb, unsigned long gpa,
+ unsigned int tri0, int iaa, unsigned long hints)
+{
+ struct gru_instruction *ins = (struct gru_instruction *)cb;
+
+ ins->baddr0 = (long)gpa | ((unsigned long)iaa << 62);
+ ins->nelem = 1;
+ ins->op1_stride = 1;
+ gru_start_instruction(ins, __opdword(OP_VLOAD, 0, XTYPE_DW, iaa, 0,
+ (unsigned long)tri0, CB_IMA(hints)));
+}
+
+static inline void gru_vstore_phys(void *cb, unsigned long gpa,
+ unsigned int tri0, int iaa, unsigned long hints)
+{
+ struct gru_instruction *ins = (struct gru_instruction *)cb;
+
+ ins->baddr0 = (long)gpa | ((unsigned long)iaa << 62);
+ ins->nelem = 1;
+ ins->op1_stride = 1;
+ gru_start_instruction(ins, __opdword(OP_VSTORE, 0, XTYPE_DW, iaa, 0,
+ (unsigned long)tri0, CB_IMA(hints)));
+}
+
static inline void gru_vload(void *cb, unsigned long mem_addr,
unsigned int tri0, unsigned char xtype, unsigned long nelem,
unsigned long stride, unsigned long hints)
@@ -325,10 +388,9 @@ static inline void gru_vload(void *cb, unsigned long mem_addr,
ins->baddr0 = (long)mem_addr;
ins->nelem = nelem;
- ins->tri0 = tri0;
ins->op1_stride = stride;
- gru_start_instruction(ins, __opword(OP_VLOAD, 0, xtype, IAA_RAM, 0,
- CB_IMA(hints)));
+ gru_start_instruction(ins, __opdword(OP_VLOAD, 0, xtype, IAA_RAM, 0,
+ (unsigned long)tri0, CB_IMA(hints)));
}
static inline void gru_vstore(void *cb, unsigned long mem_addr,
@@ -339,10 +401,9 @@ static inline void gru_vstore(void *cb, unsigned long mem_addr,
ins->baddr0 = (long)mem_addr;
ins->nelem = nelem;
- ins->tri0 = tri0;
ins->op1_stride = stride;
- gru_start_instruction(ins, __opword(OP_VSTORE, 0, xtype, IAA_RAM, 0,
- CB_IMA(hints)));
+ gru_start_instruction(ins, __opdword(OP_VSTORE, 0, xtype, IAA_RAM, 0,
+ tri0, CB_IMA(hints)));
}
static inline void gru_ivload(void *cb, unsigned long mem_addr,
@@ -353,10 +414,9 @@ static inline void gru_ivload(void *cb, unsigned long mem_addr,
ins->baddr0 = (long)mem_addr;
ins->nelem = nelem;
- ins->tri0 = tri0;
ins->tri1_bufsize = tri1;
- gru_start_instruction(ins, __opword(OP_IVLOAD, 0, xtype, IAA_RAM, 0,
- CB_IMA(hints)));
+ gru_start_instruction(ins, __opdword(OP_IVLOAD, 0, xtype, IAA_RAM, 0,
+ tri0, CB_IMA(hints)));
}
static inline void gru_ivstore(void *cb, unsigned long mem_addr,
@@ -367,10 +427,9 @@ static inline void gru_ivstore(void *cb, unsigned long mem_addr,
ins->baddr0 = (long)mem_addr;
ins->nelem = nelem;
- ins->tri0 = tri0;
ins->tri1_bufsize = tri1;
- gru_start_instruction(ins, __opword(OP_IVSTORE, 0, xtype, IAA_RAM, 0,
- CB_IMA(hints)));
+ gru_start_instruction(ins, __opdword(OP_IVSTORE, 0, xtype, IAA_RAM, 0,
+ tri0, CB_IMA(hints)));
}
static inline void gru_vset(void *cb, unsigned long mem_addr,
@@ -383,8 +442,8 @@ static inline void gru_vset(void *cb, unsigned long mem_addr,
ins->op2_value_baddr1 = value;
ins->nelem = nelem;
ins->op1_stride = stride;
- gru_start_instruction(ins, __opword(OP_VSET, 0, xtype, IAA_RAM, 0,
- CB_IMA(hints)));
+ gru_start_instruction(ins, __opdword(OP_VSET, 0, xtype, IAA_RAM, 0,
+ 0, CB_IMA(hints)));
}
static inline void gru_ivset(void *cb, unsigned long mem_addr,
@@ -397,8 +456,8 @@ static inline void gru_ivset(void *cb, unsigned long mem_addr,
ins->op2_value_baddr1 = value;
ins->nelem = nelem;
ins->tri1_bufsize = tri1;
- gru_start_instruction(ins, __opword(OP_IVSET, 0, xtype, IAA_RAM, 0,
- CB_IMA(hints)));
+ gru_start_instruction(ins, __opdword(OP_IVSET, 0, xtype, IAA_RAM, 0,
+ 0, CB_IMA(hints)));
}
static inline void gru_vflush(void *cb, unsigned long mem_addr,
@@ -410,15 +469,15 @@ static inline void gru_vflush(void *cb, unsigned long mem_addr,
ins->baddr0 = (long)mem_addr;
ins->op1_stride = stride;
ins->nelem = nelem;
- gru_start_instruction(ins, __opword(OP_VFLUSH, 0, xtype, IAA_RAM, 0,
- CB_IMA(hints)));
+ gru_start_instruction(ins, __opdword(OP_VFLUSH, 0, xtype, IAA_RAM, 0,
+ 0, CB_IMA(hints)));
}
static inline void gru_nop(void *cb, int hints)
{
struct gru_instruction *ins = (void *)cb;
- gru_start_instruction(ins, __opword(OP_NOP, 0, 0, 0, 0, CB_IMA(hints)));
+ gru_start_instruction(ins, __opdword(OP_NOP, 0, 0, 0, 0, 0, CB_IMA(hints)));
}
@@ -432,10 +491,9 @@ static inline void gru_bcopy(void *cb, const unsigned long src,
ins->baddr0 = (long)src;
ins->op2_value_baddr1 = (long)dest;
ins->nelem = nelem;
- ins->tri0 = tri0;
ins->tri1_bufsize = bufsize;
- gru_start_instruction(ins, __opword(OP_BCOPY, 0, xtype, IAA_RAM,
- IAA_RAM, CB_IMA(hints)));
+ gru_start_instruction(ins, __opdword(OP_BCOPY, 0, xtype, IAA_RAM,
+ IAA_RAM, tri0, CB_IMA(hints)));
}
static inline void gru_bstore(void *cb, const unsigned long src,
@@ -447,9 +505,8 @@ static inline void gru_bstore(void *cb, const unsigned long src,
ins->baddr0 = (long)src;
ins->op2_value_baddr1 = (long)dest;
ins->nelem = nelem;
- ins->tri0 = tri0;
- gru_start_instruction(ins, __opword(OP_BSTORE, 0, xtype, 0, IAA_RAM,
- CB_IMA(hints)));
+ gru_start_instruction(ins, __opdword(OP_BSTORE, 0, xtype, 0, IAA_RAM,
+ tri0, CB_IMA(hints)));
}
static inline void gru_gamir(void *cb, int exopc, unsigned long src,
@@ -458,8 +515,8 @@ static inline void gru_gamir(void *cb, int exopc, unsigned long src,
struct gru_instruction *ins = (void *)cb;
ins->baddr0 = (long)src;
- gru_start_instruction(ins, __opword(OP_GAMIR, exopc, xtype, IAA_RAM, 0,
- CB_IMA(hints)));
+ gru_start_instruction(ins, __opdword(OP_GAMIR, exopc, xtype, IAA_RAM, 0,
+ 0, CB_IMA(hints)));
}
static inline void gru_gamirr(void *cb, int exopc, unsigned long src,
@@ -468,8 +525,8 @@ static inline void gru_gamirr(void *cb, int exopc, unsigned long src,
struct gru_instruction *ins = (void *)cb;
ins->baddr0 = (long)src;
- gru_start_instruction(ins, __opword(OP_GAMIRR, exopc, xtype, IAA_RAM, 0,
- CB_IMA(hints)));
+ gru_start_instruction(ins, __opdword(OP_GAMIRR, exopc, xtype, IAA_RAM, 0,
+ 0, CB_IMA(hints)));
}
static inline void gru_gamer(void *cb, int exopc, unsigned long src,
@@ -482,8 +539,8 @@ static inline void gru_gamer(void *cb, int exopc, unsigned long src,
ins->baddr0 = (long)src;
ins->op1_stride = operand1;
ins->op2_value_baddr1 = operand2;
- gru_start_instruction(ins, __opword(OP_GAMER, exopc, xtype, IAA_RAM, 0,
- CB_IMA(hints)));
+ gru_start_instruction(ins, __opdword(OP_GAMER, exopc, xtype, IAA_RAM, 0,
+ 0, CB_IMA(hints)));
}
static inline void gru_gamerr(void *cb, int exopc, unsigned long src,
@@ -495,8 +552,8 @@ static inline void gru_gamerr(void *cb, int exopc, unsigned long src,
ins->baddr0 = (long)src;
ins->op1_stride = operand1;
ins->op2_value_baddr1 = operand2;
- gru_start_instruction(ins, __opword(OP_GAMERR, exopc, xtype, IAA_RAM, 0,
- CB_IMA(hints)));
+ gru_start_instruction(ins, __opdword(OP_GAMERR, exopc, xtype, IAA_RAM, 0,
+ 0, CB_IMA(hints)));
}
static inline void gru_gamxr(void *cb, unsigned long src,
@@ -506,8 +563,8 @@ static inline void gru_gamxr(void *cb, unsigned long src,
ins->baddr0 = (long)src;
ins->nelem = 4;
- gru_start_instruction(ins, __opword(OP_GAMXR, EOP_XR_CSWAP, XTYPE_DW,
- IAA_RAM, 0, CB_IMA(hints)));
+ gru_start_instruction(ins, __opdword(OP_GAMXR, EOP_XR_CSWAP, XTYPE_DW,
+ IAA_RAM, 0, 0, CB_IMA(hints)));
}
static inline void gru_mesq(void *cb, unsigned long queue,
@@ -518,9 +575,8 @@ static inline void gru_mesq(void *cb, unsigned long queue,
ins->baddr0 = (long)queue;
ins->nelem = nelem;
- ins->tri0 = tri0;
- gru_start_instruction(ins, __opword(OP_MESQ, 0, XTYPE_CL, IAA_RAM, 0,
- CB_IMA(hints)));
+ gru_start_instruction(ins, __opdword(OP_MESQ, 0, XTYPE_CL, IAA_RAM, 0,
+ tri0, CB_IMA(hints)));
}
static inline unsigned long gru_get_amo_value(void *cb)
@@ -600,9 +656,11 @@ static inline int gru_get_cb_substatus(void *cb)
return cbs->isubstatus;
}
-/* Check the status of a CB. If the CB is in UPM mode, call the
- * OS to handle the UPM status.
- * Returns the CB status field value (0 for normal completion)
+/*
+ * User interface to check an instruction status. UPM and exceptions
+ * are handled automatically. However, this function does NOT wait
+ * for an active instruction to complete.
+ *
*/
static inline int gru_check_status(void *cb)
{
@@ -610,36 +668,41 @@ static inline int gru_check_status(void *cb)
int ret;
ret = cbs->istatus;
- if (ret == CBS_CALL_OS)
+ if (ret != CBS_ACTIVE)
ret = gru_check_status_proc(cb);
return ret;
}
-/* Wait for CB to complete.
- * Returns the CB status field value (0 for normal completion)
+/*
+ * User interface (via inline function) to wait for an instruction
+ * to complete. Completion status (IDLE or EXCEPTION is returned
+ * to the user. Exception due to hardware errors are automatically
+ * retried before returning an exception.
+ *
*/
static inline int gru_wait(void *cb)
{
- struct gru_control_block_status *cbs = (void *)cb;
- int ret = cbs->istatus;
-
- if (ret != CBS_IDLE)
- ret = gru_wait_proc(cb);
- return ret;
+ return gru_wait_proc(cb);
}
-/* Wait for CB to complete. Aborts program if error. (Note: error does NOT
+/*
+ * Wait for CB to complete. Aborts program if error. (Note: error does NOT
* mean TLB mis - only fatal errors such as memory parity error or user
* bugs will cause termination.
*/
static inline void gru_wait_abort(void *cb)
{
- struct gru_control_block_status *cbs = (void *)cb;
-
- if (cbs->istatus != CBS_IDLE)
- gru_wait_abort_proc(cb);
+ gru_wait_abort_proc(cb);
}
+/*
+ * Get a pointer to the start of a gseg
+ * p - Any valid pointer within the gseg
+ */
+static inline void *gru_get_gseg_pointer (void *p)
+{
+ return (void *)((unsigned long)p & ~(GRU_GSEG_PAGESIZE - 1));
+}
/*
* Get a pointer to a control block
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
index ab118558552..f74fc0ca2ef 100644
--- a/drivers/misc/sgi-gru/grufault.c
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -33,6 +33,7 @@
#include <linux/io.h>
#include <linux/uaccess.h>
#include <linux/security.h>
+#include <linux/prefetch.h>
#include <asm/pgtable.h>
#include "gru.h"
#include "grutables.h"
@@ -40,6 +41,12 @@
#include "gru_instructions.h"
#include <asm/uv/uv_hub.h>
+/* Return codes for vtop functions */
+#define VTOP_SUCCESS 0
+#define VTOP_INVALID -1
+#define VTOP_RETRY -2
+
+
/*
* Test if a physical address is a valid GRU GSEG address
*/
@@ -90,19 +97,22 @@ static struct gru_thread_state *gru_alloc_locked_gts(unsigned long vaddr)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- struct gru_thread_state *gts = NULL;
+ struct gru_thread_state *gts = ERR_PTR(-EINVAL);
down_write(&mm->mmap_sem);
vma = gru_find_vma(vaddr);
- if (vma)
- gts = gru_alloc_thread_state(vma, TSID(vaddr, vma));
- if (gts) {
- mutex_lock(&gts->ts_ctxlock);
- downgrade_write(&mm->mmap_sem);
- } else {
- up_write(&mm->mmap_sem);
- }
+ if (!vma)
+ goto err;
+ gts = gru_alloc_thread_state(vma, TSID(vaddr, vma));
+ if (IS_ERR(gts))
+ goto err;
+ mutex_lock(&gts->ts_ctxlock);
+ downgrade_write(&mm->mmap_sem);
+ return gts;
+
+err:
+ up_write(&mm->mmap_sem);
return gts;
}
@@ -122,39 +132,15 @@ static void gru_unlock_gts(struct gru_thread_state *gts)
* is necessary to prevent the user from seeing a stale cb.istatus that will
* change as soon as the TFH restart is complete. Races may cause an
* occasional failure to clear the cb.istatus, but that is ok.
- *
- * If the cb address is not valid (should not happen, but...), nothing
- * bad will happen.. The get_user()/put_user() will fail but there
- * are no bad side-effects.
*/
-static void gru_cb_set_istatus_active(unsigned long __user *cb)
+static void gru_cb_set_istatus_active(struct gru_instruction_bits *cbk)
{
- union {
- struct gru_instruction_bits bits;
- unsigned long dw;
- } u;
-
- if (cb) {
- get_user(u.dw, cb);
- u.bits.istatus = CBS_ACTIVE;
- put_user(u.dw, cb);
+ if (cbk) {
+ cbk->istatus = CBS_ACTIVE;
}
}
/*
- * Convert a interrupt IRQ to a pointer to the GRU GTS that caused the
- * interrupt. Interrupts are always sent to a cpu on the blade that contains the
- * GRU (except for headless blades which are not currently supported). A blade
- * has N grus; a block of N consecutive IRQs is assigned to the GRUs. The IRQ
- * number uniquely identifies the GRU chiplet on the local blade that caused the
- * interrupt. Always called in interrupt context.
- */
-static inline struct gru_state *irq_to_gru(int irq)
-{
- return &gru_base[uv_numa_blade_id()]->bs_grus[irq - IRQ_GRU];
-}
-
-/*
* Read & clear a TFM
*
* The GRU has an array of fault maps. A map is private to a cpu
@@ -166,7 +152,8 @@ static inline struct gru_state *irq_to_gru(int irq)
* the GRU, atomic operations must be used to clear bits.
*/
static void get_clear_fault_map(struct gru_state *gru,
- struct gru_tlb_fault_map *map)
+ struct gru_tlb_fault_map *imap,
+ struct gru_tlb_fault_map *dmap)
{
unsigned long i, k;
struct gru_tlb_fault_map *tfm;
@@ -177,7 +164,11 @@ static void get_clear_fault_map(struct gru_state *gru,
k = tfm->fault_bits[i];
if (k)
k = xchg(&tfm->fault_bits[i], 0UL);
- map->fault_bits[i] = k;
+ imap->fault_bits[i] = k;
+ k = tfm->done_bits[i];
+ if (k)
+ k = xchg(&tfm->done_bits[i], 0UL);
+ dmap->fault_bits[i] = k;
}
/*
@@ -202,10 +193,11 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma,
{
struct page *page;
- /* ZZZ Need to handle HUGE pages */
- if (is_vm_hugetlb_page(vma))
- return -EFAULT;
+#ifdef CONFIG_HUGETLB_PAGE
+ *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
+#else
*pageshift = PAGE_SHIFT;
+#endif
if (get_user_pages
(current, current->mm, vaddr, 1, write, 0, &page, NULL) <= 0)
return -EFAULT;
@@ -263,7 +255,6 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
return 0;
err:
- local_irq_enable();
return 1;
}
@@ -296,16 +287,71 @@ static int gru_vtop(struct gru_thread_state *gts, unsigned long vaddr,
paddr = paddr & ~((1UL << ps) - 1);
*gpa = uv_soc_phys_ram_to_gpa(paddr);
*pageshift = ps;
- return 0;
+ return VTOP_SUCCESS;
inval:
- return -1;
+ return VTOP_INVALID;
upm:
- return -2;
+ return VTOP_RETRY;
}
/*
+ * Flush a CBE from cache. The CBE is clean in the cache. Dirty the
+ * CBE cacheline so that the line will be written back to home agent.
+ * Otherwise the line may be silently dropped. This has no impact
+ * except on performance.
+ */
+static void gru_flush_cache_cbe(struct gru_control_block_extended *cbe)
+{
+ if (unlikely(cbe)) {
+ cbe->cbrexecstatus = 0; /* make CL dirty */
+ gru_flush_cache(cbe);
+ }
+}
+
+/*
+ * Preload the TLB with entries that may be required. Currently, preloading
+ * is implemented only for BCOPY. Preload <tlb_preload_count> pages OR to
+ * the end of the bcopy tranfer, whichever is smaller.
+ */
+static void gru_preload_tlb(struct gru_state *gru,
+ struct gru_thread_state *gts, int atomic,
+ unsigned long fault_vaddr, int asid, int write,
+ unsigned char tlb_preload_count,
+ struct gru_tlb_fault_handle *tfh,
+ struct gru_control_block_extended *cbe)
+{
+ unsigned long vaddr = 0, gpa;
+ int ret, pageshift;
+
+ if (cbe->opccpy != OP_BCOPY)
+ return;
+
+ if (fault_vaddr == cbe->cbe_baddr0)
+ vaddr = fault_vaddr + GRU_CACHE_LINE_BYTES * cbe->cbe_src_cl - 1;
+ else if (fault_vaddr == cbe->cbe_baddr1)
+ vaddr = fault_vaddr + (1 << cbe->xtypecpy) * cbe->cbe_nelemcur - 1;
+
+ fault_vaddr &= PAGE_MASK;
+ vaddr &= PAGE_MASK;
+ vaddr = min(vaddr, fault_vaddr + tlb_preload_count * PAGE_SIZE);
+
+ while (vaddr > fault_vaddr) {
+ ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift);
+ if (ret || tfh_write_only(tfh, gpa, GAA_RAM, vaddr, asid, write,
+ GRU_PAGESIZE(pageshift)))
+ return;
+ gru_dbg(grudev,
+ "%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, rw %d, ps %d, gpa 0x%lx\n",
+ atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh,
+ vaddr, asid, write, pageshift, gpa);
+ vaddr -= PAGE_SIZE;
+ STAT(tlb_preload_page);
+ }
+}
+
+/*
* Drop a TLB entry into the GRU. The fault is described by info in an TFH.
* Input:
* cb Address of user CBR. Null if not running in user context
@@ -315,11 +361,14 @@ upm:
* < 0 = error code
*
*/
-static int gru_try_dropin(struct gru_thread_state *gts,
+static int gru_try_dropin(struct gru_state *gru,
+ struct gru_thread_state *gts,
struct gru_tlb_fault_handle *tfh,
- unsigned long __user *cb)
+ struct gru_instruction_bits *cbk)
{
- int pageshift = 0, asid, write, ret, atomic = !cb;
+ struct gru_control_block_extended *cbe = NULL;
+ unsigned char tlb_preload_count = gts->ts_tlb_preload_count;
+ int pageshift = 0, asid, write, ret, atomic = !cbk, indexway;
unsigned long gpa = 0, vaddr = 0;
/*
@@ -330,18 +379,34 @@ static int gru_try_dropin(struct gru_thread_state *gts,
*/
/*
+ * Prefetch the CBE if doing TLB preloading
+ */
+ if (unlikely(tlb_preload_count)) {
+ cbe = gru_tfh_to_cbe(tfh);
+ prefetchw(cbe);
+ }
+
+ /*
* Error if TFH state is IDLE or FMM mode & the user issuing a UPM call.
* Might be a hardware race OR a stupid user. Ignore FMM because FMM
* is a transient state.
*/
+ if (tfh->status != TFHSTATUS_EXCEPTION) {
+ gru_flush_cache(tfh);
+ sync_core();
+ if (tfh->status != TFHSTATUS_EXCEPTION)
+ goto failnoexception;
+ STAT(tfh_stale_on_fault);
+ }
if (tfh->state == TFHSTATE_IDLE)
goto failidle;
- if (tfh->state == TFHSTATE_MISS_FMM && cb)
+ if (tfh->state == TFHSTATE_MISS_FMM && cbk)
goto failfmm;
write = (tfh->cause & TFHCAUSE_TLB_MOD) != 0;
vaddr = tfh->missvaddr;
asid = tfh->missasid;
+ indexway = tfh->indexway;
if (asid == 0)
goto failnoasid;
@@ -355,41 +420,51 @@ static int gru_try_dropin(struct gru_thread_state *gts,
goto failactive;
ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift);
- if (ret == -1)
+ if (ret == VTOP_INVALID)
goto failinval;
- if (ret == -2)
+ if (ret == VTOP_RETRY)
goto failupm;
if (!(gts->ts_sizeavail & GRU_SIZEAVAIL(pageshift))) {
gts->ts_sizeavail |= GRU_SIZEAVAIL(pageshift);
- if (atomic || !gru_update_cch(gts, 0)) {
+ if (atomic || !gru_update_cch(gts)) {
gts->ts_force_cch_reload = 1;
goto failupm;
}
}
- gru_cb_set_istatus_active(cb);
+
+ if (unlikely(cbe) && pageshift == PAGE_SHIFT) {
+ gru_preload_tlb(gru, gts, atomic, vaddr, asid, write, tlb_preload_count, tfh, cbe);
+ gru_flush_cache_cbe(cbe);
+ }
+
+ gru_cb_set_istatus_active(cbk);
+ gts->ustats.tlbdropin++;
tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write,
GRU_PAGESIZE(pageshift));
- STAT(tlb_dropin);
gru_dbg(grudev,
- "%s: tfh 0x%p, vaddr 0x%lx, asid 0x%x, ps %d, gpa 0x%lx\n",
- ret ? "non-atomic" : "atomic", tfh, vaddr, asid,
- pageshift, gpa);
+ "%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, indexway 0x%x,"
+ " rw %d, ps %d, gpa 0x%lx\n",
+ atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh, vaddr, asid,
+ indexway, write, pageshift, gpa);
+ STAT(tlb_dropin);
return 0;
failnoasid:
/* No asid (delayed unload). */
STAT(tlb_dropin_fail_no_asid);
gru_dbg(grudev, "FAILED no_asid tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
- if (!cb)
+ if (!cbk)
tfh_user_polling_mode(tfh);
else
gru_flush_cache(tfh);
+ gru_flush_cache_cbe(cbe);
return -EAGAIN;
failupm:
/* Atomic failure switch CBR to UPM */
tfh_user_polling_mode(tfh);
+ gru_flush_cache_cbe(cbe);
STAT(tlb_dropin_fail_upm);
gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
return 1;
@@ -397,15 +472,28 @@ failupm:
failfmm:
/* FMM state on UPM call */
gru_flush_cache(tfh);
+ gru_flush_cache_cbe(cbe);
STAT(tlb_dropin_fail_fmm);
gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state);
return 0;
+failnoexception:
+ /* TFH status did not show exception pending */
+ gru_flush_cache(tfh);
+ gru_flush_cache_cbe(cbe);
+ if (cbk)
+ gru_flush_cache(cbk);
+ STAT(tlb_dropin_fail_no_exception);
+ gru_dbg(grudev, "FAILED non-exception tfh: 0x%p, status %d, state %d\n",
+ tfh, tfh->status, tfh->state);
+ return 0;
+
failidle:
- /* TFH was idle - no miss pending */
+ /* TFH state was idle - no miss pending */
gru_flush_cache(tfh);
- if (cb)
- gru_flush_cache(cb);
+ gru_flush_cache_cbe(cbe);
+ if (cbk)
+ gru_flush_cache(cbk);
STAT(tlb_dropin_fail_idle);
gru_dbg(grudev, "FAILED idle tfh: 0x%p, state %d\n", tfh, tfh->state);
return 0;
@@ -413,16 +501,18 @@ failidle:
failinval:
/* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */
tfh_exception(tfh);
+ gru_flush_cache_cbe(cbe);
STAT(tlb_dropin_fail_invalid);
gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr);
return -EFAULT;
failactive:
/* Range invalidate active. Switch to UPM iff atomic */
- if (!cb)
+ if (!cbk)
tfh_user_polling_mode(tfh);
else
gru_flush_cache(tfh);
+ gru_flush_cache_cbe(cbe);
STAT(tlb_dropin_fail_range_active);
gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n",
tfh, vaddr);
@@ -435,27 +525,41 @@ failactive:
* Note that this is the interrupt handler that is registered with linux
* interrupt handlers.
*/
-irqreturn_t gru_intr(int irq, void *dev_id)
+static irqreturn_t gru_intr(int chiplet, int blade)
{
struct gru_state *gru;
- struct gru_tlb_fault_map map;
+ struct gru_tlb_fault_map imap, dmap;
struct gru_thread_state *gts;
struct gru_tlb_fault_handle *tfh = NULL;
+ struct completion *cmp;
int cbrnum, ctxnum;
STAT(intr);
- gru = irq_to_gru(irq);
+ gru = &gru_base[blade]->bs_grus[chiplet];
if (!gru) {
- dev_err(grudev, "GRU: invalid interrupt: cpu %d, irq %d\n",
- raw_smp_processor_id(), irq);
+ dev_err(grudev, "GRU: invalid interrupt: cpu %d, chiplet %d\n",
+ raw_smp_processor_id(), chiplet);
return IRQ_NONE;
}
- get_clear_fault_map(gru, &map);
- gru_dbg(grudev, "irq %d, gru %x, map 0x%lx\n", irq, gru->gs_gid,
- map.fault_bits[0]);
+ get_clear_fault_map(gru, &imap, &dmap);
+ gru_dbg(grudev,
+ "cpu %d, chiplet %d, gid %d, imap %016lx %016lx, dmap %016lx %016lx\n",
+ smp_processor_id(), chiplet, gru->gs_gid,
+ imap.fault_bits[0], imap.fault_bits[1],
+ dmap.fault_bits[0], dmap.fault_bits[1]);
+
+ for_each_cbr_in_tfm(cbrnum, dmap.fault_bits) {
+ STAT(intr_cbr);
+ cmp = gru->gs_blade->bs_async_wq;
+ if (cmp)
+ complete(cmp);
+ gru_dbg(grudev, "gid %d, cbr_done %d, done %d\n",
+ gru->gs_gid, cbrnum, cmp ? cmp->done : -1);
+ }
- for_each_cbr_in_tfm(cbrnum, map.fault_bits) {
+ for_each_cbr_in_tfm(cbrnum, imap.fault_bits) {
+ STAT(intr_tfh);
tfh = get_tfh_by_index(gru, cbrnum);
prefetchw(tfh); /* Helps on hdw, required for emulator */
@@ -468,12 +572,20 @@ irqreturn_t gru_intr(int irq, void *dev_id)
ctxnum = tfh->ctxnum;
gts = gru->gs_gts[ctxnum];
+ /* Spurious interrupts can cause this. Ignore. */
+ if (!gts) {
+ STAT(intr_spurious);
+ continue;
+ }
+
/*
* This is running in interrupt context. Trylock the mmap_sem.
* If it fails, retry the fault in user context.
*/
- if (down_read_trylock(&gts->ts_mm->mmap_sem)) {
- gru_try_dropin(gts, tfh, NULL);
+ gts->ustats.fmm_tlbmiss++;
+ if (!gts->ts_force_cch_reload &&
+ down_read_trylock(&gts->ts_mm->mmap_sem)) {
+ gru_try_dropin(gru, gts, tfh, NULL);
up_read(&gts->ts_mm->mmap_sem);
} else {
tfh_user_polling_mode(tfh);
@@ -483,19 +595,43 @@ irqreturn_t gru_intr(int irq, void *dev_id)
return IRQ_HANDLED;
}
+irqreturn_t gru0_intr(int irq, void *dev_id)
+{
+ return gru_intr(0, uv_numa_blade_id());
+}
+
+irqreturn_t gru1_intr(int irq, void *dev_id)
+{
+ return gru_intr(1, uv_numa_blade_id());
+}
+
+irqreturn_t gru_intr_mblade(int irq, void *dev_id)
+{
+ int blade;
+
+ for_each_possible_blade(blade) {
+ if (uv_blade_nr_possible_cpus(blade))
+ continue;
+ gru_intr(0, blade);
+ gru_intr(1, blade);
+ }
+ return IRQ_HANDLED;
+}
+
static int gru_user_dropin(struct gru_thread_state *gts,
struct gru_tlb_fault_handle *tfh,
- unsigned long __user *cb)
+ void *cb)
{
struct gru_mm_struct *gms = gts->ts_gms;
int ret;
+ gts->ustats.upm_tlbmiss++;
while (1) {
wait_event(gms->ms_wait_queue,
atomic_read(&gms->ms_range_active) == 0);
prefetchw(tfh); /* Helps on hdw, required for emulator */
- ret = gru_try_dropin(gts, tfh, cb);
+ ret = gru_try_dropin(gts->ts_gru, gts, tfh, cb);
if (ret <= 0)
return ret;
STAT(call_os_wait_queue);
@@ -511,52 +647,41 @@ int gru_handle_user_call_os(unsigned long cb)
{
struct gru_tlb_fault_handle *tfh;
struct gru_thread_state *gts;
- unsigned long __user *cbp;
+ void *cbk;
int ucbnum, cbrnum, ret = -EINVAL;
STAT(call_os);
- gru_dbg(grudev, "address 0x%lx\n", cb);
/* sanity check the cb pointer */
ucbnum = get_cb_number((void *)cb);
if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB)
return -EINVAL;
- cbp = (unsigned long *)cb;
gts = gru_find_lock_gts(cb);
if (!gts)
return -EINVAL;
+ gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts);
if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE)
goto exit;
- /*
- * If force_unload is set, the UPM TLB fault is phony. The task
- * has migrated to another node and the GSEG must be moved. Just
- * unload the context. The task will page fault and assign a new
- * context.
- */
- if (gts->ts_tgid_owner == current->tgid && gts->ts_blade >= 0 &&
- gts->ts_blade != uv_numa_blade_id()) {
- STAT(call_os_offnode_reference);
- gts->ts_force_unload = 1;
- }
+ gru_check_context_placement(gts);
/*
* CCH may contain stale data if ts_force_cch_reload is set.
*/
if (gts->ts_gru && gts->ts_force_cch_reload) {
- gru_update_cch(gts, 0);
gts->ts_force_cch_reload = 0;
+ gru_update_cch(gts);
}
ret = -EAGAIN;
cbrnum = thread_cbr_number(gts, ucbnum);
- if (gts->ts_force_unload) {
- gru_unload_context(gts, 1);
- } else if (gts->ts_gru) {
+ if (gts->ts_gru) {
tfh = get_tfh_by_index(gts->ts_gru, cbrnum);
- ret = gru_user_dropin(gts, tfh, cbp);
+ cbk = get_gseg_base_address_cb(gts->ts_gru->gs_gru_base_vaddr,
+ gts->ts_ctxnum, ucbnum);
+ ret = gru_user_dropin(gts, tfh, cbk);
}
exit:
gru_unlock_gts(gts);
@@ -578,31 +703,38 @@ int gru_get_exception_detail(unsigned long arg)
if (copy_from_user(&excdet, (void __user *)arg, sizeof(excdet)))
return -EFAULT;
- gru_dbg(grudev, "address 0x%lx\n", excdet.cb);
gts = gru_find_lock_gts(excdet.cb);
if (!gts)
return -EINVAL;
+ gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", excdet.cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts);
ucbnum = get_cb_number((void *)excdet.cb);
if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) {
ret = -EINVAL;
} else if (gts->ts_gru) {
cbrnum = thread_cbr_number(gts, ucbnum);
cbe = get_cbe_by_index(gts->ts_gru, cbrnum);
- prefetchw(cbe);/* Harmless on hardware, required for emulator */
+ gru_flush_cache(cbe); /* CBE not coherent */
+ sync_core(); /* make sure we are have current data */
excdet.opc = cbe->opccpy;
excdet.exopc = cbe->exopccpy;
excdet.ecause = cbe->ecause;
excdet.exceptdet0 = cbe->idef1upd;
excdet.exceptdet1 = cbe->idef3upd;
+ excdet.cbrstate = cbe->cbrstate;
+ excdet.cbrexecstatus = cbe->cbrexecstatus;
+ gru_flush_cache_cbe(cbe);
ret = 0;
} else {
ret = -EAGAIN;
}
gru_unlock_gts(gts);
- gru_dbg(grudev, "address 0x%lx, ecause 0x%x\n", excdet.cb,
- excdet.ecause);
+ gru_dbg(grudev,
+ "cb 0x%lx, op %d, exopc %d, cbrstate %d, cbrexecstatus 0x%x, ecause 0x%x, "
+ "exdet0 0x%lx, exdet1 0x%x\n",
+ excdet.cb, excdet.opc, excdet.exopc, excdet.cbrstate, excdet.cbrexecstatus,
+ excdet.ecause, excdet.exceptdet0, excdet.exceptdet1);
if (!ret && copy_to_user((void __user *)arg, &excdet, sizeof(excdet)))
ret = -EFAULT;
return ret;
@@ -627,7 +759,7 @@ static int gru_unload_all_contexts(void)
if (gts && mutex_trylock(&gts->ts_ctxlock)) {
spin_unlock(&gru->gs_lock);
gru_unload_context(gts, 1);
- gru_unlock_gts(gts);
+ mutex_unlock(&gts->ts_ctxlock);
spin_lock(&gru->gs_lock);
}
}
@@ -669,6 +801,7 @@ int gru_user_flush_tlb(unsigned long arg)
{
struct gru_thread_state *gts;
struct gru_flush_tlb_req req;
+ struct gru_mm_struct *gms;
STAT(user_flush_tlb);
if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
@@ -681,8 +814,39 @@ int gru_user_flush_tlb(unsigned long arg)
if (!gts)
return -EINVAL;
- gru_flush_tlb_range(gts->ts_gms, req.vaddr, req.len);
+ gms = gts->ts_gms;
gru_unlock_gts(gts);
+ gru_flush_tlb_range(gms, req.vaddr, req.len);
+
+ return 0;
+}
+
+/*
+ * Fetch GSEG statisticss
+ */
+long gru_get_gseg_statistics(unsigned long arg)
+{
+ struct gru_thread_state *gts;
+ struct gru_get_gseg_statistics_req req;
+
+ if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+ return -EFAULT;
+
+ /*
+ * The library creates arrays of contexts for threaded programs.
+ * If no gts exists in the array, the context has never been used & all
+ * statistics are implicitly 0.
+ */
+ gts = gru_find_lock_gts(req.gseg);
+ if (gts) {
+ memcpy(&req.stats, &gts->ustats, sizeof(gts->ustats));
+ gru_unlock_gts(gts);
+ } else {
+ memset(&req.stats, 0, sizeof(gts->ustats));
+ }
+
+ if (copy_to_user((void __user *)arg, &req, sizeof(req)))
+ return -EFAULT;
return 0;
}
@@ -691,18 +855,49 @@ int gru_user_flush_tlb(unsigned long arg)
* Register the current task as the user of the GSEG slice.
* Needed for TLB fault interrupt targeting.
*/
-int gru_set_task_slice(long address)
+int gru_set_context_option(unsigned long arg)
{
struct gru_thread_state *gts;
+ struct gru_set_context_option_req req;
+ int ret = 0;
- STAT(set_task_slice);
- gru_dbg(grudev, "address 0x%lx\n", address);
- gts = gru_alloc_locked_gts(address);
- if (!gts)
- return -EINVAL;
+ STAT(set_context_option);
+ if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+ return -EFAULT;
+ gru_dbg(grudev, "op %d, gseg 0x%lx, value1 0x%lx\n", req.op, req.gseg, req.val1);
+
+ gts = gru_find_lock_gts(req.gseg);
+ if (!gts) {
+ gts = gru_alloc_locked_gts(req.gseg);
+ if (IS_ERR(gts))
+ return PTR_ERR(gts);
+ }
- gts->ts_tgid_owner = current->tgid;
+ switch (req.op) {
+ case sco_blade_chiplet:
+ /* Select blade/chiplet for GRU context */
+ if (req.val0 < -1 || req.val0 >= GRU_CHIPLETS_PER_HUB ||
+ req.val1 < -1 || req.val1 >= GRU_MAX_BLADES ||
+ (req.val1 >= 0 && !gru_base[req.val1])) {
+ ret = -EINVAL;
+ } else {
+ gts->ts_user_blade_id = req.val1;
+ gts->ts_user_chiplet_id = req.val0;
+ gru_check_context_placement(gts);
+ }
+ break;
+ case sco_gseg_owner:
+ /* Register the current task as the GSEG owner */
+ gts->ts_tgid_owner = current->tgid;
+ break;
+ case sco_cch_req_slice:
+ /* Set the CCH slice option */
+ gts->ts_cch_req_slice = req.val1 & 3;
+ break;
+ default:
+ ret = -EINVAL;
+ }
gru_unlock_gts(gts);
- return 0;
+ return ret;
}
diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
index bbefe77c67a..104a05f6b73 100644
--- a/drivers/misc/sgi-gru/grufile.c
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -6,7 +6,7 @@
* This file supports the user system call for file open, close, mmap, etc.
* This also incudes the driver initialization code.
*
- * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2008-2014 Silicon Graphics, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -29,13 +29,15 @@
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/io.h>
-#include <linux/smp_lock.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/miscdevice.h>
#include <linux/interrupt.h>
#include <linux/proc_fs.h>
#include <linux/uaccess.h>
+#ifdef CONFIG_X86_64
+#include <asm/uv/uv_irq.h>
+#endif
#include <asm/uv/uv.h>
#include "gru.h"
#include "grulib.h"
@@ -46,6 +48,7 @@
struct gru_blade_state *gru_base[GRU_MAX_BLADES] __read_mostly;
unsigned long gru_start_paddr __read_mostly;
+void *gru_start_vaddr __read_mostly;
unsigned long gru_end_paddr __read_mostly;
unsigned int gru_max_gids __read_mostly;
struct gru_stats_s gru_stats;
@@ -53,9 +56,13 @@ struct gru_stats_s gru_stats;
/* Guaranteed user available resources on each node */
static int max_user_cbrs, max_user_dsr_bytes;
-static struct file_operations gru_fops;
static struct miscdevice gru_miscdev;
+static int gru_supported(void)
+{
+ return is_uv_system() &&
+ (uv_hub_info->hub_revision < UV3_HUB_REVISION_BASE);
+}
/*
* gru_vma_close
@@ -93,7 +100,7 @@ static void gru_vma_close(struct vm_area_struct *vma)
/*
* gru_file_mmap
*
- * Called when mmaping the device. Initializes the vma with a fault handler
+ * Called when mmapping the device. Initializes the vma with a fault handler
* and private data structure necessary to allocate, track, and free the
* underlying pages.
*/
@@ -106,9 +113,8 @@ static int gru_file_mmap(struct file *file, struct vm_area_struct *vma)
vma->vm_end & (GRU_GSEG_PAGESIZE - 1))
return -EINVAL;
- vma->vm_flags |=
- (VM_IO | VM_DONTCOPY | VM_LOCKED | VM_DONTEXPAND | VM_PFNMAP |
- VM_RESERVED);
+ vma->vm_flags |= VM_IO | VM_PFNMAP | VM_LOCKED |
+ VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
vma->vm_page_prot = PAGE_SHARED;
vma->vm_ops = &gru_vm_ops;
@@ -131,15 +137,12 @@ static int gru_create_new_context(unsigned long arg)
struct gru_vma_data *vdata;
int ret = -EINVAL;
-
if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
return -EFAULT;
- if (req.data_segment_bytes == 0 ||
- req.data_segment_bytes > max_user_dsr_bytes)
+ if (req.data_segment_bytes > max_user_dsr_bytes)
return -EINVAL;
- if (!req.control_blocks || !req.maximum_thread_count ||
- req.control_blocks > max_user_cbrs)
+ if (req.control_blocks > max_user_cbrs || !req.maximum_thread_count)
return -EINVAL;
if (!(req.options & GRU_OPT_MISS_MASK))
@@ -153,6 +156,7 @@ static int gru_create_new_context(unsigned long arg)
vdata->vd_dsr_au_count =
GRU_DS_BYTES_TO_AU(req.data_segment_bytes);
vdata->vd_cbr_au_count = GRU_CB_COUNT_TO_AU(req.control_blocks);
+ vdata->vd_tlb_preload_count = req.tlb_preload_count;
ret = 0;
}
up_write(&current->mm->mmap_sem);
@@ -173,6 +177,7 @@ static long gru_get_config_info(unsigned long arg)
nodesperblade = 2;
else
nodesperblade = 1;
+ memset(&info, 0, sizeof(info));
info.cpus = num_online_cpus();
info.nodes = num_online_nodes();
info.blades = info.nodes / nodesperblade;
@@ -184,41 +189,6 @@ static long gru_get_config_info(unsigned long arg)
}
/*
- * Get GRU chiplet status
- */
-static long gru_get_chiplet_status(unsigned long arg)
-{
- struct gru_state *gru;
- struct gru_chiplet_info info;
-
- if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
- return -EFAULT;
-
- if (info.node == -1)
- info.node = numa_node_id();
- if (info.node >= num_possible_nodes() ||
- info.chiplet >= GRU_CHIPLETS_PER_HUB ||
- info.node < 0 || info.chiplet < 0)
- return -EINVAL;
-
- info.blade = uv_node_to_blade_id(info.node);
- gru = get_gru(info.blade, info.chiplet);
-
- info.total_dsr_bytes = GRU_NUM_DSR_BYTES;
- info.total_cbr = GRU_NUM_CB;
- info.total_user_dsr_bytes = GRU_NUM_DSR_BYTES -
- gru->gs_reserved_dsr_bytes;
- info.total_user_cbr = GRU_NUM_CB - gru->gs_reserved_cbrs;
- info.free_user_dsr_bytes = hweight64(gru->gs_dsr_map) *
- GRU_DSR_AU_BYTES;
- info.free_user_cbr = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
-
- if (copy_to_user((void __user *)arg, &info, sizeof(info)))
- return -EFAULT;
- return 0;
-}
-
-/*
* gru_file_unlocked_ioctl
*
* Called to update file attributes via IOCTL calls.
@@ -228,14 +198,14 @@ static long gru_file_unlocked_ioctl(struct file *file, unsigned int req,
{
int err = -EBADRQC;
- gru_dbg(grudev, "file %p\n", file);
+ gru_dbg(grudev, "file %p, req 0x%x, 0x%lx\n", file, req, arg);
switch (req) {
case GRU_CREATE_CONTEXT:
err = gru_create_new_context(arg);
break;
- case GRU_SET_TASK_SLICE:
- err = gru_set_task_slice(arg);
+ case GRU_SET_CONTEXT_OPTION:
+ err = gru_set_context_option(arg);
break;
case GRU_USER_GET_EXCEPTION_DETAIL:
err = gru_get_exception_detail(arg);
@@ -243,18 +213,24 @@ static long gru_file_unlocked_ioctl(struct file *file, unsigned int req,
case GRU_USER_UNLOAD_CONTEXT:
err = gru_user_unload_context(arg);
break;
- case GRU_GET_CHIPLET_STATUS:
- err = gru_get_chiplet_status(arg);
- break;
case GRU_USER_FLUSH_TLB:
err = gru_user_flush_tlb(arg);
break;
case GRU_USER_CALL_OS:
err = gru_handle_user_call_os(arg);
break;
+ case GRU_GET_GSEG_STATISTICS:
+ err = gru_get_gseg_statistics(arg);
+ break;
+ case GRU_KTEST:
+ err = gru_ktest(arg);
+ break;
case GRU_GET_CONFIG_INFO:
err = gru_get_config_info(arg);
break;
+ case GRU_DUMP_CHIPLET_STATE:
+ err = gru_dump_chiplet_request(arg);
+ break;
}
return err;
}
@@ -264,25 +240,25 @@ static long gru_file_unlocked_ioctl(struct file *file, unsigned int req,
* system.
*/
static void gru_init_chiplet(struct gru_state *gru, unsigned long paddr,
- void *vaddr, int nid, int bid, int grunum)
+ void *vaddr, int blade_id, int chiplet_id)
{
spin_lock_init(&gru->gs_lock);
spin_lock_init(&gru->gs_asid_lock);
gru->gs_gru_base_paddr = paddr;
gru->gs_gru_base_vaddr = vaddr;
- gru->gs_gid = bid * GRU_CHIPLETS_PER_BLADE + grunum;
- gru->gs_blade = gru_base[bid];
- gru->gs_blade_id = bid;
+ gru->gs_gid = blade_id * GRU_CHIPLETS_PER_BLADE + chiplet_id;
+ gru->gs_blade = gru_base[blade_id];
+ gru->gs_blade_id = blade_id;
+ gru->gs_chiplet_id = chiplet_id;
gru->gs_cbr_map = (GRU_CBR_AU == 64) ? ~0 : (1UL << GRU_CBR_AU) - 1;
gru->gs_dsr_map = (1UL << GRU_DSR_AU) - 1;
gru->gs_asid_limit = MAX_ASID;
gru_tgh_flush_init(gru);
if (gru->gs_gid >= gru_max_gids)
gru_max_gids = gru->gs_gid + 1;
- gru_dbg(grudev, "bid %d, nid %d, gid %d, vaddr %p (0x%lx)\n",
- bid, nid, gru->gs_gid, gru->gs_gru_base_vaddr,
+ gru_dbg(grudev, "bid %d, gid %d, vaddr %p (0x%lx)\n",
+ blade_id, gru->gs_gid, gru->gs_gru_base_vaddr,
gru->gs_gru_base_paddr);
- gru_kservices_init(gru);
}
static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr)
@@ -297,11 +273,9 @@ static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr)
max_user_cbrs = GRU_NUM_CB;
max_user_dsr_bytes = GRU_NUM_DSR_BYTES;
- for_each_online_node(nid) {
- bid = uv_node_to_blade_id(nid);
- pnode = uv_node_to_pnode(nid);
- if (bid < 0 || gru_base[bid])
- continue;
+ for_each_possible_blade(bid) {
+ pnode = uv_blade_to_pnode(bid);
+ nid = uv_blade_to_memory_nid(bid);/* -1 if no memory on blade */
page = alloc_pages_node(nid, GFP_KERNEL, order);
if (!page)
goto fail;
@@ -309,6 +283,7 @@ static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr)
memset(gru_base[bid], 0, sizeof(struct gru_blade_state));
gru_base[bid]->bs_lru_gru = &gru_base[bid]->bs_grus[0];
spin_lock_init(&gru_base[bid]->bs_lock);
+ init_rwsem(&gru_base[bid]->bs_kgts_sema);
dsrbytes = 0;
cbrs = 0;
@@ -317,7 +292,7 @@ static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr)
chip++, gru++) {
paddr = gru_chiplet_paddr(gru_base_paddr, pnode, chip);
vaddr = gru_chiplet_vaddr(gru_base_vaddr, pnode, chip);
- gru_init_chiplet(gru, paddr, vaddr, nid, bid, chip);
+ gru_init_chiplet(gru, paddr, vaddr, bid, chip);
n = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
cbrs = max(cbrs, n);
n = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES;
@@ -330,39 +305,215 @@ static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr)
return 0;
fail:
- for (nid--; nid >= 0; nid--)
- free_pages((unsigned long)gru_base[nid], order);
+ for (bid--; bid >= 0; bid--)
+ free_pages((unsigned long)gru_base[bid], order);
return -ENOMEM;
}
-#ifdef CONFIG_IA64
+static void gru_free_tables(void)
+{
+ int bid;
+ int order = get_order(sizeof(struct gru_state) *
+ GRU_CHIPLETS_PER_BLADE);
+
+ for (bid = 0; bid < GRU_MAX_BLADES; bid++)
+ free_pages((unsigned long)gru_base[bid], order);
+}
-static int get_base_irq(void)
+static unsigned long gru_chiplet_cpu_to_mmr(int chiplet, int cpu, int *corep)
{
- return IRQ_GRU;
+ unsigned long mmr = 0;
+ int core;
+
+ /*
+ * We target the cores of a blade and not the hyperthreads themselves.
+ * There is a max of 8 cores per socket and 2 sockets per blade,
+ * making for a max total of 16 cores (i.e., 16 CPUs without
+ * hyperthreading and 32 CPUs with hyperthreading).
+ */
+ core = uv_cpu_core_number(cpu) + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu);
+ if (core >= GRU_NUM_TFM || uv_cpu_ht_number(cpu))
+ return 0;
+
+ if (chiplet == 0) {
+ mmr = UVH_GR0_TLB_INT0_CONFIG +
+ core * (UVH_GR0_TLB_INT1_CONFIG - UVH_GR0_TLB_INT0_CONFIG);
+ } else if (chiplet == 1) {
+ mmr = UVH_GR1_TLB_INT0_CONFIG +
+ core * (UVH_GR1_TLB_INT1_CONFIG - UVH_GR1_TLB_INT0_CONFIG);
+ } else {
+ BUG();
+ }
+
+ *corep = core;
+ return mmr;
}
-#elif defined CONFIG_X86_64
+#ifdef CONFIG_IA64
-static void noop(unsigned int irq)
+static int gru_irq_count[GRU_CHIPLETS_PER_BLADE];
+
+static void gru_noop(struct irq_data *d)
{
}
-static struct irq_chip gru_chip = {
- .name = "gru",
- .mask = noop,
- .unmask = noop,
- .ack = noop,
+static struct irq_chip gru_chip[GRU_CHIPLETS_PER_BLADE] = {
+ [0 ... GRU_CHIPLETS_PER_BLADE - 1] {
+ .irq_mask = gru_noop,
+ .irq_unmask = gru_noop,
+ .irq_ack = gru_noop
+ }
};
-static int get_base_irq(void)
+static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name,
+ irq_handler_t irq_handler, int cpu, int blade)
+{
+ unsigned long mmr;
+ int irq = IRQ_GRU + chiplet;
+ int ret, core;
+
+ mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core);
+ if (mmr == 0)
+ return 0;
+
+ if (gru_irq_count[chiplet] == 0) {
+ gru_chip[chiplet].name = irq_name;
+ ret = irq_set_chip(irq, &gru_chip[chiplet]);
+ if (ret) {
+ printk(KERN_ERR "%s: set_irq_chip failed, errno=%d\n",
+ GRU_DRIVER_ID_STR, -ret);
+ return ret;
+ }
+
+ ret = request_irq(irq, irq_handler, 0, irq_name, NULL);
+ if (ret) {
+ printk(KERN_ERR "%s: request_irq failed, errno=%d\n",
+ GRU_DRIVER_ID_STR, -ret);
+ return ret;
+ }
+ }
+ gru_irq_count[chiplet]++;
+
+ return 0;
+}
+
+static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade)
+{
+ unsigned long mmr;
+ int core, irq = IRQ_GRU + chiplet;
+
+ if (gru_irq_count[chiplet] == 0)
+ return;
+
+ mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core);
+ if (mmr == 0)
+ return;
+
+ if (--gru_irq_count[chiplet] == 0)
+ free_irq(irq, NULL);
+}
+
+#elif defined CONFIG_X86_64
+
+static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name,
+ irq_handler_t irq_handler, int cpu, int blade)
{
- set_irq_chip(IRQ_GRU, &gru_chip);
- set_irq_chip(IRQ_GRU + 1, &gru_chip);
- return IRQ_GRU;
+ unsigned long mmr;
+ int irq, core;
+ int ret;
+
+ mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core);
+ if (mmr == 0)
+ return 0;
+
+ irq = uv_setup_irq(irq_name, cpu, blade, mmr, UV_AFFINITY_CPU);
+ if (irq < 0) {
+ printk(KERN_ERR "%s: uv_setup_irq failed, errno=%d\n",
+ GRU_DRIVER_ID_STR, -irq);
+ return irq;
+ }
+
+ ret = request_irq(irq, irq_handler, 0, irq_name, NULL);
+ if (ret) {
+ uv_teardown_irq(irq);
+ printk(KERN_ERR "%s: request_irq failed, errno=%d\n",
+ GRU_DRIVER_ID_STR, -ret);
+ return ret;
+ }
+ gru_base[blade]->bs_grus[chiplet].gs_irq[core] = irq;
+ return 0;
}
+
+static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade)
+{
+ int irq, core;
+ unsigned long mmr;
+
+ mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core);
+ if (mmr) {
+ irq = gru_base[blade]->bs_grus[chiplet].gs_irq[core];
+ if (irq) {
+ free_irq(irq, NULL);
+ uv_teardown_irq(irq);
+ }
+ }
+}
+
#endif
+static void gru_teardown_tlb_irqs(void)
+{
+ int blade;
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ blade = uv_cpu_to_blade_id(cpu);
+ gru_chiplet_teardown_tlb_irq(0, cpu, blade);
+ gru_chiplet_teardown_tlb_irq(1, cpu, blade);
+ }
+ for_each_possible_blade(blade) {
+ if (uv_blade_nr_possible_cpus(blade))
+ continue;
+ gru_chiplet_teardown_tlb_irq(0, 0, blade);
+ gru_chiplet_teardown_tlb_irq(1, 0, blade);
+ }
+}
+
+static int gru_setup_tlb_irqs(void)
+{
+ int blade;
+ int cpu;
+ int ret;
+
+ for_each_online_cpu(cpu) {
+ blade = uv_cpu_to_blade_id(cpu);
+ ret = gru_chiplet_setup_tlb_irq(0, "GRU0_TLB", gru0_intr, cpu, blade);
+ if (ret != 0)
+ goto exit1;
+
+ ret = gru_chiplet_setup_tlb_irq(1, "GRU1_TLB", gru1_intr, cpu, blade);
+ if (ret != 0)
+ goto exit1;
+ }
+ for_each_possible_blade(blade) {
+ if (uv_blade_nr_possible_cpus(blade))
+ continue;
+ ret = gru_chiplet_setup_tlb_irq(0, "GRU0_TLB", gru_intr_mblade, 0, blade);
+ if (ret != 0)
+ goto exit1;
+
+ ret = gru_chiplet_setup_tlb_irq(1, "GRU1_TLB", gru_intr_mblade, 0, blade);
+ if (ret != 0)
+ goto exit1;
+ }
+
+ return 0;
+
+exit1:
+ gru_teardown_tlb_irqs();
+ return ret;
+}
+
/*
* gru_init
*
@@ -370,11 +521,9 @@ static int get_base_irq(void)
*/
static int __init gru_init(void)
{
- int ret, irq, chip;
- char id[10];
- void *gru_start_vaddr;
+ int ret;
- if (!is_uv_system())
+ if (!gru_supported())
return 0;
#if defined CONFIG_IA64
@@ -387,83 +536,63 @@ static int __init gru_init(void)
gru_end_paddr = gru_start_paddr + GRU_MAX_BLADES * GRU_SIZE;
printk(KERN_INFO "GRU space: 0x%lx - 0x%lx\n",
gru_start_paddr, gru_end_paddr);
- irq = get_base_irq();
- for (chip = 0; chip < GRU_CHIPLETS_PER_BLADE; chip++) {
- ret = request_irq(irq + chip, gru_intr, 0, id, NULL);
- /* TODO: fix irq handling on x86. For now ignore failure because
- * interrupts are not required & not yet fully supported */
- if (ret) {
- printk(KERN_WARNING
- "!!!WARNING: GRU ignoring request failure!!!\n");
- ret = 0;
- }
- if (ret) {
- printk(KERN_ERR "%s: request_irq failed\n",
- GRU_DRIVER_ID_STR);
- goto exit1;
- }
- }
-
ret = misc_register(&gru_miscdev);
if (ret) {
printk(KERN_ERR "%s: misc_register failed\n",
GRU_DRIVER_ID_STR);
- goto exit1;
+ goto exit0;
}
ret = gru_proc_init();
if (ret) {
printk(KERN_ERR "%s: proc init failed\n", GRU_DRIVER_ID_STR);
- goto exit2;
+ goto exit1;
}
ret = gru_init_tables(gru_start_paddr, gru_start_vaddr);
if (ret) {
printk(KERN_ERR "%s: init tables failed\n", GRU_DRIVER_ID_STR);
- goto exit3;
+ goto exit2;
}
+ ret = gru_setup_tlb_irqs();
+ if (ret != 0)
+ goto exit3;
+
+ gru_kservices_init();
+
printk(KERN_INFO "%s: v%s\n", GRU_DRIVER_ID_STR,
GRU_DRIVER_VERSION_STR);
return 0;
exit3:
- gru_proc_exit();
+ gru_free_tables();
exit2:
- misc_deregister(&gru_miscdev);
+ gru_proc_exit();
exit1:
- for (--chip; chip >= 0; chip--)
- free_irq(irq + chip, NULL);
+ misc_deregister(&gru_miscdev);
+exit0:
return ret;
}
static void __exit gru_exit(void)
{
- int i, bid, gid;
- int order = get_order(sizeof(struct gru_state) *
- GRU_CHIPLETS_PER_BLADE);
-
- if (!is_uv_system())
+ if (!gru_supported())
return;
- for (i = 0; i < GRU_CHIPLETS_PER_BLADE; i++)
- free_irq(IRQ_GRU + i, NULL);
-
- foreach_gid(gid)
- gru_kservices_exit(GID_TO_GRU(gid));
-
- for (bid = 0; bid < GRU_MAX_BLADES; bid++)
- free_pages((unsigned long)gru_base[bid], order);
-
+ gru_teardown_tlb_irqs();
+ gru_kservices_exit();
+ gru_free_tables();
misc_deregister(&gru_miscdev);
gru_proc_exit();
}
-static struct file_operations gru_fops = {
+static const struct file_operations gru_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = gru_file_unlocked_ioctl,
.mmap = gru_file_mmap,
+ .llseek = noop_llseek,
};
static struct miscdevice gru_miscdev = {
@@ -472,7 +601,7 @@ static struct miscdevice gru_miscdev = {
.fops = &gru_fops,
};
-struct vm_operations_struct gru_vm_ops = {
+const struct vm_operations_struct gru_vm_ops = {
.close = gru_vma_close,
.fault = gru_fault,
};
diff --git a/drivers/misc/sgi-gru/gruhandles.c b/drivers/misc/sgi-gru/gruhandles.c
index 9b7ccb32869..2f30badc6ff 100644
--- a/drivers/misc/sgi-gru/gruhandles.c
+++ b/drivers/misc/sgi-gru/gruhandles.c
@@ -27,9 +27,11 @@
#ifdef CONFIG_IA64
#include <asm/processor.h>
#define GRU_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10)
+#define CLKS2NSEC(c) ((c) *1000000000 / local_cpu_data->itc_freq)
#else
#include <asm/tsc.h>
#define GRU_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
+#define CLKS2NSEC(c) ((c) * 1000000 / tsc_khz)
#endif
/* Extract the status field from a kernel handle */
@@ -39,54 +41,73 @@ struct mcs_op_statistic mcs_op_statistics[mcsop_last];
static void update_mcs_stats(enum mcs_op op, unsigned long clks)
{
+ unsigned long nsec;
+
+ nsec = CLKS2NSEC(clks);
atomic_long_inc(&mcs_op_statistics[op].count);
- atomic_long_add(clks, &mcs_op_statistics[op].total);
- if (mcs_op_statistics[op].max < clks)
- mcs_op_statistics[op].max = clks;
+ atomic_long_add(nsec, &mcs_op_statistics[op].total);
+ if (mcs_op_statistics[op].max < nsec)
+ mcs_op_statistics[op].max = nsec;
}
static void start_instruction(void *h)
{
unsigned long *w0 = h;
- wmb(); /* setting CMD bit must be last */
- *w0 = *w0 | 1;
+ wmb(); /* setting CMD/STATUS bits must be last */
+ *w0 = *w0 | 0x20001;
gru_flush_cache(h);
}
+static void report_instruction_timeout(void *h)
+{
+ unsigned long goff = GSEGPOFF((unsigned long)h);
+ char *id = "???";
+
+ if (TYPE_IS(CCH, goff))
+ id = "CCH";
+ else if (TYPE_IS(TGH, goff))
+ id = "TGH";
+ else if (TYPE_IS(TFH, goff))
+ id = "TFH";
+
+ panic(KERN_ALERT "GRU %p (%s) is malfunctioning\n", h, id);
+}
+
static int wait_instruction_complete(void *h, enum mcs_op opc)
{
int status;
- cycles_t start_time = get_cycles();
+ unsigned long start_time = get_cycles();
while (1) {
cpu_relax();
status = GET_MSEG_HANDLE_STATUS(h);
if (status != CCHSTATUS_ACTIVE)
break;
- if (GRU_OPERATION_TIMEOUT < (get_cycles() - start_time))
- panic("GRU %p is malfunctioning\n", h);
+ if (GRU_OPERATION_TIMEOUT < (get_cycles() - start_time)) {
+ report_instruction_timeout(h);
+ start_time = get_cycles();
+ }
}
if (gru_options & OPT_STATS)
update_mcs_stats(opc, get_cycles() - start_time);
return status;
}
-int cch_allocate(struct gru_context_configuration_handle *cch,
- int asidval, int sizeavail, unsigned long cbrmap,
- unsigned long dsrmap)
+int cch_allocate(struct gru_context_configuration_handle *cch)
{
- int i;
+ int ret;
- for (i = 0; i < 8; i++) {
- cch->asid[i] = (asidval++);
- cch->sizeavail[i] = sizeavail;
- }
- cch->dsr_allocation_map = dsrmap;
- cch->cbr_allocation_map = cbrmap;
cch->opc = CCHOP_ALLOCATE;
start_instruction(cch);
- return wait_instruction_complete(cch, cchop_allocate);
+ ret = wait_instruction_complete(cch, cchop_allocate);
+
+ /*
+ * Stop speculation into the GSEG being mapped by the previous ALLOCATE.
+ * The GSEG memory does not exist until the ALLOCATE completes.
+ */
+ sync_core();
+ return ret;
}
int cch_start(struct gru_context_configuration_handle *cch)
@@ -105,9 +126,18 @@ int cch_interrupt(struct gru_context_configuration_handle *cch)
int cch_deallocate(struct gru_context_configuration_handle *cch)
{
+ int ret;
+
cch->opc = CCHOP_DEALLOCATE;
start_instruction(cch);
- return wait_instruction_complete(cch, cchop_deallocate);
+ ret = wait_instruction_complete(cch, cchop_deallocate);
+
+ /*
+ * Stop speculation into the GSEG being unmapped by the previous
+ * DEALLOCATE.
+ */
+ sync_core();
+ return ret;
}
int cch_interrupt_sync(struct gru_context_configuration_handle
@@ -135,17 +165,20 @@ int tgh_invalidate(struct gru_tlb_global_handle *tgh,
return wait_instruction_complete(tgh, tghop_invalidate);
}
-void tfh_write_only(struct gru_tlb_fault_handle *tfh,
- unsigned long pfn, unsigned long vaddr,
- int asid, int dirty, int pagesize)
+int tfh_write_only(struct gru_tlb_fault_handle *tfh,
+ unsigned long paddr, int gaa,
+ unsigned long vaddr, int asid, int dirty,
+ int pagesize)
{
tfh->fillasid = asid;
tfh->fillvaddr = vaddr;
- tfh->pfn = pfn;
+ tfh->pfn = paddr >> GRU_PADDR_SHIFT;
+ tfh->gaa = gaa;
tfh->dirty = dirty;
tfh->pagesize = pagesize;
tfh->opc = TFHOP_WRITE_ONLY;
start_instruction(tfh);
+ return wait_instruction_complete(tfh, tfhop_write_only);
}
void tfh_write_restart(struct gru_tlb_fault_handle *tfh,
diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h
index 1ed74d7508c..3f998b924d8 100644
--- a/drivers/misc/sgi-gru/gruhandles.h
+++ b/drivers/misc/sgi-gru/gruhandles.h
@@ -39,7 +39,6 @@
#define GRU_NUM_CBE 128
#define GRU_NUM_TFH 128
#define GRU_NUM_CCH 16
-#define GRU_NUM_GSH 1
/* Maximum resource counts that can be reserved by user programs */
#define GRU_NUM_USER_CBR GRU_NUM_CBE
@@ -56,7 +55,6 @@
#define GRU_CBE_BASE (GRU_MCS_BASE + 0x10000)
#define GRU_TFH_BASE (GRU_MCS_BASE + 0x18000)
#define GRU_CCH_BASE (GRU_MCS_BASE + 0x20000)
-#define GRU_GSH_BASE (GRU_MCS_BASE + 0x30000)
/* User gseg constants */
#define GRU_GSEG_STRIDE (4 * 1024 * 1024)
@@ -93,6 +91,12 @@
/* Convert an arbitrary handle address to the beginning of the GRU segment */
#define GRUBASE(h) ((void *)((unsigned long)(h) & ~(GRU_SIZE - 1)))
+/* Test a valid handle address to determine the type */
+#define TYPE_IS(hn, h) ((h) >= GRU_##hn##_BASE && (h) < \
+ GRU_##hn##_BASE + GRU_NUM_##hn * GRU_HANDLE_STRIDE && \
+ (((h) & (GRU_HANDLE_STRIDE - 1)) == 0))
+
+
/* General addressing macros. */
static inline void *get_gseg_base_address(void *base, int ctxnum)
{
@@ -160,6 +164,16 @@ static inline void *gru_chiplet_vaddr(void *vaddr, int pnode, int chiplet)
return vaddr + GRU_SIZE * (2 * pnode + chiplet);
}
+static inline struct gru_control_block_extended *gru_tfh_to_cbe(
+ struct gru_tlb_fault_handle *tfh)
+{
+ unsigned long cbe;
+
+ cbe = (unsigned long)tfh - GRU_TFH_BASE + GRU_CBE_BASE;
+ return (struct gru_control_block_extended*)cbe;
+}
+
+
/*
@@ -238,6 +252,17 @@ enum gru_tgh_state {
TGHSTATE_RESTART_CTX,
};
+enum gru_tgh_cause {
+ TGHCAUSE_RR_ECC,
+ TGHCAUSE_TLB_ECC,
+ TGHCAUSE_LRU_ECC,
+ TGHCAUSE_PS_ECC,
+ TGHCAUSE_MUL_ERR,
+ TGHCAUSE_DATA_ERR,
+ TGHCAUSE_SW_FORCE
+};
+
+
/*
* TFH - TLB Global Handle
* Used for TLB dropins into the GRU TLB.
@@ -251,15 +276,15 @@ struct gru_tlb_fault_handle {
unsigned int fill1:9;
unsigned int status:2;
- unsigned int fill2:1;
- unsigned int color:1;
+ unsigned int fill2:2;
unsigned int state:3;
unsigned int fill3:1;
- unsigned int cause:7; /* DW 0 - high 32 */
+ unsigned int cause:6;
+ unsigned int cb_int:1;
unsigned int fill4:1;
- unsigned int indexway:12;
+ unsigned int indexway:12; /* DW 0 - high 32 */
unsigned int fill5:4;
unsigned int ctxnum:4;
@@ -442,6 +467,12 @@ struct gru_control_block_extended {
unsigned int cbrexecstatus:8;
};
+/* CBE fields for active BCOPY instructions */
+#define cbe_baddr0 idef1upd
+#define cbe_baddr1 idef3upd
+#define cbe_src_cl idef6cpy
+#define cbe_nelemcur idef5upd
+
enum gru_cbr_state {
CBRSTATE_INACTIVE,
CBRSTATE_IDLE,
@@ -457,21 +488,7 @@ enum gru_cbr_state {
CBRSTATE_BUSY_INTERRUPT,
};
-/* CBE cbrexecstatus bits */
-#define CBR_EXS_ABORT_OCC_BIT 0
-#define CBR_EXS_INT_OCC_BIT 1
-#define CBR_EXS_PENDING_BIT 2
-#define CBR_EXS_QUEUED_BIT 3
-#define CBR_EXS_TLBHW_BIT 4
-#define CBR_EXS_EXCEPTION_BIT 5
-
-#define CBR_EXS_ABORT_OCC (1 << CBR_EXS_ABORT_OCC_BIT)
-#define CBR_EXS_INT_OCC (1 << CBR_EXS_INT_OCC_BIT)
-#define CBR_EXS_PENDING (1 << CBR_EXS_PENDING_BIT)
-#define CBR_EXS_QUEUED (1 << CBR_EXS_QUEUED_BIT)
-#define CBR_EXS_TLBHW (1 << CBR_EXS_TLBHW_BIT)
-#define CBR_EXS_EXCEPTION (1 << CBR_EXS_EXCEPTION_BIT)
-
+/* CBE cbrexecstatus bits - defined in gru_instructions.h*/
/* CBE ecause bits - defined in gru_instructions.h */
/*
@@ -495,9 +512,7 @@ enum gru_cbr_state {
/* minimum TLB purge count to ensure a full purge */
#define GRUMAXINVAL 1024UL
-int cch_allocate(struct gru_context_configuration_handle *cch,
- int asidval, int sizeavail, unsigned long cbrmap, unsigned long dsrmap);
-
+int cch_allocate(struct gru_context_configuration_handle *cch);
int cch_start(struct gru_context_configuration_handle *cch);
int cch_interrupt(struct gru_context_configuration_handle *cch);
int cch_deallocate(struct gru_context_configuration_handle *cch);
@@ -505,8 +520,8 @@ int cch_interrupt_sync(struct gru_context_configuration_handle *cch);
int tgh_invalidate(struct gru_tlb_global_handle *tgh, unsigned long vaddr,
unsigned long vaddrmask, int asid, int pagesize, int global, int n,
unsigned short ctxbitmap);
-void tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long pfn,
- unsigned long vaddr, int asid, int dirty, int pagesize);
+int tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long paddr,
+ int gaa, unsigned long vaddr, int asid, int dirty, int pagesize);
void tfh_write_restart(struct gru_tlb_fault_handle *tfh, unsigned long paddr,
int gaa, unsigned long vaddr, int asid, int dirty, int pagesize);
void tfh_restart(struct gru_tlb_fault_handle *tfh);
diff --git a/drivers/misc/sgi-gru/grukdump.c b/drivers/misc/sgi-gru/grukdump.c
new file mode 100644
index 00000000000..a3700a56b8f
--- /dev/null
+++ b/drivers/misc/sgi-gru/grukdump.c
@@ -0,0 +1,234 @@
+/*
+ * SN Platform GRU Driver
+ *
+ * Dump GRU State
+ *
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/delay.h>
+#include <linux/bitops.h>
+#include <asm/uv/uv_hub.h>
+#include "gru.h"
+#include "grutables.h"
+#include "gruhandles.h"
+#include "grulib.h"
+
+#define CCH_LOCK_ATTEMPTS 10
+
+static int gru_user_copy_handle(void __user **dp, void *s)
+{
+ if (copy_to_user(*dp, s, GRU_HANDLE_BYTES))
+ return -1;
+ *dp += GRU_HANDLE_BYTES;
+ return 0;
+}
+
+static int gru_dump_context_data(void *grubase,
+ struct gru_context_configuration_handle *cch,
+ void __user *ubuf, int ctxnum, int dsrcnt,
+ int flush_cbrs)
+{
+ void *cb, *cbe, *tfh, *gseg;
+ int i, scr;
+
+ gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
+ cb = gseg + GRU_CB_BASE;
+ cbe = grubase + GRU_CBE_BASE;
+ tfh = grubase + GRU_TFH_BASE;
+
+ for_each_cbr_in_allocation_map(i, &cch->cbr_allocation_map, scr) {
+ if (flush_cbrs)
+ gru_flush_cache(cb);
+ if (gru_user_copy_handle(&ubuf, cb))
+ goto fail;
+ if (gru_user_copy_handle(&ubuf, tfh + i * GRU_HANDLE_STRIDE))
+ goto fail;
+ if (gru_user_copy_handle(&ubuf, cbe + i * GRU_HANDLE_STRIDE))
+ goto fail;
+ cb += GRU_HANDLE_STRIDE;
+ }
+ if (dsrcnt)
+ memcpy(ubuf, gseg + GRU_DS_BASE, dsrcnt * GRU_HANDLE_STRIDE);
+ return 0;
+
+fail:
+ return -EFAULT;
+}
+
+static int gru_dump_tfm(struct gru_state *gru,
+ void __user *ubuf, void __user *ubufend)
+{
+ struct gru_tlb_fault_map *tfm;
+ int i, ret, bytes;
+
+ bytes = GRU_NUM_TFM * GRU_CACHE_LINE_BYTES;
+ if (bytes > ubufend - ubuf)
+ ret = -EFBIG;
+
+ for (i = 0; i < GRU_NUM_TFM; i++) {
+ tfm = get_tfm(gru->gs_gru_base_vaddr, i);
+ if (gru_user_copy_handle(&ubuf, tfm))
+ goto fail;
+ }
+ return GRU_NUM_TFM * GRU_CACHE_LINE_BYTES;
+
+fail:
+ return -EFAULT;
+}
+
+static int gru_dump_tgh(struct gru_state *gru,
+ void __user *ubuf, void __user *ubufend)
+{
+ struct gru_tlb_global_handle *tgh;
+ int i, ret, bytes;
+
+ bytes = GRU_NUM_TGH * GRU_CACHE_LINE_BYTES;
+ if (bytes > ubufend - ubuf)
+ ret = -EFBIG;
+
+ for (i = 0; i < GRU_NUM_TGH; i++) {
+ tgh = get_tgh(gru->gs_gru_base_vaddr, i);
+ if (gru_user_copy_handle(&ubuf, tgh))
+ goto fail;
+ }
+ return GRU_NUM_TGH * GRU_CACHE_LINE_BYTES;
+
+fail:
+ return -EFAULT;
+}
+
+static int gru_dump_context(struct gru_state *gru, int ctxnum,
+ void __user *ubuf, void __user *ubufend, char data_opt,
+ char lock_cch, char flush_cbrs)
+{
+ struct gru_dump_context_header hdr;
+ struct gru_dump_context_header __user *uhdr = ubuf;
+ struct gru_context_configuration_handle *cch, *ubufcch;
+ struct gru_thread_state *gts;
+ int try, cch_locked, cbrcnt = 0, dsrcnt = 0, bytes = 0, ret = 0;
+ void *grubase;
+
+ memset(&hdr, 0, sizeof(hdr));
+ grubase = gru->gs_gru_base_vaddr;
+ cch = get_cch(grubase, ctxnum);
+ for (try = 0; try < CCH_LOCK_ATTEMPTS; try++) {
+ cch_locked = trylock_cch_handle(cch);
+ if (cch_locked)
+ break;
+ msleep(1);
+ }
+
+ ubuf += sizeof(hdr);
+ ubufcch = ubuf;
+ if (gru_user_copy_handle(&ubuf, cch)) {
+ if (cch_locked)
+ unlock_cch_handle(cch);
+ return -EFAULT;
+ }
+ if (cch_locked)
+ ubufcch->delresp = 0;
+ bytes = sizeof(hdr) + GRU_CACHE_LINE_BYTES;
+
+ if (cch_locked || !lock_cch) {
+ gts = gru->gs_gts[ctxnum];
+ if (gts && gts->ts_vma) {
+ hdr.pid = gts->ts_tgid_owner;
+ hdr.vaddr = gts->ts_vma->vm_start;
+ }
+ if (cch->state != CCHSTATE_INACTIVE) {
+ cbrcnt = hweight64(cch->cbr_allocation_map) *
+ GRU_CBR_AU_SIZE;
+ dsrcnt = data_opt ? hweight32(cch->dsr_allocation_map) *
+ GRU_DSR_AU_CL : 0;
+ }
+ bytes += (3 * cbrcnt + dsrcnt) * GRU_CACHE_LINE_BYTES;
+ if (bytes > ubufend - ubuf)
+ ret = -EFBIG;
+ else
+ ret = gru_dump_context_data(grubase, cch, ubuf, ctxnum,
+ dsrcnt, flush_cbrs);
+ }
+ if (cch_locked)
+ unlock_cch_handle(cch);
+ if (ret)
+ return ret;
+
+ hdr.magic = GRU_DUMP_MAGIC;
+ hdr.gid = gru->gs_gid;
+ hdr.ctxnum = ctxnum;
+ hdr.cbrcnt = cbrcnt;
+ hdr.dsrcnt = dsrcnt;
+ hdr.cch_locked = cch_locked;
+ if (copy_to_user(uhdr, &hdr, sizeof(hdr)))
+ return -EFAULT;
+
+ return bytes;
+}
+
+int gru_dump_chiplet_request(unsigned long arg)
+{
+ struct gru_state *gru;
+ struct gru_dump_chiplet_state_req req;
+ void __user *ubuf;
+ void __user *ubufend;
+ int ctxnum, ret, cnt = 0;
+
+ if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+ return -EFAULT;
+
+ /* Currently, only dump by gid is implemented */
+ if (req.gid >= gru_max_gids || req.gid < 0)
+ return -EINVAL;
+
+ gru = GID_TO_GRU(req.gid);
+ ubuf = req.buf;
+ ubufend = req.buf + req.buflen;
+
+ ret = gru_dump_tfm(gru, ubuf, ubufend);
+ if (ret < 0)
+ goto fail;
+ ubuf += ret;
+
+ ret = gru_dump_tgh(gru, ubuf, ubufend);
+ if (ret < 0)
+ goto fail;
+ ubuf += ret;
+
+ for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) {
+ if (req.ctxnum == ctxnum || req.ctxnum < 0) {
+ ret = gru_dump_context(gru, ctxnum, ubuf, ubufend,
+ req.data_opt, req.lock_cch,
+ req.flush_cbrs);
+ if (ret < 0)
+ goto fail;
+ ubuf += ret;
+ cnt++;
+ }
+ }
+
+ if (copy_to_user((void __user *)arg, &req, sizeof(req)))
+ return -EFAULT;
+ return cnt;
+
+fail:
+ return ret;
+}
diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c
index d8bd7d84a7c..913de07e577 100644
--- a/drivers/misc/sgi-gru/grukservices.c
+++ b/drivers/misc/sgi-gru/grukservices.c
@@ -24,13 +24,15 @@
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
-#include <linux/smp_lock.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/miscdevice.h>
#include <linux/proc_fs.h>
#include <linux/interrupt.h>
#include <linux/uaccess.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <asm/io_apic.h>
#include "gru.h"
#include "grulib.h"
#include "grutables.h"
@@ -45,18 +47,63 @@
* resources. This will likely be replaced when we better understand the
* kernel/user requirements.
*
- * At boot time, the kernel permanently reserves a fixed number of
- * CBRs/DSRs for each cpu to use. The resources are all taken from
- * the GRU chiplet 1 on the blade. This leaves the full set of resources
- * of chiplet 0 available to be allocated to a single user.
+ * Blade percpu resources reserved for kernel use. These resources are
+ * reserved whenever the the kernel context for the blade is loaded. Note
+ * that the kernel context is not guaranteed to be always available. It is
+ * loaded on demand & can be stolen by a user if the user demand exceeds the
+ * kernel demand. The kernel can always reload the kernel context but
+ * a SLEEP may be required!!!.
+ *
+ * Async Overview:
+ *
+ * Each blade has one "kernel context" that owns GRU kernel resources
+ * located on the blade. Kernel drivers use GRU resources in this context
+ * for sending messages, zeroing memory, etc.
+ *
+ * The kernel context is dynamically loaded on demand. If it is not in
+ * use by the kernel, the kernel context can be unloaded & given to a user.
+ * The kernel context will be reloaded when needed. This may require that
+ * a context be stolen from a user.
+ * NOTE: frequent unloading/reloading of the kernel context is
+ * expensive. We are depending on batch schedulers, cpusets, sane
+ * drivers or some other mechanism to prevent the need for frequent
+ * stealing/reloading.
+ *
+ * The kernel context consists of two parts:
+ * - 1 CB & a few DSRs that are reserved for each cpu on the blade.
+ * Each cpu has it's own private resources & does not share them
+ * with other cpus. These resources are used serially, ie,
+ * locked, used & unlocked on each call to a function in
+ * grukservices.
+ * (Now that we have dynamic loading of kernel contexts, I
+ * may rethink this & allow sharing between cpus....)
+ *
+ * - Additional resources can be reserved long term & used directly
+ * by UV drivers located in the kernel. Drivers using these GRU
+ * resources can use asynchronous GRU instructions that send
+ * interrupts on completion.
+ * - these resources must be explicitly locked/unlocked
+ * - locked resources prevent (obviously) the kernel
+ * context from being unloaded.
+ * - drivers using these resource directly issue their own
+ * GRU instruction and must wait/check completion.
+ *
+ * When these resources are reserved, the caller can optionally
+ * associate a wait_queue with the resources and use asynchronous
+ * GRU instructions. When an async GRU instruction completes, the
+ * driver will do a wakeup on the event.
+ *
*/
-/* Blade percpu resources PERMANENTLY reserved for kernel use */
+
+#define ASYNC_HAN_TO_BID(h) ((h) - 1)
+#define ASYNC_BID_TO_HAN(b) ((b) + 1)
+#define ASYNC_HAN_TO_BS(h) gru_base[ASYNC_HAN_TO_BID(h)]
+
#define GRU_NUM_KERNEL_CBR 1
#define GRU_NUM_KERNEL_DSR_BYTES 256
#define GRU_NUM_KERNEL_DSR_CL (GRU_NUM_KERNEL_DSR_BYTES / \
GRU_CACHE_LINE_BYTES)
-#define KERNEL_CTXNUM 15
/* GRU instruction attributes for all instructions */
#define IMA IMA_CB_DELAY
@@ -98,6 +145,120 @@ struct message_header {
#define HSTATUS(mq, h) ((mq) + offsetof(struct message_queue, hstatus[h]))
+/*
+ * Reload the blade's kernel context into a GRU chiplet. Called holding
+ * the bs_kgts_sema for READ. Will steal user contexts if necessary.
+ */
+static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id)
+{
+ struct gru_state *gru;
+ struct gru_thread_state *kgts;
+ void *vaddr;
+ int ctxnum, ncpus;
+
+ up_read(&bs->bs_kgts_sema);
+ down_write(&bs->bs_kgts_sema);
+
+ if (!bs->bs_kgts) {
+ bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0);
+ bs->bs_kgts->ts_user_blade_id = blade_id;
+ }
+ kgts = bs->bs_kgts;
+
+ if (!kgts->ts_gru) {
+ STAT(load_kernel_context);
+ ncpus = uv_blade_nr_possible_cpus(blade_id);
+ kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU(
+ GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs);
+ kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU(
+ GRU_NUM_KERNEL_DSR_BYTES * ncpus +
+ bs->bs_async_dsr_bytes);
+ while (!gru_assign_gru_context(kgts)) {
+ msleep(1);
+ gru_steal_context(kgts);
+ }
+ gru_load_context(kgts);
+ gru = bs->bs_kgts->ts_gru;
+ vaddr = gru->gs_gru_base_vaddr;
+ ctxnum = kgts->ts_ctxnum;
+ bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0);
+ bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0);
+ }
+ downgrade_write(&bs->bs_kgts_sema);
+}
+
+/*
+ * Free all kernel contexts that are not currently in use.
+ * Returns 0 if all freed, else number of inuse context.
+ */
+static int gru_free_kernel_contexts(void)
+{
+ struct gru_blade_state *bs;
+ struct gru_thread_state *kgts;
+ int bid, ret = 0;
+
+ for (bid = 0; bid < GRU_MAX_BLADES; bid++) {
+ bs = gru_base[bid];
+ if (!bs)
+ continue;
+
+ /* Ignore busy contexts. Don't want to block here. */
+ if (down_write_trylock(&bs->bs_kgts_sema)) {
+ kgts = bs->bs_kgts;
+ if (kgts && kgts->ts_gru)
+ gru_unload_context(kgts, 0);
+ bs->bs_kgts = NULL;
+ up_write(&bs->bs_kgts_sema);
+ kfree(kgts);
+ } else {
+ ret++;
+ }
+ }
+ return ret;
+}
+
+/*
+ * Lock & load the kernel context for the specified blade.
+ */
+static struct gru_blade_state *gru_lock_kernel_context(int blade_id)
+{
+ struct gru_blade_state *bs;
+ int bid;
+
+ STAT(lock_kernel_context);
+again:
+ bid = blade_id < 0 ? uv_numa_blade_id() : blade_id;
+ bs = gru_base[bid];
+
+ /* Handle the case where migration occurred while waiting for the sema */
+ down_read(&bs->bs_kgts_sema);
+ if (blade_id < 0 && bid != uv_numa_blade_id()) {
+ up_read(&bs->bs_kgts_sema);
+ goto again;
+ }
+ if (!bs->bs_kgts || !bs->bs_kgts->ts_gru)
+ gru_load_kernel_context(bs, bid);
+ return bs;
+
+}
+
+/*
+ * Unlock the kernel context for the specified blade. Context is not
+ * unloaded but may be stolen before next use.
+ */
+static void gru_unlock_kernel_context(int blade_id)
+{
+ struct gru_blade_state *bs;
+
+ bs = gru_base[blade_id];
+ up_read(&bs->bs_kgts_sema);
+ STAT(unlock_kernel_context);
+}
+
+/*
+ * Reserve & get pointers to the DSR/CBRs reserved for the current cpu.
+ * - returns with preemption disabled
+ */
static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
{
struct gru_blade_state *bs;
@@ -105,30 +266,166 @@ static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES);
preempt_disable();
- bs = gru_base[uv_numa_blade_id()];
+ bs = gru_lock_kernel_context(-1);
lcpu = uv_blade_processor_id();
*cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE;
*dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES;
return 0;
}
+/*
+ * Free the current cpus reserved DSR/CBR resources.
+ */
static void gru_free_cpu_resources(void *cb, void *dsr)
{
+ gru_unlock_kernel_context(uv_numa_blade_id());
preempt_enable();
}
+/*
+ * Reserve GRU resources to be used asynchronously.
+ * Note: currently supports only 1 reservation per blade.
+ *
+ * input:
+ * blade_id - blade on which resources should be reserved
+ * cbrs - number of CBRs
+ * dsr_bytes - number of DSR bytes needed
+ * output:
+ * handle to identify resource
+ * (0 = async resources already reserved)
+ */
+unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes,
+ struct completion *cmp)
+{
+ struct gru_blade_state *bs;
+ struct gru_thread_state *kgts;
+ int ret = 0;
+
+ bs = gru_base[blade_id];
+
+ down_write(&bs->bs_kgts_sema);
+
+ /* Verify no resources already reserved */
+ if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs)
+ goto done;
+ bs->bs_async_dsr_bytes = dsr_bytes;
+ bs->bs_async_cbrs = cbrs;
+ bs->bs_async_wq = cmp;
+ kgts = bs->bs_kgts;
+
+ /* Resources changed. Unload context if already loaded */
+ if (kgts && kgts->ts_gru)
+ gru_unload_context(kgts, 0);
+ ret = ASYNC_BID_TO_HAN(blade_id);
+
+done:
+ up_write(&bs->bs_kgts_sema);
+ return ret;
+}
+
+/*
+ * Release async resources previously reserved.
+ *
+ * input:
+ * han - handle to identify resources
+ */
+void gru_release_async_resources(unsigned long han)
+{
+ struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
+
+ down_write(&bs->bs_kgts_sema);
+ bs->bs_async_dsr_bytes = 0;
+ bs->bs_async_cbrs = 0;
+ bs->bs_async_wq = NULL;
+ up_write(&bs->bs_kgts_sema);
+}
+
+/*
+ * Wait for async GRU instructions to complete.
+ *
+ * input:
+ * han - handle to identify resources
+ */
+void gru_wait_async_cbr(unsigned long han)
+{
+ struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
+
+ wait_for_completion(bs->bs_async_wq);
+ mb();
+}
+
+/*
+ * Lock previous reserved async GRU resources
+ *
+ * input:
+ * han - handle to identify resources
+ * output:
+ * cb - pointer to first CBR
+ * dsr - pointer to first DSR
+ */
+void gru_lock_async_resource(unsigned long han, void **cb, void **dsr)
+{
+ struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han);
+ int blade_id = ASYNC_HAN_TO_BID(han);
+ int ncpus;
+
+ gru_lock_kernel_context(blade_id);
+ ncpus = uv_blade_nr_possible_cpus(blade_id);
+ if (cb)
+ *cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE;
+ if (dsr)
+ *dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES;
+}
+
+/*
+ * Unlock previous reserved async GRU resources
+ *
+ * input:
+ * han - handle to identify resources
+ */
+void gru_unlock_async_resource(unsigned long han)
+{
+ int blade_id = ASYNC_HAN_TO_BID(han);
+
+ gru_unlock_kernel_context(blade_id);
+}
+
+/*----------------------------------------------------------------------*/
int gru_get_cb_exception_detail(void *cb,
struct control_block_extended_exc_detail *excdet)
{
struct gru_control_block_extended *cbe;
+ struct gru_thread_state *kgts = NULL;
+ unsigned long off;
+ int cbrnum, bid;
- cbe = get_cbe(GRUBASE(cb), get_cb_number(cb));
- prefetchw(cbe); /* Harmless on hardware, required for emulator */
+ /*
+ * Locate kgts for cb. This algorithm is SLOW but
+ * this function is rarely called (ie., almost never).
+ * Performance does not matter.
+ */
+ for_each_possible_blade(bid) {
+ if (!gru_base[bid])
+ break;
+ kgts = gru_base[bid]->bs_kgts;
+ if (!kgts || !kgts->ts_gru)
+ continue;
+ off = cb - kgts->ts_gru->gs_gru_base_vaddr;
+ if (off < GRU_SIZE)
+ break;
+ kgts = NULL;
+ }
+ BUG_ON(!kgts);
+ cbrnum = thread_cbr_number(kgts, get_cb_number(cb));
+ cbe = get_cbe(GRUBASE(cb), cbrnum);
+ gru_flush_cache(cbe); /* CBE not coherent */
+ sync_core();
excdet->opc = cbe->opccpy;
excdet->exopc = cbe->exopccpy;
excdet->ecause = cbe->ecause;
excdet->exceptdet0 = cbe->idef1upd;
excdet->exceptdet1 = cbe->idef3upd;
+ gru_flush_cache(cbe);
return 0;
}
@@ -141,8 +438,8 @@ char *gru_get_cb_exception_detail_str(int ret, void *cb,
if (ret > 0 && gen->istatus == CBS_EXCEPTION) {
gru_get_cb_exception_detail(cb, &excdet);
snprintf(buf, size,
- "GRU exception: cb %p, opc %d, exopc %d, ecause 0x%x,"
- "excdet0 0x%lx, excdet1 0x%x",
+ "GRU:%d exception: cb %p, opc %d, exopc %d, ecause 0x%x,"
+ "excdet0 0x%lx, excdet1 0x%x", smp_processor_id(),
gen, excdet.opc, excdet.exopc, excdet.ecause,
excdet.exceptdet0, excdet.exceptdet1);
} else {
@@ -167,13 +464,13 @@ static int gru_retry_exception(void *cb)
int retry = EXCEPTION_RETRY_LIMIT;
while (1) {
- if (gru_get_cb_message_queue_substatus(cb))
- break;
if (gru_wait_idle_or_exception(gen) == CBS_IDLE)
return CBS_IDLE;
-
+ if (gru_get_cb_message_queue_substatus(cb))
+ return CBS_EXCEPTION;
gru_get_cb_exception_detail(cb, &excdet);
- if (excdet.ecause & ~EXCEPTION_RETRY_BITS)
+ if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) ||
+ (excdet.cbrexecstatus & CBR_EXS_ABORT_OCC))
break;
if (retry-- == 0)
break;
@@ -189,9 +486,10 @@ int gru_check_status_proc(void *cb)
int ret;
ret = gen->istatus;
- if (ret != CBS_EXCEPTION)
- return ret;
- return gru_retry_exception(cb);
+ if (ret == CBS_EXCEPTION)
+ ret = gru_retry_exception(cb);
+ rmb();
+ return ret;
}
@@ -203,7 +501,7 @@ int gru_wait_proc(void *cb)
ret = gru_wait_idle_or_exception(gen);
if (ret == CBS_EXCEPTION)
ret = gru_retry_exception(cb);
-
+ rmb();
return ret;
}
@@ -270,7 +568,7 @@ int gru_create_message_queue(struct gru_message_queue_desc *mqd,
mqd->mq = mq;
mqd->mq_gpa = uv_gpa(mq);
mqd->qlines = qlines;
- mqd->interrupt_pnode = UV_NASID_TO_PNODE(nasid);
+ mqd->interrupt_pnode = nasid >> 1;
mqd->interrupt_vector = vector;
mqd->interrupt_apicid = apicid;
return 0;
@@ -330,6 +628,8 @@ static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd,
ret = MQE_UNEXPECTED_CB_ERR;
break;
case CBSS_PAGE_OVERFLOW:
+ STAT(mesq_noop_page_overflow);
+ /* fallthru */
default:
BUG();
}
@@ -405,17 +705,48 @@ cberr:
}
/*
- * Send a cross-partition interrupt to the SSI that contains the target
- * message queue. Normally, the interrupt is automatically delivered by hardware
- * but some error conditions require explicit delivery.
+ * Handle a PUT failure. Note: if message was a 2-line message, one of the
+ * lines might have successfully have been written. Before sending the
+ * message, "present" must be cleared in BOTH lines to prevent the receiver
+ * from prematurely seeing the full message.
*/
-static void send_message_queue_interrupt(struct gru_message_queue_desc *mqd)
+static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd,
+ void *mesg, int lines)
{
- if (mqd->interrupt_vector)
- uv_hub_send_ipi(mqd->interrupt_pnode, mqd->interrupt_apicid,
- mqd->interrupt_vector);
-}
+ unsigned long m, *val = mesg, gpa, save;
+ int ret;
+
+ m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
+ if (lines == 2) {
+ gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA);
+ if (gru_wait(cb) != CBS_IDLE)
+ return MQE_UNEXPECTED_CB_ERR;
+ }
+ gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
+ if (gru_wait(cb) != CBS_IDLE)
+ return MQE_UNEXPECTED_CB_ERR;
+
+ if (!mqd->interrupt_vector)
+ return MQE_OK;
+ /*
+ * Send a cross-partition interrupt to the SSI that contains the target
+ * message queue. Normally, the interrupt is automatically delivered by
+ * hardware but some error conditions require explicit delivery.
+ * Use the GRU to deliver the interrupt. Otherwise partition failures
+ * could cause unrecovered errors.
+ */
+ gpa = uv_global_gru_mmr_address(mqd->interrupt_pnode, UVH_IPI_INT);
+ save = *val;
+ *val = uv_hub_ipi_value(mqd->interrupt_apicid, mqd->interrupt_vector,
+ dest_Fixed);
+ gru_vstore_phys(cb, gpa, gru_get_tri(mesg), IAA_REGISTER, IMA);
+ ret = gru_wait(cb);
+ *val = save;
+ if (ret != CBS_IDLE)
+ return MQE_UNEXPECTED_CB_ERR;
+ return MQE_OK;
+}
/*
* Handle a gru_mesq failure. Some of these failures are software recoverable
@@ -425,7 +756,6 @@ static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd,
void *mesg, int lines)
{
int substatus, ret = 0;
- unsigned long m;
substatus = gru_get_cb_message_queue_substatus(cb);
switch (substatus) {
@@ -447,15 +777,11 @@ static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd,
break;
case CBSS_PUT_NACKED:
STAT(mesq_send_put_nacked);
- m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
- gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
- if (gru_wait(cb) == CBS_IDLE) {
- ret = MQE_OK;
- send_message_queue_interrupt(mqd);
- } else {
- ret = MQE_UNEXPECTED_CB_ERR;
- }
+ ret = send_message_put_nacked(cb, mqd, mesg, lines);
break;
+ case CBSS_PAGE_OVERFLOW:
+ STAT(mesq_page_overflow);
+ /* fallthru */
default:
BUG();
}
@@ -548,7 +874,6 @@ void *gru_get_next_message(struct gru_message_queue_desc *mqd)
int present = mhdr->present;
/* skip NOOP messages */
- STAT(mesq_receive);
while (present == MQS_NOOP) {
gru_free_message(mqd, mhdr);
mhdr = mq->next;
@@ -568,6 +893,7 @@ void *gru_get_next_message(struct gru_message_queue_desc *mqd)
if (mhdr->lines == 2)
restore_present2(mhdr, mhdr->present2);
+ STAT(mesq_receive);
return mhdr;
}
EXPORT_SYMBOL_GPL(gru_get_next_message);
@@ -575,6 +901,29 @@ EXPORT_SYMBOL_GPL(gru_get_next_message);
/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/
/*
+ * Load a DW from a global GPA. The GPA can be a memory or MMR address.
+ */
+int gru_read_gpa(unsigned long *value, unsigned long gpa)
+{
+ void *cb;
+ void *dsr;
+ int ret, iaa;
+
+ STAT(read_gpa);
+ if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
+ return MQE_BUG_NO_RESOURCES;
+ iaa = gpa >> 62;
+ gru_vload_phys(cb, gpa, gru_get_tri(dsr), iaa, IMA);
+ ret = gru_wait(cb);
+ if (ret == CBS_IDLE)
+ *value = *(unsigned long *)dsr;
+ gru_free_cpu_resources(cb, dsr);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(gru_read_gpa);
+
+
+/*
* Copy a block of data using the GRU resources
*/
int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
@@ -597,115 +946,217 @@ EXPORT_SYMBOL_GPL(gru_copy_gpa);
/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/
/* Temp - will delete after we gain confidence in the GRU */
-static __cacheline_aligned unsigned long word0;
-static __cacheline_aligned unsigned long word1;
-static int quicktest(struct gru_state *gru)
+static int quicktest0(unsigned long arg)
{
+ unsigned long word0;
+ unsigned long word1;
void *cb;
- void *ds;
+ void *dsr;
unsigned long *p;
+ int ret = -EIO;
- cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0);
- ds = get_gseg_base_address_ds(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0);
- p = ds;
+ if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr))
+ return MQE_BUG_NO_RESOURCES;
+ p = dsr;
word0 = MAGIC;
+ word1 = 0;
- gru_vload(cb, uv_gpa(&word0), 0, XTYPE_DW, 1, 1, IMA);
- if (gru_wait(cb) != CBS_IDLE)
- BUG();
+ gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
+ if (gru_wait(cb) != CBS_IDLE) {
+ printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 1\n", smp_processor_id());
+ goto done;
+ }
- if (*(unsigned long *)ds != MAGIC)
- BUG();
- gru_vstore(cb, uv_gpa(&word1), 0, XTYPE_DW, 1, 1, IMA);
- if (gru_wait(cb) != CBS_IDLE)
- BUG();
+ if (*p != MAGIC) {
+ printk(KERN_DEBUG "GRU:%d quicktest0 bad magic 0x%lx\n", smp_processor_id(), *p);
+ goto done;
+ }
+ gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA);
+ if (gru_wait(cb) != CBS_IDLE) {
+ printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 2\n", smp_processor_id());
+ goto done;
+ }
- if (word0 != word1 || word0 != MAGIC) {
- printk
- ("GRU quicktest err: gid %d, found 0x%lx, expected 0x%lx\n",
- gru->gs_gid, word1, MAGIC);
- BUG(); /* ZZZ should not be fatal */
+ if (word0 != word1 || word1 != MAGIC) {
+ printk(KERN_DEBUG
+ "GRU:%d quicktest0 err: found 0x%lx, expected 0x%lx\n",
+ smp_processor_id(), word1, MAGIC);
+ goto done;
}
+ ret = 0;
- return 0;
+done:
+ gru_free_cpu_resources(cb, dsr);
+ return ret;
}
+#define ALIGNUP(p, q) ((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1)))
-int gru_kservices_init(struct gru_state *gru)
+static int quicktest1(unsigned long arg)
{
- struct gru_blade_state *bs;
- struct gru_context_configuration_handle *cch;
- unsigned long cbr_map, dsr_map;
- int err, num, cpus_possible;
+ struct gru_message_queue_desc mqd;
+ void *p, *mq;
+ unsigned long *dw;
+ int i, ret = -EIO;
+ char mes[GRU_CACHE_LINE_BYTES], *m;
+
+ /* Need 1K cacheline aligned that does not cross page boundary */
+ p = kmalloc(4096, 0);
+ if (p == NULL)
+ return -ENOMEM;
+ mq = ALIGNUP(p, 1024);
+ memset(mes, 0xee, sizeof(mes));
+ dw = mq;
+
+ gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0);
+ for (i = 0; i < 6; i++) {
+ mes[8] = i;
+ do {
+ ret = gru_send_message_gpa(&mqd, mes, sizeof(mes));
+ } while (ret == MQE_CONGESTION);
+ if (ret)
+ break;
+ }
+ if (ret != MQE_QUEUE_FULL || i != 4) {
+ printk(KERN_DEBUG "GRU:%d quicktest1: unexpect status %d, i %d\n",
+ smp_processor_id(), ret, i);
+ goto done;
+ }
- /*
- * Currently, resources are reserved ONLY on the second chiplet
- * on each blade. This leaves ALL resources on chiplet 0 available
- * for user code.
- */
- bs = gru->gs_blade;
- if (gru != &bs->bs_grus[1])
- return 0;
-
- cpus_possible = uv_blade_nr_possible_cpus(gru->gs_blade_id);
-
- num = GRU_NUM_KERNEL_CBR * cpus_possible;
- cbr_map = gru_reserve_cb_resources(gru, GRU_CB_COUNT_TO_AU(num), NULL);
- gru->gs_reserved_cbrs += num;
-
- num = GRU_NUM_KERNEL_DSR_BYTES * cpus_possible;
- dsr_map = gru_reserve_ds_resources(gru, GRU_DS_BYTES_TO_AU(num), NULL);
- gru->gs_reserved_dsr_bytes += num;
-
- gru->gs_active_contexts++;
- __set_bit(KERNEL_CTXNUM, &gru->gs_context_map);
- cch = get_cch(gru->gs_gru_base_vaddr, KERNEL_CTXNUM);
-
- bs->kernel_cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr,
- KERNEL_CTXNUM, 0);
- bs->kernel_dsr = get_gseg_base_address_ds(gru->gs_gru_base_vaddr,
- KERNEL_CTXNUM, 0);
-
- lock_cch_handle(cch);
- cch->tfm_fault_bit_enable = 0;
- cch->tlb_int_enable = 0;
- cch->tfm_done_bit_enable = 0;
- cch->unmap_enable = 1;
- err = cch_allocate(cch, 0, 0, cbr_map, dsr_map);
- if (err) {
- gru_dbg(grudev,
- "Unable to allocate kernel CCH: gid %d, err %d\n",
- gru->gs_gid, err);
- BUG();
+ for (i = 0; i < 6; i++) {
+ m = gru_get_next_message(&mqd);
+ if (!m || m[8] != i)
+ break;
+ gru_free_message(&mqd, m);
}
- if (cch_start(cch)) {
- gru_dbg(grudev, "Unable to start kernel CCH: gid %d, err %d\n",
- gru->gs_gid, err);
- BUG();
+ if (i != 4) {
+ printk(KERN_DEBUG "GRU:%d quicktest2: bad message, i %d, m %p, m8 %d\n",
+ smp_processor_id(), i, m, m ? m[8] : -1);
+ goto done;
}
- unlock_cch_handle(cch);
+ ret = 0;
- if (gru_options & GRU_QUICKLOOK)
- quicktest(gru);
- return 0;
+done:
+ kfree(p);
+ return ret;
}
-void gru_kservices_exit(struct gru_state *gru)
+static int quicktest2(unsigned long arg)
{
- struct gru_context_configuration_handle *cch;
- struct gru_blade_state *bs;
+ static DECLARE_COMPLETION(cmp);
+ unsigned long han;
+ int blade_id = 0;
+ int numcb = 4;
+ int ret = 0;
+ unsigned long *buf;
+ void *cb0, *cb;
+ struct gru_control_block_status *gen;
+ int i, k, istatus, bytes;
+
+ bytes = numcb * 4 * 8;
+ buf = kmalloc(bytes, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ ret = -EBUSY;
+ han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp);
+ if (!han)
+ goto done;
+
+ gru_lock_async_resource(han, &cb0, NULL);
+ memset(buf, 0xee, bytes);
+ for (i = 0; i < numcb; i++)
+ gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0,
+ XTYPE_DW, 4, 1, IMA_INTERRUPT);
+
+ ret = 0;
+ k = numcb;
+ do {
+ gru_wait_async_cbr(han);
+ for (i = 0; i < numcb; i++) {
+ cb = cb0 + i * GRU_HANDLE_STRIDE;
+ istatus = gru_check_status(cb);
+ if (istatus != CBS_ACTIVE && istatus != CBS_CALL_OS)
+ break;
+ }
+ if (i == numcb)
+ continue;
+ if (istatus != CBS_IDLE) {
+ printk(KERN_DEBUG "GRU:%d quicktest2: cb %d, exception\n", smp_processor_id(), i);
+ ret = -EFAULT;
+ } else if (buf[4 * i] || buf[4 * i + 1] || buf[4 * i + 2] ||
+ buf[4 * i + 3]) {
+ printk(KERN_DEBUG "GRU:%d quicktest2:cb %d, buf 0x%lx, 0x%lx, 0x%lx, 0x%lx\n",
+ smp_processor_id(), i, buf[4 * i], buf[4 * i + 1], buf[4 * i + 2], buf[4 * i + 3]);
+ ret = -EIO;
+ }
+ k--;
+ gen = cb;
+ gen->istatus = CBS_CALL_OS; /* don't handle this CBR again */
+ } while (k);
+ BUG_ON(cmp.done);
+
+ gru_unlock_async_resource(han);
+ gru_release_async_resources(han);
+done:
+ kfree(buf);
+ return ret;
+}
+
+#define BUFSIZE 200
+static int quicktest3(unsigned long arg)
+{
+ char buf1[BUFSIZE], buf2[BUFSIZE];
+ int ret = 0;
+
+ memset(buf2, 0, sizeof(buf2));
+ memset(buf1, get_cycles() & 255, sizeof(buf1));
+ gru_copy_gpa(uv_gpa(buf2), uv_gpa(buf1), BUFSIZE);
+ if (memcmp(buf1, buf2, BUFSIZE)) {
+ printk(KERN_DEBUG "GRU:%d quicktest3 error\n", smp_processor_id());
+ ret = -EIO;
+ }
+ return ret;
+}
- bs = gru->gs_blade;
- if (gru != &bs->bs_grus[1])
- return;
+/*
+ * Debugging only. User hook for various kernel tests
+ * of driver & gru.
+ */
+int gru_ktest(unsigned long arg)
+{
+ int ret = -EINVAL;
- cch = get_cch(gru->gs_gru_base_vaddr, KERNEL_CTXNUM);
- lock_cch_handle(cch);
- if (cch_interrupt_sync(cch))
- BUG();
- if (cch_deallocate(cch))
+ switch (arg & 0xff) {
+ case 0:
+ ret = quicktest0(arg);
+ break;
+ case 1:
+ ret = quicktest1(arg);
+ break;
+ case 2:
+ ret = quicktest2(arg);
+ break;
+ case 3:
+ ret = quicktest3(arg);
+ break;
+ case 99:
+ ret = gru_free_kernel_contexts();
+ break;
+ }
+ return ret;
+
+}
+
+int gru_kservices_init(void)
+{
+ return 0;
+}
+
+void gru_kservices_exit(void)
+{
+ if (gru_free_kernel_contexts())
BUG();
- unlock_cch_handle(cch);
}
diff --git a/drivers/misc/sgi-gru/grukservices.h b/drivers/misc/sgi-gru/grukservices.h
index 747ed315d56..02aa94d8484 100644
--- a/drivers/misc/sgi-gru/grukservices.h
+++ b/drivers/misc/sgi-gru/grukservices.h
@@ -131,6 +131,20 @@ extern void *gru_get_next_message(struct gru_message_queue_desc *mqd);
/*
+ * Read a GRU global GPA. Source can be located in a remote partition.
+ *
+ * Input:
+ * value memory address where MMR value is returned
+ * gpa source numalink physical address of GPA
+ *
+ * Output:
+ * 0 OK
+ * >0 error
+ */
+int gru_read_gpa(unsigned long *value, unsigned long gpa);
+
+
+/*
* Copy data using the GRU. Source or destination can be located in a remote
* partition.
*
@@ -146,4 +160,55 @@ extern void *gru_get_next_message(struct gru_message_queue_desc *mqd);
extern int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
unsigned int bytes);
+/*
+ * Reserve GRU resources to be used asynchronously.
+ *
+ * input:
+ * blade_id - blade on which resources should be reserved
+ * cbrs - number of CBRs
+ * dsr_bytes - number of DSR bytes needed
+ * cmp - completion structure for waiting for
+ * async completions
+ * output:
+ * handle to identify resource
+ * (0 = no resources)
+ */
+extern unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes,
+ struct completion *cmp);
+
+/*
+ * Release async resources previously reserved.
+ *
+ * input:
+ * han - handle to identify resources
+ */
+extern void gru_release_async_resources(unsigned long han);
+
+/*
+ * Wait for async GRU instructions to complete.
+ *
+ * input:
+ * han - handle to identify resources
+ */
+extern void gru_wait_async_cbr(unsigned long han);
+
+/*
+ * Lock previous reserved async GRU resources
+ *
+ * input:
+ * han - handle to identify resources
+ * output:
+ * cb - pointer to first CBR
+ * dsr - pointer to first DSR
+ */
+extern void gru_lock_async_resource(unsigned long han, void **cb, void **dsr);
+
+/*
+ * Unlock previous reserved async GRU resources
+ *
+ * input:
+ * han - handle to identify resources
+ */
+extern void gru_unlock_async_resource(unsigned long han);
+
#endif /* __GRU_KSERVICES_H_ */
diff --git a/drivers/misc/sgi-gru/grulib.h b/drivers/misc/sgi-gru/grulib.h
index e56e196a699..e77d1b1f9d0 100644
--- a/drivers/misc/sgi-gru/grulib.h
+++ b/drivers/misc/sgi-gru/grulib.h
@@ -32,8 +32,8 @@
/* Set Number of Request Blocks */
#define GRU_CREATE_CONTEXT _IOWR(GRU_IOCTL_NUM, 1, void *)
-/* Register task as using the slice */
-#define GRU_SET_TASK_SLICE _IOWR(GRU_IOCTL_NUM, 5, void *)
+/* Set Context Options */
+#define GRU_SET_CONTEXT_OPTION _IOWR(GRU_IOCTL_NUM, 4, void *)
/* Fetch exception detail */
#define GRU_USER_GET_EXCEPTION_DETAIL _IOWR(GRU_IOCTL_NUM, 6, void *)
@@ -44,8 +44,11 @@
/* For user unload context */
#define GRU_USER_UNLOAD_CONTEXT _IOWR(GRU_IOCTL_NUM, 9, void *)
-/* For fetching GRU chiplet status */
-#define GRU_GET_CHIPLET_STATUS _IOWR(GRU_IOCTL_NUM, 10, void *)
+/* For dumpping GRU chiplet state */
+#define GRU_DUMP_CHIPLET_STATE _IOWR(GRU_IOCTL_NUM, 11, void *)
+
+/* For getting gseg statistics */
+#define GRU_GET_GSEG_STATISTICS _IOWR(GRU_IOCTL_NUM, 12, void *)
/* For user TLB flushing (primarily for tests) */
#define GRU_USER_FLUSH_TLB _IOWR(GRU_IOCTL_NUM, 50, void *)
@@ -53,8 +56,17 @@
/* Get some config options (primarily for tests & emulator) */
#define GRU_GET_CONFIG_INFO _IOWR(GRU_IOCTL_NUM, 51, void *)
+/* Various kernel self-tests */
+#define GRU_KTEST _IOWR(GRU_IOCTL_NUM, 52, void *)
+
#define CONTEXT_WINDOW_BYTES(th) (GRU_GSEG_PAGESIZE * (th))
#define THREAD_POINTER(p, th) (p + GRU_GSEG_PAGESIZE * (th))
+#define GSEG_START(cb) ((void *)((unsigned long)(cb) & ~(GRU_GSEG_PAGESIZE - 1)))
+
+struct gru_get_gseg_statistics_req {
+ unsigned long gseg;
+ struct gru_gseg_statistics stats;
+};
/*
* Structure used to pass TLB flush parameters to the driver
@@ -65,6 +77,7 @@ struct gru_create_context_req {
unsigned int control_blocks;
unsigned int maximum_thread_count;
unsigned int options;
+ unsigned char tlb_preload_count;
};
/*
@@ -75,6 +88,17 @@ struct gru_unload_context_req {
};
/*
+ * Structure used to set context options
+ */
+enum {sco_gseg_owner, sco_cch_req_slice, sco_blade_chiplet};
+struct gru_set_context_option_req {
+ unsigned long gseg;
+ int op;
+ int val0;
+ long val1;
+};
+
+/*
* Structure used to pass TLB flush parameters to the driver
*/
struct gru_flush_tlb_req {
@@ -84,6 +108,38 @@ struct gru_flush_tlb_req {
};
/*
+ * Structure used to pass TLB flush parameters to the driver
+ */
+enum {dcs_pid, dcs_gid};
+struct gru_dump_chiplet_state_req {
+ unsigned int op;
+ unsigned int gid;
+ int ctxnum;
+ char data_opt;
+ char lock_cch;
+ char flush_cbrs;
+ char fill[10];
+ pid_t pid;
+ void *buf;
+ size_t buflen;
+ /* ---- output --- */
+ unsigned int num_contexts;
+};
+
+#define GRU_DUMP_MAGIC 0x3474ab6c
+struct gru_dump_context_header {
+ unsigned int magic;
+ unsigned int gid;
+ unsigned char ctxnum;
+ unsigned char cbrcnt;
+ unsigned char dsrcnt;
+ pid_t pid;
+ unsigned long vaddr;
+ int cch_locked;
+ unsigned long data[0];
+};
+
+/*
* GRU configuration info (temp - for testing)
*/
struct gru_config_info {
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
index ec3f7a17d22..ae16c8cb4f3 100644
--- a/drivers/misc/sgi-gru/grumain.c
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -3,11 +3,21 @@
*
* DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD
*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
+ * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
*
- * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/kernel.h>
@@ -17,6 +27,8 @@
#include <linux/sched.h>
#include <linux/device.h>
#include <linux/list.h>
+#include <linux/err.h>
+#include <linux/prefetch.h>
#include <asm/uv/uv_hub.h>
#include "gru.h"
#include "grutables.h"
@@ -38,12 +50,20 @@ struct device *grudev = &gru_device;
/*
* Select a gru fault map to be used by the current cpu. Note that
* multiple cpus may be using the same map.
- * ZZZ should "shift" be used?? Depends on HT cpu numbering
* ZZZ should be inline but did not work on emulator
*/
int gru_cpu_fault_map_id(void)
{
+#ifdef CONFIG_IA64
return uv_blade_processor_id() % GRU_NUM_TFM;
+#else
+ int cpu = smp_processor_id();
+ int id, core;
+
+ core = uv_cpu_core_number(cpu);
+ id = core + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu);
+ return id;
+#endif
}
/*--------- ASID Management -------------------------------------------
@@ -96,7 +116,7 @@ static int gru_reset_asid_limit(struct gru_state *gru, int asid)
gid = gru->gs_gid;
again:
for (i = 0; i < GRU_NUM_CCH; i++) {
- if (!gru->gs_gts[i])
+ if (!gru->gs_gts[i] || is_kernel_context(gru->gs_gts[i]))
continue;
inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid;
gru_dbg(grudev, "gid %d, gts %p, gms %p, inuse 0x%x, cxt %d\n",
@@ -150,7 +170,7 @@ static unsigned long reserve_resources(unsigned long *p, int n, int mmax,
unsigned long bits = 0;
int i;
- do {
+ while (n--) {
i = find_first_bit(p, mmax);
if (i == mmax)
BUG();
@@ -158,7 +178,7 @@ static unsigned long reserve_resources(unsigned long *p, int n, int mmax,
__set_bit(i, &bits);
if (idx)
*idx++ = i;
- } while (--n);
+ }
return bits;
}
@@ -276,7 +296,8 @@ static void gru_unload_mm_tracker(struct gru_state *gru,
void gts_drop(struct gru_thread_state *gts)
{
if (gts && atomic_dec_return(&gts->ts_refcnt) == 0) {
- gru_drop_mmu_notifier(gts->ts_gms);
+ if (gts->ts_gms)
+ gru_drop_mmu_notifier(gts->ts_gms);
kfree(gts);
STAT(gts_free);
}
@@ -299,43 +320,50 @@ static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data
/*
* Allocate a thread state structure.
*/
-static struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
- struct gru_vma_data *vdata,
- int tsid)
+struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
+ int cbr_au_count, int dsr_au_count,
+ unsigned char tlb_preload_count, int options, int tsid)
{
struct gru_thread_state *gts;
+ struct gru_mm_struct *gms;
int bytes;
- bytes = DSR_BYTES(vdata->vd_dsr_au_count) +
- CBR_BYTES(vdata->vd_cbr_au_count);
+ bytes = DSR_BYTES(dsr_au_count) + CBR_BYTES(cbr_au_count);
bytes += sizeof(struct gru_thread_state);
- gts = kzalloc(bytes, GFP_KERNEL);
+ gts = kmalloc(bytes, GFP_KERNEL);
if (!gts)
- return NULL;
+ return ERR_PTR(-ENOMEM);
STAT(gts_alloc);
+ memset(gts, 0, sizeof(struct gru_thread_state)); /* zero out header */
atomic_set(&gts->ts_refcnt, 1);
mutex_init(&gts->ts_ctxlock);
- gts->ts_cbr_au_count = vdata->vd_cbr_au_count;
- gts->ts_dsr_au_count = vdata->vd_dsr_au_count;
- gts->ts_user_options = vdata->vd_user_options;
+ gts->ts_cbr_au_count = cbr_au_count;
+ gts->ts_dsr_au_count = dsr_au_count;
+ gts->ts_tlb_preload_count = tlb_preload_count;
+ gts->ts_user_options = options;
+ gts->ts_user_blade_id = -1;
+ gts->ts_user_chiplet_id = -1;
gts->ts_tsid = tsid;
- gts->ts_user_options = vdata->vd_user_options;
gts->ts_ctxnum = NULLCTX;
- gts->ts_mm = current->mm;
- gts->ts_vma = vma;
gts->ts_tlb_int_select = -1;
- gts->ts_gms = gru_register_mmu_notifier();
+ gts->ts_cch_req_slice = -1;
gts->ts_sizeavail = GRU_SIZEAVAIL(PAGE_SHIFT);
- if (!gts->ts_gms)
- goto err;
+ if (vma) {
+ gts->ts_mm = current->mm;
+ gts->ts_vma = vma;
+ gms = gru_register_mmu_notifier();
+ if (IS_ERR(gms))
+ goto err;
+ gts->ts_gms = gms;
+ }
- gru_dbg(grudev, "alloc vdata %p, new gts %p\n", vdata, gts);
+ gru_dbg(grudev, "alloc gts %p\n", gts);
return gts;
err:
gts_drop(gts);
- return NULL;
+ return ERR_CAST(gms);
}
/*
@@ -349,6 +377,7 @@ struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid)
if (!vdata)
return NULL;
+ STAT(vdata_alloc);
INIT_LIST_HEAD(&vdata->vd_head);
spin_lock_init(&vdata->vd_lock);
gru_dbg(grudev, "alloc vdata %p\n", vdata);
@@ -381,9 +410,12 @@ struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma,
struct gru_vma_data *vdata = vma->vm_private_data;
struct gru_thread_state *gts, *ngts;
- gts = gru_alloc_gts(vma, vdata, tsid);
- if (!gts)
- return NULL;
+ gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count,
+ vdata->vd_dsr_au_count,
+ vdata->vd_tlb_preload_count,
+ vdata->vd_user_options, tsid);
+ if (IS_ERR(gts))
+ return gts;
spin_lock(&vdata->vd_lock);
ngts = gru_find_current_gts_nolock(vdata, tsid);
@@ -458,7 +490,8 @@ static void gru_prefetch_context(void *gseg, void *cb, void *cbe,
}
static void gru_load_context_data(void *save, void *grubase, int ctxnum,
- unsigned long cbrmap, unsigned long dsrmap)
+ unsigned long cbrmap, unsigned long dsrmap,
+ int data_valid)
{
void *gseg, *cb, *cbe;
unsigned long length;
@@ -471,12 +504,25 @@ static void gru_load_context_data(void *save, void *grubase, int ctxnum,
gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
- save += gru_copy_handle(cb, save);
- save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE, save);
+ if (data_valid) {
+ save += gru_copy_handle(cb, save);
+ save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE,
+ save);
+ } else {
+ memset(cb, 0, GRU_CACHE_LINE_BYTES);
+ memset(cbe + i * GRU_HANDLE_STRIDE, 0,
+ GRU_CACHE_LINE_BYTES);
+ }
+ /* Flush CBE to hide race in context restart */
+ mb();
+ gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE);
cb += GRU_HANDLE_STRIDE;
}
- memcpy(gseg + GRU_DS_BASE, save, length);
+ if (data_valid)
+ memcpy(gseg + GRU_DS_BASE, save, length);
+ else
+ memset(gseg + GRU_DS_BASE, 0, length);
}
static void gru_unload_context_data(void *save, void *grubase, int ctxnum,
@@ -490,6 +536,12 @@ static void gru_unload_context_data(void *save, void *grubase, int ctxnum,
cb = gseg + GRU_CB_BASE;
cbe = grubase + GRU_CBE_BASE;
length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
+
+ /* CBEs may not be coherent. Flush them from cache */
+ for_each_cbr_in_allocation_map(i, &cbrmap, scr)
+ gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE);
+ mb(); /* Let the CL flush complete */
+
gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
@@ -506,44 +558,44 @@ void gru_unload_context(struct gru_thread_state *gts, int savestate)
struct gru_context_configuration_handle *cch;
int ctxnum = gts->ts_ctxnum;
- zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE);
+ if (!is_kernel_context(gts))
+ zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE);
cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
- gru_dbg(grudev, "gts %p\n", gts);
+ gru_dbg(grudev, "gts %p, cbrmap 0x%lx, dsrmap 0x%lx\n",
+ gts, gts->ts_cbr_map, gts->ts_dsr_map);
lock_cch_handle(cch);
if (cch_interrupt_sync(cch))
BUG();
- gru_unload_mm_tracker(gru, gts);
- if (savestate)
+ if (!is_kernel_context(gts))
+ gru_unload_mm_tracker(gru, gts);
+ if (savestate) {
gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr,
ctxnum, gts->ts_cbr_map,
gts->ts_dsr_map);
+ gts->ts_data_valid = 1;
+ }
if (cch_deallocate(cch))
BUG();
- gts->ts_force_unload = 0; /* ts_force_unload locked by CCH lock */
unlock_cch_handle(cch);
gru_free_gru_context(gts);
- STAT(unload_context);
}
/*
* Load a GRU context by copying it from the thread data structure in memory
* to the GRU.
*/
-static void gru_load_context(struct gru_thread_state *gts)
+void gru_load_context(struct gru_thread_state *gts)
{
struct gru_state *gru = gts->ts_gru;
struct gru_context_configuration_handle *cch;
- int err, asid, ctxnum = gts->ts_ctxnum;
+ int i, err, asid, ctxnum = gts->ts_ctxnum;
- gru_dbg(grudev, "gts %p\n", gts);
cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
-
lock_cch_handle(cch);
- asid = gru_load_mm_tracker(gru, gts);
cch->tfm_fault_bit_enable =
(gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
|| gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
@@ -552,9 +604,33 @@ static void gru_load_context(struct gru_thread_state *gts)
gts->ts_tlb_int_select = gru_cpu_fault_map_id();
cch->tlb_int_select = gts->ts_tlb_int_select;
}
+ if (gts->ts_cch_req_slice >= 0) {
+ cch->req_slice_set_enable = 1;
+ cch->req_slice = gts->ts_cch_req_slice;
+ } else {
+ cch->req_slice_set_enable =0;
+ }
cch->tfm_done_bit_enable = 0;
- err = cch_allocate(cch, asid, gts->ts_sizeavail, gts->ts_cbr_map,
- gts->ts_dsr_map);
+ cch->dsr_allocation_map = gts->ts_dsr_map;
+ cch->cbr_allocation_map = gts->ts_cbr_map;
+
+ if (is_kernel_context(gts)) {
+ cch->unmap_enable = 1;
+ cch->tfm_done_bit_enable = 1;
+ cch->cb_int_enable = 1;
+ cch->tlb_int_select = 0; /* For now, ints go to cpu 0 */
+ } else {
+ cch->unmap_enable = 0;
+ cch->tfm_done_bit_enable = 0;
+ cch->cb_int_enable = 0;
+ asid = gru_load_mm_tracker(gru, gts);
+ for (i = 0; i < 8; i++) {
+ cch->asid[i] = asid + i;
+ cch->sizeavail[i] = gts->ts_sizeavail;
+ }
+ }
+
+ err = cch_allocate(cch);
if (err) {
gru_dbg(grudev,
"err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n",
@@ -563,24 +639,23 @@ static void gru_load_context(struct gru_thread_state *gts)
}
gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum,
- gts->ts_cbr_map, gts->ts_dsr_map);
+ gts->ts_cbr_map, gts->ts_dsr_map, gts->ts_data_valid);
if (cch_start(cch))
BUG();
unlock_cch_handle(cch);
- STAT(load_context);
+ gru_dbg(grudev, "gid %d, gts %p, cbrmap 0x%lx, dsrmap 0x%lx, tie %d, tis %d\n",
+ gts->ts_gru->gs_gid, gts, gts->ts_cbr_map, gts->ts_dsr_map,
+ (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR), gts->ts_tlb_int_select);
}
/*
* Update fields in an active CCH:
* - retarget interrupts on local blade
* - update sizeavail mask
- * - force a delayed context unload by clearing the CCH asids. This
- * forces TLB misses for new GRU instructions. The context is unloaded
- * when the next TLB miss occurs.
*/
-int gru_update_cch(struct gru_thread_state *gts, int force_unload)
+int gru_update_cch(struct gru_thread_state *gts)
{
struct gru_context_configuration_handle *cch;
struct gru_state *gru = gts->ts_gru;
@@ -594,18 +669,13 @@ int gru_update_cch(struct gru_thread_state *gts, int force_unload)
goto exit;
if (cch_interrupt(cch))
BUG();
- if (!force_unload) {
- for (i = 0; i < 8; i++)
- cch->sizeavail[i] = gts->ts_sizeavail;
- gts->ts_tlb_int_select = gru_cpu_fault_map_id();
- cch->tlb_int_select = gru_cpu_fault_map_id();
- } else {
- for (i = 0; i < 8; i++)
- cch->asid[i] = 0;
- cch->tfm_fault_bit_enable = 0;
- cch->tlb_int_enable = 0;
- gts->ts_force_unload = 1;
- }
+ for (i = 0; i < 8; i++)
+ cch->sizeavail[i] = gts->ts_sizeavail;
+ gts->ts_tlb_int_select = gru_cpu_fault_map_id();
+ cch->tlb_int_select = gru_cpu_fault_map_id();
+ cch->tfm_fault_bit_enable =
+ (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
+ || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
if (cch_start(cch))
BUG();
ret = 1;
@@ -630,7 +700,54 @@ static int gru_retarget_intr(struct gru_thread_state *gts)
gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select,
gru_cpu_fault_map_id());
- return gru_update_cch(gts, 0);
+ return gru_update_cch(gts);
+}
+
+/*
+ * Check if a GRU context is allowed to use a specific chiplet. By default
+ * a context is assigned to any blade-local chiplet. However, users can
+ * override this.
+ * Returns 1 if assignment allowed, 0 otherwise
+ */
+static int gru_check_chiplet_assignment(struct gru_state *gru,
+ struct gru_thread_state *gts)
+{
+ int blade_id;
+ int chiplet_id;
+
+ blade_id = gts->ts_user_blade_id;
+ if (blade_id < 0)
+ blade_id = uv_numa_blade_id();
+
+ chiplet_id = gts->ts_user_chiplet_id;
+ return gru->gs_blade_id == blade_id &&
+ (chiplet_id < 0 || chiplet_id == gru->gs_chiplet_id);
+}
+
+/*
+ * Unload the gru context if it is not assigned to the correct blade or
+ * chiplet. Misassignment can occur if the process migrates to a different
+ * blade or if the user changes the selected blade/chiplet.
+ */
+void gru_check_context_placement(struct gru_thread_state *gts)
+{
+ struct gru_state *gru;
+
+ /*
+ * If the current task is the context owner, verify that the
+ * context is correctly placed. This test is skipped for non-owner
+ * references. Pthread apps use non-owner references to the CBRs.
+ */
+ gru = gts->ts_gru;
+ if (!gru || gts->ts_tgid_owner != current->tgid)
+ return;
+
+ if (!gru_check_chiplet_assignment(gru, gts)) {
+ STAT(check_context_unload);
+ gru_unload_context(gts, 1);
+ } else if (gru_retarget_intr(gts)) {
+ STAT(check_context_retarget_intr);
+ }
}
@@ -642,61 +759,88 @@ static int gru_retarget_intr(struct gru_thread_state *gts)
#define next_gru(b, g) (((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ? \
((g)+1) : &(b)->bs_grus[0])
-static void gru_steal_context(struct gru_thread_state *gts)
+static int is_gts_stealable(struct gru_thread_state *gts,
+ struct gru_blade_state *bs)
+{
+ if (is_kernel_context(gts))
+ return down_write_trylock(&bs->bs_kgts_sema);
+ else
+ return mutex_trylock(&gts->ts_ctxlock);
+}
+
+static void gts_stolen(struct gru_thread_state *gts,
+ struct gru_blade_state *bs)
+{
+ if (is_kernel_context(gts)) {
+ up_write(&bs->bs_kgts_sema);
+ STAT(steal_kernel_context);
+ } else {
+ mutex_unlock(&gts->ts_ctxlock);
+ STAT(steal_user_context);
+ }
+}
+
+void gru_steal_context(struct gru_thread_state *gts)
{
struct gru_blade_state *blade;
struct gru_state *gru, *gru0;
struct gru_thread_state *ngts = NULL;
int ctxnum, ctxnum0, flag = 0, cbr, dsr;
+ int blade_id;
+ blade_id = gts->ts_user_blade_id;
+ if (blade_id < 0)
+ blade_id = uv_numa_blade_id();
cbr = gts->ts_cbr_au_count;
dsr = gts->ts_dsr_au_count;
- preempt_disable();
- blade = gru_base[uv_numa_blade_id()];
+ blade = gru_base[blade_id];
spin_lock(&blade->bs_lock);
ctxnum = next_ctxnum(blade->bs_lru_ctxnum);
gru = blade->bs_lru_gru;
if (ctxnum == 0)
gru = next_gru(blade, gru);
+ blade->bs_lru_gru = gru;
+ blade->bs_lru_ctxnum = ctxnum;
ctxnum0 = ctxnum;
gru0 = gru;
while (1) {
- if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH))
- break;
- spin_lock(&gru->gs_lock);
- for (; ctxnum < GRU_NUM_CCH; ctxnum++) {
- if (flag && gru == gru0 && ctxnum == ctxnum0)
+ if (gru_check_chiplet_assignment(gru, gts)) {
+ if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH))
break;
- ngts = gru->gs_gts[ctxnum];
- /*
- * We are grabbing locks out of order, so trylock is
- * needed. GTSs are usually not locked, so the odds of
- * success are high. If trylock fails, try to steal a
- * different GSEG.
- */
- if (ngts && mutex_trylock(&ngts->ts_ctxlock))
+ spin_lock(&gru->gs_lock);
+ for (; ctxnum < GRU_NUM_CCH; ctxnum++) {
+ if (flag && gru == gru0 && ctxnum == ctxnum0)
+ break;
+ ngts = gru->gs_gts[ctxnum];
+ /*
+ * We are grabbing locks out of order, so trylock is
+ * needed. GTSs are usually not locked, so the odds of
+ * success are high. If trylock fails, try to steal a
+ * different GSEG.
+ */
+ if (ngts && is_gts_stealable(ngts, blade))
+ break;
+ ngts = NULL;
+ }
+ spin_unlock(&gru->gs_lock);
+ if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0))
break;
- ngts = NULL;
- flag = 1;
}
- spin_unlock(&gru->gs_lock);
- if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0))
+ if (flag && gru == gru0)
break;
+ flag = 1;
ctxnum = 0;
gru = next_gru(blade, gru);
}
- blade->bs_lru_gru = gru;
- blade->bs_lru_ctxnum = ctxnum;
spin_unlock(&blade->bs_lock);
- preempt_enable();
if (ngts) {
- STAT(steal_context);
+ gts->ustats.context_stolen++;
ngts->ts_steal_jiffies = jiffies;
- gru_unload_context(ngts, 1);
- mutex_unlock(&ngts->ts_ctxlock);
+ gru_unload_context(ngts, is_kernel_context(ngts) ? 0 : 1);
+ gts_stolen(ngts, blade);
} else {
STAT(steal_context_failed);
}
@@ -708,19 +852,34 @@ static void gru_steal_context(struct gru_thread_state *gts)
}
/*
+ * Assign a gru context.
+ */
+static int gru_assign_context_number(struct gru_state *gru)
+{
+ int ctxnum;
+
+ ctxnum = find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH);
+ __set_bit(ctxnum, &gru->gs_context_map);
+ return ctxnum;
+}
+
+/*
* Scan the GRUs on the local blade & assign a GRU context.
*/
-static struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts)
+struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts)
{
struct gru_state *gru, *grux;
int i, max_active_contexts;
+ int blade_id = gts->ts_user_blade_id;
- preempt_disable();
-
+ if (blade_id < 0)
+ blade_id = uv_numa_blade_id();
again:
gru = NULL;
max_active_contexts = GRU_NUM_CCH;
- for_each_gru_on_blade(grux, uv_numa_blade_id(), i) {
+ for_each_gru_on_blade(grux, blade_id, i) {
+ if (!gru_check_chiplet_assignment(grux, gts))
+ continue;
if (check_gru_resources(grux, gts->ts_cbr_au_count,
gts->ts_dsr_au_count,
max_active_contexts)) {
@@ -741,12 +900,9 @@ again:
reserve_gru_resources(gru, gts);
gts->ts_gru = gru;
gts->ts_blade = gru->gs_blade_id;
- gts->ts_ctxnum =
- find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH);
- BUG_ON(gts->ts_ctxnum == GRU_NUM_CCH);
+ gts->ts_ctxnum = gru_assign_context_number(gru);
atomic_inc(&gts->ts_refcnt);
gru->gs_gts[gts->ts_ctxnum] = gts;
- __set_bit(gts->ts_ctxnum, &gru->gs_context_map);
spin_unlock(&gru->gs_lock);
STAT(assign_context);
@@ -760,7 +916,6 @@ again:
STAT(assign_context_failed);
}
- preempt_enable();
return gru;
}
@@ -789,20 +944,15 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
again:
mutex_lock(&gts->ts_ctxlock);
preempt_disable();
- if (gts->ts_gru) {
- if (gts->ts_gru->gs_blade_id != uv_numa_blade_id()) {
- STAT(migrated_nopfn_unload);
- gru_unload_context(gts, 1);
- } else {
- if (gru_retarget_intr(gts))
- STAT(migrated_nopfn_retarget);
- }
- }
+
+ gru_check_context_placement(gts);
if (!gts->ts_gru) {
+ STAT(load_user_context);
if (!gru_assign_gru_context(gts)) {
- mutex_unlock(&gts->ts_ctxlock);
preempt_enable();
+ mutex_unlock(&gts->ts_ctxlock);
+ set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */
if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies)
gru_steal_context(gts);
@@ -815,8 +965,8 @@ again:
vma->vm_page_prot);
}
- mutex_unlock(&gts->ts_ctxlock);
preempt_enable();
+ mutex_unlock(&gts->ts_ctxlock);
return VM_FAULT_NOPAGE;
}
diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c
index ee74821b171..4f763592239 100644
--- a/drivers/misc/sgi-gru/gruprocfs.c
+++ b/drivers/misc/sgi-gru/gruprocfs.c
@@ -36,8 +36,7 @@ static void printstat_val(struct seq_file *s, atomic_long_t *v, char *id)
{
unsigned long val = atomic_long_read(v);
- if (val)
- seq_printf(s, "%16lu %s\n", val, id);
+ seq_printf(s, "%16lu %s\n", val, id);
}
static int statistics_show(struct seq_file *s, void *p)
@@ -46,52 +45,55 @@ static int statistics_show(struct seq_file *s, void *p)
printstat(s, vdata_free);
printstat(s, gts_alloc);
printstat(s, gts_free);
- printstat(s, vdata_double_alloc);
+ printstat(s, gms_alloc);
+ printstat(s, gms_free);
printstat(s, gts_double_allocate);
printstat(s, assign_context);
printstat(s, assign_context_failed);
printstat(s, free_context);
- printstat(s, load_context);
- printstat(s, unload_context);
- printstat(s, steal_context);
+ printstat(s, load_user_context);
+ printstat(s, load_kernel_context);
+ printstat(s, lock_kernel_context);
+ printstat(s, unlock_kernel_context);
+ printstat(s, steal_user_context);
+ printstat(s, steal_kernel_context);
printstat(s, steal_context_failed);
printstat(s, nopfn);
- printstat(s, break_cow);
printstat(s, asid_new);
printstat(s, asid_next);
printstat(s, asid_wrap);
printstat(s, asid_reuse);
printstat(s, intr);
+ printstat(s, intr_cbr);
+ printstat(s, intr_tfh);
+ printstat(s, intr_spurious);
printstat(s, intr_mm_lock_failed);
printstat(s, call_os);
- printstat(s, call_os_offnode_reference);
- printstat(s, call_os_check_for_bug);
printstat(s, call_os_wait_queue);
printstat(s, user_flush_tlb);
printstat(s, user_unload_context);
printstat(s, user_exception);
- printstat(s, set_task_slice);
- printstat(s, migrate_check);
- printstat(s, migrated_retarget);
- printstat(s, migrated_unload);
- printstat(s, migrated_unload_delay);
- printstat(s, migrated_nopfn_retarget);
- printstat(s, migrated_nopfn_unload);
+ printstat(s, set_context_option);
+ printstat(s, check_context_retarget_intr);
+ printstat(s, check_context_unload);
printstat(s, tlb_dropin);
+ printstat(s, tlb_preload_page);
printstat(s, tlb_dropin_fail_no_asid);
printstat(s, tlb_dropin_fail_upm);
printstat(s, tlb_dropin_fail_invalid);
printstat(s, tlb_dropin_fail_range_active);
printstat(s, tlb_dropin_fail_idle);
printstat(s, tlb_dropin_fail_fmm);
+ printstat(s, tlb_dropin_fail_no_exception);
+ printstat(s, tfh_stale_on_fault);
printstat(s, mmu_invalidate_range);
printstat(s, mmu_invalidate_page);
- printstat(s, mmu_clear_flush_young);
printstat(s, flush_tlb);
printstat(s, flush_tlb_gru);
printstat(s, flush_tlb_gru_tgh);
printstat(s, flush_tlb_gru_zero_asid);
printstat(s, copy_gpa);
+ printstat(s, read_gpa);
printstat(s, mesq_receive);
printstat(s, mesq_receive_none);
printstat(s, mesq_send);
@@ -102,7 +104,6 @@ static int statistics_show(struct seq_file *s, void *p)
printstat(s, mesq_send_qlimit_reached);
printstat(s, mesq_send_amo_nacked);
printstat(s, mesq_send_put_nacked);
- printstat(s, mesq_qf_not_full);
printstat(s, mesq_qf_locked);
printstat(s, mesq_qf_noop_not_full);
printstat(s, mesq_qf_switch_head_failed);
@@ -112,6 +113,7 @@ static int statistics_show(struct seq_file *s, void *p)
printstat(s, mesq_noop_qlimit_reached);
printstat(s, mesq_noop_amo_nacked);
printstat(s, mesq_noop_put_nacked);
+ printstat(s, mesq_noop_page_overflow);
return 0;
}
@@ -127,8 +129,10 @@ static int mcs_statistics_show(struct seq_file *s, void *p)
int op;
unsigned long total, count, max;
static char *id[] = {"cch_allocate", "cch_start", "cch_interrupt",
- "cch_interrupt_sync", "cch_deallocate", "tgh_invalidate"};
+ "cch_interrupt_sync", "cch_deallocate", "tfh_write_only",
+ "tfh_write_restart", "tgh_invalidate"};
+ seq_printf(s, "%-20s%12s%12s%12s\n", "#id", "count", "aver-clks", "max-clks");
for (op = 0; op < mcsop_last; op++) {
count = atomic_long_read(&mcs_op_statistics[op].count);
total = atomic_long_read(&mcs_op_statistics[op].total);
@@ -148,6 +152,7 @@ static ssize_t mcs_statistics_write(struct file *file,
static int options_show(struct seq_file *s, void *p)
{
+ seq_printf(s, "#bitmask: 1=trace, 2=statistics\n");
seq_printf(s, "0x%lx\n", gru_options);
return 0;
}
@@ -155,15 +160,11 @@ static int options_show(struct seq_file *s, void *p)
static ssize_t options_write(struct file *file, const char __user *userbuf,
size_t count, loff_t *data)
{
- unsigned long val;
- char buf[80];
+ int ret;
- if (copy_from_user
- (buf, userbuf, count < sizeof(buf) ? count : sizeof(buf)))
- return -EFAULT;
- buf[count - 1] = '\0';
- if (!strict_strtoul(buf, 10, &val))
- gru_options = val;
+ ret = kstrtoul_from_user(userbuf, count, 0, &gru_options);
+ if (ret)
+ return ret;
return count;
}
@@ -177,16 +178,17 @@ static int cch_seq_show(struct seq_file *file, void *data)
const char *mode[] = { "??", "UPM", "INTR", "OS_POLL" };
if (gid == 0)
- seq_printf(file, "#%5s%5s%6s%9s%6s%8s%8s\n", "gid", "bid",
- "ctx#", "pid", "cbrs", "dsbytes", "mode");
+ seq_printf(file, "#%5s%5s%6s%7s%9s%6s%8s%8s\n", "gid", "bid",
+ "ctx#", "asid", "pid", "cbrs", "dsbytes", "mode");
if (gru)
for (i = 0; i < GRU_NUM_CCH; i++) {
ts = gru->gs_gts[i];
if (!ts)
continue;
- seq_printf(file, " %5d%5d%6d%9d%6d%8d%8s\n",
+ seq_printf(file, " %5d%5d%6d%7d%9d%6d%8d%8s\n",
gru->gs_gid, gru->gs_blade_id, i,
- ts->ts_tgid_owner,
+ is_kernel_context(ts) ? 0 : ts->ts_gms->ms_asids[gid].mt_asid,
+ is_kernel_context(ts) ? 0 : ts->ts_tgid_owner,
ts->ts_cbr_au_count * GRU_CBR_AU_SIZE,
ts->ts_cbr_au_count * GRU_DSR_AU_BYTES,
mode[ts->ts_user_options &
@@ -318,7 +320,7 @@ static const struct file_operations gru_fops = {
static struct proc_entry {
char *name;
- int mode;
+ umode_t mode;
const struct file_operations *fops;
struct proc_dir_entry *entry;
} proc_files[] = {
@@ -335,10 +337,9 @@ static struct proc_dir_entry *proc_gru __read_mostly;
static int create_proc_file(struct proc_entry *p)
{
- p->entry = create_proc_entry(p->name, p->mode, proc_gru);
+ p->entry = proc_create(p->name, p->mode, proc_gru, p->fops);
if (!p->entry)
return -1;
- p->entry->proc_fops = p->fops;
return 0;
}
@@ -350,7 +351,7 @@ static void delete_proc_files(void)
for (p = proc_files; p->name; p++)
if (p->entry)
remove_proc_entry(p->name, proc_gru);
- remove_proc_entry("gru", NULL);
+ proc_remove(proc_gru);
}
}
diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h
index bf1eeb7553e..5c3ce245967 100644
--- a/drivers/misc/sgi-gru/grutables.h
+++ b/drivers/misc/sgi-gru/grutables.h
@@ -148,18 +148,20 @@
#include <linux/wait.h>
#include <linux/mmu_notifier.h>
#include "gru.h"
+#include "grulib.h"
#include "gruhandles.h"
extern struct gru_stats_s gru_stats;
extern struct gru_blade_state *gru_base[];
extern unsigned long gru_start_paddr, gru_end_paddr;
+extern void *gru_start_vaddr;
extern unsigned int gru_max_gids;
#define GRU_MAX_BLADES MAX_NUMNODES
#define GRU_MAX_GRUS (GRU_MAX_BLADES * GRU_CHIPLETS_PER_BLADE)
#define GRU_DRIVER_ID_STR "SGI GRU Device Driver"
-#define GRU_DRIVER_VERSION_STR "0.80"
+#define GRU_DRIVER_VERSION_STR "0.85"
/*
* GRU statistics.
@@ -169,53 +171,56 @@ struct gru_stats_s {
atomic_long_t vdata_free;
atomic_long_t gts_alloc;
atomic_long_t gts_free;
- atomic_long_t vdata_double_alloc;
+ atomic_long_t gms_alloc;
+ atomic_long_t gms_free;
atomic_long_t gts_double_allocate;
atomic_long_t assign_context;
atomic_long_t assign_context_failed;
atomic_long_t free_context;
- atomic_long_t load_context;
- atomic_long_t unload_context;
- atomic_long_t steal_context;
+ atomic_long_t load_user_context;
+ atomic_long_t load_kernel_context;
+ atomic_long_t lock_kernel_context;
+ atomic_long_t unlock_kernel_context;
+ atomic_long_t steal_user_context;
+ atomic_long_t steal_kernel_context;
atomic_long_t steal_context_failed;
atomic_long_t nopfn;
- atomic_long_t break_cow;
atomic_long_t asid_new;
atomic_long_t asid_next;
atomic_long_t asid_wrap;
atomic_long_t asid_reuse;
atomic_long_t intr;
+ atomic_long_t intr_cbr;
+ atomic_long_t intr_tfh;
+ atomic_long_t intr_spurious;
atomic_long_t intr_mm_lock_failed;
atomic_long_t call_os;
- atomic_long_t call_os_offnode_reference;
- atomic_long_t call_os_check_for_bug;
atomic_long_t call_os_wait_queue;
atomic_long_t user_flush_tlb;
atomic_long_t user_unload_context;
atomic_long_t user_exception;
- atomic_long_t set_task_slice;
- atomic_long_t migrate_check;
- atomic_long_t migrated_retarget;
- atomic_long_t migrated_unload;
- atomic_long_t migrated_unload_delay;
- atomic_long_t migrated_nopfn_retarget;
- atomic_long_t migrated_nopfn_unload;
+ atomic_long_t set_context_option;
+ atomic_long_t check_context_retarget_intr;
+ atomic_long_t check_context_unload;
atomic_long_t tlb_dropin;
+ atomic_long_t tlb_preload_page;
atomic_long_t tlb_dropin_fail_no_asid;
atomic_long_t tlb_dropin_fail_upm;
atomic_long_t tlb_dropin_fail_invalid;
atomic_long_t tlb_dropin_fail_range_active;
atomic_long_t tlb_dropin_fail_idle;
atomic_long_t tlb_dropin_fail_fmm;
+ atomic_long_t tlb_dropin_fail_no_exception;
+ atomic_long_t tfh_stale_on_fault;
atomic_long_t mmu_invalidate_range;
atomic_long_t mmu_invalidate_page;
- atomic_long_t mmu_clear_flush_young;
atomic_long_t flush_tlb;
atomic_long_t flush_tlb_gru;
atomic_long_t flush_tlb_gru_tgh;
atomic_long_t flush_tlb_gru_zero_asid;
atomic_long_t copy_gpa;
+ atomic_long_t read_gpa;
atomic_long_t mesq_receive;
atomic_long_t mesq_receive_none;
@@ -227,7 +232,7 @@ struct gru_stats_s {
atomic_long_t mesq_send_qlimit_reached;
atomic_long_t mesq_send_amo_nacked;
atomic_long_t mesq_send_put_nacked;
- atomic_long_t mesq_qf_not_full;
+ atomic_long_t mesq_page_overflow;
atomic_long_t mesq_qf_locked;
atomic_long_t mesq_qf_noop_not_full;
atomic_long_t mesq_qf_switch_head_failed;
@@ -237,11 +242,13 @@ struct gru_stats_s {
atomic_long_t mesq_noop_qlimit_reached;
atomic_long_t mesq_noop_amo_nacked;
atomic_long_t mesq_noop_put_nacked;
+ atomic_long_t mesq_noop_page_overflow;
};
enum mcs_op {cchop_allocate, cchop_start, cchop_interrupt, cchop_interrupt_sync,
- cchop_deallocate, tghop_invalidate, mcsop_last};
+ cchop_deallocate, tfhop_write_only, tfhop_write_restart,
+ tghop_invalidate, mcsop_last};
struct mcs_op_statistic {
atomic_long_t count;
@@ -251,9 +258,8 @@ struct mcs_op_statistic {
extern struct mcs_op_statistic mcs_op_statistics[mcsop_last];
-#define OPT_DPRINT 1
-#define OPT_STATS 2
-#define GRU_QUICKLOOK 4
+#define OPT_DPRINT 1
+#define OPT_STATS 2
#define IRQ_GRU 110 /* Starting IRQ number for interrupts */
@@ -276,7 +282,7 @@ extern struct mcs_op_statistic mcs_op_statistics[mcsop_last];
#define gru_dbg(dev, fmt, x...) \
do { \
if (gru_options & OPT_DPRINT) \
- dev_dbg(dev, "%s: " fmt, __func__, x); \
+ printk(KERN_DEBUG "GRU:%d %s: " fmt, smp_processor_id(), __func__, x);\
} while (0)
#else
#define gru_dbg(x...)
@@ -290,13 +296,7 @@ extern struct mcs_op_statistic mcs_op_statistics[mcsop_last];
#define ASID_INC 8 /* number of regions */
/* Generate a GRU asid value from a GRU base asid & a virtual address. */
-#if defined CONFIG_IA64
#define VADDR_HI_BIT 64
-#elif defined CONFIG_X86_64
-#define VADDR_HI_BIT 48
-#else
-#error "Unsupported architecture"
-#endif
#define GRUREGION(addr) ((addr) >> (VADDR_HI_BIT - 3) & 3)
#define GRUASID(asid, addr) ((asid) + GRUREGION(addr))
@@ -338,6 +338,7 @@ struct gru_vma_data {
long vd_user_options;/* misc user option flags */
int vd_cbr_au_count;
int vd_dsr_au_count;
+ unsigned char vd_tlb_preload_count;
};
/*
@@ -353,6 +354,7 @@ struct gru_thread_state {
struct gru_state *ts_gru; /* GRU where the context is
loaded */
struct gru_mm_struct *ts_gms; /* asid & ioproc struct */
+ unsigned char ts_tlb_preload_count; /* TLB preload pages */
unsigned long ts_cbr_map; /* map of allocated CBRs */
unsigned long ts_dsr_map; /* map of allocated DATA
resources */
@@ -361,6 +363,8 @@ struct gru_thread_state {
long ts_user_options;/* misc user option flags */
pid_t ts_tgid_owner; /* task that is using the
context - for migration */
+ short ts_user_blade_id;/* user selected blade */
+ char ts_user_chiplet_id;/* user selected chiplet */
unsigned short ts_sizeavail; /* Pagesizes in use */
int ts_tsid; /* thread that owns the
structure */
@@ -373,13 +377,15 @@ struct gru_thread_state {
required for contest */
unsigned char ts_cbr_au_count;/* Number of CBR resources
required for contest */
+ char ts_cch_req_slice;/* CCH packet slice */
char ts_blade; /* If >= 0, migrate context if
- ref from diferent blade */
+ ref from different blade */
char ts_force_cch_reload;
- char ts_force_unload;/* force context to be unloaded
- after migration */
char ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each
allocated CB */
+ int ts_data_valid; /* Indicates if ts_gdata has
+ valid data */
+ struct gru_gseg_statistics ustats; /* User statistics */
unsigned long ts_gdata[0]; /* save area for GRU data (CB,
DS, CBE) */
};
@@ -411,6 +417,7 @@ struct gru_state {
gru segments (64) */
unsigned short gs_gid; /* unique GRU number */
unsigned short gs_blade_id; /* blade of GRU */
+ unsigned char gs_chiplet_id; /* blade chiplet of GRU */
unsigned char gs_tgh_local_shift; /* used to pick TGH for
local flush */
unsigned char gs_tgh_first_remote; /* starting TGH# for
@@ -442,6 +449,7 @@ struct gru_state {
in use */
struct gru_thread_state *gs_gts[GRU_NUM_CCH]; /* GTS currently using
the context */
+ int gs_irq[GRU_NUM_TFM]; /* Interrupt irqs */
};
/*
@@ -452,6 +460,14 @@ struct gru_blade_state {
reserved cb */
void *kernel_dsr; /* First kernel
reserved DSR */
+ struct rw_semaphore bs_kgts_sema; /* lock for kgts */
+ struct gru_thread_state *bs_kgts; /* GTS for kernel use */
+
+ /* ---- the following are used for managing kernel async GRU CBRs --- */
+ int bs_async_dsr_bytes; /* DSRs for async */
+ int bs_async_cbrs; /* CBRs AU for async */
+ struct completion *bs_async_wq;
+
/* ---- the following are protected by the bs_lock spinlock ---- */
spinlock_t bs_lock; /* lock used for
stealing contexts */
@@ -500,8 +516,7 @@ struct gru_blade_state {
/* Scan all active GRUs in a GRU bitmap */
#define for_each_gru_in_bitmap(gid, map) \
- for ((gid) = find_first_bit((map), GRU_MAX_GRUS); (gid) < GRU_MAX_GRUS;\
- (gid)++, (gid) = find_next_bit((map), GRU_MAX_GRUS, (gid)))
+ for_each_set_bit((gid), (map), GRU_MAX_GRUS)
/* Scan all active GRUs on a specific blade */
#define for_each_gru_on_blade(gru, nid, i) \
@@ -520,23 +535,17 @@ struct gru_blade_state {
/* Scan each CBR whose bit is set in a TFM (or copy of) */
#define for_each_cbr_in_tfm(i, map) \
- for ((i) = find_first_bit(map, GRU_NUM_CBE); \
- (i) < GRU_NUM_CBE; \
- (i)++, (i) = find_next_bit(map, GRU_NUM_CBE, i))
+ for_each_set_bit((i), (map), GRU_NUM_CBE)
/* Scan each CBR in a CBR bitmap. Note: multiple CBRs in an allocation unit */
#define for_each_cbr_in_allocation_map(i, map, k) \
- for ((k) = find_first_bit(map, GRU_CBR_AU); (k) < GRU_CBR_AU; \
- (k) = find_next_bit(map, GRU_CBR_AU, (k) + 1)) \
+ for_each_set_bit((k), (map), GRU_CBR_AU) \
for ((i) = (k)*GRU_CBR_AU_SIZE; \
(i) < ((k) + 1) * GRU_CBR_AU_SIZE; (i)++)
/* Scan each DSR in a DSR bitmap. Note: multiple DSRs in an allocation unit */
#define for_each_dsr_in_allocation_map(i, map, k) \
- for ((k) = find_first_bit((const unsigned long *)map, GRU_DSR_AU);\
- (k) < GRU_DSR_AU; \
- (k) = find_next_bit((const unsigned long *)map, \
- GRU_DSR_AU, (k) + 1)) \
+ for_each_set_bit((k), (const unsigned long *)(map), GRU_DSR_AU) \
for ((i) = (k) * GRU_DSR_AU_CL; \
(i) < ((k) + 1) * GRU_DSR_AU_CL; (i)++)
@@ -552,6 +561,12 @@ struct gru_blade_state {
/* Lock hierarchy checking enabled only in emulator */
+/* 0 = lock failed, 1 = locked */
+static inline int __trylock_handle(void *h)
+{
+ return !test_and_set_bit(1, h);
+}
+
static inline void __lock_handle(void *h)
{
while (test_and_set_bit(1, h))
@@ -563,6 +578,11 @@ static inline void __unlock_handle(void *h)
clear_bit(1, h);
}
+static inline int trylock_cch_handle(struct gru_context_configuration_handle *cch)
+{
+ return __trylock_handle(cch);
+}
+
static inline void lock_cch_handle(struct gru_context_configuration_handle *cch)
{
__lock_handle(cch);
@@ -584,12 +604,26 @@ static inline void unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
__unlock_handle(tgh);
}
+static inline int is_kernel_context(struct gru_thread_state *gts)
+{
+ return !gts->ts_mm;
+}
+
+/*
+ * The following are for Nehelem-EX. A more general scheme is needed for
+ * future processors.
+ */
+#define UV_MAX_INT_CORES 8
+#define uv_cpu_socket_number(p) ((cpu_physical_id(p) >> 5) & 1)
+#define uv_cpu_ht_number(p) (cpu_physical_id(p) & 1)
+#define uv_cpu_core_number(p) (((cpu_physical_id(p) >> 2) & 4) | \
+ ((cpu_physical_id(p) >> 1) & 3))
/*-----------------------------------------------------------------------------
* Function prototypes & externs
*/
struct gru_unload_context_req;
-extern struct vm_operations_struct gru_vm_ops;
+extern const struct vm_operations_struct gru_vm_ops;
extern struct device *grudev;
extern struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma,
@@ -598,24 +632,35 @@ extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct
*vma, int tsid);
extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct
*vma, int tsid);
+extern struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts);
+extern void gru_load_context(struct gru_thread_state *gts);
+extern void gru_steal_context(struct gru_thread_state *gts);
extern void gru_unload_context(struct gru_thread_state *gts, int savestate);
-extern int gru_update_cch(struct gru_thread_state *gts, int force_unload);
+extern int gru_update_cch(struct gru_thread_state *gts);
extern void gts_drop(struct gru_thread_state *gts);
extern void gru_tgh_flush_init(struct gru_state *gru);
-extern int gru_kservices_init(struct gru_state *gru);
-extern void gru_kservices_exit(struct gru_state *gru);
-extern irqreturn_t gru_intr(int irq, void *dev_id);
+extern int gru_kservices_init(void);
+extern void gru_kservices_exit(void);
+extern irqreturn_t gru0_intr(int irq, void *dev_id);
+extern irqreturn_t gru1_intr(int irq, void *dev_id);
+extern irqreturn_t gru_intr_mblade(int irq, void *dev_id);
+extern int gru_dump_chiplet_request(unsigned long arg);
+extern long gru_get_gseg_statistics(unsigned long arg);
extern int gru_handle_user_call_os(unsigned long address);
extern int gru_user_flush_tlb(unsigned long arg);
extern int gru_user_unload_context(unsigned long arg);
extern int gru_get_exception_detail(unsigned long arg);
-extern int gru_set_task_slice(long address);
+extern int gru_set_context_option(unsigned long address);
+extern void gru_check_context_placement(struct gru_thread_state *gts);
extern int gru_cpu_fault_map_id(void);
extern struct vm_area_struct *gru_find_vma(unsigned long vaddr);
extern void gru_flush_all_tlb(struct gru_state *gru);
extern int gru_proc_init(void);
extern void gru_proc_exit(void);
+extern struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
+ int cbr_au_count, int dsr_au_count,
+ unsigned char tlb_preload_count, int options, int tsid);
extern unsigned long gru_reserve_cb_resources(struct gru_state *gru,
int cbr_au_count, char *cbmap);
extern unsigned long gru_reserve_ds_resources(struct gru_state *gru,
@@ -624,6 +669,7 @@ extern int gru_fault(struct vm_area_struct *, struct vm_fault *vmf);
extern struct gru_mm_struct *gru_register_mmu_notifier(void);
extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms);
+extern int gru_ktest(unsigned long arg);
extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
unsigned long len);
diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c
index 1d125091f5e..2129274ef7a 100644
--- a/drivers/misc/sgi-gru/grutlbpurge.c
+++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -184,8 +184,8 @@ void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
STAT(flush_tlb_gru_tgh);
asid = GRUASID(asid, start);
gru_dbg(grudev,
- " FLUSH gruid %d, asid 0x%x, num %ld, cbmap 0x%x\n",
- gid, asid, num, asids->mt_ctxbitmap);
+ " FLUSH gruid %d, asid 0x%x, vaddr 0x%lx, vamask 0x%x, num %ld, cbmap 0x%x\n",
+ gid, asid, start, grupagesize, num, asids->mt_ctxbitmap);
tgh = get_lock_tgh_handle(gru);
tgh_invalidate(tgh, start, ~0, asid, grupagesize, 0,
num - 1, asids->mt_ctxbitmap);
@@ -280,11 +280,10 @@ static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm,
const struct mmu_notifier_ops *ops)
{
struct mmu_notifier *mn, *gru_mn = NULL;
- struct hlist_node *n;
if (mm->mmu_notifier_mm) {
rcu_read_lock();
- hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list,
+ hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list,
hlist)
if (mn->ops == ops) {
gru_mn = mn;
@@ -299,6 +298,7 @@ struct gru_mm_struct *gru_register_mmu_notifier(void)
{
struct gru_mm_struct *gms;
struct mmu_notifier *mn;
+ int err;
mn = mmu_find_ops(current->mm, &gru_mmuops);
if (mn) {
@@ -307,16 +307,22 @@ struct gru_mm_struct *gru_register_mmu_notifier(void)
} else {
gms = kzalloc(sizeof(*gms), GFP_KERNEL);
if (gms) {
+ STAT(gms_alloc);
spin_lock_init(&gms->ms_asid_lock);
gms->ms_notifier.ops = &gru_mmuops;
atomic_set(&gms->ms_refcnt, 1);
init_waitqueue_head(&gms->ms_wait_queue);
- __mmu_notifier_register(&gms->ms_notifier, current->mm);
+ err = __mmu_notifier_register(&gms->ms_notifier, current->mm);
+ if (err)
+ goto error;
}
}
gru_dbg(grudev, "gms %p, refcnt %d\n", gms,
atomic_read(&gms->ms_refcnt));
return gms;
+error:
+ kfree(gms);
+ return ERR_PTR(err);
}
void gru_drop_mmu_notifier(struct gru_mm_struct *gms)
@@ -327,6 +333,7 @@ void gru_drop_mmu_notifier(struct gru_mm_struct *gms)
if (!gms->ms_released)
mmu_notifier_unregister(&gms->ms_notifier, current->mm);
kfree(gms);
+ STAT(gms_free);
}
}