diff options
Diffstat (limited to 'drivers/misc/sgi-gru')
| -rw-r--r-- | drivers/misc/sgi-gru/Makefile | 6 | ||||
| -rw-r--r-- | drivers/misc/sgi-gru/gru.h | 11 | ||||
| -rw-r--r-- | drivers/misc/sgi-gru/gru_instructions.h | 211 | ||||
| -rw-r--r-- | drivers/misc/sgi-gru/grufault.c | 411 | ||||
| -rw-r--r-- | drivers/misc/sgi-gru/grufile.c | 381 | ||||
| -rw-r--r-- | drivers/misc/sgi-gru/gruhandles.c | 81 | ||||
| -rw-r--r-- | drivers/misc/sgi-gru/gruhandles.h | 67 | ||||
| -rw-r--r-- | drivers/misc/sgi-gru/grukdump.c | 234 | ||||
| -rw-r--r-- | drivers/misc/sgi-gru/grukservices.c | 699 | ||||
| -rw-r--r-- | drivers/misc/sgi-gru/grukservices.h | 65 | ||||
| -rw-r--r-- | drivers/misc/sgi-gru/grulib.h | 64 | ||||
| -rw-r--r-- | drivers/misc/sgi-gru/grumain.c | 374 | ||||
| -rw-r--r-- | drivers/misc/sgi-gru/gruprocfs.c | 71 | ||||
| -rw-r--r-- | drivers/misc/sgi-gru/grutables.h | 142 | ||||
| -rw-r--r-- | drivers/misc/sgi-gru/grutlbpurge.c | 17 |
15 files changed, 2144 insertions, 690 deletions
diff --git a/drivers/misc/sgi-gru/Makefile b/drivers/misc/sgi-gru/Makefile index bcd8136d2f9..0003a1d56f7 100644 --- a/drivers/misc/sgi-gru/Makefile +++ b/drivers/misc/sgi-gru/Makefile @@ -1,7 +1,5 @@ -ifdef CONFIG_SGI_GRU_DEBUG - EXTRA_CFLAGS += -DDEBUG -endif +ccflags-$(CONFIG_SGI_GRU_DEBUG) := -DDEBUG obj-$(CONFIG_SGI_GRU) := gru.o -gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o gruhandles.o +gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o gruhandles.o grukdump.o diff --git a/drivers/misc/sgi-gru/gru.h b/drivers/misc/sgi-gru/gru.h index f93f03a9e6e..3ad76cd18b4 100644 --- a/drivers/misc/sgi-gru/gru.h +++ b/drivers/misc/sgi-gru/gru.h @@ -53,6 +53,17 @@ struct gru_chiplet_info { int free_user_cbr; }; +/* + * Statictics kept for each context. + */ +struct gru_gseg_statistics { + unsigned long fmm_tlbmiss; + unsigned long upm_tlbmiss; + unsigned long tlbdropin; + unsigned long context_stolen; + unsigned long reserved[10]; +}; + /* Flags for GRU options on the gru_create_context() call */ /* Select one of the follow 4 options to specify how TLB misses are handled */ #define GRU_OPT_MISS_DEFAULT 0x0000 /* Use default mode */ diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h index 3fde33c1e8f..04d5170ac14 100644 --- a/drivers/misc/sgi-gru/gru_instructions.h +++ b/drivers/misc/sgi-gru/gru_instructions.h @@ -34,17 +34,18 @@ extern void gru_wait_abort_proc(void *cb); #include <asm/intrinsics.h> #define __flush_cache(p) ia64_fc((unsigned long)p) /* Use volatile on IA64 to ensure ordering via st4.rel */ -#define gru_ordered_store_int(p, v) \ +#define gru_ordered_store_ulong(p, v) \ do { \ barrier(); \ - *((volatile int *)(p)) = v; /* force st.rel */ \ + *((volatile unsigned long *)(p)) = v; /* force st.rel */ \ } while (0) #elif defined(CONFIG_X86_64) +#include <asm/cacheflush.h> #define __flush_cache(p) clflush(p) -#define gru_ordered_store_int(p, v) \ +#define gru_ordered_store_ulong(p, v) \ do { \ barrier(); \ - *(int *)p = v; \ + *(unsigned long *)p = v; \ } while (0) #else #error "Unsupported architecture" @@ -81,6 +82,8 @@ struct control_block_extended_exc_detail { int exopc; long exceptdet0; int exceptdet1; + int cbrstate; + int cbrexecstatus; }; /* @@ -107,7 +110,8 @@ struct gru_instruction_bits { unsigned char reserved2: 2; unsigned char istatus: 2; unsigned char isubstatus:4; - unsigned char reserved3: 2; + unsigned char reserved3: 1; + unsigned char tlb_fault_color: 1; /* DW 1 */ unsigned long idef4; /* 42 bits: TRi1, BufSize */ /* DW 2-6 */ @@ -126,8 +130,13 @@ struct gru_instruction_bits { */ struct gru_instruction { /* DW 0 */ - unsigned int op32; /* icmd,xtype,iaa0,ima,opc */ - unsigned int tri0; + union { + unsigned long op64; /* icmd,xtype,iaa0,ima,opc,tri0 */ + struct { + unsigned int op32; + unsigned int tri0; + }; + }; unsigned long tri1_bufsize; /* DW 1 */ unsigned long baddr0; /* DW 2 */ unsigned long nelem; /* DW 3 */ @@ -137,7 +146,7 @@ struct gru_instruction { unsigned long avalue; /* DW 7 */ }; -/* Some shifts and masks for the low 32 bits of a GRU command */ +/* Some shifts and masks for the low 64 bits of a GRU command */ #define GRU_CB_ICMD_SHFT 0 #define GRU_CB_ICMD_MASK 0x1 #define GRU_CB_XTYPE_SHFT 8 @@ -152,6 +161,10 @@ struct gru_instruction { #define GRU_CB_OPC_MASK 0xff #define GRU_CB_EXOPC_SHFT 24 #define GRU_CB_EXOPC_MASK 0xff +#define GRU_IDEF2_SHFT 32 +#define GRU_IDEF2_MASK 0x3ffff +#define GRU_ISTATUS_SHFT 56 +#define GRU_ISTATUS_MASK 0x3 /* GRU instruction opcodes (opc field) */ #define OP_NOP 0x00 @@ -250,17 +263,40 @@ struct gru_instruction { #define CBE_CAUSE_HA_RESPONSE_FATAL (1 << 13) #define CBE_CAUSE_HA_RESPONSE_NON_FATAL (1 << 14) #define CBE_CAUSE_ADDRESS_SPACE_DECODE_ERROR (1 << 15) -#define CBE_CAUSE_RESPONSE_DATA_ERROR (1 << 16) -#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << 17) +#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << 16) +#define CBE_CAUSE_RA_RESPONSE_DATA_ERROR (1 << 17) +#define CBE_CAUSE_HA_RESPONSE_DATA_ERROR (1 << 18) +#define CBE_CAUSE_FORCED_ERROR (1 << 19) + +/* CBE cbrexecstatus bits */ +#define CBR_EXS_ABORT_OCC_BIT 0 +#define CBR_EXS_INT_OCC_BIT 1 +#define CBR_EXS_PENDING_BIT 2 +#define CBR_EXS_QUEUED_BIT 3 +#define CBR_EXS_TLB_INVAL_BIT 4 +#define CBR_EXS_EXCEPTION_BIT 5 +#define CBR_EXS_CB_INT_PENDING_BIT 6 + +#define CBR_EXS_ABORT_OCC (1 << CBR_EXS_ABORT_OCC_BIT) +#define CBR_EXS_INT_OCC (1 << CBR_EXS_INT_OCC_BIT) +#define CBR_EXS_PENDING (1 << CBR_EXS_PENDING_BIT) +#define CBR_EXS_QUEUED (1 << CBR_EXS_QUEUED_BIT) +#define CBR_EXS_TLB_INVAL (1 << CBR_EXS_TLB_INVAL_BIT) +#define CBR_EXS_EXCEPTION (1 << CBR_EXS_EXCEPTION_BIT) +#define CBR_EXS_CB_INT_PENDING (1 << CBR_EXS_CB_INT_PENDING_BIT) /* * Exceptions are retried for the following cases. If any OTHER bits are set * in ecause, the exception is not retryable. */ -#define EXCEPTION_RETRY_BITS (CBE_CAUSE_RESPONSE_DATA_ERROR | \ - CBE_CAUSE_RA_REQUEST_TIMEOUT | \ +#define EXCEPTION_RETRY_BITS (CBE_CAUSE_EXECUTION_HW_ERROR | \ CBE_CAUSE_TLBHW_ERROR | \ - CBE_CAUSE_HA_REQUEST_TIMEOUT) + CBE_CAUSE_RA_REQUEST_TIMEOUT | \ + CBE_CAUSE_RA_RESPONSE_NON_FATAL | \ + CBE_CAUSE_HA_RESPONSE_NON_FATAL | \ + CBE_CAUSE_RA_RESPONSE_DATA_ERROR | \ + CBE_CAUSE_HA_RESPONSE_DATA_ERROR \ + ) /* Message queue head structure */ union gru_mesqhead { @@ -273,12 +309,14 @@ union gru_mesqhead { /* Generate the low word of a GRU instruction */ -static inline unsigned int -__opword(unsigned char opcode, unsigned char exopc, unsigned char xtype, +static inline unsigned long +__opdword(unsigned char opcode, unsigned char exopc, unsigned char xtype, unsigned char iaa0, unsigned char iaa1, - unsigned char ima) + unsigned long idef2, unsigned char ima) { return (1 << GRU_CB_ICMD_SHFT) | + ((unsigned long)CBS_ACTIVE << GRU_ISTATUS_SHFT) | + (idef2<< GRU_IDEF2_SHFT) | (iaa0 << GRU_CB_IAA0_SHFT) | (iaa1 << GRU_CB_IAA1_SHFT) | (ima << GRU_CB_IMA_SHFT) | @@ -296,12 +334,13 @@ static inline void gru_flush_cache(void *p) } /* - * Store the lower 32 bits of the command including the "start" bit. Then + * Store the lower 64 bits of the command including the "start" bit. Then * start the instruction executing. */ -static inline void gru_start_instruction(struct gru_instruction *ins, int op32) +static inline void gru_start_instruction(struct gru_instruction *ins, unsigned long op64) { - gru_ordered_store_int(ins, op32); + gru_ordered_store_ulong(ins, op64); + mb(); gru_flush_cache(ins); } @@ -317,6 +356,30 @@ static inline void gru_start_instruction(struct gru_instruction *ins, int op32) * - nelem and stride are in elements * - tri0/tri1 is in bytes for the beginning of the data segment. */ +static inline void gru_vload_phys(void *cb, unsigned long gpa, + unsigned int tri0, int iaa, unsigned long hints) +{ + struct gru_instruction *ins = (struct gru_instruction *)cb; + + ins->baddr0 = (long)gpa | ((unsigned long)iaa << 62); + ins->nelem = 1; + ins->op1_stride = 1; + gru_start_instruction(ins, __opdword(OP_VLOAD, 0, XTYPE_DW, iaa, 0, + (unsigned long)tri0, CB_IMA(hints))); +} + +static inline void gru_vstore_phys(void *cb, unsigned long gpa, + unsigned int tri0, int iaa, unsigned long hints) +{ + struct gru_instruction *ins = (struct gru_instruction *)cb; + + ins->baddr0 = (long)gpa | ((unsigned long)iaa << 62); + ins->nelem = 1; + ins->op1_stride = 1; + gru_start_instruction(ins, __opdword(OP_VSTORE, 0, XTYPE_DW, iaa, 0, + (unsigned long)tri0, CB_IMA(hints))); +} + static inline void gru_vload(void *cb, unsigned long mem_addr, unsigned int tri0, unsigned char xtype, unsigned long nelem, unsigned long stride, unsigned long hints) @@ -325,10 +388,9 @@ static inline void gru_vload(void *cb, unsigned long mem_addr, ins->baddr0 = (long)mem_addr; ins->nelem = nelem; - ins->tri0 = tri0; ins->op1_stride = stride; - gru_start_instruction(ins, __opword(OP_VLOAD, 0, xtype, IAA_RAM, 0, - CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_VLOAD, 0, xtype, IAA_RAM, 0, + (unsigned long)tri0, CB_IMA(hints))); } static inline void gru_vstore(void *cb, unsigned long mem_addr, @@ -339,10 +401,9 @@ static inline void gru_vstore(void *cb, unsigned long mem_addr, ins->baddr0 = (long)mem_addr; ins->nelem = nelem; - ins->tri0 = tri0; ins->op1_stride = stride; - gru_start_instruction(ins, __opword(OP_VSTORE, 0, xtype, IAA_RAM, 0, - CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_VSTORE, 0, xtype, IAA_RAM, 0, + tri0, CB_IMA(hints))); } static inline void gru_ivload(void *cb, unsigned long mem_addr, @@ -353,10 +414,9 @@ static inline void gru_ivload(void *cb, unsigned long mem_addr, ins->baddr0 = (long)mem_addr; ins->nelem = nelem; - ins->tri0 = tri0; ins->tri1_bufsize = tri1; - gru_start_instruction(ins, __opword(OP_IVLOAD, 0, xtype, IAA_RAM, 0, - CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_IVLOAD, 0, xtype, IAA_RAM, 0, + tri0, CB_IMA(hints))); } static inline void gru_ivstore(void *cb, unsigned long mem_addr, @@ -367,10 +427,9 @@ static inline void gru_ivstore(void *cb, unsigned long mem_addr, ins->baddr0 = (long)mem_addr; ins->nelem = nelem; - ins->tri0 = tri0; ins->tri1_bufsize = tri1; - gru_start_instruction(ins, __opword(OP_IVSTORE, 0, xtype, IAA_RAM, 0, - CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_IVSTORE, 0, xtype, IAA_RAM, 0, + tri0, CB_IMA(hints))); } static inline void gru_vset(void *cb, unsigned long mem_addr, @@ -383,8 +442,8 @@ static inline void gru_vset(void *cb, unsigned long mem_addr, ins->op2_value_baddr1 = value; ins->nelem = nelem; ins->op1_stride = stride; - gru_start_instruction(ins, __opword(OP_VSET, 0, xtype, IAA_RAM, 0, - CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_VSET, 0, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); } static inline void gru_ivset(void *cb, unsigned long mem_addr, @@ -397,8 +456,8 @@ static inline void gru_ivset(void *cb, unsigned long mem_addr, ins->op2_value_baddr1 = value; ins->nelem = nelem; ins->tri1_bufsize = tri1; - gru_start_instruction(ins, __opword(OP_IVSET, 0, xtype, IAA_RAM, 0, - CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_IVSET, 0, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); } static inline void gru_vflush(void *cb, unsigned long mem_addr, @@ -410,15 +469,15 @@ static inline void gru_vflush(void *cb, unsigned long mem_addr, ins->baddr0 = (long)mem_addr; ins->op1_stride = stride; ins->nelem = nelem; - gru_start_instruction(ins, __opword(OP_VFLUSH, 0, xtype, IAA_RAM, 0, - CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_VFLUSH, 0, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); } static inline void gru_nop(void *cb, int hints) { struct gru_instruction *ins = (void *)cb; - gru_start_instruction(ins, __opword(OP_NOP, 0, 0, 0, 0, CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_NOP, 0, 0, 0, 0, 0, CB_IMA(hints))); } @@ -432,10 +491,9 @@ static inline void gru_bcopy(void *cb, const unsigned long src, ins->baddr0 = (long)src; ins->op2_value_baddr1 = (long)dest; ins->nelem = nelem; - ins->tri0 = tri0; ins->tri1_bufsize = bufsize; - gru_start_instruction(ins, __opword(OP_BCOPY, 0, xtype, IAA_RAM, - IAA_RAM, CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_BCOPY, 0, xtype, IAA_RAM, + IAA_RAM, tri0, CB_IMA(hints))); } static inline void gru_bstore(void *cb, const unsigned long src, @@ -447,9 +505,8 @@ static inline void gru_bstore(void *cb, const unsigned long src, ins->baddr0 = (long)src; ins->op2_value_baddr1 = (long)dest; ins->nelem = nelem; - ins->tri0 = tri0; - gru_start_instruction(ins, __opword(OP_BSTORE, 0, xtype, 0, IAA_RAM, - CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_BSTORE, 0, xtype, 0, IAA_RAM, + tri0, CB_IMA(hints))); } static inline void gru_gamir(void *cb, int exopc, unsigned long src, @@ -458,8 +515,8 @@ static inline void gru_gamir(void *cb, int exopc, unsigned long src, struct gru_instruction *ins = (void *)cb; ins->baddr0 = (long)src; - gru_start_instruction(ins, __opword(OP_GAMIR, exopc, xtype, IAA_RAM, 0, - CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_GAMIR, exopc, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); } static inline void gru_gamirr(void *cb, int exopc, unsigned long src, @@ -468,8 +525,8 @@ static inline void gru_gamirr(void *cb, int exopc, unsigned long src, struct gru_instruction *ins = (void *)cb; ins->baddr0 = (long)src; - gru_start_instruction(ins, __opword(OP_GAMIRR, exopc, xtype, IAA_RAM, 0, - CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_GAMIRR, exopc, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); } static inline void gru_gamer(void *cb, int exopc, unsigned long src, @@ -482,8 +539,8 @@ static inline void gru_gamer(void *cb, int exopc, unsigned long src, ins->baddr0 = (long)src; ins->op1_stride = operand1; ins->op2_value_baddr1 = operand2; - gru_start_instruction(ins, __opword(OP_GAMER, exopc, xtype, IAA_RAM, 0, - CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_GAMER, exopc, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); } static inline void gru_gamerr(void *cb, int exopc, unsigned long src, @@ -495,8 +552,8 @@ static inline void gru_gamerr(void *cb, int exopc, unsigned long src, ins->baddr0 = (long)src; ins->op1_stride = operand1; ins->op2_value_baddr1 = operand2; - gru_start_instruction(ins, __opword(OP_GAMERR, exopc, xtype, IAA_RAM, 0, - CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_GAMERR, exopc, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); } static inline void gru_gamxr(void *cb, unsigned long src, @@ -506,8 +563,8 @@ static inline void gru_gamxr(void *cb, unsigned long src, ins->baddr0 = (long)src; ins->nelem = 4; - gru_start_instruction(ins, __opword(OP_GAMXR, EOP_XR_CSWAP, XTYPE_DW, - IAA_RAM, 0, CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_GAMXR, EOP_XR_CSWAP, XTYPE_DW, + IAA_RAM, 0, 0, CB_IMA(hints))); } static inline void gru_mesq(void *cb, unsigned long queue, @@ -518,9 +575,8 @@ static inline void gru_mesq(void *cb, unsigned long queue, ins->baddr0 = (long)queue; ins->nelem = nelem; - ins->tri0 = tri0; - gru_start_instruction(ins, __opword(OP_MESQ, 0, XTYPE_CL, IAA_RAM, 0, - CB_IMA(hints))); + gru_start_instruction(ins, __opdword(OP_MESQ, 0, XTYPE_CL, IAA_RAM, 0, + tri0, CB_IMA(hints))); } static inline unsigned long gru_get_amo_value(void *cb) @@ -600,9 +656,11 @@ static inline int gru_get_cb_substatus(void *cb) return cbs->isubstatus; } -/* Check the status of a CB. If the CB is in UPM mode, call the - * OS to handle the UPM status. - * Returns the CB status field value (0 for normal completion) +/* + * User interface to check an instruction status. UPM and exceptions + * are handled automatically. However, this function does NOT wait + * for an active instruction to complete. + * */ static inline int gru_check_status(void *cb) { @@ -610,36 +668,41 @@ static inline int gru_check_status(void *cb) int ret; ret = cbs->istatus; - if (ret == CBS_CALL_OS) + if (ret != CBS_ACTIVE) ret = gru_check_status_proc(cb); return ret; } -/* Wait for CB to complete. - * Returns the CB status field value (0 for normal completion) +/* + * User interface (via inline function) to wait for an instruction + * to complete. Completion status (IDLE or EXCEPTION is returned + * to the user. Exception due to hardware errors are automatically + * retried before returning an exception. + * */ static inline int gru_wait(void *cb) { - struct gru_control_block_status *cbs = (void *)cb; - int ret = cbs->istatus; - - if (ret != CBS_IDLE) - ret = gru_wait_proc(cb); - return ret; + return gru_wait_proc(cb); } -/* Wait for CB to complete. Aborts program if error. (Note: error does NOT +/* + * Wait for CB to complete. Aborts program if error. (Note: error does NOT * mean TLB mis - only fatal errors such as memory parity error or user * bugs will cause termination. */ static inline void gru_wait_abort(void *cb) { - struct gru_control_block_status *cbs = (void *)cb; - - if (cbs->istatus != CBS_IDLE) - gru_wait_abort_proc(cb); + gru_wait_abort_proc(cb); } +/* + * Get a pointer to the start of a gseg + * p - Any valid pointer within the gseg + */ +static inline void *gru_get_gseg_pointer (void *p) +{ + return (void *)((unsigned long)p & ~(GRU_GSEG_PAGESIZE - 1)); +} /* * Get a pointer to a control block diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c index ab118558552..f74fc0ca2ef 100644 --- a/drivers/misc/sgi-gru/grufault.c +++ b/drivers/misc/sgi-gru/grufault.c @@ -33,6 +33,7 @@ #include <linux/io.h> #include <linux/uaccess.h> #include <linux/security.h> +#include <linux/prefetch.h> #include <asm/pgtable.h> #include "gru.h" #include "grutables.h" @@ -40,6 +41,12 @@ #include "gru_instructions.h" #include <asm/uv/uv_hub.h> +/* Return codes for vtop functions */ +#define VTOP_SUCCESS 0 +#define VTOP_INVALID -1 +#define VTOP_RETRY -2 + + /* * Test if a physical address is a valid GRU GSEG address */ @@ -90,19 +97,22 @@ static struct gru_thread_state *gru_alloc_locked_gts(unsigned long vaddr) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - struct gru_thread_state *gts = NULL; + struct gru_thread_state *gts = ERR_PTR(-EINVAL); down_write(&mm->mmap_sem); vma = gru_find_vma(vaddr); - if (vma) - gts = gru_alloc_thread_state(vma, TSID(vaddr, vma)); - if (gts) { - mutex_lock(>s->ts_ctxlock); - downgrade_write(&mm->mmap_sem); - } else { - up_write(&mm->mmap_sem); - } + if (!vma) + goto err; + gts = gru_alloc_thread_state(vma, TSID(vaddr, vma)); + if (IS_ERR(gts)) + goto err; + mutex_lock(>s->ts_ctxlock); + downgrade_write(&mm->mmap_sem); + return gts; + +err: + up_write(&mm->mmap_sem); return gts; } @@ -122,39 +132,15 @@ static void gru_unlock_gts(struct gru_thread_state *gts) * is necessary to prevent the user from seeing a stale cb.istatus that will * change as soon as the TFH restart is complete. Races may cause an * occasional failure to clear the cb.istatus, but that is ok. - * - * If the cb address is not valid (should not happen, but...), nothing - * bad will happen.. The get_user()/put_user() will fail but there - * are no bad side-effects. */ -static void gru_cb_set_istatus_active(unsigned long __user *cb) +static void gru_cb_set_istatus_active(struct gru_instruction_bits *cbk) { - union { - struct gru_instruction_bits bits; - unsigned long dw; - } u; - - if (cb) { - get_user(u.dw, cb); - u.bits.istatus = CBS_ACTIVE; - put_user(u.dw, cb); + if (cbk) { + cbk->istatus = CBS_ACTIVE; } } /* - * Convert a interrupt IRQ to a pointer to the GRU GTS that caused the - * interrupt. Interrupts are always sent to a cpu on the blade that contains the - * GRU (except for headless blades which are not currently supported). A blade - * has N grus; a block of N consecutive IRQs is assigned to the GRUs. The IRQ - * number uniquely identifies the GRU chiplet on the local blade that caused the - * interrupt. Always called in interrupt context. - */ -static inline struct gru_state *irq_to_gru(int irq) -{ - return &gru_base[uv_numa_blade_id()]->bs_grus[irq - IRQ_GRU]; -} - -/* * Read & clear a TFM * * The GRU has an array of fault maps. A map is private to a cpu @@ -166,7 +152,8 @@ static inline struct gru_state *irq_to_gru(int irq) * the GRU, atomic operations must be used to clear bits. */ static void get_clear_fault_map(struct gru_state *gru, - struct gru_tlb_fault_map *map) + struct gru_tlb_fault_map *imap, + struct gru_tlb_fault_map *dmap) { unsigned long i, k; struct gru_tlb_fault_map *tfm; @@ -177,7 +164,11 @@ static void get_clear_fault_map(struct gru_state *gru, k = tfm->fault_bits[i]; if (k) k = xchg(&tfm->fault_bits[i], 0UL); - map->fault_bits[i] = k; + imap->fault_bits[i] = k; + k = tfm->done_bits[i]; + if (k) + k = xchg(&tfm->done_bits[i], 0UL); + dmap->fault_bits[i] = k; } /* @@ -202,10 +193,11 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma, { struct page *page; - /* ZZZ Need to handle HUGE pages */ - if (is_vm_hugetlb_page(vma)) - return -EFAULT; +#ifdef CONFIG_HUGETLB_PAGE + *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT; +#else *pageshift = PAGE_SHIFT; +#endif if (get_user_pages (current, current->mm, vaddr, 1, write, 0, &page, NULL) <= 0) return -EFAULT; @@ -263,7 +255,6 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr, return 0; err: - local_irq_enable(); return 1; } @@ -296,16 +287,71 @@ static int gru_vtop(struct gru_thread_state *gts, unsigned long vaddr, paddr = paddr & ~((1UL << ps) - 1); *gpa = uv_soc_phys_ram_to_gpa(paddr); *pageshift = ps; - return 0; + return VTOP_SUCCESS; inval: - return -1; + return VTOP_INVALID; upm: - return -2; + return VTOP_RETRY; } /* + * Flush a CBE from cache. The CBE is clean in the cache. Dirty the + * CBE cacheline so that the line will be written back to home agent. + * Otherwise the line may be silently dropped. This has no impact + * except on performance. + */ +static void gru_flush_cache_cbe(struct gru_control_block_extended *cbe) +{ + if (unlikely(cbe)) { + cbe->cbrexecstatus = 0; /* make CL dirty */ + gru_flush_cache(cbe); + } +} + +/* + * Preload the TLB with entries that may be required. Currently, preloading + * is implemented only for BCOPY. Preload <tlb_preload_count> pages OR to + * the end of the bcopy tranfer, whichever is smaller. + */ +static void gru_preload_tlb(struct gru_state *gru, + struct gru_thread_state *gts, int atomic, + unsigned long fault_vaddr, int asid, int write, + unsigned char tlb_preload_count, + struct gru_tlb_fault_handle *tfh, + struct gru_control_block_extended *cbe) +{ + unsigned long vaddr = 0, gpa; + int ret, pageshift; + + if (cbe->opccpy != OP_BCOPY) + return; + + if (fault_vaddr == cbe->cbe_baddr0) + vaddr = fault_vaddr + GRU_CACHE_LINE_BYTES * cbe->cbe_src_cl - 1; + else if (fault_vaddr == cbe->cbe_baddr1) + vaddr = fault_vaddr + (1 << cbe->xtypecpy) * cbe->cbe_nelemcur - 1; + + fault_vaddr &= PAGE_MASK; + vaddr &= PAGE_MASK; + vaddr = min(vaddr, fault_vaddr + tlb_preload_count * PAGE_SIZE); + + while (vaddr > fault_vaddr) { + ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift); + if (ret || tfh_write_only(tfh, gpa, GAA_RAM, vaddr, asid, write, + GRU_PAGESIZE(pageshift))) + return; + gru_dbg(grudev, + "%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, rw %d, ps %d, gpa 0x%lx\n", + atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh, + vaddr, asid, write, pageshift, gpa); + vaddr -= PAGE_SIZE; + STAT(tlb_preload_page); + } +} + +/* * Drop a TLB entry into the GRU. The fault is described by info in an TFH. * Input: * cb Address of user CBR. Null if not running in user context @@ -315,11 +361,14 @@ upm: * < 0 = error code * */ -static int gru_try_dropin(struct gru_thread_state *gts, +static int gru_try_dropin(struct gru_state *gru, + struct gru_thread_state *gts, struct gru_tlb_fault_handle *tfh, - unsigned long __user *cb) + struct gru_instruction_bits *cbk) { - int pageshift = 0, asid, write, ret, atomic = !cb; + struct gru_control_block_extended *cbe = NULL; + unsigned char tlb_preload_count = gts->ts_tlb_preload_count; + int pageshift = 0, asid, write, ret, atomic = !cbk, indexway; unsigned long gpa = 0, vaddr = 0; /* @@ -330,18 +379,34 @@ static int gru_try_dropin(struct gru_thread_state *gts, */ /* + * Prefetch the CBE if doing TLB preloading + */ + if (unlikely(tlb_preload_count)) { + cbe = gru_tfh_to_cbe(tfh); + prefetchw(cbe); + } + + /* * Error if TFH state is IDLE or FMM mode & the user issuing a UPM call. * Might be a hardware race OR a stupid user. Ignore FMM because FMM * is a transient state. */ + if (tfh->status != TFHSTATUS_EXCEPTION) { + gru_flush_cache(tfh); + sync_core(); + if (tfh->status != TFHSTATUS_EXCEPTION) + goto failnoexception; + STAT(tfh_stale_on_fault); + } if (tfh->state == TFHSTATE_IDLE) goto failidle; - if (tfh->state == TFHSTATE_MISS_FMM && cb) + if (tfh->state == TFHSTATE_MISS_FMM && cbk) goto failfmm; write = (tfh->cause & TFHCAUSE_TLB_MOD) != 0; vaddr = tfh->missvaddr; asid = tfh->missasid; + indexway = tfh->indexway; if (asid == 0) goto failnoasid; @@ -355,41 +420,51 @@ static int gru_try_dropin(struct gru_thread_state *gts, goto failactive; ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift); - if (ret == -1) + if (ret == VTOP_INVALID) goto failinval; - if (ret == -2) + if (ret == VTOP_RETRY) goto failupm; if (!(gts->ts_sizeavail & GRU_SIZEAVAIL(pageshift))) { gts->ts_sizeavail |= GRU_SIZEAVAIL(pageshift); - if (atomic || !gru_update_cch(gts, 0)) { + if (atomic || !gru_update_cch(gts)) { gts->ts_force_cch_reload = 1; goto failupm; } } - gru_cb_set_istatus_active(cb); + + if (unlikely(cbe) && pageshift == PAGE_SHIFT) { + gru_preload_tlb(gru, gts, atomic, vaddr, asid, write, tlb_preload_count, tfh, cbe); + gru_flush_cache_cbe(cbe); + } + + gru_cb_set_istatus_active(cbk); + gts->ustats.tlbdropin++; tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write, GRU_PAGESIZE(pageshift)); - STAT(tlb_dropin); gru_dbg(grudev, - "%s: tfh 0x%p, vaddr 0x%lx, asid 0x%x, ps %d, gpa 0x%lx\n", - ret ? "non-atomic" : "atomic", tfh, vaddr, asid, - pageshift, gpa); + "%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, indexway 0x%x," + " rw %d, ps %d, gpa 0x%lx\n", + atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh, vaddr, asid, + indexway, write, pageshift, gpa); + STAT(tlb_dropin); return 0; failnoasid: /* No asid (delayed unload). */ STAT(tlb_dropin_fail_no_asid); gru_dbg(grudev, "FAILED no_asid tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); - if (!cb) + if (!cbk) tfh_user_polling_mode(tfh); else gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); return -EAGAIN; failupm: /* Atomic failure switch CBR to UPM */ tfh_user_polling_mode(tfh); + gru_flush_cache_cbe(cbe); STAT(tlb_dropin_fail_upm); gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); return 1; @@ -397,15 +472,28 @@ failupm: failfmm: /* FMM state on UPM call */ gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); STAT(tlb_dropin_fail_fmm); gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state); return 0; +failnoexception: + /* TFH status did not show exception pending */ + gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); + if (cbk) + gru_flush_cache(cbk); + STAT(tlb_dropin_fail_no_exception); + gru_dbg(grudev, "FAILED non-exception tfh: 0x%p, status %d, state %d\n", + tfh, tfh->status, tfh->state); + return 0; + failidle: - /* TFH was idle - no miss pending */ + /* TFH state was idle - no miss pending */ gru_flush_cache(tfh); - if (cb) - gru_flush_cache(cb); + gru_flush_cache_cbe(cbe); + if (cbk) + gru_flush_cache(cbk); STAT(tlb_dropin_fail_idle); gru_dbg(grudev, "FAILED idle tfh: 0x%p, state %d\n", tfh, tfh->state); return 0; @@ -413,16 +501,18 @@ failidle: failinval: /* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */ tfh_exception(tfh); + gru_flush_cache_cbe(cbe); STAT(tlb_dropin_fail_invalid); gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); return -EFAULT; failactive: /* Range invalidate active. Switch to UPM iff atomic */ - if (!cb) + if (!cbk) tfh_user_polling_mode(tfh); else gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); STAT(tlb_dropin_fail_range_active); gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n", tfh, vaddr); @@ -435,27 +525,41 @@ failactive: * Note that this is the interrupt handler that is registered with linux * interrupt handlers. */ -irqreturn_t gru_intr(int irq, void *dev_id) +static irqreturn_t gru_intr(int chiplet, int blade) { struct gru_state *gru; - struct gru_tlb_fault_map map; + struct gru_tlb_fault_map imap, dmap; struct gru_thread_state *gts; struct gru_tlb_fault_handle *tfh = NULL; + struct completion *cmp; int cbrnum, ctxnum; STAT(intr); - gru = irq_to_gru(irq); + gru = &gru_base[blade]->bs_grus[chiplet]; if (!gru) { - dev_err(grudev, "GRU: invalid interrupt: cpu %d, irq %d\n", - raw_smp_processor_id(), irq); + dev_err(grudev, "GRU: invalid interrupt: cpu %d, chiplet %d\n", + raw_smp_processor_id(), chiplet); return IRQ_NONE; } - get_clear_fault_map(gru, &map); - gru_dbg(grudev, "irq %d, gru %x, map 0x%lx\n", irq, gru->gs_gid, - map.fault_bits[0]); + get_clear_fault_map(gru, &imap, &dmap); + gru_dbg(grudev, + "cpu %d, chiplet %d, gid %d, imap %016lx %016lx, dmap %016lx %016lx\n", + smp_processor_id(), chiplet, gru->gs_gid, + imap.fault_bits[0], imap.fault_bits[1], + dmap.fault_bits[0], dmap.fault_bits[1]); + + for_each_cbr_in_tfm(cbrnum, dmap.fault_bits) { + STAT(intr_cbr); + cmp = gru->gs_blade->bs_async_wq; + if (cmp) + complete(cmp); + gru_dbg(grudev, "gid %d, cbr_done %d, done %d\n", + gru->gs_gid, cbrnum, cmp ? cmp->done : -1); + } - for_each_cbr_in_tfm(cbrnum, map.fault_bits) { + for_each_cbr_in_tfm(cbrnum, imap.fault_bits) { + STAT(intr_tfh); tfh = get_tfh_by_index(gru, cbrnum); prefetchw(tfh); /* Helps on hdw, required for emulator */ @@ -468,12 +572,20 @@ irqreturn_t gru_intr(int irq, void *dev_id) ctxnum = tfh->ctxnum; gts = gru->gs_gts[ctxnum]; + /* Spurious interrupts can cause this. Ignore. */ + if (!gts) { + STAT(intr_spurious); + continue; + } + /* * This is running in interrupt context. Trylock the mmap_sem. * If it fails, retry the fault in user context. */ - if (down_read_trylock(>s->ts_mm->mmap_sem)) { - gru_try_dropin(gts, tfh, NULL); + gts->ustats.fmm_tlbmiss++; + if (!gts->ts_force_cch_reload && + down_read_trylock(>s->ts_mm->mmap_sem)) { + gru_try_dropin(gru, gts, tfh, NULL); up_read(>s->ts_mm->mmap_sem); } else { tfh_user_polling_mode(tfh); @@ -483,19 +595,43 @@ irqreturn_t gru_intr(int irq, void *dev_id) return IRQ_HANDLED; } +irqreturn_t gru0_intr(int irq, void *dev_id) +{ + return gru_intr(0, uv_numa_blade_id()); +} + +irqreturn_t gru1_intr(int irq, void *dev_id) +{ + return gru_intr(1, uv_numa_blade_id()); +} + +irqreturn_t gru_intr_mblade(int irq, void *dev_id) +{ + int blade; + + for_each_possible_blade(blade) { + if (uv_blade_nr_possible_cpus(blade)) + continue; + gru_intr(0, blade); + gru_intr(1, blade); + } + return IRQ_HANDLED; +} + static int gru_user_dropin(struct gru_thread_state *gts, struct gru_tlb_fault_handle *tfh, - unsigned long __user *cb) + void *cb) { struct gru_mm_struct *gms = gts->ts_gms; int ret; + gts->ustats.upm_tlbmiss++; while (1) { wait_event(gms->ms_wait_queue, atomic_read(&gms->ms_range_active) == 0); prefetchw(tfh); /* Helps on hdw, required for emulator */ - ret = gru_try_dropin(gts, tfh, cb); + ret = gru_try_dropin(gts->ts_gru, gts, tfh, cb); if (ret <= 0) return ret; STAT(call_os_wait_queue); @@ -511,52 +647,41 @@ int gru_handle_user_call_os(unsigned long cb) { struct gru_tlb_fault_handle *tfh; struct gru_thread_state *gts; - unsigned long __user *cbp; + void *cbk; int ucbnum, cbrnum, ret = -EINVAL; STAT(call_os); - gru_dbg(grudev, "address 0x%lx\n", cb); /* sanity check the cb pointer */ ucbnum = get_cb_number((void *)cb); if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB) return -EINVAL; - cbp = (unsigned long *)cb; gts = gru_find_lock_gts(cb); if (!gts) return -EINVAL; + gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts); if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) goto exit; - /* - * If force_unload is set, the UPM TLB fault is phony. The task - * has migrated to another node and the GSEG must be moved. Just - * unload the context. The task will page fault and assign a new - * context. - */ - if (gts->ts_tgid_owner == current->tgid && gts->ts_blade >= 0 && - gts->ts_blade != uv_numa_blade_id()) { - STAT(call_os_offnode_reference); - gts->ts_force_unload = 1; - } + gru_check_context_placement(gts); /* * CCH may contain stale data if ts_force_cch_reload is set. */ if (gts->ts_gru && gts->ts_force_cch_reload) { - gru_update_cch(gts, 0); gts->ts_force_cch_reload = 0; + gru_update_cch(gts); } ret = -EAGAIN; cbrnum = thread_cbr_number(gts, ucbnum); - if (gts->ts_force_unload) { - gru_unload_context(gts, 1); - } else if (gts->ts_gru) { + if (gts->ts_gru) { tfh = get_tfh_by_index(gts->ts_gru, cbrnum); - ret = gru_user_dropin(gts, tfh, cbp); + cbk = get_gseg_base_address_cb(gts->ts_gru->gs_gru_base_vaddr, + gts->ts_ctxnum, ucbnum); + ret = gru_user_dropin(gts, tfh, cbk); } exit: gru_unlock_gts(gts); @@ -578,31 +703,38 @@ int gru_get_exception_detail(unsigned long arg) if (copy_from_user(&excdet, (void __user *)arg, sizeof(excdet))) return -EFAULT; - gru_dbg(grudev, "address 0x%lx\n", excdet.cb); gts = gru_find_lock_gts(excdet.cb); if (!gts) return -EINVAL; + gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", excdet.cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts); ucbnum = get_cb_number((void *)excdet.cb); if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) { ret = -EINVAL; } else if (gts->ts_gru) { cbrnum = thread_cbr_number(gts, ucbnum); cbe = get_cbe_by_index(gts->ts_gru, cbrnum); - prefetchw(cbe);/* Harmless on hardware, required for emulator */ + gru_flush_cache(cbe); /* CBE not coherent */ + sync_core(); /* make sure we are have current data */ excdet.opc = cbe->opccpy; excdet.exopc = cbe->exopccpy; excdet.ecause = cbe->ecause; excdet.exceptdet0 = cbe->idef1upd; excdet.exceptdet1 = cbe->idef3upd; + excdet.cbrstate = cbe->cbrstate; + excdet.cbrexecstatus = cbe->cbrexecstatus; + gru_flush_cache_cbe(cbe); ret = 0; } else { ret = -EAGAIN; } gru_unlock_gts(gts); - gru_dbg(grudev, "address 0x%lx, ecause 0x%x\n", excdet.cb, - excdet.ecause); + gru_dbg(grudev, + "cb 0x%lx, op %d, exopc %d, cbrstate %d, cbrexecstatus 0x%x, ecause 0x%x, " + "exdet0 0x%lx, exdet1 0x%x\n", + excdet.cb, excdet.opc, excdet.exopc, excdet.cbrstate, excdet.cbrexecstatus, + excdet.ecause, excdet.exceptdet0, excdet.exceptdet1); if (!ret && copy_to_user((void __user *)arg, &excdet, sizeof(excdet))) ret = -EFAULT; return ret; @@ -627,7 +759,7 @@ static int gru_unload_all_contexts(void) if (gts && mutex_trylock(>s->ts_ctxlock)) { spin_unlock(&gru->gs_lock); gru_unload_context(gts, 1); - gru_unlock_gts(gts); + mutex_unlock(>s->ts_ctxlock); spin_lock(&gru->gs_lock); } } @@ -669,6 +801,7 @@ int gru_user_flush_tlb(unsigned long arg) { struct gru_thread_state *gts; struct gru_flush_tlb_req req; + struct gru_mm_struct *gms; STAT(user_flush_tlb); if (copy_from_user(&req, (void __user *)arg, sizeof(req))) @@ -681,8 +814,39 @@ int gru_user_flush_tlb(unsigned long arg) if (!gts) return -EINVAL; - gru_flush_tlb_range(gts->ts_gms, req.vaddr, req.len); + gms = gts->ts_gms; gru_unlock_gts(gts); + gru_flush_tlb_range(gms, req.vaddr, req.len); + + return 0; +} + +/* + * Fetch GSEG statisticss + */ +long gru_get_gseg_statistics(unsigned long arg) +{ + struct gru_thread_state *gts; + struct gru_get_gseg_statistics_req req; + + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + /* + * The library creates arrays of contexts for threaded programs. + * If no gts exists in the array, the context has never been used & all + * statistics are implicitly 0. + */ + gts = gru_find_lock_gts(req.gseg); + if (gts) { + memcpy(&req.stats, >s->ustats, sizeof(gts->ustats)); + gru_unlock_gts(gts); + } else { + memset(&req.stats, 0, sizeof(gts->ustats)); + } + + if (copy_to_user((void __user *)arg, &req, sizeof(req))) + return -EFAULT; return 0; } @@ -691,18 +855,49 @@ int gru_user_flush_tlb(unsigned long arg) * Register the current task as the user of the GSEG slice. * Needed for TLB fault interrupt targeting. */ -int gru_set_task_slice(long address) +int gru_set_context_option(unsigned long arg) { struct gru_thread_state *gts; + struct gru_set_context_option_req req; + int ret = 0; - STAT(set_task_slice); - gru_dbg(grudev, "address 0x%lx\n", address); - gts = gru_alloc_locked_gts(address); - if (!gts) - return -EINVAL; + STAT(set_context_option); + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + gru_dbg(grudev, "op %d, gseg 0x%lx, value1 0x%lx\n", req.op, req.gseg, req.val1); + + gts = gru_find_lock_gts(req.gseg); + if (!gts) { + gts = gru_alloc_locked_gts(req.gseg); + if (IS_ERR(gts)) + return PTR_ERR(gts); + } - gts->ts_tgid_owner = current->tgid; + switch (req.op) { + case sco_blade_chiplet: + /* Select blade/chiplet for GRU context */ + if (req.val0 < -1 || req.val0 >= GRU_CHIPLETS_PER_HUB || + req.val1 < -1 || req.val1 >= GRU_MAX_BLADES || + (req.val1 >= 0 && !gru_base[req.val1])) { + ret = -EINVAL; + } else { + gts->ts_user_blade_id = req.val1; + gts->ts_user_chiplet_id = req.val0; + gru_check_context_placement(gts); + } + break; + case sco_gseg_owner: + /* Register the current task as the GSEG owner */ + gts->ts_tgid_owner = current->tgid; + break; + case sco_cch_req_slice: + /* Set the CCH slice option */ + gts->ts_cch_req_slice = req.val1 & 3; + break; + default: + ret = -EINVAL; + } gru_unlock_gts(gts); - return 0; + return ret; } diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c index bbefe77c67a..104a05f6b73 100644 --- a/drivers/misc/sgi-gru/grufile.c +++ b/drivers/misc/sgi-gru/grufile.c @@ -6,7 +6,7 @@ * This file supports the user system call for file open, close, mmap, etc. * This also incudes the driver initialization code. * - * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2008-2014 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -29,13 +29,15 @@ #include <linux/slab.h> #include <linux/mm.h> #include <linux/io.h> -#include <linux/smp_lock.h> #include <linux/spinlock.h> #include <linux/device.h> #include <linux/miscdevice.h> #include <linux/interrupt.h> #include <linux/proc_fs.h> #include <linux/uaccess.h> +#ifdef CONFIG_X86_64 +#include <asm/uv/uv_irq.h> +#endif #include <asm/uv/uv.h> #include "gru.h" #include "grulib.h" @@ -46,6 +48,7 @@ struct gru_blade_state *gru_base[GRU_MAX_BLADES] __read_mostly; unsigned long gru_start_paddr __read_mostly; +void *gru_start_vaddr __read_mostly; unsigned long gru_end_paddr __read_mostly; unsigned int gru_max_gids __read_mostly; struct gru_stats_s gru_stats; @@ -53,9 +56,13 @@ struct gru_stats_s gru_stats; /* Guaranteed user available resources on each node */ static int max_user_cbrs, max_user_dsr_bytes; -static struct file_operations gru_fops; static struct miscdevice gru_miscdev; +static int gru_supported(void) +{ + return is_uv_system() && + (uv_hub_info->hub_revision < UV3_HUB_REVISION_BASE); +} /* * gru_vma_close @@ -93,7 +100,7 @@ static void gru_vma_close(struct vm_area_struct *vma) /* * gru_file_mmap * - * Called when mmaping the device. Initializes the vma with a fault handler + * Called when mmapping the device. Initializes the vma with a fault handler * and private data structure necessary to allocate, track, and free the * underlying pages. */ @@ -106,9 +113,8 @@ static int gru_file_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_end & (GRU_GSEG_PAGESIZE - 1)) return -EINVAL; - vma->vm_flags |= - (VM_IO | VM_DONTCOPY | VM_LOCKED | VM_DONTEXPAND | VM_PFNMAP | - VM_RESERVED); + vma->vm_flags |= VM_IO | VM_PFNMAP | VM_LOCKED | + VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_page_prot = PAGE_SHARED; vma->vm_ops = &gru_vm_ops; @@ -131,15 +137,12 @@ static int gru_create_new_context(unsigned long arg) struct gru_vma_data *vdata; int ret = -EINVAL; - if (copy_from_user(&req, (void __user *)arg, sizeof(req))) return -EFAULT; - if (req.data_segment_bytes == 0 || - req.data_segment_bytes > max_user_dsr_bytes) + if (req.data_segment_bytes > max_user_dsr_bytes) return -EINVAL; - if (!req.control_blocks || !req.maximum_thread_count || - req.control_blocks > max_user_cbrs) + if (req.control_blocks > max_user_cbrs || !req.maximum_thread_count) return -EINVAL; if (!(req.options & GRU_OPT_MISS_MASK)) @@ -153,6 +156,7 @@ static int gru_create_new_context(unsigned long arg) vdata->vd_dsr_au_count = GRU_DS_BYTES_TO_AU(req.data_segment_bytes); vdata->vd_cbr_au_count = GRU_CB_COUNT_TO_AU(req.control_blocks); + vdata->vd_tlb_preload_count = req.tlb_preload_count; ret = 0; } up_write(¤t->mm->mmap_sem); @@ -173,6 +177,7 @@ static long gru_get_config_info(unsigned long arg) nodesperblade = 2; else nodesperblade = 1; + memset(&info, 0, sizeof(info)); info.cpus = num_online_cpus(); info.nodes = num_online_nodes(); info.blades = info.nodes / nodesperblade; @@ -184,41 +189,6 @@ static long gru_get_config_info(unsigned long arg) } /* - * Get GRU chiplet status - */ -static long gru_get_chiplet_status(unsigned long arg) -{ - struct gru_state *gru; - struct gru_chiplet_info info; - - if (copy_from_user(&info, (void __user *)arg, sizeof(info))) - return -EFAULT; - - if (info.node == -1) - info.node = numa_node_id(); - if (info.node >= num_possible_nodes() || - info.chiplet >= GRU_CHIPLETS_PER_HUB || - info.node < 0 || info.chiplet < 0) - return -EINVAL; - - info.blade = uv_node_to_blade_id(info.node); - gru = get_gru(info.blade, info.chiplet); - - info.total_dsr_bytes = GRU_NUM_DSR_BYTES; - info.total_cbr = GRU_NUM_CB; - info.total_user_dsr_bytes = GRU_NUM_DSR_BYTES - - gru->gs_reserved_dsr_bytes; - info.total_user_cbr = GRU_NUM_CB - gru->gs_reserved_cbrs; - info.free_user_dsr_bytes = hweight64(gru->gs_dsr_map) * - GRU_DSR_AU_BYTES; - info.free_user_cbr = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE; - - if (copy_to_user((void __user *)arg, &info, sizeof(info))) - return -EFAULT; - return 0; -} - -/* * gru_file_unlocked_ioctl * * Called to update file attributes via IOCTL calls. @@ -228,14 +198,14 @@ static long gru_file_unlocked_ioctl(struct file *file, unsigned int req, { int err = -EBADRQC; - gru_dbg(grudev, "file %p\n", file); + gru_dbg(grudev, "file %p, req 0x%x, 0x%lx\n", file, req, arg); switch (req) { case GRU_CREATE_CONTEXT: err = gru_create_new_context(arg); break; - case GRU_SET_TASK_SLICE: - err = gru_set_task_slice(arg); + case GRU_SET_CONTEXT_OPTION: + err = gru_set_context_option(arg); break; case GRU_USER_GET_EXCEPTION_DETAIL: err = gru_get_exception_detail(arg); @@ -243,18 +213,24 @@ static long gru_file_unlocked_ioctl(struct file *file, unsigned int req, case GRU_USER_UNLOAD_CONTEXT: err = gru_user_unload_context(arg); break; - case GRU_GET_CHIPLET_STATUS: - err = gru_get_chiplet_status(arg); - break; case GRU_USER_FLUSH_TLB: err = gru_user_flush_tlb(arg); break; case GRU_USER_CALL_OS: err = gru_handle_user_call_os(arg); break; + case GRU_GET_GSEG_STATISTICS: + err = gru_get_gseg_statistics(arg); + break; + case GRU_KTEST: + err = gru_ktest(arg); + break; case GRU_GET_CONFIG_INFO: err = gru_get_config_info(arg); break; + case GRU_DUMP_CHIPLET_STATE: + err = gru_dump_chiplet_request(arg); + break; } return err; } @@ -264,25 +240,25 @@ static long gru_file_unlocked_ioctl(struct file *file, unsigned int req, * system. */ static void gru_init_chiplet(struct gru_state *gru, unsigned long paddr, - void *vaddr, int nid, int bid, int grunum) + void *vaddr, int blade_id, int chiplet_id) { spin_lock_init(&gru->gs_lock); spin_lock_init(&gru->gs_asid_lock); gru->gs_gru_base_paddr = paddr; gru->gs_gru_base_vaddr = vaddr; - gru->gs_gid = bid * GRU_CHIPLETS_PER_BLADE + grunum; - gru->gs_blade = gru_base[bid]; - gru->gs_blade_id = bid; + gru->gs_gid = blade_id * GRU_CHIPLETS_PER_BLADE + chiplet_id; + gru->gs_blade = gru_base[blade_id]; + gru->gs_blade_id = blade_id; + gru->gs_chiplet_id = chiplet_id; gru->gs_cbr_map = (GRU_CBR_AU == 64) ? ~0 : (1UL << GRU_CBR_AU) - 1; gru->gs_dsr_map = (1UL << GRU_DSR_AU) - 1; gru->gs_asid_limit = MAX_ASID; gru_tgh_flush_init(gru); if (gru->gs_gid >= gru_max_gids) gru_max_gids = gru->gs_gid + 1; - gru_dbg(grudev, "bid %d, nid %d, gid %d, vaddr %p (0x%lx)\n", - bid, nid, gru->gs_gid, gru->gs_gru_base_vaddr, + gru_dbg(grudev, "bid %d, gid %d, vaddr %p (0x%lx)\n", + blade_id, gru->gs_gid, gru->gs_gru_base_vaddr, gru->gs_gru_base_paddr); - gru_kservices_init(gru); } static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr) @@ -297,11 +273,9 @@ static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr) max_user_cbrs = GRU_NUM_CB; max_user_dsr_bytes = GRU_NUM_DSR_BYTES; - for_each_online_node(nid) { - bid = uv_node_to_blade_id(nid); - pnode = uv_node_to_pnode(nid); - if (bid < 0 || gru_base[bid]) - continue; + for_each_possible_blade(bid) { + pnode = uv_blade_to_pnode(bid); + nid = uv_blade_to_memory_nid(bid);/* -1 if no memory on blade */ page = alloc_pages_node(nid, GFP_KERNEL, order); if (!page) goto fail; @@ -309,6 +283,7 @@ static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr) memset(gru_base[bid], 0, sizeof(struct gru_blade_state)); gru_base[bid]->bs_lru_gru = &gru_base[bid]->bs_grus[0]; spin_lock_init(&gru_base[bid]->bs_lock); + init_rwsem(&gru_base[bid]->bs_kgts_sema); dsrbytes = 0; cbrs = 0; @@ -317,7 +292,7 @@ static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr) chip++, gru++) { paddr = gru_chiplet_paddr(gru_base_paddr, pnode, chip); vaddr = gru_chiplet_vaddr(gru_base_vaddr, pnode, chip); - gru_init_chiplet(gru, paddr, vaddr, nid, bid, chip); + gru_init_chiplet(gru, paddr, vaddr, bid, chip); n = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE; cbrs = max(cbrs, n); n = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES; @@ -330,39 +305,215 @@ static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr) return 0; fail: - for (nid--; nid >= 0; nid--) - free_pages((unsigned long)gru_base[nid], order); + for (bid--; bid >= 0; bid--) + free_pages((unsigned long)gru_base[bid], order); return -ENOMEM; } -#ifdef CONFIG_IA64 +static void gru_free_tables(void) +{ + int bid; + int order = get_order(sizeof(struct gru_state) * + GRU_CHIPLETS_PER_BLADE); + + for (bid = 0; bid < GRU_MAX_BLADES; bid++) + free_pages((unsigned long)gru_base[bid], order); +} -static int get_base_irq(void) +static unsigned long gru_chiplet_cpu_to_mmr(int chiplet, int cpu, int *corep) { - return IRQ_GRU; + unsigned long mmr = 0; + int core; + + /* + * We target the cores of a blade and not the hyperthreads themselves. + * There is a max of 8 cores per socket and 2 sockets per blade, + * making for a max total of 16 cores (i.e., 16 CPUs without + * hyperthreading and 32 CPUs with hyperthreading). + */ + core = uv_cpu_core_number(cpu) + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu); + if (core >= GRU_NUM_TFM || uv_cpu_ht_number(cpu)) + return 0; + + if (chiplet == 0) { + mmr = UVH_GR0_TLB_INT0_CONFIG + + core * (UVH_GR0_TLB_INT1_CONFIG - UVH_GR0_TLB_INT0_CONFIG); + } else if (chiplet == 1) { + mmr = UVH_GR1_TLB_INT0_CONFIG + + core * (UVH_GR1_TLB_INT1_CONFIG - UVH_GR1_TLB_INT0_CONFIG); + } else { + BUG(); + } + + *corep = core; + return mmr; } -#elif defined CONFIG_X86_64 +#ifdef CONFIG_IA64 -static void noop(unsigned int irq) +static int gru_irq_count[GRU_CHIPLETS_PER_BLADE]; + +static void gru_noop(struct irq_data *d) { } -static struct irq_chip gru_chip = { - .name = "gru", - .mask = noop, - .unmask = noop, - .ack = noop, +static struct irq_chip gru_chip[GRU_CHIPLETS_PER_BLADE] = { + [0 ... GRU_CHIPLETS_PER_BLADE - 1] { + .irq_mask = gru_noop, + .irq_unmask = gru_noop, + .irq_ack = gru_noop + } }; -static int get_base_irq(void) +static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name, + irq_handler_t irq_handler, int cpu, int blade) +{ + unsigned long mmr; + int irq = IRQ_GRU + chiplet; + int ret, core; + + mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); + if (mmr == 0) + return 0; + + if (gru_irq_count[chiplet] == 0) { + gru_chip[chiplet].name = irq_name; + ret = irq_set_chip(irq, &gru_chip[chiplet]); + if (ret) { + printk(KERN_ERR "%s: set_irq_chip failed, errno=%d\n", + GRU_DRIVER_ID_STR, -ret); + return ret; + } + + ret = request_irq(irq, irq_handler, 0, irq_name, NULL); + if (ret) { + printk(KERN_ERR "%s: request_irq failed, errno=%d\n", + GRU_DRIVER_ID_STR, -ret); + return ret; + } + } + gru_irq_count[chiplet]++; + + return 0; +} + +static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade) +{ + unsigned long mmr; + int core, irq = IRQ_GRU + chiplet; + + if (gru_irq_count[chiplet] == 0) + return; + + mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); + if (mmr == 0) + return; + + if (--gru_irq_count[chiplet] == 0) + free_irq(irq, NULL); +} + +#elif defined CONFIG_X86_64 + +static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name, + irq_handler_t irq_handler, int cpu, int blade) { - set_irq_chip(IRQ_GRU, &gru_chip); - set_irq_chip(IRQ_GRU + 1, &gru_chip); - return IRQ_GRU; + unsigned long mmr; + int irq, core; + int ret; + + mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); + if (mmr == 0) + return 0; + + irq = uv_setup_irq(irq_name, cpu, blade, mmr, UV_AFFINITY_CPU); + if (irq < 0) { + printk(KERN_ERR "%s: uv_setup_irq failed, errno=%d\n", + GRU_DRIVER_ID_STR, -irq); + return irq; + } + + ret = request_irq(irq, irq_handler, 0, irq_name, NULL); + if (ret) { + uv_teardown_irq(irq); + printk(KERN_ERR "%s: request_irq failed, errno=%d\n", + GRU_DRIVER_ID_STR, -ret); + return ret; + } + gru_base[blade]->bs_grus[chiplet].gs_irq[core] = irq; + return 0; } + +static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade) +{ + int irq, core; + unsigned long mmr; + + mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); + if (mmr) { + irq = gru_base[blade]->bs_grus[chiplet].gs_irq[core]; + if (irq) { + free_irq(irq, NULL); + uv_teardown_irq(irq); + } + } +} + #endif +static void gru_teardown_tlb_irqs(void) +{ + int blade; + int cpu; + + for_each_online_cpu(cpu) { + blade = uv_cpu_to_blade_id(cpu); + gru_chiplet_teardown_tlb_irq(0, cpu, blade); + gru_chiplet_teardown_tlb_irq(1, cpu, blade); + } + for_each_possible_blade(blade) { + if (uv_blade_nr_possible_cpus(blade)) + continue; + gru_chiplet_teardown_tlb_irq(0, 0, blade); + gru_chiplet_teardown_tlb_irq(1, 0, blade); + } +} + +static int gru_setup_tlb_irqs(void) +{ + int blade; + int cpu; + int ret; + + for_each_online_cpu(cpu) { + blade = uv_cpu_to_blade_id(cpu); + ret = gru_chiplet_setup_tlb_irq(0, "GRU0_TLB", gru0_intr, cpu, blade); + if (ret != 0) + goto exit1; + + ret = gru_chiplet_setup_tlb_irq(1, "GRU1_TLB", gru1_intr, cpu, blade); + if (ret != 0) + goto exit1; + } + for_each_possible_blade(blade) { + if (uv_blade_nr_possible_cpus(blade)) + continue; + ret = gru_chiplet_setup_tlb_irq(0, "GRU0_TLB", gru_intr_mblade, 0, blade); + if (ret != 0) + goto exit1; + + ret = gru_chiplet_setup_tlb_irq(1, "GRU1_TLB", gru_intr_mblade, 0, blade); + if (ret != 0) + goto exit1; + } + + return 0; + +exit1: + gru_teardown_tlb_irqs(); + return ret; +} + /* * gru_init * @@ -370,11 +521,9 @@ static int get_base_irq(void) */ static int __init gru_init(void) { - int ret, irq, chip; - char id[10]; - void *gru_start_vaddr; + int ret; - if (!is_uv_system()) + if (!gru_supported()) return 0; #if defined CONFIG_IA64 @@ -387,83 +536,63 @@ static int __init gru_init(void) gru_end_paddr = gru_start_paddr + GRU_MAX_BLADES * GRU_SIZE; printk(KERN_INFO "GRU space: 0x%lx - 0x%lx\n", gru_start_paddr, gru_end_paddr); - irq = get_base_irq(); - for (chip = 0; chip < GRU_CHIPLETS_PER_BLADE; chip++) { - ret = request_irq(irq + chip, gru_intr, 0, id, NULL); - /* TODO: fix irq handling on x86. For now ignore failure because - * interrupts are not required & not yet fully supported */ - if (ret) { - printk(KERN_WARNING - "!!!WARNING: GRU ignoring request failure!!!\n"); - ret = 0; - } - if (ret) { - printk(KERN_ERR "%s: request_irq failed\n", - GRU_DRIVER_ID_STR); - goto exit1; - } - } - ret = misc_register(&gru_miscdev); if (ret) { printk(KERN_ERR "%s: misc_register failed\n", GRU_DRIVER_ID_STR); - goto exit1; + goto exit0; } ret = gru_proc_init(); if (ret) { printk(KERN_ERR "%s: proc init failed\n", GRU_DRIVER_ID_STR); - goto exit2; + goto exit1; } ret = gru_init_tables(gru_start_paddr, gru_start_vaddr); if (ret) { printk(KERN_ERR "%s: init tables failed\n", GRU_DRIVER_ID_STR); - goto exit3; + goto exit2; } + ret = gru_setup_tlb_irqs(); + if (ret != 0) + goto exit3; + + gru_kservices_init(); + printk(KERN_INFO "%s: v%s\n", GRU_DRIVER_ID_STR, GRU_DRIVER_VERSION_STR); return 0; exit3: - gru_proc_exit(); + gru_free_tables(); exit2: - misc_deregister(&gru_miscdev); + gru_proc_exit(); exit1: - for (--chip; chip >= 0; chip--) - free_irq(irq + chip, NULL); + misc_deregister(&gru_miscdev); +exit0: return ret; } static void __exit gru_exit(void) { - int i, bid, gid; - int order = get_order(sizeof(struct gru_state) * - GRU_CHIPLETS_PER_BLADE); - - if (!is_uv_system()) + if (!gru_supported()) return; - for (i = 0; i < GRU_CHIPLETS_PER_BLADE; i++) - free_irq(IRQ_GRU + i, NULL); - - foreach_gid(gid) - gru_kservices_exit(GID_TO_GRU(gid)); - - for (bid = 0; bid < GRU_MAX_BLADES; bid++) - free_pages((unsigned long)gru_base[bid], order); - + gru_teardown_tlb_irqs(); + gru_kservices_exit(); + gru_free_tables(); misc_deregister(&gru_miscdev); gru_proc_exit(); } -static struct file_operations gru_fops = { +static const struct file_operations gru_fops = { .owner = THIS_MODULE, .unlocked_ioctl = gru_file_unlocked_ioctl, .mmap = gru_file_mmap, + .llseek = noop_llseek, }; static struct miscdevice gru_miscdev = { @@ -472,7 +601,7 @@ static struct miscdevice gru_miscdev = { .fops = &gru_fops, }; -struct vm_operations_struct gru_vm_ops = { +const struct vm_operations_struct gru_vm_ops = { .close = gru_vma_close, .fault = gru_fault, }; diff --git a/drivers/misc/sgi-gru/gruhandles.c b/drivers/misc/sgi-gru/gruhandles.c index 9b7ccb32869..2f30badc6ff 100644 --- a/drivers/misc/sgi-gru/gruhandles.c +++ b/drivers/misc/sgi-gru/gruhandles.c @@ -27,9 +27,11 @@ #ifdef CONFIG_IA64 #include <asm/processor.h> #define GRU_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10) +#define CLKS2NSEC(c) ((c) *1000000000 / local_cpu_data->itc_freq) #else #include <asm/tsc.h> #define GRU_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) +#define CLKS2NSEC(c) ((c) * 1000000 / tsc_khz) #endif /* Extract the status field from a kernel handle */ @@ -39,54 +41,73 @@ struct mcs_op_statistic mcs_op_statistics[mcsop_last]; static void update_mcs_stats(enum mcs_op op, unsigned long clks) { + unsigned long nsec; + + nsec = CLKS2NSEC(clks); atomic_long_inc(&mcs_op_statistics[op].count); - atomic_long_add(clks, &mcs_op_statistics[op].total); - if (mcs_op_statistics[op].max < clks) - mcs_op_statistics[op].max = clks; + atomic_long_add(nsec, &mcs_op_statistics[op].total); + if (mcs_op_statistics[op].max < nsec) + mcs_op_statistics[op].max = nsec; } static void start_instruction(void *h) { unsigned long *w0 = h; - wmb(); /* setting CMD bit must be last */ - *w0 = *w0 | 1; + wmb(); /* setting CMD/STATUS bits must be last */ + *w0 = *w0 | 0x20001; gru_flush_cache(h); } +static void report_instruction_timeout(void *h) +{ + unsigned long goff = GSEGPOFF((unsigned long)h); + char *id = "???"; + + if (TYPE_IS(CCH, goff)) + id = "CCH"; + else if (TYPE_IS(TGH, goff)) + id = "TGH"; + else if (TYPE_IS(TFH, goff)) + id = "TFH"; + + panic(KERN_ALERT "GRU %p (%s) is malfunctioning\n", h, id); +} + static int wait_instruction_complete(void *h, enum mcs_op opc) { int status; - cycles_t start_time = get_cycles(); + unsigned long start_time = get_cycles(); while (1) { cpu_relax(); status = GET_MSEG_HANDLE_STATUS(h); if (status != CCHSTATUS_ACTIVE) break; - if (GRU_OPERATION_TIMEOUT < (get_cycles() - start_time)) - panic("GRU %p is malfunctioning\n", h); + if (GRU_OPERATION_TIMEOUT < (get_cycles() - start_time)) { + report_instruction_timeout(h); + start_time = get_cycles(); + } } if (gru_options & OPT_STATS) update_mcs_stats(opc, get_cycles() - start_time); return status; } -int cch_allocate(struct gru_context_configuration_handle *cch, - int asidval, int sizeavail, unsigned long cbrmap, - unsigned long dsrmap) +int cch_allocate(struct gru_context_configuration_handle *cch) { - int i; + int ret; - for (i = 0; i < 8; i++) { - cch->asid[i] = (asidval++); - cch->sizeavail[i] = sizeavail; - } - cch->dsr_allocation_map = dsrmap; - cch->cbr_allocation_map = cbrmap; cch->opc = CCHOP_ALLOCATE; start_instruction(cch); - return wait_instruction_complete(cch, cchop_allocate); + ret = wait_instruction_complete(cch, cchop_allocate); + + /* + * Stop speculation into the GSEG being mapped by the previous ALLOCATE. + * The GSEG memory does not exist until the ALLOCATE completes. + */ + sync_core(); + return ret; } int cch_start(struct gru_context_configuration_handle *cch) @@ -105,9 +126,18 @@ int cch_interrupt(struct gru_context_configuration_handle *cch) int cch_deallocate(struct gru_context_configuration_handle *cch) { + int ret; + cch->opc = CCHOP_DEALLOCATE; start_instruction(cch); - return wait_instruction_complete(cch, cchop_deallocate); + ret = wait_instruction_complete(cch, cchop_deallocate); + + /* + * Stop speculation into the GSEG being unmapped by the previous + * DEALLOCATE. + */ + sync_core(); + return ret; } int cch_interrupt_sync(struct gru_context_configuration_handle @@ -135,17 +165,20 @@ int tgh_invalidate(struct gru_tlb_global_handle *tgh, return wait_instruction_complete(tgh, tghop_invalidate); } -void tfh_write_only(struct gru_tlb_fault_handle *tfh, - unsigned long pfn, unsigned long vaddr, - int asid, int dirty, int pagesize) +int tfh_write_only(struct gru_tlb_fault_handle *tfh, + unsigned long paddr, int gaa, + unsigned long vaddr, int asid, int dirty, + int pagesize) { tfh->fillasid = asid; tfh->fillvaddr = vaddr; - tfh->pfn = pfn; + tfh->pfn = paddr >> GRU_PADDR_SHIFT; + tfh->gaa = gaa; tfh->dirty = dirty; tfh->pagesize = pagesize; tfh->opc = TFHOP_WRITE_ONLY; start_instruction(tfh); + return wait_instruction_complete(tfh, tfhop_write_only); } void tfh_write_restart(struct gru_tlb_fault_handle *tfh, diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h index 1ed74d7508c..3f998b924d8 100644 --- a/drivers/misc/sgi-gru/gruhandles.h +++ b/drivers/misc/sgi-gru/gruhandles.h @@ -39,7 +39,6 @@ #define GRU_NUM_CBE 128 #define GRU_NUM_TFH 128 #define GRU_NUM_CCH 16 -#define GRU_NUM_GSH 1 /* Maximum resource counts that can be reserved by user programs */ #define GRU_NUM_USER_CBR GRU_NUM_CBE @@ -56,7 +55,6 @@ #define GRU_CBE_BASE (GRU_MCS_BASE + 0x10000) #define GRU_TFH_BASE (GRU_MCS_BASE + 0x18000) #define GRU_CCH_BASE (GRU_MCS_BASE + 0x20000) -#define GRU_GSH_BASE (GRU_MCS_BASE + 0x30000) /* User gseg constants */ #define GRU_GSEG_STRIDE (4 * 1024 * 1024) @@ -93,6 +91,12 @@ /* Convert an arbitrary handle address to the beginning of the GRU segment */ #define GRUBASE(h) ((void *)((unsigned long)(h) & ~(GRU_SIZE - 1))) +/* Test a valid handle address to determine the type */ +#define TYPE_IS(hn, h) ((h) >= GRU_##hn##_BASE && (h) < \ + GRU_##hn##_BASE + GRU_NUM_##hn * GRU_HANDLE_STRIDE && \ + (((h) & (GRU_HANDLE_STRIDE - 1)) == 0)) + + /* General addressing macros. */ static inline void *get_gseg_base_address(void *base, int ctxnum) { @@ -160,6 +164,16 @@ static inline void *gru_chiplet_vaddr(void *vaddr, int pnode, int chiplet) return vaddr + GRU_SIZE * (2 * pnode + chiplet); } +static inline struct gru_control_block_extended *gru_tfh_to_cbe( + struct gru_tlb_fault_handle *tfh) +{ + unsigned long cbe; + + cbe = (unsigned long)tfh - GRU_TFH_BASE + GRU_CBE_BASE; + return (struct gru_control_block_extended*)cbe; +} + + /* @@ -238,6 +252,17 @@ enum gru_tgh_state { TGHSTATE_RESTART_CTX, }; +enum gru_tgh_cause { + TGHCAUSE_RR_ECC, + TGHCAUSE_TLB_ECC, + TGHCAUSE_LRU_ECC, + TGHCAUSE_PS_ECC, + TGHCAUSE_MUL_ERR, + TGHCAUSE_DATA_ERR, + TGHCAUSE_SW_FORCE +}; + + /* * TFH - TLB Global Handle * Used for TLB dropins into the GRU TLB. @@ -251,15 +276,15 @@ struct gru_tlb_fault_handle { unsigned int fill1:9; unsigned int status:2; - unsigned int fill2:1; - unsigned int color:1; + unsigned int fill2:2; unsigned int state:3; unsigned int fill3:1; - unsigned int cause:7; /* DW 0 - high 32 */ + unsigned int cause:6; + unsigned int cb_int:1; unsigned int fill4:1; - unsigned int indexway:12; + unsigned int indexway:12; /* DW 0 - high 32 */ unsigned int fill5:4; unsigned int ctxnum:4; @@ -442,6 +467,12 @@ struct gru_control_block_extended { unsigned int cbrexecstatus:8; }; +/* CBE fields for active BCOPY instructions */ +#define cbe_baddr0 idef1upd +#define cbe_baddr1 idef3upd +#define cbe_src_cl idef6cpy +#define cbe_nelemcur idef5upd + enum gru_cbr_state { CBRSTATE_INACTIVE, CBRSTATE_IDLE, @@ -457,21 +488,7 @@ enum gru_cbr_state { CBRSTATE_BUSY_INTERRUPT, }; -/* CBE cbrexecstatus bits */ -#define CBR_EXS_ABORT_OCC_BIT 0 -#define CBR_EXS_INT_OCC_BIT 1 -#define CBR_EXS_PENDING_BIT 2 -#define CBR_EXS_QUEUED_BIT 3 -#define CBR_EXS_TLBHW_BIT 4 -#define CBR_EXS_EXCEPTION_BIT 5 - -#define CBR_EXS_ABORT_OCC (1 << CBR_EXS_ABORT_OCC_BIT) -#define CBR_EXS_INT_OCC (1 << CBR_EXS_INT_OCC_BIT) -#define CBR_EXS_PENDING (1 << CBR_EXS_PENDING_BIT) -#define CBR_EXS_QUEUED (1 << CBR_EXS_QUEUED_BIT) -#define CBR_EXS_TLBHW (1 << CBR_EXS_TLBHW_BIT) -#define CBR_EXS_EXCEPTION (1 << CBR_EXS_EXCEPTION_BIT) - +/* CBE cbrexecstatus bits - defined in gru_instructions.h*/ /* CBE ecause bits - defined in gru_instructions.h */ /* @@ -495,9 +512,7 @@ enum gru_cbr_state { /* minimum TLB purge count to ensure a full purge */ #define GRUMAXINVAL 1024UL -int cch_allocate(struct gru_context_configuration_handle *cch, - int asidval, int sizeavail, unsigned long cbrmap, unsigned long dsrmap); - +int cch_allocate(struct gru_context_configuration_handle *cch); int cch_start(struct gru_context_configuration_handle *cch); int cch_interrupt(struct gru_context_configuration_handle *cch); int cch_deallocate(struct gru_context_configuration_handle *cch); @@ -505,8 +520,8 @@ int cch_interrupt_sync(struct gru_context_configuration_handle *cch); int tgh_invalidate(struct gru_tlb_global_handle *tgh, unsigned long vaddr, unsigned long vaddrmask, int asid, int pagesize, int global, int n, unsigned short ctxbitmap); -void tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long pfn, - unsigned long vaddr, int asid, int dirty, int pagesize); +int tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long paddr, + int gaa, unsigned long vaddr, int asid, int dirty, int pagesize); void tfh_write_restart(struct gru_tlb_fault_handle *tfh, unsigned long paddr, int gaa, unsigned long vaddr, int asid, int dirty, int pagesize); void tfh_restart(struct gru_tlb_fault_handle *tfh); diff --git a/drivers/misc/sgi-gru/grukdump.c b/drivers/misc/sgi-gru/grukdump.c new file mode 100644 index 00000000000..a3700a56b8f --- /dev/null +++ b/drivers/misc/sgi-gru/grukdump.c @@ -0,0 +1,234 @@ +/* + * SN Platform GRU Driver + * + * Dump GRU State + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/spinlock.h> +#include <linux/uaccess.h> +#include <linux/delay.h> +#include <linux/bitops.h> +#include <asm/uv/uv_hub.h> +#include "gru.h" +#include "grutables.h" +#include "gruhandles.h" +#include "grulib.h" + +#define CCH_LOCK_ATTEMPTS 10 + +static int gru_user_copy_handle(void __user **dp, void *s) +{ + if (copy_to_user(*dp, s, GRU_HANDLE_BYTES)) + return -1; + *dp += GRU_HANDLE_BYTES; + return 0; +} + +static int gru_dump_context_data(void *grubase, + struct gru_context_configuration_handle *cch, + void __user *ubuf, int ctxnum, int dsrcnt, + int flush_cbrs) +{ + void *cb, *cbe, *tfh, *gseg; + int i, scr; + + gseg = grubase + ctxnum * GRU_GSEG_STRIDE; + cb = gseg + GRU_CB_BASE; + cbe = grubase + GRU_CBE_BASE; + tfh = grubase + GRU_TFH_BASE; + + for_each_cbr_in_allocation_map(i, &cch->cbr_allocation_map, scr) { + if (flush_cbrs) + gru_flush_cache(cb); + if (gru_user_copy_handle(&ubuf, cb)) + goto fail; + if (gru_user_copy_handle(&ubuf, tfh + i * GRU_HANDLE_STRIDE)) + goto fail; + if (gru_user_copy_handle(&ubuf, cbe + i * GRU_HANDLE_STRIDE)) + goto fail; + cb += GRU_HANDLE_STRIDE; + } + if (dsrcnt) + memcpy(ubuf, gseg + GRU_DS_BASE, dsrcnt * GRU_HANDLE_STRIDE); + return 0; + +fail: + return -EFAULT; +} + +static int gru_dump_tfm(struct gru_state *gru, + void __user *ubuf, void __user *ubufend) +{ + struct gru_tlb_fault_map *tfm; + int i, ret, bytes; + + bytes = GRU_NUM_TFM * GRU_CACHE_LINE_BYTES; + if (bytes > ubufend - ubuf) + ret = -EFBIG; + + for (i = 0; i < GRU_NUM_TFM; i++) { + tfm = get_tfm(gru->gs_gru_base_vaddr, i); + if (gru_user_copy_handle(&ubuf, tfm)) + goto fail; + } + return GRU_NUM_TFM * GRU_CACHE_LINE_BYTES; + +fail: + return -EFAULT; +} + +static int gru_dump_tgh(struct gru_state *gru, + void __user *ubuf, void __user *ubufend) +{ + struct gru_tlb_global_handle *tgh; + int i, ret, bytes; + + bytes = GRU_NUM_TGH * GRU_CACHE_LINE_BYTES; + if (bytes > ubufend - ubuf) + ret = -EFBIG; + + for (i = 0; i < GRU_NUM_TGH; i++) { + tgh = get_tgh(gru->gs_gru_base_vaddr, i); + if (gru_user_copy_handle(&ubuf, tgh)) + goto fail; + } + return GRU_NUM_TGH * GRU_CACHE_LINE_BYTES; + +fail: + return -EFAULT; +} + +static int gru_dump_context(struct gru_state *gru, int ctxnum, + void __user *ubuf, void __user *ubufend, char data_opt, + char lock_cch, char flush_cbrs) +{ + struct gru_dump_context_header hdr; + struct gru_dump_context_header __user *uhdr = ubuf; + struct gru_context_configuration_handle *cch, *ubufcch; + struct gru_thread_state *gts; + int try, cch_locked, cbrcnt = 0, dsrcnt = 0, bytes = 0, ret = 0; + void *grubase; + + memset(&hdr, 0, sizeof(hdr)); + grubase = gru->gs_gru_base_vaddr; + cch = get_cch(grubase, ctxnum); + for (try = 0; try < CCH_LOCK_ATTEMPTS; try++) { + cch_locked = trylock_cch_handle(cch); + if (cch_locked) + break; + msleep(1); + } + + ubuf += sizeof(hdr); + ubufcch = ubuf; + if (gru_user_copy_handle(&ubuf, cch)) { + if (cch_locked) + unlock_cch_handle(cch); + return -EFAULT; + } + if (cch_locked) + ubufcch->delresp = 0; + bytes = sizeof(hdr) + GRU_CACHE_LINE_BYTES; + + if (cch_locked || !lock_cch) { + gts = gru->gs_gts[ctxnum]; + if (gts && gts->ts_vma) { + hdr.pid = gts->ts_tgid_owner; + hdr.vaddr = gts->ts_vma->vm_start; + } + if (cch->state != CCHSTATE_INACTIVE) { + cbrcnt = hweight64(cch->cbr_allocation_map) * + GRU_CBR_AU_SIZE; + dsrcnt = data_opt ? hweight32(cch->dsr_allocation_map) * + GRU_DSR_AU_CL : 0; + } + bytes += (3 * cbrcnt + dsrcnt) * GRU_CACHE_LINE_BYTES; + if (bytes > ubufend - ubuf) + ret = -EFBIG; + else + ret = gru_dump_context_data(grubase, cch, ubuf, ctxnum, + dsrcnt, flush_cbrs); + } + if (cch_locked) + unlock_cch_handle(cch); + if (ret) + return ret; + + hdr.magic = GRU_DUMP_MAGIC; + hdr.gid = gru->gs_gid; + hdr.ctxnum = ctxnum; + hdr.cbrcnt = cbrcnt; + hdr.dsrcnt = dsrcnt; + hdr.cch_locked = cch_locked; + if (copy_to_user(uhdr, &hdr, sizeof(hdr))) + return -EFAULT; + + return bytes; +} + +int gru_dump_chiplet_request(unsigned long arg) +{ + struct gru_state *gru; + struct gru_dump_chiplet_state_req req; + void __user *ubuf; + void __user *ubufend; + int ctxnum, ret, cnt = 0; + + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + /* Currently, only dump by gid is implemented */ + if (req.gid >= gru_max_gids || req.gid < 0) + return -EINVAL; + + gru = GID_TO_GRU(req.gid); + ubuf = req.buf; + ubufend = req.buf + req.buflen; + + ret = gru_dump_tfm(gru, ubuf, ubufend); + if (ret < 0) + goto fail; + ubuf += ret; + + ret = gru_dump_tgh(gru, ubuf, ubufend); + if (ret < 0) + goto fail; + ubuf += ret; + + for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) { + if (req.ctxnum == ctxnum || req.ctxnum < 0) { + ret = gru_dump_context(gru, ctxnum, ubuf, ubufend, + req.data_opt, req.lock_cch, + req.flush_cbrs); + if (ret < 0) + goto fail; + ubuf += ret; + cnt++; + } + } + + if (copy_to_user((void __user *)arg, &req, sizeof(req))) + return -EFAULT; + return cnt; + +fail: + return ret; +} diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c index d8bd7d84a7c..913de07e577 100644 --- a/drivers/misc/sgi-gru/grukservices.c +++ b/drivers/misc/sgi-gru/grukservices.c @@ -24,13 +24,15 @@ #include <linux/errno.h> #include <linux/slab.h> #include <linux/mm.h> -#include <linux/smp_lock.h> #include <linux/spinlock.h> #include <linux/device.h> #include <linux/miscdevice.h> #include <linux/proc_fs.h> #include <linux/interrupt.h> #include <linux/uaccess.h> +#include <linux/delay.h> +#include <linux/export.h> +#include <asm/io_apic.h> #include "gru.h" #include "grulib.h" #include "grutables.h" @@ -45,18 +47,63 @@ * resources. This will likely be replaced when we better understand the * kernel/user requirements. * - * At boot time, the kernel permanently reserves a fixed number of - * CBRs/DSRs for each cpu to use. The resources are all taken from - * the GRU chiplet 1 on the blade. This leaves the full set of resources - * of chiplet 0 available to be allocated to a single user. + * Blade percpu resources reserved for kernel use. These resources are + * reserved whenever the the kernel context for the blade is loaded. Note + * that the kernel context is not guaranteed to be always available. It is + * loaded on demand & can be stolen by a user if the user demand exceeds the + * kernel demand. The kernel can always reload the kernel context but + * a SLEEP may be required!!!. + * + * Async Overview: + * + * Each blade has one "kernel context" that owns GRU kernel resources + * located on the blade. Kernel drivers use GRU resources in this context + * for sending messages, zeroing memory, etc. + * + * The kernel context is dynamically loaded on demand. If it is not in + * use by the kernel, the kernel context can be unloaded & given to a user. + * The kernel context will be reloaded when needed. This may require that + * a context be stolen from a user. + * NOTE: frequent unloading/reloading of the kernel context is + * expensive. We are depending on batch schedulers, cpusets, sane + * drivers or some other mechanism to prevent the need for frequent + * stealing/reloading. + * + * The kernel context consists of two parts: + * - 1 CB & a few DSRs that are reserved for each cpu on the blade. + * Each cpu has it's own private resources & does not share them + * with other cpus. These resources are used serially, ie, + * locked, used & unlocked on each call to a function in + * grukservices. + * (Now that we have dynamic loading of kernel contexts, I + * may rethink this & allow sharing between cpus....) + * + * - Additional resources can be reserved long term & used directly + * by UV drivers located in the kernel. Drivers using these GRU + * resources can use asynchronous GRU instructions that send + * interrupts on completion. + * - these resources must be explicitly locked/unlocked + * - locked resources prevent (obviously) the kernel + * context from being unloaded. + * - drivers using these resource directly issue their own + * GRU instruction and must wait/check completion. + * + * When these resources are reserved, the caller can optionally + * associate a wait_queue with the resources and use asynchronous + * GRU instructions. When an async GRU instruction completes, the + * driver will do a wakeup on the event. + * */ -/* Blade percpu resources PERMANENTLY reserved for kernel use */ + +#define ASYNC_HAN_TO_BID(h) ((h) - 1) +#define ASYNC_BID_TO_HAN(b) ((b) + 1) +#define ASYNC_HAN_TO_BS(h) gru_base[ASYNC_HAN_TO_BID(h)] + #define GRU_NUM_KERNEL_CBR 1 #define GRU_NUM_KERNEL_DSR_BYTES 256 #define GRU_NUM_KERNEL_DSR_CL (GRU_NUM_KERNEL_DSR_BYTES / \ GRU_CACHE_LINE_BYTES) -#define KERNEL_CTXNUM 15 /* GRU instruction attributes for all instructions */ #define IMA IMA_CB_DELAY @@ -98,6 +145,120 @@ struct message_header { #define HSTATUS(mq, h) ((mq) + offsetof(struct message_queue, hstatus[h])) +/* + * Reload the blade's kernel context into a GRU chiplet. Called holding + * the bs_kgts_sema for READ. Will steal user contexts if necessary. + */ +static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id) +{ + struct gru_state *gru; + struct gru_thread_state *kgts; + void *vaddr; + int ctxnum, ncpus; + + up_read(&bs->bs_kgts_sema); + down_write(&bs->bs_kgts_sema); + + if (!bs->bs_kgts) { + bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0); + bs->bs_kgts->ts_user_blade_id = blade_id; + } + kgts = bs->bs_kgts; + + if (!kgts->ts_gru) { + STAT(load_kernel_context); + ncpus = uv_blade_nr_possible_cpus(blade_id); + kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU( + GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs); + kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU( + GRU_NUM_KERNEL_DSR_BYTES * ncpus + + bs->bs_async_dsr_bytes); + while (!gru_assign_gru_context(kgts)) { + msleep(1); + gru_steal_context(kgts); + } + gru_load_context(kgts); + gru = bs->bs_kgts->ts_gru; + vaddr = gru->gs_gru_base_vaddr; + ctxnum = kgts->ts_ctxnum; + bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0); + bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0); + } + downgrade_write(&bs->bs_kgts_sema); +} + +/* + * Free all kernel contexts that are not currently in use. + * Returns 0 if all freed, else number of inuse context. + */ +static int gru_free_kernel_contexts(void) +{ + struct gru_blade_state *bs; + struct gru_thread_state *kgts; + int bid, ret = 0; + + for (bid = 0; bid < GRU_MAX_BLADES; bid++) { + bs = gru_base[bid]; + if (!bs) + continue; + + /* Ignore busy contexts. Don't want to block here. */ + if (down_write_trylock(&bs->bs_kgts_sema)) { + kgts = bs->bs_kgts; + if (kgts && kgts->ts_gru) + gru_unload_context(kgts, 0); + bs->bs_kgts = NULL; + up_write(&bs->bs_kgts_sema); + kfree(kgts); + } else { + ret++; + } + } + return ret; +} + +/* + * Lock & load the kernel context for the specified blade. + */ +static struct gru_blade_state *gru_lock_kernel_context(int blade_id) +{ + struct gru_blade_state *bs; + int bid; + + STAT(lock_kernel_context); +again: + bid = blade_id < 0 ? uv_numa_blade_id() : blade_id; + bs = gru_base[bid]; + + /* Handle the case where migration occurred while waiting for the sema */ + down_read(&bs->bs_kgts_sema); + if (blade_id < 0 && bid != uv_numa_blade_id()) { + up_read(&bs->bs_kgts_sema); + goto again; + } + if (!bs->bs_kgts || !bs->bs_kgts->ts_gru) + gru_load_kernel_context(bs, bid); + return bs; + +} + +/* + * Unlock the kernel context for the specified blade. Context is not + * unloaded but may be stolen before next use. + */ +static void gru_unlock_kernel_context(int blade_id) +{ + struct gru_blade_state *bs; + + bs = gru_base[blade_id]; + up_read(&bs->bs_kgts_sema); + STAT(unlock_kernel_context); +} + +/* + * Reserve & get pointers to the DSR/CBRs reserved for the current cpu. + * - returns with preemption disabled + */ static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr) { struct gru_blade_state *bs; @@ -105,30 +266,166 @@ static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr) BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES); preempt_disable(); - bs = gru_base[uv_numa_blade_id()]; + bs = gru_lock_kernel_context(-1); lcpu = uv_blade_processor_id(); *cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE; *dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES; return 0; } +/* + * Free the current cpus reserved DSR/CBR resources. + */ static void gru_free_cpu_resources(void *cb, void *dsr) { + gru_unlock_kernel_context(uv_numa_blade_id()); preempt_enable(); } +/* + * Reserve GRU resources to be used asynchronously. + * Note: currently supports only 1 reservation per blade. + * + * input: + * blade_id - blade on which resources should be reserved + * cbrs - number of CBRs + * dsr_bytes - number of DSR bytes needed + * output: + * handle to identify resource + * (0 = async resources already reserved) + */ +unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes, + struct completion *cmp) +{ + struct gru_blade_state *bs; + struct gru_thread_state *kgts; + int ret = 0; + + bs = gru_base[blade_id]; + + down_write(&bs->bs_kgts_sema); + + /* Verify no resources already reserved */ + if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs) + goto done; + bs->bs_async_dsr_bytes = dsr_bytes; + bs->bs_async_cbrs = cbrs; + bs->bs_async_wq = cmp; + kgts = bs->bs_kgts; + + /* Resources changed. Unload context if already loaded */ + if (kgts && kgts->ts_gru) + gru_unload_context(kgts, 0); + ret = ASYNC_BID_TO_HAN(blade_id); + +done: + up_write(&bs->bs_kgts_sema); + return ret; +} + +/* + * Release async resources previously reserved. + * + * input: + * han - handle to identify resources + */ +void gru_release_async_resources(unsigned long han) +{ + struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); + + down_write(&bs->bs_kgts_sema); + bs->bs_async_dsr_bytes = 0; + bs->bs_async_cbrs = 0; + bs->bs_async_wq = NULL; + up_write(&bs->bs_kgts_sema); +} + +/* + * Wait for async GRU instructions to complete. + * + * input: + * han - handle to identify resources + */ +void gru_wait_async_cbr(unsigned long han) +{ + struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); + + wait_for_completion(bs->bs_async_wq); + mb(); +} + +/* + * Lock previous reserved async GRU resources + * + * input: + * han - handle to identify resources + * output: + * cb - pointer to first CBR + * dsr - pointer to first DSR + */ +void gru_lock_async_resource(unsigned long han, void **cb, void **dsr) +{ + struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); + int blade_id = ASYNC_HAN_TO_BID(han); + int ncpus; + + gru_lock_kernel_context(blade_id); + ncpus = uv_blade_nr_possible_cpus(blade_id); + if (cb) + *cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE; + if (dsr) + *dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES; +} + +/* + * Unlock previous reserved async GRU resources + * + * input: + * han - handle to identify resources + */ +void gru_unlock_async_resource(unsigned long han) +{ + int blade_id = ASYNC_HAN_TO_BID(han); + + gru_unlock_kernel_context(blade_id); +} + +/*----------------------------------------------------------------------*/ int gru_get_cb_exception_detail(void *cb, struct control_block_extended_exc_detail *excdet) { struct gru_control_block_extended *cbe; + struct gru_thread_state *kgts = NULL; + unsigned long off; + int cbrnum, bid; - cbe = get_cbe(GRUBASE(cb), get_cb_number(cb)); - prefetchw(cbe); /* Harmless on hardware, required for emulator */ + /* + * Locate kgts for cb. This algorithm is SLOW but + * this function is rarely called (ie., almost never). + * Performance does not matter. + */ + for_each_possible_blade(bid) { + if (!gru_base[bid]) + break; + kgts = gru_base[bid]->bs_kgts; + if (!kgts || !kgts->ts_gru) + continue; + off = cb - kgts->ts_gru->gs_gru_base_vaddr; + if (off < GRU_SIZE) + break; + kgts = NULL; + } + BUG_ON(!kgts); + cbrnum = thread_cbr_number(kgts, get_cb_number(cb)); + cbe = get_cbe(GRUBASE(cb), cbrnum); + gru_flush_cache(cbe); /* CBE not coherent */ + sync_core(); excdet->opc = cbe->opccpy; excdet->exopc = cbe->exopccpy; excdet->ecause = cbe->ecause; excdet->exceptdet0 = cbe->idef1upd; excdet->exceptdet1 = cbe->idef3upd; + gru_flush_cache(cbe); return 0; } @@ -141,8 +438,8 @@ char *gru_get_cb_exception_detail_str(int ret, void *cb, if (ret > 0 && gen->istatus == CBS_EXCEPTION) { gru_get_cb_exception_detail(cb, &excdet); snprintf(buf, size, - "GRU exception: cb %p, opc %d, exopc %d, ecause 0x%x," - "excdet0 0x%lx, excdet1 0x%x", + "GRU:%d exception: cb %p, opc %d, exopc %d, ecause 0x%x," + "excdet0 0x%lx, excdet1 0x%x", smp_processor_id(), gen, excdet.opc, excdet.exopc, excdet.ecause, excdet.exceptdet0, excdet.exceptdet1); } else { @@ -167,13 +464,13 @@ static int gru_retry_exception(void *cb) int retry = EXCEPTION_RETRY_LIMIT; while (1) { - if (gru_get_cb_message_queue_substatus(cb)) - break; if (gru_wait_idle_or_exception(gen) == CBS_IDLE) return CBS_IDLE; - + if (gru_get_cb_message_queue_substatus(cb)) + return CBS_EXCEPTION; gru_get_cb_exception_detail(cb, &excdet); - if (excdet.ecause & ~EXCEPTION_RETRY_BITS) + if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) || + (excdet.cbrexecstatus & CBR_EXS_ABORT_OCC)) break; if (retry-- == 0) break; @@ -189,9 +486,10 @@ int gru_check_status_proc(void *cb) int ret; ret = gen->istatus; - if (ret != CBS_EXCEPTION) - return ret; - return gru_retry_exception(cb); + if (ret == CBS_EXCEPTION) + ret = gru_retry_exception(cb); + rmb(); + return ret; } @@ -203,7 +501,7 @@ int gru_wait_proc(void *cb) ret = gru_wait_idle_or_exception(gen); if (ret == CBS_EXCEPTION) ret = gru_retry_exception(cb); - + rmb(); return ret; } @@ -270,7 +568,7 @@ int gru_create_message_queue(struct gru_message_queue_desc *mqd, mqd->mq = mq; mqd->mq_gpa = uv_gpa(mq); mqd->qlines = qlines; - mqd->interrupt_pnode = UV_NASID_TO_PNODE(nasid); + mqd->interrupt_pnode = nasid >> 1; mqd->interrupt_vector = vector; mqd->interrupt_apicid = apicid; return 0; @@ -330,6 +628,8 @@ static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd, ret = MQE_UNEXPECTED_CB_ERR; break; case CBSS_PAGE_OVERFLOW: + STAT(mesq_noop_page_overflow); + /* fallthru */ default: BUG(); } @@ -405,17 +705,48 @@ cberr: } /* - * Send a cross-partition interrupt to the SSI that contains the target - * message queue. Normally, the interrupt is automatically delivered by hardware - * but some error conditions require explicit delivery. + * Handle a PUT failure. Note: if message was a 2-line message, one of the + * lines might have successfully have been written. Before sending the + * message, "present" must be cleared in BOTH lines to prevent the receiver + * from prematurely seeing the full message. */ -static void send_message_queue_interrupt(struct gru_message_queue_desc *mqd) +static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd, + void *mesg, int lines) { - if (mqd->interrupt_vector) - uv_hub_send_ipi(mqd->interrupt_pnode, mqd->interrupt_apicid, - mqd->interrupt_vector); -} + unsigned long m, *val = mesg, gpa, save; + int ret; + + m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6); + if (lines == 2) { + gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA); + if (gru_wait(cb) != CBS_IDLE) + return MQE_UNEXPECTED_CB_ERR; + } + gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA); + if (gru_wait(cb) != CBS_IDLE) + return MQE_UNEXPECTED_CB_ERR; + + if (!mqd->interrupt_vector) + return MQE_OK; + /* + * Send a cross-partition interrupt to the SSI that contains the target + * message queue. Normally, the interrupt is automatically delivered by + * hardware but some error conditions require explicit delivery. + * Use the GRU to deliver the interrupt. Otherwise partition failures + * could cause unrecovered errors. + */ + gpa = uv_global_gru_mmr_address(mqd->interrupt_pnode, UVH_IPI_INT); + save = *val; + *val = uv_hub_ipi_value(mqd->interrupt_apicid, mqd->interrupt_vector, + dest_Fixed); + gru_vstore_phys(cb, gpa, gru_get_tri(mesg), IAA_REGISTER, IMA); + ret = gru_wait(cb); + *val = save; + if (ret != CBS_IDLE) + return MQE_UNEXPECTED_CB_ERR; + return MQE_OK; +} /* * Handle a gru_mesq failure. Some of these failures are software recoverable @@ -425,7 +756,6 @@ static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd, void *mesg, int lines) { int substatus, ret = 0; - unsigned long m; substatus = gru_get_cb_message_queue_substatus(cb); switch (substatus) { @@ -447,15 +777,11 @@ static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd, break; case CBSS_PUT_NACKED: STAT(mesq_send_put_nacked); - m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6); - gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA); - if (gru_wait(cb) == CBS_IDLE) { - ret = MQE_OK; - send_message_queue_interrupt(mqd); - } else { - ret = MQE_UNEXPECTED_CB_ERR; - } + ret = send_message_put_nacked(cb, mqd, mesg, lines); break; + case CBSS_PAGE_OVERFLOW: + STAT(mesq_page_overflow); + /* fallthru */ default: BUG(); } @@ -548,7 +874,6 @@ void *gru_get_next_message(struct gru_message_queue_desc *mqd) int present = mhdr->present; /* skip NOOP messages */ - STAT(mesq_receive); while (present == MQS_NOOP) { gru_free_message(mqd, mhdr); mhdr = mq->next; @@ -568,6 +893,7 @@ void *gru_get_next_message(struct gru_message_queue_desc *mqd) if (mhdr->lines == 2) restore_present2(mhdr, mhdr->present2); + STAT(mesq_receive); return mhdr; } EXPORT_SYMBOL_GPL(gru_get_next_message); @@ -575,6 +901,29 @@ EXPORT_SYMBOL_GPL(gru_get_next_message); /* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/ /* + * Load a DW from a global GPA. The GPA can be a memory or MMR address. + */ +int gru_read_gpa(unsigned long *value, unsigned long gpa) +{ + void *cb; + void *dsr; + int ret, iaa; + + STAT(read_gpa); + if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr)) + return MQE_BUG_NO_RESOURCES; + iaa = gpa >> 62; + gru_vload_phys(cb, gpa, gru_get_tri(dsr), iaa, IMA); + ret = gru_wait(cb); + if (ret == CBS_IDLE) + *value = *(unsigned long *)dsr; + gru_free_cpu_resources(cb, dsr); + return ret; +} +EXPORT_SYMBOL_GPL(gru_read_gpa); + + +/* * Copy a block of data using the GRU resources */ int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa, @@ -597,115 +946,217 @@ EXPORT_SYMBOL_GPL(gru_copy_gpa); /* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/ /* Temp - will delete after we gain confidence in the GRU */ -static __cacheline_aligned unsigned long word0; -static __cacheline_aligned unsigned long word1; -static int quicktest(struct gru_state *gru) +static int quicktest0(unsigned long arg) { + unsigned long word0; + unsigned long word1; void *cb; - void *ds; + void *dsr; unsigned long *p; + int ret = -EIO; - cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0); - ds = get_gseg_base_address_ds(gru->gs_gru_base_vaddr, KERNEL_CTXNUM, 0); - p = ds; + if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr)) + return MQE_BUG_NO_RESOURCES; + p = dsr; word0 = MAGIC; + word1 = 0; - gru_vload(cb, uv_gpa(&word0), 0, XTYPE_DW, 1, 1, IMA); - if (gru_wait(cb) != CBS_IDLE) - BUG(); + gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA); + if (gru_wait(cb) != CBS_IDLE) { + printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 1\n", smp_processor_id()); + goto done; + } - if (*(unsigned long *)ds != MAGIC) - BUG(); - gru_vstore(cb, uv_gpa(&word1), 0, XTYPE_DW, 1, 1, IMA); - if (gru_wait(cb) != CBS_IDLE) - BUG(); + if (*p != MAGIC) { + printk(KERN_DEBUG "GRU:%d quicktest0 bad magic 0x%lx\n", smp_processor_id(), *p); + goto done; + } + gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA); + if (gru_wait(cb) != CBS_IDLE) { + printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 2\n", smp_processor_id()); + goto done; + } - if (word0 != word1 || word0 != MAGIC) { - printk - ("GRU quicktest err: gid %d, found 0x%lx, expected 0x%lx\n", - gru->gs_gid, word1, MAGIC); - BUG(); /* ZZZ should not be fatal */ + if (word0 != word1 || word1 != MAGIC) { + printk(KERN_DEBUG + "GRU:%d quicktest0 err: found 0x%lx, expected 0x%lx\n", + smp_processor_id(), word1, MAGIC); + goto done; } + ret = 0; - return 0; +done: + gru_free_cpu_resources(cb, dsr); + return ret; } +#define ALIGNUP(p, q) ((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1))) -int gru_kservices_init(struct gru_state *gru) +static int quicktest1(unsigned long arg) { - struct gru_blade_state *bs; - struct gru_context_configuration_handle *cch; - unsigned long cbr_map, dsr_map; - int err, num, cpus_possible; + struct gru_message_queue_desc mqd; + void *p, *mq; + unsigned long *dw; + int i, ret = -EIO; + char mes[GRU_CACHE_LINE_BYTES], *m; + + /* Need 1K cacheline aligned that does not cross page boundary */ + p = kmalloc(4096, 0); + if (p == NULL) + return -ENOMEM; + mq = ALIGNUP(p, 1024); + memset(mes, 0xee, sizeof(mes)); + dw = mq; + + gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0); + for (i = 0; i < 6; i++) { + mes[8] = i; + do { + ret = gru_send_message_gpa(&mqd, mes, sizeof(mes)); + } while (ret == MQE_CONGESTION); + if (ret) + break; + } + if (ret != MQE_QUEUE_FULL || i != 4) { + printk(KERN_DEBUG "GRU:%d quicktest1: unexpect status %d, i %d\n", + smp_processor_id(), ret, i); + goto done; + } - /* - * Currently, resources are reserved ONLY on the second chiplet - * on each blade. This leaves ALL resources on chiplet 0 available - * for user code. - */ - bs = gru->gs_blade; - if (gru != &bs->bs_grus[1]) - return 0; - - cpus_possible = uv_blade_nr_possible_cpus(gru->gs_blade_id); - - num = GRU_NUM_KERNEL_CBR * cpus_possible; - cbr_map = gru_reserve_cb_resources(gru, GRU_CB_COUNT_TO_AU(num), NULL); - gru->gs_reserved_cbrs += num; - - num = GRU_NUM_KERNEL_DSR_BYTES * cpus_possible; - dsr_map = gru_reserve_ds_resources(gru, GRU_DS_BYTES_TO_AU(num), NULL); - gru->gs_reserved_dsr_bytes += num; - - gru->gs_active_contexts++; - __set_bit(KERNEL_CTXNUM, &gru->gs_context_map); - cch = get_cch(gru->gs_gru_base_vaddr, KERNEL_CTXNUM); - - bs->kernel_cb = get_gseg_base_address_cb(gru->gs_gru_base_vaddr, - KERNEL_CTXNUM, 0); - bs->kernel_dsr = get_gseg_base_address_ds(gru->gs_gru_base_vaddr, - KERNEL_CTXNUM, 0); - - lock_cch_handle(cch); - cch->tfm_fault_bit_enable = 0; - cch->tlb_int_enable = 0; - cch->tfm_done_bit_enable = 0; - cch->unmap_enable = 1; - err = cch_allocate(cch, 0, 0, cbr_map, dsr_map); - if (err) { - gru_dbg(grudev, - "Unable to allocate kernel CCH: gid %d, err %d\n", - gru->gs_gid, err); - BUG(); + for (i = 0; i < 6; i++) { + m = gru_get_next_message(&mqd); + if (!m || m[8] != i) + break; + gru_free_message(&mqd, m); } - if (cch_start(cch)) { - gru_dbg(grudev, "Unable to start kernel CCH: gid %d, err %d\n", - gru->gs_gid, err); - BUG(); + if (i != 4) { + printk(KERN_DEBUG "GRU:%d quicktest2: bad message, i %d, m %p, m8 %d\n", + smp_processor_id(), i, m, m ? m[8] : -1); + goto done; } - unlock_cch_handle(cch); + ret = 0; - if (gru_options & GRU_QUICKLOOK) - quicktest(gru); - return 0; +done: + kfree(p); + return ret; } -void gru_kservices_exit(struct gru_state *gru) +static int quicktest2(unsigned long arg) { - struct gru_context_configuration_handle *cch; - struct gru_blade_state *bs; + static DECLARE_COMPLETION(cmp); + unsigned long han; + int blade_id = 0; + int numcb = 4; + int ret = 0; + unsigned long *buf; + void *cb0, *cb; + struct gru_control_block_status *gen; + int i, k, istatus, bytes; + + bytes = numcb * 4 * 8; + buf = kmalloc(bytes, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ret = -EBUSY; + han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp); + if (!han) + goto done; + + gru_lock_async_resource(han, &cb0, NULL); + memset(buf, 0xee, bytes); + for (i = 0; i < numcb; i++) + gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0, + XTYPE_DW, 4, 1, IMA_INTERRUPT); + + ret = 0; + k = numcb; + do { + gru_wait_async_cbr(han); + for (i = 0; i < numcb; i++) { + cb = cb0 + i * GRU_HANDLE_STRIDE; + istatus = gru_check_status(cb); + if (istatus != CBS_ACTIVE && istatus != CBS_CALL_OS) + break; + } + if (i == numcb) + continue; + if (istatus != CBS_IDLE) { + printk(KERN_DEBUG "GRU:%d quicktest2: cb %d, exception\n", smp_processor_id(), i); + ret = -EFAULT; + } else if (buf[4 * i] || buf[4 * i + 1] || buf[4 * i + 2] || + buf[4 * i + 3]) { + printk(KERN_DEBUG "GRU:%d quicktest2:cb %d, buf 0x%lx, 0x%lx, 0x%lx, 0x%lx\n", + smp_processor_id(), i, buf[4 * i], buf[4 * i + 1], buf[4 * i + 2], buf[4 * i + 3]); + ret = -EIO; + } + k--; + gen = cb; + gen->istatus = CBS_CALL_OS; /* don't handle this CBR again */ + } while (k); + BUG_ON(cmp.done); + + gru_unlock_async_resource(han); + gru_release_async_resources(han); +done: + kfree(buf); + return ret; +} + +#define BUFSIZE 200 +static int quicktest3(unsigned long arg) +{ + char buf1[BUFSIZE], buf2[BUFSIZE]; + int ret = 0; + + memset(buf2, 0, sizeof(buf2)); + memset(buf1, get_cycles() & 255, sizeof(buf1)); + gru_copy_gpa(uv_gpa(buf2), uv_gpa(buf1), BUFSIZE); + if (memcmp(buf1, buf2, BUFSIZE)) { + printk(KERN_DEBUG "GRU:%d quicktest3 error\n", smp_processor_id()); + ret = -EIO; + } + return ret; +} - bs = gru->gs_blade; - if (gru != &bs->bs_grus[1]) - return; +/* + * Debugging only. User hook for various kernel tests + * of driver & gru. + */ +int gru_ktest(unsigned long arg) +{ + int ret = -EINVAL; - cch = get_cch(gru->gs_gru_base_vaddr, KERNEL_CTXNUM); - lock_cch_handle(cch); - if (cch_interrupt_sync(cch)) - BUG(); - if (cch_deallocate(cch)) + switch (arg & 0xff) { + case 0: + ret = quicktest0(arg); + break; + case 1: + ret = quicktest1(arg); + break; + case 2: + ret = quicktest2(arg); + break; + case 3: + ret = quicktest3(arg); + break; + case 99: + ret = gru_free_kernel_contexts(); + break; + } + return ret; + +} + +int gru_kservices_init(void) +{ + return 0; +} + +void gru_kservices_exit(void) +{ + if (gru_free_kernel_contexts()) BUG(); - unlock_cch_handle(cch); } diff --git a/drivers/misc/sgi-gru/grukservices.h b/drivers/misc/sgi-gru/grukservices.h index 747ed315d56..02aa94d8484 100644 --- a/drivers/misc/sgi-gru/grukservices.h +++ b/drivers/misc/sgi-gru/grukservices.h @@ -131,6 +131,20 @@ extern void *gru_get_next_message(struct gru_message_queue_desc *mqd); /* + * Read a GRU global GPA. Source can be located in a remote partition. + * + * Input: + * value memory address where MMR value is returned + * gpa source numalink physical address of GPA + * + * Output: + * 0 OK + * >0 error + */ +int gru_read_gpa(unsigned long *value, unsigned long gpa); + + +/* * Copy data using the GRU. Source or destination can be located in a remote * partition. * @@ -146,4 +160,55 @@ extern void *gru_get_next_message(struct gru_message_queue_desc *mqd); extern int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa, unsigned int bytes); +/* + * Reserve GRU resources to be used asynchronously. + * + * input: + * blade_id - blade on which resources should be reserved + * cbrs - number of CBRs + * dsr_bytes - number of DSR bytes needed + * cmp - completion structure for waiting for + * async completions + * output: + * handle to identify resource + * (0 = no resources) + */ +extern unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes, + struct completion *cmp); + +/* + * Release async resources previously reserved. + * + * input: + * han - handle to identify resources + */ +extern void gru_release_async_resources(unsigned long han); + +/* + * Wait for async GRU instructions to complete. + * + * input: + * han - handle to identify resources + */ +extern void gru_wait_async_cbr(unsigned long han); + +/* + * Lock previous reserved async GRU resources + * + * input: + * han - handle to identify resources + * output: + * cb - pointer to first CBR + * dsr - pointer to first DSR + */ +extern void gru_lock_async_resource(unsigned long han, void **cb, void **dsr); + +/* + * Unlock previous reserved async GRU resources + * + * input: + * han - handle to identify resources + */ +extern void gru_unlock_async_resource(unsigned long han); + #endif /* __GRU_KSERVICES_H_ */ diff --git a/drivers/misc/sgi-gru/grulib.h b/drivers/misc/sgi-gru/grulib.h index e56e196a699..e77d1b1f9d0 100644 --- a/drivers/misc/sgi-gru/grulib.h +++ b/drivers/misc/sgi-gru/grulib.h @@ -32,8 +32,8 @@ /* Set Number of Request Blocks */ #define GRU_CREATE_CONTEXT _IOWR(GRU_IOCTL_NUM, 1, void *) -/* Register task as using the slice */ -#define GRU_SET_TASK_SLICE _IOWR(GRU_IOCTL_NUM, 5, void *) +/* Set Context Options */ +#define GRU_SET_CONTEXT_OPTION _IOWR(GRU_IOCTL_NUM, 4, void *) /* Fetch exception detail */ #define GRU_USER_GET_EXCEPTION_DETAIL _IOWR(GRU_IOCTL_NUM, 6, void *) @@ -44,8 +44,11 @@ /* For user unload context */ #define GRU_USER_UNLOAD_CONTEXT _IOWR(GRU_IOCTL_NUM, 9, void *) -/* For fetching GRU chiplet status */ -#define GRU_GET_CHIPLET_STATUS _IOWR(GRU_IOCTL_NUM, 10, void *) +/* For dumpping GRU chiplet state */ +#define GRU_DUMP_CHIPLET_STATE _IOWR(GRU_IOCTL_NUM, 11, void *) + +/* For getting gseg statistics */ +#define GRU_GET_GSEG_STATISTICS _IOWR(GRU_IOCTL_NUM, 12, void *) /* For user TLB flushing (primarily for tests) */ #define GRU_USER_FLUSH_TLB _IOWR(GRU_IOCTL_NUM, 50, void *) @@ -53,8 +56,17 @@ /* Get some config options (primarily for tests & emulator) */ #define GRU_GET_CONFIG_INFO _IOWR(GRU_IOCTL_NUM, 51, void *) +/* Various kernel self-tests */ +#define GRU_KTEST _IOWR(GRU_IOCTL_NUM, 52, void *) + #define CONTEXT_WINDOW_BYTES(th) (GRU_GSEG_PAGESIZE * (th)) #define THREAD_POINTER(p, th) (p + GRU_GSEG_PAGESIZE * (th)) +#define GSEG_START(cb) ((void *)((unsigned long)(cb) & ~(GRU_GSEG_PAGESIZE - 1))) + +struct gru_get_gseg_statistics_req { + unsigned long gseg; + struct gru_gseg_statistics stats; +}; /* * Structure used to pass TLB flush parameters to the driver @@ -65,6 +77,7 @@ struct gru_create_context_req { unsigned int control_blocks; unsigned int maximum_thread_count; unsigned int options; + unsigned char tlb_preload_count; }; /* @@ -75,6 +88,17 @@ struct gru_unload_context_req { }; /* + * Structure used to set context options + */ +enum {sco_gseg_owner, sco_cch_req_slice, sco_blade_chiplet}; +struct gru_set_context_option_req { + unsigned long gseg; + int op; + int val0; + long val1; +}; + +/* * Structure used to pass TLB flush parameters to the driver */ struct gru_flush_tlb_req { @@ -84,6 +108,38 @@ struct gru_flush_tlb_req { }; /* + * Structure used to pass TLB flush parameters to the driver + */ +enum {dcs_pid, dcs_gid}; +struct gru_dump_chiplet_state_req { + unsigned int op; + unsigned int gid; + int ctxnum; + char data_opt; + char lock_cch; + char flush_cbrs; + char fill[10]; + pid_t pid; + void *buf; + size_t buflen; + /* ---- output --- */ + unsigned int num_contexts; +}; + +#define GRU_DUMP_MAGIC 0x3474ab6c +struct gru_dump_context_header { + unsigned int magic; + unsigned int gid; + unsigned char ctxnum; + unsigned char cbrcnt; + unsigned char dsrcnt; + pid_t pid; + unsigned long vaddr; + int cch_locked; + unsigned long data[0]; +}; + +/* * GRU configuration info (temp - for testing) */ struct gru_config_info { diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c index ec3f7a17d22..ae16c8cb4f3 100644 --- a/drivers/misc/sgi-gru/grumain.c +++ b/drivers/misc/sgi-gru/grumain.c @@ -3,11 +3,21 @@ * * DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. * - * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/kernel.h> @@ -17,6 +27,8 @@ #include <linux/sched.h> #include <linux/device.h> #include <linux/list.h> +#include <linux/err.h> +#include <linux/prefetch.h> #include <asm/uv/uv_hub.h> #include "gru.h" #include "grutables.h" @@ -38,12 +50,20 @@ struct device *grudev = &gru_device; /* * Select a gru fault map to be used by the current cpu. Note that * multiple cpus may be using the same map. - * ZZZ should "shift" be used?? Depends on HT cpu numbering * ZZZ should be inline but did not work on emulator */ int gru_cpu_fault_map_id(void) { +#ifdef CONFIG_IA64 return uv_blade_processor_id() % GRU_NUM_TFM; +#else + int cpu = smp_processor_id(); + int id, core; + + core = uv_cpu_core_number(cpu); + id = core + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu); + return id; +#endif } /*--------- ASID Management ------------------------------------------- @@ -96,7 +116,7 @@ static int gru_reset_asid_limit(struct gru_state *gru, int asid) gid = gru->gs_gid; again: for (i = 0; i < GRU_NUM_CCH; i++) { - if (!gru->gs_gts[i]) + if (!gru->gs_gts[i] || is_kernel_context(gru->gs_gts[i])) continue; inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid; gru_dbg(grudev, "gid %d, gts %p, gms %p, inuse 0x%x, cxt %d\n", @@ -150,7 +170,7 @@ static unsigned long reserve_resources(unsigned long *p, int n, int mmax, unsigned long bits = 0; int i; - do { + while (n--) { i = find_first_bit(p, mmax); if (i == mmax) BUG(); @@ -158,7 +178,7 @@ static unsigned long reserve_resources(unsigned long *p, int n, int mmax, __set_bit(i, &bits); if (idx) *idx++ = i; - } while (--n); + } return bits; } @@ -276,7 +296,8 @@ static void gru_unload_mm_tracker(struct gru_state *gru, void gts_drop(struct gru_thread_state *gts) { if (gts && atomic_dec_return(>s->ts_refcnt) == 0) { - gru_drop_mmu_notifier(gts->ts_gms); + if (gts->ts_gms) + gru_drop_mmu_notifier(gts->ts_gms); kfree(gts); STAT(gts_free); } @@ -299,43 +320,50 @@ static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data /* * Allocate a thread state structure. */ -static struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, - struct gru_vma_data *vdata, - int tsid) +struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, + int cbr_au_count, int dsr_au_count, + unsigned char tlb_preload_count, int options, int tsid) { struct gru_thread_state *gts; + struct gru_mm_struct *gms; int bytes; - bytes = DSR_BYTES(vdata->vd_dsr_au_count) + - CBR_BYTES(vdata->vd_cbr_au_count); + bytes = DSR_BYTES(dsr_au_count) + CBR_BYTES(cbr_au_count); bytes += sizeof(struct gru_thread_state); - gts = kzalloc(bytes, GFP_KERNEL); + gts = kmalloc(bytes, GFP_KERNEL); if (!gts) - return NULL; + return ERR_PTR(-ENOMEM); STAT(gts_alloc); + memset(gts, 0, sizeof(struct gru_thread_state)); /* zero out header */ atomic_set(>s->ts_refcnt, 1); mutex_init(>s->ts_ctxlock); - gts->ts_cbr_au_count = vdata->vd_cbr_au_count; - gts->ts_dsr_au_count = vdata->vd_dsr_au_count; - gts->ts_user_options = vdata->vd_user_options; + gts->ts_cbr_au_count = cbr_au_count; + gts->ts_dsr_au_count = dsr_au_count; + gts->ts_tlb_preload_count = tlb_preload_count; + gts->ts_user_options = options; + gts->ts_user_blade_id = -1; + gts->ts_user_chiplet_id = -1; gts->ts_tsid = tsid; - gts->ts_user_options = vdata->vd_user_options; gts->ts_ctxnum = NULLCTX; - gts->ts_mm = current->mm; - gts->ts_vma = vma; gts->ts_tlb_int_select = -1; - gts->ts_gms = gru_register_mmu_notifier(); + gts->ts_cch_req_slice = -1; gts->ts_sizeavail = GRU_SIZEAVAIL(PAGE_SHIFT); - if (!gts->ts_gms) - goto err; + if (vma) { + gts->ts_mm = current->mm; + gts->ts_vma = vma; + gms = gru_register_mmu_notifier(); + if (IS_ERR(gms)) + goto err; + gts->ts_gms = gms; + } - gru_dbg(grudev, "alloc vdata %p, new gts %p\n", vdata, gts); + gru_dbg(grudev, "alloc gts %p\n", gts); return gts; err: gts_drop(gts); - return NULL; + return ERR_CAST(gms); } /* @@ -349,6 +377,7 @@ struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid) if (!vdata) return NULL; + STAT(vdata_alloc); INIT_LIST_HEAD(&vdata->vd_head); spin_lock_init(&vdata->vd_lock); gru_dbg(grudev, "alloc vdata %p\n", vdata); @@ -381,9 +410,12 @@ struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma, struct gru_vma_data *vdata = vma->vm_private_data; struct gru_thread_state *gts, *ngts; - gts = gru_alloc_gts(vma, vdata, tsid); - if (!gts) - return NULL; + gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count, + vdata->vd_dsr_au_count, + vdata->vd_tlb_preload_count, + vdata->vd_user_options, tsid); + if (IS_ERR(gts)) + return gts; spin_lock(&vdata->vd_lock); ngts = gru_find_current_gts_nolock(vdata, tsid); @@ -458,7 +490,8 @@ static void gru_prefetch_context(void *gseg, void *cb, void *cbe, } static void gru_load_context_data(void *save, void *grubase, int ctxnum, - unsigned long cbrmap, unsigned long dsrmap) + unsigned long cbrmap, unsigned long dsrmap, + int data_valid) { void *gseg, *cb, *cbe; unsigned long length; @@ -471,12 +504,25 @@ static void gru_load_context_data(void *save, void *grubase, int ctxnum, gru_prefetch_context(gseg, cb, cbe, cbrmap, length); for_each_cbr_in_allocation_map(i, &cbrmap, scr) { - save += gru_copy_handle(cb, save); - save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE, save); + if (data_valid) { + save += gru_copy_handle(cb, save); + save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE, + save); + } else { + memset(cb, 0, GRU_CACHE_LINE_BYTES); + memset(cbe + i * GRU_HANDLE_STRIDE, 0, + GRU_CACHE_LINE_BYTES); + } + /* Flush CBE to hide race in context restart */ + mb(); + gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE); cb += GRU_HANDLE_STRIDE; } - memcpy(gseg + GRU_DS_BASE, save, length); + if (data_valid) + memcpy(gseg + GRU_DS_BASE, save, length); + else + memset(gseg + GRU_DS_BASE, 0, length); } static void gru_unload_context_data(void *save, void *grubase, int ctxnum, @@ -490,6 +536,12 @@ static void gru_unload_context_data(void *save, void *grubase, int ctxnum, cb = gseg + GRU_CB_BASE; cbe = grubase + GRU_CBE_BASE; length = hweight64(dsrmap) * GRU_DSR_AU_BYTES; + + /* CBEs may not be coherent. Flush them from cache */ + for_each_cbr_in_allocation_map(i, &cbrmap, scr) + gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE); + mb(); /* Let the CL flush complete */ + gru_prefetch_context(gseg, cb, cbe, cbrmap, length); for_each_cbr_in_allocation_map(i, &cbrmap, scr) { @@ -506,44 +558,44 @@ void gru_unload_context(struct gru_thread_state *gts, int savestate) struct gru_context_configuration_handle *cch; int ctxnum = gts->ts_ctxnum; - zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE); + if (!is_kernel_context(gts)) + zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE); cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); - gru_dbg(grudev, "gts %p\n", gts); + gru_dbg(grudev, "gts %p, cbrmap 0x%lx, dsrmap 0x%lx\n", + gts, gts->ts_cbr_map, gts->ts_dsr_map); lock_cch_handle(cch); if (cch_interrupt_sync(cch)) BUG(); - gru_unload_mm_tracker(gru, gts); - if (savestate) + if (!is_kernel_context(gts)) + gru_unload_mm_tracker(gru, gts); + if (savestate) { gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum, gts->ts_cbr_map, gts->ts_dsr_map); + gts->ts_data_valid = 1; + } if (cch_deallocate(cch)) BUG(); - gts->ts_force_unload = 0; /* ts_force_unload locked by CCH lock */ unlock_cch_handle(cch); gru_free_gru_context(gts); - STAT(unload_context); } /* * Load a GRU context by copying it from the thread data structure in memory * to the GRU. */ -static void gru_load_context(struct gru_thread_state *gts) +void gru_load_context(struct gru_thread_state *gts) { struct gru_state *gru = gts->ts_gru; struct gru_context_configuration_handle *cch; - int err, asid, ctxnum = gts->ts_ctxnum; + int i, err, asid, ctxnum = gts->ts_ctxnum; - gru_dbg(grudev, "gts %p\n", gts); cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); - lock_cch_handle(cch); - asid = gru_load_mm_tracker(gru, gts); cch->tfm_fault_bit_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR); @@ -552,9 +604,33 @@ static void gru_load_context(struct gru_thread_state *gts) gts->ts_tlb_int_select = gru_cpu_fault_map_id(); cch->tlb_int_select = gts->ts_tlb_int_select; } + if (gts->ts_cch_req_slice >= 0) { + cch->req_slice_set_enable = 1; + cch->req_slice = gts->ts_cch_req_slice; + } else { + cch->req_slice_set_enable =0; + } cch->tfm_done_bit_enable = 0; - err = cch_allocate(cch, asid, gts->ts_sizeavail, gts->ts_cbr_map, - gts->ts_dsr_map); + cch->dsr_allocation_map = gts->ts_dsr_map; + cch->cbr_allocation_map = gts->ts_cbr_map; + + if (is_kernel_context(gts)) { + cch->unmap_enable = 1; + cch->tfm_done_bit_enable = 1; + cch->cb_int_enable = 1; + cch->tlb_int_select = 0; /* For now, ints go to cpu 0 */ + } else { + cch->unmap_enable = 0; + cch->tfm_done_bit_enable = 0; + cch->cb_int_enable = 0; + asid = gru_load_mm_tracker(gru, gts); + for (i = 0; i < 8; i++) { + cch->asid[i] = asid + i; + cch->sizeavail[i] = gts->ts_sizeavail; + } + } + + err = cch_allocate(cch); if (err) { gru_dbg(grudev, "err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n", @@ -563,24 +639,23 @@ static void gru_load_context(struct gru_thread_state *gts) } gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum, - gts->ts_cbr_map, gts->ts_dsr_map); + gts->ts_cbr_map, gts->ts_dsr_map, gts->ts_data_valid); if (cch_start(cch)) BUG(); unlock_cch_handle(cch); - STAT(load_context); + gru_dbg(grudev, "gid %d, gts %p, cbrmap 0x%lx, dsrmap 0x%lx, tie %d, tis %d\n", + gts->ts_gru->gs_gid, gts, gts->ts_cbr_map, gts->ts_dsr_map, + (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR), gts->ts_tlb_int_select); } /* * Update fields in an active CCH: * - retarget interrupts on local blade * - update sizeavail mask - * - force a delayed context unload by clearing the CCH asids. This - * forces TLB misses for new GRU instructions. The context is unloaded - * when the next TLB miss occurs. */ -int gru_update_cch(struct gru_thread_state *gts, int force_unload) +int gru_update_cch(struct gru_thread_state *gts) { struct gru_context_configuration_handle *cch; struct gru_state *gru = gts->ts_gru; @@ -594,18 +669,13 @@ int gru_update_cch(struct gru_thread_state *gts, int force_unload) goto exit; if (cch_interrupt(cch)) BUG(); - if (!force_unload) { - for (i = 0; i < 8; i++) - cch->sizeavail[i] = gts->ts_sizeavail; - gts->ts_tlb_int_select = gru_cpu_fault_map_id(); - cch->tlb_int_select = gru_cpu_fault_map_id(); - } else { - for (i = 0; i < 8; i++) - cch->asid[i] = 0; - cch->tfm_fault_bit_enable = 0; - cch->tlb_int_enable = 0; - gts->ts_force_unload = 1; - } + for (i = 0; i < 8; i++) + cch->sizeavail[i] = gts->ts_sizeavail; + gts->ts_tlb_int_select = gru_cpu_fault_map_id(); + cch->tlb_int_select = gru_cpu_fault_map_id(); + cch->tfm_fault_bit_enable = + (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL + || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR); if (cch_start(cch)) BUG(); ret = 1; @@ -630,7 +700,54 @@ static int gru_retarget_intr(struct gru_thread_state *gts) gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select, gru_cpu_fault_map_id()); - return gru_update_cch(gts, 0); + return gru_update_cch(gts); +} + +/* + * Check if a GRU context is allowed to use a specific chiplet. By default + * a context is assigned to any blade-local chiplet. However, users can + * override this. + * Returns 1 if assignment allowed, 0 otherwise + */ +static int gru_check_chiplet_assignment(struct gru_state *gru, + struct gru_thread_state *gts) +{ + int blade_id; + int chiplet_id; + + blade_id = gts->ts_user_blade_id; + if (blade_id < 0) + blade_id = uv_numa_blade_id(); + + chiplet_id = gts->ts_user_chiplet_id; + return gru->gs_blade_id == blade_id && + (chiplet_id < 0 || chiplet_id == gru->gs_chiplet_id); +} + +/* + * Unload the gru context if it is not assigned to the correct blade or + * chiplet. Misassignment can occur if the process migrates to a different + * blade or if the user changes the selected blade/chiplet. + */ +void gru_check_context_placement(struct gru_thread_state *gts) +{ + struct gru_state *gru; + + /* + * If the current task is the context owner, verify that the + * context is correctly placed. This test is skipped for non-owner + * references. Pthread apps use non-owner references to the CBRs. + */ + gru = gts->ts_gru; + if (!gru || gts->ts_tgid_owner != current->tgid) + return; + + if (!gru_check_chiplet_assignment(gru, gts)) { + STAT(check_context_unload); + gru_unload_context(gts, 1); + } else if (gru_retarget_intr(gts)) { + STAT(check_context_retarget_intr); + } } @@ -642,61 +759,88 @@ static int gru_retarget_intr(struct gru_thread_state *gts) #define next_gru(b, g) (((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ? \ ((g)+1) : &(b)->bs_grus[0]) -static void gru_steal_context(struct gru_thread_state *gts) +static int is_gts_stealable(struct gru_thread_state *gts, + struct gru_blade_state *bs) +{ + if (is_kernel_context(gts)) + return down_write_trylock(&bs->bs_kgts_sema); + else + return mutex_trylock(>s->ts_ctxlock); +} + +static void gts_stolen(struct gru_thread_state *gts, + struct gru_blade_state *bs) +{ + if (is_kernel_context(gts)) { + up_write(&bs->bs_kgts_sema); + STAT(steal_kernel_context); + } else { + mutex_unlock(>s->ts_ctxlock); + STAT(steal_user_context); + } +} + +void gru_steal_context(struct gru_thread_state *gts) { struct gru_blade_state *blade; struct gru_state *gru, *gru0; struct gru_thread_state *ngts = NULL; int ctxnum, ctxnum0, flag = 0, cbr, dsr; + int blade_id; + blade_id = gts->ts_user_blade_id; + if (blade_id < 0) + blade_id = uv_numa_blade_id(); cbr = gts->ts_cbr_au_count; dsr = gts->ts_dsr_au_count; - preempt_disable(); - blade = gru_base[uv_numa_blade_id()]; + blade = gru_base[blade_id]; spin_lock(&blade->bs_lock); ctxnum = next_ctxnum(blade->bs_lru_ctxnum); gru = blade->bs_lru_gru; if (ctxnum == 0) gru = next_gru(blade, gru); + blade->bs_lru_gru = gru; + blade->bs_lru_ctxnum = ctxnum; ctxnum0 = ctxnum; gru0 = gru; while (1) { - if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH)) - break; - spin_lock(&gru->gs_lock); - for (; ctxnum < GRU_NUM_CCH; ctxnum++) { - if (flag && gru == gru0 && ctxnum == ctxnum0) + if (gru_check_chiplet_assignment(gru, gts)) { + if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH)) break; - ngts = gru->gs_gts[ctxnum]; - /* - * We are grabbing locks out of order, so trylock is - * needed. GTSs are usually not locked, so the odds of - * success are high. If trylock fails, try to steal a - * different GSEG. - */ - if (ngts && mutex_trylock(&ngts->ts_ctxlock)) + spin_lock(&gru->gs_lock); + for (; ctxnum < GRU_NUM_CCH; ctxnum++) { + if (flag && gru == gru0 && ctxnum == ctxnum0) + break; + ngts = gru->gs_gts[ctxnum]; + /* + * We are grabbing locks out of order, so trylock is + * needed. GTSs are usually not locked, so the odds of + * success are high. If trylock fails, try to steal a + * different GSEG. + */ + if (ngts && is_gts_stealable(ngts, blade)) + break; + ngts = NULL; + } + spin_unlock(&gru->gs_lock); + if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0)) break; - ngts = NULL; - flag = 1; } - spin_unlock(&gru->gs_lock); - if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0)) + if (flag && gru == gru0) break; + flag = 1; ctxnum = 0; gru = next_gru(blade, gru); } - blade->bs_lru_gru = gru; - blade->bs_lru_ctxnum = ctxnum; spin_unlock(&blade->bs_lock); - preempt_enable(); if (ngts) { - STAT(steal_context); + gts->ustats.context_stolen++; ngts->ts_steal_jiffies = jiffies; - gru_unload_context(ngts, 1); - mutex_unlock(&ngts->ts_ctxlock); + gru_unload_context(ngts, is_kernel_context(ngts) ? 0 : 1); + gts_stolen(ngts, blade); } else { STAT(steal_context_failed); } @@ -708,19 +852,34 @@ static void gru_steal_context(struct gru_thread_state *gts) } /* + * Assign a gru context. + */ +static int gru_assign_context_number(struct gru_state *gru) +{ + int ctxnum; + + ctxnum = find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH); + __set_bit(ctxnum, &gru->gs_context_map); + return ctxnum; +} + +/* * Scan the GRUs on the local blade & assign a GRU context. */ -static struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts) +struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts) { struct gru_state *gru, *grux; int i, max_active_contexts; + int blade_id = gts->ts_user_blade_id; - preempt_disable(); - + if (blade_id < 0) + blade_id = uv_numa_blade_id(); again: gru = NULL; max_active_contexts = GRU_NUM_CCH; - for_each_gru_on_blade(grux, uv_numa_blade_id(), i) { + for_each_gru_on_blade(grux, blade_id, i) { + if (!gru_check_chiplet_assignment(grux, gts)) + continue; if (check_gru_resources(grux, gts->ts_cbr_au_count, gts->ts_dsr_au_count, max_active_contexts)) { @@ -741,12 +900,9 @@ again: reserve_gru_resources(gru, gts); gts->ts_gru = gru; gts->ts_blade = gru->gs_blade_id; - gts->ts_ctxnum = - find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH); - BUG_ON(gts->ts_ctxnum == GRU_NUM_CCH); + gts->ts_ctxnum = gru_assign_context_number(gru); atomic_inc(>s->ts_refcnt); gru->gs_gts[gts->ts_ctxnum] = gts; - __set_bit(gts->ts_ctxnum, &gru->gs_context_map); spin_unlock(&gru->gs_lock); STAT(assign_context); @@ -760,7 +916,6 @@ again: STAT(assign_context_failed); } - preempt_enable(); return gru; } @@ -789,20 +944,15 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf) again: mutex_lock(>s->ts_ctxlock); preempt_disable(); - if (gts->ts_gru) { - if (gts->ts_gru->gs_blade_id != uv_numa_blade_id()) { - STAT(migrated_nopfn_unload); - gru_unload_context(gts, 1); - } else { - if (gru_retarget_intr(gts)) - STAT(migrated_nopfn_retarget); - } - } + + gru_check_context_placement(gts); if (!gts->ts_gru) { + STAT(load_user_context); if (!gru_assign_gru_context(gts)) { - mutex_unlock(>s->ts_ctxlock); preempt_enable(); + mutex_unlock(>s->ts_ctxlock); + set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */ if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies) gru_steal_context(gts); @@ -815,8 +965,8 @@ again: vma->vm_page_prot); } - mutex_unlock(>s->ts_ctxlock); preempt_enable(); + mutex_unlock(>s->ts_ctxlock); return VM_FAULT_NOPAGE; } diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c index ee74821b171..4f763592239 100644 --- a/drivers/misc/sgi-gru/gruprocfs.c +++ b/drivers/misc/sgi-gru/gruprocfs.c @@ -36,8 +36,7 @@ static void printstat_val(struct seq_file *s, atomic_long_t *v, char *id) { unsigned long val = atomic_long_read(v); - if (val) - seq_printf(s, "%16lu %s\n", val, id); + seq_printf(s, "%16lu %s\n", val, id); } static int statistics_show(struct seq_file *s, void *p) @@ -46,52 +45,55 @@ static int statistics_show(struct seq_file *s, void *p) printstat(s, vdata_free); printstat(s, gts_alloc); printstat(s, gts_free); - printstat(s, vdata_double_alloc); + printstat(s, gms_alloc); + printstat(s, gms_free); printstat(s, gts_double_allocate); printstat(s, assign_context); printstat(s, assign_context_failed); printstat(s, free_context); - printstat(s, load_context); - printstat(s, unload_context); - printstat(s, steal_context); + printstat(s, load_user_context); + printstat(s, load_kernel_context); + printstat(s, lock_kernel_context); + printstat(s, unlock_kernel_context); + printstat(s, steal_user_context); + printstat(s, steal_kernel_context); printstat(s, steal_context_failed); printstat(s, nopfn); - printstat(s, break_cow); printstat(s, asid_new); printstat(s, asid_next); printstat(s, asid_wrap); printstat(s, asid_reuse); printstat(s, intr); + printstat(s, intr_cbr); + printstat(s, intr_tfh); + printstat(s, intr_spurious); printstat(s, intr_mm_lock_failed); printstat(s, call_os); - printstat(s, call_os_offnode_reference); - printstat(s, call_os_check_for_bug); printstat(s, call_os_wait_queue); printstat(s, user_flush_tlb); printstat(s, user_unload_context); printstat(s, user_exception); - printstat(s, set_task_slice); - printstat(s, migrate_check); - printstat(s, migrated_retarget); - printstat(s, migrated_unload); - printstat(s, migrated_unload_delay); - printstat(s, migrated_nopfn_retarget); - printstat(s, migrated_nopfn_unload); + printstat(s, set_context_option); + printstat(s, check_context_retarget_intr); + printstat(s, check_context_unload); printstat(s, tlb_dropin); + printstat(s, tlb_preload_page); printstat(s, tlb_dropin_fail_no_asid); printstat(s, tlb_dropin_fail_upm); printstat(s, tlb_dropin_fail_invalid); printstat(s, tlb_dropin_fail_range_active); printstat(s, tlb_dropin_fail_idle); printstat(s, tlb_dropin_fail_fmm); + printstat(s, tlb_dropin_fail_no_exception); + printstat(s, tfh_stale_on_fault); printstat(s, mmu_invalidate_range); printstat(s, mmu_invalidate_page); - printstat(s, mmu_clear_flush_young); printstat(s, flush_tlb); printstat(s, flush_tlb_gru); printstat(s, flush_tlb_gru_tgh); printstat(s, flush_tlb_gru_zero_asid); printstat(s, copy_gpa); + printstat(s, read_gpa); printstat(s, mesq_receive); printstat(s, mesq_receive_none); printstat(s, mesq_send); @@ -102,7 +104,6 @@ static int statistics_show(struct seq_file *s, void *p) printstat(s, mesq_send_qlimit_reached); printstat(s, mesq_send_amo_nacked); printstat(s, mesq_send_put_nacked); - printstat(s, mesq_qf_not_full); printstat(s, mesq_qf_locked); printstat(s, mesq_qf_noop_not_full); printstat(s, mesq_qf_switch_head_failed); @@ -112,6 +113,7 @@ static int statistics_show(struct seq_file *s, void *p) printstat(s, mesq_noop_qlimit_reached); printstat(s, mesq_noop_amo_nacked); printstat(s, mesq_noop_put_nacked); + printstat(s, mesq_noop_page_overflow); return 0; } @@ -127,8 +129,10 @@ static int mcs_statistics_show(struct seq_file *s, void *p) int op; unsigned long total, count, max; static char *id[] = {"cch_allocate", "cch_start", "cch_interrupt", - "cch_interrupt_sync", "cch_deallocate", "tgh_invalidate"}; + "cch_interrupt_sync", "cch_deallocate", "tfh_write_only", + "tfh_write_restart", "tgh_invalidate"}; + seq_printf(s, "%-20s%12s%12s%12s\n", "#id", "count", "aver-clks", "max-clks"); for (op = 0; op < mcsop_last; op++) { count = atomic_long_read(&mcs_op_statistics[op].count); total = atomic_long_read(&mcs_op_statistics[op].total); @@ -148,6 +152,7 @@ static ssize_t mcs_statistics_write(struct file *file, static int options_show(struct seq_file *s, void *p) { + seq_printf(s, "#bitmask: 1=trace, 2=statistics\n"); seq_printf(s, "0x%lx\n", gru_options); return 0; } @@ -155,15 +160,11 @@ static int options_show(struct seq_file *s, void *p) static ssize_t options_write(struct file *file, const char __user *userbuf, size_t count, loff_t *data) { - unsigned long val; - char buf[80]; + int ret; - if (copy_from_user - (buf, userbuf, count < sizeof(buf) ? count : sizeof(buf))) - return -EFAULT; - buf[count - 1] = '\0'; - if (!strict_strtoul(buf, 10, &val)) - gru_options = val; + ret = kstrtoul_from_user(userbuf, count, 0, &gru_options); + if (ret) + return ret; return count; } @@ -177,16 +178,17 @@ static int cch_seq_show(struct seq_file *file, void *data) const char *mode[] = { "??", "UPM", "INTR", "OS_POLL" }; if (gid == 0) - seq_printf(file, "#%5s%5s%6s%9s%6s%8s%8s\n", "gid", "bid", - "ctx#", "pid", "cbrs", "dsbytes", "mode"); + seq_printf(file, "#%5s%5s%6s%7s%9s%6s%8s%8s\n", "gid", "bid", + "ctx#", "asid", "pid", "cbrs", "dsbytes", "mode"); if (gru) for (i = 0; i < GRU_NUM_CCH; i++) { ts = gru->gs_gts[i]; if (!ts) continue; - seq_printf(file, " %5d%5d%6d%9d%6d%8d%8s\n", + seq_printf(file, " %5d%5d%6d%7d%9d%6d%8d%8s\n", gru->gs_gid, gru->gs_blade_id, i, - ts->ts_tgid_owner, + is_kernel_context(ts) ? 0 : ts->ts_gms->ms_asids[gid].mt_asid, + is_kernel_context(ts) ? 0 : ts->ts_tgid_owner, ts->ts_cbr_au_count * GRU_CBR_AU_SIZE, ts->ts_cbr_au_count * GRU_DSR_AU_BYTES, mode[ts->ts_user_options & @@ -318,7 +320,7 @@ static const struct file_operations gru_fops = { static struct proc_entry { char *name; - int mode; + umode_t mode; const struct file_operations *fops; struct proc_dir_entry *entry; } proc_files[] = { @@ -335,10 +337,9 @@ static struct proc_dir_entry *proc_gru __read_mostly; static int create_proc_file(struct proc_entry *p) { - p->entry = create_proc_entry(p->name, p->mode, proc_gru); + p->entry = proc_create(p->name, p->mode, proc_gru, p->fops); if (!p->entry) return -1; - p->entry->proc_fops = p->fops; return 0; } @@ -350,7 +351,7 @@ static void delete_proc_files(void) for (p = proc_files; p->name; p++) if (p->entry) remove_proc_entry(p->name, proc_gru); - remove_proc_entry("gru", NULL); + proc_remove(proc_gru); } } diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h index bf1eeb7553e..5c3ce245967 100644 --- a/drivers/misc/sgi-gru/grutables.h +++ b/drivers/misc/sgi-gru/grutables.h @@ -148,18 +148,20 @@ #include <linux/wait.h> #include <linux/mmu_notifier.h> #include "gru.h" +#include "grulib.h" #include "gruhandles.h" extern struct gru_stats_s gru_stats; extern struct gru_blade_state *gru_base[]; extern unsigned long gru_start_paddr, gru_end_paddr; +extern void *gru_start_vaddr; extern unsigned int gru_max_gids; #define GRU_MAX_BLADES MAX_NUMNODES #define GRU_MAX_GRUS (GRU_MAX_BLADES * GRU_CHIPLETS_PER_BLADE) #define GRU_DRIVER_ID_STR "SGI GRU Device Driver" -#define GRU_DRIVER_VERSION_STR "0.80" +#define GRU_DRIVER_VERSION_STR "0.85" /* * GRU statistics. @@ -169,53 +171,56 @@ struct gru_stats_s { atomic_long_t vdata_free; atomic_long_t gts_alloc; atomic_long_t gts_free; - atomic_long_t vdata_double_alloc; + atomic_long_t gms_alloc; + atomic_long_t gms_free; atomic_long_t gts_double_allocate; atomic_long_t assign_context; atomic_long_t assign_context_failed; atomic_long_t free_context; - atomic_long_t load_context; - atomic_long_t unload_context; - atomic_long_t steal_context; + atomic_long_t load_user_context; + atomic_long_t load_kernel_context; + atomic_long_t lock_kernel_context; + atomic_long_t unlock_kernel_context; + atomic_long_t steal_user_context; + atomic_long_t steal_kernel_context; atomic_long_t steal_context_failed; atomic_long_t nopfn; - atomic_long_t break_cow; atomic_long_t asid_new; atomic_long_t asid_next; atomic_long_t asid_wrap; atomic_long_t asid_reuse; atomic_long_t intr; + atomic_long_t intr_cbr; + atomic_long_t intr_tfh; + atomic_long_t intr_spurious; atomic_long_t intr_mm_lock_failed; atomic_long_t call_os; - atomic_long_t call_os_offnode_reference; - atomic_long_t call_os_check_for_bug; atomic_long_t call_os_wait_queue; atomic_long_t user_flush_tlb; atomic_long_t user_unload_context; atomic_long_t user_exception; - atomic_long_t set_task_slice; - atomic_long_t migrate_check; - atomic_long_t migrated_retarget; - atomic_long_t migrated_unload; - atomic_long_t migrated_unload_delay; - atomic_long_t migrated_nopfn_retarget; - atomic_long_t migrated_nopfn_unload; + atomic_long_t set_context_option; + atomic_long_t check_context_retarget_intr; + atomic_long_t check_context_unload; atomic_long_t tlb_dropin; + atomic_long_t tlb_preload_page; atomic_long_t tlb_dropin_fail_no_asid; atomic_long_t tlb_dropin_fail_upm; atomic_long_t tlb_dropin_fail_invalid; atomic_long_t tlb_dropin_fail_range_active; atomic_long_t tlb_dropin_fail_idle; atomic_long_t tlb_dropin_fail_fmm; + atomic_long_t tlb_dropin_fail_no_exception; + atomic_long_t tfh_stale_on_fault; atomic_long_t mmu_invalidate_range; atomic_long_t mmu_invalidate_page; - atomic_long_t mmu_clear_flush_young; atomic_long_t flush_tlb; atomic_long_t flush_tlb_gru; atomic_long_t flush_tlb_gru_tgh; atomic_long_t flush_tlb_gru_zero_asid; atomic_long_t copy_gpa; + atomic_long_t read_gpa; atomic_long_t mesq_receive; atomic_long_t mesq_receive_none; @@ -227,7 +232,7 @@ struct gru_stats_s { atomic_long_t mesq_send_qlimit_reached; atomic_long_t mesq_send_amo_nacked; atomic_long_t mesq_send_put_nacked; - atomic_long_t mesq_qf_not_full; + atomic_long_t mesq_page_overflow; atomic_long_t mesq_qf_locked; atomic_long_t mesq_qf_noop_not_full; atomic_long_t mesq_qf_switch_head_failed; @@ -237,11 +242,13 @@ struct gru_stats_s { atomic_long_t mesq_noop_qlimit_reached; atomic_long_t mesq_noop_amo_nacked; atomic_long_t mesq_noop_put_nacked; + atomic_long_t mesq_noop_page_overflow; }; enum mcs_op {cchop_allocate, cchop_start, cchop_interrupt, cchop_interrupt_sync, - cchop_deallocate, tghop_invalidate, mcsop_last}; + cchop_deallocate, tfhop_write_only, tfhop_write_restart, + tghop_invalidate, mcsop_last}; struct mcs_op_statistic { atomic_long_t count; @@ -251,9 +258,8 @@ struct mcs_op_statistic { extern struct mcs_op_statistic mcs_op_statistics[mcsop_last]; -#define OPT_DPRINT 1 -#define OPT_STATS 2 -#define GRU_QUICKLOOK 4 +#define OPT_DPRINT 1 +#define OPT_STATS 2 #define IRQ_GRU 110 /* Starting IRQ number for interrupts */ @@ -276,7 +282,7 @@ extern struct mcs_op_statistic mcs_op_statistics[mcsop_last]; #define gru_dbg(dev, fmt, x...) \ do { \ if (gru_options & OPT_DPRINT) \ - dev_dbg(dev, "%s: " fmt, __func__, x); \ + printk(KERN_DEBUG "GRU:%d %s: " fmt, smp_processor_id(), __func__, x);\ } while (0) #else #define gru_dbg(x...) @@ -290,13 +296,7 @@ extern struct mcs_op_statistic mcs_op_statistics[mcsop_last]; #define ASID_INC 8 /* number of regions */ /* Generate a GRU asid value from a GRU base asid & a virtual address. */ -#if defined CONFIG_IA64 #define VADDR_HI_BIT 64 -#elif defined CONFIG_X86_64 -#define VADDR_HI_BIT 48 -#else -#error "Unsupported architecture" -#endif #define GRUREGION(addr) ((addr) >> (VADDR_HI_BIT - 3) & 3) #define GRUASID(asid, addr) ((asid) + GRUREGION(addr)) @@ -338,6 +338,7 @@ struct gru_vma_data { long vd_user_options;/* misc user option flags */ int vd_cbr_au_count; int vd_dsr_au_count; + unsigned char vd_tlb_preload_count; }; /* @@ -353,6 +354,7 @@ struct gru_thread_state { struct gru_state *ts_gru; /* GRU where the context is loaded */ struct gru_mm_struct *ts_gms; /* asid & ioproc struct */ + unsigned char ts_tlb_preload_count; /* TLB preload pages */ unsigned long ts_cbr_map; /* map of allocated CBRs */ unsigned long ts_dsr_map; /* map of allocated DATA resources */ @@ -361,6 +363,8 @@ struct gru_thread_state { long ts_user_options;/* misc user option flags */ pid_t ts_tgid_owner; /* task that is using the context - for migration */ + short ts_user_blade_id;/* user selected blade */ + char ts_user_chiplet_id;/* user selected chiplet */ unsigned short ts_sizeavail; /* Pagesizes in use */ int ts_tsid; /* thread that owns the structure */ @@ -373,13 +377,15 @@ struct gru_thread_state { required for contest */ unsigned char ts_cbr_au_count;/* Number of CBR resources required for contest */ + char ts_cch_req_slice;/* CCH packet slice */ char ts_blade; /* If >= 0, migrate context if - ref from diferent blade */ + ref from different blade */ char ts_force_cch_reload; - char ts_force_unload;/* force context to be unloaded - after migration */ char ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each allocated CB */ + int ts_data_valid; /* Indicates if ts_gdata has + valid data */ + struct gru_gseg_statistics ustats; /* User statistics */ unsigned long ts_gdata[0]; /* save area for GRU data (CB, DS, CBE) */ }; @@ -411,6 +417,7 @@ struct gru_state { gru segments (64) */ unsigned short gs_gid; /* unique GRU number */ unsigned short gs_blade_id; /* blade of GRU */ + unsigned char gs_chiplet_id; /* blade chiplet of GRU */ unsigned char gs_tgh_local_shift; /* used to pick TGH for local flush */ unsigned char gs_tgh_first_remote; /* starting TGH# for @@ -442,6 +449,7 @@ struct gru_state { in use */ struct gru_thread_state *gs_gts[GRU_NUM_CCH]; /* GTS currently using the context */ + int gs_irq[GRU_NUM_TFM]; /* Interrupt irqs */ }; /* @@ -452,6 +460,14 @@ struct gru_blade_state { reserved cb */ void *kernel_dsr; /* First kernel reserved DSR */ + struct rw_semaphore bs_kgts_sema; /* lock for kgts */ + struct gru_thread_state *bs_kgts; /* GTS for kernel use */ + + /* ---- the following are used for managing kernel async GRU CBRs --- */ + int bs_async_dsr_bytes; /* DSRs for async */ + int bs_async_cbrs; /* CBRs AU for async */ + struct completion *bs_async_wq; + /* ---- the following are protected by the bs_lock spinlock ---- */ spinlock_t bs_lock; /* lock used for stealing contexts */ @@ -500,8 +516,7 @@ struct gru_blade_state { /* Scan all active GRUs in a GRU bitmap */ #define for_each_gru_in_bitmap(gid, map) \ - for ((gid) = find_first_bit((map), GRU_MAX_GRUS); (gid) < GRU_MAX_GRUS;\ - (gid)++, (gid) = find_next_bit((map), GRU_MAX_GRUS, (gid))) + for_each_set_bit((gid), (map), GRU_MAX_GRUS) /* Scan all active GRUs on a specific blade */ #define for_each_gru_on_blade(gru, nid, i) \ @@ -520,23 +535,17 @@ struct gru_blade_state { /* Scan each CBR whose bit is set in a TFM (or copy of) */ #define for_each_cbr_in_tfm(i, map) \ - for ((i) = find_first_bit(map, GRU_NUM_CBE); \ - (i) < GRU_NUM_CBE; \ - (i)++, (i) = find_next_bit(map, GRU_NUM_CBE, i)) + for_each_set_bit((i), (map), GRU_NUM_CBE) /* Scan each CBR in a CBR bitmap. Note: multiple CBRs in an allocation unit */ #define for_each_cbr_in_allocation_map(i, map, k) \ - for ((k) = find_first_bit(map, GRU_CBR_AU); (k) < GRU_CBR_AU; \ - (k) = find_next_bit(map, GRU_CBR_AU, (k) + 1)) \ + for_each_set_bit((k), (map), GRU_CBR_AU) \ for ((i) = (k)*GRU_CBR_AU_SIZE; \ (i) < ((k) + 1) * GRU_CBR_AU_SIZE; (i)++) /* Scan each DSR in a DSR bitmap. Note: multiple DSRs in an allocation unit */ #define for_each_dsr_in_allocation_map(i, map, k) \ - for ((k) = find_first_bit((const unsigned long *)map, GRU_DSR_AU);\ - (k) < GRU_DSR_AU; \ - (k) = find_next_bit((const unsigned long *)map, \ - GRU_DSR_AU, (k) + 1)) \ + for_each_set_bit((k), (const unsigned long *)(map), GRU_DSR_AU) \ for ((i) = (k) * GRU_DSR_AU_CL; \ (i) < ((k) + 1) * GRU_DSR_AU_CL; (i)++) @@ -552,6 +561,12 @@ struct gru_blade_state { /* Lock hierarchy checking enabled only in emulator */ +/* 0 = lock failed, 1 = locked */ +static inline int __trylock_handle(void *h) +{ + return !test_and_set_bit(1, h); +} + static inline void __lock_handle(void *h) { while (test_and_set_bit(1, h)) @@ -563,6 +578,11 @@ static inline void __unlock_handle(void *h) clear_bit(1, h); } +static inline int trylock_cch_handle(struct gru_context_configuration_handle *cch) +{ + return __trylock_handle(cch); +} + static inline void lock_cch_handle(struct gru_context_configuration_handle *cch) { __lock_handle(cch); @@ -584,12 +604,26 @@ static inline void unlock_tgh_handle(struct gru_tlb_global_handle *tgh) __unlock_handle(tgh); } +static inline int is_kernel_context(struct gru_thread_state *gts) +{ + return !gts->ts_mm; +} + +/* + * The following are for Nehelem-EX. A more general scheme is needed for + * future processors. + */ +#define UV_MAX_INT_CORES 8 +#define uv_cpu_socket_number(p) ((cpu_physical_id(p) >> 5) & 1) +#define uv_cpu_ht_number(p) (cpu_physical_id(p) & 1) +#define uv_cpu_core_number(p) (((cpu_physical_id(p) >> 2) & 4) | \ + ((cpu_physical_id(p) >> 1) & 3)) /*----------------------------------------------------------------------------- * Function prototypes & externs */ struct gru_unload_context_req; -extern struct vm_operations_struct gru_vm_ops; +extern const struct vm_operations_struct gru_vm_ops; extern struct device *grudev; extern struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, @@ -598,24 +632,35 @@ extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma, int tsid); extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma, int tsid); +extern struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts); +extern void gru_load_context(struct gru_thread_state *gts); +extern void gru_steal_context(struct gru_thread_state *gts); extern void gru_unload_context(struct gru_thread_state *gts, int savestate); -extern int gru_update_cch(struct gru_thread_state *gts, int force_unload); +extern int gru_update_cch(struct gru_thread_state *gts); extern void gts_drop(struct gru_thread_state *gts); extern void gru_tgh_flush_init(struct gru_state *gru); -extern int gru_kservices_init(struct gru_state *gru); -extern void gru_kservices_exit(struct gru_state *gru); -extern irqreturn_t gru_intr(int irq, void *dev_id); +extern int gru_kservices_init(void); +extern void gru_kservices_exit(void); +extern irqreturn_t gru0_intr(int irq, void *dev_id); +extern irqreturn_t gru1_intr(int irq, void *dev_id); +extern irqreturn_t gru_intr_mblade(int irq, void *dev_id); +extern int gru_dump_chiplet_request(unsigned long arg); +extern long gru_get_gseg_statistics(unsigned long arg); extern int gru_handle_user_call_os(unsigned long address); extern int gru_user_flush_tlb(unsigned long arg); extern int gru_user_unload_context(unsigned long arg); extern int gru_get_exception_detail(unsigned long arg); -extern int gru_set_task_slice(long address); +extern int gru_set_context_option(unsigned long address); +extern void gru_check_context_placement(struct gru_thread_state *gts); extern int gru_cpu_fault_map_id(void); extern struct vm_area_struct *gru_find_vma(unsigned long vaddr); extern void gru_flush_all_tlb(struct gru_state *gru); extern int gru_proc_init(void); extern void gru_proc_exit(void); +extern struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, + int cbr_au_count, int dsr_au_count, + unsigned char tlb_preload_count, int options, int tsid); extern unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count, char *cbmap); extern unsigned long gru_reserve_ds_resources(struct gru_state *gru, @@ -624,6 +669,7 @@ extern int gru_fault(struct vm_area_struct *, struct vm_fault *vmf); extern struct gru_mm_struct *gru_register_mmu_notifier(void); extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms); +extern int gru_ktest(unsigned long arg); extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start, unsigned long len); diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c index 1d125091f5e..2129274ef7a 100644 --- a/drivers/misc/sgi-gru/grutlbpurge.c +++ b/drivers/misc/sgi-gru/grutlbpurge.c @@ -184,8 +184,8 @@ void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start, STAT(flush_tlb_gru_tgh); asid = GRUASID(asid, start); gru_dbg(grudev, - " FLUSH gruid %d, asid 0x%x, num %ld, cbmap 0x%x\n", - gid, asid, num, asids->mt_ctxbitmap); + " FLUSH gruid %d, asid 0x%x, vaddr 0x%lx, vamask 0x%x, num %ld, cbmap 0x%x\n", + gid, asid, start, grupagesize, num, asids->mt_ctxbitmap); tgh = get_lock_tgh_handle(gru); tgh_invalidate(tgh, start, ~0, asid, grupagesize, 0, num - 1, asids->mt_ctxbitmap); @@ -280,11 +280,10 @@ static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm, const struct mmu_notifier_ops *ops) { struct mmu_notifier *mn, *gru_mn = NULL; - struct hlist_node *n; if (mm->mmu_notifier_mm) { rcu_read_lock(); - hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, + hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) if (mn->ops == ops) { gru_mn = mn; @@ -299,6 +298,7 @@ struct gru_mm_struct *gru_register_mmu_notifier(void) { struct gru_mm_struct *gms; struct mmu_notifier *mn; + int err; mn = mmu_find_ops(current->mm, &gru_mmuops); if (mn) { @@ -307,16 +307,22 @@ struct gru_mm_struct *gru_register_mmu_notifier(void) } else { gms = kzalloc(sizeof(*gms), GFP_KERNEL); if (gms) { + STAT(gms_alloc); spin_lock_init(&gms->ms_asid_lock); gms->ms_notifier.ops = &gru_mmuops; atomic_set(&gms->ms_refcnt, 1); init_waitqueue_head(&gms->ms_wait_queue); - __mmu_notifier_register(&gms->ms_notifier, current->mm); + err = __mmu_notifier_register(&gms->ms_notifier, current->mm); + if (err) + goto error; } } gru_dbg(grudev, "gms %p, refcnt %d\n", gms, atomic_read(&gms->ms_refcnt)); return gms; +error: + kfree(gms); + return ERR_PTR(err); } void gru_drop_mmu_notifier(struct gru_mm_struct *gms) @@ -327,6 +333,7 @@ void gru_drop_mmu_notifier(struct gru_mm_struct *gms) if (!gms->ms_released) mmu_notifier_unregister(&gms->ms_notifier, current->mm); kfree(gms); + STAT(gms_free); } } |
