diff options
author | Linus Torvalds <torvalds@g5.osdl.org> | 2005-10-28 21:09:26 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-10-28 21:09:26 -0700 |
commit | 8a212ab6b8a4ccc6f3c3d1beba5f92655c576404 (patch) | |
tree | 525271129ff9c692defdd20566f1f7203b18ff24 /arch/ia64/sn | |
parent | 1f419cadff55f548e7356ffebdb9e1b5a8c22275 (diff) | |
parent | 0e1f60609258e18ec0a0477c646101212822d387 (diff) |
Merge branch 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6
Diffstat (limited to 'arch/ia64/sn')
-rw-r--r-- | arch/ia64/sn/kernel/bte.c | 2 | ||||
-rw-r--r-- | arch/ia64/sn/kernel/io_init.c | 4 | ||||
-rw-r--r-- | arch/ia64/sn/kernel/setup.c | 160 | ||||
-rw-r--r-- | arch/ia64/sn/kernel/sn2/sn2_smp.c | 31 | ||||
-rw-r--r-- | arch/ia64/sn/kernel/sn2/sn_hwperf.c | 4 | ||||
-rw-r--r-- | arch/ia64/sn/kernel/tiocx.c | 67 | ||||
-rw-r--r-- | arch/ia64/sn/kernel/xpc.h | 366 | ||||
-rw-r--r-- | arch/ia64/sn/kernel/xpc_channel.c | 329 | ||||
-rw-r--r-- | arch/ia64/sn/kernel/xpc_main.c | 330 | ||||
-rw-r--r-- | arch/ia64/sn/kernel/xpc_partition.c | 475 | ||||
-rw-r--r-- | arch/ia64/sn/pci/pci_dma.c | 46 | ||||
-rw-r--r-- | arch/ia64/sn/pci/pcibr/pcibr_reg.c | 59 | ||||
-rw-r--r-- | arch/ia64/sn/pci/tioca_provider.c | 32 | ||||
-rw-r--r-- | arch/ia64/sn/pci/tioce_provider.c | 30 |
14 files changed, 1349 insertions, 586 deletions
diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c index 45854c637e9..d71f4de44f7 100644 --- a/arch/ia64/sn/kernel/bte.c +++ b/arch/ia64/sn/kernel/bte.c @@ -87,7 +87,7 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification) unsigned long irq_flags; unsigned long itc_end = 0; int nasid_to_try[MAX_NODES_TO_TRY]; - int my_nasid = get_nasid(); + int my_nasid = cpuid_to_nasid(raw_smp_processor_id()); int bte_if_index, nasid_index; int bte_first, btes_per_node = BTES_PER_NODE; diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index 906622d9f93..b4f5053f5e1 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -22,8 +22,6 @@ #include "xtalk/hubdev.h" #include "xtalk/xwidgetdev.h" -nasid_t master_nasid = INVALID_NASID; /* Partition Master */ - static struct list_head sn_sysdata_list; /* sysdata list struct */ @@ -165,7 +163,7 @@ static void sn_fixup_ionodes(void) * Get SGI Specific HUB chipset information. * Inform Prom that this kernel can support domain bus numbering. */ - for (i = 0; i < numionodes; i++) { + for (i = 0; i < num_cnodes; i++) { hubdev = (struct hubdev_info *)(NODEPDA(i)->pdinfo); nasid = cnodeid_to_nasid(i); hubdev->max_segment_number = 0xffffffff; diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 6f8c5883716..0fb579ef18c 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -59,8 +59,6 @@ DEFINE_PER_CPU(struct pda_s, pda_percpu); #define MAX_PHYS_MEMORY (1UL << IA64_MAX_PHYS_BITS) /* Max physical address supported */ -lboard_t *root_lboard[MAX_COMPACT_NODES]; - extern void bte_init_node(nodepda_t *, cnodeid_t); extern void sn_timer_init(void); @@ -97,15 +95,15 @@ u8 sn_region_size; EXPORT_SYMBOL(sn_region_size); int sn_prom_type; /* 0=hardware, 1=medusa/realprom, 2=medusa/fakeprom */ -short physical_node_map[MAX_PHYSNODE_ID]; +short physical_node_map[MAX_NUMALINK_NODES]; static unsigned long sn_prom_features[MAX_PROM_FEATURE_SETS]; EXPORT_SYMBOL(physical_node_map); -int numionodes; +int num_cnodes; static void sn_init_pdas(char **); -static void scan_for_ionodes(void); +static void build_cnode_tables(void); static nodepda_t *nodepdaindr[MAX_COMPACT_NODES]; @@ -140,19 +138,6 @@ char drive_info[4 * 16]; #endif /* - * Get nasid of current cpu early in boot before nodepda is initialized - */ -static int -boot_get_nasid(void) -{ - int nasid; - - if (ia64_sn_get_sapic_info(get_sapicid(), &nasid, NULL, NULL)) - BUG(); - return nasid; -} - -/* * This routine can only be used during init, since * smp_boot_data is an init data structure. * We have to use smp_boot_data.cpu_phys_id to find @@ -223,7 +208,6 @@ void __init early_sn_setup(void) } extern int platform_intr_list[]; -extern nasid_t master_nasid; static int __initdata shub_1_1_found = 0; /* @@ -269,7 +253,6 @@ static void __init sn_check_for_wars(void) void __init sn_setup(char **cmdline_p) { long status, ticks_per_sec, drift; - int pxm; u32 version = sn_sal_rev(); extern void sn_cpu_init(void); @@ -300,11 +283,10 @@ void __init sn_setup(char **cmdline_p) MAX_DMA_ADDRESS = PAGE_OFFSET + MAX_PHYS_MEMORY; - memset(physical_node_map, -1, sizeof(physical_node_map)); - for (pxm = 0; pxm < MAX_PXM_DOMAINS; pxm++) - if (pxm_to_nid_map[pxm] != -1) - physical_node_map[pxm_to_nasid(pxm)] = - pxm_to_nid_map[pxm]; + /* + * Build the tables for managing cnodes. + */ + build_cnode_tables(); /* * Old PROMs do not provide an ACPI FADT. Disable legacy keyboard @@ -319,8 +301,6 @@ void __init sn_setup(char **cmdline_p) printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF); - master_nasid = boot_get_nasid(); - status = ia64_sal_freq_base(SAL_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec, &drift); @@ -378,15 +358,6 @@ static void __init sn_init_pdas(char **cmdline_p) { cnodeid_t cnode; - memset(sn_cnodeid_to_nasid, -1, - sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid))); - for_each_online_node(cnode) - sn_cnodeid_to_nasid[cnode] = - pxm_to_nasid(nid_to_pxm_map[cnode]); - - numionodes = num_online_nodes(); - scan_for_ionodes(); - /* * Allocate & initalize the nodepda for each node. */ @@ -402,7 +373,7 @@ static void __init sn_init_pdas(char **cmdline_p) /* * Allocate & initialize nodepda for TIOs. For now, put them on node 0. */ - for (cnode = num_online_nodes(); cnode < numionodes; cnode++) { + for (cnode = num_online_nodes(); cnode < num_cnodes; cnode++) { nodepdaindr[cnode] = alloc_bootmem_node(NODE_DATA(0), sizeof(nodepda_t)); memset(nodepdaindr[cnode], 0, sizeof(nodepda_t)); @@ -411,7 +382,7 @@ static void __init sn_init_pdas(char **cmdline_p) /* * Now copy the array of nodepda pointers to each nodepda. */ - for (cnode = 0; cnode < numionodes; cnode++) + for (cnode = 0; cnode < num_cnodes; cnode++) memcpy(nodepdaindr[cnode]->pernode_pdaindr, nodepdaindr, sizeof(nodepdaindr)); @@ -428,7 +399,7 @@ static void __init sn_init_pdas(char **cmdline_p) * Initialize the per node hubdev. This includes IO Nodes and * headless/memless nodes. */ - for (cnode = 0; cnode < numionodes; cnode++) { + for (cnode = 0; cnode < num_cnodes; cnode++) { hubdev_init_node(nodepdaindr[cnode], cnode); } } @@ -553,87 +524,58 @@ void __init sn_cpu_init(void) } /* - * Scan klconfig for ionodes. Add the nasids to the - * physical_node_map and the pda and increment numionodes. + * Build tables for converting between NASIDs and cnodes. */ +static inline int __init board_needs_cnode(int type) +{ + return (type == KLTYPE_SNIA || type == KLTYPE_TIO); +} -static void __init scan_for_ionodes(void) +void __init build_cnode_tables(void) { - int nasid = 0; + int nasid; + int node; lboard_t *brd; - /* fakeprom does not support klgraph */ - if (IS_RUNNING_ON_FAKE_PROM()) - return; - - /* Setup ionodes with memory */ - for (nasid = 0; nasid < MAX_PHYSNODE_ID; nasid += 2) { - char *klgraph_header; - cnodeid_t cnodeid; - - if (physical_node_map[nasid] == -1) - continue; + memset(physical_node_map, -1, sizeof(physical_node_map)); + memset(sn_cnodeid_to_nasid, -1, + sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid))); - cnodeid = -1; - klgraph_header = __va(ia64_sn_get_klconfig_addr(nasid)); - if (!klgraph_header) { - BUG(); /* All nodes must have klconfig tables! */ - } - cnodeid = nasid_to_cnodeid(nasid); - root_lboard[cnodeid] = (lboard_t *) - NODE_OFFSET_TO_LBOARD((nasid), - ((kl_config_hdr_t - *) (klgraph_header))-> - ch_board_info); + /* + * First populate the tables with C/M bricks. This ensures that + * cnode == node for all C & M bricks. + */ + for_each_online_node(node) { + nasid = pxm_to_nasid(nid_to_pxm_map[node]); + sn_cnodeid_to_nasid[node] = nasid; + physical_node_map[nasid] = node; } - /* Scan headless/memless IO Nodes. */ - for (nasid = 0; nasid < MAX_PHYSNODE_ID; nasid += 2) { - /* if there's no nasid, don't try to read the klconfig on the node */ - if (physical_node_map[nasid] == -1) - continue; - brd = find_lboard_any((lboard_t *) - root_lboard[nasid_to_cnodeid(nasid)], - KLTYPE_SNIA); - if (brd) { - brd = KLCF_NEXT_ANY(brd); /* Skip this node's lboard */ - if (!brd) - continue; - } - - brd = find_lboard_any(brd, KLTYPE_SNIA); + /* + * num_cnodes is total number of C/M/TIO bricks. Because of the 256 node + * limit on the number of nodes, we can't use the generic node numbers + * for this. Note that num_cnodes is incremented below as TIOs or + * headless/memoryless nodes are discovered. + */ + num_cnodes = num_online_nodes(); - while (brd) { - sn_cnodeid_to_nasid[numionodes] = brd->brd_nasid; - physical_node_map[brd->brd_nasid] = numionodes; - root_lboard[numionodes] = brd; - numionodes++; - brd = KLCF_NEXT_ANY(brd); - if (!brd) - break; - - brd = find_lboard_any(brd, KLTYPE_SNIA); - } - } + /* fakeprom does not support klgraph */ + if (IS_RUNNING_ON_FAKE_PROM()) + return; - /* Scan for TIO nodes. */ - for (nasid = 0; nasid < MAX_PHYSNODE_ID; nasid += 2) { - /* if there's no nasid, don't try to read the klconfig on the node */ - if (physical_node_map[nasid] == -1) - continue; - brd = find_lboard_any((lboard_t *) - root_lboard[nasid_to_cnodeid(nasid)], - KLTYPE_TIO); + /* Find TIOs & headless/memoryless nodes and add them to the tables */ + for_each_online_node(node) { + kl_config_hdr_t *klgraph_header; + nasid = cnodeid_to_nasid(node); + if ((klgraph_header = ia64_sn_get_klconfig_addr(nasid)) == NULL) + BUG(); + brd = NODE_OFFSET_TO_LBOARD(nasid, klgraph_header->ch_board_info); while (brd) { - sn_cnodeid_to_nasid[numionodes] = brd->brd_nasid; - physical_node_map[brd->brd_nasid] = numionodes; - root_lboard[numionodes] = brd; - numionodes++; - brd = KLCF_NEXT_ANY(brd); - if (!brd) - break; - - brd = find_lboard_any(brd, KLTYPE_TIO); + if (board_needs_cnode(brd->brd_type) && physical_node_map[brd->brd_nasid] < 0) { + sn_cnodeid_to_nasid[num_cnodes] = brd->brd_nasid; + physical_node_map[brd->brd_nasid] = num_cnodes++; + } + brd = find_lboard_next(brd); } } } diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index 0a4ee50c302..49b530c39a4 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c @@ -177,6 +177,7 @@ void sn_tlb_migrate_finish(struct mm_struct *mm) /** * sn2_global_tlb_purge - globally purge translation cache of virtual address range + * @mm: mm_struct containing virtual address range * @start: start of virtual address range * @end: end of virtual address range * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc)) @@ -188,21 +189,22 @@ void sn_tlb_migrate_finish(struct mm_struct *mm) * - cpu_vm_mask is a bit mask that indicates which cpus have loaded the context. * - cpu_vm_mask is converted into a nodemask of the nodes containing the * cpus in cpu_vm_mask. - * - if only one bit is set in cpu_vm_mask & it is the current cpu, - * then only the local TLB needs to be flushed. This flushing can be done - * using ptc.l. This is the common case & avoids the global spinlock. + * - if only one bit is set in cpu_vm_mask & it is the current cpu & the + * process is purging its own virtual address range, then only the + * local TLB needs to be flushed. This flushing can be done using + * ptc.l. This is the common case & avoids the global spinlock. * - if multiple cpus have loaded the context, then flushing has to be * done with ptc.g/MMRs under protection of the global ptc_lock. */ void -sn2_global_tlb_purge(unsigned long start, unsigned long end, - unsigned long nbits) +sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, + unsigned long end, unsigned long nbits) { int i, opt, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0; + int mymm = (mm == current->active_mm); volatile unsigned long *ptc0, *ptc1; - unsigned long itc, itc2, flags, data0 = 0, data1 = 0; - struct mm_struct *mm = current->active_mm; + unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value; short nasids[MAX_NUMNODES], nix; nodemask_t nodes_flushed; @@ -216,9 +218,12 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, i++; } + if (i == 0) + return; + preempt_disable(); - if (likely(i == 1 && lcpu == smp_processor_id())) { + if (likely(i == 1 && lcpu == smp_processor_id() && mymm)) { do { ia64_ptcl(start, nbits << 2); start += (1UL << nbits); @@ -229,7 +234,7 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, return; } - if (atomic_read(&mm->mm_users) == 1) { + if (atomic_read(&mm->mm_users) == 1 && mymm) { flush_tlb_mm(mm); __get_cpu_var(ptcstats).change_rid++; preempt_enable(); @@ -241,11 +246,13 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, for_each_node_mask(cnode, nodes_flushed) nasids[nix++] = cnodeid_to_nasid(cnode); + rr_value = (mm->context << 3) | REGION_NUMBER(start); + shub1 = is_shub1(); if (shub1) { data0 = (1UL << SH1_PTC_0_A_SHFT) | (nbits << SH1_PTC_0_PS_SHFT) | - ((ia64_get_rr(start) >> 8) << SH1_PTC_0_RID_SHFT) | + (rr_value << SH1_PTC_0_RID_SHFT) | (1UL << SH1_PTC_0_START_SHFT); ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0); ptc1 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1); @@ -254,7 +261,7 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, (nbits << SH2_PTC_PS_SHFT) | (1UL << SH2_PTC_START_SHFT); ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC + - ((ia64_get_rr(start) >> 8) << SH2_PTC_RID_SHFT) ); + (rr_value << SH2_PTC_RID_SHFT)); ptc1 = NULL; } @@ -275,7 +282,7 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK); for (i = 0; i < nix; i++) { nasid = nasids[i]; - if ((!(sn2_ptctest & 3)) && unlikely(nasid == mynasid)) { + if ((!(sn2_ptctest & 3)) && unlikely(nasid == mynasid && mymm)) { ia64_ptcga(start, nbits << 2); ia64_srlz_i(); } else { diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 0513aacac8c..6c6fbca3229 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -476,8 +476,8 @@ static int sn_topology_show(struct seq_file *s, void *d) for_each_online_cpu(j) { seq_printf(s, j ? ":%d" : ", dist %d", node_distance( - cpuid_to_cnodeid(i), - cpuid_to_cnodeid(j))); + cpu_to_node(i), + cpu_to_node(j))); } seq_putc(s, '\n'); } diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c index b45db5133f5..0d8592a745a 100644 --- a/arch/ia64/sn/kernel/tiocx.c +++ b/arch/ia64/sn/kernel/tiocx.c @@ -183,11 +183,12 @@ int cx_driver_unregister(struct cx_drv *cx_driver) * @part_num: device's part number * @mfg_num: device's manufacturer number * @hubdev: hub info associated with this device + * @bt: board type of the device * */ int cx_device_register(nasid_t nasid, int part_num, int mfg_num, - struct hubdev_info *hubdev) + struct hubdev_info *hubdev, int bt) { struct cx_dev *cx_dev; @@ -200,6 +201,7 @@ cx_device_register(nasid_t nasid, int part_num, int mfg_num, cx_dev->cx_id.mfg_num = mfg_num; cx_dev->cx_id.nasid = nasid; cx_dev->hubdev = hubdev; + cx_dev->bt = bt; cx_dev->dev.parent = NULL; cx_dev->dev.bus = &tiocx_bus_type; @@ -238,7 +240,8 @@ static int cx_device_reload(struct cx_dev *cx_dev) { cx_device_unregister(cx_dev); return cx_device_register(cx_dev->cx_id.nasid, cx_dev->cx_id.part_num, - cx_dev->cx_id.mfg_num, cx_dev->hubdev); + cx_dev->cx_id.mfg_num, cx_dev->hubdev, + cx_dev->bt); } static inline uint64_t tiocx_intr_alloc(nasid_t nasid, int widget, @@ -365,26 +368,20 @@ static void tio_corelet_reset(nasid_t nasid, int corelet) udelay(2000); } -static int tiocx_btchar_get(int nasid) +static int is_fpga_tio(int nasid, int *bt) { - moduleid_t module_id; - geoid_t geoid; - int cnodeid; - - cnodeid = nasid_to_cnodeid(nasid); - geoid = cnodeid_get_geoid(cnodeid); - module_id = geo_module(geoid); - return MODULE_GET_BTCHAR(module_id); -} + int ioboard_type; -static int is_fpga_brick(int nasid) -{ - switch (tiocx_btchar_get(nasid)) { + ioboard_type = ia64_sn_sysctl_ioboard_get(nasid); + + switch (ioboard_type) { case L1_BRICKTYPE_SA: case L1_BRICKTYPE_ATHENA: - case L1_BRICKTYPE_DAYTONA: + case L1_BOARDTYPE_DAYTONA: + *bt = ioboard_type; return 1; } + return 0; } @@ -407,16 +404,22 @@ static int tiocx_reload(struct cx_dev *cx_dev) if (bitstream_loaded(nasid)) { uint64_t cx_id; - - cx_id = - *(volatile uint64_t *)(TIO_SWIN_BASE(nasid, TIOCX_CORELET) + + int rv; + + rv = ia64_sn_sysctl_tio_clock_reset(nasid); + if (rv) { + printk(KERN_ALERT "CX port JTAG reset failed.\n"); + } else { + cx_id = *(volatile uint64_t *) + (TIO_SWIN_BASE(nasid, TIOCX_CORELET) + WIDGET_ID); - part_num = XWIDGET_PART_NUM(cx_id); - mfg_num = XWIDGET_MFG_NUM(cx_id); - DBG("part= 0x%x, mfg= 0x%x\n", part_num, mfg_num); - /* just ignore it if it's a CE */ - if (part_num == TIO_CE_ASIC_PARTNUM) - return 0; + part_num = XWIDGET_PART_NUM(cx_id); + mfg_num = XWIDGET_MFG_NUM(cx_id); + DBG("part= 0x%x, mfg= 0x%x\n", part_num, mfg_num); + /* just ignore it if it's a CE */ + if (part_num == TIO_CE_ASIC_PARTNUM) + return 0; + } } cx_dev->cx_id.part_num = part_num; @@ -436,10 +439,10 @@ static ssize_t show_cxdev_control(struct device *dev, struct device_attribute *a { struct cx_dev *cx_dev = to_cx_dev(dev); - return sprintf(buf, "0x%x 0x%x 0x%x %d\n", + return sprintf(buf, "0x%x 0x%x 0x%x 0x%x\n", cx_dev->cx_id.nasid, cx_dev->cx_id.part_num, cx_dev->cx_id.mfg_num, - tiocx_btchar_get(cx_dev->cx_id.nasid)); + cx_dev->bt); } static ssize_t store_cxdev_control(struct device *dev, struct device_attribute *attr, const char *buf, @@ -486,13 +489,13 @@ static int __init tiocx_init(void) bus_register(&tiocx_bus_type); - for (cnodeid = 0; cnodeid < MAX_COMPACT_NODES; cnodeid++) { + for (cnodeid = 0; cnodeid < num_cnodes; cnodeid++) { nasid_t nasid; + int bt; - if ((nasid = cnodeid_to_nasid(cnodeid)) < 0) - break; /* No more nasids .. bail out of loop */ + nasid = cnodeid_to_nasid(cnodeid); - if ((nasid & 0x1) && is_fpga_brick(nasid)) { + if ((nasid & 0x1) && is_fpga_tio(nasid, &bt)) { struct hubdev_info *hubdev; struct xwidget_info *widgetp; @@ -512,7 +515,7 @@ static int __init tiocx_init(void) if (cx_device_register (nasid, widgetp->xwi_hwid.part_num, - widgetp->xwi_hwid.mfg_num, hubdev) < 0) + widgetp->xwi_hwid.mfg_num, hubdev, bt) < 0) return -ENXIO; else found_tiocx_device++; diff --git a/arch/ia64/sn/kernel/xpc.h b/arch/ia64/sn/kernel/xpc.h index e5f5a4e51f7..fbcedc7c27f 100644 --- a/arch/ia64/sn/kernel/xpc.h +++ b/arch/ia64/sn/kernel/xpc.h @@ -57,7 +57,7 @@ #define XPC_NASID_FROM_W_B(_w, _b) (((_w) * 64 + (_b)) * 2) #define XPC_HB_DEFAULT_INTERVAL 5 /* incr HB every x secs */ -#define XPC_HB_CHECK_DEFAULT_TIMEOUT 20 /* check HB every x secs */ +#define XPC_HB_CHECK_DEFAULT_INTERVAL 20 /* check HB every x secs */ /* define the process name of HB checker and the CPU it is pinned to */ #define XPC_HB_CHECK_THREAD_NAME "xpc_hb" @@ -67,34 +67,82 @@ #define XPC_DISCOVERY_THREAD_NAME "xpc_discovery" -#define XPC_HB_ALLOWED(_p, _v) ((_v)->heartbeating_to_mask & (1UL << (_p))) -#define XPC_ALLOW_HB(_p, _v) (_v)->heartbeating_to_mask |= (1UL << (_p)) -#define XPC_DISALLOW_HB(_p, _v) (_v)->heartbeating_to_mask &= (~(1UL << (_p))) - - /* - * Reserved Page provided by SAL. + * the reserved page + * + * SAL reserves one page of memory per partition for XPC. Though a full page + * in length (16384 bytes), its starting address is not page aligned, but it + * is cacheline aligned. The reserved page consists of the following: + * + * reserved page header + * + * The first cacheline of the reserved page contains the header + * (struct xpc_rsvd_page). Before SAL initialization has completed, + * SAL has set up the following fields of the reserved page header: + * SAL_signature, SAL_version, partid, and nasids_size. The other + * fields are set up by XPC. (xpc_rsvd_page points to the local + * partition's reserved page.) * - * SAL provides one page per partition of reserved memory. When SAL - * initialization is complete, SAL_signature, SAL_version, partid, - * part_nasids, and mach_nasids are set. + * part_nasids mask + * mach_nasids mask + * + * SAL also sets up two bitmaps (or masks), one that reflects the actual + * nasids in this partition (part_nasids), and the other that reflects + * the actual nasids in the entire machine (mach_nasids). We're only + * interested in the even numbered nasids (which contain the processors + * and/or memory), so we only need half as many bits to represent the + * nasids. The part_nasids mask is located starting at the first cacheline + * following the reserved page header. The mach_nasids mask follows right + * after the part_nasids mask. The size in bytes of each mask is reflected + * by the reserved page header field 'nasids_size'. (Local partition's + * mask pointers are xpc_part_nasids and xpc_mach_nasids.) + * + * vars + * vars part + * + * Immediately following the mach_nasids mask are the XPC variables + * required by other partitions. First are those that are generic to all + * partitions (vars), followed on the next available cacheline by those + * which are partition specific (vars part). These are setup by XPC. + * (Local partition's vars pointers are xpc_vars and xpc_vars_part.) * * Note: Until vars_pa is set, the partition XPC code has not been initialized. */ struct xpc_rsvd_page { - u64 SAL_signature; /* SAL unique signature */ - u64 SAL_version; /* SAL specified version */ - u8 partid; /* partition ID from SAL */ + u64 SAL_signature; /* SAL: unique signature */ + u64 SAL_version; /* SAL: version */ + u8 partid; /* SAL: partition ID */ u8 version; - u8 pad[6]; /* pad to u64 align */ + u8 pad1[6]; /* align to next u64 in cacheline */ volatile u64 vars_pa; - u64 part_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned; - u64 mach_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned; + struct timespec stamp; /* time when reserved page was setup by XPC */ + u64 pad2[9]; /* align to last u64 in cacheline */ + u64 nasids_size; /* SAL: size of each nasid mask in bytes */ }; -#define XPC_RP_VERSION _XPC_VERSION(1,0) /* version 1.0 of the reserved page */ -#define XPC_RSVD_PAGE_ALIGNED_SIZE \ - (L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page))) +#define XPC_RP_VERSION _XPC_VERSION(1,1) /* version 1.1 of the reserved page */ + +#define XPC_SUPPORTS_RP_STAMP(_version) \ + (_version >= _XPC_VERSION(1,1)) + +/* + * compare stamps - the return value is: + * + * < 0, if stamp1 < stamp2 + * = 0, if stamp1 == stamp2 + * > 0, if stamp1 > stamp2 + */ +static inline int +xpc_compare_stamps(struct timespec *stamp1, struct timespec *stamp2) +{ + int ret; + + + if ((ret = stamp1->tv_sec - stamp2->tv_sec) == 0) { + ret = stamp1->tv_nsec - stamp2->tv_nsec; + } + return ret; +} /* @@ -121,11 +169,58 @@ struct xpc_vars { u64 vars_part_pa; u64 amos_page_pa; /* paddr of page of AMOs from MSPEC driver */ AMO_t *amos_page; /* vaddr of page of AMOs from MSPEC driver */ - AMO_t *act_amos; /* pointer to the first activation AMO */ }; -#define XPC_V_VERSION _XPC_VERSION(3,0) /* version 3.0 of the cross vars */ -#define XPC_VARS_ALIGNED_SIZE (L1_CACHE_ALIGN(sizeof(struct xpc_vars))) +#define XPC_V_VERSION _XPC_VERSION(3,1) /* version 3.1 of the cross vars */ + +#define XPC_SUPPORTS_DISENGAGE_REQUEST(_version) \ + (_version >= _XPC_VERSION(3,1)) + + +static inline int +xpc_hb_allowed(partid_t partid, struct xpc_vars *vars) +{ + return ((vars->heartbeating_to_mask & (1UL << partid)) != 0); +} + +static inline void +xpc_allow_hb(partid_t partid, struct xpc_vars *vars) +{ + u64 old_mask, new_mask; + + do { + old_mask = vars->heartbeating_to_mask; + new_mask = (old_mask | (1UL << partid)); + } while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) != + old_mask); +} + +static inline void +xpc_disallow_hb(partid_t partid, struct xpc_vars *vars) +{ + u64 old_mask, new_mask; + + do { + old_mask = vars->heartbeating_to_mask; + new_mask = (old_mask & ~(1UL << partid)); + } while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) != + old_mask); +} + + +/* + * The AMOs page consists of a number of AMO variables which are divided into + * four groups, The first two groups are used to identify an IRQ's sender. + * These two groups consist of 64 and 128 AMO variables respectively. The last + * two groups, consisting of just one AMO variable each, are used to identify + * the remote partitions that are currently engaged (from the viewpoint of + * the XPC running on the remote partition). + */ +#define XPC_NOTIFY_IRQ_AMOS 0 +#define XPC_ACTIVATE_IRQ_AMOS (XPC_NOTIFY_IRQ_AMOS + XP_MAX_PARTITIONS) +#define XPC_ENGAGED_PARTITIONS_AMO (XPC_ACTIVATE_IRQ_AMOS + XP_NASID_MASK_WORDS) +#define XPC_DISENGAGE_REQUEST_AMO (XPC_ENGAGED_PARTITIONS_AMO + 1) + /* * The following structure describes the per partition specific variables. @@ -165,6 +260,16 @@ struct xpc_vars_part { #define XPC_VP_MAGIC2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */ +/* the reserved page sizes and offsets */ + +#define XPC_RP_HEADER_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page)) +#define XPC_RP_VARS_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_vars)) + +#define XPC_RP_PART_NASIDS(_rp) (u64 *) ((u8 *) _rp + XPC_RP_HEADER_SIZE) +#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + xp_nasid_mask_words) +#define XPC_RP_VARS(_rp) ((struct xpc_vars *) XPC_RP_MACH_NASIDS(_rp) + xp_nasid_mask_words) +#define XPC_RP_VARS_PART(_rp) (struct xpc_vars_part *) ((u8 *) XPC_RP_VARS(rp) + XPC_RP_VARS_SIZE) + /* * Functions registered by add_timer() or called by kernel_thread() only @@ -349,6 +454,9 @@ struct xpc_channel { atomic_t n_on_msg_allocate_wq; /* #on msg allocation wait queue */ wait_queue_head_t msg_allocate_wq; /* msg allocation wait queue */ + u8 delayed_IPI_flags; /* IPI flags received, but delayed */ + /* action until channel disconnected */ + /* queue of msg senders who want to be notified when msg received */ atomic_t n_to_notify; /* #of msg senders to notify */ @@ -358,7 +466,7 @@ struct xpc_channel { void *key; /* pointer to user's key */ struct semaphore msg_to_pull_sema; /* next msg to pull serialization */ - struct semaphore teardown_sema; /* wait for teardown completion */ + struct semaphore wdisconnect_sema; /* wait for channel disconnect */ struct xpc_openclose_args *local_openclose_args; /* args passed on */ /* opening or closing of channel */ @@ -410,6 +518,8 @@ struct xpc_channel { #define XPC_C_DISCONNECTED 0x00002000 /* channel is disconnected */ #define XPC_C_DISCONNECTING 0x00004000 /* channel is being disconnected */ +#define XPC_C_DISCONNECTCALLOUT 0x00008000 /* chan disconnected callout made */ +#define XPC_C_WDISCONNECT 0x00010000 /* waiting for channel disconnect */ @@ -422,6 +532,8 @@ struct xpc_partition { /* XPC HB infrastructure */ + u8 remote_rp_version; /* version# of partition's rsvd pg */ + struct timespec remote_rp_stamp;/* time when rsvd pg was initialized */ u64 remote_rp_pa; /* phys addr of partition's rsvd pg */ u64 remote_vars_pa; /* phys addr of partition's vars */ u64 remote_vars_part_pa; /* phys addr of partition's vars part */ @@ -432,14 +544,18 @@ struct xpc_partition { u32 act_IRQ_rcvd; /* IRQs since activation */ spinlock_t act_lock; /* protect updating of act_state */ u8 act_state; /* from XPC HB viewpoint */ + u8 remote_vars_version; /* version# of partition's vars */ enum xpc_retval reason; /* reason partition is deactivating */ int reason_line; /* line# deactivation initiated from */ int reactivate_nasid; /* nasid in partition to reactivate */ + unsigned long disengage_request_timeout; /* timeout in jiffies */ + struct timer_list disengage_request_timer; + /* XPC infrastructure referencing and teardown control */ - volatile u8 setup_state; /* infrastructure setup state */ + volatile u8 setup_state; /* infrastructure setup state */ wait_queue_head_t teardown_wq; /* kthread waiting to teardown infra */ atomic_t references; /* #of references to infrastructure */ @@ -454,6 +570,7 @@ struct xpc_partition { u8 nchannels; /* #of defined channels supported */ atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */ + atomic_t nchannels_engaged;/* #of channels engaged with remote part */ struct xpc_channel *channels;/* array of channel structures */ void *local_GPs_base; /* base address of kmalloc'd space */ @@ -518,6 +635,7 @@ struct xpc_partition { #define XPC_P_TORNDOWN 0x03 /* infrastructure is torndown */ + /* * struct xpc_partition IPI_timer #of seconds to wait before checking for * dropped IPIs. These occur whenever an IPI amo write doesn't complete until @@ -526,6 +644,13 @@ struct xpc_partition { #define XPC_P_DROPPED_IPI_WAIT (0.25 * HZ) +/* number of seconds to wait for other partitions to disengage */ +#define XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT 90 + +/* interval in seconds to print 'waiting disengagement' messages */ +#define XPC_DISENGAGE_PRINTMSG_INTERVAL 10 + + #define XPC_PARTID(_p) ((partid_t) ((_p) - &xpc_partitions[0])) @@ -534,24 +659,20 @@ struct xpc_partition { extern struct xpc_registration xpc_registrations[]; -/* >>> found in xpc_main.c only */ +/* found in xpc_main.c */ extern struct device *xpc_part; extern struct device *xpc_chan; +extern int xpc_disengage_request_timelimit; extern irqreturn_t xpc_notify_IRQ_handler(int, void *, struct pt_regs *); extern void xpc_dropped_IPI_check(struct xpc_partition *); +extern void xpc_activate_partition(struct xpc_partition *); extern void xpc_activate_kthreads(struct xpc_channel *, int); extern void xpc_create_kthreads(struct xpc_channel *, int); extern void xpc_disconnect_wait(int); -/* found in xpc_main.c and efi-xpc.c */ -extern void xpc_activate_partition(struct xpc_partition *); - - /* found in xpc_partition.c */ extern int xpc_exiting; -extern int xpc_hb_interval; -extern int xpc_hb_check_interval; extern struct xpc_vars *xpc_vars; extern struct xpc_rsvd_page *xpc_rsvd_page; extern struct xpc_vars_part *xpc_vars_part; @@ -561,6 +682,7 @@ extern struct xpc_rsvd_page *xpc_rsvd_page_init(void); extern void xpc_allow_IPI_ops(void); extern void xpc_restrict_IPI_ops(void); extern int xpc_identify_act_IRQ_sender(void); +extern int xpc_partition_disengaged(struct xpc_partition *); extern enum xpc_retval xpc_mark_partition_active(struct xpc_partition *); extern void xpc_mark_partition_inactive(struct xpc_partition *); extern void xpc_discovery(void); @@ -585,8 +707,8 @@ extern void xpc_connected_callout(struct xpc_channel *); extern void xpc_deliver_msg(struct xpc_channel *); extern void xpc_disconnect_channel(const int, struct xpc_channel *, enum xpc_retval, unsigned long *); -extern void xpc_disconnected_callout(struct xpc_channel *); -extern void xpc_partition_down(struct xpc_partition *, enum xpc_retval); +extern void xpc_disconnecting_callout(struct xpc_channel *); +extern void xpc_partition_going_down(struct xpc_partition *, enum xpc_retval); extern void xpc_teardown_infrastructure(struct xpc_partition *); @@ -674,6 +796,157 @@ xpc_part_ref(struct xpc_partition *part) /* + * This next set of inlines are used to keep track of when a partition is + * potentially engaged in accessing memory belonging to another partition. + */ + +static inline void +xpc_mark_partition_engaged(struct xpc_partition *part) +{ + unsigned long irq_flags; + AMO_t *amo = (AMO_t *) __va(part->remote |