Merge branch 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6

author: Linus Torvalds <torvalds@g5.osdl.org> 2005-10-28 21:09:26 -0700
committer: Linus Torvalds <torvalds@g5.osdl.org> 2005-10-28 21:09:26 -0700
commit: 8a212ab6b8a4ccc6f3c3d1beba5f92655c576404 (patch)
tree: 525271129ff9c692defdd20566f1f7203b18ff24 /arch/ia64/sn
parent: 1f419cadff55f548e7356ffebdb9e1b5a8c22275 (diff)
parent: 0e1f60609258e18ec0a0477c646101212822d387 (diff)
14 files changed, 1349 insertions, 586 deletions
diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c
index 45854c637e9..d71f4de44f7 100644
--- a/arch/ia64/sn/kernel/bte.c
+++ b/arch/ia64/sn/kernel/bte.c
@@ -87,7 +87,7 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
 	unsigned long irq_flags;
 	unsigned long itc_end = 0;
 	int nasid_to_try[MAX_NODES_TO_TRY];
-	int my_nasid = get_nasid();
+	int my_nasid = cpuid_to_nasid(raw_smp_processor_id());
 	int bte_if_index, nasid_index;
 	int bte_first, btes_per_node = BTES_PER_NODE;
 
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index 906622d9f93..b4f5053f5e1 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -22,8 +22,6 @@
 #include "xtalk/hubdev.h"
 #include "xtalk/xwidgetdev.h"
 
-nasid_t master_nasid = INVALID_NASID;	/* Partition Master */
-
 static struct list_head sn_sysdata_list;
 
 /* sysdata list struct */
@@ -165,7 +163,7 @@ static void sn_fixup_ionodes(void)
 	 * Get SGI Specific HUB chipset information.
 	 * Inform Prom that this kernel can support domain bus numbering.
 	 */
-	for (i = 0; i < numionodes; i++) {
+	for (i = 0; i < num_cnodes; i++) {
 		hubdev = (struct hubdev_info *)(NODEPDA(i)->pdinfo);
 		nasid = cnodeid_to_nasid(i);
 		hubdev->max_segment_number = 0xffffffff;
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index 6f8c5883716..0fb579ef18c 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -59,8 +59,6 @@ DEFINE_PER_CPU(struct pda_s, pda_percpu);
 
 #define MAX_PHYS_MEMORY		(1UL << IA64_MAX_PHYS_BITS)	/* Max physical address supported */
 
-lboard_t *root_lboard[MAX_COMPACT_NODES];
-
 extern void bte_init_node(nodepda_t *, cnodeid_t);
 
 extern void sn_timer_init(void);
@@ -97,15 +95,15 @@ u8 sn_region_size;
 EXPORT_SYMBOL(sn_region_size);
 int sn_prom_type;	/* 0=hardware, 1=medusa/realprom, 2=medusa/fakeprom */
 
-short physical_node_map[MAX_PHYSNODE_ID];
+short physical_node_map[MAX_NUMALINK_NODES];
 static unsigned long sn_prom_features[MAX_PROM_FEATURE_SETS];
 
 EXPORT_SYMBOL(physical_node_map);
 
-int numionodes;
+int num_cnodes;
 
 static void sn_init_pdas(char **);
-static void scan_for_ionodes(void);
+static void build_cnode_tables(void);
 
 static nodepda_t *nodepdaindr[MAX_COMPACT_NODES];
 
@@ -140,19 +138,6 @@ char drive_info[4 * 16];
 #endif
 
 /*
- * Get nasid of current cpu early in boot before nodepda is initialized
- */
-static int
-boot_get_nasid(void)
-{
-	int nasid;
-
-	if (ia64_sn_get_sapic_info(get_sapicid(), &nasid, NULL, NULL))
-		BUG();
-	return nasid;
-}
-
-/*
  * This routine can only be used during init, since
  * smp_boot_data is an init data structure.
  * We have to use smp_boot_data.cpu_phys_id to find
@@ -223,7 +208,6 @@ void __init early_sn_setup(void)
 }
 
 extern int platform_intr_list[];
-extern nasid_t master_nasid;
 static int __initdata shub_1_1_found = 0;
 
 /*
@@ -269,7 +253,6 @@ static void __init sn_check_for_wars(void)
 void __init sn_setup(char **cmdline_p)
 {
 	long status, ticks_per_sec, drift;
-	int pxm;
 	u32 version = sn_sal_rev();
 	extern void sn_cpu_init(void);
 
@@ -300,11 +283,10 @@ void __init sn_setup(char **cmdline_p)
 
 	MAX_DMA_ADDRESS = PAGE_OFFSET + MAX_PHYS_MEMORY;
 
-	memset(physical_node_map, -1, sizeof(physical_node_map));
-	for (pxm = 0; pxm < MAX_PXM_DOMAINS; pxm++)
-		if (pxm_to_nid_map[pxm] != -1)
-			physical_node_map[pxm_to_nasid(pxm)] =
-			    pxm_to_nid_map[pxm];
+	/*
+	 * Build the tables for managing cnodes.
+	 */
+	build_cnode_tables();
 
 	/*
 	 * Old PROMs do not provide an ACPI FADT. Disable legacy keyboard
@@ -319,8 +301,6 @@ void __init sn_setup(char **cmdline_p)
 
 	printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF);
 
-	master_nasid = boot_get_nasid();
-
 	status =
 	    ia64_sal_freq_base(SAL_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec,
 			       &drift);
@@ -378,15 +358,6 @@ static void __init sn_init_pdas(char **cmdline_p)
 {
 	cnodeid_t cnode;
 
-	memset(sn_cnodeid_to_nasid, -1,
-			sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid)));
-	for_each_online_node(cnode)
-		sn_cnodeid_to_nasid[cnode] =
-				pxm_to_nasid(nid_to_pxm_map[cnode]);
-
-	numionodes = num_online_nodes();
-	scan_for_ionodes();
-
 	/*
 	 * Allocate & initalize the nodepda for each node.
 	 */
@@ -402,7 +373,7 @@ static void __init sn_init_pdas(char **cmdline_p)
 	/*
 	 * Allocate & initialize nodepda for TIOs.  For now, put them on node 0.
 	 */
-	for (cnode = num_online_nodes(); cnode < numionodes; cnode++) {
+	for (cnode = num_online_nodes(); cnode < num_cnodes; cnode++) {
 		nodepdaindr[cnode] =
 		    alloc_bootmem_node(NODE_DATA(0), sizeof(nodepda_t));
 		memset(nodepdaindr[cnode], 0, sizeof(nodepda_t));
@@ -411,7 +382,7 @@ static void __init sn_init_pdas(char **cmdline_p)
 	/*
 	 * Now copy the array of nodepda pointers to each nodepda.
 	 */
-	for (cnode = 0; cnode < numionodes; cnode++)
+	for (cnode = 0; cnode < num_cnodes; cnode++)
 		memcpy(nodepdaindr[cnode]->pernode_pdaindr, nodepdaindr,
 		       sizeof(nodepdaindr));
 
@@ -428,7 +399,7 @@ static void __init sn_init_pdas(char **cmdline_p)
 	 * Initialize the per node hubdev.  This includes IO Nodes and
 	 * headless/memless nodes.
 	 */
-	for (cnode = 0; cnode < numionodes; cnode++) {
+	for (cnode = 0; cnode < num_cnodes; cnode++) {
 		hubdev_init_node(nodepdaindr[cnode], cnode);
 	}
 }
@@ -553,87 +524,58 @@ void __init sn_cpu_init(void)
 }
 
 /*
- * Scan klconfig for ionodes.  Add the nasids to the
- * physical_node_map and the pda and increment numionodes.
+ * Build tables for converting between NASIDs and cnodes.
  */
+static inline int __init board_needs_cnode(int type)
+{	/* nonzero for the board types that are given a cnode: SNIA and TIO */
+	return (type == KLTYPE_SNIA || type == KLTYPE_TIO);
+}
 
-static void __init scan_for_ionodes(void)
+void __init build_cnode_tables(void)
 {
-	int nasid = 0;
+	int nasid;
+	int node;
 	lboard_t *brd;
 
-	/* fakeprom does not support klgraph */
-	if (IS_RUNNING_ON_FAKE_PROM())
-		return;
-
-	/* Setup ionodes with memory */
-	for (nasid = 0; nasid < MAX_PHYSNODE_ID; nasid += 2) {
-		char *klgraph_header;
-		cnodeid_t cnodeid;
-
-		if (physical_node_map[nasid] == -1)
-			continue;
+	memset(physical_node_map, -1, sizeof(physical_node_map));
+	memset(sn_cnodeid_to_nasid, -1,
+			sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid)));
 
-		cnodeid = -1;
-		klgraph_header = __va(ia64_sn_get_klconfig_addr(nasid));
-		if (!klgraph_header) {
-			BUG();	/* All nodes must have klconfig tables! */
-		}
-		cnodeid = nasid_to_cnodeid(nasid);
-		root_lboard[cnodeid] = (lboard_t *)
-		    NODE_OFFSET_TO_LBOARD((nasid),
-					  ((kl_config_hdr_t
-					    *) (klgraph_header))->
-					  ch_board_info);
+	/*
+	 * First populate the tables with C/M bricks. This ensures that
+	 * cnode == node for all C & M bricks.
+	 */
+	for_each_online_node(node) {
+		nasid = pxm_to_nasid(nid_to_pxm_map[node]);
+		sn_cnodeid_to_nasid[node] = nasid;
+		physical_node_map[nasid] = node;
 	}
 
-	/* Scan headless/memless IO Nodes. */
-	for (nasid = 0; nasid < MAX_PHYSNODE_ID; nasid += 2) {
-		/* if there's no nasid, don't try to read the klconfig on the node */
-		if (physical_node_map[nasid] == -1)
-			continue;
-		brd = find_lboard_any((lboard_t *)
-				      root_lboard[nasid_to_cnodeid(nasid)],
-				      KLTYPE_SNIA);
-		if (brd) {
-			brd = KLCF_NEXT_ANY(brd);	/* Skip this node's lboard */
-			if (!brd)
-				continue;
-		}
-
-		brd = find_lboard_any(brd, KLTYPE_SNIA);
+	/*
+	 * num_cnodes is the total number of C/M/TIO bricks. Because generic node
+	 * numbers are limited to 256 nodes, we can't use them for this count.
+	 * Note that num_cnodes is incremented below as TIOs or
+	 * headless/memoryless nodes are discovered.
+	 */
+	num_cnodes = num_online_nodes();
 
-		while (brd) {
-			sn_cnodeid_to_nasid[numionodes] = brd->brd_nasid;
-			physical_node_map[brd->brd_nasid] = numionodes;
-			root_lboard[numionodes] = brd;
-			numionodes++;
-			brd = KLCF_NEXT_ANY(brd);
-			if (!brd)
-				break;
-
-			brd = find_lboard_any(brd, KLTYPE_SNIA);
-		}
-	}
+	/* fakeprom does not support klgraph */
+	if (IS_RUNNING_ON_FAKE_PROM())
+		return;
 
-	/* Scan for TIO nodes. */
-	for (nasid = 0; nasid < MAX_PHYSNODE_ID; nasid += 2) {
-		/* if there's no nasid, don't try to read the klconfig on the node */
-		if (physical_node_map[nasid] == -1)
-			continue;
-		brd = find_lboard_any((lboard_t *)
-				      root_lboard[nasid_to_cnodeid(nasid)],
-				      KLTYPE_TIO);
+	/* Find TIOs & headless/memoryless nodes and add them to the tables */
+	for_each_online_node(node) {
+		kl_config_hdr_t *klgraph_header;
+		nasid = cnodeid_to_nasid(node);
+		if ((klgraph_header = ia64_sn_get_klconfig_addr(nasid)) == NULL)
+			BUG();
+		brd = NODE_OFFSET_TO_LBOARD(nasid, klgraph_header->ch_board_info);
 		while (brd) {
-			sn_cnodeid_to_nasid[numionodes] = brd->brd_nasid;
-			physical_node_map[brd->brd_nasid] = numionodes;
-			root_lboard[numionodes] = brd;
-			numionodes++;
-			brd = KLCF_NEXT_ANY(brd);
-			if (!brd)
-				break;
-
-			brd = find_lboard_any(brd, KLTYPE_TIO);
+			if (board_needs_cnode(brd->brd_type) && physical_node_map[brd->brd_nasid] < 0) {
+				sn_cnodeid_to_nasid[num_cnodes] = brd->brd_nasid;
+				physical_node_map[brd->brd_nasid] = num_cnodes++;
+			}
+			brd = find_lboard_next(brd);
 		}
 	}
 }
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index 0a4ee50c302..49b530c39a4 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -177,6 +177,7 @@ void sn_tlb_migrate_finish(struct mm_struct *mm)
 
 /**
  * sn2_global_tlb_purge - globally purge translation cache of virtual address range
+ * @mm: mm_struct containing virtual address range
  * @start: start of virtual address range
  * @end: end of virtual address range
  * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc))
@@ -188,21 +189,22 @@ void sn_tlb_migrate_finish(struct mm_struct *mm)
  * 	- cpu_vm_mask is a bit mask that indicates which cpus have loaded the context.
  * 	- cpu_vm_mask is converted into a nodemask of the nodes containing the
  * 	  cpus in cpu_vm_mask.
- *	- if only one bit is set in cpu_vm_mask & it is the current cpu,
- *	  then only the local TLB needs to be flushed. This flushing can be done
- *	  using ptc.l. This is the common case & avoids the global spinlock.
+ *	- if only one bit is set in cpu_vm_mask & it is the current cpu & the
+ *	  process is purging its own virtual address range, then only the
+ *	  local TLB needs to be flushed. This flushing can be done using
+ *	  ptc.l. This is the common case & avoids the global spinlock.
  *	- if multiple cpus have loaded the context, then flushing has to be
  *	  done with ptc.g/MMRs under protection of the global ptc_lock.
  */
 
 void
-sn2_global_tlb_purge(unsigned long start, unsigned long end,
-		     unsigned long nbits)
+sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
+		     unsigned long end, unsigned long nbits)
 {
 	int i, opt, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0;
+	int mymm = (mm == current->active_mm);
 	volatile unsigned long *ptc0, *ptc1;
-	unsigned long itc, itc2, flags, data0 = 0, data1 = 0;
-	struct mm_struct *mm = current->active_mm;
+	unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value;
 	short nasids[MAX_NUMNODES], nix;
 	nodemask_t nodes_flushed;
 
@@ -216,9 +218,12 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end,
 		i++;
 	}
 
+	if (i == 0)
+		return;
+
 	preempt_disable();
 
-	if (likely(i == 1 && lcpu == smp_processor_id())) {
+	if (likely(i == 1 && lcpu == smp_processor_id() && mymm)) {
 		do {
 			ia64_ptcl(start, nbits << 2);
 			start += (1UL << nbits);
@@ -229,7 +234,7 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end,
 		return;
 	}
 
-	if (atomic_read(&mm->mm_users) == 1) {
+	if (atomic_read(&mm->mm_users) == 1 && mymm) {
 		flush_tlb_mm(mm);
 		__get_cpu_var(ptcstats).change_rid++;
 		preempt_enable();
@@ -241,11 +246,13 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end,
 	for_each_node_mask(cnode, nodes_flushed)
 		nasids[nix++] = cnodeid_to_nasid(cnode);
 
+	rr_value = (mm->context << 3) | REGION_NUMBER(start);
+
 	shub1 = is_shub1();
 	if (shub1) {
 		data0 = (1UL << SH1_PTC_0_A_SHFT) |
 		    	(nbits << SH1_PTC_0_PS_SHFT) |
-		    	((ia64_get_rr(start) >> 8) << SH1_PTC_0_RID_SHFT) |
+			(rr_value << SH1_PTC_0_RID_SHFT) |
 		    	(1UL << SH1_PTC_0_START_SHFT);
 		ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0);
 		ptc1 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1);
@@ -254,7 +261,7 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end,
 			(nbits << SH2_PTC_PS_SHFT) |
 		    	(1UL << SH2_PTC_START_SHFT);
 		ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC + 
-			((ia64_get_rr(start) >> 8) << SH2_PTC_RID_SHFT) );
+			(rr_value << SH2_PTC_RID_SHFT));
 		ptc1 = NULL;
 	}
 	
@@ -275,7 +282,7 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end,
 			data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK);
 		for (i = 0; i < nix; i++) {
 			nasid = nasids[i];
-			if ((!(sn2_ptctest & 3)) && unlikely(nasid == mynasid)) {
+			if ((!(sn2_ptctest & 3)) && unlikely(nasid == mynasid && mymm)) {
 				ia64_ptcga(start, nbits << 2);
 				ia64_srlz_i();
 			} else {
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index 0513aacac8c..6c6fbca3229 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -476,8 +476,8 @@ static int sn_topology_show(struct seq_file *s, void *d)
 				for_each_online_cpu(j) {
 					seq_printf(s, j ? ":%d" : ", dist %d",
 						node_distance(
-						    cpuid_to_cnodeid(i),
-						    cpuid_to_cnodeid(j)));
+						    cpu_to_node(i),
+						    cpu_to_node(j)));
 				}
 				seq_putc(s, '\n');
 			}
diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c
index b45db5133f5..0d8592a745a 100644
--- a/arch/ia64/sn/kernel/tiocx.c
+++ b/arch/ia64/sn/kernel/tiocx.c
@@ -183,11 +183,12 @@ int cx_driver_unregister(struct cx_drv *cx_driver)
  * @part_num: device's part number
  * @mfg_num: device's manufacturer number
  * @hubdev: hub info associated with this device
+ * @bt: board type of the device
  *
  */
 int
 cx_device_register(nasid_t nasid, int part_num, int mfg_num,
-		   struct hubdev_info *hubdev)
+		   struct hubdev_info *hubdev, int bt)
 {
 	struct cx_dev *cx_dev;
 
@@ -200,6 +201,7 @@ cx_device_register(nasid_t nasid, int part_num, int mfg_num,
 	cx_dev->cx_id.mfg_num = mfg_num;
 	cx_dev->cx_id.nasid = nasid;
 	cx_dev->hubdev = hubdev;
+	cx_dev->bt = bt;
 
 	cx_dev->dev.parent = NULL;
 	cx_dev->dev.bus = &tiocx_bus_type;
@@ -238,7 +240,8 @@ static int cx_device_reload(struct cx_dev *cx_dev)
 {
 	cx_device_unregister(cx_dev);
 	return cx_device_register(cx_dev->cx_id.nasid, cx_dev->cx_id.part_num,
-				  cx_dev->cx_id.mfg_num, cx_dev->hubdev);
+				  cx_dev->cx_id.mfg_num, cx_dev->hubdev,
+				  cx_dev->bt);
 }
 
 static inline uint64_t tiocx_intr_alloc(nasid_t nasid, int widget,
@@ -365,26 +368,20 @@ static void tio_corelet_reset(nasid_t nasid, int corelet)
 	udelay(2000);
 }
 
-static int tiocx_btchar_get(int nasid)
+static int is_fpga_tio(int nasid, int *bt)
 {
-	moduleid_t module_id;
-	geoid_t geoid;
-	int cnodeid;
-
-	cnodeid = nasid_to_cnodeid(nasid);
-	geoid = cnodeid_get_geoid(cnodeid);
-	module_id = geo_module(geoid);
-	return MODULE_GET_BTCHAR(module_id);
-}
+	int ioboard_type;
 
-static int is_fpga_brick(int nasid)
-{
-	switch (tiocx_btchar_get(nasid)) {
+	ioboard_type = ia64_sn_sysctl_ioboard_get(nasid);
+
+	switch (ioboard_type) {
 	case L1_BRICKTYPE_SA:
 	case L1_BRICKTYPE_ATHENA:
-	case L1_BRICKTYPE_DAYTONA:
+	case L1_BOARDTYPE_DAYTONA:
+		*bt = ioboard_type;
 		return 1;
 	}
+
 	return 0;
 }
 
@@ -407,16 +404,22 @@ static int tiocx_reload(struct cx_dev *cx_dev)
 
 	if (bitstream_loaded(nasid)) {
 		uint64_t cx_id;
-
-		cx_id =
-		    *(volatile uint64_t *)(TIO_SWIN_BASE(nasid, TIOCX_CORELET) +
+		int rv;
+
+		rv = ia64_sn_sysctl_tio_clock_reset(nasid);
+		if (rv) {
+			printk(KERN_ALERT "CX port JTAG reset failed.\n");
+		} else {
+			cx_id = *(volatile uint64_t *)
+				(TIO_SWIN_BASE(nasid, TIOCX_CORELET) +
 					  WIDGET_ID);
-		part_num = XWIDGET_PART_NUM(cx_id);
-		mfg_num = XWIDGET_MFG_NUM(cx_id);
-		DBG("part= 0x%x, mfg= 0x%x\n", part_num, mfg_num);
-		/* just ignore it if it's a CE */
-		if (part_num == TIO_CE_ASIC_PARTNUM)
-			return 0;
+			part_num = XWIDGET_PART_NUM(cx_id);
+			mfg_num = XWIDGET_MFG_NUM(cx_id);
+			DBG("part= 0x%x, mfg= 0x%x\n", part_num, mfg_num);
+			/* just ignore it if it's a CE */
+			if (part_num == TIO_CE_ASIC_PARTNUM)
+				return 0;
+		}
 	}
 
 	cx_dev->cx_id.part_num = part_num;
@@ -436,10 +439,10 @@ static ssize_t show_cxdev_control(struct device *dev, struct device_attribute *a
 {
 	struct cx_dev *cx_dev = to_cx_dev(dev);
 
-	return sprintf(buf, "0x%x 0x%x 0x%x %d\n",
+	return sprintf(buf, "0x%x 0x%x 0x%x 0x%x\n",
 		       cx_dev->cx_id.nasid,
 		       cx_dev->cx_id.part_num, cx_dev->cx_id.mfg_num,
-		       tiocx_btchar_get(cx_dev->cx_id.nasid));
+		       cx_dev->bt);
 }
 
 static ssize_t store_cxdev_control(struct device *dev, struct device_attribute *attr, const char *buf,
@@ -486,13 +489,13 @@ static int __init tiocx_init(void)
 
 	bus_register(&tiocx_bus_type);
 
-	for (cnodeid = 0; cnodeid < MAX_COMPACT_NODES; cnodeid++) {
+	for (cnodeid = 0; cnodeid < num_cnodes; cnodeid++) {
 		nasid_t nasid;
+		int bt;
 
-		if ((nasid = cnodeid_to_nasid(cnodeid)) < 0)
-			break;	/* No more nasids .. bail out of loop */
+		nasid = cnodeid_to_nasid(cnodeid);
 
-		if ((nasid & 0x1) && is_fpga_brick(nasid)) {
+		if ((nasid & 0x1) && is_fpga_tio(nasid, &bt)) {
 			struct hubdev_info *hubdev;
 			struct xwidget_info *widgetp;
 
@@ -512,7 +515,7 @@ static int __init tiocx_init(void)
 
 			if (cx_device_register
 			    (nasid, widgetp->xwi_hwid.part_num,
-			     widgetp->xwi_hwid.mfg_num, hubdev) < 0)
+			     widgetp->xwi_hwid.mfg_num, hubdev, bt) < 0)
 				return -ENXIO;
 			else
 				found_tiocx_device++;
diff --git a/arch/ia64/sn/kernel/xpc.h b/arch/ia64/sn/kernel/xpc.h
index e5f5a4e51f7..fbcedc7c27f 100644
--- a/arch/ia64/sn/kernel/xpc.h
+++ b/arch/ia64/sn/kernel/xpc.h
@@ -57,7 +57,7 @@
 #define XPC_NASID_FROM_W_B(_w, _b) (((_w) * 64 + (_b)) * 2)
 
 #define XPC_HB_DEFAULT_INTERVAL		5	/* incr HB every x secs */
-#define XPC_HB_CHECK_DEFAULT_TIMEOUT	20	/* check HB every x secs */
+#define XPC_HB_CHECK_DEFAULT_INTERVAL	20	/* check HB every x secs */
 
 /* define the process name of HB checker and the CPU it is pinned to */
 #define XPC_HB_CHECK_THREAD_NAME	"xpc_hb"
@@ -67,34 +67,82 @@
 #define XPC_DISCOVERY_THREAD_NAME	"xpc_discovery"
 
 
-#define XPC_HB_ALLOWED(_p, _v)	((_v)->heartbeating_to_mask & (1UL << (_p)))
-#define XPC_ALLOW_HB(_p, _v)	(_v)->heartbeating_to_mask |= (1UL << (_p))
-#define XPC_DISALLOW_HB(_p, _v)	(_v)->heartbeating_to_mask &= (~(1UL << (_p)))
-
-
 /*
- * Reserved Page provided by SAL.
+ * the reserved page
+ *
+ *   SAL reserves one page of memory per partition for XPC. Though a full page
+ *   in length (16384 bytes), its starting address is not page aligned, but it
+ *   is cacheline aligned. The reserved page consists of the following:
+ *
+ *   reserved page header
+ *
+ *     The first cacheline of the reserved page contains the header
+ *     (struct xpc_rsvd_page). Before SAL initialization has completed,
+ *     SAL has set up the following fields of the reserved page header:
+ *     SAL_signature, SAL_version, partid, and nasids_size. The other
+ *     fields are set up by XPC. (xpc_rsvd_page points to the local
+ *     partition's reserved page.)
  *
- * SAL provides one page per partition of reserved memory.  When SAL
- * initialization is complete, SAL_signature, SAL_version, partid,
- * part_nasids, and mach_nasids are set.
+ *   part_nasids mask
+ *   mach_nasids mask
+ *
+ *     SAL also sets up two bitmaps (or masks), one that reflects the actual
+ *     nasids in this partition (part_nasids), and the other that reflects
+ *     the actual nasids in the entire machine (mach_nasids). We're only
+ *     interested in the even numbered nasids (which contain the processors
+ *     and/or memory), so we only need half as many bits to represent the
+ *     nasids. The part_nasids mask is located starting at the first cacheline
+ *     following the reserved page header. The mach_nasids mask follows right
+ *     after the part_nasids mask. The size in bytes of each mask is reflected
+ *     by the reserved page header field 'nasids_size'. (Local partition's
+ *     mask pointers are xpc_part_nasids and xpc_mach_nasids.)
+ *
+ *   vars
+ *   vars part
+ *
+ *     Immediately following the mach_nasids mask are the XPC variables
+ *     required by other partitions. First are those that are generic to all
+ *     partitions (vars), followed on the next available cacheline by those
+ *     which are partition specific (vars part). These are setup by XPC.
+ *     (Local partition's vars pointers are xpc_vars and xpc_vars_part.)
  *
  * Note: Until vars_pa is set, the partition XPC code has not been initialized.
  */
 struct xpc_rsvd_page {
-	u64 SAL_signature;	/* SAL unique signature */
-	u64 SAL_version;	/* SAL specified version */
-	u8 partid;		/* partition ID from SAL */
+	u64 SAL_signature;	/* SAL: unique signature */
+	u64 SAL_version;	/* SAL: version */
+	u8 partid;		/* SAL: partition ID */
 	u8 version;
-	u8 pad[6];		/* pad to u64 align */
+	u8 pad1[6];		/* align to next u64 in cacheline */
 	volatile u64 vars_pa;
-	u64 part_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned;
-	u64 mach_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned;
+	struct timespec stamp;	/* time when reserved page was setup by XPC */
+	u64 pad2[9];		/* align to last u64 in cacheline */
+	u64 nasids_size;	/* SAL: size of each nasid mask in bytes */
 };
-#define XPC_RP_VERSION _XPC_VERSION(1,0) /* version 1.0 of the reserved page */
 
-#define XPC_RSVD_PAGE_ALIGNED_SIZE \
-			(L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page)))
+#define XPC_RP_VERSION _XPC_VERSION(1,1) /* version 1.1 of the reserved page */
+/* nonzero if reserved page version _version is >= 1.1 (has the stamp field) */
+#define XPC_SUPPORTS_RP_STAMP(_version) \
+			((_version) >= _XPC_VERSION(1,1))
+
+/*
+ * compare stamps - the return value is:
+ *
+ *	< 0,	if stamp1 < stamp2
+ *	= 0,	if stamp1 == stamp2
+ *	> 0,	if stamp1 > stamp2
+ */
+static inline int
+xpc_compare_stamps(struct timespec *stamp1, struct timespec *stamp2)
+{
+	/* order by tv_sec, then tv_nsec; only the sign of the result matters */
+	/* (compare, don't subtract: a long difference may not fit in an int) */
+	if (stamp1->tv_sec != stamp2->tv_sec)
+		return (stamp1->tv_sec < stamp2->tv_sec) ? -1 : 1;
+	if (stamp1->tv_nsec != stamp2->tv_nsec)
+		return (stamp1->tv_nsec < stamp2->tv_nsec) ? -1 : 1;
+	return 0;
+}
 
 
 /*
@@ -121,11 +169,58 @@ struct xpc_vars {
 	u64 vars_part_pa;
 	u64 amos_page_pa;	/* paddr of page of AMOs from MSPEC driver */
 	AMO_t *amos_page;	/* vaddr of page of AMOs from MSPEC driver */
-	AMO_t *act_amos;	/* pointer to the first activation AMO */
 };
-#define XPC_V_VERSION _XPC_VERSION(3,0) /* version 3.0 of the cross vars */
 
-#define XPC_VARS_ALIGNED_SIZE  (L1_CACHE_ALIGN(sizeof(struct xpc_vars)))
+#define XPC_V_VERSION _XPC_VERSION(3,1) /* version 3.1 of the cross vars */
+/* nonzero if cross vars version _version is >= 3.1 */
+#define XPC_SUPPORTS_DISENGAGE_REQUEST(_version) \
+			((_version) >= _XPC_VERSION(3,1))
+
+/* Returns 1 if partid's bit is set in vars->heartbeating_to_mask, else 0. */
+static inline int
+xpc_hb_allowed(partid_t partid, struct xpc_vars *vars)
+{
+	return ((vars->heartbeating_to_mask & (1UL << partid)) != 0);
+}
+
+static inline void
+xpc_allow_hb(partid_t partid, struct xpc_vars *vars)
+{
+	u64 old_mask, new_mask;
+	/* set partid's bit; loop until cmpxchg() returns the old_mask we read */
+	do {
+		old_mask = vars->heartbeating_to_mask;
+		new_mask = (old_mask | (1UL << partid));
+	} while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
+							old_mask);
+}
+
+static inline void
+xpc_disallow_hb(partid_t partid, struct xpc_vars *vars)
+{
+	u64 old_mask, new_mask;
+	/* clear partid's bit; loop until cmpxchg() returns the old_mask we read */
+	do {
+		old_mask = vars->heartbeating_to_mask;
+		new_mask = (old_mask & ~(1UL << partid));
+	} while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
+							old_mask);
+}
+
+
+/*
+ * The AMOs page consists of a number of AMO variables which are divided into
+ * four groups. The first two groups are used to identify an IRQ's sender.
+ * These two groups consist of 64 and 128 AMO variables respectively. The last
+ * two groups, consisting of just one AMO variable each, are used to identify
+ * the remote partitions that are currently engaged (from the viewpoint of
+ * the XPC running on the remote partition).
+ */
+#define XPC_NOTIFY_IRQ_AMOS	   0
+#define XPC_ACTIVATE_IRQ_AMOS	   (XPC_NOTIFY_IRQ_AMOS + XP_MAX_PARTITIONS)
+#define XPC_ENGAGED_PARTITIONS_AMO (XPC_ACTIVATE_IRQ_AMOS + XP_NASID_MASK_WORDS)
+#define XPC_DISENGAGE_REQUEST_AMO  (XPC_ENGAGED_PARTITIONS_AMO + 1)
+
 
 /*
  * The following structure describes the per partition specific variables.
@@ -165,6 +260,16 @@ struct xpc_vars_part {
 #define XPC_VP_MAGIC2	0x0073726176435058L  /* 'XPCvars\0'L (little endian) */
 
 
+/* the reserved page sizes and offsets */
+
+#define XPC_RP_HEADER_SIZE	L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page))
+#define XPC_RP_VARS_SIZE 	L1_CACHE_ALIGN(sizeof(struct xpc_vars))
+/* _rp points at a reserved page: header, part_nasids, mach_nasids, vars, vars part */
+#define XPC_RP_PART_NASIDS(_rp) ((u64 *) ((u8 *) (_rp) + XPC_RP_HEADER_SIZE))
+#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + xp_nasid_mask_words)
+#define XPC_RP_VARS(_rp)	((struct xpc_vars *) (XPC_RP_MACH_NASIDS(_rp) + xp_nasid_mask_words))
+#define XPC_RP_VARS_PART(_rp)	((struct xpc_vars_part *) ((u8 *) XPC_RP_VARS(_rp) + XPC_RP_VARS_SIZE))
+
 
 /*
  * Functions registered by add_timer() or called by kernel_thread() only
@@ -349,6 +454,9 @@ struct xpc_channel {
 	atomic_t n_on_msg_allocate_wq;   /* #on msg allocation wait queue */
 	wait_queue_head_t msg_allocate_wq; /* msg allocation wait queue */
 
+	u8 delayed_IPI_flags;		/* IPI flags received, but delayed */
+					/* action until channel disconnected */
+
 	/* queue of msg senders who want to be notified when msg received */
 
 	atomic_t n_to_notify;		/* #of msg senders to notify */
@@ -358,7 +466,7 @@ struct xpc_channel {
 	void *key;			/* pointer to user's key */
 
 	struct semaphore msg_to_pull_sema; /* next msg to pull serialization */
-	struct semaphore teardown_sema;    /* wait for teardown completion */
+	struct semaphore wdisconnect_sema; /* wait for channel disconnect */
 
 	struct xpc_openclose_args *local_openclose_args; /* args passed on */
 					/* opening or closing of channel */
@@ -410,6 +518,8 @@ struct xpc_channel {
 
 #define	XPC_C_DISCONNECTED	0x00002000 /* channel is disconnected */
 #define	XPC_C_DISCONNECTING	0x00004000 /* channel is being disconnected */
+#define	XPC_C_DISCONNECTCALLOUT	0x00008000 /* chan disconnected callout made */
+#define	XPC_C_WDISCONNECT	0x00010000 /* waiting for channel disconnect */
 
 
 
@@ -422,6 +532,8 @@ struct xpc_partition {
 
 	/* XPC HB infrastructure */
 
+	u8 remote_rp_version;		/* version# of partition's rsvd pg */
+	struct timespec remote_rp_stamp;/* time when rsvd pg was initialized */
 	u64 remote_rp_pa;		/* phys addr of partition's rsvd pg */
 	u64 remote_vars_pa;		/* phys addr of partition's vars */
 	u64 remote_vars_part_pa;	/* phys addr of partition's vars part */
@@ -432,14 +544,18 @@ struct xpc_partition {
 	u32 act_IRQ_rcvd;		/* IRQs since activation */
 	spinlock_t act_lock;		/* protect updating of act_state */
 	u8 act_state;			/* from XPC HB viewpoint */
+	u8 remote_vars_version;		/* version# of partition's vars */
 	enum xpc_retval reason;		/* reason partition is deactivating */
 	int reason_line;		/* line# deactivation initiated from */
 	int reactivate_nasid;		/* nasid in partition to reactivate */
 
+	unsigned long disengage_request_timeout; /* timeout in jiffies */
+	struct timer_list disengage_request_timer;
+
 
 	/* XPC infrastructure referencing and teardown control */
 
-	volatile u8 setup_state;			/* infrastructure setup state */
+	volatile u8 setup_state;	/* infrastructure setup state */
 	wait_queue_head_t teardown_wq;	/* kthread waiting to teardown infra */
 	atomic_t references;		/* #of references to infrastructure */
 
@@ -454,6 +570,7 @@ struct xpc_partition {
 
 	u8 nchannels;		   /* #of defined channels supported */
 	atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */
+	atomic_t nchannels_engaged;/* #of channels engaged with remote part */
 	struct xpc_channel *channels;/* array of channel structures */
 
 	void *local_GPs_base;	  /* base address of kmalloc'd space */
@@ -518,6 +635,7 @@ struct xpc_partition {
 #define XPC_P_TORNDOWN		0x03	/* infrastructure is torndown */
 
 
+
 /*
  * struct xpc_partition IPI_timer #of seconds to wait before checking for
  * dropped IPIs. These occur whenever an IPI amo write doesn't complete until
@@ -526,6 +644,13 @@ struct xpc_partition {
 #define XPC_P_DROPPED_IPI_WAIT	(0.25 * HZ)
 
 
+/* number of seconds to wait for other partitions to disengage */
+#define XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT	90
+
+/* interval in seconds to print 'waiting disengagement' messages */
+#define XPC_DISENGAGE_PRINTMSG_INTERVAL		10
+
+
 #define XPC_PARTID(_p)	((partid_t) ((_p) - &xpc_partitions[0]))
 
 
@@ -534,24 +659,20 @@ struct xpc_partition {
 extern struct xpc_registration xpc_registrations[];
 
 
-/* >>> found in xpc_main.c only */
+/* found in xpc_main.c */
 extern struct device *xpc_part;
 extern struct device *xpc_chan;
+extern int xpc_disengage_request_timelimit;
 extern irqreturn_t xpc_notify_IRQ_handler(int, void *, struct pt_regs *);
 extern void xpc_dropped_IPI_check(struct xpc_partition *);
+extern void xpc_activate_partition(struct xpc_partition *);
 extern void xpc_activate_kthreads(struct xpc_channel *, int);
 extern void xpc_create_kthreads(struct xpc_channel *, int);
 extern void xpc_disconnect_wait(int);
 
 
-/* found in xpc_main.c and efi-xpc.c */
-extern void xpc_activate_partition(struct xpc_partition *);
-
-
 /* found in xpc_partition.c */
 extern int xpc_exiting;
-extern int xpc_hb_interval;
-extern int xpc_hb_check_interval;
 extern struct xpc_vars *xpc_vars;
 extern struct xpc_rsvd_page *xpc_rsvd_page;
 extern struct xpc_vars_part *xpc_vars_part;
@@ -561,6 +682,7 @@ extern struct xpc_rsvd_page *xpc_rsvd_page_init(void);
 extern void xpc_allow_IPI_ops(void);
 extern void xpc_restrict_IPI_ops(void);
 extern int xpc_identify_act_IRQ_sender(void);
+extern int xpc_partition_disengaged(struct xpc_partition *);
 extern enum xpc_retval xpc_mark_partition_active(struct xpc_partition *);
 extern void xpc_mark_partition_inactive(struct xpc_partition *);
 extern void xpc_discovery(void);
@@ -585,8 +707,8 @@ extern void xpc_connected_callout(struct xpc_channel *);
 extern void xpc_deliver_msg(struct xpc_channel *);
 extern void xpc_disconnect_channel(const int, struct xpc_channel *,
 					enum xpc_retval, unsigned long *);
-extern void xpc_disconnected_callout(struct xpc_channel *);
-extern void xpc_partition_down(struct xpc_partition *, enum xpc_retval);
+extern void xpc_disconnecting_callout(struct xpc_channel *);
+extern void xpc_partition_going_down(struct xpc_partition *, enum xpc_retval);
 extern void xpc_teardown_infrastructure(struct xpc_partition *);
 
 
@@ -674,6 +796,157 @@ xpc_part_ref(struct xpc_partition *part)
 
 
 /*
+ * This next set of inlines are used to keep track of when a partition is
+ * potentially engaged in accessing memory belonging to another partition.
+ */
+
+static inline void
+xpc_mark_partition_engaged(struct xpc_partition *part)
+{
+	unsigned long irq_flags;
+	AMO_t *amo = (AMO_t *) __va(part->remote
author	Linus Torvalds <torvalds@g5.osdl.org>	2005-10-28 21:09:26 -0700
committer	Linus Torvalds <torvalds@g5.osdl.org>	2005-10-28 21:09:26 -0700
commit	8a212ab6b8a4ccc6f3c3d1beba5f92655c576404 (patch)
tree	525271129ff9c692defdd20566f1f7203b18ff24 /arch/ia64/sn
parent	1f419cadff55f548e7356ffebdb9e1b5a8c22275 (diff)
parent	0e1f60609258e18ec0a0477c646101212822d387 (diff)