aboutsummaryrefslogtreecommitdiff
path: root/arch/ia64/sn
diff options
context:
space:
mode:
Diffstat (limited to 'arch/ia64/sn')
-rw-r--r-- arch/ia64/sn/kernel/Makefile | 7
-rw-r--r-- arch/ia64/sn/kernel/io_init.c | 10
-rw-r--r-- arch/ia64/sn/kernel/mca.c | 34
-rw-r--r-- arch/ia64/sn/kernel/setup.c | 40
-rw-r--r-- arch/ia64/sn/kernel/tiocx.c | 60
-rw-r--r-- arch/ia64/sn/kernel/xp_main.c | 289
-rw-r--r-- arch/ia64/sn/kernel/xp_nofault.S | 31
-rw-r--r-- arch/ia64/sn/kernel/xpc.h | 991
-rw-r--r-- arch/ia64/sn/kernel/xpc_channel.c | 2297
-rw-r--r-- arch/ia64/sn/kernel/xpc_main.c | 1064
-rw-r--r-- arch/ia64/sn/kernel/xpc_partition.c | 984
-rw-r--r-- arch/ia64/sn/kernel/xpnet.c | 715
-rw-r--r-- arch/ia64/sn/pci/pcibr/pcibr_dma.c | 2
-rw-r--r-- arch/ia64/sn/pci/tioca_provider.c | 2
14 files changed, 6462 insertions, 64 deletions
diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile
index 4f381fb2504..4351c4ff984 100644
--- a/arch/ia64/sn/kernel/Makefile
+++ b/arch/ia64/sn/kernel/Makefile
@@ -4,10 +4,15 @@
# License. See the file "COPYING" in the main directory of this archive
# for more details.
#
-# Copyright (C) 1999,2001-2003 Silicon Graphics, Inc. All Rights Reserved.
+# Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All Rights Reserved.
#
obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \
huberror.o io_init.o iomv.o klconflib.o sn2/
obj-$(CONFIG_IA64_GENERIC) += machvec.o
obj-$(CONFIG_SGI_TIOCX) += tiocx.o
+obj-$(CONFIG_IA64_SGI_SN_XP) += xp.o
+xp-y := xp_main.o xp_nofault.o
+obj-$(CONFIG_IA64_SGI_SN_XP) += xpc.o
+xpc-y := xpc_main.o xpc_channel.o xpc_partition.o
+obj-$(CONFIG_IA64_SGI_SN_XP) += xpnet.o
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index 18160a06a8c..9e07f5463f2 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -174,6 +174,12 @@ static void sn_fixup_ionodes(void)
if (status)
continue;
+ /* Attach the error interrupt handlers */
+ if (nasid & 1)
+ ice_error_init(hubdev);
+ else
+ hub_error_init(hubdev);
+
for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++)
hubdev->hdi_xwidget_info[widget].xwi_hubinfo = hubdev;
@@ -211,10 +217,6 @@ static void sn_fixup_ionodes(void)
sn_flush_device_list;
}
- if (!(i & 1))
- hub_error_init(hubdev);
- else
- ice_error_init(hubdev);
}
}
diff --git a/arch/ia64/sn/kernel/mca.c b/arch/ia64/sn/kernel/mca.c
index 857774bb2c9..6546db6abdb 100644
--- a/arch/ia64/sn/kernel/mca.c
+++ b/arch/ia64/sn/kernel/mca.c
@@ -37,6 +37,11 @@ static u64 *sn_oemdata_size, sn_oemdata_bufsize;
* This function is the callback routine that SAL calls to log error
* info for platform errors. buf is appended to sn_oemdata, resizing as
* required.
+ * Note: this is a SAL to OS callback, running under the same rules as the SAL
+ * code. SAL calls are run with preempt disabled so this routine must not
+ * sleep. vmalloc can sleep so print_hook cannot resize the output buffer
+ * itself, instead it must set the required size and return to let the caller
+ * resize the buffer then redrive the SAL call.
*/
static int print_hook(const char *fmt, ...)
{
@@ -47,18 +52,8 @@ static int print_hook(const char *fmt, ...)
vsnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
len = strlen(buf);
- while (*sn_oemdata_size + len + 1 > sn_oemdata_bufsize) {
- u8 *newbuf = vmalloc(sn_oemdata_bufsize += 1000);
- if (!newbuf) {
- printk(KERN_ERR "%s: unable to extend sn_oemdata\n",
- __FUNCTION__);
- return 0;
- }
- memcpy(newbuf, *sn_oemdata, *sn_oemdata_size);
- vfree(*sn_oemdata);
- *sn_oemdata = newbuf;
- }
- memcpy(*sn_oemdata + *sn_oemdata_size, buf, len + 1);
+ if (*sn_oemdata_size + len <= sn_oemdata_bufsize)
+ memcpy(*sn_oemdata + *sn_oemdata_size, buf, len);
*sn_oemdata_size += len;
return 0;
}
@@ -98,7 +93,20 @@ sn_platform_plat_specific_err_print(const u8 * sect_header, u8 ** oemdata,
sn_oemdata = oemdata;
sn_oemdata_size = oemdata_size;
sn_oemdata_bufsize = 0;
- ia64_sn_plat_specific_err_print(print_hook, (char *)sect_header);
+ *sn_oemdata_size = PAGE_SIZE; /* first guess at how much data will be generated */
+ while (*sn_oemdata_size > sn_oemdata_bufsize) {
+ u8 *newbuf = vmalloc(*sn_oemdata_size);
+ if (!newbuf) {
+ printk(KERN_ERR "%s: unable to extend sn_oemdata\n",
+ __FUNCTION__);
+ return 1;
+ }
+ vfree(*sn_oemdata);
+ *sn_oemdata = newbuf;
+ sn_oemdata_bufsize = *sn_oemdata_size;
+ *sn_oemdata_size = 0;
+ ia64_sn_plat_specific_err_print(print_hook, (char *)sect_header);
+ }
up(&sn_oemdata_mutex);
return 0;
}
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index d35f2a6f9c9..4fb44984afe 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -3,7 +3,7 @@
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
- * Copyright (C) 1999,2001-2004 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All rights reserved.
*/
#include <linux/config.h>
@@ -73,6 +73,12 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info);
EXPORT_PER_CPU_SYMBOL(__sn_hub_info);
+DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_NUMNODES]);
+EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid);
+
+DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda);
+EXPORT_PER_CPU_SYMBOL(__sn_nodepda);
+
partid_t sn_partid = -1;
EXPORT_SYMBOL(sn_partid);
char sn_system_serial_number_string[128];
@@ -373,11 +379,11 @@ static void __init sn_init_pdas(char **cmdline_p)
{
cnodeid_t cnode;
- memset(pda->cnodeid_to_nasid_table, -1,
- sizeof(pda->cnodeid_to_nasid_table));
+ memset(sn_cnodeid_to_nasid, -1,
+ sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid)));
for_each_online_node(cnode)
- pda->cnodeid_to_nasid_table[cnode] =
- pxm_to_nasid(nid_to_pxm_map[cnode]);
+ sn_cnodeid_to_nasid[cnode] =
+ pxm_to_nasid(nid_to_pxm_map[cnode]);
numionodes = num_online_nodes();
scan_for_ionodes();
@@ -477,7 +483,8 @@ void __init sn_cpu_init(void)
cnode = nasid_to_cnodeid(nasid);
- pda->p_nodepda = nodepdaindr[cnode];
+ sn_nodepda = nodepdaindr[cnode];
+
pda->led_address =
(typeof(pda->led_address)) (LED0 + (slice << LED_CPU_SHIFT));
pda->led_state = LED_ALWAYS_SET;
@@ -486,15 +493,18 @@ void __init sn_cpu_init(void)
pda->idle_flag = 0;
if (cpuid != 0) {
- memcpy(pda->cnodeid_to_nasid_table,
- pdacpu(0)->cnodeid_to_nasid_table,
- sizeof(pda->cnodeid_to_nasid_table));
+ /* copy cpu 0's sn_cnodeid_to_nasid table to this cpu's */
+ memcpy(sn_cnodeid_to_nasid,
+ (&per_cpu(__sn_cnodeid_to_nasid, 0)),
+ sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid)));
}
/*
* Check for WARs.
* Only needs to be done once, on BSP.
- * Has to be done after loop above, because it uses pda.cnodeid_to_nasid_table[i].
+ * Has to be done after loop above, because it uses this cpu's
+ * sn_cnodeid_to_nasid table which was just initialized if this
+ * isn't cpu 0.
* Has to be done before assignment below.
*/
if (!wars_have_been_checked) {
@@ -580,8 +590,7 @@ static void __init scan_for_ionodes(void)
brd = find_lboard_any(brd, KLTYPE_SNIA);
while (brd) {
- pda->cnodeid_to_nasid_table[numionodes] =
- brd->brd_nasid;
+ sn_cnodeid_to_nasid[numionodes] = brd->brd_nasid;
physical_node_map[brd->brd_nasid] = numionodes;
root_lboard[numionodes] = brd;
numionodes++;
@@ -602,8 +611,7 @@ static void __init scan_for_ionodes(void)
root_lboard[nasid_to_cnodeid(nasid)],
KLTYPE_TIO);
while (brd) {
- pda->cnodeid_to_nasid_table[numionodes] =
- brd->brd_nasid;
+ sn_cnodeid_to_nasid[numionodes] = brd->brd_nasid;
physical_node_map[brd->brd_nasid] = numionodes;
root_lboard[numionodes] = brd;
numionodes++;
@@ -614,7 +622,6 @@ static void __init scan_for_ionodes(void)
brd = find_lboard_any(brd, KLTYPE_TIO);
}
}
-
}
int
@@ -623,7 +630,8 @@ nasid_slice_to_cpuid(int nasid, int slice)
long cpu;
for (cpu=0; cpu < NR_CPUS; cpu++)
- if (nodepda->phys_cpuid[cpu].nasid == nasid && nodepda->phys_cpuid[cpu].slice == slice)
+ if (cpuid_to_nasid(cpu) == nasid &&
+ cpuid_to_slice(cpu) == slice)
return cpu;
return -1;
diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c
index 66190d7e492..ab9b5f35c2a 100644
--- a/arch/ia64/sn/kernel/tiocx.c
+++ b/arch/ia64/sn/kernel/tiocx.c
@@ -21,6 +21,8 @@
#include <asm/sn/types.h>
#include <asm/sn/shubio.h>
#include <asm/sn/tiocx.h>
+#include <asm/sn/l1.h>
+#include <asm/sn/module.h>
#include "tio.h"
#include "xtalk/xwidgetdev.h"
#include "xtalk/hubdev.h"
@@ -308,14 +310,12 @@ void tiocx_irq_free(struct sn_irq_info *sn_irq_info)
}
}
-uint64_t
-tiocx_dma_addr(uint64_t addr)
+uint64_t tiocx_dma_addr(uint64_t addr)
{
return PHYS_TO_TIODMA(addr);
}
-uint64_t
-tiocx_swin_base(int nasid)
+uint64_t tiocx_swin_base(int nasid)
{
return TIO_SWIN_BASE(nasid, TIOCX_CORELET);
}
@@ -330,19 +330,6 @@ EXPORT_SYMBOL(tiocx_bus_type);
EXPORT_SYMBOL(tiocx_dma_addr);
EXPORT_SYMBOL(tiocx_swin_base);
-static uint64_t tiocx_get_hubdev_info(u64 handle, u64 address)
-{
-
- struct ia64_sal_retval ret_stuff;
- ret_stuff.status = 0;
- ret_stuff.v0 = 0;
-
- ia64_sal_oemcall_nolock(&ret_stuff,
- SN_SAL_IOIF_GET_HUBDEV_INFO,
- handle, address, 0, 0, 0, 0, 0);
- return ret_stuff.v0;
-}
-
static void tio_conveyor_set(nasid_t nasid, int enable_flag)
{
uint64_t ice_frz;
@@ -379,7 +366,29 @@ static void tio_corelet_reset(nasid_t nasid, int corelet)
udelay(2000);
}
-static int fpga_attached(nasid_t nasid)
+static int tiocx_btchar_get(int nasid)
+{
+ moduleid_t module_id;
+ geoid_t geoid;
+ int cnodeid;
+
+ cnodeid = nasid_to_cnodeid(nasid);
+ geoid = cnodeid_get_geoid(cnodeid);
+ module_id = geo_module(geoid);
+ return MODULE_GET_BTCHAR(module_id);
+}
+
+static int is_fpga_brick(int nasid)
+{
+ switch (tiocx_btchar_get(nasid)) {
+ case L1_BRICKTYPE_SA:
+ case L1_BRICKTYPE_ATHENA:
+ return 1;
+ }
+ return 0;
+}
+
+static int bitstream_loaded(nasid_t nasid)
{
uint64_t cx_credits;
@@ -396,7 +405,7 @@ static int tiocx_reload(struct cx_dev *cx_dev)
int mfg_num = CX_DEV_NONE;
nasid_t nasid = cx_dev->cx_id.nasid;
- if (fpga_attached(nasid)) {
+ if (bitstream_loaded(nasid)) {
uint64_t cx_id;
cx_id =
@@ -427,9 +436,10 @@ static ssize_t show_cxdev_control(struct device *dev, char *buf)
{
struct cx_dev *cx_dev = to_cx_dev(dev);
- return sprintf(buf, "0x%x 0x%x 0x%x\n",
+ return sprintf(buf, "0x%x 0x%x 0x%x %d\n",
cx_dev->cx_id.nasid,
- cx_dev->cx_id.part_num, cx_dev->cx_id.mfg_num);
+ cx_dev->cx_id.part_num, cx_dev->cx_id.mfg_num,
+ tiocx_btchar_get(cx_dev->cx_id.nasid));
}
static ssize_t store_cxdev_control(struct device *dev, const char *buf,
@@ -475,20 +485,14 @@ static int __init tiocx_init(void)
if ((nasid = cnodeid_to_nasid(cnodeid)) < 0)
break; /* No more nasids .. bail out of loop */
- if (nasid & 0x1) { /* TIO's are always odd */
+ if ((nasid & 0x1) && is_fpga_brick(nasid)) {
struct hubdev_info *hubdev;
- uint64_t status;
struct xwidget_info *widgetp;
DBG("Found TIO at nasid 0x%x\n", nasid);
hubdev =
(struct hubdev_info *)(NODEPDA(cnodeid)->pdinfo);
- status =
- tiocx_get_hubdev_info(nasid,
- (uint64_t) __pa(hubdev));
- if (status)
- continue;
widgetp = &hubdev->hdi_xwidget_info[TIOCX_CORELET];
diff --git a/arch/ia64/sn/kernel/xp_main.c b/arch/ia64/sn/kernel/xp_main.c
new file mode 100644
index 00000000000..3be52a34c80
--- /dev/null
+++ b/arch/ia64/sn/kernel/xp_main.c
@@ -0,0 +1,289 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+
+/*
+ * Cross Partition (XP) base.
+ *
+ * XP provides a base from which its users can interact
+ * with XPC, yet not be dependent on XPC.
+ *
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <asm/sn/intr.h>
+#include <asm/sn/sn_sal.h>
+#include <asm/sn/xp.h>
+
+
+/*
+ * Target of nofault PIO read.
+ */
+u64 xp_nofault_PIOR_target;
+
+
+/*
+ * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level
+ * users of XPC.
+ */
+struct xpc_registration xpc_registrations[XPC_NCHANNELS];
+
+
+/*
+ * Initialize the XPC interface to indicate that XPC isn't loaded.
+ */
+static enum xpc_retval xpc_notloaded(void) { return xpcNotLoaded; }
+
+struct xpc_interface xpc_interface = {
+ (void (*)(int)) xpc_notloaded,
+ (void (*)(int)) xpc_notloaded,
+ (enum xpc_retval (*)(partid_t, int, u32, void **)) xpc_notloaded,
+ (enum xpc_retval (*)(partid_t, int, void *)) xpc_notloaded,
+ (enum xpc_retval (*)(partid_t, int, void *, xpc_notify_func, void *))
+ xpc_notloaded,
+ (void (*)(partid_t, int, void *)) xpc_notloaded,
+ (enum xpc_retval (*)(partid_t, void *)) xpc_notloaded
+};
+
+
+/*
+ * XPC calls this when it (the XPC module) has been loaded.
+ */
+void
+xpc_set_interface(void (*connect)(int),
+ void (*disconnect)(int),
+ enum xpc_retval (*allocate)(partid_t, int, u32, void **),
+ enum xpc_retval (*send)(partid_t, int, void *),
+ enum xpc_retval (*send_notify)(partid_t, int, void *,
+ xpc_notify_func, void *),
+ void (*received)(partid_t, int, void *),
+ enum xpc_retval (*partid_to_nasids)(partid_t, void *))
+{
+ xpc_interface.connect = connect;
+ xpc_interface.disconnect = disconnect;
+ xpc_interface.allocate = allocate;
+ xpc_interface.send = send;
+ xpc_interface.send_notify = send_notify;
+ xpc_interface.received = received;
+ xpc_interface.partid_to_nasids = partid_to_nasids;
+}
+
+
+/*
+ * XPC calls this when it (the XPC module) is being unloaded.
+ */
+void
+xpc_clear_interface(void)
+{
+ xpc_interface.connect = (void (*)(int)) xpc_notloaded;
+ xpc_interface.disconnect = (void (*)(int)) xpc_notloaded;
+ xpc_interface.allocate = (enum xpc_retval (*)(partid_t, int, u32,
+ void **)) xpc_notloaded;
+ xpc_interface.send = (enum xpc_retval (*)(partid_t, int, void *))
+ xpc_notloaded;
+ xpc_interface.send_notify = (enum xpc_retval (*)(partid_t, int, void *,
+ xpc_notify_func, void *)) xpc_notloaded;
+ xpc_interface.received = (void (*)(partid_t, int, void *))
+ xpc_notloaded;
+ xpc_interface.partid_to_nasids = (enum xpc_retval (*)(partid_t, void *))
+ xpc_notloaded;
+}
+
+
+/*
+ * Register for automatic establishment of a channel connection whenever
+ * a partition comes up.
+ *
+ * Arguments:
+ *
+ * ch_number - channel # to register for connection.
+ * func - function to call for asynchronous notification of channel
+ * state changes (i.e., connection, disconnection, error) and
+ * the arrival of incoming messages.
+ * key - pointer to optional user-defined value that gets passed back
+ * to the user on any callouts made to func.
+ * payload_size - size in bytes of the XPC message's payload area which
+ * contains a user-defined message. The user should make
+ * this large enough to hold their largest message.
+ * nentries - max #of XPC message entries a message queue can contain.
+ * The actual number, which is determined when a connection
+ * is established and may be less than requested, will be
+ * passed to the user via the xpcConnected callout.
+ * assigned_limit - max number of kthreads allowed to be processing
+ * messages (per connection) at any given instant.
+ * idle_limit - max number of kthreads allowed to be idle at any given
+ * instant.
+ */
+enum xpc_retval
+xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size,
+ u16 nentries, u32 assigned_limit, u32 idle_limit)
+{
+ struct xpc_registration *registration;
+
+
+ DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
+ DBUG_ON(payload_size == 0 || nentries == 0);
+ DBUG_ON(func == NULL);
+ DBUG_ON(assigned_limit == 0 || idle_limit > assigned_limit);
+
+ registration = &xpc_registrations[ch_number];
+
+ if (down_interruptible(&registration->sema) != 0) {
+ return xpcInterrupted;
+ }
+
+ /* if XPC_CHANNEL_REGISTERED(ch_number) */
+ if (registration->func != NULL) {
+ up(&registration->sema);
+ return xpcAlreadyRegistered;
+ }
+
+ /* register the channel for connection */
+ registration->msg_size = XPC_MSG_SIZE(payload_size);
+ registration->nentries = nentries;
+ registration->assigned_limit = assigned_limit;
+ registration->idle_limit = idle_limit;
+ registration->key = key;
+ registration->func = func;
+
+ up(&registration->sema);
+
+ xpc_interface.connect(ch_number);
+
+ return xpcSuccess;
+}
+
+
+/*
+ * Remove the registration for automatic connection of the specified channel
+ * when a partition comes up.
+ *
+ * Before returning, xpc_disconnect() will wait until all connections on the
+ * specified channel have been closed/torn down. So the caller can be assured
+ * that they will not be receiving any more callouts from XPC to their
+ * function registered via xpc_connect().
+ *
+ * Arguments:
+ *
+ * ch_number - channel # to unregister.
+ */
+void
+xpc_disconnect(int ch_number)
+{
+ struct xpc_registration *registration;
+
+
+ DBUG_ON(ch_number < 0 || ch_number >= XPC_NCHANNELS);
+
+ registration = &xpc_registrations[ch_number];
+
+ /*
+ * We've decided not to make this a down_interruptible(), since we
+ * figured XPC's users will just turn around and call xpc_disconnect()
+ * again anyways, so we might as well wait, if need be.
+ */
+ down(&registration->sema);
+
+ /* if !XPC_CHANNEL_REGISTERED(ch_number) */
+ if (registration->func == NULL) {
+ up(&registration->sema);
+ return;
+ }
+
+ /* remove the connection registration for the specified channel */
+ registration->func = NULL;
+ registration->key = NULL;
+ registration->nentries = 0;
+ registration->msg_size = 0;
+ registration->assigned_limit = 0;
+ registration->idle_limit = 0;
+
+ xpc_interface.disconnect(ch_number);
+
+ up(&registration->sema);
+
+ return;
+}
+
+
+int __init
+xp_init(void)
+{
+ int ret, ch_number;
+ u64 func_addr = *(u64 *) xp_nofault_PIOR;
+ u64 err_func_addr = *(u64 *) xp_error_PIOR;
+
+
+ if (!ia64_platform_is("sn2")) {
+ return -ENODEV;
+ }
+
+ /*
+ * Register a nofault code region which performs a cross-partition
+ * PIO read. If the PIO read times out, the MCA handler will consume
+ * the error and return to a kernel-provided instruction to indicate
+ * an error. This PIO read exists because it is guaranteed to timeout
+ * if the destination is down (AMO operations do not timeout on at
+ * least some CPUs on Shubs <= v1.2, which unfortunately we have to
+ * work around).
+ */
+ if ((ret = sn_register_nofault_code(func_addr, err_func_addr,
+ err_func_addr, 1, 1)) != 0) {
+ printk(KERN_ERR "XP: can't register nofault code, error=%d\n",
+ ret);
+ }
+ /*
+ * Setup the nofault PIO read target. (There is no special reason why
+ * SH_IPI_ACCESS was selected.)
+ */
+ if (is_shub2()) {
+ xp_nofault_PIOR_target = SH2_IPI_ACCESS0;
+ } else {
+ xp_nofault_PIOR_target = SH1_IPI_ACCESS;
+ }
+
+ /* initialize the connection registration semaphores */
+ for (ch_number = 0; ch_number < XPC_NCHANNELS; ch_number++) {
+ sema_init(&xpc_registrations[ch_number].sema, 1); /* mutex */
+ }
+
+ return 0;
+}
+module_init(xp_init);
+
+
+void __exit
+xp_exit(void)
+{
+ u64 func_addr = *(u64 *) xp_nofault_PIOR;
+ u64 err_func_addr = *(u64 *) xp_error_PIOR;
+
+
+ /* unregister the PIO read nofault code region */
+ (void) sn_register_nofault_code(func_addr, err_func_addr,
+ err_func_addr, 1, 0);
+}
+module_exit(xp_exit);
+
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_DESCRIPTION("Cross Partition (XP) base");
+MODULE_LICENSE("GPL");
+
+EXPORT_SYMBOL(xp_nofault_PIOR);
+EXPORT_SYMBOL(xp_nofault_PIOR_target);
+EXPORT_SYMBOL(xpc_registrations);
+EXPORT_SYMBOL(xpc_interface);
+EXPORT_SYMBOL(xpc_clear_interface);
+EXPORT_SYMBOL(xpc_set_interface);
+EXPORT_SYMBOL(xpc_connect);
+EXPORT_SYMBOL(xpc_disconnect);
+
diff --git a/arch/ia64/sn/kernel/xp_nofault.S b/arch/ia64/sn/kernel/xp_nofault.S
new file mode 100644
index 00000000000..b772543053c
--- /dev/null
+++ b/arch/ia64/sn/kernel/xp_nofault.S
@@ -0,0 +1,31 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+
+/*
+ * The xp_nofault_PIOR function takes a pointer to a remote PIO register
+ * and attempts to load and consume a value from it. This function
+ * will be registered as a nofault code block. In the event that the
+ * PIO read fails, the MCA handler will force the error to look
+ * corrected and vector to the xp_error_PIOR which will return an error.
+ *
+ * extern int xp_nofault_PIOR(void *remote_register);
+ */
+
+ .global xp_nofault_PIOR
+xp_nofault_PIOR:
+ mov r8=r0 // Stage a success return value
+ ld8.acq r9=[r32];; // PIO Read the specified register
+ adds r9=1,r9 // Add to force a consume
+ br.ret.sptk.many b0;; // Return success
+
+ .global xp_error_PIOR
+xp_error_PIOR:
+ mov r8=1 // Return value of 1
+ br.ret.sptk.many b0;; // Return failure
+
diff --git a/arch/ia64/sn/kernel/xpc.h b/arch/ia64/sn/kernel/xpc.h
new file mode 100644
index 00000000000..1a0aed8490d
--- /dev/null
+++ b/arch/ia64/sn/kernel/xpc.h
@@ -0,0 +1,991 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved.
+ */
+
+
+/*
+ * Cross Partition Communication (XPC) structures and macros.
+ */
+
+#ifndef _IA64_SN_KERNEL_XPC_H
+#define _IA64_SN_KERNEL_XPC_H
+
+
+#include <linux/config.h>
+#include <linux/interrupt.h>
+#include <linux/sysctl.h>
+#include <linux/device.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/sn/bte.h>
+#include <asm/sn/clksupport.h>
+#include <asm/sn/addrs.h>
+#include <asm/sn/mspec.h>
+#include <asm/sn/shub_mmr.h>
+#include <asm/sn/xp.h>
+
+
+/*
+ * XPC Version numbers consist of a major and minor number. XPC can always
+ * talk to versions with same major #, and never talk to versions with a
+ * different major #.
+ */
+#define _XPC_VERSION(_maj, _min) (((_maj) << 4) | ((_min) & 0xf))
+#define XPC_VERSION_MAJOR(_v) ((_v) >> 4)
+#define XPC_VERSION_MINOR(_v) ((_v) & 0xf)
+
+
+/*
+ * The next macros define word or bit representations for given
+ * C-brick nasid in either the SAL provided bit array representing
+ * nasids in the partition/machine or the AMO_t array used for
+ * inter-partition initiation communications.
+ *
+ * For SN2 machines, C-Bricks are always even numbered NASIDs. As
+ * such, some space will be saved by insisting that nasid information
+ * passed from SAL always be packed for C-Bricks and the
+ * cross-partition interrupts use the same packing scheme.
+ */
+#define XPC_NASID_W_INDEX(_n) (((_n) / 64) / 2)
+#define XPC_NASID_B_INDEX(_n) (((_n) / 2) & (64 - 1))
+#define XPC_NASID_IN_ARRAY(_n, _p) ((_p)[XPC_NASID_W_INDEX(_n)] & \
+ (1UL << XPC_NASID_B_INDEX(_n)))
+#define XPC_NASID_FROM_W_B(_w, _b) (((_w) * 64 + (_b)) * 2)
+
+#define XPC_HB_DEFAULT_INTERVAL 5 /* incr HB every x secs */
+#define XPC_HB_CHECK_DEFAULT_TIMEOUT 20 /* check HB every x secs */
+
+/* define the process name of HB checker and the CPU it is pinned to */
+#define XPC_HB_CHECK_THREAD_NAME "xpc_hb"
+#define XPC_HB_CHECK_CPU 0
+
+/* define the process name of the discovery thread */
+#define XPC_DISCOVERY_THREAD_NAME "xpc_discovery"
+
+
+#define XPC_HB_ALLOWED(_p, _v) ((_v)->heartbeating_to_mask & (1UL << (_p)))
+#define XPC_ALLOW_HB(_p, _v) (_v)->heartbeating_to_mask |= (1UL << (_p))
+#define XPC_DISALLOW_HB(_p, _v) (_v)->heartbeating_to_mask &= (~(1UL << (_p)))
+
+
+/*
+ * Reserved Page provided by SAL.
+ *
+ * SAL provides one page per partition of reserved memory. When SAL
+ * initialization is complete, SAL_signature, SAL_version, partid,
+ * part_nasids, and mach_nasids are set.
+ *
+ * Note: Until vars_pa is set, the partition XPC code has not been initialized.
+ */
+struct xpc_rsvd_page {
+ u64 SAL_signature; /* SAL unique signature */
+ u64 SAL_version; /* SAL specified version */
+ u8 partid; /* partition ID from SAL */
+ u8 version;
+ u8 pad[6]; /* pad to u64 align */
+ u64 vars_pa;
+ u64 part_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned;
+ u64 mach_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned;
+};
+#define XPC_RP_VERSION _XPC_VERSION(1,0) /* version 1.0 of the reserved page */
+
+#define XPC_RSVD_PAGE_ALIGNED_SIZE \
+ (L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page)))
+
+
+/*
+ * Define the structures by which XPC variables can be exported to other
+ * partitions. (There are two: struct xpc_vars and struct xpc_vars_part)
+ */
+
+/*
+ * The following structure describes the partition generic variables
+ * needed by other partitions in order to properly initialize.
+ *
+ * struct xpc_vars version number also applies to struct xpc_vars_part.
+ * Changes to either structure and/or related functionality should be
+ * reflected by incrementing either the major or minor version numbers
+ * of struct xpc_vars.
+ */
+struct xpc_vars {
+ u8 version;
+ u64 heartbeat;
+ u64 heartbeating_to_mask;
+ u64 kdb_status; /* 0 = machine running */
+ int act_nasid;
+ int act_phys_cpuid;
+ u64 vars_part_pa;
+ u64 amos_page_pa; /* paddr of page of AMOs from MSPEC driver */
+ AMO_t *amos_page; /* vaddr of page of AMOs from MSPEC driver */
+ AMO_t *act_amos; /* pointer to the first activation AMO */
+};
+#define XPC_V_VERSION _XPC_VERSION(3,0) /* version 3.0 of the cross vars */
+
+#define XPC_VARS_ALIGNED_SIZE (L1_CACHE_ALIGN(sizeof(struct xpc_vars)))
+
+/*
+ * The following structure describes the per partition specific variables.
+ *
+ * An array of these structures, one per partition, will be defined. As a
+ * partition becomes active XPC will copy the array entry corresponding to
+ * itself from that partition. It is desirable that the size of this
+ * structure evenly divide into a cacheline, such that none of the entries
+ * in this array crosses a cacheline boundary. As it is now, each entry
+ * occupies half a cacheline.
+ */
+struct xpc_vars_part {
+ u64 magic;
+
+ u64 openclose_args_pa; /* physical address of open and close args */
+ u64 GPs_pa; /* physical address of Get/Put values */
+
+ u64 IPI_amo_pa; /* physical address of IPI AMO_t structure */
+ int IPI_nasid; /* nasid of where to send IPIs */
+ int IPI_phys_cpuid; /* physical CPU ID of where to send IPIs */
+
+ u8 nchannels; /* #of defined channels supported */
+
+ u8 reserved[23]; /* pad to a full 64 bytes */
+};
+
+/*
+ * The vars_part MAGIC numbers play a part in the first contact protocol.
+ *
+ * MAGIC1 indicates that the per partition specific variables for a remote
+ * partition have been initialized by this partition.
+ *
+ * MAGIC2 indicates that this partition has pulled the remote partition's
+ * per partition variables that pertain to this partition.
+ */
+#define XPC_VP_MAGIC1 0x0053524156435058L /* 'XPCVARS\0'L (little endian) */
+#define XPC_VP_MAGIC2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */
+
+
+
+/*
+ * Functions registered by add_timer() or called by kernel_thread() only
+ * allow for a single 64-bit argument. The following macros can be used to
+ * pack and unpack two (32-bit, 16-bit or 8-bit) arguments into or out from
+ * the passed argument.
+ */
+#define XPC_PACK_ARGS(_arg1, _arg2) \
+ ((((u64) _arg1) & 0xffffffff) | \
+ ((((u64) _arg2) & 0xffffffff) << 32))
+
+#define XPC_UNPACK_ARG1(_args) (((u64) _args) & 0xffffffff)
+#define XPC_UNPACK_ARG2(_args) ((((u64) _args) >> 32) & 0xffffffff)
+
+
+
+/*
+ * Define a Get/Put value pair (pointers) used with a message queue.
+ */
+struct xpc_gp {
+ s64 get; /* Get value */
+ s64 put; /* Put value */
+};
+
+#define XPC_GP_SIZE \
+ L1_CACHE_ALIGN(sizeof(struct xpc_gp) * XPC_NCHANNELS)
+
+
+
+/*
+ * Define a structure that contains arguments associated with opening and
+ * closing a channel.
+ */
+struct xpc_openclose_args {
+ u16 reason; /* reason why channel is closing */
+ u16 msg_size; /* sizeof each message entry */
+ u16 remote_nentries; /* #of message entries in remote msg queue */
+ u16 local_nentries; /* #of message entries in local msg queue */
+ u64 local_msgqueue_pa; /* physical address of local message queue */
+};
+
+#define XPC_OPENCLOSE_ARGS_SIZE \
+ L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * XPC_NCHANNELS)
+
+
+
+/* struct xpc_msg flags */
+
+#define XPC_M_DONE 0x01 /* msg has been received/consumed */
+#define XPC_M_READY 0x02 /* msg is ready to be sent */
+#define XPC_M_INTERRUPT 0x04 /* send interrupt when msg consumed */
+
+
+#define XPC_MSG_ADDRESS(_payload) \
+ ((struct xpc_msg *)((u8 *)(_payload) - XPC_MSG_PAYLOAD_OFFSET))
+
+
+
+/*
+ * Defines notify entry.
+ *
+ * This is used to notify a message's sender that their message was received
+ * and consumed by the intended recipient.
+ */
+struct xpc_notify {
+ struct semaphore sema; /* notify semaphore */
+ u8 type; /* type of notification */
+
+ /* the following two fields are only used if type == XPC_N_CALL */
+ xpc_notify_func func; /* user's notify function */
+ void *key; /* pointer to user's key */
+};
+
+/* struct xpc_notify type of notification */
+
+#define XPC_N_CALL 0x01 /* notify function provided by user */
+
+
+
+/*
+ * Define the structure that manages all the stuff required by a channel. In
+ * particular, they are used to manage the messages sent across the channel.
+ *
+ * This structure is private to a partition, and is NOT shared across the
+ * partition boundary.
+ *
+ * There is an array of these structures for each remote partition. It is
+ * allocated at the time a partition becomes active. The array contains one
+ * of these structures for each potential channel connection to that partition.
+ *
+ * Each of these structures manages two message queues (circular buffers).
+ * They are allocated at the time a channel connection is made. One of
+ * these message queues (local_msgqueue) holds the locally created messages
+ * that are destined for the remote partition. The other of these message
+ * queues (remote_msgqueue) is a locally cached copy of the remote partition's
+ * own local_msgqueue.
+ *
+ * The following is a description of the Get/Put pointers used to manage these
+ * two message queues. Consider the local_msgqueue to be on one partition
+ * and the remote_msgqueue to be its cached copy on another partition. A
+ * description of what each of the lettered areas contains is included.
+ *
+ *
+ * local_msgqueue remote_msgqueue
+ *
+ * |/////////| |/////////|
+ * w_remote_GP.get --> +---------+ |/////////|
+ * | F | |/////////|
+ * remote_GP.get --> +---------+ +---------+ <-- local_GP->get
+ * | | | |
+ * | | | E |
+ * | | | |
+ * | | +---------+ <-- w_local_GP.get
+ * | B | |/////////|
+ * | | |////D////|
+ * | | |/////////|
+ * | | +---------+ <-- w_remote_GP.put
+ * | | |////C////|
+ * local_GP->put --> +---------+ +---------+ <-- remote_GP.put
+ * | | |/////////|
+ * | A | |/////////|
+ * | | |/////////|
+ * w_local_GP.put --> +---------+ |/////////|
+ * |/////////| |/////////|
+ *
+ *
+ * ( remote_GP.[get|put] are cached copies of the remote
+ * partition's local_GP->[get|put], and thus their values can
+ * lag behind their counterparts on the