aboutsummaryrefslogtreecommitdiff
path: root/arch/ia64
diff options
context:
space:
mode:
authorTony Luck <tony.luck@intel.com>2005-10-28 15:27:03 -0700
committerTony Luck <tony.luck@intel.com>2005-10-28 15:27:03 -0700
commitfac84ef26759a3725bfc53ae3abf21976360aff3 (patch)
tree94aa9362e72a0736948adc78cb43ebf44f59580b /arch/ia64
parentd73dee6ee4b554074f88e4ebd956ea4db8552d52 (diff)
parent279290294662d4a35d209fb7d7c46523cfa3d63d (diff)
Pull xpc-disengage into release branch
Diffstat (limited to 'arch/ia64')
-rw-r--r--arch/ia64/sn/kernel/xpc.h366
-rw-r--r--arch/ia64/sn/kernel/xpc_channel.c329
-rw-r--r--arch/ia64/sn/kernel/xpc_main.c330
-rw-r--r--arch/ia64/sn/kernel/xpc_partition.c473
4 files changed, 1130 insertions, 368 deletions
diff --git a/arch/ia64/sn/kernel/xpc.h b/arch/ia64/sn/kernel/xpc.h
index d0ee635daf2..33df1b3758b 100644
--- a/arch/ia64/sn/kernel/xpc.h
+++ b/arch/ia64/sn/kernel/xpc.h
@@ -57,7 +57,7 @@
#define XPC_NASID_FROM_W_B(_w, _b) (((_w) * 64 + (_b)) * 2)
#define XPC_HB_DEFAULT_INTERVAL 5 /* incr HB every x secs */
-#define XPC_HB_CHECK_DEFAULT_TIMEOUT 20 /* check HB every x secs */
+#define XPC_HB_CHECK_DEFAULT_INTERVAL 20 /* check HB every x secs */
/* define the process name of HB checker and the CPU it is pinned to */
#define XPC_HB_CHECK_THREAD_NAME "xpc_hb"
@@ -67,34 +67,82 @@
#define XPC_DISCOVERY_THREAD_NAME "xpc_discovery"
-#define XPC_HB_ALLOWED(_p, _v) ((_v)->heartbeating_to_mask & (1UL << (_p)))
-#define XPC_ALLOW_HB(_p, _v) (_v)->heartbeating_to_mask |= (1UL << (_p))
-#define XPC_DISALLOW_HB(_p, _v) (_v)->heartbeating_to_mask &= (~(1UL << (_p)))
-
-
/*
- * Reserved Page provided by SAL.
+ * the reserved page
+ *
+ * SAL reserves one page of memory per partition for XPC. Though a full page
+ * in length (16384 bytes), its starting address is not page aligned, but it
+ * is cacheline aligned. The reserved page consists of the following:
+ *
+ * reserved page header
+ *
+ * The first cacheline of the reserved page contains the header
+ * (struct xpc_rsvd_page). Before SAL initialization has completed,
+ * SAL has set up the following fields of the reserved page header:
+ * SAL_signature, SAL_version, partid, and nasids_size. The other
+ * fields are set up by XPC. (xpc_rsvd_page points to the local
+ * partition's reserved page.)
*
- * SAL provides one page per partition of reserved memory. When SAL
- * initialization is complete, SAL_signature, SAL_version, partid,
- * part_nasids, and mach_nasids are set.
+ * part_nasids mask
+ * mach_nasids mask
+ *
+ * SAL also sets up two bitmaps (or masks), one that reflects the actual
+ * nasids in this partition (part_nasids), and the other that reflects
+ * the actual nasids in the entire machine (mach_nasids). We're only
+ * interested in the even numbered nasids (which contain the processors
+ * and/or memory), so we only need half as many bits to represent the
+ * nasids. The part_nasids mask is located starting at the first cacheline
+ * following the reserved page header. The mach_nasids mask follows right
+ * after the part_nasids mask. The size in bytes of each mask is reflected
+ * by the reserved page header field 'nasids_size'. (Local partition's
+ * mask pointers are xpc_part_nasids and xpc_mach_nasids.)
+ *
+ * vars
+ * vars part
+ *
+ * Immediately following the mach_nasids mask are the XPC variables
+ * required by other partitions. First are those that are generic to all
+ * partitions (vars), followed on the next available cacheline by those
+ * which are partition specific (vars part). These are setup by XPC.
+ * (Local partition's vars pointers are xpc_vars and xpc_vars_part.)
*
* Note: Until vars_pa is set, the partition XPC code has not been initialized.
*/
struct xpc_rsvd_page {
- u64 SAL_signature; /* SAL unique signature */
- u64 SAL_version; /* SAL specified version */
- u8 partid; /* partition ID from SAL */
+ u64 SAL_signature; /* SAL: unique signature */
+ u64 SAL_version; /* SAL: version */
+ u8 partid; /* SAL: partition ID */
u8 version;
- u8 pad[6]; /* pad to u64 align */
+ u8 pad1[6]; /* align to next u64 in cacheline */
volatile u64 vars_pa;
- u64 part_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned;
- u64 mach_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned;
+ struct timespec stamp; /* time when reserved page was setup by XPC */
+ u64 pad2[9]; /* align to last u64 in cacheline */
+ u64 nasids_size; /* SAL: size of each nasid mask in bytes */
};
-#define XPC_RP_VERSION _XPC_VERSION(1,0) /* version 1.0 of the reserved page */
-#define XPC_RSVD_PAGE_ALIGNED_SIZE \
- (L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page)))
+#define XPC_RP_VERSION _XPC_VERSION(1,1) /* version 1.1 of the reserved page */
+
+#define XPC_SUPPORTS_RP_STAMP(_version) \
+ (_version >= _XPC_VERSION(1,1))
+
+/*
+ * compare stamps - the return value is:
+ *
+ * < 0, if stamp1 < stamp2
+ * = 0, if stamp1 == stamp2
+ * > 0, if stamp1 > stamp2
+ */
+static inline int
+xpc_compare_stamps(struct timespec *stamp1, struct timespec *stamp2)
+{
+ int ret;
+
+
+ if ((ret = stamp1->tv_sec - stamp2->tv_sec) == 0) {
+ ret = stamp1->tv_nsec - stamp2->tv_nsec;
+ }
+ return ret;
+}
/*
@@ -121,11 +169,58 @@ struct xpc_vars {
u64 vars_part_pa;
u64 amos_page_pa; /* paddr of page of AMOs from MSPEC driver */
AMO_t *amos_page; /* vaddr of page of AMOs from MSPEC driver */
- AMO_t *act_amos; /* pointer to the first activation AMO */
};
-#define XPC_V_VERSION _XPC_VERSION(3,0) /* version 3.0 of the cross vars */
-#define XPC_VARS_ALIGNED_SIZE (L1_CACHE_ALIGN(sizeof(struct xpc_vars)))
+#define XPC_V_VERSION _XPC_VERSION(3,1) /* version 3.1 of the cross vars */
+
+#define XPC_SUPPORTS_DISENGAGE_REQUEST(_version) \
+ (_version >= _XPC_VERSION(3,1))
+
+
+static inline int
+xpc_hb_allowed(partid_t partid, struct xpc_vars *vars)
+{
+ return ((vars->heartbeating_to_mask & (1UL << partid)) != 0);
+}
+
+static inline void
+xpc_allow_hb(partid_t partid, struct xpc_vars *vars)
+{
+ u64 old_mask, new_mask;
+
+ do {
+ old_mask = vars->heartbeating_to_mask;
+ new_mask = (old_mask | (1UL << partid));
+ } while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
+ old_mask);
+}
+
+static inline void
+xpc_disallow_hb(partid_t partid, struct xpc_vars *vars)
+{
+ u64 old_mask, new_mask;
+
+ do {
+ old_mask = vars->heartbeating_to_mask;
+ new_mask = (old_mask & ~(1UL << partid));
+ } while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
+ old_mask);
+}
+
+
+/*
+ * The AMOs page consists of a number of AMO variables which are divided into
+ * four groups, The first two groups are used to identify an IRQ's sender.
+ * These two groups consist of 64 and 128 AMO variables respectively. The last
+ * two groups, consisting of just one AMO variable each, are used to identify
+ * the remote partitions that are currently engaged (from the viewpoint of
+ * the XPC running on the remote partition).
+ */
+#define XPC_NOTIFY_IRQ_AMOS 0
+#define XPC_ACTIVATE_IRQ_AMOS (XPC_NOTIFY_IRQ_AMOS + XP_MAX_PARTITIONS)
+#define XPC_ENGAGED_PARTITIONS_AMO (XPC_ACTIVATE_IRQ_AMOS + XP_NASID_MASK_WORDS)
+#define XPC_DISENGAGE_REQUEST_AMO (XPC_ENGAGED_PARTITIONS_AMO + 1)
+
/*
* The following structure describes the per partition specific variables.
@@ -165,6 +260,16 @@ struct xpc_vars_part {
#define XPC_VP_MAGIC2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */
+/* the reserved page sizes and offsets */
+
+#define XPC_RP_HEADER_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page))
+#define XPC_RP_VARS_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_vars))
+
+#define XPC_RP_PART_NASIDS(_rp) (u64 *) ((u8 *) _rp + XPC_RP_HEADER_SIZE)
+#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + xp_nasid_mask_words)
+#define XPC_RP_VARS(_rp) ((struct xpc_vars *) XPC_RP_MACH_NASIDS(_rp) + xp_nasid_mask_words)
+#define XPC_RP_VARS_PART(_rp) (struct xpc_vars_part *) ((u8 *) XPC_RP_VARS(rp) + XPC_RP_VARS_SIZE)
+
/*
* Functions registered by add_timer() or called by kernel_thread() only
@@ -349,6 +454,9 @@ struct xpc_channel {
atomic_t n_on_msg_allocate_wq; /* #on msg allocation wait queue */
wait_queue_head_t msg_allocate_wq; /* msg allocation wait queue */
+ u8 delayed_IPI_flags; /* IPI flags received, but delayed */
+ /* action until channel disconnected */
+
/* queue of msg senders who want to be notified when msg received */
atomic_t n_to_notify; /* #of msg senders to notify */
@@ -358,7 +466,7 @@ struct xpc_channel {
void *key; /* pointer to user's key */
struct semaphore msg_to_pull_sema; /* next msg to pull serialization */
- struct semaphore teardown_sema; /* wait for teardown completion */
+ struct semaphore wdisconnect_sema; /* wait for channel disconnect */
struct xpc_openclose_args *local_openclose_args; /* args passed on */
/* opening or closing of channel */
@@ -410,6 +518,8 @@ struct xpc_channel {
#define XPC_C_DISCONNECTED 0x00002000 /* channel is disconnected */
#define XPC_C_DISCONNECTING 0x00004000 /* channel is being disconnected */
+#define XPC_C_DISCONNECTCALLOUT 0x00008000 /* chan disconnected callout made */
+#define XPC_C_WDISCONNECT 0x00010000 /* waiting for channel disconnect */
@@ -422,6 +532,8 @@ struct xpc_partition {
/* XPC HB infrastructure */
+ u8 remote_rp_version; /* version# of partition's rsvd pg */
+ struct timespec remote_rp_stamp;/* time when rsvd pg was initialized */
u64 remote_rp_pa; /* phys addr of partition's rsvd pg */
u64 remote_vars_pa; /* phys addr of partition's vars */
u64 remote_vars_part_pa; /* phys addr of partition's vars part */
@@ -432,14 +544,18 @@ struct xpc_partition {
u32 act_IRQ_rcvd; /* IRQs since activation */
spinlock_t act_lock; /* protect updating of act_state */
u8 act_state; /* from XPC HB viewpoint */
+ u8 remote_vars_version; /* version# of partition's vars */
enum xpc_retval reason; /* reason partition is deactivating */
int reason_line; /* line# deactivation initiated from */
int reactivate_nasid; /* nasid in partition to reactivate */
+ unsigned long disengage_request_timeout; /* timeout in jiffies */
+ struct timer_list disengage_request_timer;
+
/* XPC infrastructure referencing and teardown control */
- volatile u8 setup_state; /* infrastructure setup state */
+ volatile u8 setup_state; /* infrastructure setup state */
wait_queue_head_t teardown_wq; /* kthread waiting to teardown infra */
atomic_t references; /* #of references to infrastructure */
@@ -454,6 +570,7 @@ struct xpc_partition {
u8 nchannels; /* #of defined channels supported */
atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */
+ atomic_t nchannels_engaged;/* #of channels engaged with remote part */
struct xpc_channel *channels;/* array of channel structures */
void *local_GPs_base; /* base address of kmalloc'd space */
@@ -518,6 +635,7 @@ struct xpc_partition {
#define XPC_P_TORNDOWN 0x03 /* infrastructure is torndown */
+
/*
* struct xpc_partition IPI_timer #of seconds to wait before checking for
* dropped IPIs. These occur whenever an IPI amo write doesn't complete until
@@ -526,6 +644,13 @@ struct xpc_partition {
#define XPC_P_DROPPED_IPI_WAIT (0.25 * HZ)
+/* number of seconds to wait for other partitions to disengage */
+#define XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT 90
+
+/* interval in seconds to print 'waiting disengagement' messages */
+#define XPC_DISENGAGE_PRINTMSG_INTERVAL 10
+
+
#define XPC_PARTID(_p) ((partid_t) ((_p) - &xpc_partitions[0]))
@@ -534,24 +659,20 @@ struct xpc_partition {
extern struct xpc_registration xpc_registrations[];
-/* >>> found in xpc_main.c only */
+/* found in xpc_main.c */
extern struct device *xpc_part;
extern struct device *xpc_chan;
+extern int xpc_disengage_request_timelimit;
extern irqreturn_t xpc_notify_IRQ_handler(int, void *, struct pt_regs *);
extern void xpc_dropped_IPI_check(struct xpc_partition *);
+extern void xpc_activate_partition(struct xpc_partition *);
extern void xpc_activate_kthreads(struct xpc_channel *, int);
extern void xpc_create_kthreads(struct xpc_channel *, int);
extern void xpc_disconnect_wait(int);
-/* found in xpc_main.c and efi-xpc.c */
-extern void xpc_activate_partition(struct xpc_partition *);
-
-
/* found in xpc_partition.c */
extern int xpc_exiting;
-extern int xpc_hb_interval;
-extern int xpc_hb_check_interval;
extern struct xpc_vars *xpc_vars;
extern struct xpc_rsvd_page *xpc_rsvd_page;
extern struct xpc_vars_part *xpc_vars_part;
@@ -561,6 +682,7 @@ extern struct xpc_rsvd_page *xpc_rsvd_page_init(void);
extern void xpc_allow_IPI_ops(void);
extern void xpc_restrict_IPI_ops(void);
extern int xpc_identify_act_IRQ_sender(void);
+extern int xpc_partition_disengaged(struct xpc_partition *);
extern enum xpc_retval xpc_mark_partition_active(struct xpc_partition *);
extern void xpc_mark_partition_inactive(struct xpc_partition *);
extern void xpc_discovery(void);
@@ -585,8 +707,8 @@ extern void xpc_connected_callout(struct xpc_channel *);
extern void xpc_deliver_msg(struct xpc_channel *);
extern void xpc_disconnect_channel(const int, struct xpc_channel *,
enum xpc_retval, unsigned long *);
-extern void xpc_disconnected_callout(struct xpc_channel *);
-extern void xpc_partition_down(struct xpc_partition *, enum xpc_retval);
+extern void xpc_disconnecting_callout(struct xpc_channel *);
+extern void xpc_partition_going_down(struct xpc_partition *, enum xpc_retval);
extern void xpc_teardown_infrastructure(struct xpc_partition *);
@@ -674,6 +796,157 @@ xpc_part_ref(struct xpc_partition *part)
/*
+ * This next set of inlines are used to keep track of when a partition is
+ * potentially engaged in accessing memory belonging to another partition.
+ */
+
+static inline void
+xpc_mark_partition_engaged(struct xpc_partition *part)
+{
+ unsigned long irq_flags;
+ AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
+ (XPC_ENGAGED_PARTITIONS_AMO * sizeof(AMO_t)));
+
+
+ local_irq_save(irq_flags);
+
+ /* set bit corresponding to our partid in remote partition's AMO */
+ FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_OR,
+ (1UL << sn_partition_id));
+ /*
+ * We must always use the nofault function regardless of whether we
+ * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+ * didn't, we'd never know that the other partition is down and would
+ * keep sending IPIs and AMOs to it until the heartbeat times out.
+ */
+ (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
+ variable), xp_nofault_PIOR_target));
+
+ local_irq_restore(irq_flags);
+}
+
+static inline void
+xpc_mark_partition_disengaged(struct xpc_partition *part)
+{
+ unsigned long irq_flags;
+ AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
+ (XPC_ENGAGED_PARTITIONS_AMO * sizeof(AMO_t)));
+
+
+ local_irq_save(irq_flags);
+
+ /* clear bit corresponding to our partid in remote partition's AMO */
+ FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
+ ~(1UL << sn_partition_id));
+ /*
+ * We must always use the nofault function regardless of whether we
+ * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+ * didn't, we'd never know that the other partition is down and would
+ * keep sending IPIs and AMOs to it until the heartbeat times out.
+ */
+ (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
+ variable), xp_nofault_PIOR_target));
+
+ local_irq_restore(irq_flags);
+}
+
+static inline void
+xpc_request_partition_disengage(struct xpc_partition *part)
+{
+ unsigned long irq_flags;
+ AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
+ (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
+
+
+ local_irq_save(irq_flags);
+
+ /* set bit corresponding to our partid in remote partition's AMO */
+ FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_OR,
+ (1UL << sn_partition_id));
+ /*
+ * We must always use the nofault function regardless of whether we
+ * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+ * didn't, we'd never know that the other partition is down and would
+ * keep sending IPIs and AMOs to it until the heartbeat times out.
+ */
+ (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
+ variable), xp_nofault_PIOR_target));
+
+ local_irq_restore(irq_flags);
+}
+
+static inline void
+xpc_cancel_partition_disengage_request(struct xpc_partition *part)
+{
+ unsigned long irq_flags;
+ AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
+ (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
+
+
+ local_irq_save(irq_flags);
+
+ /* clear bit corresponding to our partid in remote partition's AMO */
+ FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
+ ~(1UL << sn_partition_id));
+ /*
+ * We must always use the nofault function regardless of whether we
+ * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
+ * didn't, we'd never know that the other partition is down and would
+ * keep sending IPIs and AMOs to it until the heartbeat times out.
+ */
+ (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
+ variable), xp_nofault_PIOR_target));
+
+ local_irq_restore(irq_flags);
+}
+
+static inline u64
+xpc_partition_engaged(u64 partid_mask)
+{
+ AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
+
+
+ /* return our partition's AMO variable ANDed with partid_mask */
+ return (FETCHOP_LOAD_OP(TO_AMO((u64) &amo->variable), FETCHOP_LOAD) &
+ partid_mask);
+}
+
+static inline u64
+xpc_partition_disengage_requested(u64 partid_mask)
+{
+ AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
+
+
+ /* return our partition's AMO variable ANDed with partid_mask */
+ return (FETCHOP_LOAD_OP(TO_AMO((u64) &amo->variable), FETCHOP_LOAD) &
+ partid_mask);
+}
+
+static inline void
+xpc_clear_partition_engaged(u64 partid_mask)
+{
+ AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
+
+
+ /* clear bit(s) based on partid_mask in our partition's AMO */
+ FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
+ ~partid_mask);
+}
+
+static inline void
+xpc_clear_partition_disengage_request(u64 partid_mask)
+{
+ AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
+
+
+ /* clear bit(s) based on partid_mask in our partition's AMO */
+ FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
+ ~partid_mask);
+}
+
+
+
+/*
* The following set of macros and inlines are used for the sending and
* receiving of IPIs (also known as IRQs). There are two flavors of IPIs,
* one that is associated with partition activity (SGI_XPC_ACTIVATE) and
@@ -722,13 +995,13 @@ xpc_IPI_send(AMO_t *amo, u64 flag, int nasid, int phys_cpuid, int vector)
* Flag the appropriate AMO variable and send an IPI to the specified node.
*/
static inline void
-xpc_activate_IRQ_send(u64 amos_page, int from_nasid, int to_nasid,
+xpc_activate_IRQ_send(u64 amos_page_pa, int from_nasid, int to_nasid,
int to_phys_cpuid)
{
int w_index = XPC_NASID_W_INDEX(from_nasid);
int b_index = XPC_NASID_B_INDEX(from_nasid);
- AMO_t *amos = (AMO_t *) __va(amos_page +
- (XP_MAX_PARTITIONS * sizeof(AMO_t)));
+ AMO_t *amos = (AMO_t *) __va(amos_page_pa +
+ (XPC_ACTIVATE_IRQ_AMOS * sizeof(AMO_t)));
(void) xpc_IPI_send(&amos[w_index], (1UL << b_index), to_nasid,
@@ -756,6 +1029,13 @@ xpc_IPI_send_reactivate(struct xpc_partition *part)
xpc_vars->act_nasid, xpc_vars->act_phys_cpuid);
}
+static inline void
+xpc_IPI_send_disengage(struct xpc_partition *part)
+{
+ xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0),
+ part->remote_act_nasid, part->remote_act_phys_cpuid);
+}
+
/*
* IPIs associated with SGI_XPC_NOTIFY IRQ.
@@ -836,6 +1116,7 @@ xpc_notify_IRQ_send_local(struct xpc_channel *ch, u8 ipi_flag,
/* given an AMO variable and a channel#, get its associated IPI flags */
#define XPC_GET_IPI_FLAGS(_amo, _c) ((u8) (((_amo) >> ((_c) * 8)) & 0xff))
+#define XPC_SET_IPI_FLAGS(_amo, _c, _f) (_amo) |= ((u64) (_f) << ((_c) * 8))
#define XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(_amo) ((_amo) & 0x0f0f0f0f0f0f0f0f)
#define XPC_ANY_MSG_IPI_FLAGS_SET(_amo) ((_amo) & 0x1010101010101010)
@@ -903,17 +1184,18 @@ xpc_IPI_send_local_msgrequest(struct xpc_channel *ch)
* cacheable mapping for the entire region. This will prevent speculative
* reading of cached copies of our lines from being issued which will cause
* a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64
- * (XP_MAX_PARTITIONS) AMO variables for message notification (xpc_main.c)
- * and an additional 16 AMO variables for partition activation (xpc_hb.c).
+ * AMO variables (based on XP_MAX_PARTITIONS) for message notification and an
+ * additional 128 AMO variables (based on XP_NASID_MASK_WORDS) for partition
+ * activation and 2 AMO variables for partition deactivation.
*/
static inline AMO_t *
-xpc_IPI_init(partid_t partid)
+xpc_IPI_init(int index)
{
- AMO_t *part_amo = xpc_vars->amos_page + partid;
+ AMO_t *amo = xpc_vars->amos_page + index;
- xpc_IPI_receive(part_amo);
- return part_amo;
+ (void) xpc_IPI_receive(amo); /* clear AMO variable */
+ return amo;
}
diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c
index 94698bea7be..abf4fc2a87b 100644
--- a/arch/ia64/sn/kernel/xpc_channel.c
+++ b/arch/ia64/sn/kernel/xpc_channel.c
@@ -57,6 +57,7 @@ xpc_initialize_channels(struct xpc_partition *part, partid_t partid)
spin_lock_init(&ch->lock);
sema_init(&ch->msg_to_pull_sema, 1); /* mutex */
+ sema_init(&ch->wdisconnect_sema, 0); /* event wait */
atomic_set(&ch->n_on_msg_allocate_wq, 0);
init_waitqueue_head(&ch->msg_allocate_wq);
@@ -166,6 +167,7 @@ xpc_setup_infrastructure(struct xpc_partition *part)
xpc_initialize_channels(part, partid);
atomic_set(&part->nchannels_active, 0);
+ atomic_set(&part->nchannels_engaged, 0);
/* local_IPI_amo were set to 0 by an earlier memset() */
@@ -555,8 +557,6 @@ xpc_allocate_msgqueues(struct xpc_channel *ch)
sema_init(&ch->notify_queue[i].sema, 0);
}
- sema_init(&ch->teardown_sema, 0); /* event wait */
-
spin_lock_irqsave(&ch->lock, irq_flags);
ch->flags |= XPC_C_SETUP;
spin_unlock_irqrestore(&ch->lock, irq_flags);
@@ -626,6 +626,55 @@ xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags)
/*
+ * Notify those who wanted to be notified upon delivery of their message.
+ */
+static void
+xpc_notify_senders(struct xpc_channel *ch, enum xpc_retval reason, s64 put)
+{
+ struct xpc_notify *notify;
+ u8 notify_type;
+ s64 get = ch->w_remote_GP.get - 1;
+
+
+ while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
+
+ notify = &ch->notify_queue[get % ch->local_nentries];
+
+ /*
+ * See if the notify entry indicates it was associated with
+ * a message who's sender wants to be notified. It is possible
+ * that it is, but someone else is doing or has done the
+ * notification.
+ */
+ notify_type = notify->type;
+ if (notify_type == 0 ||
+ cmpxchg(&notify->type, notify_type, 0) !=
+ notify_type) {
+ continue;
+ }
+
+ DBUG_ON(notify_type != XPC_N_CALL);
+
+ atomic_dec(&ch->n_to_notify);
+
+ if (notify->func != NULL) {
+ dev_dbg(xpc_chan, "notify->func() called, notify=0x%p, "
+ "msg_number=%ld, partid=%d, channel=%d\n",
+ (void *) notify, get, ch->partid, ch->number);
+
+ notify->func(reason, ch->partid, ch->number,
+ notify->key);
+
+ dev_dbg(xpc_chan, "notify->func() returned, "
+ "notify=0x%p, msg_number=%ld, partid=%d, "
+ "channel=%d\n", (void *) notify, get,
+ ch->partid, ch->number);
+ }
+ }
+}
+
+
+/*
* Free up message queues and other stuff that were allocated for the specified
* channel.
*
@@ -669,9 +718,6 @@ xpc_free_msgqueues(struct xpc_channel *ch)
ch->remote_msgqueue = NULL;
kfree(ch->notify_queue);
ch->notify_queue = NULL;
-
- /* in case someone is waiting for the teardown to complete */
- up(&ch->teardown_sema);
}
}
@@ -683,7 +729,7 @@ static void
xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
{
struct xpc_partition *part = &xpc_partitions[ch->partid];
- u32 ch_flags = ch->flags;
+ u32 channel_was_connected = (ch->flags & XPC_C_WASCONNECTED);
DBUG_ON(!spin_is_locked(&ch->lock));
@@ -701,12 +747,13 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
}
DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0);
- /* it's now safe to free the channel's message queues */
-
- xpc_free_msgqueues(ch);
- DBUG_ON(ch->flags & XPC_C_SETUP);
+ if (part->act_state == XPC_P_DEACTIVATING) {
+ /* can't proceed until the other side disengages from us */
+ if (xpc_partition_engaged(1UL << ch->partid)) {
+ return;
+ }
- if (part->act_state != XPC_P_DEACTIVATING) {
+ } else {
/* as long as the other side is up do the full protocol */
@@ -724,16 +771,42 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
}
}
+ /* wake those waiting for notify completion */
+ if (atomic_read(&ch->n_to_notify) > 0) {
+ /* >>> we do callout while holding ch->lock */
+ xpc_notify_senders(ch, ch->reason, ch->w_local_GP.put);
+ }
+
/* both sides are disconnected now */
- ch->flags = XPC_C_DISCONNECTED; /* clear all flags, but this one */
+ /* it's now safe to free the channel's message queues */
+ xpc_free_msgqueues(ch);
+
+ /* mark disconnected, clear all other flags except XPC_C_WDISCONNECT */
+ ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT));
atomic_dec(&part->nchannels_active);
- if (ch_flags & XPC_C_WASCONNECTED) {
+ if (channel_was_connected) {
dev_info(xpc_chan, "channel %d to partition %d disconnected, "
"reason=%d\n", ch->number, ch->partid, ch->reason);
}
+
+ if (ch->flags & XPC_C_WDISCONNECT) {
+ spin_unlock_irqrestore(&ch->lock, *irq_flags);
+ up(&ch->wdisconnect_sema);
+ spin_lock_irqsave(&ch->lock, *irq_flags);
+
+ } else if (ch->delayed_IPI_flags) {
+ if (part->act_state != XPC_P_DEACTIVATING) {
+ /* time to take action on any delayed IPI flags */
+ spin_lock(&part->IPI_lock);
+ XPC_SET_IPI_FLAGS(part->local_IPI_amo, ch->number,
+ ch->delayed_IPI_flags);
+ spin_unlock(&part->IPI_lock);
+ }
+ ch->delayed_IPI_flags = 0;
+ }
}
@@ -754,6 +827,19 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
spin_lock_irqsave(&ch->lock, irq_flags);
+again:
+
+ if ((ch->flags & XPC_C_DISCONNECTED) &&
+ (ch->flags & XPC_C_WDISCONNECT)) {
+ /*
+ * Delay processing IPI flags until thread waiting disconnect
+ * has had a chance to see that the channel is disconnected.
+ */
+ ch->delayed_IPI_flags |= IPI_flags;
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return;
+ }
+
if (IPI_flags & XPC_IPI_CLOSEREQUEST) {
@@ -764,7 +850,7 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
/*
* If RCLOSEREQUEST is set, we're probably waiting for
* RCLOSEREPLY. We should find it and a ROPENREQUEST packed
- * with this RCLOSEQREUQEST in the IPI_flags.
+ * with this RCLOSEREQUEST in the IPI_flags.
*/
if (ch->flags & XPC_C_RCLOSEREQUEST) {
@@ -779,14 +865,22 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
/* both sides have finished disconnecting */
xpc_process_disconnect(ch, &irq_flags);
+ DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
+ goto again;
}
if (ch->flags & XPC_C_DISCONNECTED) {
- // >>> explain this section
-
if (!(IPI_flags & XPC_IPI_OPENREQUEST)) {
- DBUG_ON(part->act_state !=
- XPC_P_DEACTIVATING);
+ if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo,
+ ch_number) & XPC_IPI_OPENREQUEST)) {
+
+ DBUG_ON(ch->delayed_IPI_flags != 0);
+ spin_lock(&part->IPI_lock);
+ XPC_SET_IPI_FLAGS(part->local_IPI_amo,
+ ch_number,
+ XPC_IPI_CLOSEREQUEST);
+ spin_unlock(&part->IPI_lock);
+ }
spin_unlock_irqrestore(&ch->lock, irq_flags);
return;
}
@@ -816,9 +910,13 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
}
XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags);
- } else {
- xpc_process_disconnect(ch, &irq_flags);
+
+ DBUG_ON(IPI_flags & XPC_IPI_CLOSEREPLY);
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return;
}
+
+ xpc_process_disconnect(ch, &irq_flags);
}
@@ -834,7 +932,20 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
}
DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
- DBUG_ON(!(ch->flags & XPC_C_RCLOSEREQUEST));
+
+ if (!(ch->flags & XPC_C_RCLOSEREQUEST)) {
+ if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo, ch_number)
+ & XPC_IPI_CLOSEREQUEST)) {
+
+ DBUG_ON(ch->delayed_IPI_flags != 0);
+ spin_lock(&part->IPI_lock);
+ XPC_SET_IPI_FLAGS(part->local_IPI_amo,
+ ch_number, XPC_IPI_CLOSEREPLY);
+ spin_unlock(&part->IPI_lock);
+ }
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return;
+ }
ch->flags |= XPC_C_RCLOSEREPLY;
@@ -852,8 +963,14 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
"channel=%d\n", args->msg_size, args->local_nentries,
ch->partid, ch->number);
- if ((ch->flags & XPC_C_DISCONNECTING) ||
- part->act_state == XPC_P_DEACTIVATING) {
+ if (part->act_state == XPC_P_DEACTIVATING ||
+ (ch->flags & XPC_C_ROPENREQUEST)) {
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return;
+ }
+
+ if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) {
+ ch->delayed_IPI_flags |= XPC_IPI_OPENREQUEST;
spin_unlock_irqrestore(&ch->lock, irq_flags);
return;
}
@@ -867,8 +984,11 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
* msg_size = size of channel's messages in bytes
* local_nentries = remote partition's local_nentries
*/
- DBUG_ON(args->msg_size == 0);
- DBUG_ON(args->local_nentries == 0);
+ if (args->msg_size == 0 || args->local_nentries == 0) {
+ /* assume OPENREQUEST was delayed by mistake */
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return;
+ }
ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING);
ch->remote_nentries = args->local_nentries;
@@ -906,7 +1026,13 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
spin_unlock_irqrestore(&ch->lock, irq_flags);
return;
}
- DBUG_ON(!(ch->flags & XPC_C_OPENREQUEST));
+ if (!(ch->flags & XPC_C_OPENREQUEST)) {
+ XPC_DISCONNECT_CHANNEL(ch, xpcOpenCloseError,
+ &irq_flags);
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+ return;
+ }
+
DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST));
DBUG_ON(ch->flags & XPC_C_CONNECTED);
@@ -960,8 +1086,8 @@ xpc_connect_channel(struct xpc_channel *ch)
struct xpc_registration *registration = &xpc_registrations[ch->number];
- if (down_interruptible(&registration->sema) != 0) {
- return xpcInterrupted;
+ if (down_trylock(&registration->sema) != 0) {
+ return xpcRetry;
}
if (!XPC_CHANNEL_REGISTERED(ch->number)) {
@@ -1040,55 +1166,6 @@ xpc_connect_channel(struct xpc_channel *ch)
/*
- * Notify those who wanted to be notified upon delivery of their message.
- */
-static void
-xpc_notify_senders(struct xpc_channel *ch, enum xpc_retval reason, s64 put)
-{
- struct xpc_notify *notify;
- u8 notify_type;
- s64 get = ch->w_remote_GP.get - 1;
-
-
- while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
-
- notify = &ch->notify_queue[get % ch->local_nentries];
-
- /*
- * See if the notify entry indicates it was associated with
- * a message who's sender wants to be notified. It is possible
- * that it is, but someone else is doing or has done the
- * notification.
- */
- notify_type = notify->type;
- if (notify_type == 0 ||
- cmpxchg(&notify->type, notify_type, 0) !=
- notify_type) {
- continue;
- }
-
- DBUG_ON(notify_type != XPC_N_CALL);
-
- atomic_dec(&ch->n_to_notify);
-
- if (notify->func != NULL) {
- dev_dbg(xpc_chan, "notify->func() called, notify=0x%p, "
- "msg_number=%ld, partid=%d, channel=%d\n",
- (void *) notify, get, ch->partid, ch->number);
-
- notify->func(reason, ch->partid, ch->number,
- notify->key);
-
- dev_dbg(xpc_chan, "notify->func() returned, "
- "notify=0x%p, msg_number=%ld, partid=%d, "
- "channel=%d\n", (void *) notify, get,
- ch->partid, ch->number);
- }
- }
-}
-
-
-/*
* Clear some of the msg flags in the local message queue.
*/
static inline void
@@ -1240,6 +1317,7 @@ xpc_process_channel_activity(struct xpc_partition *part)
u64 IPI_amo, IPI_flags;
struct xpc_channel *ch;
int ch_number;
+ u32 ch_flags;
IPI_amo = xpc_get_IPI_flags(part);
@@ -1266,8 +1344,9 @@ xpc_process_channel_activity(struct xpc_partition *part)
xpc_process_openclose_IPI(part, ch_number, IPI_flags);
}
+ ch_flags = ch->flags; /* need an atomic snapshot of flags */
- if (ch->flags & XPC_C_DISCONNECTING) {
+ if (ch_flags & XPC_C_DISCONNECTING) {
spin_lock_irqsave(&ch->lock, irq_flags);
xpc_process_disconnect(ch, &irq_flags);
spin_unlock_irqrestore(&ch->lock, irq_flags);
@@ -1278,9 +1357,9 @@ xpc_process_channel_activity(struct xpc_partition *part)
continue;
}
- if (!(ch->flags & XPC_C_CONNECTED)) {
- if (!(ch->flags & XPC_C_OPENREQUEST)) {
- DBUG_ON(ch->flags & XPC_C_SETUP);
+ if (!(ch_flags & XPC_C_CONNECTED)) {
+ if (!(ch_flags & XPC_C_OPENREQUEST)) {
+ DBUG_ON(ch_flags & XPC_C_SETUP);
(void) xpc_connect_channel(ch);
} else {
spin_lock_irqsave(&ch->lock, irq_flags);
@@ -1305,8 +1384,8 @@ xpc_process_channel_activity(struct xpc_partition *part)
/*
- * XPC's heartbeat code calls this function to inform XPC that a partition has
- * gone down. XPC responds by tearing down the XPartition Communication
+ * XPC's heartbeat code calls this function to inform XPC that a partition is
+ * going down. XPC responds by tearing down the XPartition Communication
* infrastructure used for the just downed partition.
*
* XPC's heartbeat code will never call this function and xpc_partition_up()
@@ -1314,7 +1393,7 @@ xpc_process_channel_activity(struct xpc_partition *part)
* at the same time.
*/
void
-xpc_partition_down(struct xpc_partition *part, enum xpc_retval reason)
+xpc_partition_going_down(struct xpc_partition *part, enum xpc_retval reason)
{
unsigned long irq_flags;
int ch_number;
@@ -1330,12 +1409,11 @@ xpc_partition_down(struct xpc_partition *part, enum xpc_retval reason)
}
- /* disconnect all channels associated with the downed partition */
+ /* disconnect channels associated with the partition going down */
for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
ch = &part->channels[ch_number];
-
xpc_msgqueue_ref(ch);
spin_lock_irqsave(&ch->lock, irq_flags);
@