aboutsummaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@g5.osdl.org>2005-07-07 10:24:51 -0700
committerLinus Torvalds <torvalds@g5.osdl.org>2005-07-07 10:24:51 -0700
commit043d051615aa5da09a7e44f1edbb69798458e067 (patch)
treea0bfe7f6ed6efa4e0eb7f6b9891a0dc3f2fafe57 /arch
parentc101f3136cc98a003d0d16be6fab7d0d950581a6 (diff)
parent21517a57e838a1fbb7a54a8a77501024e77f83e0 (diff)
Merge master.kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6
Diffstat (limited to 'arch')
-rw-r--r--arch/ia64/kernel/Makefile1
-rw-r--r--arch/ia64/kernel/acpi.c4
-rw-r--r--arch/ia64/kernel/numa.c57
-rw-r--r--arch/ia64/kernel/signal.c2
-rw-r--r--arch/ia64/kernel/smpboot.c41
-rw-r--r--arch/ia64/kernel/traps.c6
-rw-r--r--arch/ia64/mm/discontig.c432
-rw-r--r--arch/ia64/mm/init.c3
-rw-r--r--arch/ia64/sn/include/pci/pcibr_provider.h151
-rw-r--r--arch/ia64/sn/include/pci/pic.h261
-rw-r--r--arch/ia64/sn/include/pci/tiocp.h256
-rw-r--r--arch/ia64/sn/include/xtalk/hubdev.h2
-rw-r--r--arch/ia64/sn/kernel/io_init.c185
-rw-r--r--arch/ia64/sn/kernel/irq.c255
-rw-r--r--arch/ia64/sn/kernel/setup.c13
-rw-r--r--arch/ia64/sn/kernel/tiocx.c4
-rw-r--r--arch/ia64/sn/pci/pci_dma.c3
-rw-r--r--arch/ia64/sn/pci/pcibr/pcibr_ate.c2
-rw-r--r--arch/ia64/sn/pci/pcibr/pcibr_dma.c15
-rw-r--r--arch/ia64/sn/pci/pcibr/pcibr_provider.c48
-rw-r--r--arch/ia64/sn/pci/pcibr/pcibr_reg.c8
-rw-r--r--arch/ia64/sn/pci/tioca_provider.c3
22 files changed, 589 insertions, 1163 deletions
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index b2e2f6509eb..e1fb68ddec2 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_IA64_PALINFO) += palinfo.o
obj-$(CONFIG_IOSAPIC) += iosapic.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_SMP) += smp.o smpboot.o domain.o
+obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o
obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index cda06f88c66..542256e98e6 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -640,9 +640,11 @@ acpi_boot_init (void)
if (smp_boot_data.cpu_phys_id[cpu] != hard_smp_processor_id())
node_cpuid[i++].phys_id = smp_boot_data.cpu_phys_id[cpu];
}
- build_cpu_to_node_map();
# endif
#endif
+#ifdef CONFIG_ACPI_NUMA
+ build_cpu_to_node_map();
+#endif
/* Make boot-up look pretty */
printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus, total_cpus);
return 0;
diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c
new file mode 100644
index 00000000000..a68ce667809
--- /dev/null
+++ b/arch/ia64/kernel/numa.c
@@ -0,0 +1,57 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ia64 kernel NUMA specific stuff
+ *
+ * Copyright (C) 2002 Erich Focht <efocht@ess.nec.de>
+ * Copyright (C) 2004 Silicon Graphics, Inc.
+ * Jesse Barnes <jbarnes@sgi.com>
+ */
+#include <linux/config.h>
+#include <linux/topology.h>
+#include <linux/module.h>
+#include <asm/processor.h>
+#include <asm/smp.h>
+
+u8 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
+EXPORT_SYMBOL(cpu_to_node_map);
+
+cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
+
+/**
+ * build_cpu_to_node_map - setup cpu to node and node to cpumask arrays
+ *
+ * Build cpu to node mapping and initialize the per node cpu masks using
+ * info from the node_cpuid array handed to us by ACPI.
+ */
+void __init build_cpu_to_node_map(void)
+{
+ int cpu, i, node;
+
+ for(node=0; node < MAX_NUMNODES; node++)
+ cpus_clear(node_to_cpu_mask[node]);
+
+ for(cpu = 0; cpu < NR_CPUS; ++cpu) {
+ node = -1;
+ for (i = 0; i < NR_CPUS; ++i)
+ if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {
+ node = node_cpuid[i].nid;
+ break;
+ }
+ cpu_to_node_map[cpu] = (node >= 0) ? node : 0;
+ if (node >= 0)
+ cpu_set(cpu, node_to_cpu_mask[node]);
+ }
+}
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index edd9f07860b..b8a0a7d257a 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -143,6 +143,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
__copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16);
psr->mfh = 0; /* drop signal handler's fph contents... */
+ preempt_disable();
if (psr->dfh)
ia64_drop_fpu(current);
else {
@@ -150,6 +151,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
__ia64_load_fpu(current->thread.fph);
ia64_set_local_fpu_owner(current);
}
+ preempt_enable();
}
return err;
}
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 623b0a54670..7d72c0d872b 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -525,47 +525,6 @@ smp_build_cpu_map (void)
}
}
-#ifdef CONFIG_NUMA
-
-/* on which node is each logical CPU (one cacheline even for 64 CPUs) */
-u8 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
-EXPORT_SYMBOL(cpu_to_node_map);
-/* which logical CPUs are on which nodes */
-cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
-
-/*
- * Build cpu to node mapping and initialize the per node cpu masks.
- */
-void __init
-build_cpu_to_node_map (void)
-{
- int cpu, i, node;
-
- for(node=0; node<MAX_NUMNODES; node++)
- cpus_clear(node_to_cpu_mask[node]);
- for(cpu = 0; cpu < NR_CPUS; ++cpu) {
- /*
- * All Itanium NUMA platforms I know use ACPI, so maybe we
- * can drop this ifdef completely. [EF]
- */
-#ifdef CONFIG_ACPI_NUMA
- node = -1;
- for (i = 0; i < NR_CPUS; ++i)
- if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {
- node = node_cpuid[i].nid;
- break;
- }
-#else
-# error Fixme: Dunno how to build CPU-to-node map.
-#endif
- cpu_to_node_map[cpu] = (node >= 0) ? node : 0;
- if (node >= 0)
- cpu_set(cpu, node_to_cpu_mask[node]);
- }
-}
-
-#endif /* CONFIG_NUMA */
-
/*
* Cycle through the APs sending Wakeup IPIs to boot each.
*/
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index e7e520d90f0..4440c8343fa 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -90,14 +90,16 @@ die (const char *str, struct pt_regs *regs, long err)
.lock_owner_depth = 0
};
static int die_counter;
+ int cpu = get_cpu();
- if (die.lock_owner != smp_processor_id()) {
+ if (die.lock_owner != cpu) {
console_verbose();
spin_lock_irq(&die.lock);
- die.lock_owner = smp_processor_id();
+ die.lock_owner = cpu;
die.lock_owner_depth = 0;
bust_spinlocks(1);
}
+ put_cpu();
if (++die.lock_owner_depth < 3) {
printk("%s[%d]: %s %ld [%d]\n",
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index f3fd528ead3..b5c90e54819 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -44,150 +44,7 @@ struct early_node_data {
};
static struct early_node_data mem_data[MAX_NUMNODES] __initdata;
-
-/**
- * reassign_cpu_only_nodes - called from find_memory to move CPU-only nodes to a memory node
- *
- * This function will move nodes with only CPUs (no memory)
- * to a node with memory which is at the minimum numa_slit distance.
- * Any reassigments will result in the compression of the nodes
- * and renumbering the nid values where appropriate.
- * The static declarations below are to avoid large stack size which
- * makes the code not re-entrant.
- */
-static void __init reassign_cpu_only_nodes(void)
-{
- struct node_memblk_s *p;
- int i, j, k, nnode, nid, cpu, cpunid, pxm;
- u8 cslit, slit;
- static DECLARE_BITMAP(nodes_with_mem, MAX_NUMNODES) __initdata;
- static u8 numa_slit_fix[MAX_NUMNODES * MAX_NUMNODES] __initdata;
- static int node_flip[MAX_NUMNODES] __initdata;
- static int old_nid_map[NR_CPUS] __initdata;
-
- for (nnode = 0, p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++)
- if (!test_bit(p->nid, (void *) nodes_with_mem)) {
- set_bit(p->nid, (void *) nodes_with_mem);
- nnode++;
- }
-
- /*
- * All nids with memory.
- */
- if (nnode == num_online_nodes())
- return;
-
- /*
- * Change nids and attempt to migrate CPU-only nodes
- * to the best numa_slit (closest neighbor) possible.
- * For reassigned CPU nodes a nid can't be arrived at
- * until after this loop because the target nid's new
- * identity might not have been established yet. So
- * new nid values are fabricated above num_online_nodes() and
- * mapped back later to their true value.
- */
- /* MCD - This code is a bit complicated, but may be unnecessary now.
- * We can now handle much more interesting node-numbering.
- * The old requirement that 0 <= nid <= numnodes <= MAX_NUMNODES
- * and that there be no holes in the numbering 0..numnodes
- * has become simply 0 <= nid <= MAX_NUMNODES.
- */
- nid = 0;
- for_each_online_node(i) {
- if (test_bit(i, (void *) nodes_with_mem)) {
- /*
- * Save original nid value for numa_slit
- * fixup and node_cpuid reassignments.
- */
- node_flip[nid] = i;
-
- if (i == nid) {
- nid++;
- continue;
- }
-
- for (p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++)
- if (p->nid == i)
- p->nid = nid;
-
- cpunid = nid;
- nid++;
- } else
- cpunid = MAX_NUMNODES;
-
- for (cpu = 0; cpu < NR_CPUS; cpu++)
- if (node_cpuid[cpu].nid == i) {
- /*
- * For nodes not being reassigned just
- * fix the cpu's nid and reverse pxm map
- */
- if (cpunid < MAX_NUMNODES) {
- pxm = nid_to_pxm_map[i];
- pxm_to_nid_map[pxm] =
- node_cpuid[cpu].nid = cpunid;
- continue;
- }
-
- /*
- * For nodes being reassigned, find best node by
- * numa_slit information and then make a temporary
- * nid value based on current nid and num_online_nodes().
- */
- slit = 0xff;
- k = 2*num_online_nodes();
- for_each_online_node(j) {
- if (i == j)
- continue;
- else if (test_bit(j, (void *) nodes_with_mem)) {
- cslit = numa_slit[i * num_online_nodes() + j];
- if (cslit < slit) {
- k = num_online_nodes() + j;
- slit = cslit;
- }
- }
- }
-
- /* save old nid map so we can update the pxm */
- old_nid_map[cpu] = node_cpuid[cpu].nid;
- node_cpuid[cpu].nid = k;
- }
- }
-
- /*
- * Fixup temporary nid values for CPU-only nodes.
- */
- for (cpu = 0; cpu < NR_CPUS; cpu++)
- if (node_cpuid[cpu].nid == (2*num_online_nodes())) {
- pxm = nid_to_pxm_map[old_nid_map[cpu]];
- pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = nnode - 1;
- } else {
- for (i = 0; i < nnode; i++) {
- if (node_flip[i] != (node_cpuid[cpu].nid - num_online_nodes()))
- continue;
-
- pxm = nid_to_pxm_map[old_nid_map[cpu]];
- pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = i;
- break;
- }
- }
-
- /*
- * Fix numa_slit by compressing from larger
- * nid array to reduced nid array.
- */
- for (i = 0; i < nnode; i++)
- for (j = 0; j < nnode; j++)
- numa_slit_fix[i * nnode + j] =
- numa_slit[node_flip[i] * num_online_nodes() + node_flip[j]];
-
- memcpy(numa_slit, numa_slit_fix, sizeof (numa_slit));
-
- nodes_clear(node_online_map);
- for (i = 0; i < nnode; i++)
- node_set_online(i);
-
- return;
-}
+static nodemask_t memory_less_mask __initdata;
/*
* To prevent cache aliasing effects, align per-node structures so that they
@@ -233,44 +90,101 @@ static int __init build_node_maps(unsigned long start, unsigned long len,
}
/**
- * early_nr_phys_cpus_node - return number of physical cpus on a given node
+ * early_nr_cpus_node - return number of cpus on a given node
* @node: node to check
*
- * Count the number of physical cpus on @node. These are cpus that actually
- * exist. We can't use nr_cpus_node() yet because
+ * Count the number of cpus on @node. We can't use nr_cpus_node() yet because
* acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been
- * called yet.
+ * called yet. Note that node 0 will also count all non-existent cpus.
*/
-static int early_nr_phys_cpus_node(int node)
+static int __init early_nr_cpus_node(int node)
{
int cpu, n = 0;
for (cpu = 0; cpu < NR_CPUS; cpu++)
if (node == node_cpuid[cpu].nid)
- if ((cpu == 0) || node_cpuid[cpu].phys_id)
- n++;
+ n++;
return n;
}
+/**
+ * compute_pernodesize - compute size of pernode data
+ * @node: the node id.
+ */
+static unsigned long __init compute_pernodesize(int node)
+{
+ unsigned long pernodesize = 0, cpus;
+
+ cpus = early_nr_cpus_node(node);
+ pernodesize += PERCPU_PAGE_SIZE * cpus;
+ pernodesize += node * L1_CACHE_BYTES;
+ pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
+ pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
+ pernodesize = PAGE_ALIGN(pernodesize);
+ return pernodesize;
+}
/**
- * early_nr_cpus_node - return number of cpus on a given node
- * @node: node to check
+ * per_cpu_node_setup - setup per-cpu areas on each node
+ * @cpu_data: per-cpu area on this node
+ * @node: node to setup
*
- * Count the number of cpus on @node. We can't use nr_cpus_node() yet because
- * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been
- * called yet. Note that node 0 will also count all non-existent cpus.
+ * Copy the static per-cpu data into the region we just set aside and then
+ * setup __per_cpu_offset for each CPU on this node. Return a pointer to
+ * the end of the area.
*/
-static int early_nr_cpus_node(int node)
+static void *per_cpu_node_setup(void *cpu_data, int node)
{
- int cpu, n = 0;
+#ifdef CONFIG_SMP
+ int cpu;
- for (cpu = 0; cpu < NR_CPUS; cpu++)
- if (node == node_cpuid[cpu].nid)
- n++;
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ if (node == node_cpuid[cpu].nid) {
+ memcpy(__va(cpu_data), __phys_per_cpu_start,
+ __per_cpu_end - __per_cpu_start);
+ __per_cpu_offset[cpu] = (char*)__va(cpu_data) -
+ __per_cpu_start;
+ cpu_data += PERCPU_PAGE_SIZE;
+ }
+ }
+#endif
+ return cpu_data;
+}
- return n;
+/**
+ * fill_pernode - initialize pernode data.
+ * @node: the node id.
+ * @pernode: physical address of pernode data
+ * @pernodesize: size of the pernode data
+ */
+static void __init fill_pernode(int node, unsigned long pernode,
+ unsigned long pernodesize)
+{
+ void *cpu_data;
+ int cpus = early_nr_cpus_node(node);
+ struct bootmem_data *bdp = &mem_data[node].bootmem_data;
+
+ mem_data[node].pernode_addr = pernode;
+ mem_data[node].pernode_size = pernodesize;
+ memset(__va(pernode), 0, pernodesize);
+
+ cpu_data = (void *)pernode;
+ pernode += PERCPU_PAGE_SIZE * cpus;
+ pernode += node * L1_CACHE_BYTES;
+
+ mem_data[node].pgdat = __va(pernode);
+ pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
+
+ mem_data[node].node_data = __va(pernode);
+ pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
+
+ mem_data[node].pgdat->bdata = bdp;
+ pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
+
+ cpu_data = per_cpu_node_setup(cpu_data, node);
+
+ return;
}
/**
@@ -304,9 +218,8 @@ static int early_nr_cpus_node(int node)
static int __init find_pernode_space(unsigned long start, unsigned long len,
int node)
{
- unsigned long epfn, cpu, cpus, phys_cpus;
+ unsigned long epfn;
unsigned long pernodesize = 0, pernode, pages, mapsize;
- void *cpu_data;
struct bootmem_data *bdp = &mem_data[node].bootmem_data;
epfn = (start + len) >> PAGE_SHIFT;
@@ -329,49 +242,12 @@ static int __init find_pernode_space(unsigned long start, unsigned long len,
* Calculate total size needed, incl. what's necessary
* for good alignment and alias prevention.
*/
- cpus = early_nr_cpus_node(node);
- phys_cpus = early_nr_phys_cpus_node(node);
- pernodesize += PERCPU_PAGE_SIZE * cpus;
- pernodesize += node * L1_CACHE_BYTES;
- pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
- pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
- pernodesize = PAGE_ALIGN(pernodesize);
+ pernodesize = compute_pernodesize(node);
pernode = NODEDATA_ALIGN(start, node);
/* Is this range big enough for what we want to store here? */
- if (start + len > (pernode + pernodesize + mapsize)) {
- mem_data[node].pernode_addr = pernode;
- mem_data[node].pernode_size = pernodesize;
- memset(__va(pernode), 0, pernodesize);
-
- cpu_data = (void *)pernode;
- pernode += PERCPU_PAGE_SIZE * cpus;
- pernode += node * L1_CACHE_BYTES;
-
- mem_data[node].pgdat = __va(pernode);
- pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
-
- mem_data[node].node_data = __va(pernode);
- pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
-
- mem_data[node].pgdat->bdata = bdp;
- pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
-
- /*
- * Copy the static per-cpu data into the region we
- * just set aside and then setup __per_cpu_offset
- * for each CPU on this node.
- */
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
- if (node == node_cpuid[cpu].nid) {
- memcpy(__va(cpu_data), __phys_per_cpu_start,
- __per_cpu_end - __per_cpu_start);
- __per_cpu_offset[cpu] = (char*)__va(cpu_data) -
- __per_cpu_start;
- cpu_data += PERCPU_PAGE_SIZE;
- }
- }
- }
+ if (start + len > (pernode + pernodesize + mapsize))
+ fill_pernode(node, pernode, pernodesize);
return 0;
}
@@ -411,6 +287,9 @@ static void __init reserve_pernode_space(void)
for_each_online_node(node) {
pg_data_t *pdp = mem_data[node].pgdat;
+ if (node_isset(node, memory_less_mask))
+ continue;
+
bdp = pdp->bdata;
/* First the bootmem_map itself */
@@ -436,8 +315,8 @@ static void __init reserve_pernode_space(void)
*/
static void __init initialize_pernode_data(void)
{
- int cpu, node;
pg_data_t *pgdat_list[MAX_NUMNODES];
+ int cpu, node;
for_each_online_node(node)
pgdat_list[node] = mem_data[node].pgdat;
@@ -447,12 +326,99 @@ static void __init initialize_pernode_data(void)
memcpy(mem_data[node].node_data->pg_data_ptrs, pgdat_list,
sizeof(pgdat_list));
}
-
+#ifdef CONFIG_SMP
/* Set the node_data pointer for each per-cpu struct */
for (cpu = 0; cpu < NR_CPUS; cpu++) {
node = node_cpuid[cpu].nid;
per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data;
}
+#else
+ {
+ struct cpuinfo_ia64 *cpu0_cpu_info;
+ cpu = 0;
+ node = node_cpuid[cpu].nid;
+ cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start +
+ ((char *)&per_cpu__cpu_info - __per_cpu_start));
+ cpu0_cpu_info->node_data = mem_data[node].node_data;
+ }
+#endif /* CONFIG_SMP */
+}
+
+/**
+ * memory_less_node_alloc - * attempt to allocate memory on the best NUMA slit
+ * node but fall back to any other node when __alloc_bootmem_node fails
+ * for best.
+ * @nid: node id
+ * @pernodesize: size of this node's pernode data
+ * @align: alignment to use for this node's pernode data
+ */
+static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize,
+ unsigned long align)
+{
+ void *ptr = NULL;
+ u8 best = 0xff;
+ int bestnode = -1, node;
+
+ for_each_online_node(node) {
+ if (node_isset(node, memory_less_mask))
+ continue;
+ else if (node_distance(nid, node) < best) {
+ best = node_distance(nid, node);
+ bestnode = node;
+ }
+ }
+
+ ptr = __alloc_bootmem_node(mem_data[bestnode].pgdat,
+ pernodesize, align, __pa(MAX_DMA_ADDRESS));
+
+ if (!ptr)
+ panic("NO memory for memory less node\n");
+ return ptr;
+}
+
+/**
+ * pgdat_insert - insert the pgdat into global pgdat_list
+ * @pgdat: the pgdat for a node.
+ */
+static void __init pgdat_insert(pg_data_t *pgdat)
+{
+ pg_data_t *prev = NULL, *next;
+
+ for_each_pgdat(next)
+ if (pgdat->node_id < next->node_id)
+ break;
+ else
+ prev = next;
+
+ if (prev) {
+ prev->pgdat_next = pgdat;
+ pgdat->pgdat_next = next;
+ } else {
+ pgdat->pgdat_next = pgdat_list;
+ pgdat_list = pgdat;
+ }
+
+ return;
+}
+
+/**
+ * memory_less_nodes - allocate and initialize CPU only nodes pernode
+ * information.
+ */
+static void __init memory_less_nodes(void)
+{
+ unsigned long pernodesize;
+ void *pernode;
+ int node;
+
+ for_each_node_mask(node, memory_less_mask) {
+ pernodesize = compute_pernodesize(node);
+ pernode = memory_less_node_alloc(node, pernodesize,
+ (node) ? (node * PERCPU_PAGE_SIZE) : (1024*1024));
+ fill_pernode(node, __pa(pernode), pernodesize);
+ }
+
+ return;
}
/**
@@ -472,16 +438,19 @@ void __init find_memory(void)
node_set_online(0);
}
+ nodes_or(memory_less_mask, memory_less_mask, node_online_map);
min_low_pfn = -1;
max_low_pfn = 0;
- if (num_online_nodes() > 1)
- reassign_cpu_only_nodes();
-
/* These actually end up getting called by call_pernode_memory() */
efi_memmap_walk(filter_rsvd_memory, build_node_maps);
efi_memmap_walk(filter_rsvd_memory, find_pernode_space);
+ for_each_online_node(node)
+ if (mem_data[node].bootmem_data.node_low_pfn) {
+ node_clear(node, memory_less_mask);
+ mem_data[node].min_pfn = ~0UL;
+ }
/*
* Initialize the boot memory maps in reverse order since that's
* what the bootmem allocator expects
@@ -492,17 +461,14 @@ void __init find_memory(void)
if (!node_online(node))
continue;
+ else if (node_isset(node, memory_less_mask))
+ continue;
bdp = &mem_data[node].bootmem_data;
pernode = mem_data[node].pernode_addr;
pernodesize = mem_data[node].pernode_size;
map = pernode + pernodesize;
- /* Sanity check... */
- if (!pernode)
- panic("pernode space for node %d "
- "could not be allocated!", node);
-
init_bootmem_node(mem_data[node].pgdat,
map>>PAGE_SHIFT,
bdp->node_boot_start>>PAGE_SHIFT,
@@ -512,6 +478,7 @@ void __init find_memory(void)
efi_memmap_walk(filter_rsvd_memory, free_node_bootmem);
reserve_pernode_space();
+ memory_less_nodes();
initialize_pernode_data();
max_pfn = max_low_pfn;
@@ -519,6 +486,7 @@ void __init find_memory(void)
find_initrd();
}
+#ifdef CONFIG_SMP
/**
* per_cpu_init - setup per-cpu variables
*
@@ -529,15 +497,15 @@ void *per_cpu_init(void)
{
int cpu;
- if (smp_processor_id() == 0) {
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
- per_cpu(local_per_cpu_offset, cpu) =
- __per_cpu_offset[cpu];
- }
- }
+ if (smp_processor_id() != 0)
+ return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
+
+ for (cpu = 0; cpu < NR_CPUS; cpu++)
+ per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
}
+#endif /* CONFIG_SMP */
/**
* show_mem - give short summary of memory stats
@@ -680,12 +648,13 @@ void __init paging_init(void)
max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
- /* so min() will work in count_node_pages */
- for_each_online_node(node)
- mem_data[node].min_pfn = ~0UL;
-
efi_memmap_walk(filter_rsvd_memory, count_node_pages);
+ vmalloc_end -= PAGE_ALIGN(max_low_pfn * sizeof(struct page));
+ vmem_map = (struct page *) vmalloc_end;
+ efi_memmap_walk(create_mem_map_page_table, NULL);
+ printk("Virtual mem_map starts at 0x%p\n", vmem_map);
+
for_each_online_node(node) {
memset(zones_size, 0, sizeof(zones_size));
memset(zholes_size, 0, sizeof(zholes_size));
@@ -719,15 +688,6 @@ void __init paging_init(void)
mem_data[node].num_dma_physpages);
}
- if (node == 0) {
- vmalloc_end -=
- PAGE_ALIGN(max_low_pfn * sizeof(struct page));
- vmem_map = (struct page *) vmalloc_end;
-
- efi_memmap_walk(create_mem_map_page_table, NULL);
- printk("Virtual mem_map starts at 0x%p\n", vmem_map);
- }
-
pfn_offset = mem_data[node].min_pfn;
NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset;
@@ -735,5 +695,11 @@ void __init paging_init(void)
pfn_offset, zholes_size);
}
+ /*
+ * Make memory less nodes become a member of the known nodes.
+ */
+ for_each_node_mask(node, memory_less_mask)
+ pgdat_insert(mem_data[node].pgdat);
+
zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
}
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 4eb2f52b87a..65f9958db9f 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -597,7 +597,8 @@ mem_init (void)
kclist_add(&kcore_kernel, _stext, _end - _stext);
for_each_pgdat(pgdat)
- totalram_pages += free_all_bootmem_node(pgdat);
+ if (pgdat->bdata->node_bootmem_map)
+ totalram_pages += free_all_bootmem_node(pgdat);
reserved_pages = 0;
efi_memmap_walk(count_reserved_pages, &reserved_pages);
diff --git a/arch/ia64/sn/include/pci/pcibr_provider.h b/arch/ia64/sn/include/pci/pcibr_provider.h
deleted file mode 100644
index 1cd291d8bad..00000000000
--- a/arch/ia64/sn/include/pci/pcibr_provider.h
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1992-1997,2000-2004 Silicon Graphics, Inc. All rights reserved.
- */
-#ifndef _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H
-#define _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H
-
-/* Workarounds */
-#define PV907516 (1 << 1) /* TIOCP: Don't write the write buffer flush reg */
-
-#define BUSTYPE_MASK 0x1
-
-/* Macros given a pcibus structure */
-#define IS_PCIX(ps) ((ps)->pbi_bridge_mode & BUSTYPE_MASK)
-#define IS_PCI_BRIDGE_ASIC(asic) (asic == PCIIO_ASIC_TYPE_PIC || \
- asic == PCIIO_ASIC_TYPE_TIOCP)
-#define IS_PIC_SOFT(ps) (ps->pbi_bridge_type == PCIBR_BRIDGETYPE_PIC)
-
-
-/*
- * The different PCI Bridge types supported on the SGI Altix platforms
- */
-#define PCIBR_BRIDGETYPE_UNKNOWN -1
-#define PCIBR_BRIDGETYPE_PIC 2
-#define PCIBR_BRIDGETYPE_TIOCP 3
-
-/*
- * Bridge 64bit Direct Map Attributes
- */
-#define PCI64_ATTR_PREF (1ull << 59)
-#define PCI64_ATTR_PREC (1ull << 58)
-#define PCI64_ATTR_VIRTUAL (1ull << 57)
-#define PCI64_ATTR_BAR (1ull << 56)
-#define PCI64_ATTR_SWAP (1ull << 55)
-#define PCI64_ATTR_VIRTUAL1 (1ull << 54)
-
-#define PCI32_LOCAL_BASE 0
-#define PCI32_MAPPED_BASE 0x40000000
-#define PCI32_DIRECT_BASE 0x80000000
-
-#define IS_PCI32_MAPPED(x) ((uint64_t)(x) < PCI32_DIRECT_BASE && \
- (uint64_t)(x) >= PCI32_MAPPED_BASE)
-#define IS_PCI32_DIRECT(x) ((uint64_t)(x) >= PCI32_MAPPED_BASE)
-
-
-/*
- * Bridge PMU Address Transaltion Entry Attibutes
- */
-#define PCI32_ATE_V (0x1 << 0)
-#define PCI32_ATE_CO (0x1 << 1)
-#define PCI32_ATE_PREC (0x1 << 2)
-#define PCI32_ATE_PREF (0x1 << 3)
-#define PCI32_ATE_BAR (0x1 << 4)
-#define PCI32_ATE_ADDR_SHFT 12
-
-#define MINIMAL_ATES_REQUIRED(addr, size) \
- (IOPG(IOPGOFF(addr) + (size) - 1) == IOPG((size) - 1))
-
-#define MINIMAL_ATE_FLAG(addr, size) \
- (MINIMAL_ATES_REQUIRED((uint64_t)addr, size) ? 1 : 0)
-
-/* bit 29 of the pci address is the SWAP bit */
-#define ATE_SWAPSHIFT 29
-#define ATE_SWAP_ON(x) ((x) |= (1 << ATE_SWAPSHIFT))
-#define ATE_SWAP_OFF(x) ((x) &= ~(1 << ATE_SWAPSHIFT))
-
-/*
- * I/O page size
- */
-#if PAGE_SIZE < 16384
-#define IOPFNSHIFT 12 /* 4K per mapped page */
-#else
-#define IOPFNSHIFT 14 /* 16K per mapped page */
-#endif
-
-#define IOPGSIZE (1 << IOPFNSHIFT)
-#define IOPG(x) ((x) >> IOPFNSHIFT)
-#define IOPGOFF(x) ((x) & (IOPGSIZE-1))
-
-#define PCIBR_DEV_SWAP_DIR (1ull << 19)
-#define PCIBR_CTRL_PAGE_SIZE (0x1 << 21)
-
-/*
- * PMU resources.
- */
-struct ate_resource{
- uint64_t *ate;
- uint64_t num_ate;
- uint64_t lowest_free_index;
-};
-
-struct pcibus_info {
- struct pcibus_bussoft pbi_buscommon; /* common header */
- uint32_t pbi_moduleid;
- short pbi_bridge_type;
- short pbi_bridge_mode;
-
- struct ate_resource pbi_int_ate_resource;
- uint64_t pbi_int_ate_size;
-
- uint64_t pbi_dir_xbase;
- char pbi_hub_xid;
-
- uint64_t pbi_devreg[8];
- spinlock_t pbi_lock;
-
- uint32_t pbi_valid_devices;
- uint32_t pbi_enabled_devices;
-};
-
-/*
- * pcibus_info structure locking macros
- */
-inline static unsigned long
-pcibr_lock(struct pcibus_info *pcibus_info)
-{
- unsigned long flag;
- spin_lock_irqsave(&pcibus_info->pbi_lock, flag);
- return(flag);
-}
-#define pcibr_unlock(pcibus_info, flag) spin_unlock_irqrestore(&pcibus_info->pbi_lock, flag)
-
-extern int pcibr_init_provider(void);
-extern void *pcibr_bus_fixup(struct pcibus_bussoft *);
-extern dma_addr_t pcibr_dma_map(struct pci_dev *, unsigned long,