diff options
Diffstat (limited to 'arch/i386/mm/discontig.c')
-rw-r--r-- | arch/i386/mm/discontig.c | 140 |
1 files changed, 96 insertions, 44 deletions
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index 1726b4096b1..6711ce3f691 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -29,12 +29,16 @@ #include <linux/highmem.h> #include <linux/initrd.h> #include <linux/nodemask.h> +#include <linux/module.h> +#include <linux/kexec.h> + #include <asm/e820.h> #include <asm/setup.h> #include <asm/mmzone.h> #include <bios_ebda.h> struct pglist_data *node_data[MAX_NUMNODES]; +EXPORT_SYMBOL(node_data); bootmem_data_t node0_bdata; /* @@ -42,12 +46,16 @@ bootmem_data_t node0_bdata; * populated the following initialisation. * * 1) node_online_map - the map of all nodes configured (online) in the system - * 2) physnode_map - the mapping between a pfn and owning node - * 3) node_start_pfn - the starting page frame number for a node + * 2) node_start_pfn - the starting page frame number for a node * 3) node_end_pfn - the ending page fram number for a node */ +unsigned long node_start_pfn[MAX_NUMNODES]; +unsigned long node_end_pfn[MAX_NUMNODES]; + +#ifdef CONFIG_DISCONTIGMEM /* + * 4) physnode_map - the mapping between a pfn and owning node * physnode_map keeps track of the physical memory layout of a generic * numa node on a 256Mb break (each element of the array will * represent 256Mb of memory and will be marked by the node id. so, @@ -59,6 +67,7 @@ bootmem_data_t node0_bdata; * physnode_map[8- ] = -1; */ s8 physnode_map[MAX_ELEMENTS] = { [0 ... (MAX_ELEMENTS - 1)] = -1}; +EXPORT_SYMBOL(physnode_map); void memory_present(int nid, unsigned long start, unsigned long end) { @@ -85,9 +94,7 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, return (nr_pages + 1) * sizeof(struct page); } - -unsigned long node_start_pfn[MAX_NUMNODES]; -unsigned long node_end_pfn[MAX_NUMNODES]; +#endif extern unsigned long find_max_low_pfn(void); extern void find_max_pfn(void); @@ -108,6 +115,9 @@ unsigned long node_remap_offset[MAX_NUMNODES]; void *node_remap_start_vaddr[MAX_NUMNODES]; void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); +void *node_remap_end_vaddr[MAX_NUMNODES]; +void *node_remap_alloc_vaddr[MAX_NUMNODES]; + /* * FLAT - support for basic PC memory model with discontig enabled, essentially * a single node with all available processors in it with a flat @@ -146,6 +156,21 @@ static void __init find_max_pfn_node(int nid) BUG(); } +/* Find the owning node for a pfn. */ +int early_pfn_to_nid(unsigned long pfn) +{ + int nid; + + for_each_node(nid) { + if (node_end_pfn[nid] == 0) + break; + if (node_start_pfn[nid] <= pfn && node_end_pfn[nid] >= pfn) + return nid; + } + + return 0; +} + /* * Allocate memory for the pg_data_t for this node via a crude pre-bootmem * method. For node zero take this from the bottom of memory, for @@ -163,6 +188,21 @@ static void __init allocate_pgdat(int nid) } } +void *alloc_remap(int nid, unsigned long size) +{ + void *allocation = node_remap_alloc_vaddr[nid]; + + size = ALIGN(size, L1_CACHE_BYTES); + + if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid]) + return 0; + + node_remap_alloc_vaddr[nid] += size; + memset(allocation, 0, size); + + return allocation; +} + void __init remap_numa_kva(void) { void *vaddr; @@ -170,8 +210,6 @@ void __init remap_numa_kva(void) int node; for_each_online_node(node) { - if (node == 0) - continue; for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT); set_pmd_pfn((ulong) vaddr, @@ -185,13 +223,9 @@ static unsigned long calculate_numa_remap_pages(void) { int nid; unsigned long size, reserve_pages = 0; + unsigned long pfn; for_each_online_node(nid) { - if (nid == 0) - continue; - if (!node_remap_size[nid]) - continue; - /* * The acpi/srat node info can show hot-add memroy zones * where memory could be added but not currently present. @@ -208,13 +242,37 @@ static unsigned long calculate_numa_remap_pages(void) size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES; /* now the roundup is correct, convert to PAGE_SIZE pages */ size = size * PTRS_PER_PTE; + + /* + * Validate the region we are allocating only contains valid + * pages. + */ + for (pfn = node_end_pfn[nid] - size; + pfn < node_end_pfn[nid]; pfn++) + if (!page_is_ram(pfn)) + break; + + if (pfn != node_end_pfn[nid]) + size = 0; + printk("Reserving %ld pages of KVA for lmem_map of node %d\n", size, nid); node_remap_size[nid] = size; - reserve_pages += size; node_remap_offset[nid] = reserve_pages; + reserve_pages += size; printk("Shrinking node %d from %ld pages to %ld pages\n", nid, node_end_pfn[nid], node_end_pfn[nid] - size); + + if (node_end_pfn[nid] & (PTRS_PER_PTE-1)) { + /* + * Align node_end_pfn[] and node_remap_start_pfn[] to + * pmd boundary. remap_numa_kva will barf otherwise. + */ + printk("Shrinking node %d further by %ld pages for proper alignment\n", + nid, node_end_pfn[nid] & (PTRS_PER_PTE-1)); + size += node_end_pfn[nid] & (PTRS_PER_PTE-1); + } + node_end_pfn[nid] -= size; node_remap_start_pfn[nid] = node_end_pfn[nid]; } @@ -265,12 +323,18 @@ unsigned long __init setup_memory(void) (ulong) pfn_to_kaddr(max_low_pfn)); for_each_online_node(nid) { node_remap_start_vaddr[nid] = pfn_to_kaddr( - (highstart_pfn + reserve_pages) - node_remap_offset[nid]); + highstart_pfn + node_remap_offset[nid]); + /* Init the node remap allocator */ + node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + + (node_remap_size[nid] * PAGE_SIZE); + node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] + + ALIGN(sizeof(pg_data_t), PAGE_SIZE); + allocate_pgdat(nid); printk ("node %d will remap to vaddr %08lx - %08lx\n", nid, (ulong) node_remap_start_vaddr[nid], - (ulong) pfn_to_kaddr(highstart_pfn + reserve_pages - - node_remap_offset[nid] + node_remap_size[nid])); + (ulong) pfn_to_kaddr(highstart_pfn + + node_remap_offset[nid] + node_remap_size[nid])); } printk("High memory starts at vaddr %08lx\n", (ulong) pfn_to_kaddr(highstart_pfn)); @@ -333,23 +397,9 @@ void __init zone_sizes_init(void) } zholes_size = get_zholes_size(nid); - /* - * We let the lmem_map for node 0 be allocated from the - * normal bootmem allocator, but other nodes come from the - * remapped KVA area - mbligh - */ - if (!nid) - free_area_init_node(nid, NODE_DATA(nid), - zones_size, start, zholes_size); - else { - unsigned long lmem_map; - lmem_map = (unsigned long)node_remap_start_vaddr[nid]; - lmem_map += sizeof(pg_data_t) + PAGE_SIZE - 1; - lmem_map &= PAGE_MASK; - NODE_DATA(nid)->node_mem_map = (struct page *)lmem_map; - free_area_init_node(nid, NODE_DATA(nid), zones_size, - start, zholes_size); - } + + free_area_init_node(nid, NODE_DATA(nid), zones_size, start, + zholes_size); } return; } @@ -358,24 +408,26 @@ void __init set_highmem_pages_init(int bad_ppro) { #ifdef CONFIG_HIGHMEM struct zone *zone; + struct page *page; for_each_zone(zone) { - unsigned long node_pfn, node_high_size, zone_start_pfn; - struct page * zone_mem_map; - + unsigned long node_pfn, zone_start_pfn, zone_end_pfn; + if (!is_highmem(zone)) continue; - printk("Initializing %s for node %d\n", zone->name, - zone->zone_pgdat->node_id); - - node_high_size = zone->spanned_pages; - zone_mem_map = zone->zone_mem_map; zone_start_pfn = zone->zone_start_pfn; + zone_end_pfn = zone_start_pfn + zone->spanned_pages; + + printk("Initializing %s for node %d (%08lx:%08lx)\n", + zone->name, zone->zone_pgdat->node_id, + zone_start_pfn, zone_end_pfn); - for (node_pfn = 0; node_pfn < node_high_size; node_pfn++) { - one_highpage_init((struct page *)(zone_mem_map + node_pfn), - zone_start_pfn + node_pfn, bad_ppro); + for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) { + if (!pfn_valid(node_pfn)) + continue; + page = pfn_to_page(node_pfn); + one_highpage_init(page, node_pfn, bad_ppro); } } totalram_pages += totalhigh_pages; |