From bf88c8c83e4425d17e29daa5354ffb1f8ba7b225 Mon Sep 17 00:00:00 2001 From: "Figo.zhang" Date: Mon, 21 Sep 2009 17:01:47 -0700 Subject: vmalloc.c: fix double error checking There is no need for double error checking. Signed-off-by: Figo.zhang Acked-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 204b8243d8a..759deae4539 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -168,11 +168,9 @@ static int vmap_page_range_noflush(unsigned long start, unsigned long end, next = pgd_addr_end(addr, end); err = vmap_pud_range(pgd, addr, next, prot, pages, &nr); if (err) - break; + return err; } while (pgd++, addr = next, addr != end); - if (unlikely(err)) - return err; return nr; } -- cgit v1.2.3-70-g09d2 From dd32c279983bf77fdcc8a9aa4a05b0ffdc75859c Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 21 Sep 2009 17:02:32 -0700 Subject: vmalloc: unmap vmalloc area after hiding it vmap area should be purged after vm_struct is removed from the list because vread/vwrite etc...believes the range is valid while it's on vm_struct list. Signed-off-by: KAMEZAWA Hiroyuki Reviewed-by: WANG Cong Cc: Mike Smith Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 759deae4539..c4071fa8e12 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1270,17 +1270,21 @@ struct vm_struct *remove_vm_area(const void *addr) if (va && va->flags & VM_VM_AREA) { struct vm_struct *vm = va->private; struct vm_struct *tmp, **p; - - vmap_debug_free_range(va->va_start, va->va_end); - free_unmap_vmap_area(va); - vm->size -= PAGE_SIZE; - + /* + * remove from list and disallow access to this vm_struct + * before unmap. (address range confliction is maintained by + * vmap.) + */ write_lock(&vmlist_lock); for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next) ; *p = tmp->next; write_unlock(&vmlist_lock); + vmap_debug_free_range(va->va_start, va->va_end); + free_unmap_vmap_area(va); + vm->size -= PAGE_SIZE; + return vm; } return NULL; -- cgit v1.2.3-70-g09d2 From d0107eb07320b5d37c0f8a9f015534caebb28a48 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 21 Sep 2009 17:02:34 -0700 Subject: kcore: fix vread/vwrite to be aware of holes vread/vwrite access vmalloc area without checking there is a page or not. In most case, this works well. In old ages, the caller of get_vm_ara() is only IOREMAP and there is no memory hole within vm_struct's [addr...addr + size - PAGE_SIZE] ( -PAGE_SIZE is for a guard page.) After per-cpu-alloc patch, it uses get_vm_area() for reserve continuous virtual address but remap _later_. There tend to be a hole in valid vmalloc area in vm_struct lists. Then, skip the hole (not mapped page) is necessary. This patch updates vread/vwrite() for avoiding memory hole. Routines which access vmalloc area without knowing for which addr is used are - /proc/kcore - /dev/kmem kcore checks IOREMAP, /dev/kmem doesn't. After this patch, IOREMAP is checked and /dev/kmem will avoid to read/write it. Fixes to /proc/kcore will be in the next patch in series. Signed-off-by: KAMEZAWA Hiroyuki Cc: WANG Cong Cc: Mike Smith Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 199 ++++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 176 insertions(+), 23 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index c4071fa8e12..9216b2555d0 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -25,7 +25,7 @@ #include #include #include - +#include #include #include #include @@ -1643,10 +1643,120 @@ void *vmalloc_32_user(unsigned long size) } EXPORT_SYMBOL(vmalloc_32_user); +/* + * small helper routine , copy contents to buf from addr. + * If the page is not present, fill zero. + */ + +static int aligned_vread(char *buf, char *addr, unsigned long count) +{ + struct page *p; + int copied = 0; + + while (count) { + unsigned long offset, length; + + offset = (unsigned long)addr & ~PAGE_MASK; + length = PAGE_SIZE - offset; + if (length > count) + length = count; + p = vmalloc_to_page(addr); + /* + * To do safe access to this _mapped_ area, we need + * lock. But adding lock here means that we need to add + * overhead of vmalloc()/vfree() calles for this _debug_ + * interface, rarely used. Instead of that, we'll use + * kmap() and get small overhead in this access function. + */ + if (p) { + /* + * we can expect USER0 is not used (see vread/vwrite's + * function description) + */ + void *map = kmap_atomic(p, KM_USER0); + memcpy(buf, map + offset, length); + kunmap_atomic(map, KM_USER0); + } else + memset(buf, 0, length); + + addr += length; + buf += length; + copied += length; + count -= length; + } + return copied; +} + +static int aligned_vwrite(char *buf, char *addr, unsigned long count) +{ + struct page *p; + int copied = 0; + + while (count) { + unsigned long offset, length; + + offset = (unsigned long)addr & ~PAGE_MASK; + length = PAGE_SIZE - offset; + if (length > count) + length = count; + p = vmalloc_to_page(addr); + /* + * To do safe access to this _mapped_ area, we need + * lock. But adding lock here means that we need to add + * overhead of vmalloc()/vfree() calles for this _debug_ + * interface, rarely used. Instead of that, we'll use + * kmap() and get small overhead in this access function. + */ + if (p) { + /* + * we can expect USER0 is not used (see vread/vwrite's + * function description) + */ + void *map = kmap_atomic(p, KM_USER0); + memcpy(map + offset, buf, length); + kunmap_atomic(map, KM_USER0); + } + addr += length; + buf += length; + copied += length; + count -= length; + } + return copied; +} + +/** + * vread() - read vmalloc area in a safe way. + * @buf: buffer for reading data + * @addr: vm address. + * @count: number of bytes to be read. + * + * Returns # of bytes which addr and buf should be increased. + * (same number to @count). Returns 0 if [addr...addr+count) doesn't + * includes any intersect with alive vmalloc area. + * + * This function checks that addr is a valid vmalloc'ed area, and + * copy data from that area to a given buffer. If the given memory range + * of [addr...addr+count) includes some valid address, data is copied to + * proper area of @buf. If there are memory holes, they'll be zero-filled. + * IOREMAP area is treated as memory hole and no copy is done. + * + * If [addr...addr+count) doesn't includes any intersects with alive + * vm_struct area, returns 0. + * @buf should be kernel's buffer. Because this function uses KM_USER0, + * the caller should guarantee KM_USER0 is not used. + * + * Note: In usual ops, vread() is never necessary because the caller + * should know vmalloc() area is valid and can use memcpy(). + * This is for routines which have to access vmalloc area without + * any informaion, as /dev/kmem. + * + */ + long vread(char *buf, char *addr, unsigned long count) { struct vm_struct *tmp; char *vaddr, *buf_start = buf; + unsigned long buflen = count; unsigned long n; /* Don't allow overflow */ @@ -1654,7 +1764,7 @@ long vread(char *buf, char *addr, unsigned long count) count = -(unsigned long) addr; read_lock(&vmlist_lock); - for (tmp = vmlist; tmp; tmp = tmp->next) { + for (tmp = vmlist; count && tmp; tmp = tmp->next) { vaddr = (char *) tmp->addr; if (addr >= vaddr + tmp->size - PAGE_SIZE) continue; @@ -1667,32 +1777,72 @@ long vread(char *buf, char *addr, unsigned long count) count--; } n = vaddr + tmp->size - PAGE_SIZE - addr; - do { - if (count == 0) - goto finished; - *buf = *addr; - buf++; - addr++; - count--; - } while (--n > 0); + if (n > count) + n = count; + if (!(tmp->flags & VM_IOREMAP)) + aligned_vread(buf, addr, n); + else /* IOREMAP area is treated as memory hole */ + memset(buf, 0, n); + buf += n; + addr += n; + count -= n; } finished: read_unlock(&vmlist_lock); - return buf - buf_start; + + if (buf == buf_start) + return 0; + /* zero-fill memory holes */ + if (buf != buf_start + buflen) + memset(buf, 0, buflen - (buf - buf_start)); + + return buflen; } +/** + * vwrite() - write vmalloc area in a safe way. + * @buf: buffer for source data + * @addr: vm address. + * @count: number of bytes to be read. + * + * Returns # of bytes which addr and buf should be incresed. + * (same number to @count). + * If [addr...addr+count) doesn't includes any intersect with valid + * vmalloc area, returns 0. + * + * This function checks that addr is a valid vmalloc'ed area, and + * copy data from a buffer to the given addr. If specified range of + * [addr...addr+count) includes some valid address, data is copied from + * proper area of @buf. If there are memory holes, no copy to hole. + * IOREMAP area is treated as memory hole and no copy is done. + * + * If [addr...addr+count) doesn't includes any intersects with alive + * vm_struct area, returns 0. + * @buf should be kernel's buffer. Because this function uses KM_USER0, + * the caller should guarantee KM_USER0 is not used. + * + * Note: In usual ops, vwrite() is never necessary because the caller + * should know vmalloc() area is valid and can use memcpy(). + * This is for routines which have to access vmalloc area without + * any informaion, as /dev/kmem. + * + * The caller should guarantee KM_USER1 is not used. + */ + long vwrite(char *buf, char *addr, unsigned long count) { struct vm_struct *tmp; - char *vaddr, *buf_start = buf; - unsigned long n; + char *vaddr; + unsigned long n, buflen; + int copied = 0; /* Don't allow overflow */ if ((unsigned long) addr + count < count) count = -(unsigned long) addr; + buflen = count; read_lock(&vmlist_lock); - for (tmp = vmlist; tmp; tmp = tmp->next) { + for (tmp = vmlist; count && tmp; tmp = tmp->next) { vaddr = (char *) tmp->addr; if (addr >= vaddr + tmp->size - PAGE_SIZE) continue; @@ -1704,18 +1854,21 @@ long vwrite(char *buf, char *addr, unsigned long count) count--; } n = vaddr + tmp->size - PAGE_SIZE - addr; - do { - if (count == 0) - goto finished; - *addr = *buf; - buf++; - addr++; - count--; - } while (--n > 0); + if (n > count) + n = count; + if (!(tmp->flags & VM_IOREMAP)) { + aligned_vwrite(buf, addr, n); + copied++; + } + buf += n; + addr += n; + count -= n; } finished: read_unlock(&vmlist_lock); - return buf - buf_start; + if (!copied) + return 0; + return buflen; } /** -- cgit v1.2.3-70-g09d2 From 4481374ce88ba8f460c8b89f2572027bd27057d0 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 21 Sep 2009 17:03:05 -0700 Subject: mm: replace various uses of num_physpages by totalram_pages Sizing of memory allocations shouldn't depend on the number of physical pages found in a system, as that generally includes (perhaps a huge amount of) non-RAM pages. The amount of what actually is usable as storage should instead be used as a basis here. Some of the calculations (i.e. those not intending to use high memory) should likely even use (totalram_pages - totalhigh_pages). Signed-off-by: Jan Beulich Acked-by: Rusty Russell Acked-by: Ingo Molnar Cc: Dave Airlie Cc: Kyle McMartin Cc: Jeremy Fitzhardinge Cc: Pekka Enberg Cc: Hugh Dickins Cc: "David S. Miller" Cc: Patrick McHardy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/microcode_core.c | 4 ++-- drivers/char/agp/backend.c | 4 ++-- drivers/parisc/ccio-dma.c | 4 ++-- drivers/parisc/sba_iommu.c | 4 ++-- drivers/xen/balloon.c | 4 ---- fs/ntfs/malloc.h | 2 +- include/linux/mm.h | 1 + init/main.c | 4 ++-- mm/slab.c | 2 +- mm/swap.c | 2 +- mm/vmalloc.c | 4 ++-- net/core/sock.c | 4 ++-- net/dccp/proto.c | 6 +++--- net/decnet/dn_route.c | 2 +- net/ipv4/route.c | 2 +- net/ipv4/tcp.c | 4 ++-- net/netfilter/nf_conntrack_core.c | 4 ++-- net/netfilter/x_tables.c | 2 +- net/netfilter/xt_hashlimit.c | 8 ++++---- net/netlink/af_netlink.c | 6 +++--- net/sctp/protocol.c | 6 +++--- 21 files changed, 38 insertions(+), 41 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 0db7969b0dd..378e9a8f1bf 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -210,8 +210,8 @@ static ssize_t microcode_write(struct file *file, const char __user *buf, { ssize_t ret = -EINVAL; - if ((len >> PAGE_SHIFT) > num_physpages) { - pr_err("microcode: too much data (max %ld pages)\n", num_physpages); + if ((len >> PAGE_SHIFT) > totalram_pages) { + pr_err("microcode: too much data (max %ld pages)\n", totalram_pages); return ret; } diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c index ad87753f6de..a56ca080e10 100644 --- a/drivers/char/agp/backend.c +++ b/drivers/char/agp/backend.c @@ -114,9 +114,9 @@ static int agp_find_max(void) long memory, index, result; #if PAGE_SHIFT < 20 - memory = num_physpages >> (20 - PAGE_SHIFT); + memory = totalram_pages >> (20 - PAGE_SHIFT); #else - memory = num_physpages << (PAGE_SHIFT - 20); + memory = totalram_pages << (PAGE_SHIFT - 20); #endif index = 1; diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index a45b0c0d574..a6b4a5a53d4 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -1266,7 +1266,7 @@ ccio_ioc_init(struct ioc *ioc) ** Hot-Plug/Removal of PCI cards. (aka PCI OLARD). */ - iova_space_size = (u32) (num_physpages / count_parisc_driver(&ccio_driver)); + iova_space_size = (u32) (totalram_pages / count_parisc_driver(&ccio_driver)); /* limit IOVA space size to 1MB-1GB */ @@ -1305,7 +1305,7 @@ ccio_ioc_init(struct ioc *ioc) DBG_INIT("%s() hpa 0x%p mem %luMB IOV %dMB (%d bits)\n", __func__, ioc->ioc_regs, - (unsigned long) num_physpages >> (20 - PAGE_SHIFT), + (unsigned long) totalram_pages >> (20 - PAGE_SHIFT), iova_space_size>>20, iov_order + PAGE_SHIFT); diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index 123d8fe3427..57a6d19eba4 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -1390,7 +1390,7 @@ sba_ioc_init(struct parisc_device *sba, struct ioc *ioc, int ioc_num) ** for DMA hints - ergo only 30 bits max. */ - iova_space_size = (u32) (num_physpages/global_ioc_cnt); + iova_space_size = (u32) (totalram_pages/global_ioc_cnt); /* limit IOVA space size to 1MB-1GB */ if (iova_space_size < (1 << (20 - PAGE_SHIFT))) { @@ -1415,7 +1415,7 @@ sba_ioc_init(struct parisc_device *sba, struct ioc *ioc, int ioc_num) DBG_INIT("%s() hpa 0x%lx mem %ldMB IOV %dMB (%d bits)\n", __func__, ioc->ioc_hpa, - (unsigned long) num_physpages >> (20 - PAGE_SHIFT), + (unsigned long) totalram_pages >> (20 - PAGE_SHIFT), iova_space_size>>20, iov_order + PAGE_SHIFT); diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index f5bbd9e8341..1b7123eb5d7 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -96,11 +96,7 @@ static struct balloon_stats balloon_stats; /* We increase/decrease in batches which fit in a page */ static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; -/* VM /proc information for memory */ -extern unsigned long totalram_pages; - #ifdef CONFIG_HIGHMEM -extern unsigned long totalhigh_pages; #define inc_totalhigh_pages() (totalhigh_pages++) #define dec_totalhigh_pages() (totalhigh_pages--) #else diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h index cd0be3f5c3c..a44b14cbcee 100644 --- a/fs/ntfs/malloc.h +++ b/fs/ntfs/malloc.h @@ -47,7 +47,7 @@ static inline void *__ntfs_malloc(unsigned long size, gfp_t gfp_mask) return kmalloc(PAGE_SIZE, gfp_mask & ~__GFP_HIGHMEM); /* return (void *)__get_free_page(gfp_mask); */ } - if (likely(size >> PAGE_SHIFT < num_physpages)) + if (likely((size >> PAGE_SHIFT) < totalram_pages)) return __vmalloc(size, gfp_mask, PAGE_KERNEL); return NULL; } diff --git a/include/linux/mm.h b/include/linux/mm.h index d808cf832c4..19ff81c49ba 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -25,6 +25,7 @@ extern unsigned long max_mapnr; #endif extern unsigned long num_physpages; +extern unsigned long totalram_pages; extern void * high_memory; extern int page_cluster; diff --git a/init/main.c b/init/main.c index 34971becbd3..2c48c315316 100644 --- a/init/main.c +++ b/init/main.c @@ -668,12 +668,12 @@ asmlinkage void __init start_kernel(void) #endif thread_info_cache_init(); cred_init(); - fork_init(num_physpages); + fork_init(totalram_pages); proc_caches_init(); buffer_init(); key_init(); security_init(); - vfs_caches_init(num_physpages); + vfs_caches_init(totalram_pages); radix_tree_init(); signals_init(); /* rootfs populating might need page-writeback */ diff --git a/mm/slab.c b/mm/slab.c index 7b5d4deacfc..7dfa481c96b 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1384,7 +1384,7 @@ void __init kmem_cache_init(void) * Fragmentation resistance on low memory - only use bigger * page orders on machines with more than 32MB of memory. */ - if (num_physpages > (32 << 20) >> PAGE_SHIFT) + if (totalram_pages > (32 << 20) >> PAGE_SHIFT) slab_break_gfp_order = BREAK_GFP_ORDER_HI; /* Bootstrap is tricky, because several objects are allocated diff --git a/mm/swap.c b/mm/swap.c index 4a8a59e671f..308e57d8d7e 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -496,7 +496,7 @@ EXPORT_SYMBOL(pagevec_lookup_tag); */ void __init swap_setup(void) { - unsigned long megs = num_physpages >> (20 - PAGE_SHIFT); + unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT); #ifdef CONFIG_SWAP bdi_init(swapper_space.backing_dev_info); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 9216b2555d0..5535da1d696 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1386,7 +1386,7 @@ void *vmap(struct page **pages, unsigned int count, might_sleep(); - if (count > num_physpages) + if (count > totalram_pages) return NULL; area = get_vm_area_caller((count << PAGE_SHIFT), flags, @@ -1493,7 +1493,7 @@ static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, unsigned long real_size = size; size = PAGE_ALIGN(size); - if (!size || (size >> PAGE_SHIFT) > num_physpages) + if (!size || (size >> PAGE_SHIFT) > totalram_pages) return NULL; area = __get_vm_area_node(size, VM_ALLOC, VMALLOC_START, VMALLOC_END, diff --git a/net/core/sock.c b/net/core/sock.c index 30d5446512f..524712a7b15 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1206,12 +1206,12 @@ EXPORT_SYMBOL_GPL(sk_setup_caps); void __init sk_init(void) { - if (num_physpages <= 4096) { + if (totalram_pages <= 4096) { sysctl_wmem_max = 32767; sysctl_rmem_max = 32767; sysctl_wmem_default = 32767; sysctl_rmem_default = 32767; - } else if (num_physpages >= 131072) { + } else if (totalram_pages >= 131072) { sysctl_wmem_max = 131071; sysctl_rmem_max = 131071; } diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 923db06c7e5..bc4467082a0 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1049,10 +1049,10 @@ static int __init dccp_init(void) * * The methodology is similar to that of the buffer cache. */ - if (num_physpages >= (128 * 1024)) - goal = num_physpages >> (21 - PAGE_SHIFT); + if (totalram_pages >= (128 * 1024)) + goal = totalram_pages >> (21 - PAGE_SHIFT); else - goal = num_physpages >> (23 - PAGE_SHIFT); + goal = totalram_pages >> (23 - PAGE_SHIFT); if (thash_entries) goal = (thash_entries * diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 9383d3e5a1a..57662cabaf9 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1750,7 +1750,7 @@ void __init dn_route_init(void) dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ; add_timer(&dn_route_timer); - goal = num_physpages >> (26 - PAGE_SHIFT); + goal = totalram_pages >> (26 - PAGE_SHIFT); for(order = 0; (1UL << order) < goal; order++) /* NOTHING */; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 91867d3e632..df934731453 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3414,7 +3414,7 @@ int __init ip_rt_init(void) alloc_large_system_hash("IP route cache", sizeof(struct rt_hash_bucket), rhash_entries, - (num_physpages >= 128 * 1024) ? + (totalram_pages >= 128 * 1024) ? 15 : 17, 0, &rt_hash_log, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 19a0612b8a2..21387ebabf0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2862,7 +2862,7 @@ void __init tcp_init(void) alloc_large_system_hash("TCP established", sizeof(struct inet_ehash_bucket), thash_entries, - (num_physpages >= 128 * 1024) ? + (totalram_pages >= 128 * 1024) ? 13 : 15, 0, &tcp_hashinfo.ehash_size, @@ -2879,7 +2879,7 @@ void __init tcp_init(void) alloc_large_system_hash("TCP bind", sizeof(struct inet_bind_hashbucket), tcp_hashinfo.ehash_size, - (num_physpages >= 128 * 1024) ? + (totalram_pages >= 128 * 1024) ? 13 : 15, 0, &tcp_hashinfo.bhash_size, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index b37109817a9..7c9ec3dee96 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1245,9 +1245,9 @@ static int nf_conntrack_init_init_net(void) * machine has 512 buckets. >= 1GB machines have 16384 buckets. */ if (!nf_conntrack_htable_size) { nf_conntrack_htable_size - = (((num_physpages << PAGE_SHIFT) / 16384) + = (((totalram_pages << PAGE_SHIFT) / 16384) / sizeof(struct hlist_head)); - if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE)) + if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE)) nf_conntrack_htable_size = 16384; if (nf_conntrack_htable_size < 32) nf_conntrack_htable_size = 32; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index a6ac83a9334..f01955cce31 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -617,7 +617,7 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size) int cpu; /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ - if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > num_physpages) + if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages) return NULL; newinfo = kzalloc(XT_TABLE_INFO_SZ, GFP_KERNEL); diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 219dcdbe388..dd16e404424 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -194,9 +194,9 @@ static int htable_create_v0(struct xt_hashlimit_info *minfo, u_int8_t family) if (minfo->cfg.size) size = minfo->cfg.size; else { - size = ((num_physpages << PAGE_SHIFT) / 16384) / + size = ((totalram_pages << PAGE_SHIFT) / 16384) / sizeof(struct list_head); - if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE)) + if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE)) size = 8192; if (size < 16) size = 16; @@ -266,9 +266,9 @@ static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, u_int8_t family) if (minfo->cfg.size) { size = minfo->cfg.size; } else { - size = (num_physpages << PAGE_SHIFT) / 16384 / + size = (totalram_pages << PAGE_SHIFT) / 16384 / sizeof(struct list_head); - if (num_physpages > 1024 * 1024 * 1024 / PAGE_SIZE) + if (totalram_pages > 1024 * 1024 * 1024 / PAGE_SIZE) size = 8192; if (size < 16) size = 16; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c5aab6a368c..55180b99562 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2091,10 +2091,10 @@ static int __init netlink_proto_init(void) if (!nl_table) goto panic; - if (num_physpages >= (128 * 1024)) - limit = num_physpages >> (21 - PAGE_SHIFT); + if (totalram_pages >= (128 * 1024)) + limit = totalram_pages >> (21 - PAGE_SHIFT); else - limit = num_physpages >> (23 - PAGE_SHIFT); + limit = totalram_pages >> (23 - PAGE_SHIFT); order = get_bitmask_order(limit) - 1 + PAGE_SHIFT; limit = (1UL << order) / sizeof(struct hlist_head); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index c557f1fb1c6..612dc878e05 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1184,10 +1184,10 @@ SCTP_STATIC __init int sctp_init(void) /* Size and allocate the association hash table. * The methodology is similar to that of the tcp hash tables. */ - if (num_physpages >= (128 * 1024)) - goal = num_physpages >> (22 - PAGE_SHIFT); + if (totalram_pages >= (128 * 1024)) + goal = totalram_pages >> (22 - PAGE_SHIFT); else - goal = num_physpages >> (24 - PAGE_SHIFT); + goal = totalram_pages >> (24 - PAGE_SHIFT); for (order = 0; (1UL << order) < goal; order++) ; -- cgit v1.2.3-70-g09d2 From 81ac3ad9061dd9cd490ee92f0c5316a14d77ce18 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Tue, 22 Sep 2009 16:45:49 -0700 Subject: kcore: register module area in generic way Some archs define MODULED_VADDR/MODULES_END which is not in VMALLOC area. This is handled only in x86-64. This patch make it more generic. And we can use vread/vwrite to access the area. Fix it. Signed-off-by: KAMEZAWA Hiroyuki Cc: Jiri Slaby Cc: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: WANG Cong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/init_64.c | 4 +--- fs/proc/kcore.c | 19 ++++++++++++++++++- include/linux/mm.h | 8 ++++++++ mm/vmalloc.c | 2 +- 4 files changed, 28 insertions(+), 5 deletions(-) (limited to 'mm/vmalloc.c') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index d5d23cc2407..5a4398a6006 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -647,7 +647,7 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif /* CONFIG_MEMORY_HOTPLUG */ -static struct kcore_list kcore_modules, kcore_vsyscall; +static struct kcore_list kcore_vsyscall; void __init mem_init(void) { @@ -676,8 +676,6 @@ void __init mem_init(void) initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; /* Register memory areas for /proc/kcore */ - kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN, - KCORE_OTHER); kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START, VSYSCALL_END - VSYSCALL_START, KCORE_OTHER); diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 78970e6f715..c6a5ec73197 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -490,7 +490,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) if (m == NULL) { if (clear_user(buffer, tsz)) return -EFAULT; - } else if (is_vmalloc_addr((void *)start)) { + } else if (is_vmalloc_or_module_addr((void *)start)) { char * elf_buf; elf_buf = kzalloc(tsz, GFP_KERNEL); @@ -586,6 +586,22 @@ static void __init proc_kcore_text_init(void) } #endif +#if defined(CONFIG_MODULES) && defined(MODULES_VADDR) +/* + * MODULES_VADDR has no intersection with VMALLOC_ADDR. + */ +struct kcore_list kcore_modules; +static void __init add_modules_range(void) +{ + kclist_add(&kcore_modules, (void *)MODULES_VADDR, + MODULES_END - MODULES_VADDR, KCORE_VMALLOC); +} +#else +static void __init add_modules_range(void) +{ +} +#endif + static int __init proc_kcore_init(void) { proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, @@ -595,6 +611,7 @@ static int __init proc_kcore_init(void) /* Store vmalloc area */ kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, VMALLOC_END - VMALLOC_START, KCORE_VMALLOC); + add_modules_range(); /* Store direct-map area from physical memory map */ kcore_update_ram(); hotplug_memory_notifier(kcore_callback, 0); diff --git a/include/linux/mm.h b/include/linux/mm.h index 5946e2ff9fe..b6eae5e3144 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -285,6 +285,14 @@ static inline int is_vmalloc_addr(const void *x) return 0; #endif } +#ifdef CONFIG_MMU +extern int is_vmalloc_or_module_addr(const void *x); +#else +static int is_vmalloc_or_module_addr(const void *x) +{ + return 0; +} +#endif static inline struct page *compound_head(struct page *page) { diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 5535da1d696..69511e66323 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -184,7 +184,7 @@ static int vmap_page_range(unsigned long start, unsigned long end, return ret; } -static inline int is_vmalloc_or_module_addr(const void *x) +int is_vmalloc_or_module_addr(const void *x) { /* * ARM, x86-64 and sparc64 put modules in a special place, -- cgit v1.2.3-70-g09d2