From 5e48521e869de7d904bb5ffe9739258ffa026927 Mon Sep 17 00:00:00 2001
From: "Zhang, Yanmin"
Date: Thu, 23 Feb 2006 11:07:20 +0800
Subject: [IA64] lazy_mmu_prot_update needs to be aware of huge pages

Function lazy_mmu_prot_update is also used on huge pages when it is
called by set_huge_ptep_writable, but it isn't aware of huge pages.

Signed-off-by: Zhang Yanmin
Acked-by: Ken Chen
Signed-off-by: Tony Luck
---
 arch/ia64/mm/init.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'arch/ia64/mm')

diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index ff4f31fcd33..674736129b8 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -109,6 +109,7 @@ lazy_mmu_prot_update (pte_t pte)
 {
 	unsigned long addr;
 	struct page *page;
+	unsigned long order;
 
 	if (!pte_exec(pte))
 		return;				/* not an executable page... */
@@ -119,7 +120,12 @@ lazy_mmu_prot_update (pte_t pte)
 	if (test_bit(PG_arch_1, &page->flags))
 		return;				/* i-cache is already coherent with d-cache */
 
-	flush_icache_range(addr, addr + PAGE_SIZE);
+	if (PageCompound(page)) {
+		order = (unsigned long) (page[1].lru.prev);
+		flush_icache_range(addr, addr + (1UL << order << PAGE_SHIFT));
+	}
+	else
+		flush_icache_range(addr, addr + PAGE_SIZE);
 	set_bit(PG_arch_1, &page->flags);	/* mark page as clean */
 }
 
--
cgit v1.2.3-18-g5258
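An aside on the arithmetic in the patch above: for a compound (huge) page, the
page order stashed in page[1].lru.prev determines the flush span, which is
(1UL << order) << PAGE_SHIFT bytes rather than a single PAGE_SIZE. Below is a
standalone user-space sketch of just that size computation; the PAGE_SHIFT
value is an assumption for illustration, and this is not kernel code.

/*
 * Sketch of the flush-size arithmetic: a compound page flushes the whole
 * (1UL << order)-page region, a base page flushes PAGE_SIZE bytes.
 * PAGE_SHIFT of 14 (16KB pages, common on ia64) is assumed for the demo.
 */
#include <stdio.h>

#define PAGE_SHIFT	14
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

static unsigned long flush_span(int is_compound, unsigned long order)
{
	/* mirrors the patch: whole compound page vs. single page */
	return is_compound ? (1UL << order << PAGE_SHIFT) : PAGE_SIZE;
}

int main(void)
{
	printf("base page:         %lu bytes\n", flush_span(0, 0));
	printf("order-4 huge page: %lu bytes\n", flush_span(1, 4));
	return 0;
}

Without the patch, only the first PAGE_SIZE bytes of a huge page were made
i-cache coherent, leaving stale instructions in the rest of the mapping.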
From ce9eed5a98efacb896551d3470d9d46826caaee5 Mon Sep 17 00:00:00 2001
From: "Chen, Kenneth W"
Date: Mon, 6 Mar 2006 14:12:54 -0800
Subject: [IA64] optimize flush_tlb_range on large numa box

It was reported by a field customer that the global spin lock ptcg_lock
is giving a lot of grief to munmap performance on a large NUMA machine.
The problem appears to come from flush_tlb_range(), which currently
calls platform_global_tlb_purge() unconditionally.  On some of the NUMA
machines in existence today, this function is mapped to
ia64_global_tlb_purge(), which holds the ptcg_lock spin lock while
executing the ptc.ga instruction.

Here is a patch that attempts to avoid the global TLB purge whenever
possible, using the local TLB purge instead, though the conditions for
using the local purge are pretty restrictive.  One side effect of having
a flush-tlb-range instruction on ia64 is that the kernel doesn't get a
chance to clear out cpu_vm_mask.  On ia64 this mask is sticky, and it
accumulates as a process bounces around, diminishing the chances to use
ptc.l.  Thoughts?

Signed-off-by: Ken Chen
Acked-by: Jack Steiner
Acked-by: KAMEZAWA Hiroyuki
Signed-off-by: Tony Luck
---
 arch/ia64/mm/tlb.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'arch/ia64/mm')

diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index 6a4eec9113e..4dbbca0b5e9 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -156,17 +156,19 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
 	nbits = purge.max_bits;
 	start &= ~((1UL << nbits) - 1);
 
-# ifdef CONFIG_SMP
-	platform_global_tlb_purge(mm, start, end, nbits);
-# else
 	preempt_disable();
+#ifdef CONFIG_SMP
+	if (mm != current->active_mm || cpus_weight(mm->cpu_vm_mask) != 1) {
+		platform_global_tlb_purge(mm, start, end, nbits);
+		preempt_enable();
+		return;
+	}
+#endif
 	do {
 		ia64_ptcl(start, (nbits<<2));
 		start += (1UL << nbits);
 	} while (start < end);
 	preempt_enable();
-# endif
-
 	ia64_srlz_i();			/* srlz.i implies srlz.d */
 }
 EXPORT_SYMBOL(flush_tlb_range);
--
cgit v1.2.3-18-g5258

From c1c57d767100417f63c18da52d7e96f82b2b9e1a Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas
Date: Thu, 30 Mar 2006 09:53:39 -0700
Subject: [IA64] ioremap() should prefer WB over UC

efi_memmap_init() collects full granules of WB memory, without regard
for whether they also support UC.  So in order for ioremap() to work
for main memory, it must prefer WB mappings when possible.

Signed-off-by: Bjorn Helgaas
Signed-off-by: Tony Luck
---
 arch/ia64/mm/ioremap.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/ia64/mm')

diff --git a/arch/ia64/mm/ioremap.c b/arch/ia64/mm/ioremap.c
index 62328621f99..643ccc6960c 100644
--- a/arch/ia64/mm/ioremap.c
+++ b/arch/ia64/mm/ioremap.c
@@ -21,12 +21,12 @@ __ioremap (unsigned long offset, unsigned long size)
 void __iomem *
 ioremap (unsigned long offset, unsigned long size)
 {
-	if (efi_mem_attribute_range(offset, size, EFI_MEMORY_UC))
-		return __ioremap(offset, size);
-
 	if (efi_mem_attribute_range(offset, size, EFI_MEMORY_WB))
 		return phys_to_virt(offset);
 
+	if (efi_mem_attribute_range(offset, size, EFI_MEMORY_UC))
+		return __ioremap(offset, size);
+
 	/*
 	 * Someday this should check ACPI resources so we
 	 * can do the right thing for hot-plugged regions.
--
cgit v1.2.3-18-g5258
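A footnote on the flush_tlb_range patch above: the check it adds (the mm being
flushed is the current CPU's active mm, and cpus_weight(mm->cpu_vm_mask) is 1)
is what makes the cheap local ptc.l loop safe, since then only the local TLB
can hold translations for that mm. Below is a minimal standalone sketch of
that decision; the struct and helper names are illustrative stand-ins, not the
kernel's types.

/*
 * Sketch of the local-vs-global purge decision.  ptc.l flushes only the
 * local TLB, so it suffices only when the current CPU is the one and
 * only CPU that has ever run this mm.
 */
#include <stdbool.h>
#include <stdio.h>

struct mm_sketch {
	unsigned long	cpu_vm_mask;		/* one bit per CPU that ran the mm */
	bool		active_on_this_cpu;	/* mm == current->active_mm? */
};

static int weight(unsigned long mask)	/* stand-in for cpus_weight() */
{
	int n = 0;

	for (; mask; mask &= mask - 1)
		n++;
	return n;
}

static bool local_purge_ok(const struct mm_sketch *mm)
{
	return mm->active_on_this_cpu && weight(mm->cpu_vm_mask) == 1;
}

int main(void)
{
	struct mm_sketch stayed_put = { 1UL << 3, true };
	struct mm_sketch bounced    = { (1UL << 3) | (1UL << 5), true };

	printf("stayed on one CPU: %s\n",
	       local_purge_ok(&stayed_put) ? "local ptc.l" : "global ptc.ga");
	printf("bounced around:    %s\n",
	       local_purge_ok(&bounced) ? "local ptc.l" : "global ptc.ga");
	return 0;
}

This also shows why the sticky cpu_vm_mask matters: once a process has bounced
across CPUs, the weight stays above 1 and the fast path is never taken again.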
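And a footnote on the ioremap() ordering patch: the fix is purely about which
attribute test runs first, so that a region advertising both WB and UC gets
the cacheable mapping and WB-only main memory no longer falls through. A
standalone sketch of that ordering follows; the enum and helper are
illustrative, not the EFI API.

/*
 * Sketch of the attribute-priority change: WB wins whenever the region
 * supports it; UC is only the fallback.
 */
#include <stdio.h>

enum mem_attr { ATTR_WB = 1 << 0, ATTR_UC = 1 << 1 };

static const char *pick_mapping(unsigned int attrs)
{
	if (attrs & ATTR_WB)	/* cacheable: works for main memory */
		return "WB via phys_to_virt()";
	if (attrs & ATTR_UC)	/* uncacheable: device memory */
		return "UC via __ioremap()";
	return "no usable mapping";
}

int main(void)
{
	printf("WB-only granule: %s\n", pick_mapping(ATTR_WB));
	printf("WB+UC granule:   %s\n", pick_mapping(ATTR_WB | ATTR_UC));
	printf("UC-only region:  %s\n", pick_mapping(ATTR_UC));
	return 0;
}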