From 2abbbb63c90ab55ca3f054772c2e5ba7df810c48 Mon Sep 17 00:00:00 2001 From: Gerhard Sittig Date: Tue, 14 May 2013 04:40:53 +0000 Subject: powerpc/mpc512x: move common code to shared.c file - implement all of the init, init early, and setup arch routines in the shared source file for the MPC512x PowerPC platform, and make all MPC512x based boards (ADS, PDM, generic) use those common routines - remove declarations from header files for routines which aren't referenced from external callers any longer this modification concentrates knowledge about the optional FSL DIU support in one spot within the shared code, and makes all boards benefit transparently from future improvements in the shared platform code the change does not modify any behaviour but preserves all code paths Signed-off-by: Gerhard Sittig Signed-off-by: Anatolij Gustschin --- arch/powerpc/include/asm/mpc5121.h | 1 - arch/powerpc/platforms/512x/mpc5121_ads.c | 6 ++---- arch/powerpc/platforms/512x/mpc512x.h | 11 ++--------- arch/powerpc/platforms/512x/mpc512x_generic.c | 4 ++-- arch/powerpc/platforms/512x/mpc512x_shared.c | 14 +++++++++++++- arch/powerpc/platforms/512x/pdm360ng.c | 4 ++-- 6 files changed, 21 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/include/asm/mpc5121.h b/arch/powerpc/include/asm/mpc5121.h index 885c040d619..8ae133eaf9f 100644 --- a/arch/powerpc/include/asm/mpc5121.h +++ b/arch/powerpc/include/asm/mpc5121.h @@ -68,6 +68,5 @@ struct mpc512x_lpc { }; int mpc512x_cs_config(unsigned int cs, u32 val); -int __init mpc5121_clk_init(void); #endif /* __ASM_POWERPC_MPC5121_H__ */ diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.c b/arch/powerpc/platforms/512x/mpc5121_ads.c index 0a134e0469e..3e90ece10ae 100644 --- a/arch/powerpc/platforms/512x/mpc5121_ads.c +++ b/arch/powerpc/platforms/512x/mpc5121_ads.c @@ -43,9 +43,7 @@ static void __init mpc5121_ads_setup_arch(void) mpc83xx_add_bridge(np); #endif -#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) - 
mpc512x_setup_diu(); -#endif + mpc512x_setup_arch(); } static void __init mpc5121_ads_init_IRQ(void) @@ -69,7 +67,7 @@ define_machine(mpc5121_ads) { .probe = mpc5121_ads_probe, .setup_arch = mpc5121_ads_setup_arch, .init = mpc512x_init, - .init_early = mpc512x_init_diu, + .init_early = mpc512x_init_early, .init_IRQ = mpc5121_ads_init_IRQ, .get_irq = ipic_get_irq, .calibrate_decr = generic_calibrate_decr, diff --git a/arch/powerpc/platforms/512x/mpc512x.h b/arch/powerpc/platforms/512x/mpc512x.h index 0a8e6002394..fdb4303246a 100644 --- a/arch/powerpc/platforms/512x/mpc512x.h +++ b/arch/powerpc/platforms/512x/mpc512x.h @@ -12,18 +12,11 @@ #ifndef __MPC512X_H__ #define __MPC512X_H__ extern void __init mpc512x_init_IRQ(void); +extern void __init mpc512x_init_early(void); extern void __init mpc512x_init(void); +extern void __init mpc512x_setup_arch(void); extern int __init mpc5121_clk_init(void); -void __init mpc512x_declare_of_platform_devices(void); extern const char *mpc512x_select_psc_compat(void); extern void mpc512x_restart(char *cmd); -#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) -void mpc512x_init_diu(void); -void mpc512x_setup_diu(void); -#else -#define mpc512x_init_diu NULL -#define mpc512x_setup_diu NULL -#endif - #endif /* __MPC512X_H__ */ diff --git a/arch/powerpc/platforms/512x/mpc512x_generic.c b/arch/powerpc/platforms/512x/mpc512x_generic.c index 5fb919b3092..ce71408781a 100644 --- a/arch/powerpc/platforms/512x/mpc512x_generic.c +++ b/arch/powerpc/platforms/512x/mpc512x_generic.c @@ -45,8 +45,8 @@ define_machine(mpc512x_generic) { .name = "MPC512x generic", .probe = mpc512x_generic_probe, .init = mpc512x_init, - .init_early = mpc512x_init_diu, - .setup_arch = mpc512x_setup_diu, + .init_early = mpc512x_init_early, + .setup_arch = mpc512x_setup_arch, .init_IRQ = mpc512x_init_IRQ, .get_irq = ipic_get_irq, .calibrate_decr = generic_calibrate_decr, diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c 
b/arch/powerpc/platforms/512x/mpc512x_shared.c index 6eb94ab99d3..09622d3323d 100644 --- a/arch/powerpc/platforms/512x/mpc512x_shared.c +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -58,7 +58,7 @@ void mpc512x_restart(char *cmd) ; } -#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) +#if IS_ENABLED(CONFIG_FB_FSL_DIU) struct fsl_diu_shared_fb { u8 gamma[0x300]; /* 32-bit aligned! */ @@ -436,6 +436,12 @@ void __init mpc512x_psc_fifo_init(void) } } +void __init mpc512x_init_early(void) +{ + if (IS_ENABLED(CONFIG_FB_FSL_DIU)) + mpc512x_init_diu(); +} + void __init mpc512x_init(void) { mpc5121_clk_init(); @@ -444,6 +450,12 @@ void __init mpc512x_init(void) mpc512x_psc_fifo_init(); } +void __init mpc512x_setup_arch(void) +{ + if (IS_ENABLED(CONFIG_FB_FSL_DIU)) + mpc512x_setup_diu(); +} + /** * mpc512x_cs_config - Setup chip select configuration * @cs: chip select number diff --git a/arch/powerpc/platforms/512x/pdm360ng.c b/arch/powerpc/platforms/512x/pdm360ng.c index 0575e858291..24b314d7bd5 100644 --- a/arch/powerpc/platforms/512x/pdm360ng.c +++ b/arch/powerpc/platforms/512x/pdm360ng.c @@ -119,9 +119,9 @@ static int __init pdm360ng_probe(void) define_machine(pdm360ng) { .name = "PDM360NG", .probe = pdm360ng_probe, - .setup_arch = mpc512x_setup_diu, + .setup_arch = mpc512x_setup_arch, .init = pdm360ng_init, - .init_early = mpc512x_init_diu, + .init_early = mpc512x_init_early, .init_IRQ = mpc512x_init_IRQ, .get_irq = ipic_get_irq, .calibrate_decr = generic_calibrate_decr, -- cgit v1.2.3-18-g5258 From a4f4124cf308275b4a2219d1e332dfc01d8bd6b7 Mon Sep 17 00:00:00 2001 From: Gerhard Sittig Date: Tue, 14 May 2013 04:40:54 +0000 Subject: powerpc/mpc512x: initialize board restart earlier move the MPC512x restart initialization from the shared init routine to the shared init_early routine recent problems in the proc(5) filesystem initialization led to the situation where the platform's restart routine was invoked yet the registers required for software 
reset were not yet available, which made the board hang instead of reboot Signed-off-by: Gerhard Sittig Signed-off-by: Anatolij Gustschin --- arch/powerpc/platforms/512x/mpc512x_shared.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c index 09622d3323d..a8b5110eb29 100644 --- a/arch/powerpc/platforms/512x/mpc512x_shared.c +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -438,6 +438,7 @@ void __init mpc512x_psc_fifo_init(void) void __init mpc512x_init_early(void) { + mpc512x_restart_init(); if (IS_ENABLED(CONFIG_FB_FSL_DIU)) mpc512x_init_diu(); } @@ -446,7 +447,6 @@ void __init mpc512x_init(void) { mpc5121_clk_init(); mpc512x_declare_of_platform_devices(); - mpc512x_restart_init(); mpc512x_psc_fifo_init(); } -- cgit v1.2.3-18-g5258 From 8663890a9e9278623d20c67aa9fbeeb31ff3be97 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 6 Jun 2013 00:20:34 -0700 Subject: mm/thp: use the correct function when updating access flags We should use pmdp_set_access_flags to update access flags. Archs like powerpc use extra checks(_PAGE_BUSY) when updating a hugepage PTE. A set_pmd_at doesn't do those checks. We should use set_pmd_at only when updating a non-hugepage PTE. Signed-off-by: Aneesh Kumar K.V Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Benjamin Herrenschmidt --- mm/huge_memory.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 362c329b83f..dab90fd6729 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1265,7 +1265,9 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, * young bit, instead of the current set_pmd_at. 
*/ _pmd = pmd_mkyoung(pmd_mkdirty(*pmd)); - set_pmd_at(mm, addr & HPAGE_PMD_MASK, pmd, _pmd); + if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK, + pmd, _pmd, 1)) + update_mmu_cache_pmd(vma, addr, pmd); } if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { if (page->mapping && trylock_page(page)) { -- cgit v1.2.3-18-g5258 From 6b0b50b0617fad5f2af3b928596a25f7de8dbf50 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 5 Jun 2013 17:14:02 -0700 Subject: mm/THP: add pmd args to pgtable deposit and withdraw APIs This will be later used by powerpc THP support. In powerpc we want to use pgtable for storing the hash index values. So instead of adding them to mm_context list, we would like to store them in the second half of pmd Signed-off-by: Aneesh Kumar K.V Reviewed-by: Andrea Arcangeli Reviewed-by: David Gibson Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Benjamin Herrenschmidt --- arch/s390/include/asm/pgtable.h | 5 +++-- arch/s390/mm/pgtable.c | 5 +++-- arch/sparc/include/asm/pgtable_64.h | 5 +++-- arch/sparc/mm/tlb.c | 5 +++-- include/asm-generic/pgtable.h | 5 +++-- mm/huge_memory.c | 18 +++++++++--------- mm/pgtable-generic.c | 5 +++-- 7 files changed, 27 insertions(+), 21 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index e8b6e5b8932..2080dfeba64 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1370,10 +1370,11 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd) #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define __HAVE_ARCH_PGTABLE_DEPOSIT -extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable); +extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable); #define __HAVE_ARCH_PGTABLE_WITHDRAW -extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm); +extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); static inline int pmd_trans_splitting(pmd_t pmd) 
{ diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index a938b548f07..1ccbffecc4d 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1117,7 +1117,8 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, } } -void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable) { struct list_head *lh = (struct list_head *) pgtable; @@ -1131,7 +1132,7 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) mm->pmd_huge_pte = pgtable; } -pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm) +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) { struct list_head *lh; pgtable_t pgtable; diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 7619f2f792a..d22b92d6784 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -853,10 +853,11 @@ extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd); #define __HAVE_ARCH_PGTABLE_DEPOSIT -extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable); +extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable); #define __HAVE_ARCH_PGTABLE_WITHDRAW -extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm); +extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); #endif /* Encode and de-code a swap entry */ diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 83d89bcb44a..f828dd33551 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -188,7 +188,8 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, } } -void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable) { struct list_head *lh = (struct list_head *) 
pgtable; @@ -202,7 +203,7 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) mm->pmd_huge_pte = pgtable; } -pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm) +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) { struct list_head *lh; pgtable_t pgtable; diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index a59ff51b016..18e27c21071 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -173,11 +173,12 @@ extern void pmdp_splitting_flush(struct vm_area_struct *vma, #endif #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT -extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable); +extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable); #endif #ifndef __HAVE_ARCH_PGTABLE_WITHDRAW -extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm); +extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); #endif #ifndef __HAVE_ARCH_PMDP_INVALIDATE diff --git a/mm/huge_memory.c b/mm/huge_memory.c index dab90fd6729..6b785e17b67 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -730,7 +730,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, entry = mk_huge_pmd(page, vma); page_add_new_anon_rmap(page, vma, haddr); set_pmd_at(mm, haddr, pmd, entry); - pgtable_trans_huge_deposit(mm, pgtable); + pgtable_trans_huge_deposit(mm, pmd, pgtable); add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); mm->nr_ptes++; spin_unlock(&mm->page_table_lock); @@ -772,7 +772,7 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm, entry = pmd_wrprotect(entry); entry = pmd_mkhuge(entry); set_pmd_at(mm, haddr, pmd, entry); - pgtable_trans_huge_deposit(mm, pgtable); + pgtable_trans_huge_deposit(mm, pmd, pgtable); mm->nr_ptes++; return true; } @@ -917,7 +917,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmdp_set_wrprotect(src_mm, addr, src_pmd); pmd = 
pmd_mkold(pmd_wrprotect(pmd)); set_pmd_at(dst_mm, addr, dst_pmd, pmd); - pgtable_trans_huge_deposit(dst_mm, pgtable); + pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable); dst_mm->nr_ptes++; ret = 0; @@ -987,7 +987,7 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm, pmdp_clear_flush(vma, haddr, pmd); /* leave pmd empty until pte is filled */ - pgtable = pgtable_trans_huge_withdraw(mm); + pgtable = pgtable_trans_huge_withdraw(mm, pmd); pmd_populate(mm, &_pmd, pgtable); for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) { @@ -1085,7 +1085,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, pmdp_clear_flush(vma, haddr, pmd); /* leave pmd empty until pte is filled */ - pgtable = pgtable_trans_huge_withdraw(mm); + pgtable = pgtable_trans_huge_withdraw(mm, pmd); pmd_populate(mm, &_pmd, pgtable); for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) { @@ -1360,7 +1360,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, struct page *page; pgtable_t pgtable; pmd_t orig_pmd; - pgtable = pgtable_trans_huge_withdraw(tlb->mm); + pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd); orig_pmd = pmdp_get_and_clear(tlb->mm, addr, pmd); tlb_remove_pmd_tlb_entry(tlb, pmd, addr); if (is_huge_zero_pmd(orig_pmd)) { @@ -1693,7 +1693,7 @@ static int __split_huge_page_map(struct page *page, pmd = page_check_address_pmd(page, mm, address, PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG); if (pmd) { - pgtable = pgtable_trans_huge_withdraw(mm); + pgtable = pgtable_trans_huge_withdraw(mm, pmd); pmd_populate(mm, &_pmd, pgtable); haddr = address; @@ -2363,7 +2363,7 @@ static void collapse_huge_page(struct mm_struct *mm, page_add_new_anon_rmap(new_page, vma, address); set_pmd_at(mm, address, pmd, _pmd); update_mmu_cache_pmd(vma, address, pmd); - pgtable_trans_huge_deposit(mm, pgtable); + pgtable_trans_huge_deposit(mm, pmd, pgtable); spin_unlock(&mm->page_table_lock); *hpage = NULL; @@ -2669,7 +2669,7 @@ static void 
__split_huge_zero_page_pmd(struct vm_area_struct *vma, pmdp_clear_flush(vma, haddr, pmd); /* leave pmd empty until pte is filled */ - pgtable = pgtable_trans_huge_withdraw(mm); + pgtable = pgtable_trans_huge_withdraw(mm, pmd); pmd_populate(mm, &_pmd, pgtable); for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) { diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 0c8323fe6c8..e1a6e4fab01 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -124,7 +124,8 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT #ifdef CONFIG_TRANSPARENT_HUGEPAGE -void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable) { assert_spin_locked(&mm->page_table_lock); @@ -141,7 +142,7 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable) #ifndef __HAVE_ARCH_PGTABLE_WITHDRAW #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* no "address" argument so destroys page coloring of some arch */ -pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm) +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) { pgtable_t pgtable; -- cgit v1.2.3-18-g5258 From a6bf2bb03e5bad7e9289d80ecb5faac11630c7ab Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 5 Jun 2013 17:14:04 -0700 Subject: mm/THP: withdraw the pgtable after pmdp related operations For architectures like ppc64 we look at deposited pgtable when calling pmdp_get_and_clear. So do the pgtable_trans_huge_withdraw after finishing pmdp related operations. 
Signed-off-by: Aneesh Kumar K.V Reviewed-by: Andrea Arcangeli Cc: Andrea Arcangeli Cc: David Gibson Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Benjamin Herrenschmidt --- mm/huge_memory.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 6b785e17b67..5c4fac2d239 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1360,9 +1360,15 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, struct page *page; pgtable_t pgtable; pmd_t orig_pmd; - pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd); + /* + * For architectures like ppc64 we look at deposited pgtable + * when calling pmdp_get_and_clear. So do the + * pgtable_trans_huge_withdraw after finishing pmdp related + * operations. + */ orig_pmd = pmdp_get_and_clear(tlb->mm, addr, pmd); tlb_remove_pmd_tlb_entry(tlb, pmd, addr); + pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd); if (is_huge_zero_pmd(orig_pmd)) { tlb->mm->nr_ptes--; spin_unlock(&tlb->mm->page_table_lock); -- cgit v1.2.3-18-g5258 From fde52796d487b675cde55427e3347ff3e59f9a7f Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 5 Jun 2013 17:14:05 -0700 Subject: mm/THP: don't use HPAGE_SHIFT in transparent hugepage code For architectures like powerpc that support multiple explicit hugepage sizes, HPAGE_SHIFT indicates the default explicit hugepage shift. For THP to work the hugepage size should be the same as PMD_SIZE. So use PMD_SHIFT directly. So move the define outside CONFIG_TRANSPARENT_HUGEPAGE #ifdef because we want to use these defines in generic code with if (pmd_trans_huge()) conditional. 
Signed-off-by: Aneesh Kumar K.V Cc: Andrea Arcangeli Cc: David Gibson Cc: Andrea Arcangeli Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Benjamin Herrenschmidt --- include/linux/huge_mm.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 528454c2caa..cc276d2c3a4 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -58,12 +58,11 @@ extern pmd_t *page_check_address_pmd(struct page *page, #define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT) #define HPAGE_PMD_NR (1< Date: Wed, 5 Jun 2013 17:14:06 -0700 Subject: mm/THP: deposit the transparent huge pgtable before set_pmd Architectures like powerpc use the deposited pgtable to store hash index values. We need to make sure the deposited pgtable is visible to other cpus before we are ready to take a hash fault. Signed-off-by: Aneesh Kumar K.V Cc: Andrea Arcangeli Cc: David Gibson Cc: Andrea Arcangeli Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Benjamin Herrenschmidt --- mm/huge_memory.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 5c4fac2d239..59d9384b6bb 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -729,8 +729,8 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, pmd_t entry; entry = mk_huge_pmd(page, vma); page_add_new_anon_rmap(page, vma, haddr); - set_pmd_at(mm, haddr, pmd, entry); pgtable_trans_huge_deposit(mm, pmd, pgtable); + set_pmd_at(mm, haddr, pmd, entry); add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); mm->nr_ptes++; spin_unlock(&mm->page_table_lock); @@ -771,8 +771,8 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm, entry = mk_pmd(zero_page, vma->vm_page_prot); entry = pmd_wrprotect(entry); entry = pmd_mkhuge(entry); - set_pmd_at(mm, haddr, pmd, entry); pgtable_trans_huge_deposit(mm, pmd, pgtable); + set_pmd_at(mm, haddr, pmd, entry); mm->nr_ptes++; 
return true; } @@ -916,8 +916,8 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmdp_set_wrprotect(src_mm, addr, src_pmd); pmd = pmd_mkold(pmd_wrprotect(pmd)); - set_pmd_at(dst_mm, addr, dst_pmd, pmd); pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable); + set_pmd_at(dst_mm, addr, dst_pmd, pmd); dst_mm->nr_ptes++; ret = 0; @@ -2367,9 +2367,9 @@ static void collapse_huge_page(struct mm_struct *mm, spin_lock(&mm->page_table_lock); BUG_ON(!pmd_none(*pmd)); page_add_new_anon_rmap(new_page, vma, address); + pgtable_trans_huge_deposit(mm, pmd, pgtable); set_pmd_at(mm, address, pmd, _pmd); update_mmu_cache_pmd(vma, address, pmd); - pgtable_trans_huge_deposit(mm, pmd, pgtable); spin_unlock(&mm->page_table_lock); *hpage = NULL; -- cgit v1.2.3-18-g5258 From e80034047bee9ceacfc1bfff873ebfdd049817ca Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Feb 2013 23:38:51 +0100 Subject: powerpc: Mark low level irq handlers NO_THREAD These low level handlers cannot be threaded. 
Mark them NO_THREAD Reported-by: leroy christophe Tested-by: leroy christophe Signed-off-by: Thomas Gleixner Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/8xx/m8xx_setup.c | 1 + arch/powerpc/sysdev/cpm1.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c index 1e121088826..806cbbd86ec 100644 --- a/arch/powerpc/platforms/8xx/m8xx_setup.c +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c @@ -43,6 +43,7 @@ static irqreturn_t timebase_interrupt(int irq, void *dev) static struct irqaction tbint_irqaction = { .handler = timebase_interrupt, + .flags = IRQF_NO_THREAD, .name = "tbint", }; diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c index d4fa03f2b6a..5e6ff38ea69 100644 --- a/arch/powerpc/sysdev/cpm1.c +++ b/arch/powerpc/sysdev/cpm1.c @@ -120,6 +120,7 @@ static irqreturn_t cpm_error_interrupt(int irq, void *dev) static struct irqaction cpm_error_irqaction = { .handler = cpm_error_interrupt, + .flags = IRQF_NO_THREAD, .name = "error", }; -- cgit v1.2.3-18-g5258 From 85f395c5b0a26b3a80f9e2d35333981a2a75c0ae Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 3 Dec 2012 20:37:42 +0530 Subject: powerpc/kprobes: Do not disable External interrupts during single step External/Decrement exceptions have lower priority than the Debug Exception. So, we don't have to disable the External interrupts before a single step. However, on BookE, Critical Input Exception(CE) has higher priority than a Debug Exception. Hence we mask them. Signed-off-by: Suzuki K. 
Poulose Cc: Sebastian Andrzej Siewior Cc: Ananth N Mavinakaynahalli Cc: Kumar Gala Cc: linuxppc-dev@ozlabs.org Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/kprobes.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 11f5b03a0b0..560f430da47 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -104,13 +104,13 @@ void __kprobes arch_remove_kprobe(struct kprobe *p) static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) { - /* We turn off async exceptions to ensure that the single step will - * be for the instruction we have the kprobe on, if we dont its - * possible we'd get the single step reported for an exception handler - * like Decrementer or External Interrupt */ - regs->msr &= ~MSR_EE; regs->msr |= MSR_SINGLESTEP; #ifdef CONFIG_PPC_ADV_DEBUG_REGS + /* + * We turn off Critical Input Exception(CE) to ensure that the single + * step will be for the instruction we have the probe on; if we don't, + * it is possible we'd get the single step reported for CE. + */ regs->msr &= ~MSR_CE; mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); #ifdef CONFIG_PPC_47x -- cgit v1.2.3-18-g5258 From 35fd219a268cc82cef842518cd64ea6949629ba2 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 3 Dec 2012 20:38:37 +0530 Subject: powerpc: Move the single step enable code to a generic path This patch moves the single step enable code used by kprobe to a generic routine header so that, it can be re-used by other code, in this case, uprobes. No functional changes. Signed-off-by: Suzuki K. 
Poulose Cc: Ananth N Mavinakaynahalli Cc: Kumar Gala Cc: linuxppc-dev@ozlabs.org Acked-by: Ananth N Mavinakayanahalli Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/probes.h | 25 +++++++++++++++++++++++++ arch/powerpc/kernel/kprobes.c | 20 +------------------- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/include/asm/probes.h b/arch/powerpc/include/asm/probes.h index 5f1e15b6870..3421637cfd7 100644 --- a/arch/powerpc/include/asm/probes.h +++ b/arch/powerpc/include/asm/probes.h @@ -38,5 +38,30 @@ typedef u32 ppc_opcode_t; #define is_trap(instr) (IS_TW(instr) || IS_TWI(instr)) #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_ADV_DEBUG_REGS +#define MSR_SINGLESTEP (MSR_DE) +#else +#define MSR_SINGLESTEP (MSR_SE) +#endif + +/* Enable single stepping for the current task */ +static inline void enable_single_step(struct pt_regs *regs) +{ + regs->msr |= MSR_SINGLESTEP; +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + /* + * We turn off Critical Input Exception(CE) to ensure that the single + * step will be for the instruction we have the probe on; if we don't, + * it is possible we'd get the single step reported for CE. 
+ */ + regs->msr &= ~MSR_CE; + mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); +#ifdef CONFIG_PPC_47x + isync(); +#endif +#endif +} + + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_PROBES_H */ diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 560f430da47..2156ea90eb5 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -36,12 +36,6 @@ #include #include -#ifdef CONFIG_PPC_ADV_DEBUG_REGS -#define MSR_SINGLESTEP (MSR_DE) -#else -#define MSR_SINGLESTEP (MSR_SE) -#endif - DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); @@ -104,19 +98,7 @@ void __kprobes arch_remove_kprobe(struct kprobe *p) static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) { - regs->msr |= MSR_SINGLESTEP; -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - /* - * We turn off Critical Input Exception(CE) to ensure that the single - * step will be for the instruction we have the probe on; if we don't, - * it is possible we'd get the single step reported for CE. - */ - regs->msr &= ~MSR_CE; - mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); -#ifdef CONFIG_PPC_47x - isync(); -#endif -#endif + enable_single_step(regs); /* * On powerpc we should single step on the original -- cgit v1.2.3-18-g5258 From 39a421ff0b7cb056e687894a9d5f57aa1303e1c8 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 20 Mar 2013 19:06:12 -0500 Subject: powerpc/mm/nohash: Ignore NULL stale_map entries This happens with threads that are offline due to CPU hotplug (including threads that were never "plugged in" to begin with because SMT is disabled). 
Signed-off-by: Scott Wood Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/mmu_context_nohash.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index e779642c25e..810f8e4d74d 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -112,8 +112,10 @@ static unsigned int steal_context_smp(unsigned int id) */ for_each_cpu(cpu, mm_cpumask(mm)) { for (i = cpu_first_thread_sibling(cpu); - i <= cpu_last_thread_sibling(cpu); i++) - __set_bit(id, stale_map[i]); + i <= cpu_last_thread_sibling(cpu); i++) { + if (stale_map[i]) + __set_bit(id, stale_map[i]); + } cpu = i - 1; } return id; @@ -272,7 +274,8 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) /* XXX This clear should ultimately be part of local_flush_tlb_mm */ for (i = cpu_first_thread_sibling(cpu); i <= cpu_last_thread_sibling(cpu); i++) { - __clear_bit(id, stale_map[i]); + if (stale_map[i]) + __clear_bit(id, stale_map[i]); } } -- cgit v1.2.3-18-g5258 From 3139b0a797d6826519ed98a13623a92f12269613 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Wed, 17 Apr 2013 17:50:35 +0800 Subject: powerpc: Remove the unneeded trigger of decrementer interrupt in decrementer_check_overflow Previously in order to handle the edge sensitive decrementers, we choose to set the decrementer to 1 to trigger a decrementer interrupt when re-enabling interrupts. But with the rework of the lazy EE, we would replay the decrementer interrupt when re-enabling interrupts if a decrementer interrupt occurs with irq soft-disabled. So there is no need to trigger a decrementer interrupt in this case any more. 
Signed-off-by: Kevin Hao Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 5cbcf4d5a80..32fa52e8163 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -116,8 +116,6 @@ static inline notrace int decrementer_check_overflow(void) u64 now = get_tb_or_rtc(); u64 *next_tb = &__get_cpu_var(decrementers_next_tb); - if (now >= *next_tb) - set_dec(1); return now >= *next_tb; } -- cgit v1.2.3-18-g5258 From d5d8ec895ca599fbde43efe3a2f9714315e3d298 Mon Sep 17 00:00:00 2001 From: Daniel Walker Date: Tue, 23 Apr 2013 17:50:33 -0700 Subject: powerpc/mm: Make mmap_64.c compile on 32bit powerpc There appears to be no good reason to keep this as 64bit only. It works on 32bit also, and has checks so that it can work correctly with 32bit binaries on 64bit hardware which is why I think this works. I tested this on qemu using the virtex-ml507 machine type. Before, /bin2 # ./test & cat /proc/${!}/maps 00100000-00103000 r-xp 00000000 00:00 0 [vdso] 10000000-10007000 r-xp 00000000 00:01 454 /bin2/test 10017000-10018000 rw-p 00007000 00:01 454 /bin2/test 48000000-48020000 r-xp 00000000 00:01 224 /lib/ld-2.11.3.so 48021000-48023000 rw-p 00021000 00:01 224 /lib/ld-2.11.3.so bfd03000-bfd24000 rw-p 00000000 00:00 0 [stack] /bin2 # ./test & cat /proc/${!}/maps 00100000-00103000 r-xp 00000000 00:00 0 [vdso] 0fe6e000-0ffd8000 r-xp 00000000 00:01 214 /lib/libc-2.11.3.so 0ffd8000-0ffe8000 ---p 0016a000 00:01 214 /lib/libc-2.11.3.so 0ffe8000-0ffed000 rw-p 0016a000 00:01 214 /lib/libc-2.11.3.so 0ffed000-0fff0000 rw-p 00000000 00:00 0 10000000-10007000 r-xp 00000000 00:01 454 /bin2/test 10017000-10018000 rw-p 00007000 00:01 454 /bin2/test 48000000-48020000 r-xp 00000000 00:01 224 /lib/ld-2.11.3.so 48020000-48021000 rw-p 00000000 00:00 0 48021000-48023000 rw-p 00021000 00:01 224 /lib/ld-2.11.3.so bf98a000-bf9ab000 rw-p 00000000 00:00 0 [stack] /bin2 # 
./test & cat /proc/${!}/maps 00100000-00103000 r-xp 00000000 00:00 0 [vdso] 0fe6e000-0ffd8000 r-xp 00000000 00:01 214 /lib/libc-2.11.3.so 0ffd8000-0ffe8000 ---p 0016a000 00:01 214 /lib/libc-2.11.3.so 0ffe8000-0ffed000 rw-p 0016a000 00:01 214 /lib/libc-2.11.3.so 0ffed000-0fff0000 rw-p 00000000 00:00 0 10000000-10007000 r-xp 00000000 00:01 454 /bin2/test 10017000-10018000 rw-p 00007000 00:01 454 /bin2/test 48000000-48020000 r-xp 00000000 00:01 224 /lib/ld-2.11.3.so 48020000-48021000 rw-p 00000000 00:00 0 48021000-48023000 rw-p 00021000 00:01 224 /lib/ld-2.11.3.so bfa54000-bfa75000 rw-p 00000000 00:00 0 [stack] After, bash-4.1# ./test & cat /proc/${!}/maps [7] 803 00100000-00103000 r-xp 00000000 00:00 0 [vdso] 10000000-10007000 r-xp 00000000 00:01 454 /bin2/test 10017000-10018000 rw-p 00007000 00:01 454 /bin2/test b7eb0000-b7ed0000 r-xp 00000000 00:01 224 /lib/ld-2.11.3.so b7ed1000-b7ed3000 rw-p 00021000 00:01 224 /lib/ld-2.11.3.so bfbc0000-bfbe1000 rw-p 00000000 00:00 0 [stack] bash-4.1# ./test & cat /proc/${!}/maps [8] 805 00100000-00103000 r-xp 00000000 00:00 0 [vdso] 10000000-10007000 r-xp 00000000 00:01 454 /bin2/test 10017000-10018000 rw-p 00007000 00:01 454 /bin2/test b7b03000-b7b23000 r-xp 00000000 00:01 224 /lib/ld-2.11.3.so b7b24000-b7b26000 rw-p 00021000 00:01 224 /lib/ld-2.11.3.so bfc27000-bfc48000 rw-p 00000000 00:00 0 [stack] bash-4.1# ./test & cat /proc/${!}/maps [9] 807 00100000-00103000 r-xp 00000000 00:00 0 [vdso] 10000000-10007000 r-xp 00000000 00:01 454 /bin2/test 10017000-10018000 rw-p 00007000 00:01 454 /bin2/test b7f37000-b7f57000 r-xp 00000000 00:01 224 /lib/ld-2.11.3.so b7f58000-b7f5a000 rw-p 00021000 00:01 224 /lib/ld-2.11.3.so bff96000-bffb7000 rw-p 00000000 00:00 0 [stack] Signed-off-by: Daniel Walker Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/processor.h | 2 - arch/powerpc/mm/Makefile | 5 +- arch/powerpc/mm/mmap.c | 101 +++++++++++++++++++++++++++++++++++ arch/powerpc/mm/mmap_64.c | 101 
----------------------------------- 4 files changed, 103 insertions(+), 106 deletions(-) create mode 100644 arch/powerpc/mm/mmap.c delete mode 100644 arch/powerpc/mm/mmap_64.c diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 14a65836369..7135a257f7c 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -404,9 +404,7 @@ static inline void prefetchw(const void *x) #define spin_lock_prefetch(x) prefetchw(x) -#ifdef CONFIG_PPC64 #define HAVE_ARCH_PICK_MMAP_LAYOUT -#endif #ifdef CONFIG_PPC64 static inline unsigned long get_clean_sp(unsigned long sp, int is_32) diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index cf16b5733ea..26f29a77241 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -6,17 +6,16 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) -obj-y := fault.o mem.o pgtable.o gup.o \ +obj-y := fault.o mem.o pgtable.o gup.o mmap.o \ init_$(CONFIG_WORD_SIZE).o \ pgtable_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ tlb_nohash_low.o obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(CONFIG_WORD_SIZE)e.o -obj-$(CONFIG_PPC64) += mmap_64.o hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o obj-$(CONFIG_PPC_STD_MMU_64) += hash_utils_64.o \ slb_low.o slb.o stab.o \ - mmap_64.o $(hash64-y) + $(hash64-y) obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \ tlb_hash$(CONFIG_WORD_SIZE).o \ diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c new file mode 100644 index 00000000000..67a42ed0d2f --- /dev/null +++ b/arch/powerpc/mm/mmap.c @@ -0,0 +1,101 @@ +/* + * flexible mmap layout support + * + * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. + * All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * Started by Ingo Molnar + */ + +#include +#include +#include +#include + +/* + * Top of mmap area (just below the process stack). + * + * Leave at least a ~128 MB hole on 32bit applications. + * + * On 64bit applications we randomise the stack by 1GB so we need to + * space our mmap start address by a further 1GB, otherwise there is a + * chance the mmap area will end up closer to the stack than our ulimit + * requires. + */ +#define MIN_GAP32 (128*1024*1024) +#define MIN_GAP64 ((128 + 1024)*1024*1024UL) +#define MIN_GAP ((is_32bit_task()) ? 
MIN_GAP32 : MIN_GAP64) +#define MAX_GAP (TASK_SIZE/6*5) + +static inline int mmap_is_legacy(void) +{ + if (current->personality & ADDR_COMPAT_LAYOUT) + return 1; + + if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) + return 1; + + return sysctl_legacy_va_layout; +} + +static unsigned long mmap_rnd(void) +{ + unsigned long rnd = 0; + + if (current->flags & PF_RANDOMIZE) { + /* 8MB for 32bit, 1GB for 64bit */ + if (is_32bit_task()) + rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT))); + else + rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT))); + } + return rnd << PAGE_SHIFT; +} + +static inline unsigned long mmap_base(void) +{ + unsigned long gap = rlimit(RLIMIT_STACK); + + if (gap < MIN_GAP) + gap = MIN_GAP; + else if (gap > MAX_GAP) + gap = MAX_GAP; + + return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd()); +} + +/* + * This function, called very early during the creation of a new + * process VM image, sets up which VM layout function to use: + */ +void arch_pick_mmap_layout(struct mm_struct *mm) +{ + /* + * Fall back to the standard layout if the personality + * bit is set, or if the expected stack growth is unlimited: + */ + if (mmap_is_legacy()) { + mm->mmap_base = TASK_UNMAPPED_BASE; + mm->get_unmapped_area = arch_get_unmapped_area; + mm->unmap_area = arch_unmap_area; + } else { + mm->mmap_base = mmap_base(); + mm->get_unmapped_area = arch_get_unmapped_area_topdown; + mm->unmap_area = arch_unmap_area_topdown; + } +} diff --git a/arch/powerpc/mm/mmap_64.c b/arch/powerpc/mm/mmap_64.c deleted file mode 100644 index 67a42ed0d2f..00000000000 --- a/arch/powerpc/mm/mmap_64.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - * flexible mmap layout support - * - * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. - * All Rights Reserved. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * - * Started by Ingo Molnar - */ - -#include -#include -#include -#include - -/* - * Top of mmap area (just below the process stack). - * - * Leave at least a ~128 MB hole on 32bit applications. - * - * On 64bit applications we randomise the stack by 1GB so we need to - * space our mmap start address by a further 1GB, otherwise there is a - * chance the mmap area will end up closer to the stack than our ulimit - * requires. - */ -#define MIN_GAP32 (128*1024*1024) -#define MIN_GAP64 ((128 + 1024)*1024*1024UL) -#define MIN_GAP ((is_32bit_task()) ? 
MIN_GAP32 : MIN_GAP64) -#define MAX_GAP (TASK_SIZE/6*5) - -static inline int mmap_is_legacy(void) -{ - if (current->personality & ADDR_COMPAT_LAYOUT) - return 1; - - if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) - return 1; - - return sysctl_legacy_va_layout; -} - -static unsigned long mmap_rnd(void) -{ - unsigned long rnd = 0; - - if (current->flags & PF_RANDOMIZE) { - /* 8MB for 32bit, 1GB for 64bit */ - if (is_32bit_task()) - rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT))); - else - rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT))); - } - return rnd << PAGE_SHIFT; -} - -static inline unsigned long mmap_base(void) -{ - unsigned long gap = rlimit(RLIMIT_STACK); - - if (gap < MIN_GAP) - gap = MIN_GAP; - else if (gap > MAX_GAP) - gap = MAX_GAP; - - return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd()); -} - -/* - * This function, called very early during the creation of a new - * process VM image, sets up which VM layout function to use: - */ -void arch_pick_mmap_layout(struct mm_struct *mm) -{ - /* - * Fall back to the standard layout if the personality - * bit is set, or if the expected stack growth is unlimited: - */ - if (mmap_is_legacy()) { - mm->mmap_base = TASK_UNMAPPED_BASE; - mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; - } else { - mm->mmap_base = mmap_base(); - mm->get_unmapped_area = arch_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; - } -} -- cgit v1.2.3-18-g5258 From 0962e8004e97409072bb6caee7b3ba948a5fb93a Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 24 Apr 2013 14:26:30 +0800 Subject: powerpc/prom: Scan reserved-ranges node for memory reservations Based on benh's proposal at https://lists.ozlabs.org/pipermail/linuxppc-dev/2012-September/101237.html, this change provides support for reserving memory from the reserved-ranges node at the root of the device tree. We just call memblock_reserve on these ranges for now. 
Signed-off-by: Jeremy Kerr Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 8b6f7a99cce..9c753bc9885 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -559,6 +559,33 @@ void __init early_init_dt_setup_initrd_arch(unsigned long start, } #endif +static bool __init early_reserve_mem_dt(void) +{ + unsigned long i, len, dt_root; + const __be32 *prop; + + dt_root = of_get_flat_dt_root(); + + prop = of_get_flat_dt_prop(dt_root, "reserved-ranges", &len); + + if (!prop) + return false; + + /* Each reserved range is an (address,size) pair, 2 cells each, + * totalling 4 cells per range. */ + for (i = 0; i < len / (sizeof(*prop) * 4); i++) { + u64 base, size; + + base = of_read_number(prop + (i * 4) + 0, 2); + size = of_read_number(prop + (i * 4) + 2, 2); + + if (size) + memblock_reserve(base, size); + } + + return true; +} + static void __init early_reserve_mem(void) { u64 base, size; @@ -574,6 +601,14 @@ static void __init early_reserve_mem(void) self_size = initial_boot_params->totalsize; memblock_reserve(self_base, self_size); + /* + * Try looking for reserved-ranges property in the DT first; if + * it's present, it'll contain all of the necessary reservation + * info + */ + if (early_reserve_mem_dt()) + return; + #ifdef CONFIG_BLK_DEV_INITRD /* then reserve the initrd, if any */ if (initrd_start && (initrd_end > initrd_start)) -- cgit v1.2.3-18-g5258 From 071df9422ac91c0d290e81f5ae2635c74cda6d00 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Mon, 29 Apr 2013 13:42:43 +1000 Subject: powerpc: Add a configuration option for early BootX/OpenFirmware debug Signed-off-by: Alistair Popple Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/Kconfig.debug | 7 +++++++ arch/powerpc/kernel/udbg.c | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git
a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 863d877e0b5..d86875f3e17 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -147,6 +147,13 @@ choice enable debugging for the wrong type of machine your kernel _will not boot_. +config PPC_EARLY_DEBUG_BOOTX + bool "BootX or OpenFirmware" + depends on BOOTX_TEXT + help + Select this to enable early debugging for a machine using BootX + or OpenFirmware. + config PPC_EARLY_DEBUG_LPAR bool "LPAR HV Console" depends on PPC_PSERIES diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index 9d3fdcd6629..a15837519dc 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -50,7 +50,7 @@ void __init udbg_early_init(void) udbg_init_debug_beat(); #elif defined(CONFIG_PPC_EARLY_DEBUG_PAS_REALMODE) udbg_init_pas_realmode(); -#elif defined(CONFIG_BOOTX_TEXT) +#elif defined(CONFIG_PPC_EARLY_DEBUG_BOOTX) udbg_init_btext(); #elif defined(CONFIG_PPC_EARLY_DEBUG_44x) /* PPC44x debug */ -- cgit v1.2.3-18-g5258 From b9ef7d6b11c120cc402a76013062061bbe0fbaad Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Mon, 29 Apr 2013 13:42:44 +1000 Subject: powerpc: Update default configurations Update default configurations for systems with CONFIG_BOOTX_TEXT selected so that they continue to print early debug messages as is currently the case. 
Signed-off-by: Alistair Popple Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/configs/c2k_defconfig | 2 ++ arch/powerpc/configs/g5_defconfig | 2 ++ arch/powerpc/configs/maple_defconfig | 2 ++ arch/powerpc/configs/pmac32_defconfig | 2 ++ arch/powerpc/configs/ppc64_defconfig | 2 ++ arch/powerpc/configs/ppc6xx_defconfig | 2 ++ 6 files changed, 12 insertions(+) diff --git a/arch/powerpc/configs/c2k_defconfig b/arch/powerpc/configs/c2k_defconfig index 2a84fd7f631..671a8f960af 100644 --- a/arch/powerpc/configs/c2k_defconfig +++ b/arch/powerpc/configs/c2k_defconfig @@ -423,6 +423,8 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_BOOTX_TEXT=y +CONFIG_PPC_EARLY_DEBUG=y +CONFIG_PPC_EARLY_DEBUG_BOOTX=y CONFIG_KEYS=y CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_SECURITY=y diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index 07b7f2af2dc..1ea22fc24ea 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -284,6 +284,8 @@ CONFIG_DEBUG_MUTEXES=y CONFIG_LATENCYTOP=y CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BOOTX_TEXT=y +CONFIG_PPC_EARLY_DEBUG=y +CONFIG_PPC_EARLY_DEBUG_BOOTX=y CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_ECB=m diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig index 02ac96b679b..2a5afac2986 100644 --- a/arch/powerpc/configs/maple_defconfig +++ b/arch/powerpc/configs/maple_defconfig @@ -138,6 +138,8 @@ CONFIG_DEBUG_STACK_USAGE=y CONFIG_XMON=y CONFIG_XMON_DEFAULT=y CONFIG_BOOTX_TEXT=y +CONFIG_PPC_EARLY_DEBUG=y +CONFIG_PPC_EARLY_DEBUG_BOOTX=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index 29767a8dfea..a73626b0905 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -350,6 +350,8 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_XMON=y 
CONFIG_XMON_DEFAULT=y CONFIG_BOOTX_TEXT=y +CONFIG_PPC_EARLY_DEBUG=y +CONFIG_PPC_EARLY_DEBUG_BOOTX=y CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MD4=m diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index aef3f71de5a..c86fcb92358 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -398,6 +398,8 @@ CONFIG_FTR_FIXUP_SELFTEST=y CONFIG_MSI_BITMAP_SELFTEST=y CONFIG_XMON=y CONFIG_BOOTX_TEXT=y +CONFIG_PPC_EARLY_DEBUG=y +CONFIG_PPC_EARLY_DEBUG_BOOTX=y CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_PCBC=m diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index be1cb6ea3a3..20ebfaf7234 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -1264,6 +1264,8 @@ CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_XMON=y CONFIG_BOOTX_TEXT=y +CONFIG_PPC_EARLY_DEBUG=y +CONFIG_PPC_EARLY_DEBUG_BOOTX=y CONFIG_KEYS=y CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_SECURITY=y -- cgit v1.2.3-18-g5258 From 70a54a4faec72ee9d12b9c4dfa27bc241deb79a6 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 6 May 2013 21:32:40 +1000 Subject: powerpc: Fix single step emulation of 32bit overflowed branches Check truncate_if_32bit() on final write to nip. 
Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/lib/sstep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index e15c521846c..99c7fc16dc0 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -580,7 +580,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) if (instr & 1) regs->link = regs->nip; if (branch_taken(instr, regs)) - regs->nip = imm; + regs->nip = truncate_if_32bit(regs->msr, imm); return 1; #ifdef CONFIG_PPC64 case 17: /* sc */ -- cgit v1.2.3-18-g5258 From ab9a4183fddf232a46b6255e0d3da5a09f85ecbd Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Thu, 9 May 2013 10:42:13 +1000 Subject: powerpc: Update currituck pci/usb fixup for new board revision The currituck board uses a different IRQ for the pci usb host controller depending on the board revision. This patch adds support for newer board revisions by retrieving the board revision from the FPGA and mapping the appropriate IRQ. 
Signed-off-by: Alistair Popple Acked-by: Tony Breeds Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/boot/dts/currituck.dts | 5 +++++ arch/powerpc/platforms/44x/currituck.c | 39 ++++++++++++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/boot/dts/currituck.dts b/arch/powerpc/boot/dts/currituck.dts index b801dd06e57..d2c8a872308 100644 --- a/arch/powerpc/boot/dts/currituck.dts +++ b/arch/powerpc/boot/dts/currituck.dts @@ -103,6 +103,11 @@ interrupts = <34 2>; }; + FPGA0: fpga@50000000 { + compatible = "ibm,currituck-fpga"; + reg = <0x50000000 0x4>; + }; + IIC0: i2c@00000000 { compatible = "ibm,iic-currituck", "ibm,iic"; reg = <0x0 0x00000014>; diff --git a/arch/powerpc/platforms/44x/currituck.c b/arch/powerpc/platforms/44x/currituck.c index ecd3890c40d..c52e1b3c9be 100644 --- a/arch/powerpc/platforms/44x/currituck.c +++ b/arch/powerpc/platforms/44x/currituck.c @@ -176,13 +176,48 @@ static int __init ppc47x_probe(void) return 1; } +static int board_rev = -1; +static int __init ppc47x_get_board_rev(void) +{ + u8 fpga_reg0; + void *fpga; + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, "ibm,currituck-fpga"); + if (!np) + goto fail; + + fpga = of_iomap(np, 0); + of_node_put(np); + if (!fpga) + goto fail; + + fpga_reg0 = ioread8(fpga); + board_rev = fpga_reg0 & 0x03; + pr_info("%s: Found board revision %d\n", __func__, board_rev); + iounmap(fpga); + return 0; + +fail: + pr_info("%s: Unable to find board revision\n", __func__); + return 0; +} +machine_arch_initcall(ppc47x, ppc47x_get_board_rev); + /* Use USB controller should have been hardware swizzled but it wasn't :( */ static void ppc47x_pci_irq_fixup(struct pci_dev *dev) { if (dev->vendor == 0x1033 && (dev->device == 0x0035 || dev->device == 0x00e0)) { - dev->irq = irq_create_mapping(NULL, 47); - pr_info("%s: Mapping irq 47 %d\n", __func__, dev->irq); + if (board_rev == 0) { + dev->irq = irq_create_mapping(NULL, 47); + pr_info("%s: 
Mapping irq %d\n", __func__, dev->irq); + } else if (board_rev == 2) { + dev->irq = irq_create_mapping(NULL, 49); + pr_info("%s: Mapping irq %d\n", __func__, dev->irq); + } else { + pr_alert("%s: Unknown board revision\n", __func__); + } } } -- cgit v1.2.3-18-g5258 From 4e13c1ac6baa1d6c2b650d66ca89e1e12727ec19 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 21 May 2013 13:33:09 +1000 Subject: powerpc/vfio: Enable on PowerNV platform This initializes IOMMU groups based on the IOMMU configuration discovered during the PCI scan on POWERNV (POWER non virtualized) platform. The IOMMU groups are to be used later by the VFIO driver, which is used for PCI pass through. It also implements an API for mapping/unmapping pages for guest PCI drivers and providing DMA window properties. This API is going to be used later by QEMU-VFIO to handle h_put_tce hypercalls from the KVM guest. The iommu_put_tce_user_mode() function maps only a single page, because an API for adding many mappings at once is going to be added later. Although this driver has been tested only on the POWERNV platform, it should work on any platform which supports TCE tables. As the h_put_tce hypercall is received by the host kernel and processed by QEMU (which involves calling the host kernel again), performance is not the best - circa 220MB/s on 10Gb ethernet network. To enable VFIO on POWER, enable SPAPR_TCE_IOMMU config option and configure VFIO as required.
Cc: David Gibson Signed-off-by: Alexey Kardashevskiy Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/iommu.h | 26 +++ arch/powerpc/kernel/iommu.c | 323 ++++++++++++++++++++++++++++ arch/powerpc/platforms/powernv/pci-ioda.c | 1 + arch/powerpc/platforms/powernv/pci-p5ioc2.c | 5 +- arch/powerpc/platforms/powernv/pci.c | 2 + drivers/iommu/Kconfig | 8 + 6 files changed, 364 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index cbfe678e3db..98d14229f89 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -76,6 +76,9 @@ struct iommu_table { struct iommu_pool large_pool; struct iommu_pool pools[IOMMU_NR_POOLS]; unsigned long *it_map; /* A simple allocation bitmap for now */ +#ifdef CONFIG_IOMMU_API + struct iommu_group *it_group; +#endif }; struct scatterlist; @@ -98,6 +101,8 @@ extern void iommu_free_table(struct iommu_table *tbl, const char *node_name); */ extern struct iommu_table *iommu_init_table(struct iommu_table * tbl, int nid); +extern void iommu_register_group(struct iommu_table *tbl, + int pci_domain_number, unsigned long pe_num); extern int iommu_map_sg(struct device *dev, struct iommu_table *tbl, struct scatterlist *sglist, int nelems, @@ -147,5 +152,26 @@ static inline void iommu_restore(void) } #endif +/* The API to support IOMMU operations for VFIO */ +extern int iommu_tce_clear_param_check(struct iommu_table *tbl, + unsigned long ioba, unsigned long tce_value, + unsigned long npages); +extern int iommu_tce_put_param_check(struct iommu_table *tbl, + unsigned long ioba, unsigned long tce); +extern int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, + unsigned long hwaddr, enum dma_data_direction direction); +extern unsigned long iommu_clear_tce(struct iommu_table *tbl, + unsigned long entry); +extern int iommu_clear_tces_and_put_pages(struct iommu_table *tbl, + unsigned long entry, unsigned long 
pages); +extern int iommu_put_tce_user_mode(struct iommu_table *tbl, + unsigned long entry, unsigned long tce); + +extern void iommu_flush_tce(struct iommu_table *tbl); +extern int iommu_take_ownership(struct iommu_table *tbl); +extern void iommu_release_ownership(struct iommu_table *tbl); + +extern enum dma_data_direction iommu_tce_direction(unsigned long tce); + #endif /* __KERNEL__ */ #endif /* _ASM_IOMMU_H */ diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index c0d0dbddfba..b20ff173a67 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include #include #include @@ -44,6 +46,7 @@ #include #include #include +#include #define DBG(...) @@ -724,6 +727,13 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name) if (tbl->it_offset == 0) clear_bit(0, tbl->it_map); +#ifdef CONFIG_IOMMU_API + if (tbl->it_group) { + iommu_group_put(tbl->it_group); + BUG_ON(tbl->it_group); + } +#endif + /* verify that table contains no entries */ if (!bitmap_empty(tbl->it_map, tbl->it_size)) pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name); @@ -860,3 +870,316 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size, free_pages((unsigned long)vaddr, get_order(size)); } } + +#ifdef CONFIG_IOMMU_API +/* + * SPAPR TCE API + */ +static void group_release(void *iommu_data) +{ + struct iommu_table *tbl = iommu_data; + tbl->it_group = NULL; +} + +void iommu_register_group(struct iommu_table *tbl, + int pci_domain_number, unsigned long pe_num) +{ + struct iommu_group *grp; + char *name; + + grp = iommu_group_alloc(); + if (IS_ERR(grp)) { + pr_warn("powerpc iommu api: cannot create new group, err=%ld\n", + PTR_ERR(grp)); + return; + } + tbl->it_group = grp; + iommu_group_set_iommudata(grp, tbl, group_release); + name = kasprintf(GFP_KERNEL, "domain%d-pe%lx", + pci_domain_number, pe_num); + if (!name) + return; + iommu_group_set_name(grp, name); + 
kfree(name); +} + +enum dma_data_direction iommu_tce_direction(unsigned long tce) +{ + if ((tce & TCE_PCI_READ) && (tce & TCE_PCI_WRITE)) + return DMA_BIDIRECTIONAL; + else if (tce & TCE_PCI_READ) + return DMA_TO_DEVICE; + else if (tce & TCE_PCI_WRITE) + return DMA_FROM_DEVICE; + else + return DMA_NONE; +} +EXPORT_SYMBOL_GPL(iommu_tce_direction); + +void iommu_flush_tce(struct iommu_table *tbl) +{ + /* Flush/invalidate TLB caches if necessary */ + if (ppc_md.tce_flush) + ppc_md.tce_flush(tbl); + + /* Make sure updates are seen by hardware */ + mb(); +} +EXPORT_SYMBOL_GPL(iommu_flush_tce); + +int iommu_tce_clear_param_check(struct iommu_table *tbl, + unsigned long ioba, unsigned long tce_value, + unsigned long npages) +{ + /* ppc_md.tce_free() does not support any value but 0 */ + if (tce_value) + return -EINVAL; + + if (ioba & ~IOMMU_PAGE_MASK) + return -EINVAL; + + ioba >>= IOMMU_PAGE_SHIFT; + if (ioba < tbl->it_offset) + return -EINVAL; + + if ((ioba + npages) > (tbl->it_offset + tbl->it_size)) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check); + +int iommu_tce_put_param_check(struct iommu_table *tbl, + unsigned long ioba, unsigned long tce) +{ + if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ))) + return -EINVAL; + + if (tce & ~(IOMMU_PAGE_MASK | TCE_PCI_WRITE | TCE_PCI_READ)) + return -EINVAL; + + if (ioba & ~IOMMU_PAGE_MASK) + return -EINVAL; + + ioba >>= IOMMU_PAGE_SHIFT; + if (ioba < tbl->it_offset) + return -EINVAL; + + if ((ioba + 1) > (tbl->it_offset + tbl->it_size)) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL_GPL(iommu_tce_put_param_check); + +unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry) +{ + unsigned long oldtce; + struct iommu_pool *pool = get_pool(tbl, entry); + + spin_lock(&(pool->lock)); + + oldtce = ppc_md.tce_get(tbl, entry); + if (oldtce & (TCE_PCI_WRITE | TCE_PCI_READ)) + ppc_md.tce_free(tbl, entry, 1); + else + oldtce = 0; + + spin_unlock(&(pool->lock)); + + return oldtce; +} 
+EXPORT_SYMBOL_GPL(iommu_clear_tce); + +int iommu_clear_tces_and_put_pages(struct iommu_table *tbl, + unsigned long entry, unsigned long pages) +{ + unsigned long oldtce; + struct page *page; + + for ( ; pages; --pages, ++entry) { + oldtce = iommu_clear_tce(tbl, entry); + if (!oldtce) + continue; + + page = pfn_to_page(oldtce >> PAGE_SHIFT); + WARN_ON(!page); + if (page) { + if (oldtce & TCE_PCI_WRITE) + SetPageDirty(page); + put_page(page); + } + } + + return 0; +} +EXPORT_SYMBOL_GPL(iommu_clear_tces_and_put_pages); + +/* + * hwaddr is a kernel virtual address here (0xc... bazillion), + * tce_build converts it to a physical address. + */ +int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, + unsigned long hwaddr, enum dma_data_dire