diff options
author | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2010-10-25 16:10:11 +0200 |
---|---|---|
committer | Martin Schwidefsky <sky@mschwide.boeblingen.de.ibm.com> | 2010-10-25 16:10:15 +0200 |
commit | 80217147a3d80c8a4e48f06e2f6e965455f3fe2a (patch) | |
tree | b419ae9ee3ab0e5b92c0ed2a30ff59b76d6a4978 /arch/s390/mm/pgtable.c | |
parent | 87799ebab760dd1460f6e4193d4f71ba416d1451 (diff) |
[S390] lockless get_user_pages_fast()
Implement get_user_pages_fast without locking in the fastpath on s390.
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch/s390/mm/pgtable.c')
-rw-r--r-- | arch/s390/mm/pgtable.c | 171 |
1 files changed, 154 insertions, 17 deletions
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 8d999249d35..19338d228c9 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -15,6 +15,7 @@ #include <linux/spinlock.h> #include <linux/module.h> #include <linux/quicklist.h> +#include <linux/rcupdate.h> #include <asm/system.h> #include <asm/pgtable.h> @@ -23,6 +24,67 @@ #include <asm/tlbflush.h> #include <asm/mmu_context.h> +struct rcu_table_freelist { + struct rcu_head rcu; + struct mm_struct *mm; + unsigned int pgt_index; + unsigned int crst_index; + unsigned long *table[0]; +}; + +#define RCU_FREELIST_SIZE \ + ((PAGE_SIZE - sizeof(struct rcu_table_freelist)) \ + / sizeof(unsigned long)) + +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +static DEFINE_PER_CPU(struct rcu_table_freelist *, rcu_table_freelist); + +static void __page_table_free(struct mm_struct *mm, unsigned long *table); +static void __crst_table_free(struct mm_struct *mm, unsigned long *table); + +static struct rcu_table_freelist *rcu_table_freelist_get(struct mm_struct *mm) +{ + struct rcu_table_freelist **batchp = &__get_cpu_var(rcu_table_freelist); + struct rcu_table_freelist *batch = *batchp; + + if (batch) + return batch; + batch = (struct rcu_table_freelist *) __get_free_page(GFP_ATOMIC); + if (batch) { + batch->mm = mm; + batch->pgt_index = 0; + batch->crst_index = RCU_FREELIST_SIZE; + *batchp = batch; + } + return batch; +} + +static void rcu_table_freelist_callback(struct rcu_head *head) +{ + struct rcu_table_freelist *batch = + container_of(head, struct rcu_table_freelist, rcu); + + while (batch->pgt_index > 0) + __page_table_free(batch->mm, batch->table[--batch->pgt_index]); + while (batch->crst_index < RCU_FREELIST_SIZE) + __crst_table_free(batch->mm, batch->table[batch->crst_index++]); + free_page((unsigned long) batch); +} + +void rcu_table_freelist_finish(void) +{ + struct rcu_table_freelist *batch = __get_cpu_var(rcu_table_freelist); + + if (!batch) + return; + call_rcu(&batch->rcu, rcu_table_freelist_callback); + __get_cpu_var(rcu_table_freelist) = NULL; +} + +static void smp_sync(void *arg) +{ +} + #ifndef CONFIG_64BIT #define ALLOC_ORDER 1 #define TABLES_PER_PAGE 4 @@ -78,25 +140,55 @@ unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) } page->index = page_to_phys(shadow); } - spin_lock(&mm->context.list_lock); + spin_lock_bh(&mm->context.list_lock); list_add(&page->lru, &mm->context.crst_list); - spin_unlock(&mm->context.list_lock); + spin_unlock_bh(&mm->context.list_lock); return (unsigned long *) page_to_phys(page); } -void crst_table_free(struct mm_struct *mm, unsigned long *table) +static void __crst_table_free(struct mm_struct *mm, unsigned long *table) { unsigned long *shadow = get_shadow_table(table); - struct page *page = virt_to_page(table); - spin_lock(&mm->context.list_lock); - list_del(&page->lru); - spin_unlock(&mm->context.list_lock); if (shadow) free_pages((unsigned long) shadow, ALLOC_ORDER); free_pages((unsigned long) table, ALLOC_ORDER); } +void crst_table_free(struct mm_struct *mm, unsigned long *table) +{ + struct page *page = virt_to_page(table); + + spin_lock_bh(&mm->context.list_lock); + list_del(&page->lru); + spin_unlock_bh(&mm->context.list_lock); + __crst_table_free(mm, table); +} + +void crst_table_free_rcu(struct mm_struct *mm, unsigned long *table) +{ + struct rcu_table_freelist *batch; + struct page *page = virt_to_page(table); + + spin_lock_bh(&mm->context.list_lock); + list_del(&page->lru); + spin_unlock_bh(&mm->context.list_lock); + if (atomic_read(&mm->mm_users) < 2 && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { + __crst_table_free(mm, table); + return; + } + batch = rcu_table_freelist_get(mm); + if (!batch) { + smp_call_function(smp_sync, NULL, 1); + __crst_table_free(mm, table); + return; + } + batch->table[--batch->crst_index] = table; + if (batch->pgt_index >= batch->crst_index) + rcu_table_freelist_finish(); +} + #ifdef CONFIG_64BIT int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) { @@ -108,7 +200,7 @@ repeat: table = crst_table_alloc(mm, mm->context.noexec); if (!table) return -ENOMEM; - spin_lock(&mm->page_table_lock); + spin_lock_bh(&mm->page_table_lock); if (mm->context.asce_limit < limit) { pgd = (unsigned long *) mm->pgd; if (mm->context.asce_limit <= (1UL << 31)) { @@ -130,7 +222,7 @@ repeat: mm->task_size = mm->context.asce_limit; table = NULL; } - spin_unlock(&mm->page_table_lock); + spin_unlock_bh(&mm->page_table_lock); if (table) crst_table_free(mm, table); if (mm->context.asce_limit < limit) @@ -182,7 +274,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) unsigned long bits; bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; - spin_lock(&mm->context.list_lock); + spin_lock_bh(&mm->context.list_lock); page = NULL; if (!list_empty(&mm->context.pgtable_list)) { page = list_first_entry(&mm->context.pgtable_list, @@ -191,7 +283,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) page = NULL; } if (!page) { - spin_unlock(&mm->context.list_lock); + spin_unlock_bh(&mm->context.list_lock); page = alloc_page(GFP_KERNEL|__GFP_REPEAT); if (!page) return NULL; @@ -202,7 +294,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) clear_table_pgstes(table); else clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); - spin_lock(&mm->context.list_lock); + spin_lock_bh(&mm->context.list_lock); list_add(&page->lru, &mm->context.pgtable_list); } table = (unsigned long *) page_to_phys(page); @@ -213,10 +305,25 @@ unsigned long *page_table_alloc(struct mm_struct *mm) page->flags |= bits; if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) list_move_tail(&page->lru, &mm->context.pgtable_list); - spin_unlock(&mm->context.list_lock); + spin_unlock_bh(&mm->context.list_lock); return table; } +static void __page_table_free(struct mm_struct *mm, unsigned long *table) +{ + struct page *page; + unsigned long bits; + + bits = ((unsigned long) table) & 15; + table = (unsigned long *)(((unsigned long) table) ^ bits); + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + page->flags ^= bits; + if (!(page->flags & FRAG_MASK)) { + pgtable_page_dtor(page); + __free_page(page); + } +} + void page_table_free(struct mm_struct *mm, unsigned long *table) { struct page *page; @@ -225,7 +332,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - spin_lock(&mm->context.list_lock); + spin_lock_bh(&mm->context.list_lock); page->flags ^= bits; if (page->flags & FRAG_MASK) { /* Page now has some free pgtable fragments. */ @@ -234,18 +341,48 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) } else /* All fragments of the 4K page have been freed. */ list_del(&page->lru); - spin_unlock(&mm->context.list_lock); + spin_unlock_bh(&mm->context.list_lock); if (page) { pgtable_page_dtor(page); __free_page(page); } } +void page_table_free_rcu(struct mm_struct *mm, unsigned long *table) +{ + struct rcu_table_freelist *batch; + struct page *page; + unsigned long bits; + + if (atomic_read(&mm->mm_users) < 2 && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { + page_table_free(mm, table); + return; + } + batch = rcu_table_freelist_get(mm); + if (!batch) { + smp_call_function(smp_sync, NULL, 1); + page_table_free(mm, table); + return; + } + bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; + bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + spin_lock_bh(&mm->context.list_lock); + /* Delayed freeing with rcu prevents reuse of pgtable fragments */ + list_del_init(&page->lru); + spin_unlock_bh(&mm->context.list_lock); + table = (unsigned long *)(((unsigned long) table) | bits); + batch->table[batch->pgt_index++] = table; + if (batch->pgt_index >= batch->crst_index) + rcu_table_freelist_finish(); +} + void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) { struct page *page; - spin_lock(&mm->context.list_lock); + spin_lock_bh(&mm->context.list_lock); /* Free shadow region and segment tables. */ list_for_each_entry(page, &mm->context.crst_list, lru) if (page->index) { @@ -255,7 +392,7 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) /* "Free" second halves of page tables. */ list_for_each_entry(page, &mm->context.pgtable_list, lru) page->flags &= ~SECOND_HALVES; - spin_unlock(&mm->context.list_lock); + spin_unlock_bh(&mm->context.list_lock); mm->context.noexec = 0; update_mm(mm, tsk); } |