aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHugh Dickins <hugh@veritas.com>2007-01-23 16:46:22 +0100
committerAdrian Bunk <bunk@stusta.de>2007-01-23 16:46:22 +0100
commitfaa309e7b921b2104a42d4ac0e0122f3399a3789 (patch)
treedca8e140dd19e48207f9c8baaf8b0dd53b7b52c2
parent891ff634a279da34545787413355a2fd6f8487d4 (diff)
read_zero_pagealigned() locking fix
Ramiro Voicu hits the BUG_ON(!pte_none(*pte)) in zeromap_pte_range: kernel bugzilla 7645. Right: read_zero_pagealigned uses down_read of mmap_sem, but another thread's racing read of /dev/zero, or a normal fault, can easily set that pte again, in between zap_page_range and zeromap_page_range getting there. It's been wrong ever since 2.4.3. The simple fix is to use down_write instead, but that would serialize reads of /dev/zero more than at present: perhaps some app would be badly affected. So instead let zeromap_page_range return the error instead of BUG_ON, and read_zero_pagealigned break to the slower clear_user loop in that case - there's no need to optimize for it. Use -EEXIST for when a pte is found: BUG_ON in mmap_zero (the other user of zeromap_page_range), though it really isn't interesting there. And since mmap_zero wants -EAGAIN for out-of-memory, the zeromaps better return that than -ENOMEM. Signed-off-by: Hugh Dickins <hugh@veritas.com> Signed-off-by: Adrian Bunk <bunk@stusta.de>
-rw-r--r--drivers/char/mem.c12
-rw-r--r--mm/memory.c32
2 files changed, 29 insertions, 15 deletions
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 29c41f4418c..3b50f44a8d1 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -613,7 +613,8 @@ static inline size_t read_zero_pagealigned(char __user * buf, size_t size)
count = size;
zap_page_range(vma, addr, count, NULL);
- zeromap_page_range(vma, addr, count, PAGE_COPY);
+ if (zeromap_page_range(vma, addr, count, PAGE_COPY))
+ break;
size -= count;
buf += count;
@@ -680,11 +681,14 @@ out:
static int mmap_zero(struct file * file, struct vm_area_struct * vma)
{
+ int err;
+
if (vma->vm_flags & VM_SHARED)
return shmem_zero_setup(vma);
- if (zeromap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, vma->vm_page_prot))
- return -EAGAIN;
- return 0;
+ err = zeromap_page_range(vma, vma->vm_start,
+ vma->vm_end - vma->vm_start, vma->vm_page_prot);
+ BUG_ON(err == -EEXIST);
+ return err;
}
#else /* CONFIG_MMU */
static ssize_t read_zero(struct file * file, char * buf,
diff --git a/mm/memory.c b/mm/memory.c
index a4caa2f223e..97f5ea39572 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1092,21 +1092,27 @@ static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
{
pte_t *pte;
spinlock_t *ptl;
+ int err = 0;
pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
if (!pte)
- return -ENOMEM;
+ return -EAGAIN;
do {
struct page *page = ZERO_PAGE(addr);
pte_t zero_pte = pte_wrprotect(mk_pte(page, prot));
+
+ if (unlikely(!pte_none(*pte))) {
+ err = -EEXIST;
+ pte++;
+ break;
+ }
page_cache_get(page);
page_add_file_rmap(page);
inc_mm_counter(mm, file_rss);
- BUG_ON(!pte_none(*pte));
set_pte_at(mm, addr, pte, zero_pte);
} while (pte++, addr += PAGE_SIZE, addr != end);
pte_unmap_unlock(pte - 1, ptl);
- return 0;
+ return err;
}
static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
@@ -1114,16 +1120,18 @@ static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
{
pmd_t *pmd;
unsigned long next;
+ int err;
pmd = pmd_alloc(mm, pud, addr);
if (!pmd)
- return -ENOMEM;
+ return -EAGAIN;
do {
next = pmd_addr_end(addr, end);
- if (zeromap_pte_range(mm, pmd, addr, next, prot))
- return -ENOMEM;
+ err = zeromap_pte_range(mm, pmd, addr, next, prot);
+ if (err)
+ break;
} while (pmd++, addr = next, addr != end);
- return 0;
+ return err;
}
static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
@@ -1131,16 +1139,18 @@ static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
{
pud_t *pud;
unsigned long next;
+ int err;
pud = pud_alloc(mm, pgd, addr);
if (!pud)
- return -ENOMEM;
+ return -EAGAIN;
do {
next = pud_addr_end(addr, end);
- if (zeromap_pmd_range(mm, pud, addr, next, prot))
- return -ENOMEM;
+ err = zeromap_pmd_range(mm, pud, addr, next, prot);
+ if (err)
+ break;
} while (pud++, addr = next, addr != end);
- return 0;
+ return err;
}
int zeromap_page_range(struct vm_area_struct *vma,