aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Howells <dhowells@redhat.com>2009-01-08 12:04:47 +0000
committerDavid Howells <dhowells@redhat.com>2009-01-08 12:04:47 +0000
commit8feae13110d60cc6287afabc2887366b0eb226c2 (patch)
treeb3188986faab70e753e00ea8670a11ba8ec844c0
parent41836382ebb415d68d3ebc4525e78e871fe58baf (diff)
NOMMU: Make VMAs per MM as for MMU-mode linux
Make VMAs per mm_struct as for MMU-mode linux. This solves two problems: (1) In SYSV SHM where nattch for a segment does not reflect the number of shmat's (and forks) done. (2) In mmap() where the VMA's vm_mm is set to point to the parent mm by an exec'ing process when VM_EXECUTABLE is specified, regardless of the fact that a VMA might be shared and already have its vm_mm assigned to another process or a dead process. A new struct (vm_region) is introduced to track a mapped region and to remember the circumstances under which it may be shared and the vm_list_struct structure is discarded as it's no longer required. This patch makes the following additional changes: (1) Regions are now allocated with alloc_pages() rather than kmalloc() and with no recourse to __GFP_COMP, so the pages are not composite. Instead, each page has a reference on it held by the region. Anything else that is interested in such a page will have to get a reference on it to retain it. When the pages are released due to unmapping, each page is passed to put_page() and will be freed when the page usage count reaches zero. (2) Excess pages are trimmed after an allocation as the allocation must be made as a power-of-2 quantity of pages. (3) VMAs are added to the parent MM's R/B tree and mmap lists. As an MM may end up with overlapping VMAs within the tree, the VMA struct address is appended to the sort key. (4) Non-anonymous VMAs are now added to the backing inode's prio list. (5) Holes may be punched in anonymous VMAs with munmap(), releasing parts of the backing region. The VMA and region structs will be split if necessary. (6) sys_shmdt() only releases one attachment to a SYSV IPC shared memory segment instead of all the attachments at that addresss. Multiple shmat()'s return the same address under NOMMU-mode instead of different virtual addresses as under MMU-mode. (7) Core dumping for ELF-FDPIC requires fewer exceptions for NOMMU-mode. (8) /proc/maps is now the global list of mapped regions, and may list bits that aren't actually mapped anywhere. (9) /proc/meminfo gains a line (tagged "MmapCopy") that indicates the amount of RAM currently allocated by mmap to hold mappable regions that can't be mapped directly. These are copies of the backing device or file if not anonymous. These changes make NOMMU mode more similar to MMU mode. The downside is that NOMMU mode requires some extra memory to track things over NOMMU without this patch (VMAs are no longer shared, and there are now region structs). Signed-off-by: David Howells <dhowells@redhat.com> Tested-by: Mike Frysinger <vapier.adi@gmail.com> Acked-by: Paul Mundt <lethal@linux-sh.org>
-rw-r--r--Documentation/nommu-mmap.txt18
-rw-r--r--arch/arm/include/asm/mmu.h1
-rw-r--r--arch/blackfin/include/asm/mmu.h1
-rw-r--r--arch/blackfin/kernel/ptrace.c6
-rw-r--r--arch/blackfin/kernel/traps.c11
-rw-r--r--arch/frv/kernel/ptrace.c11
-rw-r--r--arch/h8300/include/asm/mmu.h1
-rw-r--r--arch/m68knommu/include/asm/mmu.h1
-rw-r--r--arch/sh/include/asm/mmu.h1
-rw-r--r--fs/binfmt_elf_fdpic.c27
-rw-r--r--fs/proc/internal.h2
-rw-r--r--fs/proc/meminfo.c6
-rw-r--r--fs/proc/nommu.c71
-rw-r--r--fs/proc/task_nommu.c108
-rw-r--r--include/asm-frv/mmu.h1
-rw-r--r--include/asm-m32r/mmu.h1
-rw-r--r--include/linux/mm.h18
-rw-r--r--include/linux/mm_types.h18
-rw-r--r--ipc/shm.c12
-rw-r--r--kernel/fork.c4
-rw-r--r--lib/Kconfig.debug7
-rw-r--r--mm/mmap.c10
-rw-r--r--mm/nommu.c960
23 files changed, 860 insertions, 436 deletions
diff --git a/Documentation/nommu-mmap.txt b/Documentation/nommu-mmap.txt
index 7714f57caad..02b89dcf38a 100644
--- a/Documentation/nommu-mmap.txt
+++ b/Documentation/nommu-mmap.txt
@@ -109,12 +109,18 @@ and it's also much more restricted in the latter case:
FURTHER NOTES ON NO-MMU MMAP
============================
- (*) A request for a private mapping of less than a page in size may not return
- a page-aligned buffer. This is because the kernel calls kmalloc() to
- allocate the buffer, not get_free_page().
-
- (*) A list of all the mappings on the system is visible through /proc/maps in
- no-MMU mode.
+ (*) A request for a private mapping of a file may return a buffer that is not
+ page-aligned. This is because XIP may take place, and the data may not be
+ paged aligned in the backing store.
+
+ (*) A request for an anonymous mapping will always be page aligned. If
+ possible the size of the request should be a power of two otherwise some
+ of the space may be wasted as the kernel must allocate a power-of-2
+ granule but will only discard the excess if appropriately configured as
+ this has an effect on fragmentation.
+
+ (*) A list of all the private copy and anonymous mappings on the system is
+ visible through /proc/maps in no-MMU mode.
(*) A list of all the mappings in use by a process is visible through
/proc/<pid>/maps in no-MMU mode.
diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h
index 53099d4ee42..b561584d04a 100644
--- a/arch/arm/include/asm/mmu.h
+++ b/arch/arm/include/asm/mmu.h
@@ -24,7 +24,6 @@ typedef struct {
* modified for 2.6 by Hyok S. Choi <hyok.choi@samsung.com>
*/
typedef struct {
- struct vm_list_struct *vmlist;
unsigned long end_brk;
} mm_context_t;
diff --git a/arch/blackfin/include/asm/mmu.h b/arch/blackfin/include/asm/mmu.h
index 757e43906ed..dbfd686360e 100644
--- a/arch/blackfin/include/asm/mmu.h
+++ b/arch/blackfin/include/asm/mmu.h
@@ -10,7 +10,6 @@ struct sram_list_struct {
};
typedef struct {
- struct vm_list_struct *vmlist;
unsigned long end_brk;
unsigned long stack_start;
diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c
index d2d38853663..594e325b40e 100644
--- a/arch/blackfin/kernel/ptrace.c
+++ b/arch/blackfin/kernel/ptrace.c
@@ -160,15 +160,15 @@ put_reg(struct task_struct *task, int regno, unsigned long data)
static inline int is_user_addr_valid(struct task_struct *child,
unsigned long start, unsigned long len)
{
- struct vm_list_struct *vml;
+ struct vm_area_struct *vma;
struct sram_list_struct *sraml;
/* overflow */
if (start + len < start)
return -EIO;
- for (vml = child->mm->context.vmlist; vml; vml = vml->next)
- if (start >= vml->vma->vm_start && start + len < vml->vma->vm_end)
+ vma = find_vma(child->mm, start);
+ if (vma && start >= vma->vm_start && start + len <= vma->vm_end)
return 0;
for (sraml = child->mm->context.sram_list; sraml; sraml = sraml->next)
diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c
index 17d8e417289..5b0667da8d0 100644
--- a/arch/blackfin/kernel/traps.c
+++ b/arch/blackfin/kernel/traps.c
@@ -32,6 +32,7 @@
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/fs.h>
+#include <linux/rbtree.h>
#include <asm/traps.h>
#include <asm/cacheflush.h>
#include <asm/cplb.h>
@@ -83,6 +84,7 @@ static void decode_address(char *buf, unsigned long address)
struct mm_struct *mm;
unsigned long flags, offset;
unsigned char in_atomic = (bfin_read_IPEND() & 0x10) || in_atomic();
+ struct rb_node *n;
#ifdef CONFIG_KALLSYMS
unsigned long symsize;
@@ -128,9 +130,10 @@ static void decode_address(char *buf, unsigned long address)
if (!mm)
continue;
- vml = mm->context.vmlist;
- while (vml) {
- struct vm_area_struct *vma = vml->vma;
+ for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) {
+ struct vm_area_struct *vma;
+
+ vma = rb_entry(n, struct vm_area_struct, vm_rb);
if (address >= vma->vm_start && address < vma->vm_end) {
char _tmpbuf[256];
@@ -176,8 +179,6 @@ static void decode_address(char *buf, unsigned long address)
goto done;
}
-
- vml = vml->next;
}
if (!in_atomic)
mmput(mm);
diff --git a/arch/frv/kernel/ptrace.c b/arch/frv/kernel/ptrace.c
index 709e9bdc612..5e7d401d21e 100644
--- a/arch/frv/kernel/ptrace.c
+++ b/arch/frv/kernel/ptrace.c
@@ -69,7 +69,8 @@ static inline int put_reg(struct task_struct *task, int regno,
}
/*
- * check that an address falls within the bounds of the target process's memory mappings
+ * check that an address falls within the bounds of the target process's memory
+ * mappings
*/
static inline int is_user_addr_valid(struct task_struct *child,
unsigned long start, unsigned long len)
@@ -79,11 +80,11 @@ static inline int is_user_addr_valid(struct task_struct *child,
return -EIO;
return 0;
#else
- struct vm_list_struct *vml;
+ struct vm_area_struct *vma;
- for (vml = child->mm->context.vmlist; vml; vml = vml->next)
- if (start >= vml->vma->vm_start && start + len <= vml->vma->vm_end)
- return 0;
+ vma = find_vma(child->mm, start);
+ if (vma && start >= vma->vm_start && start + len <= vma->vm_end)
+ return 0;
return -EIO;
#endif
diff --git a/arch/h8300/include/asm/mmu.h b/arch/h8300/include/asm/mmu.h
index 2ce06ea4610..31309969df7 100644
--- a/arch/h8300/include/asm/mmu.h
+++ b/arch/h8300/include/asm/mmu.h
@@ -4,7 +4,6 @@
/* Copyright (C) 2002, David McCullough <davidm@snapgear.com> */
typedef struct {
- struct vm_list_struct *vmlist;
unsigned long end_brk;
} mm_context_t;
diff --git a/arch/m68knommu/include/asm/mmu.h b/arch/m68knommu/include/asm/mmu.h
index 5fa6b68353b..e2da1e6f09f 100644
--- a/arch/m68knommu/include/asm/mmu.h
+++ b/arch/m68knommu/include/asm/mmu.h
@@ -4,7 +4,6 @@
/* Copyright (C) 2002, David McCullough <davidm@snapgear.com> */
typedef struct {
- struct vm_list_struct *vmlist;
unsigned long end_brk;
} mm_context_t;
diff --git a/arch/sh/include/asm/mmu.h b/arch/sh/include/asm/mmu.h
index fdcb93bc6d1..6c43625bb1a 100644
--- a/arch/sh/include/asm/mmu.h
+++ b/arch/sh/include/asm/mmu.h
@@ -9,7 +9,6 @@ typedef struct {
mm_context_id_t id;
void *vdso;
#else
- struct vm_list_struct *vmlist;
unsigned long end_brk;
#endif
#ifdef CONFIG_BINFMT_ELF_FDPIC
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index aa5b43205e3..22baf1b1349 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1567,11 +1567,9 @@ end_coredump:
static int elf_fdpic_dump_segments(struct file *file, size_t *size,
unsigned long *limit, unsigned long mm_flags)
{
- struct vm_list_struct *vml;
-
- for (vml = current->mm->context.vmlist; vml; vml = vml->next) {
- struct vm_area_struct *vma = vml->vma;
+ struct vm_area_struct *vma;
+ for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
if (!maydump(vma, mm_flags))
continue;
@@ -1617,9 +1615,6 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
elf_fpxregset_t *xfpu = NULL;
#endif
int thread_status_size = 0;
-#ifndef CONFIG_MMU
- struct vm_list_struct *vml;
-#endif
elf_addr_t *auxv;
unsigned long mm_flags;
@@ -1685,13 +1680,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
fill_prstatus(prstatus, current, signr);
elf_core_copy_regs(&prstatus->pr_reg, regs);
-#ifdef CONFIG_MMU
segs = current->mm->map_count;
-#else
- segs = 0;
- for (vml = current->mm->context.vmlist; vml; vml = vml->next)
- segs++;
-#endif
#ifdef ELF_CORE_EXTRA_PHDRS
segs += ELF_CORE_EXTRA_PHDRS;
#endif
@@ -1766,20 +1755,10 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
mm_flags = current->mm->flags;
/* write program headers for segments dump */
- for (
-#ifdef CONFIG_MMU
- vma = current->mm->mmap; vma; vma = vma->vm_next
-#else
- vml = current->mm->context.vmlist; vml; vml = vml->next
-#endif
- ) {
+ for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
struct elf_phdr phdr;
size_t sz;
-#ifndef CONFIG_MMU
- vma = vml->vma;
-#endif
-
sz = vma->vm_end - vma->vm_start;
phdr.p_type = PT_LOAD;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 3e8aeb8b61c..cd53ff83849 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -41,8 +41,6 @@ do { \
(vmi)->used = 0; \
(vmi)->largest_chunk = 0; \
} while(0)
-
-extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *);
#endif
extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns,
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index b1675c4e66d..43d23948384 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -74,6 +74,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
"LowTotal: %8lu kB\n"
"LowFree: %8lu kB\n"
#endif
+#ifndef CONFIG_MMU
+ "MmapCopy: %8lu kB\n"
+#endif
"SwapTotal: %8lu kB\n"
"SwapFree: %8lu kB\n"
"Dirty: %8lu kB\n"
@@ -116,6 +119,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
K(i.totalram-i.totalhigh),
K(i.freeram-i.freehigh),
#endif
+#ifndef CONFIG_MMU
+ K((unsigned long) atomic_read(&mmap_pages_allocated)),
+#endif
K(i.totalswap),
K(i.freeswap),
K(global_page_state(NR_FILE_DIRTY)),
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 3f87d263294..b446d7ad0b0 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -33,33 +33,33 @@
#include "internal.h"
/*
- * display a single VMA to a sequenced file
+ * display a single region to a sequenced file
*/
-int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
+static int nommu_region_show(struct seq_file *m, struct vm_region *region)
{
unsigned long ino = 0;
struct file *file;
dev_t dev = 0;
int flags, len;
- flags = vma->vm_flags;
- file = vma->vm_file;
+ flags = region->vm_flags;
+ file = region->vm_file;
if (file) {
- struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+ struct inode *inode = region->vm_file->f_path.dentry->d_inode;
dev = inode->i_sb->s_dev;
ino = inode->i_ino;
}
seq_printf(m,
"%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
- vma->vm_start,
- vma->vm_end,
+ region->vm_start,
+ region->vm_end,
flags & VM_READ ? 'r' : '-',
flags & VM_WRITE ? 'w' : '-',
flags & VM_EXEC ? 'x' : '-',
flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
- ((loff_t)vma->vm_pgoff) << PAGE_SHIFT,
+ ((loff_t)region->vm_pgoff) << PAGE_SHIFT,
MAJOR(dev), MINOR(dev), ino, &len);
if (file) {
@@ -75,61 +75,54 @@ int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
}
/*
- * display a list of all the VMAs the kernel knows about
+ * display a list of all the REGIONs the kernel knows about
* - nommu kernals have a single flat list
*/
-static int nommu_vma_list_show(struct seq_file *m, void *v)
+static int nommu_region_list_show(struct seq_file *m, void *_p)
{
- struct vm_area_struct *vma;
+ struct rb_node *p = _p;
- vma = rb_entry((struct rb_node *) v, struct vm_area_struct, vm_rb);
- return nommu_vma_show(m, vma);
+ return nommu_region_show(m, rb_entry(p, struct vm_region, vm_rb));
}
-static void *nommu_vma_list_start(struct seq_file *m, loff_t *_pos)
+static void *nommu_region_list_start(struct seq_file *m, loff_t *_pos)
{
- struct rb_node *_rb;
+ struct rb_node *p;
loff_t pos = *_pos;
- void *next = NULL;
- down_read(&nommu_vma_sem);
+ down_read(&nommu_region_sem);
- for (_rb = rb_first(&nommu_vma_tree); _rb; _rb = rb_next(_rb)) {
- if (pos == 0) {
- next = _rb;
- break;
- }
- pos--;
- }
-
- return next;
+ for (p = rb_first(&nommu_region_tree); p; p = rb_next(p))
+ if (pos-- == 0)
+ return p;
+ return NULL;
}
-static void nommu_vma_list_stop(struct seq_file *m, void *v)
+static void nommu_region_list_stop(struct seq_file *m, void *v)
{
- up_read(&nommu_vma_sem);
+ up_read(&nommu_region_sem);
}
-static void *nommu_vma_list_next(struct seq_file *m, void *v, loff_t *pos)
+static void *nommu_region_list_next(struct seq_file *m, void *v, loff_t *pos)
{
(*pos)++;
return rb_next((struct rb_node *) v);
}
-static const struct seq_operations proc_nommu_vma_list_seqop = {
- .start = nommu_vma_list_start,
- .next = nommu_vma_list_next,
- .stop = nommu_vma_list_stop,
- .show = nommu_vma_list_show
+static struct seq_operations proc_nommu_region_list_seqop = {
+ .start = nommu_region_list_start,
+ .next = nommu_region_list_next,
+ .stop = nommu_region_list_stop,
+ .show = nommu_region_list_show
};
-static int proc_nommu_vma_list_open(struct inode *inode, struct file *file)
+static int proc_nommu_region_list_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &proc_nommu_vma_list_seqop);
+ return seq_open(file, &proc_nommu_region_list_seqop);
}
-static const struct file_operations proc_nommu_vma_list_operations = {
- .open = proc_nommu_vma_list_open,
+static const struct file_operations proc_nommu_region_list_operations = {
+ .open = proc_nommu_region_list_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
@@ -137,7 +130,7 @@ static const struct file_operations proc_nommu_vma_list_operations = {
static int __init proc_nommu_init(void)
{
- proc_create("maps", S_IRUGO, NULL, &proc_nommu_vma_list_operations);
+ proc_create("maps", S_IRUGO, NULL, &proc_nommu_region_list_operations);
return 0;
}
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index d4a8be32b90..ca4a48d0d31 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -15,25 +15,25 @@
*/
void task_mem(struct seq_file *m, struct mm_struct *mm)
{
- struct vm_list_struct *vml;
+ struct vm_area_struct *vma;
+ struct rb_node *p;
unsigned long bytes = 0, sbytes = 0, slack = 0;
down_read(&mm->mmap_sem);
- for (vml = mm->context.vmlist; vml; vml = vml->next) {
- if (!vml->vma)
- continue;
+ for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
+ vma = rb_entry(p, struct vm_area_struct, vm_rb);
- bytes += kobjsize(vml);
+ bytes += kobjsize(vma);
if (atomic_read(&mm->mm_count) > 1 ||
- atomic_read(&vml->vma->vm_usage) > 1
- ) {
- sbytes += kobjsize((void *) vml->vma->vm_start);
- sbytes += kobjsize(vml->vma);
+ vma->vm_region ||
+ vma->vm_flags & VM_MAYSHARE) {
+ sbytes += kobjsize((void *) vma->vm_start);
+ if (vma->vm_region)
+ sbytes += kobjsize(vma->vm_region);
} else {
- bytes += kobjsize((void *) vml->vma->vm_start);
- bytes += kobjsize(vml->vma);
- slack += kobjsize((void *) vml->vma->vm_start) -
- (vml->vma->vm_end - vml->vma->vm_start);
+ bytes += kobjsize((void *) vma->vm_start);
+ slack += kobjsize((void *) vma->vm_start) -
+ (vma->vm_end - vma->vm_start);
}
}
@@ -70,13 +70,14 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
unsigned long task_vsize(struct mm_struct *mm)
{
- struct vm_list_struct *tbp;
+ struct vm_area_struct *vma;
+ struct rb_node *p;
unsigned long vsize = 0;
down_read(&mm->mmap_sem);
- for (tbp = mm->context.vmlist; tbp; tbp = tbp->next) {
- if (tbp->vma)
- vsize += kobjsize((void *) tbp->vma->vm_start);
+ for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
+ vma = rb_entry(p, struct vm_area_struct, vm_rb);
+ vsize += vma->vm_region->vm_end - vma->vm_region->vm_start;
}
up_read(&mm->mmap_sem);
return vsize;
@@ -85,16 +86,15 @@ unsigned long task_vsize(struct mm_struct *mm)
int task_statm(struct mm_struct *mm, int *shared, int *text,
int *data, int *resident)
{
- struct vm_list_struct *tbp;
+ struct vm_area_struct *vma;
+ struct rb_node *p;
int size = kobjsize(mm);
down_read(&mm->mmap_sem);
- for (tbp = mm->context.vmlist; tbp; tbp = tbp->next) {
- size += kobjsize(tbp);
- if (tbp->vma) {
- size += kobjsize(tbp->vma);
- size += kobjsize((void *) tbp->vma->vm_start);
- }
+ for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
+ vma = rb_entry(p, struct vm_area_struct, vm_rb);
+ size += kobjsize(vma);
+ size += kobjsize((void *) vma->vm_start);
}
size += (*text = mm->end_code - mm->start_code);
@@ -105,20 +105,62 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
}
/*
+ * display a single VMA to a sequenced file
+ */
+static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
+{
+ unsigned long ino = 0;
+ struct file *file;
+ dev_t dev = 0;
+ int flags, len;
+
+ flags = vma->vm_flags;
+ file = vma->vm_file;
+
+ if (file) {
+ struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+ dev = inode->i_sb->s_dev;
+ ino = inode->i_ino;
+ }
+
+ seq_printf(m,
+ "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n",
+ vma->vm_start,
+ vma->vm_end,
+ flags & VM_READ ? 'r' : '-',
+ flags & VM_WRITE ? 'w' : '-',
+ flags & VM_EXEC ? 'x' : '-',
+ flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
+ vma->vm_pgoff << PAGE_SHIFT,
+ MAJOR(dev), MINOR(dev), ino, &len);
+
+ if (file) {
+ len = 25 + sizeof(void *) * 6 - len;
+ if (len < 1)
+ len = 1;
+ seq_printf(m, "%*c", len, ' ');
+ seq_path(m, &file->f_path, "");
+ }
+
+ seq_putc(m, '\n');
+ return 0;
+}
+
+/*
* display mapping lines for a particular process's /proc/pid/maps
*/
-static int show_map(struct seq_file *m, void *_vml)
+static int show_map(struct seq_file *m, void *_p)
{
- struct vm_list_struct *vml = _vml;
+ struct rb_node *p = _p;
- return nommu_vma_show(m, vml->vma);
+ return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb));
}
static void *m_start(struct seq_file *m, loff_t *pos)
{
struct proc_maps_private *priv = m->private;
- struct vm_list_struct *vml;
struct mm_struct *mm;
+ struct rb_node *p;
loff_t n = *pos;
/* pin the task and mm whilst we play with them */
@@ -134,9 +176,9 @@ static void *m_start(struct seq_file *m, loff_t *pos)
}
/* start from the Nth VMA */
- for (vml = mm->context.vmlist; vml; vml = vml->next)
+ for (p = rb_first(&mm->mm_rb); p; p = rb_next(p))
if (n-- == 0)
- return vml;
+ return p;
return NULL;
}
@@ -152,12 +194,12 @@ static void m_stop(struct seq_file *m, void *_vml)
}
}
-static void *m_next(struct seq_file *m, void *_vml, loff_t *pos)
+static void *m_next(struct seq_file *m, void *_p, loff_t *pos)
{
- struct vm_list_struct *vml = _vml;
+ struct rb_node *p = _p;
(*pos)++;
- return vml ? vml->next : NULL;
+ return p ? rb_next(p) : NULL;
}
static const struct seq_operations proc_pid_maps_ops = {
diff --git a/include/asm-frv/mmu.h b/include/asm-frv/mmu.h
index 22c03714fb1..86ca0e86e7d 100644
--- a/include/asm-frv/mmu.h
+++ b/include/asm-frv/mmu.h
@@ -22,7 +22,6 @@ typedef struct {
unsigned long dtlb_ptd_mapping; /* [DAMR5] PTD mapping for dtlb cached PGE */
#else
- struct vm_list_struct *vmlist;
unsigned long end_brk;
#endif
diff --git a/include/asm-m32r/mmu.h b/include/asm-m32r/mmu.h
index d9bd724479c..150cb92bb66 100644
--- a/include/asm-m32r/mmu.h
+++ b/include/asm-m32r/mmu.h
@@ -4,7 +4,6 @@
#if !defined(CONFIG_MMU)
typedef struct {
- struct vm_list_struct *vmlist;
unsigned long end_brk;
} mm_context_t;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4a3d28c8644..b91a73fd1bc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -56,19 +56,9 @@ extern unsigned long mmap_min_addr;
extern struct kmem_cache *vm_area_cachep;
-/*
- * This struct defines the per-mm list of VMAs for uClinux. If CONFIG_MMU is
- * disabled, then there's a single shared list of VMAs maintained by the
- * system, and mm's subscribe to these individually
- */
-struct vm_list_struct {
- struct vm_list_struct *next;
- struct vm_area_struct *vma;
-};
-
#ifndef CONFIG_MMU
-extern struct rb_root nommu_vma_tree;
-extern struct rw_semaphore nommu_vma_sem;
+extern struct rb_root nommu_region_tree;
+extern struct rw_semaphore nommu_region_sem;
extern unsigned int kobjsize(const void *objp);
#endif
@@ -1061,6 +1051,7 @@ extern void memmap_init_zone(unsigned long, int, unsigned long,
unsigned long, enum memmap_context);
extern void setup_per_zone_pages_min(void);
extern void mem_init(void);
+extern void __init mmap_init(void);
extern void show_mem(void);
extern void si_meminfo(struct sysinfo * val);
extern void si_meminfo_node(struct sysinfo *val, int nid);
@@ -1072,6 +1063,9 @@ extern void setup_per_cpu_pageset(void);
static inline void setup_per_cpu_pageset(void) {}
#endif
+/* nommu.c */
+extern atomic_t mmap_pages_allocated;
+
/* prio_tree.c */
void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 9cfc9b627fd..1c1e0d3a171 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -97,6 +97,22 @@ struct page {
};
/*
+ * A region containing a mapping of a non-memory backed file under NOMMU
+ * conditions. These are held in a global tree and are pinned by the VMAs that
+ * map parts of them.
+ */
+struct vm_region {
+ struct rb_node vm_rb; /* link in global region tree */
+ unsigned long vm_flags; /* VMA vm_flags */
+ unsigned long vm_start; /* start address of region */
+ unsigned long vm_end; /* region initialised to here */
+ unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */
+ struct file *vm_file; /* the backing file or NULL */
+
+ atomic_t vm_usage; /* region usage count */
+};
+
+/*
* This struct defines a memory VMM memory area. There is one of these
* per VM-area/task. A VM area is any part of the process virtual memory
* space that has a special rule for the page-fault handlers (ie a shared
@@ -152,7 +168,7 @@ struct vm_area_struct {
unsigned long vm_truncate_count;/* truncate_count or restart_addr */
#ifndef CONFIG_MMU
- atomic_t vm_usage; /* refcount (VMAs shared if !MMU) */
+ struct vm_region *vm_region; /* NOMMU mapping region */
#endif
#ifdef CONFIG_NUMA
struct mempolicy *vm_policy; /* NUMA policy for the VMA */
diff --git a/ipc/shm.c b/ipc/shm.c
index b125b560240..d0ab5527bf4 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -990,6 +990,7 @@ asmlinkage long sys_shmdt(char __user *shmaddr)
*/
vma = find_vma(mm, addr);
+#ifdef CONFIG_MMU
while (vma) {
next = vma->vm_next;
@@ -1034,6 +1035,17 @@ asmlinkage long sys_shmdt(char __user *shmaddr)
vma = next;
}
+#else /* CONFIG_MMU */
+ /* under NOMMU conditions, the exact address to be destroyed must be
+ * given */
+ retval = -EINVAL;
+ if (vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
+ do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
+ retval = 0;
+ }
+
+#endif
+
up_write(&mm->mmap_sem);
return retval;
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 7b8f2a78be3..0bce4a43bb3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1481,12 +1481,10 @@ void __init proc_caches_init(void)
fs_cachep = kmem_cache_create("fs_cache",
sizeof(struct fs_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
- vm_area_cachep = kmem_cache_create("vm_area_struct",
- sizeof(struct vm_area_struct), 0,
- SLAB_PANIC, NULL);
mm_cachep = kmem_cache_create("mm_struct",
sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+ mmap_init();
}
/*
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2e75478e9c6..d0a32aab03f 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -512,6 +512,13 @@ config DEBUG_VIRTUAL
If unsure, say N.
+config DEBUG_NOMMU_REGIONS
+ bool "Debug the global anon/private NOMMU mapping region tree"
+ depends on DEBUG_KERNEL && !MMU
+ help
+ This option causes the global tree of anonymous and private mapping
+ regions to be regularly checked for invalid topology.
+
config DEBUG_WRITECOUNT
bool "Debug filesystem writers count"
depends on DEBUG_KERNEL
diff --git a/mm/mmap.c b/mm/mmap.c
index a910c045cfd..749623196cb 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2472,3 +2472,13 @@ void mm_drop_all_locks(struct mm_struct *mm)
mutex_unlock(&mm_all_locks_mutex);
}
+
+/*
+ * initialise the VMA slab
+ */
+void __init mmap_init(void)
+{
+ vm_area_cachep = kmem_cache_create("vm_area_struct",
+ sizeof(struct vm_area_struct), 0,
+ SLAB_PANIC, NULL);
+}
diff --git a/mm/nommu.c b/mm/nommu.c
index 23f355bbe26..0d363dfcf10 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -6,7 +6,7 @@
*
* See Documentation/nommu-mmap.txt
*
- * Copyright (c) 2004-2005 David Howells <dhowells@redhat.com>
+ * Copyright (c) 2004-2008 David Howells <dhowells@redhat.com>
* Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
* Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
* Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com>
@@ -33,6 +33,28 @@
#include <asm/uaccess.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
+#include "internal.h"
+
+static inline __attribute__((format(printf, 1, 2)))
+void no_printk(const char *fmt, ...)
+{
+}
+
+#if 0
+#define kenter(FMT, ...) \
+ printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__)
+#define kleave(FMT, ...) \
+ printk(KERN_DEBUG "&l