diff options
Diffstat (limited to 'Documentation/vm')
-rw-r--r-- | Documentation/vm/.gitignore | 1 | ||||
-rw-r--r-- | Documentation/vm/locking | 2 | ||||
-rw-r--r-- | Documentation/vm/page-types.c | 248 | ||||
-rw-r--r-- | Documentation/vm/slabinfo.c | 68 |
4 files changed, 240 insertions, 79 deletions
diff --git a/Documentation/vm/.gitignore b/Documentation/vm/.gitignore index 33e8a023df0..09b164a5700 100644 --- a/Documentation/vm/.gitignore +++ b/Documentation/vm/.gitignore @@ -1 +1,2 @@ +page-types slabinfo diff --git a/Documentation/vm/locking b/Documentation/vm/locking index f366fa95617..25fadb44876 100644 --- a/Documentation/vm/locking +++ b/Documentation/vm/locking @@ -80,7 +80,7 @@ Note: PTL can also be used to guarantee that no new clones using the mm start up ... this is a loose form of stability on mm_users. For example, it is used in copy_mm to protect against a racing tlb_gather_mmu single address space optimization, so that the zap_page_range (from -vmtruncate) does not lose sending ipi's to cloned threads that might +truncate) does not lose sending ipi's to cloned threads that might be spawned underneath it and go to user mode to drag in pte's into tlbs. swap_lock diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c index 0833f44ba16..fa1a30d9e9d 100644 --- a/Documentation/vm/page-types.c +++ b/Documentation/vm/page-types.c @@ -5,6 +5,7 @@ * Copyright (C) 2009 Wu Fengguang <fengguang.wu@intel.com> */ +#define _LARGEFILE64_SOURCE #include <stdio.h> #include <stdlib.h> #include <unistd.h> @@ -13,12 +14,33 @@ #include <string.h> #include <getopt.h> #include <limits.h> +#include <assert.h> #include <sys/types.h> #include <sys/errno.h> #include <sys/fcntl.h> /* + * pagemap kernel ABI bits + */ + +#define PM_ENTRY_BYTES sizeof(uint64_t) +#define PM_STATUS_BITS 3 +#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) +#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET) +#define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK) +#define PM_PSHIFT_BITS 6 +#define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS) +#define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET) +#define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK) +#define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1) +#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) + +#define PM_PRESENT PM_STATUS(4LL) +#define PM_SWAP PM_STATUS(2LL) + + +/* * kernel page flags */ @@ -126,6 +148,14 @@ static int nr_addr_ranges; static unsigned long opt_offset[MAX_ADDR_RANGES]; static unsigned long opt_size[MAX_ADDR_RANGES]; +#define MAX_VMAS 10240 +static int nr_vmas; +static unsigned long pg_start[MAX_VMAS]; +static unsigned long pg_end[MAX_VMAS]; +static unsigned long voffset; + +static int pagemap_fd; + #define MAX_BIT_FILTERS 64 static int nr_bit_filters; static uint64_t opt_mask[MAX_BIT_FILTERS]; @@ -135,7 +165,6 @@ static int page_size; #define PAGES_BATCH (64 << 10) /* 64k pages */ static int kpageflags_fd; -static uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH]; #define HASH_SHIFT 13 #define HASH_SIZE (1 << HASH_SHIFT) @@ -158,12 +187,17 @@ static uint64_t page_flags[HASH_SIZE]; type __min2 = (y); \ __min1 < __min2 ? __min1 : __min2; }) -unsigned long pages2mb(unsigned long pages) +#define max_t(type, x, y) ({ \ + type __max1 = (x); \ + type __max2 = (y); \ + __max1 > __max2 ? __max1 : __max2; }) + +static unsigned long pages2mb(unsigned long pages) { return (pages * page_size) >> 20; } -void fatal(const char *x, ...) +static void fatal(const char *x, ...) { va_list ap; @@ -178,7 +212,7 @@ void fatal(const char *x, ...) * page flag names */ -char *page_flag_name(uint64_t flags) +static char *page_flag_name(uint64_t flags) { static char buf[65]; int present; @@ -197,7 +231,7 @@ char *page_flag_name(uint64_t flags) return buf; } -char *page_flag_longname(uint64_t flags) +static char *page_flag_longname(uint64_t flags) { static char buf[1024]; int i, n; @@ -221,32 +255,40 @@ char *page_flag_longname(uint64_t flags) * page list and summary */ -void show_page_range(unsigned long offset, uint64_t flags) +static void show_page_range(unsigned long offset, uint64_t flags) { static uint64_t flags0; + static unsigned long voff; static unsigned long index; static unsigned long count; - if (flags == flags0 && offset == index + count) { + if (flags == flags0 && offset == index + count && + (!opt_pid || voffset == voff + count)) { count++; return; } - if (count) - printf("%lu\t%lu\t%s\n", + if (count) { + if (opt_pid) + printf("%lx\t", voff); + printf("%lx\t%lx\t%s\n", index, count, page_flag_name(flags0)); + } flags0 = flags; index = offset; + voff = voffset; count = 1; } -void show_page(unsigned long offset, uint64_t flags) +static void show_page(unsigned long offset, uint64_t flags) { - printf("%lu\t%s\n", offset, page_flag_name(flags)); + if (opt_pid) + printf("%lx\t", voffset); + printf("%lx\t%s\n", offset, page_flag_name(flags)); } -void show_summary(void) +static void show_summary(void) { int i; @@ -272,7 +314,7 @@ void show_summary(void) * page flag filters */ -int bit_mask_ok(uint64_t flags) +static int bit_mask_ok(uint64_t flags) { int i; @@ -289,7 +331,7 @@ int bit_mask_ok(uint64_t flags) return 1; } -uint64_t expand_overloaded_flags(uint64_t flags) +static uint64_t expand_overloaded_flags(uint64_t flags) { /* SLOB/SLUB overload several page flags */ if (flags & BIT(SLAB)) { @@ -308,7 +350,7 @@ uint64_t expand_overloaded_flags(uint64_t flags) return flags; } -uint64_t well_known_flags(uint64_t flags) +static uint64_t well_known_flags(uint64_t flags) { /* hide flags intended only for kernel hacker */ flags &= ~KPF_HACKERS_BITS; @@ -325,7 +367,7 @@ uint64_t well_known_flags(uint64_t flags) * page frame walker */ -int hash_slot(uint64_t flags) +static int hash_slot(uint64_t flags) { int k = HASH_KEY(flags); int i; @@ -352,7 +394,7 @@ int hash_slot(uint64_t flags) exit(EXIT_FAILURE); } -void add_page(unsigned long offset, uint64_t flags) +static void add_page(unsigned long offset, uint64_t flags) { flags = expand_overloaded_flags(flags); @@ -371,7 +413,7 @@ void add_page(unsigned long offset, uint64_t flags) total_pages++; } -void walk_pfn(unsigned long index, unsigned long count) +static void walk_pfn(unsigned long index, unsigned long count) { unsigned long batch; unsigned long n; @@ -383,6 +425,8 @@ void walk_pfn(unsigned long index, unsigned long count) lseek(kpageflags_fd, index * KPF_BYTES, SEEK_SET); while (count) { + uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH]; + batch = min_t(unsigned long, count, PAGES_BATCH); n = read(kpageflags_fd, kpageflags_buf, batch * KPF_BYTES); if (n == 0) @@ -404,7 +448,82 @@ void walk_pfn(unsigned long index, unsigned long count) } } -void walk_addr_ranges(void) + +#define PAGEMAP_BATCH 4096 +static unsigned long task_pfn(unsigned long pgoff) +{ + static uint64_t buf[PAGEMAP_BATCH]; + static unsigned long start; + static long count; + uint64_t pfn; + + if (pgoff < start || pgoff >= start + count) { + if (lseek64(pagemap_fd, + (uint64_t)pgoff * PM_ENTRY_BYTES, + SEEK_SET) < 0) { + perror("pagemap seek"); + exit(EXIT_FAILURE); + } + count = read(pagemap_fd, buf, sizeof(buf)); + if (count == 0) + return 0; + if (count < 0) { + perror("pagemap read"); + exit(EXIT_FAILURE); + } + if (count % PM_ENTRY_BYTES) { + fatal("pagemap read not aligned.\n"); + exit(EXIT_FAILURE); + } + count /= PM_ENTRY_BYTES; + start = pgoff; + } + + pfn = buf[pgoff - start]; + if (pfn & PM_PRESENT) + pfn = PM_PFRAME(pfn); + else + pfn = 0; + + return pfn; +} + +static void walk_task(unsigned long index, unsigned long count) +{ + int i = 0; + const unsigned long end = index + count; + + while (index < end) { + + while (pg_end[i] <= index) + if (++i >= nr_vmas) + return; + if (pg_start[i] >= end) + return; + + voffset = max_t(unsigned long, pg_start[i], index); + index = min_t(unsigned long, pg_end[i], end); + + assert(voffset < index); + for (; voffset < index; voffset++) { + unsigned long pfn = task_pfn(voffset); + if (pfn) + walk_pfn(pfn, 1); + } + } +} + +static void add_addr_range(unsigned long offset, unsigned long size) +{ + if (nr_addr_ranges >= MAX_ADDR_RANGES) + fatal("too many addr ranges\n"); + + opt_offset[nr_addr_ranges] = offset; + opt_size[nr_addr_ranges] = min_t(unsigned long, size, ULONG_MAX-offset); + nr_addr_ranges++; +} + +static void walk_addr_ranges(void) { int i; @@ -415,10 +534,13 @@ void walk_addr_ranges(void) } if (!nr_addr_ranges) - walk_pfn(0, ULONG_MAX); + add_addr_range(0, ULONG_MAX); for (i = 0; i < nr_addr_ranges; i++) - walk_pfn(opt_offset[i], opt_size[i]); + if (!opt_pid) + walk_pfn(opt_offset[i], opt_size[i]); + else + walk_task(opt_offset[i], opt_size[i]); close(kpageflags_fd); } @@ -428,7 +550,7 @@ void walk_addr_ranges(void) * user interface */ -const char *page_flag_type(uint64_t flag) +static const char *page_flag_type(uint64_t flag) { if (flag & KPF_HACKERS_BITS) return "(r)"; @@ -437,7 +559,7 @@ const char *page_flag_type(uint64_t flag) return " "; } -void usage(void) +static void usage(void) { int i, j; @@ -446,8 +568,8 @@ void usage(void) " -r|--raw Raw mode, for kernel developers\n" " -a|--addr addr-spec Walk a range of pages\n" " -b|--bits bits-spec Walk pages with specified bits\n" -#if 0 /* planned features */ " -p|--pid pid Walk process address space\n" +#if 0 /* planned features */ " -f|--file filename Walk file address space\n" #endif " -l|--list Show page details in ranges\n" @@ -459,7 +581,7 @@ void usage(void) " N+M pages range from N to N+M-1\n" " N,M pages range from N to M-1\n" " N, pages range from N to end\n" -" ,M pages range from 0 to M\n" +" ,M pages range from 0 to M-1\n" "bits-spec:\n" " bit1,bit2 (flags & (bit1|bit2)) != 0\n" " bit1,bit2=bit1 (flags & (bit1|bit2)) == bit1\n" @@ -482,7 +604,7 @@ void usage(void) "(r) raw mode bits (o) overloaded bits\n"); } -unsigned long long parse_number(const char *str) +static unsigned long long parse_number(const char *str) { unsigned long long n; @@ -494,26 +616,62 @@ unsigned long long parse_number(const char *str) return n; } -void parse_pid(const char *str) +static void parse_pid(const char *str) { + FILE *file; + char buf[5000]; + opt_pid = parse_number(str); -} -void parse_file(const char *name) -{ + sprintf(buf, "/proc/%d/pagemap", opt_pid); + pagemap_fd = open(buf, O_RDONLY); + if (pagemap_fd < 0) { + perror(buf); + exit(EXIT_FAILURE); + } + + sprintf(buf, "/proc/%d/maps", opt_pid); + file = fopen(buf, "r"); + if (!file) { + perror(buf); + exit(EXIT_FAILURE); + } + + while (fgets(buf, sizeof(buf), file) != NULL) { + unsigned long vm_start; + unsigned long vm_end; + unsigned long long pgoff; + int major, minor; + char r, w, x, s; + unsigned long ino; + int n; + + n = sscanf(buf, "%lx-%lx %c%c%c%c %llx %x:%x %lu", + &vm_start, + &vm_end, + &r, &w, &x, &s, + &pgoff, + &major, &minor, + &ino); + if (n < 10) { + fprintf(stderr, "unexpected line: %s\n", buf); + continue; + } + pg_start[nr_vmas] = vm_start / page_size; + pg_end[nr_vmas] = vm_end / page_size; + if (++nr_vmas >= MAX_VMAS) { + fprintf(stderr, "too many VMAs\n"); + break; + } + } + fclose(file); } -void add_addr_range(unsigned long offset, unsigned long size) +static void parse_file(const char *name) { - if (nr_addr_ranges >= MAX_ADDR_RANGES) - fatal("too much addr ranges\n"); - - opt_offset[nr_addr_ranges] = offset; - opt_size[nr_addr_ranges] = size; - nr_addr_ranges++; } -void parse_addr_range(const char *optarg) +static void parse_addr_range(const char *optarg) { unsigned long offset; unsigned long size; @@ -547,7 +705,7 @@ void parse_addr_range(const char *optarg) add_addr_range(offset, size); } -void add_bits_filter(uint64_t mask, uint64_t bits) +static void add_bits_filter(uint64_t mask, uint64_t bits) { if (nr_bit_filters >= MAX_BIT_FILTERS) fatal("too much bit filters\n"); @@ -557,7 +715,7 @@ void add_bits_filter(uint64_t mask, uint64_t bits) nr_bit_filters++; } -uint64_t parse_flag_name(const char *str, int len) +static uint64_t parse_flag_name(const char *str, int len) { int i; @@ -577,7 +735,7 @@ uint64_t parse_flag_name(const char *str, int len) return parse_number(str); } -uint64_t parse_flag_names(const char *str, int all) +static uint64_t parse_flag_names(const char *str, int all) { const char *p = str; uint64_t flags = 0; @@ -596,7 +754,7 @@ uint64_t parse_flag_names(const char *str, int all) return flags; } -void parse_bits_mask(const char *optarg) +static void parse_bits_mask(const char *optarg) { uint64_t mask; uint64_t bits; @@ -621,7 +779,7 @@ void parse_bits_mask(const char *optarg) } -struct option opts[] = { +static struct option opts[] = { { "raw" , 0, NULL, 'r' }, { "pid" , 1, NULL, 'p' }, { "file" , 1, NULL, 'f' }, @@ -676,8 +834,10 @@ int main(int argc, char *argv[]) } } + if (opt_list && opt_pid) + printf("voffset\t"); if (opt_list == 1) - printf("offset\tcount\tflags\n"); + printf("offset\tlen\tflags\n"); if (opt_list == 2) printf("offset\tflags\n"); diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c index df3227605d5..92e729f4b67 100644 --- a/Documentation/vm/slabinfo.c +++ b/Documentation/vm/slabinfo.c @@ -87,7 +87,7 @@ int page_size; regex_t pattern; -void fatal(const char *x, ...) +static void fatal(const char *x, ...) { va_list ap; @@ -97,7 +97,7 @@ void fatal(const char *x, ...) exit(EXIT_FAILURE); } -void usage(void) +static void usage(void) { printf("slabinfo 5/7/2007. (c) 2007 sgi.\n\n" "slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n" @@ -131,7 +131,7 @@ void usage(void) ); } -unsigned long read_obj(const char *name) +static unsigned long read_obj(const char *name) { FILE *f = fopen(name, "r"); @@ -151,7 +151,7 @@ unsigned long read_obj(const char *name) /* * Get the contents of an attribute */ -unsigned long get_obj(const char *name) +static unsigned long get_obj(const char *name) { if (!read_obj(name)) return 0; @@ -159,7 +159,7 @@ unsigned long get_obj(const char *name) return atol(buffer); } -unsigned long get_obj_and_str(const char *name, char **x) +static unsigned long get_obj_and_str(const char *name, char **x) { unsigned long result = 0; char *p; @@ -178,7 +178,7 @@ unsigned long get_obj_and_str(const char *name, char **x) return result; } -void set_obj(struct slabinfo *s, const char *name, int n) +static void set_obj(struct slabinfo *s, const char *name, int n) { char x[100]; FILE *f; @@ -192,7 +192,7 @@ void set_obj(struct slabinfo *s, const char *name, int n) fclose(f); } -unsigned long read_slab_obj(struct slabinfo *s, const char *name) +static unsigned long read_slab_obj(struct slabinfo *s, const char *name) { char x[100]; FILE *f; @@ -215,7 +215,7 @@ unsigned long read_slab_obj(struct slabinfo *s, const char *name) /* * Put a size string together */ -int store_size(char *buffer, unsigned long value) +static int store_size(char *buffer, unsigned long value) { unsigned long divisor = 1; char trailer = 0; @@ -247,7 +247,7 @@ int store_size(char *buffer, unsigned long value) return n; } -void decode_numa_list(int *numa, char *t) +static void decode_numa_list(int *numa, char *t) { int node; int nr; @@ -272,7 +272,7 @@ void decode_numa_list(int *numa, char *t) } } -void slab_validate(struct slabinfo *s) +static void slab_validate(struct slabinfo *s) { if (strcmp(s->name, "*") == 0) return; @@ -280,7 +280,7 @@ void slab_validate(struct slabinfo *s) set_obj(s, "validate", 1); } -void slab_shrink(struct slabinfo *s) +static void slab_shrink(struct slabinfo *s) { if (strcmp(s->name, "*") == 0) return; @@ -290,7 +290,7 @@ void slab_shrink(struct slabinfo *s) int line = 0; -void first_line(void) +static void first_line(void) { if (show_activity) printf("Name Objects Alloc Free %%Fast Fallb O\n"); @@ -302,7 +302,7 @@ void first_line(void) /* * Find the shortest alias of a slab */ -struct aliasinfo *find_one_alias(struct slabinfo *find) +static struct aliasinfo *find_one_alias(struct slabinfo *find) { struct aliasinfo *a; struct aliasinfo *best = NULL; @@ -318,18 +318,18 @@ struct aliasinfo *find_one_alias(struct slabinfo *find) return best; } -unsigned long slab_size(struct slabinfo *s) +static unsigned long slab_size(struct slabinfo *s) { return s->slabs * (page_size << s->order); } -unsigned long slab_activity(struct slabinfo *s) +static unsigned long slab_activity(struct slabinfo *s) { return s->alloc_fastpath + s->free_fastpath + s->alloc_slowpath + s->free_slowpath; } -void slab_numa(struct slabinfo *s, int mode) +static void slab_numa(struct slabinfo *s, int mode) { int node; @@ -374,7 +374,7 @@ void slab_numa(struct slabinfo *s, int mode) line++; } -void show_tracking(struct slabinfo *s) +static void show_tracking(struct slabinfo *s) { printf("\n%s: Kernel object allocation\n", s->name); printf("-----------------------------------------------------------------------\n"); @@ -392,7 +392,7 @@ void show_tracking(struct slabinfo *s) } -void ops(struct slabinfo *s) +static void ops(struct slabinfo *s) { if (strcmp(s->name, "*") == 0) return; @@ -405,14 +405,14 @@ void ops(struct slabinfo *s) printf("\n%s has no kmem_cache operations\n", s->name); } -const char *onoff(int x) +static const char *onoff(int x) { if (x) return "On "; return "Off"; } -void slab_stats(struct slabinfo *s) +static void slab_stats(struct slabinfo *s) { unsigned long total_alloc; unsigned long total_free; @@ -477,7 +477,7 @@ void slab_stats(struct slabinfo *s) s->deactivate_to_tail, (s->deactivate_to_tail * 100) / total); } -void report(struct slabinfo *s) +static void report(struct slabinfo *s) { if (strcmp(s->name, "*") == 0) return; @@ -518,7 +518,7 @@ void report(struct slabinfo *s) slab_stats(s); } -void slabcache(struct slabinfo *s) +static void slabcache(struct slabinfo *s) { char size_str[20]; char dist_str[40]; @@ -593,7 +593,7 @@ void slabcache(struct slabinfo *s) /* * Analyze debug options. Return false if something is amiss. */ -int debug_opt_scan(char *opt) +static int debug_opt_scan(char *opt) { if (!opt || !opt[0] || strcmp(opt, "-") == 0) return 1; @@ -642,7 +642,7 @@ int debug_opt_scan(char *opt) return 1; } -int slab_empty(struct slabinfo *s) +static int slab_empty(struct slabinfo *s) { if (s->objects > 0) return 0; @@ -657,7 +657,7 @@ int slab_empty(struct slabinfo *s) return 1; } -void slab_debug(struct slabinfo *s) +static void slab_debug(struct slabinfo *s) { if (strcmp(s->name, "*") == 0) return; @@ -717,7 +717,7 @@ void slab_debug(struct slabinfo *s) set_obj(s, "trace", 1); } -void totals(void) +static void totals(void) { struct slabinfo *s; @@ -976,7 +976,7 @@ void totals(void) b1, b2, b3); } -void sort_slabs(void) +static void sort_slabs(void) { struct slabinfo *s1,*s2; @@ -1005,7 +1005,7 @@ void sort_slabs(void) } } -void sort_aliases(void) +static void sort_aliases(void) { struct aliasinfo *a1,*a2; @@ -1030,7 +1030,7 @@ void sort_aliases(void) } } -void link_slabs(void) +static void link_slabs(void) { struct aliasinfo *a; struct slabinfo *s; @@ -1048,7 +1048,7 @@ void link_slabs(void) } } -void alias(void) +static void alias(void) { struct aliasinfo *a; char *active = NULL; @@ -1079,7 +1079,7 @@ void alias(void) } -void rename_slabs(void) +static void rename_slabs(void) { struct slabinfo *s; struct aliasinfo *a; @@ -1102,12 +1102,12 @@ void rename_slabs(void) } } -int slab_mismatch(char *slab) +static int slab_mismatch(char *slab) { return regexec(&pattern, slab, 0, NULL, 0); } -void read_slab_dir(void) +static void read_slab_dir(void) { DIR *dir; struct dirent *de; @@ -1209,7 +1209,7 @@ void read_slab_dir(void) fatal("Too many aliases\n"); } -void output_slabs(void) +static void output_slabs(void) { struct slabinfo *slab; |