From 4978db5bd964d90265f957f980ab2b0771ca2b9f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 12 May 2008 16:51:15 -0700 Subject: lmb: Fix inconsistent alignment of size argument. When allocating, if we will align up the size when making the reservation, we should also align the size for the check that the space is actually available. The simplest thing is to just aling the size up from the beginning, then we can use plain 'size' throughout. Signed-off-by: David S. Miller --- lib/lmb.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/lmb.c b/lib/lmb.c index 83287d3869a..93445dc8f81 100644 --- a/lib/lmb.c +++ b/lib/lmb.c @@ -286,8 +286,7 @@ static u64 __init lmb_alloc_nid_unreserved(u64 start, u64 end, j = lmb_overlaps_region(&lmb.reserved, base, size); if (j < 0) { /* this area isn't reserved, take it */ - if (lmb_add_region(&lmb.reserved, base, - lmb_align_up(size, align)) < 0) + if (lmb_add_region(&lmb.reserved, base, size) < 0) base = ~(u64)0; return base; } @@ -333,6 +332,10 @@ u64 __init lmb_alloc_nid(u64 size, u64 align, int nid, struct lmb_region *mem = &lmb.memory; int i; + BUG_ON(0 == size); + + size = lmb_align_up(size, align); + for (i = 0; i < mem->cnt; i++) { u64 ret = lmb_alloc_nid_region(&mem->region[i], nid_range, @@ -370,6 +373,8 @@ u64 __init __lmb_alloc_base(u64 size, u64 align, u64 max_addr) BUG_ON(0 == size); + size = lmb_align_up(size, align); + /* On some platforms, make sure we allocate lowmem */ /* Note that LMB_REAL_LIMIT may be LMB_ALLOC_ANYWHERE */ if (max_addr == LMB_ALLOC_ANYWHERE) @@ -393,8 +398,7 @@ u64 __init __lmb_alloc_base(u64 size, u64 align, u64 max_addr) j = lmb_overlaps_region(&lmb.reserved, base, size); if (j < 0) { /* this area isn't reserved, take it */ - if (lmb_add_region(&lmb.reserved, base, - lmb_align_up(size, align)) < 0) + if (lmb_add_region(&lmb.reserved, base, size) < 0) return 0; return base; } -- cgit v1.2.3-70-g09d2 From faa6cfde747ba6d37a0889cbe85881c80806d355 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 12 May 2008 17:21:55 -0700 Subject: lmb: Make lmb debugging more useful. Having to muck with the build and set DEBUG just to get lmb_dump_all() to print things isn't very useful. So use pr_info() and use an early boot param "lmb=debug" so we can simply ask users to reboot with this option when we need some debugging from them. Signed-off-by: David S. Miller --- lib/lmb.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/lmb.c b/lib/lmb.c index 93445dc8f81..867f7b5a823 100644 --- a/lib/lmb.c +++ b/lib/lmb.c @@ -19,31 +19,42 @@ struct lmb lmb; +static int lmb_debug; + +static int __init early_lmb(char *p) +{ + if (p && strstr(p, "debug")) + lmb_debug = 1; + return 0; +} +early_param("lmb", early_lmb); + void lmb_dump_all(void) { -#ifdef DEBUG unsigned long i; - pr_debug("lmb_dump_all:\n"); - pr_debug(" memory.cnt = 0x%lx\n", lmb.memory.cnt); - pr_debug(" memory.size = 0x%llx\n", + if (!lmb_debug) + return; + + pr_info("lmb_dump_all:\n"); + pr_info(" memory.cnt = 0x%lx\n", lmb.memory.cnt); + pr_info(" memory.size = 0x%llx\n", (unsigned long long)lmb.memory.size); for (i=0; i < lmb.memory.cnt ;i++) { - pr_debug(" memory.region[0x%x].base = 0x%llx\n", + pr_info(" memory.region[0x%lx].base = 0x%llx\n", i, (unsigned long long)lmb.memory.region[i].base); - pr_debug(" .size = 0x%llx\n", + pr_info(" .size = 0x%llx\n", (unsigned long long)lmb.memory.region[i].size); } - pr_debug(" reserved.cnt = 0x%lx\n", lmb.reserved.cnt); - pr_debug(" reserved.size = 0x%lx\n", lmb.reserved.size); + pr_info(" reserved.cnt = 0x%lx\n", lmb.reserved.cnt); + pr_info(" reserved.size = 0x%lx\n", lmb.reserved.size); for (i=0; i < lmb.reserved.cnt ;i++) { - pr_debug(" reserved.region[0x%x].base = 0x%llx\n", + pr_info(" reserved.region[0x%lx].base = 0x%llx\n", i, (unsigned long long)lmb.reserved.region[i].base); - pr_debug(" .size = 0x%llx\n", + pr_info(" .size = 0x%llx\n", (unsigned long long)lmb.reserved.region[i].size); } -#endif /* DEBUG */ } static unsigned long lmb_addrs_overlap(u64 base1, u64 size1, u64 base2, -- cgit v1.2.3-70-g09d2 From f4ed0deae8983591264d0e194e168ef65f4775f5 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Mon, 12 May 2008 14:02:29 -0700 Subject: cpumask: remove bitmap_scnprintf_len and cpumask_scnprintf_len They aren't used. They were briefly used as part of some other patches to provide an alternative format for displaying some /proc and /sys cpumasks. They probably should have been removed when those other patches were dropped, in favor of a different solution. Signed-off-by: Paul Jackson Cc: "Mike Travis" Cc: "Bert Wesarg" Cc: Alexey Dobriyan Cc: WANG Cong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 1 - include/linux/cpumask.h | 7 ------- lib/bitmap.c | 16 ---------------- 3 files changed, 24 deletions(-) (limited to 'lib') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 43b406def35..1abfe664c44 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -110,7 +110,6 @@ extern int __bitmap_weight(const unsigned long *bitmap, int bits); extern int bitmap_scnprintf(char *buf, unsigned int len, const unsigned long *src, int nbits); -extern int bitmap_scnprintf_len(unsigned int len); extern int __bitmap_parse(const char *buf, unsigned int buflen, int is_user, unsigned long *dst, int nbits); extern int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 9650806fe2e..5df3db58fcc 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -289,13 +289,6 @@ static inline int __cpumask_scnprintf(char *buf, int len, return bitmap_scnprintf(buf, len, srcp->bits, nbits); } -#define cpumask_scnprintf_len(len) \ - __cpumask_scnprintf_len((len)) -static inline int __cpumask_scnprintf_len(int len) -{ - return bitmap_scnprintf_len(len); -} - #define cpumask_parse_user(ubuf, ulen, dst) \ __cpumask_parse_user((ubuf), (ulen), &(dst), NR_CPUS) static inline int __cpumask_parse_user(const char __user *buf, int len, diff --git a/lib/bitmap.c b/lib/bitmap.c index c4cb48f77f0..482df94ea21 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -315,22 +315,6 @@ int bitmap_scnprintf(char *buf, unsigned int buflen, } EXPORT_SYMBOL(bitmap_scnprintf); -/** - * bitmap_scnprintf_len - return buffer length needed to convert - * bitmap to an ASCII hex string. - * @len: number of bits to be converted - */ -int bitmap_scnprintf_len(unsigned int len) -{ - /* we need 9 chars per word for 32 bit words (8 hexdigits + sep/null) */ - int bitslen = ALIGN(len, CHUNKSZ); - int wordlen = CHUNKSZ / 4; - int buflen = (bitslen / wordlen) * (wordlen + 1) * sizeof(char); - - return buflen; -} -EXPORT_SYMBOL(bitmap_scnprintf_len); - /** * __bitmap_parse - convert an ASCII hex string into a bitmap. * @buf: pointer to buffer containing string. -- cgit v1.2.3-70-g09d2 From b32a09db4fb9a87246ba4e7726a979ac4709ad97 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 26 Feb 2008 09:57:11 -0600 Subject: add match_strlcpy() us it to make v9fs make uname and remotename parsing more robust match_strcpy() is a somewhat creepy function: the caller needs to make sure that the destination buffer is big enough, and when he screws up or forgets, match_strcpy() happily overruns the buffer. There's exactly one customer: v9fs_parse_options(). I believe it currently can't overflow its buffer, but that's not exactly obvious. The source string is a substing of the mount options. The kernel silently truncates those to PAGE_SIZE bytes, including the terminating zero. See compat_sys_mount() and do_mount(). The destination buffer is obtained from __getname(), which allocates from name_cachep, which is initialized by vfs_caches_init() for size PATH_MAX. We're safe as long as PATH_MAX <= PAGE_SIZE. PATH_MAX is 4096. As far as I know, the smallest PAGE_SIZE is also 4096. Here's a patch that makes the code a bit more obviously correct. It doesn't depend on PATH_MAX <= PAGE_SIZE. Signed-off-by: Markus Armbruster Cc: Latchesar Ionkov Cc: Jim Meyering Cc: "Randy.Dunlap" Signed-off-by: Andrew Morton Signed-off-by: Eric Van Hensbergen --- fs/9p/v9fs.c | 4 ++-- include/linux/parser.h | 2 +- lib/parser.c | 32 ++++++++++++++++++++------------ 3 files changed, 23 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 9b0f0222e8b..e307fbd34fa 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -125,10 +125,10 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses) v9ses->afid = option; break; case Opt_uname: - match_strcpy(v9ses->uname, &args[0]); + match_strlcpy(v9ses->uname, &args[0], PATH_MAX); break; case Opt_remotename: - match_strcpy(v9ses->aname, &args[0]); + match_strlcpy(v9ses->aname, &args[0], PATH_MAX); break; case Opt_nodevmap: v9ses->nodev = 1; diff --git a/include/linux/parser.h b/include/linux/parser.h index 26b2bdfcaf0..7dcd0507575 100644 --- a/include/linux/parser.h +++ b/include/linux/parser.h @@ -29,5 +29,5 @@ int match_token(char *, match_table_t table, substring_t args[]); int match_int(substring_t *, int *result); int match_octal(substring_t *, int *result); int match_hex(substring_t *, int *result); -void match_strcpy(char *, const substring_t *); +size_t match_strlcpy(char *, const substring_t *, size_t); char *match_strdup(const substring_t *); diff --git a/lib/parser.c b/lib/parser.c index 703c8c13b34..4f0cbc03e0e 100644 --- a/lib/parser.c +++ b/lib/parser.c @@ -182,18 +182,25 @@ int match_hex(substring_t *s, int *result) } /** - * match_strcpy: - copies the characters from a substring_t to a string - * @to: string to copy characters to. - * @s: &substring_t to copy + * match_strlcpy: - Copy the characters from a substring_t to a sized buffer + * @dest: where to copy to + * @src: &substring_t to copy + * @size: size of destination buffer * - * Description: Copies the set of characters represented by the given - * &substring_t @s to the c-style string @to. Caller guarantees that @to is - * large enough to hold the characters of @s. + * Description: Copy the characters in &substring_t @src to the + * c-style string @dest. Copy no more than @size - 1 characters, plus + * the terminating NUL. Return length of @src. */ -void match_strcpy(char *to, const substring_t *s) +size_t match_strlcpy(char *dest, const substring_t *src, size_t size) { - memcpy(to, s->from, s->to - s->from); - to[s->to - s->from] = '\0'; + size_t ret = src->to - src->from; + + if (size) { + size_t len = ret >= size ? size - 1 : ret; + memcpy(dest, src->from, len); + dest[len] = '\0'; + } + return ret; } /** @@ -206,9 +213,10 @@ void match_strcpy(char *to, const substring_t *s) */ char *match_strdup(const substring_t *s) { - char *p = kmalloc(s->to - s->from + 1, GFP_KERNEL); + size_t sz = s->to - s->from + 1; + char *p = kmalloc(sz, GFP_KERNEL); if (p) - match_strcpy(p, s); + match_strlcpy(p, s, sz); return p; } @@ -216,5 +224,5 @@ EXPORT_SYMBOL(match_token); EXPORT_SYMBOL(match_int); EXPORT_SYMBOL(match_octal); EXPORT_SYMBOL(match_hex); -EXPORT_SYMBOL(match_strcpy); +EXPORT_SYMBOL(match_strlcpy); EXPORT_SYMBOL(match_strdup); -- cgit v1.2.3-70-g09d2 From 3fc957721d18c93662f7d4dab455b80f53dd2641 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Wed, 14 May 2008 16:05:49 -0700 Subject: lib: create common ascii hex array Add a common hex array in hexdump.c so everyone can use it. Add a common hi/lo helper to avoid the shifting masking that is done to get the upper and lower nibbles of a byte value. Pull the pack_hex_byte helper from kgdb as it is opencoded many places in the tree that will be consolidated. Signed-off-by: Harvey Harrison Acked-by: Paul Mundt Cc: Jason Wessel Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/kernel/kgdb_stub.c | 8 -------- drivers/pnp/support.c | 8 ++++---- include/linux/kernel.h | 12 +++++++++++- kernel/kgdb.c | 8 -------- lib/hexdump.c | 7 +++++-- 5 files changed, 20 insertions(+), 23 deletions(-) (limited to 'lib') diff --git a/arch/sh/kernel/kgdb_stub.c b/arch/sh/kernel/kgdb_stub.c index d453c3a1c79..832641bbd47 100644 --- a/arch/sh/kernel/kgdb_stub.c +++ b/arch/sh/kernel/kgdb_stub.c @@ -330,14 +330,6 @@ static char *ebin_to_mem(const char *buf, char *mem, int count) return mem; } -/* Pack a hex byte */ -static char *pack_hex_byte(char *pkt, int byte) -{ - *pkt++ = hexchars[(byte >> 4) & 0xf]; - *pkt++ = hexchars[(byte & 0xf)]; - return pkt; -} - /* Scan for the start char '$', read the packet and check the checksum */ static void get_packet(char *buffer, int buflen) { diff --git a/drivers/pnp/support.c b/drivers/pnp/support.c index 3eba85ed729..95b076c18c0 100644 --- a/drivers/pnp/support.c +++ b/drivers/pnp/support.c @@ -45,10 +45,10 @@ void pnp_eisa_id_to_string(u32 id, char *str) str[0] = 'A' + ((id >> 26) & 0x3f) - 1; str[1] = 'A' + ((id >> 21) & 0x1f) - 1; str[2] = 'A' + ((id >> 16) & 0x1f) - 1; - str[3] = hex_asc((id >> 12) & 0xf); - str[4] = hex_asc((id >> 8) & 0xf); - str[5] = hex_asc((id >> 4) & 0xf); - str[6] = hex_asc((id >> 0) & 0xf); + str[3] = hex_asc_hi(id >> 8); + str[4] = hex_asc_lo(id >> 8); + str[5] = hex_asc_hi(id); + str[6] = hex_asc_lo(id); str[7] = '\0'; } diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 4d46e299afb..792bf0aa779 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -276,7 +276,17 @@ extern void print_hex_dump(const char *level, const char *prefix_str, const void *buf, size_t len, bool ascii); extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type, const void *buf, size_t len); -#define hex_asc(x) "0123456789abcdef"[x] + +extern const char hex_asc[]; +#define hex_asc_lo(x) hex_asc[((x) & 0x0f)] +#define hex_asc_hi(x) hex_asc[((x) & 0xf0) >> 4] + +static inline char *pack_hex_byte(char *buf, u8 byte) +{ + *buf++ = hex_asc_hi(byte); + *buf++ = hex_asc_lo(byte); + return buf; +} #define pr_emerg(fmt, arg...) \ printk(KERN_EMERG fmt, ##arg) diff --git a/kernel/kgdb.c b/kernel/kgdb.c index 39e31a036f5..14787de568b 100644 --- a/kernel/kgdb.c +++ b/kernel/kgdb.c @@ -346,14 +346,6 @@ static void put_packet(char *buffer) } } -static char *pack_hex_byte(char *pkt, u8 byte) -{ - *pkt++ = hexchars[byte >> 4]; - *pkt++ = hexchars[byte & 0xf]; - - return pkt; -} - /* * Convert the memory pointed to by mem into hex, placing result in buf. * Return a pointer to the last char put in buf (null). May return an error. diff --git a/lib/hexdump.c b/lib/hexdump.c index 343546550dc..f07c0db81d2 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -12,6 +12,9 @@ #include #include +const char hex_asc[] = "0123456789abcdef"; +EXPORT_SYMBOL(hex_asc); + /** * hex_dump_to_buffer - convert a blob of data to "hex ASCII" in memory * @buf: data blob to dump @@ -93,8 +96,8 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize, for (j = 0; (j < rowsize) && (j < len) && (lx + 4) < linebuflen; j++) { ch = ptr[j]; - linebuf[lx++] = hex_asc(ch >> 4); - linebuf[lx++] = hex_asc(ch & 0x0f); + linebuf[lx++] = hex_asc_hi(ch); + linebuf[lx++] = hex_asc_lo(ch); linebuf[lx++] = ' '; } ascii_column = 3 * rowsize + 2; -- cgit v1.2.3-70-g09d2 From f9ebcd9d410ba7209a8f321c41edf8615fc3ce67 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Sun, 18 May 2008 13:18:01 -0500 Subject: lmb: Fix compile warning lib/lmb.c: In function 'lmb_dump_all': lib/lmb.c:51: warning: format '%lx' expects type 'long unsigned int', but argument 2 has type 'u64' Signed-off-by: Kumar Gala --- lib/lmb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/lmb.c b/lib/lmb.c index 867f7b5a823..5d7b9286503 100644 --- a/lib/lmb.c +++ b/lib/lmb.c @@ -48,7 +48,8 @@ void lmb_dump_all(void) } pr_info(" reserved.cnt = 0x%lx\n", lmb.reserved.cnt); - pr_info(" reserved.size = 0x%lx\n", lmb.reserved.size); + pr_info(" reserved.size = 0x%llx\n", + (unsigned long long)lmb.memory.size); for (i=0; i < lmb.reserved.cnt ;i++) { pr_info(" reserved.region[0x%lx].base = 0x%llx\n", i, (unsigned long long)lmb.reserved.region[i].base); -- cgit v1.2.3-70-g09d2 From 3527fb326f07bc8e85cf66d4f987ebeea24e8e4a Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Thu, 5 Jun 2008 22:46:19 -0700 Subject: lib: export bitrev16 Bluetooth will be able to use this. Signed-off-by: Harvey Harrison Cc: Marcel Holtmann Cc: Dave Young Cc: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitrev.h | 1 + lib/bitrev.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/include/linux/bitrev.h b/include/linux/bitrev.h index 05e540d6963..7ffe03f4693 100644 --- a/include/linux/bitrev.h +++ b/include/linux/bitrev.h @@ -10,6 +10,7 @@ static inline u8 bitrev8(u8 byte) return byte_rev_table[byte]; } +extern u16 bitrev16(u16 in); extern u32 bitrev32(u32 in); #endif /* _LINUX_BITREV_H */ diff --git a/lib/bitrev.c b/lib/bitrev.c index 989aff73f88..3956203456d 100644 --- a/lib/bitrev.c +++ b/lib/bitrev.c @@ -42,10 +42,11 @@ const u8 byte_rev_table[256] = { }; EXPORT_SYMBOL_GPL(byte_rev_table); -static __always_inline u16 bitrev16(u16 x) +u16 bitrev16(u16 x) { return (bitrev8(x & 0xff) << 8) | bitrev8(x >> 8); } +EXPORT_SYMBOL(bitrev16); /** * bitrev32 - reverse the order of bits in a u32 value -- cgit v1.2.3-70-g09d2 From f595ec964daf7f99668039d7303ddedd09a75142 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 12 Jun 2008 10:47:56 +0200 Subject: common implementation of iterative div/mod We have a few instances of the open-coded iterative div/mod loop, used when we don't expcet the dividend to be much bigger than the divisor. Unfortunately modern gcc's have the tendency to strength "reduce" this into a full mod operation, which isn't necessarily any faster, and even if it were, doesn't exist if gcc implements it in libgcc. The workaround is to put a dummy asm statement in the loop to prevent gcc from performing the transformation. This patch creates a single implementation of this loop, and uses it to replace the open-coded versions I know about. Signed-off-by: Jeremy Fitzhardinge Cc: Andrew Morton Cc: john stultz Cc: Segher Boessenkool Cc: Christian Kujau Cc: Robert Hancock Signed-off-by: Ingo Molnar --- arch/x86/xen/time.c | 13 +++---------- include/linux/math64.h | 2 ++ include/linux/time.h | 11 ++--------- lib/div64.c | 23 +++++++++++++++++++++++ 4 files changed, 30 insertions(+), 19 deletions(-) (limited to 'lib') diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index c39e1a5aa24..52b2e385698 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -150,11 +151,7 @@ static void do_stolen_accounting(void) if (stolen < 0) stolen = 0; - ticks = 0; - while (stolen >= NS_PER_TICK) { - ticks++; - stolen -= NS_PER_TICK; - } + ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); __get_cpu_var(residual_stolen) = stolen; account_steal_time(NULL, ticks); @@ -166,11 +163,7 @@ static void do_stolen_accounting(void) if (blocked < 0) blocked = 0; - ticks = 0; - while (blocked >= NS_PER_TICK) { - ticks++; - blocked -= NS_PER_TICK; - } + ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); __get_cpu_var(residual_blocked) = blocked; account_steal_time(idle_task(smp_processor_id()), ticks); } diff --git a/include/linux/math64.h b/include/linux/math64.h index c1a5f81501f..177785e1e4a 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -81,4 +81,6 @@ static inline s64 div_s64(s64 dividend, s32 divisor) } #endif +u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder); + #endif /* _LINUX_MATH64_H */ diff --git a/include/linux/time.h b/include/linux/time.h index d32ef0ad4c0..05f9517a8ed 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -6,6 +6,7 @@ #ifdef __KERNEL__ # include # include +# include #endif #ifndef _STRUCT_TIMESPEC @@ -172,15 +173,7 @@ extern struct timeval ns_to_timeval(const s64 nsec); */ static inline void timespec_add_ns(struct timespec *a, u64 ns) { - ns += a->tv_nsec; - while(unlikely(ns >= NSEC_PER_SEC)) { - /* The following asm() prevents the compiler from - * optimising this loop into a modulo operation. */ - asm("" : "+r"(ns)); - - ns -= NSEC_PER_SEC; - a->tv_sec++; - } + a->tv_sec += iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns); a->tv_nsec = ns; } #endif /* __KERNEL__ */ diff --git a/lib/div64.c b/lib/div64.c index bb5bd0c0f03..76c01542d3e 100644 --- a/lib/div64.c +++ b/lib/div64.c @@ -98,3 +98,26 @@ EXPORT_SYMBOL(div64_u64); #endif #endif /* BITS_PER_LONG == 32 */ + +/* + * Iterative div/mod for use when dividend is not expected to be much + * bigger than divisor. + */ +u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) +{ + u32 ret = 0; + + while (dividend >= divisor) { + /* The following asm() prevents the compiler from + optimising this loop into a modulo operation. */ + asm("" : "+rm"(dividend)); + + dividend -= divisor; + ret++; + } + + *remainder = dividend; + + return ret; +} +EXPORT_SYMBOL(iter_div_u64_rem); -- cgit v1.2.3-70-g09d2 From d5e181f78ac753893eb930868a52a4488cd3de0a Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 12 Jun 2008 10:47:58 +0200 Subject: add an inlined version of iter_div_u64_rem iter_div_u64_rem is used in the x86-64 vdso, which cannot call other kernel code. For this case, provide the always_inlined version, __iter_div_u64_rem. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- include/linux/math64.h | 19 +++++++++++++++++++ lib/div64.c | 15 +-------------- 2 files changed, 20 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/include/linux/math64.h b/include/linux/math64.h index 177785e1e4a..c87f1528703 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -83,4 +83,23 @@ static inline s64 div_s64(s64 dividend, s32 divisor) u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder); +static __always_inline u32 +__iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) +{ + u32 ret = 0; + + while (dividend >= divisor) { + /* The following asm() prevents the compiler from + optimising this loop into a modulo operation. */ + asm("" : "+rm"(dividend)); + + dividend -= divisor; + ret++; + } + + *remainder = dividend; + + return ret; +} + #endif /* _LINUX_MATH64_H */ diff --git a/lib/div64.c b/lib/div64.c index 76c01542d3e..a111eb8de9c 100644 --- a/lib/div64.c +++ b/lib/div64.c @@ -105,19 +105,6 @@ EXPORT_SYMBOL(div64_u64); */ u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) { - u32 ret = 0; - - while (dividend >= divisor) { - /* The following asm() prevents the compiler from - optimising this loop into a modulo operation. */ - asm("" : "+rm"(dividend)); - - dividend -= divisor; - ret++; - } - - *remainder = dividend; - - return ret; + return __iter_div_u64_rem(dividend, divisor, remainder); } EXPORT_SYMBOL(iter_div_u64_rem); -- cgit v1.2.3-70-g09d2 From 643b52b9c0b4e959436b4b551ebf4060d06d5ae8 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Thu, 12 Jun 2008 15:21:52 -0700 Subject: radix-tree: fix small lockless radix-tree bug We shrink a radix tree when its root node has only one child, in the left most slot. The child becomes the new root node. To perform this operation in a manner compatible with concurrent lockless lookups, we atomically switch the root pointer from the parent to its child. However a concurrent lockless lookup may now have loaded a pointer to the parent (and is presently deciding what to do next). For this reason, we also have to keep the parent node in a valid state after shrinking the tree, until the next RCU grace period -- otherwise this lookup with the parent pointer may not do the right thing. Notably, we need to keep the child in the left most slot there in case that is requested by the lookup. This is all pretty standard RCU stuff. It is worth repeating because in my eagerness to obey the radix tree node constructor scheme, I had broken it by zeroing the radix tree node before the grace period. What could happen is that a lookup can load the parent pointer, then decide it wants to follow the left most child slot, only to find the slot contained NULL due to the concurrent shrinker having zeroed the parent node before waiting for a grace period. The lookup would return a false negative as a result. Fix it by doing that clearing in the RCU callback. I would normally want to rip out the constructor entirely, but radix tree nodes are one of those places where they make sense (only few cachelines will be touched soon after allocation). This was never actually found in any lockless pagecache testing or by the test harness, but by seeing the odd problem with my scalable vmap rewrite. I have not tickled the test harness into reproducing it yet, but I'll keep working at it. Fortunately, it is not a problem anywhere lockless pagecache is used in mainline kernels (pagecache probe is not a guarantee, and brd does not have concurrent lookups and deletes). Signed-off-by: Nick Piggin Acked-by: Peter Zijlstra Cc: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 120 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 62 insertions(+), 58 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index bd521716ab1..169a2f8dabc 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -88,6 +88,57 @@ static inline gfp_t root_gfp_mask(struct radix_tree_root *root) return root->gfp_mask & __GFP_BITS_MASK; } +static inline void tag_set(struct radix_tree_node *node, unsigned int tag, + int offset) +{ + __set_bit(offset, node->tags[tag]); +} + +static inline void tag_clear(struct radix_tree_node *node, unsigned int tag, + int offset) +{ + __clear_bit(offset, node->tags[tag]); +} + +static inline int tag_get(struct radix_tree_node *node, unsigned int tag, + int offset) +{ + return test_bit(offset, node->tags[tag]); +} + +static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag) +{ + root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT)); +} + +static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag) +{ + root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT)); +} + +static inline void root_tag_clear_all(struct radix_tree_root *root) +{ + root->gfp_mask &= __GFP_BITS_MASK; +} + +static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag) +{ + return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); +} + +/* + * Returns 1 if any slot in the node has this tag set. + * Otherwise returns 0. + */ +static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag) +{ + int idx; + for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { + if (node->tags[tag][idx]) + return 1; + } + return 0; +} /* * This assumes that the caller has performed appropriate preallocation, and * that the caller has pinned this thread of control to the current CPU. @@ -124,6 +175,17 @@ static void radix_tree_node_rcu_free(struct rcu_head *head) { struct radix_tree_node *node = container_of(head, struct radix_tree_node, rcu_head); + + /* + * must only free zeroed nodes into the slab. radix_tree_shrink + * can leave us with a non-NULL entry in the first slot, so clear + * that here to make sure. + */ + tag_clear(node, 0, 0); + tag_clear(node, 1, 0); + node->slots[0] = NULL; + node->count = 0; + kmem_cache_free(radix_tree_node_cachep, node); } @@ -165,59 +227,6 @@ out: } EXPORT_SYMBOL(radix_tree_preload); -static inline void tag_set(struct radix_tree_node *node, unsigned int tag, - int offset) -{ - __set_bit(offset, node->tags[tag]); -} - -static inline void tag_clear(struct radix_tree_node *node, unsigned int tag, - int offset) -{ - __clear_bit(offset, node->tags[tag]); -} - -static inline int tag_get(struct radix_tree_node *node, unsigned int tag, - int offset) -{ - return test_bit(offset, node->tags[tag]); -} - -static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag) -{ - root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT)); -} - - -static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag) -{ - root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT)); -} - -static inline void root_tag_clear_all(struct radix_tree_root *root) -{ - root->gfp_mask &= __GFP_BITS_MASK; -} - -static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag) -{ - return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); -} - -/* - * Returns 1 if any slot in the node has this tag set. - * Otherwise returns 0. - */ -static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag) -{ - int idx; - for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { - if (node->tags[tag][idx]) - return 1; - } - return 0; -} - /* * Return the maximum key which can be store into a * radix tree with height HEIGHT. @@ -930,11 +939,6 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) newptr = radix_tree_ptr_to_indirect(newptr); root->rnode = newptr; root->height--; - /* must only free zeroed nodes into the slab */ - tag_clear(to_free, 0, 0); - tag_clear(to_free, 1, 0); - to_free->slots[0] = NULL; - to_free->count = 0; radix_tree_node_free(to_free); } } -- cgit v1.2.3-70-g09d2