Diffstat (limited to 'lib')
-rw-r--r-- | lib/Kconfig          |   3
-rw-r--r-- | lib/Kconfig.debug    |  63
-rw-r--r-- | lib/Makefile         |   2
-rw-r--r-- | lib/atomic64_test.c  |   2
-rw-r--r-- | lib/devres.c         |   2
-rw-r--r-- | lib/flex_array.c     |  25
-rw-r--r-- | lib/iommu-helper.c   |   9
-rw-r--r-- | lib/ioremap.c        |  10
-rw-r--r-- | lib/list_debug.c     |   6
-rw-r--r-- | lib/lmb.c            | 541
-rw-r--r-- | lib/percpu_counter.c |  27
-rw-r--r-- | lib/radix-tree.c     |  94
-rw-r--r-- | lib/random32.c       |   2
-rw-r--r-- | lib/rbtree.c         | 116
-rw-r--r-- | lib/rwsem.c          | 150
-rw-r--r-- | lib/swiotlb.c        | 137
-rw-r--r-- | lib/vsprintf.c       |  23
17 files changed, 456 insertions, 756 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index 170d8ca901d..5b916bc0fba 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -181,9 +181,6 @@ config HAS_DMA config CHECK_SIGNATURE bool -config HAVE_LMB - boolean - config CPUMASK_OFFSTACK bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS help diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 95ab402db9c..9e06b7f5ecf 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -76,7 +76,6 @@ config UNUSED_SYMBOLS config DEBUG_FS bool "Debug Filesystem" - depends on SYSFS help debugfs is a virtual file system that kernel developers use to put debugging files into. Enable this option to be able to read and @@ -152,28 +151,33 @@ config DEBUG_SHIRQ Drivers ought to be able to handle interrupts coming in at those points; some don't and need to be caught. -config DETECT_SOFTLOCKUP - bool "Detect Soft Lockups" +config LOCKUP_DETECTOR + bool "Detect Hard and Soft Lockups" depends on DEBUG_KERNEL && !S390 - default y help - Say Y here to enable the kernel to detect "soft lockups", - which are bugs that cause the kernel to loop in kernel + Say Y here to enable the kernel to act as a watchdog to detect + hard and soft lockups. + + Softlockups are bugs that cause the kernel to loop in kernel mode for more than 60 seconds, without giving other tasks a - chance to run. + chance to run. The current stack trace is displayed upon + detection and the system will stay locked up. - When a soft-lockup is detected, the kernel will print the - current stack trace (which you should report), but the - system will stay locked up. This feature has negligible - overhead. + Hardlockups are bugs that cause the CPU to loop in kernel mode + for more than 60 seconds, without letting other interrupts have a + chance to run. The current stack trace is displayed upon detection + and the system will stay locked up. - (Note that "hard lockups" are separate type of bugs that - can be detected via the NMI-watchdog, on platforms that - support it.) + The overhead should be minimal. A periodic hrtimer runs to + generate interrupts and kick the watchdog task every 10-12 seconds. + An NMI is generated every 60 seconds or so to check for hardlockups. + +config HARDLOCKUP_DETECTOR + def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI config BOOTPARAM_SOFTLOCKUP_PANIC bool "Panic (Reboot) On Soft Lockups" - depends on DETECT_SOFTLOCKUP + depends on LOCKUP_DETECTOR help Say Y here to enable the kernel to panic on "soft lockups", which are bugs that cause the kernel to loop in kernel @@ -190,7 +194,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE int - depends on DETECT_SOFTLOCKUP + depends on LOCKUP_DETECTOR range 0 1 default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC default 1 if BOOTPARAM_SOFTLOCKUP_PANIC @@ -307,6 +311,12 @@ config DEBUG_OBJECTS_WORK work queue routines to track the life time of work objects and validate the work operations. +config DEBUG_OBJECTS_RCU_HEAD + bool "Debug RCU callbacks objects" + depends on DEBUG_OBJECTS && PREEMPT + help + Enable this to turn on debugging of RCU list heads (call_rcu() usage). 
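The help text above describes soft lockups only in prose. As a rough illustration (not part of this patch), the class of bug the softlockup half of LOCKUP_DETECTOR reports is a kernel-mode loop that never sleeps or reschedules; the flag name below is hypothetical:

/*
 * Illustrative sketch only: a kernel thread stuck in a busy-wait like
 * this never calls schedule(), so the watchdog task on its CPU is
 * starved and the detector prints the stack trace mentioned above.
 */
#include <linux/kthread.h>
#include <linux/sched.h>

static bool stop_requested;             /* hypothetical, set elsewhere */

static int buggy_poll_thread(void *unused)
{
        while (!stop_requested)         /* no sleep, no schedule() */
                cpu_relax();            /* spins in kernel mode */
        return 0;
}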
+ config DEBUG_OBJECTS_ENABLE_DEFAULT int "debug_objects bootup default value (0-1)" range 0 1 @@ -535,7 +545,7 @@ config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT select STACKTRACE - select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 + select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE select KALLSYMS select KALLSYMS_ALL @@ -635,6 +645,19 @@ config DEBUG_INFO If unsure, say N. +config DEBUG_INFO_REDUCED + bool "Reduce debugging information" + depends on DEBUG_INFO + help + If you say Y here gcc is instructed to generate less debugging + information for structure types. This means that tools that + need full debugging information (like kgdb or systemtap) won't + be happy. But if you merely need debugging information to + resolve line numbers there is no loss. Advantage is that + build directory object sizes shrink dramatically over a full + DEBUG_INFO build and compile times are reduced too. + Only works with newer gcc versions. + config DEBUG_VM bool "Debug VM" depends on DEBUG_KERNEL @@ -944,7 +967,7 @@ config FAIL_MAKE_REQUEST Provide fault-injection capability for disk IO. config FAIL_IO_TIMEOUT - bool "Faul-injection capability for faking disk interrupts" + bool "Fault-injection capability for faking disk interrupts" depends on FAULT_INJECTION && BLOCK help Provide fault-injection capability on end IO handling. This @@ -965,13 +988,13 @@ config FAULT_INJECTION_STACKTRACE_FILTER depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT depends on !X86_64 select STACKTRACE - select FRAME_POINTER if !PPC && !S390 + select FRAME_POINTER if !PPC && !S390 && !MICROBLAZE help Provide stacktrace filter for fault-injection capabilities config LATENCYTOP bool "Latency measuring infrastructure" - select FRAME_POINTER if !MIPS && !PPC && !S390 + select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE select KALLSYMS select KALLSYMS_ALL select STACKTRACE diff --git a/lib/Makefile b/lib/Makefile index 3f1062cbbff..0bfabba1bb3 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -89,8 +89,6 @@ obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o lib-$(CONFIG_GENERIC_BUG) += bug.o -obj-$(CONFIG_HAVE_LMB) += lmb.o - obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index 250ed11d3ed..44524cc8c32 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c @@ -114,7 +114,7 @@ static __init int test_atomic64(void) BUG_ON(v.counter != r); #if defined(CONFIG_X86) || defined(CONFIG_MIPS) || defined(CONFIG_PPC) || \ - defined(CONFIG_S390) || defined(_ASM_GENERIC_ATOMIC64_H) + defined(CONFIG_S390) || defined(_ASM_GENERIC_ATOMIC64_H) || defined(CONFIG_ARM) INIT(onestwos); BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1)); r -= one; diff --git a/lib/devres.c b/lib/devres.c index 49368608f98..6efddf53b90 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -328,7 +328,7 @@ EXPORT_SYMBOL(pcim_iomap_regions_request_all); * @pdev: PCI device to map IO resources for * @mask: Mask of BARs to unmap and release * - * Unamp and release regions specified by @mask. + * Unmap and release regions specified by @mask. 
*/ void pcim_iounmap_regions(struct pci_dev *pdev, u16 mask) { diff --git a/lib/flex_array.c b/lib/flex_array.c index 41b1804fa72..77a6fea7481 100644 --- a/lib/flex_array.c +++ b/lib/flex_array.c @@ -171,6 +171,8 @@ __fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags) * Note that this *copies* the contents of @src into * the array. If you are trying to store an array of * pointers, make sure to pass in &ptr instead of ptr. + * You may instead wish to use the flex_array_put_ptr() + * helper function. * * Locking must be provided by the caller. */ @@ -265,7 +267,8 @@ int flex_array_prealloc(struct flex_array *fa, unsigned int start, * * Returns a pointer to the data at index @element_nr. Note * that this is a copy of the data that was passed in. If you - * are using this to store pointers, you'll get back &ptr. + * are using this to store pointers, you'll get back &ptr. You + * may instead wish to use the flex_array_get_ptr helper. * * Locking must be provided by the caller. */ @@ -286,6 +289,26 @@ void *flex_array_get(struct flex_array *fa, unsigned int element_nr) return &part->elements[index_inside_part(fa, element_nr)]; } +/** + * flex_array_get_ptr - pull a ptr back out of the array + * @fa: the flex array from which to extract data + * @element_nr: index of the element to fetch from the array + * + * Returns the pointer placed in the flex array at element_nr using + * flex_array_put_ptr(). This function should not be called if the + * element in question was not set using the _put_ptr() helper. + */ +void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr) +{ + void **tmp; + + tmp = flex_array_get(fa, element_nr); + if (!tmp) + return NULL; + + return *tmp; +} + static int part_is_free(struct flex_array_part *part) { int i; diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c index c0251f4ad08..da053313ee5 100644 --- a/lib/iommu-helper.c +++ b/lib/iommu-helper.c @@ -38,12 +38,3 @@ again: return -1; } EXPORT_SYMBOL(iommu_area_alloc); - -unsigned long iommu_num_pages(unsigned long addr, unsigned long len, - unsigned long io_page_size) -{ - unsigned long size = (addr & (io_page_size - 1)) + len; - - return DIV_ROUND_UP(size, io_page_size); -} -EXPORT_SYMBOL(iommu_num_pages); diff --git a/lib/ioremap.c b/lib/ioremap.c index 14c6078f17a..5730ecd3eb6 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -13,10 +13,10 @@ #include <asm/pgtable.h> static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, - unsigned long end, unsigned long phys_addr, pgprot_t prot) + unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { pte_t *pte; - unsigned long pfn; + u64 pfn; pfn = phys_addr >> PAGE_SHIFT; pte = pte_alloc_kernel(pmd, addr); @@ -31,7 +31,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, } static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, - unsigned long end, unsigned long phys_addr, pgprot_t prot) + unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { pmd_t *pmd; unsigned long next; @@ -49,7 +49,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, } static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, - unsigned long end, unsigned long phys_addr, pgprot_t prot) + unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { pud_t *pud; unsigned long next; @@ -67,7 +67,7 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, } int ioremap_page_range(unsigned long addr, - unsigned long end, unsigned long phys_addr, pgprot_t prot) + unsigned long end, phys_addr_t phys_addr, 
pgprot_t prot) { pgd_t *pgd; unsigned long start; diff --git a/lib/list_debug.c b/lib/list_debug.c index 1a39f4e3ae1..344c710d16c 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -43,6 +43,12 @@ EXPORT_SYMBOL(__list_add); */ void list_del(struct list_head *entry) { + WARN(entry->next == LIST_POISON1, + "list_del corruption, next is LIST_POISON1 (%p)\n", + LIST_POISON1); + WARN(entry->next != LIST_POISON1 && entry->prev == LIST_POISON2, + "list_del corruption, prev is LIST_POISON2 (%p)\n", + LIST_POISON2); WARN(entry->prev->next != entry, "list_del corruption. prev->next should be %p, " "but was %p\n", entry, entry->prev->next); diff --git a/lib/lmb.c b/lib/lmb.c deleted file mode 100644 index b1fc5260652..00000000000 --- a/lib/lmb.c +++ /dev/null @@ -1,541 +0,0 @@ -/* - * Procedures for maintaining information about logical memory blocks. - * - * Peter Bergner, IBM Corp. June 2001. - * Copyright (C) 2001 Peter Bergner. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/bitops.h> -#include <linux/lmb.h> - -#define LMB_ALLOC_ANYWHERE 0 - -struct lmb lmb; - -static int lmb_debug; - -static int __init early_lmb(char *p) -{ - if (p && strstr(p, "debug")) - lmb_debug = 1; - return 0; -} -early_param("lmb", early_lmb); - -static void lmb_dump(struct lmb_region *region, char *name) -{ - unsigned long long base, size; - int i; - - pr_info(" %s.cnt = 0x%lx\n", name, region->cnt); - - for (i = 0; i < region->cnt; i++) { - base = region->region[i].base; - size = region->region[i].size; - - pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n", - name, i, base, base + size - 1, size); - } -} - -void lmb_dump_all(void) -{ - if (!lmb_debug) - return; - - pr_info("LMB configuration:\n"); - pr_info(" rmo_size = 0x%llx\n", (unsigned long long)lmb.rmo_size); - pr_info(" memory.size = 0x%llx\n", (unsigned long long)lmb.memory.size); - - lmb_dump(&lmb.memory, "memory"); - lmb_dump(&lmb.reserved, "reserved"); -} - -static unsigned long lmb_addrs_overlap(u64 base1, u64 size1, u64 base2, - u64 size2) -{ - return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); -} - -static long lmb_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2) -{ - if (base2 == base1 + size1) - return 1; - else if (base1 == base2 + size2) - return -1; - - return 0; -} - -static long lmb_regions_adjacent(struct lmb_region *rgn, - unsigned long r1, unsigned long r2) -{ - u64 base1 = rgn->region[r1].base; - u64 size1 = rgn->region[r1].size; - u64 base2 = rgn->region[r2].base; - u64 size2 = rgn->region[r2].size; - - return lmb_addrs_adjacent(base1, size1, base2, size2); -} - -static void lmb_remove_region(struct lmb_region *rgn, unsigned long r) -{ - unsigned long i; - - for (i = r; i < rgn->cnt - 1; i++) { - rgn->region[i].base = rgn->region[i + 1].base; - rgn->region[i].size = rgn->region[i + 1].size; - } - rgn->cnt--; -} - -/* Assumption: base addr of region 1 < base addr of region 2 */ -static void lmb_coalesce_regions(struct lmb_region *rgn, - unsigned long r1, unsigned long r2) -{ - rgn->region[r1].size += rgn->region[r2].size; - lmb_remove_region(rgn, r2); -} - -void __init lmb_init(void) -{ - /* Create a dummy zero size LMB which will get coalesced away later. - * This simplifies the lmb_add() code below... 
- */ - lmb.memory.region[0].base = 0; - lmb.memory.region[0].size = 0; - lmb.memory.cnt = 1; - - /* Ditto. */ - lmb.reserved.region[0].base = 0; - lmb.reserved.region[0].size = 0; - lmb.reserved.cnt = 1; -} - -void __init lmb_analyze(void) -{ - int i; - - lmb.memory.size = 0; - - for (i = 0; i < lmb.memory.cnt; i++) - lmb.memory.size += lmb.memory.region[i].size; -} - -static long lmb_add_region(struct lmb_region *rgn, u64 base, u64 size) -{ - unsigned long coalesced = 0; - long adjacent, i; - - if ((rgn->cnt == 1) && (rgn->region[0].size == 0)) { - rgn->region[0].base = base; - rgn->region[0].size = size; - return 0; - } - - /* First try and coalesce this LMB with another. */ - for (i = 0; i < rgn->cnt; i++) { - u64 rgnbase = rgn->region[i].base; - u64 rgnsize = rgn->region[i].size; - - if ((rgnbase == base) && (rgnsize == size)) - /* Already have this region, so we're done */ - return 0; - - adjacent = lmb_addrs_adjacent(base, size, rgnbase, rgnsize); - if (adjacent > 0) { - rgn->region[i].base -= size; - rgn->region[i].size += size; - coalesced++; - break; - } else if (adjacent < 0) { - rgn->region[i].size += size; - coalesced++; - break; - } - } - - if ((i < rgn->cnt - 1) && lmb_regions_adjacent(rgn, i, i+1)) { - lmb_coalesce_regions(rgn, i, i+1); - coalesced++; - } - - if (coalesced) - return coalesced; - if (rgn->cnt >= MAX_LMB_REGIONS) - return -1; - - /* Couldn't coalesce the LMB, so add it to the sorted table. */ - for (i = rgn->cnt - 1; i >= 0; i--) { - if (base < rgn->region[i].base) { - rgn->region[i+1].base = rgn->region[i].base; - rgn->region[i+1].size = rgn->region[i].size; - } else { - rgn->region[i+1].base = base; - rgn->region[i+1].size = size; - break; - } - } - - if (base < rgn->region[0].base) { - rgn->region[0].base = base; - rgn->region[0].size = size; - } - rgn->cnt++; - - return 0; -} - -long lmb_add(u64 base, u64 size) -{ - struct lmb_region *_rgn = &lmb.memory; - - /* On pSeries LPAR systems, the first LMB is our RMO region. */ - if (base == 0) - lmb.rmo_size = size; - - return lmb_add_region(_rgn, base, size); - -} - -static long __lmb_remove(struct lmb_region *rgn, u64 base, u64 size) -{ - u64 rgnbegin, rgnend; - u64 end = base + size; - int i; - - rgnbegin = rgnend = 0; /* supress gcc warnings */ - - /* Find the region where (base, size) belongs to */ - for (i=0; i < rgn->cnt; i++) { - rgnbegin = rgn->region[i].base; - rgnend = rgnbegin + rgn->region[i].size; - - if ((rgnbegin <= base) && (end <= rgnend)) - break; - } - - /* Didn't find the region */ - if (i == rgn->cnt) - return -1; - - /* Check to see if we are removing entire region */ - if ((rgnbegin == base) && (rgnend == end)) { - lmb_remove_region(rgn, i); - return 0; - } - - /* Check to see if region is matching at the front */ - if (rgnbegin == base) { - rgn->region[i].base = end; - rgn->region[i].size -= size; - return 0; - } - - /* Check to see if the region is matching at the end */ - if (rgnend == end) { - rgn->region[i].size -= size; - return 0; - } - - /* - * We need to split the entry - adjust the current one to the - * beginging of the hole and add the region after hole. 
- */ - rgn->region[i].size = base - rgn->region[i].base; - return lmb_add_region(rgn, end, rgnend - end); -} - -long lmb_remove(u64 base, u64 size) -{ - return __lmb_remove(&lmb.memory, base, size); -} - -long __init lmb_free(u64 base, u64 size) -{ - return __lmb_remove(&lmb.reserved, base, size); -} - -long __init lmb_reserve(u64 base, u64 size) -{ - struct lmb_region *_rgn = &lmb.reserved; - - BUG_ON(0 == size); - - return lmb_add_region(_rgn, base, size); -} - -long lmb_overlaps_region(struct lmb_region *rgn, u64 base, u64 size) -{ - unsigned long i; - - for (i = 0; i < rgn->cnt; i++) { - u64 rgnbase = rgn->region[i].base; - u64 rgnsize = rgn->region[i].size; - if (lmb_addrs_overlap(base, size, rgnbase, rgnsize)) - break; - } - - return (i < rgn->cnt) ? i : -1; -} - -static u64 lmb_align_down(u64 addr, u64 size) -{ - return addr & ~(size - 1); -} - -static u64 lmb_align_up(u64 addr, u64 size) -{ - return (addr + (size - 1)) & ~(size - 1); -} - -static u64 __init lmb_alloc_nid_unreserved(u64 start, u64 end, - u64 size, u64 align) -{ - u64 base, res_base; - long j; - - base = lmb_align_down((end - size), align); - while (start <= base) { - j = lmb_overlaps_region(&lmb.reserved, base, size); - if (j < 0) { - /* this area isn't reserved, take it */ - if (lmb_add_region(&lmb.reserved, base, size) < 0) - base = ~(u64)0; - return base; - } - res_base = lmb.reserved.region[j].base; - if (res_base < size) - break; - base = lmb_align_down(res_base - size, align); - } - - return ~(u64)0; -} - -static u64 __init lmb_alloc_nid_region(struct lmb_property *mp, - u64 (*nid_range)(u64, u64, int *), - u64 size, u64 align, int nid) -{ - u64 start, end; - - start = mp->base; - end = start + mp->size; - - start = lmb_align_up(start, align); - while (start < end) { - u64 this_end; - int this_nid; - - this_end = nid_range(start, end, &this_nid); - if (this_nid == nid) { - u64 ret = lmb_alloc_nid_unreserved(start, this_end, - size, align); - if (ret != ~(u64)0) - return ret; - } - start = this_end; - } - - return ~(u64)0; -} - -u64 __init lmb_alloc_nid(u64 size, u64 align, int nid, - u64 (*nid_range)(u64 start, u64 end, int *nid)) -{ - struct lmb_region *mem = &lmb.memory; - int i; - - BUG_ON(0 == size); - - size = lmb_align_up(size, align); - - for (i = 0; i < mem->cnt; i++) { - u64 ret = lmb_alloc_nid_region(&mem->region[i], - nid_range, - size, align, nid); - if (ret != ~(u64)0) - return ret; - } - - return lmb_alloc(size, align); -} - -u64 __init lmb_alloc(u64 size, u64 align) -{ - return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE); -} - -u64 __init lmb_alloc_base(u64 size, u64 align, u64 max_addr) -{ - u64 alloc; - - alloc = __lmb_alloc_base(size, align, max_addr); - - if (alloc == 0) - panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n", - (unsigned long long) size, (unsigned long long) max_addr); - - return alloc; -} - -u64 __init __lmb_alloc_base(u64 size, u64 align, u64 max_addr) -{ - long i, j; - u64 base = 0; - u64 res_base; - - BUG_ON(0 == size); - - size = lmb_align_up(size, align); - - /* On some platforms, make sure we allocate lowmem */ - /* Note that LMB_REAL_LIMIT may be LMB_ALLOC_ANYWHERE */ - if (max_addr == LMB_ALLOC_ANYWHERE) - max_addr = LMB_REAL_LIMIT; - - for (i = lmb.memory.cnt - 1; i >= 0; i--) { - u64 lmbbase = lmb.memory.region[i].base; - u64 lmbsize = lmb.memory.region[i].size; - - if (lmbsize < size) - continue; - if (max_addr == LMB_ALLOC_ANYWHERE) - base = lmb_align_down(lmbbase + lmbsize - size, align); - else if (lmbbase < max_addr) { - base = min(lmbbase + 
lmbsize, max_addr); - base = lmb_align_down(base - size, align); - } else - continue; - - while (base && lmbbase <= base) { - j = lmb_overlaps_region(&lmb.reserved, base, size); - if (j < 0) { - /* this area isn't reserved, take it */ - if (lmb_add_region(&lmb.reserved, base, size) < 0) - return 0; - return base; - } - res_base = lmb.reserved.region[j].base; - if (res_base < size) - break; - base = lmb_align_down(res_base - size, align); - } - } - return 0; -} - -/* You must call lmb_analyze() before this. */ -u64 __init lmb_phys_mem_size(void) -{ - return lmb.memory.size; -} - -u64 lmb_end_of_DRAM(void) -{ - int idx = lmb.memory.cnt - 1; - - return (lmb.memory.region[idx].base + lmb.memory.region[idx].size); -} - -/* You must call lmb_analyze() after this. */ -void __init lmb_enforce_memory_limit(u64 memory_limit) -{ - unsigned long i; - u64 limit; - struct lmb_property *p; - - if (!memory_limit) - return; - - /* Truncate the lmb regions to satisfy the memory limit. */ - limit = memory_limit; - for (i = 0; i < lmb.memory.cnt; i++) { - if (limit > lmb.memory.region[i].size) { - limit -= lmb.memory.region[i].size; - continue; - } - - lmb.memory.region[i].size = limit; - lmb.memory.cnt = i + 1; - break; - } - - if (lmb.memory.region[0].size < lmb.rmo_size) - lmb.rmo_size = lmb.memory.region[0].size; - - memory_limit = lmb_end_of_DRAM(); - - /* And truncate any reserves above the limit also. */ - for (i = 0; i < lmb.reserved.cnt; i++) { - p = &lmb.reserved.region[i]; - - if (p->base > memory_limit) - p->size = 0; - else if ((p->base + p->size) > memory_limit) - p->size = memory_limit - p->base; - - if (p->size == 0) { - lmb_remove_region(&lmb.reserved, i); - i--; - } - } -} - -int __init lmb_is_reserved(u64 addr) -{ - int i; - - for (i = 0; i < lmb.reserved.cnt; i++) { - u64 upper = lmb.reserved.region[i].base + - lmb.reserved.region[i].size - 1; - if ((addr >= lmb.reserved.region[i].base) && (addr <= upper)) - return 1; - } - return 0; -} - -int lmb_is_region_reserved(u64 base, u64 size) -{ - return lmb_overlaps_region(&lmb.reserved, base, size); -} - -/* - * Given a <base, len>, find which memory regions belong to this range. - * Adjust the request and return a contiguous chunk. - */ -int lmb_find(struct lmb_property *res) -{ - int i; - u64 rstart, rend; - - rstart = res->base; - rend = rstart + res->size - 1; - - for (i = 0; i < lmb.memory.cnt; i++) { - u64 start = lmb.memory.region[i].base; - u64 end = start + lmb.memory.region[i].size - 1; - - if (start > rend) - return -1; - - if ((end >= rstart) && (start < rend)) { - /* adjust the request */ - if (rstart < start) - rstart = start; - if (rend > end) - rend = end; - res->base = rstart; - res->size = rend - rstart + 1; - return 0; - } - } - return -1; -} diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index aeaa6d73444..ec9048e74f4 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -137,6 +137,33 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb, return NOTIFY_OK; } +/* + * Compare counter against given value. 
+ * Return 1 if greater, 0 if equal and -1 if less + */ +int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) +{ + s64 count; + + count = percpu_counter_read(fbc); + /* Check to see if rough count will be sufficient for comparison */ + if (abs(count - rhs) > (percpu_counter_batch*num_online_cpus())) { + if (count > rhs) + return 1; + else + return -1; + } + /* Need to use precise count */ + count = percpu_counter_sum(fbc); + if (count > rhs) + return 1; + else if (count < rhs) + return -1; + else + return 0; +} +EXPORT_SYMBOL(percpu_counter_compare); + static int __init percpu_counter_startup(void) { compute_batch_value(); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 05da38bcc29..e907858498a 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -609,6 +609,100 @@ int radix_tree_tag_get(struct radix_tree_root *root, EXPORT_SYMBOL(radix_tree_tag_get); /** + * radix_tree_range_tag_if_tagged - for each item in given range set given + * tag if item has another tag set + * @root: radix tree root + * @first_indexp: pointer to a starting index of a range to scan + * @last_index: last index of a range to scan + * @nr_to_tag: maximum number items to tag + * @iftag: tag index to test + * @settag: tag index to set if tested tag is set + * + * This function scans range of radix tree from first_index to last_index + * (inclusive). For each item in the range if iftag is set, the function sets + * also settag. The function stops either after tagging nr_to_tag items or + * after reaching last_index. + * + * The function returns number of leaves where the tag was set and sets + * *first_indexp to the first unscanned index. + */ +unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, + unsigned long *first_indexp, unsigned long last_index, + unsigned long nr_to_tag, + unsigned int iftag, unsigned int settag) +{ + unsigned int height = root->height, shift; + unsigned long tagged = 0, index = *first_indexp; + struct radix_tree_node *open_slots[height], *slot; + + last_index = min(last_index, radix_tree_maxindex(height)); + if (index > last_index) + return 0; + if (!nr_to_tag) + return 0; + if (!root_tag_get(root, iftag)) { + *first_indexp = last_index + 1; + return 0; + } + if (height == 0) { + *first_indexp = last_index + 1; + root_tag_set(root, settag); + return 1; + } + + shift = (height - 1) * RADIX_TREE_MAP_SHIFT; + slot = radix_tree_indirect_to_ptr(root->rnode); + + for (;;) { + int offset; + + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + if (!slot->slots[offset]) + goto next; + if (!tag_get(slot, iftag, offset)) + goto next; + tag_set(slot, settag, offset); + if (height == 1) { + tagged++; + goto next; + } + /* Go down one level */ + height--; + shift -= RADIX_TREE_MAP_SHIFT; + open_slots[height] = slot; + slot = slot->slots[offset]; + continue; +next: + /* Go to next item at level determined by 'shift' */ + index = ((index >> shift) + 1) << shift; + if (index > last_index) + break; + if (tagged >= nr_to_tag) + break; + while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) { + /* + * We've fully scanned this node. Go up. Because + * last_index is guaranteed to be in the tree, what + * we do below cannot wander astray. 
+ */ + slot = open_slots[height]; + height++; + shift += RADIX_TREE_MAP_SHIFT; + } + } + /* + * The iftag must have been set somewhere because otherwise + * we would return immediated at the beginning of the function + */ + root_tag_set(root, settag); + *first_indexp = index; + + return tagged; +} +EXPORT_SYMBOL(radix_tree_range_tag_if_tagged); + + +/** * radix_tree_next_hole - find the next hole (not-present entry) * @root: tree root * @index: index key diff --git a/lib/random32.c b/lib/random32.c index 870dc3fc0f0..fc3545a3277 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -127,7 +127,7 @@ core_initcall(random32_init); /* * Generate better values after random number generator - * is fully initalized. + * is fully initialized. */ static int __init random32_reseed(void) { diff --git a/lib/rbtree.c b/lib/rbtree.c index 15e10b1afdd..4693f79195d 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -44,11 +44,6 @@ static void __rb_rotate_left(struct rb_node *node, struct rb_root *root) else root->rb_node = right; rb_set_parent(node, right); - - if (root->augment_cb) { - root->augment_cb(node); - root->augment_cb(right); - } } static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) @@ -72,20 +67,12 @@ static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) else root->rb_node = left; rb_set_parent(node, left); - - if (root->augment_cb) { - root->augment_cb(node); - root->augment_cb(left); - } } void rb_insert_color(struct rb_node *node, struct rb_root *root) { struct rb_node *parent, *gparent; - if (root->augment_cb) - root->augment_cb(node); - while ((parent = rb_parent(node)) && rb_is_red(parent)) { gparent = rb_parent(parent); @@ -240,15 +227,12 @@ void rb_erase(struct rb_node *node, struct rb_root *root) else { struct rb_node *old = node, *left; - int old_parent_cb = 0; - int successor_parent_cb = 0; node = node->rb_right; while ((left = node->rb_left) != NULL) node = left; if (rb_parent(old)) { - old_parent_cb = 1; if (rb_parent(old)->rb_left == old) rb_parent(old)->rb_left = node; else @@ -263,10 +247,8 @@ void rb_erase(struct rb_node *node, struct rb_root *root) if (parent == old) { parent = node; } else { - successor_parent_cb = 1; if (child) rb_set_parent(child, parent); - parent->rb_left = child; node->rb_right = old->rb_right; @@ -277,24 +259,6 @@ void rb_erase(struct rb_node *node, struct rb_root *root) node->rb_left = old->rb_left; rb_set_parent(old->rb_left, node); - if (root->augment_cb) { - /* - * Here, three different nodes can have new children. - * The parent of the successor node that was selected - * to replace the node to be erased. - * The node that is getting erased and is now replaced - * by its successor. - * The parent of the node getting erased-replaced. 
- */ - if (successor_parent_cb) - root->augment_cb(parent); - - root->augment_cb(node); - - if (old_parent_cb) - root->augment_cb(rb_parent(old)); - } - goto color; } @@ -303,19 +267,15 @@ void rb_erase(struct rb_node *node, struct rb_root *root) if (child) rb_set_parent(child, parent); - - if (parent) { + if (parent) + { if (parent->rb_left == node) parent->rb_left = child; else parent->rb_right = child; - - if (root->augment_cb) - root->augment_cb(parent); - - } else { - root->rb_node = child; } + else + root->rb_node = child; color: if (color == RB_BLACK) @@ -323,6 +283,74 @@ void rb_erase(struct rb_node *node, struct rb_root *root) } EXPORT_SYMBOL(rb_erase); +static void rb_augment_path(struct rb_node *node, rb_augment_f func, void *data) +{ + struct rb_node *parent; + +up: + func(node, data); + parent = rb_parent(node); + if (!parent) + return; + + if (node == parent->rb_left && parent->rb_right) + func(parent->rb_right, data); + else if (parent->rb_left) + func(parent->rb_left, data); + + node = parent; + goto up; +} + +/* + * after inserting @node into the tree, update the tree to account for + * both the new entry and any damage done by rebalance + */ +void rb_augment_insert(struct rb_node *node, rb_augment_f func, void *data) +{ + if (node->rb_left) + node = node->rb_left; + else if (node->rb_right) + node = node->rb_right; + + rb_augment_path(node, func, data); +} + +/* + * before removing the node, find the deepest node on the rebalance path + * that will still be there after @node gets removed + */ +struct rb_node *rb_augment_erase_begin(struct rb_node *node) +{ + struct rb_node *deepest; + + if (!node->rb_right && !node->rb_left) + deepest = rb_parent(node); + else if (!node->rb_right) + deepest = node->rb_left; + else if (!node->rb_left) + deepest = node->rb_right; + else { + deepest = rb_next(node); + if (deepest->rb_right) + deepest = deepest->rb_right; + else if (rb_parent(deepest) != node) + deepest = rb_parent(deepest); + } + + return deepest; +} + +/* + * after removal, update the tree to account for the removed entry + * and any rebalance damage. + */ +void rb_augment_erase_end(struct rb_node *node, rb_augment_f func, void *data) +{ + if (node) + rb_augment_path(node, func, data); +} + /* * This function returns the first node (in sort order) of the tree. */ diff --git a/lib/rwsem.c b/lib/rwsem.c index ceba8e28807..f236d7cd5cf 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -36,45 +36,56 @@ struct rwsem_waiter { #define RWSEM_WAITING_FOR_WRITE 0x00000002 }; +/* Wake types for __rwsem_do_wake(). Note that RWSEM_WAKE_NO_ACTIVE and + * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held + * since the rwsem value was observed. 
+ */ +#define RWSEM_WAKE_ANY 0 /* Wake whatever's at head of wait list */ +#define RWSEM_WAKE_NO_ACTIVE 1 /* rwsem was observed with no active thread */ +#define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */ + /* * handle the lock release when processes blocked on it that can now run * - if we come here from up_xxxx(), then: * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed) * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so) - * - there must be someone on the queue + * - there must be someone on the queue * - the spinlock must be held by the caller * - woken process blocks are discarded from the list after having task zeroed * - writers are only woken if downgrading is false */ -static inline struct rw_semaphore * -__rwsem_do_wake(struct rw_semaphore *sem, int downgrading) +static struct rw_semaphore * +__rwsem_do_wake(struct rw_semaphore *sem, int wake_type) { struct rwsem_waiter *waiter; struct task_struct *tsk; struct list_head *next; - signed long oldcount, woken, loop; - - if (downgrading) - goto dont_wake_writers; - - /* if we came through an up_xxxx() call, we only only wake someone up - * if we can transition the active part of the count from 0 -> 1 - */ - try_again: - oldcount = rwsem_atomic_update(RWSEM_ACTIVE_BIAS, sem) - - RWSEM_ACTIVE_BIAS; - if (oldcount & RWSEM_ACTIVE_MASK) - goto undo; + signed long oldcount, woken, loop, adjustment; waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - - /* try to grant a single write lock if there's a writer at the front - * of the queue - note we leave the 'active part' of the count - * incremented by 1 and the waiting part incremented by 0x00010000 - */ if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE)) goto readers_only; + if (wake_type == RWSEM_WAKE_READ_OWNED) + /* Another active reader was observed, so wakeup is not + * likely to succeed. Save the atomic op. + */ + goto out; + + /* There's a writer at the front of the queue - try to grant it the + * write lock. However, we only wake this writer if we can transition + * the active part of the count from 0 -> 1 + */ + adjustment = RWSEM_ACTIVE_WRITE_BIAS; + if (waiter->list.next == &sem->wait_list) + adjustment -= RWSEM_WAITING_BIAS; + + try_again_write: + oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; + if (oldcount & RWSEM_ACTIVE_MASK) + /* Someone grabbed the sem already */ + goto undo_write; + /* We must be careful not to touch 'waiter' after we set ->task = NULL. * It is an allocated on the waiter's stack and may become invalid at * any time after that point (due to a wakeup from another source). @@ -87,18 +98,30 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading) put_task_struct(tsk); goto out; - /* don't want to wake any writers */ - dont_wake_writers: - waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - if (waiter->flags & RWSEM_WAITING_FOR_WRITE) + readers_only: + /* If we come here from up_xxxx(), another thread might have reached + * rwsem_down_failed_common() before we acquired the spinlock and + * woken up a waiter, making it now active. We prefer to check for + * this first in order to not spend too much time with the spinlock + * held if we're not going to be able to wake up readers in the end. + * + * Note that we do not need to update the rwsem count: any writer + * trying to acquire rwsem will run rwsem_down_write_failed() due + * to the waiting threads and block trying to acquire the spinlock. 
+ * + * We use a dummy atomic update in order to acquire the cache line + * exclusively since we expect to succeed and run the final rwsem + * count adjustment pretty soon. + */ + if (wake_type == RWSEM_WAKE_ANY && + rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS) + /* Someone grabbed the sem for write already */ goto out; - /* grant an infinite number of read locks to the readers at the front - * of the queue - * - note we increment the 'active part' of the count by the number of - * readers before waking any processes up + /* Grant an infinite number of read locks to the readers at the front + * of the queue. Note we increment the 'active part' of the count by + * the number of readers before waking any processes up. */ - readers_only: woken = 0; do { woken++; @@ -111,16 +134,15 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading) } while (waiter->flags & RWSEM_WAITING_FOR_READ); - loop = woken; - woken *= RWSEM_ACTIVE_BIAS - RWSEM_WAITING_BIAS; - if (!downgrading) - /* we'd already done one increment earlier */ - woken -= RWSEM_ACTIVE_BIAS; + adjustment = woken * RWSEM_ACTIVE_READ_BIAS; + if (waiter->flags & RWSEM_WAITING_FOR_READ) + /* hit end of list above */ + adjustment -= RWSEM_WAITING_BIAS; - rwsem_atomic_add(woken, sem); + rwsem_atomic_add(adjustment, sem); next = sem->wait_list.next; - for (; loop > 0; loop--) { + for (loop = woken; loop > 0; loop--) { waiter = list_entry(next, struct rwsem_waiter, list); next = waiter->list.next; tsk = waiter->task; @@ -138,10 +160,10 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading) /* undo the change to the active count, but check for a transition * 1->0 */ - undo: - if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) & RWSEM_ACTIVE_MASK) + undo_write: + if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK) goto out; - goto try_again; + goto try_again_write; } /* @@ -149,8 +171,9 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading) */ static struct rw_semaphore __sched * rwsem_down_failed_common(struct rw_semaphore *sem, - struct rwsem_waiter *waiter, signed long adjustment) + unsigned int flags, signed long adjustment) { + struct rwsem_waiter waiter; struct task_struct *tsk = current; signed long count; @@ -158,23 +181,34 @@ rwsem_down_failed_common(struct rw_semaphore *sem, /* set up my own style of waitqueue */ spin_lock_irq(&sem->wait_lock); - waiter->task = tsk; + waiter.task = tsk; + waiter.flags = flags; get_task_struct(tsk); - list_add_tail(&waiter->list, &sem->wait_list); + if (list_empty(&sem->wait_list)) + adjustment += RWSEM_WAITING_BIAS; + list_add_tail(&waiter.list, &sem->wait_list); - /* we're now waiting on the lock, but no longer actively read-locking */ + /* we're now waiting on the lock, but no longer actively locking */ count = rwsem_atomic_update(adjustment, sem); - /* if there are no active locks, wake the front queued process(es) up */ - if (!(count & RWSEM_ACTIVE_MASK)) - sem = __rwsem_do_wake(sem, 0); + /* If there are no active locks, wake the front queued process(es) up. + * + * Alternatively, if we're called from a failed down_write(), there + * were already threads queued before us and there are no active + * writers, the lock must be read owned; so we try to wake any read + * locks that were queued ahead of us. 
*/ + if (count == RWSEM_WAITING_BIAS) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE); + else if (count > RWSEM_WAITING_BIAS && + adjustment == -RWSEM_ACTIVE_WRITE_BIAS) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); spin_unlock_irq(&sem->wait_lock); /* wait to be given the lock */ for (;;) { - if (!waiter->task) + if (!waiter.task) break; schedule(); set_task_state(tsk, TASK_UNINTERRUPTIBLE); @@ -191,12 +225,8 @@ rwsem_down_failed_common(struct rw_semaphore *sem, asmregparm struct rw_semaphore __sched * rwsem_down_read_failed(struct rw_semaphore *sem) { - struct rwsem_waiter waiter; - - waiter.flags = RWSEM_WAITING_FOR_READ; - rwsem_down_failed_common(sem, &waiter, - RWSEM_WAITING_BIAS - RWSEM_ACTIVE_BIAS); - return sem; + return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ, + -RWSEM_ACTIVE_READ_BIAS); } /* @@ -205,12 +235,8 @@ rwsem_down_read_failed(struct rw_semaphore *sem) asmregparm struct rw_semaphore __sched * rwsem_down_write_failed(struct rw_semaphore *sem) { - struct rwsem_waiter waiter; - - waiter.flags = RWSEM_WAITING_FOR_WRITE; - rwsem_down_failed_common(sem, &waiter, -RWSEM_ACTIVE_BIAS); - - return sem; + return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE, + -RWSEM_ACTIVE_WRITE_BIAS); } /* @@ -225,7 +251,7 @@ asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) /* do nothing if list empty */ if (!list_empty(&sem->wait_list)) - sem = __rwsem_do_wake(sem, 0); + sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); spin_unlock_irqrestore(&sem->wait_lock, flags); @@ -245,7 +271,7 @@ asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) /* do nothing if list empty */ if (!list_empty(&sem->wait_list)) - sem = __rwsem_do_wake(sem, 1); + sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); spin_unlock_irqrestore(&sem->wait_lock, flags); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index a009055140e..34e3082632d 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -50,19 +50,11 @@ */ #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) -/* - * Enumeration for sync targets - */ -enum dma_sync_target { - SYNC_FOR_CPU = 0, - SYNC_FOR_DEVICE = 1, -}; - int swiotlb_force; /* - * Used to do a quick range check in unmap_single and - * sync_single_*, to see if the memory was in fact allocated by this + * Used to do a quick range check in swiotlb_tbl_unmap_single and + * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this * API. */ static char *io_tlb_start, *io_tlb_end; @@ -140,28 +132,14 @@ void swiotlb_print_info(void) (unsigned long long)pend); } -/* - * Statically reserve bounce buffer space and initialize bounce buffer data - * structures for the software IO TLB used to implement the DMA API. 
- */ -void __init -swiotlb_init_with_default_size(size_t default_size, int verbose) +void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) { unsigned long i, bytes; - if (!io_tlb_nslabs) { - io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); - } - - bytes = io_tlb_nslabs << IO_TLB_SHIFT; + bytes = nslabs << IO_TLB_SHIFT; - /* - * Get IO TLB memory from the low pages - */ - io_tlb_start = alloc_bootmem_low_pages(bytes); - if (!io_tlb_start) - panic("Cannot allocate SWIOTLB buffer"); + io_tlb_nslabs = nslabs; + io_tlb_start = tlb; io_tlb_end = io_tlb_start + bytes; /* @@ -185,6 +163,32 @@ swiotlb_init_with_default_size(size_t default_size, int verbose) swiotlb_print_info(); } +/* + * Statically reserve bounce buffer space and initialize bounce buffer data + * structures for the software IO TLB used to implement the DMA API. + */ +void __init +swiotlb_init_with_default_size(size_t default_size, int verbose) +{ + unsigned long bytes; + + if (!io_tlb_nslabs) { + io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); + io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + } + + bytes = io_tlb_nslabs << IO_TLB_SHIFT; + + /* + * Get IO TLB memory from the low pages + */ + io_tlb_start = alloc_bootmem_low_pages(bytes); + if (!io_tlb_start) + panic("Cannot allocate SWIOTLB buffer"); + + swiotlb_init_with_tbl(io_tlb_start, io_tlb_nslabs, verbose); +} + void __init swiotlb_init(int verbose) { @@ -323,8 +327,8 @@ static int is_swiotlb_buffer(phys_addr_t paddr) /* * Bounce: copy the swiotlb buffer back to the original dma location */ -static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, - enum dma_data_direction dir) +void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, + enum dma_data_direction dir) { unsigned long pfn = PFN_DOWN(phys); @@ -360,26 +364,25 @@ static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, memcpy(phys_to_virt(phys), dma_addr, size); } } +EXPORT_SYMBOL_GPL(swiotlb_bounce); -/* - * Allocates bounce buffer and returns its kernel virtual address. - */ -static void * -map_single(struct device *hwdev, phys_addr_t phys, size_t size, int dir) +void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr, + phys_addr_t phys, size_t size, + enum dma_data_direction dir) { unsigned long flags; char *dma_addr; unsigned int nslots, stride, index, wrap; int i; - unsigned long start_dma_addr; unsigned long mask; unsigned long offset_slots; unsigned long max_slots; mask = dma_get_seg_boundary(hwdev); - start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start) & mask; - offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + tbl_dma_addr &= mask; + + offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; /* * Carefully handle integer overflow which can occur when mask == ~0UL. @@ -466,12 +469,27 @@ found: return dma_addr; } +EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single); + +/* + * Allocates bounce buffer and returns its kernel virtual address. + */ + +static void * +map_single(struct device *hwdev, phys_addr_t phys, size_t size, + enum dma_data_direction dir) +{ + dma_addr_t start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start); + + return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir); +} /* * dma_addr is the kernel virtual address of the bounce buffer to unmap. 
*/ -static void -do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) +void +swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size, + enum dma_data_direction dir) { unsigned long flags; int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; @@ -509,10 +527,12 @@ do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) } spin_unlock_irqrestore(&io_tlb_lock, flags); } +EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single); -static void -sync_single(struct device *hwdev, char *dma_addr, size_t size, - int dir, int target) +void +swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size, + enum dma_data_direction dir, + enum dma_sync_target target) { int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; phys_addr_t phys = io_tlb_orig_addr[index]; @@ -536,6 +556,7 @@ sync_single(struct device *hwdev, char *dma_addr, size_t size, BUG(); } } +EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single); void * swiotlb_alloc_coherent(struct device *hwdev, size_t size, @@ -559,8 +580,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, } if (!ret) { /* - * We are either out of memory or the device can't DMA - * to GFP_DMA memory; fall back on map_single(), which + * We are either out of memory or the device can't DMA to + * GFP_DMA memory; fall back on map_single(), which * will grab memory from the lowest available address range. */ ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE); @@ -578,7 +599,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, (unsigned long long)dev_addr); /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ - do_unmap_single(hwdev, ret, size, DMA_TO_DEVICE); + swiotlb_tbl_unmap_single(hwdev, ret, size, DMA_TO_DEVICE); return NULL; } *dma_handle = dev_addr; @@ -596,13 +617,14 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, if (!is_swiotlb_buffer(paddr)) free_pages((unsigned long)vaddr, get_order(size)); else - /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ - do_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE); + /* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */ + swiotlb_tbl_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE); } EXPORT_SYMBOL(swiotlb_free_coherent); static void -swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) +swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir, + int do_panic) { /* * Ran out of IOMMU space for this operation. This is very bad. @@ -680,14 +702,14 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page); * whatever the device wrote there. 
*/ static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir) + size_t size, enum dma_data_direction dir) { phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); BUG_ON(dir == DMA_NONE); if (is_swiotlb_buffer(paddr)) { - do_unmap_single(hwdev, phys_to_virt(paddr), size, dir); + swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir); return; } @@ -723,14 +745,16 @@ EXPORT_SYMBOL_GPL(swiotlb_unmap_page); */ static void swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir, int target) + size_t size, enum dma_data_direction dir, + enum dma_sync_target target) { phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); BUG_ON(dir == DMA_NONE); if (is_swiotlb_buffer(paddr)) { - sync_single(hwdev, phys_to_virt(paddr), size, dir, target); + swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir, + target); return; } @@ -809,7 +833,7 @@ EXPORT_SYMBOL(swiotlb_map_sg_attrs); int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, - int dir) + enum dma_data_direction dir) { return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL); } @@ -836,7 +860,7 @@ EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, - int dir) + enum dma_data_direction dir) { return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL); } @@ -851,7 +875,8 @@ EXPORT_SYMBOL(swiotlb_unmap_sg); */ static void swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, - int nelems, int dir, int target) + int nelems, enum dma_data_direction dir, + enum dma_sync_target target) { struct scatterlist *sg; int i; diff --git a/lib/vsprintf.c b/lib/vsprintf.c index b8a2f549ab0..7af9d841c43 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -146,19 +146,16 @@ int strict_strtoul(const char *cp, unsigned int base, unsigned long *res) { char *tail; unsigned long val; - size_t len; *res = 0; - len = strlen(cp); - if (len == 0) + if (!*cp) return -EINVAL; val = simple_strtoul(cp, &tail, base); if (tail == cp) return -EINVAL; - if ((*tail == '\0') || - ((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) { + if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) { *res = val; return 0; } @@ -220,18 +217,15 @@ int strict_strtoull(const char *cp, unsigned int base, unsigned long long *res) { char *tail; unsigned long long val; - size_t len; *res = 0; - len = strlen(cp); - if (len == 0) + if (!*cp) return -EINVAL; val = simple_strtoull(cp, &tail, base); if (tail == cp) return -EINVAL; - if ((*tail == '\0') || - ((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) { + if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) { *res = val; return 0; } @@ -980,6 +974,11 @@ char *uuid_string(char *buf, char *end, const u8 *addr, * [0][1][2][3]-[4][5]-[6][7]-[8][9]-[10][11][12][13][14][15] * little endian output byte order is: * [3][2][1][0]-[5][4]-[7][6]-[8][9]-[10][11][12][13][14][15] + * - 'V' For a struct va_format which contains a format string * and va_list *, + * call vsnprintf(->format, *->va_list). + * Implements a "recursive vsnprintf". + * Do not use this feature without some mechanism to verify the + * correctness of the format string and va_list arguments. 
* * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 * function pointers are really function descriptors, which contain a @@ -1025,6 +1024,10 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, break; case 'U': return uuid_string(buf, end, ptr, spec, fmt); + case 'V': + return buf + vsnprintf(buf, end - buf, + ((struct va_format *)ptr)->fmt, + *(((struct va_format *)ptr)->va)); } spec.flags |= SMALL; if (spec.field_width == -1) { |
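The comment block above stresses that %pV is effectively a recursive vsnprintf() and must only ever see trusted format strings. A minimal sketch of the intended call pattern, with a made-up wrapper name and prefix (struct va_format, its fmt/va fields and %pV itself come from the patch):

#include <linux/kernel.h>               /* printk(), struct va_format */

static void my_warn(const char *fmt, ...)   /* hypothetical wrapper */
{
        struct va_format vaf;
        va_list args;

        va_start(args, fmt);
        vaf.fmt = fmt;                  /* format string we control */
        vaf.va = &args;                 /* consumed by the inner vsnprintf */
        printk(KERN_WARNING "mydrv: %pV\n", &vaf);
        va_end(args);
}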
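Further up, the flex_array changes add flex_array_get_ptr() and point users at flex_array_put_ptr() for storing pointers. A hedged usage sketch of the pair, assuming flex_array_put_ptr() mirrors flex_array_put() but takes the pointer value directly (the array size, index and GFP flags below are arbitrary):

#include <linux/flex_array.h>
#include <linux/slab.h>

/* Round-trip a pointer value through a flex_array: the pointer itself
 * is stored and returned, not a copy of what it points to. */
static void *demo_flex_ptr(void *obj)
{
        struct flex_array *fa;
        void *ret = NULL;

        fa = flex_array_alloc(sizeof(void *), 128, GFP_KERNEL);
        if (!fa)
                return NULL;

        if (!flex_array_put_ptr(fa, 0, obj, GFP_KERNEL))
                ret = flex_array_get_ptr(fa, 0);    /* == obj on success */

        flex_array_free(fa);
        return ret;
}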