diff options
Diffstat (limited to 'lib')
74 files changed, 6251 insertions, 2009 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index b3c8be0da17..334f7722a99 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -51,13 +51,6 @@ config PERCPU_RWSEM config ARCH_USE_CMPXCHG_LOCKREF bool -config CMPXCHG_LOCKREF - def_bool y if ARCH_USE_CMPXCHG_LOCKREF - depends on SMP - depends on !GENERIC_LOCKBREAK - depends on !DEBUG_SPINLOCK - depends on !DEBUG_LOCK_ALLOC - config CRC_CCITT tristate "CRC-CCITT functions" help @@ -189,6 +182,22 @@ config AUDIT_GENERIC depends on AUDIT && !AUDIT_ARCH default y +config AUDIT_ARCH_COMPAT_GENERIC + bool + default n + +config AUDIT_COMPAT_GENERIC + bool + depends on AUDIT_GENERIC && AUDIT_ARCH_COMPAT_GENERIC && COMPAT + default y + +config RANDOM32_SELFTEST + bool "PRNG perform self test on init" + default n + help + This option enables the 32 bit PRNG library functions to perform a + self test on initialization. + # # compression support is select'ed if needed # @@ -322,15 +331,43 @@ config TEXTSEARCH_FSM config BTREE boolean +config INTERVAL_TREE + boolean + help + Simple, embeddable, interval-tree. Can find the start of an + overlapping range in log(n) time and then iterate over all + overlapping nodes. The algorithm is implemented as an + augmented rbtree. + + See: + + Documentation/rbtree.txt + + for more information. + +config ASSOCIATIVE_ARRAY + bool + help + Generic associative array. Can be searched and iterated over whilst + it is being modified. It is also reasonably quick to search and + modify. The algorithms are non-recursive, and the trees are highly + capacious. + + See: + + Documentation/assoc_array.txt + + for more information. + config HAS_IOMEM boolean depends on !NO_IOMEM select GENERIC_IO default y -config HAS_IOPORT +config HAS_IOPORT_MAP boolean - depends on HAS_IOMEM && !NO_IOPORT + depends on HAS_IOMEM && !NO_IOPORT_MAP default y config HAS_DMA diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 06344d986eb..7a638aa3545 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -119,7 +119,7 @@ menu "Compile-time checks and compiler options" config DEBUG_INFO bool "Compile the kernel with debug info" - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && !COMPILE_TEST help If you say Y here the resulting kernel image will include debugging info resulting in a larger kernel image. @@ -312,6 +312,15 @@ config MAGIC_SYSRQ keys are documented in <file:Documentation/sysrq.txt>. Don't say Y unless you really know what this hack does. +config MAGIC_SYSRQ_DEFAULT_ENABLE + hex "Enable magic SysRq key functions by default" + depends on MAGIC_SYSRQ + default 0x1 + help + Specifies which SysRq key functions are enabled by default. + This may be set to 1 or 0 to enable or disable them all, or + to a bitmask as described in Documentation/sysrq.txt. + config DEBUG_KERNEL bool "Kernel debugging" help @@ -492,12 +501,21 @@ config DEBUG_VM If unsure, say N. +config DEBUG_VM_VMACACHE + bool "Debug VMA caching" + depends on DEBUG_VM + help + Enable this to turn on VMA caching debug information. Doing so + can cause significant overhead, so only enable it in non-production + environments. + + If unsure, say N. + config DEBUG_VM_RB bool "Debug VM red-black trees" depends on DEBUG_VM help - Enable this to turn on more extended checks in the virtual-memory - system that may impact performance. + Enable VM red-black tree debugging information and extra validations. If unsure, say N. @@ -567,8 +585,8 @@ config DEBUG_HIGHMEM bool "Highmem debugging" depends on DEBUG_KERNEL && HIGHMEM help - This options enables addition error checking for high memory systems. - Disable for production systems. + This option enables additional error checking for high memory + systems. Disable for production systems. config HAVE_DEBUG_STACKOVERFLOW bool @@ -752,6 +770,15 @@ config PANIC_ON_OOPS_VALUE default 0 if !PANIC_ON_OOPS default 1 if PANIC_ON_OOPS +config PANIC_TIMEOUT + int "panic timeout" + default 0 + help + Set the timeout value (in seconds) until a reboot occurs when the + the kernel panics. If n = 0, then we wait forever. A timeout + value n > 0 will wait n seconds before rebooting, while a timeout + value n < 0 will reboot immediately. + config SCHED_DEBUG bool "Collect scheduler debugging info" depends on DEBUG_KERNEL && PROC_FS @@ -806,11 +833,6 @@ config DEBUG_RT_MUTEXES This allows rt mutex semantics violations and rt mutex related deadlocks (lockups) to be detected and reported automatically. -config DEBUG_PI_LIST - bool - default y - depends on DEBUG_RT_MUTEXES - config RT_MUTEX_TESTER bool "Built-in scriptable tester for rt-mutexes" depends on DEBUG_KERNEL && RT_MUTEXES @@ -908,7 +930,7 @@ config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT select STACKTRACE - select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC + select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC && !SCORE select KALLSYMS select KALLSYMS_ALL @@ -962,6 +984,21 @@ config DEBUG_LOCKING_API_SELFTESTS The following locking APIs are covered: spinlocks, rwlocks, mutexes and rwsems. +config LOCK_TORTURE_TEST + tristate "torture tests for locking" + depends on DEBUG_KERNEL + select TORTURE_TEST + default n + help + This option provides a kernel module that runs torture tests + on kernel locking primitives. The kernel module may be built + after the fact on the running kernel to be tested, if desired. + + Say Y here if you want kernel locking-primitive torture tests + to be built into the kernel. + Say M if you want these torture tests to build as a module. + Say N if you are unsure. + endmenu # lock debugging config TRACE_IRQFLAGS @@ -983,7 +1020,7 @@ config DEBUG_KOBJECT config DEBUG_KOBJECT_RELEASE bool "kobject release debugging" - depends on DEBUG_KERNEL + depends on DEBUG_OBJECTS_TIMERS help kobjects are reference counted objects. This means that their last reference count put is not predictable, and the kobject can @@ -1012,22 +1049,22 @@ config DEBUG_BUGVERBOSE of the BUG call as well as the EIP and oops trace. This aids debugging but costs about 70-100K of memory. -config DEBUG_WRITECOUNT - bool "Debug filesystem writers count" +config DEBUG_LIST + bool "Debug linked list manipulation" depends on DEBUG_KERNEL help - Enable this to catch wrong use of the writers count in struct - vfsmount. This will increase the size of each file struct by - 32 bits. + Enable this to turn on extended checks in the linked-list + walking routines. If unsure, say N. -config DEBUG_LIST - bool "Debug linked list manipulation" +config DEBUG_PI_LIST + bool "Debug priority linked list manipulation" depends on DEBUG_KERNEL help - Enable this to turn on extended checks in the linked-list - walking routines. + Enable this to turn on extended checks in the priority-ordered + linked-list (plist) walking routines. This checks the entire + list multiple times during each manipulation. If unsure, say N. @@ -1123,9 +1160,14 @@ config SPARSE_RCU_POINTER Say N if you are unsure. +config TORTURE_TEST + tristate + default n + config RCU_TORTURE_TEST tristate "torture tests for RCU" depends on DEBUG_KERNEL + select TORTURE_TEST default n help This option provides a kernel module that runs torture tests @@ -1366,7 +1408,7 @@ config FAULT_INJECTION_STACKTRACE_FILTER depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT depends on !X86_64 select STACKTRACE - select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC + select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !SCORE help Provide stacktrace filter for fault-injection capabilities @@ -1469,9 +1511,19 @@ config RBTREE_TEST config INTERVAL_TREE_TEST tristate "Interval tree test" depends on m && DEBUG_KERNEL + select INTERVAL_TREE help A benchmark measuring the performance of the interval tree library +config PERCPU_TEST + tristate "Per cpu operations test" + depends on m && DEBUG_KERNEL + help + Enable this option to build test module which validates per-cpu + operations. + + If unsure, say N. + config ATOMIC64_SELFTEST bool "Perform an atomic64_t self-test at boot" help @@ -1529,17 +1581,6 @@ config PROVIDE_OHCI1394_DMA_INIT See Documentation/debugging-via-ohci1394.txt for more information. -config FIREWIRE_OHCI_REMOTE_DMA - bool "Remote debugging over FireWire with firewire-ohci" - depends on FIREWIRE_OHCI - help - This option lets you use the FireWire bus for remote debugging - with help of the firewire-ohci driver. It enables unfiltered - remote DMA in firewire-ohci. - See Documentation/debugging-via-ohci1394.txt for more information. - - If unsure, say N. - config BUILD_DOCSRC bool "Build targets in Documentation/ tree" depends on HEADERS_CHECK @@ -1557,8 +1598,56 @@ config DMA_API_DEBUG With this option you will be able to detect common bugs in device drivers like double-freeing of DMA mappings or freeing mappings that were never allocated. - This option causes a performance degredation. Use only if you want - to debug device drivers. If unsure, say N. + + This also attempts to catch cases where a page owned by DMA is + accessed by the cpu in a way that could cause data corruption. For + example, this enables cow_user_page() to check that the source page is + not undergoing DMA. + + This option causes a performance degradation. Use only if you want to + debug device drivers and dma interactions. + + If unsure, say N. + +config TEST_MODULE + tristate "Test module loading with 'hello world' module" + default n + depends on m + help + This builds the "test_module" module that emits "Hello, world" + on printk when loaded. It is designed to be used for basic + evaluation of the module loading subsystem (for example when + validating module verification). It lacks any extra dependencies, + and will not normally be loaded by the system unless explicitly + requested by name. + + If unsure, say N. + +config TEST_USER_COPY + tristate "Test user/kernel boundary protections" + default n + depends on m + help + This builds the "test_user_copy" module that runs sanity checks + on the copy_to/from_user infrastructure, making sure basic + user/kernel boundary testing is working. If it fails to load, + a regression has been detected in the user/kernel memory boundary + protections. + + If unsure, say N. + +config TEST_BPF + tristate "Test BPF filter functionality" + default n + depends on m && NET + help + This builds the "test_bpf" module that runs various test vectors + against the BPF interpreter or BPF JIT compiler depending on the + current setting. This is in particular useful for BPF JIT compiler + development, but also to run regression tests against changes in + the interpreter code. + + If unsure, say N. source "samples/Kconfig" diff --git a/lib/Makefile b/lib/Makefile index f3bb2cb98ad..ba967a19edb 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -13,7 +13,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ - earlycpio.o percpu-refcount.o percpu_ida.o + earlycpio.o obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o lib-$(CONFIG_MMU) += ioremap.o @@ -26,11 +26,14 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \ bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ - percpu_ida.o + percpu-refcount.o percpu_ida.o hash.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += kstrtox.o obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o +obj-$(CONFIG_TEST_MODULE) += test_module.o +obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o +obj-$(CONFIG_TEST_BPF) += test_bpf.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG @@ -42,15 +45,14 @@ obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o -obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o -lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o -lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o -lib-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o +GCOV_PROFILE_hweight.o := n CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o obj-$(CONFIG_BTREE) += btree.o +obj-$(CONFIG_INTERVAL_TREE) += interval_tree.o +obj-$(CONFIG_ASSOCIATIVE_ARRAY) += assoc_array.o obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o obj-$(CONFIG_DEBUG_LIST) += list_debug.o obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o @@ -96,6 +98,7 @@ obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o obj-$(CONFIG_SMP) += percpu_counter.o obj-$(CONFIG_AUDIT_GENERIC) += audit.o +obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o @@ -147,7 +150,8 @@ obj-$(CONFIG_GENERIC_NET_UTILS) += net_utils.o obj-$(CONFIG_STMP_DEVICE) += stmp_device.o -libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o +libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \ + fdt_empty_tree.o $(foreach file, $(libfdt_files), \ $(eval CFLAGS_$(file) = -I$(src)/../scripts/dtc/libfdt)) lib-$(CONFIG_LIBFDT) += $(libfdt_files) @@ -155,7 +159,7 @@ lib-$(CONFIG_LIBFDT) += $(libfdt_files) obj-$(CONFIG_RBTREE_TEST) += rbtree_test.o obj-$(CONFIG_INTERVAL_TREE_TEST) += interval_tree_test.o -interval_tree_test-objs := interval_tree_test_main.o interval_tree.o +obj-$(CONFIG_PERCPU_TEST) += percpu_test.o obj-$(CONFIG_ASN1) += asn1_decoder.o diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c index 11b9b01fda6..1a000bb050f 100644 --- a/lib/asn1_decoder.c +++ b/lib/asn1_decoder.c @@ -140,7 +140,7 @@ error: * @decoder: The decoder definition (produced by asn1_compiler) * @context: The caller's context (to be passed to the action functions) * @data: The encoded data - * @datasize: The size of the encoded data + * @datalen: The size of the encoded data * * Decode BER/DER/CER encoded ASN.1 data according to a bytecode pattern * produced by asn1_compiler. Action functions are called on marked tags to diff --git a/lib/assoc_array.c b/lib/assoc_array.c new file mode 100644 index 00000000000..c0b1007011e --- /dev/null +++ b/lib/assoc_array.c @@ -0,0 +1,1746 @@ +/* Generic associative array implementation. + * + * See Documentation/assoc_array.txt for information. + * + * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ +//#define DEBUG +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/assoc_array_priv.h> + +/* + * Iterate over an associative array. The caller must hold the RCU read lock + * or better. + */ +static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root, + const struct assoc_array_ptr *stop, + int (*iterator)(const void *leaf, + void *iterator_data), + void *iterator_data) +{ + const struct assoc_array_shortcut *shortcut; + const struct assoc_array_node *node; + const struct assoc_array_ptr *cursor, *ptr, *parent; + unsigned long has_meta; + int slot, ret; + + cursor = root; + +begin_node: + if (assoc_array_ptr_is_shortcut(cursor)) { + /* Descend through a shortcut */ + shortcut = assoc_array_ptr_to_shortcut(cursor); + smp_read_barrier_depends(); + cursor = ACCESS_ONCE(shortcut->next_node); + } + + node = assoc_array_ptr_to_node(cursor); + smp_read_barrier_depends(); + slot = 0; + + /* We perform two passes of each node. + * + * The first pass does all the leaves in this node. This means we + * don't miss any leaves if the node is split up by insertion whilst + * we're iterating over the branches rooted here (we may, however, see + * some leaves twice). + */ + has_meta = 0; + for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = ACCESS_ONCE(node->slots[slot]); + has_meta |= (unsigned long)ptr; + if (ptr && assoc_array_ptr_is_leaf(ptr)) { + /* We need a barrier between the read of the pointer + * and dereferencing the pointer - but only if we are + * actually going to dereference it. + */ + smp_read_barrier_depends(); + + /* Invoke the callback */ + ret = iterator(assoc_array_ptr_to_leaf(ptr), + iterator_data); + if (ret) + return ret; + } + } + + /* The second pass attends to all the metadata pointers. If we follow + * one of these we may find that we don't come back here, but rather go + * back to a replacement node with the leaves in a different layout. + * + * We are guaranteed to make progress, however, as the slot number for + * a particular portion of the key space cannot change - and we + * continue at the back pointer + 1. + */ + if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE)) + goto finished_node; + slot = 0; + +continue_node: + node = assoc_array_ptr_to_node(cursor); + smp_read_barrier_depends(); + + for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = ACCESS_ONCE(node->slots[slot]); + if (assoc_array_ptr_is_meta(ptr)) { + cursor = ptr; + goto begin_node; + } + } + +finished_node: + /* Move up to the parent (may need to skip back over a shortcut) */ + parent = ACCESS_ONCE(node->back_pointer); + slot = node->parent_slot; + if (parent == stop) + return 0; + + if (assoc_array_ptr_is_shortcut(parent)) { + shortcut = assoc_array_ptr_to_shortcut(parent); + smp_read_barrier_depends(); + cursor = parent; + parent = ACCESS_ONCE(shortcut->back_pointer); + slot = shortcut->parent_slot; + if (parent == stop) + return 0; + } + + /* Ascend to next slot in parent node */ + cursor = parent; + slot++; + goto continue_node; +} + +/** + * assoc_array_iterate - Pass all objects in the array to a callback + * @array: The array to iterate over. + * @iterator: The callback function. + * @iterator_data: Private data for the callback function. + * + * Iterate over all the objects in an associative array. Each one will be + * presented to the iterator function. + * + * If the array is being modified concurrently with the iteration then it is + * possible that some objects in the array will be passed to the iterator + * callback more than once - though every object should be passed at least + * once. If this is undesirable then the caller must lock against modification + * for the duration of this function. + * + * The function will return 0 if no objects were in the array or else it will + * return the result of the last iterator function called. Iteration stops + * immediately if any call to the iteration function results in a non-zero + * return. + * + * The caller should hold the RCU read lock or better if concurrent + * modification is possible. + */ +int assoc_array_iterate(const struct assoc_array *array, + int (*iterator)(const void *object, + void *iterator_data), + void *iterator_data) +{ + struct assoc_array_ptr *root = ACCESS_ONCE(array->root); + + if (!root) + return 0; + return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data); +} + +enum assoc_array_walk_status { + assoc_array_walk_tree_empty, + assoc_array_walk_found_terminal_node, + assoc_array_walk_found_wrong_shortcut, +}; + +struct assoc_array_walk_result { + struct { + struct assoc_array_node *node; /* Node in which leaf might be found */ + int level; + int slot; + } terminal_node; + struct { + struct assoc_array_shortcut *shortcut; + int level; + int sc_level; + unsigned long sc_segments; + unsigned long dissimilarity; + } wrong_shortcut; +}; + +/* + * Navigate through the internal tree looking for the closest node to the key. + */ +static enum assoc_array_walk_status +assoc_array_walk(const struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key, + struct assoc_array_walk_result *result) +{ + struct assoc_array_shortcut *shortcut; + struct assoc_array_node *node; + struct assoc_array_ptr *cursor, *ptr; + unsigned long sc_segments, dissimilarity; + unsigned long segments; + int level, sc_level, next_sc_level; + int slot; + + pr_devel("-->%s()\n", __func__); + + cursor = ACCESS_ONCE(array->root); + if (!cursor) + return assoc_array_walk_tree_empty; + + level = 0; + + /* Use segments from the key for the new leaf to navigate through the + * internal tree, skipping through nodes and shortcuts that are on + * route to the destination. Eventually we'll come to a slot that is + * either empty or contains a leaf at which point we've found a node in + * which the leaf we're looking for might be found or into which it + * should be inserted. + */ +jumped: + segments = ops->get_key_chunk(index_key, level); + pr_devel("segments[%d]: %lx\n", level, segments); + + if (assoc_array_ptr_is_shortcut(cursor)) + goto follow_shortcut; + +consider_node: + node = assoc_array_ptr_to_node(cursor); + smp_read_barrier_depends(); + + slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK); + slot &= ASSOC_ARRAY_FAN_MASK; + ptr = ACCESS_ONCE(node->slots[slot]); + + pr_devel("consider slot %x [ix=%d type=%lu]\n", + slot, level, (unsigned long)ptr & 3); + + if (!assoc_array_ptr_is_meta(ptr)) { + /* The node doesn't have a node/shortcut pointer in the slot + * corresponding to the index key that we have to follow. + */ + result->terminal_node.node = node; + result->terminal_node.level = level; + result->terminal_node.slot = slot; + pr_devel("<--%s() = terminal_node\n", __func__); + return assoc_array_walk_found_terminal_node; + } + + if (assoc_array_ptr_is_node(ptr)) { + /* There is a pointer to a node in the slot corresponding to + * this index key segment, so we need to follow it. + */ + cursor = ptr; + level += ASSOC_ARRAY_LEVEL_STEP; + if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) + goto consider_node; + goto jumped; + } + + /* There is a shortcut in the slot corresponding to the index key + * segment. We follow the shortcut if its partial index key matches + * this leaf's. Otherwise we need to split the shortcut. + */ + cursor = ptr; +follow_shortcut: + shortcut = assoc_array_ptr_to_shortcut(cursor); + smp_read_barrier_depends(); + pr_devel("shortcut to %d\n", shortcut->skip_to_level); + sc_level = level + ASSOC_ARRAY_LEVEL_STEP; + BUG_ON(sc_level > shortcut->skip_to_level); + + do { + /* Check the leaf against the shortcut's index key a word at a + * time, trimming the final word (the shortcut stores the index + * key completely from the root to the shortcut's target). + */ + if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0) + segments = ops->get_key_chunk(index_key, sc_level); + + sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT]; + dissimilarity = segments ^ sc_segments; + + if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) { + /* Trim segments that are beyond the shortcut */ + int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK; + dissimilarity &= ~(ULONG_MAX << shift); + next_sc_level = shortcut->skip_to_level; + } else { + next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE; + next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); + } + + if (dissimilarity != 0) { + /* This shortcut points elsewhere */ + result->wrong_shortcut.shortcut = shortcut; + result->wrong_shortcut.level = level; + result->wrong_shortcut.sc_level = sc_level; + result->wrong_shortcut.sc_segments = sc_segments; + result->wrong_shortcut.dissimilarity = dissimilarity; + return assoc_array_walk_found_wrong_shortcut; + } + + sc_level = next_sc_level; + } while (sc_level < shortcut->skip_to_level); + + /* The shortcut matches the leaf's index to this point. */ + cursor = ACCESS_ONCE(shortcut->next_node); + if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) { + level = sc_level; + goto jumped; + } else { + level = sc_level; + goto consider_node; + } +} + +/** + * assoc_array_find - Find an object by index key + * @array: The associative array to search. + * @ops: The operations to use. + * @index_key: The key to the object. + * + * Find an object in an associative array by walking through the internal tree + * to the node that should contain the object and then searching the leaves + * there. NULL is returned if the requested object was not found in the array. + * + * The caller must hold the RCU read lock or better. + */ +void *assoc_array_find(const struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key) +{ + struct assoc_array_walk_result result; + const struct assoc_array_node *node; + const struct assoc_array_ptr *ptr; + const void *leaf; + int slot; + + if (assoc_array_walk(array, ops, index_key, &result) != + assoc_array_walk_found_terminal_node) + return NULL; + + node = result.terminal_node.node; + smp_read_barrier_depends(); + + /* If the target key is available to us, it's has to be pointed to by + * the terminal node. + */ + for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = ACCESS_ONCE(node->slots[slot]); + if (ptr && assoc_array_ptr_is_leaf(ptr)) { + /* We need a barrier between the read of the pointer + * and dereferencing the pointer - but only if we are + * actually going to dereference it. + */ + leaf = assoc_array_ptr_to_leaf(ptr); + smp_read_barrier_depends(); + if (ops->compare_object(leaf, index_key)) + return (void *)leaf; + } + } + + return NULL; +} + +/* + * Destructively iterate over an associative array. The caller must prevent + * other simultaneous accesses. + */ +static void assoc_array_destroy_subtree(struct assoc_array_ptr *root, + const struct assoc_array_ops *ops) +{ + struct assoc_array_shortcut *shortcut; + struct assoc_array_node *node; + struct assoc_array_ptr *cursor, *parent = NULL; + int slot = -1; + + pr_devel("-->%s()\n", __func__); + + cursor = root; + if (!cursor) { + pr_devel("empty\n"); + return; + } + +move_to_meta: + if (assoc_array_ptr_is_shortcut(cursor)) { + /* Descend through a shortcut */ + pr_devel("[%d] shortcut\n", slot); + BUG_ON(!assoc_array_ptr_is_shortcut(cursor)); + shortcut = assoc_array_ptr_to_shortcut(cursor); + BUG_ON(shortcut->back_pointer != parent); + BUG_ON(slot != -1 && shortcut->parent_slot != slot); + parent = cursor; + cursor = shortcut->next_node; + slot = -1; + BUG_ON(!assoc_array_ptr_is_node(cursor)); + } + + pr_devel("[%d] node\n", slot); + node = assoc_array_ptr_to_node(cursor); + BUG_ON(node->back_pointer != parent); + BUG_ON(slot != -1 && node->parent_slot != slot); + slot = 0; + +continue_node: + pr_devel("Node %p [back=%p]\n", node, node->back_pointer); + for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + struct assoc_array_ptr *ptr = node->slots[slot]; + if (!ptr) + continue; + if (assoc_array_ptr_is_meta(ptr)) { + parent = cursor; + cursor = ptr; + goto move_to_meta; + } + + if (ops) { + pr_devel("[%d] free leaf\n", slot); + ops->free_object(assoc_array_ptr_to_leaf(ptr)); + } + } + + parent = node->back_pointer; + slot = node->parent_slot; + pr_devel("free node\n"); + kfree(node); + if (!parent) + return; /* Done */ + + /* Move back up to the parent (may need to free a shortcut on + * the way up) */ + if (assoc_array_ptr_is_shortcut(parent)) { + shortcut = assoc_array_ptr_to_shortcut(parent); + BUG_ON(shortcut->next_node != cursor); + cursor = parent; + parent = shortcut->back_pointer; + slot = shortcut->parent_slot; + pr_devel("free shortcut\n"); + kfree(shortcut); + if (!parent) + return; + + BUG_ON(!assoc_array_ptr_is_node(parent)); + } + + /* Ascend to next slot in parent node */ + pr_devel("ascend to %p[%d]\n", parent, slot); + cursor = parent; + node = assoc_array_ptr_to_node(cursor); + slot++; + goto continue_node; +} + +/** + * assoc_array_destroy - Destroy an associative array + * @array: The array to destroy. + * @ops: The operations to use. + * + * Discard all metadata and free all objects in an associative array. The + * array will be empty and ready to use again upon completion. This function + * cannot fail. + * + * The caller must prevent all other accesses whilst this takes place as no + * attempt is made to adjust pointers gracefully to permit RCU readlock-holding + * accesses to continue. On the other hand, no memory allocation is required. + */ +void assoc_array_destroy(struct assoc_array *array, + const struct assoc_array_ops *ops) +{ + assoc_array_destroy_subtree(array->root, ops); + array->root = NULL; +} + +/* + * Handle insertion into an empty tree. + */ +static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit) +{ + struct assoc_array_node *new_n0; + + pr_devel("-->%s()\n", __func__); + + new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); + if (!new_n0) + return false; + + edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); + edit->leaf_p = &new_n0->slots[0]; + edit->adjust_count_on = new_n0; + edit->set[0].ptr = &edit->array->root; + edit->set[0].to = assoc_array_node_to_ptr(new_n0); + + pr_devel("<--%s() = ok [no root]\n", __func__); + return true; +} + +/* + * Handle insertion into a terminal node. + */ +static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit, + const struct assoc_array_ops *ops, + const void *index_key, + struct assoc_array_walk_result *result) +{ + struct assoc_array_shortcut *shortcut, *new_s0; + struct assoc_array_node *node, *new_n0, *new_n1, *side; + struct assoc_array_ptr *ptr; + unsigned long dissimilarity, base_seg, blank; + size_t keylen; + bool have_meta; + int level, diff; + int slot, next_slot, free_slot, i, j; + + node = result->terminal_node.node; + level = result->terminal_node.level; + edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot; + + pr_devel("-->%s()\n", __func__); + + /* We arrived at a node which doesn't have an onward node or shortcut + * pointer that we have to follow. This means that (a) the leaf we + * want must go here (either by insertion or replacement) or (b) we + * need to split this node and insert in one of the fragments. + */ + free_slot = -1; + + /* Firstly, we have to check the leaves in this node to see if there's + * a matching one we should replace in place. + */ + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + ptr = node->slots[i]; + if (!ptr) { + free_slot = i; + continue; + } + if (ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) { + pr_devel("replace in slot %d\n", i); + edit->leaf_p = &node->slots[i]; + edit->dead_leaf = node->slots[i]; + pr_devel("<--%s() = ok [replace]\n", __func__); + return true; + } + } + + /* If there is a free slot in this node then we can just insert the + * leaf here. + */ + if (free_slot >= 0) { + pr_devel("insert in free slot %d\n", free_slot); + edit->leaf_p = &node->slots[free_slot]; + edit->adjust_count_on = node; + pr_devel("<--%s() = ok [insert]\n", __func__); + return true; + } + + /* The node has no spare slots - so we're either going to have to split + * it or insert another node before it. + * + * Whatever, we're going to need at least two new nodes - so allocate + * those now. We may also need a new shortcut, but we deal with that + * when we need it. + */ + new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); + if (!new_n0) + return false; + edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); + new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); + if (!new_n1) + return false; + edit->new_meta[1] = assoc_array_node_to_ptr(new_n1); + + /* We need to find out how similar the leaves are. */ + pr_devel("no spare slots\n"); + have_meta = false; + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + ptr = node->slots[i]; + if (assoc_array_ptr_is_meta(ptr)) { + edit->segment_cache[i] = 0xff; + have_meta = true; + continue; + } + base_seg = ops->get_object_key_chunk( + assoc_array_ptr_to_leaf(ptr), level); + base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; + edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; + } + + if (have_meta) { + pr_devel("have meta\n"); + goto split_node; + } + + /* The node contains only leaves */ + dissimilarity = 0; + base_seg = edit->segment_cache[0]; + for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++) + dissimilarity |= edit->segment_cache[i] ^ base_seg; + + pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity); + + if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) { + /* The old leaves all cluster in the same slot. We will need + * to insert a shortcut if the new node wants to cluster with them. + */ + if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0) + goto all_leaves_cluster_together; + + /* Otherwise we can just insert a new node ahead of the old + * one. + */ + goto present_leaves_cluster_but_not_new_leaf; + } + +split_node: + pr_devel("split node\n"); + + /* We need to split the current node; we know that the node doesn't + * simply contain a full set of leaves that cluster together (it + * contains meta pointers and/or non-clustering leaves). + * + * We need to expel at least two leaves out of a set consisting of the + * leaves in the node and the new leaf. + * + * We need a new node (n0) to replace the current one and a new node to + * take the expelled nodes (n1). + */ + edit->set[0].to = assoc_array_node_to_ptr(new_n0); + new_n0->back_pointer = node->back_pointer; + new_n0->parent_slot = node->parent_slot; + new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); + new_n1->parent_slot = -1; /* Need to calculate this */ + +do_split_node: + pr_devel("do_split_node\n"); + + new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; + new_n1->nr_leaves_on_branch = 0; + + /* Begin by finding two matching leaves. There have to be at least two + * that match - even if there are meta pointers - because any leaf that + * would match a slot with a meta pointer in it must be somewhere + * behind that meta pointer and cannot be here. Further, given N + * remaining leaf slots, we now have N+1 leaves to go in them. + */ + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + slot = edit->segment_cache[i]; + if (slot != 0xff) + for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++) + if (edit->segment_cache[j] == slot) + goto found_slot_for_multiple_occupancy; + } +found_slot_for_multiple_occupancy: + pr_devel("same slot: %x %x [%02x]\n", i, j, slot); + BUG_ON(i >= ASSOC_ARRAY_FAN_OUT); + BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1); + BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT); + + new_n1->parent_slot = slot; + + /* Metadata pointers cannot change slot */ + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) + if (assoc_array_ptr_is_meta(node->slots[i])) + new_n0->slots[i] = node->slots[i]; + else + new_n0->slots[i] = NULL; + BUG_ON(new_n0->slots[slot] != NULL); + new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1); + + /* Filter the leaf pointers between the new nodes */ + free_slot = -1; + next_slot = 0; + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + if (assoc_array_ptr_is_meta(node->slots[i])) + continue; + if (edit->segment_cache[i] == slot) { + new_n1->slots[next_slot++] = node->slots[i]; + new_n1->nr_leaves_on_branch++; + } else { + do { + free_slot++; + } while (new_n0->slots[free_slot] != NULL); + new_n0->slots[free_slot] = node->slots[i]; + } + } + + pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot); + + if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) { + do { + free_slot++; + } while (new_n0->slots[free_slot] != NULL); + edit->leaf_p = &new_n0->slots[free_slot]; + edit->adjust_count_on = new_n0; + } else { + edit->leaf_p = &new_n1->slots[next_slot++]; + edit->adjust_count_on = new_n1; + } + + BUG_ON(next_slot <= 1); + + edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0); + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + if (edit->segment_cache[i] == 0xff) { + ptr = node->slots[i]; + BUG_ON(assoc_array_ptr_is_leaf(ptr)); + if (assoc_array_ptr_is_node(ptr)) { + side = assoc_array_ptr_to_node(ptr); + edit->set_backpointers[i] = &side->back_pointer; + } else { + shortcut = assoc_array_ptr_to_shortcut(ptr); + edit->set_backpointers[i] = &shortcut->back_pointer; + } + } + } + + ptr = node->back_pointer; + if (!ptr) + edit->set[0].ptr = &edit->array->root; + else if (assoc_array_ptr_is_node(ptr)) + edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot]; + else + edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node; + edit->excised_meta[0] = assoc_array_node_to_ptr(node); + pr_devel("<--%s() = ok [split node]\n", __func__); + return true; + +present_leaves_cluster_but_not_new_leaf: + /* All the old leaves cluster in the same slot, but the new leaf wants + * to go into a different slot, so we create a new node to hold the new + * leaf and a pointer to a new node holding all the old leaves. + */ + pr_devel("present leaves cluster but not new leaf\n"); + + new_n0->back_pointer = node->back_pointer; + new_n0->parent_slot = node->parent_slot; + new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; + new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); + new_n1->parent_slot = edit->segment_cache[0]; + new_n1->nr_leaves_on_branch = node->nr_leaves_on_branch; + edit->adjust_count_on = new_n0; + + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) + new_n1->slots[i] = node->slots[i]; + + new_n0->slots[edit->segment_cache[0]] = assoc_array_node_to_ptr(new_n0); + edit->leaf_p = &new_n0->slots[edit->segment_cache[ASSOC_ARRAY_FAN_OUT]]; + + edit->set[0].ptr = &assoc_array_ptr_to_node(node->back_pointer)->slots[node->parent_slot]; + edit->set[0].to = assoc_array_node_to_ptr(new_n0); + edit->excised_meta[0] = assoc_array_node_to_ptr(node); + pr_devel("<--%s() = ok [insert node before]\n", __func__); + return true; + +all_leaves_cluster_together: + /* All the leaves, new and old, want to cluster together in this node + * in the same slot, so we have to replace this node with a shortcut to + * skip over the identical parts of the key and then place a pair of + * nodes, one inside the other, at the end of the shortcut and + * distribute the keys between them. + * + * Firstly we need to work out where the leaves start diverging as a + * bit position into their keys so that we know how big the shortcut + * needs to be. + * + * We only need to make a single pass of N of the N+1 leaves because if + * any keys differ between themselves at bit X then at least one of + * them must also differ with the base key at bit X or before. + */ + pr_devel("all leaves cluster together\n"); + diff = INT_MAX; + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + int x = ops->diff_objects(assoc_array_ptr_to_leaf(node->slots[i]), + index_key); + if (x < diff) { + BUG_ON(x < 0); + diff = x; + } + } + BUG_ON(diff == INT_MAX); + BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP); + + keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); + keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; + + new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) + + keylen * sizeof(unsigned long), GFP_KERNEL); + if (!new_s0) + return false; + edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0); + + edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); + new_s0->back_pointer = node->back_pointer; + new_s0->parent_slot = node->parent_slot; + new_s0->next_node = assoc_array_node_to_ptr(new_n0); + new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); + new_n0->parent_slot = 0; + new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); + new_n1->parent_slot = -1; /* Need to calculate this */ + + new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK; + pr_devel("skip_to_level = %d [diff %d]\n", level, diff); + BUG_ON(level <= 0); + + for (i = 0; i < keylen; i++) + new_s0->index_key[i] = + ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE); + + blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK); + pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank); + new_s0->index_key[keylen - 1] &= ~blank; + + /* This now reduces to a node splitting exercise for which we'll need + * to regenerate the disparity table. + */ + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + ptr = node->slots[i]; + base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr), + level); + base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; + edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; + } + + base_seg = ops->get_key_chunk(index_key, level); + base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; + edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK; + goto do_split_node; +} + +/* + * Handle insertion into the middle of a shortcut. + */ +static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit, + const struct assoc_array_ops *ops, + struct assoc_array_walk_result *result) +{ + struct assoc_array_shortcut *shortcut, *new_s0, *new_s1; + struct assoc_array_node *node, *new_n0, *side; + unsigned long sc_segments, dissimilarity, blank; + size_t keylen; + int level, sc_level, diff; + int sc_slot; + + shortcut = result->wrong_shortcut.shortcut; + level = result->wrong_shortcut.level; + sc_level = result->wrong_shortcut.sc_level; + sc_segments = result->wrong_shortcut.sc_segments; + dissimilarity = result->wrong_shortcut.dissimilarity; + + pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n", + __func__, level, dissimilarity, sc_level); + + /* We need to split a shortcut and insert a node between the two + * pieces. Zero-length pieces will be dispensed with entirely. + * + * First of all, we need to find out in which level the first + * difference was. + */ + diff = __ffs(dissimilarity); + diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK; + diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK; + pr_devel("diff=%d\n", diff); + + if (!shortcut->back_pointer) { + edit->set[0].ptr = &edit->array->root; + } else if (assoc_array_ptr_is_node(shortcut->back_pointer)) { + node = assoc_array_ptr_to_node(shortcut->back_pointer); + edit->set[0].ptr = &node->slots[shortcut->parent_slot]; + } else { + BUG(); + } + + edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut); + + /* Create a new node now since we're going to need it anyway */ + new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); + if (!new_n0) + return false; + edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); + edit->adjust_count_on = new_n0; + + /* Insert a new shortcut before the new node if this segment isn't of + * zero length - otherwise we just connect the new node directly to the + * parent. + */ + level += ASSOC_ARRAY_LEVEL_STEP; + if (diff > level) { + pr_devel("pre-shortcut %d...%d\n", level, diff); + keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); + keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; + + new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) + + keylen * sizeof(unsigned long), GFP_KERNEL); + if (!new_s0) + return false; + edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0); + edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); + new_s0->back_pointer = shortcut->back_pointer; + new_s0->parent_slot = shortcut->parent_slot; + new_s0->next_node = assoc_array_node_to_ptr(new_n0); + new_s0->skip_to_level = diff; + + new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); + new_n0->parent_slot = 0; + + memcpy(new_s0->index_key, shortcut->index_key, + keylen * sizeof(unsigned long)); + + blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); + pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank); + new_s0->index_key[keylen - 1] &= ~blank; + } else { + pr_devel("no pre-shortcut\n"); + edit->set[0].to = assoc_array_node_to_ptr(new_n0); + new_n0->back_pointer = shortcut->back_pointer; + new_n0->parent_slot = shortcut->parent_slot; + } + + side = assoc_array_ptr_to_node(shortcut->next_node); + new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch; + + /* We need to know which slot in the new node is going to take a + * metadata pointer. + */ + sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); + sc_slot &= ASSOC_ARRAY_FAN_MASK; + + pr_devel("new slot %lx >> %d -> %d\n", + sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot); + + /* Determine whether we need to follow the new node with a replacement + * for the current shortcut. We could in theory reuse the current + * shortcut if its parent slot number doesn't change - but that's a + * 1-in-16 chance so not worth expending the code upon. + */ + level = diff + ASSOC_ARRAY_LEVEL_STEP; + if (level < shortcut->skip_to_level) { + pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level); + keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); + keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; + + new_s1 = kzalloc(sizeof(struct assoc_array_shortcut) + + keylen * sizeof(unsigned long), GFP_KERNEL); + if (!new_s1) + return false; + edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1); + + new_s1->back_pointer = assoc_array_node_to_ptr(new_n0); + new_s1->parent_slot = sc_slot; + new_s1->next_node = shortcut->next_node; + new_s1->skip_to_level = shortcut->skip_to_level; + + new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1); + + memcpy(new_s1->index_key, shortcut->index_key, + keylen * sizeof(unsigned long)); + + edit->set[1].ptr = &side->back_pointer; + edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1); + } else { + pr_devel("no post-shortcut\n"); + + /* We don't have to replace the pointed-to node as long as we + * use memory barriers to make sure the parent slot number is + * changed before the back pointer (the parent slot number is + * irrelevant to the old parent shortcut). + */ + new_n0->slots[sc_slot] = shortcut->next_node; + edit->set_parent_slot[0].p = &side->parent_slot; + edit->set_parent_slot[0].to = sc_slot; + edit->set[1].ptr = &side->back_pointer; + edit->set[1].to = assoc_array_node_to_ptr(new_n0); + } + + /* Install the new leaf in a spare slot in the new node. */ + if (sc_slot == 0) + edit->leaf_p = &new_n0->slots[1]; + else + edit->leaf_p = &new_n0->slots[0]; + + pr_devel("<--%s() = ok [split shortcut]\n", __func__); + return edit; +} + +/** + * assoc_array_insert - Script insertion of an object into an associative array + * @array: The array to insert into. + * @ops: The operations to use. + * @index_key: The key to insert at. + * @object: The object to insert. + * + * Precalculate and preallocate a script for the insertion or replacement of an + * object in an associative array. This results in an edit script that can + * either be applied or cancelled. + * + * The function returns a pointer to an edit script or -ENOMEM. + * + * The caller should lock against other modifications and must continue to hold + * the lock until assoc_array_apply_edit() has been called. + * + * Accesses to the tree may take place concurrently with this function, + * provided they hold the RCU read lock. + */ +struct assoc_array_edit *assoc_array_insert(struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key, + void *object) +{ + struct assoc_array_walk_result result; + struct assoc_array_edit *edit; + + pr_devel("-->%s()\n", __func__); + + /* The leaf pointer we're given must not have the bottom bit set as we + * use those for type-marking the pointer. NULL pointers are also not + * allowed as they indicate an empty slot but we have to allow them + * here as they can be updated later. + */ + BUG_ON(assoc_array_ptr_is_meta(object)); + + edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); + if (!edit) + return ERR_PTR(-ENOMEM); + edit->array = array; + edit->ops = ops; + edit->leaf = assoc_array_leaf_to_ptr(object); + edit->adjust_count_by = 1; + + switch (assoc_array_walk(array, ops, index_key, &result)) { + case assoc_array_walk_tree_empty: + /* Allocate a root node if there isn't one yet */ + if (!assoc_array_insert_in_empty_tree(edit)) + goto enomem; + return edit; + + case assoc_array_walk_found_terminal_node: + /* We found a node that doesn't have a node/shortcut pointer in + * the slot corresponding to the index key that we have to + * follow. + */ + if (!assoc_array_insert_into_terminal_node(edit, ops, index_key, + &result)) + goto enomem; + return edit; + + case assoc_array_walk_found_wrong_shortcut: + /* We found a shortcut that didn't match our key in a slot we + * needed to follow. + */ + if (!assoc_array_insert_mid_shortcut(edit, ops, &result)) + goto enomem; + return edit; + } + +enomem: + /* Clean up after an out of memory error */ + pr_devel("enomem\n"); + assoc_array_cancel_edit(edit); + return ERR_PTR(-ENOMEM); +} + +/** + * assoc_array_insert_set_object - Set the new object pointer in an edit script + * @edit: The edit script to modify. + * @object: The object pointer to set. + * + * Change the object to be inserted in an edit script. The object pointed to + * by the old object is not freed. This must be done prior to applying the + * script. + */ +void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object) +{ + BUG_ON(!object); + edit->leaf = assoc_array_leaf_to_ptr(object); +} + +struct assoc_array_delete_collapse_context { + struct assoc_array_node *node; + const void *skip_leaf; + int slot; +}; + +/* + * Subtree collapse to node iterator. + */ +static int assoc_array_delete_collapse_iterator(const void *leaf, + void *iterator_data) +{ + struct assoc_array_delete_collapse_context *collapse = iterator_data; + + if (leaf == collapse->skip_leaf) + return 0; + + BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT); + + collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf); + return 0; +} + +/** + * assoc_array_delete - Script deletion of an object from an associative array + * @array: The array to search. + * @ops: The operations to use. + * @index_key: The key to the object. + * + * Precalculate and preallocate a script for the deletion of an object from an + * associative array. This results in an edit script that can either be + * applied or cancelled. + * + * The function returns a pointer to an edit script if the object was found, + * NULL if the object was not found or -ENOMEM. + * + * The caller should lock against other modifications and must continue to hold + * the lock until assoc_array_apply_edit() has been called. + * + * Accesses to the tree may take place concurrently with this function, + * provided they hold the RCU read lock. + */ +struct assoc_array_edit *assoc_array_delete(struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key) +{ + struct assoc_array_delete_collapse_context collapse; + struct assoc_array_walk_result result; + struct assoc_array_node *node, *new_n0; + struct assoc_array_edit *edit; + struct assoc_array_ptr *ptr; + bool has_meta; + int slot, i; + + pr_devel("-->%s()\n", __func__); + + edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); + if (!edit) + return ERR_PTR(-ENOMEM); + edit->array = array; + edit->ops = ops; + edit->adjust_count_by = -1; + + switch (assoc_array_walk(array, ops, index_key, &result)) { + case assoc_array_walk_found_terminal_node: + /* We found a node that should contain the leaf we've been + * asked to remove - *if* it's in the tree. + */ + pr_devel("terminal_node\n"); + node = result.terminal_node.node; + + for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = node->slots[slot]; + if (ptr && + assoc_array_ptr_is_leaf(ptr) && + ops->compare_object(assoc_array_ptr_to_leaf(ptr), + index_key)) + goto found_leaf; + } + case assoc_array_walk_tree_empty: + case assoc_array_walk_found_wrong_shortcut: + default: + assoc_array_cancel_edit(edit); + pr_devel("not found\n"); + return NULL; + } + +found_leaf: + BUG_ON(array->nr_leaves_on_tree <= 0); + + /* In the simplest form of deletion we just clear the slot and release + * the leaf after a suitable interval. + */ + edit->dead_leaf = node->slots[slot]; + edit->set[0].ptr = &node->slots[slot]; + edit->set[0].to = NULL; + edit->adjust_count_on = node; + + /* If that concludes erasure of the last leaf, then delete the entire + * internal array. + */ + if (array->nr_leaves_on_tree == 1) { + edit->set[1].ptr = &array->root; + edit->set[1].to = NULL; + edit->adjust_count_on = NULL; + edit->excised_subtree = array->root; + pr_devel("all gone\n"); + return edit; + } + + /* However, we'd also like to clear up some metadata blocks if we + * possibly can. + * + * We go for a simple algorithm of: if this node has FAN_OUT or fewer + * leaves in it, then attempt to collapse it - and attempt to + * recursively collapse up the tree. + * + * We could also try and collapse in partially filled subtrees to take + * up space in this node. + */ + if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) { + struct assoc_array_node *parent, *grandparent; + struct assoc_array_ptr *ptr; + + /* First of all, we need to know if this node has metadata so + * that we don't try collapsing if all the leaves are already + * here. + */ + has_meta = false; + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + ptr = node->slots[i]; + if (assoc_array_ptr_is_meta(ptr)) { + has_meta = true; + break; + } + } + + pr_devel("leaves: %ld [m=%d]\n", + node->nr_leaves_on_branch - 1, has_meta); + + /* Look further up the tree to see if we can collapse this node + * into a more proximal node too. + */ + parent = node; + collapse_up: + pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch); + + ptr = parent->back_pointer; + if (!ptr) + goto do_collapse; + if (assoc_array_ptr_is_shortcut(ptr)) { + struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr); + ptr = s->back_pointer; + if (!ptr) + goto do_collapse; + } + + grandparent = assoc_array_ptr_to_node(ptr); + if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) { + parent = grandparent; + goto collapse_up; + } + + do_collapse: + /* There's no point collapsing if the original node has no meta + * pointers to discard and if we didn't merge into one of that + * node's ancestry. + */ + if (has_meta || parent != node) { + node = parent; + + /* Create a new node to collapse into */ + new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); + if (!new_n0) + goto enomem; + edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); + + new_n0->back_pointer = node->back_pointer; + new_n0->parent_slot = node->parent_slot; + new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; + edit->adjust_count_on = new_n0; + + collapse.node = new_n0; + collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf); + collapse.slot = 0; + assoc_array_subtree_iterate(assoc_array_node_to_ptr(node), + node->back_pointer, + assoc_array_delete_collapse_iterator, + &collapse); + pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch); + BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1); + + if (!node->back_pointer) { + edit->set[1].ptr = &array->root; + } else if (assoc_array_ptr_is_leaf(node->back_pointer)) { + BUG(); + } else if (assoc_array_ptr_is_node(node->back_pointer)) { + struct assoc_array_node *p = + assoc_array_ptr_to_node(node->back_pointer); + edit->set[1].ptr = &p->slots[node->parent_slot]; + } else if (assoc_array_ptr_is_shortcut(node->back_pointer)) { + struct assoc_array_shortcut *s = + assoc_array_ptr_to_shortcut(node->back_pointer); + edit->set[1].ptr = &s->next_node; + } + edit->set[1].to = assoc_array_node_to_ptr(new_n0); + edit->excised_subtree = assoc_array_node_to_ptr(node); + } + } + + return edit; + +enomem: + /* Clean up after an out of memory error */ + pr_devel("enomem\n"); + assoc_array_cancel_edit(edit); + return ERR_PTR(-ENOMEM); +} + +/** + * assoc_array_clear - Script deletion of all objects from an associative array + * @array: The array to clear. + * @ops: The operations to use. + * + * Precalculate and preallocate a script for the deletion of all the objects + * from an associative array. This results in an edit script that can either + * be applied or cancelled. + * + * The function returns a pointer to an edit script if there are objects to be + * deleted, NULL if there are no objects in the array or -ENOMEM. + * + * The caller should lock against other modifications and must continue to hold + * the lock until assoc_array_apply_edit() has been called. + * + * Accesses to the tree may take place concurrently with this function, + * provided they hold the RCU read lock. + */ +struct assoc_array_edit *assoc_array_clear(struct assoc_array *array, + const struct assoc_array_ops *ops) +{ + struct assoc_array_edit *edit; + + pr_devel("-->%s()\n", __func__); + + if (!array->root) + return NULL; + + edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); + if (!edit) + return ERR_PTR(-ENOMEM); + edit->array = array; + edit->ops = ops; + edit->set[1].ptr = &array->root; + edit->set[1].to = NULL; + edit->excised_subtree = array->root; + edit->ops_for_excised_subtree = ops; + pr_devel("all gone\n"); + return edit; +} + +/* + * Handle the deferred destruction after an applied edit. + */ +static void assoc_array_rcu_cleanup(struct rcu_head *head) +{ + struct assoc_array_edit *edit = + container_of(head, struct assoc_array_edit, rcu); + int i; + + pr_devel("-->%s()\n", __func__); + + if (edit->dead_leaf) + edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf)); + for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++) + if (edit->excised_meta[i]) + kfree(assoc_array_ptr_to_node(edit->excised_meta[i])); + + if (edit->excised_subtree) { + BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree)); + if (assoc_array_ptr_is_node(edit->excised_subtree)) { + struct assoc_array_node *n = + assoc_array_ptr_to_node(edit->excised_subtree); + n->back_pointer = NULL; + } else { + struct assoc_array_shortcut *s = + assoc_array_ptr_to_shortcut(edit->excised_subtree); + s->back_pointer = NULL; + } + assoc_array_destroy_subtree(edit->excised_subtree, + edit->ops_for_excised_subtree); + } + + kfree(edit); +} + +/** + * assoc_array_apply_edit - Apply an edit script to an associative array + * @edit: The script to apply. + * + * Apply an edit script to an associative array to effect an insertion, + * deletion or clearance. As the edit script includes preallocated memory, + * this is guaranteed not to fail. + * + * The edit script, dead objects and dead metadata will be scheduled for + * destruction after an RCU grace period to permit those doing read-only + * accesses on the array to continue to do so under the RCU read lock whilst + * the edit is taking place. + */ +void assoc_array_apply_edit(struct assoc_array_edit *edit) +{ + struct assoc_array_shortcut *shortcut; + struct assoc_array_node *node; + struct assoc_array_ptr *ptr; + int i; + + pr_devel("-->%s()\n", __func__); + + smp_wmb(); + if (edit->leaf_p) + *edit->leaf_p = edit->leaf; + + smp_wmb(); + for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++) + if (edit->set_parent_slot[i].p) + *edit->set_parent_slot[i].p = edit->set_parent_slot[i].to; + + smp_wmb(); + for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++) + if (edit->set_backpointers[i]) + *edit->set_backpointers[i] = edit->set_backpointers_to; + + smp_wmb(); + for (i = 0; i < ARRAY_SIZE(edit->set); i++) + if (edit->set[i].ptr) + *edit->set[i].ptr = edit->set[i].to; + + if (edit->array->root == NULL) { + edit->array->nr_leaves_on_tree = 0; + } else if (edit->adjust_count_on) { + node = edit->adjust_count_on; + for (;;) { + node->nr_leaves_on_branch += edit->adjust_count_by; + + ptr = node->back_pointer; + if (!ptr) + break; + if (assoc_array_ptr_is_shortcut(ptr)) { + shortcut = assoc_array_ptr_to_shortcut(ptr); + ptr = shortcut->back_pointer; + if (!ptr) + break; + } + BUG_ON(!assoc_array_ptr_is_node(ptr)); + node = assoc_array_ptr_to_node(ptr); + } + + edit->array->nr_leaves_on_tree += edit->adjust_count_by; + } + + call_rcu(&edit->rcu, assoc_array_rcu_cleanup); +} + +/** + * assoc_array_cancel_edit - Discard an edit script. + * @edit: The script to discard. + * + * Free an edit script and all the preallocated data it holds without making + * any changes to the associative array it was intended for. + * + * NOTE! In the case of an insertion script, this does _not_ release the leaf + * that was to be inserted. That is left to the caller. + */ +void assoc_array_cancel_edit(struct assoc_array_edit *edit) +{ + struct assoc_array_ptr *ptr; + int i; + + pr_devel("-->%s()\n", __func__); + + /* Clean up after an out of memory error */ + for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) { + ptr = edit->new_meta[i]; + if (ptr) { + if (assoc_array_ptr_is_node(ptr)) + kfree(assoc_array_ptr_to_node(ptr)); + else + kfree(assoc_array_ptr_to_shortcut(ptr)); + } + } + kfree(edit); +} + +/** + * assoc_array_gc - Garbage collect an associative array. + * @array: The array to clean. + * @ops: The operations to use. + * @iterator: A callback function to pass judgement on each object. + * @iterator_data: Private data for the callback function. + * + * Collect garbage from an associative array and pack down the internal tree to + * save memory. + * + * The iterator function is asked to pass judgement upon each object in the + * array. If it returns false, the object is discard and if it returns true, + * the object is kept. If it returns true, it must increment the object's + * usage count (or whatever it needs to do to retain it) before returning. + * + * This function returns 0 if successful or -ENOMEM if out of memory. In the + * latter case, the array is not changed. + * + * The caller should lock against other modifications and must continue to hold + * the lock until assoc_array_apply_edit() has been called. + * + * Accesses to the tree may take place concurrently with this function, + * provided they hold the RCU read lock. + */ +int assoc_array_gc(struct assoc_array *array, + const struct assoc_array_ops *ops, + bool (*iterator)(void *object, void *iterator_data), + void *iterator_data) +{ + struct assoc_array_shortcut *shortcut, *new_s; + struct assoc_array_node *node, *new_n; + struct assoc_array_edit *edit; + struct assoc_array_ptr *cursor, *ptr; + struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp; + unsigned long nr_leaves_on_tree; + int keylen, slot, nr_free, next_slot, i; + + pr_devel("-->%s()\n", __func__); + + if (!array->root) + return 0; + + edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); + if (!edit) + return -ENOMEM; + edit->array = array; + edit->ops = ops; + edit->ops_for_excised_subtree = ops; + edit->set[0].ptr = &array->root; + edit->excised_subtree = array->root; + + new_root = new_parent = NULL; + new_ptr_pp = &new_root; + cursor = array->root; + +descend: + /* If this point is a shortcut, then we need to duplicate it and + * advance the target cursor. + */ + if (assoc_array_ptr_is_shortcut(cursor)) { + shortcut = assoc_array_ptr_to_shortcut(cursor); + keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); + keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; + new_s = kmalloc(sizeof(struct assoc_array_shortcut) + + keylen * sizeof(unsigned long), GFP_KERNEL); + if (!new_s) + goto enomem; + pr_devel("dup shortcut %p -> %p\n", shortcut, new_s); + memcpy(new_s, shortcut, (sizeof(struct assoc_array_shortcut) + + keylen * sizeof(unsigned long))); + new_s->back_pointer = new_parent; + new_s->parent_slot = shortcut->parent_slot; + *new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s); + new_ptr_pp = &new_s->next_node; + cursor = shortcut->next_node; + } + + /* Duplicate the node at this position */ + node = assoc_array_ptr_to_node(cursor); + new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); + if (!new_n) + goto enomem; + pr_devel("dup node %p -> %p\n", node, new_n); + new_n->back_pointer = new_parent; + new_n->parent_slot = node->parent_slot; + *new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n); + new_ptr_pp = NULL; + slot = 0; + +continue_node: + /* Filter across any leaves and gc any subtrees */ + for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = node->slots[slot]; + if (!ptr) + continue; + + if (assoc_array_ptr_is_leaf(ptr)) { + if (iterator(assoc_array_ptr_to_leaf(ptr), + iterator_data)) + /* The iterator will have done any reference + * counting on the object for us. + */ + new_n->slots[slot] = ptr; + continue; + } + + new_ptr_pp = &new_n->slots[slot]; + cursor = ptr; + goto descend; + } + + pr_devel("-- compress node %p --\n", new_n); + + /* Count up the number of empty slots in this node and work out the + * subtree leaf count. + */ + new_n->nr_leaves_on_branch = 0; + nr_free = 0; + for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = new_n->slots[slot]; + if (!ptr) + nr_free++; + else if (assoc_array_ptr_is_leaf(ptr)) + new_n->nr_leaves_on_branch++; + } + pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch); + + /* See what we can fold in */ + next_slot = 0; + for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + struct assoc_array_shortcut *s; + struct assoc_array_node *child; + + ptr = new_n->slots[slot]; + if (!ptr || assoc_array_ptr_is_leaf(ptr)) + continue; + + s = NULL; + if (assoc_array_ptr_is_shortcut(ptr)) { + s = assoc_array_ptr_to_shortcut(ptr); + ptr = s->next_node; + } + + child = assoc_array_ptr_to_node(ptr); + new_n->nr_leaves_on_branch += child->nr_leaves_on_branch; + + if (child->nr_leaves_on_branch <= nr_free + 1) { + /* Fold the child node into this one */ + pr_devel("[%d] fold node %lu/%d [nx %d]\n", + slot, child->nr_leaves_on_branch, nr_free + 1, + next_slot); + + /* We would already have reaped an intervening shortcut + * on the way back up the tree. + */ + BUG_ON(s); + + new_n->slots[slot] = NULL; + nr_free++; + if (slot < next_slot) + next_slot = slot; + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + struct assoc_array_ptr *p = child->slots[i]; + if (!p) + continue; + BUG_ON(assoc_array_ptr_is_meta(p)); + while (new_n->slots[next_slot]) + next_slot++; + BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT); + new_n->slots[next_slot++] = p; + nr_free--; + } + kfree(child); + } else { + pr_devel("[%d] retain node %lu/%d [nx %d]\n", + slot, child->nr_leaves_on_branch, nr_free + 1, + next_slot); + } + } + + pr_devel("after: %lu\n", new_n->nr_leaves_on_branch); + + nr_leaves_on_tree = new_n->nr_leaves_on_branch; + + /* Excise this node if it is singly occupied by a shortcut */ + if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) { + for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) + if ((ptr = new_n->slots[slot])) + break; + + if (assoc_array_ptr_is_meta(ptr) && + assoc_array_ptr_is_shortcut(ptr)) { + pr_devel("excise node %p with 1 shortcut\n", new_n); + new_s = assoc_array_ptr_to_shortcut(ptr); + new_parent = new_n->back_pointer; + slot = new_n->parent_slot; + kfree(new_n); + if (!new_parent) { + new_s->back_pointer = NULL; + new_s->parent_slot = 0; + new_root = ptr; + goto gc_complete; + } + + if (assoc_array_ptr_is_shortcut(new_parent)) { + /* We can discard any preceding shortcut also */ + struct assoc_array_shortcut *s = + assoc_array_ptr_to_shortcut(new_parent); + + pr_devel("excise preceding shortcut\n"); + + new_parent = new_s->back_pointer = s->back_pointer; + slot = new_s->parent_slot = s->parent_slot; + kfree(s); + if (!new_parent) { + new_s->back_pointer = NULL; + new_s->parent_slot = 0; + new_root = ptr; + goto gc_complete; + } + } + + new_s->back_pointer = new_parent; + new_s->parent_slot = slot; + new_n = assoc_array_ptr_to_node(new_parent); + new_n->slots[slot] = ptr; + goto ascend_old_tree; + } + } + + /* Excise any shortcuts we might encounter that point to nodes that + * only contain leaves. + */ + ptr = new_n->back_pointer; + if (!ptr) + goto gc_complete; + + if (assoc_array_ptr_is_shortcut(ptr)) { + new_s = assoc_array_ptr_to_shortcut(ptr); + new_parent = new_s->back_pointer; + slot = new_s->parent_slot; + + if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) { + struct assoc_array_node *n; + + pr_devel("excise shortcut\n"); + new_n->back_pointer = new_parent; + new_n->parent_slot = slot; + kfree(new_s); + if (!new_parent) { + new_root = assoc_array_node_to_ptr(new_n); + goto gc_complete; + } + + n = assoc_array_ptr_to_node(new_parent); + n->slots[slot] = assoc_array_node_to_ptr(new_n); + } + } else { + new_parent = ptr; + } + new_n = assoc_array_ptr_to_node(new_parent); + +ascend_old_tree: + ptr = node->back_pointer; + if (assoc_array_ptr_is_shortcut(ptr)) { + shortcut = assoc_array_ptr_to_shortcut(ptr); + slot = shortcut->parent_slot; + cursor = shortcut->back_pointer; + } else { + slot = node->parent_slot; + cursor = ptr; + } + BUG_ON(!ptr); + node = assoc_array_ptr_to_node(cursor); + slot++; + goto continue_node; + +gc_complete: + edit->set[0].to = new_root; + assoc_array_apply_edit(edit); + edit->array->nr_leaves_on_tree = nr_leaves_on_tree; + return 0; + +enomem: + pr_devel("enomem\n"); + assoc_array_destroy_subtree(new_root, edit->ops); + kfree(edit); + return -ENOMEM; +} diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index 00bca223d1e..0211d30d8c3 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c @@ -8,6 +8,9 @@ * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/init.h> #include <linux/bug.h> #include <linux/kernel.h> @@ -146,18 +149,18 @@ static __init int test_atomic64(void) BUG_ON(v.counter != r); #ifdef CONFIG_X86 - printk(KERN_INFO "atomic64 test passed for %s platform %s CX8 and %s SSE\n", + pr_info("passed for %s platform %s CX8 and %s SSE\n", #ifdef CONFIG_X86_64 - "x86-64", + "x86-64", #elif defined(CONFIG_X86_CMPXCHG64) - "i586+", + "i586+", #else - "i386+", + "i386+", #endif boot_cpu_has(X86_FEATURE_CX8) ? "with" : "without", boot_cpu_has(X86_FEATURE_XMM) ? "with" : "without"); #else - printk(KERN_INFO "atomic64 test passed\n"); + pr_info("passed\n"); #endif return 0; diff --git a/lib/audit.c b/lib/audit.c index 76bbed4a20e..1d726a22565 100644 --- a/lib/audit.c +++ b/lib/audit.c @@ -30,11 +30,17 @@ static unsigned signal_class[] = { int audit_classify_arch(int arch) { - return 0; + if (audit_is_compat(arch)) + return 1; + else + return 0; } int audit_classify_syscall(int abi, unsigned syscall) { + if (audit_is_compat(abi)) + return audit_classify_compat_syscall(abi, syscall); + switch(syscall) { #ifdef __NR_open case __NR_open: @@ -57,6 +63,13 @@ int audit_classify_syscall(int abi, unsigned syscall) static int __init audit_classes_init(void) { +#ifdef CONFIG_AUDIT_COMPAT_GENERIC + audit_register_class(AUDIT_CLASS_WRITE_32, compat_write_class); + audit_register_class(AUDIT_CLASS_READ_32, compat_read_class); + audit_register_class(AUDIT_CLASS_DIR_WRITE_32, compat_dir_class); + audit_register_class(AUDIT_CLASS_CHATTR_32, compat_chattr_class); + audit_register_class(AUDIT_CLASS_SIGNAL_32, compat_signal_class); +#endif audit_register_class(AUDIT_CLASS_WRITE, write_class); audit_register_class(AUDIT_CLASS_READ, read_class); audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class); diff --git a/lib/average.c b/lib/average.c index 99a67e662b3..114d1beae0c 100644 --- a/lib/average.c +++ b/lib/average.c @@ -53,8 +53,10 @@ EXPORT_SYMBOL(ewma_init); */ struct ewma *ewma_add(struct ewma *avg, unsigned long val) { - avg->internal = avg->internal ? - (((avg->internal << avg->weight) - avg->internal) + + unsigned long internal = ACCESS_ONCE(avg->internal); + + ACCESS_ONCE(avg->internal) = internal ? + (((internal << avg->weight) - internal) + (val << avg->factor)) >> avg->weight : (val << avg->factor); return avg; diff --git a/lib/btree.c b/lib/btree.c index f9a484676cb..4264871ea1a 100644 --- a/lib/btree.c +++ b/lib/btree.c @@ -198,6 +198,7 @@ EXPORT_SYMBOL_GPL(btree_init); void btree_destroy(struct btree_head *head) { + mempool_free(head->node, head->mempool); mempool_destroy(head->mempool); head->mempool = NULL; } diff --git a/lib/bug.c b/lib/bug.c index 168603477f0..d1d7c787890 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -37,6 +37,9 @@ Jeremy Fitzhardinge <jeremy@goop.org> 2006 */ + +#define pr_fmt(fmt) fmt + #include <linux/list.h> #include <linux/module.h> #include <linux/kernel.h> @@ -153,15 +156,13 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) if (warning) { /* this is a WARN_ON rather than BUG/BUG_ON */ - printk(KERN_WARNING "------------[ cut here ]------------\n"); + pr_warn("------------[ cut here ]------------\n"); if (file) - printk(KERN_WARNING "WARNING: at %s:%u\n", - file, line); + pr_warn("WARNING: at %s:%u\n", file, line); else - printk(KERN_WARNING "WARNING: at %p " - "[verbose debug info unavailable]\n", - (void *)bugaddr); + pr_warn("WARNING: at %p [verbose debug info unavailable]\n", + (void *)bugaddr); print_modules(); show_regs(regs); @@ -174,12 +175,10 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) printk(KERN_DEFAULT "------------[ cut here ]------------\n"); if (file) - printk(KERN_CRIT "kernel BUG at %s:%u!\n", - file, line); + pr_crit("kernel BUG at %s:%u!\n", file, line); else - printk(KERN_CRIT "Kernel BUG at %p " - "[verbose debug info unavailable]\n", - (void *)bugaddr); + pr_crit("Kernel BUG at %p [verbose debug info unavailable]\n", + (void *)bugaddr); return BUG_TRAP_TYPE_BUG; } diff --git a/lib/clz_ctz.c b/lib/clz_ctz.c index a8f8379eb49..2e11e48446a 100644 --- a/lib/clz_ctz.c +++ b/lib/clz_ctz.c @@ -6,6 +6,9 @@ * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. + * The functions in this file aren't called directly, but are required by + * GCC builtins such as __builtin_ctz, and therefore they can't be removed + * despite appearing unreferenced in kernel source. * * __c[lt]z[sd]i2 can be overridden by linking arch-specific versions. */ @@ -13,18 +16,22 @@ #include <linux/export.h> #include <linux/kernel.h> +int __weak __ctzsi2(int val); int __weak __ctzsi2(int val) { return __ffs(val); } EXPORT_SYMBOL(__ctzsi2); +int __weak __clzsi2(int val); int __weak __clzsi2(int val) { return 32 - fls(val); } EXPORT_SYMBOL(__clzsi2); +int __weak __clzdi2(long val); +int __weak __ctzdi2(long val); #if BITS_PER_LONG == 32 int __weak __clzdi2(long val) diff --git a/lib/cmdline.c b/lib/cmdline.c index eb6791188cf..d4932f745e9 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -49,13 +49,13 @@ static int get_range(char **str, int *pint) * 3 - hyphen found to denote a range */ -int get_option (char **str, int *pint) +int get_option(char **str, int *pint) { char *cur = *str; if (!cur || !(*cur)) return 0; - *pint = simple_strtol (cur, str, 0); + *pint = simple_strtol(cur, str, 0); if (cur == *str) return 0; if (**str == ',') { @@ -67,6 +67,7 @@ int get_option (char **str, int *pint) return 1; } +EXPORT_SYMBOL(get_option); /** * get_options - Parse a string into a list of integers @@ -84,13 +85,13 @@ int get_option (char **str, int *pint) * the parse to end (typically a null terminator, if @str is * completely parseable). */ - + char *get_options(const char *str, int nints, int *ints) { int res, i = 1; while (i < nints) { - res = get_option ((char **)&str, ints + i); + res = get_option((char **)&str, ints + i); if (res == 0) break; if (res == 3) { @@ -112,6 +113,7 @@ char *get_options(const char *str, int nints, int *ints) ints[0] = i - 1; return (char *)str; } +EXPORT_SYMBOL(get_options); /** * memparse - parse a string with mem suffixes into a number @@ -152,8 +154,4 @@ unsigned long long memparse(const char *ptr, char **retptr) return ret; } - - EXPORT_SYMBOL(memparse); -EXPORT_SYMBOL(get_option); -EXPORT_SYMBOL(get_options); diff --git a/lib/compat_audit.c b/lib/compat_audit.c new file mode 100644 index 00000000000..873f75b640a --- /dev/null +++ b/lib/compat_audit.c @@ -0,0 +1,50 @@ +#include <linux/init.h> +#include <linux/types.h> +#include <asm/unistd32.h> + +unsigned compat_dir_class[] = { +#include <asm-generic/audit_dir_write.h> +~0U +}; + +unsigned compat_read_class[] = { +#include <asm-generic/audit_read.h> +~0U +}; + +unsigned compat_write_class[] = { +#include <asm-generic/audit_write.h> +~0U +}; + +unsigned compat_chattr_class[] = { +#include <asm-generic/audit_change_attr.h> +~0U +}; + +unsigned compat_signal_class[] = { +#include <asm-generic/audit_signal.h> +~0U +}; + +int audit_classify_compat_syscall(int abi, unsigned syscall) +{ + switch (syscall) { +#ifdef __NR_open + case __NR_open: + return 2; +#endif +#ifdef __NR_openat + case __NR_openat: + return 3; +#endif +#ifdef __NR_socketcall + case __NR_socketcall: + return 4; +#endif + case __NR_execve: + return 5; + default: + return 1; + } +} diff --git a/lib/cpumask.c b/lib/cpumask.c index d327b87c99b..b6513a9f289 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -140,7 +140,7 @@ EXPORT_SYMBOL(zalloc_cpumask_var); */ void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask) { - *mask = alloc_bootmem(cpumask_size()); + *mask = memblock_virt_alloc(cpumask_size(), 0); } /** @@ -161,6 +161,69 @@ EXPORT_SYMBOL(free_cpumask_var); */ void __init free_bootmem_cpumask_var(cpumask_var_t mask) { - free_bootmem(__pa(mask), cpumask_size()); + memblock_free_early(__pa(mask), cpumask_size()); } #endif + +/** + * cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first + * + * @i: index number + * @numa_node: local numa_node + * @dstp: cpumask with the relevant cpu bit set according to the policy + * + * This function sets the cpumask according to a numa aware policy. + * cpumask could be used as an affinity hint for the IRQ related to a + * queue. When the policy is to spread queues across cores - local cores + * first. + * + * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set + * the cpu bit and need to re-call the function. + */ +int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp) +{ + cpumask_var_t mask; + int cpu; + int ret = 0; + + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + i %= num_online_cpus(); + + if (numa_node == -1 || !cpumask_of_node(numa_node)) { + /* Use all online cpu's for non numa aware system */ + cpumask_copy(mask, cpu_online_mask); + } else { + int n; + + cpumask_and(mask, + cpumask_of_node(numa_node), cpu_online_mask); + + n = cpumask_weight(mask); + if (i >= n) { + i -= n; + + /* If index > number of local cpu's, mask out local + * cpu's + */ + cpumask_andnot(mask, cpu_online_mask, mask); + } + } + + for_each_cpu(cpu, mask) { + if (--i < 0) + goto out; + } + + ret = -EAGAIN; + +out: + free_cpumask_var(mask); + + if (!ret) + cpumask_set_cpu(cpu, dstp); + + return ret; +} +EXPORT_SYMBOL(cpumask_set_cpu_local_first); diff --git a/lib/crc32.c b/lib/crc32.c index 410093dbe51..21a7b2135af 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -29,16 +29,17 @@ #include <linux/crc32.h> #include <linux/module.h> #include <linux/types.h> +#include <linux/sched.h> #include "crc32defs.h" #if CRC_LE_BITS > 8 -# define tole(x) ((__force u32) __constant_cpu_to_le32(x)) +# define tole(x) ((__force u32) cpu_to_le32(x)) #else # define tole(x) (x) #endif #if CRC_BE_BITS > 8 -# define tobe(x) ((__force u32) __constant_cpu_to_be32(x)) +# define tobe(x) ((__force u32) cpu_to_be32(x)) #else # define tobe(x) (x) #endif @@ -49,6 +50,30 @@ MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>"); MODULE_DESCRIPTION("Various CRC32 calculations"); MODULE_LICENSE("GPL"); +#define GF2_DIM 32 + +static u32 gf2_matrix_times(u32 *mat, u32 vec) +{ + u32 sum = 0; + + while (vec) { + if (vec & 1) + sum ^= *mat; + vec >>= 1; + mat++; + } + + return sum; +} + +static void gf2_matrix_square(u32 *square, u32 *mat) +{ + int i; + + for (i = 0; i < GF2_DIM; i++) + square[i] = gf2_matrix_times(mat, mat[i]); +} + #if CRC_LE_BITS > 8 || CRC_BE_BITS > 8 /* implements slicing-by-4 or slicing-by-8 algorithm */ @@ -130,6 +155,52 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) } #endif +/* For conditions of distribution and use, see copyright notice in zlib.h */ +static u32 crc32_generic_combine(u32 crc1, u32 crc2, size_t len2, + u32 polynomial) +{ + u32 even[GF2_DIM]; /* Even-power-of-two zeros operator */ + u32 odd[GF2_DIM]; /* Odd-power-of-two zeros operator */ + u32 row; + int i; + + if (len2 <= 0) + return crc1; + + /* Put operator for one zero bit in odd */ + odd[0] = polynomial; + row = 1; + for (i = 1; i < GF2_DIM; i++) { + odd[i] = row; + row <<= 1; + } + + gf2_matrix_square(even, odd); /* Put operator for two zero bits in even */ + gf2_matrix_square(odd, even); /* Put operator for four zero bits in odd */ + + /* Apply len2 zeros to crc1 (first square will put the operator for one + * zero byte, eight zero bits, in even). + */ + do { + /* Apply zeros operator for this bit of len2 */ + gf2_matrix_square(even, odd); + if (len2 & 1) + crc1 = gf2_matrix_times(even, crc1); + len2 >>= 1; + /* If no more bits set, then done */ + if (len2 == 0) + break; + /* Another iteration of the loop with odd and even swapped */ + gf2_matrix_square(odd, even); + if (len2 & 1) + crc1 = gf2_matrix_times(odd, crc1); + len2 >>= 1; + } while (len2 != 0); + + crc1 ^= crc2; + return crc1; +} + /** * crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II * CRC32/CRC32C @@ -200,8 +271,19 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) (const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE); } #endif +u32 __pure crc32_le_combine(u32 crc1, u32 crc2, size_t len2) +{ + return crc32_generic_combine(crc1, crc2, len2, CRCPOLY_LE); +} + +u32 __pure __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2) +{ + return crc32_generic_combine(crc1, crc2, len2, CRC32C_POLY_LE); +} EXPORT_SYMBOL(crc32_le); +EXPORT_SYMBOL(crc32_le_combine); EXPORT_SYMBOL(__crc32c_le); +EXPORT_SYMBOL(__crc32c_le_combine); /** * crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32 @@ -795,206 +877,106 @@ static struct crc_test { u32 crc32c_le; /* expected crc32c_le result */ } test[] = { - {0x674bf11d, 0x00000038, 0x00000542, 0x0af6d466, 0xd8b6e4c1, - 0xf6e93d6c}, - {0x35c672c6, 0x0000003a, 0x000001aa, 0xc6d3dfba, 0x28aaf3ad, - 0x0fe92aca}, - {0x496da28e, 0x00000039, 0x000005af, 0xd933660f, 0x5d57e81f, - 0x52e1ebb8}, - {0x09a9b90e, 0x00000027, 0x000001f8, 0xb45fe007, 0xf45fca9a, - 0x0798af9a}, - {0xdc97e5a9, 0x00000025, 0x000003b6, 0xf81a3562, 0xe0126ba2, - 0x18eb3152}, - {0x47c58900, 0x0000000a, 0x000000b9, 0x8e58eccf, 0xf3afc793, - 0xd00d08c7}, - {0x292561e8, 0x0000000c, 0x00000403, 0xa2ba8aaf, 0x0b797aed, - 0x8ba966bc}, - {0x415037f6, 0x00000003, 0x00000676, 0xa17d52e8, 0x7f0fdf35, - 0x11d694a2}, - {0x3466e707, 0x00000026, 0x00000042, 0x258319be, 0x75c484a2, - 0x6ab3208d}, - {0xafd1281b, 0x00000023, 0x000002ee, 0x4428eaf8, 0x06c7ad10, - 0xba4603c5}, - {0xd3857b18, 0x00000028, 0x000004a2, 0x5c430821, 0xb062b7cb, - 0xe6071c6f}, - {0x1d825a8f, 0x0000002b, 0x0000050b, 0xd2c45f0c, 0xd68634e0, - 0x179ec30a}, - {0x5033e3bc, 0x0000000b, 0x00000078, 0xa3ea4113, 0xac6d31fb, - 0x0903beb8}, - {0x94f1fb5e, 0x0000000f, 0x000003a2, 0xfbfc50b1, 0x3cfe50ed, - 0x6a7cb4fa}, - {0xc9a0fe14, 0x00000009, 0x00000473, 0x5fb61894, 0x87070591, - 0xdb535801}, - {0x88a034b1, 0x0000001c, 0x000005ad, 0xc1b16053, 0x46f95c67, - 0x92bed597}, - {0xf0f72239, 0x00000020, 0x0000026d, 0xa6fa58f3, 0xf8c2c1dd, - 0x192a3f1b}, - {0xcc20a5e3, 0x0000003b, 0x0000067a, 0x7740185a, 0x308b979a, - 0xccbaec1a}, - {0xce589c95, 0x0000002b, 0x00000641, 0xd055e987, 0x40aae25b, - 0x7eabae4d}, - {0x78edc885, 0x00000035, 0x000005be, 0xa39cb14b, 0x035b0d1f, - 0x28c72982}, - {0x9d40a377, 0x0000003b, 0x00000038, 0x1f47ccd2, 0x197fbc9d, - 0xc3cd4d18}, - {0x703d0e01, 0x0000003c, 0x000006f1, 0x88735e7c, 0xfed57c5a, - 0xbca8f0e7}, - {0x776bf505, 0x0000000f, 0x000005b2, 0x5cc4fc01, 0xf32efb97, - 0x713f60b3}, - {0x4a3e7854, 0x00000027, 0x000004b8, 0x8d923c82, 0x0cbfb4a2, - 0xebd08fd5}, - {0x209172dd, 0x0000003b, 0x00000356, 0xb89e9c2b, 0xd7868138, - 0x64406c59}, - {0x3ba4cc5b, 0x0000002f, 0x00000203, 0xe51601a9, 0x5b2a1032, - 0x7421890e}, - {0xfc62f297, 0x00000000, 0x00000079, 0x71a8e1a2, 0x5d88685f, - 0xe9347603}, - {0x64280b8b, 0x00000016, 0x000007ab, 0x0fa7a30c, 0xda3a455f, - 0x1bef9060}, - {0x97dd724b, 0x00000033, 0x000007ad, 0x5788b2f4, 0xd7326d32, - 0x34720072}, - {0x61394b52, 0x00000035, 0x00000571, 0xc66525f1, 0xcabe7fef, - 0x48310f59}, - {0x29b4faff, 0x00000024, 0x0000006e, 0xca13751e, 0x993648e0, - 0x783a4213}, - {0x29bfb1dc, 0x0000000b, 0x00000244, 0x436c43f7, 0x429f7a59, - 0x9e8efd41}, - {0x86ae934b, 0x00000035, 0x00000104, 0x0760ec93, 0x9cf7d0f4, - 0xfc3d34a5}, - {0xc4c1024e, 0x0000002e, 0x000006b1, 0x6516a3ec, 0x19321f9c, - 0x17a52ae2}, - {0x3287a80a, 0x00000026, 0x00000496, 0x0b257eb1, 0x754ebd51, - 0x886d935a}, - {0xa4db423e, 0x00000023, 0x0000045d, 0x9b3a66dc, 0x873e9f11, - 0xeaaeaeb2}, - {0x7a1078df, 0x00000015, 0x0000014a, 0x8c2484c5, 0x6a628659, - 0x8e900a4b}, - {0x6048bd5b, 0x00000006, 0x0000006a, 0x897e3559, 0xac9961af, - 0xd74662b1}, - {0xd8f9ea20, 0x0000003d, 0x00000277, 0x60eb905b, 0xed2aaf99, - 0xd26752ba}, - {0xea5ec3b4, 0x0000002a, 0x000004fe, 0x869965dc, 0x6c1f833b, - 0x8b1fcd62}, - {0x2dfb005d, 0x00000016, 0x00000345, 0x6a3b117e, 0xf05e8521, - 0xf54342fe}, - {0x5a214ade, 0x00000020, 0x000005b6, 0x467f70be, 0xcb22ccd3, - 0x5b95b988}, - {0xf0ab9cca, 0x00000032, 0x00000515, 0xed223df3, 0x7f3ef01d, - 0x2e1176be}, - {0x91b444f9, 0x0000002e, 0x000007f8, 0x84e9a983, 0x5676756f, - 0x66120546}, - {0x1b5d2ddb, 0x0000002e, 0x0000012c, 0xba638c4c, 0x3f42047b, - 0xf256a5cc}, - {0xd824d1bb, 0x0000003a, 0x000007b5, 0x6288653b, 0x3a3ebea0, - 0x4af1dd69}, - {0x0470180c, 0x00000034, 0x000001f0, 0x9d5b80d6, 0x3de08195, - 0x56f0a04a}, - {0xffaa3a3f, 0x00000036, 0x00000299, 0xf3a82ab8, 0x53e0c13d, - 0x74f6b6b2}, - {0x6406cfeb, 0x00000023, 0x00000600, 0xa920b8e8, 0xe4e2acf4, - 0x085951fd}, - {0xb24aaa38, 0x0000003e, 0x000004a1, 0x657cc328, 0x5077b2c3, - 0xc65387eb}, - {0x58b2ab7c, 0x00000039, 0x000002b4, 0x3a17ee7e, 0x9dcb3643, - 0x1ca9257b}, - {0x3db85970, 0x00000006, 0x000002b6, 0x95268b59, 0xb9812c10, - 0xfd196d76}, - {0x857830c5, 0x00000003, 0x00000590, 0x4ef439d5, 0xf042161d, - 0x5ef88339}, - {0xe1fcd978, 0x0000003e, 0x000007d8, 0xae8d8699, 0xce0a1ef5, - 0x2c3714d9}, - {0xb982a768, 0x00000016, 0x000006e0, 0x62fad3df, 0x5f8a067b, - 0x58576548}, - {0x1d581ce8, 0x0000001e, 0x0000058b, 0xf0f5da53, 0x26e39eee, - 0xfd7c57de}, - {0x2456719b, 0x00000025, 0x00000503, 0x4296ac64, 0xd50e4c14, - 0xd5fedd59}, - {0xfae6d8f2, 0x00000000, 0x0000055d, 0x057fdf2e, 0x2a31391a, - 0x1cc3b17b}, - {0xcba828e3, 0x00000039, 0x000002ce, 0xe3f22351, 0x8f00877b, - 0x270eed73}, - {0x13d25952, 0x0000000a, 0x0000072d, 0x76d4b4cc, 0x5eb67ec3, - 0x91ecbb11}, - {0x0342be3f, 0x00000015, 0x00000599, 0xec75d9f1, 0x9d4d2826, - 0x05ed8d0c}, - {0xeaa344e0, 0x00000014, 0x000004d8, 0x72a4c981, 0x2064ea06, - 0x0b09ad5b}, - {0xbbb52021, 0x0000003b, 0x00000272, 0x04af99fc, 0xaf042d35, - 0xf8d511fb}, - {0xb66384dc, 0x0000001d, 0x000007fc, 0xd7629116, 0x782bd801, - 0x5ad832cc}, - {0x616c01b6, 0x00000022, 0x000002c8, 0x5b1dab30, 0x783ce7d2, - 0x1214d196}, - {0xce2bdaad, 0x00000016, 0x0000062a, 0x932535c8, 0x3f02926d, - 0x5747218a}, - {0x00fe84d7, 0x00000005, 0x00000205, 0x850e50aa, 0x753d649c, - 0xde8f14de}, - {0xbebdcb4c, 0x00000006, 0x0000055d, 0xbeaa37a2, 0x2d8c9eba, - 0x3563b7b9}, - {0xd8b1a02a, 0x00000010, 0x00000387, 0x5017d2fc, 0x503541a5, - 0x071475d0}, - {0x3b96cad2, 0x00000036, 0x00000347, 0x1d2372ae, 0x926cd90b, - 0x54c79d60}, - {0xc94c1ed7, 0x00000005, 0x0000038b, 0x9e9fdb22, 0x144a9178, - 0x4c53eee6}, - {0x1aad454e, 0x00000025, 0x000002b2, 0xc3f6315c, 0x5c7a35b3, - 0x10137a3c}, - {0xa4fec9a6, 0x00000000, 0x000006d6, 0x90be5080, 0xa4107605, - 0xaa9d6c73}, - {0x1bbe71e2, 0x0000001f, 0x000002fd, 0x4e504c3b, 0x284ccaf1, - 0xb63d23e7}, - {0x4201c7e4, 0x00000002, 0x000002b7, 0x7822e3f9, 0x0cc912a9, - 0x7f53e9cf}, - {0x23fddc96, 0x00000003, 0x00000627, 0x8a385125, 0x07767e78, - 0x13c1cd83}, - {0xd82ba25c, 0x00000016, 0x0000063e, 0x98e4148a, 0x283330c9, - 0x49ff5867}, - {0x786f2032, 0x0000002d, 0x0000060f, 0xf201600a, 0xf561bfcd, - 0x8467f211}, - {0xfebe4e1f, 0x0000002a, 0x000004f2, 0x95e51961, 0xfd80dcab, - 0x3f9683b2}, - {0x1a6e0a39, 0x00000008, 0x00000672, 0x8af6c2a5, 0x78dd84cb, - 0x76a3f874}, - {0x56000ab8, 0x0000000e, 0x000000e5, 0x36bacb8f, 0x22ee1f77, - 0x863b702f}, - {0x4717fe0c, 0x00000000, 0x000006ec, 0x8439f342, 0x5c8e03da, - 0xdc6c58ff}, - {0xd5d5d68e, 0x0000003c, 0x000003a3, 0x46fff083, 0x177d1b39, - 0x0622cc95}, - {0xc25dd6c6, 0x00000024, 0x000006c0, 0x5ceb8eb4, 0x892b0d16, - 0xe85605cd}, - {0xe9b11300, 0x00000023, 0x00000683, 0x07a5d59a, 0x6c6a3208, - 0x31da5f06}, - {0x95cd285e, 0x00000001, 0x00000047, 0x7b3a4368, 0x0202c07e, - 0xa1f2e784}, - {0xd9245a25, 0x0000001e, 0x000003a6, 0xd33c1841, 0x1936c0d5, - 0xb07cc616}, - {0x103279db, 0x00000006, 0x0000039b, 0xca09b8a0, 0x77d62892, - 0xbf943b6c}, - {0x1cba3172, 0x00000027, 0x000001c8, 0xcb377194, 0xebe682db, - 0x2c01af1c}, - {0x8f613739, 0x0000000c, 0x000001df, 0xb4b0bc87, 0x7710bd43, - 0x0fe5f56d}, - {0x1c6aa90d, 0x0000001b, 0x0000053c, 0x70559245, 0xda7894ac, - 0xf8943b2d}, - {0xaabe5b93, 0x0000003d, 0x00000715, 0xcdbf42fa, 0x0c3b99e7, - 0xe4d89272}, - {0xf15dd038, 0x00000006, 0x000006db, 0x6e104aea, 0x8d5967f2, - 0x7c2f6bbb}, - {0x584dd49c, 0x00000020, 0x000007bc, 0x36b6cfd6, 0xad4e23b2, - 0xabbf388b}, - {0x5d8c9506, 0x00000020, 0x00000470, 0x4c62378e, 0x31d92640, - 0x1dca1f4e}, - {0xb80d17b0, 0x00000032, 0x00000346, 0x22a5bb88, 0x9a7ec89f, - 0x5c170e23}, - {0xdaf0592e, 0x00000023, 0x000007b0, 0x3cab3f99, 0x9b1fdd99, - 0xc0e9d672}, - {0x4793cc85, 0x0000000d, 0x00000706, 0xe82e04f6, 0xed3db6b7, - 0xc18bdc86}, - {0x82ebf64e, 0x00000009, 0x000007c3, 0x69d590a9, 0x9efa8499, - 0xa874fcdd}, - {0xb18a0319, 0x00000026, 0x000007db, 0x1cf98dcc, 0x8fa9ad6a, - 0x9dc0bb48}, + {0x674bf11d, 0x00000038, 0x00000542, 0x0af6d466, 0xd8b6e4c1, 0xf6e93d6c}, + {0x35c672c6, 0x0000003a, 0x000001aa, 0xc6d3dfba, 0x28aaf3ad, 0x0fe92aca}, + {0x496da28e, 0x00000039, 0x000005af, 0xd933660f, 0x5d57e81f, 0x52e1ebb8}, + {0x09a9b90e, 0x00000027, 0x000001f8, 0xb45fe007, 0xf45fca9a, 0x0798af9a}, + {0xdc97e5a9, 0x00000025, 0x000003b6, 0xf81a3562, 0xe0126ba2, 0x18eb3152}, + {0x47c58900, 0x0000000a, 0x000000b9, 0x8e58eccf, 0xf3afc793, 0xd00d08c7}, + {0x292561e8, 0x0000000c, 0x00000403, 0xa2ba8aaf, 0x0b797aed, 0x8ba966bc}, + {0x415037f6, 0x00000003, 0x00000676, 0xa17d52e8, 0x7f0fdf35, 0x11d694a2}, + {0x3466e707, 0x00000026, 0x00000042, 0x258319be, 0x75c484a2, 0x6ab3208d}, + {0xafd1281b, 0x00000023, 0x000002ee, 0x4428eaf8, 0x06c7ad10, 0xba4603c5}, + {0xd3857b18, 0x00000028, 0x000004a2, 0x5c430821, 0xb062b7cb, 0xe6071c6f}, + {0x1d825a8f, 0x0000002b, 0x0000050b, 0xd2c45f0c, 0xd68634e0, 0x179ec30a}, + {0x5033e3bc, 0x0000000b, 0x00000078, 0xa3ea4113, 0xac6d31fb, 0x0903beb8}, + {0x94f1fb5e, 0x0000000f, 0x000003a2, 0xfbfc50b1, 0x3cfe50ed, 0x6a7cb4fa}, + {0xc9a0fe14, 0x00000009, 0x00000473, 0x5fb61894, 0x87070591, 0xdb535801}, + {0x88a034b1, 0x0000001c, 0x000005ad, 0xc1b16053, 0x46f95c67, 0x92bed597}, + {0xf0f72239, 0x00000020, 0x0000026d, 0xa6fa58f3, 0xf8c2c1dd, 0x192a3f1b}, + {0xcc20a5e3, 0x0000003b, 0x0000067a, 0x7740185a, 0x308b979a, 0xccbaec1a}, + {0xce589c95, 0x0000002b, 0x00000641, 0xd055e987, 0x40aae25b, 0x7eabae4d}, + {0x78edc885, 0x00000035, 0x000005be, 0xa39cb14b, 0x035b0d1f, 0x28c72982}, + {0x9d40a377, 0x0000003b, 0x00000038, 0x1f47ccd2, 0x197fbc9d, 0xc3cd4d18}, + {0x703d0e01, 0x0000003c, 0x000006f1, 0x88735e7c, 0xfed57c5a, 0xbca8f0e7}, + {0x776bf505, 0x0000000f, 0x000005b2, 0x5cc4fc01, 0xf32efb97, 0x713f60b3}, + {0x4a3e7854, 0x00000027, 0x000004b8, 0x8d923c82, 0x0cbfb4a2, 0xebd08fd5}, + {0x209172dd, 0x0000003b, 0x00000356, 0xb89e9c2b, 0xd7868138, 0x64406c59}, + {0x3ba4cc5b, 0x0000002f, 0x00000203, 0xe51601a9, 0x5b2a1032, 0x7421890e}, + {0xfc62f297, 0x00000000, 0x00000079, 0x71a8e1a2, 0x5d88685f, 0xe9347603}, + {0x64280b8b, 0x00000016, 0x000007ab, 0x0fa7a30c, 0xda3a455f, 0x1bef9060}, + {0x97dd724b, 0x00000033, 0x000007ad, 0x5788b2f4, 0xd7326d32, 0x34720072}, + {0x61394b52, 0x00000035, 0x00000571, 0xc66525f1, 0xcabe7fef, 0x48310f59}, + {0x29b4faff, 0x00000024, 0x0000006e, 0xca13751e, 0x993648e0, 0x783a4213}, + {0x29bfb1dc, 0x0000000b, 0x00000244, 0x436c43f7, 0x429f7a59, 0x9e8efd41}, + {0x86ae934b, 0x00000035, 0x00000104, 0x0760ec93, 0x9cf7d0f4, 0xfc3d34a5}, + {0xc4c1024e, 0x0000002e, 0x000006b1, 0x6516a3ec, 0x19321f9c, 0x17a52ae2}, + {0x3287a80a, 0x00000026, 0x00000496, 0x0b257eb1, 0x754ebd51, 0x886d935a}, + {0xa4db423e, 0x00000023, 0x0000045d, 0x9b3a66dc, 0x873e9f11, 0xeaaeaeb2}, + {0x7a1078df, 0x00000015, 0x0000014a, 0x8c2484c5, 0x6a628659, 0x8e900a4b}, + {0x6048bd5b, 0x00000006, 0x0000006a, 0x897e3559, 0xac9961af, 0xd74662b1}, + {0xd8f9ea20, 0x0000003d, 0x00000277, 0x60eb905b, 0xed2aaf99, 0xd26752ba}, + {0xea5ec3b4, 0x0000002a, 0x000004fe, 0x869965dc, 0x6c1f833b, 0x8b1fcd62}, + {0x2dfb005d, 0x00000016, 0x00000345, 0x6a3b117e, 0xf05e8521, 0xf54342fe}, + {0x5a214ade, 0x00000020, 0x000005b6, 0x467f70be, 0xcb22ccd3, 0x5b95b988}, + {0xf0ab9cca, 0x00000032, 0x00000515, 0xed223df3, 0x7f3ef01d, 0x2e1176be}, + {0x91b444f9, 0x0000002e, 0x000007f8, 0x84e9a983, 0x5676756f, 0x66120546}, + {0x1b5d2ddb, 0x0000002e, 0x0000012c, 0xba638c4c, 0x3f42047b, 0xf256a5cc}, + {0xd824d1bb, 0x0000003a, 0x000007b5, 0x6288653b, 0x3a3ebea0, 0x4af1dd69}, + {0x0470180c, 0x00000034, 0x000001f0, 0x9d5b80d6, 0x3de08195, 0x56f0a04a}, + {0xffaa3a3f, 0x00000036, 0x00000299, 0xf3a82ab8, 0x53e0c13d, 0x74f6b6b2}, + {0x6406cfeb, 0x00000023, 0x00000600, 0xa920b8e8, 0xe4e2acf4, 0x085951fd}, + {0xb24aaa38, 0x0000003e, 0x000004a1, 0x657cc328, 0x5077b2c3, 0xc65387eb}, + {0x58b2ab7c, 0x00000039, 0x000002b4, 0x3a17ee7e, 0x9dcb3643, 0x1ca9257b}, + {0x3db85970, 0x00000006, 0x000002b6, 0x95268b59, 0xb9812c10, 0xfd196d76}, + {0x857830c5, 0x00000003, 0x00000590, 0x4ef439d5, 0xf042161d, 0x5ef88339}, + {0xe1fcd978, 0x0000003e, 0x000007d8, 0xae8d8699, 0xce0a1ef5, 0x2c3714d9}, + {0xb982a768, 0x00000016, 0x000006e0, 0x62fad3df, 0x5f8a067b, 0x58576548}, + {0x1d581ce8, 0x0000001e, 0x0000058b, 0xf0f5da53, 0x26e39eee, 0xfd7c57de}, + {0x2456719b, 0x00000025, 0x00000503, 0x4296ac64, 0xd50e4c14, 0xd5fedd59}, + {0xfae6d8f2, 0x00000000, 0x0000055d, 0x057fdf2e, 0x2a31391a, 0x1cc3b17b}, + {0xcba828e3, 0x00000039, 0x000002ce, 0xe3f22351, 0x8f00877b, 0x270eed73}, + {0x13d25952, 0x0000000a, 0x0000072d, 0x76d4b4cc, 0x5eb67ec3, 0x91ecbb11}, + {0x0342be3f, 0x00000015, 0x00000599, 0xec75d9f1, 0x9d4d2826, 0x05ed8d0c}, + {0xeaa344e0, 0x00000014, 0x000004d8, 0x72a4c981, 0x2064ea06, 0x0b09ad5b}, + {0xbbb52021, 0x0000003b, 0x00000272, 0x04af99fc, 0xaf042d35, 0xf8d511fb}, + {0xb66384dc, 0x0000001d, 0x000007fc, 0xd7629116, 0x782bd801, 0x5ad832cc}, + {0x616c01b6, 0x00000022, 0x000002c8, 0x5b1dab30, 0x783ce7d2, 0x1214d196}, + {0xce2bdaad, 0x00000016, 0x0000062a, 0x932535c8, 0x3f02926d, 0x5747218a}, + {0x00fe84d7, 0x00000005, 0x00000205, 0x850e50aa, 0x753d649c, 0xde8f14de}, + {0xbebdcb4c, 0x00000006, 0x0000055d, 0xbeaa37a2, 0x2d8c9eba, 0x3563b7b9}, + {0xd8b1a02a, 0x00000010, 0x00000387, 0x5017d2fc, 0x503541a5, 0x071475d0}, + {0x3b96cad2, 0x00000036, 0x00000347, 0x1d2372ae, 0x926cd90b, 0x54c79d60}, + {0xc94c1ed7, 0x00000005, 0x0000038b, 0x9e9fdb22, 0x144a9178, 0x4c53eee6}, + {0x1aad454e, 0x00000025, 0x000002b2, 0xc3f6315c, 0x5c7a35b3, 0x10137a3c}, + {0xa4fec9a6, 0x00000000, 0x000006d6, 0x90be5080, 0xa4107605, 0xaa9d6c73}, + {0x1bbe71e2, 0x0000001f, 0x000002fd, 0x4e504c3b, 0x284ccaf1, 0xb63d23e7}, + {0x4201c7e4, 0x00000002, 0x000002b7, 0x7822e3f9, 0x0cc912a9, 0x7f53e9cf}, + {0x23fddc96, 0x00000003, 0x00000627, 0x8a385125, 0x07767e78, 0x13c1cd83}, + {0xd82ba25c, 0x00000016, 0x0000063e, 0x98e4148a, 0x283330c9, 0x49ff5867}, + {0x786f2032, 0x0000002d, 0x0000060f, 0xf201600a, 0xf561bfcd, 0x8467f211}, + {0xfebe4e1f, 0x0000002a, 0x000004f2, 0x95e51961, 0xfd80dcab, 0x3f9683b2}, + {0x1a6e0a39, 0x00000008, 0x00000672, 0x8af6c2a5, 0x78dd84cb, 0x76a3f874}, + {0x56000ab8, 0x0000000e, 0x000000e5, 0x36bacb8f, 0x22ee1f77, 0x863b702f}, + {0x4717fe0c, 0x00000000, 0x000006ec, 0x8439f342, 0x5c8e03da, 0xdc6c58ff}, + {0xd5d5d68e, 0x0000003c, 0x000003a3, 0x46fff083, 0x177d1b39, 0x0622cc95}, + {0xc25dd6c6, 0x00000024, 0x000006c0, 0x5ceb8eb4, 0x892b0d16, 0xe85605cd}, + {0xe9b11300, 0x00000023, 0x00000683, 0x07a5d59a, 0x6c6a3208, 0x31da5f06}, + {0x95cd285e, 0x00000001, 0x00000047, 0x7b3a4368, 0x0202c07e, 0xa1f2e784}, + {0xd9245a25, 0x0000001e, 0x000003a6, 0xd33c1841, 0x1936c0d5, 0xb07cc616}, + {0x103279db, 0x00000006, 0x0000039b, 0xca09b8a0, 0x77d62892, 0xbf943b6c}, + {0x1cba3172, 0x00000027, 0x000001c8, 0xcb377194, 0xebe682db, 0x2c01af1c}, + {0x8f613739, 0x0000000c, 0x000001df, 0xb4b0bc87, 0x7710bd43, 0x0fe5f56d}, + {0x1c6aa90d, 0x0000001b, 0x0000053c, 0x70559245, 0xda7894ac, 0xf8943b2d}, + {0xaabe5b93, 0x0000003d, 0x00000715, 0xcdbf42fa, 0x0c3b99e7, 0xe4d89272}, + {0xf15dd038, 0x00000006, 0x000006db, 0x6e104aea, 0x8d5967f2, 0x7c2f6bbb}, + {0x584dd49c, 0x00000020, 0x000007bc, 0x36b6cfd6, 0xad4e23b2, 0xabbf388b}, + {0x5d8c9506, 0x00000020, 0x00000470, 0x4c62378e, 0x31d92640, 0x1dca1f4e}, + {0xb80d17b0, 0x00000032, 0x00000346, 0x22a5bb88, 0x9a7ec89f, 0x5c170e23}, + {0xdaf0592e, 0x00000023, 0x000007b0, 0x3cab3f99, 0x9b1fdd99, 0xc0e9d672}, + {0x4793cc85, 0x0000000d, 0x00000706, 0xe82e04f6, 0xed3db6b7, 0xc18bdc86}, + {0x82ebf64e, 0x00000009, 0x000007c3, 0x69d590a9, 0x9efa8499, 0xa874fcdd}, + {0xb18a0319, 0x00000026, 0x000007db, 0x1cf98dcc, 0x8fa9ad6a, 0x9dc0bb48}, }; #include <linux/time.h> @@ -1050,6 +1032,41 @@ static int __init crc32c_test(void) return 0; } +static int __init crc32c_combine_test(void) +{ + int i, j; + int errors = 0, runs = 0; + + for (i = 0; i < 10; i++) { + u32 crc_full; + + crc_full = __crc32c_le(test[i].crc, test_buf + test[i].start, + test[i].length); + for (j = 0; j <= test[i].length; ++j) { + u32 crc1, crc2; + u32 len1 = j, len2 = test[i].length - j; + + crc1 = __crc32c_le(test[i].crc, test_buf + + test[i].start, len1); + crc2 = __crc32c_le(0, test_buf + test[i].start + + len1, len2); + + if (!(crc_full == __crc32c_le_combine(crc1, crc2, len2) && + crc_full == test[i].crc32c_le)) + errors++; + runs++; + cond_resched(); + } + } + + if (errors) + pr_warn("crc32c_combine: %d/%d self tests failed\n", errors, runs); + else + pr_info("crc32c_combine: %d self tests passed\n", runs); + + return 0; +} + static int __init crc32_test(void) { int i; @@ -1109,10 +1126,49 @@ static int __init crc32_test(void) return 0; } +static int __init crc32_combine_test(void) +{ + int i, j; + int errors = 0, runs = 0; + + for (i = 0; i < 10; i++) { + u32 crc_full; + + crc_full = crc32_le(test[i].crc, test_buf + test[i].start, + test[i].length); + for (j = 0; j <= test[i].length; ++j) { + u32 crc1, crc2; + u32 len1 = j, len2 = test[i].length - j; + + crc1 = crc32_le(test[i].crc, test_buf + + test[i].start, len1); + crc2 = crc32_le(0, test_buf + test[i].start + + len1, len2); + + if (!(crc_full == crc32_le_combine(crc1, crc2, len2) && + crc_full == test[i].crc_le)) + errors++; + runs++; + cond_resched(); + } + } + + if (errors) + pr_warn("crc32_combine: %d/%d self tests failed\n", errors, runs); + else + pr_info("crc32_combine: %d self tests passed\n", runs); + + return 0; +} + static int __init crc32test_init(void) { crc32_test(); crc32c_test(); + + crc32_combine_test(); + crc32c_combine_test(); + return 0; } diff --git a/lib/crc7.c b/lib/crc7.c index f1c3a144cec..bf6255e2391 100644 --- a/lib/crc7.c +++ b/lib/crc7.c @@ -10,42 +10,47 @@ #include <linux/crc7.h> -/* Table for CRC-7 (polynomial x^7 + x^3 + 1) */ -const u8 crc7_syndrome_table[256] = { - 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, - 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77, - 0x19, 0x10, 0x0b, 0x02, 0x3d, 0x34, 0x2f, 0x26, - 0x51, 0x58, 0x43, 0x4a, 0x75, 0x7c, 0x67, 0x6e, - 0x32, 0x3b, 0x20, 0x29, 0x16, 0x1f, 0x04, 0x0d, - 0x7a, 0x73, 0x68, 0x61, 0x5e, 0x57, 0x4c, 0x45, - 0x2b, 0x22, 0x39, 0x30, 0x0f, 0x06, 0x1d, 0x14, - 0x63, 0x6a, 0x71, 0x78, 0x47, 0x4e, 0x55, 0x5c, - 0x64, 0x6d, 0x76, 0x7f, 0x40, 0x49, 0x52, 0x5b, - 0x2c, 0x25, 0x3e, 0x37, 0x08, 0x01, 0x1a, 0x13, - 0x7d, 0x74, 0x6f, 0x66, 0x59, 0x50, 0x4b, 0x42, - 0x35, 0x3c, 0x27, 0x2e, 0x11, 0x18, 0x03, 0x0a, - 0x56, 0x5f, 0x44, 0x4d, 0x72, 0x7b, 0x60, 0x69, - 0x1e, 0x17, 0x0c, 0x05, 0x3a, 0x33, 0x28, 0x21, - 0x4f, 0x46, 0x5d, 0x54, 0x6b, 0x62, 0x79, 0x70, - 0x07, 0x0e, 0x15, 0x1c, 0x23, 0x2a, 0x31, 0x38, - 0x41, 0x48, 0x53, 0x5a, 0x65, 0x6c, 0x77, 0x7e, - 0x09, 0x00, 0x1b, 0x12, 0x2d, 0x24, 0x3f, 0x36, - 0x58, 0x51, 0x4a, 0x43, 0x7c, 0x75, 0x6e, 0x67, - 0x10, 0x19, 0x02, 0x0b, 0x34, 0x3d, 0x26, 0x2f, - 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c, - 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, - 0x6a, 0x63, 0x78, 0x71, 0x4e, 0x47, 0x5c, 0x55, - 0x22, 0x2b, 0x30, 0x39, 0x06, 0x0f, 0x14, 0x1d, - 0x25, 0x2c, 0x37, 0x3e, 0x01, 0x08, 0x13, 0x1a, - 0x6d, 0x64, 0x7f, 0x76, 0x49, 0x40, 0x5b, 0x52, - 0x3c, 0x35, 0x2e, 0x27, 0x18, 0x11, 0x0a, 0x03, - 0x74, 0x7d, 0x66, 0x6f, 0x50, 0x59, 0x42, 0x4b, - 0x17, 0x1e, 0x05, 0x0c, 0x33, 0x3a, 0x21, 0x28, - 0x5f, 0x56, 0x4d, 0x44, 0x7b, 0x72, 0x69, 0x60, - 0x0e, 0x07, 0x1c, 0x15, 0x2a, 0x23, 0x38, 0x31, - 0x46, 0x4f, 0x54, 0x5d, 0x62, 0x6b, 0x70, 0x79 +/* + * Table for CRC-7 (polynomial x^7 + x^3 + 1). + * This is a big-endian CRC (msbit is highest power of x), + * aligned so the msbit of the byte is the x^6 coefficient + * and the lsbit is not used. + */ +const u8 crc7_be_syndrome_table[256] = { + 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, + 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee, + 0x32, 0x20, 0x16, 0x04, 0x7a, 0x68, 0x5e, 0x4c, + 0xa2, 0xb0, 0x86, 0x94, 0xea, 0xf8, 0xce, 0xdc, + 0x64, 0x76, 0x40, 0x52, 0x2c, 0x3e, 0x08, 0x1a, + 0xf4, 0xe6, 0xd0, 0xc2, 0xbc, 0xae, 0x98, 0x8a, + 0x56, 0x44, 0x72, 0x60, 0x1e, 0x0c, 0x3a, 0x28, + 0xc6, 0xd4, 0xe2, 0xf0, 0x8e, 0x9c, 0xaa, 0xb8, + 0xc8, 0xda, 0xec, 0xfe, 0x80, 0x92, 0xa4, 0xb6, + 0x58, 0x4a, 0x7c, 0x6e, 0x10, 0x02, 0x34, 0x26, + 0xfa, 0xe8, 0xde, 0xcc, 0xb2, 0xa0, 0x96, 0x84, + 0x6a, 0x78, 0x4e, 0x5c, 0x22, 0x30, 0x06, 0x14, + 0xac, 0xbe, 0x88, 0x9a, 0xe4, 0xf6, 0xc0, 0xd2, + 0x3c, 0x2e, 0x18, 0x0a, 0x74, 0x66, 0x50, 0x42, + 0x9e, 0x8c, 0xba, 0xa8, 0xd6, 0xc4, 0xf2, 0xe0, + 0x0e, 0x1c, 0x2a, 0x38, 0x46, 0x54, 0x62, 0x70, + 0x82, 0x90, 0xa6, 0xb4, 0xca, 0xd8, 0xee, 0xfc, + 0x12, 0x00, 0x36, 0x24, 0x5a, 0x48, 0x7e, 0x6c, + 0xb0, 0xa2, 0x94, 0x86, 0xf8, 0xea, 0xdc, 0xce, + 0x20, 0x32, 0x04, 0x16, 0x68, 0x7a, 0x4c, 0x5e, + 0xe6, 0xf4, 0xc2, 0xd0, 0xae, 0xbc, 0x8a, 0x98, + 0x76, 0x64, 0x52, 0x40, 0x3e, 0x2c, 0x1a, 0x08, + 0xd4, 0xc6, 0xf0, 0xe2, 0x9c, 0x8e, 0xb8, 0xaa, + 0x44, 0x56, 0x60, 0x72, 0x0c, 0x1e, 0x28, 0x3a, + 0x4a, 0x58, 0x6e, 0x7c, 0x02, 0x10, 0x26, 0x34, + 0xda, 0xc8, 0xfe, 0xec, 0x92, 0x80, 0xb6, 0xa4, + 0x78, 0x6a, 0x5c, 0x4e, 0x30, 0x22, 0x14, 0x06, + 0xe8, 0xfa, 0xcc, 0xde, 0xa0, 0xb2, 0x84, 0x96, + 0x2e, 0x3c, 0x0a, 0x18, 0x66, 0x74, 0x42, 0x50, + 0xbe, 0xac, 0x9a, 0x88, 0xf6, 0xe4, 0xd2, 0xc0, + 0x1c, 0x0e, 0x38, 0x2a, 0x54, 0x46, 0x70, 0x62, + 0x8c, 0x9e, 0xa8, 0xba, 0xc4, 0xd6, 0xe0, 0xf2 }; -EXPORT_SYMBOL(crc7_syndrome_table); +EXPORT_SYMBOL(crc7_be_syndrome_table); /** * crc7 - update the CRC7 for the data buffer @@ -55,14 +60,17 @@ EXPORT_SYMBOL(crc7_syndrome_table); * Context: any * * Returns the updated CRC7 value. + * The CRC7 is left-aligned in the byte (the lsbit is always 0), as that + * makes the computation easier, and all callers want it in that form. + * */ -u8 crc7(u8 crc, const u8 *buffer, size_t len) +u8 crc7_be(u8 crc, const u8 *buffer, size_t len) { while (len--) - crc = crc7_byte(crc, *buffer++); + crc = crc7_be_byte(crc, *buffer++); return crc; } -EXPORT_SYMBOL(crc7); +EXPORT_SYMBOL(crc7_be); MODULE_DESCRIPTION("CRC7 calculations"); MODULE_LICENSE("GPL"); diff --git a/lib/debugobjects.c b/lib/debugobjects.c index bf2c8b1043d..547f7f923db 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -7,6 +7,9 @@ * * For licencing details see kernel-base/COPYING */ + +#define pr_fmt(fmt) "ODEBUG: " fmt + #include <linux/debugobjects.h> #include <linux/interrupt.h> #include <linux/sched.h> @@ -196,7 +199,7 @@ static void free_object(struct debug_obj *obj) * initialized: */ if (obj_pool_free > ODEBUG_POOL_SIZE && obj_cache) - sched = keventd_up() && !work_pending(&debug_obj_work); + sched = keventd_up(); hlist_add_head(&obj->node, &obj_pool); obj_pool_free++; obj_pool_used--; @@ -218,7 +221,7 @@ static void debug_objects_oom(void) unsigned long flags; int i; - printk(KERN_WARNING "ODEBUG: Out of memory. ODEBUG disabled\n"); + pr_warn("Out of memory. ODEBUG disabled\n"); for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) { raw_spin_lock_irqsave(&db->lock, flags); @@ -292,11 +295,9 @@ static void debug_object_is_on_stack(void *addr, int onstack) limit++; if (is_on_stack) - printk(KERN_WARNING - "ODEBUG: object is on stack, but not annotated\n"); + pr_warn("object is on stack, but not annotated\n"); else - printk(KERN_WARNING - "ODEBUG: object is not on stack, but annotated\n"); + pr_warn("object is not on stack, but annotated\n"); WARN_ON(1); } @@ -985,7 +986,7 @@ static void __init debug_objects_selftest(void) if (check_results(&obj, ODEBUG_STATE_NONE, ++fixups, ++warnings)) goto out; #endif - printk(KERN_INFO "ODEBUG: selftest passed\n"); + pr_info("selftest passed\n"); out: debug_objects_fixups = oldfixups; @@ -1060,8 +1061,8 @@ static int __init debug_objects_replace_static_objects(void) } local_irq_enable(); - printk(KERN_DEBUG "ODEBUG: %d of %d active objects replaced\n", cnt, - obj_pool_used); + pr_debug("%d of %d active objects replaced\n", + cnt, obj_pool_used); return 0; free: hlist_for_each_entry_safe(obj, tmp, &objects, node) { @@ -1090,7 +1091,7 @@ void __init debug_objects_mem_init(void) debug_objects_enabled = 0; if (obj_cache) kmem_cache_destroy(obj_cache); - printk(KERN_WARNING "ODEBUG: out of memory.\n"); + pr_warn("out of memory.\n"); } else debug_objects_selftest(); } diff --git a/lib/decompress.c b/lib/decompress.c index 4d1cd0397aa..86069d74c06 100644 --- a/lib/decompress.c +++ b/lib/decompress.c @@ -16,6 +16,7 @@ #include <linux/types.h> #include <linux/string.h> #include <linux/init.h> +#include <linux/printk.h> #ifndef CONFIG_DECOMPRESS_GZIP # define gunzip NULL @@ -61,6 +62,8 @@ decompress_fn __init decompress_method(const unsigned char *inbuf, int len, if (len < 2) return NULL; /* Need at least this much... */ + pr_debug("Compressed data magic: %#.2x %#.2x\n", inbuf[0], inbuf[1]); + for (cf = compressed_formats; cf->name; cf++) { if (!memcmp(inbuf, cf->magic, 2)) break; diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c index d619b28c456..0edfd742a15 100644 --- a/lib/decompress_inflate.c +++ b/lib/decompress_inflate.c @@ -19,6 +19,7 @@ #include "zlib_inflate/inflate.h" #include "zlib_inflate/infutil.h" +#include <linux/decompress/inflate.h> #endif /* STATIC */ diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c index 3e67cfad16a..7d1e83caf8a 100644 --- a/lib/decompress_unlz4.c +++ b/lib/decompress_unlz4.c @@ -141,6 +141,7 @@ STATIC inline int INIT unlz4(u8 *input, int in_len, goto exit_2; } + ret = -1; if (flush && flush(outp, dest_len) != dest_len) goto exit_2; if (output) diff --git a/lib/devres.c b/lib/devres.c index 823533138fa..f562bf6ff71 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -81,11 +81,13 @@ EXPORT_SYMBOL(devm_ioremap_nocache); void devm_iounmap(struct device *dev, void __iomem *addr) { WARN_ON(devres_destroy(dev, devm_ioremap_release, devm_ioremap_match, - (void *)addr)); + (__force void *)addr)); iounmap(addr); } EXPORT_SYMBOL(devm_iounmap); +#define IOMEM_ERR_PTR(err) (__force void __iomem *)ERR_PTR(err) + /** * devm_ioremap_resource() - check, request region, and ioremap resource * @dev: generic device to handle the resource for @@ -114,7 +116,7 @@ void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res) if (!res || resource_type(res) != IORESOURCE_MEM) { dev_err(dev, "invalid resource\n"); - return ERR_PTR(-EINVAL); + return IOMEM_ERR_PTR(-EINVAL); } size = resource_size(res); @@ -122,7 +124,7 @@ void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res) if (!devm_request_mem_region(dev, res->start, size, name)) { dev_err(dev, "can't request region for resource %pR\n", res); - return ERR_PTR(-EBUSY); + return IOMEM_ERR_PTR(-EBUSY); } if (res->flags & IORESOURCE_CACHEABLE) @@ -133,7 +135,7 @@ void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res) if (!dest_ptr) { dev_err(dev, "ioremap failed for resource %pR\n", res); devm_release_mem_region(dev, res->start, size); - dest_ptr = ERR_PTR(-ENOMEM); + dest_ptr = IOMEM_ERR_PTR(-ENOMEM); } return dest_ptr; @@ -155,12 +157,12 @@ EXPORT_SYMBOL(devm_ioremap_resource); * if (!base) * return -EADDRNOTAVAIL; */ -void __iomem *devm_request_and_ioremap(struct device *device, +void __iomem *devm_request_and_ioremap(struct device *dev, struct resource *res) { void __iomem *dest_ptr; - dest_ptr = devm_ioremap_resource(device, res); + dest_ptr = devm_ioremap_resource(dev, res); if (IS_ERR(dest_ptr)) return NULL; @@ -168,7 +170,7 @@ void __iomem *devm_request_and_ioremap(struct device *device, } EXPORT_SYMBOL(devm_request_and_ioremap); -#ifdef CONFIG_HAS_IOPORT +#ifdef CONFIG_HAS_IOPORT_MAP /* * Generic iomap devres */ @@ -192,7 +194,7 @@ static int devm_ioport_map_match(struct device *dev, void *res, * Managed ioport_map(). Map is automatically unmapped on driver * detach. */ -void __iomem * devm_ioport_map(struct device *dev, unsigned long port, +void __iomem *devm_ioport_map(struct device *dev, unsigned long port, unsigned int nr) { void __iomem **ptr, *addr; @@ -224,10 +226,10 @@ void devm_ioport_unmap(struct device *dev, void __iomem *addr) { ioport_unmap(addr); WARN_ON(devres_destroy(dev, devm_ioport_map_release, - devm_ioport_map_match, (void *)addr)); + devm_ioport_map_match, (__force void *)addr)); } EXPORT_SYMBOL(devm_ioport_unmap); -#endif /* CONFIG_HAS_IOPORT */ +#endif /* CONFIG_HAS_IOPORT_MAP */ #ifdef CONFIG_PCI /* @@ -263,7 +265,7 @@ static void pcim_iomap_release(struct device *gendev, void *res) * be safely called without context and guaranteed to succed once * allocated. */ -void __iomem * const * pcim_iomap_table(struct pci_dev *pdev) +void __iomem * const *pcim_iomap_table(struct pci_dev *pdev) { struct pcim_iomap_devres *dr, *new_dr; @@ -288,7 +290,7 @@ EXPORT_SYMBOL(pcim_iomap_table); * Managed pci_iomap(). Map is automatically unmapped on driver * detach. */ -void __iomem * pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen) +void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen) { void __iomem **tbl; diff --git a/lib/digsig.c b/lib/digsig.c index 2f31e6a45f0..ae05ea393fc 100644 --- a/lib/digsig.c +++ b/lib/digsig.c @@ -175,10 +175,11 @@ err1: * digsig_verify() - digital signature verification with public key * @keyring: keyring to search key in * @sig: digital signature - * @sigen: length of the signature + * @siglen: length of the signature * @data: data * @datalen: length of the data - * @return: 0 on success, -EINVAL otherwise + * + * Returns 0 on success, -EINVAL otherwise * * Verifies data integrity against digital signature. * Currently only RSA is supported. @@ -209,7 +210,7 @@ int digsig_verify(struct key *keyring, const char *sig, int siglen, kref = keyring_search(make_key_ref(keyring, 1UL), &key_type_user, name); if (IS_ERR(kref)) - key = ERR_PTR(PTR_ERR(kref)); + key = ERR_CAST(kref); else key = key_ref_to_ptr(kref); } else { diff --git a/lib/dma-debug.c b/lib/dma-debug.c index d87a17a819d..98f2d7e91a9 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -53,11 +53,26 @@ enum map_err_types { #define DMA_DEBUG_STACKTRACE_ENTRIES 5 +/** + * struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping + * @list: node on pre-allocated free_entries list + * @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent + * @type: single, page, sg, coherent + * @pfn: page frame of the start address + * @offset: offset of mapping relative to pfn + * @size: length of the mapping + * @direction: enum dma_data_direction + * @sg_call_ents: 'nents' from dma_map_sg + * @sg_mapped_ents: 'mapped_ents' from dma_map_sg + * @map_err_type: track whether dma_mapping_error() was checked + * @stacktrace: support backtraces when a violation is detected + */ struct dma_debug_entry { struct list_head list; struct device *dev; int type; - phys_addr_t paddr; + unsigned long pfn; + size_t offset; u64 dev_addr; u64 size; int direction; @@ -372,6 +387,11 @@ static void hash_bucket_del(struct dma_debug_entry *entry) list_del(&entry->list); } +static unsigned long long phys_addr(struct dma_debug_entry *entry) +{ + return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset; +} + /* * Dump mapping entries for debugging purposes */ @@ -389,9 +409,9 @@ void debug_dma_dump_mappings(struct device *dev) list_for_each_entry(entry, &bucket->list, list) { if (!dev || dev == entry->dev) { dev_info(entry->dev, - "%s idx %d P=%Lx D=%Lx L=%Lx %s %s\n", + "%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n", type2name[entry->type], idx, - (unsigned long long)entry->paddr, + phys_addr(entry), entry->pfn, entry->dev_addr, entry->size, dir2name[entry->direction], maperr2str[entry->map_err_type]); @@ -404,6 +424,176 @@ void debug_dma_dump_mappings(struct device *dev) EXPORT_SYMBOL(debug_dma_dump_mappings); /* + * For each mapping (initial cacheline in the case of + * dma_alloc_coherent/dma_map_page, initial cacheline in each page of a + * scatterlist, or the cacheline specified in dma_map_single) insert + * into this tree using the cacheline as the key. At + * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry. If + * the entry already exists at insertion time add a tag as a reference + * count for the overlapping mappings. For now, the overlap tracking + * just ensures that 'unmaps' balance 'maps' before marking the + * cacheline idle, but we should also be flagging overlaps as an API + * violation. + * + * Memory usage is mostly constrained by the maximum number of available + * dma-debug entries in that we need a free dma_debug_entry before + * inserting into the tree. In the case of dma_map_page and + * dma_alloc_coherent there is only one dma_debug_entry and one + * dma_active_cacheline entry to track per event. dma_map_sg(), on the + * other hand, consumes a single dma_debug_entry, but inserts 'nents' + * entries into the tree. + * + * At any time debug_dma_assert_idle() can be called to trigger a + * warning if any cachelines in the given page are in the active set. + */ +static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT); +static DEFINE_SPINLOCK(radix_lock); +#define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1) +#define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT) +#define CACHELINES_PER_PAGE (1 << CACHELINE_PER_PAGE_SHIFT) + +static phys_addr_t to_cacheline_number(struct dma_debug_entry *entry) +{ + return (entry->pfn << CACHELINE_PER_PAGE_SHIFT) + + (entry->offset >> L1_CACHE_SHIFT); +} + +static int active_cacheline_read_overlap(phys_addr_t cln) +{ + int overlap = 0, i; + + for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) + if (radix_tree_tag_get(&dma_active_cacheline, cln, i)) + overlap |= 1 << i; + return overlap; +} + +static int active_cacheline_set_overlap(phys_addr_t cln, int overlap) +{ + int i; + + if (overlap > ACTIVE_CACHELINE_MAX_OVERLAP || overlap < 0) + return overlap; + + for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) + if (overlap & 1 << i) + radix_tree_tag_set(&dma_active_cacheline, cln, i); + else + radix_tree_tag_clear(&dma_active_cacheline, cln, i); + + return overlap; +} + +static void active_cacheline_inc_overlap(phys_addr_t cln) +{ + int overlap = active_cacheline_read_overlap(cln); + + overlap = active_cacheline_set_overlap(cln, ++overlap); + + /* If we overflowed the overlap counter then we're potentially + * leaking dma-mappings. Otherwise, if maps and unmaps are + * balanced then this overflow may cause false negatives in + * debug_dma_assert_idle() as the cacheline may be marked idle + * prematurely. + */ + WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP, + "DMA-API: exceeded %d overlapping mappings of cacheline %pa\n", + ACTIVE_CACHELINE_MAX_OVERLAP, &cln); +} + +static int active_cacheline_dec_overlap(phys_addr_t cln) +{ + int overlap = active_cacheline_read_overlap(cln); + + return active_cacheline_set_overlap(cln, --overlap); +} + +static int active_cacheline_insert(struct dma_debug_entry *entry) +{ + phys_addr_t cln = to_cacheline_number(entry); + unsigned long flags; + int rc; + + /* If the device is not writing memory then we don't have any + * concerns about the cpu consuming stale data. This mitigates + * legitimate usages of overlapping mappings. + */ + if (entry->direction == DMA_TO_DEVICE) + return 0; + + spin_lock_irqsave(&radix_lock, flags); + rc = radix_tree_insert(&dma_active_cacheline, cln, entry); + if (rc == -EEXIST) + active_cacheline_inc_overlap(cln); + spin_unlock_irqrestore(&radix_lock, flags); + + return rc; +} + +static void active_cacheline_remove(struct dma_debug_entry *entry) +{ + phys_addr_t cln = to_cacheline_number(entry); + unsigned long flags; + + /* ...mirror the insert case */ + if (entry->direction == DMA_TO_DEVICE) + return; + + spin_lock_irqsave(&radix_lock, flags); + /* since we are counting overlaps the final put of the + * cacheline will occur when the overlap count is 0. + * active_cacheline_dec_overlap() returns -1 in that case + */ + if (active_cacheline_dec_overlap(cln) < 0) + radix_tree_delete(&dma_active_cacheline, cln); + spin_unlock_irqrestore(&radix_lock, flags); +} + +/** + * debug_dma_assert_idle() - assert that a page is not undergoing dma + * @page: page to lookup in the dma_active_cacheline tree + * + * Place a call to this routine in cases where the cpu touching the page + * before the dma completes (page is dma_unmapped) will lead to data + * corruption. + */ +void debug_dma_assert_idle(struct page *page) +{ + static struct dma_debug_entry *ents[CACHELINES_PER_PAGE]; + struct dma_debug_entry *entry = NULL; + void **results = (void **) &ents; + unsigned int nents, i; + unsigned long flags; + phys_addr_t cln; + + if (!page) + return; + + cln = (phys_addr_t) page_to_pfn(page) << CACHELINE_PER_PAGE_SHIFT; + spin_lock_irqsave(&radix_lock, flags); + nents = radix_tree_gang_lookup(&dma_active_cacheline, results, cln, + CACHELINES_PER_PAGE); + for (i = 0; i < nents; i++) { + phys_addr_t ent_cln = to_cacheline_number(ents[i]); + + if (ent_cln == cln) { + entry = ents[i]; + break; + } else if (ent_cln >= cln + CACHELINES_PER_PAGE) + break; + } + spin_unlock_irqrestore(&radix_lock, flags); + + if (!entry) + return; + + cln = to_cacheline_number(entry); + err_printk(entry->dev, entry, + "DMA-API: cpu touching an active dma mapped cacheline [cln=%pa]\n", + &cln); +} + +/* * Wrapper function for adding an entry to the hash. * This function takes care of locking itself. */ @@ -411,10 +601,21 @@ static void add_dma_entry(struct dma_debug_entry *entry) { struct hash_bucket *bucket; unsigned long flags; + int rc; bucket = get_hash_bucket(entry, &flags); hash_bucket_add(bucket, entry); put_hash_bucket(bucket, &flags); + + rc = active_cacheline_insert(entry); + if (rc == -ENOMEM) { + pr_err("DMA-API: cacheline tracking ENOMEM, dma-debug disabled\n"); + global_disable = true; + } + + /* TODO: report -EEXIST errors here as overlapping mappings are + * not supported by the DMA API + */ } static struct dma_debug_entry *__dma_entry_alloc(void) @@ -469,6 +670,8 @@ static void dma_entry_free(struct dma_debug_entry *entry) { unsigned long flags; + active_cacheline_remove(entry); + /* * add to beginning of the list - this way the entries are * more likely cache hot when they are reallocated. @@ -895,15 +1098,15 @@ static void check_unmap(struct dma_debug_entry *ref) ref->dev_addr, ref->size, type2name[entry->type], type2name[ref->type]); } else if ((entry->type == dma_debug_coherent) && - (ref->paddr != entry->paddr)) { + (phys_addr(ref) != phys_addr(entry))) { err_printk(ref->dev, entry, "DMA-API: device driver frees " "DMA memory with different CPU address " "[device address=0x%016llx] [size=%llu bytes] " "[cpu alloc address=0x%016llx] " "[cpu free address=0x%016llx]", ref->dev_addr, ref->size, - (unsigned long long)entry->paddr, - (unsigned long long)ref->paddr); + phys_addr(entry), + phys_addr(ref)); } if (ref->sg_call_ents && ref->type == dma_debug_sg && @@ -1052,7 +1255,8 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, entry->dev = dev; entry->type = dma_debug_page; - entry->paddr = page_to_phys(page) + offset; + entry->pfn = page_to_pfn(page); + entry->offset = offset, entry->dev_addr = dma_addr; entry->size = size; entry->direction = direction; @@ -1148,7 +1352,8 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, entry->type = dma_debug_sg; entry->dev = dev; - entry->paddr = sg_phys(s); + entry->pfn = page_to_pfn(sg_page(s)); + entry->offset = s->offset, entry->size = sg_dma_len(s); entry->dev_addr = sg_dma_address(s); entry->direction = direction; @@ -1198,7 +1403,8 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, struct dma_debug_entry ref = { .type = dma_debug_sg, .dev = dev, - .paddr = sg_phys(s), + .pfn = page_to_pfn(sg_page(s)), + .offset = s->offset, .dev_addr = sg_dma_address(s), .size = sg_dma_len(s), .direction = dir, @@ -1233,7 +1439,8 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size, entry->type = dma_debug_coherent; entry->dev = dev; - entry->paddr = virt_to_phys(virt); + entry->pfn = page_to_pfn(virt_to_page(virt)); + entry->offset = (size_t) virt & PAGE_MASK; entry->size = size; entry->dev_addr = dma_addr; entry->direction = DMA_BIDIRECTIONAL; @@ -1248,7 +1455,8 @@ void debug_dma_free_coherent(struct device *dev, size_t size, struct dma_debug_entry ref = { .type = dma_debug_coherent, .dev = dev, - .paddr = virt_to_phys(virt), + .pfn = page_to_pfn(virt_to_page(virt)), + .offset = (size_t) virt & PAGE_MASK, .dev_addr = addr, .size = size, .direction = DMA_BIDIRECTIONAL, @@ -1356,7 +1564,8 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, struct dma_debug_entry ref = { .type = dma_debug_sg, .dev = dev, - .paddr = sg_phys(s), + .pfn = page_to_pfn(sg_page(s)), + .offset = s->offset, .dev_addr = sg_dma_address(s), .size = sg_dma_len(s), .direction = direction, @@ -1388,7 +1597,8 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, struct dma_debug_entry ref = { .type = dma_debug_sg, .dev = dev, - .paddr = sg_phys(s), + .pfn = page_to_pfn(sg_page(s)), + .offset = s->offset, .dev_addr = sg_dma_address(s), .size = sg_dma_len(s), .direction = direction, diff --git a/lib/dump_stack.c b/lib/dump_stack.c index f23b63f0a1c..6745c6230db 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -23,7 +23,7 @@ static void __dump_stack(void) #ifdef CONFIG_SMP static atomic_t dump_lock = ATOMIC_INIT(-1); -asmlinkage void dump_stack(void) +asmlinkage __visible void dump_stack(void) { int was_locked; int old; @@ -55,7 +55,7 @@ retry: preempt_enable(); } #else -asmlinkage void dump_stack(void) +asmlinkage __visible void dump_stack(void) { __dump_stack(); } diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index c37aeacd765..7288e38e175 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -8,6 +8,7 @@ * By Greg Banks <gnb@melbourne.sgi.com> * Copyright (c) 2008 Silicon Graphics Inc. All Rights Reserved. * Copyright (C) 2011 Bart Van Assche. All Rights Reserved. + * Copyright (C) 2013 Du, Changbin <changbin.du@gmail.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ @@ -24,6 +25,7 @@ #include <linux/sysctl.h> #include <linux/ctype.h> #include <linux/string.h> +#include <linux/parser.h> #include <linux/string_helpers.h> #include <linux/uaccess.h> #include <linux/dynamic_debug.h> @@ -147,7 +149,8 @@ static int ddebug_change(const struct ddebug_query *query, list_for_each_entry(dt, &ddebug_tables, link) { /* match against the module name */ - if (query->module && strcmp(query->module, dt->mod_name)) + if (query->module && + !match_wildcard(query->module, dt->mod_name)) continue; for (i = 0; i < dt->num_ddebugs; i++) { @@ -155,14 +158,16 @@ static int ddebug_change(const struct ddebug_query *query, /* match against the source filename */ if (query->filename && - strcmp(query->filename, dp->filename) && - strcmp(query->filename, kbasename(dp->filename)) && - strcmp(query->filename, trim_prefix(dp->filename))) + !match_wildcard(query->filename, dp->filename) && + !match_wildcard(query->filename, + kbasename(dp->filename)) && + !match_wildcard(query->filename, + trim_prefix(dp->filename))) continue; /* match against the function */ if (query->function && - strcmp(query->function, dp->function)) + !match_wildcard(query->function, dp->function)) continue; /* match against the format */ @@ -263,14 +268,12 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords) */ static inline int parse_lineno(const char *str, unsigned int *val) { - char *end = NULL; BUG_ON(str == NULL); if (*str == '\0') { *val = 0; return 0; } - *val = simple_strtoul(str, &end, 10); - if (end == NULL || end == str || *end != '\0') { + if (kstrtouint(str, 10, val) < 0) { pr_err("bad line-number: %s\n", str); return -EINVAL; } @@ -343,14 +346,14 @@ static int ddebug_parse_query(char *words[], int nwords, } if (last) *last++ = '\0'; - if (parse_lineno(first, &query->first_lineno) < 0) { - pr_err("line-number is <0\n"); + if (parse_lineno(first, &query->first_lineno) < 0) return -EINVAL; - } if (last) { /* range <first>-<last> */ - if (parse_lineno(last, &query->last_lineno) - < query->first_lineno) { + if (parse_lineno(last, &query->last_lineno) < 0) + return -EINVAL; + + if (query->last_lineno < query->first_lineno) { pr_err("last-line:%d < 1st-line:%d\n", query->last_lineno, query->first_lineno); diff --git a/lib/fdt_empty_tree.c b/lib/fdt_empty_tree.c new file mode 100644 index 00000000000..5d30c58150a --- /dev/null +++ b/lib/fdt_empty_tree.c @@ -0,0 +1,2 @@ +#include <linux/libfdt_env.h> +#include "../scripts/dtc/libfdt/fdt_empty_tree.c" diff --git a/lib/flex_array.c b/lib/flex_array.c index 6948a6692fc..2eed22fa507 100644 --- a/lib/flex_array.c +++ b/lib/flex_array.c @@ -90,8 +90,8 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total, { struct flex_array *ret; int elems_per_part = 0; - int reciprocal_elems = 0; int max_size = 0; + struct reciprocal_value reciprocal_elems = { 0 }; if (element_size) { elems_per_part = FLEX_ARRAY_ELEMENTS_PER_PART(element_size); @@ -119,6 +119,11 @@ EXPORT_SYMBOL(flex_array_alloc); static int fa_element_to_part_nr(struct flex_array *fa, unsigned int element_nr) { + /* + * if element_size == 0 we don't get here, so we never touch + * the zeroed fa->reciprocal_elems, which would yield invalid + * results + */ return reciprocal_divide(element_nr, fa->reciprocal_elems); } diff --git a/lib/genalloc.c b/lib/genalloc.c index 26cf20be72b..bdb9a456bcb 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -313,6 +313,35 @@ retry: EXPORT_SYMBOL(gen_pool_alloc); /** + * gen_pool_dma_alloc - allocate special memory from the pool for DMA usage + * @pool: pool to allocate from + * @size: number of bytes to allocate from the pool + * @dma: dma-view physical address return value. Use NULL if unneeded. + * + * Allocate the requested number of bytes from the specified pool. + * Uses the pool allocation function (with first-fit algorithm by default). + * Can not be used in NMI handler on architectures without + * NMI-safe cmpxchg implementation. + */ +void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size, dma_addr_t *dma) +{ + unsigned long vaddr; + + if (!pool) + return NULL; + + vaddr = gen_pool_alloc(pool, size); + if (!vaddr) + return NULL; + + if (dma) + *dma = gen_pool_virt_to_phys(pool, vaddr); + + return (void *)vaddr; +} +EXPORT_SYMBOL(gen_pool_dma_alloc); + +/** * gen_pool_free - free allocated special memory back to the pool * @pool: pool to free to * @addr: starting address of memory to free back to pool diff --git a/lib/hash.c b/lib/hash.c new file mode 100644 index 00000000000..fea973f4bd5 --- /dev/null +++ b/lib/hash.c @@ -0,0 +1,39 @@ +/* General purpose hashing library + * + * That's a start of a kernel hashing library, which can be extended + * with further algorithms in future. arch_fast_hash{2,}() will + * eventually resolve to an architecture optimized implementation. + * + * Copyright 2013 Francesco Fusco <ffusco@redhat.com> + * Copyright 2013 Daniel Borkmann <dborkman@redhat.com> + * Copyright 2013 Thomas Graf <tgraf@redhat.com> + * Licensed under the GNU General Public License, version 2.0 (GPLv2) + */ + +#include <linux/jhash.h> +#include <linux/hash.h> +#include <linux/cache.h> + +static struct fast_hash_ops arch_hash_ops __read_mostly = { + .hash = jhash, + .hash2 = jhash2, +}; + +u32 arch_fast_hash(const void *data, u32 len, u32 seed) +{ + return arch_hash_ops.hash(data, len, seed); +} +EXPORT_SYMBOL_GPL(arch_fast_hash); + +u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed) +{ + return arch_hash_ops.hash2(data, len, seed); +} +EXPORT_SYMBOL_GPL(arch_fast_hash2); + +static int __init hashlib_init(void) +{ + setup_arch_fast_hash(&arch_hash_ops); + return 0; +} +early_initcall(hashlib_init); diff --git a/lib/hexdump.c b/lib/hexdump.c index 3f0494c9d57..8499c810909 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -14,6 +14,8 @@ const char hex_asc[] = "0123456789abcdef"; EXPORT_SYMBOL(hex_asc); +const char hex_asc_upper[] = "0123456789ABCDEF"; +EXPORT_SYMBOL(hex_asc_upper); /** * hex_to_bin - convert a hex digit to its real value diff --git a/lib/idr.c b/lib/idr.c index bfe4db4e165..39158abebad 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -18,12 +18,6 @@ * pointer or what ever, we treat it as a (void *). You can pass this * id to a user for him to pass back at a later time. You then pass * that id to this code and it returns your pointer. - - * You can release ids at any time. When all ids are released, most of - * the memory is returned (we keep MAX_IDR_FREE) in a local pool so we - * don't need to go to the memory "store" during an id allocate, just - * so you don't need to be too concerned about locking and conflicts - * with the slab allocator. */ #ifndef TEST // to test in user space... @@ -151,7 +145,7 @@ static void idr_layer_rcu_free(struct rcu_head *head) static inline void free_layer(struct idr *idr, struct idr_layer *p) { - if (idr->hint && idr->hint == p) + if (idr->hint == p) RCU_INIT_POINTER(idr->hint, NULL); call_rcu(&p->rcu_head, idr_layer_rcu_free); } @@ -196,7 +190,7 @@ static void idr_mark_full(struct idr_layer **pa, int id) } } -int __idr_pre_get(struct idr *idp, gfp_t gfp_mask) +static int __idr_pre_get(struct idr *idp, gfp_t gfp_mask) { while (idp->id_free_cnt < MAX_IDR_FREE) { struct idr_layer *new; @@ -207,7 +201,6 @@ int __idr_pre_get(struct idr *idp, gfp_t gfp_mask) } return 1; } -EXPORT_SYMBOL(__idr_pre_get); /** * sub_alloc - try to allocate an id without growing the tree depth @@ -250,7 +243,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa, id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1; /* if already at the top layer, we need to grow */ - if (id >= 1 << (idp->layers * IDR_BITS)) { + if (id > idr_max(idp->layers)) { *starting_id = id; return -EAGAIN; } @@ -374,20 +367,6 @@ static void idr_fill_slot(struct idr *idr, void *ptr, int id, idr_mark_full(pa, id); } -int __idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) -{ - struct idr_layer *pa[MAX_IDR_LEVEL + 1]; - int rv; - - rv = idr_get_empty_slot(idp, starting_id, pa, 0, idp); - if (rv < 0) - return rv == -ENOMEM ? -EAGAIN : rv; - - idr_fill_slot(idp, ptr, rv, pa); - *id = rv; - return 0; -} -EXPORT_SYMBOL(__idr_get_new_above); /** * idr_preload - preload for idr_alloc() @@ -548,7 +527,7 @@ static void sub_remove(struct idr *idp, int shift, int id) n = id & IDR_MASK; if (likely(p != NULL && test_bit(n, p->bitmap))) { __clear_bit(n, p->bitmap); - rcu_assign_pointer(p->ary[n], NULL); + RCU_INIT_POINTER(p->ary[n], NULL); to_free = NULL; while(*paa && ! --((**paa)->count)){ if (to_free) @@ -577,6 +556,11 @@ void idr_remove(struct idr *idp, int id) if (id < 0) return; + if (id > idr_max(idp->layers)) { + idr_remove_warning(id); + return; + } + sub_remove(idp, (idp->layers - 1) * IDR_BITS, id); if (idp->top && idp->top->count == 1 && (idp->layers > 1) && idp->top->ary[0]) { @@ -594,20 +578,10 @@ void idr_remove(struct idr *idp, int id) bitmap_clear(to_free->bitmap, 0, IDR_SIZE); free_layer(idp, to_free); } - while (idp->id_free_cnt >= MAX_IDR_FREE) { - p = get_from_free_list(idp); - /* - * Note: we don't call the rcu callback here, since the only - * layers that fall into the freelist are those that have been - * preallocated. - */ - kmem_cache_free(idr_layer_cache, p); - } - return; } EXPORT_SYMBOL(idr_remove); -void __idr_remove_all(struct idr *idp) +static void __idr_remove_all(struct idr *idp) { int n, id, max; int bt_mask; @@ -617,7 +591,7 @@ void __idr_remove_all(struct idr *idp) n = idp->layers * IDR_BITS; p = idp->top; - rcu_assign_pointer(idp->top, NULL); + RCU_INIT_POINTER(idp->top, NULL); max = idr_max(idp->layers); id = 0; @@ -640,7 +614,6 @@ void __idr_remove_all(struct idr *idp) } idp->layers = 0; } -EXPORT_SYMBOL(__idr_remove_all); /** * idr_destroy - release all cached layers within an idr tree @@ -825,14 +798,12 @@ void *idr_replace(struct idr *idp, void *ptr, int id) p = idp->top; if (!p) - return ERR_PTR(-EINVAL); - - n = (p->layer+1) * IDR_BITS; + return ERR_PTR(-ENOENT); - if (id >= (1 << n)) - return ERR_PTR(-EINVAL); + if (id > idr_max(p->layer + 1)) + return ERR_PTR(-ENOENT); - n -= IDR_BITS; + n = p->layer * IDR_BITS; while ((n > 0) && p) { p = p->ary[(id >> n) & IDR_MASK]; n -= IDR_BITS; @@ -869,6 +840,16 @@ void idr_init(struct idr *idp) } EXPORT_SYMBOL(idr_init); +static int idr_has_entry(int id, void *p, void *data) +{ + return 1; +} + +bool idr_is_empty(struct idr *idp) +{ + return !idr_for_each(idp, idr_has_entry, NULL); +} +EXPORT_SYMBOL(idr_is_empty); /** * DOC: IDA description @@ -1033,6 +1014,9 @@ void ida_remove(struct ida *ida, int id) int n; struct ida_bitmap *bitmap; + if (idr_id > idr_max(ida->idr.layers)) + goto err; + /* clear full bits while looking up the leaf idr_layer */ while ((shift > 0) && p) { n = (idr_id >> shift) & IDR_MASK; @@ -1048,7 +1032,7 @@ void ida_remove(struct ida *ida, int id) __clear_bit(n, p->bitmap); bitmap = (void *)p->ary[n]; - if (!test_bit(offset, bitmap->bitmap)) + if (!bitmap || !test_bit(offset, bitmap->bitmap)) goto err; /* update bitmap and remove it if empty */ diff --git a/lib/interval_tree.c b/lib/interval_tree.c index e6eb406f2d6..f367f9ad544 100644 --- a/lib/interval_tree.c +++ b/lib/interval_tree.c @@ -1,6 +1,7 @@ #include <linux/init.h> #include <linux/interval_tree.h> #include <linux/interval_tree_generic.h> +#include <linux/module.h> #define START(node) ((node)->start) #define LAST(node) ((node)->last) @@ -8,3 +9,8 @@ INTERVAL_TREE_DEFINE(struct interval_tree_node, rb, unsigned long, __subtree_last, START, LAST,, interval_tree) + +EXPORT_SYMBOL_GPL(interval_tree_insert); +EXPORT_SYMBOL_GPL(interval_tree_remove); +EXPORT_SYMBOL_GPL(interval_tree_iter_first); +EXPORT_SYMBOL_GPL(interval_tree_iter_next); diff --git a/lib/interval_tree_test_main.c b/lib/interval_tree_test.c index 245900b98c8..245900b98c8 100644 --- a/lib/interval_tree_test_main.c +++ b/lib/interval_tree_test.c diff --git a/lib/iomap.c b/lib/iomap.c index 2c08f36862e..fc3dcb4b238 100644 --- a/lib/iomap.c +++ b/lib/iomap.c @@ -224,7 +224,7 @@ EXPORT_SYMBOL(iowrite8_rep); EXPORT_SYMBOL(iowrite16_rep); EXPORT_SYMBOL(iowrite32_rep); -#ifdef CONFIG_HAS_IOPORT +#ifdef CONFIG_HAS_IOPORT_MAP /* Create a virtual mapping cookie for an IO port range */ void __iomem *ioport_map(unsigned long port, unsigned int nr) { @@ -239,7 +239,7 @@ void ioport_unmap(void __iomem *addr) } EXPORT_SYMBOL(ioport_map); EXPORT_SYMBOL(ioport_unmap); -#endif /* CONFIG_HAS_IOPORT */ +#endif /* CONFIG_HAS_IOPORT_MAP */ #ifdef CONFIG_PCI /* Hide the details if this is a MMIO or PIO address space and just do what diff --git a/lib/iovec.c b/lib/iovec.c index 454baa88bf2..7a7c2da4cdd 100644 --- a/lib/iovec.c +++ b/lib/iovec.c @@ -51,3 +51,58 @@ int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len) return 0; } EXPORT_SYMBOL(memcpy_toiovec); + +/* + * Copy kernel to iovec. Returns -EFAULT on error. + */ + +int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata, + int offset, int len) +{ + int copy; + for (; len > 0; ++iov) { + /* Skip over the finished iovecs */ + if (unlikely(offset >= iov->iov_len)) { + offset -= iov->iov_len; + continue; + } + copy = min_t(unsigned int, iov->iov_len - offset, len); + if (copy_to_user(iov->iov_base + offset, kdata, copy)) + return -EFAULT; + offset = 0; + kdata += copy; + len -= copy; + } + + return 0; +} +EXPORT_SYMBOL(memcpy_toiovecend); + +/* + * Copy iovec to kernel. Returns -EFAULT on error. + */ + +int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, + int offset, int len) +{ + /* Skip over the finished iovecs */ + while (offset >= iov->iov_len) { + offset -= iov->iov_len; + iov++; + } + + while (len > 0) { + u8 __user *base = iov->iov_base + offset; + int copy = min_t(unsigned int, len, iov->iov_len - offset); + + offset = 0; + if (copy_from_user(kdata, base, copy)) + return -EFAULT; + len -= copy; + kdata += copy; + iov++; + } + + return 0; +} +EXPORT_SYMBOL(memcpy_fromiovecend); diff --git a/lib/kfifo.c b/lib/kfifo.c index 7b7f83027b7..d79b9d22206 100644 --- a/lib/kfifo.c +++ b/lib/kfifo.c @@ -215,7 +215,7 @@ static unsigned long kfifo_copy_from_user(struct __kfifo *fifo, * incrementing the fifo->in index counter */ smp_wmb(); - *copied = len - ret; + *copied = len - ret * esize; /* return the number of elements which are not copied */ return ret; } @@ -275,7 +275,7 @@ static unsigned long kfifo_copy_to_user(struct __kfifo *fifo, void __user *to, * incrementing the fifo->out index counter */ smp_wmb(); - *copied = len - ret; + *copied = len - ret * esize; /* return the number of elements which are not copied */ return ret; } diff --git a/lib/kobject.c b/lib/kobject.c index 96217513470..58751bb80a7 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -17,6 +17,25 @@ #include <linux/export.h> #include <linux/stat.h> #include <linux/slab.h> +#include <linux/random.h> + +/** + * kobject_namespace - return @kobj's namespace tag + * @kobj: kobject in question + * + * Returns namespace tag of @kobj if its parent has namespace ops enabled + * and thus @kobj should have a namespace tag associated with it. Returns + * %NULL otherwise. + */ +const void *kobject_namespace(struct kobject *kobj) +{ + const struct kobj_ns_type_operations *ns_ops = kobj_ns_ops(kobj); + + if (!ns_ops || ns_ops->type == KOBJ_NS_TYPE_NONE) + return NULL; + + return kobj->ktype->namespace(kobj); +} /* * populate_dir - populate directory with attributes. @@ -46,14 +65,39 @@ static int populate_dir(struct kobject *kobj) static int create_dir(struct kobject *kobj) { - int error = 0; - error = sysfs_create_dir(kobj); - if (!error) { - error = populate_dir(kobj); - if (error) - sysfs_remove_dir(kobj); + const struct kobj_ns_type_operations *ops; + int error; + + error = sysfs_create_dir_ns(kobj, kobject_namespace(kobj)); + if (error) + return error; + + error = populate_dir(kobj); + if (error) { + sysfs_remove_dir(kobj); + return error; } - return error; + + /* + * @kobj->sd may be deleted by an ancestor going away. Hold an + * extra reference so that it stays until @kobj is gone. + */ + sysfs_get(kobj->sd); + + /* + * If @kobj has ns_ops, its children need to be filtered based on + * their namespace tags. Enable namespace support on @kobj->sd. + */ + ops = kobj_child_ns_ops(kobj); + if (ops) { + BUG_ON(ops->type <= KOBJ_NS_TYPE_NONE); + BUG_ON(ops->type >= KOBJ_NS_TYPES); + BUG_ON(!kobj_ns_type_registered(ops->type)); + + sysfs_enable_ns(kobj->sd); + } + + return 0; } static int get_kobj_path_length(struct kobject *kobj) @@ -220,8 +264,10 @@ int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, return 0; kobj->name = kvasprintf(GFP_KERNEL, fmt, vargs); - if (!kobj->name) + if (!kobj->name) { + kobj->name = old_name; return -ENOMEM; + } /* ewww... some of these buggers have '/' in the name ... */ while ((s = strchr(kobj->name, '/'))) @@ -319,7 +365,7 @@ static int kobject_add_varg(struct kobject *kobj, struct kobject *parent, * * If @parent is set, then the parent of the @kobj will be set to it. * If @parent is NULL, then the parent of the @kobj will be set to the - * kobject associted with the kset assigned to this kobject. If no kset + * kobject associated with the kset assigned to this kobject. If no kset * is assigned to the kobject, then the kobject will be located in the * root of the sysfs tree. * @@ -428,7 +474,7 @@ int kobject_rename(struct kobject *kobj, const char *new_name) goto out; } - error = sysfs_rename_dir(kobj, new_name); + error = sysfs_rename_dir_ns(kobj, new_name, kobject_namespace(kobj)); if (error) goto out; @@ -472,6 +518,7 @@ int kobject_move(struct kobject *kobj, struct kobject *new_parent) if (kobj->kset) new_parent = kobject_get(&kobj->kset->kobj); } + /* old object path */ devpath = kobject_get_path(kobj, GFP_KERNEL); if (!devpath) { @@ -486,7 +533,7 @@ int kobject_move(struct kobject *kobj, struct kobject *new_parent) sprintf(devpath_string, "DEVPATH_OLD=%s", devpath); envp[0] = devpath_string; envp[1] = NULL; - error = sysfs_move_dir(kobj, new_parent); + error = sysfs_move_dir_ns(kobj, new_parent, kobject_namespace(kobj)); if (error) goto out; old_parent = kobj->parent; @@ -508,10 +555,15 @@ out: */ void kobject_del(struct kobject *kobj) { + struct kernfs_node *sd; + if (!kobj) return; + sd = kobj->sd; sysfs_remove_dir(kobj); + sysfs_put(sd); + kobj->state_in_sysfs = 0; kobj_kset_leave(kobj); kobject_put(kobj->parent); @@ -592,10 +644,12 @@ static void kobject_release(struct kref *kref) { struct kobject *kobj = container_of(kref, struct kobject, kref); #ifdef CONFIG_DEBUG_KOBJECT_RELEASE - pr_debug("kobject: '%s' (%p): %s, parent %p (delayed)\n", - kobject_name(kobj), kobj, __func__, kobj->parent); + unsigned long delay = HZ + HZ * (get_random_int() & 0x3); + pr_info("kobject: '%s' (%p): %s, parent %p (delayed %ld)\n", + kobject_name(kobj), kobj, __func__, kobj->parent, delay); INIT_DELAYED_WORK(&kobj->release, kobject_delayed_cleanup); - schedule_delayed_work(&kobj->release, HZ); + + schedule_delayed_work(&kobj->release, delay); #else kobject_cleanup(kobj); #endif @@ -725,6 +779,7 @@ const struct sysfs_ops kobj_sysfs_ops = { .show = kobj_attr_show, .store = kobj_attr_store, }; +EXPORT_SYMBOL_GPL(kobj_sysfs_ops); /** * kset_register - initialize and add a kset. @@ -753,6 +808,7 @@ void kset_unregister(struct kset *k) { if (!k) return; + kobject_del(&k->kobj); kobject_put(&k->kobj); } @@ -933,10 +989,7 @@ const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj) bool kobj_ns_current_may_mount(enum kobj_ns_type type) { - bool may_mount = false; - - if (type == KOBJ_NS_TYPE_NONE) - return true; + bool may_mount = true; spin_lock(&kobj_ns_type_lock); if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 52e5abbc41d..9ebf9e20de5 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -29,7 +29,9 @@ u64 uevent_seqnum; +#ifdef CONFIG_UEVENT_HELPER char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH; +#endif #ifdef CONFIG_NET struct uevent_sock { struct list_head list; @@ -88,11 +90,17 @@ out: #ifdef CONFIG_NET static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data) { - struct kobject *kobj = data; + struct kobject *kobj = data, *ksobj; const struct kobj_ns_type_operations *ops; ops = kobj_ns_ops(kobj); - if (ops) { + if (!ops && kobj->kset) { + ksobj = &kobj->kset->kobj; + if (ksobj->parent != NULL) + ops = kobj_ns_ops(ksobj->parent); + } + + if (ops && ops->netlink_ns && kobj->ktype->namespace) { const void *sock_ns, *ns; ns = kobj->ktype->namespace(kobj); sock_ns = ops->netlink_ns(dsk); @@ -103,6 +111,7 @@ static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data) } #endif +#ifdef CONFIG_UEVENT_HELPER static int kobj_usermode_filter(struct kobject *kobj) { const struct kobj_ns_type_operations *ops; @@ -118,6 +127,31 @@ static int kobj_usermode_filter(struct kobject *kobj) return 0; } +static int init_uevent_argv(struct kobj_uevent_env *env, const char *subsystem) +{ + int len; + + len = strlcpy(&env->buf[env->buflen], subsystem, + sizeof(env->buf) - env->buflen); + if (len >= (sizeof(env->buf) - env->buflen)) { + WARN(1, KERN_ERR "init_uevent_argv: buffer size too small\n"); + return -ENOMEM; + } + + env->argv[0] = uevent_helper; + env->argv[1] = &env->buf[env->buflen]; + env->argv[2] = NULL; + + env->buflen += len + 1; + return 0; +} + +static void cleanup_uevent_env(struct subprocess_info *info) +{ + kfree(info->data); +} +#endif + /** * kobject_uevent_env - send an uevent with environmental data * @@ -293,13 +327,11 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, #endif mutex_unlock(&uevent_sock_mutex); +#ifdef CONFIG_UEVENT_HELPER /* call uevent_helper, usually only enabled during early boot */ if (uevent_helper[0] && !kobj_usermode_filter(kobj)) { - char *argv [3]; + struct subprocess_info *info; - argv [0] = uevent_helper; - argv [1] = (char *)subsystem; - argv [2] = NULL; retval = add_uevent_var(env, "HOME=/"); if (retval) goto exit; @@ -307,10 +339,20 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, "PATH=/sbin:/bin:/usr/sbin:/usr/bin"); if (retval) goto exit; + retval = init_uevent_argv(env, subsystem); + if (retval) + goto exit; - retval = call_usermodehelper(argv[0], argv, - env->envp, UMH_WAIT_EXEC); + retval = -ENOMEM; + info = call_usermodehelper_setup(env->argv[0], env->argv, + env->envp, GFP_KERNEL, + NULL, cleanup_uevent_env, env); + if (info) { + retval = call_usermodehelper_exec(info, UMH_NO_WAIT); + env = NULL; /* freed by cleanup_uevent_env */ + } } +#endif exit: kfree(devpath); diff --git a/lib/kstrtox.c b/lib/kstrtox.c index f78ae0c0c4e..ec8da78df9b 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -92,7 +92,6 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res) rv = _parse_integer(s, base, &_res); if (rv & KSTRTOX_OVERFLOW) return -ERANGE; - rv &= ~KSTRTOX_OVERFLOW; if (rv == 0) return -EINVAL; s += rv; diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c index 244f5480c89..b3131f5cf8a 100644 --- a/lib/libcrc32c.c +++ b/lib/libcrc32c.c @@ -62,10 +62,7 @@ EXPORT_SYMBOL(crc32c); static int __init libcrc32c_mod_init(void) { tfm = crypto_alloc_shash("crc32c", 0, 0); - if (IS_ERR(tfm)) - return PTR_ERR(tfm); - - return 0; + return PTR_ERR_OR_ZERO(tfm); } static void __exit libcrc32c_mod_fini(void) diff --git a/lib/llist.c b/lib/llist.c index 4a70d120138..f76196d0740 100644 --- a/lib/llist.c +++ b/lib/llist.c @@ -81,3 +81,25 @@ struct llist_node *llist_del_first(struct llist_head *head) return entry; } EXPORT_SYMBOL_GPL(llist_del_first); + +/** + * llist_reverse_order - reverse order of a llist chain + * @head: first item of the list to be reversed + * + * Reverse the order of a chain of llist entries and return the + * new first entry. + */ +struct llist_node *llist_reverse_order(struct llist_node *head) +{ + struct llist_node *new_head = NULL; + + while (head) { + struct llist_node *tmp = head; + head = head->next; + tmp->next = new_head; + new_head = tmp; + } + + return new_head; +} +EXPORT_SYMBOL_GPL(llist_reverse_order); diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 6dc09d8f4c2..872a15a2a63 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -1002,7 +1002,7 @@ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask) * Some tests (e.g. double-unlock) might corrupt the preemption * count, so restore it: */ - preempt_count() = saved_preempt_count; + preempt_count_set(saved_preempt_count); #ifdef CONFIG_TRACE_IRQFLAGS if (softirq_count()) current->softirqs_enabled = 0; diff --git a/lib/lockref.c b/lib/lockref.c index e2cd2c0a882..f07a40d3387 100644 --- a/lib/lockref.c +++ b/lib/lockref.c @@ -1,7 +1,16 @@ #include <linux/export.h> #include <linux/lockref.h> +#include <linux/mutex.h> -#ifdef CONFIG_CMPXCHG_LOCKREF +#if USE_CMPXCHG_LOCKREF + +/* + * Allow weakly-ordered memory architectures to provide barrier-less + * cmpxchg semantics for lockref updates. + */ +#ifndef cmpxchg64_relaxed +# define cmpxchg64_relaxed cmpxchg64 +#endif /* * Note that the "cmpxchg()" reloads the "old" value for the @@ -14,12 +23,13 @@ while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) { \ struct lockref new = old, prev = old; \ CODE \ - old.lock_count = cmpxchg(&lockref->lock_count, \ - old.lock_count, new.lock_count); \ + old.lock_count = cmpxchg64_relaxed(&lockref->lock_count, \ + old.lock_count, \ + new.lock_count); \ if (likely(old.lock_count == prev.lock_count)) { \ SUCCESS; \ } \ - cpu_relax(); \ + arch_mutex_cpu_relax(); \ } \ } while (0) @@ -136,6 +146,7 @@ void lockref_mark_dead(struct lockref *lockref) assert_spin_locked(&lockref->lock); lockref->count = -128; } +EXPORT_SYMBOL(lockref_mark_dead); /** * lockref_get_not_dead - Increments count unless the ref is dead diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index df6839e3ce0..7a85967060a 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -72,6 +72,8 @@ static int lz4_uncompress(const char *source, char *dest, int osize) len = *ip++; for (; len == 255; length += 255) len = *ip++; + if (unlikely(length > (size_t)(length + len))) + goto _output_error; length += len; } @@ -106,6 +108,8 @@ static int lz4_uncompress(const char *source, char *dest, int osize) if (length == ML_MASK) { for (; *ip == 255; length += 255) ip++; + if (unlikely(length > (size_t)(length + *ip))) + goto _output_error; length += *ip++; } @@ -155,7 +159,7 @@ static int lz4_uncompress(const char *source, char *dest, int osize) /* write overflow error detected */ _output_error: - return (int) (-(((char *)ip) - source)); + return -1; } static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, @@ -188,6 +192,8 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, int s = 255; while ((ip < iend) && (s == 255)) { s = *ip++; + if (unlikely(length > (size_t)(length + s))) + goto _output_error; length += s; } } @@ -228,6 +234,8 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, if (length == ML_MASK) { while (ip < iend) { int s = *ip++; + if (unlikely(length > (size_t)(length + s))) + goto _output_error; length += s; if (s == 255) continue; @@ -280,7 +288,7 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, /* write overflow error detected */ _output_error: - return (int) (-(((char *) ip) - source)); + return -1; } int lz4_decompress(const unsigned char *src, size_t *src_len, diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c index 569985d522d..8563081e8da 100644 --- a/lib/lzo/lzo1x_decompress_safe.c +++ b/lib/lzo/lzo1x_decompress_safe.c @@ -19,11 +19,31 @@ #include <linux/lzo.h> #include "lzodefs.h" -#define HAVE_IP(x) ((size_t)(ip_end - ip) >= (size_t)(x)) -#define HAVE_OP(x) ((size_t)(op_end - op) >= (size_t)(x)) -#define NEED_IP(x) if (!HAVE_IP(x)) goto input_overrun -#define NEED_OP(x) if (!HAVE_OP(x)) goto output_overrun -#define TEST_LB(m_pos) if ((m_pos) < out) goto lookbehind_overrun +#define HAVE_IP(t, x) \ + (((size_t)(ip_end - ip) >= (size_t)(t + x)) && \ + (((t + x) >= t) && ((t + x) >= x))) + +#define HAVE_OP(t, x) \ + (((size_t)(op_end - op) >= (size_t)(t + x)) && \ + (((t + x) >= t) && ((t + x) >= x))) + +#define NEED_IP(t, x) \ + do { \ + if (!HAVE_IP(t, x)) \ + goto input_overrun; \ + } while (0) + +#define NEED_OP(t, x) \ + do { \ + if (!HAVE_OP(t, x)) \ + goto output_overrun; \ + } while (0) + +#define TEST_LB(m_pos) \ + do { \ + if ((m_pos) < out) \ + goto lookbehind_overrun; \ + } while (0) int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, unsigned char *out, size_t *out_len) @@ -58,14 +78,14 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, while (unlikely(*ip == 0)) { t += 255; ip++; - NEED_IP(1); + NEED_IP(1, 0); } t += 15 + *ip++; } t += 3; copy_literal_run: #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) - if (likely(HAVE_IP(t + 15) && HAVE_OP(t + 15))) { + if (likely(HAVE_IP(t, 15) && HAVE_OP(t, 15))) { const unsigned char *ie = ip + t; unsigned char *oe = op + t; do { @@ -81,8 +101,8 @@ copy_literal_run: } else #endif { - NEED_OP(t); - NEED_IP(t + 3); + NEED_OP(t, 0); + NEED_IP(t, 3); do { *op++ = *ip++; } while (--t > 0); @@ -95,7 +115,7 @@ copy_literal_run: m_pos -= t >> 2; m_pos -= *ip++ << 2; TEST_LB(m_pos); - NEED_OP(2); + NEED_OP(2, 0); op[0] = m_pos[0]; op[1] = m_pos[1]; op += 2; @@ -119,10 +139,10 @@ copy_literal_run: while (unlikely(*ip == 0)) { t += 255; ip++; - NEED_IP(1); + NEED_IP(1, 0); } t += 31 + *ip++; - NEED_IP(2); + NEED_IP(2, 0); } m_pos = op - 1; next = get_unaligned_le16(ip); @@ -137,10 +157,10 @@ copy_literal_run: while (unlikely(*ip == 0)) { t += 255; ip++; - NEED_IP(1); + NEED_IP(1, 0); } t += 7 + *ip++; - NEED_IP(2); + NEED_IP(2, 0); } next = get_unaligned_le16(ip); ip += 2; @@ -154,7 +174,7 @@ copy_literal_run: #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) if (op - m_pos >= 8) { unsigned char *oe = op + t; - if (likely(HAVE_OP(t + 15))) { + if (likely(HAVE_OP(t, 15))) { do { COPY8(op, m_pos); op += 8; @@ -164,7 +184,7 @@ copy_literal_run: m_pos += 8; } while (op < oe); op = oe; - if (HAVE_IP(6)) { + if (HAVE_IP(6, 0)) { state = next; COPY4(op, ip); op += next; @@ -172,7 +192,7 @@ copy_literal_run: continue; } } else { - NEED_OP(t); + NEED_OP(t, 0); do { *op++ = *m_pos++; } while (op < oe); @@ -181,7 +201,7 @@ copy_literal_run: #endif { unsigned char *oe = op + t; - NEED_OP(t); + NEED_OP(t, 0); op[0] = m_pos[0]; op[1] = m_pos[1]; op += 2; @@ -194,15 +214,15 @@ match_next: state = next; t = next; #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) - if (likely(HAVE_IP(6) && HAVE_OP(4))) { + if (likely(HAVE_IP(6, 0) && HAVE_OP(4, 0))) { COPY4(op, ip); op += t; ip += t; } else #endif { - NEED_IP(t + 3); - NEED_OP(t); + NEED_IP(t, 3); + NEED_OP(t, 0); while (t > 0) { *op++ = *ip++; t--; diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c index 657979f71be..bf076d281d4 100644 --- a/lib/mpi/mpiutil.c +++ b/lib/mpi/mpiutil.c @@ -121,3 +121,6 @@ void mpi_free(MPI a) kfree(a); } EXPORT_SYMBOL_GPL(mpi_free); + +MODULE_DESCRIPTION("Multiprecision maths library"); +MODULE_LICENSE("GPL"); diff --git a/lib/nlattr.c b/lib/nlattr.c index 18eca7809b0..9c3e85ff0a6 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -136,6 +136,7 @@ int nla_validate(const struct nlattr *head, int len, int maxtype, errout: return err; } +EXPORT_SYMBOL(nla_validate); /** * nla_policy_len - Determin the max. length of a policy @@ -162,6 +163,7 @@ nla_policy_len(const struct nla_policy *p, int n) return len; } +EXPORT_SYMBOL(nla_policy_len); /** * nla_parse - Parse a stream of attributes into a tb buffer @@ -201,13 +203,14 @@ int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, } if (unlikely(rem > 0)) - printk(KERN_WARNING "netlink: %d bytes leftover after parsing " - "attributes.\n", rem); + pr_warn_ratelimited("netlink: %d bytes leftover after parsing attributes in process `%s'.\n", + rem, current->comm); err = 0; errout: return err; } +EXPORT_SYMBOL(nla_parse); /** * nla_find - Find a specific attribute in a stream of attributes @@ -228,6 +231,7 @@ struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype) return NULL; } +EXPORT_SYMBOL(nla_find); /** * nla_strlcpy - Copy string attribute payload into a sized buffer @@ -258,6 +262,7 @@ size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize) return srclen; } +EXPORT_SYMBOL(nla_strlcpy); /** * nla_memcpy - Copy a netlink attribute into another memory area @@ -278,6 +283,7 @@ int nla_memcpy(void *dest, const struct nlattr *src, int count) return minlen; } +EXPORT_SYMBOL(nla_memcpy); /** * nla_memcmp - Compare an attribute with sized memory area @@ -295,6 +301,7 @@ int nla_memcmp(const struct nlattr *nla, const void *data, return d; } +EXPORT_SYMBOL(nla_memcmp); /** * nla_strcmp - Compare a string attribute against a string @@ -303,14 +310,21 @@ int nla_memcmp(const struct nlattr *nla, const void *data, */ int nla_strcmp(const struct nlattr *nla, const char *str) { - int len = strlen(str) + 1; - int d = nla_len(nla) - len; + int len = strlen(str); + char *buf = nla_data(nla); + int attrlen = nla_len(nla); + int d; + if (attrlen > 0 && buf[attrlen - 1] == '\0') + attrlen--; + + d = attrlen - len; if (d == 0) d = memcmp(nla_data(nla), str, len); return d; } +EXPORT_SYMBOL(nla_strcmp); #ifdef CONFIG_NET /** @@ -496,12 +510,3 @@ int nla_append(struct sk_buff *skb, int attrlen, const void *data) } EXPORT_SYMBOL(nla_append); #endif - -EXPORT_SYMBOL(nla_validate); -EXPORT_SYMBOL(nla_policy_len); -EXPORT_SYMBOL(nla_parse); -EXPORT_SYMBOL(nla_find); -EXPORT_SYMBOL(nla_strlcpy); -EXPORT_SYMBOL(nla_memcpy); -EXPORT_SYMBOL(nla_memcmp); -EXPORT_SYMBOL(nla_strcmp); diff --git a/lib/parser.c b/lib/parser.c index 807b2aaa33f..b6d11631231 100644 --- a/lib/parser.c +++ b/lib/parser.c @@ -113,6 +113,7 @@ int match_token(char *s, const match_table_t table, substring_t args[]) return p->token; } +EXPORT_SYMBOL(match_token); /** * match_number: scan a number in the given base from a substring_t @@ -163,6 +164,7 @@ int match_int(substring_t *s, int *result) { return match_number(s, result, 0); } +EXPORT_SYMBOL(match_int); /** * match_octal: - scan an octal representation of an integer from a substring_t @@ -177,6 +179,7 @@ int match_octal(substring_t *s, int *result) { return match_number(s, result, 8); } +EXPORT_SYMBOL(match_octal); /** * match_hex: - scan a hex representation of an integer from a substring_t @@ -191,6 +194,58 @@ int match_hex(substring_t *s, int *result) { return match_number(s, result, 16); } +EXPORT_SYMBOL(match_hex); + +/** + * match_wildcard: - parse if a string matches given wildcard pattern + * @pattern: wildcard pattern + * @str: the string to be parsed + * + * Description: Parse the string @str to check if matches wildcard + * pattern @pattern. The pattern may contain two type wildcardes: + * '*' - matches zero or more characters + * '?' - matches one character + * If it's matched, return true, else return false. + */ +bool match_wildcard(const char *pattern, const char *str) +{ + const char *s = str; + const char *p = pattern; + bool star = false; + + while (*s) { + switch (*p) { + case '?': + s++; + p++; + break; + case '*': + star = true; + str = s; + if (!*++p) + return true; + pattern = p; + break; + default: + if (*s == *p) { + s++; + p++; + } else { + if (!star) + return false; + str++; + s = str; + p = pattern; + } + break; + } + } + + if (*p == '*') + ++p; + return !*p; +} +EXPORT_SYMBOL(match_wildcard); /** * match_strlcpy: - Copy the characters from a substring_t to a sized buffer @@ -213,6 +268,7 @@ size_t match_strlcpy(char *dest, const substring_t *src, size_t size) } return ret; } +EXPORT_SYMBOL(match_strlcpy); /** * match_strdup: - allocate a new string with the contents of a substring_t @@ -230,10 +286,4 @@ char *match_strdup(const substring_t *s) match_strlcpy(p, s, sz); return p; } - -EXPORT_SYMBOL(match_token); -EXPORT_SYMBOL(match_int); -EXPORT_SYMBOL(match_octal); -EXPORT_SYMBOL(match_hex); -EXPORT_SYMBOL(match_strlcpy); EXPORT_SYMBOL(match_strdup); diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 7deeb6297a4..963b7034a51 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -53,6 +53,7 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release) ref->release = release; return 0; } +EXPORT_SYMBOL_GPL(percpu_ref_init); /** * percpu_ref_cancel_init - cancel percpu_ref_init() @@ -84,6 +85,7 @@ void percpu_ref_cancel_init(struct percpu_ref *ref) free_percpu(ref->pcpu_count); } } +EXPORT_SYMBOL_GPL(percpu_ref_cancel_init); static void percpu_ref_kill_rcu(struct rcu_head *rcu) { @@ -118,6 +120,9 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu) atomic_add((int) count - PCPU_COUNT_BIAS, &ref->count); + WARN_ONCE(atomic_read(&ref->count) <= 0, "percpu ref <= 0 (%i)", + atomic_read(&ref->count)); + /* @ref is viewed as dead on all CPUs, send out kill confirmation */ if (ref->confirm_kill) ref->confirm_kill(ref); @@ -156,3 +161,4 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref, call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu); } +EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm); diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c deleted file mode 100644 index 652a8ee8efe..00000000000 --- a/lib/percpu-rwsem.c +++ /dev/null @@ -1,165 +0,0 @@ -#include <linux/atomic.h> -#include <linux/rwsem.h> -#include <linux/percpu.h> -#include <linux/wait.h> -#include <linux/lockdep.h> -#include <linux/percpu-rwsem.h> -#include <linux/rcupdate.h> -#include <linux/sched.h> -#include <linux/errno.h> - -int __percpu_init_rwsem(struct percpu_rw_semaphore *brw, - const char *name, struct lock_class_key *rwsem_key) -{ - brw->fast_read_ctr = alloc_percpu(int); - if (unlikely(!brw->fast_read_ctr)) - return -ENOMEM; - - /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ - __init_rwsem(&brw->rw_sem, name, rwsem_key); - atomic_set(&brw->write_ctr, 0); - atomic_set(&brw->slow_read_ctr, 0); - init_waitqueue_head(&brw->write_waitq); - return 0; -} - -void percpu_free_rwsem(struct percpu_rw_semaphore *brw) -{ - free_percpu(brw->fast_read_ctr); - brw->fast_read_ctr = NULL; /* catch use after free bugs */ -} - -/* - * This is the fast-path for down_read/up_read, it only needs to ensure - * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the - * fast per-cpu counter. The writer uses synchronize_sched_expedited() to - * serialize with the preempt-disabled section below. - * - * The nontrivial part is that we should guarantee acquire/release semantics - * in case when - * - * R_W: down_write() comes after up_read(), the writer should see all - * changes done by the reader - * or - * W_R: down_read() comes after up_write(), the reader should see all - * changes done by the writer - * - * If this helper fails the callers rely on the normal rw_semaphore and - * atomic_dec_and_test(), so in this case we have the necessary barriers. - * - * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or - * __this_cpu_add() below can be reordered with any LOAD/STORE done by the - * reader inside the critical section. See the comments in down_write and - * up_write below. - */ -static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val) -{ - bool success = false; - - preempt_disable(); - if (likely(!atomic_read(&brw->write_ctr))) { - __this_cpu_add(*brw->fast_read_ctr, val); - success = true; - } - preempt_enable(); - - return success; -} - -/* - * Like the normal down_read() this is not recursive, the writer can - * come after the first percpu_down_read() and create the deadlock. - * - * Note: returns with lock_is_held(brw->rw_sem) == T for lockdep, - * percpu_up_read() does rwsem_release(). This pairs with the usage - * of ->rw_sem in percpu_down/up_write(). - */ -void percpu_down_read(struct percpu_rw_semaphore *brw) -{ - might_sleep(); - if (likely(update_fast_ctr(brw, +1))) { - rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_); - return; - } - - down_read(&brw->rw_sem); - atomic_inc(&brw->slow_read_ctr); - /* avoid up_read()->rwsem_release() */ - __up_read(&brw->rw_sem); -} - -void percpu_up_read(struct percpu_rw_semaphore *brw) -{ - rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_); - - if (likely(update_fast_ctr(brw, -1))) - return; - - /* false-positive is possible but harmless */ - if (atomic_dec_and_test(&brw->slow_read_ctr)) - wake_up_all(&brw->write_waitq); -} - -static int clear_fast_ctr(struct percpu_rw_semaphore *brw) -{ - unsigned int sum = 0; - int cpu; - - for_each_possible_cpu(cpu) { - sum += per_cpu(*brw->fast_read_ctr, cpu); - per_cpu(*brw->fast_read_ctr, cpu) = 0; - } - - return sum; -} - -/* - * A writer increments ->write_ctr to force the readers to switch to the - * slow mode, note the atomic_read() check in update_fast_ctr(). - * - * After that the readers can only inc/dec the slow ->slow_read_ctr counter, - * ->fast_read_ctr is stable. Once the writer moves its sum into the slow - * counter it represents the number of active readers. - * - * Finally the writer takes ->rw_sem for writing and blocks the new readers, - * then waits until the slow counter becomes zero. - */ -void percpu_down_write(struct percpu_rw_semaphore *brw) -{ - /* tell update_fast_ctr() there is a pending writer */ - atomic_inc(&brw->write_ctr); - /* - * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read - * so that update_fast_ctr() can't succeed. - * - * 2. Ensures we see the result of every previous this_cpu_add() in - * update_fast_ctr(). - * - * 3. Ensures that if any reader has exited its critical section via - * fast-path, it executes a full memory barrier before we return. - * See R_W case in the comment above update_fast_ctr(). - */ - synchronize_sched_expedited(); - - /* exclude other writers, and block the new readers completely */ - down_write(&brw->rw_sem); - - /* nobody can use fast_read_ctr, move its sum into slow_read_ctr */ - atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr); - - /* wait for all readers to complete their percpu_up_read() */ - wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr)); -} - -void percpu_up_write(struct percpu_rw_semaphore *brw) -{ - /* release the lock, but the readers can't use the fast-path */ - up_write(&brw->rw_sem); - /* - * Insert the barrier before the next fast-path in down_read, - * see W_R case in the comment above update_fast_ctr(). - */ - synchronize_sched_expedited(); - /* the last writer unblocks update_fast_ctr() */ - atomic_dec(&brw->write_ctr); -} diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 93c5d5ecff4..7dd33577b90 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -60,14 +60,15 @@ static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc) void percpu_counter_set(struct percpu_counter *fbc, s64 amount) { int cpu; + unsigned long flags; - raw_spin_lock(&fbc->lock); + raw_spin_lock_irqsave(&fbc->lock, flags); for_each_possible_cpu(cpu) { s32 *pcount = per_cpu_ptr(fbc->counters, cpu); *pcount = 0; } fbc->count = amount; - raw_spin_unlock(&fbc->lock); + raw_spin_unlock_irqrestore(&fbc->lock, flags); } EXPORT_SYMBOL(percpu_counter_set); @@ -78,12 +79,13 @@ void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch) preempt_disable(); count = __this_cpu_read(*fbc->counters) + amount; if (count >= batch || count <= -batch) { - raw_spin_lock(&fbc->lock); + unsigned long flags; + raw_spin_lock_irqsave(&fbc->lock, flags); fbc->count += count; - raw_spin_unlock(&fbc->lock); - __this_cpu_write(*fbc->counters, 0); + __this_cpu_sub(*fbc->counters, count - amount); + raw_spin_unlock_irqrestore(&fbc->lock, flags); } else { - __this_cpu_write(*fbc->counters, count); + this_cpu_add(*fbc->counters, amount); } preempt_enable(); } @@ -97,14 +99,15 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc) { s64 ret; int cpu; + unsigned long flags; - raw_spin_lock(&fbc->lock); + raw_spin_lock_irqsave(&fbc->lock, flags); ret = fbc->count; for_each_online_cpu(cpu) { s32 *pcount = per_cpu_ptr(fbc->counters, cpu); ret += *pcount; } - raw_spin_unlock(&fbc->lock); + raw_spin_unlock_irqrestore(&fbc->lock, flags); return ret; } EXPORT_SYMBOL(__percpu_counter_sum); @@ -166,7 +169,7 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb, struct percpu_counter *fbc; compute_batch_value(); - if (action != CPU_DEAD) + if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) return NOTIFY_OK; cpu = (unsigned long)hcpu; diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c index bab1ba2a4c7..93d145e5539 100644 --- a/lib/percpu_ida.c +++ b/lib/percpu_ida.c @@ -30,15 +30,6 @@ #include <linux/spinlock.h> #include <linux/percpu_ida.h> -/* - * Number of tags we move between the percpu freelist and the global freelist at - * a time - */ -#define IDA_PCPU_BATCH_MOVE 32U - -/* Max size of percpu freelist, */ -#define IDA_PCPU_SIZE ((IDA_PCPU_BATCH_MOVE * 3) / 2) - struct percpu_ida_cpu { /* * Even though this is percpu, we need a lock for tag stealing by remote @@ -63,9 +54,7 @@ static inline void move_tags(unsigned *dst, unsigned *dst_nr, /* * Try to steal tags from a remote cpu's percpu freelist. * - * We first check how many percpu freelists have tags - we don't steal tags - * unless enough percpu freelists have tags on them that it's possible more than - * half the total tags could be stuck on remote percpu freelists. + * We first check how many percpu freelists have tags * * Then we iterate through the cpus until we find some tags - we don't attempt * to find the "best" cpu to steal from, to keep cacheline bouncing to a @@ -78,8 +67,7 @@ static inline void steal_tags(struct percpu_ida *pool, struct percpu_ida_cpu *remote; for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags); - cpus_have_tags * IDA_PCPU_SIZE > pool->nr_tags / 2; - cpus_have_tags--) { + cpus_have_tags; cpus_have_tags--) { cpu = cpumask_next(cpu, &pool->cpus_have_tags); if (cpu >= nr_cpu_ids) { @@ -123,11 +111,10 @@ static inline void alloc_global_tags(struct percpu_ida *pool, { move_tags(tags->freelist, &tags->nr_free, pool->freelist, &pool->nr_free, - min(pool->nr_free, IDA_PCPU_BATCH_MOVE)); + min(pool->nr_free, pool->percpu_batch_size)); } -static inline unsigned alloc_local_tag(struct percpu_ida *pool, - struct percpu_ida_cpu *tags) +static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags) { int tag = -ENOSPC; @@ -142,22 +129,22 @@ static inline unsigned alloc_local_tag(struct percpu_ida *pool, /** * percpu_ida_alloc - allocate a tag * @pool: pool to allocate from - * @gfp: gfp flags + * @state: task state for prepare_to_wait * * Returns a tag - an integer in the range [0..nr_tags) (passed to * tag_pool_init()), or otherwise -ENOSPC on allocation failure. * * Safe to be called from interrupt context (assuming it isn't passed - * __GFP_WAIT, of course). + * TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, of course). * * @gfp indicates whether or not to wait until a free id is available (it's not * used for internal memory allocations); thus if passed __GFP_WAIT we may sleep * however long it takes until another thread frees an id (same semantics as a * mempool). * - * Will not fail if passed __GFP_WAIT. + * Will not fail if passed TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE. */ -int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp) +int percpu_ida_alloc(struct percpu_ida *pool, int state) { DEFINE_WAIT(wait); struct percpu_ida_cpu *tags; @@ -168,7 +155,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp) tags = this_cpu_ptr(pool->tag_cpu); /* Fastpath */ - tag = alloc_local_tag(pool, tags); + tag = alloc_local_tag(tags); if (likely(tag >= 0)) { local_irq_restore(flags); return tag; @@ -184,7 +171,8 @@ int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp) * * global lock held and irqs disabled, don't need percpu lock */ - prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE); + if (state != TASK_RUNNING) + prepare_to_wait(&pool->wait, &wait, state); if (!tags->nr_free) alloc_global_tags(pool, tags); @@ -201,16 +189,22 @@ int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp) spin_unlock(&pool->lock); local_irq_restore(flags); - if (tag >= 0 || !(gfp & __GFP_WAIT)) + if (tag >= 0 || state == TASK_RUNNING) break; + if (signal_pending_state(state, current)) { + tag = -ERESTARTSYS; + break; + } + schedule(); local_irq_save(flags); tags = this_cpu_ptr(pool->tag_cpu); } + if (state != TASK_RUNNING) + finish_wait(&pool->wait, &wait); - finish_wait(&pool->wait, &wait); return tag; } EXPORT_SYMBOL_GPL(percpu_ida_alloc); @@ -245,17 +239,17 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag) wake_up(&pool->wait); } - if (nr_free == IDA_PCPU_SIZE) { + if (nr_free == pool->percpu_max_size) { spin_lock(&pool->lock); /* * Global lock held and irqs disabled, don't need percpu * lock */ - if (tags->nr_free == IDA_PCPU_SIZE) { + if (tags->nr_free == pool->percpu_max_size) { move_tags(pool->freelist, &pool->nr_free, tags->freelist, &tags->nr_free, - IDA_PCPU_BATCH_MOVE); + pool->percpu_batch_size); wake_up(&pool->wait); } @@ -292,7 +286,8 @@ EXPORT_SYMBOL_GPL(percpu_ida_destroy); * Allocation is percpu, but sharding is limited by nr_tags - for best * performance, the workload should not span more cpus than nr_tags / 128. */ -int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags) +int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags, + unsigned long max_size, unsigned long batch_size) { unsigned i, cpu, order; @@ -301,6 +296,8 @@ int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags) init_waitqueue_head(&pool->wait); spin_lock_init(&pool->lock); pool->nr_tags = nr_tags; + pool->percpu_max_size = max_size; + pool->percpu_batch_size = batch_size; /* Guard against overflow */ if (nr_tags > (unsigned) INT_MAX + 1) { @@ -319,7 +316,7 @@ int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags) pool->nr_free = nr_tags; pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) + - IDA_PCPU_SIZE * sizeof(unsigned), + pool->percpu_max_size * sizeof(unsigned), sizeof(unsigned)); if (!pool->tag_cpu) goto err; @@ -332,4 +329,65 @@ err: percpu_ida_destroy(pool); return -ENOMEM; } -EXPORT_SYMBOL_GPL(percpu_ida_init); +EXPORT_SYMBOL_GPL(__percpu_ida_init); + +/** + * percpu_ida_for_each_free - iterate free ids of a pool + * @pool: pool to iterate + * @fn: interate callback function + * @data: parameter for @fn + * + * Note, this doesn't guarantee to iterate all free ids restrictly. Some free + * ids might be missed, some might be iterated duplicated, and some might + * be iterated and not free soon. + */ +int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn, + void *data) +{ + unsigned long flags; + struct percpu_ida_cpu *remote; + unsigned cpu, i, err = 0; + + local_irq_save(flags); + for_each_possible_cpu(cpu) { + remote = per_cpu_ptr(pool->tag_cpu, cpu); + spin_lock(&remote->lock); + for (i = 0; i < remote->nr_free; i++) { + err = fn(remote->freelist[i], data); + if (err) + break; + } + spin_unlock(&remote->lock); + if (err) + goto out; + } + + spin_lock(&pool->lock); + for (i = 0; i < pool->nr_free; i++) { + err = fn(pool->freelist[i], data); + if (err) + break; + } + spin_unlock(&pool->lock); +out: + local_irq_restore(flags); + return err; +} +EXPORT_SYMBOL_GPL(percpu_ida_for_each_free); + +/** + * percpu_ida_free_tags - return free tags number of a specific cpu or global pool + * @pool: pool related + * @cpu: specific cpu or global pool if @cpu == nr_cpu_ids + * + * Note: this just returns a snapshot of free tags number. + */ +unsigned percpu_ida_free_tags(struct percpu_ida *pool, int cpu) +{ + struct percpu_ida_cpu *remote; + if (cpu == nr_cpu_ids) + return pool->nr_free; + remote = per_cpu_ptr(pool->tag_cpu, cpu); + return remote->nr_free; +} +EXPORT_SYMBOL_GPL(percpu_ida_free_tags); diff --git a/lib/percpu_test.c b/lib/percpu_test.c new file mode 100644 index 00000000000..0b5d14dadd1 --- /dev/null +++ b/lib/percpu_test.c @@ -0,0 +1,138 @@ +#include <linux/module.h> + +/* validate @native and @pcp counter values match @expected */ +#define CHECK(native, pcp, expected) \ + do { \ + WARN((native) != (expected), \ + "raw %ld (0x%lx) != expected %lld (0x%llx)", \ + (native), (native), \ + (long long)(expected), (long long)(expected)); \ + WARN(__this_cpu_read(pcp) != (expected), \ + "pcp %ld (0x%lx) != expected %lld (0x%llx)", \ + __this_cpu_read(pcp), __this_cpu_read(pcp), \ + (long long)(expected), (long long)(expected)); \ + } while (0) + +static DEFINE_PER_CPU(long, long_counter); +static DEFINE_PER_CPU(unsigned long, ulong_counter); + +static int __init percpu_test_init(void) +{ + /* + * volatile prevents compiler from optimizing it uses, otherwise the + * +ul_one/-ul_one below would replace with inc/dec instructions. + */ + volatile unsigned int ui_one = 1; + long l = 0; + unsigned long ul = 0; + + pr_info("percpu test start\n"); + + preempt_disable(); + + l += -1; + __this_cpu_add(long_counter, -1); + CHECK(l, long_counter, -1); + + l += 1; + __this_cpu_add(long_counter, 1); + CHECK(l, long_counter, 0); + + ul = 0; + __this_cpu_write(ulong_counter, 0); + + ul += 1UL; + __this_cpu_add(ulong_counter, 1UL); + CHECK(ul, ulong_counter, 1); + + ul += -1UL; + __this_cpu_add(ulong_counter, -1UL); + CHECK(ul, ulong_counter, 0); + + ul += -(unsigned long)1; + __this_cpu_add(ulong_counter, -(unsigned long)1); + CHECK(ul, ulong_counter, -1); + + ul = 0; + __this_cpu_write(ulong_counter, 0); + + ul -= 1; + __this_cpu_dec(ulong_counter); + CHECK(ul, ulong_counter, -1); + CHECK(ul, ulong_counter, ULONG_MAX); + + l += -ui_one; + __this_cpu_add(long_counter, -ui_one); + CHECK(l, long_counter, 0xffffffff); + + l += ui_one; + __this_cpu_add(long_counter, ui_one); + CHECK(l, long_counter, (long)0x100000000LL); + + + l = 0; + __this_cpu_write(long_counter, 0); + + l -= ui_one; + __this_cpu_sub(long_counter, ui_one); + CHECK(l, long_counter, -1); + + l = 0; + __this_cpu_write(long_counter, 0); + + l += ui_one; + __this_cpu_add(long_counter, ui_one); + CHECK(l, long_counter, 1); + + l += -ui_one; + __this_cpu_add(long_counter, -ui_one); + CHECK(l, long_counter, (long)0x100000000LL); + + l = 0; + __this_cpu_write(long_counter, 0); + + l -= ui_one; + this_cpu_sub(long_counter, ui_one); + CHECK(l, long_counter, -1); + CHECK(l, long_counter, ULONG_MAX); + + ul = 0; + __this_cpu_write(ulong_counter, 0); + + ul += ui_one; + __this_cpu_add(ulong_counter, ui_one); + CHECK(ul, ulong_counter, 1); + + ul = 0; + __this_cpu_write(ulong_counter, 0); + + ul -= ui_one; + __this_cpu_sub(ulong_counter, ui_one); + CHECK(ul, ulong_counter, -1); + CHECK(ul, ulong_counter, ULONG_MAX); + + ul = 3; + __this_cpu_write(ulong_counter, 3); + + ul = this_cpu_sub_return(ulong_counter, ui_one); + CHECK(ul, ulong_counter, 2); + + ul = __this_cpu_sub_return(ulong_counter, ui_one); + CHECK(ul, ulong_counter, 1); + + preempt_enable(); + + pr_info("percpu test done\n"); + return -EAGAIN; /* Fail will directly unload the module */ +} + +static void __exit percpu_test_exit(void) +{ +} + +module_init(percpu_test_init) +module_exit(percpu_test_exit) + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Greg Thelen"); +MODULE_DESCRIPTION("percpu operations test"); diff --git a/lib/plist.c b/lib/plist.c index 1ebc95f7a46..d408e774b74 100644 --- a/lib/plist.c +++ b/lib/plist.c @@ -134,6 +134,46 @@ void plist_del(struct plist_node *node, struct plist_head *head) plist_check_head(head); } +/** + * plist_requeue - Requeue @node at end of same-prio entries. + * + * This is essentially an optimized plist_del() followed by + * plist_add(). It moves an entry already in the plist to + * after any other same-priority entries. + * + * @node: &struct plist_node pointer - entry to be moved + * @head: &struct plist_head pointer - list head + */ +void plist_requeue(struct plist_node *node, struct plist_head *head) +{ + struct plist_node *iter; + struct list_head *node_next = &head->node_list; + + plist_check_head(head); + BUG_ON(plist_head_empty(head)); + BUG_ON(plist_node_empty(node)); + + if (node == plist_last(head)) + return; + + iter = plist_next(node); + + if (node->prio != iter->prio) + return; + + plist_del(node, head); + + plist_for_each_continue(iter, head) { + if (node->prio != iter->prio) { + node_next = &iter->node_list; + break; + } + } + list_add_tail(&node->node_list, node_next); + + plist_check_head(head); +} + #ifdef CONFIG_DEBUG_PI_LIST #include <linux/sched.h> #include <linux/module.h> @@ -170,12 +210,20 @@ static void __init plist_test_check(int nr_expect) BUG_ON(prio_pos->prio_list.next != &first->prio_list); } +static void __init plist_test_requeue(struct plist_node *node) +{ + plist_requeue(node, &test_head); + + if (node != plist_last(&test_head)) + BUG_ON(node->prio == plist_next(node)->prio); +} + static int __init plist_test(void) { int nr_expect = 0, i, loop; unsigned int r = local_clock(); - pr_debug("start plist test\n"); + printk(KERN_DEBUG "start plist test\n"); plist_head_init(&test_head); for (i = 0; i < ARRAY_SIZE(test_node); i++) plist_node_init(test_node + i, 0); @@ -193,6 +241,10 @@ static int __init plist_test(void) nr_expect--; } plist_test_check(nr_expect); + if (!plist_node_empty(test_node + i)) { + plist_test_requeue(test_node + i); + plist_test_check(nr_expect); + } } for (i = 0; i < ARRAY_SIZE(test_node); i++) { @@ -203,7 +255,7 @@ static int __init plist_test(void) plist_test_check(nr_expect); } - pr_debug("end plist test\n"); + printk(KERN_DEBUG "end plist test\n"); return 0; } diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 7811ed3b4e7..3291a8e3749 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -27,6 +27,7 @@ #include <linux/radix-tree.h> #include <linux/percpu.h> #include <linux/slab.h> +#include <linux/kmemleak.h> #include <linux/notifier.h> #include <linux/cpu.h> #include <linux/string.h> @@ -35,33 +36,6 @@ #include <linux/hardirq.h> /* in_interrupt() */ -#ifdef __KERNEL__ -#define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) -#else -#define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */ -#endif - -#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) -#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) - -#define RADIX_TREE_TAG_LONGS \ - ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) - -struct radix_tree_node { - unsigned int height; /* Height from the bottom */ - unsigned int count; - union { - struct radix_tree_node *parent; /* Used when ascending tree */ - struct rcu_head rcu_head; /* Used when freeing node */ - }; - void __rcu *slots[RADIX_TREE_MAP_SIZE]; - unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; -}; - -#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) -#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ - RADIX_TREE_MAP_SHIFT)) - /* * The height_to_maxindex array needs to be one deeper than the maximum * path as height 0 holds only 1 entry. @@ -221,12 +195,17 @@ radix_tree_node_alloc(struct radix_tree_root *root) * succeed in getting a node here (and never reach * kmem_cache_alloc) */ - rtp = &__get_cpu_var(radix_tree_preloads); + rtp = this_cpu_ptr(&radix_tree_preloads); if (rtp->nr) { ret = rtp->nodes[rtp->nr - 1]; rtp->nodes[rtp->nr - 1] = NULL; rtp->nr--; } + /* + * Update the allocation stack trace as this is more useful + * for debugging. + */ + kmemleak_update_trace(ret); } if (ret == NULL) ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); @@ -277,14 +256,14 @@ static int __radix_tree_preload(gfp_t gfp_mask) int ret = -ENOMEM; preempt_disable(); - rtp = &__get_cpu_var(radix_tree_preloads); + rtp = this_cpu_ptr(&radix_tree_preloads); while (rtp->nr < ARRAY_SIZE(rtp->nodes)) { preempt_enable(); node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); if (node == NULL) goto out; preempt_disable(); - rtp = &__get_cpu_var(radix_tree_preloads); + rtp = this_cpu_ptr(&radix_tree_preloads); if (rtp->nr < ARRAY_SIZE(rtp->nodes)) rtp->nodes[rtp->nr++] = node; else @@ -369,7 +348,8 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) /* Increase the height. */ newheight = root->height+1; - node->height = newheight; + BUG_ON(newheight & ~RADIX_TREE_HEIGHT_MASK); + node->path = newheight; node->count = 1; node->parent = NULL; slot = root->rnode; @@ -387,23 +367,28 @@ out: } /** - * radix_tree_insert - insert into a radix tree + * __radix_tree_create - create a slot in a radix tree * @root: radix tree root * @index: index key - * @item: item to insert + * @nodep: returns node + * @slotp: returns slot * - * Insert an item into the radix tree at position @index. + * Create, if necessary, and return the node and slot for an item + * at position @index in the radix tree @root. + * + * Until there is more than one item in the tree, no nodes are + * allocated and @root->rnode is used as a direct slot instead of + * pointing to a node, in which case *@nodep will be NULL. + * + * Returns -ENOMEM, or 0 for success. */ -int radix_tree_insert(struct radix_tree_root *root, - unsigned long index, void *item) +int __radix_tree_create(struct radix_tree_root *root, unsigned long index, + struct radix_tree_node **nodep, void ***slotp) { struct radix_tree_node *node = NULL, *slot; - unsigned int height, shift; - int offset; + unsigned int height, shift, offset; int error; - BUG_ON(radix_tree_is_indirect_ptr(item)); - /* Make sure the tree is high enough. */ if (index > radix_tree_maxindex(root->height)) { error = radix_tree_extend(root, index); @@ -422,11 +407,12 @@ int radix_tree_insert(struct radix_tree_root *root, /* Have to add a child node. */ if (!(slot = radix_tree_node_alloc(root))) return -ENOMEM; - slot->height = height; + slot->path = height; slot->parent = node; if (node) { rcu_assign_pointer(node->slots[offset], slot); node->count++; + slot->path |= offset << RADIX_TREE_HEIGHT_SHIFT; } else rcu_assign_pointer(root->rnode, ptr_to_indirect(slot)); } @@ -439,16 +425,42 @@ int radix_tree_insert(struct radix_tree_root *root, height--; } - if (slot != NULL) + if (nodep) + *nodep = node; + if (slotp) + *slotp = node ? node->slots + offset : (void **)&root->rnode; + return 0; +} + +/** + * radix_tree_insert - insert into a radix tree + * @root: radix tree root + * @index: index key + * @item: item to insert + * + * Insert an item into the radix tree at position @index. + */ +int radix_tree_insert(struct radix_tree_root *root, + unsigned long index, void *item) +{ + struct radix_tree_node *node; + void **slot; + int error; + + BUG_ON(radix_tree_is_indirect_ptr(item)); + + error = __radix_tree_create(root, index, &node, &slot); + if (error) + return error; + if (*slot != NULL) return -EEXIST; + rcu_assign_pointer(*slot, item); if (node) { node->count++; - rcu_assign_pointer(node->slots[offset], item); - BUG_ON(tag_get(node, 0, offset)); - BUG_ON(tag_get(node, 1, offset)); + BUG_ON(tag_get(node, 0, index & RADIX_TREE_MAP_MASK)); + BUG_ON(tag_get(node, 1, index & RADIX_TREE_MAP_MASK)); } else { - rcu_assign_pointer(root->rnode, item); BUG_ON(root_tag_get(root, 0)); BUG_ON(root_tag_get(root, 1)); } @@ -457,15 +469,26 @@ int radix_tree_insert(struct radix_tree_root *root, } EXPORT_SYMBOL(radix_tree_insert); -/* - * is_slot == 1 : search for the slot. - * is_slot == 0 : search for the node. +/** + * __radix_tree_lookup - lookup an item in a radix tree + * @root: radix tree root + * @index: index key + * @nodep: returns node + * @slotp: returns slot + * + * Lookup and return the item at position @index in the radix + * tree @root. + * + * Until there is more than one item in the tree, no nodes are + * allocated and @root->rnode is used as a direct slot instead of + * pointing to a node, in which case *@nodep will be NULL. */ -static void *radix_tree_lookup_element(struct radix_tree_root *root, - unsigned long index, int is_slot) +void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, + struct radix_tree_node **nodep, void ***slotp) { + struct radix_tree_node *node, *parent; unsigned int height, shift; - struct radix_tree_node *node, **slot; + void **slot; node = rcu_dereference_raw(root->rnode); if (node == NULL) @@ -474,19 +497,24 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root, if (!radix_tree_is_indirect_ptr(node)) { if (index > 0) return NULL; - return is_slot ? (void *)&root->rnode : node; + + if (nodep) + *nodep = NULL; + if (slotp) + *slotp = (void **)&root->rnode; + return node; } node = indirect_to_ptr(node); - height = node->height; + height = node->path & RADIX_TREE_HEIGHT_MASK; if (index > radix_tree_maxindex(height)) return NULL; shift = (height-1) * RADIX_TREE_MAP_SHIFT; do { - slot = (struct radix_tree_node **) - (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); + parent = node; + slot = node->slots + ((index >> shift) & RADIX_TREE_MAP_MASK); node = rcu_dereference_raw(*slot); if (node == NULL) return NULL; @@ -495,7 +523,11 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root, height--; } while (height > 0); - return is_slot ? (void *)slot : indirect_to_ptr(node); + if (nodep) + *nodep = parent; + if (slotp) + *slotp = slot; + return node; } /** @@ -513,7 +545,11 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root, */ void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) { - return (void **)radix_tree_lookup_element(root, index, 1); + void **slot; + + if (!__radix_tree_lookup(root, index, NULL, &slot)) + return NULL; + return slot; } EXPORT_SYMBOL(radix_tree_lookup_slot); @@ -531,7 +567,7 @@ EXPORT_SYMBOL(radix_tree_lookup_slot); */ void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) { - return radix_tree_lookup_element(root, index, 0); + return __radix_tree_lookup(root, index, NULL, NULL); } EXPORT_SYMBOL(radix_tree_lookup); @@ -676,7 +712,7 @@ int radix_tree_tag_get(struct radix_tree_root *root, return (index == 0); node = indirect_to_ptr(node); - height = node->height; + height = node->path & RADIX_TREE_HEIGHT_MASK; if (index > radix_tree_maxindex(height)) return 0; @@ -713,7 +749,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, { unsigned shift, tag = flags & RADIX_TREE_ITER_TAG_MASK; struct radix_tree_node *rnode, *node; - unsigned long index, offset; + unsigned long index, offset, height; if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag)) return NULL; @@ -744,7 +780,8 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, return NULL; restart: - shift = (rnode->height - 1) * RADIX_TREE_MAP_SHIFT; + height = rnode->path & RADIX_TREE_HEIGHT_MASK; + shift = (height - 1) * RADIX_TREE_MAP_SHIFT; offset = index >> shift; /* Index outside of the tree */ @@ -946,81 +983,6 @@ next: } EXPORT_SYMBOL(radix_tree_range_tag_if_tagged); - -/** - * radix_tree_next_hole - find the next hole (not-present entry) - * @root: tree root - * @index: index key - * @max_scan: maximum range to search - * - * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest - * indexed hole. - * - * Returns: the index of the hole if found, otherwise returns an index - * outside of the set specified (in which case 'return - index >= max_scan' - * will be true). In rare cases of index wrap-around, 0 will be returned. - * - * radix_tree_next_hole may be called under rcu_read_lock. However, like - * radix_tree_gang_lookup, this will not atomically search a snapshot of - * the tree at a single point in time. For example, if a hole is created - * at index 5, then subsequently a hole is created at index 10, - * radix_tree_next_hole covering both indexes may return 10 if called - * under rcu_read_lock. - */ -unsigned long radix_tree_next_hole(struct radix_tree_root *root, - unsigned long index, unsigned long max_scan) -{ - unsigned long i; - - for (i = 0; i < max_scan; i++) { - if (!radix_tree_lookup(root, index)) - break; - index++; - if (index == 0) - break; - } - - return index; -} -EXPORT_SYMBOL(radix_tree_next_hole); - -/** - * radix_tree_prev_hole - find the prev hole (not-present entry) - * @root: tree root - * @index: index key - * @max_scan: maximum range to search - * - * Search backwards in the range [max(index-max_scan+1, 0), index] - * for the first hole. - * - * Returns: the index of the hole if found, otherwise returns an index - * outside of the set specified (in which case 'index - return >= max_scan' - * will be true). In rare cases of wrap-around, ULONG_MAX will be returned. - * - * radix_tree_next_hole may be called under rcu_read_lock. However, like - * radix_tree_gang_lookup, this will not atomically search a snapshot of - * the tree at a single point in time. For example, if a hole is created - * at index 10, then subsequently a hole is created at index 5, - * radix_tree_prev_hole covering both indexes may return 5 if called under - * rcu_read_lock. - */ -unsigned long radix_tree_prev_hole(struct radix_tree_root *root, - unsigned long index, unsigned long max_scan) -{ - unsigned long i; - - for (i = 0; i < max_scan; i++) { - if (!radix_tree_lookup(root, index)) - break; - index--; - if (index == ULONG_MAX) - break; - } - - return index; -} -EXPORT_SYMBOL(radix_tree_prev_hole); - /** * radix_tree_gang_lookup - perform multiple lookup on a radix tree * @root: radix tree root @@ -1189,7 +1151,7 @@ static unsigned long __locate(struct radix_tree_node *slot, void *item, unsigned int shift, height; unsigned long i; - height = slot->height; + height = slot->path & RADIX_TREE_HEIGHT_MASK; shift = (height-1) * RADIX_TREE_MAP_SHIFT; for ( ; height > 1; height--) { @@ -1252,9 +1214,12 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) } node = indirect_to_ptr(node); - max_index = radix_tree_maxindex(node->height); - if (cur_index > max_index) + max_index = radix_tree_maxindex(node->path & + RADIX_TREE_HEIGHT_MASK); + if (cur_index > max_index) { + rcu_read_unlock(); break; + } cur_index = __locate(node, item, cur_index, &found_index); rcu_read_unlock(); @@ -1335,48 +1300,89 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) } /** - * radix_tree_delete - delete an item from a radix tree + * __radix_tree_delete_node - try to free node after clearing a slot + * @root: radix tree root + * @node: node containing @index + * + * After clearing the slot at @index in @node from radix tree + * rooted at @root, call this function to attempt freeing the + * node and shrinking the tree. + * + * Returns %true if @node was freed, %false otherwise. + */ +bool __radix_tree_delete_node(struct radix_tree_root *root, + struct radix_tree_node *node) +{ + bool deleted = false; + + do { + struct radix_tree_node *parent; + + if (node->count) { + if (node == indirect_to_ptr(root->rnode)) { + radix_tree_shrink(root); + if (root->height == 0) + deleted = true; + } + return deleted; + } + + parent = node->parent; + if (parent) { + unsigned int offset; + + offset = node->path >> RADIX_TREE_HEIGHT_SHIFT; + parent->slots[offset] = NULL; + parent->count--; + } else { + root_tag_clear_all(root); + root->height = 0; + root->rnode = NULL; + } + + radix_tree_node_free(node); + deleted = true; + + node = parent; + } while (node); + + return deleted; +} + +/** + * radix_tree_delete_item - delete an item from a radix tree * @root: radix tree root * @index: index key + * @item: expected item * - * Remove the item at @index from the radix tree rooted at @root. + * Remove @item at @index from the radix tree rooted at @root. * - * Returns the address of the deleted item, or NULL if it was not present. + * Returns the address of the deleted item, or NULL if it was not present + * or the entry at the given @index was not @item. */ -void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) +void *radix_tree_delete_item(struct radix_tree_root *root, + unsigned long index, void *item) { - struct radix_tree_node *node = NULL; - struct radix_tree_node *slot = NULL; - struct radix_tree_node *to_free; - unsigned int height, shift; + struct radix_tree_node *node; + unsigned int offset; + void **slot; + void *entry; int tag; - int uninitialized_var(offset); - height = root->height; - if (index > radix_tree_maxindex(height)) - goto out; + entry = __radix_tree_lookup(root, index, &node, &slot); + if (!entry) + return NULL; - slot = root->rnode; - if (height == 0) { + if (item && entry != item) + return NULL; + + if (!node) { root_tag_clear_all(root); root->rnode = NULL; - goto out; + return entry; } - slot = indirect_to_ptr(slot); - shift = height * RADIX_TREE_MAP_SHIFT; - do { - if (slot == NULL) - goto out; - - shift -= RADIX_TREE_MAP_SHIFT; - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - node = slot; - slot = slot->slots[offset]; - } while (shift); - - if (slot == NULL) - goto out; + offset = index & RADIX_TREE_MAP_MASK; /* * Clear all tags associated with the item to be deleted. @@ -1387,40 +1393,27 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) radix_tree_tag_clear(root, index, tag); } - to_free = NULL; - /* Now free the nodes we do not need anymore */ - while (node) { - node->slots[offset] = NULL; - node->count--; - /* - * Queue the node for deferred freeing after the - * last reference to it disappears (set NULL, above). - */ - if (to_free) - radix_tree_node_free(to_free); - - if (node->count) { - if (node == indirect_to_ptr(root->rnode)) - radix_tree_shrink(root); - goto out; - } - - /* Node with zero slots in use so free it */ - to_free = node; + node->slots[offset] = NULL; + node->count--; - index >>= RADIX_TREE_MAP_SHIFT; - offset = index & RADIX_TREE_MAP_MASK; - node = node->parent; - } + __radix_tree_delete_node(root, node); - root_tag_clear_all(root); - root->height = 0; - root->rnode = NULL; - if (to_free) - radix_tree_node_free(to_free); + return entry; +} +EXPORT_SYMBOL(radix_tree_delete_item); -out: - return slot; +/** + * radix_tree_delete - delete an item from a radix tree + * @root: radix tree root + * @index: index key + * + * Remove the item at @index from the radix tree rooted at @root. + * + * Returns the address of the deleted item, or NULL if it was not present. + */ +void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) +{ + return radix_tree_delete_item(root, index, NULL); } EXPORT_SYMBOL(radix_tree_delete); @@ -1436,9 +1429,12 @@ int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag) EXPORT_SYMBOL(radix_tree_tagged); static void -radix_tree_node_ctor(void *node) +radix_tree_node_ctor(void *arg) { - memset(node, 0, sizeof(struct radix_tree_node)); + struct radix_tree_node *node = arg; + + memset(node, 0, sizeof(*node)); + INIT_LIST_HEAD(&node->private_list); } static __init unsigned long __maxindex(unsigned int height) diff --git a/lib/random32.c b/lib/random32.c index 52280d5526b..fa5da61ce7a 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -1,43 +1,46 @@ /* - This is a maximally equidistributed combined Tausworthe generator - based on code from GNU Scientific Library 1.5 (30 Jun 2004) - - x_n = (s1_n ^ s2_n ^ s3_n) - - s1_{n+1} = (((s1_n & 4294967294) <<12) ^ (((s1_n <<13) ^ s1_n) >>19)) - s2_{n+1} = (((s2_n & 4294967288) << 4) ^ (((s2_n << 2) ^ s2_n) >>25)) - s3_{n+1} = (((s3_n & 4294967280) <<17) ^ (((s3_n << 3) ^ s3_n) >>11)) - - The period of this generator is about 2^88. - - From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe - Generators", Mathematics of Computation, 65, 213 (1996), 203--213. - - This is available on the net from L'Ecuyer's home page, - - http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps - ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps - - There is an erratum in the paper "Tables of Maximally - Equidistributed Combined LFSR Generators", Mathematics of - Computation, 68, 225 (1999), 261--269: - http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps - - ... the k_j most significant bits of z_j must be non- - zero, for each j. (Note: this restriction also applies to the - computer code given in [4], but was mistakenly not mentioned in - that paper.) - - This affects the seeding procedure by imposing the requirement - s1 > 1, s2 > 7, s3 > 15. - -*/ + * This is a maximally equidistributed combined Tausworthe generator + * based on code from GNU Scientific Library 1.5 (30 Jun 2004) + * + * lfsr113 version: + * + * x_n = (s1_n ^ s2_n ^ s3_n ^ s4_n) + * + * s1_{n+1} = (((s1_n & 4294967294) << 18) ^ (((s1_n << 6) ^ s1_n) >> 13)) + * s2_{n+1} = (((s2_n & 4294967288) << 2) ^ (((s2_n << 2) ^ s2_n) >> 27)) + * s3_{n+1} = (((s3_n & 4294967280) << 7) ^ (((s3_n << 13) ^ s3_n) >> 21)) + * s4_{n+1} = (((s4_n & 4294967168) << 13) ^ (((s4_n << 3) ^ s4_n) >> 12)) + * + * The period of this generator is about 2^113 (see erratum paper). + * + * From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe + * Generators", Mathematics of Computation, 65, 213 (1996), 203--213: + * http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps + * ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps + * + * There is an erratum in the paper "Tables of Maximally Equidistributed + * Combined LFSR Generators", Mathematics of Computation, 68, 225 (1999), + * 261--269: http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps + * + * ... the k_j most significant bits of z_j must be non-zero, + * for each j. (Note: this restriction also applies to the + * computer code given in [4], but was mistakenly not mentioned + * in that paper.) + * + * This affects the seeding procedure by imposing the requirement + * s1 > 1, s2 > 7, s3 > 15, s4 > 127. + */ #include <linux/types.h> #include <linux/percpu.h> #include <linux/export.h> #include <linux/jiffies.h> #include <linux/random.h> +#include <linux/sched.h> + +#ifdef CONFIG_RANDOM32_SELFTEST +static void __init prandom_state_selftest(void); +#endif static DEFINE_PER_CPU(struct rnd_state, net_rand_state); @@ -52,11 +55,12 @@ u32 prandom_u32_state(struct rnd_state *state) { #define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b) - state->s1 = TAUSWORTHE(state->s1, 13, 19, 4294967294UL, 12); - state->s2 = TAUSWORTHE(state->s2, 2, 25, 4294967288UL, 4); - state->s3 = TAUSWORTHE(state->s3, 3, 11, 4294967280UL, 17); + state->s1 = TAUSWORTHE(state->s1, 6U, 13U, 4294967294U, 18U); + state->s2 = TAUSWORTHE(state->s2, 2U, 27U, 4294967288U, 2U); + state->s3 = TAUSWORTHE(state->s3, 13U, 21U, 4294967280U, 7U); + state->s4 = TAUSWORTHE(state->s4, 3U, 12U, 4294967168U, 13U); - return (state->s1 ^ state->s2 ^ state->s3); + return (state->s1 ^ state->s2 ^ state->s3 ^ state->s4); } EXPORT_SYMBOL(prandom_u32_state); @@ -69,15 +73,17 @@ EXPORT_SYMBOL(prandom_u32_state); */ u32 prandom_u32(void) { - unsigned long r; struct rnd_state *state = &get_cpu_var(net_rand_state); - r = prandom_u32_state(state); + u32 res; + + res = prandom_u32_state(state); put_cpu_var(state); - return r; + + return res; } EXPORT_SYMBOL(prandom_u32); -/* +/** * prandom_bytes_state - get the requested number of pseudo-random bytes * * @state: pointer to state structure holding seeded state. @@ -126,6 +132,38 @@ void prandom_bytes(void *buf, int bytes) } EXPORT_SYMBOL(prandom_bytes); +static void prandom_warmup(struct rnd_state *state) +{ + /* Calling RNG ten times to satify recurrence condition */ + prandom_u32_state(state); + prandom_u32_state(state); + prandom_u32_state(state); + prandom_u32_state(state); + prandom_u32_state(state); + prandom_u32_state(state); + prandom_u32_state(state); + prandom_u32_state(state); + prandom_u32_state(state); + prandom_u32_state(state); +} + +static void prandom_seed_very_weak(struct rnd_state *state, u32 seed) +{ + /* Note: This sort of seeding is ONLY used in test cases and + * during boot at the time from core_initcall until late_initcall + * as we don't have a stronger entropy source available yet. + * After late_initcall, we reseed entire state, we have to (!), + * otherwise an attacker just needs to search 32 bit space to + * probe for our internal 128 bit state if he knows a couple + * of prandom32 outputs! + */ +#define LCG(x) ((x) * 69069U) /* super-duper LCG */ + state->s1 = __seed(LCG(seed), 2U); + state->s2 = __seed(LCG(state->s1), 8U); + state->s3 = __seed(LCG(state->s2), 16U); + state->s4 = __seed(LCG(state->s3), 128U); +} + /** * prandom_seed - add entropy to pseudo random number generator * @seed: seed value @@ -141,7 +179,9 @@ void prandom_seed(u32 entropy) */ for_each_possible_cpu (i) { struct rnd_state *state = &per_cpu(net_rand_state, i); - state->s1 = __seed(state->s1 ^ entropy, 1); + + state->s1 = __seed(state->s1 ^ entropy, 2U); + prandom_warmup(state); } } EXPORT_SYMBOL(prandom_seed); @@ -154,46 +194,262 @@ static int __init prandom_init(void) { int i; +#ifdef CONFIG_RANDOM32_SELFTEST + prandom_state_selftest(); +#endif + for_each_possible_cpu(i) { struct rnd_state *state = &per_cpu(net_rand_state,i); -#define LCG(x) ((x) * 69069) /* super-duper LCG */ - state->s1 = __seed(LCG(i + jiffies), 1); - state->s2 = __seed(LCG(state->s1), 7); - state->s3 = __seed(LCG(state->s2), 15); - - /* "warm it up" */ - prandom_u32_state(state); - prandom_u32_state(state); - prandom_u32_state(state); - prandom_u32_state(state); - prandom_u32_state(state); - prandom_u32_state(state); + prandom_seed_very_weak(state, (i + jiffies) ^ random_get_entropy()); + prandom_warmup(state); } + return 0; } core_initcall(prandom_init); +static void __prandom_timer(unsigned long dontcare); +static DEFINE_TIMER(seed_timer, __prandom_timer, 0, 0); + +static void __prandom_timer(unsigned long dontcare) +{ + u32 entropy; + unsigned long expires; + + get_random_bytes(&entropy, sizeof(entropy)); + prandom_seed(entropy); + + /* reseed every ~60 seconds, in [40 .. 80) interval with slack */ + expires = 40 + (prandom_u32() % 40); + seed_timer.expires = jiffies + msecs_to_jiffies(expires * MSEC_PER_SEC); + + add_timer(&seed_timer); +} + +static void __init __prandom_start_seed_timer(void) +{ + set_timer_slack(&seed_timer, HZ); + seed_timer.expires = jiffies + msecs_to_jiffies(40 * MSEC_PER_SEC); + add_timer(&seed_timer); +} + /* * Generate better values after random number generator * is fully initialized. */ -static int __init prandom_reseed(void) +static void __prandom_reseed(bool late) { int i; + unsigned long flags; + static bool latch = false; + static DEFINE_SPINLOCK(lock); + + /* Asking for random bytes might result in bytes getting + * moved into the nonblocking pool and thus marking it + * as initialized. In this case we would double back into + * this function and attempt to do a late reseed. + * Ignore the pointless attempt to reseed again if we're + * already waiting for bytes when the nonblocking pool + * got initialized. + */ + + /* only allow initial seeding (late == false) once */ + if (!spin_trylock_irqsave(&lock, flags)) + return; + + if (latch && !late) + goto out; + + latch = true; for_each_possible_cpu(i) { struct rnd_state *state = &per_cpu(net_rand_state,i); - u32 seeds[3]; + u32 seeds[4]; get_random_bytes(&seeds, sizeof(seeds)); - state->s1 = __seed(seeds[0], 1); - state->s2 = __seed(seeds[1], 7); - state->s3 = __seed(seeds[2], 15); + state->s1 = __seed(seeds[0], 2U); + state->s2 = __seed(seeds[1], 8U); + state->s3 = __seed(seeds[2], 16U); + state->s4 = __seed(seeds[3], 128U); - /* mix it in */ - prandom_u32_state(state); + prandom_warmup(state); } +out: + spin_unlock_irqrestore(&lock, flags); +} + +void prandom_reseed_late(void) +{ + __prandom_reseed(true); +} + +static int __init prandom_reseed(void) +{ + __prandom_reseed(false); + __prandom_start_seed_timer(); return 0; } late_initcall(prandom_reseed); + +#ifdef CONFIG_RANDOM32_SELFTEST +static struct prandom_test1 { + u32 seed; + u32 result; +} test1[] = { + { 1U, 3484351685U }, + { 2U, 2623130059U }, + { 3U, 3125133893U }, + { 4U, 984847254U }, +}; + +static struct prandom_test2 { + u32 seed; + u32 iteration; + u32 result; +} test2[] = { + /* Test cases against taus113 from GSL library. */ + { 931557656U, 959U, 2975593782U }, + { 1339693295U, 876U, 3887776532U }, + { 1545556285U, 961U, 1615538833U }, + { 601730776U, 723U, 1776162651U }, + { 1027516047U, 687U, 511983079U }, + { 416526298U, 700U, 916156552U }, + { 1395522032U, 652U, 2222063676U }, + { 366221443U, 617U, 2992857763U }, + { 1539836965U, 714U, 3783265725U }, + { 556206671U, 994U, 799626459U }, + { 684907218U, 799U, 367789491U }, + { 2121230701U, 931U, 2115467001U }, + { 1668516451U, 644U, 3620590685U }, + { 768046066U, 883U, 2034077390U }, + { 1989159136U, 833U, 1195767305U }, + { 536585145U, 996U, 3577259204U }, + { 1008129373U, 642U, 1478080776U }, + { 1740775604U, 939U, 1264980372U }, + { 1967883163U, 508U, 10734624U }, + { 1923019697U, 730U, 3821419629U }, + { 442079932U, 560U, 3440032343U }, + { 1961302714U, 845U, 841962572U }, + { 2030205964U, 962U, 1325144227U }, + { 1160407529U, 507U, 240940858U }, + { 635482502U, 779U, 4200489746U }, + { 1252788931U, 699U, 867195434U }, + { 1961817131U, 719U, 668237657U }, + { 1071468216U, 983U, 917876630U }, + { 1281848367U, 932U, 1003100039U }, + { 582537119U, 780U, 1127273778U }, + { 1973672777U, 853U, 1071368872U }, + { 1896756996U, 762U, 1127851055U }, + { 847917054U, 500U, 1717499075U }, + { 1240520510U, 951U, 2849576657U }, + { 1685071682U, 567U, 1961810396U }, + { 1516232129U, 557U, 3173877U }, + { 1208118903U, 612U, 1613145022U }, + { 1817269927U, 693U, 4279122573U }, + { 1510091701U, 717U, 638191229U }, + { 365916850U, 807U, 600424314U }, + { 399324359U, 702U, 1803598116U }, + { 1318480274U, 779U, 2074237022U }, + { 697758115U, 840U, 1483639402U }, + { 1696507773U, 840U, 577415447U }, + { 2081979121U, 981U, 3041486449U }, + { 955646687U, 742U, 3846494357U }, + { 1250683506U, 749U, 836419859U }, + { 595003102U, 534U, 366794109U }, + { 47485338U, 558U, 3521120834U }, + { 619433479U, 610U, 3991783875U }, + { 704096520U, 518U, 4139493852U }, + { 1712224984U, 606U, 2393312003U }, + { 1318233152U, 922U, 3880361134U }, + { 855572992U, 761U, 1472974787U }, + { 64721421U, 703U, 683860550U }, + { 678931758U, 840U, 380616043U }, + { 692711973U, 778U, 1382361947U }, + { 677703619U, 530U, 2826914161U }, + { 92393223U, 586U, 1522128471U }, + { 1222592920U, 743U, 3466726667U }, + { 358288986U, 695U, 1091956998U }, + { 1935056945U, 958U, 514864477U }, + { 735675993U, 990U, 1294239989U }, + { 1560089402U, 897U, 2238551287U }, + { 70616361U, 829U, 22483098U }, + { 368234700U, 731U, 2913875084U }, + { 20221190U, 879U, 1564152970U }, + { 539444654U, 682U, 1835141259U }, + { 1314987297U, 840U, 1801114136U }, + { 2019295544U, 645U, 3286438930U }, + { 469023838U, 716U, 1637918202U }, + { 1843754496U, 653U, 2562092152U }, + { 400672036U, 809U, 4264212785U }, + { 404722249U, 965U, 2704116999U }, + { 600702209U, 758U, 584979986U }, + { 519953954U, 667U, 2574436237U }, + { 1658071126U, 694U, 2214569490U }, + { 420480037U, 749U, 3430010866U }, + { 690103647U, 969U, 3700758083U }, + { 1029424799U, 937U, 3787746841U }, + { 2012608669U, 506U, 3362628973U }, + { 1535432887U, 998U, 42610943U }, + { 1330635533U, 857U, 3040806504U }, + { 1223800550U, 539U, 3954229517U }, + { 1322411537U, 680U, 3223250324U }, + { 1877847898U, 945U, 2915147143U }, + { 1646356099U, 874U, 965988280U }, + { 805687536U, 744U, 4032277920U }, + { 1948093210U, 633U, 1346597684U }, + { 392609744U, 783U, 1636083295U }, + { 690241304U, 770U, 1201031298U }, + { 1360302965U, 696U, 1665394461U }, + { 1220090946U, 780U, 1316922812U }, + { 447092251U, 500U, 3438743375U }, + { 1613868791U, 592U, 828546883U }, + { 523430951U, 548U, 2552392304U }, + { 726692899U, 810U, 1656872867U }, + { 1364340021U, 836U, 3710513486U }, + { 1986257729U, 931U, 935013962U }, + { 407983964U, 921U, 728767059U }, +}; + +static void __init prandom_state_selftest(void) +{ + int i, j, errors = 0, runs = 0; + bool error = false; + + for (i = 0; i < ARRAY_SIZE(test1); i++) { + struct rnd_state state; + + prandom_seed_very_weak(&state, test1[i].seed); + prandom_warmup(&state); + + if (test1[i].result != prandom_u32_state(&state)) + error = true; + } + + if (error) + pr_warn("prandom: seed boundary self test failed\n"); + else + pr_info("prandom: seed boundary self test passed\n"); + + for (i = 0; i < ARRAY_SIZE(test2); i++) { + struct rnd_state state; + + prandom_seed_very_weak(&state, test2[i].seed); + prandom_warmup(&state); + + for (j = 0; j < test2[i].iteration - 1; j++) + prandom_u32_state(&state); + + if (test2[i].result != prandom_u32_state(&state)) + errors++; + + runs++; + cond_resched(); + } + + if (errors) + pr_warn("prandom: %d/%d self tests failed\n", errors, runs); + else + pr_info("prandom: %d self tests passed\n", runs); +} +#endif diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index 31dd4ccd3ba..8b3c9dc8826 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -8,8 +8,8 @@ #define CHECK_LOOPS 100 struct test_node { - struct rb_node rb; u32 key; + struct rb_node rb; /* following fields used for testing augmented rbtree functionality */ u32 val; @@ -114,6 +114,16 @@ static int black_path_count(struct rb_node *rb) return count; } +static void check_postorder_foreach(int nr_nodes) +{ + struct test_node *cur, *n; + int count = 0; + rbtree_postorder_for_each_entry_safe(cur, n, &root, rb) + count++; + + WARN_ON_ONCE(count != nr_nodes); +} + static void check_postorder(int nr_nodes) { struct rb_node *rb; @@ -148,6 +158,7 @@ static void check(int nr_nodes) WARN_ON_ONCE(count < (1 << black_path_count(rb_last(&root))) - 1); check_postorder(nr_nodes); + check_postorder_foreach(nr_nodes); } static void check_augmented(int nr_nodes) diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c index 75510e94f7d..464152410c5 100644 --- a/lib/reciprocal_div.c +++ b/lib/reciprocal_div.c @@ -1,11 +1,27 @@ +#include <linux/kernel.h> #include <asm/div64.h> #include <linux/reciprocal_div.h> #include <linux/export.h> -u32 reciprocal_value(u32 k) +/* + * For a description of the algorithm please have a look at + * include/linux/reciprocal_div.h + */ + +struct reciprocal_value reciprocal_value(u32 d) { - u64 val = (1LL << 32) + (k - 1); - do_div(val, k); - return (u32)val; + struct reciprocal_value R; + u64 m; + int l; + + l = fls(d - 1); + m = ((1ULL << 32) * ((1ULL << l) - d)); + do_div(m, d); + ++m; + R.m = (u32)m; + R.sh1 = min(l, 1); + R.sh2 = max(l - 1, 0); + + return R; } EXPORT_SYMBOL(reciprocal_value); diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c deleted file mode 100644 index 9be8a914497..00000000000 --- a/lib/rwsem-spinlock.c +++ /dev/null @@ -1,296 +0,0 @@ -/* rwsem-spinlock.c: R/W semaphores: contention handling functions for - * generic spinlock implementation - * - * Copyright (c) 2001 David Howells (dhowells@redhat.com). - * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de> - * - Derived also from comments by Linus - */ -#include <linux/rwsem.h> -#include <linux/sched.h> -#include <linux/export.h> - -enum rwsem_waiter_type { - RWSEM_WAITING_FOR_WRITE, - RWSEM_WAITING_FOR_READ -}; - -struct rwsem_waiter { - struct list_head list; - struct task_struct *task; - enum rwsem_waiter_type type; -}; - -int rwsem_is_locked(struct rw_semaphore *sem) -{ - int ret = 1; - unsigned long flags; - - if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) { - ret = (sem->activity != 0); - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - } - return ret; -} -EXPORT_SYMBOL(rwsem_is_locked); - -/* - * initialise the semaphore - */ -void __init_rwsem(struct rw_semaphore *sem, const char *name, - struct lock_class_key *key) -{ -#ifdef CONFIG_DEBUG_LOCK_ALLOC - /* - * Make sure we are not reinitializing a held semaphore: - */ - debug_check_no_locks_freed((void *)sem, sizeof(*sem)); - lockdep_init_map(&sem->dep_map, name, key, 0); -#endif - sem->activity = 0; - raw_spin_lock_init(&sem->wait_lock); - INIT_LIST_HEAD(&sem->wait_list); -} -EXPORT_SYMBOL(__init_rwsem); - -/* - * handle the lock release when processes blocked on it that can now run - * - if we come here, then: - * - the 'active count' _reached_ zero - * - the 'waiting count' is non-zero - * - the spinlock must be held by the caller - * - woken process blocks are discarded from the list after having task zeroed - * - writers are only woken if wakewrite is non-zero - */ -static inline struct rw_semaphore * -__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) -{ - struct rwsem_waiter *waiter; - struct task_struct *tsk; - int woken; - - waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - - if (waiter->type == RWSEM_WAITING_FOR_WRITE) { - if (wakewrite) - /* Wake up a writer. Note that we do not grant it the - * lock - it will have to acquire it when it runs. */ - wake_up_process(waiter->task); - goto out; - } - - /* grant an infinite number of read locks to the front of the queue */ - woken = 0; - do { - struct list_head *next = waiter->list.next; - - list_del(&waiter->list); - tsk = waiter->task; - smp_mb(); - waiter->task = NULL; - wake_up_process(tsk); - put_task_struct(tsk); - woken++; - if (next == &sem->wait_list) - break; - waiter = list_entry(next, struct rwsem_waiter, list); - } while (waiter->type != RWSEM_WAITING_FOR_WRITE); - - sem->activity += woken; - - out: - return sem; -} - -/* - * wake a single writer - */ -static inline struct rw_semaphore * -__rwsem_wake_one_writer(struct rw_semaphore *sem) -{ - struct rwsem_waiter *waiter; - - waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - wake_up_process(waiter->task); - - return sem; -} - -/* - * get a read lock on the semaphore - */ -void __sched __down_read(struct rw_semaphore *sem) -{ - struct rwsem_waiter waiter; - struct task_struct *tsk; - unsigned long flags; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - if (sem->activity >= 0 && list_empty(&sem->wait_list)) { - /* granted */ - sem->activity++; - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - goto out; - } - - tsk = current; - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - - /* set up my own style of waitqueue */ - waiter.task = tsk; - waiter.type = RWSEM_WAITING_FOR_READ; - get_task_struct(tsk); - - list_add_tail(&waiter.list, &sem->wait_list); - - /* we don't need to touch the semaphore struct anymore */ - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - - /* wait to be given the lock */ - for (;;) { - if (!waiter.task) - break; - schedule(); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - } - - tsk->state = TASK_RUNNING; - out: - ; -} - -/* - * trylock for reading -- returns 1 if successful, 0 if contention - */ -int __down_read_trylock(struct rw_semaphore *sem) -{ - unsigned long flags; - int ret = 0; - - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - if (sem->activity >= 0 && list_empty(&sem->wait_list)) { - /* granted */ - sem->activity++; - ret = 1; - } - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - - return ret; -} - -/* - * get a write lock on the semaphore - */ -void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) -{ - struct rwsem_waiter waiter; - struct task_struct *tsk; - unsigned long flags; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - /* set up my own style of waitqueue */ - tsk = current; - waiter.task = tsk; - waiter.type = RWSEM_WAITING_FOR_WRITE; - list_add_tail(&waiter.list, &sem->wait_list); - - /* wait for someone to release the lock */ - for (;;) { - /* - * That is the key to support write lock stealing: allows the - * task already on CPU to get the lock soon rather than put - * itself into sleep and waiting for system woke it or someone - * else in the head of the wait list up. - */ - if (sem->activity == 0) - break; - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - schedule(); - raw_spin_lock_irqsave(&sem->wait_lock, flags); - } - /* got the lock */ - sem->activity = -1; - list_del(&waiter.list); - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); -} - -void __sched __down_write(struct rw_semaphore *sem) -{ - __down_write_nested(sem, 0); -} - -/* - * trylock for writing -- returns 1 if successful, 0 if contention - */ -int __down_write_trylock(struct rw_semaphore *sem) -{ - unsigned long flags; - int ret = 0; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - if (sem->activity == 0) { - /* got the lock */ - sem->activity = -1; - ret = 1; - } - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - - return ret; -} - -/* - * release a read lock on the semaphore - */ -void __up_read(struct rw_semaphore *sem) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - if (--sem->activity == 0 && !list_empty(&sem->wait_list)) - sem = __rwsem_wake_one_writer(sem); - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); -} - -/* - * release a write lock on the semaphore - */ -void __up_write(struct rw_semaphore *sem) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - sem->activity = 0; - if (!list_empty(&sem->wait_list)) - sem = __rwsem_do_wake(sem, 1); - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); -} - -/* - * downgrade a write lock into a read lock - * - just wake up any readers at the front of the queue - */ -void __downgrade_write(struct rw_semaphore *sem) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - sem->activity = 1; - if (!list_empty(&sem->wait_list)) - sem = __rwsem_do_wake(sem, 0); - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); -} - diff --git a/lib/rwsem.c b/lib/rwsem.c deleted file mode 100644 index 19c5fa95e0b..00000000000 --- a/lib/rwsem.c +++ /dev/null @@ -1,293 +0,0 @@ -/* rwsem.c: R/W semaphores: contention handling functions - * - * Written by David Howells (dhowells@redhat.com). - * Derived from arch/i386/kernel/semaphore.c - * - * Writer lock-stealing by Alex Shi <alex.shi@intel.com> - * and Michel Lespinasse <walken@google.com> - */ -#include <linux/rwsem.h> -#include <linux/sched.h> -#include <linux/init.h> -#include <linux/export.h> - -/* - * Initialize an rwsem: - */ -void __init_rwsem(struct rw_semaphore *sem, const char *name, - struct lock_class_key *key) -{ -#ifdef CONFIG_DEBUG_LOCK_ALLOC - /* - * Make sure we are not reinitializing a held semaphore: - */ - debug_check_no_locks_freed((void *)sem, sizeof(*sem)); - lockdep_init_map(&sem->dep_map, name, key, 0); -#endif - sem->count = RWSEM_UNLOCKED_VALUE; - raw_spin_lock_init(&sem->wait_lock); - INIT_LIST_HEAD(&sem->wait_list); -} - -EXPORT_SYMBOL(__init_rwsem); - -enum rwsem_waiter_type { - RWSEM_WAITING_FOR_WRITE, - RWSEM_WAITING_FOR_READ -}; - -struct rwsem_waiter { - struct list_head list; - struct task_struct *task; - enum rwsem_waiter_type type; -}; - -enum rwsem_wake_type { - RWSEM_WAKE_ANY, /* Wake whatever's at head of wait list */ - RWSEM_WAKE_READERS, /* Wake readers only */ - RWSEM_WAKE_READ_OWNED /* Waker thread holds the read lock */ -}; - -/* - * handle the lock release when processes blocked on it that can now run - * - if we come here from up_xxxx(), then: - * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed) - * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so) - * - there must be someone on the queue - * - the spinlock must be held by the caller - * - woken process blocks are discarded from the list after having task zeroed - * - writers are only woken if downgrading is false - */ -static struct rw_semaphore * -__rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) -{ - struct rwsem_waiter *waiter; - struct task_struct *tsk; - struct list_head *next; - long oldcount, woken, loop, adjustment; - - waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - if (waiter->type == RWSEM_WAITING_FOR_WRITE) { - if (wake_type == RWSEM_WAKE_ANY) - /* Wake writer at the front of the queue, but do not - * grant it the lock yet as we want other writers - * to be able to steal it. Readers, on the other hand, - * will block as they will notice the queued writer. - */ - wake_up_process(waiter->task); - goto out; - } - - /* Writers might steal the lock before we grant it to the next reader. - * We prefer to do the first reader grant before counting readers - * so we can bail out early if a writer stole the lock. - */ - adjustment = 0; - if (wake_type != RWSEM_WAKE_READ_OWNED) { - adjustment = RWSEM_ACTIVE_READ_BIAS; - try_reader_grant: - oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; - if (unlikely(oldcount < RWSEM_WAITING_BIAS)) { - /* A writer stole the lock. Undo our reader grant. */ - if (rwsem_atomic_update(-adjustment, sem) & - RWSEM_ACTIVE_MASK) - goto out; - /* Last active locker left. Retry waking readers. */ - goto try_reader_grant; - } - } - - /* Grant an infinite number of read locks to the readers at the front - * of the queue. Note we increment the 'active part' of the count by - * the number of readers before waking any processes up. - */ - woken = 0; - do { - woken++; - - if (waiter->list.next == &sem->wait_list) - break; - - waiter = list_entry(waiter->list.next, - struct rwsem_waiter, list); - - } while (waiter->type != RWSEM_WAITING_FOR_WRITE); - - adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; - if (waiter->type != RWSEM_WAITING_FOR_WRITE) - /* hit end of list above */ - adjustment -= RWSEM_WAITING_BIAS; - - if (adjustment) - rwsem_atomic_add(adjustment, sem); - - next = sem->wait_list.next; - loop = woken; - do { - waiter = list_entry(next, struct rwsem_waiter, list); - next = waiter->list.next; - tsk = waiter->task; - smp_mb(); - waiter->task = NULL; - wake_up_process(tsk); - put_task_struct(tsk); - } while (--loop); - - sem->wait_list.next = next; - next->prev = &sem->wait_list; - - out: - return sem; -} - -/* - * wait for the read lock to be granted - */ -struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) -{ - long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; - struct rwsem_waiter waiter; - struct task_struct *tsk = current; - - /* set up my own style of waitqueue */ - waiter.task = tsk; - waiter.type = RWSEM_WAITING_FOR_READ; - get_task_struct(tsk); - - raw_spin_lock_irq(&sem->wait_lock); - if (list_empty(&sem->wait_list)) - adjustment += RWSEM_WAITING_BIAS; - list_add_tail(&waiter.list, &sem->wait_list); - - /* we're now waiting on the lock, but no longer actively locking */ - count = rwsem_atomic_update(adjustment, sem); - - /* If there are no active locks, wake the front queued process(es). - * - * If there are no writers and we are first in the queue, - * wake our own waiter to join the existing active readers ! - */ - if (count == RWSEM_WAITING_BIAS || - (count > RWSEM_WAITING_BIAS && - adjustment != -RWSEM_ACTIVE_READ_BIAS)) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); - - raw_spin_unlock_irq(&sem->wait_lock); - - /* wait to be given the lock */ - while (true) { - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - if (!waiter.task) - break; - schedule(); - } - - tsk->state = TASK_RUNNING; - - return sem; -} - -/* - * wait until we successfully acquire the write lock - */ -struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) -{ - long count, adjustment = -RWSEM_ACTIVE_WRITE_BIAS; - struct rwsem_waiter waiter; - struct task_struct *tsk = current; - - /* set up my own style of waitqueue */ - waiter.task = tsk; - waiter.type = RWSEM_WAITING_FOR_WRITE; - - raw_spin_lock_irq(&sem->wait_lock); - if (list_empty(&sem->wait_list)) - adjustment += RWSEM_WAITING_BIAS; - list_add_tail(&waiter.list, &sem->wait_list); - - /* we're now waiting on the lock, but no longer actively locking */ - count = rwsem_atomic_update(adjustment, sem); - - /* If there were already threads queued before us and there are no - * active writers, the lock must be read owned; so we try to wake - * any read locks that were queued ahead of us. */ - if (count > RWSEM_WAITING_BIAS && - adjustment == -RWSEM_ACTIVE_WRITE_BIAS) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS); - - /* wait until we successfully acquire the lock */ - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - while (true) { - if (!(count & RWSEM_ACTIVE_MASK)) { - /* Try acquiring the write lock. */ - count = RWSEM_ACTIVE_WRITE_BIAS; - if (!list_is_singular(&sem->wait_list)) - count += RWSEM_WAITING_BIAS; - - if (sem->count == RWSEM_WAITING_BIAS && - cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) == - RWSEM_WAITING_BIAS) - break; - } - - raw_spin_unlock_irq(&sem->wait_lock); - - /* Block until there are no active lockers. */ - do { - schedule(); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - } while ((count = sem->count) & RWSEM_ACTIVE_MASK); - - raw_spin_lock_irq(&sem->wait_lock); - } - - list_del(&waiter.list); - raw_spin_unlock_irq(&sem->wait_lock); - tsk->state = TASK_RUNNING; - - return sem; -} - -/* - * handle waking up a waiter on the semaphore - * - up_read/up_write has decremented the active part of count if we come here - */ -struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - /* do nothing if list empty */ - if (!list_empty(&sem->wait_list)) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - - return sem; -} - -/* - * downgrade a write lock into a read lock - * - caller incremented waiting part of count and discovered it still negative - * - just wake up any readers at the front of the queue - */ -struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&sem->wait_lock, flags); - - /* do nothing if list empty */ - if (!list_empty(&sem->wait_list)) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); - - raw_spin_unlock_irqrestore(&sem->wait_lock, flags); - - return sem; -} - -EXPORT_SYMBOL(rwsem_down_read_failed); -EXPORT_SYMBOL(rwsem_down_write_failed); -EXPORT_SYMBOL(rwsem_wake); -EXPORT_SYMBOL(rwsem_downgrade_wake); diff --git a/lib/scatterlist.c b/lib/scatterlist.c index a685c8a7957..3a8e8e8fb2a 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -495,7 +495,7 @@ static bool sg_miter_get_next_page(struct sg_mapping_iter *miter) * true if @miter contains the valid mapping. false if end of sg * list is reached. */ -static bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset) +bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset) { sg_miter_stop(miter); @@ -513,6 +513,7 @@ static bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset) return true; } +EXPORT_SYMBOL(sg_miter_skip); /** * sg_miter_next - proceed mapping iterator to the next mapping @@ -577,7 +578,8 @@ void sg_miter_stop(struct sg_mapping_iter *miter) miter->__offset += miter->consumed; miter->__remaining -= miter->consumed; - if (miter->__flags & SG_MITER_TO_SG) + if ((miter->__flags & SG_MITER_TO_SG) && + !PageSlab(miter->page)) flush_kernel_dcache_page(miter->page); if (miter->__flags & SG_MITER_ATOMIC) { diff --git a/lib/show_mem.c b/lib/show_mem.c index b7c72311ad0..09225796991 100644 --- a/lib/show_mem.c +++ b/lib/show_mem.c @@ -12,55 +12,38 @@ void show_mem(unsigned int filter) { pg_data_t *pgdat; - unsigned long total = 0, reserved = 0, shared = 0, - nonshared = 0, highmem = 0; + unsigned long total = 0, reserved = 0, highmem = 0; printk("Mem-Info:\n"); show_free_areas(filter); - if (filter & SHOW_MEM_FILTER_PAGE_COUNT) - return; - for_each_online_pgdat(pgdat) { - unsigned long i, flags; + unsigned long flags; + int zoneid; pgdat_resize_lock(pgdat, &flags); - for (i = 0; i < pgdat->node_spanned_pages; i++) { - struct page *page; - unsigned long pfn = pgdat->node_start_pfn + i; - - if (unlikely(!(i % MAX_ORDER_NR_PAGES))) - touch_nmi_watchdog(); - - if (!pfn_valid(pfn)) + for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) { + struct zone *zone = &pgdat->node_zones[zoneid]; + if (!populated_zone(zone)) continue; - page = pfn_to_page(pfn); - - if (PageHighMem(page)) - highmem++; + total += zone->present_pages; + reserved = zone->present_pages - zone->managed_pages; - if (PageReserved(page)) - reserved++; - else if (page_count(page) == 1) - nonshared++; - else if (page_count(page) > 1) - shared += page_count(page) - 1; - - total++; + if (is_highmem_idx(zoneid)) + highmem += zone->present_pages; } pgdat_resize_unlock(pgdat, &flags); } printk("%lu pages RAM\n", total); -#ifdef CONFIG_HIGHMEM - printk("%lu pages HighMem\n", highmem); -#endif + printk("%lu pages HighMem/MovableOnly\n", highmem); printk("%lu pages reserved\n", reserved); - printk("%lu pages shared\n", shared); - printk("%lu pages non-shared\n", nonshared); #ifdef CONFIG_QUICKLIST printk("%lu pages in pagetable cache\n", quicklist_total_size()); #endif +#ifdef CONFIG_MEMORY_FAILURE + printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages)); +#endif } diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 4c0d0e51d49..1afec32de6f 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -7,12 +7,12 @@ #include <linux/kallsyms.h> #include <linux/sched.h> -notrace unsigned int debug_smp_processor_id(void) +notrace static unsigned int check_preemption_disabled(const char *what1, + const char *what2) { - unsigned long preempt_count = preempt_count(); int this_cpu = raw_smp_processor_id(); - if (likely(preempt_count)) + if (likely(preempt_count())) goto out; if (irqs_disabled()) @@ -39,9 +39,9 @@ notrace unsigned int debug_smp_processor_id(void) if (!printk_ratelimit()) goto out_enable; - printk(KERN_ERR "BUG: using smp_processor_id() in preemptible [%08x] " - "code: %s/%d\n", - preempt_count() - 1, current->comm, current->pid); + printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x] code: %s/%d\n", + what1, what2, preempt_count() - 1, current->comm, current->pid); + print_symbol("caller is %s\n", (long)__builtin_return_address(0)); dump_stack(); @@ -51,5 +51,14 @@ out: return this_cpu; } +notrace unsigned int debug_smp_processor_id(void) +{ + return check_preemption_disabled("smp_processor_id", ""); +} EXPORT_SYMBOL(debug_smp_processor_id); +notrace void __this_cpu_preempt_check(const char *op) +{ + check_preemption_disabled("__this_cpu_", op); +} +EXPORT_SYMBOL(__this_cpu_preempt_check); diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c deleted file mode 100644 index 0374a596cff..00000000000 --- a/lib/spinlock_debug.c +++ /dev/null @@ -1,302 +0,0 @@ -/* - * Copyright 2005, Red Hat, Inc., Ingo Molnar - * Released under the General Public License (GPL). - * - * This file contains the spinlock/rwlock implementations for - * DEBUG_SPINLOCK. - */ - -#include <linux/spinlock.h> -#include <linux/nmi.h> -#include <linux/interrupt.h> -#include <linux/debug_locks.h> -#include <linux/delay.h> -#include <linux/export.h> - -void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name, - struct lock_class_key *key) -{ -#ifdef CONFIG_DEBUG_LOCK_ALLOC - /* - * Make sure we are not reinitializing a held lock: - */ - debug_check_no_locks_freed((void *)lock, sizeof(*lock)); - lockdep_init_map(&lock->dep_map, name, key, 0); -#endif - lock->raw_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; - lock->magic = SPINLOCK_MAGIC; - lock->owner = SPINLOCK_OWNER_INIT; - lock->owner_cpu = -1; -} - -EXPORT_SYMBOL(__raw_spin_lock_init); - -void __rwlock_init(rwlock_t *lock, const char *name, - struct lock_class_key *key) -{ -#ifdef CONFIG_DEBUG_LOCK_ALLOC - /* - * Make sure we are not reinitializing a held lock: - */ - debug_check_no_locks_freed((void *)lock, sizeof(*lock)); - lockdep_init_map(&lock->dep_map, name, key, 0); -#endif - lock->raw_lock = (arch_rwlock_t) __ARCH_RW_LOCK_UNLOCKED; - lock->magic = RWLOCK_MAGIC; - lock->owner = SPINLOCK_OWNER_INIT; - lock->owner_cpu = -1; -} - -EXPORT_SYMBOL(__rwlock_init); - -static void spin_dump(raw_spinlock_t *lock, const char *msg) -{ - struct task_struct *owner = NULL; - - if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT) - owner = lock->owner; - printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n", - msg, raw_smp_processor_id(), - current->comm, task_pid_nr(current)); - printk(KERN_EMERG " lock: %pS, .magic: %08x, .owner: %s/%d, " - ".owner_cpu: %d\n", - lock, lock->magic, - owner ? owner->comm : "<none>", - owner ? task_pid_nr(owner) : -1, - lock->owner_cpu); - dump_stack(); -} - -static void spin_bug(raw_spinlock_t *lock, const char *msg) -{ - if (!debug_locks_off()) - return; - - spin_dump(lock, msg); -} - -#define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg) - -static inline void -debug_spin_lock_before(raw_spinlock_t *lock) -{ - SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); - SPIN_BUG_ON(lock->owner == current, lock, "recursion"); - SPIN_BUG_ON(lock->owner_cpu == raw_smp_processor_id(), - lock, "cpu recursion"); -} - -static inline void debug_spin_lock_after(raw_spinlock_t *lock) -{ - lock->owner_cpu = raw_smp_processor_id(); - lock->owner = current; -} - -static inline void debug_spin_unlock(raw_spinlock_t *lock) -{ - SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); - SPIN_BUG_ON(!raw_spin_is_locked(lock), lock, "already unlocked"); - SPIN_BUG_ON(lock->owner != current, lock, "wrong owner"); - SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(), - lock, "wrong CPU"); - lock->owner = SPINLOCK_OWNER_INIT; - lock->owner_cpu = -1; -} - -static void __spin_lock_debug(raw_spinlock_t *lock) -{ - u64 i; - u64 loops = loops_per_jiffy * HZ; - - for (i = 0; i < loops; i++) { - if (arch_spin_trylock(&lock->raw_lock)) - return; - __delay(1); - } - /* lockup suspected: */ - spin_dump(lock, "lockup suspected"); -#ifdef CONFIG_SMP - trigger_all_cpu_backtrace(); -#endif - - /* - * The trylock above was causing a livelock. Give the lower level arch - * specific lock code a chance to acquire the lock. We have already - * printed a warning/backtrace at this point. The non-debug arch - * specific code might actually succeed in acquiring the lock. If it is - * not successful, the end-result is the same - there is no forward - * progress. - */ - arch_spin_lock(&lock->raw_lock); -} - -void do_raw_spin_lock(raw_spinlock_t *lock) -{ - debug_spin_lock_before(lock); - if (unlikely(!arch_spin_trylock(&lock->raw_lock))) - __spin_lock_debug(lock); - debug_spin_lock_after(lock); -} - -int do_raw_spin_trylock(raw_spinlock_t *lock) -{ - int ret = arch_spin_trylock(&lock->raw_lock); - - if (ret) - debug_spin_lock_after(lock); -#ifndef CONFIG_SMP - /* - * Must not happen on UP: - */ - SPIN_BUG_ON(!ret, lock, "trylock failure on UP"); -#endif - return ret; -} - -void do_raw_spin_unlock(raw_spinlock_t *lock) -{ - debug_spin_unlock(lock); - arch_spin_unlock(&lock->raw_lock); -} - -static void rwlock_bug(rwlock_t *lock, const char *msg) -{ - if (!debug_locks_off()) - return; - - printk(KERN_EMERG "BUG: rwlock %s on CPU#%d, %s/%d, %p\n", - msg, raw_smp_processor_id(), current->comm, - task_pid_nr(current), lock); - dump_stack(); -} - -#define RWLOCK_BUG_ON(cond, lock, msg) if (unlikely(cond)) rwlock_bug(lock, msg) - -#if 0 /* __write_lock_debug() can lock up - maybe this can too? */ -static void __read_lock_debug(rwlock_t *lock) -{ - u64 i; - u64 loops = loops_per_jiffy * HZ; - int print_once = 1; - - for (;;) { - for (i = 0; i < loops; i++) { - if (arch_read_trylock(&lock->raw_lock)) - return; - __delay(1); - } - /* lockup suspected: */ - if (print_once) { - print_once = 0; - printk(KERN_EMERG "BUG: read-lock lockup on CPU#%d, " - "%s/%d, %p\n", - raw_smp_processor_id(), current->comm, - current->pid, lock); - dump_stack(); - } - } -} -#endif - -void do_raw_read_lock(rwlock_t *lock) -{ - RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); - arch_read_lock(&lock->raw_lock); -} - -int do_raw_read_trylock(rwlock_t *lock) -{ - int ret = arch_read_trylock(&lock->raw_lock); - -#ifndef CONFIG_SMP - /* - * Must not happen on UP: - */ - RWLOCK_BUG_ON(!ret, lock, "trylock failure on UP"); -#endif - return ret; -} - -void do_raw_read_unlock(rwlock_t *lock) -{ - RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); - arch_read_unlock(&lock->raw_lock); -} - -static inline void debug_write_lock_before(rwlock_t *lock) -{ - RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); - RWLOCK_BUG_ON(lock->owner == current, lock, "recursion"); - RWLOCK_BUG_ON(lock->owner_cpu == raw_smp_processor_id(), - lock, "cpu recursion"); -} - -static inline void debug_write_lock_after(rwlock_t *lock) -{ - lock->owner_cpu = raw_smp_processor_id(); - lock->owner = current; -} - -static inline void debug_write_unlock(rwlock_t *lock) -{ - RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); - RWLOCK_BUG_ON(lock->owner != current, lock, "wrong owner"); - RWLOCK_BUG_ON(lock->owner_cpu != raw_smp_processor_id(), - lock, "wrong CPU"); - lock->owner = SPINLOCK_OWNER_INIT; - lock->owner_cpu = -1; -} - -#if 0 /* This can cause lockups */ -static void __write_lock_debug(rwlock_t *lock) -{ - u64 i; - u64 loops = loops_per_jiffy * HZ; - int print_once = 1; - - for (;;) { - for (i = 0; i < loops; i++) { - if (arch_write_trylock(&lock->raw_lock)) - return; - __delay(1); - } - /* lockup suspected: */ - if (print_once) { - print_once = 0; - printk(KERN_EMERG "BUG: write-lock lockup on CPU#%d, " - "%s/%d, %p\n", - raw_smp_processor_id(), current->comm, - current->pid, lock); - dump_stack(); - } - } -} -#endif - -void do_raw_write_lock(rwlock_t *lock) -{ - debug_write_lock_before(lock); - arch_write_lock(&lock->raw_lock); - debug_write_lock_after(lock); -} - -int do_raw_write_trylock(rwlock_t *lock) -{ - int ret = arch_write_trylock(&lock->raw_lock); - - if (ret) - debug_write_lock_after(lock); -#ifndef CONFIG_SMP - /* - * Must not happen on UP: - */ - RWLOCK_BUG_ON(!ret, lock, "trylock failure on UP"); -#endif - return ret; -} - -void do_raw_write_unlock(rwlock_t *lock) -{ - debug_write_unlock(lock); - arch_write_unlock(&lock->raw_lock); -} diff --git a/lib/string.c b/lib/string.c index e5878de4f10..992bf30af75 100644 --- a/lib/string.c +++ b/lib/string.c @@ -107,7 +107,7 @@ EXPORT_SYMBOL(strcpy); #ifndef __HAVE_ARCH_STRNCPY /** - * strncpy - Copy a length-limited, %NUL-terminated string + * strncpy - Copy a length-limited, C-string * @dest: Where to copy the string to * @src: Where to copy the string from * @count: The maximum number of bytes to copy @@ -136,7 +136,7 @@ EXPORT_SYMBOL(strncpy); #ifndef __HAVE_ARCH_STRLCPY /** - * strlcpy - Copy a %NUL terminated string into a sized buffer + * strlcpy - Copy a C-string into a sized buffer * @dest: Where to copy the string to * @src: Where to copy the string from * @size: size of destination buffer @@ -182,7 +182,7 @@ EXPORT_SYMBOL(strcat); #ifndef __HAVE_ARCH_STRNCAT /** - * strncat - Append a length-limited, %NUL-terminated string to another + * strncat - Append a length-limited, C-string to another * @dest: The string to be appended to * @src: The string to append to it * @count: The maximum numbers of bytes to copy @@ -211,7 +211,7 @@ EXPORT_SYMBOL(strncat); #ifndef __HAVE_ARCH_STRLCAT /** - * strlcat - Append a length-limited, %NUL-terminated string to another + * strlcat - Append a length-limited, C-string to another * @dest: The string to be appended to * @src: The string to append to it * @count: The size of the destination buffer. @@ -301,6 +301,24 @@ char *strchr(const char *s, int c) EXPORT_SYMBOL(strchr); #endif +#ifndef __HAVE_ARCH_STRCHRNUL +/** + * strchrnul - Find and return a character in a string, or end of string + * @s: The string to be searched + * @c: The character to search for + * + * Returns pointer to first occurrence of 'c' in s. If c is not found, then + * return a pointer to the null byte at the end of s. + */ +char *strchrnul(const char *s, int c) +{ + while (*s && *s != (char)c) + s++; + return (char *)s; +} +EXPORT_SYMBOL(strchrnul); +#endif + #ifndef __HAVE_ARCH_STRRCHR /** * strrchr - Find the last occurrence of a character in a string @@ -648,7 +666,7 @@ EXPORT_SYMBOL(memmove); * @count: The size of the area. */ #undef memcmp -int memcmp(const void *cs, const void *ct, size_t count) +__visible int memcmp(const void *cs, const void *ct, size_t count) { const unsigned char *su1, *su2; int res = 0; diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 4e8686c7e5a..4abda074ea4 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -38,6 +38,9 @@ #include <linux/bootmem.h> #include <linux/iommu-helper.h> +#define CREATE_TRACE_POINTS +#include <trace/events/swiotlb.h> + #define OFFSET(val,align) ((unsigned long) \ ( (val) & ( (align) - 1))) @@ -83,6 +86,7 @@ static unsigned int io_tlb_index; * We need to save away the original address corresponding to a mapped entry * for the sync operations. */ +#define INVALID_PHYS_ADDR (~(phys_addr_t)0) static phys_addr_t *io_tlb_orig_addr; /* @@ -169,8 +173,9 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) /* * Get the overflow emergency buffer */ - v_overflow_buffer = alloc_bootmem_low_pages_nopanic( - PAGE_ALIGN(io_tlb_overflow)); + v_overflow_buffer = memblock_virt_alloc_low_nopanic( + PAGE_ALIGN(io_tlb_overflow), + PAGE_SIZE); if (!v_overflow_buffer) return -ENOMEM; @@ -181,11 +186,17 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE * between io_tlb_start and io_tlb_end. */ - io_tlb_list = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); - for (i = 0; i < io_tlb_nslabs; i++) - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); + io_tlb_list = memblock_virt_alloc( + PAGE_ALIGN(io_tlb_nslabs * sizeof(int)), + PAGE_SIZE); + io_tlb_orig_addr = memblock_virt_alloc( + PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)), + PAGE_SIZE); + for (i = 0; i < io_tlb_nslabs; i++) { + io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); + io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; + } io_tlb_index = 0; - io_tlb_orig_addr = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); if (verbose) swiotlb_print_info(); @@ -212,13 +223,13 @@ swiotlb_init(int verbose) bytes = io_tlb_nslabs << IO_TLB_SHIFT; /* Get IO TLB memory from the low pages */ - vstart = alloc_bootmem_low_pages_nopanic(PAGE_ALIGN(bytes)); + vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE); if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) return; if (io_tlb_start) - free_bootmem(io_tlb_start, - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + memblock_free_early(io_tlb_start, + PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); pr_warn("Cannot allocate SWIOTLB buffer"); no_iotlb_memory = true; } @@ -305,10 +316,6 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) if (!io_tlb_list) goto cleanup3; - for (i = 0; i < io_tlb_nslabs; i++) - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); - io_tlb_index = 0; - io_tlb_orig_addr = (phys_addr_t *) __get_free_pages(GFP_KERNEL, get_order(io_tlb_nslabs * @@ -316,7 +323,11 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) if (!io_tlb_orig_addr) goto cleanup4; - memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(phys_addr_t)); + for (i = 0; i < io_tlb_nslabs; i++) { + io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); + io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; + } + io_tlb_index = 0; swiotlb_print_info(); @@ -354,19 +365,19 @@ void __init swiotlb_free(void) free_pages((unsigned long)phys_to_virt(io_tlb_start), get_order(io_tlb_nslabs << IO_TLB_SHIFT)); } else { - free_bootmem_late(io_tlb_overflow_buffer, - PAGE_ALIGN(io_tlb_overflow)); - free_bootmem_late(__pa(io_tlb_orig_addr), - PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); - free_bootmem_late(__pa(io_tlb_list), - PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); - free_bootmem_late(io_tlb_start, - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + memblock_free_late(io_tlb_overflow_buffer, + PAGE_ALIGN(io_tlb_overflow)); + memblock_free_late(__pa(io_tlb_orig_addr), + PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); + memblock_free_late(__pa(io_tlb_list), + PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); + memblock_free_late(io_tlb_start, + PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); } io_tlb_nslabs = 0; } -static int is_swiotlb_buffer(phys_addr_t paddr) +int is_swiotlb_buffer(phys_addr_t paddr) { return paddr >= io_tlb_start && paddr < io_tlb_end; } @@ -502,6 +513,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, not_found: spin_unlock_irqrestore(&io_tlb_lock, flags); + if (printk_ratelimit()) + dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size); return SWIOTLB_MAP_ERROR; found: spin_unlock_irqrestore(&io_tlb_lock, flags); @@ -546,7 +559,8 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, /* * First, sync the memory before unmapping the entry */ - if (orig_addr && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) + if (orig_addr != INVALID_PHYS_ADDR && + ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE); /* @@ -563,8 +577,10 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, * Step 1: return the slots to the free list, merging the * slots with superceeding slots */ - for (i = index + nslots - 1; i >= index; i--) + for (i = index + nslots - 1; i >= index; i--) { io_tlb_list[i] = ++count; + io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; + } /* * Step 2: merge the returned slots with the preceding slots, * if available (non zero) @@ -583,6 +599,8 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; phys_addr_t orig_addr = io_tlb_orig_addr[index]; + if (orig_addr == INVALID_PHYS_ADDR) + return; orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1); switch (target) { @@ -726,6 +744,8 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, if (dma_capable(dev, dev_addr, size) && !swiotlb_force) return dev_addr; + trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); + /* Oh well, have to allocate and map a bounce buffer. */ map = map_single(dev, phys, size, dir); if (map == SWIOTLB_MAP_ERROR) { diff --git a/lib/syscall.c b/lib/syscall.c index 58710eefeac..e30e0393248 100644 --- a/lib/syscall.c +++ b/lib/syscall.c @@ -72,4 +72,3 @@ int task_current_syscall(struct task_struct *target, long *callno, return 0; } -EXPORT_SYMBOL_GPL(task_current_syscall); diff --git a/lib/test_bpf.c b/lib/test_bpf.c new file mode 100644 index 00000000000..c579e0f5881 --- /dev/null +++ b/lib/test_bpf.c @@ -0,0 +1,1929 @@ +/* + * Testsuite for BPF interpreter and BPF JIT compiler + * + * Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/filter.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/if_vlan.h> + +/* General test specific settings */ +#define MAX_SUBTESTS 3 +#define MAX_TESTRUNS 10000 +#define MAX_DATA 128 +#define MAX_INSNS 512 +#define MAX_K 0xffffFFFF + +/* Few constants used to init test 'skb' */ +#define SKB_TYPE 3 +#define SKB_MARK 0x1234aaaa +#define SKB_HASH 0x1234aaab +#define SKB_QUEUE_MAP 123 +#define SKB_VLAN_TCI 0xffff +#define SKB_DEV_IFINDEX 577 +#define SKB_DEV_TYPE 588 + +/* Redefine REGs to make tests less verbose */ +#define R0 BPF_REG_0 +#define R1 BPF_REG_1 +#define R2 BPF_REG_2 +#define R3 BPF_REG_3 +#define R4 BPF_REG_4 +#define R5 BPF_REG_5 +#define R6 BPF_REG_6 +#define R7 BPF_REG_7 +#define R8 BPF_REG_8 +#define R9 BPF_REG_9 +#define R10 BPF_REG_10 + +/* Flags that can be passed to test cases */ +#define FLAG_NO_DATA BIT(0) +#define FLAG_EXPECTED_FAIL BIT(1) + +enum { + CLASSIC = BIT(6), /* Old BPF instructions only. */ + INTERNAL = BIT(7), /* Extended instruction set. */ +}; + +#define TEST_TYPE_MASK (CLASSIC | INTERNAL) + +struct bpf_test { + const char *descr; + union { + struct sock_filter insns[MAX_INSNS]; + struct sock_filter_int insns_int[MAX_INSNS]; + } u; + __u8 aux; + __u8 data[MAX_DATA]; + struct { + int data_size; + __u32 result; + } test[MAX_SUBTESTS]; +}; + +static struct bpf_test tests[] = { + { + "TAX", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_IMM, 2), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_ALU | BPF_NEG, 0), /* A == -3 */ + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_LEN, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_MISC | BPF_TAX, 0), /* X == len - 3 */ + BPF_STMT(BPF_LD | BPF_B | BPF_IND, 1), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { 10, 20, 30, 40, 50 }, + { { 2, 10 }, { 3, 20 }, { 4, 30 } }, + }, + { + "TXA", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_LEN, 0), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_RET | BPF_A, 0) /* A == len * 2 */ + }, + CLASSIC, + { 10, 20, 30, 40, 50 }, + { { 1, 2 }, { 3, 6 }, { 4, 8 } }, + }, + { + "ADD_SUB_MUL_K", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 1), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 2), + BPF_STMT(BPF_LDX | BPF_IMM, 3), + BPF_STMT(BPF_ALU | BPF_SUB | BPF_X, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 0xffffffff), + BPF_STMT(BPF_ALU | BPF_MUL | BPF_K, 3), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 0, 0xfffffffd } } + }, + { + "DIV_KX", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 8), + BPF_STMT(BPF_ALU | BPF_DIV | BPF_K, 2), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_IMM, 0xffffffff), + BPF_STMT(BPF_ALU | BPF_DIV | BPF_X, 0), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_IMM, 0xffffffff), + BPF_STMT(BPF_ALU | BPF_DIV | BPF_K, 0x70000000), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 0, 0x40000001 } } + }, + { + "AND_OR_LSH_K", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 0xff), + BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xf0), + BPF_STMT(BPF_ALU | BPF_LSH | BPF_K, 27), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_IMM, 0xf), + BPF_STMT(BPF_ALU | BPF_OR | BPF_K, 0xf0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 0, 0x800000ff }, { 1, 0x800000ff } }, + }, + { + "LD_IMM_0", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 0), /* ld #0 */ + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 0), + BPF_STMT(BPF_RET | BPF_K, 1), + }, + CLASSIC, + { }, + { { 1, 1 } }, + }, + { + "LD_IND", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_LEN, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_IND, MAX_K), + BPF_STMT(BPF_RET | BPF_K, 1) + }, + CLASSIC, + { }, + { { 1, 0 }, { 10, 0 }, { 60, 0 } }, + }, + { + "LD_ABS", + .u.insns = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, 1000), + BPF_STMT(BPF_RET | BPF_K, 1) + }, + CLASSIC, + { }, + { { 1, 0 }, { 10, 0 }, { 60, 0 } }, + }, + { + "LD_ABS_LL", + .u.insns = { + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, SKF_LL_OFF), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, SKF_LL_OFF + 1), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { 1, 2, 3 }, + { { 1, 0 }, { 2, 3 } }, + }, + { + "LD_IND_LL", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, SKF_LL_OFF - 1), + BPF_STMT(BPF_LDX | BPF_LEN, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_B | BPF_IND, 0), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { 1, 2, 3, 0xff }, + { { 1, 1 }, { 3, 3 }, { 4, 0xff } }, + }, + { + "LD_ABS_NET", + .u.insns = { + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, SKF_NET_OFF), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, SKF_NET_OFF + 1), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3 }, + { { 15, 0 }, { 16, 3 } }, + }, + { + "LD_IND_NET", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, SKF_NET_OFF - 15), + BPF_STMT(BPF_LDX | BPF_LEN, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_B | BPF_IND, 0), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3 }, + { { 14, 0 }, { 15, 1 }, { 17, 3 } }, + }, + { + "LD_PKTTYPE", + .u.insns = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_PKTTYPE), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SKB_TYPE, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 1), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_PKTTYPE), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SKB_TYPE, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 1), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_PKTTYPE), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SKB_TYPE, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 1), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { }, + { { 1, 3 }, { 10, 3 } }, + }, + { + "LD_MARK", + .u.insns = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_MARK), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { }, + { { 1, SKB_MARK}, { 10, SKB_MARK} }, + }, + { + "LD_RXHASH", + .u.insns = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_RXHASH), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { }, + { { 1, SKB_HASH}, { 10, SKB_HASH} }, + }, + { + "LD_QUEUE", + .u.insns = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_QUEUE), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { }, + { { 1, SKB_QUEUE_MAP }, { 10, SKB_QUEUE_MAP } }, + }, + { + "LD_PROTOCOL", + .u.insns = { + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 1), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 20, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 0), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_PROTOCOL), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 2), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 30, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 0), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { 10, 20, 30 }, + { { 10, ETH_P_IP }, { 100, ETH_P_IP } }, + }, + { + "LD_VLAN_TAG", + .u.insns = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_VLAN_TAG), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { }, + { + { 1, SKB_VLAN_TCI & ~VLAN_TAG_PRESENT }, + { 10, SKB_VLAN_TCI & ~VLAN_TAG_PRESENT } + }, + }, + { + "LD_VLAN_TAG_PRESENT", + .u.insns = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { }, + { + { 1, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) }, + { 10, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) } + }, + }, + { + "LD_IFINDEX", + .u.insns = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_IFINDEX), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { }, + { { 1, SKB_DEV_IFINDEX }, { 10, SKB_DEV_IFINDEX } }, + }, + { + "LD_HATYPE", + .u.insns = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_HATYPE), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { }, + { { 1, SKB_DEV_TYPE }, { 10, SKB_DEV_TYPE } }, + }, + { + "LD_CPU", + .u.insns = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_CPU), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_CPU), + BPF_STMT(BPF_ALU | BPF_SUB | BPF_X, 0), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { }, + { { 1, 0 }, { 10, 0 } }, + }, + { + "LD_NLATTR", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_IMM, 2), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_LDX | BPF_IMM, 3), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_NLATTR), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, +#ifdef __BIG_ENDIAN + { 0xff, 0xff, 0, 4, 0, 2, 0, 4, 0, 3 }, +#else + { 0xff, 0xff, 4, 0, 2, 0, 4, 0, 3, 0 }, +#endif + { { 4, 0 }, { 20, 6 } }, + }, + { + "LD_NLATTR_NEST", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 2), + BPF_STMT(BPF_LDX | BPF_IMM, 3), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_NLATTR_NEST), + BPF_STMT(BPF_LD | BPF_IMM, 2), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_NLATTR_NEST), + BPF_STMT(BPF_LD | BPF_IMM, 2), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_NLATTR_NEST), + BPF_STMT(BPF_LD | BPF_IMM, 2), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_NLATTR_NEST), + BPF_STMT(BPF_LD | BPF_IMM, 2), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_NLATTR_NEST), + BPF_STMT(BPF_LD | BPF_IMM, 2), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_NLATTR_NEST), + BPF_STMT(BPF_LD | BPF_IMM, 2), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_NLATTR_NEST), + BPF_STMT(BPF_LD | BPF_IMM, 2), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_NLATTR_NEST), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, +#ifdef __BIG_ENDIAN + { 0xff, 0xff, 0, 12, 0, 1, 0, 4, 0, 2, 0, 4, 0, 3 }, +#else + { 0xff, 0xff, 12, 0, 1, 0, 4, 0, 2, 0, 4, 0, 3, 0 }, +#endif + { { 4, 0 }, { 20, 10 } }, + }, + { + "LD_PAYLOAD_OFF", + .u.insns = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_PAY_OFFSET), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_PAY_OFFSET), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_PAY_OFFSET), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_PAY_OFFSET), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_PAY_OFFSET), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + /* 00:00:00:00:00:00 > 00:00:00:00:00:00, ethtype IPv4 (0x0800), + * length 98: 127.0.0.1 > 127.0.0.1: ICMP echo request, + * id 9737, seq 1, length 64 + */ + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x08, 0x00, + 0x45, 0x00, 0x00, 0x54, 0xac, 0x8b, 0x40, 0x00, 0x40, + 0x01, 0x90, 0x1b, 0x7f, 0x00, 0x00, 0x01 }, + { { 30, 0 }, { 100, 42 } }, + }, + { + "LD_ANC_XOR", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 10), + BPF_STMT(BPF_LDX | BPF_IMM, 300), + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_ALU_XOR_X), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { }, + { { 4, 10 ^ 300 }, { 20, 10 ^ 300 } }, + }, + { + "SPILL_FILL", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_LEN, 0), + BPF_STMT(BPF_LD | BPF_IMM, 2), + BPF_STMT(BPF_ALU | BPF_RSH, 1), + BPF_STMT(BPF_ALU | BPF_XOR | BPF_X, 0), + BPF_STMT(BPF_ST, 1), /* M1 = 1 ^ len */ + BPF_STMT(BPF_ALU | BPF_XOR | BPF_K, 0x80000000), + BPF_STMT(BPF_ST, 2), /* M2 = 1 ^ len ^ 0x80000000 */ + BPF_STMT(BPF_STX, 15), /* M3 = len */ + BPF_STMT(BPF_LDX | BPF_MEM, 1), + BPF_STMT(BPF_LD | BPF_MEM, 2), + BPF_STMT(BPF_ALU | BPF_XOR | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 15), + BPF_STMT(BPF_ALU | BPF_XOR | BPF_X, 0), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { }, + { { 1, 0x80000001 }, { 2, 0x80000002 }, { 60, 0x80000000 ^ 60 } } + }, + { + "JEQ", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_LEN, 0), + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 2), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_X, 0, 0, 1), + BPF_STMT(BPF_RET | BPF_K, 1), + BPF_STMT(BPF_RET | BPF_K, MAX_K) + }, + CLASSIC, + { 3, 3, 3, 3, 3 }, + { { 1, 0 }, { 3, 1 }, { 4, MAX_K } }, + }, + { + "JGT", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_LEN, 0), + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 2), + BPF_JUMP(BPF_JMP | BPF_JGT | BPF_X, 0, 0, 1), + BPF_STMT(BPF_RET | BPF_K, 1), + BPF_STMT(BPF_RET | BPF_K, MAX_K) + }, + CLASSIC, + { 4, 4, 4, 3, 3 }, + { { 2, 0 }, { 3, 1 }, { 4, MAX_K } }, + }, + { + "JGE", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_LEN, 0), + BPF_STMT(BPF_LD | BPF_B | BPF_IND, MAX_K), + BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 1, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 10), + BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 2, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 20), + BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 3, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 30), + BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 4, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 40), + BPF_STMT(BPF_RET | BPF_K, MAX_K) + }, + CLASSIC, + { 1, 2, 3, 4, 5 }, + { { 1, 20 }, { 3, 40 }, { 5, MAX_K } }, + }, + { + "JSET", + .u.insns = { + BPF_JUMP(BPF_JMP | BPF_JA, 0, 0, 0), + BPF_JUMP(BPF_JMP | BPF_JA, 1, 1, 1), + BPF_JUMP(BPF_JMP | BPF_JA, 0, 0, 0), + BPF_JUMP(BPF_JMP | BPF_JA, 0, 0, 0), + BPF_STMT(BPF_LDX | BPF_LEN, 0), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_SUB | BPF_K, 4), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_W | BPF_IND, 0), + BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 1, 0, 1), + BPF_STMT(BPF_RET | BPF_K, 10), + BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0x80000000, 0, 1), + BPF_STMT(BPF_RET | BPF_K, 20), + BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0xffffff, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 30), + BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0xffffff, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 30), + BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0xffffff, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 30), + BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0xffffff, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 30), + BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0xffffff, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 30), + BPF_STMT(BPF_RET | BPF_K, MAX_K) + }, + CLASSIC, + { 0, 0xAA, 0x55, 1 }, + { { 4, 10 }, { 5, 20 }, { 6, MAX_K } }, + }, + { + "tcpdump port 22", + .u.insns = { + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 12), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x86dd, 0, 8), /* IPv6 */ + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 20), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x84, 2, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x6, 1, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x11, 0, 17), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 54), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 14, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 56), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 12, 13), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x0800, 0, 12), /* IPv4 */ + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 23), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x84, 2, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x6, 1, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x11, 0, 8), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 20), + BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0x1fff, 6, 0), + BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 14), + BPF_STMT(BPF_LD | BPF_H | BPF_IND, 14), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 2, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_IND, 16), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 0, 1), + BPF_STMT(BPF_RET | BPF_K, 0xffff), + BPF_STMT(BPF_RET | BPF_K, 0), + }, + CLASSIC, + /* 3c:07:54:43:e5:76 > 10:bf:48:d6:43:d6, ethertype IPv4(0x0800) + * length 114: 10.1.1.149.49700 > 10.1.2.10.22: Flags [P.], + * seq 1305692979:1305693027, ack 3650467037, win 65535, + * options [nop,nop,TS val 2502645400 ecr 3971138], length 48 + */ + { 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6, + 0x3c, 0x07, 0x54, 0x43, 0xe5, 0x76, + 0x08, 0x00, + 0x45, 0x10, 0x00, 0x64, 0x75, 0xb5, + 0x40, 0x00, 0x40, 0x06, 0xad, 0x2e, /* IP header */ + 0x0a, 0x01, 0x01, 0x95, /* ip src */ + 0x0a, 0x01, 0x02, 0x0a, /* ip dst */ + 0xc2, 0x24, + 0x00, 0x16 /* dst port */ }, + { { 10, 0 }, { 30, 0 }, { 100, 65535 } }, + }, + { + "tcpdump complex", + .u.insns = { + /* tcpdump -nei eth0 'tcp port 22 and (((ip[2:2] - + * ((ip[0]&0xf)<<2)) - ((tcp[12]&0xf0)>>2)) != 0) and + * (len > 115 or len < 30000000000)' -d + */ + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 12), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x86dd, 30, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x800, 0, 29), + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 23), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x6, 0, 27), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 20), + BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, 0x1fff, 25, 0), + BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 14), + BPF_STMT(BPF_LD | BPF_H | BPF_IND, 14), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 2, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_IND, 16), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 0, 20), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 16), + BPF_STMT(BPF_ST, 1), + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 14), + BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xf), + BPF_STMT(BPF_ALU | BPF_LSH | BPF_K, 2), + BPF_STMT(BPF_MISC | BPF_TAX, 0x5), /* libpcap emits K on TAX */ + BPF_STMT(BPF_LD | BPF_MEM, 1), + BPF_STMT(BPF_ALU | BPF_SUB | BPF_X, 0), + BPF_STMT(BPF_ST, 5), + BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 14), + BPF_STMT(BPF_LD | BPF_B | BPF_IND, 26), + BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xf0), + BPF_STMT(BPF_ALU | BPF_RSH | BPF_K, 2), + BPF_STMT(BPF_MISC | BPF_TAX, 0x9), /* libpcap emits K on TAX */ + BPF_STMT(BPF_LD | BPF_MEM, 5), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_X, 0, 4, 0), + BPF_STMT(BPF_LD | BPF_LEN, 0), + BPF_JUMP(BPF_JMP | BPF_JGT | BPF_K, 0x73, 1, 0), + BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 0xfc23ac00, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 0xffff), + BPF_STMT(BPF_RET | BPF_K, 0), + }, + CLASSIC, + { 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6, + 0x3c, 0x07, 0x54, 0x43, 0xe5, 0x76, + 0x08, 0x00, + 0x45, 0x10, 0x00, 0x64, 0x75, 0xb5, + 0x40, 0x00, 0x40, 0x06, 0xad, 0x2e, /* IP header */ + 0x0a, 0x01, 0x01, 0x95, /* ip src */ + 0x0a, 0x01, 0x02, 0x0a, /* ip dst */ + 0xc2, 0x24, + 0x00, 0x16 /* dst port */ }, + { { 10, 0 }, { 30, 0 }, { 100, 65535 } }, + }, + { + "RET_A", + .u.insns = { + /* check that unitialized X and A contain zeros */ + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_RET | BPF_A, 0) + }, + CLASSIC, + { }, + { {1, 0}, {2, 0} }, + }, + { + "INT: ADD trivial", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R1, 1), + BPF_ALU64_IMM(BPF_ADD, R1, 2), + BPF_ALU64_IMM(BPF_MOV, R2, 3), + BPF_ALU64_REG(BPF_SUB, R1, R2), + BPF_ALU64_IMM(BPF_ADD, R1, -1), + BPF_ALU64_IMM(BPF_MUL, R1, 3), + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffd } } + }, + { + "INT: MUL_X", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, -1), + BPF_ALU64_IMM(BPF_MOV, R1, -1), + BPF_ALU64_IMM(BPF_MOV, R2, 3), + BPF_ALU64_REG(BPF_MUL, R1, R2), + BPF_JMP_IMM(BPF_JEQ, R1, 0xfffffffd, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { + "INT: MUL_X2", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -1), + BPF_ALU32_IMM(BPF_MOV, R1, -1), + BPF_ALU32_IMM(BPF_MOV, R2, 3), + BPF_ALU64_REG(BPF_MUL, R1, R2), + BPF_ALU64_IMM(BPF_RSH, R1, 8), + BPF_JMP_IMM(BPF_JEQ, R1, 0x2ffffff, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { + "INT: MUL32_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -1), + BPF_ALU64_IMM(BPF_MOV, R1, -1), + BPF_ALU32_IMM(BPF_MOV, R2, 3), + BPF_ALU32_REG(BPF_MUL, R1, R2), + BPF_ALU64_IMM(BPF_RSH, R1, 8), + BPF_JMP_IMM(BPF_JEQ, R1, 0xffffff, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { + /* Have to test all register combinations, since + * JITing of different registers will produce + * different asm code. + */ + "INT: ADD 64-bit", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_ALU64_IMM(BPF_MOV, R1, 1), + BPF_ALU64_IMM(BPF_MOV, R2, 2), + BPF_ALU64_IMM(BPF_MOV, R3, 3), + BPF_ALU64_IMM(BPF_MOV, R4, 4), + BPF_ALU64_IMM(BPF_MOV, R5, 5), + BPF_ALU64_IMM(BPF_MOV, R6, 6), + BPF_ALU64_IMM(BPF_MOV, R7, 7), + BPF_ALU64_IMM(BPF_MOV, R8, 8), + BPF_ALU64_IMM(BPF_MOV, R9, 9), + BPF_ALU64_IMM(BPF_ADD, R0, 20), + BPF_ALU64_IMM(BPF_ADD, R1, 20), + BPF_ALU64_IMM(BPF_ADD, R2, 20), + BPF_ALU64_IMM(BPF_ADD, R3, 20), + BPF_ALU64_IMM(BPF_ADD, R4, 20), + BPF_ALU64_IMM(BPF_ADD, R5, 20), + BPF_ALU64_IMM(BPF_ADD, R6, 20), + BPF_ALU64_IMM(BPF_ADD, R7, 20), + BPF_ALU64_IMM(BPF_ADD, R8, 20), + BPF_ALU64_IMM(BPF_ADD, R9, 20), + BPF_ALU64_IMM(BPF_SUB, R0, 10), + BPF_ALU64_IMM(BPF_SUB, R1, 10), + BPF_ALU64_IMM(BPF_SUB, R2, 10), + BPF_ALU64_IMM(BPF_SUB, R3, 10), + BPF_ALU64_IMM(BPF_SUB, R4, 10), + BPF_ALU64_IMM(BPF_SUB, R5, 10), + BPF_ALU64_IMM(BPF_SUB, R6, 10), + BPF_ALU64_IMM(BPF_SUB, R7, 10), + BPF_ALU64_IMM(BPF_SUB, R8, 10), + BPF_ALU64_IMM(BPF_SUB, R9, 10), + BPF_ALU64_REG(BPF_ADD, R0, R0), + BPF_ALU64_REG(BPF_ADD, R0, R1), + BPF_ALU64_REG(BPF_ADD, R0, R2), + BPF_ALU64_REG(BPF_ADD, R0, R3), + BPF_ALU64_REG(BPF_ADD, R0, R4), + BPF_ALU64_REG(BPF_ADD, R0, R5), + BPF_ALU64_REG(BPF_ADD, R0, R6), + BPF_ALU64_REG(BPF_ADD, R0, R7), + BPF_ALU64_REG(BPF_ADD, R0, R8), + BPF_ALU64_REG(BPF_ADD, R0, R9), /* R0 == 155 */ + BPF_JMP_IMM(BPF_JEQ, R0, 155, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, R1, R0), + BPF_ALU64_REG(BPF_ADD, R1, R1), + BPF_ALU64_REG(BPF_ADD, R1, R2), + BPF_ALU64_REG(BPF_ADD, R1, R3), + BPF_ALU64_REG(BPF_ADD, R1, R4), + BPF_ALU64_REG(BPF_ADD, R1, R5), + BPF_ALU64_REG(BPF_ADD, R1, R6), + BPF_ALU64_REG(BPF_ADD, R1, R7), + BPF_ALU64_REG(BPF_ADD, R1, R8), + BPF_ALU64_REG(BPF_ADD, R1, R9), /* R1 == 456 */ + BPF_JMP_IMM(BPF_JEQ, R1, 456, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, R2, R0), + BPF_ALU64_REG(BPF_ADD, R2, R1), + BPF_ALU64_REG(BPF_ADD, R2, R2), + BPF_ALU64_REG(BPF_ADD, R2, R3), + BPF_ALU64_REG(BPF_ADD, R2, R4), + BPF_ALU64_REG(BPF_ADD, R2, R5), + BPF_ALU64_REG(BPF_ADD, R2, R6), + BPF_ALU64_REG(BPF_ADD, R2, R7), + BPF_ALU64_REG(BPF_ADD, R2, R8), + BPF_ALU64_REG(BPF_ADD, R2, R9), /* R2 == 1358 */ + BPF_JMP_IMM(BPF_JEQ, R2, 1358, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, R3, R0), + BPF_ALU64_REG(BPF_ADD, R3, R1), + BPF_ALU64_REG(BPF_ADD, R3, R2), + BPF_ALU64_REG(BPF_ADD, R3, R3), + BPF_ALU64_REG(BPF_ADD, R3, R4), + BPF_ALU64_REG(BPF_ADD, R3, R5), + BPF_ALU64_REG(BPF_ADD, R3, R6), + BPF_ALU64_REG(BPF_ADD, R3, R7), + BPF_ALU64_REG(BPF_ADD, R3, R8), + BPF_ALU64_REG(BPF_ADD, R3, R9), /* R3 == 4063 */ + BPF_JMP_IMM(BPF_JEQ, R3, 4063, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, R4, R0), + BPF_ALU64_REG(BPF_ADD, R4, R1), + BPF_ALU64_REG(BPF_ADD, R4, R2), + BPF_ALU64_REG(BPF_ADD, R4, R3), + BPF_ALU64_REG(BPF_ADD, R4, R4), + BPF_ALU64_REG(BPF_ADD, R4, R5), + BPF_ALU64_REG(BPF_ADD, R4, R6), + BPF_ALU64_REG(BPF_ADD, R4, R7), + BPF_ALU64_REG(BPF_ADD, R4, R8), + BPF_ALU64_REG(BPF_ADD, R4, R9), /* R4 == 12177 */ + BPF_JMP_IMM(BPF_JEQ, R4, 12177, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, R5, R0), + BPF_ALU64_REG(BPF_ADD, R5, R1), + BPF_ALU64_REG(BPF_ADD, R5, R2), + BPF_ALU64_REG(BPF_ADD, R5, R3), + BPF_ALU64_REG(BPF_ADD, R5, R4), + BPF_ALU64_REG(BPF_ADD, R5, R5), + BPF_ALU64_REG(BPF_ADD, R5, R6), + BPF_ALU64_REG(BPF_ADD, R5, R7), + BPF_ALU64_REG(BPF_ADD, R5, R8), + BPF_ALU64_REG(BPF_ADD, R5, R9), /* R5 == 36518 */ + BPF_JMP_IMM(BPF_JEQ, R5, 36518, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, R6, R0), + BPF_ALU64_REG(BPF_ADD, R6, R1), + BPF_ALU64_REG(BPF_ADD, R6, R2), + BPF_ALU64_REG(BPF_ADD, R6, R3), + BPF_ALU64_REG(BPF_ADD, R6, R4), + BPF_ALU64_REG(BPF_ADD, R6, R5), + BPF_ALU64_REG(BPF_ADD, R6, R6), + BPF_ALU64_REG(BPF_ADD, R6, R7), + BPF_ALU64_REG(BPF_ADD, R6, R8), + BPF_ALU64_REG(BPF_ADD, R6, R9), /* R6 == 109540 */ + BPF_JMP_IMM(BPF_JEQ, R6, 109540, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, R7, R0), + BPF_ALU64_REG(BPF_ADD, R7, R1), + BPF_ALU64_REG(BPF_ADD, R7, R2), + BPF_ALU64_REG(BPF_ADD, R7, R3), + BPF_ALU64_REG(BPF_ADD, R7, R4), + BPF_ALU64_REG(BPF_ADD, R7, R5), + BPF_ALU64_REG(BPF_ADD, R7, R6), + BPF_ALU64_REG(BPF_ADD, R7, R7), + BPF_ALU64_REG(BPF_ADD, R7, R8), + BPF_ALU64_REG(BPF_ADD, R7, R9), /* R7 == 328605 */ + BPF_JMP_IMM(BPF_JEQ, R7, 328605, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, R8, R0), + BPF_ALU64_REG(BPF_ADD, R8, R1), + BPF_ALU64_REG(BPF_ADD, R8, R2), + BPF_ALU64_REG(BPF_ADD, R8, R3), + BPF_ALU64_REG(BPF_ADD, R8, R4), + BPF_ALU64_REG(BPF_ADD, R8, R5), + BPF_ALU64_REG(BPF_ADD, R8, R6), + BPF_ALU64_REG(BPF_ADD, R8, R7), + BPF_ALU64_REG(BPF_ADD, R8, R8), + BPF_ALU64_REG(BPF_ADD, R8, R9), /* R8 == 985799 */ + BPF_JMP_IMM(BPF_JEQ, R8, 985799, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, R9, R0), + BPF_ALU64_REG(BPF_ADD, R9, R1), + BPF_ALU64_REG(BPF_ADD, R9, R2), + BPF_ALU64_REG(BPF_ADD, R9, R3), + BPF_ALU64_REG(BPF_ADD, R9, R4), + BPF_ALU64_REG(BPF_ADD, R9, R5), + BPF_ALU64_REG(BPF_ADD, R9, R6), + BPF_ALU64_REG(BPF_ADD, R9, R7), + BPF_ALU64_REG(BPF_ADD, R9, R8), + BPF_ALU64_REG(BPF_ADD, R9, R9), /* R9 == 2957380 */ + BPF_ALU64_REG(BPF_MOV, R0, R9), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2957380 } } + }, + { + "INT: ADD 32-bit", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 20), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R2, 2), + BPF_ALU32_IMM(BPF_MOV, R3, 3), + BPF_ALU32_IMM(BPF_MOV, R4, 4), + BPF_ALU32_IMM(BPF_MOV, R5, 5), + BPF_ALU32_IMM(BPF_MOV, R6, 6), + BPF_ALU32_IMM(BPF_MOV, R7, 7), + BPF_ALU32_IMM(BPF_MOV, R8, 8), + BPF_ALU32_IMM(BPF_MOV, R9, 9), + BPF_ALU64_IMM(BPF_ADD, R1, 10), + BPF_ALU64_IMM(BPF_ADD, R2, 10), + BPF_ALU64_IMM(BPF_ADD, R3, 10), + BPF_ALU64_IMM(BPF_ADD, R4, 10), + BPF_ALU64_IMM(BPF_ADD, R5, 10), + BPF_ALU64_IMM(BPF_ADD, R6, 10), + BPF_ALU64_IMM(BPF_ADD, R7, 10), + BPF_ALU64_IMM(BPF_ADD, R8, 10), + BPF_ALU64_IMM(BPF_ADD, R9, 10), + BPF_ALU32_REG(BPF_ADD, R0, R1), + BPF_ALU32_REG(BPF_ADD, R0, R2), + BPF_ALU32_REG(BPF_ADD, R0, R3), + BPF_ALU32_REG(BPF_ADD, R0, R4), + BPF_ALU32_REG(BPF_ADD, R0, R5), + BPF_ALU32_REG(BPF_ADD, R0, R6), + BPF_ALU32_REG(BPF_ADD, R0, R7), + BPF_ALU32_REG(BPF_ADD, R0, R8), + BPF_ALU32_REG(BPF_ADD, R0, R9), /* R0 == 155 */ + BPF_JMP_IMM(BPF_JEQ, R0, 155, 1), + BPF_EXIT_INSN(), + BPF_ALU32_REG(BPF_ADD, R1, R0), + BPF_ALU32_REG(BPF_ADD, R1, R1), + BPF_ALU32_REG(BPF_ADD, R1, R2), + BPF_ALU32_REG(BPF_ADD, R1, R3), + BPF_ALU32_REG(BPF_ADD, R1, R4), + BPF_ALU32_REG(BPF_ADD, R1, R5), + BPF_ALU32_REG(BPF_ADD, R1, R6), + BPF_ALU32_REG(BPF_ADD, R1, R7), + BPF_ALU32_REG(BPF_ADD, R1, R8), + BPF_ALU32_REG(BPF_ADD, R1, R9), /* R1 == 456 */ + BPF_JMP_IMM(BPF_JEQ, R1, 456, 1), + BPF_EXIT_INSN(), + BPF_ALU32_REG(BPF_ADD, R2, R0), + BPF_ALU32_REG(BPF_ADD, R2, R1), + BPF_ALU32_REG(BPF_ADD, R2, R2), + BPF_ALU32_REG(BPF_ADD, R2, R3), + BPF_ALU32_REG(BPF_ADD, R2, R4), + BPF_ALU32_REG(BPF_ADD, R2, R5), + BPF_ALU32_REG(BPF_ADD, R2, R6), + BPF_ALU32_REG(BPF_ADD, R2, R7), + BPF_ALU32_REG(BPF_ADD, R2, R8), + BPF_ALU32_REG(BPF_ADD, R2, R9), /* R2 == 1358 */ + BPF_JMP_IMM(BPF_JEQ, R2, 1358, 1), + BPF_EXIT_INSN(), + BPF_ALU32_REG(BPF_ADD, R3, R0), + BPF_ALU32_REG(BPF_ADD, R3, R1), + BPF_ALU32_REG(BPF_ADD, R3, R2), + BPF_ALU32_REG(BPF_ADD, R3, R3), + BPF_ALU32_REG(BPF_ADD, R3, R4), + BPF_ALU32_REG(BPF_ADD, R3, R5), + BPF_ALU32_REG(BPF_ADD, R3, R6), + BPF_ALU32_REG(BPF_ADD, R3, R7), + BPF_ALU32_REG(BPF_ADD, R3, R8), + BPF_ALU32_REG(BPF_ADD, R3, R9), /* R3 == 4063 */ + BPF_JMP_IMM(BPF_JEQ, R3, 4063, 1), + BPF_EXIT_INSN(), + BPF_ALU32_REG(BPF_ADD, R4, R0), + BPF_ALU32_REG(BPF_ADD, R4, R1), + BPF_ALU32_REG(BPF_ADD, R4, R2), + BPF_ALU32_REG(BPF_ADD, R4, R3), + BPF_ALU32_REG(BPF_ADD, R4, R4), + BPF_ALU32_REG(BPF_ADD, R4, R5), + BPF_ALU32_REG(BPF_ADD, R4, R6), + BPF_ALU32_REG(BPF_ADD, R4, R7), + BPF_ALU32_REG(BPF_ADD, R4, R8), + BPF_ALU32_REG(BPF_ADD, R4, R9), /* R4 == 12177 */ + BPF_JMP_IMM(BPF_JEQ, R4, 12177, 1), + BPF_EXIT_INSN(), + BPF_ALU32_REG(BPF_ADD, R5, R0), + BPF_ALU32_REG(BPF_ADD, R5, R1), + BPF_ALU32_REG(BPF_ADD, R5, R2), + BPF_ALU32_REG(BPF_ADD, R5, R3), + BPF_ALU32_REG(BPF_ADD, R5, R4), + BPF_ALU32_REG(BPF_ADD, R5, R5), + BPF_ALU32_REG(BPF_ADD, R5, R6), + BPF_ALU32_REG(BPF_ADD, R5, R7), + BPF_ALU32_REG(BPF_ADD, R5, R8), + BPF_ALU32_REG(BPF_ADD, R5, R9), /* R5 == 36518 */ + BPF_JMP_IMM(BPF_JEQ, R5, 36518, 1), + BPF_EXIT_INSN(), + BPF_ALU32_REG(BPF_ADD, R6, R0), + BPF_ALU32_REG(BPF_ADD, R6, R1), + BPF_ALU32_REG(BPF_ADD, R6, R2), + BPF_ALU32_REG(BPF_ADD, R6, R3), + BPF_ALU32_REG(BPF_ADD, R6, R4), + BPF_ALU32_REG(BPF_ADD, R6, R5), + BPF_ALU32_REG(BPF_ADD, R6, R6), + BPF_ALU32_REG(BPF_ADD, R6, R7), + BPF_ALU32_REG(BPF_ADD, R6, R8), + BPF_ALU32_REG(BPF_ADD, R6, R9), /* R6 == 109540 */ + BPF_JMP_IMM(BPF_JEQ, R6, 109540, 1), + BPF_EXIT_INSN(), + BPF_ALU32_REG(BPF_ADD, R7, R0), + BPF_ALU32_REG(BPF_ADD, R7, R1), + BPF_ALU32_REG(BPF_ADD, R7, R2), + BPF_ALU32_REG(BPF_ADD, R7, R3), + BPF_ALU32_REG(BPF_ADD, R7, R4), + BPF_ALU32_REG(BPF_ADD, R7, R5), + BPF_ALU32_REG(BPF_ADD, R7, R6), + BPF_ALU32_REG(BPF_ADD, R7, R7), + BPF_ALU32_REG(BPF_ADD, R7, R8), + BPF_ALU32_REG(BPF_ADD, R7, R9), /* R7 == 328605 */ + BPF_JMP_IMM(BPF_JEQ, R7, 328605, 1), + BPF_EXIT_INSN(), + BPF_ALU32_REG(BPF_ADD, R8, R0), + BPF_ALU32_REG(BPF_ADD, R8, R1), + BPF_ALU32_REG(BPF_ADD, R8, R2), + BPF_ALU32_REG(BPF_ADD, R8, R3), + BPF_ALU32_REG(BPF_ADD, R8, R4), + BPF_ALU32_REG(BPF_ADD, R8, R5), + BPF_ALU32_REG(BPF_ADD, R8, R6), + BPF_ALU32_REG(BPF_ADD, R8, R7), + BPF_ALU32_REG(BPF_ADD, R8, R8), + BPF_ALU32_REG(BPF_ADD, R8, R9), /* R8 == 985799 */ + BPF_JMP_IMM(BPF_JEQ, R8, 985799, 1), + BPF_EXIT_INSN(), + BPF_ALU32_REG(BPF_ADD, R9, R0), + BPF_ALU32_REG(BPF_ADD, R9, R1), + BPF_ALU32_REG(BPF_ADD, R9, R2), + BPF_ALU32_REG(BPF_ADD, R9, R3), + BPF_ALU32_REG(BPF_ADD, R9, R4), + BPF_ALU32_REG(BPF_ADD, R9, R5), + BPF_ALU32_REG(BPF_ADD, R9, R6), + BPF_ALU32_REG(BPF_ADD, R9, R7), + BPF_ALU32_REG(BPF_ADD, R9, R8), + BPF_ALU32_REG(BPF_ADD, R9, R9), /* R9 == 2957380 */ + BPF_ALU32_REG(BPF_MOV, R0, R9), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2957380 } } + }, + { /* Mainly checking JIT here. */ + "INT: SUB", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_ALU64_IMM(BPF_MOV, R1, 1), + BPF_ALU64_IMM(BPF_MOV, R2, 2), + BPF_ALU64_IMM(BPF_MOV, R3, 3), + BPF_ALU64_IMM(BPF_MOV, R4, 4), + BPF_ALU64_IMM(BPF_MOV, R5, 5), + BPF_ALU64_IMM(BPF_MOV, R6, 6), + BPF_ALU64_IMM(BPF_MOV, R7, 7), + BPF_ALU64_IMM(BPF_MOV, R8, 8), + BPF_ALU64_IMM(BPF_MOV, R9, 9), + BPF_ALU64_REG(BPF_SUB, R0, R0), + BPF_ALU64_REG(BPF_SUB, R0, R1), + BPF_ALU64_REG(BPF_SUB, R0, R2), + BPF_ALU64_REG(BPF_SUB, R0, R3), + BPF_ALU64_REG(BPF_SUB, R0, R4), + BPF_ALU64_REG(BPF_SUB, R0, R5), + BPF_ALU64_REG(BPF_SUB, R0, R6), + BPF_ALU64_REG(BPF_SUB, R0, R7), + BPF_ALU64_REG(BPF_SUB, R0, R8), + BPF_ALU64_REG(BPF_SUB, R0, R9), + BPF_ALU64_IMM(BPF_SUB, R0, 10), + BPF_JMP_IMM(BPF_JEQ, R0, -55, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, R1, R0), + BPF_ALU64_REG(BPF_SUB, R1, R2), + BPF_ALU64_REG(BPF_SUB, R1, R3), + BPF_ALU64_REG(BPF_SUB, R1, R4), + BPF_ALU64_REG(BPF_SUB, R1, R5), + BPF_ALU64_REG(BPF_SUB, R1, R6), + BPF_ALU64_REG(BPF_SUB, R1, R7), + BPF_ALU64_REG(BPF_SUB, R1, R8), + BPF_ALU64_REG(BPF_SUB, R1, R9), + BPF_ALU64_IMM(BPF_SUB, R1, 10), + BPF_ALU64_REG(BPF_SUB, R2, R0), + BPF_ALU64_REG(BPF_SUB, R2, R1), + BPF_ALU64_REG(BPF_SUB, R2, R3), + BPF_ALU64_REG(BPF_SUB, R2, R4), + BPF_ALU64_REG(BPF_SUB, R2, R5), + BPF_ALU64_REG(BPF_SUB, R2, R6), + BPF_ALU64_REG(BPF_SUB, R2, R7), + BPF_ALU64_REG(BPF_SUB, R2, R8), + BPF_ALU64_REG(BPF_SUB, R2, R9), + BPF_ALU64_IMM(BPF_SUB, R2, 10), + BPF_ALU64_REG(BPF_SUB, R3, R0), + BPF_ALU64_REG(BPF_SUB, R3, R1), + BPF_ALU64_REG(BPF_SUB, R3, R2), + BPF_ALU64_REG(BPF_SUB, R3, R4), + BPF_ALU64_REG(BPF_SUB, R3, R5), + BPF_ALU64_REG(BPF_SUB, R3, R6), + BPF_ALU64_REG(BPF_SUB, R3, R7), + BPF_ALU64_REG(BPF_SUB, R3, R8), + BPF_ALU64_REG(BPF_SUB, R3, R9), + BPF_ALU64_IMM(BPF_SUB, R3, 10), + BPF_ALU64_REG(BPF_SUB, R4, R0), + BPF_ALU64_REG(BPF_SUB, R4, R1), + BPF_ALU64_REG(BPF_SUB, R4, R2), + BPF_ALU64_REG(BPF_SUB, R4, R3), + BPF_ALU64_REG(BPF_SUB, R4, R5), + BPF_ALU64_REG(BPF_SUB, R4, R6), + BPF_ALU64_REG(BPF_SUB, R4, R7), + BPF_ALU64_REG(BPF_SUB, R4, R8), + BPF_ALU64_REG(BPF_SUB, R4, R9), + BPF_ALU64_IMM(BPF_SUB, R4, 10), + BPF_ALU64_REG(BPF_SUB, R5, R0), + BPF_ALU64_REG(BPF_SUB, R5, R1), + BPF_ALU64_REG(BPF_SUB, R5, R2), + BPF_ALU64_REG(BPF_SUB, R5, R3), + BPF_ALU64_REG(BPF_SUB, R5, R4), + BPF_ALU64_REG(BPF_SUB, R5, R6), + BPF_ALU64_REG(BPF_SUB, R5, R7), + BPF_ALU64_REG(BPF_SUB, R5, R8), + BPF_ALU64_REG(BPF_SUB, R5, R9), + BPF_ALU64_IMM(BPF_SUB, R5, 10), + BPF_ALU64_REG(BPF_SUB, R6, R0), + BPF_ALU64_REG(BPF_SUB, R6, R1), + BPF_ALU64_REG(BPF_SUB, R6, R2), + BPF_ALU64_REG(BPF_SUB, R6, R3), + BPF_ALU64_REG(BPF_SUB, R6, R4), + BPF_ALU64_REG(BPF_SUB, R6, R5), + BPF_ALU64_REG(BPF_SUB, R6, R7), + BPF_ALU64_REG(BPF_SUB, R6, R8), + BPF_ALU64_REG(BPF_SUB, R6, R9), + BPF_ALU64_IMM(BPF_SUB, R6, 10), + BPF_ALU64_REG(BPF_SUB, R7, R0), + BPF_ALU64_REG(BPF_SUB, R7, R1), + BPF_ALU64_REG(BPF_SUB, R7, R2), + BPF_ALU64_REG(BPF_SUB, R7, R3), + BPF_ALU64_REG(BPF_SUB, R7, R4), + BPF_ALU64_REG(BPF_SUB, R7, R5), + BPF_ALU64_REG(BPF_SUB, R7, R6), + BPF_ALU64_REG(BPF_SUB, R7, R8), + BPF_ALU64_REG(BPF_SUB, R7, R9), + BPF_ALU64_IMM(BPF_SUB, R7, 10), + BPF_ALU64_REG(BPF_SUB, R8, R0), + BPF_ALU64_REG(BPF_SUB, R8, R1), + BPF_ALU64_REG(BPF_SUB, R8, R2), + BPF_ALU64_REG(BPF_SUB, R8, R3), + BPF_ALU64_REG(BPF_SUB, R8, R4), + BPF_ALU64_REG(BPF_SUB, R8, R5), + BPF_ALU64_REG(BPF_SUB, R8, R6), + BPF_ALU64_REG(BPF_SUB, R8, R7), + BPF_ALU64_REG(BPF_SUB, R8, R9), + BPF_ALU64_IMM(BPF_SUB, R8, 10), + BPF_ALU64_REG(BPF_SUB, R9, R0), + BPF_ALU64_REG(BPF_SUB, R9, R1), + BPF_ALU64_REG(BPF_SUB, R9, R2), + BPF_ALU64_REG(BPF_SUB, R9, R3), + BPF_ALU64_REG(BPF_SUB, R9, R4), + BPF_ALU64_REG(BPF_SUB, R9, R5), + BPF_ALU64_REG(BPF_SUB, R9, R6), + BPF_ALU64_REG(BPF_SUB, R9, R7), + BPF_ALU64_REG(BPF_SUB, R9, R8), + BPF_ALU64_IMM(BPF_SUB, R9, 10), + BPF_ALU64_IMM(BPF_SUB, R0, 10), + BPF_ALU64_IMM(BPF_NEG, R0, 0), + BPF_ALU64_REG(BPF_SUB, R0, R1), + BPF_ALU64_REG(BPF_SUB, R0, R2), + BPF_ALU64_REG(BPF_SUB, R0, R3), + BPF_ALU64_REG(BPF_SUB, R0, R4), + BPF_ALU64_REG(BPF_SUB, R0, R5), + BPF_ALU64_REG(BPF_SUB, R0, R6), + BPF_ALU64_REG(BPF_SUB, R0, R7), + BPF_ALU64_REG(BPF_SUB, R0, R8), + BPF_ALU64_REG(BPF_SUB, R0, R9), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 11 } } + }, + { /* Mainly checking JIT here. */ + "INT: XOR", + .u.insns_int = { + BPF_ALU64_REG(BPF_SUB, R0, R0), + BPF_ALU64_REG(BPF_XOR, R1, R1), + BPF_JMP_REG(BPF_JEQ, R0, R1, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_MOV, R0, 10), + BPF_ALU64_IMM(BPF_MOV, R1, -1), + BPF_ALU64_REG(BPF_SUB, R1, R1), + BPF_ALU64_REG(BPF_XOR, R2, R2), + BPF_JMP_REG(BPF_JEQ, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, R2, R2), + BPF_ALU64_REG(BPF_XOR, R3, R3), + BPF_ALU64_IMM(BPF_MOV, R0, 10), + BPF_ALU64_IMM(BPF_MOV, R1, -1), + BPF_JMP_REG(BPF_JEQ, R2, R3, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, R3, R3), + BPF_ALU64_REG(BPF_XOR, R4, R4), + BPF_ALU64_IMM(BPF_MOV, R2, 1), + BPF_ALU64_IMM(BPF_MOV, R5, -1), + BPF_JMP_REG(BPF_JEQ, R3, R4, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, R4, R4), + BPF_ALU64_REG(BPF_XOR, R5, R5), + BPF_ALU64_IMM(BPF_MOV, R3, 1), + BPF_ALU64_IMM(BPF_MOV, R7, -1), + BPF_JMP_REG(BPF_JEQ, R5, R4, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_MOV, R5, 1), + BPF_ALU64_REG(BPF_SUB, R5, R5), + BPF_ALU64_REG(BPF_XOR, R6, R6), + BPF_ALU64_IMM(BPF_MOV, R1, 1), + BPF_ALU64_IMM(BPF_MOV, R8, -1), + BPF_JMP_REG(BPF_JEQ, R5, R6, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, R6, R6), + BPF_ALU64_REG(BPF_XOR, R7, R7), + BPF_JMP_REG(BPF_JEQ, R7, R6, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, R7, R7), + BPF_ALU64_REG(BPF_XOR, R8, R8), + BPF_JMP_REG(BPF_JEQ, R7, R8, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, R8, R8), + BPF_ALU64_REG(BPF_XOR, R9, R9), + BPF_JMP_REG(BPF_JEQ, R9, R8, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, R9, R9), + BPF_ALU64_REG(BPF_XOR, R0, R0), + BPF_JMP_REG(BPF_JEQ, R9, R0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, R1, R1), + BPF_ALU64_REG(BPF_XOR, R0, R0), + BPF_JMP_REG(BPF_JEQ, R9, R0, 2), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { /* Mainly checking JIT here. */ + "INT: MUL", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, 11), + BPF_ALU64_IMM(BPF_MOV, R1, 1), + BPF_ALU64_IMM(BPF_MOV, R2, 2), + BPF_ALU64_IMM(BPF_MOV, R3, 3), + BPF_ALU64_IMM(BPF_MOV, R4, 4), + BPF_ALU64_IMM(BPF_MOV, R5, 5), + BPF_ALU64_IMM(BPF_MOV, R6, 6), + BPF_ALU64_IMM(BPF_MOV, R7, 7), + BPF_ALU64_IMM(BPF_MOV, R8, 8), + BPF_ALU64_IMM(BPF_MOV, R9, 9), + BPF_ALU64_REG(BPF_MUL, R0, R0), + BPF_ALU64_REG(BPF_MUL, R0, R1), + BPF_ALU64_REG(BPF_MUL, R0, R2), + BPF_ALU64_REG(BPF_MUL, R0, R3), + BPF_ALU64_REG(BPF_MUL, R0, R4), + BPF_ALU64_REG(BPF_MUL, R0, R5), + BPF_ALU64_REG(BPF_MUL, R0, R6), + BPF_ALU64_REG(BPF_MUL, R0, R7), + BPF_ALU64_REG(BPF_MUL, R0, R8), + BPF_ALU64_REG(BPF_MUL, R0, R9), + BPF_ALU64_IMM(BPF_MUL, R0, 10), + BPF_JMP_IMM(BPF_JEQ, R0, 439084800, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_MUL, R1, R0), + BPF_ALU64_REG(BPF_MUL, R1, R2), + BPF_ALU64_REG(BPF_MUL, R1, R3), + BPF_ALU64_REG(BPF_MUL, R1, R4), + BPF_ALU64_REG(BPF_MUL, R1, R5), + BPF_ALU64_REG(BPF_MUL, R1, R6), + BPF_ALU64_REG(BPF_MUL, R1, R7), + BPF_ALU64_REG(BPF_MUL, R1, R8), + BPF_ALU64_REG(BPF_MUL, R1, R9), + BPF_ALU64_IMM(BPF_MUL, R1, 10), + BPF_ALU64_REG(BPF_MOV, R2, R1), + BPF_ALU64_IMM(BPF_RSH, R2, 32), + BPF_JMP_IMM(BPF_JEQ, R2, 0x5a924, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_LSH, R1, 32), + BPF_ALU64_IMM(BPF_ARSH, R1, 32), + BPF_JMP_IMM(BPF_JEQ, R1, 0xebb90000, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_MUL, R2, R0), + BPF_ALU64_REG(BPF_MUL, R2, R1), + BPF_ALU64_REG(BPF_MUL, R2, R3), + BPF_ALU64_REG(BPF_MUL, R2, R4), + BPF_ALU64_REG(BPF_MUL, R2, R5), + BPF_ALU64_REG(BPF_MUL, R2, R6), + BPF_ALU64_REG(BPF_MUL, R2, R7), + BPF_ALU64_REG(BPF_MUL, R2, R8), + BPF_ALU64_REG(BPF_MUL, R2, R9), + BPF_ALU64_IMM(BPF_MUL, R2, 10), + BPF_ALU64_IMM(BPF_RSH, R2, 32), + BPF_ALU64_REG(BPF_MOV, R0, R2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x35d97ef2 } } + }, + { + "INT: ALU MIX", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, 11), + BPF_ALU64_IMM(BPF_ADD, R0, -1), + BPF_ALU64_IMM(BPF_MOV, R2, 2), + BPF_ALU64_IMM(BPF_XOR, R2, 3), + BPF_ALU64_REG(BPF_DIV, R0, R2), + BPF_JMP_IMM(BPF_JEQ, R0, 10, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_MOD, R0, 3), + BPF_JMP_IMM(BPF_JEQ, R0, 1, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_MOV, R0, -1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } } + }, + { + "INT: DIV + ABS", + .u.insns_int = { + BPF_ALU64_REG(BPF_MOV, R6, R1), + BPF_LD_ABS(BPF_B, 3), + BPF_ALU64_IMM(BPF_MOV, R2, 2), + BPF_ALU32_REG(BPF_DIV, R0, R2), + BPF_ALU64_REG(BPF_MOV, R8, R0), + BPF_LD_ABS(BPF_B, 4), + BPF_ALU64_REG(BPF_ADD, R8, R0), + BPF_LD_IND(BPF_B, R8, -70), + BPF_EXIT_INSN(), + }, + INTERNAL, + { 10, 20, 30, 40, 50 }, + { { 4, 0 }, { 5, 10 } } + }, + { + "INT: DIV by zero", + .u.insns_int = { + BPF_ALU64_REG(BPF_MOV, R6, R1), + BPF_ALU64_IMM(BPF_MOV, R7, 0), + BPF_LD_ABS(BPF_B, 3), + BPF_ALU32_REG(BPF_DIV, R0, R7), + BPF_EXIT_INSN(), + }, + INTERNAL, + { 10, 20, 30, 40, 50 }, + { { 3, 0 }, { 4, 0 } } + }, + { + "check: missing ret", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 1), + }, + CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, + { }, + { } + }, + { + "check: div_k_0", + .u.insns = { + BPF_STMT(BPF_ALU | BPF_DIV | BPF_K, 0), + BPF_STMT(BPF_RET | BPF_K, 0) + }, + CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, + { }, + { } + }, + { + "check: unknown insn", + .u.insns = { + /* seccomp insn, rejected in socket filter */ + BPF_STMT(BPF_LDX | BPF_W | BPF_ABS, 0), + BPF_STMT(BPF_RET | BPF_K, 0) + }, + CLASSIC | FLAG_EXPECTED_FAIL, + { }, + { } + }, + { + "check: out of range spill/fill", + .u.insns = { + BPF_STMT(BPF_STX, 16), + BPF_STMT(BPF_RET | BPF_K, 0) + }, + CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, + { }, + { } + }, + { + "JUMPS + HOLES", + .u.insns = { + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_JUMP(BPF_JMP | BPF_JGE, 0, 13, 15), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ, 0x90c2894d, 3, 4), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ, 0x90c2894d, 1, 2), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_JUMP(BPF_JMP | BPF_JGE, 0, 14, 15), + BPF_JUMP(BPF_JMP | BPF_JGE, 0, 13, 14), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ, 0x2ac28349, 2, 3), + BPF_JUMP(BPF_JMP | BPF_JEQ, 0x2ac28349, 1, 2), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_JUMP(BPF_JMP | BPF_JGE, 0, 14, 15), + BPF_JUMP(BPF_JMP | BPF_JGE, 0, 13, 14), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ, 0x90d2ff41, 2, 3), + BPF_JUMP(BPF_JMP | BPF_JEQ, 0x90d2ff41, 1, 2), + BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 0), + BPF_STMT(BPF_RET | BPF_A, 0), + BPF_STMT(BPF_RET | BPF_A, 0), + }, + CLASSIC, + { 0x00, 0x1b, 0x21, 0x3c, 0x9d, 0xf8, + 0x90, 0xe2, 0xba, 0x0a, 0x56, 0xb4, + 0x08, 0x00, + 0x45, 0x00, 0x00, 0x28, 0x00, 0x00, + 0x20, 0x00, 0x40, 0x11, 0x00, 0x00, /* IP header */ + 0xc0, 0xa8, 0x33, 0x01, + 0xc0, 0xa8, 0x33, 0x02, + 0xbb, 0xb6, + 0xa9, 0xfa, + 0x00, 0x14, 0x00, 0x00, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc }, + { { 88, 0x001b } } + }, + { + "check: RET X", + .u.insns = { + BPF_STMT(BPF_RET | BPF_X, 0), + }, + CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, + { }, + { }, + }, + { + "check: LDX + RET X", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_IMM, 42), + BPF_STMT(BPF_RET | BPF_X, 0), + }, + CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, + { }, + { }, + }, + { /* Mainly checking JIT here. */ + "M[]: alt STX + LDX", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_IMM, 100), + BPF_STMT(BPF_STX, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 0), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 1), + BPF_STMT(BPF_LDX | BPF_MEM, 1), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 2), + BPF_STMT(BPF_LDX | BPF_MEM, 2), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 3), + BPF_STMT(BPF_LDX | BPF_MEM, 3), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 4), + BPF_STMT(BPF_LDX | BPF_MEM, 4), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 5), + BPF_STMT(BPF_LDX | BPF_MEM, 5), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 6), + BPF_STMT(BPF_LDX | BPF_MEM, 6), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 7), + BPF_STMT(BPF_LDX | BPF_MEM, 7), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 8), + BPF_STMT(BPF_LDX | BPF_MEM, 8), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 9), + BPF_STMT(BPF_LDX | BPF_MEM, 9), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 10), + BPF_STMT(BPF_LDX | BPF_MEM, 10), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 11), + BPF_STMT(BPF_LDX | BPF_MEM, 11), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 12), + BPF_STMT(BPF_LDX | BPF_MEM, 12), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 13), + BPF_STMT(BPF_LDX | BPF_MEM, 13), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 14), + BPF_STMT(BPF_LDX | BPF_MEM, 14), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_STX, 15), + BPF_STMT(BPF_LDX | BPF_MEM, 15), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_K, 1), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_RET | BPF_A, 0), + }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 0, 116 } }, + }, + { /* Mainly checking JIT here. */ + "M[]: full STX + full LDX", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_IMM, 0xbadfeedb), + BPF_STMT(BPF_STX, 0), + BPF_STMT(BPF_LDX | BPF_IMM, 0xecabedae), + BPF_STMT(BPF_STX, 1), + BPF_STMT(BPF_LDX | BPF_IMM, 0xafccfeaf), + BPF_STMT(BPF_STX, 2), + BPF_STMT(BPF_LDX | BPF_IMM, 0xbffdcedc), + BPF_STMT(BPF_STX, 3), + BPF_STMT(BPF_LDX | BPF_IMM, 0xfbbbdccb), + BPF_STMT(BPF_STX, 4), + BPF_STMT(BPF_LDX | BPF_IMM, 0xfbabcbda), + BPF_STMT(BPF_STX, 5), + BPF_STMT(BPF_LDX | BPF_IMM, 0xaedecbdb), + BPF_STMT(BPF_STX, 6), + BPF_STMT(BPF_LDX | BPF_IMM, 0xadebbade), + BPF_STMT(BPF_STX, 7), + BPF_STMT(BPF_LDX | BPF_IMM, 0xfcfcfaec), + BPF_STMT(BPF_STX, 8), + BPF_STMT(BPF_LDX | BPF_IMM, 0xbcdddbdc), + BPF_STMT(BPF_STX, 9), + BPF_STMT(BPF_LDX | BPF_IMM, 0xfeefdfac), + BPF_STMT(BPF_STX, 10), + BPF_STMT(BPF_LDX | BPF_IMM, 0xcddcdeea), + BPF_STMT(BPF_STX, 11), + BPF_STMT(BPF_LDX | BPF_IMM, 0xaccfaebb), + BPF_STMT(BPF_STX, 12), + BPF_STMT(BPF_LDX | BPF_IMM, 0xbdcccdcf), + BPF_STMT(BPF_STX, 13), + BPF_STMT(BPF_LDX | BPF_IMM, 0xaaedecde), + BPF_STMT(BPF_STX, 14), + BPF_STMT(BPF_LDX | BPF_IMM, 0xfaeacdad), + BPF_STMT(BPF_STX, 15), + BPF_STMT(BPF_LDX | BPF_MEM, 0), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 1), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 2), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 3), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 4), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 5), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 6), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 7), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 8), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 9), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 10), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 11), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 12), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 13), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 14), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_LDX | BPF_MEM, 15), + BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), + BPF_STMT(BPF_RET | BPF_A, 0), + }, + CLASSIC | FLAG_NO_DATA, + { }, + { { 0, 0x2a5a5e5 } }, + }, + { + "check: SKF_AD_MAX", + .u.insns = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_MAX), + BPF_STMT(BPF_RET | BPF_A, 0), + }, + CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, + { }, + { }, + }, + { /* Passes checker but fails during runtime. */ + "LD [SKF_AD_OFF-1]", + .u.insns = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF - 1), + BPF_STMT(BPF_RET | BPF_K, 1), + }, + CLASSIC, + { }, + { { 1, 0 } }, + }, +}; + +static struct net_device dev; + +static struct sk_buff *populate_skb(char *buf, int size) +{ + struct sk_buff *skb; + + if (size >= MAX_DATA) + return NULL; + + skb = alloc_skb(MAX_DATA, GFP_KERNEL); + if (!skb) + return NULL; + + memcpy(__skb_put(skb, size), buf, size); + + /* Initialize a fake skb with test pattern. */ + skb_reset_mac_header(skb); + skb->protocol = htons(ETH_P_IP); + skb->pkt_type = SKB_TYPE; + skb->mark = SKB_MARK; + skb->hash = SKB_HASH; + skb->queue_mapping = SKB_QUEUE_MAP; + skb->vlan_tci = SKB_VLAN_TCI; + skb->dev = &dev; + skb->dev->ifindex = SKB_DEV_IFINDEX; + skb->dev->type = SKB_DEV_TYPE; + skb_set_network_header(skb, min(size, ETH_HLEN)); + + return skb; +} + +static void *generate_test_data(struct bpf_test *test, int sub) +{ + if (test->aux & FLAG_NO_DATA) + return NULL; + + /* Test case expects an skb, so populate one. Various + * subtests generate skbs of different sizes based on + * the same data. + */ + return populate_skb(test->data, test->test[sub].data_size); +} + +static void release_test_data(const struct bpf_test *test, void *data) +{ + if (test->aux & FLAG_NO_DATA) + return; + + kfree_skb(data); +} + +static int probe_filter_length(struct sock_filter *fp) +{ + int len = 0; + + for (len = MAX_INSNS - 1; len > 0; --len) + if (fp[len].code != 0 || fp[len].k != 0) + break; + + return len + 1; +} + +static struct sk_filter *generate_filter(int which, int *err) +{ + struct sk_filter *fp; + struct sock_fprog_kern fprog; + unsigned int flen = probe_filter_length(tests[which].u.insns); + __u8 test_type = tests[which].aux & TEST_TYPE_MASK; + + switch (test_type) { + case CLASSIC: + fprog.filter = tests[which].u.insns; + fprog.len = flen; + + *err = sk_unattached_filter_create(&fp, &fprog); + if (tests[which].aux & FLAG_EXPECTED_FAIL) { + if (*err == -EINVAL) { + pr_cont("PASS\n"); + /* Verifier rejected filter as expected. */ + *err = 0; + return NULL; + } else { + pr_cont("UNEXPECTED_PASS\n"); + /* Verifier didn't reject the test that's + * bad enough, just return! + */ + *err = -EINVAL; + return NULL; + } + } + /* We don't expect to fail. */ + if (*err) { + pr_cont("FAIL to attach err=%d len=%d\n", + *err, fprog.len); + return NULL; + } + break; + + case INTERNAL: + fp = kzalloc(sk_filter_size(flen), GFP_KERNEL); + if (fp == NULL) { + pr_cont("UNEXPECTED_FAIL no memory left\n"); + *err = -ENOMEM; + return NULL; + } + + fp->len = flen; + memcpy(fp->insnsi, tests[which].u.insns_int, + fp->len * sizeof(struct sock_filter_int)); + + sk_filter_select_runtime(fp); + break; + } + + *err = 0; + return fp; +} + +static void release_filter(struct sk_filter *fp, int which) +{ + __u8 test_type = tests[which].aux & TEST_TYPE_MASK; + + switch (test_type) { + case CLASSIC: + sk_unattached_filter_destroy(fp); + break; + case INTERNAL: + sk_filter_free(fp); + break; + } +} + +static int __run_one(const struct sk_filter *fp, const void *data, + int runs, u64 *duration) +{ + u64 start, finish; + int ret, i; + + start = ktime_to_us(ktime_get()); + + for (i = 0; i < runs; i++) + ret = SK_RUN_FILTER(fp, data); + + finish = ktime_to_us(ktime_get()); + + *duration = (finish - start) * 1000ULL; + do_div(*duration, runs); + + return ret; +} + +static int run_one(const struct sk_filter *fp, struct bpf_test *test) +{ + int err_cnt = 0, i, runs = MAX_TESTRUNS; + + for (i = 0; i < MAX_SUBTESTS; i++) { + void *data; + u64 duration; + u32 ret; + + if (test->test[i].data_size == 0 && + test->test[i].result == 0) + break; + + data = generate_test_data(test, i); + ret = __run_one(fp, data, runs, &duration); + release_test_data(test, data); + + if (ret == test->test[i].result) { + pr_cont("%lld ", duration); + } else { + pr_cont("ret %d != %d ", ret, + test->test[i].result); + err_cnt++; + } + } + + return err_cnt; +} + +static __init int test_bpf(void) +{ + int i, err_cnt = 0, pass_cnt = 0; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + struct sk_filter *fp; + int err; + + pr_info("#%d %s ", i, tests[i].descr); + + fp = generate_filter(i, &err); + if (fp == NULL) { + if (err == 0) { + pass_cnt++; + continue; + } + + return err; + } + err = run_one(fp, &tests[i]); + release_filter(fp, i); + + if (err) { + pr_cont("FAIL (%d times)\n", err); + err_cnt++; + } else { + pr_cont("PASS\n"); + pass_cnt++; + } + } + + pr_info("Summary: %d PASSED, %d FAILED\n", pass_cnt, err_cnt); + return err_cnt ? -EINVAL : 0; +} + +static int __init test_bpf_init(void) +{ + return test_bpf(); +} + +static void __exit test_bpf_exit(void) +{ +} + +module_init(test_bpf_init); +module_exit(test_bpf_exit); + +MODULE_LICENSE("GPL"); diff --git a/lib/test_module.c b/lib/test_module.c new file mode 100644 index 00000000000..319b66f1ff6 --- /dev/null +++ b/lib/test_module.c @@ -0,0 +1,33 @@ +/* + * This module emits "Hello, world" on printk when loaded. + * + * It is designed to be used for basic evaluation of the module loading + * subsystem (for example when validating module signing/verification). It + * lacks any extra dependencies, and will not normally be loaded by the + * system unless explicitly requested by name. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/printk.h> + +static int __init test_module_init(void) +{ + pr_warn("Hello, world\n"); + + return 0; +} + +module_init(test_module_init); + +static void __exit test_module_exit(void) +{ + pr_warn("Goodbye\n"); +} + +module_exit(test_module_exit); + +MODULE_AUTHOR("Kees Cook <keescook@chromium.org>"); +MODULE_LICENSE("GPL"); diff --git a/lib/test_user_copy.c b/lib/test_user_copy.c new file mode 100644 index 00000000000..0ecef3e4690 --- /dev/null +++ b/lib/test_user_copy.c @@ -0,0 +1,110 @@ +/* + * Kernel module for testing copy_to/from_user infrastructure. + * + * Copyright 2013 Google Inc. All Rights Reserved + * + * Authors: + * Kees Cook <keescook@chromium.org> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/mman.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> + +#define test(condition, msg) \ +({ \ + int cond = (condition); \ + if (cond) \ + pr_warn("%s\n", msg); \ + cond; \ +}) + +static int __init test_user_copy_init(void) +{ + int ret = 0; + char *kmem; + char __user *usermem; + char *bad_usermem; + unsigned long user_addr; + unsigned long value = 0x5A; + + kmem = kmalloc(PAGE_SIZE * 2, GFP_KERNEL); + if (!kmem) + return -ENOMEM; + + user_addr = vm_mmap(NULL, 0, PAGE_SIZE * 2, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + if (user_addr >= (unsigned long)(TASK_SIZE)) { + pr_warn("Failed to allocate user memory\n"); + kfree(kmem); + return -ENOMEM; + } + + usermem = (char __user *)user_addr; + bad_usermem = (char *)user_addr; + + /* Legitimate usage: none of these should fail. */ + ret |= test(copy_from_user(kmem, usermem, PAGE_SIZE), + "legitimate copy_from_user failed"); + ret |= test(copy_to_user(usermem, kmem, PAGE_SIZE), + "legitimate copy_to_user failed"); + ret |= test(get_user(value, (unsigned long __user *)usermem), + "legitimate get_user failed"); + ret |= test(put_user(value, (unsigned long __user *)usermem), + "legitimate put_user failed"); + + /* Invalid usage: none of these should succeed. */ + ret |= test(!copy_from_user(kmem, (char __user *)(kmem + PAGE_SIZE), + PAGE_SIZE), + "illegal all-kernel copy_from_user passed"); + ret |= test(!copy_from_user(bad_usermem, (char __user *)kmem, + PAGE_SIZE), + "illegal reversed copy_from_user passed"); + ret |= test(!copy_to_user((char __user *)kmem, kmem + PAGE_SIZE, + PAGE_SIZE), + "illegal all-kernel copy_to_user passed"); + ret |= test(!copy_to_user((char __user *)kmem, bad_usermem, + PAGE_SIZE), + "illegal reversed copy_to_user passed"); + ret |= test(!get_user(value, (unsigned long __user *)kmem), + "illegal get_user passed"); + ret |= test(!put_user(value, (unsigned long __user *)kmem), + "illegal put_user passed"); + + vm_munmap(user_addr, PAGE_SIZE * 2); + kfree(kmem); + + if (ret == 0) { + pr_info("tests passed.\n"); + return 0; + } + + return -EINVAL; +} + +module_init(test_user_copy_init); + +static void __exit test_user_copy_exit(void) +{ + pr_info("unloaded.\n"); +} + +module_exit(test_user_copy_exit); + +MODULE_AUTHOR("Kees Cook <keescook@chromium.org>"); +MODULE_LICENSE("GPL"); diff --git a/lib/textsearch.c b/lib/textsearch.c index e0cc0146ae6..0c7e9ab2d88 100644 --- a/lib/textsearch.c +++ b/lib/textsearch.c @@ -159,6 +159,7 @@ errout: spin_unlock(&ts_mod_lock); return err; } +EXPORT_SYMBOL(textsearch_register); /** * textsearch_unregister - unregister a textsearch module @@ -190,6 +191,7 @@ out: spin_unlock(&ts_mod_lock); return err; } +EXPORT_SYMBOL(textsearch_unregister); struct ts_linear_state { @@ -236,6 +238,7 @@ unsigned int textsearch_find_continuous(struct ts_config *conf, return textsearch_find(conf, state); } +EXPORT_SYMBOL(textsearch_find_continuous); /** * textsearch_prepare - Prepare a search @@ -298,6 +301,7 @@ errout: return ERR_PTR(err); } +EXPORT_SYMBOL(textsearch_prepare); /** * textsearch_destroy - destroy a search configuration @@ -316,9 +320,4 @@ void textsearch_destroy(struct ts_config *conf) kfree(conf); } - -EXPORT_SYMBOL(textsearch_register); -EXPORT_SYMBOL(textsearch_unregister); -EXPORT_SYMBOL(textsearch_prepare); -EXPORT_SYMBOL(textsearch_find_continuous); EXPORT_SYMBOL(textsearch_destroy); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 26559bdb4c4..6fe2c84eb05 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -27,6 +27,7 @@ #include <linux/uaccess.h> #include <linux/ioport.h> #include <linux/dcache.h> +#include <linux/cred.h> #include <net/addrconf.h> #include <asm/page.h> /* for PAGE_SIZE */ @@ -363,7 +364,6 @@ enum format_type { FORMAT_TYPE_SHORT, FORMAT_TYPE_UINT, FORMAT_TYPE_INT, - FORMAT_TYPE_NRCHARS, FORMAT_TYPE_SIZE_T, FORMAT_TYPE_PTRDIFF }; @@ -718,10 +718,15 @@ char *resource_string(char *buf, char *end, struct resource *res, specp = &mem_spec; decode = 0; } - p = number(p, pend, res->start, *specp); - if (res->start != res->end) { - *p++ = '-'; - p = number(p, pend, res->end, *specp); + if (decode && res->flags & IORESOURCE_UNSET) { + p = string(p, pend, "size ", str_spec); + p = number(p, pend, resource_size(res), *specp); + } else { + p = number(p, pend, res->start, *specp); + if (res->start != res->end) { + *p++ = '-'; + p = number(p, pend, res->end, *specp); + } } if (decode) { if (res->flags & IORESOURCE_MEM_64) @@ -1154,6 +1159,30 @@ char *netdev_feature_string(char *buf, char *end, const u8 *addr, return number(buf, end, *(const netdev_features_t *)addr, spec); } +static noinline_for_stack +char *address_val(char *buf, char *end, const void *addr, + struct printf_spec spec, const char *fmt) +{ + unsigned long long num; + + spec.flags |= SPECIAL | SMALL | ZEROPAD; + spec.base = 16; + + switch (fmt[1]) { + case 'd': + num = *(const dma_addr_t *)addr; + spec.field_width = sizeof(dma_addr_t) * 2 + 2; + break; + case 'p': + default: + num = *(const phys_addr_t *)addr; + spec.field_width = sizeof(phys_addr_t) * 2 + 2; + break; + } + + return number(buf, end, num, spec); +} + int kptr_restrict __read_mostly; /* @@ -1217,7 +1246,10 @@ int kptr_restrict __read_mostly; * N no separator * The maximum supported length is 64 bytes of the input. Consider * to use print_hex_dump() for the larger input. - * - 'a' For a phys_addr_t type and its derivative types (passed by reference) + * - 'a[pd]' For address types [p] phys_addr_t, [d] dma_addr_t and derivatives + * (default assumed to be phys_addr_t, passed by reference) + * - 'd[234]' For a dentry name (optionally 2-4 last components) + * - 'D[234]' Same as 'd' but for a struct file * * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 * function pointers are really function descriptors, which contain a @@ -1312,11 +1344,37 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, spec.field_width = default_width; return string(buf, end, "pK-error", spec); } - if (!((kptr_restrict == 0) || - (kptr_restrict == 1 && - has_capability_noaudit(current, CAP_SYSLOG)))) + + switch (kptr_restrict) { + case 0: + /* Always print %pK values */ + break; + case 1: { + /* + * Only print the real pointer value if the current + * process has CAP_SYSLOG and is running with the + * same credentials it started with. This is because + * access to files is checked at open() time, but %pK + * checks permission at read() time. We don't want to + * leak pointer values if a binary opens a file using + * %pK and then elevates privileges before reading it. + */ + const struct cred *cred = current_cred(); + + if (!has_capability_noaudit(current, CAP_SYSLOG) || + !uid_eq(cred->euid, cred->uid) || + !gid_eq(cred->egid, cred->gid)) + ptr = NULL; + break; + } + case 2: + default: + /* Always print 0's for %pK */ ptr = NULL; + break; + } break; + case 'N': switch (fmt[1]) { case 'F': @@ -1324,11 +1382,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, } break; case 'a': - spec.flags |= SPECIAL | SMALL | ZEROPAD; - spec.field_width = sizeof(phys_addr_t) * 2 + 2; - spec.base = 16; - return number(buf, end, - (unsigned long long) *((phys_addr_t *)ptr), spec); + return address_val(buf, end, ptr, spec, fmt); case 'd': return dentry_name(buf, end, ptr, spec, fmt); case 'D': @@ -1483,10 +1537,6 @@ qualifier: return fmt - start; /* skip alnum */ - case 'n': - spec->type = FORMAT_TYPE_NRCHARS; - return ++fmt - start; - case '%': spec->type = FORMAT_TYPE_PERCENT_CHAR; return ++fmt - start; @@ -1509,6 +1559,15 @@ qualifier: case 'u': break; + case 'n': + /* + * Since %n poses a greater security risk than utility, treat + * it as an invalid format specifier. Warn about its use so + * that new instances don't get added. + */ + WARN_ONCE(1, "Please remove ignored %%n in '%s'\n", fmt); + /* Fall-through */ + default: spec->type = FORMAT_TYPE_INVALID; return fmt - start; @@ -1682,22 +1741,6 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) ++str; break; - case FORMAT_TYPE_NRCHARS: { - u8 qualifier = spec.qualifier; - - if (qualifier == 'l') { - long *ip = va_arg(args, long *); - *ip = (str - buf); - } else if (_tolower(qualifier) == 'z') { - size_t *ip = va_arg(args, size_t *); - *ip = (str - buf); - } else { - int *ip = va_arg(args, int *); - *ip = (str - buf); - } - break; - } - default: switch (spec.type) { case FORMAT_TYPE_LONG_LONG: @@ -1972,19 +2015,6 @@ do { \ fmt++; break; - case FORMAT_TYPE_NRCHARS: { - /* skip %n 's argument */ - u8 qualifier = spec.qualifier; - void *skip_arg; - if (qualifier == 'l') - skip_arg = va_arg(args, long *); - else if (_tolower(qualifier) == 'z') - skip_arg = va_arg(args, size_t *); - else - skip_arg = va_arg(args, int *); - break; - } - default: switch (spec.type) { @@ -2143,10 +2173,6 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) ++str; break; - case FORMAT_TYPE_NRCHARS: - /* skip */ - break; - default: { unsigned long long num; @@ -2321,7 +2347,7 @@ int vsscanf(const char *buf, const char *fmt, va_list args) break; base = 10; - is_sign = 0; + is_sign = false; switch (*fmt++) { case 'c': @@ -2360,7 +2386,7 @@ int vsscanf(const char *buf, const char *fmt, va_list args) case 'i': base = 0; case 'd': - is_sign = 1; + is_sign = true; case 'u': break; case '%': diff --git a/lib/xz/Kconfig b/lib/xz/Kconfig index 08837db52d9..12d2d777f36 100644 --- a/lib/xz/Kconfig +++ b/lib/xz/Kconfig @@ -9,33 +9,33 @@ config XZ_DEC if XZ_DEC config XZ_DEC_X86 - bool "x86 BCJ filter decoder" - default y if X86 + bool "x86 BCJ filter decoder" if EXPERT + default y select XZ_DEC_BCJ config XZ_DEC_POWERPC - bool "PowerPC BCJ filter decoder" - default y if PPC + bool "PowerPC BCJ filter decoder" if EXPERT + default y select XZ_DEC_BCJ config XZ_DEC_IA64 - bool "IA-64 BCJ filter decoder" - default y if IA64 + bool "IA-64 BCJ filter decoder" if EXPERT + default y select XZ_DEC_BCJ config XZ_DEC_ARM - bool "ARM BCJ filter decoder" - default y if ARM + bool "ARM BCJ filter decoder" if EXPERT + default y select XZ_DEC_BCJ config XZ_DEC_ARMTHUMB - bool "ARM-Thumb BCJ filter decoder" - default y if (ARM && ARM_THUMB) + bool "ARM-Thumb BCJ filter decoder" if EXPERT + default y select XZ_DEC_BCJ config XZ_DEC_SPARC - bool "SPARC BCJ filter decoder" - default y if SPARC + bool "SPARC BCJ filter decoder" if EXPERT + default y select XZ_DEC_BCJ endif diff --git a/lib/xz/xz_dec_lzma2.c b/lib/xz/xz_dec_lzma2.c index a6cdc969ea4..08c3c804999 100644 --- a/lib/xz/xz_dec_lzma2.c +++ b/lib/xz/xz_dec_lzma2.c @@ -1043,6 +1043,8 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, s->lzma2.sequence = SEQ_LZMA_PREPARE; + /* Fall through */ + case SEQ_LZMA_PREPARE: if (s->lzma2.compressed < RC_INIT_BYTES) return XZ_DATA_ERROR; @@ -1053,6 +1055,8 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, s->lzma2.compressed -= RC_INIT_BYTES; s->lzma2.sequence = SEQ_LZMA_RUN; + /* Fall through */ + case SEQ_LZMA_RUN: /* * Set dictionary limit to indicate how much we want |
