472 files changed, 14980 insertions, 10133 deletions
diff --git a/.gitignore b/.gitignore index 42fa0d5626a..f4c0b091dcf 100644 --- a/.gitignore +++ b/.gitignore @@ -22,7 +22,6 @@ *.lst *.symtypes *.order -modules.builtin *.elf *.bin *.gz @@ -33,6 +32,8 @@ modules.builtin *.lzo *.patch *.gcno +modules.builtin +Module.symvers # # Top-level generic files @@ -44,7 +45,6 @@ modules.builtin /vmlinuz /System.map /Module.markers -/Module.symvers # # Debian directory (make deb-pkg) diff --git a/Documentation/devicetree/bindings/clock/sunxi.txt b/Documentation/devicetree/bindings/clock/sunxi.txt index a5160d8cbb5..b9ec668bfe6 100644 --- a/Documentation/devicetree/bindings/clock/sunxi.txt +++ b/Documentation/devicetree/bindings/clock/sunxi.txt @@ -20,12 +20,15 @@ Required properties: "allwinner,sun5i-a13-ahb-gates-clk" - for the AHB gates on A13 "allwinner,sun5i-a10s-ahb-gates-clk" - for the AHB gates on A10s "allwinner,sun7i-a20-ahb-gates-clk" - for the AHB gates on A20 + "allwinner,sun6i-a31-ar100-clk" - for the AR100 on A31 "allwinner,sun6i-a31-ahb1-mux-clk" - for the AHB1 multiplexer on A31 "allwinner,sun6i-a31-ahb1-gates-clk" - for the AHB1 gates on A31 "allwinner,sun4i-a10-apb0-clk" - for the APB0 clock + "allwinner,sun6i-a31-apb0-clk" - for the APB0 clock on A31 "allwinner,sun4i-a10-apb0-gates-clk" - for the APB0 gates on A10 "allwinner,sun5i-a13-apb0-gates-clk" - for the APB0 gates on A13 "allwinner,sun5i-a10s-apb0-gates-clk" - for the APB0 gates on A10s + "allwinner,sun6i-a31-apb0-gates-clk" - for the APB0 gates on A31 "allwinner,sun7i-a20-apb0-gates-clk" - for the APB0 gates on A20 "allwinner,sun4i-a10-apb1-clk" - for the APB1 clock "allwinner,sun4i-a10-apb1-mux-clk" - for the APB1 clock muxing @@ -41,6 +44,7 @@ Required properties: "allwinner,sun7i-a20-gmac-clk" - for the GMAC clock module on A20/A31 "allwinner,sun4i-a10-usb-clk" - for usb gates + resets on A10 / A20 "allwinner,sun5i-a13-usb-clk" - for usb gates + resets on A13 + "allwinner,sun6i-a31-usb-clk" - for usb gates + resets on A31 Required properties for all clocks: - reg : shall be the control register address for the clock. diff --git a/Documentation/devicetree/bindings/clock/ti/apll.txt b/Documentation/devicetree/bindings/clock/ti/apll.txt index 7faf5a68b3b..ade4dd4c30f 100644 --- a/Documentation/devicetree/bindings/clock/ti/apll.txt +++ b/Documentation/devicetree/bindings/clock/ti/apll.txt @@ -14,18 +14,32 @@ a subtype of a DPLL [2], although a simplified one at that. [2] Documentation/devicetree/bindings/clock/ti/dpll.txt Required properties: -- compatible : shall be "ti,dra7-apll-clock" +- compatible : shall be "ti,dra7-apll-clock" or "ti,omap2-apll-clock" - #clock-cells : from common clock binding; shall be set to 0. - clocks : link phandles of parent clocks (clk-ref and clk-bypass) - reg : address and length of the register set for controlling the APLL. 
It contains the information of registers in the following order: - "control" - contains the control register base address - "idlest" - contains the idlest register base address + "control" - contains the control register offset + "idlest" - contains the idlest register offset + "autoidle" - contains the autoidle register offset (OMAP2 only) +- ti,clock-frequency : static clock frequency for the clock (OMAP2 only) +- ti,idlest-shift : bit-shift for the idlest field (OMAP2 only) +- ti,bit-shift : bit-shift for enable and autoidle fields (OMAP2 only) Examples: - apll_pcie_ck: apll_pcie_ck@4a008200 { + apll_pcie_ck: apll_pcie_ck { #clock-cells = <0>; clocks = <&apll_pcie_in_clk_mux>, <&dpll_pcie_ref_ck>; - reg = <0x4a00821c 0x4>, <0x4a008220 0x4>; + reg = <0x021c>, <0x0220>; compatible = "ti,dra7-apll-clock"; }; + + apll96_ck: apll96_ck { + #clock-cells = <0>; + compatible = "ti,omap2-apll-clock"; + clocks = <&sys_ck>; + ti,bit-shift = <2>; + ti,idlest-shift = <8>; + ti,clock-frequency = <96000000>; + reg = <0x0500>, <0x0530>, <0x0520>; + }; diff --git a/Documentation/devicetree/bindings/clock/ti/dpll.txt b/Documentation/devicetree/bindings/clock/ti/dpll.txt index 30bfdb7c9f1..df57009ff8e 100644 --- a/Documentation/devicetree/bindings/clock/ti/dpll.txt +++ b/Documentation/devicetree/bindings/clock/ti/dpll.txt @@ -24,12 +24,14 @@ Required properties: "ti,omap4-dpll-core-clock", "ti,omap4-dpll-m4xen-clock", "ti,omap4-dpll-j-type-clock", + "ti,omap5-mpu-dpll-clock", "ti,am3-dpll-no-gate-clock", "ti,am3-dpll-j-type-clock", "ti,am3-dpll-no-gate-j-type-clock", "ti,am3-dpll-clock", "ti,am3-dpll-core-clock", "ti,am3-dpll-x2-clock", + "ti,omap2-dpll-core-clock", - #clock-cells : from common clock binding; shall be set to 0. - clocks : link phandles of parent clocks, first entry lists reference clock @@ -41,6 +43,7 @@ Required properties: "mult-div1" - contains the multiplier / divider register base address "autoidle" - contains the autoidle register base address (optional) ti,am3-* dpll types do not have autoidle register + ti,omap2-* dpll type does not support idlest / autoidle registers Optional properties: - DPLL mode setting - defining any one or more of the following overrides @@ -73,3 +76,10 @@ Examples: clocks = <&sys_clkin_ck>, <&sys_clkin_ck>; reg = <0x90>, <0x5c>, <0x68>; }; + + dpll_ck: dpll_ck { + #clock-cells = <0>; + compatible = "ti,omap2-dpll-core-clock"; + clocks = <&sys_ck>, <&sys_ck>; + reg = <0x0500>, <0x0540>; + }; diff --git a/Documentation/devicetree/bindings/clock/ti/dra7-atl.txt b/Documentation/devicetree/bindings/clock/ti/dra7-atl.txt new file mode 100644 index 00000000000..585e8c191f5 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/ti/dra7-atl.txt @@ -0,0 +1,96 @@ +Device Tree Clock bindings for ATL (Audio Tracking Logic) of DRA7 SoC. + +The ATL IP is used to generate clock to be used to synchronize baseband and +audio codec. A single ATL IP provides four ATL clock instances sharing the same +functional clock but can be configured to provide different clocks. +ATL can maintain a clock averages to some desired frequency based on the bws/aws +signals - can compensate the drift between the two ws signal. + +In order to provide the support for ATL and it's output clocks (which can be used +internally within the SoC or external components) two sets of bindings is needed: + +Clock tree binding: +This binding uses the common clock binding[1]. +To be able to integrate the ATL clocks with DT clock tree. 
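Since these nodes end up as ordinary clocks in the common clock framework, a driver can consume them with the usual CCF consumer API. A minimal, hypothetical consumer sketch follows (the "mclk" clock-names entry and the 12288000 Hz rate are illustrative assumptions, not part of this binding):

  /*
   * Hypothetical consumer of one ATL output clock through the common
   * clock framework. Assumes the consumer node carries something like
   * clocks = <&atl_clkin2_ck>; clock-names = "mclk";
   */
  #include <linux/clk.h>
  #include <linux/device.h>
  #include <linux/err.h>

  static int codec_enable_atl_mclk(struct device *dev)
  {
  	struct clk *mclk;
  	int ret;

  	/* Resolved via the clocks/clock-names properties of the consumer */
  	mclk = devm_clk_get(dev, "mclk");
  	if (IS_ERR(mclk))
  		return PTR_ERR(mclk);

  	ret = clk_set_rate(mclk, 12288000);
  	if (ret)
  		return ret;

  	return clk_prepare_enable(mclk);
  }
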
+Provides ccf level representation of the ATL clocks to be used by drivers. +Since the clock instances are part of a single IP this binding is used as a node +for the DT clock tree, the IP driver is needed to handle the actual configuration +of the IP. + +[1] Documentation/devicetree/bindings/clock/clock-bindings.txt + +Required properties: +- compatible : shall be "ti,dra7-atl-clock" +- #clock-cells : from common clock binding; shall be set to 0. +- clocks : link phandles to functional clock of ATL + +Binding for the IP driver: +This binding is used to configure the IP driver which is going to handle the +configuration of the IP for the ATL clock instances. + +Required properties: +- compatible : shall be "ti,dra7-atl" +- reg : base address for the ATL IP +- ti,provided-clocks : List of phandles to the clocks associated with the ATL +- clocks : link phandles to functional clock of ATL +- clock-names : Shall be set to "fck" +- ti,hwmods : Shall be set to "atl" + +Optional properties: +Configuration of ATL instances: +- atl{0/1/2/3} { + - bws : Baseband word select signal selection + - aws : Audio word select signal selection +}; + +For valid word select signals, see the dt-bindings/clk/ti-dra7-atl.h include +file. + +Examples: +/* clock bindings for atl provided clocks */ +atl_clkin0_ck: atl_clkin0_ck { + #clock-cells = <0>; + compatible = "ti,dra7-atl-clock"; + clocks = <&atl_gfclk_mux>; +}; + +atl_clkin1_ck: atl_clkin1_ck { + #clock-cells = <0>; + compatible = "ti,dra7-atl-clock"; + clocks = <&atl_gfclk_mux>; +}; + +atl_clkin2_ck: atl_clkin2_ck { + #clock-cells = <0>; + compatible = "ti,dra7-atl-clock"; + clocks = <&atl_gfclk_mux>; +}; + +atl_clkin3_ck: atl_clkin3_ck { + #clock-cells = <0>; + compatible = "ti,dra7-atl-clock"; + clocks = <&atl_gfclk_mux>; +}; + +/* binding for the IP */ +atl: atl@4843c000 { + compatible = "ti,dra7-atl"; + reg = <0x4843c000 0x3ff>; + ti,hwmods = "atl"; + ti,provided-clocks = <&atl_clkin0_ck>, <&atl_clkin1_ck>, + <&atl_clkin2_ck>, <&atl_clkin3_ck>; + clocks = <&atl_gfclk_mux>; + clock-names = "fck"; + status = "disabled"; +}; + +#include <dt-bindings/clk/ti-dra7-atl.h> + +&atl { + status = "okay"; + + atl2 { + bws = <DRA7_ATL_WS_MCASP2_FSX>; + aws = <DRA7_ATL_WS_MCASP3_FSX>; + }; +}; diff --git a/Documentation/devicetree/bindings/clock/ti/gate.txt b/Documentation/devicetree/bindings/clock/ti/gate.txt index 125281aaa4c..03f8fdee62a 100644 --- a/Documentation/devicetree/bindings/clock/ti/gate.txt +++ b/Documentation/devicetree/bindings/clock/ti/gate.txt @@ -25,6 +25,11 @@ Required properties: to map clockdomains properly "ti,hsdiv-gate-clock" - gate clock with OMAP36xx specific hardware handling, required for a hardware errata + "ti,composite-gate-clock" - composite gate clock, to be part of composite + clock + "ti,composite-no-wait-gate-clock" - composite gate clock that does not wait + for clock to be active before returning + from clk_enable() - #clock-cells : from common clock binding; shall be set to 0 - clocks : link to phandle of parent clock - reg : offset for register controlling adjustable gate, not needed for @@ -41,7 +46,7 @@ Examples: #clock-cells = <0>; compatible = "ti,gate-clock"; clocks = <&core_96m_fck>; - reg = <0x48004a00 0x4>; + reg = <0x0a00>; ti,bit-shift = <25>; }; @@ -57,7 +62,7 @@ Examples: #clock-cells = <0>; compatible = "ti,dss-gate-clock"; clocks = <&dpll4_m4x2_ck>; - reg = <0x48004e00 0x4>; + reg = <0x0e00>; ti,bit-shift = <0>; }; @@ -65,7 +70,7 @@ Examples: #clock-cells = <0>; compatible = "ti,am35xx-gate-clock"; clocks = 
<&ipss_ick>; - reg = <0x4800259c 0x4>; + reg = <0x059c>; ti,bit-shift = <1>; }; @@ -80,6 +85,22 @@ Examples: compatible = "ti,hsdiv-gate-clock"; clocks = <&dpll4_m2x2_mul_ck>; ti,bit-shift = <0x1b>; - reg = <0x48004d00 0x4>; + reg = <0x0d00>; ti,set-bit-to-disable; }; + + vlynq_gate_fck: vlynq_gate_fck { + #clock-cells = <0>; + compatible = "ti,composite-gate-clock"; + clocks = <&core_ck>; + ti,bit-shift = <3>; + reg = <0x0200>; + }; + + sys_clkout2_src_gate: sys_clkout2_src_gate { + #clock-cells = <0>; + compatible = "ti,composite-no-wait-gate-clock"; + clocks = <&core_ck>; + ti,bit-shift = <15>; + reg = <0x0070>; + }; diff --git a/Documentation/devicetree/bindings/clock/ti/interface.txt b/Documentation/devicetree/bindings/clock/ti/interface.txt index 064e8caccac..3111a409fea 100644 --- a/Documentation/devicetree/bindings/clock/ti/interface.txt +++ b/Documentation/devicetree/bindings/clock/ti/interface.txt @@ -21,6 +21,8 @@ Required properties: "ti,omap3-dss-interface-clock" - interface clock with DSS specific HW handling "ti,omap3-ssi-interface-clock" - interface clock with SSI specific HW handling "ti,am35xx-interface-clock" - interface clock with AM35xx specific HW handling + "ti,omap2430-interface-clock" - interface clock with OMAP2430 specific HW + handling - #clock-cells : from common clock binding; shall be set to 0 - clocks : link to phandle of parent clock - reg : base address for the control register diff --git a/Documentation/hwmon/shtc1 b/Documentation/hwmon/shtc1 new file mode 100644 index 00000000000..6b1e05458f0 --- /dev/null +++ b/Documentation/hwmon/shtc1 @@ -0,0 +1,43 @@ +Kernel driver shtc1 +=================== + +Supported chips: + * Sensirion SHTC1 + Prefix: 'shtc1' + Addresses scanned: none + Datasheet: http://www.sensirion.com/file/datasheet_shtc1 + + * Sensirion SHTW1 + Prefix: 'shtw1' + Addresses scanned: none + Datasheet: Not publicly available + +Author: + Johannes Winkelmann <johannes.winkelmann@sensirion.com> + +Description +----------- + +This driver implements support for the Sensirion SHTC1 chip, a humidity and +temperature sensor. Temperature is measured in degrees celsius, relative +humidity is expressed as a percentage. Driver can be used as well for SHTW1 +chip, which has the same electrical interface. + +The device communicates with the I2C protocol. All sensors are set to I2C +address 0x70. See Documentation/i2c/instantiating-devices for methods to +instantiate the device. + +There are two options configurable by means of shtc1_platform_data: +1. blocking (pull the I2C clock line down while performing the measurement) or + non-blocking mode. Blocking mode will guarantee the fastest result but + the I2C bus will be busy during that time. By default, non-blocking mode + is used. Make sure clock-stretching works properly on your device if you + want to use blocking mode. +2. high or low accuracy. High accuracy is used by default and using it is + strongly recommended. + +sysfs-Interface +--------------- + +temp1_input - temperature input +humidity1_input - humidity input diff --git a/Documentation/kbuild/modules.txt b/Documentation/kbuild/modules.txt index 69372fb98cf..3fb39e0116b 100644 --- a/Documentation/kbuild/modules.txt +++ b/Documentation/kbuild/modules.txt @@ -470,7 +470,7 @@ build. Sometimes, an external module uses exported symbols from another external module. kbuild needs to have full knowledge of - all symbols to avoid spliitting out warnings about undefined + all symbols to avoid spitting out warnings about undefined symbols. 
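As a concrete illustration of that scenario, here is a minimal sketch of two hypothetical external modules; module B uses a symbol exported by the separately built module A, and without knowledge of A's symbols kbuild would warn that a_do_work is undefined while building B (all names invented for illustration):

  /* module_a.c - external module A, exports a symbol */
  #include <linux/module.h>

  int a_do_work(void)
  {
  	return 42;
  }
  EXPORT_SYMBOL(a_do_work);

  MODULE_LICENSE("GPL");

  /* module_b.c - a second, separately built external module using A's symbol */
  #include <linux/init.h>
  #include <linux/module.h>

  extern int a_do_work(void);	/* normally provided by a shared header */

  static int __init b_init(void)
  {
  	pr_info("a_do_work() returned %d\n", a_do_work());
  	return 0;
  }
  module_init(b_init);

  MODULE_LICENSE("GPL");
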
Three solutions exist for this situation. NOTE: The method with a top-level kbuild file is recommended diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index 0cfb00fd86f..4bbeca8483e 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt @@ -22,8 +22,9 @@ Appendix B: The kprobes sysctl interface Kprobes enables you to dynamically break into any kernel routine and collect debugging and performance information non-disruptively. You -can trap at almost any kernel code address, specifying a handler +can trap at almost any kernel code address(*), specifying a handler routine to be invoked when the breakpoint is hit. +(*: some parts of the kernel code can not be trapped, see 1.5 Blacklist) There are currently three types of probes: kprobes, jprobes, and kretprobes (also called return probes). A kprobe can be inserted @@ -273,6 +274,19 @@ using one of the following techniques: or - Execute 'sysctl -w debug.kprobes_optimization=n' +1.5 Blacklist + +Kprobes can probe most of the kernel except itself. This means +that there are some functions where kprobes cannot probe. Probing +(trapping) such functions can cause a recursive trap (e.g. double +fault) or the nested probe handler may never be called. +Kprobes manages such functions as a blacklist. +If you want to add a function into the blacklist, you just need +to (1) include linux/kprobes.h and (2) use NOKPROBE_SYMBOL() macro +to specify a blacklisted function. +Kprobes checks the given probe address against the blacklist and +rejects registering it, if the given address is in the blacklist. + 2. Architectures Supported Kprobes, jprobes, and return probes are implemented on the following diff --git a/Documentation/mutex-design.txt b/Documentation/mutex-design.txt index 1dfe62c3641..ee231ed09ec 100644 --- a/Documentation/mutex-design.txt +++ b/Documentation/mutex-design.txt @@ -1,139 +1,157 @@ Generic Mutex Subsystem started by Ingo Molnar <mingo@redhat.com> +updated by Davidlohr Bueso <davidlohr@hp.com> - "Why on earth do we need a new mutex subsystem, and what's wrong - with semaphores?" +What are mutexes? +----------------- -firstly, there's nothing wrong with semaphores. But if the simpler -mutex semantics are sufficient for your code, then there are a couple -of advantages of mutexes: +In the Linux kernel, mutexes refer to a particular locking primitive +that enforces serialization on shared memory systems, and not only to +the generic term referring to 'mutual exclusion' found in academia +or similar theoretical text books. Mutexes are sleeping locks which +behave similarly to binary semaphores, and were introduced in 2006[1] +as an alternative to these. This new data structure provided a number +of advantages, including simpler interfaces, and at that time smaller +code (see Disadvantages). - - 'struct mutex' is smaller on most architectures: E.g. on x86, - 'struct semaphore' is 20 bytes, 'struct mutex' is 16 bytes. - A smaller structure size means less RAM footprint, and better - CPU-cache utilization. +[1] http://lwn.net/Articles/164802/ - - tighter code. On x86 i get the following .text sizes when - switching all mutex-alike semaphores in the kernel to the mutex - subsystem: +Implementation +-------------- - text data bss dec hex filename - 3280380 868188 396860 4545428 455b94 vmlinux-semaphore - 3255329 865296 396732 4517357 44eded vmlinux-mutex +Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h +and implemented in kernel/locking/mutex.c. 
These locks use a three +state atomic counter (->count) to represent the different possible +transitions that can occur during the lifetime of a lock: - that's 25051 bytes of code saved, or a 0.76% win - off the hottest - codepaths of the kernel. (The .data savings are 2892 bytes, or 0.33%) - Smaller code means better icache footprint, which is one of the - major optimization goals in the Linux kernel currently. + 1: unlocked + 0: locked, no waiters + negative: locked, with potential waiters - - the mutex subsystem is slightly faster and has better scalability for - contended workloads. On an 8-way x86 system, running a mutex-based - kernel and testing creat+unlink+close (of separate, per-task files) - in /tmp with 16 parallel tasks, the average number of ops/sec is: +In its most basic form it also includes a wait-queue and a spinlock +that serializes access to it. CONFIG_SMP systems can also include +a pointer to the lock task owner (->owner) as well as a spinner MCS +lock (->osq), both described below in (ii). - Semaphores: Mutexes: +When acquiring a mutex, there are three possible paths that can be +taken, depending on the state of the lock: - $ ./test-mutex V 16 10 $ ./test-mutex V 16 10 - 8 CPUs, running 16 tasks. 8 CPUs, running 16 tasks. - checking VFS performance. checking VFS performance. - avg loops/sec: 34713 avg loops/sec: 84153 - CPU utilization: 63% CPU utilization: 22% +(i) fastpath: tries to atomically acquire the lock by decrementing the + counter. If it was already taken by another task it goes to the next + possible path. This logic is architecture specific. On x86-64, the + locking fastpath is 2 instructions: - i.e. in this workload, the mutex based kernel was 2.4 times faster - than the semaphore based kernel, _and_ it also had 2.8 times less CPU - utilization. (In terms of 'ops per CPU cycle', the semaphore kernel - performed 551 ops/sec per 1% of CPU time used, while the mutex kernel - performed 3825 ops/sec per 1% of CPU time used - it was 6.9 times - more efficient.) - - the scalability difference is visible even on a 2-way P4 HT box: - - Semaphores: Mutexes: - - $ ./test-mutex V 16 10 $ ./test-mutex V 16 10 - 4 CPUs, running 16 tasks. 8 CPUs, running 16 tasks. - checking VFS performance. checking VFS performance. - avg loops/sec: 127659 avg loops/sec: 181082 - CPU utilization: 100% CPU utilization: 34% - - (the straight performance advantage of mutexes is 41%, the per-cycle - efficiency of mutexes is 4.1 times better.) - - - there are no fastpath tradeoffs, the mutex fastpath is just as tight - as the semaphore fastpath. On x86, the locking fastpath is 2 - instructions: - - c0377ccb <mutex_lock>: - c0377ccb: f0 ff 08 lock decl (%eax) - c0377cce: 78 0e js c0377cde <.text..lock.mutex> - c0377cd0: c3 ret + 0000000000000e10 <mutex_lock>: + e21: f0 ff 0b lock decl (%rbx) + e24: 79 08 jns e2e <mutex_lock+0x1e> the unlocking fastpath is equally tight: - c0377cd1 <mutex_unlock>: - c0377cd1: f0 ff 00 lock incl (%eax) - c0377cd4: 7e 0f jle c0377ce5 <.text..lock.mutex+0x7> - c0377cd6: c3 ret - - - 'struct mutex' semantics are well-defined and are enforced if - CONFIG_DEBUG_MUTEXES is turned on. Semaphores on the other hand have - virtually no debugging code or instrumentation. 
The mutex subsystem - checks and enforces the following rules: - - * - only one task can hold the mutex at a time - * - only the owner can unlock the mutex - * - multiple unlocks are not permitted - * - recursive locking is not permitted - * - a mutex object must be initialized via the API - * - a mutex object must not be initialized via memset or copying - * - task may not exit with mutex held - * - memory areas where held locks reside must not be freed - * - held mutexes must not be reinitialized - * - mutexes may not be used in hardware or software interrupt - * contexts such as tasklets and timers - - furthermore, there are also convenience features in the debugging - code: - - * - uses symbolic names of mutexes, whenever they are printed in debug output - * - point-of-acquire tracking, symbolic lookup of function names - * - list of all locks held in the system, printout of them - * - owner tracking - * - detects self-recursing locks and prints out all relevant info - * - detects multi-task circular deadlocks and prints out all affected - * locks and tasks (and only those tasks) + 0000000000000bc0 <mutex_unlock>: + bc8: f0 ff 07 lock incl (%rdi) + bcb: 7f 0a jg bd7 <mutex_unlock+0x17> + + +(ii) midpath: aka optimistic spinning, tries to spin for acquisition + while the lock owner is running and there are no other tasks ready + to run that have higher priority (need_resched). The rationale is + that if the lock owner is running, it is likely to release the lock + soon. The mutex spinners are queued up using MCS lock so that only + one spinner can compete for the mutex. + + The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spinlock + with the desirable properties of being fair and with each cpu trying + to acquire the lock spinning on a local variable. It avoids expensive + cacheline bouncing that common test-and-set spinlock implementations + incur. An MCS-like lock is specially tailored for optimistic spinning + for sleeping lock implementation. An important feature of the customized + MCS lock is that it has the extra property that spinners are able to exit + the MCS spinlock queue when they need to reschedule. This further helps + avoid situations where MCS spinners that need to reschedule would continue + waiting to spin on mutex owner, only to go directly to slowpath upon + obtaining the MCS lock. + + +(iii) slowpath: last resort, if the lock is still unable to be acquired, + the task is added to the wait-queue and sleeps until woken up by the + unlock path. Under normal circumstances it blocks as TASK_UNINTERRUPTIBLE. + +While formally kernel mutexes are sleepable locks, it is path (ii) that +makes them more practically a hybrid type. By simply not interrupting a +task and busy-waiting for a few cycles instead of immediately sleeping, +the performance of this lock has been seen to significantly improve a +number of workloads. Note that this technique is also used for rw-semaphores. + +Semantics +--------- + +The mutex subsystem checks and enforces the following rules: + + - Only one task can hold the mutex at a time. + - Only the owner can unlock the mutex. + - Multiple unlocks are not permitted. + - Recursive locking/unlocking is not permitted. + - A mutex must only be initialized via the API (see below). + - A task may not exit with a mutex held. + - Memory areas where held locks reside must not be freed. + - Held mutexes must not be reinitialized. + - Mutexes may not be used in hardware or software interrupt + contexts such as tasklets and timers. 
+ +These semantics are fully enforced when CONFIG_DEBUG_MUTEXES is enabled. +In addition, the mutex debugging code also implements a number of other +features that make lock debugging easier and faster: + + - Uses symbolic names of mutexes, whenever they are printed + in debug output. + - Point-of-acquire tracking, symbolic lookup of function names, + list of all locks held in the system, printout of them. + - Owner tracking. + - Detects self-recursing locks and prints out all relevant info. + - Detects multi-task circular deadlocks and prints out all affected + locks and tasks (and only those tasks). + + +Interfaces +---------- +Statically define the mutex: + DEFINE_MUTEX(name); + +Dynamically initialize the mutex: + mutex_init(mutex); + +Acquire the mutex, uninterruptible: + void mutex_lock(struct mutex *lock); + void mutex_lock_nested(struct mutex *lock, unsigned int subclass); + int mutex_trylock(struct mutex *lock); + +Acquire the mutex, interruptible: + int mutex_lock_interruptible_nested(struct mutex *lock, + unsigned int subclass); + int mutex_lock_interruptible(struct mutex *lock); + +Acquire the mutex, interruptible, if dec to 0: + int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); + +Unlock the mutex: + void mutex_unlock(struct mutex *lock); + +Test if the mutex is taken: + int mutex_is_locked(struct mutex *lock); Disadvantages ------------- -The stricter mutex API means you cannot use mutexes the same way you -can use semaphores: e.g. they cannot be used from an interrupt context, -nor can they be unlocked from a different context that which acquired -it. [ I'm not aware of any other (e.g. performance) disadvantages from -using mutexes at the moment, please let me know if you find any. ] - -Implementation of mutexes -------------------------- - -'struct mutex' is the new mutex type, defined in include/linux/mutex.h and -implemented in kernel/locking/mutex.c. It is a counter-based mutex with a -spinlock and a wait-list. The counter has 3 states: 1 for "unlocked", 0 for -"locked" and negative numbers (usually -1) for "locked, potential waiters -queued". - -the APIs of 'struct mutex' have been streamlined: - - DEFINE_MUTEX(name); +Unlike its original design and purpose, 'struct mutex' is larger than +most locks in the kernel. E.g: on x86-64 it is 40 bytes, almost twice +as large as 'struct semaphore' (24 bytes) and 8 bytes shy of the +'struct rw_semaphore' variant. Larger structure sizes mean more CPU +cache and memory footprint. - mutex_init(mutex); +When to use mutexes +------------------- - void mutex_lock(struct mutex *lock); - int mutex_lock_interruptible(struct mutex *lock); - int mutex_trylock(struct mutex *lock); - void mutex_unlock(struct mutex *lock); - int mutex_is_locked(struct mutex *lock); - void mutex_lock_nested(struct mutex *lock, unsigned int subclass); - int mutex_lock_interruptible_nested(struct mutex *lock, - unsigned int subclass); - int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); +Unless the strict semantics of mutexes are unsuitable and/or the critical +region prevents the lock from being shared, always prefer them to any other +locking primitive. diff --git a/Documentation/vDSO/parse_vdso.c b/Documentation/vDSO/parse_vdso.c index 85870208edc..1dbb4b87268 100644 --- a/Documentation/vDSO/parse_vdso.c +++ b/Documentation/vDSO/parse_vdso.c @@ -1,6 +1,6 @@ /* * parse_vdso.c: Linux reference vDSO parser - * Written by Andrew Lutomirski, 2011. + * Written by Andrew Lutomirski, 2011-2014.
* * This code is meant to be linked in to various programs that run on Linux. * As such, it is available with as few restrictions as possible. This file @@ -11,13 +11,14 @@ * it starts a program. It works equally well in statically and dynamically * linked binaries. * - * This code is tested on x86_64. In principle it should work on any 64-bit + * This code is tested on x86. In principle it should work on any * architecture that has a vDSO. */ #include <stdbool.h> #include <stdint.h> #include <string.h> +#include <limits.h> #include <elf.h> /* @@ -45,11 +46,18 @@ extern void *vdso_sym(const char *version, const char *name); /* And here's the code. */ - -#ifndef __x86_64__ -# error Not yet ported to non-x86_64 architectures +#ifndef ELF_BITS +# if ULONG_MAX > 0xffffffffUL +# define ELF_BITS 64 +# else +# define ELF_BITS 32 +# endif #endif +#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x +#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x) +#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x) + static struct vdso_info { bool valid; @@ -59,14 +67,14 @@ static struct vdso_info uintptr_t load_offset; /* load_addr - recorded vaddr */ /* Symbol table */ - Elf64_Sym *symtab; + ELF(Sym) *symtab; const char *symstrings; - Elf64_Word *bucket, *chain; - Elf64_Word nbucket, nchain; + ELF(Word) *bucket, *chain; + ELF(Word) nbucket, nchain; /* Version table */ - Elf64_Versym *versym; - Elf64_Verdef *verdef; + ELF(Versym) *versym; + ELF(Verdef) *verdef; } vdso_info; /* Straight from the ELF specification. */ @@ -92,9 +100,14 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base) vdso_info.load_addr = base; - Elf64_Ehdr *hdr = (Elf64_Ehdr*)base; - Elf64_Phdr *pt = (Elf64_Phdr*)(vdso_info.load_addr + hdr->e_phoff); - Elf64_Dyn *dyn = 0; + ELF(Ehdr) *hdr = (ELF(Ehdr)*)base; + if (hdr->e_ident[EI_CLASS] != + (ELF_BITS == 32 ? ELFCLASS32 : ELFCLASS64)) { + return; /* Wrong ELF class -- check ELF_BITS */ + } + + ELF(Phdr) *pt = (ELF(Phdr)*)(vdso_info.load_addr + hdr->e_phoff); + ELF(Dyn) *dyn = 0; /* * We need two things from the segment table: the load offset @@ -108,7 +121,7 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base) + (uintptr_t)pt[i].p_offset - (uintptr_t)pt[i].p_vaddr; } else if (pt[i].p_type == PT_DYNAMIC) { - dyn = (Elf64_Dyn*)(base + pt[i].p_offset); + dyn = (ELF(Dyn)*)(base + pt[i].p_offset); } } @@ -118,7 +131,7 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base) /* * Fish out the useful bits of the dynamic table. 
*/ - Elf64_Word *hash = 0; + ELF(Word) *hash = 0; vdso_info.symstrings = 0; vdso_info.symtab = 0; vdso_info.versym = 0; @@ -131,22 +144,22 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base) + vdso_info.load_offset); break; case DT_SYMTAB: - vdso_info.symtab = (Elf64_Sym *) + vdso_info.symtab = (ELF(Sym) *) ((uintptr_t)dyn[i].d_un.d_ptr + vdso_info.load_offset); break; case DT_HASH: - hash = (Elf64_Word *) + hash = (ELF(Word) *) ((uintptr_t)dyn[i].d_un.d_ptr + vdso_info.load_offset); break; case DT_VERSYM: - vdso_info.versym = (Elf64_Versym *) + vdso_info.versym = (ELF(Versym) *) ((uintptr_t)dyn[i].d_un.d_ptr + vdso_info.load_offset); break; case DT_VERDEF: - vdso_info.verdef = (Elf64_Verdef *) + vdso_info.verdef = (ELF(Verdef) *) ((uintptr_t)dyn[i].d_un.d_ptr + vdso_info.load_offset); break; @@ -168,8 +181,8 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base) vdso_info.valid = true; } -static bool vdso_match_version(Elf64_Versym ver, - const char *name, Elf64_Word hash) +static bool vdso_match_version(ELF(Versym) ver, + const char *name, ELF(Word) hash) { /* * This is a helper function to check if the version indexed by @@ -188,7 +201,7 @@ static bool vdso_match_version(Elf64_Versym ver, /* First step: find the version definition */ ver &= 0x7fff; /* Apparently bit 15 means "hidden" */ - Elf64_Verdef *def = vdso_info.verdef; + ELF(Verdef) *def = vdso_info.verdef; while(true) { if ((def->vd_flags & VER_FLG_BASE) == 0 && (def->vd_ndx & 0x7fff) == ver) @@ -197,11 +210,11 @@ static bool vdso_match_version(Elf64_Versym ver, if (def->vd_next == 0) return false; /* No definition. */ - def = (Elf64_Verdef *)((char *)def + def->vd_next); + def = (ELF(Verdef) *)((char *)def + def->vd_next); } /* Now figure out whether it matches. */ - Elf64_Verdaux *aux = (Elf64_Verdaux*)((char *)def + def->vd_aux); + ELF(Verdaux) *aux = (ELF(Verdaux)*)((char *)def + def->vd_aux); return def->vd_hash == hash && !strcmp(name, vdso_info.symstrings + aux->vda_name); } @@ -213,10 +226,10 @@ void *vdso_sym(const char *version, const char *name) return 0; ver_hash = elf_hash(version); - Elf64_Word chain = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket]; + ELF(Word) chain = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket]; for (; chain != STN_UNDEF; chain = vdso_info.chain[chain]) { - Elf64_Sym *sym = &vdso_info.symtab[chain]; + ELF(Sym) *sym = &vdso_info.symtab[chain]; /* Check for a defined global or weak function w/ right name. */ if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC) @@ -243,7 +256,7 @@ void *vdso_sym(const char *version, const char *name) void vdso_init_from_auxv(void *auxv) { - Elf64_auxv_t *elf_auxv = auxv; + ELF(auxv_t) *elf_auxv = auxv; for (int i = 0; elf_auxv[i].a_type != AT_NULL; i++) { if (elf_auxv[i].a_type == AT_SYSINFO_EHDR) { diff --git a/Documentation/vDSO/vdso_standalone_test_x86.c b/Documentation/vDSO/vdso_standalone_test_x86.c new file mode 100644 index 00000000000..d46240265c5 --- /dev/null +++ b/Documentation/vDSO/vdso_standalone_test_x86.c @@ -0,0 +1,128 @@ +/* + * vdso_test.c: Sample code to test parse_vdso.c on x86 + * Copyright (c) 2011-2014 Andy Lutomirski + * Subject to the GNU General Public License, version 2 + * + * You can amuse yourself by compiling with: + * gcc -std=gnu99 -nostdlib + * -Os -fno-asynchronous-unwind-tables -flto -lgcc_s + * vdso_standalone_test_x86.c parse_vdso.c + * to generate a small binary. On x86_64, you can omit -lgcc_s + * if you want the binary to be completely standalone. 
+ */ + +#include <sys/syscall.h> +#include <sys/time.h> +#include <unistd.h> +#include <stdint.h> + +extern void *vdso_sym(const char *version, const char *name); +extern void vdso_init_from_sysinfo_ehdr(uintptr_t base); +extern void vdso_init_from_auxv(void *auxv); + +/* We need a libc functions... */ +int strcmp(const char *a, const char *b) +{ + /* This implementation is buggy: it never returns -1. */ + while (*a || *b) { + if (*a != *b) + return 1; + if (*a == 0 || *b == 0) + return 1; + a++; + b++; + } + + return 0; +} + +/* ...and two syscalls. This is x86-specific. */ +static inline long x86_syscall3(long nr, long a0, long a1, long a2) +{ + long ret; +#ifdef __x86_64__ + asm volatile ("syscall" : "=a" (ret) : "a" (nr), + "D" (a0), "S" (a1), "d" (a2) : + "cc", "memory", "rcx", + "r8", "r9", "r10", "r11" ); +#else + asm volatile ("int $0x80" : "=a" (ret) : "a" (nr), + "b" (a0), "c" (a1), "d" (a2) : + "cc", "memory" ); +#endif + return ret; +} + +static inline long linux_write(int fd, const void *data, size_t len) +{ + return x86_syscall3(__NR_write, fd, (long)data, (long)len); +} + +static inline void linux_exit(int code) +{ + x86_syscall3(__NR_exit, code, 0, 0); +} + +void to_base10(char *lastdig, uint64_t n) +{ + while (n) { + *lastdig = (n % 10) + '0'; + n /= 10; + lastdig--; + } +} + +__attribute__((externally_visible)) void c_main(void **stack) +{ + /* Parse the stack */ + long argc = (long)*stack; + stack += argc + 2; + + /* Now we're pointing at the environment. Skip it. */ + while(*stack) + stack++; + stack++; + + /* Now we're pointing at auxv. Initialize the vDSO parser. */ + vdso_init_from_auxv((void *)stack); + + /* Find gettimeofday. */ + typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz); + gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday"); + + if (!gtod) + linux_exit(1); + + struct timeval tv; + long ret = gtod(&tv, 0); + + if (ret == 0) { + char buf[] = "The time is .000000\n"; + to_base10(buf + 31, tv.tv_sec); + to_base10(buf + 38, tv.tv_usec); + linux_write(1, buf, sizeof(buf) - 1); + } else { + linux_exit(ret); + } + + linux_exit(0); +} + +/* + * This is the real entry point. It passes the initial stack into + * the C entry point. + */ +asm ( + ".text\n" + ".global _start\n" + ".type _start,@function\n" + "_start:\n\t" +#ifdef __x86_64__ + "mov %rsp,%rdi\n\t" + "jmp c_main" +#else + "push %esp\n\t" + "call c_main\n\t" + "int $3" +#endif + ); diff --git a/Documentation/vDSO/vdso_test.c b/Documentation/vDSO/vdso_test.c index fff633432df..8daeb7d7032 100644 --- a/Documentation/vDSO/vdso_test.c +++ b/Documentation/vDSO/vdso_test.c @@ -1,111 +1,52 @@ /* - * vdso_test.c: Sample code to test parse_vdso.c on x86_64 - * Copyright (c) 2011 Andy Lutomirski + * vdso_test.c: Sample code to test parse_vdso.c + * Copyright (c) 2014 Andy Lutomirski * Subject to the GNU General Public License, version 2 * - * You can amuse yourself by compiling with: - * gcc -std=gnu99 -nostdlib - * -Os -fno-asynchronous-unwind-tables -flto - * vdso_test.c parse_vdso.c -o vdso_test - * to generate a small binary with no dependencies at all. + * Compile with: + * gcc -std=gnu99 vdso_test.c parse_vdso.c + * + * Tested on x86, 32-bit and 64-bit. It may work on other architectures, too. 
*/ -#include <sys/syscall.h> -#include <sys/time.h> -#include <unistd.h> #include <stdint.h> +#include <elf.h> +#include <stdio.h> +#include <sys/auxv.h> +#include <sys/time.h> extern void *vdso_sym(const char *version, const char *name); extern void vdso_init_from_sysinfo_ehdr(uintptr_t base); extern void vdso_init_from_auxv(void *auxv); -/* We need a libc functions... */ -int strcmp(const char *a, const char *b) +int main(int argc, char **argv) { - /* This implementation is buggy: it never returns -1. */ - while (*a || *b) { - if (*a != *b) - return 1; - if (*a == 0 || *b == 0) - return 1; - a++; - b++; + unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR); + if (!sysinfo_ehdr) { + printf("AT_SYSINFO_EHDR is not present!\n"); + return 0; } - return 0; -} - -/* ...and two syscalls. This is x86_64-specific. */ -static inline long linux_write(int fd, const void *data, size_t len) -{ - - long ret; - asm volatile ("syscall" : "=a" (ret) : "a" (__NR_write), - "D" (fd), "S" (data), "d" (len) : - "cc", "memory", "rcx", - "r8", "r9", "r10", "r11" ); - return ret; -} - -static inline void linux_exit(int code) -{ - asm volatile ("syscall" : : "a" (__NR_exit), "D" (code)); -} - -void to_base10(char *lastdig, uint64_t n) -{ - while (n) { - *lastdig = (n % 10) + '0'; - n /= 10; - lastdig--; - } -} - -__attribute__((externally_visible)) void c_main(void **stack) -{ - /* Parse the stack */ - long argc = (long)*stack; - stack += argc + 2; - - /* Now we're pointing at the environment. Skip it. */ - while(*stack) - stack++; - stack++; - - /* Now we're pointing at auxv. Initialize the vDSO parser. */ - vdso_init_from_auxv((void *)stack); + vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR)); /* Find gettimeofday. */ typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz); gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday"); - if (!gtod) - linux_exit(1); + if (!gtod) { + printf("Could not find __vdso_gettimeofday\n"); + return 1; + } struct timeval tv; long ret = gtod(&tv, 0); if (ret == 0) { - char buf[] = "The time is .000000\n"; - to_base10(buf + 31, tv.tv_sec); - to_base10(buf + 38, tv.tv_usec); - linux_write(1, buf, sizeof(buf) - 1); + printf("The time is %lld.%06lld\n", + (long long)tv.tv_sec, (long long)tv.tv_usec); } else { - linux_exit(ret); + printf("__vdso_gettimeofday failed\n"); } - linux_exit(0); + return 0; } - -/* - * This is the real entry point. It passes the initial stack into - * the C entry point. - */ -asm ( - ".text\n" - ".global _start\n" - ".type _start,@function\n" - "_start:\n\t" - "mov %rsp,%rdi\n\t" - "jmp c_main" - ); diff --git a/Makefile b/Makefile --- a/Makefile +++ b/Makefile @@ -105,10 +105,6 @@ ifeq ("$(origin O)", "command line") KBUILD_OUTPUT := $(O) endif -ifeq ("$(origin W)", "command line") - export KBUILD_ENABLE_EXTRA_GCC_CHECKS := $(W) -endif - # That's our default target when none is given on the command line PHONY := _all _all: @@ -153,8 +149,18 @@ else _all: modules endif -srctree := $(if $(KBUILD_SRC),$(KBUILD_SRC),$(CURDIR)) -objtree := $(CURDIR) +ifeq ($(KBUILD_SRC),) + # building in the source tree + srctree := . +else + ifeq ($(KBUILD_SRC)/,$(dir $(CURDIR))) + # building in a subdirectory of the source tree + srctree := .. + else + srctree := $(KBUILD_SRC) + endif +endif +objtree := . src := $(srctree) obj := $(objtree) @@ -166,7 +172,7 @@ export srctree objtree VPATH # SUBARCH tells the usermode build what the underlying arch is. That is set # first, and if a usermode build is happening, the "ARCH=um" on the command # line overrides the setting of ARCH below. If a native build is happening,
If a native build is happening, -# then ARCH is assigned, getting whatever value it gets normally, and +# then ARCH is assigned, getting whatever value it gets normally, and # SUBARCH is subsequently ignored. SUBARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \ @@ -259,18 +265,18 @@ endif KBUILD_MODULES := KBUILD_BUILTIN := 1 -# If we have only "make modules", don't compile built-in objects. -# When we're building modules with modversions, we need to consider -# the built-in objects during the descend as well, in order to -# make sure the checksums are up to date before we record them. +# If we have only "make modules", don't compile built-in objects. +# When we're building modules with modversions, we need to consider +# the built-in objects during the descend as well, in order to +# make sure the checksums are up to date before we record them. ifeq ($(MAKECMDGOALS),modules) KBUILD_BUILTIN := $(if $(CONFIG_MODVERSIONS),1) endif -# If we have "make <whatever> modules", compile modules -# in addition to whatever we do anyway. -# Just "make" or "make all" shall build modules as well +# If we have "make <whatever> modules", compile modules +# in addition to whatever we do anyway. +# Just "make" or "make all" shall build modules as well ifneq ($(filter all _all modules,$(MAKECMDGOALS)),) KBUILD_MODULES := 1 @@ -294,7 +300,7 @@ export KBUILD_CHECKSRC KBUILD_SRC KBUILD_EXTMOD # cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< # # If $(quiet) is empty, the whole command will be printed. -# If it is set to "quiet_", only the short version will be printed. +# If it is set to "quiet_", only the short version will be printed. # If it is set to "silent_", nothing will be printed at all, since # the variable $(silent_cmd_cc_o_c) doesn't exist. # @@ -346,7 +352,6 @@ $(srctree)/scripts/Kbuild.include: ; include $(srctree)/scripts/Kbuild.include # Make variables (CC, etc...) - AS = $(CROSS_COMPILE)as LD = $(CROSS_COMPILE)ld CC = $(CROSS_COMPILE)gcc @@ -395,8 +400,8 @@ KBUILD_CPPFLAGS := -D__KERNEL__ KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -fno-common \ -Werror-implicit-function-declaration \ - -Wno-format-security \ - $(call cc-option,-fno-delete-null-pointer-checks,) + -Wno-format-security + KBUILD_AFLAGS_KERNEL := KBUILD_CFLAGS_KERNEL := KBUILD_AFLAGS := -D__ASSEMBLY__ @@ -504,8 +509,16 @@ ifeq ($(mixed-targets),1) # We're called with mixed targets (*config and build targets). # Handle them one by one. 
-%:: FORCE - $(Q)$(MAKE) -C $(srctree) KBUILD_SRC= $@ +PHONY += $(MAKECMDGOALS) __build_one_by_one + +$(filter-out __build_one_by_one, $(MAKECMDGOALS)): __build_one_by_one + @: + +__build_one_by_one: + $(Q)set -e; \ + for i in $(MAKECMDGOALS); do \ + $(MAKE) -f $(srctree)/Makefile $$i; \ + done else ifeq ($(config-targets),1) @@ -520,11 +533,9 @@ include $(srctree)/arch/$(SRCARCH)/Makefile export KBUILD_DEFCONFIG KBUILD_KCONFIG config: scripts_basic outputmakefile FORCE - $(Q)mkdir -p include/linux include/config $(Q)$(MAKE) $(build)=scripts/kconfig $@ %config: scripts_basic outputmakefile FORCE - $(Q)mkdir -p include/linux include/config $(Q)$(MAKE) $(build)=scripts/kconfig $@ else @@ -594,14 +605,16 @@ endif # $(dot-config) # Defaults to vmlinux, but the arch makefile usually adds further targets all: vmlinux +include $(srctree)/arch/$(SRCARCH)/Makefile + +KBUILD_CFLAGS += $(call cc-option,-fno-delete-null-pointer-checks,) + ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE KBUILD_CFLAGS += -Os $(call cc-disable-warning,maybe-uninitialized,) else KBUILD_CFLAGS += -O2 endif -include $(srctree)/arch/$(SRCARCH)/Makefile - ifdef CONFIG_READABLE_ASM # Disable optimizations that make assembler listings hard to read. # reorder blocks reorders the control in the function @@ -731,6 +744,8 @@ ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y) KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO endif +include $(srctree)/scripts/Makefile.extrawarn + # Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments KBUILD_CPPFLAGS += $(KCPPFLAGS) KBUILD_AFLAGS += $(KAFLAGS) @@ -775,10 +790,10 @@ MODLIB = $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE) export MODLIB # -# INSTALL_MOD_STRIP, if defined, will cause modules to be -# stripped after they are installed. If INSTALL_MOD_STRIP is '1', then -# the default option --strip-debug will be used. Otherwise, -# INSTALL_MOD_STRIP value will be used as the options to the strip command. +# INSTALL_MOD_STRIP, if defined, will cause modules to be +# stripped after they are installed. If INSTALL_MOD_STRIP is '1', then +# the default option --strip-debug will be used. Otherwise, +# INSTALL_MOD_STRIP value will be used as the options to the strip command. ifdef INSTALL_MOD_STRIP ifeq ($(INSTALL_MOD_STRIP),1) @@ -863,7 +878,7 @@ ifdef CONFIG_BUILD_DOCSRC endif +$(call if_changed,link-vmlinux) -# The actual objects are generated when descending, +# The actual objects are generated when descending, # make sure no implicit rule kicks in $(sort $(vmlinux-deps)): $(vmlinux-dirs) ; @@ -1021,11 +1036,11 @@ ifdef CONFIG_MODULES all: modules -# Build modules +# Build modules # -# A module can be listed more than once in obj-m resulting in -# duplicate lines in modules.order files. Those are removed -# using awk while concatenating to the final file. +# A module can be listed more than once in obj-m resulting in +# duplicate lines in modules.order files. Those are removed +# using awk while concatenating to the final file. PHONY += modules modules: $(vmlinux-dirs) $(if $(KBUILD_BUILTIN),vmlinux) modules.builtin @@ -1054,10 +1069,10 @@ _modinst_: @rm -rf $(MODLIB)/kernel @rm -f $(MODLIB)/source @mkdir -p $(MODLIB)/kernel - @ln -s $(srctree) $(MODLIB)/source + @ln -s `cd $(srctree) && /bin/pwd` $(MODLIB)/source @if [ ! 
$(objtree) -ef $(MODLIB)/build ]; then \ rm -f $(MODLIB)/build ; \ - ln -s $(objtree) $(MODLIB)/build ; \ + ln -s $(CURDIR) $(MODLIB)/build ; \ fi @cp -f $(objtree)/modules.order $(MODLIB)/ @cp -f $(objtree)/modules.builtin $(MODLIB)/ @@ -1104,7 +1119,7 @@ CLEAN_DIRS += $(MODVERDIR) # Directories & files removed with 'make mrproper' MRPROPER_DIRS += include/config usr/include include/generated \ - arch/*/include/generated .tmp_objdiff + arch/*/include/generated .tmp_objdiff MRPROPER_FILES += .config .config.old .version .old_version $(version_h) \ Module.symvers tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS \ signing_key.priv signing_key.x509 x509.genkey \ @@ -1478,7 +1493,7 @@ endif $(build)=$(build-dir) $(@:.ko=.o) $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost -# FIXME Should go into a make.lib or something +# FIXME Should go into a make.lib or something # =========================================================================== quiet_cmd_rmdirs = $(if $(wildcard $(rm-dirs)),CLEAN $(wildcard $(rm-dirs))) diff --git a/arch/arm/boot/dts/dra7xx-clocks.dtsi b/arch/arm/boot/dts/dra7xx-clocks.dtsi index c7676871d9c..b03cfe49d22 100644 --- a/arch/arm/boot/dts/dra7xx-clocks.dtsi +++ b/arch/arm/boot/dts/dra7xx-clocks.dtsi @@ -26,7 +26,7 @@ clock-frequency = <0>; }; - atlclkin3_ck: atlclkin3_ck { + atl_clkin3_ck: atl_clkin3_ck { #clock-cells = <0>; compatible = "fixed-clock"; clock-frequency = <0>; @@ -277,7 +277,7 @@ dpll_mpu_ck: dpll_mpu_ck { #clock-cells = <0>; - compatible = "ti,omap4-dpll-clock"; + compatible = "ti,omap5-mpu-dpll-clock"; clocks = <&sys_clkin1>, <&mpu_dpll_hs_clk_div>; reg = <0x0160>, <0x0164>, <0x016c>, <0x0168>; }; @@ -730,7 +730,7 @@ mcasp1_ahclkr_mux: mcasp1_ahclkr_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <28>; reg = <0x0550>; }; @@ -738,7 +738,7 @@ mcasp1_ahclkx_mux: mcasp1_ahclkx_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <24>; reg = <0x0550>; }; @@ -1639,7 +1639,7 @@ mcasp2_ahclkr_mux: mcasp2_ahclkr_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, 
<&mlbp_clk>; ti,bit-shift = <28>; reg = <0x1860>; }; @@ -1647,7 +1647,7 @@ mcasp2_ahclkx_mux: mcasp2_ahclkx_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <24>; reg = <0x1860>; }; @@ -1663,7 +1663,7 @@ mcasp3_ahclkx_mux: mcasp3_ahclkx_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <24>; reg = <0x1868>; }; @@ -1679,7 +1679,7 @@ mcasp4_ahclkx_mux: mcasp4_ahclkx_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <24>; reg = <0x1898>; }; @@ -1695,7 +1695,7 @@ mcasp5_ahclkx_mux: mcasp5_ahclkx_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <24>; reg = <0x1878>; }; @@ -1711,7 +1711,7 @@ mcasp6_ahclkx_mux: mcasp6_ahclkx_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <24>; reg = <0x1904>; }; @@ -1727,7 +1727,7 @@ mcasp7_ahclkx_mux: mcasp7_ahclkx_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, 
<&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <24>; reg = <0x1908>; }; @@ -1743,7 +1743,7 @@ mcasp8_ahclk_mux: mcasp8_ahclk_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <22>; reg = <0x1890>; }; diff --git a/arch/arm/boot/dts/omap54xx-clocks.dtsi b/arch/arm/boot/dts/omap54xx-clocks.dtsi index aeb142ce8e9..e67a23b5d78 100644 --- a/arch/arm/boot/dts/omap54xx-clocks.dtsi +++ b/arch/arm/boot/dts/omap54xx-clocks.dtsi @@ -335,7 +335,7 @@ dpll_mpu_ck: dpll_mpu_ck { #clock-cells = <0>; - compatible = "ti,omap4-dpll-clock"; + compatible = "ti,omap5-mpu-dpll-clock"; clocks = <&sys_clkin>, <&mpu_dpll_hs_clk_div>; reg = <0x0160>, <0x0164>, <0x016c>, <0x0168>; }; diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index a6bc431cde7..4238bcba9d6 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -410,7 +410,7 @@ __hw_perf_event_init(struct perf_event *event) */ hwc->config_base |= (unsigned long)mapping; - if (!hwc->sample_period) { + if (!is_sampling_event(event)) { /* * For non-sampling runs, limit the sample_period to half * of the counter width. That way, the new counter value diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index a71ae152362..af9e35e8836 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -126,8 +126,8 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) irqs = min(pmu_device->num_resources, num_possible_cpus()); if (irqs < 1) { - pr_err("no irqs for PMUs defined\n"); - return -ENODEV; + printk_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n"); + return 0; } irq = platform_get_irq(pmu_device, 0); @@ -191,6 +191,10 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu) /* Ensure the PMU has sane values out of reset. */ if (cpu_pmu->reset) on_each_cpu(cpu_pmu->reset, cpu_pmu, 1); + + /* If no interrupts available, set the corresponding capability flag */ + if (!platform_get_irq(cpu_pmu->plat_device, 0)) + cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; } /* diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 3997c411c14..9d853189028 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -26,30 +26,30 @@ #include <asm/topology.h> /* - * cpu power scale management + * cpu capacity scale management */ /* - * cpu power table + * cpu capacity table * This per cpu data structure describes the relative capacity of each core. * On a heteregenous system, cores don't have the same computation capacity - * and we reflect that difference in the cpu_power field so the scheduler can - * take this difference into account during load balance. 
A per cpu structure - * is preferred because each CPU updates its own cpu_power field during the - * load balance except for idle cores. One idle core is selected to run the - * rebalance_domains for all idle cores and the cpu_power can be updated - * during this sequence. + * and we reflect that difference in the cpu_capacity field so the scheduler + * can take this difference into account during load balance. A per cpu + * structure is preferred because each CPU updates its own cpu_capacity field + * during the load balance except for idle cores. One idle core is selected + * to run the rebalance_domains for all idle cores and the cpu_capacity can be + * updated during this sequence. */ static DEFINE_PER_CPU(unsigned long, cpu_scale); -unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) +unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu) { return per_cpu(cpu_scale, cpu); } -static void set_power_scale(unsigned int cpu, unsigned long power) +static void set_capacity_scale(unsigned int cpu, unsigned long capacity) { - per_cpu(cpu_scale, cpu) = power; + per_cpu(cpu_scale, cpu) = capacity; } #ifdef CONFIG_OF @@ -62,11 +62,11 @@ struct cpu_efficiency { * Table of relative efficiency of each processors * The efficiency value must fit in 20bit and the final * cpu_scale value must be in the range - * 0 < cpu_scale < 3*SCHED_POWER_SCALE/2 + * 0 < cpu_scale < 3*SCHED_CAPACITY_SCALE/2 * in order to return at most 1 when DIV_ROUND_CLOSEST * is used to compute the capacity of a CPU. * Processors that are not defined in the table, - * use the default SCHED_POWER_SCALE value for cpu_scale. + * use the default SCHED_CAPACITY_SCALE value for cpu_scale. */ static const struct cpu_efficiency table_efficiency[] = { {"arm,cortex-a15", 3891}, @@ -83,9 +83,9 @@ static unsigned long middle_capacity = 1; * Iterate all CPUs' descriptor in DT and compute the efficiency * (as per table_efficiency). Also calculate a middle efficiency * as close as possible to (max{eff_i} - min{eff_i}) / 2 - * This is later used to scale the cpu_power field such that an - * 'average' CPU is of middle power. Also see the comments near - * table_efficiency[] and update_cpu_power(). + * This is later used to scale the cpu_capacity field such that an + * 'average' CPU is of middle capacity. Also see the comments near + * table_efficiency[] and update_cpu_capacity(). */ static void __init parse_dt_topology(void) { @@ -141,15 +141,15 @@ static void __init parse_dt_topology(void) * cpu_scale because all CPUs have the same capacity. Otherwise, we * compute a middle_capacity factor that will ensure that the capacity * of an 'average' CPU of the system will be as close as possible to - * SCHED_POWER_SCALE, which is the default value, but with the + * SCHED_CAPACITY_SCALE, which is the default value, but with the * constraint explained near table_efficiency[]. */ if (4*max_capacity < (3*(max_capacity + min_capacity))) middle_capacity = (min_capacity + max_capacity) - >> (SCHED_POWER_SHIFT+1); + >> (SCHED_CAPACITY_SHIFT+1); else middle_capacity = ((max_capacity / 3) - >> (SCHED_POWER_SHIFT-1)) + 1; + >> (SCHED_CAPACITY_SHIFT-1)) + 1; } @@ -158,20 +158,20 @@ static void __init parse_dt_topology(void) * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the * function returns directly for SMP system. 
*/ -static void update_cpu_power(unsigned int cpu) +static void update_cpu_capacity(unsigned int cpu) { if (!cpu_capacity(cpu)) return; - set_power_scale(cpu, cpu_capacity(cpu) / middle_capacity); + set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity); - printk(KERN_INFO "CPU%u: update cpu_power %lu\n", - cpu, arch_scale_freq_power(NULL, cpu)); + printk(KERN_INFO "CPU%u: update cpu_capacity %lu\n", + cpu, arch_scale_freq_capacity(NULL, cpu)); } #else static inline void parse_dt_topology(void) {} -static inline void update_cpu_power(unsigned int cpuid) {} +static inline void update_cpu_capacity(unsigned int cpuid) {} #endif /* @@ -267,7 +267,7 @@ void store_cpu_topology(unsigned int cpuid) update_siblings_masks(cpuid); - update_cpu_power(cpuid); + update_cpu_capacity(cpuid); printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n", cpuid, cpu_topology[cpuid].thread_id, @@ -297,7 +297,7 @@ void __init init_cpu_topology(void) { unsigned int cpu; - /* init core mask and power*/ + /* init core mask and capacity */ for_each_possible_cpu(cpu) { struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]); @@ -307,7 +307,7 @@ void __init init_cpu_topology(void) cpumask_clear(&cpu_topo->core_sibling); cpumask_clear(&cpu_topo->thread_sibling); - set_power_scale(cpu, SCHED_POWER_SCALE); + set_capacity_scale(cpu, SCHED_CAPACITY_SCALE); } smp_wmb(); diff --git a/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c b/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c index b935ed2922d..85e0b0c0671 100644 --- a/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c +++ b/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c @@ -208,3 +208,56 @@ void omap2xxx_clkt_vps_late_init(void) clk_put(c); } } + +#ifdef CONFIG_OF +#include <linux/clk-provider.h> +#include <linux/clkdev.h> + +static const struct clk_ops virt_prcm_set_ops = { + .recalc_rate = &omap2_table_mpu_recalc, + .set_rate = &omap2_select_table_rate, + .round_rate = &omap2_round_to_table_rate, +}; + +/** + * omap2xxx_clkt_vps_init - initialize virt_prcm_set clock + * + * Does a manual init for the virtual prcm DVFS clock for OMAP2. This + * function is called only from omap2 DT clock init, as the virtual + * node is not modelled in the DT clock data. 
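+ *
+ * Illustrative usage sketch (assumed consumer, not part of this
+ * patch): once this init has run, rate-change code can simply do
+ *
+ *	clk = clk_get(NULL, "cpufreq_ck");
+ *	clk_set_rate(clk, target_rate);
+ *
+ * and the request is routed through the virt_prcm_set_ops above
+ * (omap2_select_table_rate() / omap2_table_mpu_recalc()).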
+ */ +void omap2xxx_clkt_vps_init(void) +{ + struct clk_init_data init = { NULL }; + struct clk_hw_omap *hw = NULL; + struct clk *clk; + const char *parent_name = "mpu_ck"; + struct clk_lookup *lookup = NULL; + + omap2xxx_clkt_vps_late_init(); + omap2xxx_clkt_vps_check_bootloader_rates(); + + hw = kzalloc(sizeof(*hw), GFP_KERNEL); + lookup = kzalloc(sizeof(*lookup), GFP_KERNEL); + if (!hw || !lookup) + goto cleanup; + init.name = "virt_prcm_set"; + init.ops = &virt_prcm_set_ops; + init.parent_names = &parent_name; + init.num_parents = 1; + + hw->hw.init = &init; + + clk = clk_register(NULL, &hw->hw); + + lookup->dev_id = NULL; + lookup->con_id = "cpufreq_ck"; + lookup->clk = clk; + + clkdev_add(lookup); + return; +cleanup: + kfree(hw); + kfree(lookup); +} +#endif diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h index bda767a9dea..12f54d428d7 100644 --- a/arch/arm/mach-omap2/clock.h +++ b/arch/arm/mach-omap2/clock.h @@ -178,17 +178,6 @@ struct clksel { const struct clksel_rate *rates; }; -struct clk_hw_omap_ops { - void (*find_idlest)(struct clk_hw_omap *oclk, - void __iomem **idlest_reg, - u8 *idlest_bit, u8 *idlest_val); - void (*find_companion)(struct clk_hw_omap *oclk, - void __iomem **other_reg, - u8 *other_bit); - void (*allow_idle)(struct clk_hw_omap *oclk); - void (*deny_idle)(struct clk_hw_omap *oclk); -}; - unsigned long omap_fixed_divisor_recalc(struct clk_hw *hw, unsigned long parent_rate); @@ -279,8 +268,6 @@ extern const struct clk_hw_omap_ops clkhwops_omap3430es2_hsotgusb_wait; extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_module_wait; extern const struct clk_hw_omap_ops clkhwops_apll54; extern const struct clk_hw_omap_ops clkhwops_apll96; -extern const struct clk_hw_omap_ops clkhwops_omap2xxx_dpll; -extern const struct clk_hw_omap_ops clkhwops_omap2430_i2chs_wait; /* clksel_rate blocks shared between OMAP44xx and AM33xx */ extern const struct clksel_rate div_1_0_rates[]; diff --git a/arch/arm/mach-omap2/clock2xxx.h b/arch/arm/mach-omap2/clock2xxx.h index 539dc08afbb..45f41a41160 100644 --- a/arch/arm/mach-omap2/clock2xxx.h +++ b/arch/arm/mach-omap2/clock2xxx.h @@ -21,10 +21,6 @@ unsigned long omap2xxx_sys_clk_recalc(struct clk_hw *clk, unsigned long parent_rate); unsigned long omap2_osc_clk_recalc(struct clk_hw *clk, unsigned long parent_rate); -unsigned long omap2_dpllcore_recalc(struct clk_hw *hw, - unsigned long parent_rate); -int omap2_reprogram_dpllcore(struct clk_hw *clk, unsigned long rate, - unsigned long parent_rate); void omap2xxx_clkt_dpllcore_init(struct clk_hw *hw); unsigned long omap2_clk_apll54_recalc(struct clk_hw *hw, unsigned long parent_rate); diff --git a/arch/arm/mach-omap2/dpll3xxx.c b/arch/arm/mach-omap2/dpll3xxx.c index fcd8036af91..6d7ba37e225 100644 --- a/arch/arm/mach-omap2/dpll3xxx.c +++ b/arch/arm/mach-omap2/dpll3xxx.c @@ -319,6 +319,15 @@ static int omap3_noncore_dpll_program(struct clk_hw_omap *clk, u16 freqsel) /* Set DPLL multiplier, divider */ v = omap2_clk_readl(clk, dd->mult_div1_reg); + + /* Handle Duty Cycle Correction */ + if (dd->dcc_mask) { + if (dd->last_rounded_rate >= dd->dcc_rate) + v |= dd->dcc_mask; /* Enable DCC */ + else + v &= ~dd->dcc_mask; /* Disable DCC */ + } + v &= ~(dd->mult_mask | dd->div1_mask); v |= dd->last_rounded_m << __ffs(dd->mult_mask); v |= (dd->last_rounded_n - 1) << __ffs(dd->div1_mask); diff --git a/arch/blackfin/configs/BF526-EZBRD_defconfig b/arch/blackfin/configs/BF526-EZBRD_defconfig index 1759fad5401..e66ba31ef84 100644 --- a/arch/blackfin/configs/BF526-EZBRD_defconfig 
+++ b/arch/blackfin/configs/BF526-EZBRD_defconfig @@ -53,7 +53,6 @@ CONFIG_IP_PNP=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_INTELEXT=y @@ -63,6 +62,7 @@ CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_PHYSMAP=y CONFIG_MTD_M25P80=y CONFIG_MTD_NAND=m +CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_RAM=y CONFIG_SCSI=y # CONFIG_SCSI_PROC_FS is not set diff --git a/arch/blackfin/configs/BF527-EZKIT-V2_defconfig b/arch/blackfin/configs/BF527-EZKIT-V2_defconfig index 357729682c0..0207c588c19 100644 --- a/arch/blackfin/configs/BF527-EZKIT-V2_defconfig +++ b/arch/blackfin/configs/BF527-EZKIT-V2_defconfig @@ -58,7 +58,6 @@ CONFIG_BFIN_SIR0=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=m CONFIG_MTD_BLOCK=y CONFIG_MTD_JEDECPROBE=m CONFIG_MTD_RAM=y @@ -66,6 +65,7 @@ CONFIG_MTD_ROM=m CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_M25P80=y CONFIG_MTD_NAND=m +CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_RAM=y CONFIG_SCSI=y # CONFIG_SCSI_PROC_FS is not set diff --git a/arch/blackfin/configs/BF527-EZKIT_defconfig b/arch/blackfin/configs/BF527-EZKIT_defconfig index 2e73a5d33da..99c131ba7d9 100644 --- a/arch/blackfin/configs/BF527-EZKIT_defconfig +++ b/arch/blackfin/configs/BF527-EZKIT_defconfig @@ -57,7 +57,6 @@ CONFIG_BFIN_SIR0=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=m CONFIG_MTD_BLOCK=y CONFIG_MTD_JEDECPROBE=m CONFIG_MTD_RAM=y @@ -65,6 +64,7 @@ CONFIG_MTD_ROM=m CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_M25P80=y CONFIG_MTD_NAND=m +CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_RAM=y CONFIG_SCSI=y # CONFIG_SCSI_PROC_FS is not set diff --git a/arch/blackfin/configs/BF548-EZKIT_defconfig b/arch/blackfin/configs/BF548-EZKIT_defconfig index f0a2ddf5de4..38cb17d218d 100644 --- a/arch/blackfin/configs/BF548-EZKIT_defconfig +++ b/arch/blackfin/configs/BF548-EZKIT_defconfig @@ -64,7 +64,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_FW_LOADER=m CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_INTELEXT=y @@ -75,6 +74,7 @@ CONFIG_MTD_M25P80=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_BF5XX=y # CONFIG_MTD_NAND_BF5XX_HWECC is not set +CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_RAM=y # CONFIG_SCSI_PROC_FS is not set CONFIG_BLK_DEV_SD=y diff --git a/arch/blackfin/configs/BF609-EZKIT_defconfig b/arch/blackfin/configs/BF609-EZKIT_defconfig index 4ca39ab6b2b..a7e9bfd8418 100644 --- a/arch/blackfin/configs/BF609-EZKIT_defconfig +++ b/arch/blackfin/configs/BF609-EZKIT_defconfig @@ -57,7 +57,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_FW_LOADER=m CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_INTELEXT=y @@ -65,6 +64,7 @@ CONFIG_MTD_CFI_STAA=y CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_PHYSMAP=y CONFIG_MTD_M25P80=y +CONFIG_MTD_SPI_NOR=y CONFIG_MTD_UBI=m CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y diff --git a/arch/blackfin/configs/BlackStamp_defconfig b/arch/blackfin/configs/BlackStamp_defconfig index 3853c473b44..f4a9200e1ab 100644 --- a/arch/blackfin/configs/BlackStamp_defconfig +++ b/arch/blackfin/configs/BlackStamp_defconfig @@ -45,7 +45,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=m CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=m CONFIG_MTD_CFI_AMDSTD=m @@ -53,7 +52,7 @@ CONFIG_MTD_RAM=y CONFIG_MTD_ROM=m CONFIG_MTD_COMPLEX_MAPPINGS=y 
CONFIG_MTD_M25P80=y -# CONFIG_M25PXX_USE_FAST_READ is not set +CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/blackfin/configs/H8606_defconfig b/arch/blackfin/configs/H8606_defconfig index f754e490bbf..0ff97d8d047 100644 --- a/arch/blackfin/configs/H8606_defconfig +++ b/arch/blackfin/configs/H8606_defconfig @@ -36,13 +36,12 @@ CONFIG_IRTTY_SIR=m # CONFIG_WIRELESS is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_RAM=y CONFIG_MTD_ROM=y CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_M25P80=y -# CONFIG_M25PXX_USE_FAST_READ is not set +CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_RAM=y CONFIG_MISC_DEVICES=y CONFIG_EEPROM_AT25=y diff --git a/arch/blackfin/include/asm/dma.h b/arch/blackfin/include/asm/dma.h index 8d1e4c2d2c3..40e9c2bbc6e 100644 --- a/arch/blackfin/include/asm/dma.h +++ b/arch/blackfin/include/asm/dma.h @@ -316,6 +316,8 @@ static inline void disable_dma(unsigned int channel) } static inline void enable_dma(unsigned int channel) { + dma_ch[channel].regs->curr_x_count = 0; + dma_ch[channel].regs->curr_y_count = 0; dma_ch[channel].regs->cfg |= DMAEN; } int set_dma_callback(unsigned int channel, irq_handler_t callback, void *data); diff --git a/arch/blackfin/mach-bf533/boards/stamp.c b/arch/blackfin/mach-bf533/boards/stamp.c index d0989290f54..6f4bac969bf 100644 --- a/arch/blackfin/mach-bf533/boards/stamp.c +++ b/arch/blackfin/mach-bf533/boards/stamp.c @@ -17,6 +17,7 @@ #if IS_ENABLED(CONFIG_USB_ISP1362_HCD) #include <linux/usb/isp1362.h> #endif +#include <linux/gpio.h> #include <linux/irq.h> #include <linux/i2c.h> #include <asm/dma.h> diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 21c9f304e96..790352f9370 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -235,11 +235,6 @@ config PPC_EARLY_DEBUG_USBGECKO Select this to enable early debugging for Nintendo GameCube/Wii consoles via an external USB Gecko adapter. 
-config PPC_EARLY_DEBUG_WSP - bool "Early debugging via WSP's internal UART" - depends on PPC_WSP - select PPC_UDBG_16550 - config PPC_EARLY_DEBUG_PS3GELIC bool "Early debugging through the PS3 Ethernet port" depends on PPC_PS3 diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 426dce7ae7c..ccc25eddbcb 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -333,8 +333,8 @@ $(addprefix $(obj)/, $(initrd-y)): $(obj)/ramdisk.image.gz $(obj)/zImage.initrd.%: vmlinux $(wrapperbits) $(call if_changed,wrap,$*,,,$(obj)/ramdisk.image.gz) -$(obj)/zImage.%: vmlinux $(wrapperbits) - $(call if_changed,wrap,$*) +$(addprefix $(obj)/, $(sort $(filter zImage.%, $(image-y)))): vmlinux $(wrapperbits) + $(call if_changed,wrap,$(subst $(obj)/zImage.,,$@)) # dtbImage% - a dtbImage is a zImage with an embedded device tree blob $(obj)/dtbImage.initrd.%: vmlinux $(wrapperbits) $(obj)/%.dtb diff --git a/arch/powerpc/configs/chroma_defconfig b/arch/powerpc/configs/chroma_defconfig deleted file mode 100644 index 4f35fc46238..00000000000 --- a/arch/powerpc/configs/chroma_defconfig +++ /dev/null @@ -1,307 +0,0 @@ -CONFIG_PPC64=y -CONFIG_PPC_BOOK3E_64=y -# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set -CONFIG_SMP=y -CONFIG_NR_CPUS=256 -CONFIG_EXPERIMENTAL=y -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_TASKSTATS=y -CONFIG_TASK_DELAY_ACCT=y -CONFIG_TASK_XACCT=y -CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_AUDIT=y -CONFIG_AUDITSYSCALL=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=19 -CONFIG_CGROUPS=y -CONFIG_CGROUP_DEVICE=y -CONFIG_CPUSETS=y -CONFIG_CGROUP_CPUACCT=y -CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEMCG=y -CONFIG_CGROUP_MEMCG_SWAP=y -CONFIG_NAMESPACES=y -CONFIG_RELAY=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" -CONFIG_RD_BZIP2=y -CONFIG_RD_LZMA=y -CONFIG_INITRAMFS_COMPRESSION_GZIP=y -CONFIG_KALLSYMS_ALL=y -CONFIG_EMBEDDED=y -CONFIG_PERF_EVENTS=y -CONFIG_PROFILING=y -CONFIG_OPROFILE=y -CONFIG_KPROBES=y -CONFIG_MODULES=y -CONFIG_MODULE_FORCE_LOAD=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_SCOM_DEBUGFS=y -CONFIG_PPC_A2_DD2=y -CONFIG_KVM_GUEST=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_HZ_100=y -# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set -CONFIG_BINFMT_MISC=y -CONFIG_NUMA=y -# CONFIG_MIGRATION is not set -CONFIG_PPC_64K_PAGES=y -CONFIG_SCHED_SMT=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" -# CONFIG_SECCOMP is not set -CONFIG_PCIEPORTBUS=y -# CONFIG_PCIEASPM is not set -CONFIG_PCI_MSI=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_XFRM_USER=m -CONFIG_XFRM_SUB_POLICY=y -CONFIG_XFRM_STATISTICS=y -CONFIG_NET_KEY=m -CONFIG_NET_KEY_MIGRATE=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -CONFIG_NET_IPIP=y -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -CONFIG_SYN_COOKIES=y -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_IPCOMP=m -CONFIG_IPV6=y -CONFIG_IPV6_PRIVACY=y -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_IPV6_ROUTE_INFO=y -CONFIG_IPV6_OPTIMISTIC_DAD=y -CONFIG_INET6_AH=y -CONFIG_INET6_ESP=y -CONFIG_INET6_IPCOMP=y -CONFIG_IPV6_MIP6=y -CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=y -CONFIG_IPV6_TUNNEL=y -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_IPV6_SUBTREES=y -CONFIG_IPV6_MROUTE=y -CONFIG_IPV6_PIMSM_V2=y -CONFIG_NETFILTER=y -CONFIG_NF_CONNTRACK=m -CONFIG_NF_CONNTRACK_EVENTS=y 
-CONFIG_NF_CT_PROTO_UDPLITE=m -CONFIG_NF_CONNTRACK_FTP=m -CONFIG_NF_CONNTRACK_IRC=m -CONFIG_NF_CONNTRACK_TFTP=m -CONFIG_NF_CT_NETLINK=m -CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m -CONFIG_NETFILTER_XT_TARGET_CONNMARK=m -CONFIG_NETFILTER_XT_TARGET_MARK=m -CONFIG_NETFILTER_XT_TARGET_NFLOG=m -CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_COMMENT=m -CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m -CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m -CONFIG_NETFILTER_XT_MATCH_CONNMARK=m -CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m -CONFIG_NETFILTER_XT_MATCH_DCCP=m -CONFIG_NETFILTER_XT_MATCH_DSCP=m -CONFIG_NETFILTER_XT_MATCH_ESP=m -CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m -CONFIG_NETFILTER_XT_MATCH_HELPER=m -CONFIG_NETFILTER_XT_MATCH_IPRANGE=m -CONFIG_NETFILTER_XT_MATCH_LENGTH=m -CONFIG_NETFILTER_XT_MATCH_LIMIT=m -CONFIG_NETFILTER_XT_MATCH_MAC=m -CONFIG_NETFILTER_XT_MATCH_MARK=m -CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m -CONFIG_NETFILTER_XT_MATCH_OWNER=m -CONFIG_NETFILTER_XT_MATCH_POLICY=m -CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m -CONFIG_NETFILTER_XT_MATCH_QUOTA=m -CONFIG_NETFILTER_XT_MATCH_RATEEST=m -CONFIG_NETFILTER_XT_MATCH_REALM=m -CONFIG_NETFILTER_XT_MATCH_RECENT=m -CONFIG_NETFILTER_XT_MATCH_SCTP=m -CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NETFILTER_XT_MATCH_STATISTIC=m -CONFIG_NETFILTER_XT_MATCH_STRING=m -CONFIG_NETFILTER_XT_MATCH_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_TIME=m -CONFIG_NETFILTER_XT_MATCH_U32=m -CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_IP_NF_QUEUE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_AH=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m -CONFIG_NF_NAT=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_NETMAP=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_NET_TCPPROBE=y -# CONFIG_WIRELESS is not set -CONFIG_NET_9P=y -CONFIG_NET_9P_DEBUG=y -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_DEVTMPFS=y -CONFIG_MTD=y -CONFIG_MTD_CHAR=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_CFI=y -CONFIG_MTD_CFI_ADV_OPTIONS=y -CONFIG_MTD_CFI_LE_BYTE_SWAP=y -CONFIG_MTD_CFI_INTELEXT=y -CONFIG_MTD_CFI_AMDSTD=y -CONFIG_MTD_CFI_STAA=y -CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_CRYPTOLOOP=y -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=65536 -CONFIG_CDROM_PKTCDVD=y -CONFIG_MISC_DEVICES=y -CONFIG_BLK_DEV_SD=y -CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_MULTI_LUN=y -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_SPI_ATTRS=y -CONFIG_SCSI_FC_ATTRS=y -CONFIG_SCSI_ISCSI_ATTRS=m -CONFIG_SCSI_SAS_ATTRS=m -CONFIG_SCSI_SRP_ATTRS=y -CONFIG_ATA=y -CONFIG_SATA_AHCI=y -CONFIG_SATA_SIL24=y -CONFIG_SATA_MV=y -CONFIG_SATA_SIL=y -CONFIG_PATA_CMD64X=y -CONFIG_PATA_MARVELL=y -CONFIG_PATA_SIL680=y -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=y -CONFIG_BLK_DEV_DM=y -CONFIG_DM_CRYPT=y -CONFIG_DM_SNAPSHOT=y -CONFIG_DM_MIRROR=y -CONFIG_DM_ZERO=y -CONFIG_DM_UEVENT=y -CONFIG_NETDEVICES=y -CONFIG_TUN=y -CONFIG_E1000E=y -CONFIG_TIGON3=y -# CONFIG_WLAN is not set -# CONFIG_INPUT is not set -# CONFIG_SERIO is not set -# CONFIG_VT is not set -CONFIG_DEVPTS_MULTIPLE_INSTANCES=y -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_HW_RANDOM=y -CONFIG_RAW_DRIVER=y -CONFIG_MAX_RAW_DEVS=1024 -# CONFIG_HWMON is not set -# CONFIG_VGA_ARB is not set -# CONFIG_USB_SUPPORT is not set -CONFIG_EDAC=y -CONFIG_EDAC_MM_EDAC=y -CONFIG_RTC_CLASS=y -CONFIG_RTC_DRV_DS1511=y -CONFIG_RTC_DRV_DS1553=y -CONFIG_EXT2_FS=y 
-CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT2_FS_XIP=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y -CONFIG_EXT4_FS=y -# CONFIG_DNOTIFY is not set -CONFIG_FUSE_FS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_UDF_FS=m -CONFIG_MSDOS_FS=y -CONFIG_VFAT_FS=y -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_TMPFS_POSIX_ACL=y -CONFIG_CONFIGFS_FS=m -CONFIG_CRAMFS=y -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=y -CONFIG_NFS_V4_1=y -CONFIG_ROOT_NFS=y -CONFIG_CIFS=y -CONFIG_CIFS_WEAK_PW_HASH=y -CONFIG_CIFS_XATTR=y -CONFIG_CIFS_POSIX=y -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_ASCII=y -CONFIG_NLS_ISO8859_1=y -CONFIG_CRC_CCITT=m -CONFIG_CRC_T10DIF=y -CONFIG_LIBCRC32C=m -CONFIG_PRINTK_TIME=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_STRIP_ASM_SYMS=y -CONFIG_DETECT_HUNG_TASK=y -# CONFIG_SCHED_DEBUG is not set -CONFIG_DEBUG_INFO=y -CONFIG_FTRACE_SYSCALLS=y -CONFIG_PPC_EMULATED_STATS=y -CONFIG_XMON=y -CONFIG_XMON_DEFAULT=y -CONFIG_IRQ_DOMAIN_DEBUG=y -CONFIG_PPC_EARLY_DEBUG=y -CONFIG_KEYS_DEBUG_PROC_KEYS=y -CONFIG_CRYPTO_NULL=m -CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_SHA256=m -CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_TGR192=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_AES=m -CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_TEA=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_LZO=m -# CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_VIRTUALIZATION=y diff --git a/arch/powerpc/include/asm/cpm2.h b/arch/powerpc/include/asm/cpm2.h index f42e9baf3a4..7c8608b0969 100644 --- a/arch/powerpc/include/asm/cpm2.h +++ b/arch/powerpc/include/asm/cpm2.h @@ -489,7 +489,6 @@ typedef struct scc_trans { #define FCC_GFMR_TCI ((uint)0x20000000) #define FCC_GFMR_TRX ((uint)0x10000000) #define FCC_GFMR_TTX ((uint)0x08000000) -#define FCC_GFMR_TTX ((uint)0x08000000) #define FCC_GFMR_CDP ((uint)0x04000000) #define FCC_GFMR_CTSP ((uint)0x02000000) #define FCC_GFMR_CDS ((uint)0x01000000) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index b76f58c124c..fab7743c264 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -254,6 +254,7 @@ void *eeh_pe_traverse(struct eeh_pe *root, void *eeh_pe_dev_traverse(struct eeh_pe *root, eeh_traverse_func fn, void *flag); void eeh_pe_restore_bars(struct eeh_pe *pe); +const char *eeh_pe_loc_get(struct eeh_pe *pe); struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe); void *eeh_dev_init(struct device_node *dn, void *data); diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h index 89d5670b2ee..1e551a2d6f8 100644 --- a/arch/powerpc/include/asm/eeh_event.h +++ b/arch/powerpc/include/asm/eeh_event.h @@ -33,7 +33,7 @@ struct eeh_event { int eeh_event_init(void); int eeh_send_failure_event(struct eeh_pe *pe); -void eeh_remove_event(struct eeh_pe *pe); +void eeh_remove_event(struct eeh_pe *pe, bool force); void eeh_handle_event(struct eeh_pe *pe); #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index 901dac6b6cb..d0918e09557 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h @@ -223,10 +223,6 @@ typedef struct { unsigned int id; unsigned int active; unsigned long 
vdso_base; -#ifdef CONFIG_PPC_ICSWX - struct spinlock *cop_lockp; /* guard cop related stuff */ - unsigned long acop; /* mask of enabled coprocessor types */ -#endif /* CONFIG_PPC_ICSWX */ #ifdef CONFIG_PPC_MM_SLICES u64 low_slices_psize; /* SLB page size encodings */ u64 high_slices_psize; /* 4 bits per slice for now */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index cb15cbb5160..460018889ba 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -599,9 +599,9 @@ enum { }; struct OpalIoPhbErrorCommon { - uint32_t version; - uint32_t ioType; - uint32_t len; + __be32 version; + __be32 ioType; + __be32 len; }; struct OpalIoP7IOCPhbErrorData { @@ -666,64 +666,64 @@ struct OpalIoP7IOCPhbErrorData { struct OpalIoPhb3ErrorData { struct OpalIoPhbErrorCommon common; - uint32_t brdgCtl; + __be32 brdgCtl; /* PHB3 UTL regs */ - uint32_t portStatusReg; - uint32_t rootCmplxStatus; - uint32_t busAgentStatus; + __be32 portStatusReg; + __be32 rootCmplxStatus; + __be32 busAgentStatus; /* PHB3 cfg regs */ - uint32_t deviceStatus; - uint32_t slotStatus; - uint32_t linkStatus; - uint32_t devCmdStatus; - uint32_t devSecStatus; + __be32 deviceStatus; + __be32 slotStatus; + __be32 linkStatus; + __be32 devCmdStatus; + __be32 devSecStatus; /* cfg AER regs */ - uint32_t rootErrorStatus; - uint32_t uncorrErrorStatus; - uint32_t corrErrorStatus; - uint32_t tlpHdr1; - uint32_t tlpHdr2; - uint32_t tlpHdr3; - uint32_t tlpHdr4; - uint32_t sourceId; + __be32 rootErrorStatus; + __be32 uncorrErrorStatus; + __be32 corrErrorStatus; + __be32 tlpHdr1; + __be32 tlpHdr2; + __be32 tlpHdr3; + __be32 tlpHdr4; + __be32 sourceId; - uint32_t rsv3; + __be32 rsv3; /* Record data about the call to allocate a buffer */ - uint64_t errorClass; - uint64_t correlator; + __be64 errorClass; + __be64 correlator; - uint64_t nFir; /* 000 */ - uint64_t nFirMask; /* 003 */ - uint64_t nFirWOF; /* 008 */ + __be64 nFir; /* 000 */ + __be64 nFirMask; /* 003 */ + __be64 nFirWOF; /* 008 */ /* PHB3 MMIO Error Regs */ - uint64_t phbPlssr; /* 120 */ - uint64_t phbCsr; /* 110 */ - uint64_t lemFir; /* C00 */ - uint64_t lemErrorMask; /* C18 */ - uint64_t lemWOF; /* C40 */ - uint64_t phbErrorStatus; /* C80 */ - uint64_t phbFirstErrorStatus; /* C88 */ - uint64_t phbErrorLog0; /* CC0 */ - uint64_t phbErrorLog1; /* CC8 */ - uint64_t mmioErrorStatus; /* D00 */ - uint64_t mmioFirstErrorStatus; /* D08 */ - uint64_t mmioErrorLog0; /* D40 */ - uint64_t mmioErrorLog1; /* D48 */ - uint64_t dma0ErrorStatus; /* D80 */ - uint64_t dma0FirstErrorStatus; /* D88 */ - uint64_t dma0ErrorLog0; /* DC0 */ - uint64_t dma0ErrorLog1; /* DC8 */ - uint64_t dma1ErrorStatus; /* E00 */ - uint64_t dma1FirstErrorStatus; /* E08 */ - uint64_t dma1ErrorLog0; /* E40 */ - uint64_t dma1ErrorLog1; /* E48 */ - uint64_t pestA[OPAL_PHB3_NUM_PEST_REGS]; - uint64_t pestB[OPAL_PHB3_NUM_PEST_REGS]; + __be64 phbPlssr; /* 120 */ + __be64 phbCsr; /* 110 */ + __be64 lemFir; /* C00 */ + __be64 lemErrorMask; /* C18 */ + __be64 lemWOF; /* C40 */ + __be64 phbErrorStatus; /* C80 */ + __be64 phbFirstErrorStatus; /* C88 */ + __be64 phbErrorLog0; /* CC0 */ + __be64 phbErrorLog1; /* CC8 */ + __be64 mmioErrorStatus; /* D00 */ + __be64 mmioFirstErrorStatus; /* D08 */ + __be64 mmioErrorLog0; /* D40 */ + __be64 mmioErrorLog1; /* D48 */ + __be64 dma0ErrorStatus; /* D80 */ + __be64 dma0FirstErrorStatus; /* D88 */ + __be64 dma0ErrorLog0; /* DC0 */ + __be64 dma0ErrorLog1; /* DC8 */ + __be64 dma1ErrorStatus; /* E00 */ + __be64 dma1FirstErrorStatus; /* E08 */ + 
__be64 dma1ErrorLog0; /* E40 */ + __be64 dma1ErrorLog1; /* E48 */ + __be64 pestA[OPAL_PHB3_NUM_PEST_REGS]; + __be64 pestB[OPAL_PHB3_NUM_PEST_REGS]; }; enum { @@ -851,8 +851,8 @@ int64_t opal_pci_mask_pe_error(uint64_t phb_id, uint16_t pe_number, uint8_t erro int64_t opal_set_slot_led_status(uint64_t phb_id, uint64_t slot_id, uint8_t led_type, uint8_t led_action); int64_t opal_get_epow_status(__be64 *status); int64_t opal_set_system_attention_led(uint8_t led_action); -int64_t opal_pci_next_error(uint64_t phb_id, uint64_t *first_frozen_pe, - uint16_t *pci_error_type, uint16_t *severity); +int64_t opal_pci_next_error(uint64_t phb_id, __be64 *first_frozen_pe, + __be16 *pci_error_type, __be16 *severity); int64_t opal_pci_poll(uint64_t phb_id); int64_t opal_return_cpu(void); int64_t opal_reinit_cpus(uint64_t flags); diff --git a/arch/powerpc/include/asm/reg_a2.h b/arch/powerpc/include/asm/reg_a2.h index 3d52a1132f3..3ba9c6f096f 100644 --- a/arch/powerpc/include/asm/reg_a2.h +++ b/arch/powerpc/include/asm/reg_a2.h @@ -110,15 +110,6 @@ #define TLB1_UR ASM_CONST(0x0000000000000002) #define TLB1_SR ASM_CONST(0x0000000000000001) -#ifdef CONFIG_PPC_EARLY_DEBUG_WSP -#define WSP_UART_PHYS 0xffc000c000 -/* This needs to be careful chosen to hit a !0 congruence class - * in the TLB since we bolt it in way 3, which is already occupied - * by our linear mapping primary bolted entry in CC 0. - */ -#define WSP_UART_VIRT 0xf000000000001000 -#endif - /* A2 erativax attributes definitions */ #define ERATIVAX_RS_IS_ALL 0x000 #define ERATIVAX_RS_IS_TID 0x040 diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 0e83e7d8c73..58abeda64cb 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -16,13 +16,15 @@ struct thread_struct; extern struct task_struct *_switch(struct thread_struct *prev, struct thread_struct *next); #ifdef CONFIG_PPC_BOOK3S_64 -static inline void save_tar(struct thread_struct *prev) +static inline void save_early_sprs(struct thread_struct *prev) { if (cpu_has_feature(CPU_FTR_ARCH_207S)) prev->tar = mfspr(SPRN_TAR); + if (cpu_has_feature(CPU_FTR_DSCR)) + prev->dscr = mfspr(SPRN_DSCR); } #else -static inline void save_tar(struct thread_struct *prev) {} +static inline void save_early_sprs(struct thread_struct *prev) {} #endif extern void enable_kernel_fp(void); @@ -84,6 +86,8 @@ static inline void clear_task_ebb(struct task_struct *t) { #ifdef CONFIG_PPC_BOOK3S_64 /* EBB perf events are not inherited, so clear all EBB state. */ + t->thread.ebbrr = 0; + t->thread.ebbhr = 0; t->thread.bescr = 0; t->thread.mmcr2 = 0; t->thread.mmcr0 = 0; diff --git a/arch/powerpc/include/asm/wsp.h b/arch/powerpc/include/asm/wsp.h deleted file mode 100644 index c7dc83088a3..00000000000 --- a/arch/powerpc/include/asm/wsp.h +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright 2011 Michael Ellerman, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ -#ifndef __ASM_POWERPC_WSP_H -#define __ASM_POWERPC_WSP_H - -extern int wsp_get_chip_id(struct device_node *dn); - -#endif /* __ASM_POWERPC_WSP_H */ diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h index 5b7657959fa..de2c0e4ee1a 100644 --- a/arch/powerpc/include/uapi/asm/cputable.h +++ b/arch/powerpc/include/uapi/asm/cputable.h @@ -41,5 +41,6 @@ #define PPC_FEATURE2_EBB 0x10000000 #define PPC_FEATURE2_ISEL 0x08000000 #define PPC_FEATURE2_TAR 0x04000000 +#define PPC_FEATURE2_VEC_CRYPTO 0x02000000 #endif /* _UAPI__ASM_POWERPC_CPUTABLE_H */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index fab19ec2559..670c312d914 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -43,7 +43,6 @@ obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o obj64-$(CONFIG_RELOCATABLE) += reloc_64.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o -obj-$(CONFIG_PPC_A2) += cpu_setup_a2.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o obj-$(CONFIG_PPC_970_NAP) += idle_power4.o diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S deleted file mode 100644 index 61f079e05b6..00000000000 --- a/arch/powerpc/kernel/cpu_setup_a2.S +++ /dev/null @@ -1,120 +0,0 @@ -/* - * A2 specific assembly support code - * - * Copyright 2009 Ben Herrenschmidt, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <asm/asm-offsets.h> -#include <asm/ppc_asm.h> -#include <asm/ppc-opcode.h> -#include <asm/processor.h> -#include <asm/reg_a2.h> -#include <asm/reg.h> -#include <asm/thread_info.h> - -/* - * Disable thdid and class fields in ERATs to bump PID to full 14 bits capacity. - * This also prevents external LPID accesses but that isn't a problem when not a - * guest. Under PV, this setting will be ignored and MMUCR will return the right - * number of PID bits we can use. - */ -#define MMUCR1_EXTEND_PID \ - (MMUCR1_ICTID | MMUCR1_ITTID | MMUCR1_DCTID | \ - MMUCR1_DTTID | MMUCR1_DCCD) - -/* - * Use extended PIDs if enabled. - * Don't clear the ERATs on context sync events and enable I & D LRU. - * Enable ERAT back invalidate when tlbwe overwrites an entry. - */ -#define INITIAL_MMUCR1 \ - (MMUCR1_EXTEND_PID | MMUCR1_CSINV_NEVER | MMUCR1_IRRE | \ - MMUCR1_DRRE | MMUCR1_TLBWE_BINV) - -_GLOBAL(__setup_cpu_a2) - /* Some of these are actually thread local and some are - * core local but doing it always won't hurt - */ - -#ifdef CONFIG_PPC_ICSWX - /* Make sure ACOP starts out as zero */ - li r3,0 - mtspr SPRN_ACOP,r3 - - /* Skip the following if we are in Guest mode */ - mfmsr r3 - andis. r0,r3,MSR_GS@h - bne _icswx_skip_guest - - /* Enable icswx instruction */ - mfspr r3,SPRN_A2_CCR2 - ori r3,r3,A2_CCR2_ENABLE_ICSWX - mtspr SPRN_A2_CCR2,r3 - - /* Unmask all CTs in HACOP */ - li r3,-1 - mtspr SPRN_HACOP,r3 -_icswx_skip_guest: -#endif /* CONFIG_PPC_ICSWX */ - - /* Enable doorbell */ - mfspr r3,SPRN_A2_CCR2 - oris r3,r3,A2_CCR2_ENABLE_PC@h - mtspr SPRN_A2_CCR2,r3 - isync - - /* Setup CCR0 to disable power saving for now as it's busted - * in the current implementations. Setup CCR1 to wake on - * interrupts normally (we write the default value but who - * knows what FW may have clobbered...) 
- */ - li r3,0 - mtspr SPRN_A2_CCR0, r3 - LOAD_REG_IMMEDIATE(r3,0x0f0f0f0f) - mtspr SPRN_A2_CCR1, r3 - - /* Initialise MMUCR1 */ - lis r3,INITIAL_MMUCR1@h - ori r3,r3,INITIAL_MMUCR1@l - mtspr SPRN_MMUCR1,r3 - - /* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */ - LOAD_REG_IMMEDIATE(r3, 0x000a7531) - mtspr SPRN_MMUCR2,r3 - - /* Set MMUCR3 to write all thids bit to the TLB */ - LOAD_REG_IMMEDIATE(r3, 0x0000000f) - mtspr SPRN_MMUCR3,r3 - - /* Don't do ERAT stuff if running guest mode */ - mfmsr r3 - andis. r0,r3,MSR_GS@h - bne 1f - - /* Now set the I-ERAT watermark to 15 */ - lis r4,(MMUCR0_TLBSEL_I|MMUCR0_ECL)@h - mtspr SPRN_MMUCR0, r4 - li r4,A2_IERAT_SIZE-1 - PPC_ERATWE(R4,R4,3) - - /* Now set the D-ERAT watermark to 31 */ - lis r4,(MMUCR0_TLBSEL_D|MMUCR0_ECL)@h - mtspr SPRN_MMUCR0, r4 - li r4,A2_DERAT_SIZE-1 - PPC_ERATWE(R4,R4,3) - - /* And invalidate the beast just in case. That won't get rid of - * a bolted entry though it will be in LRU and so will go away eventually - * but let's not bother for now - */ - PPC_ERATILX(0,0,R0) -1: - blr - -_GLOBAL(__restore_cpu_a2) - b __setup_cpu_a2 diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 1557e7c2c7e..46733535cc0 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -56,6 +56,7 @@ _GLOBAL(__setup_cpu_power8) li r0,0 mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR + ori r3, r3, LPCR_PECEDH bl __init_LPCR bl __init_HFSCR bl __init_tlb_power8 @@ -74,6 +75,7 @@ _GLOBAL(__restore_cpu_power8) li r0,0 mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR + ori r3, r3, LPCR_PECEDH bl __init_LPCR bl __init_HFSCR bl __init_tlb_power8 diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index c1faade6506..965291b4c2f 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -109,7 +109,8 @@ extern void __restore_cpu_e6500(void); PPC_FEATURE_PSERIES_PERFMON_COMPAT) #define COMMON_USER2_POWER8 (PPC_FEATURE2_ARCH_2_07 | \ PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_DSCR | \ - PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR) + PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \ + PPC_FEATURE2_VEC_CRYPTO) #define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\ PPC_FEATURE_TRUE_LE | \ PPC_FEATURE_HAS_ALTIVEC_COMP) @@ -2148,44 +2149,6 @@ static struct cpu_spec __initdata cpu_specs[] = { } #endif /* CONFIG_PPC32 */ #endif /* CONFIG_E500 */ - -#ifdef CONFIG_PPC_A2 - { /* Standard A2 (>= DD2) + FPU core */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00480000, - .cpu_name = "A2 (>= DD2)", - .cpu_features = CPU_FTRS_A2, - .cpu_user_features = COMMON_USER_PPC64, - .mmu_features = MMU_FTRS_A2, - .icache_bsize = 64, - .dcache_bsize = 64, - .num_pmcs = 0, - .cpu_setup = __setup_cpu_a2, - .cpu_restore = __restore_cpu_a2, - .machine_check = machine_check_generic, - .platform = "ppca2", - }, - { /* This is a default entry to get going, to be replaced by - * a real one at some stage - */ -#define CPU_FTRS_BASE_BOOK3E (CPU_FTR_USE_TB | \ - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_SMT | \ - CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) - .pvr_mask = 0x00000000, - .pvr_value = 0x00000000, - .cpu_name = "Book3E", - .cpu_features = CPU_FTRS_BASE_BOOK3E, - .cpu_user_features = COMMON_USER_PPC64, - .mmu_features = MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX | - MMU_FTR_USE_TLBIVAX_BCAST | - MMU_FTR_LOCK_BCAST_INVAL, - .icache_bsize = 64, - .dcache_bsize = 64, - .num_pmcs = 0, - .machine_check = machine_check_generic, - .platform = "power6", - }, -#endif /* CONFIG_PPC_A2 */ }; static struct cpu_spec 
the_cpu_spec; diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 7051ea3101b..86e25702aac 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -330,8 +330,8 @@ static int eeh_phb_check_failure(struct eeh_pe *pe) eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); eeh_serialize_unlock(flags); - pr_err("EEH: PHB#%x failure detected\n", - phb_pe->phb->global_number); + pr_err("EEH: PHB#%x failure detected, location: %s\n", + phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe)); dump_stack(); eeh_send_failure_event(phb_pe); @@ -358,10 +358,11 @@ out: int eeh_dev_check_failure(struct eeh_dev *edev) { int ret; + int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); unsigned long flags; struct device_node *dn; struct pci_dev *dev; - struct eeh_pe *pe; + struct eeh_pe *pe, *parent_pe, *phb_pe; int rc = 0; const char *location; @@ -439,14 +440,34 @@ int eeh_dev_check_failure(struct eeh_dev *edev) */ if ((ret < 0) || (ret == EEH_STATE_NOT_SUPPORT) || - (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == - (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { + ((ret & active_flags) == active_flags)) { eeh_stats.false_positives++; pe->false_positives++; rc = 0; goto dn_unlock; } + /* + * It should be corner case that the parent PE has been + * put into frozen state as well. We should take care + * that at first. + */ + parent_pe = pe->parent; + while (parent_pe) { + /* Hit the ceiling ? */ + if (parent_pe->type & EEH_PE_PHB) + break; + + /* Frozen parent PE ? */ + ret = eeh_ops->get_state(parent_pe, NULL); + if (ret > 0 && + (ret & active_flags) != active_flags) + pe = parent_pe; + + /* Next parent level */ + parent_pe = parent_pe->parent; + } + eeh_stats.slot_resets++; /* Avoid repeated reports of this failure, including problems @@ -460,8 +481,11 @@ int eeh_dev_check_failure(struct eeh_dev *edev) * a stack trace will help the device-driver authors figure * out what happened. So print that out. */ - pr_err("EEH: Frozen PE#%x detected on PHB#%x\n", - pe->addr, pe->phb->global_number); + phb_pe = eeh_phb_pe_get(pe->phb); + pr_err("EEH: Frozen PHB#%x-PE#%x detected\n", + pe->phb->global_number, pe->addr); + pr_err("EEH: PE location: %s, PHB location: %s\n", + eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe)); dump_stack(); eeh_send_failure_event(pe); diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 7100a5b96e7..420da61d4ce 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -447,8 +447,9 @@ static void *eeh_pe_detach_dev(void *data, void *userdata) * PE reset (for 3 times), we try to clear the frozen state * for 3 times as well. */ -static int eeh_clear_pe_frozen_state(struct eeh_pe *pe) +static void *__eeh_clear_pe_frozen_state(void *data, void *flag) { + struct eeh_pe *pe = (struct eeh_pe *)data; int i, rc; for (i = 0; i < 3; i++) { @@ -461,13 +462,24 @@ static int eeh_clear_pe_frozen_state(struct eeh_pe *pe) } /* The PE has been isolated, clear it */ - if (rc) + if (rc) { pr_warn("%s: Can't clear frozen PHB#%x-PE#%x (%d)\n", __func__, pe->phb->global_number, pe->addr, rc); - else + return (void *)pe; + } + + return NULL; +} + +static int eeh_clear_pe_frozen_state(struct eeh_pe *pe) +{ + void *rc; + + rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, NULL); + if (!rc) eeh_pe_state_clear(pe, EEH_PE_ISOLATED); - return rc; + return rc ? 
-EIO : 0; } /** @@ -758,7 +770,7 @@ static void eeh_handle_special_event(void) eeh_serialize_lock(&flags); /* Purge all events */ - eeh_remove_event(NULL); + eeh_remove_event(NULL, true); list_for_each_entry(hose, &hose_list, list_node) { phb_pe = eeh_phb_pe_get(hose); @@ -777,7 +789,7 @@ static void eeh_handle_special_event(void) eeh_serialize_lock(&flags); /* Purge all events of the PHB */ - eeh_remove_event(pe); + eeh_remove_event(pe, true); if (rc == EEH_NEXT_ERR_DEAD_PHB) eeh_pe_state_mark(pe, EEH_PE_ISOLATED); diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c index 72d748b56c8..4eefb6e34db 100644 --- a/arch/powerpc/kernel/eeh_event.c +++ b/arch/powerpc/kernel/eeh_event.c @@ -152,24 +152,33 @@ int eeh_send_failure_event(struct eeh_pe *pe) /** * eeh_remove_event - Remove EEH event from the queue * @pe: Event binding to the PE + * @force: Event will be removed unconditionally * * On PowerNV platform, we might have subsequent coming events * is part of the former one. For that case, those subsequent * coming events are totally duplicated and unnecessary, thus * they should be removed. */ -void eeh_remove_event(struct eeh_pe *pe) +void eeh_remove_event(struct eeh_pe *pe, bool force) { unsigned long flags; struct eeh_event *event, *tmp; + /* + * If we have NULL PE passed in, we have dead IOC + * or we're sure we can report all existing errors + * by the caller. + * + * With "force", the event with associated PE that + * have been isolated, the event won't be removed + * to avoid event lost. + */ spin_lock_irqsave(&eeh_eventlist_lock, flags); list_for_each_entry_safe(event, tmp, &eeh_eventlist, list) { - /* - * If we don't have valid PE passed in, that means - * we already have event corresponding to dead IOC - * and all events should be purged. - */ + if (!force && event->pe && + (event->pe->state & EEH_PE_ISOLATED)) + continue; + if (!pe) { list_del(&event->list); kfree(event); diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 995c2a28463..fbd01eba447 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -792,6 +792,66 @@ void eeh_pe_restore_bars(struct eeh_pe *pe) } /** + * eeh_pe_loc_get - Retrieve location code binding to the given PE + * @pe: EEH PE + * + * Retrieve the location code of the given PE. If the primary PE bus + * is root bus, we will grab location code from PHB device tree node + * or root port. Otherwise, the upstream bridge's device tree node + * of the primary PE bus will be checked for the location code. + */ +const char *eeh_pe_loc_get(struct eeh_pe *pe) +{ + struct pci_controller *hose; + struct pci_bus *bus = eeh_pe_bus_get(pe); + struct pci_dev *pdev; + struct device_node *dn; + const char *loc; + + if (!bus) + return "N/A"; + + /* PHB PE or root PE ? 
*/ + if (pci_is_root_bus(bus)) { + hose = pci_bus_to_host(bus); + loc = of_get_property(hose->dn, + "ibm,loc-code", NULL); + if (loc) + return loc; + loc = of_get_property(hose->dn, + "ibm,io-base-loc-code", NULL); + if (loc) + return loc; + + pdev = pci_get_slot(bus, 0x0); + } else { + pdev = bus->self; + } + + if (!pdev) { + loc = "N/A"; + goto out; + } + + dn = pci_device_to_OF_node(pdev); + if (!dn) { + loc = "N/A"; + goto out; + } + + loc = of_get_property(dn, "ibm,loc-code", NULL); + if (!loc) + loc = of_get_property(dn, "ibm,slot-location-code", NULL); + if (!loc) + loc = "N/A"; + +out: + if (pci_is_root_bus(bus) && pdev) + pci_dev_put(pdev); + return loc; +} + +/** * eeh_pe_bus_get - Retrieve PCI bus according to the given PE * @pe: EEH PE * diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 911d45366f5..6528c5e2cc4 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -428,12 +428,6 @@ BEGIN_FTR_SECTION std r24,THREAD_VRSAVE(r3) END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_PPC64 -BEGIN_FTR_SECTION - mfspr r25,SPRN_DSCR - std r25,THREAD_DSCR(r3) -END_FTR_SECTION_IFSET(CPU_FTR_DSCR) -#endif and. r0,r0,r22 beq+ 1f andc r22,r22,r0 diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 771b4e92e5d..bb9cac6c805 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -1467,22 +1467,6 @@ a2_tlbinit_after_linear_map: .globl a2_tlbinit_after_iprot_flush a2_tlbinit_after_iprot_flush: -#ifdef CONFIG_PPC_EARLY_DEBUG_WSP - /* Now establish early debug mappings if applicable */ - /* Restore the MAS0 we used for linear mapping load */ - mtspr SPRN_MAS0,r11 - - lis r3,(MAS1_VALID | MAS1_IPROT)@h - ori r3,r3,(BOOK3E_PAGESZ_4K << MAS1_TSIZE_SHIFT) - mtspr SPRN_MAS1,r3 - LOAD_REG_IMMEDIATE(r3, WSP_UART_VIRT | MAS2_I | MAS2_G) - mtspr SPRN_MAS2,r3 - LOAD_REG_IMMEDIATE(r3, WSP_UART_PHYS | MAS3_SR | MAS3_SW) - mtspr SPRN_MAS7_MAS3,r3 - /* re-use the MAS8 value from the linear mapping */ - tlbwe -#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */ - PPC_TLBILX(0,0,R0) sync isync diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 20f11eb4dff..a7d36b19221 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -439,9 +439,9 @@ BEGIN_FTR_SECTION * R9 = CR * Original R9 to R13 is saved on PACA_EXMC * - * Switch to mc_emergency stack and handle re-entrancy (though we - * currently don't test for overflow). Save MCE registers srr1, - * srr0, dar and dsisr and then set ME=1 + * Switch to mc_emergency stack and handle re-entrancy (we limit + * the nested MCE upto level 4 to avoid stack overflow). + * Save MCE registers srr1, srr0, dar and dsisr and then set ME=1 * * We use paca->in_mce to check whether this is the first entry or * nested machine check. We increment paca->in_mce to track nested @@ -464,6 +464,9 @@ BEGIN_FTR_SECTION 0: subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ addi r10,r10,1 /* increment paca->in_mce */ sth r10,PACA_IN_MCE(r13) + /* Limit nested MCE to level 4 to avoid stack overflow */ + cmpwi r10,4 + bgt 2f /* Check if we hit limit of 4 */ std r11,GPR1(r1) /* Save r1 on the stack. 
*/ std r11,0(r1) /* make stack chain pointer */ mfspr r11,SPRN_SRR0 /* Save SRR0 */ @@ -482,10 +485,23 @@ BEGIN_FTR_SECTION ori r11,r11,MSR_RI /* turn on RI bit */ ld r12,PACAKBASE(r13) /* get high part of &label */ LOAD_HANDLER(r12, machine_check_handle_early) - mtspr SPRN_SRR0,r12 +1: mtspr SPRN_SRR0,r12 mtspr SPRN_SRR1,r11 rfid b . /* prevent speculative execution */ +2: + /* Stack overflow. Stay on emergency stack and panic. + * Keep the ME bit off while panic-ing, so that if we hit + * another machine check we checkstop. + */ + addi r1,r1,INT_FRAME_SIZE /* go back to previous stack frame */ + ld r11,PACAKMSR(r13) + ld r12,PACAKBASE(r13) + LOAD_HANDLER(r12, unrecover_mce) + li r10,MSR_ME + andc r11,r11,r10 /* Turn off MSR_ME */ + b 1b + b . /* prevent speculative execution */ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) machine_check_pSeries: @@ -1389,6 +1405,7 @@ machine_check_handle_early: bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_early + std r3,RESULT(r1) /* Save result */ ld r12,_MSR(r1) #ifdef CONFIG_PPC_P7_NAP /* @@ -1443,11 +1460,33 @@ machine_check_handle_early: */ andi. r11,r12,MSR_RI bne 2f -1: addi r3,r1,STACK_FRAME_OVERHEAD - bl unrecoverable_exception - b 1b +1: mfspr r11,SPRN_SRR0 + ld r10,PACAKBASE(r13) + LOAD_HANDLER(r10,unrecover_mce) + mtspr SPRN_SRR0,r10 + ld r10,PACAKMSR(r13) + /* + * We are going down. But there are chances that we might get hit by + * another MCE during panic path and we may run into unstable state + * with no way out. Hence, turn ME bit off while going down, so that + * when another MCE is hit during panic path, system will checkstop + * and hypervisor will get restarted cleanly by SP. + */ + li r3,MSR_ME + andc r10,r10,r3 /* Turn off MSR_ME */ + mtspr SPRN_SRR1,r10 + rfid + b . 2: /* + * Check if we have successfully handled/recovered from error, if not + * then stay on emergency stack and panic. + */ + ld r3,RESULT(r1) /* Load result */ + cmpdi r3,0 /* see if we handled MCE successfully */ + + beq 1b /* if !handled then panic */ + /* * Return from MC interrupt. * Queue up the MCE event so that we can log it later, while * returning from kernel or opal call. @@ -1460,6 +1499,17 @@ machine_check_handle_early: MACHINE_CHECK_HANDLER_WINDUP b machine_check_pSeries +unrecover_mce: + /* Invoke machine_check_exception to print MCE event and panic. */ + addi r3,r1,STACK_FRAME_OVERHEAD + bl machine_check_exception + /* + * We will not reach here. Even if we did, there is no way out. Call + * unrecoverable_exception and die. + */ +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl unrecoverable_exception + b 1b /* * r13 points to the PACA, r9 contains the saved CR, * r12 contain the saved SRR1, SRR0 is still ready for return diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 67ee0d6c107..7d7d8635227 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -930,25 +930,6 @@ initial_mmu: tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */ tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */ -#if defined(CONFIG_SERIAL_TEXT_DEBUG) && defined(SERIAL_DEBUG_IO_BASE) - - /* Load a TLB entry for the UART, so that ppc4xx_progress() can use - * the UARTs nice and early. We use a 4k real==virtual mapping. 
*/ - - lis r3,SERIAL_DEBUG_IO_BASE@h - ori r3,r3,SERIAL_DEBUG_IO_BASE@l - mr r4,r3 - clrrwi r4,r4,12 - ori r4,r4,(TLB_WR|TLB_I|TLB_M|TLB_G) - - clrrwi r3,r3,12 - ori r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_4K)) - - li r0,0 /* TLB slot 0 */ - tlbwe r4,r0,TLB_DATA - tlbwe r3,r0,TLB_TAG -#endif /* CONFIG_SERIAL_DEBUG_TEXT && SERIAL_DEBUG_IO_BASE */ - isync /* Establish the exception vector base diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 8a1edbe26b8..be99774d3f4 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -755,15 +755,15 @@ struct task_struct *__switch_to(struct task_struct *prev, WARN_ON(!irqs_disabled()); - /* Back up the TAR across context switches. + /* Back up the TAR and DSCR across context switches. * Note that the TAR is not available for use in the kernel. (To * provide this, the TAR should be backed up/restored on exception * entry/exit instead, and be in pt_regs. FIXME, this should be in * pt_regs anyway (for debug).) - * Save the TAR here before we do treclaim/trecheckpoint as these - * will change the TAR. + * Save the TAR and DSCR here before we do treclaim/trecheckpoint as + * these will change them. */ - save_tar(&prev->thread); + save_early_sprs(&prev->thread); __switch_to_tm(prev); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index d4d418376f9..e239df3768a 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -471,7 +471,7 @@ void __init smp_setup_cpu_maps(void) for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) { DBG(" thread %d -> cpu %d (hard id %d)\n", j, cpu, be32_to_cpu(intserv[j])); - set_cpu_present(cpu, true); + set_cpu_present(cpu, of_device_is_available(dn)); set_hard_smp_processor_id(cpu, be32_to_cpu(intserv[j])); set_cpu_possible(cpu, true); cpu++; diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 7753af2d261..51a3ff78838 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -749,7 +749,7 @@ int setup_profiling_timer(unsigned int multiplier) /* cpumask of CPUs with asymetric SMT dependancy */ static const int powerpc_smt_flags(void) { - int flags = SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES; + int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 7e711bdcc6d..9fff9cdcc51 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -551,7 +551,7 @@ void timer_interrupt(struct pt_regs * regs) may_hard_irq_enable(); -#if defined(CONFIG_PPC32) && defined(CONFIG_PMAC) +#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC) if (atomic_read(&ppc_n_lost_interrupts) != 0) do_IRQ(regs); #endif diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 1bd7ca298fa..239f1cde3ff 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -295,6 +295,8 @@ long machine_check_early(struct pt_regs *regs) { long handled = 0; + __get_cpu_var(irq_stat).mce_exceptions++; + if (cur_cpu_spec && cur_cpu_spec->machine_check_early) handled = cur_cpu_spec->machine_check_early(regs); return handled; diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index a15837519dc..b7aa07279a6 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -62,8 +62,6 @@ void __init udbg_early_init(void) udbg_init_cpm(); #elif 
defined(CONFIG_PPC_EARLY_DEBUG_USBGECKO) udbg_init_usbgecko(); -#elif defined(CONFIG_PPC_EARLY_DEBUG_WSP) - udbg_init_wsp(); #elif defined(CONFIG_PPC_EARLY_DEBUG_MEMCONS) /* In memory console */ udbg_init_memcons(); diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 75702e207b2..6e7c4923b5e 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -296,14 +296,3 @@ void __init udbg_init_40x_realmode(void) } #endif /* CONFIG_PPC_EARLY_DEBUG_40x */ - - -#ifdef CONFIG_PPC_EARLY_DEBUG_WSP - -void __init udbg_init_wsp(void) -{ - udbg_uart_init_mmio((void *)WSP_UART_VIRT, 1); - udbg_uart_setup(57600, 50000000); -} - -#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */ diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 768a9f977c0..3a5c568b1e8 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -113,10 +113,8 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) * We assume that if the condition is recovered then linux host * will have generated an error log event that we will pick * up and log later. - * Don't release mce event now. In case if condition is not - * recovered we do guest exit and go back to linux host machine - * check handler. Hence we need make sure that current mce event - * is available for linux host to consume. + * Don't release mce event now. We will queue up the event so that + * we can log the MCE event info on host console. */ if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE)) goto out; @@ -128,11 +126,12 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) out: /* - * If we have handled the error, then release the mce event because - * we will be delivering machine check to guest. + * We are now going enter guest either through machine check + * interrupt (for unhandled errors) or will continue from + * current HSRR0 (for handled errors) in guest. Hence + * queue up the event so that we can log it from host console later. */ - if (handled) - release_mce_event(); + machine_check_queue_event(); return handled; } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 77356fd25cc..868347ef09f 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2257,15 +2257,28 @@ machine_check_realmode: mr r3, r9 /* get vcpu pointer */ bl kvmppc_realmode_machine_check nop - cmpdi r3, 0 /* continue exiting from guest? */ + cmpdi r3, 0 /* Did we handle MCE ? */ ld r9, HSTATE_KVM_VCPU(r13) li r12, BOOK3S_INTERRUPT_MACHINE_CHECK - beq mc_cont + /* + * Deliver unhandled/fatal (e.g. UE) MCE errors to guest through + * machine check interrupt (set HSRR0 to 0x200). And for handled + * errors (no-fatal), just go back to guest execution with current + * HSRR0 instead of exiting guest. This new approach will inject + * machine check to guest for fatal error causing guest to crash. + * + * The old code used to return to host for unhandled errors which + * was causing guest to hang with soft lockups inside guest and + * makes it difficult to recover guest instance. + */ + ld r10, VCPU_PC(r9) + ld r11, VCPU_MSR(r9) + bne 2f /* Continue guest execution. */ /* If not, deliver a machine check. SRR0/1 are already set */ li r10, BOOK3S_INTERRUPT_MACHINE_CHECK ld r11, VCPU_MSR(r9) bl kvmppc_msr_interrupt - b fast_interrupt_c_return +2: b fast_interrupt_c_return /* * Check the reason we woke from nap, and take appropriate action. 
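The branch added to machine_check_realmode above can be read as the following C sketch; the helper names here are hypothetical, the real implementation is the assembly in that hunk:

	/* Sketch of the new realmode MCE dispatch for guests. */
	long handled = kvmppc_realmode_machine_check(vcpu);

	if (handled) {
		/* Handled error: resume the guest at the saved HSRR0/HSRR1. */
		resume_guest(vcpu);                     /* hypothetical helper */
	} else {
		/*
		 * Unhandled/fatal error (e.g. UE): inject a 0x200 machine
		 * check into the guest instead of exiting to the host, so
		 * the guest crashes visibly rather than hanging in soft
		 * lockups.
		 */
		deliver_guest_interrupt(vcpu,           /* hypothetical helper */
					BOOK3S_INTERRUPT_MACHINE_CHECK);
	}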
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index c0511c27a73..412dd46dd0b 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1470,7 +1470,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) regs->gpr[rd] = byterev_4(val); goto ldst_done; -#ifdef CONFIG_PPC_CPU +#ifdef CONFIG_PPC_FPU case 535: /* lfsx */ case 567: /* lfsux */ if (!(regs->msr & MSR_FP)) diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index bf9c6d4cd26..391b3f6b54a 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -19,7 +19,6 @@ source "arch/powerpc/platforms/embedded6xx/Kconfig" source "arch/powerpc/platforms/44x/Kconfig" source "arch/powerpc/platforms/40x/Kconfig" source "arch/powerpc/platforms/amigaone/Kconfig" -source "arch/powerpc/platforms/wsp/Kconfig" config KVM_GUEST bool "KVM Guest support" diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 43b65ad1970..a41bd023647 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -148,10 +148,6 @@ config POWER4 depends on PPC64 && PPC_BOOK3S def_bool y -config PPC_A2 - bool - depends on PPC_BOOK3E_64 - config TUNE_CELL bool "Optimize for Cell Broadband Engine" depends on PPC64 && PPC_BOOK3S @@ -280,7 +276,7 @@ config VSX config PPC_ICSWX bool "Support for PowerPC icswx coprocessor instruction" - depends on POWER4 || PPC_A2 + depends on POWER4 default n ---help--- diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile index 879b4a44849..469ef170d21 100644 --- a/arch/powerpc/platforms/Makefile +++ b/arch/powerpc/platforms/Makefile @@ -22,4 +22,3 @@ obj-$(CONFIG_PPC_CELL) += cell/ obj-$(CONFIG_PPC_PS3) += ps3/ obj-$(CONFIG_EMBEDDED6xx) += embedded6xx/ obj-$(CONFIG_AMIGAONE) += amigaone/ -obj-$(CONFIG_PPC_WSP) += wsp/ diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index 0ba3c959835..bcfd6f063ef 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -35,7 +35,6 @@ #define SPUFS_PS_MAP_SIZE 0x20000 #define SPUFS_MFC_MAP_SIZE 0x1000 #define SPUFS_CNTL_MAP_SIZE 0x1000 -#define SPUFS_CNTL_MAP_SIZE 0x1000 #define SPUFS_SIGNAL_MAP_SIZE PAGE_SIZE #define SPUFS_MSS_MAP_SIZE 0x1000 diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index c252ee95bdd..45a8ed0585c 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -17,6 +17,7 @@ config PPC_POWERNV select CPU_FREQ_GOV_USERSPACE select CPU_FREQ_GOV_ONDEMAND select CPU_FREQ_GOV_CONSERVATIVE + select PPC_DOORBELL default y config PPC_POWERNV_RTAS diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 4ad0d345bc9..d55891f89a2 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -1,9 +1,9 @@ obj-y += setup.o opal-takeover.o opal-wrappers.o opal.o opal-async.o obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o -obj-y += opal-msglog.o subcore.o subcore-asm.o +obj-y += opal-msglog.o -obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o obj-$(CONFIG_EEH) += eeh-ioda.o eeh-powernv.o obj-$(CONFIG_PPC_SCOM) += opal-xscom.o diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c 
b/arch/powerpc/platforms/powernv/eeh-ioda.c index 753f08e36df..8ad0c5b891f 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -267,7 +267,7 @@ static int ioda_eeh_get_state(struct eeh_pe *pe) { s64 ret = 0; u8 fstate; - u16 pcierr; + __be16 pcierr; u32 pe_no; int result; struct pci_controller *hose = pe->phb; @@ -316,7 +316,7 @@ static int ioda_eeh_get_state(struct eeh_pe *pe) result = 0; result &= ~EEH_STATE_RESET_ACTIVE; - if (pcierr != OPAL_EEH_PHB_ERROR) { + if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) { result |= EEH_STATE_MMIO_ACTIVE; result |= EEH_STATE_DMA_ACTIVE; result |= EEH_STATE_MMIO_ENABLED; @@ -705,18 +705,19 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) { struct pci_controller *hose; struct pnv_phb *phb; - struct eeh_pe *phb_pe; - u64 frozen_pe_no; - u16 err_type, severity; + struct eeh_pe *phb_pe, *parent_pe; + __be64 frozen_pe_no; + __be16 err_type, severity; + int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); long rc; - int ret = EEH_NEXT_ERR_NONE; + int state, ret = EEH_NEXT_ERR_NONE; /* * While running here, it's safe to purge the event queue. * And we should keep the cached OPAL notifier event sychronized * between the kernel and firmware. */ - eeh_remove_event(NULL); + eeh_remove_event(NULL, false); opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); list_for_each_entry(hose, &hose_list, list_node) { @@ -742,8 +743,8 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) } /* If the PHB doesn't have error, stop processing */ - if (err_type == OPAL_EEH_NO_ERROR || - severity == OPAL_EEH_SEV_NO_ERROR) { + if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR || + be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) { pr_devel("%s: No error found on PHB#%x\n", __func__, hose->global_number); continue; @@ -755,14 +756,14 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) * specific PHB. 
*/ pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n", - __func__, err_type, severity, - frozen_pe_no, hose->global_number); - switch (err_type) { + __func__, be16_to_cpu(err_type), be16_to_cpu(severity), + be64_to_cpu(frozen_pe_no), hose->global_number); + switch (be16_to_cpu(err_type)) { case OPAL_EEH_IOC_ERROR: - if (severity == OPAL_EEH_SEV_IOC_DEAD) { + if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) { pr_err("EEH: dead IOC detected\n"); ret = EEH_NEXT_ERR_DEAD_IOC; - } else if (severity == OPAL_EEH_SEV_INF) { + } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) { pr_info("EEH: IOC informative error " "detected\n"); ioda_eeh_hub_diag(hose); @@ -771,20 +772,26 @@ break; case OPAL_EEH_PHB_ERROR: - if (severity == OPAL_EEH_SEV_PHB_DEAD) { + if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) { *pe = phb_pe; - pr_err("EEH: dead PHB#%x detected\n", - hose->global_number); + pr_err("EEH: dead PHB#%x detected, " + "location: %s\n", + hose->global_number, + eeh_pe_loc_get(phb_pe)); ret = EEH_NEXT_ERR_DEAD_PHB; - } else if (severity == OPAL_EEH_SEV_PHB_FENCED) { + } else if (be16_to_cpu(severity) == + OPAL_EEH_SEV_PHB_FENCED) { *pe = phb_pe; - pr_err("EEH: fenced PHB#%x detected\n", - hose->global_number); + pr_err("EEH: Fenced PHB#%x detected, " + "location: %s\n", + hose->global_number, + eeh_pe_loc_get(phb_pe)); ret = EEH_NEXT_ERR_FENCED_PHB; - } else if (severity == OPAL_EEH_SEV_INF) { + } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) { pr_info("EEH: PHB#%x informative error " - "detected\n", - hose->global_number); + "detected, location: %s\n", + hose->global_number, + eeh_pe_loc_get(phb_pe)); ioda_eeh_phb_diag(hose); ret = EEH_NEXT_ERR_NONE; } @@ -792,34 +799,33 @@ break; case OPAL_EEH_PE_ERROR: /* - * If we can't find the corresponding PE, the - * PEEV / PEST would be messy. So we force an - * fenced PHB so that it can be recovered. - * - * If the PE has been marked as isolated, that - * should have been removed permanently or in - * progress with recovery. We needn't report - * it again. + * If we can't find the corresponding PE, we + * just try to unfreeze. */ - if (ioda_eeh_get_pe(hose, frozen_pe_no, pe)) { - *pe = phb_pe; - pr_err("EEH: Escalated fenced PHB#%x " - "detected for PE#%llx\n", - hose->global_number, - frozen_pe_no); - ret = EEH_NEXT_ERR_FENCED_PHB; + if (ioda_eeh_get_pe(hose, + be64_to_cpu(frozen_pe_no), pe)) { + /* Try best to clear it */ + pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n", + hose->global_number, frozen_pe_no); + pr_info("EEH: PHB location: %s\n", + eeh_pe_loc_get(phb_pe)); + opal_pci_eeh_freeze_clear(phb->opal_id, frozen_pe_no, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + ret = EEH_NEXT_ERR_NONE; } else if ((*pe)->state & EEH_PE_ISOLATED) { ret = EEH_NEXT_ERR_NONE; } else { pr_err("EEH: Frozen PE#%x on PHB#%x detected\n", (*pe)->addr, (*pe)->phb->global_number); + pr_err("EEH: PE location: %s, PHB location: %s\n", + eeh_pe_loc_get(*pe), eeh_pe_loc_get(phb_pe)); ret = EEH_NEXT_ERR_FROZEN_PE; } break; default: pr_warn("%s: Unexpected error type %d\n", - __func__, err_type); + __func__, be16_to_cpu(err_type)); } /* @@ -837,6 +843,31 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) } /* + * We probably have a frozen parent PE out there and + * we need to handle the frozen parent PE first. + */ + if (ret == EEH_NEXT_ERR_FROZEN_PE) { + parent_pe = (*pe)->parent; + while (parent_pe) { + /* Hit the ceiling ? 
*/ + if (parent_pe->type & EEH_PE_PHB) + break; + + /* Frozen parent PE ? */ + state = ioda_eeh_get_state(parent_pe); + if (state > 0 && + (state & active_flags) != active_flags) + *pe = parent_pe; + + /* Next parent level */ + parent_pe = parent_pe->parent; + } + + /* We possibly migrate to another PE */ + eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); + } + + /* * If we have no errors on the specific PHB or only * informative error there, we continue poking it. * Otherwise, we need actions to be taken by upper diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c index 1bb25b95250..44ed78af1a0 100644 --- a/arch/powerpc/platforms/powernv/opal-msglog.c +++ b/arch/powerpc/platforms/powernv/opal-msglog.c @@ -37,7 +37,8 @@ static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj, { struct memcons *mc = bin_attr->private; const char *conbuf; - size_t ret, first_read = 0; + ssize_t ret; + size_t first_read = 0; uint32_t out_pos, avail; if (!mc) @@ -69,6 +70,9 @@ static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj, to += first_read; count -= first_read; pos -= avail; + + if (count <= 0) + goto out; } /* Sanity check. The firmware should not do this to us. */ diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c index d202f9bc368..9d1acf22a09 100644 --- a/arch/powerpc/platforms/powernv/opal-sysparam.c +++ b/arch/powerpc/platforms/powernv/opal-sysparam.c @@ -260,10 +260,10 @@ void __init opal_sys_param_init(void) attr[i].kobj_attr.attr.mode = S_IRUGO; break; case OPAL_SYSPARAM_WRITE: - attr[i].kobj_attr.attr.mode = S_IWUGO; + attr[i].kobj_attr.attr.mode = S_IWUSR; break; case OPAL_SYSPARAM_RW: - attr[i].kobj_attr.attr.mode = S_IRUGO | S_IWUGO; + attr[i].kobj_attr.attr.mode = S_IRUGO | S_IWUSR; break; default: break; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index eefbfcc3fd8..f91a4e5d872 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -206,72 +206,91 @@ static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose, data = (struct OpalIoPhb3ErrorData*)common; pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n", - hose->global_number, common->version); + hose->global_number, be32_to_cpu(common->version)); if (data->brdgCtl) pr_info("brdgCtl: %08x\n", - data->brdgCtl); + be32_to_cpu(data->brdgCtl)); if (data->portStatusReg || data->rootCmplxStatus || data->busAgentStatus) pr_info("UtlSts: %08x %08x %08x\n", - data->portStatusReg, data->rootCmplxStatus, - data->busAgentStatus); + be32_to_cpu(data->portStatusReg), + be32_to_cpu(data->rootCmplxStatus), + be32_to_cpu(data->busAgentStatus)); if (data->deviceStatus || data->slotStatus || data->linkStatus || data->devCmdStatus || data->devSecStatus) pr_info("RootSts: %08x %08x %08x %08x %08x\n", - data->deviceStatus, data->slotStatus, - data->linkStatus, data->devCmdStatus, - data->devSecStatus); + be32_to_cpu(data->deviceStatus), + be32_to_cpu(data->slotStatus), + be32_to_cpu(data->linkStatus), + be32_to_cpu(data->devCmdStatus), + be32_to_cpu(data->devSecStatus)); if (data->rootErrorStatus || data->uncorrErrorStatus || data->corrErrorStatus) pr_info("RootErrSts: %08x %08x %08x\n", - data->rootErrorStatus, data->uncorrErrorStatus, - data->corrErrorStatus); + be32_to_cpu(data->rootErrorStatus), + be32_to_cpu(data->uncorrErrorStatus), + be32_to_cpu(data->corrErrorStatus)); if (data->tlpHdr1 || data->tlpHdr2 || data->tlpHdr3 || 
data->tlpHdr4) pr_info("RootErrLog: %08x %08x %08x %08x\n", - data->tlpHdr1, data->tlpHdr2, - data->tlpHdr3, data->tlpHdr4); + be32_to_cpu(data->tlpHdr1), + be32_to_cpu(data->tlpHdr2), + be32_to_cpu(data->tlpHdr3), + be32_to_cpu(data->tlpHdr4)); if (data->sourceId || data->errorClass || data->correlator) pr_info("RootErrLog1: %08x %016llx %016llx\n", - data->sourceId, data->errorClass, - data->correlator); + be32_to_cpu(data->sourceId), + be64_to_cpu(data->errorClass), + be64_to_cpu(data->correlator)); if (data->nFir) pr_info("nFir: %016llx %016llx %016llx\n", - data->nFir, data->nFirMask, - data->nFirWOF); + be64_to_cpu(data->nFir), + be64_to_cpu(data->nFirMask), + be64_to_cpu(data->nFirWOF)); if (data->phbPlssr || data->phbCsr) pr_info("PhbSts: %016llx %016llx\n", - data->phbPlssr, data->phbCsr); + be64_to_cpu(data->phbPlssr), + be64_to_cpu(data->phbCsr)); if (data->lemFir) pr_info("Lem: %016llx %016llx %016llx\n", - data->lemFir, data->lemErrorMask, - data->lemWOF); + be64_to_cpu(data->lemFir), + be64_to_cpu(data->lemErrorMask), + be64_to_cpu(data->lemWOF)); if (data->phbErrorStatus) pr_info("PhbErr: %016llx %016llx %016llx %016llx\n", - data->phbErrorStatus, data->phbFirstErrorStatus, - data->phbErrorLog0, data->phbErrorLog1); + be64_to_cpu(data->phbErrorStatus), + be64_to_cpu(data->phbFirstErrorStatus), + be64_to_cpu(data->phbErrorLog0), + be64_to_cpu(data->phbErrorLog1)); if (data->mmioErrorStatus) pr_info("OutErr: %016llx %016llx %016llx %016llx\n", - data->mmioErrorStatus, data->mmioFirstErrorStatus, - data->mmioErrorLog0, data->mmioErrorLog1); + be64_to_cpu(data->mmioErrorStatus), + be64_to_cpu(data->mmioFirstErrorStatus), + be64_to_cpu(data->mmioErrorLog0), + be64_to_cpu(data->mmioErrorLog1)); if (data->dma0ErrorStatus) pr_info("InAErr: %016llx %016llx %016llx %016llx\n", - data->dma0ErrorStatus, data->dma0FirstErrorStatus, - data->dma0ErrorLog0, data->dma0ErrorLog1); + be64_to_cpu(data->dma0ErrorStatus), + be64_to_cpu(data->dma0FirstErrorStatus), + be64_to_cpu(data->dma0ErrorLog0), + be64_to_cpu(data->dma0ErrorLog1)); if (data->dma1ErrorStatus) pr_info("InBErr: %016llx %016llx %016llx %016llx\n", - data->dma1ErrorStatus, data->dma1FirstErrorStatus, - data->dma1ErrorLog0, data->dma1ErrorLog1); + be64_to_cpu(data->dma1ErrorStatus), + be64_to_cpu(data->dma1FirstErrorStatus), + be64_to_cpu(data->dma1ErrorLog0), + be64_to_cpu(data->dma1ErrorLog1)); for (i = 0; i < OPAL_PHB3_NUM_PEST_REGS; i++) { - if ((data->pestA[i] >> 63) == 0 && - (data->pestB[i] >> 63) == 0) + if ((be64_to_cpu(data->pestA[i]) >> 63) == 0 && + (be64_to_cpu(data->pestB[i]) >> 63) == 0) continue; pr_info("PE[%3d] A/B: %016llx %016llx\n", - i, data->pestA[i], data->pestB[i]); + i, be64_to_cpu(data->pestA[i]), + be64_to_cpu(data->pestB[i])); } } @@ -284,7 +303,7 @@ void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, return; common = (struct OpalIoPhbErrorCommon *)log_buff; - switch (common->ioType) { + switch (be32_to_cpu(common->ioType)) { case OPAL_PHB_ERROR_DATA_TYPE_P7IOC: pnv_pci_dump_p7ioc_diag_data(hose, common); break; @@ -293,7 +312,7 @@ void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, break; default: pr_warn("%s: Unrecognized ioType %d\n", - __func__, common->ioType); + __func__, be32_to_cpu(common->ioType)); } } diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 8c16a5f9672..d9b88fa7c5a 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -35,11 +35,14 @@ #include <asm/rtas.h> #include 
<asm/opal.h> #include <asm/kexec.h> +#include <asm/smp.h> #include "powernv.h" static void __init pnv_setup_arch(void) { + set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); + /* Initialize SMP */ pnv_smp_init(); diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 0062a43a2e0..5fcfcf44e3a 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -32,6 +32,7 @@ #include <asm/opal.h> #include <asm/runlatch.h> #include <asm/code-patching.h> +#include <asm/dbell.h> #include "powernv.h" @@ -46,6 +47,11 @@ static void pnv_smp_setup_cpu(int cpu) { if (cpu != boot_cpuid) xics_setup_cpu(); + +#ifdef CONFIG_PPC_DOORBELL + if (cpu_has_feature(CPU_FTR_DBELL)) + doorbell_setup_this_cpu(); +#endif } int pnv_smp_kick_cpu(int nr) diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 2cb8b776c84..756b482f819 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -21,6 +21,7 @@ config PPC_PSERIES select HAVE_CONTEXT_TRACKING select HOTPLUG_CPU if SMP select ARCH_RANDOM + select PPC_DOORBELL default y config PPC_SPLPAR diff --git a/arch/powerpc/platforms/wsp/Kconfig b/arch/powerpc/platforms/wsp/Kconfig deleted file mode 100644 index 422a175b10e..00000000000 --- a/arch/powerpc/platforms/wsp/Kconfig +++ /dev/null @@ -1,30 +0,0 @@ -config PPC_WSP - bool - select PPC_A2 - select GENERIC_TBSYNC - select PPC_ICSWX - select PPC_SCOM - select PPC_XICS - select PPC_ICP_NATIVE - select PCI - select PPC_IO_WORKAROUNDS if PCI - select PPC_INDIRECT_PIO if PCI - default n - -menu "WSP platform selection" - depends on PPC_BOOK3E_64 - -config PPC_PSR2 - bool "PowerEN System Reference Platform 2" - select EPAPR_BOOT - select PPC_WSP - default y - -config PPC_CHROMA - bool "PowerEN PCIe Chroma Card" - select EPAPR_BOOT - select PPC_WSP - select OF_DYNAMIC - default y - -endmenu diff --git a/arch/powerpc/platforms/wsp/Makefile b/arch/powerpc/platforms/wsp/Makefile deleted file mode 100644 index 162fc60125a..00000000000 --- a/arch/powerpc/platforms/wsp/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -ccflags-y += $(NO_MINIMAL_TOC) - -obj-y += setup.o ics.o wsp.o -obj-$(CONFIG_PPC_PSR2) += psr2.o -obj-$(CONFIG_PPC_CHROMA) += chroma.o h8.o -obj-$(CONFIG_PPC_WSP) += opb_pic.o -obj-$(CONFIG_PPC_WSP) += scom_wsp.o -obj-$(CONFIG_SMP) += smp.o scom_smp.o -obj-$(CONFIG_PCI) += wsp_pci.o -obj-$(CONFIG_PCI_MSI) += msi.o diff --git a/arch/powerpc/platforms/wsp/chroma.c b/arch/powerpc/platforms/wsp/chroma.c deleted file mode 100644 index aaa46b35371..00000000000 --- a/arch/powerpc/platforms/wsp/chroma.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright 2008-2011, IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#include <linux/delay.h> -#include <linux/init.h> -#include <linux/irq.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/of.h> -#include <linux/smp.h> -#include <linux/time.h> -#include <linux/of_fdt.h> - -#include <asm/machdep.h> -#include <asm/udbg.h> - -#include "ics.h" -#include "wsp.h" - -void __init chroma_setup_arch(void) -{ - wsp_setup_arch(); - wsp_setup_h8(); - -} - -static int __init chroma_probe(void) -{ - unsigned long root = of_get_flat_dt_root(); - - if (!of_flat_dt_is_compatible(root, "ibm,wsp-chroma")) - return 0; - - return 1; -} - -define_machine(chroma_md) { - .name = "Chroma PCIe", - .probe = chroma_probe, - .setup_arch = chroma_setup_arch, - .restart = wsp_h8_restart, - .power_off = wsp_h8_power_off, - .halt = wsp_halt, - .calibrate_decr = generic_calibrate_decr, - .init_IRQ = wsp_setup_irq, - .progress = udbg_progress, - .power_save = book3e_idle, -}; - -machine_arch_initcall(chroma_md, wsp_probe_devices); diff --git a/arch/powerpc/platforms/wsp/h8.c b/arch/powerpc/platforms/wsp/h8.c deleted file mode 100644 index a3c87f39575..00000000000 --- a/arch/powerpc/platforms/wsp/h8.c +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright 2008-2011, IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/of.h> -#include <linux/io.h> -#include <linux/of_address.h> - -#include "wsp.h" - -/* - * The UART connection to the H8 is over ttyS1 which is just a 16550. - * We assume that FW has it setup right and no one messes with it. - */ - - -static u8 __iomem *h8; - -#define RBR 0 /* Receiver Buffer Register */ -#define THR 0 /* Transmitter Holding Register */ -#define LSR 5 /* Line Status Register */ -#define LSR_DR 0x01 /* LSR value for Data-Ready */ -#define LSR_THRE 0x20 /* LSR value for Transmitter-Holding-Register-Empty */ -static void wsp_h8_putc(int c) -{ - u8 lsr; - - do { - lsr = readb(h8 + LSR); - } while ((lsr & LSR_THRE) != LSR_THRE); - writeb(c, h8 + THR); -} - -static int wsp_h8_getc(void) -{ - u8 lsr; - - do { - lsr = readb(h8 + LSR); - } while ((lsr & LSR_DR) != LSR_DR); - - return readb(h8 + RBR); -} - -static void wsp_h8_puts(const char *s, int sz) -{ - int i; - - for (i = 0; i < sz; i++) { - wsp_h8_putc(s[i]); - - /* no flow control so wait for echo */ - wsp_h8_getc(); - } - wsp_h8_putc('\r'); - wsp_h8_putc('\n'); -} - -static void wsp_h8_terminal_cmd(const char *cmd, int sz) -{ - hard_irq_disable(); - wsp_h8_puts(cmd, sz); - /* should never return, but just in case */ - for (;;) - continue; -} - - -void wsp_h8_restart(char *cmd) -{ - static const char restart[] = "warm-reset"; - - (void)cmd; - wsp_h8_terminal_cmd(restart, sizeof(restart) - 1); -} - -void wsp_h8_power_off(void) -{ - static const char off[] = "power-off"; - - wsp_h8_terminal_cmd(off, sizeof(off) - 1); -} - -static void __iomem *wsp_h8_getaddr(void) -{ - struct device_node *aliases; - struct device_node *uart; - struct property *path; - void __iomem *va = NULL; - - /* - * there is nothing in the devtree to tell us which is mapped - * to the H8, but se know it is the second serial port. 
- */ - - aliases = of_find_node_by_path("/aliases"); - if (aliases == NULL) - return NULL; - - path = of_find_property(aliases, "serial1", NULL); - if (path == NULL) - goto out; - - uart = of_find_node_by_path(path->value); - if (uart == NULL) - goto out; - - va = of_iomap(uart, 0); - - /* remove it so no one messes with it */ - of_detach_node(uart); - of_node_put(uart); - -out: - of_node_put(aliases); - - return va; -} - -void __init wsp_setup_h8(void) -{ - h8 = wsp_h8_getaddr(); - - /* Devtree change? lets hard map it anyway */ - if (h8 == NULL) { - pr_warn("UART to H8 could not be found"); - h8 = ioremap(0xffc0008000ULL, 0x100); - } -} diff --git a/arch/powerpc/platforms/wsp/ics.c b/arch/powerpc/platforms/wsp/ics.c deleted file mode 100644 index 9cd92e64502..00000000000 --- a/arch/powerpc/platforms/wsp/ics.c +++ /dev/null @@ -1,762 +0,0 @@ -/* - * Copyright 2008-2011 IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/cpu.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/irq.h> -#include <linux/kernel.h> -#include <linux/msi.h> -#include <linux/of.h> -#include <linux/slab.h> -#include <linux/smp.h> -#include <linux/spinlock.h> -#include <linux/types.h> -#include <linux/of_address.h> -#include <linux/of_irq.h> - -#include <asm/io.h> -#include <asm/irq.h> -#include <asm/xics.h> - -#include "wsp.h" -#include "ics.h" - - -/* WSP ICS */ - -struct wsp_ics { - struct ics ics; - struct device_node *dn; - void __iomem *regs; - spinlock_t lock; - unsigned long *bitmap; - u32 chip_id; - u32 lsi_base; - u32 lsi_count; - u64 hwirq_start; - u64 count; -#ifdef CONFIG_SMP - int *hwirq_cpu_map; -#endif -}; - -#define to_wsp_ics(ics) container_of(ics, struct wsp_ics, ics) - -#define INT_SRC_LAYER_BUID_REG(base) ((base) + 0x00) -#define IODA_TBL_ADDR_REG(base) ((base) + 0x18) -#define IODA_TBL_DATA_REG(base) ((base) + 0x20) -#define XIVE_UPDATE_REG(base) ((base) + 0x28) -#define ICS_INT_CAPS_REG(base) ((base) + 0x30) - -#define TBL_AUTO_INCREMENT ((1UL << 63) | (1UL << 15)) -#define TBL_SELECT_XIST (1UL << 48) -#define TBL_SELECT_XIVT (1UL << 49) - -#define IODA_IRQ(irq) ((irq) & (0x7FFULL)) /* HRM 5.1.3.4 */ - -#define XIST_REQUIRED 0x8 -#define XIST_REJECTED 0x4 -#define XIST_PRESENTED 0x2 -#define XIST_PENDING 0x1 - -#define XIVE_SERVER_SHIFT 42 -#define XIVE_SERVER_MASK 0xFFFFULL -#define XIVE_PRIORITY_MASK 0xFFULL -#define XIVE_PRIORITY_SHIFT 32 -#define XIVE_WRITE_ENABLE (1ULL << 63) - -/* - * The docs refer to a 6 bit field called ChipID, which consists of a - * 3 bit NodeID and a 3 bit ChipID. On WSP the ChipID is always zero - * so we ignore it, and every where we use "chip id" in this code we - * mean the NodeID. 
- */ -#define WSP_ICS_CHIP_SHIFT 17 - - -static struct wsp_ics *ics_list; -static int num_ics; - -/* ICS Source controller accessors */ - -static u64 wsp_ics_get_xive(struct wsp_ics *ics, unsigned int irq) -{ - unsigned long flags; - u64 xive; - - spin_lock_irqsave(&ics->lock, flags); - out_be64(IODA_TBL_ADDR_REG(ics->regs), TBL_SELECT_XIVT | IODA_IRQ(irq)); - xive = in_be64(IODA_TBL_DATA_REG(ics->regs)); - spin_unlock_irqrestore(&ics->lock, flags); - - return xive; -} - -static void wsp_ics_set_xive(struct wsp_ics *ics, unsigned int irq, u64 xive) -{ - xive &= ~XIVE_ADDR_MASK; - xive |= (irq & XIVE_ADDR_MASK); - xive |= XIVE_WRITE_ENABLE; - - out_be64(XIVE_UPDATE_REG(ics->regs), xive); -} - -static u64 xive_set_server(u64 xive, unsigned int server) -{ - u64 mask = ~(XIVE_SERVER_MASK << XIVE_SERVER_SHIFT); - - xive &= mask; - xive |= (server & XIVE_SERVER_MASK) << XIVE_SERVER_SHIFT; - - return xive; -} - -static u64 xive_set_priority(u64 xive, unsigned int priority) -{ - u64 mask = ~(XIVE_PRIORITY_MASK << XIVE_PRIORITY_SHIFT); - - xive &= mask; - xive |= (priority & XIVE_PRIORITY_MASK) << XIVE_PRIORITY_SHIFT; - - return xive; -} - - -#ifdef CONFIG_SMP -/* Find logical CPUs within mask on a given chip and store result in ret */ -void cpus_on_chip(int chip_id, cpumask_t *mask, cpumask_t *ret) -{ - int cpu, chip; - struct device_node *cpu_dn, *dn; - const u32 *prop; - - cpumask_clear(ret); - for_each_cpu(cpu, mask) { - cpu_dn = of_get_cpu_node(cpu, NULL); - if (!cpu_dn) - continue; - - prop = of_get_property(cpu_dn, "at-node", NULL); - if (!prop) { - of_node_put(cpu_dn); - continue; - } - - dn = of_find_node_by_phandle(*prop); - of_node_put(cpu_dn); - - chip = wsp_get_chip_id(dn); - if (chip == chip_id) - cpumask_set_cpu(cpu, ret); - - of_node_put(dn); - } -} - -/* Store a suitable CPU to handle a hwirq in the ics->hwirq_cpu_map cache */ -static int cache_hwirq_map(struct wsp_ics *ics, unsigned int hwirq, - const cpumask_t *affinity) -{ - cpumask_var_t avail, newmask; - int ret = -ENOMEM, cpu, cpu_rover = 0, target; - int index = hwirq - ics->hwirq_start; - unsigned int nodeid; - - BUG_ON(index < 0 || index >= ics->count); - - if (!ics->hwirq_cpu_map) - return -ENOMEM; - - if (!distribute_irqs) { - ics->hwirq_cpu_map[hwirq - ics->hwirq_start] = xics_default_server; - return 0; - } - - /* Allocate needed CPU masks */ - if (!alloc_cpumask_var(&avail, GFP_KERNEL)) - goto ret; - if (!alloc_cpumask_var(&newmask, GFP_KERNEL)) - goto freeavail; - - /* Find PBus attached to the source of this IRQ */ - nodeid = (hwirq >> WSP_ICS_CHIP_SHIFT) & 0x3; /* 12:14 */ - - /* Find CPUs that could handle this IRQ */ - if (affinity) - cpumask_and(avail, cpu_online_mask, affinity); - else - cpumask_copy(avail, cpu_online_mask); - - /* Narrow selection down to logical CPUs on the same chip */ - cpus_on_chip(nodeid, avail, newmask); - - /* Ensure we haven't narrowed it down to 0 */ - if (unlikely(cpumask_empty(newmask))) { - if (unlikely(cpumask_empty(avail))) { - ret = -1; - goto out; - } - cpumask_copy(newmask, avail); - } - - /* Choose a CPU out of those we narrowed it down to in round robin */ - target = hwirq % cpumask_weight(newmask); - for_each_cpu(cpu, newmask) { - if (cpu_rover++ >= target) { - ics->hwirq_cpu_map[index] = get_hard_smp_processor_id(cpu); - ret = 0; - goto out; - } - } - - /* Shouldn't happen */ - WARN_ON(1); - -out: - free_cpumask_var(newmask); -freeavail: - free_cpumask_var(avail); -ret: - if (ret < 0) { - ics->hwirq_cpu_map[index] = cpumask_first(cpu_online_mask); - pr_warning("Error, 
falling hwirq 0x%x routing back to CPU %i\n", - hwirq, ics->hwirq_cpu_map[index]); - } - return ret; -} - -static void alloc_irq_map(struct wsp_ics *ics) -{ - int i; - - ics->hwirq_cpu_map = kmalloc(sizeof(int) * ics->count, GFP_KERNEL); - if (!ics->hwirq_cpu_map) { - pr_warning("Allocate hwirq_cpu_map failed, " - "IRQ balancing disabled\n"); - return; - } - - for (i=0; i < ics->count; i++) - ics->hwirq_cpu_map[i] = xics_default_server; -} - -static int get_irq_server(struct wsp_ics *ics, unsigned int hwirq) -{ - int index = hwirq - ics->hwirq_start; - - BUG_ON(index < 0 || index >= ics->count); - - if (!ics->hwirq_cpu_map) - return xics_default_server; - - return ics->hwirq_cpu_map[index]; -} -#else /* !CONFIG_SMP */ -static int cache_hwirq_map(struct wsp_ics *ics, unsigned int hwirq, - const cpumask_t *affinity) -{ - return 0; -} - -static int get_irq_server(struct wsp_ics *ics, unsigned int hwirq) -{ - return xics_default_server; -} - -static void alloc_irq_map(struct wsp_ics *ics) { } -#endif - -static void wsp_chip_unmask_irq(struct irq_data *d) -{ - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - struct wsp_ics *ics; - int server; - u64 xive; - - if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) - return; - - ics = d->chip_data; - if (WARN_ON(!ics)) - return; - - server = get_irq_server(ics, hw_irq); - - xive = wsp_ics_get_xive(ics, hw_irq); - xive = xive_set_server(xive, server); - xive = xive_set_priority(xive, DEFAULT_PRIORITY); - wsp_ics_set_xive(ics, hw_irq, xive); -} - -static unsigned int wsp_chip_startup(struct irq_data *d) -{ - /* unmask it */ - wsp_chip_unmask_irq(d); - return 0; -} - -static void wsp_mask_real_irq(unsigned int hw_irq, struct wsp_ics *ics) -{ - u64 xive; - - if (hw_irq == XICS_IPI) - return; - - if (WARN_ON(!ics)) - return; - xive = wsp_ics_get_xive(ics, hw_irq); - xive = xive_set_server(xive, xics_default_server); - xive = xive_set_priority(xive, LOWEST_PRIORITY); - wsp_ics_set_xive(ics, hw_irq, xive); -} - -static void wsp_chip_mask_irq(struct irq_data *d) -{ - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - struct wsp_ics *ics = d->chip_data; - - if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) - return; - - wsp_mask_real_irq(hw_irq, ics); -} - -static int wsp_chip_set_affinity(struct irq_data *d, - const struct cpumask *cpumask, bool force) -{ - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - struct wsp_ics *ics; - int ret; - u64 xive; - - if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) - return -1; - - ics = d->chip_data; - if (WARN_ON(!ics)) - return -1; - xive = wsp_ics_get_xive(ics, hw_irq); - - /* - * For the moment only implement delivery to all cpus or one cpu. 
- * Get current irq_server for the given irq - */ - ret = cache_hwirq_map(ics, hw_irq, cpumask); - if (ret == -1) { - char cpulist[128]; - cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask); - pr_warning("%s: No online cpus in the mask %s for irq %d\n", - __func__, cpulist, d->irq); - return -1; - } else if (ret == -ENOMEM) { - pr_warning("%s: Out of memory\n", __func__); - return -1; - } - - xive = xive_set_server(xive, get_irq_server(ics, hw_irq)); - wsp_ics_set_xive(ics, hw_irq, xive); - - return IRQ_SET_MASK_OK; -} - -static struct irq_chip wsp_irq_chip = { - .name = "WSP ICS", - .irq_startup = wsp_chip_startup, - .irq_mask = wsp_chip_mask_irq, - .irq_unmask = wsp_chip_unmask_irq, - .irq_set_affinity = wsp_chip_set_affinity -}; - -static int wsp_ics_host_match(struct ics *ics, struct device_node *dn) -{ - /* All ICSs in the system implement a global irq number space, - * so match against them all. */ - return of_device_is_compatible(dn, "ibm,ppc-xics"); -} - -static int wsp_ics_match_hwirq(struct wsp_ics *wsp_ics, unsigned int hwirq) -{ - if (hwirq >= wsp_ics->hwirq_start && - hwirq < wsp_ics->hwirq_start + wsp_ics->count) - return 1; - - return 0; -} - -static int wsp_ics_map(struct ics *ics, unsigned int virq) -{ - struct wsp_ics *wsp_ics = to_wsp_ics(ics); - unsigned int hw_irq = virq_to_hw(virq); - unsigned long flags; - - if (!wsp_ics_match_hwirq(wsp_ics, hw_irq)) - return -ENOENT; - - irq_set_chip_and_handler(virq, &wsp_irq_chip, handle_fasteoi_irq); - - irq_set_chip_data(virq, wsp_ics); - - spin_lock_irqsave(&wsp_ics->lock, flags); - bitmap_allocate_region(wsp_ics->bitmap, hw_irq - wsp_ics->hwirq_start, 0); - spin_unlock_irqrestore(&wsp_ics->lock, flags); - - return 0; -} - -static void wsp_ics_mask_unknown(struct ics *ics, unsigned long hw_irq) -{ - struct wsp_ics *wsp_ics = to_wsp_ics(ics); - - if (!wsp_ics_match_hwirq(wsp_ics, hw_irq)) - return; - - pr_err("%s: IRQ %lu (real) is invalid, disabling it.\n", __func__, hw_irq); - wsp_mask_real_irq(hw_irq, wsp_ics); -} - -static long wsp_ics_get_server(struct ics *ics, unsigned long hw_irq) -{ - struct wsp_ics *wsp_ics = to_wsp_ics(ics); - - if (!wsp_ics_match_hwirq(wsp_ics, hw_irq)) - return -ENOENT; - - return get_irq_server(wsp_ics, hw_irq); -} - -/* HW Number allocation API */ - -static struct wsp_ics *wsp_ics_find_dn_ics(struct device_node *dn) -{ - struct device_node *iparent; - int i; - - iparent = of_irq_find_parent(dn); - if (!iparent) { - pr_err("wsp_ics: Failed to find interrupt parent!\n"); - return NULL; - } - - for(i = 0; i < num_ics; i++) { - if(ics_list[i].dn == iparent) - break; - } - - if (i >= num_ics) { - pr_err("wsp_ics: Unable to find parent bitmap!\n"); - return NULL; - } - - return &ics_list[i]; -} - -int wsp_ics_alloc_irq(struct device_node *dn, int num) -{ - struct wsp_ics *ics; - int order, offset; - - ics = wsp_ics_find_dn_ics(dn); - if (!ics) - return -ENODEV; - - /* Fast, but overly strict if num isn't a power of two */ - order = get_count_order(num); - - spin_lock_irq(&ics->lock); - offset = bitmap_find_free_region(ics->bitmap, ics->count, order); - spin_unlock_irq(&ics->lock); - - if (offset < 0) - return offset; - - return offset + ics->hwirq_start; -} - -void wsp_ics_free_irq(struct device_node *dn, unsigned int irq) -{ - struct wsp_ics *ics; - - ics = wsp_ics_find_dn_ics(dn); - if (WARN_ON(!ics)) - return; - - spin_lock_irq(&ics->lock); - bitmap_release_region(ics->bitmap, irq, 0); - spin_unlock_irq(&ics->lock); -} - -/* Initialisation */ - -static int __init wsp_ics_bitmap_setup(struct wsp_ics 
*ics, - struct device_node *dn) -{ - int len, i, j, size; - u32 start, count; - const u32 *p; - - size = BITS_TO_LONGS(ics->count) * sizeof(long); - ics->bitmap = kzalloc(size, GFP_KERNEL); - if (!ics->bitmap) { - pr_err("wsp_ics: ENOMEM allocating IRQ bitmap!\n"); - return -ENOMEM; - } - - spin_lock_init(&ics->lock); - - p = of_get_property(dn, "available-ranges", &len); - if (!p || !len) { - /* FIXME this should be a WARN() once mambo is updated */ - pr_err("wsp_ics: No available-ranges defined for %s\n", - dn->full_name); - return 0; - } - - if (len % (2 * sizeof(u32)) != 0) { - /* FIXME this should be a WARN() once mambo is updated */ - pr_err("wsp_ics: Invalid available-ranges for %s\n", - dn->full_name); - return 0; - } - - bitmap_fill(ics->bitmap, ics->count); - - for (i = 0; i < len / sizeof(u32); i += 2) { - start = of_read_number(p + i, 1); - count = of_read_number(p + i + 1, 1); - - pr_devel("%s: start: %d count: %d\n", __func__, start, count); - - if ((start + count) > (ics->hwirq_start + ics->count) || - start < ics->hwirq_start) { - pr_err("wsp_ics: Invalid range! -> %d to %d\n", - start, start + count); - break; - } - - for (j = 0; j < count; j++) - bitmap_release_region(ics->bitmap, - (start + j) - ics->hwirq_start, 0); - } - - /* Ensure LSIs are not available for allocation */ - bitmap_allocate_region(ics->bitmap, ics->lsi_base, - get_count_order(ics->lsi_count)); - - return 0; -} - -static int __init wsp_ics_setup(struct wsp_ics *ics, struct device_node *dn) -{ - u32 lsi_buid, msi_buid, msi_base, msi_count; - void __iomem *regs; - const u32 *p; - int rc, len, i; - u64 caps, buid; - - p = of_get_property(dn, "interrupt-ranges", &len); - if (!p || len < (2 * sizeof(u32))) { - pr_err("wsp_ics: No/bad interrupt-ranges found on %s\n", - dn->full_name); - return -ENOENT; - } - - if (len > (2 * sizeof(u32))) { - pr_err("wsp_ics: Multiple ics ranges not supported.\n"); - return -EINVAL; - } - - regs = of_iomap(dn, 0); - if (!regs) { - pr_err("wsp_ics: of_iomap(%s) failed\n", dn->full_name); - return -ENXIO; - } - - ics->hwirq_start = of_read_number(p, 1); - ics->count = of_read_number(p + 1, 1); - ics->regs = regs; - - ics->chip_id = wsp_get_chip_id(dn); - if (WARN_ON(ics->chip_id < 0)) - ics->chip_id = 0; - - /* Get some informations about the critter */ - caps = in_be64(ICS_INT_CAPS_REG(ics->regs)); - buid = in_be64(INT_SRC_LAYER_BUID_REG(ics->regs)); - ics->lsi_count = caps >> 56; - msi_count = (caps >> 44) & 0x7ff; - - /* Note: LSI BUID is 9 bits, but really only 3 are BUID and the - * rest is mixed in the interrupt number. We store the whole - * thing though - */ - lsi_buid = (buid >> 48) & 0x1ff; - ics->lsi_base = (ics->chip_id << WSP_ICS_CHIP_SHIFT) | lsi_buid << 5; - msi_buid = (buid >> 37) & 0x7; - msi_base = (ics->chip_id << WSP_ICS_CHIP_SHIFT) | msi_buid << 11; - - pr_info("wsp_ics: Found %s\n", dn->full_name); - pr_info("wsp_ics: irq range : 0x%06llx..0x%06llx\n", - ics->hwirq_start, ics->hwirq_start + ics->count - 1); - pr_info("wsp_ics: %4d LSIs : 0x%06x..0x%06x\n", - ics->lsi_count, ics->lsi_base, - ics->lsi_base + ics->lsi_count - 1); - pr_info("wsp_ics: %4d MSIs : 0x%06x..0x%06x\n", - msi_count, msi_base, - msi_base + msi_count - 1); - - /* Let's check the HW config is sane */ - if (ics->lsi_base < ics->hwirq_start || - (ics->lsi_base + ics->lsi_count) > (ics->hwirq_start + ics->count)) - pr_warning("wsp_ics: WARNING ! 
LSIs out of interrupt-ranges !\n"); - if (msi_base < ics->hwirq_start || - (msi_base + msi_count) > (ics->hwirq_start + ics->count)) - pr_warning("wsp_ics: WARNING ! MSIs out of interrupt-ranges !\n"); - - /* We don't check for overlap between LSI and MSI, which will happen - * if we use the same BUID, I'm not sure yet how legit that is. - */ - - rc = wsp_ics_bitmap_setup(ics, dn); - if (rc) { - iounmap(regs); - return rc; - } - - ics->dn = of_node_get(dn); - alloc_irq_map(ics); - - for(i = 0; i < ics->count; i++) - wsp_mask_real_irq(ics->hwirq_start + i, ics); - - ics->ics.map = wsp_ics_map; - ics->ics.mask_unknown = wsp_ics_mask_unknown; - ics->ics.get_server = wsp_ics_get_server; - ics->ics.host_match = wsp_ics_host_match; - - xics_register_ics(&ics->ics); - - return 0; -} - -static void __init wsp_ics_set_default_server(void) -{ - struct device_node *np; - u32 hwid; - - /* Find the server number for the boot cpu. */ - np = of_get_cpu_node(boot_cpuid, NULL); - BUG_ON(!np); - - hwid = get_hard_smp_processor_id(boot_cpuid); - - pr_info("wsp_ics: default server is %#x, CPU %s\n", hwid, np->full_name); - xics_default_server = hwid; - - of_node_put(np); -} - -static int __init wsp_ics_init(void) -{ - struct device_node *dn; - struct wsp_ics *ics; - int rc, found; - - wsp_ics_set_default_server(); - - found = 0; - for_each_compatible_node(dn, NULL, "ibm,ppc-xics") - found++; - - if (found == 0) { - pr_err("wsp_ics: No ICS's found!\n"); - return -ENODEV; - } - - ics_list = kmalloc(sizeof(*ics) * found, GFP_KERNEL); - if (!ics_list) { - pr_err("wsp_ics: No memory for structs.\n"); - return -ENOMEM; - } - - num_ics = 0; - ics = ics_list; - for_each_compatible_node(dn, NULL, "ibm,wsp-xics") { - rc = wsp_ics_setup(ics, dn); - if (rc == 0) { - ics++; - num_ics++; - } - } - - if (found != num_ics) { - pr_err("wsp_ics: Failed setting up %d ICS's\n", - found - num_ics); - return -1; - } - - return 0; -} - -void __init wsp_init_irq(void) -{ - wsp_ics_init(); - xics_init(); - - /* We need to patch our irq chip's EOI to point to the right ICP */ - wsp_irq_chip.irq_eoi = icp_ops->eoi; -} - -#ifdef CONFIG_PCI_MSI -static void wsp_ics_msi_unmask_irq(struct irq_data *d) -{ - wsp_chip_unmask_irq(d); - unmask_msi_irq(d); -} - -static unsigned int wsp_ics_msi_startup(struct irq_data *d) -{ - wsp_ics_msi_unmask_irq(d); - return 0; -} - -static void wsp_ics_msi_mask_irq(struct irq_data *d) -{ - mask_msi_irq(d); - wsp_chip_mask_irq(d); -} - -/* - * we do it this way because we reassinge default EOI handling in - * irq_init() above - */ -static void wsp_ics_eoi(struct irq_data *data) -{ - wsp_irq_chip.irq_eoi(data); -} - -static struct irq_chip wsp_ics_msi = { - .name = "WSP ICS MSI", - .irq_startup = wsp_ics_msi_startup, - .irq_mask = wsp_ics_msi_mask_irq, - .irq_unmask = wsp_ics_msi_unmask_irq, - .irq_eoi = wsp_ics_eoi, - .irq_set_affinity = wsp_chip_set_affinity -}; - -void wsp_ics_set_msi_chip(unsigned int irq) -{ - irq_set_chip(irq, &wsp_ics_msi); -} - -void wsp_ics_set_std_chip(unsigned int irq) -{ - irq_set_chip(irq, &wsp_irq_chip); -} -#endif /* CONFIG_PCI_MSI */ diff --git a/arch/powerpc/platforms/wsp/ics.h b/arch/powerpc/platforms/wsp/ics.h deleted file mode 100644 index 07b644e0cf9..00000000000 --- a/arch/powerpc/platforms/wsp/ics.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright 2009 IBM Corporation. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef __ICS_H -#define __ICS_H - -#define XIVE_ADDR_MASK 0x7FFULL - -extern void wsp_init_irq(void); - -extern int wsp_ics_alloc_irq(struct device_node *dn, int num); -extern void wsp_ics_free_irq(struct device_node *dn, unsigned int irq); - -#ifdef CONFIG_PCI_MSI -extern void wsp_ics_set_msi_chip(unsigned int irq); -extern void wsp_ics_set_std_chip(unsigned int irq); -#endif /* CONFIG_PCI_MSI */ - -#endif /* __ICS_H */ diff --git a/arch/powerpc/platforms/wsp/msi.c b/arch/powerpc/platforms/wsp/msi.c deleted file mode 100644 index 380882f27ad..00000000000 --- a/arch/powerpc/platforms/wsp/msi.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright 2011 Michael Ellerman, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/msi.h> -#include <linux/irq.h> -#include <linux/interrupt.h> - -#include "msi.h" -#include "ics.h" -#include "wsp_pci.h" - -/* Magic addresses for 32 & 64-bit MSIs with hardcoded MVE 0 */ -#define MSI_ADDR_32 0xFFFF0000ul -#define MSI_ADDR_64 0x1000000000000000ul - -int wsp_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -{ - struct pci_controller *phb; - struct msi_desc *entry; - struct msi_msg msg; - unsigned int virq; - int hwirq; - - phb = pci_bus_to_host(dev->bus); - if (!phb) - return -ENOENT; - - entry = list_first_entry(&dev->msi_list, struct msi_desc, list); - if (entry->msi_attrib.is_64) { - msg.address_lo = 0; - msg.address_hi = MSI_ADDR_64 >> 32; - } else { - msg.address_lo = MSI_ADDR_32; - msg.address_hi = 0; - } - - list_for_each_entry(entry, &dev->msi_list, list) { - hwirq = wsp_ics_alloc_irq(phb->dn, 1); - if (hwirq < 0) { - dev_warn(&dev->dev, "wsp_msi: hwirq alloc failed!\n"); - return hwirq; - } - - virq = irq_create_mapping(NULL, hwirq); - if (virq == NO_IRQ) { - dev_warn(&dev->dev, "wsp_msi: virq alloc failed!\n"); - return -1; - } - - dev_dbg(&dev->dev, "wsp_msi: allocated irq %#x/%#x\n", - hwirq, virq); - - wsp_ics_set_msi_chip(virq); - irq_set_msi_desc(virq, entry); - msg.data = hwirq & XIVE_ADDR_MASK; - write_msi_msg(virq, &msg); - } - - return 0; -} - -void wsp_teardown_msi_irqs(struct pci_dev *dev) -{ - struct pci_controller *phb; - struct msi_desc *entry; - int hwirq; - - phb = pci_bus_to_host(dev->bus); - - dev_dbg(&dev->dev, "wsp_msi: tearing down msi irqs\n"); - - list_for_each_entry(entry, &dev->msi_list, list) { - if (entry->irq == NO_IRQ) - continue; - - irq_set_msi_desc(entry->irq, NULL); - wsp_ics_set_std_chip(entry->irq); - - hwirq = virq_to_hw(entry->irq); - /* In this order to avoid racing with irq_create_mapping() */ - irq_dispose_mapping(entry->irq); - wsp_ics_free_irq(phb->dn, hwirq); - } -} - -void wsp_setup_phb_msi(struct pci_controller *phb) -{ - /* Create a single MVE at offset 0 that matches everything */ - out_be64(phb->cfg_data + PCIE_REG_IODA_ADDR, PCIE_REG_IODA_AD_TBL_MVT); - out_be64(phb->cfg_data + PCIE_REG_IODA_DATA0, 1ull << 63); - - ppc_md.setup_msi_irqs = wsp_setup_msi_irqs; - ppc_md.teardown_msi_irqs = wsp_teardown_msi_irqs; -} diff --git 
a/arch/powerpc/platforms/wsp/msi.h b/arch/powerpc/platforms/wsp/msi.h deleted file mode 100644 index 0ab27b71b24..00000000000 --- a/arch/powerpc/platforms/wsp/msi.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright 2011 Michael Ellerman, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef __WSP_MSI_H -#define __WSP_MSI_H - -#ifdef CONFIG_PCI_MSI -extern void wsp_setup_phb_msi(struct pci_controller *phb); -#else -static inline void wsp_setup_phb_msi(struct pci_controller *phb) { } -#endif - -#endif /* __WSP_MSI_H */ diff --git a/arch/powerpc/platforms/wsp/opb_pic.c b/arch/powerpc/platforms/wsp/opb_pic.c deleted file mode 100644 index 3f672980793..00000000000 --- a/arch/powerpc/platforms/wsp/opb_pic.c +++ /dev/null @@ -1,321 +0,0 @@ -/* - * IBM Onboard Peripheral Bus Interrupt Controller - * - * Copyright 2010 Jack Miller, IBM Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#include <linux/interrupt.h> -#include <linux/io.h> -#include <linux/irq.h> -#include <linux/of.h> -#include <linux/slab.h> -#include <linux/time.h> -#include <linux/of_address.h> -#include <linux/of_irq.h> - -#include <asm/reg_a2.h> -#include <asm/irq.h> - -#define OPB_NR_IRQS 32 - -#define OPB_MLSASIER 0x04 /* MLS Accumulated Status IER */ -#define OPB_MLSIR 0x50 /* MLS Interrupt Register */ -#define OPB_MLSIER 0x54 /* MLS Interrupt Enable Register */ -#define OPB_MLSIPR 0x58 /* MLS Interrupt Polarity Register */ -#define OPB_MLSIIR 0x5c /* MLS Interrupt Inputs Register */ - -static int opb_index = 0; - -struct opb_pic { - struct irq_domain *host; - void *regs; - int index; - spinlock_t lock; -}; - -static u32 opb_in(struct opb_pic *opb, int offset) -{ - return in_be32(opb->regs + offset); -} - -static void opb_out(struct opb_pic *opb, int offset, u32 val) -{ - out_be32(opb->regs + offset, val); -} - -static void opb_unmask_irq(struct irq_data *d) -{ - struct opb_pic *opb; - unsigned long flags; - u32 ier, bitset; - - opb = d->chip_data; - bitset = (1 << (31 - irqd_to_hwirq(d))); - - spin_lock_irqsave(&opb->lock, flags); - - ier = opb_in(opb, OPB_MLSIER); - opb_out(opb, OPB_MLSIER, ier | bitset); - ier = opb_in(opb, OPB_MLSIER); - - spin_unlock_irqrestore(&opb->lock, flags); -} - -static void opb_mask_irq(struct irq_data *d) -{ - struct opb_pic *opb; - unsigned long flags; - u32 ier, mask; - - opb = d->chip_data; - mask = ~(1 << (31 - irqd_to_hwirq(d))); - - spin_lock_irqsave(&opb->lock, flags); - - ier = opb_in(opb, OPB_MLSIER); - opb_out(opb, OPB_MLSIER, ier & mask); - ier = opb_in(opb, OPB_MLSIER); // Flush posted writes - - spin_unlock_irqrestore(&opb->lock, flags); -} - -static void opb_ack_irq(struct irq_data *d) -{ - struct opb_pic *opb; - unsigned long flags; - u32 bitset; - - opb = d->chip_data; - bitset = (1 << (31 - irqd_to_hwirq(d))); - - spin_lock_irqsave(&opb->lock, flags); - - opb_out(opb, OPB_MLSIR, bitset); - opb_in(opb, OPB_MLSIR); // Flush posted writes - - spin_unlock_irqrestore(&opb->lock, flags); -} - -static void opb_mask_ack_irq(struct irq_data *d) -{ - struct opb_pic *opb; - unsigned long flags; - u32 bitset; - u32 ier, ir; - - opb = 
d->chip_data; - bitset = (1 << (31 - irqd_to_hwirq(d))); - - spin_lock_irqsave(&opb->lock, flags); - - ier = opb_in(opb, OPB_MLSIER); - opb_out(opb, OPB_MLSIER, ier & ~bitset); - ier = opb_in(opb, OPB_MLSIER); // Flush posted writes - - opb_out(opb, OPB_MLSIR, bitset); - ir = opb_in(opb, OPB_MLSIR); // Flush posted writes - - spin_unlock_irqrestore(&opb->lock, flags); -} - -static int opb_set_irq_type(struct irq_data *d, unsigned int flow) -{ - struct opb_pic *opb; - unsigned long flags; - int invert, ipr, mask, bit; - - opb = d->chip_data; - - /* The only information we're interested in in the type is whether it's - * a high or low trigger. For high triggered interrupts, the polarity - * set for it in the MLS Interrupt Polarity Register is 0, for low - * interrupts it's 1 so that the proper input in the MLS Interrupt Input - * Register is interrupted as asserting the interrupt. */ - - switch (flow) { - case IRQ_TYPE_NONE: - opb_mask_irq(d); - return 0; - - case IRQ_TYPE_LEVEL_HIGH: - invert = 0; - break; - - case IRQ_TYPE_LEVEL_LOW: - invert = 1; - break; - - default: - return -EINVAL; - } - - bit = (1 << (31 - irqd_to_hwirq(d))); - mask = ~bit; - - spin_lock_irqsave(&opb->lock, flags); - - ipr = opb_in(opb, OPB_MLSIPR); - ipr = (ipr & mask) | (invert ? bit : 0); - opb_out(opb, OPB_MLSIPR, ipr); - ipr = opb_in(opb, OPB_MLSIPR); // Flush posted writes - - spin_unlock_irqrestore(&opb->lock, flags); - - /* Record the type in the interrupt descriptor */ - irqd_set_trigger_type(d, flow); - - return 0; -} - -static struct irq_chip opb_irq_chip = { - .name = "OPB", - .irq_mask = opb_mask_irq, - .irq_unmask = opb_unmask_irq, - .irq_mask_ack = opb_mask_ack_irq, - .irq_ack = opb_ack_irq, - .irq_set_type = opb_set_irq_type -}; - -static int opb_host_map(struct irq_domain *host, unsigned int virq, - irq_hw_number_t hwirq) -{ - struct opb_pic *opb; - - opb = host->host_data; - - /* Most of the important stuff is handled by the generic host code, like - * the lookup, so just attach some info to the virtual irq */ - - irq_set_chip_data(virq, opb); - irq_set_chip_and_handler(virq, &opb_irq_chip, handle_level_irq); - irq_set_irq_type(virq, IRQ_TYPE_NONE); - - return 0; -} - -static const struct irq_domain_ops opb_host_ops = { - .map = opb_host_map, - .xlate = irq_domain_xlate_twocell, -}; - -irqreturn_t opb_irq_handler(int irq, void *private) -{ - struct opb_pic *opb; - u32 ir, src, subvirq; - - opb = (struct opb_pic *) private; - - /* Read the OPB MLS Interrupt Register for - * asserted interrupts */ - ir = opb_in(opb, OPB_MLSIR); - if (!ir) - return IRQ_NONE; - - do { - /* Get 1 - 32 source, *NOT* bit */ - src = 32 - ffs(ir); - - /* Translate from the OPB's conception of interrupt number to - * Linux's virtual IRQ */ - - subvirq = irq_linear_revmap(opb->host, src); - - generic_handle_irq(subvirq); - } while ((ir = opb_in(opb, OPB_MLSIR))); - - return IRQ_HANDLED; -} - -struct opb_pic *opb_pic_init_one(struct device_node *dn) -{ - struct opb_pic *opb; - struct resource res; - - if (of_address_to_resource(dn, 0, &res)) { - printk(KERN_ERR "opb: Couldn't translate resource\n"); - return NULL; - } - - opb = kzalloc(sizeof(struct opb_pic), GFP_KERNEL); - if (!opb) { - printk(KERN_ERR "opb: Failed to allocate opb struct!\n"); - return NULL; - } - - /* Get access to the OPB MMIO registers */ - opb->regs = ioremap(res.start + 0x10000, 0x1000); - if (!opb->regs) { - printk(KERN_ERR "opb: Failed to allocate register space!\n"); - goto free_opb; - } - - /* Allocate an irq domain so that Linux knows that despite 
only - * having one interrupt to issue, we're the controller for multiple - * hardware IRQs, so later we can lookup their virtual IRQs. */ - - opb->host = irq_domain_add_linear(dn, OPB_NR_IRQS, &opb_host_ops, opb); - if (!opb->host) { - printk(KERN_ERR "opb: Failed to allocate IRQ host!\n"); - goto free_regs; - } - - opb->index = opb_index++; - spin_lock_init(&opb->lock); - - /* Disable all interrupts by default */ - opb_out(opb, OPB_MLSASIER, 0); - opb_out(opb, OPB_MLSIER, 0); - - /* ACK any interrupts left by FW */ - opb_out(opb, OPB_MLSIR, 0xFFFFFFFF); - - return opb; - -free_regs: - iounmap(opb->regs); -free_opb: - kfree(opb); - return NULL; -} - -void __init opb_pic_init(void) -{ - struct device_node *dn; - struct opb_pic *opb; - int virq; - int rc; - - /* Call init_one for each OPB device */ - for_each_compatible_node(dn, NULL, "ibm,opb") { - - /* Fill in an OPB struct */ - opb = opb_pic_init_one(dn); - if (!opb) { - printk(KERN_WARNING "opb: Failed to init node, skipped!\n"); - continue; - } - - /* Map / get opb's hardware virtual irq */ - virq = irq_of_parse_and_map(dn, 0); - if (virq <= 0) { - printk("opb: irq_op_parse_and_map failed!\n"); - continue; - } - - /* Attach opb interrupt handler to new virtual IRQ */ - rc = request_irq(virq, opb_irq_handler, IRQF_NO_THREAD, - "OPB LS Cascade", opb); - if (rc) { - printk("opb: request_irq failed: %d\n", rc); - continue; - } - - printk("OPB%d init with %d IRQs at %p\n", opb->index, - OPB_NR_IRQS, opb->regs); - } -} diff --git a/arch/powerpc/platforms/wsp/psr2.c b/arch/powerpc/platforms/wsp/psr2.c deleted file mode 100644 index a87b414c766..00000000000 --- a/arch/powerpc/platforms/wsp/psr2.c +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright 2008-2011, IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/delay.h> -#include <linux/init.h> -#include <linux/irq.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/of.h> -#include <linux/smp.h> -#include <linux/time.h> -#include <linux/of_fdt.h> - -#include <asm/machdep.h> -#include <asm/udbg.h> - -#include "ics.h" -#include "wsp.h" - - -static void psr2_spin(void) -{ - hard_irq_disable(); - for (;;) - continue; -} - -static void psr2_restart(char *cmd) -{ - psr2_spin(); -} - -static int __init psr2_probe(void) -{ - unsigned long root = of_get_flat_dt_root(); - - if (of_flat_dt_is_compatible(root, "ibm,wsp-chroma")) { - /* chroma systems also claim they are psr2s */ - return 0; - } - - if (!of_flat_dt_is_compatible(root, "ibm,psr2")) - return 0; - - return 1; -} - -define_machine(psr2_md) { - .name = "PSR2 A2", - .probe = psr2_probe, - .setup_arch = wsp_setup_arch, - .restart = psr2_restart, - .power_off = psr2_spin, - .halt = psr2_spin, - .calibrate_decr = generic_calibrate_decr, - .init_IRQ = wsp_setup_irq, - .progress = udbg_progress, - .power_save = book3e_idle, -}; - -machine_arch_initcall(psr2_md, wsp_probe_devices); diff --git a/arch/powerpc/platforms/wsp/scom_smp.c b/arch/powerpc/platforms/wsp/scom_smp.c deleted file mode 100644 index 8c79ce016cf..00000000000 --- a/arch/powerpc/platforms/wsp/scom_smp.c +++ /dev/null @@ -1,435 +0,0 @@ -/* - * SCOM support for A2 platforms - * - * Copyright 2007-2011 Benjamin Herrenschmidt, David Gibson, - * Michael Ellerman, IBM Corp. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/cpumask.h> -#include <linux/io.h> -#include <linux/of.h> -#include <linux/spinlock.h> -#include <linux/types.h> - -#include <asm/cputhreads.h> -#include <asm/reg_a2.h> -#include <asm/scom.h> -#include <asm/udbg.h> -#include <asm/code-patching.h> - -#include "wsp.h" - -#define SCOM_RAMC 0x2a /* Ram Command */ -#define SCOM_RAMC_TGT1_EXT 0x80000000 -#define SCOM_RAMC_SRC1_EXT 0x40000000 -#define SCOM_RAMC_SRC2_EXT 0x20000000 -#define SCOM_RAMC_SRC3_EXT 0x10000000 -#define SCOM_RAMC_ENABLE 0x00080000 -#define SCOM_RAMC_THREADSEL 0x00060000 -#define SCOM_RAMC_EXECUTE 0x00010000 -#define SCOM_RAMC_MSR_OVERRIDE 0x00008000 -#define SCOM_RAMC_MSR_PR 0x00004000 -#define SCOM_RAMC_MSR_GS 0x00002000 -#define SCOM_RAMC_FORCE 0x00001000 -#define SCOM_RAMC_FLUSH 0x00000800 -#define SCOM_RAMC_INTERRUPT 0x00000004 -#define SCOM_RAMC_ERROR 0x00000002 -#define SCOM_RAMC_DONE 0x00000001 -#define SCOM_RAMI 0x29 /* Ram Instruction */ -#define SCOM_RAMIC 0x28 /* Ram Instruction and Command */ -#define SCOM_RAMIC_INSN 0xffffffff00000000 -#define SCOM_RAMD 0x2d /* Ram Data */ -#define SCOM_RAMDH 0x2e /* Ram Data High */ -#define SCOM_RAMDL 0x2f /* Ram Data Low */ -#define SCOM_PCCR0 0x33 /* PC Configuration Register 0 */ -#define SCOM_PCCR0_ENABLE_DEBUG 0x80000000 -#define SCOM_PCCR0_ENABLE_RAM 0x40000000 -#define SCOM_THRCTL 0x30 /* Thread Control and Status */ -#define SCOM_THRCTL_T0_STOP 0x80000000 -#define SCOM_THRCTL_T1_STOP 0x40000000 -#define SCOM_THRCTL_T2_STOP 0x20000000 -#define SCOM_THRCTL_T3_STOP 0x10000000 -#define SCOM_THRCTL_T0_STEP 0x08000000 -#define SCOM_THRCTL_T1_STEP 0x04000000 -#define SCOM_THRCTL_T2_STEP 0x02000000 -#define SCOM_THRCTL_T3_STEP 0x01000000 -#define SCOM_THRCTL_T0_RUN 0x00800000 -#define SCOM_THRCTL_T1_RUN 0x00400000 -#define SCOM_THRCTL_T2_RUN 0x00200000 -#define SCOM_THRCTL_T3_RUN 0x00100000 -#define SCOM_THRCTL_T0_PM 0x00080000 -#define SCOM_THRCTL_T1_PM 0x00040000 -#define SCOM_THRCTL_T2_PM 0x00020000 -#define SCOM_THRCTL_T3_PM 0x00010000 -#define SCOM_THRCTL_T0_UDE 0x00008000 -#define SCOM_THRCTL_T1_UDE 0x00004000 -#define SCOM_THRCTL_T2_UDE 0x00002000 -#define SCOM_THRCTL_T3_UDE 0x00001000 -#define SCOM_THRCTL_ASYNC_DIS 0x00000800 -#define SCOM_THRCTL_TB_DIS 0x00000400 -#define SCOM_THRCTL_DEC_DIS 0x00000200 -#define SCOM_THRCTL_AND 0x31 /* Thread Control and Status */ -#define SCOM_THRCTL_OR 0x32 /* Thread Control and Status */ - - -static DEFINE_PER_CPU(scom_map_t, scom_ptrs); - -static scom_map_t get_scom(int cpu, struct device_node *np, int *first_thread) -{ - scom_map_t scom = per_cpu(scom_ptrs, cpu); - int tcpu; - - if (scom_map_ok(scom)) { - *first_thread = 0; - return scom; - } - - *first_thread = 1; - - scom = scom_map_device(np, 0); - - for (tcpu = cpu_first_thread_sibling(cpu); - tcpu <= cpu_last_thread_sibling(cpu); tcpu++) - per_cpu(scom_ptrs, tcpu) = scom; - - /* Hack: for the boot core, this will actually get called on - * the second thread up, not the first so our test above will - * set first_thread incorrectly. 
*/ - if (cpu_first_thread_sibling(cpu) == 0) - *first_thread = 0; - - return scom; -} - -static int a2_scom_ram(scom_map_t scom, int thread, u32 insn, int extmask) -{ - u64 cmd, mask, val; - int n = 0; - - cmd = ((u64)insn << 32) | (((u64)extmask & 0xf) << 28) - | ((u64)thread << 17) | SCOM_RAMC_ENABLE | SCOM_RAMC_EXECUTE; - mask = SCOM_RAMC_DONE | SCOM_RAMC_INTERRUPT | SCOM_RAMC_ERROR; - - scom_write(scom, SCOM_RAMIC, cmd); - - for (;;) { - if (scom_read(scom, SCOM_RAMC, &val) != 0) { - pr_err("SCOM error on instruction 0x%08x, thread %d\n", - insn, thread); - return -1; - } - if (val & mask) - break; - pr_devel("Waiting on RAMC = 0x%llx\n", val); - if (++n == 3) { - pr_err("RAMC timeout on instruction 0x%08x, thread %d\n", - insn, thread); - return -1; - } - } - - if (val & SCOM_RAMC_INTERRUPT) { - pr_err("RAMC interrupt on instruction 0x%08x, thread %d\n", - insn, thread); - return -SCOM_RAMC_INTERRUPT; - } - - if (val & SCOM_RAMC_ERROR) { - pr_err("RAMC error on instruction 0x%08x, thread %d\n", - insn, thread); - return -SCOM_RAMC_ERROR; - } - - return 0; -} - -static int a2_scom_getgpr(scom_map_t scom, int thread, int gpr, int alt, - u64 *out_gpr) -{ - int rc; - - /* or rN, rN, rN */ - u32 insn = 0x7c000378 | (gpr << 21) | (gpr << 16) | (gpr << 11); - rc = a2_scom_ram(scom, thread, insn, alt ? 0xf : 0x0); - if (rc) - return rc; - - return scom_read(scom, SCOM_RAMD, out_gpr); -} - -static int a2_scom_getspr(scom_map_t scom, int thread, int spr, u64 *out_spr) -{ - int rc, sprhi, sprlo; - u32 insn; - - sprhi = spr >> 5; - sprlo = spr & 0x1f; - insn = 0x7c2002a6 | (sprlo << 16) | (sprhi << 11); /* mfspr r1,spr */ - - if (spr == 0x0ff0) - insn = 0x7c2000a6; /* mfmsr r1 */ - - rc = a2_scom_ram(scom, thread, insn, 0xf); - if (rc) - return rc; - return a2_scom_getgpr(scom, thread, 1, 1, out_spr); -} - -static int a2_scom_setgpr(scom_map_t scom, int thread, int gpr, - int alt, u64 val) -{ - u32 lis = 0x3c000000 | (gpr << 21); - u32 li = 0x38000000 | (gpr << 21); - u32 oris = 0x64000000 | (gpr << 21) | (gpr << 16); - u32 ori = 0x60000000 | (gpr << 21) | (gpr << 16); - u32 rldicr32 = 0x780007c6 | (gpr << 21) | (gpr << 16); - u32 highest = val >> 48; - u32 higher = (val >> 32) & 0xffff; - u32 high = (val >> 16) & 0xffff; - u32 low = val & 0xffff; - int lext = alt ? 0x8 : 0x0; - int oext = alt ? 
0xf : 0x0; - int rc = 0; - - if (highest) - rc |= a2_scom_ram(scom, thread, lis | highest, lext); - - if (higher) { - if (highest) - rc |= a2_scom_ram(scom, thread, oris | higher, oext); - else - rc |= a2_scom_ram(scom, thread, li | higher, lext); - } - - if (highest || higher) - rc |= a2_scom_ram(scom, thread, rldicr32, oext); - - if (high) { - if (highest || higher) - rc |= a2_scom_ram(scom, thread, oris | high, oext); - else - rc |= a2_scom_ram(scom, thread, lis | high, lext); - } - - if (highest || higher || high) - rc |= a2_scom_ram(scom, thread, ori | low, oext); - else - rc |= a2_scom_ram(scom, thread, li | low, lext); - - return rc; -} - -static int a2_scom_setspr(scom_map_t scom, int thread, int spr, u64 val) -{ - int sprhi = spr >> 5; - int sprlo = spr & 0x1f; - /* mtspr spr, r1 */ - u32 insn = 0x7c2003a6 | (sprlo << 16) | (sprhi << 11); - - if (spr == 0x0ff0) - insn = 0x7c200124; /* mtmsr r1 */ - - if (a2_scom_setgpr(scom, thread, 1, 1, val)) - return -1; - - return a2_scom_ram(scom, thread, insn, 0xf); -} - -static int a2_scom_initial_tlb(scom_map_t scom, int thread) -{ - extern u32 a2_tlbinit_code_start[], a2_tlbinit_code_end[]; - extern u32 a2_tlbinit_after_iprot_flush[]; - extern u32 a2_tlbinit_after_linear_map[]; - u32 assoc, entries, i; - u64 epn, tlbcfg; - u32 *p; - int rc; - - /* Invalidate all entries (including iprot) */ - - rc = a2_scom_getspr(scom, thread, SPRN_TLB0CFG, &tlbcfg); - if (rc) - goto scom_fail; - entries = tlbcfg & TLBnCFG_N_ENTRY; - assoc = (tlbcfg & TLBnCFG_ASSOC) >> 24; - epn = 0; - - /* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */ - a2_scom_setspr(scom, thread, SPRN_MMUCR2, 0x000a7531); - /* Set MMUCR3 to write all thids bit to the TLB */ - a2_scom_setspr(scom, thread, SPRN_MMUCR3, 0x0000000f); - - /* Set MAS1 for 1G page size, and MAS2 to our initial EPN */ - a2_scom_setspr(scom, thread, SPRN_MAS1, MAS1_TSIZE(BOOK3E_PAGESZ_1GB)); - a2_scom_setspr(scom, thread, SPRN_MAS2, epn); - for (i = 0; i < entries; i++) { - - a2_scom_setspr(scom, thread, SPRN_MAS0, MAS0_ESEL(i % assoc)); - - /* tlbwe */ - rc = a2_scom_ram(scom, thread, 0x7c0007a4, 0); - if (rc) - goto scom_fail; - - /* Next entry is new address? */ - if((i + 1) % assoc == 0) { - epn += (1 << 30); - a2_scom_setspr(scom, thread, SPRN_MAS2, epn); - } - } - - /* Setup args for linear mapping */ - rc = a2_scom_setgpr(scom, thread, 3, 0, MAS0_TLBSEL(0)); - if (rc) - goto scom_fail; - - /* Linear mapping */ - for (p = a2_tlbinit_code_start; p < a2_tlbinit_after_linear_map; p++) { - rc = a2_scom_ram(scom, thread, *p, 0); - if (rc) - goto scom_fail; - } - - /* - * For the boot thread, between the linear mapping and the debug - * mappings there is a loop to flush iprot mappings. Ramming doesn't do - * branches, but the secondary threads don't need to be nearly as smart - * (i.e. we don't need to worry about invalidating the mapping we're - * standing on). - */ - - /* Debug mappings. 
Expects r11 = MAS0 from linear map (set above) */ - for (p = a2_tlbinit_after_iprot_flush; p < a2_tlbinit_code_end; p++) { - rc = a2_scom_ram(scom, thread, *p, 0); - if (rc) - goto scom_fail; - } - -scom_fail: - if (rc) - pr_err("Setting up initial TLB failed, err %d\n", rc); - - if (rc == -SCOM_RAMC_INTERRUPT) { - /* Interrupt, dump some status */ - int rc[10]; - u64 iar, srr0, srr1, esr, mas0, mas1, mas2, mas7_3, mas8, ccr2; - rc[0] = a2_scom_getspr(scom, thread, SPRN_IAR, &iar); - rc[1] = a2_scom_getspr(scom, thread, SPRN_SRR0, &srr0); - rc[2] = a2_scom_getspr(scom, thread, SPRN_SRR1, &srr1); - rc[3] = a2_scom_getspr(scom, thread, SPRN_ESR, &esr); - rc[4] = a2_scom_getspr(scom, thread, SPRN_MAS0, &mas0); - rc[5] = a2_scom_getspr(scom, thread, SPRN_MAS1, &mas1); - rc[6] = a2_scom_getspr(scom, thread, SPRN_MAS2, &mas2); - rc[7] = a2_scom_getspr(scom, thread, SPRN_MAS7_MAS3, &mas7_3); - rc[8] = a2_scom_getspr(scom, thread, SPRN_MAS8, &mas8); - rc[9] = a2_scom_getspr(scom, thread, SPRN_A2_CCR2, &ccr2); - pr_err(" -> retrieved IAR =0x%llx (err %d)\n", iar, rc[0]); - pr_err(" retrieved SRR0=0x%llx (err %d)\n", srr0, rc[1]); - pr_err(" retrieved SRR1=0x%llx (err %d)\n", srr1, rc[2]); - pr_err(" retrieved ESR =0x%llx (err %d)\n", esr, rc[3]); - pr_err(" retrieved MAS0=0x%llx (err %d)\n", mas0, rc[4]); - pr_err(" retrieved MAS1=0x%llx (err %d)\n", mas1, rc[5]); - pr_err(" retrieved MAS2=0x%llx (err %d)\n", mas2, rc[6]); - pr_err(" retrieved MS73=0x%llx (err %d)\n", mas7_3, rc[7]); - pr_err(" retrieved MAS8=0x%llx (err %d)\n", mas8, rc[8]); - pr_err(" retrieved CCR2=0x%llx (err %d)\n", ccr2, rc[9]); - } - - return rc; -} - -int a2_scom_startup_cpu(unsigned int lcpu, int thr_idx, struct device_node *np) -{ - u64 init_iar, init_msr, init_ccr2; - unsigned long start_here; - int rc, core_setup; - scom_map_t scom; - u64 pccr0; - - scom = get_scom(lcpu, np, &core_setup); - if (!scom) { - printk(KERN_ERR "Couldn't map SCOM for CPU%d\n", lcpu); - return -1; - } - - pr_devel("Bringing up CPU%d using SCOM...\n", lcpu); - - if (scom_read(scom, SCOM_PCCR0, &pccr0) != 0) { - printk(KERN_ERR "XSCOM failure reading PCCR0 on CPU%d\n", lcpu); - return -1; - } - scom_write(scom, SCOM_PCCR0, pccr0 | SCOM_PCCR0_ENABLE_DEBUG | - SCOM_PCCR0_ENABLE_RAM); - - /* Stop the thread with THRCTL. If we are setting up the TLB we stop all - * threads. We also disable asynchronous interrupts while RAMing. - */ - if (core_setup) - scom_write(scom, SCOM_THRCTL_OR, - SCOM_THRCTL_T0_STOP | - SCOM_THRCTL_T1_STOP | - SCOM_THRCTL_T2_STOP | - SCOM_THRCTL_T3_STOP | - SCOM_THRCTL_ASYNC_DIS); - else - scom_write(scom, SCOM_THRCTL_OR, SCOM_THRCTL_T0_STOP >> thr_idx); - - /* Flush its pipeline just in case */ - scom_write(scom, SCOM_RAMC, ((u64)thr_idx << 17) | - SCOM_RAMC_FLUSH | SCOM_RAMC_ENABLE); - - a2_scom_getspr(scom, thr_idx, SPRN_IAR, &init_iar); - a2_scom_getspr(scom, thr_idx, 0x0ff0, &init_msr); - a2_scom_getspr(scom, thr_idx, SPRN_A2_CCR2, &init_ccr2); - - /* Set MSR to MSR_CM (0x0ff0 is magic value for MSR_CM) */ - rc = a2_scom_setspr(scom, thr_idx, 0x0ff0, MSR_CM); - if (rc) { - pr_err("Failed to set MSR ! err %d\n", rc); - return rc; - } - - /* RAM in a sync/isync for the sake of it */ - a2_scom_ram(scom, thr_idx, 0x7c0004ac, 0); - a2_scom_ram(scom, thr_idx, 0x4c00012c, 0); - - if (core_setup) { - pr_devel("CPU%d is first thread in core, initializing TLB...\n", - lcpu); - rc = a2_scom_initial_tlb(scom, thr_idx); - if (rc) - goto fail; - } - - start_here = ppc_function_entry(core_setup ?
generic_secondary_smp_init - : generic_secondary_thread_init); - pr_devel("CPU%d entry point at 0x%lx...\n", lcpu, start_here); - - rc |= a2_scom_setspr(scom, thr_idx, SPRN_IAR, start_here); - rc |= a2_scom_setgpr(scom, thr_idx, 3, 0, - get_hard_smp_processor_id(lcpu)); - /* - * Tell book3e_secondary_core_init not to set up the TLB, we've - * already done that. - */ - rc |= a2_scom_setgpr(scom, thr_idx, 4, 0, 1); - - rc |= a2_scom_setspr(scom, thr_idx, SPRN_TENS, 0x1 << thr_idx); - - scom_write(scom, SCOM_RAMC, 0); - scom_write(scom, SCOM_THRCTL_AND, ~(SCOM_THRCTL_T0_STOP >> thr_idx)); - scom_write(scom, SCOM_PCCR0, pccr0); -fail: - pr_devel(" SCOM initialization %s\n", rc ? "failed" : "succeeded"); - if (rc) { - pr_err("Old IAR=0x%08llx MSR=0x%08llx CCR2=0x%08llx\n", - init_iar, init_msr, init_ccr2); - } - - return rc; -} diff --git a/arch/powerpc/platforms/wsp/scom_wsp.c b/arch/powerpc/platforms/wsp/scom_wsp.c deleted file mode 100644 index 6538b4de34f..00000000000 --- a/arch/powerpc/platforms/wsp/scom_wsp.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * SCOM backend for WSP - * - * Copyright 2010 Benjamin Herrenschmidt, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/cpumask.h> -#include <linux/io.h> -#include <linux/of.h> -#include <linux/spinlock.h> -#include <linux/types.h> -#include <linux/of_address.h> - -#include <asm/cputhreads.h> -#include <asm/reg_a2.h> -#include <asm/scom.h> -#include <asm/udbg.h> - -#include "wsp.h" - - -static scom_map_t wsp_scom_map(struct device_node *dev, u64 reg, u64 count) -{ - struct resource r; - u64 xscom_addr; - - if (!of_get_property(dev, "scom-controller", NULL)) { - pr_err("%s: device %s is not a SCOM controller\n", - __func__, dev->full_name); - return SCOM_MAP_INVALID; - } - - if (of_address_to_resource(dev, 0, &r)) { - pr_debug("Failed to find SCOM controller address\n"); - return 0; - } - - /* Transform the SCOM address into an XSCOM offset */ - xscom_addr = ((reg & 0x7f000000) >> 1) | ((reg & 0xfffff) << 3); - - return (scom_map_t)ioremap(r.start + xscom_addr, count << 3); -} - -static void wsp_scom_unmap(scom_map_t map) -{ - iounmap((void *)map); -} - -static int wsp_scom_read(scom_map_t map, u64 reg, u64 *value) -{ - u64 __iomem *addr = (u64 __iomem *)map; - - *value = in_be64(addr + reg); - - return 0; -} - -static int wsp_scom_write(scom_map_t map, u64 reg, u64 value) -{ - u64 __iomem *addr = (u64 __iomem *)map; - - out_be64(addr + reg, value); - - return 0; -} - -static const struct scom_controller wsp_scom_controller = { - .map = wsp_scom_map, - .unmap = wsp_scom_unmap, - .read = wsp_scom_read, - .write = wsp_scom_write -}; - -void scom_init_wsp(void) -{ - scom_init(&wsp_scom_controller); -} diff --git a/arch/powerpc/platforms/wsp/setup.c b/arch/powerpc/platforms/wsp/setup.c deleted file mode 100644 index 11ac2f05e01..00000000000 --- a/arch/powerpc/platforms/wsp/setup.c +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright 2010 Michael Ellerman, IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#include <linux/kernel.h> -#include <linux/of_platform.h> - -#include "wsp.h" - -/* - * Find chip-id by walking up device tree looking for ibm,wsp-chip-id property. - * Won't work for nodes that are not a descendant of a wsp node. - */ -int wsp_get_chip_id(struct device_node *dn) -{ - const u32 *p; - int rc; - - /* Start looking at the specified node, not its parent */ - dn = of_node_get(dn); - while (dn && !(p = of_get_property(dn, "ibm,wsp-chip-id", NULL))) - dn = of_get_next_parent(dn); - - if (!dn) - return -1; - - rc = *p; - of_node_put(dn); - - return rc; -} diff --git a/arch/powerpc/platforms/wsp/smp.c b/arch/powerpc/platforms/wsp/smp.c deleted file mode 100644 index 332a18b8140..00000000000 --- a/arch/powerpc/platforms/wsp/smp.c +++ /dev/null @@ -1,88 +0,0 @@ -/* - * SMP Support for A2 platforms - * - * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include <linux/cpumask.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/of.h> -#include <linux/smp.h> - -#include <asm/dbell.h> -#include <asm/machdep.h> -#include <asm/xics.h> - -#include "ics.h" -#include "wsp.h" - -static void smp_a2_setup_cpu(int cpu) -{ - doorbell_setup_this_cpu(); - - if (cpu != boot_cpuid) - xics_setup_cpu(); -} - -int smp_a2_kick_cpu(int nr) -{ - const char *enable_method; - struct device_node *np; - int thr_idx; - - if (nr < 0 || nr >= NR_CPUS) - return -ENOENT; - - np = of_get_cpu_node(nr, &thr_idx); - if (!np) - return -ENODEV; - - enable_method = of_get_property(np, "enable-method", NULL); - pr_devel("CPU%d has enable-method: \"%s\"\n", nr, enable_method); - - if (!enable_method) { - printk(KERN_ERR "CPU%d has no enable-method\n", nr); - return -ENOENT; - } else if (strcmp(enable_method, "ibm,a2-scom") == 0) { - if (a2_scom_startup_cpu(nr, thr_idx, np)) - return -1; - } else { - printk(KERN_ERR "CPU%d: Don't understand enable-method \"%s\"\n", - nr, enable_method); - return -EINVAL; - } - - /* - * The processor is currently spinning, waiting for the - * cpu_start field to become non-zero. After we set cpu_start, - * the processor will continue on to secondary_start - */ - paca[nr].cpu_start = 1; - - return 0; -} - -static int __init smp_a2_probe(void) -{ - return num_possible_cpus(); -} - -static struct smp_ops_t a2_smp_ops = { - .message_pass = NULL, /* Use smp_muxed_ipi_message_pass */ - .cause_ipi = doorbell_cause_ipi, - .probe = smp_a2_probe, - .kick_cpu = smp_a2_kick_cpu, - .setup_cpu = smp_a2_setup_cpu, -}; - -void __init a2_setup_smp(void) -{ - smp_ops = &a2_smp_ops; -} diff --git a/arch/powerpc/platforms/wsp/wsp.c b/arch/powerpc/platforms/wsp/wsp.c deleted file mode 100644 index 58cd1f00e1e..00000000000 --- a/arch/powerpc/platforms/wsp/wsp.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright 2008-2011, IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version.
- */ - -#include <linux/kernel.h> -#include <linux/of.h> -#include <linux/of_device.h> -#include <linux/smp.h> -#include <linux/delay.h> -#include <linux/time.h> -#include <linux/of_address.h> - -#include <asm/scom.h> - -#include "wsp.h" -#include "ics.h" - -#define WSP_SOC_COMPATIBLE "ibm,wsp-soc" -#define PBIC_COMPATIBLE "ibm,wsp-pbic" -#define COPRO_COMPATIBLE "ibm,wsp-coprocessor" - -static int __init wsp_probe_buses(void) -{ - static __initdata struct of_device_id bus_ids[] = { - /* - * every node in between needs to be here or you won't - * find it - */ - { .compatible = WSP_SOC_COMPATIBLE, }, - { .compatible = PBIC_COMPATIBLE, }, - { .compatible = COPRO_COMPATIBLE, }, - {}, - }; - of_platform_bus_probe(NULL, bus_ids, NULL); - - return 0; -} - -void __init wsp_setup_arch(void) -{ - /* init to some ~sane value until calibrate_delay() runs */ - loops_per_jiffy = 50000000; - - scom_init_wsp(); - - /* Setup SMP callback */ -#ifdef CONFIG_SMP - a2_setup_smp(); -#endif -#ifdef CONFIG_PCI - wsp_setup_pci(); -#endif -} - -void __init wsp_setup_irq(void) -{ - wsp_init_irq(); - opb_pic_init(); -} - - -int __init wsp_probe_devices(void) -{ - struct device_node *np; - - /* Our RTC is a ds1500. It seems to be programmatically compatible - * with the ds1511 for which we have a driver so let's use that - */ - np = of_find_compatible_node(NULL, NULL, "dallas,ds1500"); - if (np != NULL) { - struct resource res; - if (of_address_to_resource(np, 0, &res) == 0) - platform_device_register_simple("ds1511", 0, &res, 1); - } - - wsp_probe_buses(); - - return 0; -} - -void wsp_halt(void) -{ - u64 val; - scom_map_t m; - struct device_node *dn; - struct device_node *mine; - struct device_node *me; - int rc; - - me = of_get_cpu_node(smp_processor_id(), NULL); - mine = scom_find_parent(me); - - /* This will halt all the A2s but not power off the chip */ - for_each_node_with_property(dn, "scom-controller") { - if (dn == mine) - continue; - m = scom_map(dn, 0, 1); - - /* read-modify-write it so the HW probe does not get - * confused */ - rc = scom_read(m, 0, &val); - if (rc == 0) - scom_write(m, 0, val | 1); - scom_unmap(m); - } - m = scom_map(mine, 0, 1); - rc = scom_read(m, 0, &val); - if (rc == 0) - scom_write(m, 0, val | 1); - /* should never return */ - scom_unmap(m); -} diff --git a/arch/powerpc/platforms/wsp/wsp.h b/arch/powerpc/platforms/wsp/wsp.h deleted file mode 100644 index a563a8aaf81..00000000000 --- a/arch/powerpc/platforms/wsp/wsp.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef __WSP_H -#define __WSP_H - -#include <asm/wsp.h> - -/* Devtree compatible strings for major devices */ -#define PCIE_COMPATIBLE "ibm,wsp-pciex" - -extern void wsp_setup_arch(void); -extern void wsp_setup_irq(void); -extern int wsp_probe_devices(void); -extern void wsp_halt(void); - -extern void wsp_setup_pci(void); -extern void scom_init_wsp(void); - -extern void a2_setup_smp(void); -extern int a2_scom_startup_cpu(unsigned int lcpu, int thr_idx, - struct device_node *np); -extern int smp_a2_kick_cpu(int nr); - -extern void opb_pic_init(void); - -/* chroma specific management */ -extern void wsp_h8_restart(char *cmd); -extern void wsp_h8_power_off(void); -extern void __init wsp_setup_h8(void); - -#endif /* __WSP_H */ diff --git a/arch/powerpc/platforms/wsp/wsp_pci.c b/arch/powerpc/platforms/wsp/wsp_pci.c deleted file mode 100644 index 9a15e5b39bb..00000000000 --- a/arch/powerpc/platforms/wsp/wsp_pci.c +++ /dev/null @@ -1,1134 +0,0 @@ -/* - * Copyright 2010 Ben Herrenschmidt, IBM Corporation - * - * This program is free software; you
can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#define DEBUG - -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/delay.h> -#include <linux/string.h> -#include <linux/init.h> -#include <linux/bootmem.h> -#include <linux/irq.h> -#include <linux/interrupt.h> -#include <linux/debugfs.h> - -#include <asm/sections.h> -#include <asm/io.h> -#include <asm/prom.h> -#include <asm/pci-bridge.h> -#include <asm/machdep.h> -#include <asm/ppc-pci.h> -#include <asm/iommu.h> -#include <asm/io-workarounds.h> -#include <asm/debug.h> - -#include "wsp.h" -#include "wsp_pci.h" -#include "msi.h" - - -/* Max number of TVTs for one table. Only 32-bit tables can use - * multiple TVTs and so the max currently supported is thus 8 - * since only 2G of DMA space is supported - */ -#define MAX_TABLE_TVT_COUNT 8 - -struct wsp_dma_table { - struct list_head link; - struct iommu_table table; - struct wsp_phb *phb; - struct page *tces[MAX_TABLE_TVT_COUNT]; -}; - -/* We support DMA regions from 0...2G in 32bit space (no support for - * 64-bit DMA just yet). Each device gets a separate TCE table (TVT - * entry) with validation enabled (though not supported by SimiCS - * just yet). - * - * To simplify things, we divide this 2G space into N regions based - * on the constant below which could be turned into a tunable eventually - * - * We then assign dynamically those regions to devices as they show up. - * - * We use a bitmap as an allocator for these. - * - * Tables are allocated/created dynamically as devices are discovered, - * multiple TVT entries are used if needed - * - * When 64-bit DMA support is added we should simply use a separate set - * of larger regions (the HW supports 64 TVT entries). We can - * additionally create a bypass region in 64-bit space for performance - * though that would have a cost in terms of security. - * - * If you set NUM_DMA32_REGIONS to 1, then a single table is shared - * for all devices and bus/dev/fn validation is disabled - * - * Note that a DMA32 region cannot be smaller than 256M so the max - * supported here for now is 8. We don't yet support sharing regions - * between multiple devices so the max number of devices supported - * is MAX_TABLE_TVT_COUNT. - */ -#define NUM_DMA32_REGIONS 1 - -struct wsp_phb { - struct pci_controller *hose; - - /* Lock controlling access to the list of dma tables. - * It does -not- protect against dma_* operations on - * those tables, those should be stopped before an entry - * is removed from the list. - * - * The lock is also used for error handling operations - */ - spinlock_t lock; - struct list_head dma_tables; - unsigned long dma32_map; - unsigned long dma32_base; - unsigned int dma32_num_regions; - unsigned long dma32_region_size; - - /* Debugfs stuff */ - struct dentry *ddir; - - struct list_head all; -}; -static LIST_HEAD(wsp_phbs); - -//#define cfg_debug(fmt...) pr_debug(fmt) -#define cfg_debug(fmt...)
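
The block comment above pins down the geometry the rest of the removed IOMMU code depends on: a 2G DMA32 window carved into NUM_DMA32_REGIONS equal slices, with one TVT backing each 256M of a slice. As a quick cross-check of that arithmetic, here is a small standalone C sketch; it is illustrative only, not part of the patch, and the program itself is hypothetical:

#include <stdio.h>

int main(void)
{
	unsigned long window = 0x80000000UL;		/* the 2G DMA32 window */
	int n;

	/* candidate NUM_DMA32_REGIONS values; 8 is MAX_TABLE_TVT_COUNT */
	for (n = 1; n <= 8; n *= 2) {
		unsigned long size = window / n;	/* dma32_region_size */
		int tvts = size / 0x10000000UL;		/* one TVT per 256M */

		if (tvts == 0)
			tvts = 1;
		printf("%d region(s): size 0x%08lx, %d TVT(s) per table\n",
		       n, size, tvts);
	}
	return 0;
}

With NUM_DMA32_REGIONS left at 1, the first line printed is the configuration the driver actually ships: a single 2G region backed by eight TVTs and shared by every device, which is why wsp_pci_dma_dev_setup() below reuses the first table on the list instead of allocating one per device.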
- - -static int wsp_pcie_read_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 *val) -{ - struct pci_controller *hose; - int suboff; - u64 addr; - - hose = pci_bus_to_host(bus); - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - if (offset >= 0x1000) - return PCIBIOS_BAD_REGISTER_NUMBER; - addr = PCIE_REG_CA_ENABLE | - ((u64)bus->number) << PCIE_REG_CA_BUS_SHIFT | - ((u64)devfn) << PCIE_REG_CA_FUNC_SHIFT | - ((u64)offset & ~3) << PCIE_REG_CA_REG_SHIFT; - suboff = offset & 3; - - /* - * Note: the caller has already checked that offset is - * suitably aligned and that len is 1, 2 or 4. - */ - - switch (len) { - case 1: - addr |= (0x8ul >> suboff) << PCIE_REG_CA_BE_SHIFT; - out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr); - *val = (in_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA) - >> (suboff << 3)) & 0xff; - cfg_debug("read 1 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%02x\n", - bus->number, devfn >> 3, devfn & 7, - offset, suboff, addr, *val); - break; - case 2: - addr |= (0xcul >> suboff) << PCIE_REG_CA_BE_SHIFT; - out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr); - *val = (in_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA) - >> (suboff << 3)) & 0xffff; - cfg_debug("read 2 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%04x\n", - bus->number, devfn >> 3, devfn & 7, - offset, suboff, addr, *val); - break; - default: - addr |= 0xful << PCIE_REG_CA_BE_SHIFT; - out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr); - *val = in_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA); - cfg_debug("read 4 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%08x\n", - bus->number, devfn >> 3, devfn & 7, - offset, suboff, addr, *val); - break; - } - return PCIBIOS_SUCCESSFUL; -} - -static int wsp_pcie_write_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 val) -{ - struct pci_controller *hose; - int suboff; - u64 addr; - - hose = pci_bus_to_host(bus); - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - if (offset >= 0x1000) - return PCIBIOS_BAD_REGISTER_NUMBER; - addr = PCIE_REG_CA_ENABLE | - ((u64)bus->number) << PCIE_REG_CA_BUS_SHIFT | - ((u64)devfn) << PCIE_REG_CA_FUNC_SHIFT | - ((u64)offset & ~3) << PCIE_REG_CA_REG_SHIFT; - suboff = offset & 3; - - /* - * Note: the caller has already checked that offset is - * suitably aligned and that len is 1, 2 or 4. 
- */ - switch (len) { - case 1: - addr |= (0x8ul >> suboff) << PCIE_REG_CA_BE_SHIFT; - val <<= suboff << 3; - out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr); - out_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA, val); - cfg_debug("write 1 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%02x\n", - bus->number, devfn >> 3, devfn & 7, - offset, suboff, addr, val); - break; - case 2: - addr |= (0xcul >> suboff) << PCIE_REG_CA_BE_SHIFT; - val <<= suboff << 3; - out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr); - out_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA, val); - cfg_debug("write 2 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%04x\n", - bus->number, devfn >> 3, devfn & 7, - offset, suboff, addr, val); - break; - default: - addr |= 0xful << PCIE_REG_CA_BE_SHIFT; - out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr); - out_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA, val); - cfg_debug("write 4 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%08x\n", - bus->number, devfn >> 3, devfn & 7, - offset, suboff, addr, val); - break; - } - return PCIBIOS_SUCCESSFUL; -} - -static struct pci_ops wsp_pcie_pci_ops = -{ - .read = wsp_pcie_read_config, - .write = wsp_pcie_write_config, -}; - -#define TCE_SHIFT 12 -#define TCE_PAGE_SIZE (1 << TCE_SHIFT) -#define TCE_PCI_WRITE 0x2 /* write from PCI allowed */ -#define TCE_PCI_READ 0x1 /* read from PCI allowed */ -#define TCE_RPN_MASK 0x3fffffffffful /* 42-bit RPN (4K pages) */ -#define TCE_RPN_SHIFT 12 - -//#define dma_debug(fmt...) pr_debug(fmt) -#define dma_debug(fmt...) - -static int tce_build_wsp(struct iommu_table *tbl, long index, long npages, - unsigned long uaddr, enum dma_data_direction direction, - struct dma_attrs *attrs) -{ - struct wsp_dma_table *ptbl = container_of(tbl, - struct wsp_dma_table, - table); - u64 proto_tce; - u64 *tcep; - u64 rpn; - - proto_tce = TCE_PCI_READ; -#ifdef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS - proto_tce |= TCE_PCI_WRITE; -#else - if (direction != DMA_TO_DEVICE) - proto_tce |= TCE_PCI_WRITE; -#endif - - /* XXX Make this faster by factoring out the page address for - * within a TCE table - */ - while (npages--) { - /* We don't use it->base as the table can be scattered */ - tcep = (u64 *)page_address(ptbl->tces[index >> 16]); - tcep += (index & 0xffff); - - /* can't move this out since we might cross LMB boundary */ - rpn = __pa(uaddr) >> TCE_SHIFT; - *tcep = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; - - dma_debug("[DMA] TCE %p set to 0x%016llx (dma addr: 0x%lx)\n", - tcep, *tcep, (tbl->it_offset + index) << IOMMU_PAGE_SHIFT_4K); - - uaddr += TCE_PAGE_SIZE; - index++; - } - return 0; -} - -static void tce_free_wsp(struct iommu_table *tbl, long index, long npages) -{ - struct wsp_dma_table *ptbl = container_of(tbl, - struct wsp_dma_table, - table); -#ifndef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS - struct pci_controller *hose = ptbl->phb->hose; -#endif - u64 *tcep; - - /* XXX Make this faster by factoring out the page address for - * within a TCE table. 
Also use line-kill option to kill multiple - * TCEs at once - */ - while (npages--) { - /* We don't use it->base as the table can be scattered */ - tcep = (u64 *)page_address(ptbl->tces[index >> 16]); - tcep += (index & 0xffff); - dma_debug("[DMA] TCE %p cleared\n", tcep); - *tcep = 0; -#ifndef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS - /* Don't write there since it would pollute other MMIO accesses */ - out_be64(hose->cfg_data + PCIE_REG_TCE_KILL, - PCIE_REG_TCEKILL_SINGLE | PCIE_REG_TCEKILL_PS_4K | - (__pa(tcep) & PCIE_REG_TCEKILL_ADDR_MASK)); -#endif - index++; - } -} - -static struct wsp_dma_table *wsp_pci_create_dma32_table(struct wsp_phb *phb, - unsigned int region, - struct pci_dev *validate) -{ - struct pci_controller *hose = phb->hose; - unsigned long size = phb->dma32_region_size; - unsigned long addr = phb->dma32_region_size * region + phb->dma32_base; - struct wsp_dma_table *tbl; - int tvts_per_table, i, tvt, nid; - unsigned long flags; - - nid = of_node_to_nid(phb->hose->dn); - - /* Calculate how many TVTs are needed */ - tvts_per_table = size / 0x10000000; - if (tvts_per_table == 0) - tvts_per_table = 1; - - /* Calculate the base TVT index. We know all tables have the same - * size so we just do a simple multiply here - */ - tvt = region * tvts_per_table; - - pr_debug(" Region : %d\n", region); - pr_debug(" DMA range : 0x%08lx..0x%08lx\n", addr, addr + size - 1); - pr_debug(" Number of TVTs : %d\n", tvts_per_table); - pr_debug(" Base TVT : %d\n", tvt); - pr_debug(" Node : %d\n", nid); - - tbl = kzalloc_node(sizeof(struct wsp_dma_table), GFP_KERNEL, nid); - if (!tbl) - return ERR_PTR(-ENOMEM); - tbl->phb = phb; - - /* Create as many TVTs as needed, each represents 256M at most */ - for (i = 0; i < tvts_per_table; i++) { - u64 tvt_data1, tvt_data0; - - /* Allocate table. We use a 4K TCE size for now always so - * one table is always 8 * (256M / 4K) == 512K - */ - tbl->tces[i] = alloc_pages_node(nid, GFP_KERNEL, get_order(0x80000)); - if (tbl->tces[i] == NULL) - goto fail; - memset(page_address(tbl->tces[i]), 0, 0x80000); - - pr_debug(" TCE table %d at : %p\n", i, page_address(tbl->tces[i])); - - /* Table size. We currently set it to be the whole 256M region */ - tvt_data0 = 2ull << IODA_TVT0_TCE_TABLE_SIZE_SHIFT; - /* IO page size set to 4K */ - tvt_data1 = 1ull << IODA_TVT1_IO_PAGE_SIZE_SHIFT; - /* Shift in the address */ - tvt_data0 |= __pa(page_address(tbl->tces[i])) << IODA_TVT0_TTA_SHIFT; - - /* Validation stuff.
We only validate fully bus/dev/fn for now - * one day maybe we can group devices but that isn't the case - * at the moment - */ - if (validate) { - tvt_data0 |= IODA_TVT0_BUSNUM_VALID_MASK; - tvt_data0 |= validate->bus->number; - tvt_data1 |= IODA_TVT1_DEVNUM_VALID; - tvt_data1 |= ((u64)PCI_SLOT(validate->devfn)) - << IODA_TVT1_DEVNUM_VALUE_SHIFT; - tvt_data1 |= IODA_TVT1_FUNCNUM_VALID; - tvt_data1 |= ((u64)PCI_FUNC(validate->devfn)) - << IODA_TVT1_FUNCNUM_VALUE_SHIFT; - } - - /* XX PE number is always 0 for now */ - - /* Program the values using the PHB lock */ - spin_lock_irqsave(&phb->lock, flags); - out_be64(hose->cfg_data + PCIE_REG_IODA_ADDR, - (tvt + i) | PCIE_REG_IODA_AD_TBL_TVT); - out_be64(hose->cfg_data + PCIE_REG_IODA_DATA1, tvt_data1); - out_be64(hose->cfg_data + PCIE_REG_IODA_DATA0, tvt_data0); - spin_unlock_irqrestore(&phb->lock, flags); - } - - /* Init bits and pieces */ - tbl->table.it_blocksize = 16; - tbl->table.it_page_shift = IOMMU_PAGE_SHIFT_4K; - tbl->table.it_offset = addr >> tbl->table.it_page_shift; - tbl->table.it_size = size >> tbl->table.it_page_shift; - - /* - * It's already blank but we clear it anyway. - * Consider an additional interface that makes clearing optional - */ - iommu_init_table(&tbl->table, nid); - - list_add(&tbl->link, &phb->dma_tables); - return tbl; - - fail: - pr_debug(" Failed to allocate a 256M TCE table !\n"); - for (i = 0; i < tvts_per_table; i++) - if (tbl->tces[i]) - __free_pages(tbl->tces[i], get_order(0x80000)); - kfree(tbl); - return ERR_PTR(-ENOMEM); -} - -static void wsp_pci_dma_dev_setup(struct pci_dev *pdev) -{ - struct dev_archdata *archdata = &pdev->dev.archdata; - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct wsp_phb *phb = hose->private_data; - struct wsp_dma_table *table = NULL; - unsigned long flags; - int i; - - /* Don't assign an iommu table to a bridge */ - if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE) - return; - - pr_debug("%s: Setting up DMA...\n", pci_name(pdev)); - - spin_lock_irqsave(&phb->lock, flags); - - /* If only one region, check if it already exists */ - if (phb->dma32_num_regions == 1) { - spin_unlock_irqrestore(&phb->lock, flags); - if (list_empty(&phb->dma_tables)) - table = wsp_pci_create_dma32_table(phb, 0, NULL); - else - table = list_first_entry(&phb->dma_tables, - struct wsp_dma_table, - link); - } else { - /* else find a free region */ - for (i = 0; i < phb->dma32_num_regions && !table; i++) { - if (__test_and_set_bit(i, &phb->dma32_map)) - continue; - spin_unlock_irqrestore(&phb->lock, flags); - table = wsp_pci_create_dma32_table(phb, i, pdev); - } - } - - /* Check if we got an error */ - if (IS_ERR(table)) { - pr_err("%s: Failed to create DMA table, err %ld !\n", - pci_name(pdev), PTR_ERR(table)); - return; - } - - /* Or a valid table */ - if (table) { - pr_info("%s: Setup iommu: 32-bit DMA region 0x%08lx..0x%08lx\n", - pci_name(pdev), - table->table.it_offset << IOMMU_PAGE_SHIFT_4K, - (table->table.it_offset << IOMMU_PAGE_SHIFT_4K) - + phb->dma32_region_size - 1); - archdata->dma_data.iommu_table_base = &table->table; - return; - } - - /* Or no room */ - spin_unlock_irqrestore(&phb->lock, flags); - pr_err("%s: Out of DMA space !\n", pci_name(pdev)); -} - -static void __init wsp_pcie_configure_hw(struct pci_controller *hose) -{ - u64 val; - int i; - -#define DUMP_REG(x) \ - pr_debug("%-30s : 0x%016llx\n", #x, in_be64(hose->cfg_data + x)) - - /* - * Some WSP variants have a bogus class code by default in the PCI-E - * root complex's built-in P2P bridge - */ - val =
in_be64(hose->cfg_data + PCIE_REG_SYS_CFG1); - pr_debug("PCI-E SYS_CFG1 : 0x%llx\n", val); - out_be64(hose->cfg_data + PCIE_REG_SYS_CFG1, - (val & ~PCIE_REG_SYS_CFG1_CLASS_CODE) | (PCI_CLASS_BRIDGE_PCI << 8)); - pr_debug("PCI-E SYS_CFG1 : 0x%llx\n", in_be64(hose->cfg_data + PCIE_REG_SYS_CFG1)); - -#ifdef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS - /* XXX Disable TCE caching, it doesn't work on DD1 */ - out_be64(hose->cfg_data + 0xe50, - in_be64(hose->cfg_data + 0xe50) | (3ull << 62)); - printk("PCI-E DEBUG CONTROL 5 = 0x%llx\n", in_be64(hose->cfg_data + 0xe50)); -#endif - - /* Configure M32A and IO. IO is hard wired to be 1M for now */ - out_be64(hose->cfg_data + PCIE_REG_IO_BASE_ADDR, hose->io_base_phys); - out_be64(hose->cfg_data + PCIE_REG_IO_BASE_MASK, - (~(hose->io_resource.end - hose->io_resource.start)) & - 0x3fffffff000ul); - out_be64(hose->cfg_data + PCIE_REG_IO_START_ADDR, 0 | 1); - - out_be64(hose->cfg_data + PCIE_REG_M32A_BASE_ADDR, - hose->mem_resources[0].start); - printk("Want to write to M32A_BASE_MASK : 0x%llx\n", - (~(hose->mem_resources[0].end - - hose->mem_resources[0].start)) & 0x3ffffff0000ul); - out_be64(hose->cfg_data + PCIE_REG_M32A_BASE_MASK, - (~(hose->mem_resources[0].end - - hose->mem_resources[0].start)) & 0x3ffffff0000ul); - out_be64(hose->cfg_data + PCIE_REG_M32A_START_ADDR, - (hose->mem_resources[0].start - hose->mem_offset[0]) | 1); - - /* Clear all TVT entries - * - * XX Might get TVT count from device-tree - */ - for (i = 0; i < IODA_TVT_COUNT; i++) { - out_be64(hose->cfg_data + PCIE_REG_IODA_ADDR, - PCIE_REG_IODA_AD_TBL_TVT | i); - out_be64(hose->cfg_data + PCIE_REG_IODA_DATA1, 0); - out_be64(hose->cfg_data + PCIE_REG_IODA_DATA0, 0); - } - - /* Kill the TCE cache */ - out_be64(hose->cfg_data + PCIE_REG_PHB_CONFIG, - in_be64(hose->cfg_data + PCIE_REG_PHB_CONFIG) | - PCIE_REG_PHBC_64B_TCE_EN); - - /* Enable 32 & 64-bit MSIs, IO space and M32A */ - val = PCIE_REG_PHBC_32BIT_MSI_EN | - PCIE_REG_PHBC_IO_EN | - PCIE_REG_PHBC_64BIT_MSI_EN | - PCIE_REG_PHBC_M32A_EN; - if (iommu_is_off) - val |= PCIE_REG_PHBC_DMA_XLATE_BYPASS; - pr_debug("Will write config: 0x%llx\n", val); - out_be64(hose->cfg_data + PCIE_REG_PHB_CONFIG, val); - - /* Enable error reporting */ - out_be64(hose->cfg_data + 0xe00, - in_be64(hose->cfg_data + 0xe00) | 0x0008000000000000ull); - - /* Mask an error that's generated when doing config space probe - * - * XXX Maybe we should only mask it around config space cycles... that or - * ignore it when we know we had a config space cycle recently ? - */ - out_be64(hose->cfg_data + PCIE_REG_DMA_ERR_STATUS_MASK, 0x8000000000000000ull); - out_be64(hose->cfg_data + PCIE_REG_DMA_ERR1_STATUS_MASK, 0x8000000000000000ull); - - /* Enable UTL errors, for now, all of them go to UTL irq 1 - * - * We similarly mask one UTL error caused apparently during normal - * probing.
We also mask the link up error - */ - out_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_ERR_SEV, 0); - out_be64(hose->cfg_data + PCIE_UTL_RC_ERR_SEVERITY, 0); - out_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_ERROR_SEV, 0); - out_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_IRQ_EN, 0xffffffff00000000ull); - out_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_IRQ_EN, 0xff5fffff00000000ull); - out_be64(hose->cfg_data + PCIE_UTL_EP_ERR_IRQ_EN, 0xffffffff00000000ull); - - DUMP_REG(PCIE_REG_IO_BASE_ADDR); - DUMP_REG(PCIE_REG_IO_BASE_MASK); - DUMP_REG(PCIE_REG_IO_START_ADDR); - DUMP_REG(PCIE_REG_M32A_BASE_ADDR); - DUMP_REG(PCIE_REG_M32A_BASE_MASK); - DUMP_REG(PCIE_REG_M32A_START_ADDR); - DUMP_REG(PCIE_REG_M32B_BASE_ADDR); - DUMP_REG(PCIE_REG_M32B_BASE_MASK); - DUMP_REG(PCIE_REG_M32B_START_ADDR); - DUMP_REG(PCIE_REG_M64_BASE_ADDR); - DUMP_REG(PCIE_REG_M64_BASE_MASK); - DUMP_REG(PCIE_REG_M64_START_ADDR); - DUMP_REG(PCIE_REG_PHB_CONFIG); -} - -static void wsp_pci_wait_io_idle(struct wsp_phb *phb, unsigned long port) -{ - u64 val; - int i; - - for (i = 0; i < 10000; i++) { - val = in_be64(phb->hose->cfg_data + 0xe08); - if ((val & 0x1900000000000000ull) == 0x0100000000000000ull) - return; - udelay(1); - } - pr_warning("PCI IO timeout on domain %d port 0x%lx\n", - phb->hose->global_number, port); -} - -#define DEF_PCI_AC_RET_pio(name, ret, at, al, aa) \ -static ret wsp_pci_##name at \ -{ \ - struct iowa_bus *bus; \ - struct wsp_phb *phb; \ - unsigned long flags; \ - ret rval; \ - bus = iowa_pio_find_bus(aa); \ - WARN_ON(!bus); \ - phb = bus->private; \ - spin_lock_irqsave(&phb->lock, flags); \ - wsp_pci_wait_io_idle(phb, aa); \ - rval = __do_##name al; \ - spin_unlock_irqrestore(&phb->lock, flags); \ - return rval; \ -} - -#define DEF_PCI_AC_NORET_pio(name, at, al, aa) \ -static void wsp_pci_##name at \ -{ \ - struct iowa_bus *bus; \ - struct wsp_phb *phb; \ - unsigned long flags; \ - bus = iowa_pio_find_bus(aa); \ - WARN_ON(!bus); \ - phb = bus->private; \ - spin_lock_irqsave(&phb->lock, flags); \ - wsp_pci_wait_io_idle(phb, aa); \ - __do_##name al; \ - spin_unlock_irqrestore(&phb->lock, flags); \ -} - -#define DEF_PCI_AC_RET_mem(name, ret, at, al, aa) -#define DEF_PCI_AC_NORET_mem(name, at, al, aa) - -#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) \ - DEF_PCI_AC_RET_##space(name, ret, at, al, aa) - -#define DEF_PCI_AC_NORET(name, at, al, space, aa) \ - DEF_PCI_AC_NORET_##space(name, at, al, aa) \ - - -#include <asm/io-defs.h> - -#undef DEF_PCI_AC_RET -#undef DEF_PCI_AC_NORET - -static struct ppc_pci_io wsp_pci_iops = { - .inb = wsp_pci_inb, - .inw = wsp_pci_inw, - .inl = wsp_pci_inl, - .outb = wsp_pci_outb, - .outw = wsp_pci_outw, - .outl = wsp_pci_outl, - .insb = wsp_pci_insb, - .insw = wsp_pci_insw, - .insl = wsp_pci_insl, - .outsb = wsp_pci_outsb, - .outsw = wsp_pci_outsw, - .outsl = wsp_pci_outsl, -}; - -static int __init wsp_setup_one_phb(struct device_node *np) -{ - struct pci_controller *hose; - struct wsp_phb *phb; - - pr_info("PCI: Setting up PCIe host bridge %s\n", np->full_name); - - phb = zalloc_maybe_bootmem(sizeof(struct wsp_phb), GFP_KERNEL); - if (!phb) - return -ENOMEM; - hose = pcibios_alloc_controller(np); - if (!hose) { - /* Can't really free the phb */ - return -ENOMEM; - } - hose->private_data = phb; - phb->hose = hose; - - INIT_LIST_HEAD(&phb->dma_tables); - spin_lock_init(&phb->lock); - - /* XXX Use bus-range property ?
*/ - hose->first_busno = 0; - hose->last_busno = 0xff; - - /* We use cfg_data as the address for the whole bridge MMIO space - */ - hose->cfg_data = of_iomap(hose->dn, 0); - - pr_debug("PCIe registers mapped at 0x%p\n", hose->cfg_data); - - /* Get the ranges of the device-tree */ - pci_process_bridge_OF_ranges(hose, np, 0); - - /* XXX Force re-assigning of everything for now */ - pci_add_flags(PCI_REASSIGN_ALL_BUS | PCI_REASSIGN_ALL_RSRC | - PCI_ENABLE_PROC_DOMAINS); - - /* Calculate how the TCE space is divided */ - phb->dma32_base = 0; - phb->dma32_num_regions = NUM_DMA32_REGIONS; - if (phb->dma32_num_regions > MAX_TABLE_TVT_COUNT) { - pr_warning("IOMMU: Clamped to %d DMA32 regions\n", - MAX_TABLE_TVT_COUNT); - phb->dma32_num_regions = MAX_TABLE_TVT_COUNT; - } - phb->dma32_region_size = 0x80000000 / phb->dma32_num_regions; - - BUG_ON(!is_power_of_2(phb->dma32_region_size)); - - /* Setup config ops */ - hose->ops = &wsp_pcie_pci_ops; - - /* Configure the HW */ - wsp_pcie_configure_hw(hose); - - /* Instantiate IO workarounds */ - iowa_register_bus(hose, &wsp_pci_iops, NULL, phb); -#ifdef CONFIG_PCI_MSI - wsp_setup_phb_msi(hose); -#endif - - /* Add to global list */ - list_add(&phb->all, &wsp_phbs); - - return 0; -} - -void __init wsp_setup_pci(void) -{ - struct device_node *np; - int rc; - - /* Find host bridges */ - for_each_compatible_node(np, "pciex", PCIE_COMPATIBLE) { - rc = wsp_setup_one_phb(np); - if (rc) - pr_err("Failed to setup PCIe bridge %s, rc=%d\n", - np->full_name, rc); - } - - /* Establish device-tree linkage */ - pci_devs_phb_init(); - - /* Set DMA ops to use TCEs */ - if (iommu_is_off) { - pr_info("PCI-E: Disabled TCEs, using direct DMA\n"); - set_pci_dma_ops(&dma_direct_ops); - } else { - ppc_md.pci_dma_dev_setup = wsp_pci_dma_dev_setup; - ppc_md.tce_build = tce_build_wsp; - ppc_md.tce_free = tce_free_wsp; - set_pci_dma_ops(&dma_iommu_ops); - } -} - -#define err_debug(fmt...) pr_debug(fmt) -//#define err_debug(fmt...)
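
Before the file moves on to error handling, the DMA path above is worth restating: tce_build_wsp() packs a pair of permission bits and a 42-bit real page number into each 64-bit TCE. The following standalone C sketch restates that encoding for reference; the encode_tce helper is hypothetical, added only for illustration, and the constants mirror the #defines earlier in the file:

#include <stdio.h>
#include <stdint.h>

#define TCE_PCI_READ	0x1ull		/* read from PCI allowed */
#define TCE_PCI_WRITE	0x2ull		/* write from PCI allowed */
#define TCE_RPN_MASK	0x3fffffffffful	/* 42-bit RPN (4K pages) */
#define TCE_RPN_SHIFT	12
#define TCE_SHIFT	12		/* 4K IOMMU page */

/* Hypothetical helper mirroring the store in tce_build_wsp(): permission
 * bits live in the low bits, the real page number sits above them. */
static uint64_t encode_tce(uint64_t pa, int writable)
{
	uint64_t tce = TCE_PCI_READ | (writable ? TCE_PCI_WRITE : 0);
	uint64_t rpn = pa >> TCE_SHIFT;

	return tce | ((rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT);
}

int main(void)
{
	/* A 4K page at physical 0x10002000, mapped read-write */
	printf("TCE = 0x%016llx\n",
	       (unsigned long long)encode_tce(0x10002000ull, 1));
	return 0;
}

The printed value, 0x0000000010002003, also shows why the DD1 workaround above simply forces TCE_PCI_WRITE on every entry: flipping one low bit is all that separates a read-only mapping from a read-write one.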
- -static int __init wsp_pci_get_err_irq_no_dt(struct device_node *np) -{ - const u32 *prop; - int hw_irq; - - /* Ok, no interrupts property, let's try to find our child P2P */ - np = of_get_next_child(np, NULL); - if (np == NULL) - return 0; - - /* Grab its interrupt map */ - prop = of_get_property(np, "interrupt-map", NULL); - if (prop == NULL) - return 0; - - /* Grab one of the interrupts in there, keep the low 4 bits */ - hw_irq = prop[5] & 0xf; - - /* 0..4 for PHB 0 and 5..9 for PHB 1 */ - if (hw_irq < 5) - hw_irq = 4; - else - hw_irq = 9; - hw_irq |= prop[5] & ~0xf; - - err_debug("PCI: Using 0x%x as error IRQ for %s\n", - hw_irq, np->parent->full_name); - return irq_create_mapping(NULL, hw_irq); -} - -static const struct { - u32 offset; - const char *name; -} wsp_pci_regs[] = { -#define DREG(x) { PCIE_REG_##x, #x } -#define DUTL(x) { PCIE_UTL_##x, "UTL_" #x } - /* Architected registers except CONFIG_ and IODA - * to avoid side effects - */ - DREG(DMA_CHAN_STATUS), - DREG(CPU_LOADSTORE_STATUS), - DREG(LOCK0), - DREG(LOCK1), - DREG(PHB_CONFIG), - DREG(IO_BASE_ADDR), - DREG(IO_BASE_MASK), - DREG(IO_START_ADDR), - DREG(M32A_BASE_ADDR), - DREG(M32A_BASE_MASK), - DREG(M32A_START_ADDR), - DREG(M32B_BASE_ADDR), - DREG(M32B_BASE_MASK), - DREG(M32B_START_ADDR), - DREG(M64_BASE_ADDR), - DREG(M64_BASE_MASK), - DREG(M64_START_ADDR), - DREG(TCE_KILL), - DREG(LOCK2), - DREG(PHB_GEN_CAP), - DREG(PHB_TCE_CAP), - DREG(PHB_IRQ_CAP), - DREG(PHB_EEH_CAP), - DREG(PAPR_ERR_INJ_CONTROL), - DREG(PAPR_ERR_INJ_ADDR), - DREG(PAPR_ERR_INJ_MASK), - - /* UTL core regs */ - DUTL(SYS_BUS_CONTROL), - DUTL(STATUS), - DUTL(SYS_BUS_AGENT_STATUS), - DUTL(SYS_BUS_AGENT_ERR_SEV), - DUTL(SYS_BUS_AGENT_IRQ_EN), - DUTL(SYS_BUS_BURST_SZ_CONF), - DUTL(REVISION_ID), - DUTL(OUT_POST_HDR_BUF_ALLOC), - DUTL(OUT_POST_DAT_BUF_ALLOC), - DUTL(IN_POST_HDR_BUF_ALLOC), - DUTL(IN_POST_DAT_BUF_ALLOC), - DUTL(OUT_NP_BUF_ALLOC), - DUTL(IN_NP_BUF_ALLOC), - DUTL(PCIE_TAGS_ALLOC), - DUTL(GBIF_READ_TAGS_ALLOC), - - DUTL(PCIE_PORT_CONTROL), - DUTL(PCIE_PORT_STATUS), - DUTL(PCIE_PORT_ERROR_SEV), - DUTL(PCIE_PORT_IRQ_EN), - DUTL(RC_STATUS), - DUTL(RC_ERR_SEVERITY), - DUTL(RC_IRQ_EN), - DUTL(EP_STATUS), - DUTL(EP_ERR_SEVERITY), - DUTL(EP_ERR_IRQ_EN), - DUTL(PCI_PM_CTRL1), - DUTL(PCI_PM_CTRL2), - - /* PCIe stack regs */ - DREG(SYSTEM_CONFIG1), - DREG(SYSTEM_CONFIG2), - DREG(EP_SYSTEM_CONFIG), - DREG(EP_FLR), - DREG(EP_BAR_CONFIG), - DREG(LINK_CONFIG), - DREG(PM_CONFIG), - DREG(DLP_CONTROL), - DREG(DLP_STATUS), - DREG(ERR_REPORT_CONTROL), - DREG(SLOT_CONTROL1), - DREG(SLOT_CONTROL2), - DREG(UTL_CONFIG), - DREG(BUFFERS_CONFIG), - DREG(ERROR_INJECT), - DREG(SRIOV_CONFIG), - DREG(PF0_SRIOV_STATUS), - DREG(PF1_SRIOV_STATUS), - DREG(PORT_NUMBER), - DREG(POR_SYSTEM_CONFIG), - - /* Internal logic regs */ - DREG(PHB_VERSION), - DREG(RESET), - DREG(PHB_CONTROL), - DREG(PHB_TIMEOUT_CONTROL1), - DREG(PHB_QUIESCE_DMA), - DREG(PHB_DMA_READ_TAG_ACTV), - DREG(PHB_TCE_READ_TAG_ACTV), - - /* FIR registers */ - DREG(LEM_FIR_ACCUM), - DREG(LEM_FIR_AND_MASK), - DREG(LEM_FIR_OR_MASK), - DREG(LEM_ACTION0), - DREG(LEM_ACTION1), - DREG(LEM_ERROR_MASK), - DREG(LEM_ERROR_AND_MASK), - DREG(LEM_ERROR_OR_MASK), - - /* Error traps registers */ - DREG(PHB_ERR_STATUS), - DREG(PHB_ERR_STATUS), - DREG(PHB_ERR1_STATUS), - DREG(PHB_ERR_INJECT), - DREG(PHB_ERR_LEM_ENABLE), - DREG(PHB_ERR_IRQ_ENABLE), - DREG(PHB_ERR_FREEZE_ENABLE), - DREG(PHB_ERR_SIDE_ENABLE), - DREG(PHB_ERR_LOG_0), - DREG(PHB_ERR_LOG_1), - DREG(PHB_ERR_STATUS_MASK), - DREG(PHB_ERR1_STATUS_MASK), - DREG(MMIO_ERR_STATUS),
- DREG(MMIO_ERR1_STATUS), - DREG(MMIO_ERR_INJECT), - DREG(MMIO_ERR_LEM_ENABLE), - DREG(MMIO_ERR_IRQ_ENABLE), - DREG(MMIO_ERR_FREEZE_ENABLE), - DREG(MMIO_ERR_SIDE_ENABLE), - DREG(MMIO_ERR_LOG_0), - DREG(MMIO_ERR_LOG_1), - DREG(MMIO_ERR_STATUS_MASK), - DREG(MMIO_ERR1_STATUS_MASK), - DREG(DMA_ERR_STATUS), - DREG(DMA_ERR1_STATUS), - DREG(DMA_ERR_INJECT), - DREG(DMA_ERR_LEM_ENABLE), - DREG(DMA_ERR_IRQ_ENABLE), - DREG(DMA_ERR_FREEZE_ENABLE), - DREG(DMA_ERR_SIDE_ENABLE), - DREG(DMA_ERR_LOG_0), - DREG(DMA_ERR_LOG_1), - DREG(DMA_ERR_STATUS_MASK), - DREG(DMA_ERR1_STATUS_MASK), - - /* Debug and Trace registers */ - DREG(PHB_DEBUG_CONTROL0), - DREG(PHB_DEBUG_STATUS0), - DREG(PHB_DEBUG_CONTROL1), - DREG(PHB_DEBUG_STATUS1), - DREG(PHB_DEBUG_CONTROL2), - DREG(PHB_DEBUG_STATUS2), - DREG(PHB_DEBUG_CONTROL3), - DREG(PHB_DEBUG_STATUS3), - DREG(PHB_DEBUG_CONTROL4), - DREG(PHB_DEBUG_STATUS4), - DREG(PHB_DEBUG_CONTROL5), - DREG(PHB_DEBUG_STATUS5), - - /* Don't seem to exist ... - DREG(PHB_DEBUG_CONTROL6), - DREG(PHB_DEBUG_STATUS6), - */ -}; - -static int wsp_pci_regs_show(struct seq_file *m, void *private) -{ - struct wsp_phb *phb = m->private; - struct pci_controller *hose = phb->hose; - int i; - - for (i = 0; i < ARRAY_SIZE(wsp_pci_regs); i++) { - /* Skip write-only regs */ - if (wsp_pci_regs[i].offset == 0xc08 || - wsp_pci_regs[i].offset == 0xc10 || - wsp_pci_regs[i].offset == 0xc38 || - wsp_pci_regs[i].offset == 0xc40) - continue; - seq_printf(m, "0x%03x: 0x%016llx %s\n", - wsp_pci_regs[i].offset, - in_be64(hose->cfg_data + wsp_pci_regs[i].offset), - wsp_pci_regs[i].name); - } - return 0; -} - -static int wsp_pci_regs_open(struct inode *inode, struct file *file) -{ - return single_open(file, wsp_pci_regs_show, inode->i_private); -} - -static const struct file_operations wsp_pci_regs_fops = { - .open = wsp_pci_regs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int wsp_pci_reg_set(void *data, u64 val) -{ - out_be64((void __iomem *)data, val); - return 0; -} - -static int wsp_pci_reg_get(void *data, u64 *val) -{ - *val = in_be64((void __iomem *)data); - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(wsp_pci_reg_fops, wsp_pci_reg_get, wsp_pci_reg_set, "0x%llx\n"); - -static irqreturn_t wsp_pci_err_irq(int irq, void *dev_id) -{ - struct wsp_phb *phb = dev_id; - struct pci_controller *hose = phb->hose; - irqreturn_t handled = IRQ_NONE; - struct wsp_pcie_err_log_data ed; - - pr_err("PCI: Error interrupt on %s (PHB %d)\n", - hose->dn->full_name, hose->global_number); - again: - memset(&ed, 0, sizeof(ed)); - - /* Read and clear UTL errors */ - ed.utl_sys_err = in_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_STATUS); - if (ed.utl_sys_err) - out_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_STATUS, ed.utl_sys_err); - ed.utl_port_err = in_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_STATUS); - if (ed.utl_port_err) - out_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_STATUS, ed.utl_port_err); - ed.utl_rc_err = in_be64(hose->cfg_data + PCIE_UTL_RC_STATUS); - if (ed.utl_rc_err) - out_be64(hose->cfg_data + PCIE_UTL_RC_STATUS, ed.utl_rc_err); - - /* Read and clear main trap errors */ - ed.phb_err = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR_STATUS); - if (ed.phb_err) { - ed.phb_err1 = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR1_STATUS); - ed.phb_log0 = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR_LOG_0); - ed.phb_log1 = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR_LOG_1); - out_be64(hose->cfg_data + PCIE_REG_PHB_ERR1_STATUS, 0); - out_be64(hose->cfg_data + PCIE_REG_PHB_ERR_STATUS, 0); - } - ed.mmio_err = 
in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_STATUS); - if (ed.mmio_err) { - ed.mmio_err1 = in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR1_STATUS); - ed.mmio_log0 = in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_LOG_0); - ed.mmio_log1 = in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_LOG_1); - out_be64(hose->cfg_data + PCIE_REG_MMIO_ERR1_STATUS, 0); - out_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_STATUS, 0); - } - ed.dma_err = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR_STATUS); - if (ed.dma_err) { - ed.dma_err1 = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR1_STATUS); - ed.dma_log0 = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR_LOG_0); - ed.dma_log1 = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR_LOG_1); - out_be64(hose->cfg_data + PCIE_REG_DMA_ERR1_STATUS, 0); - out_be64(hose->cfg_data + PCIE_REG_DMA_ERR_STATUS, 0); - } - - /* Now print things out */ - if (ed.phb_err) { - pr_err(" PHB Error Status : 0x%016llx\n", ed.phb_err); - pr_err(" PHB First Error Status: 0x%016llx\n", ed.phb_err1); - pr_err(" PHB Error Log 0 : 0x%016llx\n", ed.phb_log0); - pr_err(" PHB Error Log 1 : 0x%016llx\n", ed.phb_log1); - } - if (ed.mmio_err) { - pr_err(" MMIO Error Status : 0x%016llx\n", ed.mmio_err); - pr_err(" MMIO First Error Status: 0x%016llx\n", ed.mmio_err1); - pr_err(" MMIO Error Log 0 : 0x%016llx\n", ed.mmio_log0); - pr_err(" MMIO Error Log 1 : 0x%016llx\n", ed.mmio_log1); - } - if (ed.dma_err) { - pr_err(" DMA Error Status : 0x%016llx\n", ed.dma_err); - pr_err(" DMA First Error Status: 0x%016llx\n", ed.dma_err1); - pr_err(" DMA Error Log 0 : 0x%016llx\n", ed.dma_log0); - pr_err(" DMA Error Log 1 : 0x%016llx\n", ed.dma_log1); - } - if (ed.utl_sys_err) - pr_err(" UTL Sys Error Status : 0x%016llx\n", ed.utl_sys_err); - if (ed.utl_port_err) - pr_err(" UTL Port Error Status : 0x%016llx\n", ed.utl_port_err); - if (ed.utl_rc_err) - pr_err(" UTL RC Error Status : 0x%016llx\n", ed.utl_rc_err); - - /* Interrupts are caused by the error traps. 
If we had any error there - * we loop again in case the UTL buffered some new stuff between - * going there and going to the traps - */ - if (ed.dma_err || ed.mmio_err || ed.phb_err) { - handled = IRQ_HANDLED; - goto again; - } - return handled; -} - -static void __init wsp_setup_pci_err_reporting(struct wsp_phb *phb) -{ - struct pci_controller *hose = phb->hose; - int err_irq, i, rc; - char fname[16]; - - /* Create a debugfs file for that PHB */ - sprintf(fname, "phb%d", phb->hose->global_number); - phb->ddir = debugfs_create_dir(fname, powerpc_debugfs_root); - - /* Some useful debug output */ - if (phb->ddir) { - struct dentry *d = debugfs_create_dir("regs", phb->ddir); - char tmp[64]; - - for (i = 0; i < ARRAY_SIZE(wsp_pci_regs); i++) { - sprintf(tmp, "%03x_%s", wsp_pci_regs[i].offset, - wsp_pci_regs[i].name); - debugfs_create_file(tmp, 0600, d, - hose->cfg_data + wsp_pci_regs[i].offset, - &wsp_pci_reg_fops); - } - debugfs_create_file("all_regs", 0600, phb->ddir, phb, &wsp_pci_regs_fops); - } - - /* Find the IRQ number for that PHB */ - err_irq = irq_of_parse_and_map(hose->dn, 0); - if (err_irq == 0) - /* XXX Error IRQ lacking from device-tree */ - err_irq = wsp_pci_get_err_irq_no_dt(hose->dn); - if (err_irq == 0) { - pr_err("PCI: Failed to fetch error interrupt for %s\n", - hose->dn->full_name); - return; - } - /* Request it */ - rc = request_irq(err_irq, wsp_pci_err_irq, 0, "wsp_pci error", phb); - if (rc) { - pr_err("PCI: Failed to request interrupt for %s\n", - hose->dn->full_name); - } - /* Enable interrupts for all errors for now */ - out_be64(hose->cfg_data + PCIE_REG_PHB_ERR_IRQ_ENABLE, 0xffffffffffffffffull); - out_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_IRQ_ENABLE, 0xffffffffffffffffull); - out_be64(hose->cfg_data + PCIE_REG_DMA_ERR_IRQ_ENABLE, 0xffffffffffffffffull); -} - -/* - * This is called later to hookup with the error interrupt - */ -static int __init wsp_setup_pci_late(void) -{ - struct wsp_phb *phb; - - list_for_each_entry(phb, &wsp_phbs, all) - wsp_setup_pci_err_reporting(phb); - - return 0; -} -arch_initcall(wsp_setup_pci_late); diff --git a/arch/powerpc/platforms/wsp/wsp_pci.h b/arch/powerpc/platforms/wsp/wsp_pci.h deleted file mode 100644 index 52e9bd95250..00000000000 --- a/arch/powerpc/platforms/wsp/wsp_pci.h +++ /dev/null @@ -1,268 +0,0 @@ -/* - * Copyright 2010 Ben Herrenschmidt, IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#ifndef __WSP_PCI_H -#define __WSP_PCI_H - -/* Architected registers */ -#define PCIE_REG_DMA_CHAN_STATUS 0x110 -#define PCIE_REG_CPU_LOADSTORE_STATUS 0x120 - -#define PCIE_REG_CONFIG_DATA 0x130 -#define PCIE_REG_LOCK0 0x138 -#define PCIE_REG_CONFIG_ADDRESS 0x140 -#define PCIE_REG_CA_ENABLE 0x8000000000000000ull -#define PCIE_REG_CA_BUS_MASK 0x0ff0000000000000ull -#define PCIE_REG_CA_BUS_SHIFT (20+32) -#define PCIE_REG_CA_DEV_MASK 0x000f800000000000ull -#define PCIE_REG_CA_DEV_SHIFT (15+32) -#define PCIE_REG_CA_FUNC_MASK 0x0000700000000000ull -#define PCIE_REG_CA_FUNC_SHIFT (12+32) -#define PCIE_REG_CA_REG_MASK 0x00000fff00000000ull -#define PCIE_REG_CA_REG_SHIFT ( 0+32) -#define PCIE_REG_CA_BE_MASK 0x00000000f0000000ull -#define PCIE_REG_CA_BE_SHIFT ( 28) -#define PCIE_REG_LOCK1 0x148 - -#define PCIE_REG_PHB_CONFIG 0x160 -#define PCIE_REG_PHBC_64B_TCE_EN 0x2000000000000000ull -#define PCIE_REG_PHBC_MMIO_DMA_FREEZE_EN 0x1000000000000000ull -#define PCIE_REG_PHBC_32BIT_MSI_EN 0x0080000000000000ull -#define PCIE_REG_PHBC_M64_EN 0x0040000000000000ull -#define PCIE_REG_PHBC_IO_EN 0x0008000000000000ull -#define PCIE_REG_PHBC_64BIT_MSI_EN 0x0002000000000000ull -#define PCIE_REG_PHBC_M32A_EN 0x0000800000000000ull -#define PCIE_REG_PHBC_M32B_EN 0x0000400000000000ull -#define PCIE_REG_PHBC_MSI_PE_VALIDATE 0x0000200000000000ull -#define PCIE_REG_PHBC_DMA_XLATE_BYPASS 0x0000100000000000ull - -#define PCIE_REG_IO_BASE_ADDR 0x170 -#define PCIE_REG_IO_BASE_MASK 0x178 -#define PCIE_REG_IO_START_ADDR 0x180 - -#define PCIE_REG_M32A_BASE_ADDR 0x190 -#define PCIE_REG_M32A_BASE_MASK 0x198 -#define PCIE_REG_M32A_START_ADDR 0x1a0 - -#define PCIE_REG_M32B_BASE_ADDR 0x1b0 -#define PCIE_REG_M32B_BASE_MASK 0x1b8 -#define PCIE_REG_M32B_START_ADDR 0x1c0 - -#define PCIE_REG_M64_BASE_ADDR 0x1e0 -#define PCIE_REG_M64_BASE_MASK 0x1e8 -#define PCIE_REG_M64_START_ADDR 0x1f0 - -#define PCIE_REG_TCE_KILL 0x210 -#define PCIE_REG_TCEKILL_SINGLE 0x8000000000000000ull -#define PCIE_REG_TCEKILL_ADDR_MASK 0x000003fffffffff8ull -#define PCIE_REG_TCEKILL_PS_4K 0 -#define PCIE_REG_TCEKILL_PS_64K 1 -#define PCIE_REG_TCEKILL_PS_16M 2 -#define PCIE_REG_TCEKILL_PS_16G 3 - -#define PCIE_REG_IODA_ADDR 0x220 -#define PCIE_REG_IODA_AD_AUTOINC 0x8000000000000000ull -#define PCIE_REG_IODA_AD_TBL_MVT 0x0005000000000000ull -#define PCIE_REG_IODA_AD_TBL_PELT 0x0006000000000000ull -#define PCIE_REG_IODA_AD_TBL_PESTA 0x0007000000000000ull -#define PCIE_REG_IODA_AD_TBL_PESTB 0x0008000000000000ull -#define PCIE_REG_IODA_AD_TBL_TVT 0x0009000000000000ull -#define PCIE_REG_IODA_AD_TBL_TCE 0x000a000000000000ull -#define PCIE_REG_IODA_DATA0 0x228 -#define PCIE_REG_IODA_DATA1 0x230 - -#define PCIE_REG_LOCK2 0x240 - -#define PCIE_REG_PHB_GEN_CAP 0x250 -#define PCIE_REG_PHB_TCE_CAP 0x258 -#define PCIE_REG_PHB_IRQ_CAP 0x260 -#define PCIE_REG_PHB_EEH_CAP 0x268 - -#define PCIE_REG_PAPR_ERR_INJ_CONTROL 0x2b0 -#define PCIE_REG_PAPR_ERR_INJ_ADDR 0x2b8 -#define PCIE_REG_PAPR_ERR_INJ_MASK 0x2c0 - - -#define PCIE_REG_SYS_CFG1 0x600 -#define PCIE_REG_SYS_CFG1_CLASS_CODE 0x0000000000ffffffull - -#define IODA_TVT0_TTA_MASK 0x000fffffffff0000ull -#define IODA_TVT0_TTA_SHIFT 4 -#define IODA_TVT0_BUSNUM_VALID_MASK 0x000000000000e000ull -#define IODA_TVT0_TCE_TABLE_SIZE_MASK 0x0000000000001f00ull -#define IODA_TVT0_TCE_TABLE_SIZE_SHIFT 8 -#define IODA_TVT0_BUSNUM_VALUE_MASK 0x00000000000000ffull -#define IODA_TVT0_BUSNUM_VALID_SHIFT 0 -#define IODA_TVT1_DEVNUM_VALID 0x2000000000000000ull -#define IODA_TVT1_DEVNUM_VALUE_MASK 0x1f00000000000000ull -#define 
IODA_TVT1_DEVNUM_VALUE_SHIFT 56 -#define IODA_TVT1_FUNCNUM_VALID 0x0008000000000000ull -#define IODA_TVT1_FUNCNUM_VALUE_MASK 0x0007000000000000ull -#define IODA_TVT1_FUNCNUM_VALUE_SHIFT 48 -#define IODA_TVT1_IO_PAGE_SIZE_MASK 0x00001f0000000000ull -#define IODA_TVT1_IO_PAGE_SIZE_SHIFT 40 -#define IODA_TVT1_PE_NUMBER_MASK 0x000000000000003full -#define IODA_TVT1_PE_NUMBER_SHIFT 0 - -#define IODA_TVT_COUNT 64 - -/* UTL Core registers */ -#define PCIE_UTL_SYS_BUS_CONTROL 0x400 -#define PCIE_UTL_STATUS 0x408 -#define PCIE_UTL_SYS_BUS_AGENT_STATUS 0x410 -#define PCIE_UTL_SYS_BUS_AGENT_ERR_SEV 0x418 -#define PCIE_UTL_SYS_BUS_AGENT_IRQ_EN 0x420 -#define PCIE_UTL_SYS_BUS_BURST_SZ_CONF 0x440 -#define PCIE_UTL_REVISION_ID 0x448 - -#define PCIE_UTL_OUT_POST_HDR_BUF_ALLOC 0x4c0 -#define PCIE_UTL_OUT_POST_DAT_BUF_ALLOC 0x4d0 -#define PCIE_UTL_IN_POST_HDR_BUF_ALLOC 0x4e0 -#define PCIE_UTL_IN_POST_DAT_BUF_ALLOC 0x4f0 -#define PCIE_UTL_OUT_NP_BUF_ALLOC 0x500 -#define PCIE_UTL_IN_NP_BUF_ALLOC 0x510 -#define PCIE_UTL_PCIE_TAGS_ALLOC 0x520 -#define PCIE_UTL_GBIF_READ_TAGS_ALLOC 0x530 - -#define PCIE_UTL_PCIE_PORT_CONTROL 0x540 -#define PCIE_UTL_PCIE_PORT_STATUS 0x548 -#define PCIE_UTL_PCIE_PORT_ERROR_SEV 0x550 -#define PCIE_UTL_PCIE_PORT_IRQ_EN 0x558 -#define PCIE_UTL_RC_STATUS 0x560 -#define PCIE_UTL_RC_ERR_SEVERITY 0x568 -#define PCIE_UTL_RC_IRQ_EN 0x570 -#define PCIE_UTL_EP_STATUS 0x578 -#define PCIE_UTL_EP_ERR_SEVERITY 0x580 -#define PCIE_UTL_EP_ERR_IRQ_EN 0x588 - -#define PCIE_UTL_PCI_PM_CTRL1 0x590 -#define PCIE_UTL_PCI_PM_CTRL2 0x598 - -/* PCIe stack registers */ -#define PCIE_REG_SYSTEM_CONFIG1 0x600 -#define PCIE_REG_SYSTEM_CONFIG2 0x608 -#define PCIE_REG_EP_SYSTEM_CONFIG 0x618 -#define PCIE_REG_EP_FLR 0x620 -#define PCIE_REG_EP_BAR_CONFIG 0x628 -#define PCIE_REG_LINK_CONFIG 0x630 -#define PCIE_REG_PM_CONFIG 0x640 -#define PCIE_REG_DLP_CONTROL 0x650 -#define PCIE_REG_DLP_STATUS 0x658 -#define PCIE_REG_ERR_REPORT_CONTROL 0x660 -#define PCIE_REG_SLOT_CONTROL1 0x670 -#define PCIE_REG_SLOT_CONTROL2 0x678 -#define PCIE_REG_UTL_CONFIG 0x680 -#define PCIE_REG_BUFFERS_CONFIG 0x690 -#define PCIE_REG_ERROR_INJECT 0x698 -#define PCIE_REG_SRIOV_CONFIG 0x6a0 -#define PCIE_REG_PF0_SRIOV_STATUS 0x6a8 -#define PCIE_REG_PF1_SRIOV_STATUS 0x6b0 -#define PCIE_REG_PORT_NUMBER 0x700 -#define PCIE_REG_POR_SYSTEM_CONFIG 0x708 - -/* PHB internal logic registers */ -#define PCIE_REG_PHB_VERSION 0x800 -#define PCIE_REG_RESET 0x808 -#define PCIE_REG_PHB_CONTROL 0x810 -#define PCIE_REG_PHB_TIMEOUT_CONTROL1 0x878 -#define PCIE_REG_PHB_QUIESCE_DMA 0x888 -#define PCIE_REG_PHB_DMA_READ_TAG_ACTV 0x900 -#define PCIE_REG_PHB_TCE_READ_TAG_ACTV 0x908 - -/* FIR registers */ -#define PCIE_REG_LEM_FIR_ACCUM 0xc00 -#define PCIE_REG_LEM_FIR_AND_MASK 0xc08 -#define PCIE_REG_LEM_FIR_OR_MASK 0xc10 -#define PCIE_REG_LEM_ACTION0 0xc18 -#define PCIE_REG_LEM_ACTION1 0xc20 -#define PCIE_REG_LEM_ERROR_MASK 0xc30 -#define PCIE_REG_LEM_ERROR_AND_MASK 0xc38 -#define PCIE_REG_LEM_ERROR_OR_MASK 0xc40 - -/* PHB Error registers */ -#define PCIE_REG_PHB_ERR_STATUS 0xc80 -#define PCIE_REG_PHB_ERR1_STATUS 0xc88 -#define PCIE_REG_PHB_ERR_INJECT 0xc90 -#define PCIE_REG_PHB_ERR_LEM_ENABLE 0xc98 -#define PCIE_REG_PHB_ERR_IRQ_ENABLE 0xca0 -#define PCIE_REG_PHB_ERR_FREEZE_ENABLE 0xca8 -#define PCIE_REG_PHB_ERR_SIDE_ENABLE 0xcb8 -#define PCIE_REG_PHB_ERR_LOG_0 0xcc0 -#define PCIE_REG_PHB_ERR_LOG_1 0xcc8 -#define PCIE_REG_PHB_ERR_STATUS_MASK 0xcd0 -#define PCIE_REG_PHB_ERR1_STATUS_MASK 0xcd8 - -#define PCIE_REG_MMIO_ERR_STATUS 0xd00 -#define PCIE_REG_MMIO_ERR1_STATUS 
0xd08 -#define PCIE_REG_MMIO_ERR_INJECT 0xd10 -#define PCIE_REG_MMIO_ERR_LEM_ENABLE 0xd18 -#define PCIE_REG_MMIO_ERR_IRQ_ENABLE 0xd20 -#define PCIE_REG_MMIO_ERR_FREEZE_ENABLE 0xd28 -#define PCIE_REG_MMIO_ERR_SIDE_ENABLE 0xd38 -#define PCIE_REG_MMIO_ERR_LOG_0 0xd40 -#define PCIE_REG_MMIO_ERR_LOG_1 0xd48 -#define PCIE_REG_MMIO_ERR_STATUS_MASK 0xd50 -#define PCIE_REG_MMIO_ERR1_STATUS_MASK 0xd58 - -#define PCIE_REG_DMA_ERR_STATUS 0xd80 -#define PCIE_REG_DMA_ERR1_STATUS 0xd88 -#define PCIE_REG_DMA_ERR_INJECT 0xd90 -#define PCIE_REG_DMA_ERR_LEM_ENABLE 0xd98 -#define PCIE_REG_DMA_ERR_IRQ_ENABLE 0xda0 -#define PCIE_REG_DMA_ERR_FREEZE_ENABLE 0xda8 -#define PCIE_REG_DMA_ERR_SIDE_ENABLE 0xdb8 -#define PCIE_REG_DMA_ERR_LOG_0 0xdc0 -#define PCIE_REG_DMA_ERR_LOG_1 0xdc8 -#define PCIE_REG_DMA_ERR_STATUS_MASK 0xdd0 -#define PCIE_REG_DMA_ERR1_STATUS_MASK 0xdd8 - -/* Shortcuts for access to the above using the PHB definitions - * with an offset - */ -#define PCIE_REG_ERR_PHB_OFFSET 0x0 -#define PCIE_REG_ERR_MMIO_OFFSET 0x80 -#define PCIE_REG_ERR_DMA_OFFSET 0x100 - -/* Debug and Trace registers */ -#define PCIE_REG_PHB_DEBUG_CONTROL0 0xe00 -#define PCIE_REG_PHB_DEBUG_STATUS0 0xe08 -#define PCIE_REG_PHB_DEBUG_CONTROL1 0xe10 -#define PCIE_REG_PHB_DEBUG_STATUS1 0xe18 -#define PCIE_REG_PHB_DEBUG_CONTROL2 0xe20 -#define PCIE_REG_PHB_DEBUG_STATUS2 0xe28 -#define PCIE_REG_PHB_DEBUG_CONTROL3 0xe30 -#define PCIE_REG_PHB_DEBUG_STATUS3 0xe38 -#define PCIE_REG_PHB_DEBUG_CONTROL4 0xe40 -#define PCIE_REG_PHB_DEBUG_STATUS4 0xe48 -#define PCIE_REG_PHB_DEBUG_CONTROL5 0xe50 -#define PCIE_REG_PHB_DEBUG_STATUS5 0xe58 -#define PCIE_REG_PHB_DEBUG_CONTROL6 0xe60 -#define PCIE_REG_PHB_DEBUG_STATUS6 0xe68 - -/* Definition for PCIe errors */ -struct wsp_pcie_err_log_data { - __u64 phb_err; - __u64 phb_err1; - __u64 phb_log0; - __u64 phb_log1; - __u64 mmio_err; - __u64 mmio_err1; - __u64 mmio_log0; - __u64 mmio_log1; - __u64 dma_err; - __u64 dma_err1; - __u64 dma_log0; - __u64 dma_log1; - __u64 utl_sys_err; - __u64 utl_port_err; - __u64 utl_rc_err; - __u64 unused; -}; - -#endif /* __WSP_PCI_H */ diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c index 9dee47071af..de8d9483bbe 100644 --- a/arch/powerpc/sysdev/xics/icp-native.c +++ b/arch/powerpc/sysdev/xics/icp-native.c @@ -26,6 +26,7 @@ #include <asm/errno.h> #include <asm/xics.h> #include <asm/kvm_ppc.h> +#include <asm/dbell.h> struct icp_ipl { union { @@ -145,7 +146,13 @@ static unsigned int icp_native_get_irq(void) static void icp_native_cause_ipi(int cpu, unsigned long data) { kvmppc_set_host_ipi(cpu, 1); - icp_native_set_qirr(cpu, IPI_PRIORITY); +#ifdef CONFIG_PPC_DOORBELL + if (cpu_has_feature(CPU_FTR_DBELL) && + (cpumask_test_cpu(cpu, cpu_sibling_mask(smp_processor_id())))) + doorbell_cause_ipi(cpu, data); + else +#endif + icp_native_set_qirr(cpu, IPI_PRIORITY); } void xics_wake_cpu(int cpu) diff --git a/arch/powerpc/xmon/nonstdio.c b/arch/powerpc/xmon/nonstdio.c index bce3dcfe505..c9874861789 100644 --- a/arch/powerpc/xmon/nonstdio.c +++ b/arch/powerpc/xmon/nonstdio.c @@ -122,7 +122,7 @@ void xmon_printf(const char *format, ...) 
if (n && rc == 0) { /* No udbg hooks, fallback to printk() - dangerous */ - printk(xmon_outbuf); + printk("%s", xmon_outbuf); } } diff --git a/arch/um/Makefile b/arch/um/Makefile index 36e658a4291..e4b1a9639c4 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -111,8 +111,7 @@ endef KBUILD_KCONFIG := $(HOST_DIR)/um/Kconfig archheaders: - $(Q)$(MAKE) -C '$(srctree)' KBUILD_SRC= \ - ARCH=$(HEADER_ARCH) O='$(objtree)' archheaders + $(Q)$(MAKE) KBUILD_SRC= ARCH=$(HEADER_ARCH) archheaders archprepare: include/generated/user_constants.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b660088c220..fcefdda5136 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -121,6 +121,7 @@ config X86 select MODULES_USE_ELF_RELA if X86_64 select CLONE_BACKWARDS if X86_32 select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_QUEUE_RWLOCK select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION select OLD_SIGACTION if X86_32 select COMPAT_OLD_SIGACTION if IA32_EMULATION diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 4582e8e1cd1..7730c1c5c83 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -57,6 +57,12 @@ .long (from) - . ; \ .long (to) - . + 0x7ffffff0 ; \ .popsection + +# define _ASM_NOKPROBE(entry) \ + .pushsection "_kprobe_blacklist","aw" ; \ + _ASM_ALIGN ; \ + _ASM_PTR (entry); \ + .popsection #else # define _ASM_EXTABLE(from,to) \ " .pushsection \"__ex_table\",\"a\"\n" \ @@ -71,6 +77,7 @@ " .long (" #from ") - .\n" \ " .long (" #to ") - . + 0x7ffffff0\n" \ " .popsection\n" +/* For C file, we already have NOKPROBE_SYMBOL macro */ #endif #endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 9454c167629..53cdfb2857a 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -116,4 +116,6 @@ struct kprobe_ctlblk { extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); extern int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); +extern int kprobe_int3_handler(struct pt_regs *regs); +extern int kprobe_debug_handler(struct pt_regs *regs); #endif /* _ASM_X86_KPROBES_H */ diff --git a/arch/x86/include/asm/qrwlock.h b/arch/x86/include/asm/qrwlock.h new file mode 100644 index 00000000000..70f46f07f94 --- /dev/null +++ b/arch/x86/include/asm/qrwlock.h @@ -0,0 +1,17 @@ +#ifndef _ASM_X86_QRWLOCK_H +#define _ASM_X86_QRWLOCK_H + +#include <asm-generic/qrwlock_types.h> + +#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE) +#define queue_write_unlock queue_write_unlock +static inline void queue_write_unlock(struct qrwlock *lock) +{ + barrier(); + ACCESS_ONCE(*(u8 *)&lock->cnts) = 0; +} +#endif + +#include <asm-generic/qrwlock.h> + +#endif /* _ASM_X86_QRWLOCK_H */ diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 0f62f5482d9..54f1c8068c0 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -187,6 +187,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) cpu_relax(); } +#ifndef CONFIG_QUEUE_RWLOCK /* * Read-write spinlocks, allowing multiple readers * but only one writer. 
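The new asm/qrwlock.h above overrides the generic write-unlock path: the writer owns the low byte of lock->cnts, and because x86 stores are totally ordered, clearing that byte with a plain store behind a compiler barrier is enough, so the locked atomic in the generic slow path can be skipped. Below is a minimal user-space model of that fast path, using C11 atomics in place of the kernel's barrier()/ACCESS_ONCE(); the struct layout and helper names are illustrative, not the kernel's (the real implementation lives in kernel/locking/qrwlock.c).

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define QRW_WLOCKED 0xffu                /* writer holds the low byte */

struct qrw_model {
	_Atomic uint32_t cnts;           /* readers above, writer byte below */
};

static void qrw_write_lock(struct qrw_model *l)
{
	uint32_t expected = 0;

	/* Uncontended path only: take the whole word from 0 to WLOCKED. */
	while (!atomic_compare_exchange_weak_explicit(&l->cnts, &expected,
			QRW_WLOCKED, memory_order_acquire, memory_order_relaxed))
		expected = 0;
}

static void qrw_write_unlock(struct qrw_model *l)
{
	/*
	 * Models "barrier(); ACCESS_ONCE(*(u8 *)&lock->cnts) = 0;": on
	 * x86's total-store-order a release byte store suffices and no
	 * locked read-modify-write is needed.  The cast relies on
	 * little-endian layout, exactly as the kernel version does.
	 */
	atomic_store_explicit((_Atomic uint8_t *)&l->cnts, 0,
			      memory_order_release);
}

int main(void)
{
	struct qrw_model l = { 0 };

	qrw_write_lock(&l);
	qrw_write_unlock(&l);
	printf("cnts after unlock: %u\n", atomic_load(&l.cnts));
	return 0;
}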
@@ -269,6 +270,9 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) asm volatile(LOCK_PREFIX WRITE_LOCK_ADD(%1) "%0" : "+m" (rw->write) : "i" (RW_LOCK_BIAS) : "memory"); } +#else +#include <asm/qrwlock.h> +#endif /* CONFIG_QUEUE_RWLOCK */ #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index 4f1bea19945..73c4c007200 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h @@ -34,6 +34,10 @@ typedef struct arch_spinlock { #define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } } +#ifdef CONFIG_QUEUE_RWLOCK +#include <asm-generic/qrwlock_types.h> +#else #include <asm/rwlock.h> +#endif #endif /* _ASM_X86_SPINLOCK_TYPES_H */ diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 8ba18842c48..bc8352e7010 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -68,7 +68,7 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long); dotraplinkage void do_stack_segment(struct pt_regs *, long); #ifdef CONFIG_X86_64 dotraplinkage void do_double_fault(struct pt_regs *, long); -asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *); +asmlinkage struct pt_regs *sync_regs(struct pt_regs *); #endif dotraplinkage void do_general_protection(struct pt_regs *, long); dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); @@ -103,7 +103,6 @@ static inline int get_si_code(unsigned long condition) extern int panic_on_unrecovered_nmi; -void math_error(struct pt_regs *, int, int); void math_emulate(struct math_emu_info *); #ifndef CONFIG_X86_32 asmlinkage void smp_thermal_interrupt(void); diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index 93bee7b9385..74f4c2ff642 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -41,18 +41,18 @@ struct arch_uprobe { u8 ixol[MAX_UINSN_BYTES]; }; - u16 fixups; const struct uprobe_xol_ops *ops; union { -#ifdef CONFIG_X86_64 - unsigned long rip_rela_target_address; -#endif struct { s32 offs; u8 ilen; u8 opc1; - } branch; + } branch; + struct { + u8 fixups; + u8 ilen; + } defparam; }; }; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index df94598ad05..703130f469e 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -5,7 +5,6 @@ #include <linux/mutex.h> #include <linux/list.h> #include <linux/stringify.h> -#include <linux/kprobes.h> #include <linux/mm.h> #include <linux/vmalloc.h> #include <linux/memory.h> @@ -551,7 +550,7 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode, * * Note: Must be called under text_mutex. 
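The arch_uprobe change above replaces the global u16 fixups (and the 64-bit-only rip_rela_target_address) with per-flavor union members: the ops pointer already records how the instruction will be handled, so it can double as the union's discriminant, and each flavor carries only its own scratch fields. A stripped-down user-space sketch of that pattern follows; the names and field values are illustrative.

#include <stdint.h>
#include <stdio.h>

struct probe;                            /* forward declaration */

struct probe_ops {                       /* stands in for uprobe_xol_ops */
	const char *name;
	void (*emulate)(const struct probe *p);
};

struct probe {
	const struct probe_ops *ops;     /* discriminant for the union */
	union {
		struct { int32_t offs; uint8_t ilen; uint8_t opc1; } branch;
		struct { uint8_t fixups; uint8_t ilen; } defparam;
	};
};

static void emulate_branch(const struct probe *p)
{
	printf("branch: ip += %d (len %d)\n", (int)p->branch.offs,
	       p->branch.ilen);
}

static void emulate_default(const struct probe *p)
{
	printf("default: fixups %#x (len %d)\n",
	       (unsigned)p->defparam.fixups, p->defparam.ilen);
}

static const struct probe_ops branch_ops  = { "branch",  emulate_branch  };
static const struct probe_ops default_ops = { "default", emulate_default };

int main(void)
{
	struct probe b = { .ops = &branch_ops,
			   .branch = { .offs = 16, .ilen = 2, .opc1 = 0xeb } };
	struct probe d = { .ops = &default_ops,
			   .defparam = { .fixups = 0x01, .ilen = 5 } };

	b.ops->emulate(&b);              /* union reading is picked by ops */
	d.ops->emulate(&d);
	return 0;
}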
*/ -void *__kprobes text_poke(void *addr, const void *opcode, size_t len) +void *text_poke(void *addr, const void *opcode, size_t len) { unsigned long flags; char *vaddr; diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index eab67047dec..c3fcb5de508 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -60,7 +60,7 @@ void arch_trigger_all_cpu_backtrace(void) smp_mb__after_atomic(); } -static int __kprobes +static int arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) { int cpu; @@ -80,6 +80,7 @@ arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) return NMI_DONE; } +NOKPROBE_SYMBOL(arch_trigger_all_cpu_backtrace_handler); static int __init register_trigger_all_cpu_backtrace(void) { diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 9d0a9795a0f..81e08eff05e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2297,7 +2297,7 @@ int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, int err; if (!config_enabled(CONFIG_SMP)) - return -1; + return -EPERM; if (!cpumask_intersects(mask, cpu_online_mask)) return -EINVAL; @@ -2328,7 +2328,7 @@ int native_ioapic_set_affinity(struct irq_data *data, int ret; if (!config_enabled(CONFIG_SMP)) - return -1; + return -EPERM; raw_spin_lock_irqsave(&ioapic_lock, flags); ret = __ioapic_set_affinity(data, mask, &dest); @@ -3001,9 +3001,11 @@ msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) struct irq_cfg *cfg = data->chip_data; struct msi_msg msg; unsigned int dest; + int ret; - if (__ioapic_set_affinity(data, mask, &dest)) - return -1; + ret = __ioapic_set_affinity(data, mask, &dest); + if (ret) + return ret; __get_cached_msi_msg(data->msi_desc, &msg); @@ -3100,9 +3102,11 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, struct irq_cfg *cfg = data->chip_data; unsigned int dest, irq = data->irq; struct msi_msg msg; + int ret; - if (__ioapic_set_affinity(data, mask, &dest)) - return -1; + ret = __ioapic_set_affinity(data, mask, &dest); + if (ret) + return ret; dmar_msi_read(irq, &msg); @@ -3149,9 +3153,11 @@ static int hpet_msi_set_affinity(struct irq_data *data, struct irq_cfg *cfg = data->chip_data; struct msi_msg msg; unsigned int dest; + int ret; - if (__ioapic_set_affinity(data, mask, &dest)) - return -1; + ret = __ioapic_set_affinity(data, mask, &dest); + if (ret) + return ret; hpet_msi_read(data->handler_data, &msg); @@ -3218,9 +3224,11 @@ ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { struct irq_cfg *cfg = data->chip_data; unsigned int dest; + int ret; - if (__ioapic_set_affinity(data, mask, &dest)) - return -1; + ret = __ioapic_set_affinity(data, mask, &dest); + if (ret) + return ret; target_ht_irq(data->irq, dest, cfg->vector); return IRQ_SET_MASK_OK_NOCOPY; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2cbbf88d8f2..ef1b93f18ed 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -8,6 +8,7 @@ #include <linux/delay.h> #include <linux/sched.h> #include <linux/init.h> +#include <linux/kprobes.h> #include <linux/kgdb.h> #include <linux/smp.h> #include <linux/io.h> @@ -1193,6 +1194,7 @@ int is_debug_stack(unsigned long addr) (addr <= __get_cpu_var(debug_stack_addr) && addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); } +NOKPROBE_SYMBOL(is_debug_stack); DEFINE_PER_CPU(u32, debug_idt_ctr); @@ -1201,6 +1203,7 @@ 
void debug_stack_set_zero(void) this_cpu_inc(debug_idt_ctr); load_current_idt(); } +NOKPROBE_SYMBOL(debug_stack_set_zero); void debug_stack_reset(void) { @@ -1209,6 +1212,7 @@ void debug_stack_reset(void) if (this_cpu_dec_return(debug_idt_ctr) == 0) load_current_idt(); } +NOKPROBE_SYMBOL(debug_stack_reset); #else /* CONFIG_X86_64 */ diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 76f98fe5b35..a450373e8e9 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -132,15 +132,6 @@ static void __init ms_hyperv_init_platform(void) lapic_timer_frequency = hv_lapic_frequency; printk(KERN_INFO "HyperV: LAPIC Timer Frequency: %#x\n", lapic_timer_frequency); - - /* - * On Hyper-V, when we are booting off an EFI firmware stack, - * we do not have many legacy devices including PIC, PIT etc. - */ - if (efi_enabled(EFI_BOOT)) { - printk(KERN_INFO "HyperV: Using null_legacy_pic\n"); - legacy_pic = &null_legacy_pic; - } } #endif diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 89f3b7c1af2..2bdfbff8a4f 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -303,15 +303,6 @@ int x86_setup_perfctr(struct perf_event *event) hwc->sample_period = x86_pmu.max_period; hwc->last_period = hwc->sample_period; local64_set(&hwc->period_left, hwc->sample_period); - } else { - /* - * If we have a PMU initialized but no APIC - * interrupts, we cannot sample hardware - * events (user-space has to fall back and - * sample via a hrtimer based software event): - */ - if (!x86_pmu.apic) - return -EOPNOTSUPP; } if (attr->type == PERF_TYPE_RAW) @@ -1293,7 +1284,7 @@ void perf_events_lapic_init(void) apic_write(APIC_LVTPC, APIC_DM_NMI); } -static int __kprobes +static int perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) { u64 start_clock; @@ -1311,6 +1302,7 @@ perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) return ret; } +NOKPROBE_SYMBOL(perf_event_nmi_handler); struct event_constraint emptyconstraint; struct event_constraint unconstrained; @@ -1366,6 +1358,15 @@ static void __init pmu_check_apic(void) x86_pmu.apic = 0; pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); pr_info("no hardware sampling interrupt available.\n"); + + /* + * If we have a PMU initialized but no APIC + * interrupts, we cannot sample hardware + * events (user-space has to fall back and + * sample via a hrtimer based software event): + */ + pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + } static struct attribute_group x86_pmu_format_group = { diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index 4c36bbe3173..cbb1be3ed9e 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c @@ -593,7 +593,7 @@ out: return 1; } -static int __kprobes +static int perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs) { int handled = 0; @@ -606,6 +606,7 @@ perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs) return handled; } +NOKPROBE_SYMBOL(perf_ibs_nmi_handler); static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) { diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index d82d155aca8..9dd2459a4c7 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -384,6 +384,9 @@ static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event) if (br_type & 
PERF_SAMPLE_BRANCH_NO_TX) mask |= X86_BR_NO_TX; + if (br_type & PERF_SAMPLE_BRANCH_COND) + mask |= X86_BR_JCC; + /* * stash actual user request into reg, it may * be used by fixup code for some CPU @@ -678,6 +681,7 @@ static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { * NHM/WSM erratum: must include IND_JMP to capture IND_CALL */ [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP, + [PERF_SAMPLE_BRANCH_COND] = LBR_JCC, }; static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { @@ -689,6 +693,7 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL | LBR_FAR, [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL, + [PERF_SAMPLE_BRANCH_COND] = LBR_JCC, }; /* core */ diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index d9c12d3022a..b74ebc7c440 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -200,7 +200,7 @@ static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; static int die_owner = -1; static unsigned int die_nest_count; -unsigned __kprobes long oops_begin(void) +unsigned long oops_begin(void) { int cpu; unsigned long flags; @@ -223,8 +223,9 @@ unsigned __kprobes long oops_begin(void) return flags; } EXPORT_SYMBOL_GPL(oops_begin); +NOKPROBE_SYMBOL(oops_begin); -void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) +void oops_end(unsigned long flags, struct pt_regs *regs, int signr) { if (regs && kexec_should_crash(current)) crash_kexec(regs); @@ -247,8 +248,9 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) panic("Fatal exception"); do_exit(signr); } +NOKPROBE_SYMBOL(oops_end); -int __kprobes __die(const char *str, struct pt_regs *regs, long err) +int __die(const char *str, struct pt_regs *regs, long err) { #ifdef CONFIG_X86_32 unsigned short ss; @@ -291,6 +293,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) #endif return 0; } +NOKPROBE_SYMBOL(__die); /* * This is gone through when something in the kernel has done something bad diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 98313ffaae6..f0da82b8e63 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -315,10 +315,6 @@ ENTRY(ret_from_kernel_thread) ENDPROC(ret_from_kernel_thread) /* - * Interrupt exit functions should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" -/* * Return to user mode is not as complex as all this looks, * but we want the default path for a system call return to * go as quickly as possible which is why some of this is @@ -372,10 +368,6 @@ need_resched: END(resume_kernel) #endif CFI_ENDPROC -/* - * End of kprobes section - */ - .popsection /* SYSENTER_RETURN points to after the "sysenter" instruction in the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. 
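The perf_event_intel_lbr.c hunks above teach both the software filter and the NHM/SNB select tables about PERF_SAMPLE_BRANCH_COND by mapping it onto the JCC (conditional jump) class. The translation is table-driven: each user-visible PERF_SAMPLE_BRANCH_* bit indexes an array of hardware select bits that get OR-ed together. A small sketch of that scheme, with made-up bit values rather than the real MSR encodings:

#include <stdint.h>
#include <stdio.h>

/* User-visible sample flags (bit positions are illustrative). */
#define BR_ANY       (1u << 0)
#define BR_IND_CALL  (1u << 1)
#define BR_COND      (1u << 2)          /* the newly wired-up flag */
#define BR_MAX       3

/* Hardware select bits (values are illustrative, not real MSR bits). */
#define LBR_ANY       0x3ffu
#define LBR_IND_CALL  (1u << 6)
#define LBR_JCC       (1u << 2)

/* One table entry per user flag, as in nhm_lbr_sel_map[]. */
static const uint32_t sel_map[BR_MAX] = {
	[0] = LBR_ANY,
	[1] = LBR_IND_CALL,
	[2] = LBR_JCC,                  /* BRANCH_COND -> conditional jumps */
};

static uint32_t lbr_sel_config(uint32_t branch_sample_type)
{
	uint32_t sel = 0;

	for (int i = 0; i < BR_MAX; i++)
		if (branch_sample_type & (1u << i))
			sel |= sel_map[i];
	return sel;
}

int main(void)
{
	printf("select = %#x\n", (unsigned)lbr_sel_config(BR_COND | BR_IND_CALL));
	return 0;
}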
*/ @@ -495,10 +487,6 @@ sysexit_audit: PTGS_TO_GS_EX ENDPROC(ia32_sysenter_target) -/* - * syscall stub including irq exit should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" # system call handler stub ENTRY(system_call) RING0_INT_FRAME # can't unwind into user space anyway @@ -690,10 +678,6 @@ syscall_badsys: jmp resume_userspace END(syscall_badsys) CFI_ENDPROC -/* - * End of kprobes section - */ - .popsection .macro FIXUP_ESPFIX_STACK /* @@ -784,10 +768,6 @@ common_interrupt: ENDPROC(common_interrupt) CFI_ENDPROC -/* - * Irq entries should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" #define BUILD_INTERRUPT3(name, nr, fn) \ ENTRY(name) \ RING0_INT_FRAME; \ @@ -964,10 +944,6 @@ ENTRY(spurious_interrupt_bug) jmp error_code CFI_ENDPROC END(spurious_interrupt_bug) -/* - * End of kprobes section - */ - .popsection #ifdef CONFIG_XEN /* Xen doesn't set %esp to be precisely what the normal sysenter @@ -1242,11 +1218,6 @@ return_to_handler: jmp *%ecx #endif -/* - * Some functions should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" - #ifdef CONFIG_TRACING ENTRY(trace_page_fault) RING0_EC_FRAME @@ -1460,7 +1431,3 @@ ENTRY(async_page_fault) END(async_page_fault) #endif -/* - * End of kprobes section - */ - .popsection diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 48a2644a082..b25ca969edd 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -284,8 +284,6 @@ ENDPROC(native_usergs_sysret64) TRACE_IRQS_OFF .endm -/* save complete stack frame */ - .pushsection .kprobes.text, "ax" ENTRY(save_paranoid) XCPT_FRAME 1 RDI+8 cld @@ -314,7 +312,6 @@ ENTRY(save_paranoid) 1: ret CFI_ENDPROC END(save_paranoid) - .popsection /* * A newly forked process directly context switches into this address. @@ -772,10 +769,6 @@ END(interrupt) call \func .endm -/* - * Interrupt entry/exit should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" /* * The interrupt stubs push (~vector+0x80) onto the stack and * then jump to common_interrupt. @@ -983,11 +976,6 @@ END(__do_double_fault) #endif /* - * End of kprobes section - */ - .popsection - -/* * APIC interrupts. 
*/ .macro apicinterrupt3 num sym do_sym @@ -1321,11 +1309,6 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ hyperv_callback_vector hyperv_vector_handler #endif /* CONFIG_HYPERV */ -/* - * Some functions should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" - idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK idtentry stack_segment do_stack_segment has_error_code=1 paranoid=1 @@ -1742,7 +1725,3 @@ ENTRY(ignore_sysret) CFI_ENDPROC END(ignore_sysret) -/* - * End of kprobes section - */ - .popsection diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index a67b47c3131..5f9cf20cdb6 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -32,7 +32,6 @@ #include <linux/irqflags.h> #include <linux/notifier.h> #include <linux/kallsyms.h> -#include <linux/kprobes.h> #include <linux/percpu.h> #include <linux/kdebug.h> #include <linux/kernel.h> @@ -424,7 +423,7 @@ EXPORT_SYMBOL_GPL(hw_breakpoint_restore); * NOTIFY_STOP returned for all other cases * */ -static int __kprobes hw_breakpoint_handler(struct die_args *args) +static int hw_breakpoint_handler(struct die_args *args) { int i, cpu, rc = NOTIFY_STOP; struct perf_event *bp; @@ -511,7 +510,7 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) /* * Handle debug exception notifications. */ -int __kprobes hw_breakpoint_exceptions_notify( +int hw_breakpoint_exceptions_notify( struct notifier_block *unused, unsigned long val, void *data) { if (val != DIE_DEBUG) diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 2e977b5d61d..8af817105e2 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -299,13 +299,31 @@ static void unmask_8259A(void) static void init_8259A(int auto_eoi) { unsigned long flags; + unsigned char probe_val = ~(1 << PIC_CASCADE_IR); + unsigned char new_val; i8259A_auto_eoi = auto_eoi; raw_spin_lock_irqsave(&i8259A_lock, flags); - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ + /* + * Check to see if we have a PIC. + * Mask all except the cascade and read + * back the value we just wrote. If we don't + * have a PIC, we will read 0xff as opposed to the + * value we wrote. + */ outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ + outb(probe_val, PIC_MASTER_IMR); + new_val = inb(PIC_MASTER_IMR); + if (new_val != probe_val) { + printk(KERN_INFO "Using NULL legacy PIC\n"); + legacy_pic = &null_legacy_pic; + raw_spin_unlock_irqrestore(&i8259A_lock, flags); + return; + } + + outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ /* * outb_pic - this has to work on a wide range of PC hardware. diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 11ccfb0a63e..922d2858102 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -365,6 +365,7 @@ void fixup_irqs(void) struct irq_desc *desc; struct irq_data *data; struct irq_chip *chip; + int ret; for_each_irq_desc(irq, desc) { int break_affinity = 0; @@ -403,10 +404,14 @@ void fixup_irqs(void) if (!irqd_can_move_in_process_context(data) && chip->irq_mask) chip->irq_mask(data); - if (chip->irq_set_affinity) - chip->irq_set_affinity(data, affinity, true); - else if (!(warned++)) - set_affinity = 0; + if (chip->irq_set_affinity) { + ret = chip->irq_set_affinity(data, affinity, true); + if (ret == -ENOSPC) + pr_crit("IRQ %d set affinity failed because there are no available vectors. 
The device assigned to this IRQ is unstable.\n", irq); + } else { + if (!(warned++)) + set_affinity = 0; + } /* * We unmask if the irq was not marked masked by the diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 61b17dc2c27..7596df66490 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -112,7 +112,8 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = { const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); -static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) +static nokprobe_inline void +__synthesize_relative_insn(void *from, void *to, u8 op) { struct __arch_relative_insn { u8 op; @@ -125,21 +126,23 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) } /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ -void __kprobes synthesize_reljump(void *from, void *to) +void synthesize_reljump(void *from, void *to) { __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); } +NOKPROBE_SYMBOL(synthesize_reljump); /* Insert a call instruction at address 'from', which calls address 'to'.*/ -void __kprobes synthesize_relcall(void *from, void *to) +void synthesize_relcall(void *from, void *to) { __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); } +NOKPROBE_SYMBOL(synthesize_relcall); /* * Skip the prefixes of the instruction. */ -static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn) +static kprobe_opcode_t *skip_prefixes(kprobe_opcode_t *insn) { insn_attr_t attr; @@ -154,12 +157,13 @@ static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn) #endif return insn; } +NOKPROBE_SYMBOL(skip_prefixes); /* * Returns non-zero if opcode is boostable. * RIP relative instructions are adjusted at copying time in 64 bits mode */ -int __kprobes can_boost(kprobe_opcode_t *opcodes) +int can_boost(kprobe_opcode_t *opcodes) { kprobe_opcode_t opcode; kprobe_opcode_t *orig_opcodes = opcodes; @@ -260,7 +264,7 @@ unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long add } /* Check if paddr is at an instruction boundary */ -static int __kprobes can_probe(unsigned long paddr) +static int can_probe(unsigned long paddr) { unsigned long addr, __addr, offset = 0; struct insn insn; @@ -299,7 +303,7 @@ static int __kprobes can_probe(unsigned long paddr) /* * Returns non-zero if opcode modifies the interrupt flag. */ -static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) +static int is_IF_modifier(kprobe_opcode_t *insn) { /* Skip prefixes */ insn = skip_prefixes(insn); @@ -322,7 +326,7 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) * If not, return null. * Only applicable to 64-bit x86. 
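__synthesize_relative_insn() above (now nokprobe_inline rather than __kprobes) builds the 5-byte rel32 jump/call that kprobe boosting and optprobes patch into kernel text: one opcode byte followed by a displacement measured from the end of the instruction. A self-contained sketch of the same encoding, writing into a local buffer instead of live text:

#include <stdint.h>
#include <stdio.h>

#define RELATIVEJUMP_OPCODE 0xe9        /* jmp rel32  */
#define RELATIVECALL_OPCODE 0xe8        /* call rel32 */

static void synthesize_relative_insn(uint8_t *from, const uint8_t *to,
				     uint8_t op)
{
	struct __attribute__((packed)) rel_insn {
		uint8_t op;
		int32_t raddr;
	} *insn = (struct rel_insn *)from;

	/* rel32 is relative to the address *after* the 5-byte insn. */
	insn->raddr = (int32_t)(to - (from + 5));
	insn->op = op;
}

int main(void)
{
	uint8_t buf[5];

	synthesize_relative_insn(buf, buf + 100, RELATIVEJUMP_OPCODE);
	for (int i = 0; i < 5; i++)
		printf("%02x ", buf[i]);        /* e9 5f 00 00 00 */
	printf("\n");
	return 0;
}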
*/ -int __kprobes __copy_instruction(u8 *dest, u8 *src) +int __copy_instruction(u8 *dest, u8 *src) { struct insn insn; kprobe_opcode_t buf[MAX_INSN_SIZE]; @@ -365,7 +369,7 @@ int __kprobes __copy_instruction(u8 *dest, u8 *src) return insn.length; } -static int __kprobes arch_copy_kprobe(struct kprobe *p) +static int arch_copy_kprobe(struct kprobe *p) { int ret; @@ -392,7 +396,7 @@ static int __kprobes arch_copy_kprobe(struct kprobe *p) return 0; } -int __kprobes arch_prepare_kprobe(struct kprobe *p) +int arch_prepare_kprobe(struct kprobe *p) { if (alternatives_text_reserved(p->addr, p->addr)) return -EINVAL; @@ -407,17 +411,17 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return arch_copy_kprobe(p); } -void __kprobes arch_arm_kprobe(struct kprobe *p) +void arch_arm_kprobe(struct kprobe *p) { text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1); } -void __kprobes arch_disarm_kprobe(struct kprobe *p) +void arch_disarm_kprobe(struct kprobe *p) { text_poke(p->addr, &p->opcode, 1); } -void __kprobes arch_remove_kprobe(struct kprobe *p) +void arch_remove_kprobe(struct kprobe *p) { if (p->ainsn.insn) { free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1)); @@ -425,7 +429,8 @@ void __kprobes arch_remove_kprobe(struct kprobe *p) } } -static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +static nokprobe_inline void +save_previous_kprobe(struct kprobe_ctlblk *kcb) { kcb->prev_kprobe.kp = kprobe_running(); kcb->prev_kprobe.status = kcb->kprobe_status; @@ -433,7 +438,8 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) kcb->prev_kprobe.saved_flags = kcb->kprobe_saved_flags; } -static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) +static nokprobe_inline void +restore_previous_kprobe(struct kprobe_ctlblk *kcb) { __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); kcb->kprobe_status = kcb->prev_kprobe.status; @@ -441,8 +447,9 @@ static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags; } -static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) +static nokprobe_inline void +set_current_kprobe(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) { __this_cpu_write(current_kprobe, p); kcb->kprobe_saved_flags = kcb->kprobe_old_flags @@ -451,7 +458,7 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF; } -static void __kprobes clear_btf(void) +static nokprobe_inline void clear_btf(void) { if (test_thread_flag(TIF_BLOCKSTEP)) { unsigned long debugctl = get_debugctlmsr(); @@ -461,7 +468,7 @@ static void __kprobes clear_btf(void) } } -static void __kprobes restore_btf(void) +static nokprobe_inline void restore_btf(void) { if (test_thread_flag(TIF_BLOCKSTEP)) { unsigned long debugctl = get_debugctlmsr(); @@ -471,8 +478,7 @@ static void __kprobes restore_btf(void) } } -void __kprobes -arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) +void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { unsigned long *sara = stack_addr(regs); @@ -481,9 +487,10 @@ arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) /* Replace the return addr with trampoline addr */ *sara = (unsigned long) &kretprobe_trampoline; } +NOKPROBE_SYMBOL(arch_prepare_kretprobe); -static void __kprobes -setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk 
*kcb, int reenter) +static void setup_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb, int reenter) { if (setup_detour_execution(p, regs, reenter)) return; @@ -519,22 +526,24 @@ setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *k else regs->ip = (unsigned long)p->ainsn.insn; } +NOKPROBE_SYMBOL(setup_singlestep); /* * We have reentered the kprobe_handler(), since another probe was hit while * within the handler. We save the original kprobes variables and just single * step on the instruction of the new probe without calling any user handlers. */ -static int __kprobes -reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) +static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) { switch (kcb->kprobe_status) { case KPROBE_HIT_SSDONE: case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SS: kprobes_inc_nmissed_count(p); setup_singlestep(p, regs, kcb, 1); break; - case KPROBE_HIT_SS: + case KPROBE_REENTER: /* A probe has been hit in the codepath leading up to, or just * after, single-stepping of a probed instruction. This entire * codepath should strictly reside in .kprobes.text section. @@ -553,12 +562,13 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb return 1; } +NOKPROBE_SYMBOL(reenter_kprobe); /* * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled throughout this function. */ -static int __kprobes kprobe_handler(struct pt_regs *regs) +int kprobe_int3_handler(struct pt_regs *regs) { kprobe_opcode_t *addr; struct kprobe *p; @@ -621,12 +631,13 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) preempt_enable_no_resched(); return 0; } +NOKPROBE_SYMBOL(kprobe_int3_handler); /* * When a retprobed function returns, this code saves registers and * calls trampoline_handler() runs, which calls the kretprobe's handler. */ -static void __used __kprobes kretprobe_trampoline_holder(void) +static void __used kretprobe_trampoline_holder(void) { asm volatile ( ".global kretprobe_trampoline\n" @@ -657,11 +668,13 @@ static void __used __kprobes kretprobe_trampoline_holder(void) #endif " ret\n"); } +NOKPROBE_SYMBOL(kretprobe_trampoline_holder); +NOKPROBE_SYMBOL(kretprobe_trampoline); /* * Called from kretprobe_trampoline */ -__visible __used __kprobes void *trampoline_handler(struct pt_regs *regs) +__visible __used void *trampoline_handler(struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; @@ -747,6 +760,7 @@ __visible __used __kprobes void *trampoline_handler(struct pt_regs *regs) } return (void *)orig_ret_address; } +NOKPROBE_SYMBOL(trampoline_handler); /* * Called after single-stepping. p->addr is the address of the @@ -775,8 +789,8 @@ __visible __used __kprobes void *trampoline_handler(struct pt_regs *regs) * jump instruction after the copied instruction, that jumps to the next * instruction after the probepoint. 
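The long comment above describes the bookkeeping that resume_execution() (below) performs after single-stepping out of line: the instruction actually executed from the detour buffer, so the post-step IP, and any address the instruction pushed, must be rebased from the copy back into the original text. The arithmetic is a plain offset translation, sketched here with made-up addresses:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uintptr_t orig_ip = 0x400100;    /* probed instruction's real address */
	uintptr_t copy_ip = 0x7f0000;    /* address of the copy in the slot   */
	uintptr_t post_ss = copy_ip + 3; /* IP after stepping a 3-byte insn   */

	/* Rebase the post-single-step IP back into the original text. */
	uintptr_t fixed = post_ss - copy_ip + orig_ip;

	printf("resume at %#lx\n", (unsigned long)fixed);  /* 0x400103 */
	return 0;
}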
*/ -static void __kprobes -resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) +static void resume_execution(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) { unsigned long *tos = stack_addr(regs); unsigned long copy_ip = (unsigned long)p->ainsn.insn; @@ -851,12 +865,13 @@ resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *k no_change: restore_btf(); } +NOKPROBE_SYMBOL(resume_execution); /* * Interrupts are disabled on entry as trap1 is an interrupt gate and they * remain disabled throughout this function. */ -static int __kprobes post_kprobe_handler(struct pt_regs *regs) +int kprobe_debug_handler(struct pt_regs *regs) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -891,8 +906,9 @@ out: return 1; } +NOKPROBE_SYMBOL(kprobe_debug_handler); -int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +int kprobe_fault_handler(struct pt_regs *regs, int trapnr) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -949,12 +965,13 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) return 0; } +NOKPROBE_SYMBOL(kprobe_fault_handler); /* * Wrapper routine for handling exceptions. */ -int __kprobes -kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) +int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, + void *data) { struct die_args *args = data; int ret = NOTIFY_DONE; @@ -962,22 +979,7 @@ kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *d if (args->regs && user_mode_vm(args->regs)) return ret; - switch (val) { - case DIE_INT3: - if (kprobe_handler(args->regs)) - ret = NOTIFY_STOP; - break; - case DIE_DEBUG: - if (post_kprobe_handler(args->regs)) { - /* - * Reset the BS bit in dr6 (pointed by args->err) to - * denote completion of processing - */ - (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; - ret = NOTIFY_STOP; - } - break; - case DIE_GPF: + if (val == DIE_GPF) { /* * To be potentially processing a kprobe fault and to * trust the result from kprobe_running(), we have @@ -986,14 +988,12 @@ kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *d if (!preemptible() && kprobe_running() && kprobe_fault_handler(args->regs, args->trapnr)) ret = NOTIFY_STOP; - break; - default: - break; } return ret; } +NOKPROBE_SYMBOL(kprobe_exceptions_notify); -int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct jprobe *jp = container_of(p, struct jprobe, kp); unsigned long addr; @@ -1017,8 +1017,9 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) regs->ip = (unsigned long)(jp->entry); return 1; } +NOKPROBE_SYMBOL(setjmp_pre_handler); -void __kprobes jprobe_return(void) +void jprobe_return(void) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -1034,8 +1035,10 @@ void __kprobes jprobe_return(void) " nop \n"::"b" (kcb->jprobe_saved_sp):"memory"); } +NOKPROBE_SYMBOL(jprobe_return); +NOKPROBE_SYMBOL(jprobe_return_end); -int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); u8 *addr = (u8 *) (regs->ip - 1); @@ -1063,13 +1066,22 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) } return 0; } 
+NOKPROBE_SYMBOL(longjmp_break_handler); + +bool arch_within_kprobe_blacklist(unsigned long addr) +{ + return (addr >= (unsigned long)__kprobes_text_start && + addr < (unsigned long)__kprobes_text_end) || + (addr >= (unsigned long)__entry_text_start && + addr < (unsigned long)__entry_text_end); +} int __init arch_init_kprobes(void) { return 0; } -int __kprobes arch_trampoline_kprobe(struct kprobe *p) +int arch_trampoline_kprobe(struct kprobe *p) { return 0; } diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 23ef5c556f0..717b02a22e6 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -25,8 +25,9 @@ #include "common.h" -static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) +static nokprobe_inline +int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) { /* * Emulate singlestep (and also recover regs->ip) @@ -41,18 +42,19 @@ static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, return 1; } -int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) +int skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) { if (kprobe_ftrace(p)) return __skip_singlestep(p, regs, kcb); else return 0; } +NOKPROBE_SYMBOL(skip_singlestep); /* Ftrace callback handler for kprobes */ -void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *regs) +void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct pt_regs *regs) { struct kprobe *p; struct kprobe_ctlblk *kcb; @@ -84,8 +86,9 @@ void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, end: local_irq_restore(flags); } +NOKPROBE_SYMBOL(kprobe_ftrace_handler); -int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p) +int arch_prepare_kprobe_ftrace(struct kprobe *p) { p->ainsn.insn = NULL; p->ainsn.boostable = -1; diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 898160b42e4..f304773285a 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -77,7 +77,7 @@ found: } /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). 
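arch_within_kprobe_blacklist() above is the keystone of this series: instead of relocating probe-unsafe code into a dedicated .kprobes.text section via __kprobes, functions stay where they are and NOKPROBE_SYMBOL() records their addresses in a _kprobe_blacklist table that the kprobe core consults (entry text is range-checked the same way). The section-table trick can be modeled in user space with GCC/clang named sections and the linker-generated __start_/__stop_ symbols; the section name here is made up.

#include <stdio.h>

static void sensitive_handler(void) { }

/* Emulates NOKPROBE_SYMBOL(sensitive_handler): stash the address in a
 * dedicated section instead of moving the function itself. */
static void * const blacklist_entry
	__attribute__((used, section("my_blacklist"))) =
		(void *)sensitive_handler;

/* GNU linkers synthesize these for sections with C-identifier names. */
extern void * const __start_my_blacklist[];
extern void * const __stop_my_blacklist[];

static int is_blacklisted(void *addr)
{
	for (void * const *p = __start_my_blacklist;
	     p < __stop_my_blacklist; p++)
		if (*p == addr)
			return 1;
	return 0;
}

int main(void)
{
	printf("blacklisted: %d\n",
	       is_blacklisted((void *)sensitive_handler));
	return 0;
}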
*/ -static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) +static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) { #ifdef CONFIG_X86_64 *addr++ = 0x48; @@ -138,7 +138,8 @@ asm ( #define INT3_SIZE sizeof(kprobe_opcode_t) /* Optimized kprobe call back function: called from optinsn */ -static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) +static void +optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); unsigned long flags; @@ -168,8 +169,9 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_ } local_irq_restore(flags); } +NOKPROBE_SYMBOL(optimized_callback); -static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) +static int copy_optimized_instructions(u8 *dest, u8 *src) { int len = 0, ret; @@ -189,7 +191,7 @@ static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) } /* Check whether insn is indirect jump */ -static int __kprobes insn_is_indirect_jump(struct insn *insn) +static int insn_is_indirect_jump(struct insn *insn) { return ((insn->opcode.bytes[0] == 0xff && (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ @@ -224,7 +226,7 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) } /* Decode whole function to ensure any instructions don't jump into target */ -static int __kprobes can_optimize(unsigned long paddr) +static int can_optimize(unsigned long paddr) { unsigned long addr, size = 0, offset = 0; struct insn insn; @@ -275,7 +277,7 @@ static int __kprobes can_optimize(unsigned long paddr) } /* Check optimized_kprobe can actually be optimized. */ -int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) +int arch_check_optimized_kprobe(struct optimized_kprobe *op) { int i; struct kprobe *p; @@ -290,15 +292,15 @@ int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) } /* Check the addr is within the optimized instructions. */ -int __kprobes -arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr) +int arch_within_optimized_kprobe(struct optimized_kprobe *op, + unsigned long addr) { return ((unsigned long)op->kp.addr <= addr && (unsigned long)op->kp.addr + op->optinsn.size > addr); } /* Free optimized instruction slot */ -static __kprobes +static void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) { if (op->optinsn.insn) { @@ -308,7 +310,7 @@ void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) } } -void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) +void arch_remove_optimized_kprobe(struct optimized_kprobe *op) { __arch_remove_optimized_kprobe(op, 1); } @@ -318,7 +320,7 @@ void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) * Target instructions MUST be relocatable (checked inside) * This is called when new aggr(opt)probe is allocated or reused. */ -int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) +int arch_prepare_optimized_kprobe(struct optimized_kprobe *op) { u8 *buf; int ret; @@ -372,7 +374,7 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) * Replace breakpoints (int3) with relative jumps. * Caller must call with locking kprobe_mutex and text_mutex. 
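synthesize_set_arg1() in the opt.c hunk above hand-assembles the instruction an optprobe trampoline uses to pass the probed kprobe to optimized_callback(): movabs $imm64, %rdi on x86-64 (hence the 0x48 REX.W prefix before 0xbf) or mov $imm32, %eax on 32-bit. A user-space sketch of the two encodings, buffer-only, nothing is executed:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static int synthesize_set_arg1(uint8_t *addr, unsigned long val)
{
#ifdef __x86_64__
	addr[0] = 0x48;                 /* REX.W prefix           */
	addr[1] = 0xbf;                 /* movabs $imm64, %rdi    */
	memcpy(addr + 2, &val, 8);
	return 10;
#else
	addr[0] = 0xb8;                 /* mov $imm32, %eax       */
	memcpy(addr + 1, &val, 4);
	return 5;
#endif
}

int main(void)
{
	uint8_t buf[16];
	int len = synthesize_set_arg1(buf, 0x12345678);

	for (int i = 0; i < len; i++)
		printf("%02x ", buf[i]);
	printf("\n");
	return 0;
}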
*/ -void __kprobes arch_optimize_kprobes(struct list_head *oplist) +void arch_optimize_kprobes(struct list_head *oplist) { struct optimized_kprobe *op, *tmp; u8 insn_buf[RELATIVEJUMP_SIZE]; @@ -398,7 +400,7 @@ void __kprobes arch_optimize_kprobes(struct list_head *oplist) } /* Replace a relative jump with a breakpoint (int3). */ -void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) +void arch_unoptimize_kprobe(struct optimized_kprobe *op) { u8 insn_buf[RELATIVEJUMP_SIZE]; @@ -424,8 +426,7 @@ extern void arch_unoptimize_kprobes(struct list_head *oplist, } } -int __kprobes -setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) +int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) { struct optimized_kprobe *op; @@ -441,3 +442,4 @@ setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) } return 0; } +NOKPROBE_SYMBOL(setup_detour_execution); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 7e97371387f..3dd8e2c4d74 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -251,8 +251,9 @@ u32 kvm_read_and_reset_pf_reason(void) return reason; } EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason); +NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason); -dotraplinkage void __kprobes +dotraplinkage void do_async_page_fault(struct pt_regs *regs, unsigned long error_code) { enum ctx_state prev_state; @@ -276,6 +277,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) break; } } +NOKPROBE_SYMBOL(do_async_page_fault); static void __init paravirt_ops_setup(void) { diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index b4872b999a7..c3e985d1751 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -110,7 +110,7 @@ static void nmi_max_handler(struct irq_work *w) a->handler, whole_msecs, decimal_msecs); } -static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) +static int nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) { struct nmi_desc *desc = nmi_to_desc(type); struct nmiaction *a; @@ -146,6 +146,7 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2 /* return total number of NMI events handled */ return handled; } +NOKPROBE_SYMBOL(nmi_handle); int __register_nmi_handler(unsigned int type, struct nmiaction *action) { @@ -208,7 +209,7 @@ void unregister_nmi_handler(unsigned int type, const char *name) } EXPORT_SYMBOL_GPL(unregister_nmi_handler); -static __kprobes void +static void pci_serr_error(unsigned char reason, struct pt_regs *regs) { /* check to see if anyone registered against these types of errors */ @@ -238,8 +239,9 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs) reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR; outb(reason, NMI_REASON_PORT); } +NOKPROBE_SYMBOL(pci_serr_error); -static __kprobes void +static void io_check_error(unsigned char reason, struct pt_regs *regs) { unsigned long i; @@ -269,8 +271,9 @@ io_check_error(unsigned char reason, struct pt_regs *regs) reason &= ~NMI_REASON_CLEAR_IOCHK; outb(reason, NMI_REASON_PORT); } +NOKPROBE_SYMBOL(io_check_error); -static __kprobes void +static void unknown_nmi_error(unsigned char reason, struct pt_regs *regs) { int handled; @@ -298,11 +301,12 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) pr_emerg("Dazed and confused, but trying to continue\n"); } +NOKPROBE_SYMBOL(unknown_nmi_error); static DEFINE_PER_CPU(bool, swallow_nmi); static DEFINE_PER_CPU(unsigned long, last_nmi_rip); -static 
__kprobes void default_do_nmi(struct pt_regs *regs) +static void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; int handled; @@ -401,6 +405,7 @@ static __kprobes void default_do_nmi(struct pt_regs *regs) else unknown_nmi_error(reason, regs); } +NOKPROBE_SYMBOL(default_do_nmi); /* * NMIs can hit breakpoints which will cause it to lose its @@ -520,7 +525,7 @@ static inline void nmi_nesting_postprocess(void) } #endif -dotraplinkage notrace __kprobes void +dotraplinkage notrace void do_nmi(struct pt_regs *regs, long error_code) { nmi_nesting_preprocess(regs); @@ -537,6 +542,7 @@ do_nmi(struct pt_regs *regs, long error_code) /* On i386, may loop back to preprocess */ nmi_nesting_postprocess(); } +NOKPROBE_SYMBOL(do_nmi); void stop_nmi(void) { diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 1b10af835c3..548d25f00c9 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -23,6 +23,7 @@ #include <linux/efi.h> #include <linux/bcd.h> #include <linux/highmem.h> +#include <linux/kprobes.h> #include <asm/bug.h> #include <asm/paravirt.h> @@ -389,6 +390,11 @@ __visible struct pv_cpu_ops pv_cpu_ops = { .end_context_switch = paravirt_nop, }; +/* At this point, native_get/set_debugreg has real function entries */ +NOKPROBE_SYMBOL(native_get_debugreg); +NOKPROBE_SYMBOL(native_set_debugreg); +NOKPROBE_SYMBOL(native_load_idt); + struct pv_apic_ops pv_apic_ops = { #ifdef CONFIG_X86_LOCAL_APIC .startup_ipi_hook = paravirt_nop, diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 898d077617a..ca5b02d405c 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -413,12 +413,11 @@ void set_personality_ia32(bool x32) set_thread_flag(TIF_ADDR32); /* Mark the associated mm as containing 32-bit tasks. 
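The set_personality_ia32() hunk here changes mm->context.ia32_compat from a bare flag to recording which compat flavor is in use (TIF_X32 vs TIF_IA32), which the uprobes code later consumes through is_64bit_mm(): x32 tasks use 64-bit addressing, so only a genuine ia32 mm counts as 32-bit. A tiny model of that distinction, with illustrative constants rather than the real TIF bit numbers:

#include <stdbool.h>
#include <stdio.h>

#define TIF_IA32 17     /* illustrative values, not the real bit numbers */
#define TIF_X32  30

struct mm_model { int ia32_compat; };   /* 0 = native 64-bit task */

static bool is_64bit_mm(const struct mm_model *mm)
{
	/* x32 mms still address 64 bits; only true ia32 mms are 32-bit. */
	return mm->ia32_compat != TIF_IA32;
}

int main(void)
{
	struct mm_model native = { 0 }, x32 = { TIF_X32 }, ia32 = { TIF_IA32 };

	printf("native:%d x32:%d ia32:%d\n",
	       is_64bit_mm(&native), is_64bit_mm(&x32), is_64bit_mm(&ia32));
	return 0;
}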
*/ - if (current->mm) - current->mm->context.ia32_compat = 1; - if (x32) { clear_thread_flag(TIF_IA32); set_thread_flag(TIF_X32); + if (current->mm) + current->mm->context.ia32_compat = TIF_X32; current->personality &= ~READ_IMPLIES_EXEC; /* is_compat_task() uses the presence of the x32 syscall bit flag to determine compat status */ @@ -426,6 +425,8 @@ void set_personality_ia32(bool x32) } else { set_thread_flag(TIF_IA32); clear_thread_flag(TIF_X32); + if (current->mm) + current->mm->context.ia32_compat = TIF_IA32; current->personality |= force_personality32; /* Prepare the first "return" to user space */ current_thread_info()->status |= TS_COMPAT; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index f73b5d435bd..c6eb418c562 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -23,6 +23,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/ptrace.h> +#include <linux/uprobes.h> #include <linux/string.h> #include <linux/delay.h> #include <linux/errno.h> @@ -106,7 +107,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) preempt_count_dec(); } -static int __kprobes +static nokprobe_inline int do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, struct pt_regs *regs, long error_code) { @@ -136,7 +137,38 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, return -1; } -static void __kprobes +static siginfo_t *fill_trap_info(struct pt_regs *regs, int signr, int trapnr, + siginfo_t *info) +{ + unsigned long siaddr; + int sicode; + + switch (trapnr) { + default: + return SEND_SIG_PRIV; + + case X86_TRAP_DE: + sicode = FPE_INTDIV; + siaddr = uprobe_get_trap_addr(regs); + break; + case X86_TRAP_UD: + sicode = ILL_ILLOPN; + siaddr = uprobe_get_trap_addr(regs); + break; + case X86_TRAP_AC: + sicode = BUS_ADRALN; + siaddr = 0; + break; + } + + info->si_signo = signr; + info->si_errno = 0; + info->si_code = sicode; + info->si_addr = (void __user *)siaddr; + return info; +} + +static void do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, long error_code, siginfo_t *info) { @@ -168,60 +200,43 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, } #endif - if (info) - force_sig_info(signr, info, tsk); - else - force_sig(signr, tsk); + force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk); } +NOKPROBE_SYMBOL(do_trap); -#define DO_ERROR(trapnr, signr, str, name) \ -dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ -{ \ - enum ctx_state prev_state; \ - \ - prev_state = exception_enter(); \ - if (notify_die(DIE_TRAP, str, regs, error_code, \ - trapnr, signr) == NOTIFY_STOP) { \ - exception_exit(prev_state); \ - return; \ - } \ - conditional_sti(regs); \ - do_trap(trapnr, signr, str, regs, error_code, NULL); \ - exception_exit(prev_state); \ +static void do_error_trap(struct pt_regs *regs, long error_code, char *str, + unsigned long trapnr, int signr) +{ + enum ctx_state prev_state = exception_enter(); + siginfo_t info; + + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != + NOTIFY_STOP) { + conditional_sti(regs); + do_trap(trapnr, signr, str, regs, error_code, + fill_trap_info(regs, signr, trapnr, &info)); + } + + exception_exit(prev_state); } -#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ +#define DO_ERROR(trapnr, signr, str, name) \ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ - siginfo_t info; \ - enum ctx_state prev_state; \ - \ - info.si_signo = signr; \ - info.si_errno = 0; \ - info.si_code = sicode; 
\ - info.si_addr = (void __user *)siaddr; \ - prev_state = exception_enter(); \ - if (notify_die(DIE_TRAP, str, regs, error_code, \ - trapnr, signr) == NOTIFY_STOP) { \ - exception_exit(prev_state); \ - return; \ - } \ - conditional_sti(regs); \ - do_trap(trapnr, signr, str, regs, error_code, &info); \ - exception_exit(prev_state); \ + do_error_trap(regs, error_code, str, trapnr, signr); \ } -DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip ) -DO_ERROR (X86_TRAP_OF, SIGSEGV, "overflow", overflow ) -DO_ERROR (X86_TRAP_BR, SIGSEGV, "bounds", bounds ) -DO_ERROR_INFO(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip ) -DO_ERROR (X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun ) -DO_ERROR (X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS ) -DO_ERROR (X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present ) +DO_ERROR(X86_TRAP_DE, SIGFPE, "divide error", divide_error) +DO_ERROR(X86_TRAP_OF, SIGSEGV, "overflow", overflow) +DO_ERROR(X86_TRAP_BR, SIGSEGV, "bounds", bounds) +DO_ERROR(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op) +DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",coprocessor_segment_overrun) +DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS) +DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) #ifdef CONFIG_X86_32 -DO_ERROR (X86_TRAP_SS, SIGBUS, "stack segment", stack_segment ) +DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) #endif -DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0 ) +DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check) #ifdef CONFIG_X86_64 /* Runs on IST stack */ @@ -263,7 +278,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) } #endif -dotraplinkage void __kprobes +dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) { struct task_struct *tsk; @@ -305,13 +320,14 @@ do_general_protection(struct pt_regs *regs, long error_code) pr_cont("\n"); } - force_sig(SIGSEGV, tsk); + force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); exit: exception_exit(prev_state); } +NOKPROBE_SYMBOL(do_general_protection); /* May run on IST stack. */ -dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code) +dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) { enum ctx_state prev_state; @@ -327,13 +343,18 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co if (poke_int3_handler(regs)) return; - prev_state = exception_enter(); #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) goto exit; #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ +#ifdef CONFIG_KPROBES + if (kprobe_int3_handler(regs)) + return; +#endif + prev_state = exception_enter(); + if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) goto exit; @@ -350,6 +371,7 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co exit: exception_exit(prev_state); } +NOKPROBE_SYMBOL(do_int3); #ifdef CONFIG_X86_64 /* @@ -357,7 +379,7 @@ exit: * for scheduling or signal handling. 
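The traps.c rework above folds the DO_ERROR/DO_ERROR_INFO macro pair into a single table of DO_ERROR() lines plus a runtime fill_trap_info() that picks the siginfo code and fault address per trap number (using uprobe_get_trap_addr() so a uprobed task sees its original IP), with SEND_SIG_PRIV as the default for traps that carry no decoded siginfo. A compact sketch of the same shape; the si_code constants in the comments stand in for the real FPE_/ILL_/BUS_ values:

#include <stdint.h>
#include <stdio.h>

enum trap { TRAP_DE, TRAP_UD, TRAP_AC, TRAP_OTHER };

struct siginfo_model {
	int signo;
	int code;
	uintptr_t addr;                 /* 0 when not meaningful */
	int priv;                       /* 1: behave like SEND_SIG_PRIV */
};

/* Mirrors fill_trap_info(): default traps carry no decoded siginfo. */
static struct siginfo_model fill_trap_info_model(enum trap trapnr,
						 int signr, uintptr_t ip)
{
	switch (trapnr) {
	case TRAP_DE:                   /* divide error */
		return (struct siginfo_model){ signr, 1 /* FPE_INTDIV */, ip, 0 };
	case TRAP_UD:                   /* invalid opcode */
		return (struct siginfo_model){ signr, 2 /* ILL_ILLOPN */, ip, 0 };
	case TRAP_AC:                   /* alignment check */
		return (struct siginfo_model){ signr, 3 /* BUS_ADRALN */, 0, 0 };
	default:
		return (struct siginfo_model){ signr, 0, 0, 1 };
	}
}

int main(void)
{
	struct siginfo_model si = fill_trap_info_model(TRAP_DE, 8, 0x400123);

	printf("signo %d code %d addr %#lx priv %d\n",
	       si.signo, si.code, (unsigned long)si.addr, si.priv);
	return 0;
}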
The actual stack switch is done in * entry.S */ -asmlinkage __visible __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) +asmlinkage __visible struct pt_regs *sync_regs(struct pt_regs *eregs) { struct pt_regs *regs = eregs; /* Did already sync */ @@ -376,6 +398,7 @@ asmlinkage __visible __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) *regs = *eregs; return regs; } +NOKPROBE_SYMBOL(sync_regs); #endif /* @@ -402,7 +425,7 @@ asmlinkage __visible __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) * * May run on IST stack. */ -dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) +dotraplinkage void do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; enum ctx_state prev_state; @@ -410,8 +433,6 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) unsigned long dr6; int si_code; - prev_state = exception_enter(); - get_debugreg(dr6, 6); /* Filter out all the reserved bits which are preset to 1 */ @@ -440,6 +461,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) /* Store the virtualized DR6 value */ tsk->thread.debugreg6 = dr6; +#ifdef CONFIG_KPROBES + if (kprobe_debug_handler(regs)) + goto exit; +#endif + prev_state = exception_enter(); + if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, error_code, SIGTRAP) == NOTIFY_STOP) goto exit; @@ -482,13 +509,14 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) exit: exception_exit(prev_state); } +NOKPROBE_SYMBOL(do_debug); /* * Note that we play around with the 'TS' bit in an attempt to get * the correct behaviour even in the presence of the asynchronous * IRQ13 behaviour */ -void math_error(struct pt_regs *regs, int error_code, int trapnr) +static void math_error(struct pt_regs *regs, int error_code, int trapnr) { struct task_struct *task = current; siginfo_t info; @@ -518,7 +546,7 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr) task->thread.error_code = error_code; info.si_signo = SIGFPE; info.si_errno = 0; - info.si_addr = (void __user *)regs->ip; + info.si_addr = (void __user *)uprobe_get_trap_addr(regs); if (trapnr == X86_TRAP_MF) { unsigned short cwd, swd; /* @@ -645,7 +673,7 @@ void math_state_restore(void) */ if (unlikely(restore_fpu_checking(tsk))) { drop_init_fpu(tsk); - force_sig(SIGSEGV, tsk); + force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); return; } @@ -653,7 +681,7 @@ void math_state_restore(void) } EXPORT_SYMBOL_GPL(math_state_restore); -dotraplinkage void __kprobes +dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code) { enum ctx_state prev_state; @@ -679,6 +707,7 @@ do_device_not_available(struct pt_regs *regs, long error_code) #endif exception_exit(prev_state); } +NOKPROBE_SYMBOL(do_device_not_available); #ifdef CONFIG_X86_32 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index ace22916ade..5d1cbfe4ae5 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -32,20 +32,20 @@ /* Post-execution fixups. 
*/ -/* No fixup needed */ -#define UPROBE_FIX_NONE 0x0 - /* Adjust IP back to vicinity of actual insn */ -#define UPROBE_FIX_IP 0x1 +#define UPROBE_FIX_IP 0x01 /* Adjust the return address of a call insn */ -#define UPROBE_FIX_CALL 0x2 +#define UPROBE_FIX_CALL 0x02 /* Instruction will modify TF, don't change it */ -#define UPROBE_FIX_SETF 0x4 +#define UPROBE_FIX_SETF 0x04 -#define UPROBE_FIX_RIP_AX 0x8000 -#define UPROBE_FIX_RIP_CX 0x4000 +#define UPROBE_FIX_RIP_SI 0x08 +#define UPROBE_FIX_RIP_DI 0x10 +#define UPROBE_FIX_RIP_BX 0x20 +#define UPROBE_FIX_RIP_MASK \ + (UPROBE_FIX_RIP_SI | UPROBE_FIX_RIP_DI | UPROBE_FIX_RIP_BX) #define UPROBE_TRAP_NR UINT_MAX @@ -67,6 +67,7 @@ * to keep gcc from statically optimizing it out, as variable_test_bit makes * some versions of gcc to think only *(unsigned long*) is used. */ +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) static volatile u32 good_insns_32[256 / 32] = { /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* ---------------------------------------------- */ @@ -89,33 +90,12 @@ static volatile u32 good_insns_32[256 / 32] = { /* ---------------------------------------------- */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ }; +#else +#define good_insns_32 NULL +#endif -/* Using this for both 64-bit and 32-bit apps */ -static volatile u32 good_2byte_insns[256 / 32] = { - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - /* ---------------------------------------------- */ - W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */ - W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */ - W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */ - W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ - W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ - W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ - W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */ - W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */ - W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ - W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ - W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */ - W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ - W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */ - W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ - W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */ - W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */ - /* ---------------------------------------------- */ - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ -}; - -#ifdef CONFIG_X86_64 /* Good-instruction tables for 64-bit apps */ +#if defined(CONFIG_X86_64) static volatile u32 good_insns_64[256 / 32] = { /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* ---------------------------------------------- */ @@ -138,7 +118,33 @@ static volatile u32 good_insns_64[256 / 32] = { /* ---------------------------------------------- */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ }; +#else +#define good_insns_64 NULL #endif + +/* Using this for both 64-bit and 32-bit apps */ +static volatile u32 good_2byte_insns[256 / 32] = { + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* ---------------------------------------------- */ + W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */ + W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */ + W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */ + W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ + 
W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ + W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ + W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */ + W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */ + W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ + W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ + W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */ + W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ + W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */ + W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ + W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */ + W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */ + /* ---------------------------------------------- */ + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ +}; #undef W /* @@ -209,16 +215,25 @@ static bool is_prefix_bad(struct insn *insn) return false; } -static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn) +static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool x86_64) { - insn_init(insn, auprobe->insn, false); + u32 volatile *good_insns; + + insn_init(insn, auprobe->insn, x86_64); + /* has the side-effect of processing the entire instruction */ + insn_get_length(insn); + if (WARN_ON_ONCE(!insn_complete(insn))) + return -ENOEXEC; - /* Skip good instruction prefixes; reject "bad" ones. */ - insn_get_opcode(insn); if (is_prefix_bad(insn)) return -ENOTSUPP; - if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32)) + if (x86_64) + good_insns = good_insns_64; + else + good_insns = good_insns_32; + + if (test_bit(OPCODE1(insn), (unsigned long *)good_insns)) return 0; if (insn->opcode.nbytes == 2) { @@ -230,14 +245,18 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn) } #ifdef CONFIG_X86_64 +static inline bool is_64bit_mm(struct mm_struct *mm) +{ + return !config_enabled(CONFIG_IA32_EMULATION) || + !(mm->context.ia32_compat == TIF_IA32); +} /* * If arch_uprobe->insn doesn't use rip-relative addressing, return * immediately. Otherwise, rewrite the instruction so that it accesses * its memory operand indirectly through a scratch register. Set - * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address - * accordingly. (The contents of the scratch register will be saved - * before we single-step the modified instruction, and restored - * afterward.) + * defparam->fixups accordingly. (The contents of the scratch register + * will be saved before we single-step the modified instruction, + * and restored afterward). * * We do this because a rip-relative instruction can access only a * relatively small area (+/- 2 GB from the instruction), and the XOL @@ -248,164 +267,192 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn) * * Some useful facts about rip-relative instructions: * - * - There's always a modrm byte. + * - There's always a modrm byte with bit layout "00 reg 101". * - There's never a SIB byte. * - The displacement is always 4 bytes. + * - REX.B=1 bit in REX prefix, which normally extends r/m field, + * has no effect on rip-relative mode. It doesn't make modrm byte + * with r/m=101 refer to register 1101 = R13. 
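+ * - Example: 8b 05 00 10 00 00 is "mov 0x1000(%rip),%eax": opcode 8b,
+ *   modrm 05 (mod=00 reg=000 rm=101), then the 4-byte displacement.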
 */
-static void
-handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
+static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 {
 	u8 *cursor;
 	u8 reg;
+	u8 reg2;
 
 	if (!insn_rip_relative(insn))
 		return;
 
 	/*
-	 * insn_rip_relative() would have decoded rex_prefix, modrm.
+	 * insn_rip_relative() would have decoded rex_prefix, vex_prefix, modrm.
 	 * Clear REX.b bit (extension of MODRM.rm field):
-	 * we want to encode rax/rcx, not r8/r9.
+	 * we want to encode low numbered reg, not r8+.
 	 */
 	if (insn->rex_prefix.nbytes) {
 		cursor = auprobe->insn + insn_offset_rex_prefix(insn);
-		*cursor &= 0xfe;	/* Clearing REX.B bit */
+		/* REX byte has 0100wrxb layout, clearing REX.b bit */
+		*cursor &= 0xfe;
+	}
+	/*
+	 * Similar treatment for VEX3 prefix.
+	 * TODO: add XOP/EVEX treatment when insn decoder supports them
+	 */
+	if (insn->vex_prefix.nbytes == 3) {
+		/*
+		 * vex2:     c5    rvvvvLpp   (has no b bit)
+		 * vex3/xop: c4/8f rxbmmmmm wvvvvLpp
+		 * evex:     62    rxbR00mm wvvvv1pp zllBVaaa
+		 * (evex will need setting of both b and x since
+		 * in non-sib encoding evex.x is 4th bit of MODRM.rm)
+		 * Setting VEX3.b (setting because it has inverted meaning):
+		 */
+		cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
+		*cursor |= 0x20;
 	}
 
 	/*
+	 * Convert from rip-relative addressing to register-relative addressing
+	 * via a scratch register.
+	 *
+	 * This is tricky since there are insns with modrm byte
+	 * which also use registers not encoded in modrm byte:
+	 * [i]div/[i]mul: implicitly use dx:ax
+	 * shift ops: implicitly use cx
+	 * cmpxchg: implicitly uses ax
+	 * cmpxchg8/16b: implicitly uses dx:ax and bx:cx
+	 * Encoding: 0f c7/1 modrm
+	 * The code below thinks that reg=1 (cx), chooses si as scratch.
+	 * mulx: implicitly uses dx: mulx r/m,r1,r2 does r1:r2 = dx * r/m.
+	 * First appeared in Haswell (BMI2 insn). It is vex-encoded.
+	 * Example where none of bx,cx,dx can be used as scratch reg:
+	 * c4 e2 63 f6 0d disp32   mulx disp32(%rip),%ebx,%ecx
+	 * [v]pcmpistri: implicitly uses cx, xmm0
+	 * [v]pcmpistrm: implicitly uses xmm0
+	 * [v]pcmpestri: implicitly uses ax, dx, cx, xmm0
+	 * [v]pcmpestrm: implicitly uses ax, dx, xmm0
+	 * Evil SSE4.2 string comparison ops from hell.
+	 * maskmovq/[v]maskmovdqu: implicitly uses (ds:rdi) as destination.
+	 * Encoding: 0f f7 modrm, 66 0f f7 modrm, vex-encoded: c5 f9 f7 modrm.
+	 * Store op1, byte-masked by op2 msb's in each byte, to (ds:rdi).
+	 * AMD says it has no 3-operand form (vex.vvvv must be 1111)
+	 * and that it can have only register operands, not mem
+	 * (its modrm byte must have mode=11).
+	 * If these restrictions will ever be lifted,
+	 * we'll need code to prevent selection of di as scratch reg!
+	 *
+	 * Summary: I don't know any insns with modrm byte which
+	 * use SI register implicitly. DI register is used only
+	 * by one insn (maskmovq) and BX register is used
+	 * only by one too (cmpxchg8b).
+	 * BP is stack-segment based (may be a problem?).
+	 * AX, DX, CX are off-limits (many implicit users).
+	 * SP is unusable (it's stack pointer - think about "pop mem";
+	 * also, rsp+disp32 needs sib encoding -> insn length change).
+	 */
+
+	reg = MODRM_REG(insn);	/* Fetch modrm.reg */
+	reg2 = 0xff;		/* Fetch vex.vvvv */
+	if (insn->vex_prefix.nbytes == 2)
+		reg2 = insn->vex_prefix.bytes[1];
+	else if (insn->vex_prefix.nbytes == 3)
+		reg2 = insn->vex_prefix.bytes[2];
+	/*
+	 * TODO: add XOP, EVEX vvvv reading.
+	 *
+	 * vex.vvvv field is in bits 6-3, bits are inverted.
+	 * But in 32-bit mode, high-order bit may be ignored.
+	 * Therefore, let's consider only 3 low-order bits.
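+ * E.g. "vaddps %xmm3,%xmm2,%xmm1" is c5 e8 58 cb; for the vex2 byte 0xe8,
+ * ((0xe8 >> 3) & 0x7) ^ 0x7 == 2, i.e. vvvv names register 2 (xmm2).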
+ */
+	reg2 = ((reg2 >> 3) & 0x7) ^ 0x7;
+	/*
+	 * Register numbering is ax,cx,dx,bx, sp,bp,si,di, r8..r15.
+	 *
+	 * Choose scratch reg. Order is important: must not select bx
+	 * if we can use si (cmpxchg8b case!)
+	 */
+	if (reg != 6 && reg2 != 6) {
+		reg2 = 6;
+		auprobe->defparam.fixups |= UPROBE_FIX_RIP_SI;
+	} else if (reg != 7 && reg2 != 7) {
+		reg2 = 7;
+		auprobe->defparam.fixups |= UPROBE_FIX_RIP_DI;
+		/* TODO (paranoia): force maskmovq to not use di */
+	} else {
+		reg2 = 3;
+		auprobe->defparam.fixups |= UPROBE_FIX_RIP_BX;
+	}
+	/*
 	 * Point cursor at the modrm byte. The next 4 bytes are the
 	 * displacement. Beyond the displacement, for some instructions,
 	 * is the immediate operand.
 	 */
 	cursor = auprobe->insn + insn_offset_modrm(insn);
-	insn_get_length(insn);
-
 	/*
-	 * Convert from rip-relative addressing to indirect addressing
-	 * via a scratch register. Change the r/m field from 0x5 (%rip)
-	 * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field.
+	 * Change modrm from "00 reg 101" to "10 reg reg2". Example:
+	 * 89 05 disp32  mov %eax,disp32(%rip) becomes
+	 * 89 86 disp32  mov %eax,disp32(%rsi)
 	 */
-	reg = MODRM_REG(insn);
-	if (reg == 0) {
-		/*
-		 * The register operand (if any) is either the A register
-		 * (%rax, %eax, etc.) or (if the 0x4 bit is set in the
-		 * REX prefix) %r8. In any case, we know the C register
-		 * is NOT the register operand, so we use %rcx (register
-		 * #1) for the scratch register.
-		 */
-		auprobe->fixups = UPROBE_FIX_RIP_CX;
-		/* Change modrm from 00 000 101 to 00 000 001. */
-		*cursor = 0x1;
-	} else {
-		/* Use %rax (register #0) for the scratch register. */
-		auprobe->fixups = UPROBE_FIX_RIP_AX;
-		/* Change modrm from 00 xxx 101 to 00 xxx 000 */
-		*cursor = (reg << 3);
-	}
-
-	/* Target address = address of next instruction + (signed) offset */
-	auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value;
+	*cursor = 0x80 | (reg << 3) | reg2;
+}
 
-	/* Displacement field is gone; slide immediate field (if any) over. */
-	if (insn->immediate.nbytes) {
-		cursor++;
-		memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes);
-	}
+static inline unsigned long *
+scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	if (auprobe->defparam.fixups & UPROBE_FIX_RIP_SI)
+		return &regs->si;
+	if (auprobe->defparam.fixups & UPROBE_FIX_RIP_DI)
+		return &regs->di;
+	return &regs->bx;
 }
 
 /*
 * If we're emulating a rip-relative instruction, save the contents
 * of the scratch register and store the target address in that register.
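+ * ("Target address" here is the address of the next original instruction,
+ * utask->vaddr + defparam.ilen; the unchanged disp32 in the rewritten insn
+ * then reaches the same effective address from the XOL slot.)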
 */
-static void
-pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
-				struct arch_uprobe_task *autask)
-{
-	if (auprobe->fixups & UPROBE_FIX_RIP_AX) {
-		autask->saved_scratch_register = regs->ax;
-		regs->ax = current->utask->vaddr;
-		regs->ax += auprobe->rip_rela_target_address;
-	} else if (auprobe->fixups & UPROBE_FIX_RIP_CX) {
-		autask->saved_scratch_register = regs->cx;
-		regs->cx = current->utask->vaddr;
-		regs->cx += auprobe->rip_rela_target_address;
-	}
-}
-
-static void
-handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
+static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	if (auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
-		struct arch_uprobe_task *autask;
-
-		autask = &current->utask->autask;
-		if (auprobe->fixups & UPROBE_FIX_RIP_AX)
-			regs->ax = autask->saved_scratch_register;
-		else
-			regs->cx = autask->saved_scratch_register;
+	if (auprobe->defparam.fixups & UPROBE_FIX_RIP_MASK) {
+		struct uprobe_task *utask = current->utask;
+		unsigned long *sr = scratch_reg(auprobe, regs);
 
-		/*
-		 * The original instruction includes a displacement, and so
-		 * is 4 bytes longer than what we've just single-stepped.
-		 * Caller may need to apply other fixups to handle stuff
-		 * like "jmpq *...(%rip)" and "callq *...(%rip)".
-		 */
-		if (correction)
-			*correction += 4;
+		utask->autask.saved_scratch_register = *sr;
+		*sr = utask->vaddr + auprobe->defparam.ilen;
 	}
 }
 
-static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn)
+static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	insn_init(insn, auprobe->insn, true);
-
-	/* Skip good instruction prefixes; reject "bad" ones. */
-	insn_get_opcode(insn);
-	if (is_prefix_bad(insn))
-		return -ENOTSUPP;
+	if (auprobe->defparam.fixups & UPROBE_FIX_RIP_MASK) {
+		struct uprobe_task *utask = current->utask;
+		unsigned long *sr = scratch_reg(auprobe, regs);
 
-	if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64))
-		return 0;
-
-	if (insn->opcode.nbytes == 2) {
-		if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
-			return 0;
+		*sr = utask->autask.saved_scratch_register;
 	}
-	return -ENOTSUPP;
 }
-
-static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
+#else /* 32-bit: */
+static inline bool is_64bit_mm(struct mm_struct *mm)
 {
-	if (mm->context.ia32_compat)
-		return validate_insn_32bits(auprobe, insn);
-	return validate_insn_64bits(auprobe, insn);
+	return false;
 }
-#else /* 32-bit: */
 /*
  * No RIP-relative addressing on 32-bit
  */
-static void handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn)
+static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 {
 }
-static void pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
-				struct arch_uprobe_task *autask)
+static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 }
-static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
-					long *correction)
+static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 }
-
-static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
-{
-	return validate_insn_32bits(auprobe, insn);
-}
 #endif /* CONFIG_X86_64 */
 
 struct uprobe_xol_ops {
 	bool	(*emulate)(struct arch_uprobe *, struct pt_regs *);
 	int	(*pre_xol)(struct arch_uprobe *, struct pt_regs *);
 	int	(*post_xol)(struct arch_uprobe *, struct pt_regs *);
+	void	(*abort)(struct arch_uprobe *, struct pt_regs *);
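+	/* ->abort undoes ->pre_xol(); called from arch_uprobe_abort_xol() */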
 };
 
 static inline int sizeof_long(void)
@@ -415,50 +462,67 @@ static inline int sizeof_long(void)
 
 static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
-	pre_xol_rip_insn(auprobe, regs, &current->utask->autask);
+	riprel_pre_xol(auprobe, regs);
 	return 0;
 }
 
-/*
- * Adjust the return address pushed by a call insn executed out of line.
- */
-static int adjust_ret_addr(unsigned long sp, long correction)
+static int push_ret_address(struct pt_regs *regs, unsigned long ip)
 {
-	int rasize = sizeof_long();
-	long ra;
-
-	if (copy_from_user(&ra, (void __user *)sp, rasize))
-		return -EFAULT;
+	unsigned long new_sp = regs->sp - sizeof_long();
 
-	ra += correction;
-	if (copy_to_user((void __user *)sp, &ra, rasize))
+	if (copy_to_user((void __user *)new_sp, &ip, sizeof_long()))
 		return -EFAULT;
 
+	regs->sp = new_sp;
 	return 0;
 }
 
+/*
+ * We have to fix things up as follows:
+ *
+ * Typically, the new ip is relative to the copied instruction. We need
+ * to make it relative to the original instruction (FIX_IP). Exceptions
+ * are return instructions and absolute or indirect jump or call instructions.
+ *
+ * If the single-stepped instruction was a call, the return address that
+ * is atop the stack is the address following the copied instruction. We
+ * need to make it the address following the original instruction (FIX_CALL).
+ *
+ * If the original instruction was a rip-relative instruction such as
+ * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
+ * instruction using a scratch register -- e.g., "movl %edx,0xnnnn(%rsi)".
+ * We need to restore the contents of the scratch register
+ * (FIX_RIP_reg).
+ */
 static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 	struct uprobe_task *utask = current->utask;
-	long correction = (long)(utask->vaddr - utask->xol_vaddr);
 
-	handle_riprel_post_xol(auprobe, regs, &correction);
-	if (auprobe->fixups & UPROBE_FIX_IP)
+	riprel_post_xol(auprobe, regs);
+	if (auprobe->defparam.fixups & UPROBE_FIX_IP) {
+		long correction = utask->vaddr - utask->xol_vaddr;
 		regs->ip += correction;
-
-	if (auprobe->fixups & UPROBE_FIX_CALL) {
-		if (adjust_ret_addr(regs->sp, correction)) {
-			regs->sp += sizeof_long();
+	} else if (auprobe->defparam.fixups & UPROBE_FIX_CALL) {
+		regs->sp += sizeof_long(); /* Pop incorrect return address */
+		if (push_ret_address(regs, utask->vaddr + auprobe->defparam.ilen))
 			return -ERESTART;
-		}
 	}
+	/* popf; tell the caller to not touch TF */
+	if (auprobe->defparam.fixups & UPROBE_FIX_SETF)
+		utask->autask.saved_tf = true;
 
 	return 0;
 }
 
+static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	riprel_post_xol(auprobe, regs);
+}
+
 static struct uprobe_xol_ops default_xol_ops = {
 	.pre_xol  = default_pre_xol_op,
 	.post_xol = default_post_xol_op,
+	.abort	  = default_abort_op,
 };
 
 static bool branch_is_call(struct arch_uprobe *auprobe)
@@ -520,7 +584,6 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 	unsigned long offs = (long)auprobe->branch.offs;
 
 	if (branch_is_call(auprobe)) {
-		unsigned long new_sp = regs->sp - sizeof_long();
 		/*
 		 * If it fails we execute this (mangled, see the comment in
 		 * branch_clear_offset) insn out-of-line. In the likely case
 		 * this will trigger the trap, and the probed application
 		 * should die or restart the same insn after it handles the
 		 * signal, architecture does not care.
 		 *
 		 * But there is corner case, see the comment in ->post_xol().
*/ - if (copy_to_user((void __user *)new_sp, &new_ip, sizeof_long())) + if (push_ret_address(regs, new_ip)) return false; - regs->sp = new_sp; } else if (!check_jmp_cond(auprobe, regs)) { offs = 0; } @@ -583,11 +645,7 @@ static struct uprobe_xol_ops branch_xol_ops = { static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) { u8 opc1 = OPCODE1(insn); - - /* has the side-effect of processing the entire instruction */ - insn_get_length(insn); - if (WARN_ON_ONCE(!insn_complete(insn))) - return -ENOEXEC; + int i; switch (opc1) { case 0xeb: /* jmp 8 */ @@ -612,6 +670,16 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) return -ENOSYS; } + /* + * 16-bit overrides such as CALLW (66 e8 nn nn) are not supported. + * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix. + * No one uses these insns, reject any branch insns with such prefix. + */ + for (i = 0; i < insn->prefixes.nbytes; i++) { + if (insn->prefixes.bytes[i] == 0x66) + return -ENOTSUPP; + } + auprobe->branch.opc1 = opc1; auprobe->branch.ilen = insn->length; auprobe->branch.offs = insn->immediate.value; @@ -630,10 +698,10 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr) { struct insn insn; - bool fix_ip = true, fix_call = false; + u8 fix_ip_or_call = UPROBE_FIX_IP; int ret; - ret = validate_insn_bits(auprobe, mm, &insn); + ret = uprobe_init_insn(auprobe, &insn, is_64bit_mm(mm)); if (ret) return ret; @@ -642,44 +710,39 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, return ret; /* - * Figure out which fixups arch_uprobe_post_xol() will need to perform, - * and annotate arch_uprobe->fixups accordingly. To start with, ->fixups - * is either zero or it reflects rip-related fixups. + * Figure out which fixups default_post_xol_op() will need to perform, + * and annotate defparam->fixups accordingly. 
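+ * (UPROBE_FIX_IP corrects ->ip after the step, UPROBE_FIX_CALL rewrites
+ * the pushed return address, UPROBE_FIX_SETF tells arch_uprobe_post_xol()
+ * not to touch TF, the popf case.)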
*/ switch (OPCODE1(&insn)) { case 0x9d: /* popf */ - auprobe->fixups |= UPROBE_FIX_SETF; + auprobe->defparam.fixups |= UPROBE_FIX_SETF; break; case 0xc3: /* ret or lret -- ip is correct */ case 0xcb: case 0xc2: case 0xca: - fix_ip = false; + case 0xea: /* jmp absolute -- ip is correct */ + fix_ip_or_call = 0; break; case 0x9a: /* call absolute - Fix return addr, not ip */ - fix_call = true; - fix_ip = false; - break; - case 0xea: /* jmp absolute -- ip is correct */ - fix_ip = false; + fix_ip_or_call = UPROBE_FIX_CALL; break; case 0xff: - insn_get_modrm(&insn); switch (MODRM_REG(&insn)) { case 2: case 3: /* call or lcall, indirect */ - fix_call = true; + fix_ip_or_call = UPROBE_FIX_CALL; + break; case 4: case 5: /* jmp or ljmp, indirect */ - fix_ip = false; + fix_ip_or_call = 0; + break; } /* fall through */ default: - handle_riprel_insn(auprobe, &insn); + riprel_analyze(auprobe, &insn); } - if (fix_ip) - auprobe->fixups |= UPROBE_FIX_IP; - if (fix_call) - auprobe->fixups |= UPROBE_FIX_CALL; + auprobe->defparam.ilen = insn.length; + auprobe->defparam.fixups |= fix_ip_or_call; auprobe->ops = &default_xol_ops; return 0; @@ -694,6 +757,12 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; + if (auprobe->ops->pre_xol) { + int err = auprobe->ops->pre_xol(auprobe, regs); + if (err) + return err; + } + regs->ip = utask->xol_vaddr; utask->autask.saved_trap_nr = current->thread.trap_nr; current->thread.trap_nr = UPROBE_TRAP_NR; @@ -703,8 +772,6 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) if (test_tsk_thread_flag(current, TIF_BLOCKSTEP)) set_task_blockstep(current, false); - if (auprobe->ops->pre_xol) - return auprobe->ops->pre_xol(auprobe, regs); return 0; } @@ -732,56 +799,42 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t) * single-step, we single-stepped a copy of the instruction. * * This function prepares to resume execution after the single-step. - * We have to fix things up as follows: - * - * Typically, the new ip is relative to the copied instruction. We need - * to make it relative to the original instruction (FIX_IP). Exceptions - * are return instructions and absolute or indirect jump or call instructions. - * - * If the single-stepped instruction was a call, the return address that - * is atop the stack is the address following the copied instruction. We - * need to make it the address following the original instruction (FIX_CALL). - * - * If the original instruction was a rip-relative instruction such as - * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent - * instruction using a scratch register -- e.g., "movl %edx,(%rax)". - * We need to restore the contents of the scratch register and adjust - * the ip, keeping in mind that the instruction we executed is 4 bytes - * shorter than the original instruction (since we squeezed out the offset - * field). (FIX_RIP_AX or FIX_RIP_CX) */ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; + bool send_sigtrap = utask->autask.saved_tf; + int err = 0; WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR); + current->thread.trap_nr = utask->autask.saved_trap_nr; if (auprobe->ops->post_xol) { - int err = auprobe->ops->post_xol(auprobe, regs); + err = auprobe->ops->post_xol(auprobe, regs); if (err) { - arch_uprobe_abort_xol(auprobe, regs); /* - * Restart the probed insn. ->post_xol() must ensure - * this is really possible if it returns -ERESTART. 
+ * Restore ->ip for restart or post mortem analysis. + * ->post_xol() must not return -ERESTART unless this + * is really possible. */ + regs->ip = utask->vaddr; if (err == -ERESTART) - return 0; - return err; + err = 0; + send_sigtrap = false; } } - - current->thread.trap_nr = utask->autask.saved_trap_nr; /* * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP * so we can get an extra SIGTRAP if we do not clear TF. We need * to examine the opcode to make it right. */ - if (utask->autask.saved_tf) + if (send_sigtrap) send_sig(SIGTRAP, current, 0); - else if (!(auprobe->fixups & UPROBE_FIX_SETF)) + + if (!utask->autask.saved_tf) regs->flags &= ~X86_EFLAGS_TF; - return 0; + return err; } /* callback routine for handling exceptions. */ @@ -815,18 +868,18 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, /* * This function gets called when XOL instruction either gets trapped or - * the thread has a fatal signal, or if arch_uprobe_post_xol() failed. - * Reset the instruction pointer to its probed address for the potential - * restart or for post mortem analysis. + * the thread has a fatal signal. Reset the instruction pointer to its + * probed address for the potential restart or for post mortem analysis. */ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; - current->thread.trap_nr = utask->autask.saved_trap_nr; - handle_riprel_post_xol(auprobe, regs, NULL); - instruction_pointer_set(regs, utask->vaddr); + if (auprobe->ops->abort) + auprobe->ops->abort(auprobe, regs); + current->thread.trap_nr = utask->autask.saved_trap_nr; + regs->ip = utask->vaddr; /* clear TF if it was set by us in arch_uprobe_pre_xol() */ if (!utask->autask.saved_tf) regs->flags &= ~X86_EFLAGS_TF; diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S index 2930ae05d77..28f85c91671 100644 --- a/arch/x86/lib/thunk_32.S +++ b/arch/x86/lib/thunk_32.S @@ -4,8 +4,8 @@ * (inspired by Andi Kleen's thunk_64.S) * Subject to the GNU public license, v.2. No warranty of any kind. */ - #include <linux/linkage.h> + #include <asm/asm.h> #ifdef CONFIG_TRACE_IRQFLAGS /* put return address in eax (arg1) */ @@ -22,6 +22,7 @@ popl %ecx popl %eax ret + _ASM_NOKPROBE(\name) .endm thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index a63efd6bb6a..92d9feaff42 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S @@ -8,6 +8,7 @@ #include <linux/linkage.h> #include <asm/dwarf2.h> #include <asm/calling.h> +#include <asm/asm.h> /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ .macro THUNK name, func, put_ret_addr_in_rdi=0 @@ -25,6 +26,7 @@ call \func jmp restore CFI_ENDPROC + _ASM_NOKPROBE(\name) .endm #ifdef CONFIG_TRACE_IRQFLAGS @@ -43,3 +45,4 @@ restore: RESTORE_ARGS ret CFI_ENDPROC + _ASM_NOKPROBE(restore) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 858b47b5221..36642793e31 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -8,7 +8,7 @@ #include <linux/kdebug.h> /* oops_begin/end, ... */ #include <linux/module.h> /* search_exception_table */ #include <linux/bootmem.h> /* max_low_pfn */ -#include <linux/kprobes.h> /* __kprobes, ... */ +#include <linux/kprobes.h> /* NOKPROBE_SYMBOL, ... */ #include <linux/mmiotrace.h> /* kmmio_handler, ... 
*/ #include <linux/perf_event.h> /* perf_sw_event */ #include <linux/hugetlb.h> /* hstate_index_to_shift */ @@ -46,7 +46,7 @@ enum x86_pf_error_code { * Returns 0 if mmiotrace is disabled, or if the fault is not * handled by mmiotrace: */ -static inline int __kprobes +static nokprobe_inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) { if (unlikely(is_kmmio_active())) @@ -55,7 +55,7 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr) return 0; } -static inline int __kprobes kprobes_fault(struct pt_regs *regs) +static nokprobe_inline int kprobes_fault(struct pt_regs *regs) { int ret = 0; @@ -262,7 +262,7 @@ void vmalloc_sync_all(void) * * Handle a fault on the vmalloc or module mapping area */ -static noinline __kprobes int vmalloc_fault(unsigned long address) +static noinline int vmalloc_fault(unsigned long address) { unsigned long pgd_paddr; pmd_t *pmd_k; @@ -292,6 +292,7 @@ static noinline __kprobes int vmalloc_fault(unsigned long address) return 0; } +NOKPROBE_SYMBOL(vmalloc_fault); /* * Did it hit the DOS screen memory VA from vm86 mode? @@ -359,7 +360,7 @@ void vmalloc_sync_all(void) * * This assumes no large pages in there. */ -static noinline __kprobes int vmalloc_fault(unsigned long address) +static noinline int vmalloc_fault(unsigned long address) { pgd_t *pgd, *pgd_ref; pud_t *pud, *pud_ref; @@ -426,6 +427,7 @@ static noinline __kprobes int vmalloc_fault(unsigned long address) return 0; } +NOKPROBE_SYMBOL(vmalloc_fault); #ifdef CONFIG_CPU_SUP_AMD static const char errata93_warning[] = @@ -928,7 +930,7 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte) * There are no security implications to leaving a stale TLB when * increasing the permissions on a page. */ -static noinline __kprobes int +static noinline int spurious_fault(unsigned long error_code, unsigned long address) { pgd_t *pgd; @@ -976,6 +978,7 @@ spurious_fault(unsigned long error_code, unsigned long address) return ret; } +NOKPROBE_SYMBOL(spurious_fault); int show_unhandled_signals = 1; @@ -1031,7 +1034,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs) * {,trace_}do_page_fault() have notrace on. Having this an actual function * guarantees there's a function trace entry. 
*/ -static void __kprobes noinline +static noinline void __do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address) { @@ -1254,8 +1257,9 @@ good_area: up_read(&mm->mmap_sem); } +NOKPROBE_SYMBOL(__do_page_fault); -dotraplinkage void __kprobes notrace +dotraplinkage void notrace do_page_fault(struct pt_regs *regs, unsigned long error_code) { unsigned long address = read_cr2(); /* Get the faulting address */ @@ -1273,10 +1277,12 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) __do_page_fault(regs, error_code, address); exception_exit(prev_state); } +NOKPROBE_SYMBOL(do_page_fault); #ifdef CONFIG_TRACING -static void trace_page_fault_entries(unsigned long address, struct pt_regs *regs, - unsigned long error_code) +static nokprobe_inline void +trace_page_fault_entries(unsigned long address, struct pt_regs *regs, + unsigned long error_code) { if (user_mode(regs)) trace_page_fault_user(address, regs, error_code); @@ -1284,7 +1290,7 @@ static void trace_page_fault_entries(unsigned long address, struct pt_regs *regs trace_page_fault_kernel(address, regs, error_code); } -dotraplinkage void __kprobes notrace +dotraplinkage void notrace trace_do_page_fault(struct pt_regs *regs, unsigned long error_code) { /* @@ -1301,4 +1307,5 @@ trace_do_page_fault(struct pt_regs *regs, unsigned long error_code) __do_page_fault(regs, error_code, address); exception_exit(prev_state); } +NOKPROBE_SYMBOL(trace_do_page_fault); #endif /* CONFIG_TRACING */ diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 9769df09403..3c0809a0631 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -9,18 +9,9 @@ VDSOX32-$(CONFIG_X86_X32_ABI) := y VDSO32-$(CONFIG_X86_32) := y VDSO32-$(CONFIG_COMPAT) := y -vdso-install-$(VDSO64-y) += vdso.so -vdso-install-$(VDSOX32-y) += vdsox32.so -vdso-install-$(VDSO32-y) += $(vdso32-images) - - # files to link into the vdso -vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o - -vobjs-$(VDSOX32-y) += $(vobjx32s-compat) - -# Filter out x32 objects. -vobj64s := $(filter-out $(vobjx32s-compat),$(vobjs-y)) +vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vdso-fakesections.o +vobjs-nox32 := vdso-fakesections.o # files to link into kernel obj-y += vma.o @@ -34,7 +25,7 @@ vdso_img-$(VDSO32-y) += 32-sysenter obj-$(VDSO32-y) += vdso32-setup.o -vobjs := $(foreach F,$(vobj64s),$(obj)/$F) +vobjs := $(foreach F,$(vobjs-y),$(obj)/$F) $(obj)/vdso.o: $(obj)/vdso.so @@ -104,7 +95,13 @@ VDSO_LDFLAGS_vdsox32.lds = -Wl,-m,elf32_x86_64 \ -Wl,-z,max-page-size=4096 \ -Wl,-z,common-page-size=4096 -vobjx32s-y := $(vobj64s:.o=-x32.o) +# 64-bit objects to re-brand as x32 +vobjs64-for-x32 := $(filter-out $(vobjs-nox32),$(vobjs-y)) + +# x32-rebranded versions +vobjx32s-y := $(vobjs64-for-x32:.o=-x32.o) + +# same thing, but in the output directory vobjx32s := $(foreach F,$(vobjx32s-y),$(obj)/$F) # Convert 64bit object file to x32 for x32 vDSO. @@ -176,15 +173,20 @@ VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \ GCOV_PROFILE := n # -# Install the unstripped copy of vdso*.so listed in $(vdso-install-y). +# Install the unstripped copies of vdso*.so. 
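+# Each *.so.dbg image gets a matching install_* target below, and
+# "make vdso_install" depends on all of them.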
#
-quiet_cmd_vdso_install = INSTALL $@
-      cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-$(vdso-install-y): %.so: $(obj)/%.so.dbg FORCE
+quiet_cmd_vdso_install = INSTALL $(@:install_%=%)
+      cmd_vdso_install = cp $< $(MODLIB)/vdso/$(@:install_%=%)
+
+vdso_img_insttargets := $(vdso_img_sodbg:%.dbg=install_%)
+
+$(MODLIB)/vdso: FORCE
 	@mkdir -p $(MODLIB)/vdso
+
+$(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE
 	$(call cmd,vdso_install)
 
-PHONY += vdso_install $(vdso-install-y)
-vdso_install: $(vdso-install-y)
+PHONY += vdso_install $(vdso_img_insttargets)
+vdso_install: $(vdso_img_insttargets) FORCE
 
 clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80*
diff --git a/arch/x86/vdso/vdso-fakesections.c b/arch/x86/vdso/vdso-fakesections.c
new file mode 100644
index 00000000000..cb8a8d72c24
--- /dev/null
+++ b/arch/x86/vdso/vdso-fakesections.c
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2014 Andy Lutomirski
+ * Subject to the GNU Public License, v.2
+ *
+ * Hack to keep broken Go programs working.
+ *
+ * The Go runtime had a couple of bugs: it would read the section table to try
+ * to figure out how many dynamic symbols there were (it shouldn't have looked
+ * at the section table at all) and, if there were no SHT_DYNSYM section table
+ * entry, it would use an uninitialized value for the number of symbols. As a
+ * workaround, we supply a minimal section table. vdso2c will adjust the
+ * in-memory image so that "vdso_fake_sections" becomes the section table.
+ *
+ * The bug was introduced by:
+ * https://code.google.com/p/go/source/detail?r=56ea40aac72b (2012-08-31)
+ * and is being addressed in the Go runtime in this issue:
+ * https://code.google.com/p/go/issues/detail?id=8197
+ */
+
+#ifndef __x86_64__
+#error This hack is specific to the 64-bit vDSO
+#endif
+
+#include <linux/elf.h>
+
+extern const __visible struct elf64_shdr vdso_fake_sections[];
+const __visible struct elf64_shdr vdso_fake_sections[] = {
+	{
+		.sh_type = SHT_DYNSYM,
+		.sh_entsize = sizeof(Elf64_Sym),
+	}
+};
diff --git a/arch/x86/vdso/vdso2c.c b/arch/x86/vdso/vdso2c.c
index 450ac6eaf61..7a6bf50f916 100644
--- a/arch/x86/vdso/vdso2c.c
+++ b/arch/x86/vdso/vdso2c.c
@@ -54,7 +54,7 @@ static void fail(const char *format, ...)
 }
 
 /*
- * Evil macros to do a little-endian read.
+ * Evil macros for little-endian reads and writes
 */
 #define GLE(x, bits, ifnot) \
 	__builtin_choose_expr( \
(__typeof__(*(x)))get_unaligned_le##bits(x), ifnot) extern void bad_get_le(void); -#define LAST_LE(x) \ +#define LAST_GLE(x) \ __builtin_choose_expr(sizeof(*(x)) == 1, *(x), bad_get_le()) #define GET_LE(x) \ - GLE(x, 64, GLE(x, 32, GLE(x, 16, LAST_LE(x)))) + GLE(x, 64, GLE(x, 32, GLE(x, 16, LAST_GLE(x)))) + +#define PLE(x, val, bits, ifnot) \ + __builtin_choose_expr( \ + (sizeof(*(x)) == bits/8), \ + put_unaligned_le##bits((val), (x)), ifnot) + +extern void bad_put_le(void); +#define LAST_PLE(x, val) \ + __builtin_choose_expr(sizeof(*(x)) == 1, *(x) = (val), bad_put_le()) + +#define PUT_LE(x, val) \ + PLE(x, val, 64, PLE(x, val, 32, PLE(x, val, 16, LAST_PLE(x, val)))) + #define NSYMS (sizeof(required_syms) / sizeof(required_syms[0])) diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/vdso/vdso2c.h index 8a074637a57..c6eefaf389b 100644 --- a/arch/x86/vdso/vdso2c.h +++ b/arch/x86/vdso/vdso2c.h @@ -18,6 +18,8 @@ static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name) const char *secstrings; uint64_t syms[NSYMS] = {}; + uint64_t fake_sections_value = 0, fake_sections_size = 0; + Elf_Phdr *pt = (Elf_Phdr *)(addr + GET_LE(&hdr->e_phoff)); /* Walk the segment table. */ @@ -84,6 +86,7 @@ static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name) GET_LE(&symtab_hdr->sh_entsize) * i; const char *name = addr + GET_LE(&strtab_hdr->sh_offset) + GET_LE(&sym->st_name); + for (k = 0; k < NSYMS; k++) { if (!strcmp(name, required_syms[k])) { if (syms[k]) { @@ -93,6 +96,13 @@ static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name) syms[k] = GET_LE(&sym->st_value); } } + + if (!strcmp(name, "vdso_fake_sections")) { + if (fake_sections_value) + fail("duplicate vdso_fake_sections\n"); + fake_sections_value = GET_LE(&sym->st_value); + fake_sections_size = GET_LE(&sym->st_size); + } } /* Validate mapping addresses. */ @@ -112,11 +122,14 @@ static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name) if (syms[sym_end_mapping] % 4096) fail("end_mapping must be a multiple of 4096\n"); - /* Remove sections. */ - hdr->e_shoff = 0; - hdr->e_shentsize = 0; - hdr->e_shnum = 0; - hdr->e_shstrndx = htole16(SHN_UNDEF); + /* Remove sections or use fakes */ + if (fake_sections_size % sizeof(Elf_Shdr)) + fail("vdso_fake_sections size is not a multiple of %ld\n", + (long)sizeof(Elf_Shdr)); + PUT_LE(&hdr->e_shoff, fake_sections_value); + PUT_LE(&hdr->e_shentsize, fake_sections_value ? sizeof(Elf_Shdr) : 0); + PUT_LE(&hdr->e_shnum, fake_sections_size / sizeof(Elf_Shdr)); + PUT_LE(&hdr->e_shstrndx, SHN_UNDEF); if (!name) { fwrite(addr, load_size, 1, outfile); diff --git a/block/blk-core.c b/block/blk-core.c index 9aca8c71e70..f6f6b9af3e3 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -43,6 +43,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); +EXPORT_TRACEPOINT_SYMBOL_GPL(block_split); EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug); DEFINE_IDA(blk_queue_ida); diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index a842c71dcc2..02351e21716 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -10,10 +10,6 @@ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. 
- * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ #include <linux/nvme.h> @@ -46,16 +42,26 @@ #include <scsi/sg.h> #include <asm-generic/io-64-nonatomic-lo-hi.h> -#define NVME_Q_DEPTH 1024 +#include <trace/events/block.h> + +#define NVME_Q_DEPTH 1024 #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) #define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) -#define ADMIN_TIMEOUT (60 * HZ) -#define IOD_TIMEOUT (4 * NVME_IO_TIMEOUT) +#define ADMIN_TIMEOUT (admin_timeout * HZ) +#define IOD_TIMEOUT (retry_time * HZ) + +static unsigned char admin_timeout = 60; +module_param(admin_timeout, byte, 0644); +MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands"); -unsigned char io_timeout = 30; -module_param(io_timeout, byte, 0644); +unsigned char nvme_io_timeout = 30; +module_param_named(io_timeout, nvme_io_timeout, byte, 0644); MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O"); +static unsigned char retry_time = 30; +module_param(retry_time, byte, 0644); +MODULE_PARM_DESC(retry_time, "time in seconds to retry failed I/O"); + static int nvme_major; module_param(nvme_major, int, 0); @@ -67,6 +73,7 @@ static LIST_HEAD(dev_list); static struct task_struct *nvme_thread; static struct workqueue_struct *nvme_workq; static wait_queue_head_t nvme_kthread_wait; +static struct notifier_block nvme_nb; static void nvme_reset_failed_dev(struct work_struct *ws); @@ -199,16 +206,13 @@ static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx, #define CMD_CTX_CANCELLED (0x30C + CMD_CTX_BASE) #define CMD_CTX_COMPLETED (0x310 + CMD_CTX_BASE) #define CMD_CTX_INVALID (0x314 + CMD_CTX_BASE) -#define CMD_CTX_FLUSH (0x318 + CMD_CTX_BASE) -#define CMD_CTX_ABORT (0x31C + CMD_CTX_BASE) +#define CMD_CTX_ABORT (0x318 + CMD_CTX_BASE) static void special_completion(struct nvme_queue *nvmeq, void *ctx, struct nvme_completion *cqe) { if (ctx == CMD_CTX_CANCELLED) return; - if (ctx == CMD_CTX_FLUSH) - return; if (ctx == CMD_CTX_ABORT) { ++nvmeq->dev->abort_limit; return; @@ -247,8 +251,9 @@ static void *free_cmdid(struct nvme_queue *nvmeq, int cmdid, void *ctx; struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); - if (cmdid >= nvmeq->q_depth) { - *fn = special_completion; + if (cmdid >= nvmeq->q_depth || !info[cmdid].fn) { + if (fn) + *fn = special_completion; return CMD_CTX_INVALID; } if (fn) @@ -281,9 +286,17 @@ static struct nvme_queue *raw_nvmeq(struct nvme_dev *dev, int qid) static struct nvme_queue *get_nvmeq(struct nvme_dev *dev) __acquires(RCU) { + struct nvme_queue *nvmeq; unsigned queue_id = get_cpu_var(*dev->io_queue); + rcu_read_lock(); - return rcu_dereference(dev->queues[queue_id]); + nvmeq = rcu_dereference(dev->queues[queue_id]); + if (nvmeq) + return nvmeq; + + rcu_read_unlock(); + put_cpu_var(*dev->io_queue); + return NULL; } static void put_nvmeq(struct nvme_queue *nvmeq) __releases(RCU) @@ -295,8 +308,15 @@ static void put_nvmeq(struct nvme_queue *nvmeq) __releases(RCU) static struct nvme_queue *lock_nvmeq(struct nvme_dev *dev, int q_idx) __acquires(RCU) { + struct nvme_queue *nvmeq; + rcu_read_lock(); - return rcu_dereference(dev->queues[q_idx]); + nvmeq = rcu_dereference(dev->queues[q_idx]); + if (nvmeq) + return nvmeq; + + rcu_read_unlock(); + return NULL; } static void unlock_nvmeq(struct nvme_queue *nvmeq) __releases(RCU) @@ -387,25 +407,30 @@ void nvme_free_iod(struct nvme_dev *dev, 
struct nvme_iod *iod) static void nvme_start_io_acct(struct bio *bio) { struct gendisk *disk = bio->bi_bdev->bd_disk; - const int rw = bio_data_dir(bio); - int cpu = part_stat_lock(); - part_round_stats(cpu, &disk->part0); - part_stat_inc(cpu, &disk->part0, ios[rw]); - part_stat_add(cpu, &disk->part0, sectors[rw], bio_sectors(bio)); - part_inc_in_flight(&disk->part0, rw); - part_stat_unlock(); + if (blk_queue_io_stat(disk->queue)) { + const int rw = bio_data_dir(bio); + int cpu = part_stat_lock(); + part_round_stats(cpu, &disk->part0); + part_stat_inc(cpu, &disk->part0, ios[rw]); + part_stat_add(cpu, &disk->part0, sectors[rw], + bio_sectors(bio)); + part_inc_in_flight(&disk->part0, rw); + part_stat_unlock(); + } } static void nvme_end_io_acct(struct bio *bio, unsigned long start_time) { struct gendisk *disk = bio->bi_bdev->bd_disk; - const int rw = bio_data_dir(bio); - unsigned long duration = jiffies - start_time; - int cpu = part_stat_lock(); - part_stat_add(cpu, &disk->part0, ticks[rw], duration); - part_round_stats(cpu, &disk->part0); - part_dec_in_flight(&disk->part0, rw); - part_stat_unlock(); + if (blk_queue_io_stat(disk->queue)) { + const int rw = bio_data_dir(bio); + unsigned long duration = jiffies - start_time; + int cpu = part_stat_lock(); + part_stat_add(cpu, &disk->part0, ticks[rw], duration); + part_round_stats(cpu, &disk->part0); + part_dec_in_flight(&disk->part0, rw); + part_stat_unlock(); + } } static void bio_completion(struct nvme_queue *nvmeq, void *ctx, @@ -414,6 +439,7 @@ static void bio_completion(struct nvme_queue *nvmeq, void *ctx, struct nvme_iod *iod = ctx; struct bio *bio = iod->private; u16 status = le16_to_cpup(&cqe->status) >> 1; + int error = 0; if (unlikely(status)) { if (!(status & NVME_SC_DNR || @@ -426,6 +452,7 @@ static void bio_completion(struct nvme_queue *nvmeq, void *ctx, wake_up(&nvmeq->sq_full); return; } + error = -EIO; } if (iod->nents) { dma_unmap_sg(nvmeq->q_dmadev, iod->sg, iod->nents, @@ -433,10 +460,9 @@ static void bio_completion(struct nvme_queue *nvmeq, void *ctx, nvme_end_io_acct(bio, iod->start_time); } nvme_free_iod(nvmeq->dev, iod); - if (status) - bio_endio(bio, -EIO); - else - bio_endio(bio, 0); + + trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio, error); + bio_endio(bio, error); } /* length is in bytes. gfp flags indicates whether we may sleep. 
*/ @@ -525,6 +551,8 @@ static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq, if (!split) return -ENOMEM; + trace_block_split(bdev_get_queue(bio->bi_bdev), bio, + split->bi_iter.bi_sector); bio_chain(split, bio); if (!waitqueue_active(&nvmeq->sq_full)) @@ -627,16 +655,6 @@ static int nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns, return 0; } -int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns) -{ - int cmdid = alloc_cmdid(nvmeq, (void *)CMD_CTX_FLUSH, - special_completion, NVME_IO_TIMEOUT); - if (unlikely(cmdid < 0)) - return cmdid; - - return nvme_submit_flush(nvmeq, ns, cmdid); -} - static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod) { struct bio *bio = iod->private; @@ -652,7 +670,7 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod) if (bio->bi_rw & REQ_DISCARD) return nvme_submit_discard(nvmeq, ns, bio, iod, cmdid); - if ((bio->bi_rw & REQ_FLUSH) && !iod->nents) + if (bio->bi_rw & REQ_FLUSH) return nvme_submit_flush(nvmeq, ns, cmdid); control = 0; @@ -686,6 +704,26 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod) return 0; } +static int nvme_split_flush_data(struct nvme_queue *nvmeq, struct bio *bio) +{ + struct bio *split = bio_clone(bio, GFP_ATOMIC); + if (!split) + return -ENOMEM; + + split->bi_iter.bi_size = 0; + split->bi_phys_segments = 0; + bio->bi_rw &= ~REQ_FLUSH; + bio_chain(split, bio); + + if (!waitqueue_active(&nvmeq->sq_full)) + add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait); + bio_list_add(&nvmeq->sq_cong, split); + bio_list_add(&nvmeq->sq_cong, bio); + wake_up_process(nvme_thread); + + return 0; +} + /* * Called with local interrupts disabled and the q_lock held. May not sleep. */ @@ -696,11 +734,8 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns, int psegs = bio_phys_segments(ns->queue, bio); int result; - if ((bio->bi_rw & REQ_FLUSH) && psegs) { - result = nvme_submit_flush_data(nvmeq, ns); - if (result) - return result; - } + if ((bio->bi_rw & REQ_FLUSH) && psegs) + return nvme_split_flush_data(nvmeq, bio); iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, GFP_ATOMIC); if (!iod) @@ -795,7 +830,6 @@ static void nvme_make_request(struct request_queue *q, struct bio *bio) int result = -EBUSY; if (!nvmeq) { - put_nvmeq(NULL); bio_endio(bio, -EIO); return; } @@ -870,10 +904,8 @@ static int nvme_submit_sync_cmd(struct nvme_dev *dev, int q_idx, struct nvme_queue *nvmeq; nvmeq = lock_nvmeq(dev, q_idx); - if (!nvmeq) { - unlock_nvmeq(nvmeq); + if (!nvmeq) return -ENODEV; - } cmdinfo.task = current; cmdinfo.status = -EINTR; @@ -898,9 +930,10 @@ static int nvme_submit_sync_cmd(struct nvme_dev *dev, int q_idx, if (cmdinfo.status == -EINTR) { nvmeq = lock_nvmeq(dev, q_idx); - if (nvmeq) + if (nvmeq) { nvme_abort_command(nvmeq, cmdid); - unlock_nvmeq(nvmeq); + unlock_nvmeq(nvmeq); + } return -EINTR; } @@ -1358,7 +1391,8 @@ static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled) return -EINTR; if (time_after(jiffies, timeout)) { dev_err(&dev->pci_dev->dev, - "Device not ready; aborting initialisation\n"); + "Device not ready; aborting %s\n", enabled ? 
+ "initialisation" : "reset"); return -ENODEV; } } @@ -1481,7 +1515,11 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, goto put_pages; } + err = -ENOMEM; iod = nvme_alloc_iod(count, length, GFP_KERNEL); + if (!iod) + goto put_pages; + sg = iod->sg; sg_init_table(sg, count); for (i = 0; i < count; i++) { @@ -1494,7 +1532,6 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, sg_mark_end(&sg[i - 1]); iod->nents = count; - err = -ENOMEM; nents = dma_map_sg(&dev->pci_dev->dev, sg, count, write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); if (!nents) @@ -1894,6 +1931,8 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid, blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); if (dev->max_hw_sectors) blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors); + if (dev->vwc & NVME_CTRL_VWC_PRESENT) + blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA); disk->major = nvme_major; disk->first_minor = 0; @@ -2062,8 +2101,13 @@ static int set_queue_count(struct nvme_dev *dev, int count) status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0, &result); - if (status) - return status < 0 ? -EIO : -EBUSY; + if (status < 0) + return status; + if (status > 0) { + dev_err(&dev->pci_dev->dev, "Could not set queue count (%d)\n", + status); + return -EBUSY; + } return min(result & 0xffff, result >> 16) + 1; } @@ -2072,14 +2116,25 @@ static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues) return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride); } +static void nvme_cpu_workfn(struct work_struct *work) +{ + struct nvme_dev *dev = container_of(work, struct nvme_dev, cpu_work); + if (dev->initialized) + nvme_assign_io_queues(dev); +} + static int nvme_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { - struct nvme_dev *dev = container_of(self, struct nvme_dev, nb); + struct nvme_dev *dev; + switch (action) { case CPU_ONLINE: case CPU_DEAD: - nvme_assign_io_queues(dev); + spin_lock(&dev_list_lock); + list_for_each_entry(dev, &dev_list, node) + schedule_work(&dev->cpu_work); + spin_unlock(&dev_list_lock); break; } return NOTIFY_OK; @@ -2148,11 +2203,6 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) nvme_free_queues(dev, nr_io_queues + 1); nvme_assign_io_queues(dev); - dev->nb.notifier_call = &nvme_cpu_notify; - result = register_hotcpu_notifier(&dev->nb); - if (result) - goto free_queues; - return 0; free_queues: @@ -2184,6 +2234,7 @@ static int nvme_dev_add(struct nvme_dev *dev) res = nvme_identify(dev, 0, 1, dma_addr); if (res) { + dev_err(&pdev->dev, "Identify Controller failed (%d)\n", res); res = -EIO; goto out; } @@ -2192,6 +2243,7 @@ static int nvme_dev_add(struct nvme_dev *dev) nn = le32_to_cpup(&ctrl->nn); dev->oncs = le16_to_cpup(&ctrl->oncs); dev->abort_limit = ctrl->acl + 1; + dev->vwc = ctrl->vwc; memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn)); memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn)); memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr)); @@ -2450,8 +2502,6 @@ static void nvme_dev_shutdown(struct nvme_dev *dev) int i; dev->initialized = 0; - unregister_hotcpu_notifier(&dev->nb); - nvme_dev_list_remove(dev); if (!dev->bar || (dev->bar && readl(&dev->bar->csts) == -1)) { @@ -2722,6 +2772,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) INIT_LIST_HEAD(&dev->namespaces); dev->reset_workfn = nvme_reset_failed_dev; INIT_WORK(&dev->reset_work, nvme_reset_workfn); + INIT_WORK(&dev->cpu_work, nvme_cpu_workfn); dev->pci_dev = pdev; pci_set_drvdata(pdev, 
dev); result = nvme_set_instance(dev); @@ -2801,6 +2852,7 @@ static void nvme_remove(struct pci_dev *pdev) pci_set_drvdata(pdev, NULL); flush_work(&dev->reset_work); + flush_work(&dev->cpu_work); misc_deregister(&dev->miscdev); nvme_dev_remove(dev); nvme_dev_shutdown(dev); @@ -2889,11 +2941,18 @@ static int __init nvme_init(void) else if (result > 0) nvme_major = result; - result = pci_register_driver(&nvme_driver); + nvme_nb.notifier_call = &nvme_cpu_notify; + result = register_hotcpu_notifier(&nvme_nb); if (result) goto unregister_blkdev; + + result = pci_register_driver(&nvme_driver); + if (result) + goto unregister_hotcpu; return 0; + unregister_hotcpu: + unregister_hotcpu_notifier(&nvme_nb); unregister_blkdev: unregister_blkdev(nvme_major, "nvme"); kill_workq: @@ -2904,9 +2963,11 @@ static int __init nvme_init(void) static void __exit nvme_exit(void) { pci_unregister_driver(&nvme_driver); + unregister_hotcpu_notifier(&nvme_nb); unregister_blkdev(nvme_major, "nvme"); destroy_workqueue(nvme_workq); BUG_ON(nvme_thread && !IS_ERR(nvme_thread)); + _nvme_check_size(); } MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>"); diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index 2c3f5be06da..a4cd6d691c6 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -1,6 +1,6 @@ /* * NVM Express device driver - * Copyright (c) 2011, Intel Corporation. + * Copyright (c) 2011-2014, Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -10,10 +10,6 @@ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
*/ /* @@ -243,8 +239,6 @@ static int sg_version_num = 30534; /* 2 digits for each component */ #define READ_CAP_16_RESP_SIZE 32 /* NVMe Namespace and Command Defines */ -#define NVME_GET_SMART_LOG_PAGE 0x02 -#define NVME_GET_FEAT_TEMP_THRESH 0x04 #define BYTES_TO_DWORDS 4 #define NVME_MAX_FIRMWARE_SLOT 7 @@ -686,6 +680,7 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns, u8 resp_data_format = 0x02; u8 protect; u8 cmdque = 0x01 << 1; + u8 fw_offset = sizeof(dev->firmware_rev); mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL); @@ -721,7 +716,11 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns, inq_response[7] = cmdque; /* wbus16=0 | sync=0 | vs=0 */ strncpy(&inq_response[8], "NVMe ", 8); strncpy(&inq_response[16], dev->model, 16); - strncpy(&inq_response[32], dev->firmware_rev, 4); + + while (dev->firmware_rev[fw_offset - 1] == ' ' && fw_offset > 4) + fw_offset--; + fw_offset -= 4; + strncpy(&inq_response[32], dev->firmware_rev + fw_offset, 4); xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH); res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); @@ -1018,8 +1017,8 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns, c.common.opcode = nvme_admin_get_log_page; c.common.nsid = cpu_to_le32(0xFFFFFFFF); c.common.prp1 = cpu_to_le64(dma_addr); - c.common.cdw10[0] = cpu_to_le32(((sizeof(struct nvme_smart_log) / - BYTES_TO_DWORDS) << 16) | NVME_GET_SMART_LOG_PAGE); + c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) / + BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART); res = nvme_submit_admin_cmd(dev, &c, NULL); if (res != NVME_SC_SUCCESS) { temp_c = LOG_TEMP_UNKNOWN; @@ -1086,8 +1085,8 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr, c.common.opcode = nvme_admin_get_log_page; c.common.nsid = cpu_to_le32(0xFFFFFFFF); c.common.prp1 = cpu_to_le64(dma_addr); - c.common.cdw10[0] = cpu_to_le32(((sizeof(struct nvme_smart_log) / - BYTES_TO_DWORDS) << 16) | NVME_GET_SMART_LOG_PAGE); + c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) / + BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART); res = nvme_submit_admin_cmd(dev, &c, NULL); if (res != NVME_SC_SUCCESS) { temp_c_cur = LOG_TEMP_UNKNOWN; @@ -1477,7 +1476,7 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr, goto out_dma; } id_ctrl = mem; - lowest_pow_st = id_ctrl->npss - 1; + lowest_pow_st = max(POWER_STATE_0, (int)(id_ctrl->npss - 1)); switch (pc) { case NVME_POWER_STATE_START_VALID: @@ -1494,20 +1493,19 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr, break; case NVME_POWER_STATE_IDLE: /* Action unspecified if POWER CONDITION MODIFIER != [0,1,2] */ - /* min of desired state and (lps-1) because lps is STOP */ if (pcmod == 0x0) - ps_desired = min(POWER_STATE_1, (lowest_pow_st - 1)); + ps_desired = POWER_STATE_1; else if (pcmod == 0x1) - ps_desired = min(POWER_STATE_2, (lowest_pow_st - 1)); + ps_desired = POWER_STATE_2; else if (pcmod == 0x2) - ps_desired = min(POWER_STATE_3, (lowest_pow_st - 1)); + ps_desired = POWER_STATE_3; break; case NVME_POWER_STATE_STANDBY: /* Action unspecified if POWER CONDITION MODIFIER != [0,1] */ if (pcmod == 0x0) - ps_desired = max(0, (lowest_pow_st - 2)); + ps_desired = max(POWER_STATE_0, (lowest_pow_st - 2)); else if (pcmod == 0x1) - ps_desired = max(0, (lowest_pow_st - 1)); + ps_desired = max(POWER_STATE_0, (lowest_pow_st - 1)); break; case NVME_POWER_STATE_LU_CONTROL: default: diff --git 
a/drivers/block/rbd.c b/drivers/block/rbd.c index 4c95b503b09..bbeb404b3a0 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -541,7 +541,6 @@ static int rbd_open(struct block_device *bdev, fmode_t mode) return -ENOENT; (void) get_device(&rbd_dev->dev); - set_device_ro(bdev, rbd_dev->mapping.read_only); return 0; } @@ -559,10 +558,76 @@ static void rbd_release(struct gendisk *disk, fmode_t mode) put_device(&rbd_dev->dev); } +static int rbd_ioctl_set_ro(struct rbd_device *rbd_dev, unsigned long arg) +{ + int ret = 0; + int val; + bool ro; + bool ro_changed = false; + + /* get_user() may sleep, so call it before taking rbd_dev->lock */ + if (get_user(val, (int __user *)(arg))) + return -EFAULT; + + ro = val ? true : false; + /* Snapshot doesn't allow to write*/ + if (rbd_dev->spec->snap_id != CEPH_NOSNAP && !ro) + return -EROFS; + + spin_lock_irq(&rbd_dev->lock); + /* prevent others open this device */ + if (rbd_dev->open_count > 1) { + ret = -EBUSY; + goto out; + } + + if (rbd_dev->mapping.read_only != ro) { + rbd_dev->mapping.read_only = ro; + ro_changed = true; + } + +out: + spin_unlock_irq(&rbd_dev->lock); + /* set_disk_ro() may sleep, so call it after releasing rbd_dev->lock */ + if (ret == 0 && ro_changed) + set_disk_ro(rbd_dev->disk, ro ? 1 : 0); + + return ret; +} + +static int rbd_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + struct rbd_device *rbd_dev = bdev->bd_disk->private_data; + int ret = 0; + + switch (cmd) { + case BLKROSET: + ret = rbd_ioctl_set_ro(rbd_dev, arg); + break; + default: + ret = -ENOTTY; + } + + return ret; +} + +#ifdef CONFIG_COMPAT +static int rbd_compat_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + return rbd_ioctl(bdev, mode, cmd, arg); +} +#endif /* CONFIG_COMPAT */ + static const struct block_device_operations rbd_bd_ops = { .owner = THIS_MODULE, .open = rbd_open, .release = rbd_release, + .ioctl = rbd_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = rbd_compat_ioctl, +#endif }; /* @@ -1382,6 +1447,13 @@ static void rbd_obj_request_put(struct rbd_obj_request *obj_request) kref_put(&obj_request->kref, rbd_obj_request_destroy); } +static void rbd_img_request_get(struct rbd_img_request *img_request) +{ + dout("%s: img %p (was %d)\n", __func__, img_request, + atomic_read(&img_request->kref.refcount)); + kref_get(&img_request->kref); +} + static bool img_request_child_test(struct rbd_img_request *img_request); static void rbd_parent_request_destroy(struct kref *kref); static void rbd_img_request_destroy(struct kref *kref); @@ -2142,6 +2214,7 @@ static void rbd_img_obj_callback(struct rbd_obj_request *obj_request) img_request->next_completion = which; out: spin_unlock_irq(&img_request->completion_lock); + rbd_img_request_put(img_request); if (!more) rbd_img_request_complete(img_request); @@ -2242,6 +2315,7 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, goto out_unwind; obj_request->osd_req = osd_req; obj_request->callback = rbd_img_obj_callback; + rbd_img_request_get(img_request); if (write_request) { osd_req_op_alloc_hint_init(osd_req, which, @@ -2872,56 +2946,55 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) } /* - * Request sync osd watch/unwatch. The value of "start" determines - * whether a watch request is being initiated or torn down. + * Initiate a watch request, synchronously. 
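The BLKROSET handler added above is careful about sleep contexts: get_user() can fault and sleep, so it runs before rbd_dev->lock is taken, and set_disk_ro() is likewise deferred until after the unlock. A generic sketch of the pattern (illustrative names, not the rbd code itself):

    #include <linux/errno.h>
    #include <linux/spinlock.h>
    #include <linux/types.h>
    #include <linux/uaccess.h>

    /* Anything that may sleep (user copies, allocations) happens outside
     * the spinlock; only the protected state is touched inside it. */
    static int example_set_flag(spinlock_t *lock, bool *flag,
                                unsigned long arg)
    {
            int val;

            if (get_user(val, (int __user *)arg))   /* may fault/sleep */
                    return -EFAULT;

            spin_lock_irq(lock);
            *flag = val != 0;
            spin_unlock_irq(lock);

            return 0;
    }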
*/ -static int __rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, bool start) +static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev) { struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; struct rbd_obj_request *obj_request; int ret; - rbd_assert(start ^ !!rbd_dev->watch_event); - rbd_assert(start ^ !!rbd_dev->watch_request); + rbd_assert(!rbd_dev->watch_event); + rbd_assert(!rbd_dev->watch_request); - if (start) { - ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev, - &rbd_dev->watch_event); - if (ret < 0) - return ret; - rbd_assert(rbd_dev->watch_event != NULL); - } + ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev, + &rbd_dev->watch_event); + if (ret < 0) + return ret; + + rbd_assert(rbd_dev->watch_event); - ret = -ENOMEM; obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0, - OBJ_REQUEST_NODATA); - if (!obj_request) + OBJ_REQUEST_NODATA); + if (!obj_request) { + ret = -ENOMEM; goto out_cancel; + } obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, 1, obj_request); - if (!obj_request->osd_req) - goto out_cancel; + if (!obj_request->osd_req) { + ret = -ENOMEM; + goto out_put; + } - if (start) - ceph_osdc_set_request_linger(osdc, obj_request->osd_req); - else - ceph_osdc_unregister_linger_request(osdc, - rbd_dev->watch_request->osd_req); + ceph_osdc_set_request_linger(osdc, obj_request->osd_req); osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH, - rbd_dev->watch_event->cookie, 0, start ? 1 : 0); + rbd_dev->watch_event->cookie, 0, 1); rbd_osd_req_format_write(obj_request); ret = rbd_obj_request_submit(osdc, obj_request); if (ret) - goto out_cancel; + goto out_linger; + ret = rbd_obj_request_wait(obj_request); if (ret) - goto out_cancel; + goto out_linger; + ret = obj_request->result; if (ret) - goto out_cancel; + goto out_linger; /* * A watch request is set to linger, so the underlying osd @@ -2931,36 +3004,84 @@ static int __rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, bool start) * it. We'll drop that reference (below) after we've * unregistered it. */ - if (start) { - rbd_dev->watch_request = obj_request; + rbd_dev->watch_request = obj_request; - return 0; + return 0; + +out_linger: + ceph_osdc_unregister_linger_request(osdc, obj_request->osd_req); +out_put: + rbd_obj_request_put(obj_request); +out_cancel: + ceph_osdc_cancel_event(rbd_dev->watch_event); + rbd_dev->watch_event = NULL; + + return ret; +} + +/* + * Tear down a watch request, synchronously. 
+ */ +static int __rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev) +{ + struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + struct rbd_obj_request *obj_request; + int ret; + + rbd_assert(rbd_dev->watch_event); + rbd_assert(rbd_dev->watch_request); + + obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0, + OBJ_REQUEST_NODATA); + if (!obj_request) { + ret = -ENOMEM; + goto out_cancel; + } + + obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, 1, + obj_request); + if (!obj_request->osd_req) { + ret = -ENOMEM; + goto out_put; } + osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH, + rbd_dev->watch_event->cookie, 0, 0); + rbd_osd_req_format_write(obj_request); + + ret = rbd_obj_request_submit(osdc, obj_request); + if (ret) + goto out_put; + + ret = rbd_obj_request_wait(obj_request); + if (ret) + goto out_put; + + ret = obj_request->result; + if (ret) + goto out_put; + /* We have successfully torn down the watch request */ + ceph_osdc_unregister_linger_request(osdc, + rbd_dev->watch_request->osd_req); rbd_obj_request_put(rbd_dev->watch_request); rbd_dev->watch_request = NULL; + +out_put: + rbd_obj_request_put(obj_request); out_cancel: - /* Cancel the event if we're tearing down, or on error */ ceph_osdc_cancel_event(rbd_dev->watch_event); rbd_dev->watch_event = NULL; - if (obj_request) - rbd_obj_request_put(obj_request); return ret; } -static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev) -{ - return __rbd_dev_header_watch_sync(rbd_dev, true); -} - static void rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev) { int ret; - ret = __rbd_dev_header_watch_sync(rbd_dev, false); + ret = __rbd_dev_header_unwatch_sync(rbd_dev); if (ret) { rbd_warn(rbd_dev, "unable to tear down watch request: %d\n", ret); @@ -3058,7 +3179,6 @@ static void rbd_request_fn(struct request_queue *q) __releases(q->queue_lock) __acquires(q->queue_lock) { struct rbd_device *rbd_dev = q->queuedata; - bool read_only = rbd_dev->mapping.read_only; struct request *rq; int result; @@ -3094,7 +3214,7 @@ static void rbd_request_fn(struct request_queue *q) if (write_request) { result = -EROFS; - if (read_only) + if (rbd_dev->mapping.read_only) goto end_request; rbd_assert(rbd_dev->spec->snap_id == CEPH_NOSNAP); } @@ -4683,6 +4803,38 @@ out_err: } /* + * Return pool id (>= 0) or a negative error code. + */ +static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name) +{ + u64 newest_epoch; + unsigned long timeout = rbdc->client->options->mount_timeout * HZ; + int tries = 0; + int ret; + +again: + ret = ceph_pg_poolid_by_name(rbdc->client->osdc.osdmap, pool_name); + if (ret == -ENOENT && tries++ < 1) { + ret = ceph_monc_do_get_version(&rbdc->client->monc, "osdmap", + &newest_epoch); + if (ret < 0) + return ret; + + if (rbdc->client->osdc.osdmap->epoch < newest_epoch) { + ceph_monc_request_next_osdmap(&rbdc->client->monc); + (void) ceph_monc_wait_osdmap(&rbdc->client->monc, + newest_epoch, timeout); + goto again; + } else { + /* the osdmap we have is new enough */ + return -ENOENT; + } + } + + return ret; +} + +/* * An rbd format 2 image has a unique identifier, distinct from the * name given to it by the user. Internally, that identifier is * what's used to specify the names of objects related to the image. @@ -4752,7 +4904,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) image_id = ceph_extract_encoded_string(&p, p + ret, NULL, GFP_NOIO); - ret = IS_ERR(image_id) ? 
PTR_ERR(image_id) : 0; + ret = PTR_ERR_OR_ZERO(image_id); if (!ret) rbd_dev->image_format = 2; } else { @@ -4907,6 +5059,7 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev) if (ret) goto err_out_disk; set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE); + set_disk_ro(rbd_dev->disk, rbd_dev->mapping.read_only); ret = rbd_bus_add_dev(rbd_dev); if (ret) @@ -5053,7 +5206,6 @@ static ssize_t do_rbd_add(struct bus_type *bus, struct rbd_options *rbd_opts = NULL; struct rbd_spec *spec = NULL; struct rbd_client *rbdc; - struct ceph_osd_client *osdc; bool read_only; int rc = -ENOMEM; @@ -5075,8 +5227,7 @@ static ssize_t do_rbd_add(struct bus_type *bus, } /* pick the pool */ - osdc = &rbdc->client->osdc; - rc = ceph_pg_poolid_by_name(osdc->osdmap, spec->pool_name); + rc = rbd_add_get_pool_id(rbdc, spec->pool_name); if (rc < 0) goto err_out_client; spec->pool_id = (u64)rc; @@ -5387,6 +5538,7 @@ err_out_slab: static void __exit rbd_exit(void) { + ida_destroy(&rbd_dev_id_ida); rbd_sysfs_cleanup(); if (single_major) unregister_blkdev(rbd_major, RBD_DRV_NAME); diff --git a/drivers/clk/sunxi/Makefile b/drivers/clk/sunxi/Makefile index b5bac917612..762fd64dbd1 100644 --- a/drivers/clk/sunxi/Makefile +++ b/drivers/clk/sunxi/Makefile @@ -3,3 +3,7 @@ # obj-y += clk-sunxi.o clk-factors.o +obj-y += clk-a10-hosc.o +obj-y += clk-a20-gmac.o + +obj-$(CONFIG_MFD_SUN6I_PRCM) += clk-sun6i-ar100.o clk-sun6i-apb0.o clk-sun6i-apb0-gates.o diff --git a/drivers/clk/sunxi/clk-a10-hosc.c b/drivers/clk/sunxi/clk-a10-hosc.c new file mode 100644 index 00000000000..0481d5d673d --- /dev/null +++ b/drivers/clk/sunxi/clk-a10-hosc.c @@ -0,0 +1,73 @@ +/* + * Copyright 2013 Emilio López + * + * Emilio López <emilio@elopez.com.ar> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
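One small cleanup in the rbd hunk above replaces an open-coded IS_ERR() conditional with PTR_ERR_OR_ZERO() when extracting the image id. The helper collapses the common "error pointer or success" pattern; it is roughly equivalent to:

    #include <linux/err.h>

    /* What PTR_ERR_OR_ZERO() provides: the encoded errno for an
     * ERR_PTR() value, 0 for a valid pointer. */
    static inline int example_ptr_err_or_zero(const void *ptr)
    {
            if (IS_ERR(ptr))
                    return PTR_ERR(ptr);
            return 0;
    }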
+ */ + +#include <linux/clk-provider.h> +#include <linux/clkdev.h> +#include <linux/of.h> +#include <linux/of_address.h> + +#define SUNXI_OSC24M_GATE 0 + +static DEFINE_SPINLOCK(hosc_lock); + +static void __init sun4i_osc_clk_setup(struct device_node *node) +{ + struct clk *clk; + struct clk_fixed_rate *fixed; + struct clk_gate *gate; + const char *clk_name = node->name; + u32 rate; + + if (of_property_read_u32(node, "clock-frequency", &rate)) + return; + + /* allocate fixed-rate and gate clock structs */ + fixed = kzalloc(sizeof(struct clk_fixed_rate), GFP_KERNEL); + if (!fixed) + return; + gate = kzalloc(sizeof(struct clk_gate), GFP_KERNEL); + if (!gate) + goto err_free_fixed; + + of_property_read_string(node, "clock-output-names", &clk_name); + + /* set up gate and fixed rate properties */ + gate->reg = of_iomap(node, 0); + gate->bit_idx = SUNXI_OSC24M_GATE; + gate->lock = &hosc_lock; + fixed->fixed_rate = rate; + + clk = clk_register_composite(NULL, clk_name, + NULL, 0, + NULL, NULL, + &fixed->hw, &clk_fixed_rate_ops, + &gate->hw, &clk_gate_ops, + CLK_IS_ROOT); + + if (IS_ERR(clk)) + goto err_free_gate; + + of_clk_add_provider(node, of_clk_src_simple_get, clk); + clk_register_clkdev(clk, clk_name, NULL); + + return; + +err_free_gate: + kfree(gate); +err_free_fixed: + kfree(fixed); +} +CLK_OF_DECLARE(sun4i_osc, "allwinner,sun4i-a10-osc-clk", sun4i_osc_clk_setup); diff --git a/drivers/clk/sunxi/clk-a20-gmac.c b/drivers/clk/sunxi/clk-a20-gmac.c new file mode 100644 index 00000000000..633ddc4389e --- /dev/null +++ b/drivers/clk/sunxi/clk-a20-gmac.c @@ -0,0 +1,119 @@ +/* + * Copyright 2013 Emilio López + * Emilio López <emilio@elopez.com.ar> + * + * Copyright 2013 Chen-Yu Tsai + * Chen-Yu Tsai <wens@csie.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/clk-provider.h> +#include <linux/clkdev.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/slab.h> + +static DEFINE_SPINLOCK(gmac_lock); + +/** + * sun7i_a20_gmac_clk_setup - Setup function for A20/A31 GMAC clock module + * + * This clock looks something like this + * ________________________ + * MII TX clock from PHY >-----|___________ _________|----> to GMAC core + * GMAC Int. RGMII TX clk >----|___________\__/__gate---|----> to PHY + * Ext. 125MHz RGMII TX clk >--|__divider__/ | + * |________________________| + * + * The external 125 MHz reference is optional, i.e. GMAC can use its + * internal TX clock just fine. The A31 GMAC clock module does not have + * the divider controls for the external reference. + * + * To keep it simple, let the GMAC use either the MII TX clock for MII mode, + * and its internal TX clock for GMII and RGMII modes. The GMAC driver should + * select the appropriate source and gate/ungate the output to the PHY. + * + * Only the GMAC should use this clock. Altering the clock so that it doesn't + * match the GMAC's operation parameters will result in the GMAC not being + * able to send traffic out. The GMAC driver should set the clock rate and + * enable/disable this clock to configure the required state. 
The clock + * driver then responds by auto-reparenting the clock. + */ + +#define SUN7I_A20_GMAC_GPIT 2 +#define SUN7I_A20_GMAC_MASK 0x3 +#define SUN7I_A20_GMAC_PARENTS 2 + +static void __init sun7i_a20_gmac_clk_setup(struct device_node *node) +{ + struct clk *clk; + struct clk_mux *mux; + struct clk_gate *gate; + const char *clk_name = node->name; + const char *parents[SUN7I_A20_GMAC_PARENTS]; + void *reg; + + if (of_property_read_string(node, "clock-output-names", &clk_name)) + return; + + /* allocate mux and gate clock structs */ + mux = kzalloc(sizeof(struct clk_mux), GFP_KERNEL); + if (!mux) + return; + + gate = kzalloc(sizeof(struct clk_gate), GFP_KERNEL); + if (!gate) + goto free_mux; + + /* gmac clock requires exactly 2 parents */ + parents[0] = of_clk_get_parent_name(node, 0); + parents[1] = of_clk_get_parent_name(node, 1); + if (!parents[0] || !parents[1]) + goto free_gate; + + reg = of_iomap(node, 0); + if (!reg) + goto free_gate; + + /* set up gate and fixed rate properties */ + gate->reg = reg; + gate->bit_idx = SUN7I_A20_GMAC_GPIT; + gate->lock = &gmac_lock; + mux->reg = reg; + mux->mask = SUN7I_A20_GMAC_MASK; + mux->flags = CLK_MUX_INDEX_BIT; + mux->lock = &gmac_lock; + + clk = clk_register_composite(NULL, clk_name, + parents, SUN7I_A20_GMAC_PARENTS, + &mux->hw, &clk_mux_ops, + NULL, NULL, + &gate->hw, &clk_gate_ops, + 0); + + if (IS_ERR(clk)) + goto iounmap_reg; + + of_clk_add_provider(node, of_clk_src_simple_get, clk); + clk_register_clkdev(clk, clk_name, NULL); + + return; + +iounmap_reg: + iounmap(reg); +free_gate: + kfree(gate); +free_mux: + kfree(mux); +} +CLK_OF_DECLARE(sun7i_a20_gmac, "allwinner,sun7i-a20-gmac-clk", + sun7i_a20_gmac_clk_setup); diff --git a/drivers/clk/sunxi/clk-sun6i-apb0-gates.c b/drivers/clk/sunxi/clk-sun6i-apb0-gates.c new file mode 100644 index 00000000000..44cd27c5c40 --- /dev/null +++ b/drivers/clk/sunxi/clk-sun6i-apb0-gates.c @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2014 Free Electrons + * + * License Terms: GNU General Public License v2 + * Author: Boris BREZILLON <boris.brezillon@free-electrons.com> + * + * Allwinner A31 APB0 clock gates driver + * + */ + +#include <linux/clk-provider.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> + +#define SUN6I_APB0_GATES_MAX_SIZE 32 + +static int sun6i_a31_apb0_gates_clk_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct clk_onecell_data *clk_data; + const char *clk_parent; + const char *clk_name; + struct resource *r; + void __iomem *reg; + int gate_id; + int ngates; + int i; + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + reg = devm_ioremap_resource(&pdev->dev, r); + if (!reg) + return PTR_ERR(reg); + + clk_parent = of_clk_get_parent_name(np, 0); + if (!clk_parent) + return -EINVAL; + + ngates = of_property_count_strings(np, "clock-output-names"); + if (ngates < 0) + return ngates; + + if (!ngates || ngates > SUN6I_APB0_GATES_MAX_SIZE) + return -EINVAL; + + clk_data = devm_kzalloc(&pdev->dev, sizeof(struct clk_onecell_data), + GFP_KERNEL); + if (!clk_data) + return -ENOMEM; + + clk_data->clks = devm_kzalloc(&pdev->dev, + SUN6I_APB0_GATES_MAX_SIZE * + sizeof(struct clk *), + GFP_KERNEL); + if (!clk_data->clks) + return -ENOMEM; + + for (i = 0; i < ngates; i++) { + of_property_read_string_index(np, "clock-output-names", + i, &clk_name); + + gate_id = i; + of_property_read_u32_index(np, "clock-indices", i, &gate_id); + + WARN_ON(gate_id >= SUN6I_APB0_GATES_MAX_SIZE); + if (gate_id >= SUN6I_APB0_GATES_MAX_SIZE) + 
continue; + + clk_data->clks[gate_id] = clk_register_gate(&pdev->dev, + clk_name, + clk_parent, 0, + reg, gate_id, + 0, NULL); + WARN_ON(IS_ERR(clk_data->clks[gate_id])); + } + + clk_data->clk_num = ngates; + + return of_clk_add_provider(np, of_clk_src_onecell_get, clk_data); +} + +const struct of_device_id sun6i_a31_apb0_gates_clk_dt_ids[] = { + { .compatible = "allwinner,sun6i-a31-apb0-gates-clk" }, + { /* sentinel */ } +}; + +static struct platform_driver sun6i_a31_apb0_gates_clk_driver = { + .driver = { + .name = "sun6i-a31-apb0-gates-clk", + .owner = THIS_MODULE, + .of_match_table = sun6i_a31_apb0_gates_clk_dt_ids, + }, + .probe = sun6i_a31_apb0_gates_clk_probe, +}; +module_platform_driver(sun6i_a31_apb0_gates_clk_driver); + +MODULE_AUTHOR("Boris BREZILLON <boris.brezillon@free-electrons.com>"); +MODULE_DESCRIPTION("Allwinner A31 APB0 gate clocks driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/clk/sunxi/clk-sun6i-apb0.c b/drivers/clk/sunxi/clk-sun6i-apb0.c new file mode 100644 index 00000000000..11f17c34c2a --- /dev/null +++ b/drivers/clk/sunxi/clk-sun6i-apb0.c @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2014 Free Electrons + * + * License Terms: GNU General Public License v2 + * Author: Boris BREZILLON <boris.brezillon@free-electrons.com> + * + * Allwinner A31 APB0 clock driver + * + */ + +#include <linux/clk-provider.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> + +/* + * The APB0 clk has a configurable divisor. + * + * We must use a clk_div_table and not a regular power of 2 + * divisor here, because the first 2 values divide the clock + * by 2. + */ +static const struct clk_div_table sun6i_a31_apb0_divs[] = { + { .val = 0, .div = 2, }, + { .val = 1, .div = 2, }, + { .val = 2, .div = 4, }, + { .val = 3, .div = 8, }, + { /* sentinel */ }, +}; + +static int sun6i_a31_apb0_clk_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + const char *clk_name = np->name; + const char *clk_parent; + struct resource *r; + void __iomem *reg; + struct clk *clk; + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + reg = devm_ioremap_resource(&pdev->dev, r); + if (IS_ERR(reg)) + return PTR_ERR(reg); + + clk_parent = of_clk_get_parent_name(np, 0); + if (!clk_parent) + return -EINVAL; + + of_property_read_string(np, "clock-output-names", &clk_name); + + clk = clk_register_divider_table(&pdev->dev, clk_name, clk_parent, + 0, reg, 0, 2, 0, sun6i_a31_apb0_divs, + NULL); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + return of_clk_add_provider(np, of_clk_src_simple_get, clk); +} + +const struct of_device_id sun6i_a31_apb0_clk_dt_ids[] = { + { .compatible = "allwinner,sun6i-a31-apb0-clk" }, + { /* sentinel */ } +}; + +static struct platform_driver sun6i_a31_apb0_clk_driver = { + .driver = { + .name = "sun6i-a31-apb0-clk", + .owner = THIS_MODULE, + .of_match_table = sun6i_a31_apb0_clk_dt_ids, + }, + .probe = sun6i_a31_apb0_clk_probe, +}; +module_platform_driver(sun6i_a31_apb0_clk_driver); + +MODULE_AUTHOR("Boris BREZILLON <boris.brezillon@free-electrons.com>"); +MODULE_DESCRIPTION("Allwinner A31 APB0 clock Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/clk/sunxi/clk-sun6i-ar100.c b/drivers/clk/sunxi/clk-sun6i-ar100.c new file mode 100644 index 00000000000..f73cc051f0d --- /dev/null +++ b/drivers/clk/sunxi/clk-sun6i-ar100.c @@ -0,0 +1,233 @@ +/* + * Copyright (C) 2014 Free Electrons + * + * License Terms: GNU General Public License v2 + * Author: Boris BREZILLON <boris.brezillon@free-electrons.com> + * + * 
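Worth noting for the two probe functions above: devm_ioremap_resource() reports failure with an ERR_PTR(), never NULL, so the result must be tested with IS_ERR() as the APB0 driver does; the NULL check in the gates driver would let an error pointer slip through. A minimal probe fragment showing the intended idiom (hypothetical driver):

    #include <linux/err.h>
    #include <linux/io.h>
    #include <linux/platform_device.h>

    static int example_probe(struct platform_device *pdev)
    {
            struct resource *r;
            void __iomem *reg;

            r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
            reg = devm_ioremap_resource(&pdev->dev, r);
            if (IS_ERR(reg))        /* never NULL on failure */
                    return PTR_ERR(reg);

            /* ... register clocks backed by reg ... */
            return 0;
    }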
Allwinner A31 AR100 clock driver + * + */ + +#include <linux/clk-provider.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> + +#define SUN6I_AR100_MAX_PARENTS 4 +#define SUN6I_AR100_SHIFT_MASK 0x3 +#define SUN6I_AR100_SHIFT_MAX SUN6I_AR100_SHIFT_MASK +#define SUN6I_AR100_SHIFT_SHIFT 4 +#define SUN6I_AR100_DIV_MASK 0x1f +#define SUN6I_AR100_DIV_MAX (SUN6I_AR100_DIV_MASK + 1) +#define SUN6I_AR100_DIV_SHIFT 8 +#define SUN6I_AR100_MUX_MASK 0x3 +#define SUN6I_AR100_MUX_SHIFT 16 + +struct ar100_clk { + struct clk_hw hw; + void __iomem *reg; +}; + +static inline struct ar100_clk *to_ar100_clk(struct clk_hw *hw) +{ + return container_of(hw, struct ar100_clk, hw); +} + +static unsigned long ar100_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct ar100_clk *clk = to_ar100_clk(hw); + u32 val = readl(clk->reg); + int shift = (val >> SUN6I_AR100_SHIFT_SHIFT) & SUN6I_AR100_SHIFT_MASK; + int div = (val >> SUN6I_AR100_DIV_SHIFT) & SUN6I_AR100_DIV_MASK; + + return (parent_rate >> shift) / (div + 1); +} + +static long ar100_determine_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *best_parent_rate, + struct clk **best_parent_clk) +{ + int nparents = __clk_get_num_parents(hw->clk); + long best_rate = -EINVAL; + int i; + + *best_parent_clk = NULL; + + for (i = 0; i < nparents; i++) { + unsigned long parent_rate; + unsigned long tmp_rate; + struct clk *parent; + unsigned long div; + int shift; + + parent = clk_get_parent_by_index(hw->clk, i); + parent_rate = __clk_get_rate(parent); + div = DIV_ROUND_UP(parent_rate, rate); + + /* + * The AR100 clk contains 2 divisors: + * - one power of 2 divisor + * - one regular divisor + * + * First check if we can safely shift (or divide by a power + * of 2) without losing precision on the requested rate. + */ + shift = ffs(div) - 1; + if (shift > SUN6I_AR100_SHIFT_MAX) + shift = SUN6I_AR100_SHIFT_MAX; + + div >>= shift; + + /* + * Then if the divisor is still bigger than what the HW + * actually supports, use a bigger shift (or power of 2 + * divider) value and accept to lose some precision. + */ + while (div > SUN6I_AR100_DIV_MAX) { + shift++; + div >>= 1; + if (shift > SUN6I_AR100_SHIFT_MAX) + break; + } + + /* + * If the shift value (or power of 2 divider) is bigger + * than what the HW actually support, skip this parent. 
+ */ + if (shift > SUN6I_AR100_SHIFT_MAX) + continue; + + tmp_rate = (parent_rate >> shift) / div; + if (!*best_parent_clk || tmp_rate > best_rate) { + *best_parent_clk = parent; + *best_parent_rate = parent_rate; + best_rate = tmp_rate; + } + } + + return best_rate; +} + +static int ar100_set_parent(struct clk_hw *hw, u8 index) +{ + struct ar100_clk *clk = to_ar100_clk(hw); + u32 val = readl(clk->reg); + + if (index >= SUN6I_AR100_MAX_PARENTS) + return -EINVAL; + + val &= ~(SUN6I_AR100_MUX_MASK << SUN6I_AR100_MUX_SHIFT); + val |= (index << SUN6I_AR100_MUX_SHIFT); + writel(val, clk->reg); + + return 0; +} + +static u8 ar100_get_parent(struct clk_hw *hw) +{ + struct ar100_clk *clk = to_ar100_clk(hw); + return (readl(clk->reg) >> SUN6I_AR100_MUX_SHIFT) & + SUN6I_AR100_MUX_MASK; +} + +static int ar100_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + unsigned long div = parent_rate / rate; + struct ar100_clk *clk = to_ar100_clk(hw); + u32 val = readl(clk->reg); + int shift; + + if (parent_rate % rate) + return -EINVAL; + + shift = ffs(div) - 1; + if (shift > SUN6I_AR100_SHIFT_MAX) + shift = SUN6I_AR100_SHIFT_MAX; + + div >>= shift; + + if (div > SUN6I_AR100_DIV_MAX) + return -EINVAL; + + val &= ~((SUN6I_AR100_SHIFT_MASK << SUN6I_AR100_SHIFT_SHIFT) | + (SUN6I_AR100_DIV_MASK << SUN6I_AR100_DIV_SHIFT)); + val |= (shift << SUN6I_AR100_SHIFT_SHIFT) | + (div << SUN6I_AR100_DIV_SHIFT); + writel(val, clk->reg); + + return 0; +} + +struct clk_ops ar100_ops = { + .recalc_rate = ar100_recalc_rate, + .determine_rate = ar100_determine_rate, + .set_parent = ar100_set_parent, + .get_parent = ar100_get_parent, + .set_rate = ar100_set_rate, +}; + +static int sun6i_a31_ar100_clk_probe(struct platform_device *pdev) +{ + const char *parents[SUN6I_AR100_MAX_PARENTS]; + struct device_node *np = pdev->dev.of_node; + const char *clk_name = np->name; + struct clk_init_data init; + struct ar100_clk *ar100; + struct resource *r; + struct clk *clk; + int nparents; + int i; + + ar100 = devm_kzalloc(&pdev->dev, sizeof(*ar100), GFP_KERNEL); + if (!ar100) + return -ENOMEM; + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + ar100->reg = devm_ioremap_resource(&pdev->dev, r); + if (IS_ERR(ar100->reg)) + return PTR_ERR(ar100->reg); + + nparents = of_clk_get_parent_count(np); + if (nparents > SUN6I_AR100_MAX_PARENTS) + nparents = SUN6I_AR100_MAX_PARENTS; + + for (i = 0; i < nparents; i++) + parents[i] = of_clk_get_parent_name(np, i); + + of_property_read_string(np, "clock-output-names", &clk_name); + + init.name = clk_name; + init.ops = &ar100_ops; + init.parent_names = parents; + init.num_parents = nparents; + init.flags = 0; + + ar100->hw.init = &init; + + clk = clk_register(&pdev->dev, &ar100->hw); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + return of_clk_add_provider(np, of_clk_src_simple_get, clk); +} + +const struct of_device_id sun6i_a31_ar100_clk_dt_ids[] = { + { .compatible = "allwinner,sun6i-a31-ar100-clk" }, + { /* sentinel */ } +}; + +static struct platform_driver sun6i_a31_ar100_clk_driver = { + .driver = { + .name = "sun6i-a31-ar100-clk", + .owner = THIS_MODULE, + .of_match_table = sun6i_a31_ar100_clk_dt_ids, + }, + .probe = sun6i_a31_ar100_clk_probe, +}; +module_platform_driver(sun6i_a31_ar100_clk_driver); + +MODULE_AUTHOR("Boris BREZILLON <boris.brezillon@free-electrons.com>"); +MODULE_DESCRIPTION("Allwinner A31 AR100 clock Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/clk/sunxi/clk-sunxi.c b/drivers/clk/sunxi/clk-sunxi.c index 426483422d3..fb2ce8440f0 100644 
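To recap the rate logic in the AR100 driver above: the requested divisor is factored into a power-of-two pre-divider (a 2-bit "shift" field, up to /8) and a 5-bit linear divider (up to /32). The shift is first used losslessly, consuming only the divisor's trailing zero bits, and grows further, at the cost of precision, only when the linear divider would overflow. A standalone sketch of that factoring under the same limits (assumes div >= 1):

    #include <stdbool.h>

    #define AR100_SHIFT_MAX 3       /* 2-bit field: pre-divide by up to 8 */
    #define AR100_DIV_MAX   32      /* 5-bit field: stores lin_div - 1 */

    /* Factor div as (1 << shift) * lin_div; false if unrepresentable. */
    static bool ar100_factor_div(unsigned long div, int *shift,
                                 unsigned long *lin_div)
    {
            int s = __builtin_ctzl(div);    /* == ffs(div) - 1 for div >= 1 */

            if (s > AR100_SHIFT_MAX)
                    s = AR100_SHIFT_MAX;
            div >>= s;

            while (div > AR100_DIV_MAX) {   /* lossy: shift further */
                    s++;
                    div >>= 1;
                    if (s > AR100_SHIFT_MAX)
                            return false;
            }

            *shift = s;
            *lin_div = div;
            return true;
    }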
--- a/drivers/clk/sunxi/clk-sunxi.c +++ b/drivers/clk/sunxi/clk-sunxi.c @@ -28,63 +28,6 @@ static DEFINE_SPINLOCK(clk_lock); #define SUNXI_MAX_PARENTS 5 /** - * sun4i_osc_clk_setup() - Setup function for gatable oscillator - */ - -#define SUNXI_OSC24M_GATE 0 - -static void __init sun4i_osc_clk_setup(struct device_node *node) -{ - struct clk *clk; - struct clk_fixed_rate *fixed; - struct clk_gate *gate; - const char *clk_name = node->name; - u32 rate; - - if (of_property_read_u32(node, "clock-frequency", &rate)) - return; - - /* allocate fixed-rate and gate clock structs */ - fixed = kzalloc(sizeof(struct clk_fixed_rate), GFP_KERNEL); - if (!fixed) - return; - gate = kzalloc(sizeof(struct clk_gate), GFP_KERNEL); - if (!gate) - goto err_free_fixed; - - of_property_read_string(node, "clock-output-names", &clk_name); - - /* set up gate and fixed rate properties */ - gate->reg = of_iomap(node, 0); - gate->bit_idx = SUNXI_OSC24M_GATE; - gate->lock = &clk_lock; - fixed->fixed_rate = rate; - - clk = clk_register_composite(NULL, clk_name, - NULL, 0, - NULL, NULL, - &fixed->hw, &clk_fixed_rate_ops, - &gate->hw, &clk_gate_ops, - CLK_IS_ROOT); - - if (IS_ERR(clk)) - goto err_free_gate; - - of_clk_add_provider(node, of_clk_src_simple_get, clk); - clk_register_clkdev(clk, clk_name, NULL); - - return; - -err_free_gate: - kfree(gate); -err_free_fixed: - kfree(fixed); -} -CLK_OF_DECLARE(sun4i_osc, "allwinner,sun4i-a10-osc-clk", sun4i_osc_clk_setup); - - - -/** * sun4i_get_pll1_factors() - calculates n, k, m, p factors for PLL1 * PLL1 rate is calculated as follows * rate = (parent_rate * n * (k + 1) >> p) / (m + 1); @@ -408,104 +351,6 @@ static void sun7i_a20_get_out_factors(u32 *freq, u32 parent_rate, *p = calcp; } - - -/** - * sun7i_a20_gmac_clk_setup - Setup function for A20/A31 GMAC clock module - * - * This clock looks something like this - * ________________________ - * MII TX clock from PHY >-----|___________ _________|----> to GMAC core - * GMAC Int. RGMII TX clk >----|___________\__/__gate---|----> to PHY - * Ext. 125MHz RGMII TX clk >--|__divider__/ | - * |________________________| - * - * The external 125 MHz reference is optional, i.e. GMAC can use its - * internal TX clock just fine. The A31 GMAC clock module does not have - * the divider controls for the external reference. - * - * To keep it simple, let the GMAC use either the MII TX clock for MII mode, - * and its internal TX clock for GMII and RGMII modes. The GMAC driver should - * select the appropriate source and gate/ungate the output to the PHY. - * - * Only the GMAC should use this clock. Altering the clock so that it doesn't - * match the GMAC's operation parameters will result in the GMAC not being - * able to send traffic out. The GMAC driver should set the clock rate and - * enable/disable this clock to configure the required state. The clock - * driver then responds by auto-reparenting the clock. 
- */ - -#define SUN7I_A20_GMAC_GPIT 2 -#define SUN7I_A20_GMAC_MASK 0x3 -#define SUN7I_A20_GMAC_PARENTS 2 - -static void __init sun7i_a20_gmac_clk_setup(struct device_node *node) -{ - struct clk *clk; - struct clk_mux *mux; - struct clk_gate *gate; - const char *clk_name = node->name; - const char *parents[SUN7I_A20_GMAC_PARENTS]; - void *reg; - - if (of_property_read_string(node, "clock-output-names", &clk_name)) - return; - - /* allocate mux and gate clock structs */ - mux = kzalloc(sizeof(struct clk_mux), GFP_KERNEL); - if (!mux) - return; - - gate = kzalloc(sizeof(struct clk_gate), GFP_KERNEL); - if (!gate) - goto free_mux; - - /* gmac clock requires exactly 2 parents */ - parents[0] = of_clk_get_parent_name(node, 0); - parents[1] = of_clk_get_parent_name(node, 1); - if (!parents[0] || !parents[1]) - goto free_gate; - - reg = of_iomap(node, 0); - if (!reg) - goto free_gate; - - /* set up gate and fixed rate properties */ - gate->reg = reg; - gate->bit_idx = SUN7I_A20_GMAC_GPIT; - gate->lock = &clk_lock; - mux->reg = reg; - mux->mask = SUN7I_A20_GMAC_MASK; - mux->flags = CLK_MUX_INDEX_BIT; - mux->lock = &clk_lock; - - clk = clk_register_composite(NULL, clk_name, - parents, SUN7I_A20_GMAC_PARENTS, - &mux->hw, &clk_mux_ops, - NULL, NULL, - &gate->hw, &clk_gate_ops, - 0); - - if (IS_ERR(clk)) - goto iounmap_reg; - - of_clk_add_provider(node, of_clk_src_simple_get, clk); - clk_register_clkdev(clk, clk_name, NULL); - - return; - -iounmap_reg: - iounmap(reg); -free_gate: - kfree(gate); -free_mux: - kfree(mux); -} -CLK_OF_DECLARE(sun7i_a20_gmac, "allwinner,sun7i-a20-gmac-clk", - sun7i_a20_gmac_clk_setup); - - - /** * clk_sunxi_mmc_phase_control() - configures MMC clock phase control */ @@ -1009,6 +854,11 @@ static const struct gates_data sun5i_a13_usb_gates_data __initconst = { .reset_mask = 0x03, }; +static const struct gates_data sun6i_a31_usb_gates_data __initconst = { + .mask = { BIT(18) | BIT(17) | BIT(16) | BIT(10) | BIT(9) | BIT(8) }, + .reset_mask = BIT(2) | BIT(1) | BIT(0), +}; + static void __init sunxi_gates_clk_setup(struct device_node *node, struct gates_data *data) { @@ -1304,6 +1154,7 @@ static const struct of_device_id clk_gates_match[] __initconst = { {.compatible = "allwinner,sun6i-a31-apb2-gates-clk", .data = &sun6i_a31_apb2_gates_data,}, {.compatible = "allwinner,sun4i-a10-usb-clk", .data = &sun4i_a10_usb_gates_data,}, {.compatible = "allwinner,sun5i-a13-usb-clk", .data = &sun5i_a13_usb_gates_data,}, + {.compatible = "allwinner,sun6i-a31-usb-clk", .data = &sun6i_a31_usb_gates_data,}, {} }; @@ -1321,33 +1172,10 @@ static void __init of_sunxi_table_clock_setup(const struct of_device_id *clk_mat } } -/** - * System clock protection - * - * By enabling these critical clocks, we prevent their accidental gating - * by the framework - */ -static void __init sunxi_clock_protect(void) +static void __init sunxi_init_clocks(const char *clocks[], int nclocks) { - struct clk *clk; - - /* memory bus clock - sun5i+ */ - clk = clk_get(NULL, "mbus"); - if (!IS_ERR(clk)) { - clk_prepare_enable(clk); - clk_put(clk); - } - - /* DDR clock - sun4i+ */ - clk = clk_get(NULL, "pll5_ddr"); - if (!IS_ERR(clk)) { - clk_prepare_enable(clk); - clk_put(clk); - } -} + unsigned int i; -static void __init sunxi_init_clocks(struct device_node *np) -{ /* Register factor clocks */ of_sunxi_table_clock_setup(clk_factors_match, sunxi_factors_clk_setup); @@ -1363,11 +1191,48 @@ static void __init sunxi_init_clocks(struct device_node *np) /* Register gate clocks */ of_sunxi_table_clock_setup(clk_gates_match, 
sunxi_gates_clk_setup); - /* Enable core system clocks */ - sunxi_clock_protect(); + /* Protect the clocks that needs to stay on */ + for (i = 0; i < nclocks; i++) { + struct clk *clk = clk_get(NULL, clocks[i]); + + if (!IS_ERR(clk)) + clk_prepare_enable(clk); + } +} + +static const char *sun4i_a10_critical_clocks[] __initdata = { + "pll5_ddr", +}; + +static void __init sun4i_a10_init_clocks(struct device_node *node) +{ + sunxi_init_clocks(sun4i_a10_critical_clocks, + ARRAY_SIZE(sun4i_a10_critical_clocks)); +} +CLK_OF_DECLARE(sun4i_a10_clk_init, "allwinner,sun4i-a10", sun4i_a10_init_clocks); + +static const char *sun5i_critical_clocks[] __initdata = { + "mbus", + "pll5_ddr", +}; + +static void __init sun5i_init_clocks(struct device_node *node) +{ + sunxi_init_clocks(sun5i_critical_clocks, + ARRAY_SIZE(sun5i_critical_clocks)); +} +CLK_OF_DECLARE(sun5i_a10s_clk_init, "allwinner,sun5i-a10s", sun5i_init_clocks); +CLK_OF_DECLARE(sun5i_a13_clk_init, "allwinner,sun5i-a13", sun5i_init_clocks); +CLK_OF_DECLARE(sun7i_a20_clk_init, "allwinner,sun7i-a20", sun5i_init_clocks); + +static const char *sun6i_critical_clocks[] __initdata = { + "cpu", + "ahb1_sdram", +}; + +static void __init sun6i_init_clocks(struct device_node *node) +{ + sunxi_init_clocks(sun6i_critical_clocks, + ARRAY_SIZE(sun6i_critical_clocks)); } -CLK_OF_DECLARE(sun4i_a10_clk_init, "allwinner,sun4i-a10", sunxi_init_clocks); -CLK_OF_DECLARE(sun5i_a10s_clk_init, "allwinner,sun5i-a10s", sunxi_init_clocks); -CLK_OF_DECLARE(sun5i_a13_clk_init, "allwinner,sun5i-a13", sunxi_init_clocks); -CLK_OF_DECLARE(sun6i_a31_clk_init, "allwinner,sun6i-a31", sunxi_init_clocks); -CLK_OF_DECLARE(sun7i_a20_clk_init, "allwinner,sun7i-a20", sunxi_init_clocks); +CLK_OF_DECLARE(sun6i_a31_clk_init, "allwinner,sun6i-a31", sun6i_init_clocks); diff --git a/drivers/clk/ti/Makefile b/drivers/clk/ti/Makefile index 4319d4031aa..ed4d0aaf891 100644 --- a/drivers/clk/ti/Makefile +++ b/drivers/clk/ti/Makefile @@ -3,9 +3,11 @@ obj-y += clk.o autoidle.o clockdomain.o clk-common = dpll.o composite.o divider.o gate.o \ fixed-factor.o mux.o apll.o obj-$(CONFIG_SOC_AM33XX) += $(clk-common) clk-33xx.o +obj-$(CONFIG_ARCH_OMAP2) += $(clk-common) interface.o clk-2xxx.o obj-$(CONFIG_ARCH_OMAP3) += $(clk-common) interface.o clk-3xxx.o obj-$(CONFIG_ARCH_OMAP4) += $(clk-common) clk-44xx.o obj-$(CONFIG_SOC_OMAP5) += $(clk-common) clk-54xx.o -obj-$(CONFIG_SOC_DRA7XX) += $(clk-common) clk-7xx.o +obj-$(CONFIG_SOC_DRA7XX) += $(clk-common) clk-7xx.o \ + clk-dra7-atl.o obj-$(CONFIG_SOC_AM43XX) += $(clk-common) clk-43xx.o endif diff --git a/drivers/clk/ti/apll.c b/drivers/clk/ti/apll.c index b986f61f5a7..5428c9c547c 100644 --- a/drivers/clk/ti/apll.c +++ b/drivers/clk/ti/apll.c @@ -221,3 +221,184 @@ cleanup: kfree(init); } CLK_OF_DECLARE(dra7_apll_clock, "ti,dra7-apll-clock", of_dra7_apll_setup); + +#define OMAP2_EN_APLL_LOCKED 0x3 +#define OMAP2_EN_APLL_STOPPED 0x0 + +static int omap2_apll_is_enabled(struct clk_hw *hw) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + struct dpll_data *ad = clk->dpll_data; + u32 v; + + v = ti_clk_ll_ops->clk_readl(ad->control_reg); + v &= ad->enable_mask; + + v >>= __ffs(ad->enable_mask); + + return v == OMAP2_EN_APLL_LOCKED ? 
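The sunxi init rework above replaces the catch-all sunxi_init_clocks()/sunxi_clock_protect() pair with per-SoC CLK_OF_DECLARE entries, each naming the clocks that must stay running. A list is needed because the common clock framework's clk_disable_unused() pass gates every clock without an enable count at late init; handing such clocks a permanent reference keeps them alive. A condensed sketch of the protection loop (clock name taken from the sun4i list above):

    #include <linux/clk.h>
    #include <linux/err.h>
    #include <linux/kernel.h>

    static const char *critical_clocks[] = {
            "pll5_ddr",     /* DDR clock must never be gated */
    };

    /* Take a permanent prepare+enable reference on each critical clock
     * so clk_disable_unused() leaves it untouched. */
    static void protect_critical_clocks(void)
    {
            unsigned int i;

            for (i = 0; i < ARRAY_SIZE(critical_clocks); i++) {
                    struct clk *clk = clk_get(NULL, critical_clocks[i]);

                    if (!IS_ERR(clk))
                            clk_prepare_enable(clk);
            }
    }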
1 : 0; +} + +static unsigned long omap2_apll_recalc(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + + if (omap2_apll_is_enabled(hw)) + return clk->fixed_rate; + + return 0; +} + +static int omap2_apll_enable(struct clk_hw *hw) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + struct dpll_data *ad = clk->dpll_data; + u32 v; + int i = 0; + + v = ti_clk_ll_ops->clk_readl(ad->control_reg); + v &= ~ad->enable_mask; + v |= OMAP2_EN_APLL_LOCKED << __ffs(ad->enable_mask); + ti_clk_ll_ops->clk_writel(v, ad->control_reg); + + while (1) { + v = ti_clk_ll_ops->clk_readl(ad->idlest_reg); + if (v & ad->idlest_mask) + break; + if (i > MAX_APLL_WAIT_TRIES) + break; + i++; + udelay(1); + } + + if (i == MAX_APLL_WAIT_TRIES) { + pr_warn("%s failed to transition to locked\n", + __clk_get_name(clk->hw.clk)); + return -EBUSY; + } + + return 0; +} + +static void omap2_apll_disable(struct clk_hw *hw) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + struct dpll_data *ad = clk->dpll_data; + u32 v; + + v = ti_clk_ll_ops->clk_readl(ad->control_reg); + v &= ~ad->enable_mask; + v |= OMAP2_EN_APLL_STOPPED << __ffs(ad->enable_mask); + ti_clk_ll_ops->clk_writel(v, ad->control_reg); +} + +static struct clk_ops omap2_apll_ops = { + .enable = &omap2_apll_enable, + .disable = &omap2_apll_disable, + .is_enabled = &omap2_apll_is_enabled, + .recalc_rate = &omap2_apll_recalc, +}; + +static void omap2_apll_set_autoidle(struct clk_hw_omap *clk, u32 val) +{ + struct dpll_data *ad = clk->dpll_data; + u32 v; + + v = ti_clk_ll_ops->clk_readl(ad->autoidle_reg); + v &= ~ad->autoidle_mask; + v |= val << __ffs(ad->autoidle_mask); + ti_clk_ll_ops->clk_writel(v, ad->control_reg); +} + +#define OMAP2_APLL_AUTOIDLE_LOW_POWER_STOP 0x3 +#define OMAP2_APLL_AUTOIDLE_DISABLE 0x0 + +static void omap2_apll_allow_idle(struct clk_hw_omap *clk) +{ + omap2_apll_set_autoidle(clk, OMAP2_APLL_AUTOIDLE_LOW_POWER_STOP); +} + +static void omap2_apll_deny_idle(struct clk_hw_omap *clk) +{ + omap2_apll_set_autoidle(clk, OMAP2_APLL_AUTOIDLE_DISABLE); +} + +static struct clk_hw_omap_ops omap2_apll_hwops = { + .allow_idle = &omap2_apll_allow_idle, + .deny_idle = &omap2_apll_deny_idle, +}; + +static void __init of_omap2_apll_setup(struct device_node *node) +{ + struct dpll_data *ad = NULL; + struct clk_hw_omap *clk_hw = NULL; + struct clk_init_data *init = NULL; + struct clk *clk; + const char *parent_name; + u32 val; + + ad = kzalloc(sizeof(*clk_hw), GFP_KERNEL); + clk_hw = kzalloc(sizeof(*clk_hw), GFP_KERNEL); + init = kzalloc(sizeof(*init), GFP_KERNEL); + + if (!ad || !clk_hw || !init) + goto cleanup; + + clk_hw->dpll_data = ad; + clk_hw->hw.init = init; + init->ops = &omap2_apll_ops; + init->name = node->name; + clk_hw->ops = &omap2_apll_hwops; + + init->num_parents = of_clk_get_parent_count(node); + if (init->num_parents != 1) { + pr_err("%s must have one parent\n", node->name); + goto cleanup; + } + + parent_name = of_clk_get_parent_name(node, 0); + init->parent_names = &parent_name; + + if (of_property_read_u32(node, "ti,clock-frequency", &val)) { + pr_err("%s missing clock-frequency\n", node->name); + goto cleanup; + } + clk_hw->fixed_rate = val; + + if (of_property_read_u32(node, "ti,bit-shift", &val)) { + pr_err("%s missing bit-shift\n", node->name); + goto cleanup; + } + + clk_hw->enable_bit = val; + ad->enable_mask = 0x3 << val; + ad->autoidle_mask = 0x3 << val; + + if (of_property_read_u32(node, "ti,idlest-shift", &val)) { + pr_err("%s missing idlest-shift\n", node->name); + goto cleanup; + } + 
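The APLL enable path above spins until the idlest bit reports locked, bounded by MAX_APLL_WAIT_TRIES. The exit bookkeeping of such open-coded while (1) loops is easy to get wrong; as written, a timeout leaves i at MAX_APLL_WAIT_TRIES + 1, which the final i == MAX_APLL_WAIT_TRIES test does not catch, so a for loop with an explicit retry budget is the safer shape. A generic sketch (assuming the low-level accessor is a plain MMIO read):

    #include <linux/delay.h>
    #include <linux/errno.h>
    #include <linux/io.h>
    #include <linux/types.h>

    #define LOCK_WAIT_TRIES 1000000 /* hypothetical retry budget */

    /* Poll a status register until mask is set; -EBUSY on timeout. */
    static int wait_for_lock(void __iomem *idlest, u32 mask)
    {
            int i;

            for (i = 0; i < LOCK_WAIT_TRIES; i++) {
                    if (readl(idlest) & mask)
                            return 0;
                    udelay(1);
            }

            return -EBUSY;
    }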
+ ad->idlest_mask = 1 << val; + + ad->control_reg = ti_clk_get_reg_addr(node, 0); + ad->autoidle_reg = ti_clk_get_reg_addr(node, 1); + ad->idlest_reg = ti_clk_get_reg_addr(node, 2); + + if (!ad->control_reg || !ad->autoidle_reg || !ad->idlest_reg) + goto cleanup; + + clk = clk_register(NULL, &clk_hw->hw); + if (!IS_ERR(clk)) { + of_clk_add_provider(node, of_clk_src_simple_get, clk); + kfree(init); + return; + } +cleanup: + kfree(ad); + kfree(clk_hw); + kfree(init); +} +CLK_OF_DECLARE(omap2_apll_clock, "ti,omap2-apll-clock", + of_omap2_apll_setup); diff --git a/drivers/clk/ti/clk-2xxx.c b/drivers/clk/ti/clk-2xxx.c new file mode 100644 index 00000000000..c808ab3d2bb --- /dev/null +++ b/drivers/clk/ti/clk-2xxx.c @@ -0,0 +1,256 @@ +/* + * OMAP2 Clock init + * + * Copyright (C) 2013 Texas Instruments, Inc + * Tero Kristo (t-kristo@ti.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/clk-provider.h> +#include <linux/clk/ti.h> + +static struct ti_dt_clk omap2xxx_clks[] = { + DT_CLK(NULL, "func_32k_ck", "func_32k_ck"), + DT_CLK(NULL, "secure_32k_ck", "secure_32k_ck"), + DT_CLK(NULL, "virt_12m_ck", "virt_12m_ck"), + DT_CLK(NULL, "virt_13m_ck", "virt_13m_ck"), + DT_CLK(NULL, "virt_19200000_ck", "virt_19200000_ck"), + DT_CLK(NULL, "virt_26m_ck", "virt_26m_ck"), + DT_CLK(NULL, "aplls_clkin_ck", "aplls_clkin_ck"), + DT_CLK(NULL, "aplls_clkin_x2_ck", "aplls_clkin_x2_ck"), + DT_CLK(NULL, "osc_ck", "osc_ck"), + DT_CLK(NULL, "sys_ck", "sys_ck"), + DT_CLK(NULL, "alt_ck", "alt_ck"), + DT_CLK(NULL, "mcbsp_clks", "mcbsp_clks"), + DT_CLK(NULL, "dpll_ck", "dpll_ck"), + DT_CLK(NULL, "apll96_ck", "apll96_ck"), + DT_CLK(NULL, "apll54_ck", "apll54_ck"), + DT_CLK(NULL, "func_54m_ck", "func_54m_ck"), + DT_CLK(NULL, "core_ck", "core_ck"), + DT_CLK(NULL, "func_96m_ck", "func_96m_ck"), + DT_CLK(NULL, "func_48m_ck", "func_48m_ck"), + DT_CLK(NULL, "func_12m_ck", "func_12m_ck"), + DT_CLK(NULL, "sys_clkout_src", "sys_clkout_src"), + DT_CLK(NULL, "sys_clkout", "sys_clkout"), + DT_CLK(NULL, "emul_ck", "emul_ck"), + DT_CLK(NULL, "mpu_ck", "mpu_ck"), + DT_CLK(NULL, "dsp_fck", "dsp_fck"), + DT_CLK(NULL, "gfx_3d_fck", "gfx_3d_fck"), + DT_CLK(NULL, "gfx_2d_fck", "gfx_2d_fck"), + DT_CLK(NULL, "gfx_ick", "gfx_ick"), + DT_CLK("omapdss_dss", "ick", "dss_ick"), + DT_CLK(NULL, "dss_ick", "dss_ick"), + DT_CLK(NULL, "dss1_fck", "dss1_fck"), + DT_CLK(NULL, "dss2_fck", "dss2_fck"), + DT_CLK(NULL, "dss_54m_fck", "dss_54m_fck"), + DT_CLK(NULL, "core_l3_ck", "core_l3_ck"), + DT_CLK(NULL, "ssi_fck", "ssi_ssr_sst_fck"), + DT_CLK(NULL, "usb_l4_ick", "usb_l4_ick"), + DT_CLK(NULL, "l4_ck", "l4_ck"), + DT_CLK(NULL, "ssi_l4_ick", "ssi_l4_ick"), + DT_CLK(NULL, "gpt1_ick", "gpt1_ick"), + DT_CLK(NULL, "gpt1_fck", "gpt1_fck"), + DT_CLK(NULL, "gpt2_ick", "gpt2_ick"), + DT_CLK(NULL, "gpt2_fck", "gpt2_fck"), + DT_CLK(NULL, "gpt3_ick", "gpt3_ick"), + DT_CLK(NULL, "gpt3_fck", "gpt3_fck"), + DT_CLK(NULL, "gpt4_ick", "gpt4_ick"), + DT_CLK(NULL, "gpt4_fck", "gpt4_fck"), + DT_CLK(NULL, "gpt5_ick", "gpt5_ick"), + DT_CLK(NULL, "gpt5_fck", "gpt5_fck"), + DT_CLK(NULL, "gpt6_ick", 
"gpt6_ick"), + DT_CLK(NULL, "gpt6_fck", "gpt6_fck"), + DT_CLK(NULL, "gpt7_ick", "gpt7_ick"), + DT_CLK(NULL, "gpt7_fck", "gpt7_fck"), + DT_CLK(NULL, "gpt8_ick", "gpt8_ick"), + DT_CLK(NULL, "gpt8_fck", "gpt8_fck"), + DT_CLK(NULL, "gpt9_ick", "gpt9_ick"), + DT_CLK(NULL, "gpt9_fck", "gpt9_fck"), + DT_CLK(NULL, "gpt10_ick", "gpt10_ick"), + DT_CLK(NULL, "gpt10_fck", "gpt10_fck"), + DT_CLK(NULL, "gpt11_ick", "gpt11_ick"), + DT_CLK(NULL, "gpt11_fck", "gpt11_fck"), + DT_CLK(NULL, "gpt12_ick", "gpt12_ick"), + DT_CLK(NULL, "gpt12_fck", "gpt12_fck"), + DT_CLK("omap-mcbsp.1", "ick", "mcbsp1_ick"), + DT_CLK(NULL, "mcbsp1_ick", "mcbsp1_ick"), + DT_CLK(NULL, "mcbsp1_fck", "mcbsp1_fck"), + DT_CLK("omap-mcbsp.2", "ick", "mcbsp2_ick"), + DT_CLK(NULL, "mcbsp2_ick", "mcbsp2_ick"), + DT_CLK(NULL, "mcbsp2_fck", "mcbsp2_fck"), + DT_CLK("omap2_mcspi.1", "ick", "mcspi1_ick"), + DT_CLK(NULL, "mcspi1_ick", "mcspi1_ick"), + DT_CLK(NULL, "mcspi1_fck", "mcspi1_fck"), + DT_CLK("omap2_mcspi.2", "ick", "mcspi2_ick"), + DT_CLK(NULL, "mcspi2_ick", "mcspi2_ick"), + DT_CLK(NULL, "mcspi2_fck", "mcspi2_fck"), + DT_CLK(NULL, "uart1_ick", "uart1_ick"), + DT_CLK(NULL, "uart1_fck", "uart1_fck"), + DT_CLK(NULL, "uart2_ick", "uart2_ick"), + DT_CLK(NULL, "uart2_fck", "uart2_fck"), + DT_CLK(NULL, "uart3_ick", "uart3_ick"), + DT_CLK(NULL, "uart3_fck", "uart3_fck"), + DT_CLK(NULL, "gpios_ick", "gpios_ick"), + DT_CLK(NULL, "gpios_fck", "gpios_fck"), + DT_CLK("omap_wdt", "ick", "mpu_wdt_ick"), + DT_CLK(NULL, "mpu_wdt_ick", "mpu_wdt_ick"), + DT_CLK(NULL, "mpu_wdt_fck", "mpu_wdt_fck"), + DT_CLK(NULL, "sync_32k_ick", "sync_32k_ick"), + DT_CLK(NULL, "wdt1_ick", "wdt1_ick"), + DT_CLK(NULL, "omapctrl_ick", "omapctrl_ick"), + DT_CLK("omap24xxcam", "fck", "cam_fck"), + DT_CLK(NULL, "cam_fck", "cam_fck"), + DT_CLK("omap24xxcam", "ick", "cam_ick"), + DT_CLK(NULL, "cam_ick", "cam_ick"), + DT_CLK(NULL, "mailboxes_ick", "mailboxes_ick"), + DT_CLK(NULL, "wdt4_ick", "wdt4_ick"), + DT_CLK(NULL, "wdt4_fck", "wdt4_fck"), + DT_CLK(NULL, "mspro_ick", "mspro_ick"), + DT_CLK(NULL, "mspro_fck", "mspro_fck"), + DT_CLK(NULL, "fac_ick", "fac_ick"), + DT_CLK(NULL, "fac_fck", "fac_fck"), + DT_CLK("omap_hdq.0", "ick", "hdq_ick"), + DT_CLK(NULL, "hdq_ick", "hdq_ick"), + DT_CLK("omap_hdq.0", "fck", "hdq_fck"), + DT_CLK(NULL, "hdq_fck", "hdq_fck"), + DT_CLK("omap_i2c.1", "ick", "i2c1_ick"), + DT_CLK(NULL, "i2c1_ick", "i2c1_ick"), + DT_CLK("omap_i2c.2", "ick", "i2c2_ick"), + DT_CLK(NULL, "i2c2_ick", "i2c2_ick"), + DT_CLK(NULL, "gpmc_fck", "gpmc_fck"), + DT_CLK(NULL, "sdma_fck", "sdma_fck"), + DT_CLK(NULL, "sdma_ick", "sdma_ick"), + DT_CLK(NULL, "sdrc_ick", "sdrc_ick"), + DT_CLK(NULL, "des_ick", "des_ick"), + DT_CLK("omap-sham", "ick", "sha_ick"), + DT_CLK(NULL, "sha_ick", "sha_ick"), + DT_CLK("omap_rng", "ick", "rng_ick"), + DT_CLK(NULL, "rng_ick", "rng_ick"), + DT_CLK("omap-aes", "ick", "aes_ick"), + DT_CLK(NULL, "aes_ick", "aes_ick"), + DT_CLK(NULL, "pka_ick", "pka_ick"), + DT_CLK(NULL, "usb_fck", "usb_fck"), + DT_CLK(NULL, "timer_32k_ck", "func_32k_ck"), + DT_CLK(NULL, "timer_sys_ck", "sys_ck"), + DT_CLK(NULL, "timer_ext_ck", "alt_ck"), + { .node_name = NULL }, +}; + +static struct ti_dt_clk omap2420_clks[] = { + DT_CLK(NULL, "sys_clkout2_src", "sys_clkout2_src"), + DT_CLK(NULL, "sys_clkout2", "sys_clkout2"), + DT_CLK(NULL, "dsp_ick", "dsp_ick"), + DT_CLK(NULL, "iva1_ifck", "iva1_ifck"), + DT_CLK(NULL, "iva1_mpu_int_ifck", "iva1_mpu_int_ifck"), + DT_CLK(NULL, "wdt3_ick", "wdt3_ick"), + DT_CLK(NULL, "wdt3_fck", "wdt3_fck"), + DT_CLK("mmci-omap.0", "ick", "mmc_ick"), + 
DT_CLK(NULL, "mmc_ick", "mmc_ick"), + DT_CLK("mmci-omap.0", "fck", "mmc_fck"), + DT_CLK(NULL, "mmc_fck", "mmc_fck"), + DT_CLK(NULL, "eac_ick", "eac_ick"), + DT_CLK(NULL, "eac_fck", "eac_fck"), + DT_CLK(NULL, "i2c1_fck", "i2c1_fck"), + DT_CLK(NULL, "i2c2_fck", "i2c2_fck"), + DT_CLK(NULL, "vlynq_ick", "vlynq_ick"), + DT_CLK(NULL, "vlynq_fck", "vlynq_fck"), + DT_CLK("musb-hdrc", "fck", "osc_ck"), + { .node_name = NULL }, +}; + +static struct ti_dt_clk omap2430_clks[] = { + DT_CLK("twl", "fck", "osc_ck"), + DT_CLK(NULL, "iva2_1_ick", "iva2_1_ick"), + DT_CLK(NULL, "mdm_ick", "mdm_ick"), + DT_CLK(NULL, "mdm_osc_ck", "mdm_osc_ck"), + DT_CLK("omap-mcbsp.3", "ick", "mcbsp3_ick"), + DT_CLK(NULL, "mcbsp3_ick", "mcbsp3_ick"), + DT_CLK(NULL, "mcbsp3_fck", "mcbsp3_fck"), + DT_CLK("omap-mcbsp.4", "ick", "mcbsp4_ick"), + DT_CLK(NULL, "mcbsp4_ick", "mcbsp4_ick"), + DT_CLK(NULL, "mcbsp4_fck", "mcbsp4_fck"), + DT_CLK("omap-mcbsp.5", "ick", "mcbsp5_ick"), + DT_CLK(NULL, "mcbsp5_ick", "mcbsp5_ick"), + DT_CLK(NULL, "mcbsp5_fck", "mcbsp5_fck"), + DT_CLK("omap2_mcspi.3", "ick", "mcspi3_ick"), + DT_CLK(NULL, "mcspi3_ick", "mcspi3_ick"), + DT_CLK(NULL, "mcspi3_fck", "mcspi3_fck"), + DT_CLK(NULL, "icr_ick", "icr_ick"), + DT_CLK(NULL, "i2chs1_fck", "i2chs1_fck"), + DT_CLK(NULL, "i2chs2_fck", "i2chs2_fck"), + DT_CLK("musb-omap2430", "ick", "usbhs_ick"), + DT_CLK(NULL, "usbhs_ick", "usbhs_ick"), + DT_CLK("omap_hsmmc.0", "ick", "mmchs1_ick"), + DT_CLK(NULL, "mmchs1_ick", "mmchs1_ick"), + DT_CLK(NULL, "mmchs1_fck", "mmchs1_fck"), + DT_CLK("omap_hsmmc.1", "ick", "mmchs2_ick"), + DT_CLK(NULL, "mmchs2_ick", "mmchs2_ick"), + DT_CLK(NULL, "mmchs2_fck", "mmchs2_fck"), + DT_CLK(NULL, "gpio5_ick", "gpio5_ick"), + DT_CLK(NULL, "gpio5_fck", "gpio5_fck"), + DT_CLK(NULL, "mdm_intc_ick", "mdm_intc_ick"), + DT_CLK("omap_hsmmc.0", "mmchsdb_fck", "mmchsdb1_fck"), + DT_CLK(NULL, "mmchsdb1_fck", "mmchsdb1_fck"), + DT_CLK("omap_hsmmc.1", "mmchsdb_fck", "mmchsdb2_fck"), + DT_CLK(NULL, "mmchsdb2_fck", "mmchsdb2_fck"), + { .node_name = NULL }, +}; + +static const char *enable_init_clks[] = { + "apll96_ck", + "apll54_ck", + "sync_32k_ick", + "omapctrl_ick", + "gpmc_fck", + "sdrc_ick", +}; + +enum { + OMAP2_SOC_OMAP2420, + OMAP2_SOC_OMAP2430, +}; + +static int __init omap2xxx_dt_clk_init(int soc_type) +{ + ti_dt_clocks_register(omap2xxx_clks); + + if (soc_type == OMAP2_SOC_OMAP2420) + ti_dt_clocks_register(omap2420_clks); + else + ti_dt_clocks_register(omap2430_clks); + + omap2xxx_clkt_vps_init(); + + omap2_clk_disable_autoidle_all(); + + omap2_clk_enable_init_clocks(enable_init_clks, + ARRAY_SIZE(enable_init_clks)); + + pr_info("Clocking rate (Crystal/DPLL/MPU): %ld.%01ld/%ld/%ld MHz\n", + (clk_get_rate(clk_get_sys(NULL, "sys_ck")) / 1000000), + (clk_get_rate(clk_get_sys(NULL, "sys_ck")) / 100000) % 10, + (clk_get_rate(clk_get_sys(NULL, "dpll_ck")) / 1000000), + (clk_get_rate(clk_get_sys(NULL, "mpu_ck")) / 1000000)); + + return 0; +} + +int __init omap2420_dt_clk_init(void) +{ + return omap2xxx_dt_clk_init(OMAP2_SOC_OMAP2420); +} + +int __init omap2430_dt_clk_init(void) +{ + return omap2xxx_dt_clk_init(OMAP2_SOC_OMAP2430); +} diff --git a/drivers/clk/ti/clk-54xx.c b/drivers/clk/ti/clk-54xx.c index 08f3d1b915b..5e183993e3e 100644 --- a/drivers/clk/ti/clk-54xx.c +++ b/drivers/clk/ti/clk-54xx.c @@ -240,6 +240,12 @@ int __init omap5xxx_dt_clk_init(void) if (rc) pr_err("%s: failed to configure ABE DPLL!\n", __func__); + abe_dpll = clk_get_sys(NULL, "dpll_abe_m2x2_ck"); + if (!rc) + rc = clk_set_rate(abe_dpll, OMAP5_DPLL_ABE_DEFFREQ * 2); + if (rc) 
+ pr_err("%s: failed to configure ABE m2x2 DPLL!\n", __func__); + usb_dpll = clk_get_sys(NULL, "dpll_usb_ck"); rc = clk_set_rate(usb_dpll, OMAP5_DPLL_USB_DEFFREQ); if (rc) diff --git a/drivers/clk/ti/clk-7xx.c b/drivers/clk/ti/clk-7xx.c index f7e40734c81..e1581335937 100644 --- a/drivers/clk/ti/clk-7xx.c +++ b/drivers/clk/ti/clk-7xx.c @@ -24,7 +24,7 @@ static struct ti_dt_clk dra7xx_clks[] = { DT_CLK(NULL, "atl_clkin0_ck", "atl_clkin0_ck"), DT_CLK(NULL, "atl_clkin1_ck", "atl_clkin1_ck"), DT_CLK(NULL, "atl_clkin2_ck", "atl_clkin2_ck"), - DT_CLK(NULL, "atlclkin3_ck", "atlclkin3_ck"), + DT_CLK(NULL, "atl_clkin3_ck", "atl_clkin3_ck"), DT_CLK(NULL, "hdmi_clkin_ck", "hdmi_clkin_ck"), DT_CLK(NULL, "mlb_clkin_ck", "mlb_clkin_ck"), DT_CLK(NULL, "mlbp_clkin_ck", "mlbp_clkin_ck"), diff --git a/drivers/clk/ti/clk-dra7-atl.c b/drivers/clk/ti/clk-dra7-atl.c new file mode 100644 index 00000000000..4a65b410e4d --- /dev/null +++ b/drivers/clk/ti/clk-dra7-atl.c @@ -0,0 +1,312 @@ +/* + * DRA7 ATL (Audio Tracking Logic) clock driver + * + * Copyright (C) 2013 Texas Instruments, Inc. + * + * Peter Ujfalusi <peter.ujfalusi@ti.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/module.h> +#include <linux/clk-provider.h> +#include <linux/slab.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> + +#define DRA7_ATL_INSTANCES 4 + +#define DRA7_ATL_PPMR_REG(id) (0x200 + (id * 0x80)) +#define DRA7_ATL_BBSR_REG(id) (0x204 + (id * 0x80)) +#define DRA7_ATL_ATLCR_REG(id) (0x208 + (id * 0x80)) +#define DRA7_ATL_SWEN_REG(id) (0x210 + (id * 0x80)) +#define DRA7_ATL_BWSMUX_REG(id) (0x214 + (id * 0x80)) +#define DRA7_ATL_AWSMUX_REG(id) (0x218 + (id * 0x80)) +#define DRA7_ATL_PCLKMUX_REG(id) (0x21c + (id * 0x80)) + +#define DRA7_ATL_SWEN BIT(0) +#define DRA7_ATL_DIVIDER_MASK (0x1f) +#define DRA7_ATL_PCLKMUX BIT(0) +struct dra7_atl_clock_info; + +struct dra7_atl_desc { + struct clk *clk; + struct clk_hw hw; + struct dra7_atl_clock_info *cinfo; + int id; + + bool probed; /* the driver for the IP has been loaded */ + bool valid; /* configured */ + bool enabled; + u32 bws; /* Baseband Word Select Mux */ + u32 aws; /* Audio Word Select Mux */ + u32 divider; /* Cached divider value */ +}; + +struct dra7_atl_clock_info { + struct device *dev; + void __iomem *iobase; + + struct dra7_atl_desc *cdesc; +}; + +#define to_atl_desc(_hw) container_of(_hw, struct dra7_atl_desc, hw) + +static inline void atl_write(struct dra7_atl_clock_info *cinfo, u32 reg, + u32 val) +{ + __raw_writel(val, cinfo->iobase + reg); +} + +static inline int atl_read(struct dra7_atl_clock_info *cinfo, u32 reg) +{ + return __raw_readl(cinfo->iobase + reg); +} + +static int atl_clk_enable(struct clk_hw *hw) +{ + struct dra7_atl_desc *cdesc = to_atl_desc(hw); + + if (!cdesc->probed) + goto out; + + if (unlikely(!cdesc->valid)) + dev_warn(cdesc->cinfo->dev, "atl%d has not been configured\n", + cdesc->id); + pm_runtime_get_sync(cdesc->cinfo->dev); + + atl_write(cdesc->cinfo, DRA7_ATL_ATLCR_REG(cdesc->id), + cdesc->divider - 1); + atl_write(cdesc->cinfo, 
DRA7_ATL_SWEN_REG(cdesc->id), DRA7_ATL_SWEN); + +out: + cdesc->enabled = true; + + return 0; +} + +static void atl_clk_disable(struct clk_hw *hw) +{ + struct dra7_atl_desc *cdesc = to_atl_desc(hw); + + if (!cdesc->probed) + goto out; + + atl_write(cdesc->cinfo, DRA7_ATL_SWEN_REG(cdesc->id), 0); + pm_runtime_put_sync(cdesc->cinfo->dev); + +out: + cdesc->enabled = false; +} + +static int atl_clk_is_enabled(struct clk_hw *hw) +{ + struct dra7_atl_desc *cdesc = to_atl_desc(hw); + + return cdesc->enabled; +} + +static unsigned long atl_clk_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct dra7_atl_desc *cdesc = to_atl_desc(hw); + + return parent_rate / cdesc->divider; +} + +static long atl_clk_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *parent_rate) +{ + unsigned divider; + + divider = (*parent_rate + rate / 2) / rate; + if (divider > DRA7_ATL_DIVIDER_MASK + 1) + divider = DRA7_ATL_DIVIDER_MASK + 1; + + return *parent_rate / divider; +} + +static int atl_clk_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct dra7_atl_desc *cdesc = to_atl_desc(hw); + u32 divider; + + divider = ((parent_rate + rate / 2) / rate) - 1; + if (divider > DRA7_ATL_DIVIDER_MASK) + divider = DRA7_ATL_DIVIDER_MASK; + + cdesc->divider = divider + 1; + + return 0; +} + +const struct clk_ops atl_clk_ops = { + .enable = atl_clk_enable, + .disable = atl_clk_disable, + .is_enabled = atl_clk_is_enabled, + .recalc_rate = atl_clk_recalc_rate, + .round_rate = atl_clk_round_rate, + .set_rate = atl_clk_set_rate, +}; + +static void __init of_dra7_atl_clock_setup(struct device_node *node) +{ + struct dra7_atl_desc *clk_hw = NULL; + struct clk_init_data init = { 0 }; + const char **parent_names = NULL; + struct clk *clk; + + clk_hw = kzalloc(sizeof(*clk_hw), GFP_KERNEL); + if (!clk_hw) { + pr_err("%s: could not allocate dra7_atl_desc\n", __func__); + return; + } + + clk_hw->hw.init = &init; + clk_hw->divider = 1; + init.name = node->name; + init.ops = &atl_clk_ops; + init.flags = CLK_IGNORE_UNUSED; + init.num_parents = of_clk_get_parent_count(node); + + if (init.num_parents != 1) { + pr_err("%s: atl clock %s must have 1 parent\n", __func__, + node->name); + goto cleanup; + } + + parent_names = kzalloc(sizeof(char *), GFP_KERNEL); + + if (!parent_names) + goto cleanup; + + parent_names[0] = of_clk_get_parent_name(node, 0); + + init.parent_names = parent_names; + + clk = clk_register(NULL, &clk_hw->hw); + + if (!IS_ERR(clk)) { + of_clk_add_provider(node, of_clk_src_simple_get, clk); + return; + } +cleanup: + kfree(parent_names); + kfree(clk_hw); +} +CLK_OF_DECLARE(dra7_atl_clock, "ti,dra7-atl-clock", of_dra7_atl_clock_setup); + +static int of_dra7_atl_clk_probe(struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + struct dra7_atl_clock_info *cinfo; + int i; + int ret = 0; + + if (!node) + return -ENODEV; + + cinfo = devm_kzalloc(&pdev->dev, sizeof(*cinfo), GFP_KERNEL); + if (!cinfo) + return -ENOMEM; + + cinfo->iobase = of_iomap(node, 0); + cinfo->dev = &pdev->dev; + pm_runtime_enable(cinfo->dev); + + pm_runtime_get_sync(cinfo->dev); + atl_write(cinfo, DRA7_ATL_PCLKMUX_REG(0), DRA7_ATL_PCLKMUX); + + for (i = 0; i < DRA7_ATL_INSTANCES; i++) { + struct device_node *cfg_node; + char prop[5]; + struct dra7_atl_desc *cdesc; + struct of_phandle_args clkspec; + struct clk *clk; + int rc; + + rc = of_parse_phandle_with_args(node, "ti,provided-clocks", + NULL, i, &clkspec); + + if (rc) { + pr_err("%s: failed to lookup atl clock %d\n", 
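The ATL round_rate/set_rate pair above picks the divider by rounding to the nearest integer, (parent + rate / 2) / rate, then clamps it to what the 5-bit field can hold (the register stores divider - 1). A standalone sketch of the selection:

    #include <stdio.h>

    #define ATL_DIVIDER_MASK 0x1f   /* 5-bit field, stores divider - 1 */

    /* Round-to-nearest divider, clamped to the field width. */
    static unsigned int atl_pick_divider(unsigned long parent_rate,
                                         unsigned long rate)
    {
            unsigned int divider = (parent_rate + rate / 2) / rate;

            if (divider > ATL_DIVIDER_MASK + 1)
                    divider = ATL_DIVIDER_MASK + 1;

            return divider;
    }

    int main(void)
    {
            /* e.g. a 49.152 MHz functional clock divided for a roughly
             * 5.6448 MHz output picks divider 9 (49.152 / 9 = 5.46 MHz). */
            printf("div = %u\n", atl_pick_divider(49152000UL, 5644800UL));
            return 0;
    }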
__func__, + i); + return -EINVAL; + } + + clk = of_clk_get_from_provider(&clkspec); + + cdesc = to_atl_desc(__clk_get_hw(clk)); + cdesc->cinfo = cinfo; + cdesc->id = i; + + /* Get configuration for the ATL instances */ + snprintf(prop, sizeof(prop), "atl%u", i); + cfg_node = of_find_node_by_name(node, prop); + if (cfg_node) { + ret = of_property_read_u32(cfg_node, "bws", + &cdesc->bws); + ret |= of_property_read_u32(cfg_node, "aws", + &cdesc->aws); + if (!ret) { + cdesc->valid = true; + atl_write(cinfo, DRA7_ATL_BWSMUX_REG(i), + cdesc->bws); + atl_write(cinfo, DRA7_ATL_AWSMUX_REG(i), + cdesc->aws); + } + } + + cdesc->probed = true; + /* + * Enable the clock if it was requested before the + * hw driver was loaded + */ + if (cdesc->enabled) + atl_clk_enable(__clk_get_hw(clk)); + } + pm_runtime_put_sync(cinfo->dev); + + return ret; +} + +static int of_dra7_atl_clk_remove(struct platform_device *pdev) +{ + pm_runtime_disable(&pdev->dev); + + return 0; +} + +static struct of_device_id of_dra7_atl_clk_match_tbl[] = { + { .compatible = "ti,dra7-atl", }, + {}, +}; +MODULE_DEVICE_TABLE(of, of_dra7_atl_clk_match_tbl); + +static struct platform_driver dra7_atl_clk_driver = { + .driver = { + .name = "dra7-atl", + .owner = THIS_MODULE, + .of_match_table = of_dra7_atl_clk_match_tbl, + }, + .probe = of_dra7_atl_clk_probe, + .remove = of_dra7_atl_clk_remove, +}; + +module_platform_driver(dra7_atl_clk_driver); + +MODULE_DESCRIPTION("Clock driver for DRA7 Audio Tracking Logic"); +MODULE_ALIAS("platform:dra7-atl-clock"); +MODULE_AUTHOR("Peter Ujfalusi <peter.ujfalusi@ti.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/clk/ti/dpll.c b/drivers/clk/ti/dpll.c index 7e498a44f97..abd956d5f83 100644 --- a/drivers/clk/ti/dpll.c +++ b/drivers/clk/ti/dpll.c @@ -25,8 +25,6 @@ #undef pr_fmt #define pr_fmt(fmt) "%s: " fmt, __func__ -#define DPLL_HAS_AUTOIDLE 0x1 - #if defined(CONFIG_ARCH_OMAP4) || defined(CONFIG_SOC_OMAP5) || \ defined(CONFIG_SOC_DRA7XX) static const struct clk_ops dpll_m4xen_ck_ops = { @@ -37,21 +35,18 @@ static const struct clk_ops dpll_m4xen_ck_ops = { .set_rate = &omap3_noncore_dpll_set_rate, .get_parent = &omap2_init_dpll_parent, }; +#else +static const struct clk_ops dpll_m4xen_ck_ops = {}; #endif +#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_ARCH_OMAP4) || \ + defined(CONFIG_SOC_OMAP5) || defined(CONFIG_SOC_DRA7XX) || \ + defined(CONFIG_SOC_AM33XX) || defined(CONFIG_SOC_AM43XX) static const struct clk_ops dpll_core_ck_ops = { .recalc_rate = &omap3_dpll_recalc, .get_parent = &omap2_init_dpll_parent, }; -#ifdef CONFIG_ARCH_OMAP3 -static const struct clk_ops omap3_dpll_core_ck_ops = { - .get_parent = &omap2_init_dpll_parent, - .recalc_rate = &omap3_dpll_recalc, - .round_rate = &omap2_dpll_round_rate, -}; -#endif - static const struct clk_ops dpll_ck_ops = { .enable = &omap3_noncore_dpll_enable, .disable = &omap3_noncore_dpll_disable, @@ -67,6 +62,33 @@ static const struct clk_ops dpll_no_gate_ck_ops = { .round_rate = &omap2_dpll_round_rate, .set_rate = &omap3_noncore_dpll_set_rate, }; +#else +static const struct clk_ops dpll_core_ck_ops = {}; +static const struct clk_ops dpll_ck_ops = {}; +static const struct clk_ops dpll_no_gate_ck_ops = {}; +const struct clk_hw_omap_ops clkhwops_omap3_dpll = {}; +#endif + +#ifdef CONFIG_ARCH_OMAP2 +static const struct clk_ops omap2_dpll_core_ck_ops = { + .get_parent = &omap2_init_dpll_parent, + .recalc_rate = &omap2_dpllcore_recalc, + .round_rate = &omap2_dpll_round_rate, + .set_rate = &omap2_reprogram_dpllcore, +}; +#else +static const struct clk_ops
omap2_dpll_core_ck_ops = {}; +#endif + +#ifdef CONFIG_ARCH_OMAP3 +static const struct clk_ops omap3_dpll_core_ck_ops = { + .get_parent = &omap2_init_dpll_parent, + .recalc_rate = &omap3_dpll_recalc, + .round_rate = &omap2_dpll_round_rate, +}; +#else +static const struct clk_ops omap3_dpll_core_ck_ops = {}; +#endif #ifdef CONFIG_ARCH_OMAP3 static const struct clk_ops omap3_dpll_ck_ops = { @@ -193,14 +215,12 @@ static void ti_clk_register_dpll_x2(struct device_node *node, * @node: device node containing the DPLL info * @ops: ops for the DPLL * @ddt: DPLL data template to use - * @init_flags: flags for controlling init types * * Initializes a DPLL clock from device tree data. */ static void __init of_ti_dpll_setup(struct device_node *node, const struct clk_ops *ops, - const struct dpll_data *ddt, - u8 init_flags) + const struct dpll_data *ddt) { struct clk_hw_omap *clk_hw = NULL; struct clk_init_data *init = NULL; @@ -241,13 +261,30 @@ static void __init of_ti_dpll_setup(struct device_node *node, init->parent_names = parent_names; dd->control_reg = ti_clk_get_reg_addr(node, 0); - dd->idlest_reg = ti_clk_get_reg_addr(node, 1); - dd->mult_div1_reg = ti_clk_get_reg_addr(node, 2); - if (!dd->control_reg || !dd->idlest_reg || !dd->mult_div1_reg) + /* + * Special case for OMAP2 DPLL, register order is different due to + * missing idlest_reg, also clkhwops is different. Detected from + * missing idlest_mask. + */ + if (!dd->idlest_mask) { + dd->mult_div1_reg = ti_clk_get_reg_addr(node, 1); +#ifdef CONFIG_ARCH_OMAP2 + clk_hw->ops = &clkhwops_omap2xxx_dpll; + omap2xxx_clkt_dpllcore_init(&clk_hw->hw); +#endif + } else { + dd->idlest_reg = ti_clk_get_reg_addr(node, 1); + if (!dd->idlest_reg) + goto cleanup; + + dd->mult_div1_reg = ti_clk_get_reg_addr(node, 2); + } + + if (!dd->control_reg || !dd->mult_div1_reg) goto cleanup; - if (init_flags & DPLL_HAS_AUTOIDLE) { + if (dd->autoidle_mask) { dd->autoidle_reg = ti_clk_get_reg_addr(node, 3); if (!dd->autoidle_reg) goto cleanup; @@ -310,7 +347,7 @@ static void __init of_ti_omap3_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &omap3_dpll_ck_ops, &dd, DPLL_HAS_AUTOIDLE); + of_ti_dpll_setup(node, &omap3_dpll_ck_ops, &dd); } CLK_OF_DECLARE(ti_omap3_dpll_clock, "ti,omap3-dpll-clock", of_ti_omap3_dpll_setup); @@ -329,7 +366,7 @@ static void __init of_ti_omap3_core_dpll_setup(struct device_node *node) .freqsel_mask = 0xf0, }; - of_ti_dpll_setup(node, &omap3_dpll_core_ck_ops, &dd, DPLL_HAS_AUTOIDLE); + of_ti_dpll_setup(node, &omap3_dpll_core_ck_ops, &dd); } CLK_OF_DECLARE(ti_omap3_core_dpll_clock, "ti,omap3-dpll-core-clock", of_ti_omap3_core_dpll_setup); @@ -349,7 +386,7 @@ static void __init of_ti_omap3_per_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_STOP) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &omap3_dpll_per_ck_ops, &dd, DPLL_HAS_AUTOIDLE); + of_ti_dpll_setup(node, &omap3_dpll_per_ck_ops, &dd); } CLK_OF_DECLARE(ti_omap3_per_dpll_clock, "ti,omap3-dpll-per-clock", of_ti_omap3_per_dpll_setup); @@ -371,7 +408,7 @@ static void __init of_ti_omap3_per_jtype_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_STOP) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &omap3_dpll_per_ck_ops, &dd, DPLL_HAS_AUTOIDLE); + of_ti_dpll_setup(node, &omap3_dpll_per_ck_ops, &dd); } CLK_OF_DECLARE(ti_omap3_per_jtype_dpll_clock, "ti,omap3-dpll-per-j-type-clock", of_ti_omap3_per_jtype_dpll_setup); @@ -391,11 +428,32 @@ static void __init 
of_ti_omap4_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &dpll_ck_ops, &dd, DPLL_HAS_AUTOIDLE); + of_ti_dpll_setup(node, &dpll_ck_ops, &dd); } CLK_OF_DECLARE(ti_omap4_dpll_clock, "ti,omap4-dpll-clock", of_ti_omap4_dpll_setup); +static void __init of_ti_omap5_mpu_dpll_setup(struct device_node *node) +{ + const struct dpll_data dd = { + .idlest_mask = 0x1, + .enable_mask = 0x7, + .autoidle_mask = 0x7, + .mult_mask = 0x7ff << 8, + .div1_mask = 0x7f, + .max_multiplier = 2047, + .max_divider = 128, + .dcc_mask = BIT(22), + .dcc_rate = 1400000000, /* DCC beyond 1.4GHz */ + .min_divider = 1, + .modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED), + }; + + of_ti_dpll_setup(node, &dpll_ck_ops, &dd); +} +CLK_OF_DECLARE(of_ti_omap5_mpu_dpll_clock, "ti,omap5-mpu-dpll-clock", + of_ti_omap5_mpu_dpll_setup); + static void __init of_ti_omap4_core_dpll_setup(struct device_node *node) { const struct dpll_data dd = { @@ -410,7 +468,7 @@ static void __init of_ti_omap4_core_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &dpll_core_ck_ops, &dd, DPLL_HAS_AUTOIDLE); + of_ti_dpll_setup(node, &dpll_core_ck_ops, &dd); } CLK_OF_DECLARE(ti_omap4_core_dpll_clock, "ti,omap4-dpll-core-clock", of_ti_omap4_core_dpll_setup); @@ -433,7 +491,7 @@ static void __init of_ti_omap4_m4xen_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &dpll_m4xen_ck_ops, &dd, DPLL_HAS_AUTOIDLE); + of_ti_dpll_setup(node, &dpll_m4xen_ck_ops, &dd); } CLK_OF_DECLARE(ti_omap4_m4xen_dpll_clock, "ti,omap4-dpll-m4xen-clock", of_ti_omap4_m4xen_dpll_setup); @@ -454,7 +512,7 @@ static void __init of_ti_omap4_jtype_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &dpll_m4xen_ck_ops, &dd, DPLL_HAS_AUTOIDLE); + of_ti_dpll_setup(node, &dpll_m4xen_ck_ops, &dd); } CLK_OF_DECLARE(ti_omap4_jtype_dpll_clock, "ti,omap4-dpll-j-type-clock", of_ti_omap4_jtype_dpll_setup); @@ -465,7 +523,6 @@ static void __init of_ti_am3_no_gate_dpll_setup(struct device_node *node) const struct dpll_data dd = { .idlest_mask = 0x1, .enable_mask = 0x7, - .autoidle_mask = 0x7, .mult_mask = 0x7ff << 8, .div1_mask = 0x7f, .max_multiplier = 2047, @@ -474,7 +531,7 @@ static void __init of_ti_am3_no_gate_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &dpll_no_gate_ck_ops, &dd, 0); + of_ti_dpll_setup(node, &dpll_no_gate_ck_ops, &dd); } CLK_OF_DECLARE(ti_am3_no_gate_dpll_clock, "ti,am3-dpll-no-gate-clock", of_ti_am3_no_gate_dpll_setup); @@ -484,7 +541,6 @@ static void __init of_ti_am3_jtype_dpll_setup(struct device_node *node) const struct dpll_data dd = { .idlest_mask = 0x1, .enable_mask = 0x7, - .autoidle_mask = 0x7, .mult_mask = 0x7ff << 8, .div1_mask = 0x7f, .max_multiplier = 4095, @@ -494,7 +550,7 @@ static void __init of_ti_am3_jtype_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &dpll_ck_ops, &dd, 0); + of_ti_dpll_setup(node, &dpll_ck_ops, &dd); } CLK_OF_DECLARE(ti_am3_jtype_dpll_clock, "ti,am3-dpll-j-type-clock", of_ti_am3_jtype_dpll_setup); @@ -504,7 +560,6 @@ static void __init of_ti_am3_no_gate_jtype_dpll_setup(struct device_node *node) const struct dpll_data dd = { .idlest_mask = 0x1, .enable_mask = 0x7, - .autoidle_mask = 
0x7, .mult_mask = 0x7ff << 8, .div1_mask = 0x7f, .max_multiplier = 2047, @@ -514,7 +569,7 @@ static void __init of_ti_am3_no_gate_jtype_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &dpll_no_gate_ck_ops, &dd, 0); + of_ti_dpll_setup(node, &dpll_no_gate_ck_ops, &dd); } CLK_OF_DECLARE(ti_am3_no_gate_jtype_dpll_clock, "ti,am3-dpll-no-gate-j-type-clock", @@ -525,7 +580,6 @@ static void __init of_ti_am3_dpll_setup(struct device_node *node) const struct dpll_data dd = { .idlest_mask = 0x1, .enable_mask = 0x7, - .autoidle_mask = 0x7, .mult_mask = 0x7ff << 8, .div1_mask = 0x7f, .max_multiplier = 2047, @@ -534,7 +588,7 @@ static void __init of_ti_am3_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &dpll_ck_ops, &dd, 0); + of_ti_dpll_setup(node, &dpll_ck_ops, &dd); } CLK_OF_DECLARE(ti_am3_dpll_clock, "ti,am3-dpll-clock", of_ti_am3_dpll_setup); @@ -543,7 +597,6 @@ static void __init of_ti_am3_core_dpll_setup(struct device_node *node) const struct dpll_data dd = { .idlest_mask = 0x1, .enable_mask = 0x7, - .autoidle_mask = 0x7, .mult_mask = 0x7ff << 8, .div1_mask = 0x7f, .max_multiplier = 2047, @@ -552,7 +605,22 @@ static void __init of_ti_am3_core_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &dpll_core_ck_ops, &dd, 0); + of_ti_dpll_setup(node, &dpll_core_ck_ops, &dd); } CLK_OF_DECLARE(ti_am3_core_dpll_clock, "ti,am3-dpll-core-clock", of_ti_am3_core_dpll_setup); + +static void __init of_ti_omap2_core_dpll_setup(struct device_node *node) +{ + const struct dpll_data dd = { + .enable_mask = 0x3, + .mult_mask = 0x3ff << 12, + .div1_mask = 0xf << 8, + .max_divider = 16, + .min_divider = 1, + }; + + of_ti_dpll_setup(node, &omap2_dpll_core_ck_ops, &dd); +} +CLK_OF_DECLARE(ti_omap2_core_dpll_clock, "ti,omap2-dpll-core-clock", + of_ti_omap2_core_dpll_setup); diff --git a/drivers/clk/ti/gate.c b/drivers/clk/ti/gate.c index 58734817d50..b326d2797fe 100644 --- a/drivers/clk/ti/gate.c +++ b/drivers/clk/ti/gate.c @@ -185,7 +185,7 @@ of_ti_composite_no_wait_gate_clk_setup(struct device_node *node) CLK_OF_DECLARE(ti_composite_no_wait_gate_clk, "ti,composite-no-wait-gate-clock", of_ti_composite_no_wait_gate_clk_setup); -#ifdef CONFIG_ARCH_OMAP3 +#if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3) static void __init of_ti_composite_interface_clk_setup(struct device_node *node) { _of_ti_composite_gate_clk_setup(node, &clkhwops_iclk_wait); diff --git a/drivers/clk/ti/interface.c b/drivers/clk/ti/interface.c index 320a2b168bb..9c3e8c4aaa4 100644 --- a/drivers/clk/ti/interface.c +++ b/drivers/clk/ti/interface.c @@ -94,6 +94,7 @@ static void __init of_ti_no_wait_interface_clk_setup(struct device_node *node) CLK_OF_DECLARE(ti_no_wait_interface_clk, "ti,omap3-no-wait-interface-clock", of_ti_no_wait_interface_clk_setup); +#ifdef CONFIG_ARCH_OMAP3 static void __init of_ti_hsotgusb_interface_clk_setup(struct device_node *node) { _of_ti_interface_clk_setup(node, @@ -123,3 +124,13 @@ static void __init of_ti_am35xx_interface_clk_setup(struct device_node *node) } CLK_OF_DECLARE(ti_am35xx_interface_clk, "ti,am35xx-interface-clock", of_ti_am35xx_interface_clk_setup); +#endif + +#ifdef CONFIG_SOC_OMAP2430 +static void __init of_ti_omap2430_interface_clk_setup(struct device_node *node) +{ + _of_ti_interface_clk_setup(node, &clkhwops_omap2430_i2chs_wait); +} 
+CLK_OF_DECLARE(ti_omap2430_interface_clk, "ti,omap2430-interface-clock", + of_ti_omap2430_interface_clk_setup); +#endif diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 719f6fb5b1c..74f5788d50b 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -73,12 +73,10 @@ static int fastsleep_loop(struct cpuidle_device *dev, return index; new_lpcr = old_lpcr; - new_lpcr &= ~(LPCR_MER | LPCR_PECE); /* lpcr[mer] must be 0 */ - - /* exit powersave upon external interrupt, but not decrementer - * interrupt. + /* Do not exit powersave upon decrementer as we've setup the timer + * offload. */ - new_lpcr |= LPCR_PECE0; + new_lpcr &= ~LPCR_PECE1; mtspr(SPRN_LPCR, new_lpcr); power7_sleep(); diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 136d6a283e0..9634f20e392 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -187,8 +187,11 @@ static int poll_idle(struct cpuidle_device *dev, t1 = ktime_get(); local_irq_enable(); - while (!need_resched()) - cpu_relax(); + if (!current_set_polling_and_test()) { + while (!need_resched()) + cpu_relax(); + } + current_clr_polling(); t2 = ktime_get(); diff = ktime_to_us(ktime_sub(t2, t1)); diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index f066fa23cc0..02f177aeb16 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -313,7 +313,7 @@ config CRYPTO_DEV_S5P config CRYPTO_DEV_NX bool "Support for IBM Power7+ in-Nest cryptographic acceleration" - depends on PPC64 && IBMVIO + depends on PPC64 && IBMVIO && !CPU_LITTLE_ENDIAN default n help Support for Power7+ in-Nest cryptographic acceleration. diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index d9c9cb4665d..2ebc9071e35 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2614,7 +2614,7 @@ static struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id, desc = of_get_named_gpiod_flags(dev->of_node, prop_name, idx, &of_flags); - if (!IS_ERR(desc)) + if (!IS_ERR(desc) || (PTR_ERR(desc) == -EPROBE_DEFER)) break; } diff --git a/drivers/hsi/clients/Kconfig b/drivers/hsi/clients/Kconfig index 71b9f9ab86e..bc60dec3f58 100644 --- a/drivers/hsi/clients/Kconfig +++ b/drivers/hsi/clients/Kconfig @@ -15,7 +15,7 @@ config NOKIA_MODEM config SSI_PROTOCOL tristate "SSI protocol" - depends on HSI && PHONET && (OMAP_SSI=y || OMAP_SSI=m) + depends on HSI && PHONET && OMAP_SSI help If you say Y here, you will enable the SSI protocol aka McSAAB. diff --git a/drivers/hsi/controllers/omap_ssi_port.c b/drivers/hsi/controllers/omap_ssi_port.c index b8693f0b27f..29aea0b9336 100644 --- a/drivers/hsi/controllers/omap_ssi_port.c +++ b/drivers/hsi/controllers/omap_ssi_port.c @@ -1116,8 +1116,7 @@ static int __init ssi_port_probe(struct platform_device *pd) dev_dbg(&pd->dev, "init ssi port...\n"); - err = ref_module(THIS_MODULE, ssi->owner); - if (err) { + if (!try_module_get(ssi->owner)) { dev_err(&pd->dev, "could not increment parent module refcount\n"); return -ENODEV; @@ -1254,6 +1253,7 @@ static int __exit ssi_port_remove(struct platform_device *pd) omap_ssi->port[omap_port->port_id] = NULL; platform_set_drvdata(pd, NULL); + module_put(ssi->owner); pm_runtime_disable(&pd->dev); return 0; diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 00343166feb..08531a128f5 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -1124,6 +1124,16 @@ config SENSORS_SHT21 This driver can also be built as a module.
If so, the module will be called sht21. +config SENSORS_SHTC1 + tristate "Sensirion humidity and temperature sensors. SHTC1 and compat." + depends on I2C + help + If you say yes here you get support for the Sensirion SHTC1 and SHTW1 + humidity and temperature sensors. + + This driver can also be built as a module. If so, the module + will be called shtc1. + config SENSORS_S3C tristate "Samsung built-in ADC" depends on S3C_ADC diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index 11798ad7e80..3dc0f02f71d 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -126,6 +126,7 @@ obj-$(CONFIG_SENSORS_SCH5627) += sch5627.o obj-$(CONFIG_SENSORS_SCH5636) += sch5636.o obj-$(CONFIG_SENSORS_SHT15) += sht15.o obj-$(CONFIG_SENSORS_SHT21) += sht21.o +obj-$(CONFIG_SENSORS_SHTC1) += shtc1.o obj-$(CONFIG_SENSORS_SIS5595) += sis5595.o obj-$(CONFIG_SENSORS_SMM665) += smm665.o obj-$(CONFIG_SENSORS_SMSC47B397)+= smsc47b397.o diff --git a/drivers/hwmon/atxp1.c b/drivers/hwmon/atxp1.c index 6edce42c61d..2ae8a304b5e 100644 --- a/drivers/hwmon/atxp1.c +++ b/drivers/hwmon/atxp1.c @@ -45,30 +45,6 @@ MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>"); static const unsigned short normal_i2c[] = { 0x37, 0x4e, I2C_CLIENT_END }; -static int atxp1_probe(struct i2c_client *client, - const struct i2c_device_id *id); -static int atxp1_remove(struct i2c_client *client); -static struct atxp1_data *atxp1_update_device(struct device *dev); -static int atxp1_detect(struct i2c_client *client, struct i2c_board_info *info); - -static const struct i2c_device_id atxp1_id[] = { - { "atxp1", 0 }, - { } -}; -MODULE_DEVICE_TABLE(i2c, atxp1_id); - -static struct i2c_driver atxp1_driver = { - .class = I2C_CLASS_HWMON, - .driver = { - .name = "atxp1", - }, - .probe = atxp1_probe, - .remove = atxp1_remove, - .id_table = atxp1_id, - .detect = atxp1_detect, - .address_list = normal_i2c, -}; - struct atxp1_data { struct device *hwmon_dev; struct mutex update_lock; @@ -386,4 +362,22 @@ static int atxp1_remove(struct i2c_client *client) return 0; }; +static const struct i2c_device_id atxp1_id[] = { + { "atxp1", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, atxp1_id); + +static struct i2c_driver atxp1_driver = { + .class = I2C_CLASS_HWMON, + .driver = { + .name = "atxp1", + }, + .probe = atxp1_probe, + .remove = atxp1_remove, + .id_table = atxp1_id, + .detect = atxp1_detect, + .address_list = normal_i2c, +}; + module_i2c_driver(atxp1_driver); diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c index 93d26e8af3e..bfd3f3eeabc 100644 --- a/drivers/hwmon/ina2xx.c +++ b/drivers/hwmon/ina2xx.c @@ -148,7 +148,8 @@ static int ina2xx_get_value(struct ina2xx_data *data, u8 reg) switch (reg) { case INA2XX_SHUNT_VOLTAGE: - val = DIV_ROUND_CLOSEST(data->regs[reg], + /* signed register */ + val = DIV_ROUND_CLOSEST((s16)data->regs[reg], data->config->shunt_div); break; case INA2XX_BUS_VOLTAGE: @@ -160,8 +161,8 @@ static int ina2xx_get_value(struct ina2xx_data *data, u8 reg) val = data->regs[reg] * data->config->power_lsb; break; case INA2XX_CURRENT: - /* LSB=1mA (selected).
Is in mA */ - val = data->regs[reg]; + /* signed register, LSB=1mA (selected), in mA */ + val = (s16)data->regs[reg]; break; default: /* programmer goofed */ diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c index bed4af35830..b0129a54e1a 100644 --- a/drivers/hwmon/lm85.c +++ b/drivers/hwmon/lm85.c @@ -5,7 +5,7 @@ * Copyright (c) 2002, 2003 Philip Pokorny <ppokorny@penguincomputing.com> * Copyright (c) 2003 Margit Schubert-While <margitsw@t-online.de> * Copyright (c) 2004 Justin Thiessen <jthiessen@penguincomputing.com> - * Copyright (C) 2007--2009 Jean Delvare <jdelvare@suse.de> + * Copyright (C) 2007--2014 Jean Delvare <jdelvare@suse.de> * * Chip details at <http://www.national.com/ds/LM/LM85.pdf> * @@ -39,7 +39,7 @@ static const unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END }; enum chips { - any_chip, lm85b, lm85c, + lm85, adm1027, adt7463, adt7468, emc6d100, emc6d102, emc6d103, emc6d103s }; @@ -75,9 +75,6 @@ enum chips { #define LM85_COMPANY_NATIONAL 0x01 #define LM85_COMPANY_ANALOG_DEV 0x41 #define LM85_COMPANY_SMSC 0x5c -#define LM85_VERSTEP_VMASK 0xf0 -#define LM85_VERSTEP_GENERIC 0x60 -#define LM85_VERSTEP_GENERIC2 0x70 #define LM85_VERSTEP_LM85C 0x60 #define LM85_VERSTEP_LM85B 0x62 #define LM85_VERSTEP_LM96000_1 0x68 @@ -351,9 +348,9 @@ static const struct i2c_device_id lm85_id[] = { { "adm1027", adm1027 }, { "adt7463", adt7463 }, { "adt7468", adt7468 }, - { "lm85", any_chip }, - { "lm85b", lm85b }, - { "lm85c", lm85c }, + { "lm85", lm85 }, + { "lm85b", lm85 }, + { "lm85c", lm85 }, { "emc6d100", emc6d100 }, { "emc6d101", emc6d100 }, { "emc6d102", emc6d102 }, @@ -1281,7 +1278,7 @@ static int lm85_detect(struct i2c_client *client, struct i2c_board_info *info) { struct i2c_adapter *adapter = client->adapter; int address = client->addr; - const char *type_name; + const char *type_name = NULL; int company, verstep; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) { @@ -1297,16 +1294,6 @@ static int lm85_detect(struct i2c_client *client, struct i2c_board_info *info) "Detecting device at 0x%02x with COMPANY: 0x%02x and VERSTEP: 0x%02x\n", address, company, verstep); - /* All supported chips have the version in common */ - if ((verstep & LM85_VERSTEP_VMASK) != LM85_VERSTEP_GENERIC && - (verstep & LM85_VERSTEP_VMASK) != LM85_VERSTEP_GENERIC2) { - dev_dbg(&adapter->dev, - "Autodetection failed: unsupported version\n"); - return -ENODEV; - } - type_name = "lm85"; - - /* Now, refine the detection */ if (company == LM85_COMPANY_NATIONAL) { switch (verstep) { case LM85_VERSTEP_LM85C: @@ -1323,6 +1310,7 @@ static int lm85_detect(struct i2c_client *client, struct i2c_board_info *info) "Found Winbond WPCD377I, ignoring\n"); return -ENODEV; } + type_name = "lm85"; break; } } else if (company == LM85_COMPANY_ANALOG_DEV) { @@ -1357,12 +1345,11 @@ static int lm85_detect(struct i2c_client *client, struct i2c_board_info *info) type_name = "emc6d103s"; break; } - } else { - dev_dbg(&adapter->dev, - "Autodetection failed: unknown vendor\n"); - return -ENODEV; } + if (!type_name) + return -ENODEV; + strlcpy(info->type, type_name, I2C_NAME_SIZE); return 0; diff --git a/drivers/hwmon/ltc4151.c b/drivers/hwmon/ltc4151.c index af81be1237c..c86a1840249 100644 --- a/drivers/hwmon/ltc4151.c +++ b/drivers/hwmon/ltc4151.c @@ -47,7 +47,7 @@ #define LTC4151_ADIN_L 0x05 struct ltc4151_data { - struct device *hwmon_dev; + struct i2c_client *client; struct mutex update_lock; bool valid; @@ -59,8 +59,8 @@ struct ltc4151_data { static struct ltc4151_data 
*ltc4151_update_device(struct device *dev) { - struct i2c_client *client = to_i2c_client(dev); - struct ltc4151_data *data = i2c_get_clientdata(client); + struct ltc4151_data *data = dev_get_drvdata(dev); + struct i2c_client *client = data->client; struct ltc4151_data *ret = data; mutex_lock(&data->update_lock); @@ -159,7 +159,7 @@ static SENSOR_DEVICE_ATTR(curr1_input, S_IRUGO, ltc4151_show_value, NULL, * Finally, construct an array of pointers to members of the above objects, * as required for sysfs_create_group() */ -static struct attribute *ltc4151_attributes[] = { +static struct attribute *ltc4151_attrs[] = { &sensor_dev_attr_in1_input.dev_attr.attr, &sensor_dev_attr_in2_input.dev_attr.attr, @@ -167,54 +167,30 @@ static struct attribute *ltc4151_attributes[] = { NULL, }; - -static const struct attribute_group ltc4151_group = { - .attrs = ltc4151_attributes, -}; +ATTRIBUTE_GROUPS(ltc4151); static int ltc4151_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct i2c_adapter *adapter = client->adapter; + struct device *dev = &client->dev; struct ltc4151_data *data; - int ret; + struct device *hwmon_dev; if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) return -ENODEV; - data = devm_kzalloc(&client->dev, sizeof(*data), GFP_KERNEL); + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; - i2c_set_clientdata(client, data); + data->client = client; mutex_init(&data->update_lock); - /* Register sysfs hooks */ - ret = sysfs_create_group(&client->dev.kobj, <c4151_group); - if (ret) - return ret; - - data->hwmon_dev = hwmon_device_register(&client->dev); - if (IS_ERR(data->hwmon_dev)) { - ret = PTR_ERR(data->hwmon_dev); - goto out_hwmon_device_register; - } - - return 0; - -out_hwmon_device_register: - sysfs_remove_group(&client->dev.kobj, <c4151_group); - return ret; -} - -static int ltc4151_remove(struct i2c_client *client) -{ - struct ltc4151_data *data = i2c_get_clientdata(client); - - hwmon_device_unregister(data->hwmon_dev); - sysfs_remove_group(&client->dev.kobj, <c4151_group); - - return 0; + hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name, + data, + ltc4151_groups); + return PTR_ERR_OR_ZERO(hwmon_dev); } static const struct i2c_device_id ltc4151_id[] = { @@ -229,7 +205,6 @@ static struct i2c_driver ltc4151_driver = { .name = "ltc4151", }, .probe = ltc4151_probe, - .remove = ltc4151_remove, .id_table = ltc4151_id, }; diff --git a/drivers/hwmon/shtc1.c b/drivers/hwmon/shtc1.c new file mode 100644 index 00000000000..decd7df995a --- /dev/null +++ b/drivers/hwmon/shtc1.c @@ -0,0 +1,251 @@ +/* Sensirion SHTC1 humidity and temperature sensor driver + * + * Copyright (C) 2014 Sensirion AG, Switzerland + * Author: Johannes Winkelmann <johannes.winkelmann@sensirion.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/i2c.h> +#include <linux/hwmon.h> +#include <linux/hwmon-sysfs.h> +#include <linux/err.h> +#include <linux/delay.h> +#include <linux/platform_data/shtc1.h> + +/* commands (high precision mode) */ +static const unsigned char shtc1_cmd_measure_blocking_hpm[] = { 0x7C, 0xA2 }; +static const unsigned char shtc1_cmd_measure_nonblocking_hpm[] = { 0x78, 0x66 }; + +/* commands (low precision mode) */ +static const unsigned char shtc1_cmd_measure_blocking_lpm[] = { 0x64, 0x58 }; +static const unsigned char shtc1_cmd_measure_nonblocking_lpm[] = { 0x60, 0x9c }; + +/* command for reading the ID register */ +static const unsigned char shtc1_cmd_read_id_reg[] = { 0xef, 0xc8 }; + +/* constants for reading the ID register */ +#define SHTC1_ID 0x07 +#define SHTC1_ID_REG_MASK 0x1f + +/* delays for non-blocking i2c commands, both in us */ +#define SHTC1_NONBLOCKING_WAIT_TIME_HPM 14400 +#define SHTC1_NONBLOCKING_WAIT_TIME_LPM 1000 + +#define SHTC1_CMD_LENGTH 2 +#define SHTC1_RESPONSE_LENGTH 6 + +struct shtc1_data { + struct i2c_client *client; + struct mutex update_lock; + bool valid; + unsigned long last_updated; /* in jiffies */ + + const unsigned char *command; + unsigned int nonblocking_wait_time; /* in us */ + + struct shtc1_platform_data setup; + + int temperature; /* 1000 * temperature in dgr C */ + int humidity; /* 1000 * relative humidity in %RH */ +}; + +static int shtc1_update_values(struct i2c_client *client, + struct shtc1_data *data, + char *buf, int bufsize) +{ + int ret = i2c_master_send(client, data->command, SHTC1_CMD_LENGTH); + if (ret != SHTC1_CMD_LENGTH) { + dev_err(&client->dev, "failed to send command: %d\n", ret); + return ret < 0 ? ret : -EIO; + } + + /* + * In blocking mode (clock stretching mode) the I2C bus + * is blocked for other traffic, thus the call to i2c_master_recv() + * will wait until the data is ready. For non blocking mode, we + * have to wait ourselves. + */ + if (!data->setup.blocking_io) + usleep_range(data->nonblocking_wait_time, + data->nonblocking_wait_time + 1000); + + ret = i2c_master_recv(client, buf, bufsize); + if (ret != bufsize) { + dev_err(&client->dev, "failed to read values: %d\n", ret); + return ret < 0 ? ret : -EIO; + } + + return 0; +} + +/* sysfs attributes */ +static struct shtc1_data *shtc1_update_client(struct device *dev) +{ + struct shtc1_data *data = dev_get_drvdata(dev); + struct i2c_client *client = data->client; + unsigned char buf[SHTC1_RESPONSE_LENGTH]; + int val; + int ret = 0; + + mutex_lock(&data->update_lock); + + if (time_after(jiffies, data->last_updated + HZ / 10) || !data->valid) { + ret = shtc1_update_values(client, data, buf, sizeof(buf)); + if (ret) + goto out; + + /* + * From datasheet: + * T = -45 + 175 * ST / 2^16 + * RH = 100 * SRH / 2^16 + * + * Adapted for integer fixed point (3 digit) arithmetic. + */ + val = be16_to_cpup((__be16 *)buf); + data->temperature = ((21875 * val) >> 13) - 45000; + val = be16_to_cpup((__be16 *)(buf + 3)); + data->humidity = ((12500 * val) >> 13); + + data->last_updated = jiffies; + data->valid = true; + } + +out: + mutex_unlock(&data->update_lock); + + return ret == 0 ? 
data : ERR_PTR(ret); +} + +static ssize_t temp1_input_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct shtc1_data *data = shtc1_update_client(dev); + if (IS_ERR(data)) + return PTR_ERR(data); + + return sprintf(buf, "%d\n", data->temperature); +} + +static ssize_t humidity1_input_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct shtc1_data *data = shtc1_update_client(dev); + if (IS_ERR(data)) + return PTR_ERR(data); + + return sprintf(buf, "%d\n", data->humidity); +} + +static DEVICE_ATTR_RO(temp1_input); +static DEVICE_ATTR_RO(humidity1_input); + +static struct attribute *shtc1_attrs[] = { + &dev_attr_temp1_input.attr, + &dev_attr_humidity1_input.attr, + NULL +}; + +ATTRIBUTE_GROUPS(shtc1); + +static void shtc1_select_command(struct shtc1_data *data) +{ + if (data->setup.high_precision) { + data->command = data->setup.blocking_io ? + shtc1_cmd_measure_blocking_hpm : + shtc1_cmd_measure_nonblocking_hpm; + data->nonblocking_wait_time = SHTC1_NONBLOCKING_WAIT_TIME_HPM; + + } else { + data->command = data->setup.blocking_io ? + shtc1_cmd_measure_blocking_lpm : + shtc1_cmd_measure_nonblocking_lpm; + data->nonblocking_wait_time = SHTC1_NONBLOCKING_WAIT_TIME_LPM; + } +} + +static int shtc1_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int ret; + char id_reg[2]; + struct shtc1_data *data; + struct device *hwmon_dev; + struct i2c_adapter *adap = client->adapter; + struct device *dev = &client->dev; + + if (!i2c_check_functionality(adap, I2C_FUNC_I2C)) { + dev_err(dev, "plain i2c transactions not supported\n"); + return -ENODEV; + } + + ret = i2c_master_send(client, shtc1_cmd_read_id_reg, SHTC1_CMD_LENGTH); + if (ret != SHTC1_CMD_LENGTH) { + dev_err(dev, "could not send read_id_reg command: %d\n", ret); + return ret < 0 ? 
ret : -ENODEV; + } + ret = i2c_master_recv(client, id_reg, sizeof(id_reg)); + if (ret != sizeof(id_reg)) { + dev_err(dev, "could not read ID register: %d\n", ret); + return -ENODEV; + } + if ((id_reg[1] & SHTC1_ID_REG_MASK) != SHTC1_ID) { + dev_err(dev, "ID register doesn't match\n"); + return -ENODEV; + } + + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->setup.blocking_io = false; + data->setup.high_precision = true; + data->client = client; + + if (client->dev.platform_data) + data->setup = *(struct shtc1_platform_data *)dev->platform_data; + shtc1_select_command(data); + mutex_init(&data->update_lock); + + hwmon_dev = devm_hwmon_device_register_with_groups(dev, + client->name, + data, + shtc1_groups); + if (IS_ERR(hwmon_dev)) + dev_dbg(dev, "unable to register hwmon device\n"); + + return PTR_ERR_OR_ZERO(hwmon_dev); +} + +/* device ID table */ +static const struct i2c_device_id shtc1_id[] = { + { "shtc1", 0 }, + { "shtw1", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, shtc1_id); + +static struct i2c_driver shtc1_i2c_driver = { + .driver.name = "shtc1", + .probe = shtc1_probe, + .id_table = shtc1_id, +}; + +module_i2c_driver(shtc1_i2c_driver); + +MODULE_AUTHOR("Johannes Winkelmann <johannes.winkelmann@sensirion.com>"); +MODULE_DESCRIPTION("Sensirion SHTC1 humidity and temperature sensor driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/hwmon/vexpress.c b/drivers/hwmon/vexpress.c index 611f34c7333..c53619086f3 100644 --- a/drivers/hwmon/vexpress.c +++ b/drivers/hwmon/vexpress.c @@ -27,17 +27,8 @@ struct vexpress_hwmon_data { struct device *hwmon_dev; struct regmap *reg; - const char *name; }; -static ssize_t vexpress_hwmon_name_show(struct device *dev, - struct device_attribute *dev_attr, char *buffer) -{ - struct vexpress_hwmon_data *data = dev_get_drvdata(dev); - - return sprintf(buffer, "%s\n", data->name); -} - static ssize_t vexpress_hwmon_label_show(struct device *dev, struct device_attribute *dev_attr, char *buffer) { @@ -95,16 +86,6 @@ static umode_t vexpress_hwmon_attr_is_visible(struct kobject *kobj, return attr->mode; } -static DEVICE_ATTR(name, S_IRUGO, vexpress_hwmon_name_show, NULL); - -#define VEXPRESS_HWMON_ATTRS(_name, _label_attr, _input_attr) \ -struct attribute *vexpress_hwmon_attrs_##_name[] = { \ - &dev_attr_name.attr, \ - &dev_attr_##_label_attr.attr, \ - &sensor_dev_attr_##_input_attr.dev_attr.attr, \ - NULL \ -} - struct vexpress_hwmon_type { const char *name; const struct attribute_group **attr_groups; @@ -114,7 +95,11 @@ struct vexpress_hwmon_type { static DEVICE_ATTR(in1_label, S_IRUGO, vexpress_hwmon_label_show, NULL); static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, vexpress_hwmon_u32_show, NULL, 1000); -static VEXPRESS_HWMON_ATTRS(volt, in1_label, in1_input); +static struct attribute *vexpress_hwmon_attrs_volt[] = { + &dev_attr_in1_label.attr, + &sensor_dev_attr_in1_input.dev_attr.attr, + NULL +}; static struct attribute_group vexpress_hwmon_group_volt = { .is_visible = vexpress_hwmon_attr_is_visible, .attrs = vexpress_hwmon_attrs_volt, @@ -131,7 +116,11 @@ static struct vexpress_hwmon_type vexpress_hwmon_volt = { static DEVICE_ATTR(curr1_label, S_IRUGO, vexpress_hwmon_label_show, NULL); static SENSOR_DEVICE_ATTR(curr1_input, S_IRUGO, vexpress_hwmon_u32_show, NULL, 1000); -static VEXPRESS_HWMON_ATTRS(amp, curr1_label, curr1_input); +static struct attribute *vexpress_hwmon_attrs_amp[] = { + &dev_attr_curr1_label.attr, + &sensor_dev_attr_curr1_input.dev_attr.attr, + NULL +}; static struct attribute_group 
vexpress_hwmon_group_amp = { .is_visible = vexpress_hwmon_attr_is_visible, .attrs = vexpress_hwmon_attrs_amp, @@ -147,7 +136,11 @@ static struct vexpress_hwmon_type vexpress_hwmon_amp = { static DEVICE_ATTR(temp1_label, S_IRUGO, vexpress_hwmon_label_show, NULL); static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, vexpress_hwmon_u32_show, NULL, 1000); -static VEXPRESS_HWMON_ATTRS(temp, temp1_label, temp1_input); +static struct attribute *vexpress_hwmon_attrs_temp[] = { + &dev_attr_temp1_label.attr, + &sensor_dev_attr_temp1_input.dev_attr.attr, + NULL +}; static struct attribute_group vexpress_hwmon_group_temp = { .is_visible = vexpress_hwmon_attr_is_visible, .attrs = vexpress_hwmon_attrs_temp, @@ -163,7 +156,11 @@ static struct vexpress_hwmon_type vexpress_hwmon_temp = { static DEVICE_ATTR(power1_label, S_IRUGO, vexpress_hwmon_label_show, NULL); static SENSOR_DEVICE_ATTR(power1_input, S_IRUGO, vexpress_hwmon_u32_show, NULL, 1); -static VEXPRESS_HWMON_ATTRS(power, power1_label, power1_input); +static struct attribute *vexpress_hwmon_attrs_power[] = { + &dev_attr_power1_label.attr, + &sensor_dev_attr_power1_input.dev_attr.attr, + NULL +}; static struct attribute_group vexpress_hwmon_group_power = { .is_visible = vexpress_hwmon_attr_is_visible, .attrs = vexpress_hwmon_attrs_power, @@ -179,7 +176,11 @@ static struct vexpress_hwmon_type vexpress_hwmon_power = { static DEVICE_ATTR(energy1_label, S_IRUGO, vexpress_hwmon_label_show, NULL); static SENSOR_DEVICE_ATTR(energy1_input, S_IRUGO, vexpress_hwmon_u64_show, NULL, 1); -static VEXPRESS_HWMON_ATTRS(energy, energy1_label, energy1_input); +static struct attribute *vexpress_hwmon_attrs_energy[] = { + &dev_attr_energy1_label.attr, + &sensor_dev_attr_energy1_input.dev_attr.attr, + NULL +}; static struct attribute_group vexpress_hwmon_group_energy = { .is_visible = vexpress_hwmon_attr_is_visible, .attrs = vexpress_hwmon_attrs_energy, @@ -218,7 +219,6 @@ MODULE_DEVICE_TABLE(of, vexpress_hwmon_of_match); static int vexpress_hwmon_probe(struct platform_device *pdev) { - int err; const struct of_device_id *match; struct vexpress_hwmon_data *data; const struct vexpress_hwmon_type *type; @@ -232,45 +232,19 @@ static int vexpress_hwmon_probe(struct platform_device *pdev) if (!match) return -ENODEV; type = match->data; - data->name = type->name; data->reg = devm_regmap_init_vexpress_config(&pdev->dev); if (IS_ERR(data->reg)) return PTR_ERR(data->reg); - err = sysfs_create_groups(&pdev->dev.kobj, type->attr_groups); - if (err) - goto error; - - data->hwmon_dev = hwmon_device_register(&pdev->dev); - if (IS_ERR(data->hwmon_dev)) { - err = PTR_ERR(data->hwmon_dev); - goto error; - } - - return 0; - -error: - sysfs_remove_group(&pdev->dev.kobj, match->data); - return err; -} - -static int vexpress_hwmon_remove(struct platform_device *pdev) -{ - struct vexpress_hwmon_data *data = platform_get_drvdata(pdev); - const struct of_device_id *match; - - hwmon_device_unregister(data->hwmon_dev); - - match = of_match_device(vexpress_hwmon_of_match, &pdev->dev); - sysfs_remove_group(&pdev->dev.kobj, match->data); + data->hwmon_dev = devm_hwmon_device_register_with_groups(&pdev->dev, + type->name, data, type->attr_groups); - return 0; + return PTR_ERR_OR_ZERO(data->hwmon_dev); } static struct platform_driver vexpress_hwmon_driver = { .probe = vexpress_hwmon_probe, - .remove = vexpress_hwmon_remove, .driver = { .name = DRVNAME, .owner = THIS_MODULE, diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 
2e2d903db83..8d44a406063 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -41,11 +41,11 @@ #include "iscsi_iser.h" /* Register user buffer memory and initialize passive rdma - * dto descriptor. Total data size is stored in - * iser_task->data[ISER_DIR_IN].data_len + * dto descriptor. Data size is stored in + * task->data[ISER_DIR_IN].data_len, protection size + * is stored in task->prot[ISER_DIR_IN].data_len */ -static int iser_prepare_read_cmd(struct iscsi_task *task, - unsigned int edtl) +static int iser_prepare_read_cmd(struct iscsi_task *task) { struct iscsi_iser_task *iser_task = task->dd_data; @@ -73,14 +73,6 @@ static int iser_prepare_read_cmd(struct iscsi_task *task, return err; } - if (edtl > iser_task->data[ISER_DIR_IN].data_len) { - iser_err("Total data length: %ld, less than EDTL: " - "%d, in READ cmd BHS itt: %d, conn: 0x%p\n", - iser_task->data[ISER_DIR_IN].data_len, edtl, - task->itt, iser_task->ib_conn); - return -EINVAL; - } - err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_IN); if (err) { iser_err("Failed to set up Data-IN RDMA\n"); @@ -100,8 +92,9 @@ static int iser_prepare_read_cmd(struct iscsi_task *task, } /* Register user buffer memory and initialize passive rdma - * dto descriptor. Total data size is stored in - * task->data[ISER_DIR_OUT].data_len + * dto descriptor. Data size is stored in + * task->data[ISER_DIR_OUT].data_len, protection size + * is stored in task->prot[ISER_DIR_OUT].data_len */ static int iser_prepare_write_cmd(struct iscsi_task *task, @@ -135,14 +128,6 @@ iser_prepare_write_cmd(struct iscsi_task *task, return err; } - if (edtl > iser_task->data[ISER_DIR_OUT].data_len) { - iser_err("Total data length: %ld, less than EDTL: %d, " - "in WRITE cmd BHS itt: %d, conn: 0x%p\n", - iser_task->data[ISER_DIR_OUT].data_len, - edtl, task->itt, task->conn); - return -EINVAL; - } - err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_OUT); if (err != 0) { iser_err("Failed to register write cmd RDMA mem\n"); @@ -417,11 +402,12 @@ int iser_send_command(struct iscsi_conn *conn, if (scsi_prot_sg_count(sc)) { prot_buf->buf = scsi_prot_sglist(sc); prot_buf->size = scsi_prot_sg_count(sc); - prot_buf->data_len = sc->prot_sdb->length; + prot_buf->data_len = data_buf->data_len >> + ilog2(sc->device->sector_size) * 8; } if (hdr->flags & ISCSI_FLAG_CMD_READ) { - err = iser_prepare_read_cmd(task, edtl); + err = iser_prepare_read_cmd(task); if (err) goto send_command_error; } diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index b9d647468b9..d4c7928a0f3 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -663,8 +663,9 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) pi_support = np->tpg_np->tpg->tpg_attrib.t10_pi; if (pi_support && !device->pi_capable) { - pr_err("Protection information requested but not supported\n"); - ret = -EINVAL; + pr_err("Protection information requested but not supported, " + "rejecting connect request\n"); + ret = rdma_reject(cma_id, NULL, 0); goto out_mr; } @@ -787,14 +788,12 @@ isert_disconnect_work(struct work_struct *work) isert_put_conn(isert_conn); return; } - if (!isert_conn->logout_posted) { - pr_debug("Calling rdma_disconnect for !logout_posted from" - " isert_disconnect_work\n"); + + if (isert_conn->disconnect) { + /* Send DREQ/DREP towards our initiator */ rdma_disconnect(isert_conn->conn_cm_id); - mutex_unlock(&isert_conn->conn_mutex); -
iscsit_cause_connection_reinstatement(isert_conn->conn, 0); - goto wake_up; } + mutex_unlock(&isert_conn->conn_mutex); wake_up: @@ -803,10 +802,11 @@ wake_up: } static void -isert_disconnected_handler(struct rdma_cm_id *cma_id) +isert_disconnected_handler(struct rdma_cm_id *cma_id, bool disconnect) { struct isert_conn *isert_conn = (struct isert_conn *)cma_id->context; + isert_conn->disconnect = disconnect; INIT_WORK(&isert_conn->conn_logout_work, isert_disconnect_work); schedule_work(&isert_conn->conn_logout_work); } @@ -815,29 +815,28 @@ static int isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { int ret = 0; + bool disconnect = false; pr_debug("isert_cma_handler: event %d status %d conn %p id %p\n", event->event, event->status, cma_id->context, cma_id); switch (event->event) { case RDMA_CM_EVENT_CONNECT_REQUEST: - pr_debug("RDMA_CM_EVENT_CONNECT_REQUEST: >>>>>>>>>>>>>>>\n"); ret = isert_connect_request(cma_id, event); break; case RDMA_CM_EVENT_ESTABLISHED: - pr_debug("RDMA_CM_EVENT_ESTABLISHED >>>>>>>>>>>>>>\n"); isert_connected_handler(cma_id); break; - case RDMA_CM_EVENT_DISCONNECTED: - pr_debug("RDMA_CM_EVENT_DISCONNECTED: >>>>>>>>>>>>>>\n"); - isert_disconnected_handler(cma_id); - break; - case RDMA_CM_EVENT_DEVICE_REMOVAL: - case RDMA_CM_EVENT_ADDR_CHANGE: + case RDMA_CM_EVENT_ADDR_CHANGE: /* FALLTHRU */ + case RDMA_CM_EVENT_DISCONNECTED: /* FALLTHRU */ + case RDMA_CM_EVENT_DEVICE_REMOVAL: /* FALLTHRU */ + disconnect = true; + case RDMA_CM_EVENT_TIMEWAIT_EXIT: /* FALLTHRU */ + isert_disconnected_handler(cma_id, disconnect); break; case RDMA_CM_EVENT_CONNECT_ERROR: default: - pr_err("Unknown RDMA CMA event: %d\n", event->event); + pr_err("Unhandled RDMA CMA event: %d\n", event->event); break; } @@ -1054,7 +1053,9 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login, } if (!login->login_failed) { if (login->login_complete) { - if (isert_conn->conn_device->use_fastreg) { + if (!conn->sess->sess_ops->SessionType && + isert_conn->conn_device->use_fastreg) { + /* Normal Session and fastreg is used */ u8 pi_support = login->np->tpg_np->tpg->tpg_attrib.t10_pi; ret = isert_conn_create_fastreg_pool(isert_conn, @@ -1824,11 +1825,8 @@ isert_do_control_comp(struct work_struct *work) break; case ISTATE_SEND_LOGOUTRSP: pr_debug("Calling iscsit_logout_post_handler >>>>>>>>>>>>>>\n"); - /* - * Call atomic_dec(&isert_conn->post_send_buf_count) - * from isert_wait_conn() - */ - isert_conn->logout_posted = true; + + atomic_dec(&isert_conn->post_send_buf_count); iscsit_logout_post_handler(cmd, cmd->conn); break; case ISTATE_SEND_TEXTRSP: @@ -2034,6 +2032,8 @@ isert_cq_rx_comp_err(struct isert_conn *isert_conn) isert_conn->state = ISER_CONN_DOWN; mutex_unlock(&isert_conn->conn_mutex); + iscsit_cause_connection_reinstatement(isert_conn->conn, 0); + complete(&isert_conn->conn_wait_comp_err); } @@ -2320,7 +2320,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) int rc; isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc); - rc = iscsit_build_text_rsp(cmd, conn, hdr); + rc = iscsit_build_text_rsp(cmd, conn, hdr, ISCSI_INFINIBAND); if (rc < 0) return rc; @@ -3156,9 +3156,14 @@ accept_wait: return -ENODEV; spin_lock_bh(&np->np_thread_lock); - if (np->np_thread_state == ISCSI_NP_THREAD_RESET) { + if (np->np_thread_state >= ISCSI_NP_THREAD_RESET) { spin_unlock_bh(&np->np_thread_lock); - pr_debug("ISCSI_NP_THREAD_RESET for isert_accept_np\n"); + pr_debug("np_thread_state %d for isert_accept_np\n", + np->np_thread_state); + /** + * 
No point in stalling here when np_thread + * is in state RESET/SHUTDOWN/EXIT - bail + **/ return -ENODEV; } spin_unlock_bh(&np->np_thread_lock); @@ -3208,15 +3213,9 @@ static void isert_wait_conn(struct iscsi_conn *conn) struct isert_conn *isert_conn = conn->context; pr_debug("isert_wait_conn: Starting \n"); - /* - * Decrement post_send_buf_count for special case when called - * from isert_do_control_comp() -> iscsit_logout_post_handler() - */ - mutex_lock(&isert_conn->conn_mutex); - if (isert_conn->logout_posted) - atomic_dec(&isert_conn->post_send_buf_count); - if (isert_conn->conn_cm_id && isert_conn->state != ISER_CONN_DOWN) { + mutex_lock(&isert_conn->conn_mutex); + if (isert_conn->conn_cm_id) { pr_debug("Calling rdma_disconnect from isert_wait_conn\n"); rdma_disconnect(isert_conn->conn_cm_id); } @@ -3293,6 +3292,7 @@ destroy_rx_wq: static void __exit isert_exit(void) { + flush_scheduled_work(); destroy_workqueue(isert_comp_wq); destroy_workqueue(isert_rx_wq); iscsit_unregister_transport(&iser_target_transport); diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index da6612e6800..04f51f7bf61 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -116,7 +116,6 @@ struct isert_device; struct isert_conn { enum iser_conn_state state; - bool logout_posted; int post_recv_buf_count; atomic_t post_send_buf_count; u32 responder_resources; @@ -151,6 +150,7 @@ struct isert_conn { #define ISERT_COMP_BATCH_COUNT 8 int conn_comp_batch; struct llist_head conn_comp_llist; + bool disconnect; }; #define ISERT_MAX_CQ 64 diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig index 20f1655e6d7..8108c698b54 100644 --- a/drivers/media/platform/Kconfig +++ b/drivers/media/platform/Kconfig @@ -93,7 +93,9 @@ config VIDEO_M32R_AR_M64278 config VIDEO_OMAP3 tristate "OMAP 3 Camera support" - depends on OMAP_IOVMM && VIDEO_V4L2 && I2C && VIDEO_V4L2_SUBDEV_API && ARCH_OMAP3 + depends on VIDEO_V4L2 && I2C && VIDEO_V4L2_SUBDEV_API && ARCH_OMAP3 + select ARM_DMA_USE_IOMMU + select OMAP_IOMMU ---help--- Driver for an OMAP 3 camera controller. 
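The VIDEO_OMAP3 Kconfig change above trades the OMAP_IOVMM dependency for ARM_DMA_USE_IOMMU and OMAP_IOMMU selects; the omap3isp hunks that follow convert the driver from the custom IOVMM allocator to the generic DMA API backed by an ARM IOMMU mapping. As a minimal sketch of the allocation pattern the conversion moves to (not part of the patch; the function name, "size" and the table usage are illustrative), a device attached to an ARM IOMMU mapping gets both a CPU pointer and an IOMMU-mapped device address from a single dma_alloc_coherent() call:

#include <linux/device.h>
#include <linux/dma-mapping.h>

/*
 * Illustrative sketch only: with ARM_DMA_USE_IOMMU in effect, the
 * dma_addr_t returned below is a device virtual address backed by the
 * IOMMU, so the driver no longer needs omap_iommu_vmalloc() and
 * omap_da_to_va() to obtain matching CPU/device views of the buffer.
 */
static int example_alloc_hw_table(struct device *dev, size_t size)
{
	dma_addr_t dma;
	void *cpu;

	cpu = dma_alloc_coherent(dev, size, &dma, GFP_KERNEL);
	if (!cpu)
		return -ENOMEM;

	/* fill the table through "cpu", program "dma" into the peripheral */

	dma_free_coherent(dev, size, cpu, dma);
	return 0;
}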
diff --git a/drivers/media/platform/omap3isp/Makefile b/drivers/media/platform/omap3isp/Makefile index e8847e79e31..254975a9174 100644 --- a/drivers/media/platform/omap3isp/Makefile +++ b/drivers/media/platform/omap3isp/Makefile @@ -3,7 +3,7 @@ ccflags-$(CONFIG_VIDEO_OMAP3_DEBUG) += -DDEBUG omap3-isp-objs += \ - isp.o ispqueue.o ispvideo.o \ + isp.o ispvideo.o \ ispcsiphy.o ispccp2.o ispcsi2.o \ ispccdc.o isppreview.o ispresizer.o \ ispstat.o isph3a_aewb.o isph3a_af.o isphist.o diff --git a/drivers/media/platform/omap3isp/isp.c b/drivers/media/platform/omap3isp/isp.c index 06a0df43424..2c7aa672056 100644 --- a/drivers/media/platform/omap3isp/isp.c +++ b/drivers/media/platform/omap3isp/isp.c @@ -69,6 +69,8 @@ #include <linux/sched.h> #include <linux/vmalloc.h> +#include <asm/dma-iommu.h> + #include <media/v4l2-common.h> #include <media/v4l2-device.h> @@ -1397,14 +1399,14 @@ int omap3isp_module_sync_idle(struct media_entity *me, wait_queue_head_t *wait, if (isp_pipeline_is_last(me)) { struct isp_video *video = pipe->output; unsigned long flags; - spin_lock_irqsave(&video->queue->irqlock, flags); + spin_lock_irqsave(&video->irqlock, flags); if (video->dmaqueue_flags & ISP_VIDEO_DMAQUEUE_UNDERRUN) { - spin_unlock_irqrestore(&video->queue->irqlock, flags); + spin_unlock_irqrestore(&video->irqlock, flags); atomic_set(stopping, 0); smp_mb(); return 0; } - spin_unlock_irqrestore(&video->queue->irqlock, flags); + spin_unlock_irqrestore(&video->irqlock, flags); if (!wait_event_timeout(*wait, !atomic_read(stopping), msecs_to_jiffies(1000))) { atomic_set(stopping, 0); @@ -1625,7 +1627,7 @@ struct isp_device *omap3isp_get(struct isp_device *isp) * Decrement the reference count on the ISP. If the last reference is released, * power-down all submodules, disable clocks and free temporary buffers. */ -void omap3isp_put(struct isp_device *isp) +static void __omap3isp_put(struct isp_device *isp, bool save_ctx) { if (isp == NULL) return; @@ -1634,7 +1636,7 @@ void omap3isp_put(struct isp_device *isp) BUG_ON(isp->ref_count == 0); if (--isp->ref_count == 0) { isp_disable_interrupts(isp); - if (isp->domain) { + if (save_ctx) { isp_save_ctx(isp); isp->has_context = 1; } @@ -1648,6 +1650,11 @@ void omap3isp_put(struct isp_device *isp) mutex_unlock(&isp->isp_mutex); } +void omap3isp_put(struct isp_device *isp) +{ + __omap3isp_put(isp, true); +} + /* -------------------------------------------------------------------------- * Platform device driver */ @@ -2120,6 +2127,61 @@ error_csiphy: return ret; } +static void isp_detach_iommu(struct isp_device *isp) +{ + arm_iommu_release_mapping(isp->mapping); + isp->mapping = NULL; + iommu_group_remove_device(isp->dev); +} + +static int isp_attach_iommu(struct isp_device *isp) +{ + struct dma_iommu_mapping *mapping; + struct iommu_group *group; + int ret; + + /* Create a device group and add the device to it. */ + group = iommu_group_alloc(); + if (IS_ERR(group)) { + dev_err(isp->dev, "failed to allocate IOMMU group\n"); + return PTR_ERR(group); + } + + ret = iommu_group_add_device(group, isp->dev); + iommu_group_put(group); + + if (ret < 0) { + dev_err(isp->dev, "failed to add device to IOMMU group\n"); + return ret; + } + + /* + * Create the ARM mapping, used by the ARM DMA mapping core to allocate + * VAs. This will allocate a corresponding IOMMU domain.
+ */ + mapping = arm_iommu_create_mapping(&platform_bus_type, SZ_1G, SZ_2G); + if (IS_ERR(mapping)) { + dev_err(isp->dev, "failed to create ARM IOMMU mapping\n"); + ret = PTR_ERR(mapping); + goto error; + } + + isp->mapping = mapping; + + /* Attach the ARM VA mapping to the device. */ + ret = arm_iommu_attach_device(isp->dev, mapping); + if (ret < 0) { + dev_err(isp->dev, "failed to attach device to VA mapping\n"); + goto error; + } + + return 0; + +error: + isp_detach_iommu(isp); + return ret; +} + /* * isp_remove - Remove ISP platform device * @pdev: Pointer to ISP platform device @@ -2135,10 +2197,8 @@ static int isp_remove(struct platform_device *pdev) isp_xclk_cleanup(isp); __omap3isp_get(isp, false); - iommu_detach_device(isp->domain, &pdev->dev); - iommu_domain_free(isp->domain); - isp->domain = NULL; - omap3isp_put(isp); + isp_detach_iommu(isp); + __omap3isp_put(isp, false); return 0; } @@ -2265,39 +2325,32 @@ static int isp_probe(struct platform_device *pdev) } } - isp->domain = iommu_domain_alloc(pdev->dev.bus); - if (!isp->domain) { - dev_err(isp->dev, "can't alloc iommu domain\n"); - ret = -ENOMEM; + /* IOMMU */ + ret = isp_attach_iommu(isp); + if (ret < 0) { + dev_err(&pdev->dev, "unable to attach to IOMMU\n"); goto error_isp; } - ret = iommu_attach_device(isp->domain, &pdev->dev); - if (ret) { - dev_err(&pdev->dev, "can't attach iommu device: %d\n", ret); - ret = -EPROBE_DEFER; - goto free_domain; - } - /* Interrupt */ isp->irq_num = platform_get_irq(pdev, 0); if (isp->irq_num <= 0) { dev_err(isp->dev, "No IRQ resource\n"); ret = -ENODEV; - goto detach_dev; + goto error_iommu; } if (devm_request_irq(isp->dev, isp->irq_num, isp_isr, IRQF_SHARED, "OMAP3 ISP", isp)) { dev_err(isp->dev, "Unable to request IRQ\n"); ret = -EINVAL; - goto detach_dev; + goto error_iommu; } /* Entities */ ret = isp_initialize_modules(isp); if (ret < 0) - goto detach_dev; + goto error_iommu; ret = isp_register_entities(isp); if (ret < 0) @@ -2310,14 +2363,11 @@ static int isp_probe(struct platform_device *pdev) error_modules: isp_cleanup_modules(isp); -detach_dev: - iommu_detach_device(isp->domain, &pdev->dev); -free_domain: - iommu_domain_free(isp->domain); - isp->domain = NULL; +error_iommu: + isp_detach_iommu(isp); error_isp: isp_xclk_cleanup(isp); - omap3isp_put(isp); + __omap3isp_put(isp, false); error: mutex_destroy(&isp->isp_mutex); diff --git a/drivers/media/platform/omap3isp/isp.h b/drivers/media/platform/omap3isp/isp.h index 6d5e6971190..2c314eea125 100644 --- a/drivers/media/platform/omap3isp/isp.h +++ b/drivers/media/platform/omap3isp/isp.h @@ -45,8 +45,6 @@ #include "ispcsi2.h" #include "ispccp2.h" -#define IOMMU_FLAG (IOVMF_ENDIAN_LITTLE | IOVMF_ELSZ_8) - #define ISP_TOK_TERM 0xFFFFFFFF /* * terminating token for ISP * modules reg list @@ -152,6 +150,7 @@ struct isp_xclk { * regions. * @mmio_base_phys: Array with physical L4 bus addresses for ISP register * regions. + * @mapping: IOMMU mapping * @stat_lock: Spinlock for handling statistics * @isp_mutex: Mutex for serializing requests to ISP. * @stop_failure: Indicates that an entity failed to stop. @@ -171,7 +170,6 @@ struct isp_xclk { * @isp_res: Pointer to current settings for ISP Resizer. * @isp_prev: Pointer to current settings for ISP Preview. * @isp_ccdc: Pointer to current settings for ISP CCDC. - * @iommu: Pointer to requested IOMMU instance for ISP. * @platform_cb: ISP driver callback function pointers for platform code * * This structure is used to store the OMAP ISP Information. 
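Condensed, the attach sequence above is: allocate an IOMMU group, add the device to it, create the ARM VA mapping, then attach the mapping to the device. A sketch of that ordering for a hypothetical platform device follows (illustrative, not part of the patch; a real driver would keep the mapping pointer around for isp_detach_iommu()-style teardown, and the SZ_1G/SZ_2G window is simply copied from the hunk above):

#include <asm/dma-iommu.h>
#include <linux/err.h>
#include <linux/iommu.h>
#include <linux/platform_device.h>
#include <linux/sizes.h>

static int example_attach_iommu(struct device *dev)
{
	struct dma_iommu_mapping *mapping;
	struct iommu_group *group;
	int ret;

	/* Create a device group and add the device to it. */
	group = iommu_group_alloc();
	if (IS_ERR(group))
		return PTR_ERR(group);

	ret = iommu_group_add_device(group, dev);
	iommu_group_put(group);		/* the group holds its own reference */
	if (ret < 0)
		return ret;

	/* Create the ARM mapping and its backing IOMMU domain, then attach. */
	mapping = arm_iommu_create_mapping(&platform_bus_type, SZ_1G, SZ_2G);
	if (IS_ERR(mapping)) {
		ret = PTR_ERR(mapping);
		goto err_group;
	}

	ret = arm_iommu_attach_device(dev, mapping);
	if (ret < 0)
		goto err_mapping;

	return 0;

err_mapping:
	arm_iommu_release_mapping(mapping);
err_group:
	iommu_group_remove_device(dev);
	return ret;
}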
@@ -189,6 +187,8 @@ struct isp_device { void __iomem *mmio_base[OMAP3_ISP_IOMEM_LAST]; unsigned long mmio_base_phys[OMAP3_ISP_IOMEM_LAST]; + struct dma_iommu_mapping *mapping; + /* ISP Obj */ spinlock_t stat_lock; /* common lock for statistic drivers */ struct mutex isp_mutex; /* For handling ref_count field */ @@ -219,8 +219,6 @@ struct isp_device { unsigned int sbl_resources; unsigned int subclk_resources; - - struct iommu_domain *domain; }; #define v4l2_dev_to_isp_device(dev) \ diff --git a/drivers/media/platform/omap3isp/ispccdc.c b/drivers/media/platform/omap3isp/ispccdc.c index 4d920c800ff..9f727d20f06 100644 --- a/drivers/media/platform/omap3isp/ispccdc.c +++ b/drivers/media/platform/omap3isp/ispccdc.c @@ -30,7 +30,6 @@ #include <linux/device.h> #include <linux/dma-mapping.h> #include <linux/mm.h> -#include <linux/omap-iommu.h> #include <linux/sched.h> #include <linux/slab.h> #include <media/v4l2-event.h> @@ -206,7 +205,8 @@ static int ccdc_lsc_validate_config(struct isp_ccdc_device *ccdc, * ccdc_lsc_program_table - Program Lens Shading Compensation table address. * @ccdc: Pointer to ISP CCDC device. */ -static void ccdc_lsc_program_table(struct isp_ccdc_device *ccdc, u32 addr) +static void ccdc_lsc_program_table(struct isp_ccdc_device *ccdc, + dma_addr_t addr) { isp_reg_writel(to_isp_device(ccdc), addr, OMAP3_ISP_IOMEM_CCDC, ISPCCDC_LSC_TABLE_BASE); @@ -333,7 +333,7 @@ static int __ccdc_lsc_configure(struct isp_ccdc_device *ccdc, return -EBUSY; ccdc_lsc_setup_regs(ccdc, &req->config); - ccdc_lsc_program_table(ccdc, req->table); + ccdc_lsc_program_table(ccdc, req->table.dma); return 0; } @@ -368,11 +368,12 @@ static void ccdc_lsc_free_request(struct isp_ccdc_device *ccdc, if (req == NULL) return; - if (req->iovm) - dma_unmap_sg(isp->dev, req->iovm->sgt->sgl, - req->iovm->sgt->nents, DMA_TO_DEVICE); - if (req->table) - omap_iommu_vfree(isp->domain, isp->dev, req->table); + if (req->table.addr) { + sg_free_table(&req->table.sgt); + dma_free_coherent(isp->dev, req->config.size, req->table.addr, + req->table.dma); + } + kfree(req); } @@ -416,7 +417,6 @@ static int ccdc_lsc_config(struct isp_ccdc_device *ccdc, struct isp_device *isp = to_isp_device(ccdc); struct ispccdc_lsc_config_req *req; unsigned long flags; - void *table; u16 update; int ret; @@ -444,38 +444,31 @@ static int ccdc_lsc_config(struct isp_ccdc_device *ccdc, req->enable = 1; - req->table = omap_iommu_vmalloc(isp->domain, isp->dev, 0, - req->config.size, IOMMU_FLAG); - if (IS_ERR_VALUE(req->table)) { - req->table = 0; - ret = -ENOMEM; - goto done; - } - - req->iovm = omap_find_iovm_area(isp->dev, req->table); - if (req->iovm == NULL) { + req->table.addr = dma_alloc_coherent(isp->dev, req->config.size, + &req->table.dma, + GFP_KERNEL); + if (req->table.addr == NULL) { ret = -ENOMEM; goto done; } - if (!dma_map_sg(isp->dev, req->iovm->sgt->sgl, - req->iovm->sgt->nents, DMA_TO_DEVICE)) { - ret = -ENOMEM; - req->iovm = NULL; + ret = dma_get_sgtable(isp->dev, &req->table.sgt, + req->table.addr, req->table.dma, + req->config.size); + if (ret < 0) goto done; - } - dma_sync_sg_for_cpu(isp->dev, req->iovm->sgt->sgl, - req->iovm->sgt->nents, DMA_TO_DEVICE); + dma_sync_sg_for_cpu(isp->dev, req->table.sgt.sgl, + req->table.sgt.nents, DMA_TO_DEVICE); - table = omap_da_to_va(isp->dev, req->table); - if (copy_from_user(table, config->lsc, req->config.size)) { + if (copy_from_user(req->table.addr, config->lsc, + req->config.size)) { ret = -EFAULT; goto done; } - dma_sync_sg_for_device(isp->dev, req->iovm->sgt->sgl, - 
req->iovm->sgt->nents, DMA_TO_DEVICE); + dma_sync_sg_for_device(isp->dev, req->table.sgt.sgl, + req->table.sgt.nents, DMA_TO_DEVICE); } spin_lock_irqsave(&ccdc->lsc.req_lock, flags); @@ -584,7 +577,7 @@ static void ccdc_configure_fpc(struct isp_ccdc_device *ccdc) if (!ccdc->fpc_en) return; - isp_reg_writel(isp, ccdc->fpc.fpcaddr, OMAP3_ISP_IOMEM_CCDC, + isp_reg_writel(isp, ccdc->fpc.dma, OMAP3_ISP_IOMEM_CCDC, ISPCCDC_FPC_ADDR); /* The FPNUM field must be set before enabling FPC. */ isp_reg_writel(isp, (ccdc->fpc.fpnum << ISPCCDC_FPC_FPNUM_SHIFT), @@ -724,8 +717,9 @@ static int ccdc_config(struct isp_ccdc_device *ccdc, ccdc->shadow_update = 0; if (OMAP3ISP_CCDC_FPC & ccdc_struct->update) { - u32 table_old = 0; - u32 table_new; + struct omap3isp_ccdc_fpc fpc; + struct ispccdc_fpc fpc_old = { .addr = NULL, }; + struct ispccdc_fpc fpc_new; u32 size; if (ccdc->state != ISP_PIPELINE_STREAM_STOPPED) @@ -734,35 +728,39 @@ static int ccdc_config(struct isp_ccdc_device *ccdc, ccdc->fpc_en = !!(OMAP3ISP_CCDC_FPC & ccdc_struct->flag); if (ccdc->fpc_en) { - if (copy_from_user(&ccdc->fpc, ccdc_struct->fpc, - sizeof(ccdc->fpc))) + if (copy_from_user(&fpc, ccdc_struct->fpc, sizeof(fpc))) return -EFAULT; + size = fpc.fpnum * 4; + /* - * table_new must be 64-bytes aligned, but it's - * already done by omap_iommu_vmalloc(). + * The table address must be 64-bytes aligned, which is + * guaranteed by dma_alloc_coherent(). */ - size = ccdc->fpc.fpnum * 4; - table_new = omap_iommu_vmalloc(isp->domain, isp->dev, - 0, size, IOMMU_FLAG); - if (IS_ERR_VALUE(table_new)) + fpc_new.fpnum = fpc.fpnum; + fpc_new.addr = dma_alloc_coherent(isp->dev, size, + &fpc_new.dma, + GFP_KERNEL); + if (fpc_new.addr == NULL) return -ENOMEM; - if (copy_from_user(omap_da_to_va(isp->dev, table_new), - (__force void __user *) - ccdc->fpc.fpcaddr, size)) { - omap_iommu_vfree(isp->domain, isp->dev, - table_new); + if (copy_from_user(fpc_new.addr, + (__force void __user *)fpc.fpcaddr, + size)) { + dma_free_coherent(isp->dev, size, fpc_new.addr, + fpc_new.dma); return -EFAULT; } - table_old = ccdc->fpc.fpcaddr; - ccdc->fpc.fpcaddr = table_new; + fpc_old = ccdc->fpc; + ccdc->fpc = fpc_new; } ccdc_configure_fpc(ccdc); - if (table_old != 0) - omap_iommu_vfree(isp->domain, isp->dev, table_old); + + if (fpc_old.addr != NULL) + dma_free_coherent(isp->dev, fpc_old.fpnum * 4, + fpc_old.addr, fpc_old.dma); } return ccdc_lsc_config(ccdc, ccdc_struct); @@ -1523,7 +1521,7 @@ static int ccdc_isr_buffer(struct isp_ccdc_device *ccdc) buffer = omap3isp_video_buffer_next(&ccdc->video_out); if (buffer != NULL) { - ccdc_set_outaddr(ccdc, buffer->isp_addr); + ccdc_set_outaddr(ccdc, buffer->dma); restart = 1; } @@ -1662,7 +1660,7 @@ static int ccdc_video_queue(struct isp_video *video, struct isp_buffer *buffer) if (!(ccdc->output & CCDC_OUTPUT_MEMORY)) return -ENODEV; - ccdc_set_outaddr(ccdc, buffer->isp_addr); + ccdc_set_outaddr(ccdc, buffer->dma); /* We now have a buffer queued on the output, restart the pipeline * on the next CCDC interrupt if running in continuous mode (or when @@ -2580,8 +2578,9 @@ void omap3isp_ccdc_cleanup(struct isp_device *isp) cancel_work_sync(&ccdc->lsc.table_work); ccdc_lsc_free_queue(ccdc, &ccdc->lsc.free_queue); - if (ccdc->fpc.fpcaddr != 0) - omap_iommu_vfree(isp->domain, isp->dev, ccdc->fpc.fpcaddr); + if (ccdc->fpc.addr != NULL) + dma_free_coherent(isp->dev, ccdc->fpc.fpnum * 4, ccdc->fpc.addr, + ccdc->fpc.dma); mutex_destroy(&ccdc->ioctl_lock); } diff --git a/drivers/media/platform/omap3isp/ispccdc.h 
b/drivers/media/platform/omap3isp/ispccdc.h index 9d24e410786..f65061602c7 100644 --- a/drivers/media/platform/omap3isp/ispccdc.h +++ b/drivers/media/platform/omap3isp/ispccdc.h @@ -46,6 +46,12 @@ enum ccdc_input_entity { #define OMAP3ISP_CCDC_NEVENTS 16 +struct ispccdc_fpc { + void *addr; + dma_addr_t dma; + unsigned int fpnum; +}; + enum ispccdc_lsc_state { LSC_STATE_STOPPED = 0, LSC_STATE_STOPPING = 1, @@ -57,8 +63,12 @@ struct ispccdc_lsc_config_req { struct list_head list; struct omap3isp_ccdc_lsc_config config; unsigned char enable; - u32 table; - struct iovm_struct *iovm; + + struct { + void *addr; + dma_addr_t dma; + struct sg_table sgt; + } table; }; /* @@ -136,7 +146,7 @@ struct isp_ccdc_device { fpc_en:1; struct omap3isp_ccdc_blcomp blcomp; struct omap3isp_ccdc_bclamp clamp; - struct omap3isp_ccdc_fpc fpc; + struct ispccdc_fpc fpc; struct ispccdc_lsc lsc; unsigned int update; unsigned int shadow_update; diff --git a/drivers/media/platform/omap3isp/ispccp2.c b/drivers/media/platform/omap3isp/ispccp2.c index b30b67d22a5..f3801db9095 100644 --- a/drivers/media/platform/omap3isp/ispccp2.c +++ b/drivers/media/platform/omap3isp/ispccp2.c @@ -549,7 +549,7 @@ static void ccp2_isr_buffer(struct isp_ccp2_device *ccp2) buffer = omap3isp_video_buffer_next(&ccp2->video_in); if (buffer != NULL) - ccp2_set_inaddr(ccp2, buffer->isp_addr); + ccp2_set_inaddr(ccp2, buffer->dma); pipe->state |= ISP_PIPELINE_IDLE_INPUT; @@ -940,7 +940,7 @@ static int ccp2_video_queue(struct isp_video *video, struct isp_buffer *buffer) { struct isp_ccp2_device *ccp2 = &video->isp->isp_ccp2; - ccp2_set_inaddr(ccp2, buffer->isp_addr); + ccp2_set_inaddr(ccp2, buffer->dma); return 0; } diff --git a/drivers/media/platform/omap3isp/ispcsi2.c b/drivers/media/platform/omap3isp/ispcsi2.c index 620560828a4..5a2e47e58b8 100644 --- a/drivers/media/platform/omap3isp/ispcsi2.c +++ b/drivers/media/platform/omap3isp/ispcsi2.c @@ -695,7 +695,7 @@ static void csi2_isr_buffer(struct isp_csi2_device *csi2) if (buffer == NULL) return; - csi2_set_outaddr(csi2, buffer->isp_addr); + csi2_set_outaddr(csi2, buffer->dma); csi2_ctx_enable(isp, csi2, 0, 1); } @@ -812,7 +812,7 @@ static int csi2_queue(struct isp_video *video, struct isp_buffer *buffer) struct isp_device *isp = video->isp; struct isp_csi2_device *csi2 = &isp->isp_csi2a; - csi2_set_outaddr(csi2, buffer->isp_addr); + csi2_set_outaddr(csi2, buffer->dma); /* * If streaming was enabled before there was a buffer queued diff --git a/drivers/media/platform/omap3isp/isph3a_aewb.c b/drivers/media/platform/omap3isp/isph3a_aewb.c index 75fd82b152b..d6811ce263e 100644 --- a/drivers/media/platform/omap3isp/isph3a_aewb.c +++ b/drivers/media/platform/omap3isp/isph3a_aewb.c @@ -47,7 +47,7 @@ static void h3a_aewb_setup_regs(struct ispstat *aewb, void *priv) if (aewb->state == ISPSTAT_DISABLED) return; - isp_reg_writel(aewb->isp, aewb->active_buf->iommu_addr, + isp_reg_writel(aewb->isp, aewb->active_buf->dma_addr, OMAP3_ISP_IOMEM_H3A, ISPH3A_AEWBUFST); if (!aewb->update) diff --git a/drivers/media/platform/omap3isp/isph3a_af.c b/drivers/media/platform/omap3isp/isph3a_af.c index a0bf5af3243..6fc960cd30f 100644 --- a/drivers/media/platform/omap3isp/isph3a_af.c +++ b/drivers/media/platform/omap3isp/isph3a_af.c @@ -51,7 +51,7 @@ static void h3a_af_setup_regs(struct ispstat *af, void *priv) if (af->state == ISPSTAT_DISABLED) return; - isp_reg_writel(af->isp, af->active_buf->iommu_addr, OMAP3_ISP_IOMEM_H3A, + isp_reg_writel(af->isp, af->active_buf->dma_addr, OMAP3_ISP_IOMEM_H3A, ISPH3A_AFBUFST); if 
(!af->update) diff --git a/drivers/media/platform/omap3isp/isppreview.c b/drivers/media/platform/omap3isp/isppreview.c index 395b2b068c7..720809b07e7 100644 --- a/drivers/media/platform/omap3isp/isppreview.c +++ b/drivers/media/platform/omap3isp/isppreview.c @@ -1499,14 +1499,14 @@ static void preview_isr_buffer(struct isp_prev_device *prev) if (prev->input == PREVIEW_INPUT_MEMORY) { buffer = omap3isp_video_buffer_next(&prev->video_in); if (buffer != NULL) - preview_set_inaddr(prev, buffer->isp_addr); + preview_set_inaddr(prev, buffer->dma); pipe->state |= ISP_PIPELINE_IDLE_INPUT; } if (prev->output & PREVIEW_OUTPUT_MEMORY) { buffer = omap3isp_video_buffer_next(&prev->video_out); if (buffer != NULL) { - preview_set_outaddr(prev, buffer->isp_addr); + preview_set_outaddr(prev, buffer->dma); restart = 1; } pipe->state |= ISP_PIPELINE_IDLE_OUTPUT; @@ -1577,10 +1577,10 @@ static int preview_video_queue(struct isp_video *video, struct isp_prev_device *prev = &video->isp->isp_prev; if (video->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) - preview_set_inaddr(prev, buffer->isp_addr); + preview_set_inaddr(prev, buffer->dma); if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) - preview_set_outaddr(prev, buffer->isp_addr); + preview_set_outaddr(prev, buffer->dma); return 0; } diff --git a/drivers/media/platform/omap3isp/ispqueue.c b/drivers/media/platform/omap3isp/ispqueue.c deleted file mode 100644 index a5e65858e79..00000000000 --- a/drivers/media/platform/omap3isp/ispqueue.c +++ /dev/null @@ -1,1161 +0,0 @@ -/* - * ispqueue.c - * - * TI OMAP3 ISP - Video buffers queue handling - * - * Copyright (C) 2010 Nokia Corporation - * - * Contacts: Laurent Pinchart <laurent.pinchart@ideasonboard.com> - * Sakari Ailus <sakari.ailus@iki.fi> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA - * 02110-1301 USA - */ - -#include <asm/cacheflush.h> -#include <linux/dma-mapping.h> -#include <linux/mm.h> -#include <linux/pagemap.h> -#include <linux/poll.h> -#include <linux/scatterlist.h> -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> - -#include "ispqueue.h" - -/* ----------------------------------------------------------------------------- - * Video buffers management - */ - -/* - * isp_video_buffer_cache_sync - Keep the buffers coherent between CPU and ISP - * - * The typical operation required here is Cache Invalidation across - * the (user space) buffer address range. And this _must_ be done - * at QBUF stage (and *only* at QBUF). - * - * We try to use optimal cache invalidation function: - * - dmac_map_area: - * - used when the number of pages are _low_. - * - it becomes quite slow as the number of pages increase. - * - for 648x492 viewfinder (150 pages) it takes 1.3 ms. - * - for 5 Mpix buffer (2491 pages) it takes between 25-50 ms. - * - * - flush_cache_all: - * - used when the number of pages are _high_. - * - time taken in the range of 500-900 us. 
- * - has a higher penalty but, as whole dcache + icache is invalidated - */ -/* - * FIXME: dmac_inv_range crashes randomly on the user space buffer - * address. Fall back to flush_cache_all for now. - */ -#define ISP_CACHE_FLUSH_PAGES_MAX 0 - -static void isp_video_buffer_cache_sync(struct isp_video_buffer *buf) -{ - if (buf->skip_cache) - return; - - if (buf->vbuf.m.userptr == 0 || buf->npages == 0 || - buf->npages > ISP_CACHE_FLUSH_PAGES_MAX) - flush_cache_all(); - else { - dmac_map_area((void *)buf->vbuf.m.userptr, buf->vbuf.length, - DMA_FROM_DEVICE); - outer_inv_range(buf->vbuf.m.userptr, - buf->vbuf.m.userptr + buf->vbuf.length); - } -} - -/* - * isp_video_buffer_lock_vma - Prevent VMAs from being unmapped - * - * Lock the VMAs underlying the given buffer into memory. This avoids the - * userspace buffer mapping from being swapped out, making VIPT cache handling - * easier. - * - * Note that the pages will not be freed as the buffers have been locked to - * memory using by a call to get_user_pages(), but the userspace mapping could - * still disappear if the VMAs are not locked. This is caused by the memory - * management code trying to be as lock-less as possible, which results in the - * userspace mapping manager not finding out that the pages are locked under - * some conditions. - */ -static int isp_video_buffer_lock_vma(struct isp_video_buffer *buf, int lock) -{ - struct vm_area_struct *vma; - unsigned long start; - unsigned long end; - int ret = 0; - - if (buf->vbuf.memory == V4L2_MEMORY_MMAP) - return 0; - - /* We can be called from workqueue context if the current task dies to - * unlock the VMAs. In that case there's no current memory management - * context so unlocking can't be performed, but the VMAs have been or - * are getting destroyed anyway so it doesn't really matter. - */ - if (!current || !current->mm) - return lock ? -EINVAL : 0; - - start = buf->vbuf.m.userptr; - end = buf->vbuf.m.userptr + buf->vbuf.length - 1; - - down_write(¤t->mm->mmap_sem); - spin_lock(¤t->mm->page_table_lock); - - do { - vma = find_vma(current->mm, start); - if (vma == NULL) { - ret = -EFAULT; - goto out; - } - - if (lock) - vma->vm_flags |= VM_LOCKED; - else - vma->vm_flags &= ~VM_LOCKED; - - start = vma->vm_end + 1; - } while (vma->vm_end < end); - - if (lock) - buf->vm_flags |= VM_LOCKED; - else - buf->vm_flags &= ~VM_LOCKED; - -out: - spin_unlock(¤t->mm->page_table_lock); - up_write(¤t->mm->mmap_sem); - return ret; -} - -/* - * isp_video_buffer_sglist_kernel - Build a scatter list for a vmalloc'ed buffer - * - * Iterate over the vmalloc'ed area and create a scatter list entry for every - * page. - */ -static int isp_video_buffer_sglist_kernel(struct isp_video_buffer *buf) -{ - struct scatterlist *sglist; - unsigned int npages; - unsigned int i; - void *addr; - - addr = buf->vaddr; - npages = PAGE_ALIGN(buf->vbuf.length) >> PAGE_SHIFT; - - sglist = vmalloc(npages * sizeof(*sglist)); - if (sglist == NULL) - return -ENOMEM; - - sg_init_table(sglist, npages); - - for (i = 0; i < npages; ++i, addr += PAGE_SIZE) { - struct page *page = vmalloc_to_page(addr); - - if (page == NULL || PageHighMem(page)) { - vfree(sglist); - return -EINVAL; - } - - sg_set_page(&sglist[i], page, PAGE_SIZE, 0); - } - - buf->sglen = npages; - buf->sglist = sglist; - - return 0; -} - -/* - * isp_video_buffer_sglist_user - Build a scatter list for a userspace buffer - * - * Walk the buffer pages list and create a 1:1 mapping to a scatter list. 
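The deleted helpers here (isp_video_buffer_sglist_kernel() above and isp_video_buffer_sglist_user() just below) build scatterlists by hand, one entry per page. For reference, a hedged sketch of the same job done through the scatterlist library, which also coalesces physically contiguous pages into fewer entries; build_user_sgt() is an illustrative name, and after this patch vb2-dma-contig performs the equivalent work internally:

#include <linux/scatterlist.h>

static int build_user_sgt(struct sg_table *sgt, struct page **pages,
			  unsigned int npages, unsigned long offset,
			  unsigned long size)
{
	/* Allocates sgt and fills it from the pinned page array. */
	return sg_alloc_table_from_pages(sgt, pages, npages, offset, size,
					 GFP_KERNEL);
}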
- */ -static int isp_video_buffer_sglist_user(struct isp_video_buffer *buf) -{ - struct scatterlist *sglist; - unsigned int offset = buf->offset; - unsigned int i; - - sglist = vmalloc(buf->npages * sizeof(*sglist)); - if (sglist == NULL) - return -ENOMEM; - - sg_init_table(sglist, buf->npages); - - for (i = 0; i < buf->npages; ++i) { - if (PageHighMem(buf->pages[i])) { - vfree(sglist); - return -EINVAL; - } - - sg_set_page(&sglist[i], buf->pages[i], PAGE_SIZE - offset, - offset); - offset = 0; - } - - buf->sglen = buf->npages; - buf->sglist = sglist; - - return 0; -} - -/* - * isp_video_buffer_sglist_pfnmap - Build a scatter list for a VM_PFNMAP buffer - * - * Create a scatter list of physically contiguous pages starting at the buffer - * memory physical address. - */ -static int isp_video_buffer_sglist_pfnmap(struct isp_video_buffer *buf) -{ - struct scatterlist *sglist; - unsigned int offset = buf->offset; - unsigned long pfn = buf->paddr >> PAGE_SHIFT; - unsigned int i; - - sglist = vmalloc(buf->npages * sizeof(*sglist)); - if (sglist == NULL) - return -ENOMEM; - - sg_init_table(sglist, buf->npages); - - for (i = 0; i < buf->npages; ++i, ++pfn) { - sg_set_page(&sglist[i], pfn_to_page(pfn), PAGE_SIZE - offset, - offset); - /* PFNMAP buffers will not get DMA-mapped, set the DMA address - * manually. - */ - sg_dma_address(&sglist[i]) = (pfn << PAGE_SHIFT) + offset; - offset = 0; - } - - buf->sglen = buf->npages; - buf->sglist = sglist; - - return 0; -} - -/* - * isp_video_buffer_cleanup - Release pages for a userspace VMA. - * - * Release pages locked by a call isp_video_buffer_prepare_user and free the - * pages table. - */ -static void isp_video_buffer_cleanup(struct isp_video_buffer *buf) -{ - enum dma_data_direction direction; - unsigned int i; - - if (buf->queue->ops->buffer_cleanup) - buf->queue->ops->buffer_cleanup(buf); - - if (!(buf->vm_flags & VM_PFNMAP)) { - direction = buf->vbuf.type == V4L2_BUF_TYPE_VIDEO_CAPTURE - ? DMA_FROM_DEVICE : DMA_TO_DEVICE; - dma_unmap_sg(buf->queue->dev, buf->sglist, buf->sglen, - direction); - } - - vfree(buf->sglist); - buf->sglist = NULL; - buf->sglen = 0; - - if (buf->pages != NULL) { - isp_video_buffer_lock_vma(buf, 0); - - for (i = 0; i < buf->npages; ++i) - page_cache_release(buf->pages[i]); - - vfree(buf->pages); - buf->pages = NULL; - } - - buf->npages = 0; - buf->skip_cache = false; -} - -/* - * isp_video_buffer_prepare_user - Pin userspace VMA pages to memory. - * - * This function creates a list of pages for a userspace VMA. The number of - * pages is first computed based on the buffer size, and pages are then - * retrieved by a call to get_user_pages. - * - * Pages are pinned to memory by get_user_pages, making them available for DMA - * transfers. However, due to memory management optimization, it seems the - * get_user_pages doesn't guarantee that the pinned pages will not be written - * to swap and removed from the userspace mapping(s). When this happens, a page - * fault can be generated when accessing those unmapped pages. - * - * If the fault is triggered by a page table walk caused by VIPT cache - * management operations, the page fault handler might oops if the MM semaphore - * is held, as it can't handle kernel page faults in that case. To fix that, a - * fixup entry needs to be added to the cache management code, or the userspace - * VMA must be locked to avoid removing pages from the userspace mapping in the - * first place. 
- * - * If the number of pages retrieved is smaller than the number required by the - * buffer size, the function returns -EFAULT. - */ -static int isp_video_buffer_prepare_user(struct isp_video_buffer *buf) -{ - unsigned long data; - unsigned int first; - unsigned int last; - int ret; - - data = buf->vbuf.m.userptr; - first = (data & PAGE_MASK) >> PAGE_SHIFT; - last = ((data + buf->vbuf.length - 1) & PAGE_MASK) >> PAGE_SHIFT; - - buf->offset = data & ~PAGE_MASK; - buf->npages = last - first + 1; - buf->pages = vmalloc(buf->npages * sizeof(buf->pages[0])); - if (buf->pages == NULL) - return -ENOMEM; - - down_read(¤t->mm->mmap_sem); - ret = get_user_pages(current, current->mm, data & PAGE_MASK, - buf->npages, - buf->vbuf.type == V4L2_BUF_TYPE_VIDEO_CAPTURE, 0, - buf->pages, NULL); - up_read(¤t->mm->mmap_sem); - - if (ret != buf->npages) { - buf->npages = ret < 0 ? 0 : ret; - isp_video_buffer_cleanup(buf); - return -EFAULT; - } - - ret = isp_video_buffer_lock_vma(buf, 1); - if (ret < 0) - isp_video_buffer_cleanup(buf); - - return ret; -} - -/* - * isp_video_buffer_prepare_pfnmap - Validate a VM_PFNMAP userspace buffer - * - * Userspace VM_PFNMAP buffers are supported only if they are contiguous in - * memory and if they span a single VMA. - * - * Return 0 if the buffer is valid, or -EFAULT otherwise. - */ -static int isp_video_buffer_prepare_pfnmap(struct isp_video_buffer *buf) -{ - struct vm_area_struct *vma; - unsigned long prev_pfn; - unsigned long this_pfn; - unsigned long start; - unsigned long end; - dma_addr_t pa = 0; - int ret = -EFAULT; - - start = buf->vbuf.m.userptr; - end = buf->vbuf.m.userptr + buf->vbuf.length - 1; - - buf->offset = start & ~PAGE_MASK; - buf->npages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1; - buf->pages = NULL; - - down_read(¤t->mm->mmap_sem); - vma = find_vma(current->mm, start); - if (vma == NULL || vma->vm_end < end) - goto done; - - for (prev_pfn = 0; start <= end; start += PAGE_SIZE) { - ret = follow_pfn(vma, start, &this_pfn); - if (ret) - goto done; - - if (prev_pfn == 0) - pa = this_pfn << PAGE_SHIFT; - else if (this_pfn != prev_pfn + 1) { - ret = -EFAULT; - goto done; - } - - prev_pfn = this_pfn; - } - - buf->paddr = pa + buf->offset; - ret = 0; - -done: - up_read(¤t->mm->mmap_sem); - return ret; -} - -/* - * isp_video_buffer_prepare_vm_flags - Get VMA flags for a userspace address - * - * This function locates the VMAs for the buffer's userspace address and checks - * that their flags match. The only flag that we need to care for at the moment - * is VM_PFNMAP. - * - * The buffer vm_flags field is set to the first VMA flags. - * - * Return -EFAULT if no VMA can be found for part of the buffer, or if the VMAs - * have incompatible flags. - */ -static int isp_video_buffer_prepare_vm_flags(struct isp_video_buffer *buf) -{ - struct vm_area_struct *vma; - pgprot_t uninitialized_var(vm_page_prot); - unsigned long start; - unsigned long end; - int ret = -EFAULT; - - start = buf->vbuf.m.userptr; - end = buf->vbuf.m.userptr + buf->vbuf.length - 1; - - down_read(¤t->mm->mmap_sem); - - do { - vma = find_vma(current->mm, start); - if (vma == NULL) - goto done; - - if (start == buf->vbuf.m.userptr) { - buf->vm_flags = vma->vm_flags; - vm_page_prot = vma->vm_page_prot; - } - - if ((buf->vm_flags ^ vma->vm_flags) & VM_PFNMAP) - goto done; - - if (vm_page_prot != vma->vm_page_prot) - goto done; - - start = vma->vm_end + 1; - } while (vma->vm_end < end); - - /* Skip cache management to enhance performances for non-cached or - * write-combining buffers. 
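The comparison that follows checks pgprot_t values with direct equality, which compiles on ARM but is not portable across architectures. A hedged sketch of the same test in the arch-neutral idiom (mapping_is_uncached() is an illustrative name, not part of the patch):

#include <linux/mm.h>

static bool mapping_is_uncached(pgprot_t prot)
{
	/* Non-cached and write-combining mappings need no CPU cache sync. */
	return pgprot_val(prot) == pgprot_val(pgprot_noncached(prot)) ||
	       pgprot_val(prot) == pgprot_val(pgprot_writecombine(prot));
}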
- */ - if (vm_page_prot == pgprot_noncached(vm_page_prot) || - vm_page_prot == pgprot_writecombine(vm_page_prot)) - buf->skip_cache = true; - - ret = 0; - -done: - up_read(¤t->mm->mmap_sem); - return ret; -} - -/* - * isp_video_buffer_prepare - Make a buffer ready for operation - * - * Preparing a buffer involves: - * - * - validating VMAs (userspace buffers only) - * - locking pages and VMAs into memory (userspace buffers only) - * - building page and scatter-gather lists - * - mapping buffers for DMA operation - * - performing driver-specific preparation - * - * The function must be called in userspace context with a valid mm context - * (this excludes cleanup paths such as sys_close when the userspace process - * segfaults). - */ -static int isp_video_buffer_prepare(struct isp_video_buffer *buf) -{ - enum dma_data_direction direction; - int ret; - - switch (buf->vbuf.memory) { - case V4L2_MEMORY_MMAP: - ret = isp_video_buffer_sglist_kernel(buf); - break; - - case V4L2_MEMORY_USERPTR: - ret = isp_video_buffer_prepare_vm_flags(buf); - if (ret < 0) - return ret; - - if (buf->vm_flags & VM_PFNMAP) { - ret = isp_video_buffer_prepare_pfnmap(buf); - if (ret < 0) - return ret; - - ret = isp_video_buffer_sglist_pfnmap(buf); - } else { - ret = isp_video_buffer_prepare_user(buf); - if (ret < 0) - return ret; - - ret = isp_video_buffer_sglist_user(buf); - } - break; - - default: - return -EINVAL; - } - - if (ret < 0) - goto done; - - if (!(buf->vm_flags & VM_PFNMAP)) { - direction = buf->vbuf.type == V4L2_BUF_TYPE_VIDEO_CAPTURE - ? DMA_FROM_DEVICE : DMA_TO_DEVICE; - ret = dma_map_sg(buf->queue->dev, buf->sglist, buf->sglen, - direction); - if (ret != buf->sglen) { - ret = -EFAULT; - goto done; - } - } - - if (buf->queue->ops->buffer_prepare) - ret = buf->queue->ops->buffer_prepare(buf); - -done: - if (ret < 0) { - isp_video_buffer_cleanup(buf); - return ret; - } - - return ret; -} - -/* - * isp_video_queue_query - Query the status of a given buffer - * - * Locking: must be called with the queue lock held. - */ -static void isp_video_buffer_query(struct isp_video_buffer *buf, - struct v4l2_buffer *vbuf) -{ - memcpy(vbuf, &buf->vbuf, sizeof(*vbuf)); - - if (buf->vma_use_count) - vbuf->flags |= V4L2_BUF_FLAG_MAPPED; - - switch (buf->state) { - case ISP_BUF_STATE_ERROR: - vbuf->flags |= V4L2_BUF_FLAG_ERROR; - /* Fallthrough */ - case ISP_BUF_STATE_DONE: - vbuf->flags |= V4L2_BUF_FLAG_DONE; - break; - case ISP_BUF_STATE_QUEUED: - case ISP_BUF_STATE_ACTIVE: - vbuf->flags |= V4L2_BUF_FLAG_QUEUED; - break; - case ISP_BUF_STATE_IDLE: - default: - break; - } -} - -/* - * isp_video_buffer_wait - Wait for a buffer to be ready - * - * In non-blocking mode, return immediately with 0 if the buffer is ready or - * -EAGAIN if the buffer is in the QUEUED or ACTIVE state. - * - * In blocking mode, wait (interruptibly but with no timeout) on the buffer wait - * queue using the same condition. - */ -static int isp_video_buffer_wait(struct isp_video_buffer *buf, int nonblocking) -{ - if (nonblocking) { - return (buf->state != ISP_BUF_STATE_QUEUED && - buf->state != ISP_BUF_STATE_ACTIVE) - ? 0 : -EAGAIN; - } - - return wait_event_interruptible(buf->wait, - buf->state != ISP_BUF_STATE_QUEUED && - buf->state != ISP_BUF_STATE_ACTIVE); -} - -/* ----------------------------------------------------------------------------- - * Queue management - */ - -/* - * isp_video_queue_free - Free video buffers memory - * - * Buffers can only be freed if the queue isn't streaming and if no buffer is - * mapped to userspace. 
Return -EBUSY if those conditions aren't satisfied. - * - * This function must be called with the queue lock held. - */ -static int isp_video_queue_free(struct isp_video_queue *queue) -{ - unsigned int i; - - if (queue->streaming) - return -EBUSY; - - for (i = 0; i < queue->count; ++i) { - if (queue->buffers[i]->vma_use_count != 0) - return -EBUSY; - } - - for (i = 0; i < queue->count; ++i) { - struct isp_video_buffer *buf = queue->buffers[i]; - - isp_video_buffer_cleanup(buf); - - vfree(buf->vaddr); - buf->vaddr = NULL; - - kfree(buf); - queue->buffers[i] = NULL; - } - - INIT_LIST_HEAD(&queue->queue); - queue->count = 0; - return 0; -} - -/* - * isp_video_queue_alloc - Allocate video buffers memory - * - * This function must be called with the queue lock held. - */ -static int isp_video_queue_alloc(struct isp_video_queue *queue, - unsigned int nbuffers, - unsigned int size, enum v4l2_memory memory) -{ - struct isp_video_buffer *buf; - unsigned int i; - void *mem; - int ret; - - /* Start by freeing the buffers. */ - ret = isp_video_queue_free(queue); - if (ret < 0) - return ret; - - /* Bail out if no buffers should be allocated. */ - if (nbuffers == 0) - return 0; - - /* Initialize the allocated buffers. */ - for (i = 0; i < nbuffers; ++i) { - buf = kzalloc(queue->bufsize, GFP_KERNEL); - if (buf == NULL) - break; - - if (memory == V4L2_MEMORY_MMAP) { - /* Allocate video buffers memory for mmap mode. Align - * the size to the page size. - */ - mem = vmalloc_32_user(PAGE_ALIGN(size)); - if (mem == NULL) { - kfree(buf); - break; - } - - buf->vbuf.m.offset = i * PAGE_ALIGN(size); - buf->vaddr = mem; - } - - buf->vbuf.index = i; - buf->vbuf.length = size; - buf->vbuf.type = queue->type; - buf->vbuf.flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; - buf->vbuf.field = V4L2_FIELD_NONE; - buf->vbuf.memory = memory; - - buf->queue = queue; - init_waitqueue_head(&buf->wait); - - queue->buffers[i] = buf; - } - - if (i == 0) - return -ENOMEM; - - queue->count = i; - return nbuffers; -} - -/** - * omap3isp_video_queue_cleanup - Clean up the video buffers queue - * @queue: Video buffers queue - * - * Free all allocated resources and clean up the video buffers queue. The queue - * must not be busy (no ongoing video stream) and buffers must have been - * unmapped. - * - * Return 0 on success or -EBUSY if the queue is busy or buffers haven't been - * unmapped. - */ -int omap3isp_video_queue_cleanup(struct isp_video_queue *queue) -{ - return isp_video_queue_free(queue); -} - -/** - * omap3isp_video_queue_init - Initialize the video buffers queue - * @queue: Video buffers queue - * @type: V4L2 buffer type (capture or output) - * @ops: Driver-specific queue operations - * @dev: Device used for DMA operations - * @bufsize: Size of the driver-specific buffer structure - * - * Initialize the video buffers queue with the supplied parameters. - * - * The queue type must be one of V4L2_BUF_TYPE_VIDEO_CAPTURE or - * V4L2_BUF_TYPE_VIDEO_OUTPUT. Other buffer types are not supported yet. - * - * Buffer objects will be allocated using the given buffer size to allow room - * for driver-specific fields. Driver-specific buffer structures must start - * with a struct isp_video_buffer field. Drivers with no driver-specific buffer - * structure must pass the size of the isp_video_buffer structure in the bufsize - * parameter. - * - * Return 0 on success. 
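One step back: the MMAP allocation path deleted above pairs vmalloc_32_user() at REQBUFS time with remap_vmalloc_range() at mmap() time, using fake per-buffer m.offset values as mmap cookies. A hedged sketch of that pairing (alloc_mmap_buffer() and mmap_buffer() are illustrative names; vb2 now does this bookkeeping itself):

#include <linux/mm.h>
#include <linux/vmalloc.h>

static void *alloc_mmap_buffer(size_t size)
{
	/* Zeroed, 32-bit addressable memory suitable for userspace mapping. */
	return vmalloc_32_user(PAGE_ALIGN(size));
}

static int mmap_buffer(struct vm_area_struct *vma, void *vaddr)
{
	/* Maps the whole vmalloc area at vma->vm_start. */
	return remap_vmalloc_range(vma, vaddr, 0);
}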
- */ -int omap3isp_video_queue_init(struct isp_video_queue *queue, - enum v4l2_buf_type type, - const struct isp_video_queue_operations *ops, - struct device *dev, unsigned int bufsize) -{ - INIT_LIST_HEAD(&queue->queue); - mutex_init(&queue->lock); - spin_lock_init(&queue->irqlock); - - queue->type = type; - queue->ops = ops; - queue->dev = dev; - queue->bufsize = bufsize; - - return 0; -} - -/* ----------------------------------------------------------------------------- - * V4L2 operations - */ - -/** - * omap3isp_video_queue_reqbufs - Allocate video buffers memory - * - * This function is intended to be used as a VIDIOC_REQBUFS ioctl handler. It - * allocated video buffer objects and, for MMAP buffers, buffer memory. - * - * If the number of buffers is 0, all buffers are freed and the function returns - * without performing any allocation. - * - * If the number of buffers is not 0, currently allocated buffers (if any) are - * freed and the requested number of buffers are allocated. Depending on - * driver-specific requirements and on memory availability, a number of buffer - * smaller or bigger than requested can be allocated. This isn't considered as - * an error. - * - * Return 0 on success or one of the following error codes: - * - * -EINVAL if the buffer type or index are invalid - * -EBUSY if the queue is busy (streaming or buffers mapped) - * -ENOMEM if the buffers can't be allocated due to an out-of-memory condition - */ -int omap3isp_video_queue_reqbufs(struct isp_video_queue *queue, - struct v4l2_requestbuffers *rb) -{ - unsigned int nbuffers = rb->count; - unsigned int size; - int ret; - - if (rb->type != queue->type) - return -EINVAL; - - queue->ops->queue_prepare(queue, &nbuffers, &size); - if (size == 0) - return -EINVAL; - - nbuffers = min_t(unsigned int, nbuffers, ISP_VIDEO_MAX_BUFFERS); - - mutex_lock(&queue->lock); - - ret = isp_video_queue_alloc(queue, nbuffers, size, rb->memory); - if (ret < 0) - goto done; - - rb->count = ret; - ret = 0; - -done: - mutex_unlock(&queue->lock); - return ret; -} - -/** - * omap3isp_video_queue_querybuf - Query the status of a buffer in a queue - * - * This function is intended to be used as a VIDIOC_QUERYBUF ioctl handler. It - * returns the status of a given video buffer. - * - * Return 0 on success or -EINVAL if the buffer type or index are invalid. - */ -int omap3isp_video_queue_querybuf(struct isp_video_queue *queue, - struct v4l2_buffer *vbuf) -{ - struct isp_video_buffer *buf; - int ret = 0; - - if (vbuf->type != queue->type) - return -EINVAL; - - mutex_lock(&queue->lock); - - if (vbuf->index >= queue->count) { - ret = -EINVAL; - goto done; - } - - buf = queue->buffers[vbuf->index]; - isp_video_buffer_query(buf, vbuf); - -done: - mutex_unlock(&queue->lock); - return ret; -} - -/** - * omap3isp_video_queue_qbuf - Queue a buffer - * - * This function is intended to be used as a VIDIOC_QBUF ioctl handler. - * - * The v4l2_buffer structure passed from userspace is first sanity tested. If - * sane, the buffer is then processed and added to the main queue and, if the - * queue is streaming, to the IRQ queue. - * - * Before being enqueued, USERPTR buffers are checked for address changes. If - * the buffer has a different userspace address, the old memory area is unlocked - * and the new memory area is locked. 
- */ -int omap3isp_video_queue_qbuf(struct isp_video_queue *queue, - struct v4l2_buffer *vbuf) -{ - struct isp_video_buffer *buf; - unsigned long flags; - int ret = -EINVAL; - - if (vbuf->type != queue->type) - goto done; - - mutex_lock(&queue->lock); - - if (vbuf->index >= queue->count) - goto done; - - buf = queue->buffers[vbuf->index]; - - if (vbuf->memory != buf->vbuf.memory) - goto done; - - if (buf->state != ISP_BUF_STATE_IDLE) - goto done; - - if (vbuf->memory == V4L2_MEMORY_USERPTR && - vbuf->length < buf->vbuf.length) - goto done; - - if (vbuf->memory == V4L2_MEMORY_USERPTR && - vbuf->m.userptr != buf->vbuf.m.userptr) { - isp_video_buffer_cleanup(buf); - buf->vbuf.m.userptr = vbuf->m.userptr; - buf->prepared = 0; - } - - if (!buf->prepared) { - ret = isp_video_buffer_prepare(buf); - if (ret < 0) - goto done; - buf->prepared = 1; - } - - isp_video_buffer_cache_sync(buf); - - buf->state = ISP_BUF_STATE_QUEUED; - list_add_tail(&buf->stream, &queue->queue); - - if (queue->streaming) { - spin_lock_irqsave(&queue->irqlock, flags); - queue->ops->buffer_queue(buf); - spin_unlock_irqrestore(&queue->irqlock, flags); - } - - ret = 0; - -done: - mutex_unlock(&queue->lock); - return ret; -} - -/** - * omap3isp_video_queue_dqbuf - Dequeue a buffer - * - * This function is intended to be used as a VIDIOC_DQBUF ioctl handler. - * - * Wait until a buffer is ready to be dequeued, remove it from the queue and - * copy its information to the v4l2_buffer structure. - * - * If the nonblocking argument is not zero and no buffer is ready, return - * -EAGAIN immediately instead of waiting. - * - * If no buffer has been enqueued, or if the requested buffer type doesn't match - * the queue type, return -EINVAL. - */ -int omap3isp_video_queue_dqbuf(struct isp_video_queue *queue, - struct v4l2_buffer *vbuf, int nonblocking) -{ - struct isp_video_buffer *buf; - int ret; - - if (vbuf->type != queue->type) - return -EINVAL; - - mutex_lock(&queue->lock); - - if (list_empty(&queue->queue)) { - ret = -EINVAL; - goto done; - } - - buf = list_first_entry(&queue->queue, struct isp_video_buffer, stream); - ret = isp_video_buffer_wait(buf, nonblocking); - if (ret < 0) - goto done; - - list_del(&buf->stream); - - isp_video_buffer_query(buf, vbuf); - buf->state = ISP_BUF_STATE_IDLE; - vbuf->flags &= ~V4L2_BUF_FLAG_QUEUED; - -done: - mutex_unlock(&queue->lock); - return ret; -} - -/** - * omap3isp_video_queue_streamon - Start streaming - * - * This function is intended to be used as a VIDIOC_STREAMON ioctl handler. It - * starts streaming on the queue and calls the buffer_queue operation for all - * queued buffers. - * - * Return 0 on success. - */ -int omap3isp_video_queue_streamon(struct isp_video_queue *queue) -{ - struct isp_video_buffer *buf; - unsigned long flags; - - mutex_lock(&queue->lock); - - if (queue->streaming) - goto done; - - queue->streaming = 1; - - spin_lock_irqsave(&queue->irqlock, flags); - list_for_each_entry(buf, &queue->queue, stream) - queue->ops->buffer_queue(buf); - spin_unlock_irqrestore(&queue->irqlock, flags); - -done: - mutex_unlock(&queue->lock); - return 0; -} - -/** - * omap3isp_video_queue_streamoff - Stop streaming - * - * This function is intended to be used as a VIDIOC_STREAMOFF ioctl handler. It - * stops streaming on the queue and wakes up all the buffers. - * - * Drivers must stop the hardware and synchronize with interrupt handlers and/or - * delayed works before calling this function to make sure no buffer will be - * touched by the driver and/or hardware. 
- */ -void omap3isp_video_queue_streamoff(struct isp_video_queue *queue) -{ - struct isp_video_buffer *buf; - unsigned long flags; - unsigned int i; - - mutex_lock(&queue->lock); - - if (!queue->streaming) - goto done; - - queue->streaming = 0; - - spin_lock_irqsave(&queue->irqlock, flags); - for (i = 0; i < queue->count; ++i) { - buf = queue->buffers[i]; - - if (buf->state == ISP_BUF_STATE_ACTIVE) - wake_up(&buf->wait); - - buf->state = ISP_BUF_STATE_IDLE; - } - spin_unlock_irqrestore(&queue->irqlock, flags); - - INIT_LIST_HEAD(&queue->queue); - -done: - mutex_unlock(&queue->lock); -} - -/** - * omap3isp_video_queue_discard_done - Discard all buffers marked as DONE - * - * This function is intended to be used with suspend/resume operations. It - * discards all 'done' buffers as they would be too old to be requested after - * resume. - * - * Drivers must stop the hardware and synchronize with interrupt handlers and/or - * delayed works before calling this function to make sure no buffer will be - * touched by the driver and/or hardware. - */ -void omap3isp_video_queue_discard_done(struct isp_video_queue *queue) -{ - struct isp_video_buffer *buf; - unsigned int i; - - mutex_lock(&queue->lock); - - if (!queue->streaming) - goto done; - - for (i = 0; i < queue->count; ++i) { - buf = queue->buffers[i]; - - if (buf->state == ISP_BUF_STATE_DONE) - buf->state = ISP_BUF_STATE_ERROR; - } - -done: - mutex_unlock(&queue->lock); -} - -static void isp_video_queue_vm_open(struct vm_area_struct *vma) -{ - struct isp_video_buffer *buf = vma->vm_private_data; - - buf->vma_use_count++; -} - -static void isp_video_queue_vm_close(struct vm_area_struct *vma) -{ - struct isp_video_buffer *buf = vma->vm_private_data; - - buf->vma_use_count--; -} - -static const struct vm_operations_struct isp_video_queue_vm_ops = { - .open = isp_video_queue_vm_open, - .close = isp_video_queue_vm_close, -}; - -/** - * omap3isp_video_queue_mmap - Map buffers to userspace - * - * This function is intended to be used as an mmap() file operation handler. It - * maps a buffer to userspace based on the VMA offset. - * - * Only buffers of memory type MMAP are supported. - */ -int omap3isp_video_queue_mmap(struct isp_video_queue *queue, - struct vm_area_struct *vma) -{ - struct isp_video_buffer *uninitialized_var(buf); - unsigned long size; - unsigned int i; - int ret = 0; - - mutex_lock(&queue->lock); - - for (i = 0; i < queue->count; ++i) { - buf = queue->buffers[i]; - if ((buf->vbuf.m.offset >> PAGE_SHIFT) == vma->vm_pgoff) - break; - } - - if (i == queue->count) { - ret = -EINVAL; - goto done; - } - - size = vma->vm_end - vma->vm_start; - - if (buf->vbuf.memory != V4L2_MEMORY_MMAP || - size != PAGE_ALIGN(buf->vbuf.length)) { - ret = -EINVAL; - goto done; - } - - ret = remap_vmalloc_range(vma, buf->vaddr, 0); - if (ret < 0) - goto done; - - vma->vm_ops = &isp_video_queue_vm_ops; - vma->vm_private_data = buf; - isp_video_queue_vm_open(vma); - -done: - mutex_unlock(&queue->lock); - return ret; -} - -/** - * omap3isp_video_queue_poll - Poll video queue state - * - * This function is intended to be used as a poll() file operation handler. It - * polls the state of the video buffer at the front of the queue and returns an - * events mask. - * - * If no buffer is present at the front of the queue, POLLERR is returned. 
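Every omap3isp_video_queue_* entry point deleted in this file has a direct videobuf2 counterpart, which is why the whole file can go: after this patch the ispvideo.c ioctl handlers reduce to thin wrappers. A hedged sketch with illustrative wrapper names (the vb2_* calls themselves are the real API):

#include <media/videobuf2-core.h>

static int my_reqbufs(struct vb2_queue *q, struct v4l2_requestbuffers *rb)
{
	return vb2_reqbufs(q, rb);		/* replaces ..._queue_reqbufs() */
}

static int my_dqbuf(struct vb2_queue *q, struct v4l2_buffer *b,
		    bool nonblocking)
{
	return vb2_dqbuf(q, b, nonblocking);	/* replaces ..._queue_dqbuf() */
}

static int my_streamon(struct vb2_queue *q, enum v4l2_buf_type type)
{
	return vb2_streamon(q, type);		/* replaces ..._queue_streamon() */
}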
- */ -unsigned int omap3isp_video_queue_poll(struct isp_video_queue *queue, - struct file *file, poll_table *wait) -{ - struct isp_video_buffer *buf; - unsigned int mask = 0; - - mutex_lock(&queue->lock); - if (list_empty(&queue->queue)) { - mask |= POLLERR; - goto done; - } - buf = list_first_entry(&queue->queue, struct isp_video_buffer, stream); - - poll_wait(file, &buf->wait, wait); - if (buf->state == ISP_BUF_STATE_DONE || - buf->state == ISP_BUF_STATE_ERROR) { - if (queue->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) - mask |= POLLIN | POLLRDNORM; - else - mask |= POLLOUT | POLLWRNORM; - } - -done: - mutex_unlock(&queue->lock); - return mask; -} diff --git a/drivers/media/platform/omap3isp/ispqueue.h b/drivers/media/platform/omap3isp/ispqueue.h deleted file mode 100644 index 3e048ad6564..00000000000 --- a/drivers/media/platform/omap3isp/ispqueue.h +++ /dev/null @@ -1,188 +0,0 @@ -/* - * ispqueue.h - * - * TI OMAP3 ISP - Video buffers queue handling - * - * Copyright (C) 2010 Nokia Corporation - * - * Contacts: Laurent Pinchart <laurent.pinchart@ideasonboard.com> - * Sakari Ailus <sakari.ailus@iki.fi> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA - * 02110-1301 USA - */ - -#ifndef OMAP3_ISP_QUEUE_H -#define OMAP3_ISP_QUEUE_H - -#include <linux/kernel.h> -#include <linux/list.h> -#include <linux/mm_types.h> -#include <linux/mutex.h> -#include <linux/videodev2.h> -#include <linux/wait.h> - -struct isp_video_queue; -struct page; -struct scatterlist; - -#define ISP_VIDEO_MAX_BUFFERS 16 - -/** - * enum isp_video_buffer_state - ISP video buffer state - * @ISP_BUF_STATE_IDLE: The buffer is under userspace control (dequeued - * or not queued yet). - * @ISP_BUF_STATE_QUEUED: The buffer has been queued but isn't used by the - * device yet. - * @ISP_BUF_STATE_ACTIVE: The buffer is in use for an active video transfer. - * @ISP_BUF_STATE_ERROR: The device is done with the buffer and an error - * occurred. For capture device the buffer likely contains corrupted data or - * no data at all. - * @ISP_BUF_STATE_DONE: The device is done with the buffer and no error occurred. - * For capture devices the buffer contains valid data. 
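For readers tracking the conversion, the deleted buffer states map onto vb2's states roughly as follows (a hedged correspondence, not an exact one; vb2 additionally has VB2_BUF_STATE_PREPARED, which the old code folded into its separate 'prepared' flag):

/*
 * ISP_BUF_STATE_IDLE   ~ VB2_BUF_STATE_DEQUEUED
 * ISP_BUF_STATE_QUEUED ~ VB2_BUF_STATE_QUEUED
 * ISP_BUF_STATE_ACTIVE ~ VB2_BUF_STATE_ACTIVE
 * ISP_BUF_STATE_DONE   ~ VB2_BUF_STATE_DONE
 * ISP_BUF_STATE_ERROR  ~ VB2_BUF_STATE_ERROR
 */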
- */ -enum isp_video_buffer_state { - ISP_BUF_STATE_IDLE, - ISP_BUF_STATE_QUEUED, - ISP_BUF_STATE_ACTIVE, - ISP_BUF_STATE_ERROR, - ISP_BUF_STATE_DONE, -}; - -/** - * struct isp_video_buffer - ISP video buffer - * @vma_use_count: Number of times the buffer is mmap'ed to userspace - * @stream: List head for insertion into main queue - * @queue: ISP buffers queue this buffer belongs to - * @prepared: Whether the buffer has been prepared - * @skip_cache: Whether to skip cache management operations for this buffer - * @vaddr: Memory virtual address (for kernel buffers) - * @vm_flags: Buffer VMA flags (for userspace buffers) - * @offset: Offset inside the first page (for userspace buffers) - * @npages: Number of pages (for userspace buffers) - * @pages: Pages table (for userspace non-VM_PFNMAP buffers) - * @paddr: Memory physical address (for userspace VM_PFNMAP buffers) - * @sglen: Number of elements in the scatter list (for non-VM_PFNMAP buffers) - * @sglist: Scatter list (for non-VM_PFNMAP buffers) - * @vbuf: V4L2 buffer - * @irqlist: List head for insertion into IRQ queue - * @state: Current buffer state - * @wait: Wait queue to signal buffer completion - */ -struct isp_video_buffer { - unsigned long vma_use_count; - struct list_head stream; - struct isp_video_queue *queue; - unsigned int prepared:1; - bool skip_cache; - - /* For kernel buffers. */ - void *vaddr; - - /* For userspace buffers. */ - vm_flags_t vm_flags; - unsigned long offset; - unsigned int npages; - struct page **pages; - dma_addr_t paddr; - - /* For all buffers except VM_PFNMAP. */ - unsigned int sglen; - struct scatterlist *sglist; - - /* Touched by the interrupt handler. */ - struct v4l2_buffer vbuf; - struct list_head irqlist; - enum isp_video_buffer_state state; - wait_queue_head_t wait; -}; - -#define to_isp_video_buffer(vb) container_of(vb, struct isp_video_buffer, vb) - -/** - * struct isp_video_queue_operations - Driver-specific operations - * @queue_prepare: Called before allocating buffers. Drivers should clamp the - * number of buffers according to their requirements, and must return the - * buffer size in bytes. - * @buffer_prepare: Called the first time a buffer is queued, or after changing - * the userspace memory address for a USERPTR buffer, with the queue lock - * held. Drivers should perform device-specific buffer preparation (such as - * mapping the buffer memory in an IOMMU). This operation is optional. - * @buffer_queue: Called when a buffer is being added to the queue with the - * queue irqlock spinlock held. - * @buffer_cleanup: Called before freeing buffers, or before changing the - * userspace memory address for a USERPTR buffer, with the queue lock held. - * Drivers must perform cleanup operations required to undo the - * buffer_prepare call. This operation is optional. 
- */ -struct isp_video_queue_operations { - void (*queue_prepare)(struct isp_video_queue *queue, - unsigned int *nbuffers, unsigned int *size); - int (*buffer_prepare)(struct isp_video_buffer *buf); - void (*buffer_queue)(struct isp_video_buffer *buf); - void (*buffer_cleanup)(struct isp_video_buffer *buf); -}; - -/** - * struct isp_video_queue - ISP video buffers queue - * @type: Type of video buffers handled by this queue - * @ops: Queue operations - * @dev: Device used for DMA operations - * @bufsize: Size of a driver-specific buffer object - * @count: Number of currently allocated buffers - * @buffers: ISP video buffers - * @lock: Mutex to protect access to the buffers, main queue and state - * @irqlock: Spinlock to protect access to the IRQ queue - * @streaming: Queue state, indicates whether the queue is streaming - * @queue: List of all queued buffers - */ -struct isp_video_queue { - enum v4l2_buf_type type; - const struct isp_video_queue_operations *ops; - struct device *dev; - unsigned int bufsize; - - unsigned int count; - struct isp_video_buffer *buffers[ISP_VIDEO_MAX_BUFFERS]; - struct mutex lock; - spinlock_t irqlock; - - unsigned int streaming:1; - - struct list_head queue; -}; - -int omap3isp_video_queue_cleanup(struct isp_video_queue *queue); -int omap3isp_video_queue_init(struct isp_video_queue *queue, - enum v4l2_buf_type type, - const struct isp_video_queue_operations *ops, - struct device *dev, unsigned int bufsize); - -int omap3isp_video_queue_reqbufs(struct isp_video_queue *queue, - struct v4l2_requestbuffers *rb); -int omap3isp_video_queue_querybuf(struct isp_video_queue *queue, - struct v4l2_buffer *vbuf); -int omap3isp_video_queue_qbuf(struct isp_video_queue *queue, - struct v4l2_buffer *vbuf); -int omap3isp_video_queue_dqbuf(struct isp_video_queue *queue, - struct v4l2_buffer *vbuf, int nonblocking); -int omap3isp_video_queue_streamon(struct isp_video_queue *queue); -void omap3isp_video_queue_streamoff(struct isp_video_queue *queue); -void omap3isp_video_queue_discard_done(struct isp_video_queue *queue); -int omap3isp_video_queue_mmap(struct isp_video_queue *queue, - struct vm_area_struct *vma); -unsigned int omap3isp_video_queue_poll(struct isp_video_queue *queue, - struct file *file, poll_table *wait); - -#endif /* OMAP3_ISP_QUEUE_H */ diff --git a/drivers/media/platform/omap3isp/ispresizer.c b/drivers/media/platform/omap3isp/ispresizer.c index 86369df81d7..6f077c2377d 100644 --- a/drivers/media/platform/omap3isp/ispresizer.c +++ b/drivers/media/platform/omap3isp/ispresizer.c @@ -1040,7 +1040,7 @@ static void resizer_isr_buffer(struct isp_res_device *res) */ buffer = omap3isp_video_buffer_next(&res->video_out); if (buffer != NULL) { - resizer_set_outaddr(res, buffer->isp_addr); + resizer_set_outaddr(res, buffer->dma); restart = 1; } @@ -1049,7 +1049,7 @@ static void resizer_isr_buffer(struct isp_res_device *res) if (res->input == RESIZER_INPUT_MEMORY) { buffer = omap3isp_video_buffer_next(&res->video_in); if (buffer != NULL) - resizer_set_inaddr(res, buffer->isp_addr); + resizer_set_inaddr(res, buffer->dma); pipe->state |= ISP_PIPELINE_IDLE_INPUT; } @@ -1101,7 +1101,7 @@ static int resizer_video_queue(struct isp_video *video, struct isp_res_device *res = &video->isp->isp_res; if (video->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) - resizer_set_inaddr(res, buffer->isp_addr); + resizer_set_inaddr(res, buffer->dma); /* * We now have a buffer queued on the output. 
Despite what the @@ -1116,7 +1116,7 @@ static int resizer_video_queue(struct isp_video *video, * continuous mode or when starting the stream. */ if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) - resizer_set_outaddr(res, buffer->isp_addr); + resizer_set_outaddr(res, buffer->dma); return 0; } diff --git a/drivers/media/platform/omap3isp/ispstat.c b/drivers/media/platform/omap3isp/ispstat.c index 5707f85c4cc..e6cbc1eaf4c 100644 --- a/drivers/media/platform/omap3isp/ispstat.c +++ b/drivers/media/platform/omap3isp/ispstat.c @@ -26,13 +26,12 @@ */ #include <linux/dma-mapping.h> -#include <linux/omap-iommu.h> #include <linux/slab.h> #include <linux/uaccess.h> #include "isp.h" -#define IS_COHERENT_BUF(stat) ((stat)->dma_ch >= 0) +#define ISP_STAT_USES_DMAENGINE(stat) ((stat)->dma_ch >= 0) /* * MAGIC_SIZE must always be the greatest common divisor of @@ -77,21 +76,10 @@ static void __isp_stat_buf_sync_magic(struct ispstat *stat, dma_addr_t, unsigned long, size_t, enum dma_data_direction)) { - struct device *dev = stat->isp->dev; - struct page *pg; - dma_addr_t dma_addr; - u32 offset; - - /* Initial magic words */ - pg = vmalloc_to_page(buf->virt_addr); - dma_addr = pfn_to_dma(dev, page_to_pfn(pg)); - dma_sync(dev, dma_addr, 0, MAGIC_SIZE, dir); - - /* Final magic words */ - pg = vmalloc_to_page(buf->virt_addr + buf_size); - dma_addr = pfn_to_dma(dev, page_to_pfn(pg)); - offset = ((u32)buf->virt_addr + buf_size) & ~PAGE_MASK; - dma_sync(dev, dma_addr, offset, MAGIC_SIZE, dir); + /* Sync the initial and final magic words. */ + dma_sync(stat->isp->dev, buf->dma_addr, 0, MAGIC_SIZE, dir); + dma_sync(stat->isp->dev, buf->dma_addr + (buf_size & PAGE_MASK), + buf_size & ~PAGE_MASK, MAGIC_SIZE, dir); } static void isp_stat_buf_sync_magic_for_device(struct ispstat *stat, @@ -99,7 +87,7 @@ static void isp_stat_buf_sync_magic_for_device(struct ispstat *stat, u32 buf_size, enum dma_data_direction dir) { - if (IS_COHERENT_BUF(stat)) + if (ISP_STAT_USES_DMAENGINE(stat)) return; __isp_stat_buf_sync_magic(stat, buf, buf_size, dir, @@ -111,7 +99,7 @@ static void isp_stat_buf_sync_magic_for_cpu(struct ispstat *stat, u32 buf_size, enum dma_data_direction dir) { - if (IS_COHERENT_BUF(stat)) + if (ISP_STAT_USES_DMAENGINE(stat)) return; __isp_stat_buf_sync_magic(stat, buf, buf_size, dir, @@ -180,21 +168,21 @@ static void isp_stat_buf_insert_magic(struct ispstat *stat, static void isp_stat_buf_sync_for_device(struct ispstat *stat, struct ispstat_buffer *buf) { - if (IS_COHERENT_BUF(stat)) + if (ISP_STAT_USES_DMAENGINE(stat)) return; - dma_sync_sg_for_device(stat->isp->dev, buf->iovm->sgt->sgl, - buf->iovm->sgt->nents, DMA_FROM_DEVICE); + dma_sync_sg_for_device(stat->isp->dev, buf->sgt.sgl, + buf->sgt.nents, DMA_FROM_DEVICE); } static void isp_stat_buf_sync_for_cpu(struct ispstat *stat, struct ispstat_buffer *buf) { - if (IS_COHERENT_BUF(stat)) + if (ISP_STAT_USES_DMAENGINE(stat)) return; - dma_sync_sg_for_cpu(stat->isp->dev, buf->iovm->sgt->sgl, - buf->iovm->sgt->nents, DMA_FROM_DEVICE); + dma_sync_sg_for_cpu(stat->isp->dev, buf->sgt.sgl, + buf->sgt.nents, DMA_FROM_DEVICE); } static void isp_stat_buf_clear(struct ispstat *stat) @@ -354,29 +342,21 @@ static struct ispstat_buffer *isp_stat_buf_get(struct ispstat *stat, static void isp_stat_bufs_free(struct ispstat *stat) { - struct isp_device *isp = stat->isp; - int i; + struct device *dev = ISP_STAT_USES_DMAENGINE(stat) + ? 
NULL : stat->isp->dev; + unsigned int i; for (i = 0; i < STAT_MAX_BUFS; i++) { struct ispstat_buffer *buf = &stat->buf[i]; - if (!IS_COHERENT_BUF(stat)) { - if (IS_ERR_OR_NULL((void *)buf->iommu_addr)) - continue; - if (buf->iovm) - dma_unmap_sg(isp->dev, buf->iovm->sgt->sgl, - buf->iovm->sgt->nents, - DMA_FROM_DEVICE); - omap_iommu_vfree(isp->domain, isp->dev, - buf->iommu_addr); - } else { - if (!buf->virt_addr) - continue; - dma_free_coherent(stat->isp->dev, stat->buf_alloc_size, - buf->virt_addr, buf->dma_addr); - } - buf->iommu_addr = 0; - buf->iovm = NULL; + if (!buf->virt_addr) + continue; + + sg_free_table(&buf->sgt); + + dma_free_coherent(dev, stat->buf_alloc_size, buf->virt_addr, + buf->dma_addr); + buf->dma_addr = 0; buf->virt_addr = NULL; buf->empty = 1; @@ -389,83 +369,51 @@ static void isp_stat_bufs_free(struct ispstat *stat) stat->active_buf = NULL; } -static int isp_stat_bufs_alloc_iommu(struct ispstat *stat, unsigned int size) -{ - struct isp_device *isp = stat->isp; - int i; - - stat->buf_alloc_size = size; - - for (i = 0; i < STAT_MAX_BUFS; i++) { - struct ispstat_buffer *buf = &stat->buf[i]; - struct iovm_struct *iovm; - - WARN_ON(buf->dma_addr); - buf->iommu_addr = omap_iommu_vmalloc(isp->domain, isp->dev, 0, - size, IOMMU_FLAG); - if (IS_ERR((void *)buf->iommu_addr)) { - dev_err(stat->isp->dev, - "%s: Can't acquire memory for " - "buffer %d\n", stat->subdev.name, i); - isp_stat_bufs_free(stat); - return -ENOMEM; - } - - iovm = omap_find_iovm_area(isp->dev, buf->iommu_addr); - if (!iovm || - !dma_map_sg(isp->dev, iovm->sgt->sgl, iovm->sgt->nents, - DMA_FROM_DEVICE)) { - isp_stat_bufs_free(stat); - return -ENOMEM; - } - buf->iovm = iovm; - - buf->virt_addr = omap_da_to_va(stat->isp->dev, - (u32)buf->iommu_addr); - buf->empty = 1; - dev_dbg(stat->isp->dev, "%s: buffer[%d] allocated." - "iommu_addr=0x%08lx virt_addr=0x%08lx", - stat->subdev.name, i, buf->iommu_addr, - (unsigned long)buf->virt_addr); - } - - return 0; -} - -static int isp_stat_bufs_alloc_dma(struct ispstat *stat, unsigned int size) +static int isp_stat_bufs_alloc_one(struct device *dev, + struct ispstat_buffer *buf, + unsigned int size) { - int i; - - stat->buf_alloc_size = size; - - for (i = 0; i < STAT_MAX_BUFS; i++) { - struct ispstat_buffer *buf = &stat->buf[i]; - - WARN_ON(buf->iommu_addr); - buf->virt_addr = dma_alloc_coherent(stat->isp->dev, size, - &buf->dma_addr, GFP_KERNEL | GFP_DMA); + int ret; - if (!buf->virt_addr || !buf->dma_addr) { - dev_info(stat->isp->dev, - "%s: Can't acquire memory for " - "DMA buffer %d\n", stat->subdev.name, i); - isp_stat_bufs_free(stat); - return -ENOMEM; - } - buf->empty = 1; + buf->virt_addr = dma_alloc_coherent(dev, size, &buf->dma_addr, + GFP_KERNEL | GFP_DMA); + if (!buf->virt_addr) + return -ENOMEM; - dev_dbg(stat->isp->dev, "%s: buffer[%d] allocated." - "dma_addr=0x%08lx virt_addr=0x%08lx\n", - stat->subdev.name, i, (unsigned long)buf->dma_addr, - (unsigned long)buf->virt_addr); + ret = dma_get_sgtable(dev, &buf->sgt, buf->virt_addr, buf->dma_addr, + size); + if (ret < 0) { + dma_free_coherent(dev, size, buf->virt_addr, buf->dma_addr); + buf->virt_addr = NULL; + buf->dma_addr = 0; + return ret; } return 0; } +/* + * The device passed to the DMA API depends on whether the statistics block uses + * ISP DMA, external DMA or PIO to transfer data. + * + * The first case (for the AEWB and AF engines) passes the ISP device, resulting + * in the DMA buffers being mapped through the ISP IOMMU. 
+ * + * The second case (for the histogram engine) should pass the DMA engine device. + * As that device isn't accessible through the OMAP DMA engine API the driver + * passes NULL instead, resulting in the buffers being mapped directly as + * physical pages. + * + * The third case (for the histogram engine) doesn't require any mapping. The + * buffers could be allocated with kmalloc/vmalloc, but we still use + * dma_alloc_coherent() for consistency purpose. + */ static int isp_stat_bufs_alloc(struct ispstat *stat, u32 size) { + struct device *dev = ISP_STAT_USES_DMAENGINE(stat) + ? NULL : stat->isp->dev; unsigned long flags; + unsigned int i; spin_lock_irqsave(&stat->isp->stat_lock, flags); @@ -489,10 +437,31 @@ static int isp_stat_bufs_alloc(struct ispstat *stat, u32 size) isp_stat_bufs_free(stat); - if (IS_COHERENT_BUF(stat)) - return isp_stat_bufs_alloc_dma(stat, size); - else - return isp_stat_bufs_alloc_iommu(stat, size); + stat->buf_alloc_size = size; + + for (i = 0; i < STAT_MAX_BUFS; i++) { + struct ispstat_buffer *buf = &stat->buf[i]; + int ret; + + ret = isp_stat_bufs_alloc_one(dev, buf, size); + if (ret < 0) { + dev_err(stat->isp->dev, + "%s: Failed to allocate DMA buffer %u\n", + stat->subdev.name, i); + isp_stat_bufs_free(stat); + return ret; + } + + buf->empty = 1; + + dev_dbg(stat->isp->dev, + "%s: buffer[%u] allocated. dma=0x%08lx virt=0x%08lx", + stat->subdev.name, i, + (unsigned long)buf->dma_addr, + (unsigned long)buf->virt_addr); + } + + return 0; } static void isp_stat_queue_event(struct ispstat *stat, int err) diff --git a/drivers/media/platform/omap3isp/ispstat.h b/drivers/media/platform/omap3isp/ispstat.h index 9a047c929b9..58d6ac7cb66 100644 --- a/drivers/media/platform/omap3isp/ispstat.h +++ b/drivers/media/platform/omap3isp/ispstat.h @@ -46,8 +46,7 @@ struct ispstat; struct ispstat_buffer { - unsigned long iommu_addr; - struct iovm_struct *iovm; + struct sg_table sgt; void *virt_addr; dma_addr_t dma_addr; struct timespec ts; diff --git a/drivers/media/platform/omap3isp/ispvideo.c b/drivers/media/platform/omap3isp/ispvideo.c index 85b4036ba5e..e36bac26476 100644 --- a/drivers/media/platform/omap3isp/ispvideo.c +++ b/drivers/media/platform/omap3isp/ispvideo.c @@ -27,7 +27,6 @@ #include <linux/clk.h> #include <linux/mm.h> #include <linux/module.h> -#include <linux/omap-iommu.h> #include <linux/pagemap.h> #include <linux/scatterlist.h> #include <linux/sched.h> @@ -35,6 +34,7 @@ #include <linux/vmalloc.h> #include <media/v4l2-dev.h> #include <media/v4l2-ioctl.h> +#include <media/videobuf2-dma-contig.h> #include "ispvideo.h" #include "isp.h" @@ -326,90 +326,36 @@ isp_video_check_format(struct isp_video *video, struct isp_video_fh *vfh) } /* ----------------------------------------------------------------------------- - * IOMMU management - */ - -#define IOMMU_FLAG (IOVMF_ENDIAN_LITTLE | IOVMF_ELSZ_8) - -/* - * ispmmu_vmap - Wrapper for Virtual memory mapping of a scatter gather list - * @isp: Device pointer specific to the OMAP3 ISP. - * @sglist: Pointer to source Scatter gather list to allocate. - * @sglen: Number of elements of the scatter-gatter list. - * - * Returns a resulting mapped device address by the ISP MMU, or -ENOMEM if - * we ran out of memory. 
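Stepping back across the ispccdc.c and ispstat.c hunks above: the LSC, FPC and statistics buffers all converge on one allocation pattern: a dma_alloc_coherent() allocation plus a dma_get_sgtable() view of it, so that the scatterlist sync helpers can bracket CPU accesses. A hedged sketch of that pattern (alloc_table() is an illustrative name; freeing the sg_table on a later failure is the caller's job, as in the patch):

#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>

static int alloc_table(struct device *dev, size_t size, void **virt,
		       dma_addr_t *dma, struct sg_table *sgt)
{
	int ret;

	*virt = dma_alloc_coherent(dev, size, dma, GFP_KERNEL);
	if (*virt == NULL)
		return -ENOMEM;

	ret = dma_get_sgtable(dev, sgt, *virt, *dma, size);
	if (ret < 0) {
		dma_free_coherent(dev, size, *virt, *dma);
		return ret;
	}

	/* Mirroring the LSC hunk: bracket CPU writes with sync calls. */
	dma_sync_sg_for_cpu(dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE);
	/* ... copy_from_user()/memcpy() into *virt here ... */
	dma_sync_sg_for_device(dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE);

	return 0;
}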
- */ -static dma_addr_t -ispmmu_vmap(struct isp_device *isp, const struct scatterlist *sglist, int sglen) -{ - struct sg_table *sgt; - u32 da; - - sgt = kmalloc(sizeof(*sgt), GFP_KERNEL); - if (sgt == NULL) - return -ENOMEM; - - sgt->sgl = (struct scatterlist *)sglist; - sgt->nents = sglen; - sgt->orig_nents = sglen; - - da = omap_iommu_vmap(isp->domain, isp->dev, 0, sgt, IOMMU_FLAG); - if (IS_ERR_VALUE(da)) - kfree(sgt); - - return da; -} - -/* - * ispmmu_vunmap - Unmap a device address from the ISP MMU - * @isp: Device pointer specific to the OMAP3 ISP. - * @da: Device address generated from a ispmmu_vmap call. - */ -static void ispmmu_vunmap(struct isp_device *isp, dma_addr_t da) -{ - struct sg_table *sgt; - - sgt = omap_iommu_vunmap(isp->domain, isp->dev, (u32)da); - kfree(sgt); -} - -/* ----------------------------------------------------------------------------- * Video queue operations */ -static void isp_video_queue_prepare(struct isp_video_queue *queue, - unsigned int *nbuffers, unsigned int *size) +static int isp_video_queue_setup(struct vb2_queue *queue, + const struct v4l2_format *fmt, + unsigned int *count, unsigned int *num_planes, + unsigned int sizes[], void *alloc_ctxs[]) { - struct isp_video_fh *vfh = - container_of(queue, struct isp_video_fh, queue); + struct isp_video_fh *vfh = vb2_get_drv_priv(queue); struct isp_video *video = vfh->video; - *size = vfh->format.fmt.pix.sizeimage; - if (*size == 0) - return; + *num_planes = 1; - *nbuffers = min(*nbuffers, video->capture_mem / PAGE_ALIGN(*size)); -} + sizes[0] = vfh->format.fmt.pix.sizeimage; + if (sizes[0] == 0) + return -EINVAL; -static void isp_video_buffer_cleanup(struct isp_video_buffer *buf) -{ - struct isp_video_fh *vfh = isp_video_queue_to_isp_video_fh(buf->queue); - struct isp_buffer *buffer = to_isp_buffer(buf); - struct isp_video *video = vfh->video; + alloc_ctxs[0] = video->alloc_ctx; - if (buffer->isp_addr) { - ispmmu_vunmap(video->isp, buffer->isp_addr); - buffer->isp_addr = 0; - } + *count = min(*count, video->capture_mem / PAGE_ALIGN(sizes[0])); + + return 0; } -static int isp_video_buffer_prepare(struct isp_video_buffer *buf) +static int isp_video_buffer_prepare(struct vb2_buffer *buf) { - struct isp_video_fh *vfh = isp_video_queue_to_isp_video_fh(buf->queue); + struct isp_video_fh *vfh = vb2_get_drv_priv(buf->vb2_queue); struct isp_buffer *buffer = to_isp_buffer(buf); struct isp_video *video = vfh->video; - unsigned long addr; + dma_addr_t addr; /* Refuse to prepare the buffer if the video node has registered an * error. We don't need to take any lock here as the operation is @@ -420,19 +366,16 @@ static int isp_video_buffer_prepare(struct isp_video_buffer *buf) if (unlikely(video->error)) return -EIO; - addr = ispmmu_vmap(video->isp, buf->sglist, buf->sglen); - if (IS_ERR_VALUE(addr)) - return -EIO; - + addr = vb2_dma_contig_plane_dma_addr(buf, 0); if (!IS_ALIGNED(addr, 32)) { - dev_dbg(video->isp->dev, "Buffer address must be " - "aligned to 32 bytes boundary.\n"); - ispmmu_vunmap(video->isp, buffer->isp_addr); + dev_dbg(video->isp->dev, + "Buffer address must be aligned to a 32-byte boundary.\n"); return -EINVAL; } - buf->vbuf.bytesused = vfh->format.fmt.pix.sizeimage; - buffer->isp_addr = addr; + vb2_set_plane_payload(&buffer->vb, 0, vfh->format.fmt.pix.sizeimage); + buffer->dma = addr; + return 0; } @@ -445,9 +388,9 @@ static int isp_video_buffer_prepare(struct isp_video_buffer *buf) * If the pipeline is busy, it will be restarted in the output module interrupt * handler.
*/ -static void isp_video_buffer_queue(struct isp_video_buffer *buf) +static void isp_video_buffer_queue(struct vb2_buffer *buf) { - struct isp_video_fh *vfh = isp_video_queue_to_isp_video_fh(buf->queue); + struct isp_video_fh *vfh = vb2_get_drv_priv(buf->vb2_queue); struct isp_buffer *buffer = to_isp_buffer(buf); struct isp_video *video = vfh->video; struct isp_pipeline *pipe = to_isp_pipeline(&video->video.entity); @@ -456,14 +399,18 @@ static void isp_video_buffer_queue(struct isp_video_buffer *buf) unsigned int empty; unsigned int start; + spin_lock_irqsave(&video->irqlock, flags); + if (unlikely(video->error)) { - buf->state = ISP_BUF_STATE_ERROR; - wake_up(&buf->wait); + vb2_buffer_done(&buffer->vb, VB2_BUF_STATE_ERROR); + spin_unlock_irqrestore(&video->irqlock, flags); return; } empty = list_empty(&video->dmaqueue); - list_add_tail(&buffer->buffer.irqlist, &video->dmaqueue); + list_add_tail(&buffer->irqlist, &video->dmaqueue); + + spin_unlock_irqrestore(&video->irqlock, flags); if (empty) { if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) @@ -487,23 +434,22 @@ static void isp_video_buffer_queue(struct isp_video_buffer *buf) } } -static const struct isp_video_queue_operations isp_video_queue_ops = { - .queue_prepare = &isp_video_queue_prepare, - .buffer_prepare = &isp_video_buffer_prepare, - .buffer_queue = &isp_video_buffer_queue, - .buffer_cleanup = &isp_video_buffer_cleanup, +static const struct vb2_ops isp_video_queue_ops = { + .queue_setup = isp_video_queue_setup, + .buf_prepare = isp_video_buffer_prepare, + .buf_queue = isp_video_buffer_queue, }; /* * omap3isp_video_buffer_next - Complete the current buffer and return the next * @video: ISP video object * - * Remove the current video buffer from the DMA queue and fill its timestamp, - * field count and state fields before waking up its completion handler. + * Remove the current video buffer from the DMA queue and fill its timestamp and + * field count before handing it back to videobuf2. * - * For capture video nodes the buffer state is set to ISP_BUF_STATE_DONE if no - * error has been flagged in the pipeline, or to ISP_BUF_STATE_ERROR otherwise. - * For video output nodes the buffer state is always set to ISP_BUF_STATE_DONE. + * For capture video nodes the buffer state is set to VB2_BUF_STATE_DONE if no + * error has been flagged in the pipeline, or to VB2_BUF_STATE_ERROR otherwise. + * For video output nodes the buffer state is always set to VB2_BUF_STATE_DONE. * * The DMA queue is expected to contain at least one buffer. 
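
Stripped of the omap3isp specifics, the completion path the driver now follows is the standard videobuf2 pattern; a rough sketch, with hypothetical types and names:

    #include <linux/ktime.h>
    #include <linux/list.h>
    #include <linux/spinlock.h>
    #include <linux/time.h>
    #include <media/videobuf2-core.h>

    struct my_buffer {                        /* hypothetical */
            struct vb2_buffer vb;
            struct list_head irqlist;
    };

    struct my_dma_queue {                     /* hypothetical */
            spinlock_t irqlock;
            struct list_head bufs;
    };

    /* Called from the frame-completion interrupt. */
    static void complete_one_buffer(struct my_dma_queue *q, bool error)
    {
            struct my_buffer *buf;
            unsigned long flags;
            struct timespec ts;

            spin_lock_irqsave(&q->irqlock, flags);
            buf = list_first_entry(&q->bufs, struct my_buffer, irqlist);
            list_del(&buf->irqlist);
            spin_unlock_irqrestore(&q->irqlock, flags);

            ktime_get_ts(&ts);
            buf->vb.v4l2_buf.timestamp.tv_sec = ts.tv_sec;
            buf->vb.v4l2_buf.timestamp.tv_usec = ts.tv_nsec / NSEC_PER_USEC;

            /* vb2 marks the buffer done and wakes any waiting reader. */
            vb2_buffer_done(&buf->vb, error ? VB2_BUF_STATE_ERROR
                                            : VB2_BUF_STATE_DONE);
    }
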
* @@ -513,26 +459,25 @@ static const struct isp_video_queue_operations isp_video_queue_ops = { struct isp_buffer *omap3isp_video_buffer_next(struct isp_video *video) { struct isp_pipeline *pipe = to_isp_pipeline(&video->video.entity); - struct isp_video_queue *queue = video->queue; enum isp_pipeline_state state; - struct isp_video_buffer *buf; + struct isp_buffer *buf; unsigned long flags; struct timespec ts; - spin_lock_irqsave(&queue->irqlock, flags); + spin_lock_irqsave(&video->irqlock, flags); if (WARN_ON(list_empty(&video->dmaqueue))) { - spin_unlock_irqrestore(&queue->irqlock, flags); + spin_unlock_irqrestore(&video->irqlock, flags); return NULL; } - buf = list_first_entry(&video->dmaqueue, struct isp_video_buffer, + buf = list_first_entry(&video->dmaqueue, struct isp_buffer, irqlist); list_del(&buf->irqlist); - spin_unlock_irqrestore(&queue->irqlock, flags); + spin_unlock_irqrestore(&video->irqlock, flags); ktime_get_ts(&ts); - buf->vbuf.timestamp.tv_sec = ts.tv_sec; - buf->vbuf.timestamp.tv_usec = ts.tv_nsec / NSEC_PER_USEC; + buf->vb.v4l2_buf.timestamp.tv_sec = ts.tv_sec; + buf->vb.v4l2_buf.timestamp.tv_usec = ts.tv_nsec / NSEC_PER_USEC; /* Do frame number propagation only if this is the output video node. * Frame number either comes from the CSI receivers or it gets @@ -541,22 +486,27 @@ struct isp_buffer *omap3isp_video_buffer_next(struct isp_video *video) * first, so the input number might lag behind by 1 in some cases. */ if (video == pipe->output && !pipe->do_propagation) - buf->vbuf.sequence = atomic_inc_return(&pipe->frame_number); + buf->vb.v4l2_buf.sequence = + atomic_inc_return(&pipe->frame_number); else - buf->vbuf.sequence = atomic_read(&pipe->frame_number); + buf->vb.v4l2_buf.sequence = atomic_read(&pipe->frame_number); /* Report pipeline errors to userspace on the capture device side. 
*/ - if (queue->type == V4L2_BUF_TYPE_VIDEO_CAPTURE && pipe->error) { - buf->state = ISP_BUF_STATE_ERROR; + if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE && pipe->error) { + state = VB2_BUF_STATE_ERROR; pipe->error = false; } else { - buf->state = ISP_BUF_STATE_DONE; + state = VB2_BUF_STATE_DONE; } - wake_up(&buf->wait); + vb2_buffer_done(&buf->vb, state); + + spin_lock_irqsave(&video->irqlock, flags); if (list_empty(&video->dmaqueue)) { - if (queue->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) + spin_unlock_irqrestore(&video->irqlock, flags); + + if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) state = ISP_PIPELINE_QUEUE_OUTPUT | ISP_PIPELINE_STREAM; else @@ -571,16 +521,19 @@ struct isp_buffer *omap3isp_video_buffer_next(struct isp_video *video) return NULL; } - if (queue->type == V4L2_BUF_TYPE_VIDEO_CAPTURE && pipe->input != NULL) { - spin_lock_irqsave(&pipe->lock, flags); + if (video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE && pipe->input != NULL) { + spin_lock(&pipe->lock); pipe->state &= ~ISP_PIPELINE_STREAM; - spin_unlock_irqrestore(&pipe->lock, flags); + spin_unlock(&pipe->lock); } - buf = list_first_entry(&video->dmaqueue, struct isp_video_buffer, + buf = list_first_entry(&video->dmaqueue, struct isp_buffer, irqlist); - buf->state = ISP_BUF_STATE_ACTIVE; - return to_isp_buffer(buf); + buf->vb.state = VB2_BUF_STATE_ACTIVE; + + spin_unlock_irqrestore(&video->irqlock, flags); + + return buf; } /* @@ -592,25 +545,22 @@ struct isp_buffer *omap3isp_video_buffer_next(struct isp_video *video) */ void omap3isp_video_cancel_stream(struct isp_video *video) { - struct isp_video_queue *queue = video->queue; unsigned long flags; - spin_lock_irqsave(&queue->irqlock, flags); + spin_lock_irqsave(&video->irqlock, flags); while (!list_empty(&video->dmaqueue)) { - struct isp_video_buffer *buf; + struct isp_buffer *buf; buf = list_first_entry(&video->dmaqueue, - struct isp_video_buffer, irqlist); + struct isp_buffer, irqlist); list_del(&buf->irqlist); - - buf->state = ISP_BUF_STATE_ERROR; - wake_up(&buf->wait); + vb2_buffer_done(&buf->vb, VB2_BUF_STATE_ERROR); } video->error = true; - spin_unlock_irqrestore(&queue->irqlock, flags); + spin_unlock_irqrestore(&video->irqlock, flags); } /* @@ -627,12 +577,15 @@ void omap3isp_video_resume(struct isp_video *video, int continuous) { struct isp_buffer *buf = NULL; - if (continuous && video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) - omap3isp_video_queue_discard_done(video->queue); + if (continuous && video->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) { + mutex_lock(&video->queue_lock); + vb2_discard_done(video->queue); + mutex_unlock(&video->queue_lock); + } if (!list_empty(&video->dmaqueue)) { buf = list_first_entry(&video->dmaqueue, - struct isp_buffer, buffer.irqlist); + struct isp_buffer, irqlist); video->ops->queue(video, buf); video->dmaqueue_flags |= ISP_VIDEO_DMAQUEUE_QUEUED; } else { @@ -840,33 +793,56 @@ static int isp_video_reqbufs(struct file *file, void *fh, struct v4l2_requestbuffers *rb) { struct isp_video_fh *vfh = to_isp_video_fh(fh); + struct isp_video *video = video_drvdata(file); + int ret; - return omap3isp_video_queue_reqbufs(&vfh->queue, rb); + mutex_lock(&video->queue_lock); + ret = vb2_reqbufs(&vfh->queue, rb); + mutex_unlock(&video->queue_lock); + + return ret; } static int isp_video_querybuf(struct file *file, void *fh, struct v4l2_buffer *b) { struct isp_video_fh *vfh = to_isp_video_fh(fh); + struct isp_video *video = video_drvdata(file); + int ret; + + mutex_lock(&video->queue_lock); + ret = vb2_querybuf(&vfh->queue, b); + 
mutex_unlock(&video->queue_lock); - return omap3isp_video_queue_querybuf(&vfh->queue, b); + return ret; } static int isp_video_qbuf(struct file *file, void *fh, struct v4l2_buffer *b) { struct isp_video_fh *vfh = to_isp_video_fh(fh); + struct isp_video *video = video_drvdata(file); + int ret; - return omap3isp_video_queue_qbuf(&vfh->queue, b); + mutex_lock(&video->queue_lock); + ret = vb2_qbuf(&vfh->queue, b); + mutex_unlock(&video->queue_lock); + + return ret; } static int isp_video_dqbuf(struct file *file, void *fh, struct v4l2_buffer *b) { struct isp_video_fh *vfh = to_isp_video_fh(fh); + struct isp_video *video = video_drvdata(file); + int ret; + + mutex_lock(&video->queue_lock); + ret = vb2_dqbuf(&vfh->queue, b, file->f_flags & O_NONBLOCK); + mutex_unlock(&video->queue_lock); - return omap3isp_video_queue_dqbuf(&vfh->queue, b, - file->f_flags & O_NONBLOCK); + return ret; } static int isp_video_check_external_subdevs(struct isp_video *video, @@ -1006,11 +982,6 @@ isp_video_streamon(struct file *file, void *fh, enum v4l2_buf_type type) mutex_lock(&video->stream_lock); - if (video->streaming) { - mutex_unlock(&video->stream_lock); - return -EBUSY; - } - /* Start streaming on the pipeline. No link touching an entity in the * pipeline can be activated or deactivated once streaming is started. */ @@ -1069,7 +1040,9 @@ isp_video_streamon(struct file *file, void *fh, enum v4l2_buf_type type) INIT_LIST_HEAD(&video->dmaqueue); atomic_set(&pipe->frame_number, -1); - ret = omap3isp_video_queue_streamon(&vfh->queue); + mutex_lock(&video->queue_lock); + ret = vb2_streamon(&vfh->queue, type); + mutex_unlock(&video->queue_lock); if (ret < 0) goto err_check_format; @@ -1082,19 +1055,19 @@ isp_video_streamon(struct file *file, void *fh, enum v4l2_buf_type type) ISP_PIPELINE_STREAM_CONTINUOUS); if (ret < 0) goto err_set_stream; - spin_lock_irqsave(&video->queue->irqlock, flags); + spin_lock_irqsave(&video->irqlock, flags); if (list_empty(&video->dmaqueue)) video->dmaqueue_flags |= ISP_VIDEO_DMAQUEUE_UNDERRUN; - spin_unlock_irqrestore(&video->queue->irqlock, flags); + spin_unlock_irqrestore(&video->irqlock, flags); } - video->streaming = 1; - mutex_unlock(&video->stream_lock); return 0; err_set_stream: - omap3isp_video_queue_streamoff(&vfh->queue); + mutex_lock(&video->queue_lock); + vb2_streamoff(&vfh->queue, type); + mutex_unlock(&video->queue_lock); err_check_format: media_entity_pipeline_stop(&video->video.entity); err_pipeline_start: @@ -1130,9 +1103,9 @@ isp_video_streamoff(struct file *file, void *fh, enum v4l2_buf_type type) mutex_lock(&video->stream_lock); /* Make sure we're not streaming yet. */ - mutex_lock(&vfh->queue.lock); - streaming = vfh->queue.streaming; - mutex_unlock(&vfh->queue.lock); + mutex_lock(&video->queue_lock); + streaming = vb2_is_streaming(&vfh->queue); + mutex_unlock(&video->queue_lock); if (!streaming) goto done; @@ -1151,9 +1124,12 @@ isp_video_streamoff(struct file *file, void *fh, enum v4l2_buf_type type) /* Stop the stream. 
*/ omap3isp_pipeline_set_stream(pipe, ISP_PIPELINE_STREAM_STOPPED); - omap3isp_video_queue_streamoff(&vfh->queue); + omap3isp_video_cancel_stream(video); + + mutex_lock(&video->queue_lock); + vb2_streamoff(&vfh->queue, type); + mutex_unlock(&video->queue_lock); video->queue = NULL; - video->streaming = 0; video->error = false; if (video->isp->pdata->set_constraints) @@ -1223,6 +1199,7 @@ static int isp_video_open(struct file *file) { struct isp_video *video = video_drvdata(file); struct isp_video_fh *handle; + struct vb2_queue *queue; int ret = 0; handle = kzalloc(sizeof(*handle), GFP_KERNEL); @@ -1244,9 +1221,20 @@ static int isp_video_open(struct file *file) goto done; } - omap3isp_video_queue_init(&handle->queue, video->type, - &isp_video_queue_ops, video->isp->dev, - sizeof(struct isp_buffer)); + queue = &handle->queue; + queue->type = video->type; + queue->io_modes = VB2_MMAP | VB2_USERPTR; + queue->drv_priv = handle; + queue->ops = &isp_video_queue_ops; + queue->mem_ops = &vb2_dma_contig_memops; + queue->buf_struct_size = sizeof(struct isp_buffer); + queue->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; + + ret = vb2_queue_init(&handle->queue); + if (ret < 0) { + omap3isp_put(video->isp); + goto done; + } memset(&handle->format, 0, sizeof(handle->format)); handle->format.type = video->type; @@ -1273,9 +1261,9 @@ static int isp_video_release(struct file *file) /* Disable streaming and free the buffers queue resources. */ isp_video_streamoff(file, vfh, video->type); - mutex_lock(&handle->queue.lock); - omap3isp_video_queue_cleanup(&handle->queue); - mutex_unlock(&handle->queue.lock); + mutex_lock(&video->queue_lock); + vb2_queue_release(&handle->queue); + mutex_unlock(&video->queue_lock); omap3isp_pipeline_pm_use(&video->video.entity, 0); @@ -1292,16 +1280,27 @@ static int isp_video_release(struct file *file) static unsigned int isp_video_poll(struct file *file, poll_table *wait) { struct isp_video_fh *vfh = to_isp_video_fh(file->private_data); - struct isp_video_queue *queue = &vfh->queue; + struct isp_video *video = video_drvdata(file); + int ret; - return omap3isp_video_queue_poll(queue, file, wait); + mutex_lock(&video->queue_lock); + ret = vb2_poll(&vfh->queue, file, wait); + mutex_unlock(&video->queue_lock); + + return ret; } static int isp_video_mmap(struct file *file, struct vm_area_struct *vma) { struct isp_video_fh *vfh = to_isp_video_fh(file->private_data); + struct isp_video *video = video_drvdata(file); + int ret; + + mutex_lock(&video->queue_lock); + ret = vb2_mmap(&vfh->queue, vma); + mutex_unlock(&video->queue_lock); - return omap3isp_video_queue_mmap(&vfh->queue, vma); + return ret; } static struct v4l2_file_operations isp_video_fops = { @@ -1342,15 +1341,23 @@ int omap3isp_video_init(struct isp_video *video, const char *name) return -EINVAL; } + video->alloc_ctx = vb2_dma_contig_init_ctx(video->isp->dev); + if (IS_ERR(video->alloc_ctx)) + return PTR_ERR(video->alloc_ctx); + ret = media_entity_init(&video->video.entity, 1, &video->pad, 0); - if (ret < 0) + if (ret < 0) { + vb2_dma_contig_cleanup_ctx(video->alloc_ctx); return ret; + } mutex_init(&video->mutex); atomic_set(&video->active, 0); spin_lock_init(&video->pipe.lock); mutex_init(&video->stream_lock); + mutex_init(&video->queue_lock); + spin_lock_init(&video->irqlock); /* Initialize the video device. 
*/ if (video->ops == NULL) @@ -1371,7 +1378,9 @@ int omap3isp_video_init(struct isp_video *video, const char *name) void omap3isp_video_cleanup(struct isp_video *video) { + vb2_dma_contig_cleanup_ctx(video->alloc_ctx); media_entity_cleanup(&video->video.entity); + mutex_destroy(&video->queue_lock); mutex_destroy(&video->stream_lock); mutex_destroy(&video->mutex); } diff --git a/drivers/media/platform/omap3isp/ispvideo.h b/drivers/media/platform/omap3isp/ispvideo.h index 4e194076cc6..7d2e82122ec 100644 --- a/drivers/media/platform/omap3isp/ispvideo.h +++ b/drivers/media/platform/omap3isp/ispvideo.h @@ -30,8 +30,7 @@ #include <media/media-entity.h> #include <media/v4l2-dev.h> #include <media/v4l2-fh.h> - -#include "ispqueue.h" +#include <media/videobuf2-core.h> #define ISP_VIDEO_DRIVER_NAME "ispvideo" #define ISP_VIDEO_DRIVER_VERSION "0.0.2" @@ -124,17 +123,19 @@ static inline int isp_pipeline_ready(struct isp_pipeline *pipe) ISP_PIPELINE_IDLE_OUTPUT); } -/* - * struct isp_buffer - ISP buffer - * @buffer: ISP video buffer - * @isp_addr: MMU mapped address (a.k.a. device address) of the buffer. +/** + * struct isp_buffer - ISP video buffer + * @vb: videobuf2 buffer + * @irqlist: List head for insertion into IRQ queue + * @dma: DMA address */ struct isp_buffer { - struct isp_video_buffer buffer; - dma_addr_t isp_addr; + struct vb2_buffer vb; + struct list_head irqlist; + dma_addr_t dma; }; -#define to_isp_buffer(buf) container_of(buf, struct isp_buffer, buffer) +#define to_isp_buffer(buf) container_of(buf, struct isp_buffer, vb) enum isp_video_dmaqueue_flags { /* Set if DMA queue becomes empty when ISP_PIPELINE_STREAM_CONTINUOUS */ @@ -172,16 +173,16 @@ struct isp_video { unsigned int bpl_value; /* bytes per line value */ unsigned int bpl_padding; /* padding at end of line */ - /* Entity video node streaming */ - unsigned int streaming:1; - /* Pipeline state */ struct isp_pipeline pipe; struct mutex stream_lock; /* pipeline and stream states */ bool error; /* Video buffers queue */ - struct isp_video_queue *queue; + void *alloc_ctx; + struct vb2_queue *queue; + struct mutex queue_lock; /* protects the queue */ + spinlock_t irqlock; /* protects dmaqueue */ struct list_head dmaqueue; enum isp_video_dmaqueue_flags dmaqueue_flags; @@ -193,7 +194,7 @@ struct isp_video { struct isp_video_fh { struct v4l2_fh vfh; struct isp_video *video; - struct isp_video_queue queue; + struct vb2_queue queue; struct v4l2_format format; struct v4l2_fract timeperframe; }; diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c index 349e659d75f..7c4489c4236 100644 --- a/drivers/media/v4l2-core/videobuf2-core.c +++ b/drivers/media/v4l2-core/videobuf2-core.c @@ -1200,6 +1200,30 @@ void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state) EXPORT_SYMBOL_GPL(vb2_buffer_done); /** + * vb2_discard_done() - discard all buffers marked as DONE + * @q: videobuf2 queue + * + * This function is intended to be used with suspend/resume operations. It + * discards all 'done' buffers as they would be too old to be requested after + * resume. + * + * Drivers must stop the hardware and synchronize with interrupt handlers and/or + * delayed works before calling this function to make sure no buffer will be + * touched by the driver and/or hardware. 
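
Since vb2_discard_done() is new core API, a hedged usage sketch may help: a capture driver would call it from its resume handler once the hardware is stopped and interrupts are synchronized, exactly as omap3isp_video_resume() does above. The device structure here is hypothetical:

    #include <media/videobuf2-core.h>

    struct cam_dev {                  /* hypothetical driver state */
            struct vb2_queue queue;
            /* ... */
    };

    static int cam_resume(struct cam_dev *cam)
    {
            /* DMA is stopped and IRQs are synchronized before this point,
             * as the vb2_discard_done() documentation requires. */
            vb2_discard_done(&cam->queue);

            /* ... re-queue the remaining buffers and restart streaming ... */
            return 0;
    }
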
+ */ +void vb2_discard_done(struct vb2_queue *q) +{ + struct vb2_buffer *vb; + unsigned long flags; + + spin_lock_irqsave(&q->done_lock, flags); + list_for_each_entry(vb, &q->done_list, done_entry) + vb->state = VB2_BUF_STATE_ERROR; + spin_unlock_irqrestore(&q->done_lock, flags); +} +EXPORT_SYMBOL_GPL(vb2_discard_done); + +/** * __fill_vb2_buffer() - fill a vb2_buffer with information provided in a * v4l2_buffer by the userspace. The caller has already verified that struct * v4l2_buffer has a valid number of planes. diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 02832d64d91..baca5897039 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1773,6 +1773,7 @@ config SCSI_BFA_FC config SCSI_VIRTIO tristate "virtio-scsi support" depends on VIRTIO + select BLK_DEV_INTEGRITY help This is the virtual HBA driver for virtio. If the kernel will be used in a virtual machine, say Y or M. diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 5858600bfe5..31184b35370 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -48,6 +48,7 @@ #include <linux/bitmap.h> #include <linux/atomic.h> #include <linux/jiffies.h> +#include <linux/percpu.h> #include <asm/div64.h> #include "hpsa_cmd.h" #include "hpsa.h" @@ -193,7 +194,8 @@ static int number_of_controllers; static irqreturn_t do_hpsa_intr_intx(int irq, void *dev_id); static irqreturn_t do_hpsa_intr_msi(int irq, void *dev_id); static int hpsa_ioctl(struct scsi_device *dev, int cmd, void *arg); -static void start_io(struct ctlr_info *h); +static void lock_and_start_io(struct ctlr_info *h); +static void start_io(struct ctlr_info *h, unsigned long *flags); #ifdef CONFIG_COMPAT static int hpsa_compat_ioctl(struct scsi_device *dev, int cmd, void *arg); @@ -695,7 +697,7 @@ static inline void addQ(struct list_head *list, struct CommandList *c) static inline u32 next_command(struct ctlr_info *h, u8 q) { u32 a; - struct reply_pool *rq = &h->reply_queue[q]; + struct reply_queue_buffer *rq = &h->reply_queue[q]; unsigned long flags; if (h->transMethod & CFGTBL_Trans_io_accel1) @@ -844,8 +846,8 @@ static void enqueue_cmd_and_start_io(struct ctlr_info *h, spin_lock_irqsave(&h->lock, flags); addQ(&h->reqQ, c); h->Qdepth++; + start_io(h, &flags); spin_unlock_irqrestore(&h->lock, flags); - start_io(h); } static inline void removeQ(struct CommandList *c) @@ -1554,9 +1556,13 @@ static int handle_ioaccel_mode2_error(struct ctlr_info *h, dev_warn(&h->pdev->dev, "%s: task complete with check condition.\n", "HP SSD Smart Path"); + cmd->result |= SAM_STAT_CHECK_CONDITION; if (c2->error_data.data_present != - IOACCEL2_SENSE_DATA_PRESENT) + IOACCEL2_SENSE_DATA_PRESENT) { + memset(cmd->sense_buffer, 0, + SCSI_SENSE_BUFFERSIZE); break; + } /* copy the sense data */ data_len = c2->error_data.sense_data_len; if (data_len > SCSI_SENSE_BUFFERSIZE) @@ -1566,7 +1572,6 @@ static int handle_ioaccel_mode2_error(struct ctlr_info *h, sizeof(c2->error_data.sense_data_buff); memcpy(cmd->sense_buffer, c2->error_data.sense_data_buff, data_len); - cmd->result |= SAM_STAT_CHECK_CONDITION; retry = 1; break; case IOACCEL2_STATUS_SR_TASK_COMP_BUSY: @@ -1651,16 +1656,6 @@ static void process_ioaccel2_completion(struct ctlr_info *h, if (is_logical_dev_addr_mode(dev->scsi3addr) && c2->error_data.serv_response == IOACCEL2_SERV_RESPONSE_FAILURE) { - if (c2->error_data.status == - IOACCEL2_STATUS_SR_IOACCEL_DISABLED) - dev_warn(&h->pdev->dev, - "%s: Path is unavailable, retrying on standard path.\n", - "HP SSD Smart Path"); - else - dev_warn(&h->pdev->dev, - "%s: Error 
0x%02x, retrying on standard path.\n", - "HP SSD Smart Path", c2->error_data.status); - dev->offload_enabled = 0; h->drv_req_rescan = 1; /* schedule controller for a rescan */ cmd->result = DID_SOFT_ERROR << 16; @@ -1991,20 +1986,26 @@ static inline void hpsa_scsi_do_simple_cmd_core(struct ctlr_info *h, wait_for_completion(&wait); } +static u32 lockup_detected(struct ctlr_info *h) +{ + int cpu; + u32 rc, *lockup_detected; + + cpu = get_cpu(); + lockup_detected = per_cpu_ptr(h->lockup_detected, cpu); + rc = *lockup_detected; + put_cpu(); + return rc; +} + static void hpsa_scsi_do_simple_cmd_core_if_no_lockup(struct ctlr_info *h, struct CommandList *c) { - unsigned long flags; - /* If controller lockup detected, fake a hardware error. */ - spin_lock_irqsave(&h->lock, flags); - if (unlikely(h->lockup_detected)) { - spin_unlock_irqrestore(&h->lock, flags); + if (unlikely(lockup_detected(h))) c->err_info->CommandStatus = CMD_HARDWARE_ERR; - } else { - spin_unlock_irqrestore(&h->lock, flags); + else hpsa_scsi_do_simple_cmd_core(h, c); - } } #define MAX_DRIVER_CMD_RETRIES 25 @@ -2429,7 +2430,7 @@ static int hpsa_get_device_id(struct ctlr_info *h, unsigned char *scsi3addr, buflen = 16; buf = kzalloc(64, GFP_KERNEL); if (!buf) - return -1; + return -ENOMEM; rc = hpsa_scsi_do_inquiry(h, scsi3addr, VPD_PAGE | 0x83, buf, 64); if (rc == 0) memcpy(device_id, &buf[8], buflen); @@ -2515,27 +2516,21 @@ static int hpsa_get_volume_status(struct ctlr_info *h, return HPSA_VPD_LV_STATUS_UNSUPPORTED; /* Does controller have VPD for logical volume status? */ - if (!hpsa_vpd_page_supported(h, scsi3addr, HPSA_VPD_LV_STATUS)) { - dev_warn(&h->pdev->dev, "Logical volume status VPD page is unsupported.\n"); + if (!hpsa_vpd_page_supported(h, scsi3addr, HPSA_VPD_LV_STATUS)) goto exit_failed; - } /* Get the size of the VPD return buffer */ rc = hpsa_scsi_do_inquiry(h, scsi3addr, VPD_PAGE | HPSA_VPD_LV_STATUS, buf, HPSA_VPD_HEADER_SZ); - if (rc != 0) { - dev_warn(&h->pdev->dev, "Logical volume status VPD inquiry failed.\n"); + if (rc != 0) goto exit_failed; - } size = buf[3]; /* Now get the whole VPD buffer */ rc = hpsa_scsi_do_inquiry(h, scsi3addr, VPD_PAGE | HPSA_VPD_LV_STATUS, buf, size + HPSA_VPD_HEADER_SZ); - if (rc != 0) { - dev_warn(&h->pdev->dev, "Logical volume status VPD inquiry failed.\n"); + if (rc != 0) goto exit_failed; - } status = buf[4]; /* status byte */ kfree(buf); @@ -2548,11 +2543,11 @@ exit_failed: /* Determine offline status of a volume. 
* Return either: * 0 (not offline) - * -1 (offline for unknown reasons) + * 0xff (offline for unknown reasons) * # (integer code indicating one of several NOT READY states * describing why a volume is to be kept offline) */ -static unsigned char hpsa_volume_offline(struct ctlr_info *h, +static int hpsa_volume_offline(struct ctlr_info *h, unsigned char scsi3addr[]) { struct CommandList *c; @@ -2651,11 +2646,15 @@ static int hpsa_update_device_info(struct ctlr_info *h, if (this_device->devtype == TYPE_DISK && is_logical_dev_addr_mode(scsi3addr)) { + int volume_offline; + hpsa_get_raid_level(h, scsi3addr, &this_device->raid_level); if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC) hpsa_get_ioaccel_status(h, scsi3addr, this_device); - this_device->volume_offline = - hpsa_volume_offline(h, scsi3addr); + volume_offline = hpsa_volume_offline(h, scsi3addr); + if (volume_offline < 0 || volume_offline > 0xff) + volume_offline = HPSA_VPD_LV_STATUS_UNSUPPORTED; + this_device->volume_offline = volume_offline & 0xff; } else { this_device->raid_level = RAID_UNKNOWN; this_device->offload_config = 0; @@ -2861,26 +2860,20 @@ static int hpsa_get_pdisk_of_ioaccel2(struct ctlr_info *h, nphysicals = be32_to_cpu(*((__be32 *)physicals->LUNListLength)) / responsesize; - /* find ioaccel2 handle in list of physicals: */ for (i = 0; i < nphysicals; i++) { + struct ext_report_lun_entry *entry = &physicals->LUN[i]; + /* handle is in bytes 28-31 of each lun */ - if (memcmp(&((struct ReportExtendedLUNdata *) - physicals)->LUN[i][20], &find, 4) != 0) { + if (entry->ioaccel_handle != find) continue; /* didn't match */ - } found = 1; - memcpy(scsi3addr, &((struct ReportExtendedLUNdata *) - physicals)->LUN[i][0], 8); + memcpy(scsi3addr, entry->lunid, 8); if (h->raid_offload_debug > 0) dev_info(&h->pdev->dev, - "%s: Searched h=0x%08x, Found h=0x%08x, scsiaddr 0x%02x%02x%02x%02x%02x%02x%02x%02x\n", + "%s: Searched h=0x%08x, Found h=0x%08x, scsiaddr 0x%8phN\n", __func__, find, - ((struct ReportExtendedLUNdata *) - physicals)->LUN[i][20], - scsi3addr[0], scsi3addr[1], scsi3addr[2], - scsi3addr[3], scsi3addr[4], scsi3addr[5], - scsi3addr[6], scsi3addr[7]); + entry->ioaccel_handle, scsi3addr); break; /* found it */ } @@ -2965,7 +2958,8 @@ u8 *figure_lunaddrbytes(struct ctlr_info *h, int raid_ctlr_position, int i, return RAID_CTLR_LUNID; if (i < logicals_start) - return &physdev_list->LUN[i - (raid_ctlr_position == 0)][0]; + return &physdev_list->LUN[i - + (raid_ctlr_position == 0)].lunid[0]; if (i < last_device) return &logdev_list->LUN[i - nphysicals - @@ -3074,7 +3068,7 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno) ndev_allocated++; } - if (unlikely(is_scsi_rev_5(h))) + if (is_scsi_rev_5(h)) raid_ctlr_position = 0; else raid_ctlr_position = nphysicals + nlogicals; @@ -3971,7 +3965,6 @@ static int hpsa_scsi_queue_command_lck(struct scsi_cmnd *cmd, struct hpsa_scsi_dev_t *dev; unsigned char scsi3addr[8]; struct CommandList *c; - unsigned long flags; int rc = 0; /* Get the ptr to our adapter structure out of cmd->host. */ @@ -3984,14 +3977,11 @@ static int hpsa_scsi_queue_command_lck(struct scsi_cmnd *cmd, } memcpy(scsi3addr, dev->scsi3addr, sizeof(scsi3addr)); - spin_lock_irqsave(&h->lock, flags); - if (unlikely(h->lockup_detected)) { - spin_unlock_irqrestore(&h->lock, flags); + if (unlikely(lockup_detected(h))) { cmd->result = DID_ERROR << 16; done(cmd); return 0; } - spin_unlock_irqrestore(&h->lock, flags); c = cmd_alloc(h); if (c == NULL) { /* trouble... 
*/ dev_err(&h->pdev->dev, "cmd_alloc returned NULL!\n"); @@ -4103,16 +4093,13 @@ static int do_not_scan_if_controller_locked_up(struct ctlr_info *h) * we can prevent new rescan threads from piling up on a * locked up controller. */ - spin_lock_irqsave(&h->lock, flags); - if (unlikely(h->lockup_detected)) { - spin_unlock_irqrestore(&h->lock, flags); + if (unlikely(lockup_detected(h))) { spin_lock_irqsave(&h->scan_lock, flags); h->scan_finished = 1; wake_up_all(&h->scan_wait_queue); spin_unlock_irqrestore(&h->scan_lock, flags); return 1; } - spin_unlock_irqrestore(&h->lock, flags); return 0; } @@ -4963,7 +4950,7 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp) buff = kmalloc(iocommand.buf_size, GFP_KERNEL); if (buff == NULL) return -EFAULT; - if (iocommand.Request.Type.Direction == XFER_WRITE) { + if (iocommand.Request.Type.Direction & XFER_WRITE) { /* Copy the data into the buffer we created */ if (copy_from_user(buff, iocommand.buf, iocommand.buf_size)) { @@ -5026,7 +5013,7 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp) rc = -EFAULT; goto out; } - if (iocommand.Request.Type.Direction == XFER_READ && + if ((iocommand.Request.Type.Direction & XFER_READ) && iocommand.buf_size > 0) { /* Copy the data out of the buffer we created */ if (copy_to_user(iocommand.buf, buff, iocommand.buf_size)) { @@ -5103,7 +5090,7 @@ static int hpsa_big_passthru_ioctl(struct ctlr_info *h, void __user *argp) status = -ENOMEM; goto cleanup1; } - if (ioc->Request.Type.Direction == XFER_WRITE) { + if (ioc->Request.Type.Direction & XFER_WRITE) { if (copy_from_user(buff[sg_used], data_ptr, sz)) { status = -ENOMEM; goto cleanup1; @@ -5155,7 +5142,7 @@ static int hpsa_big_passthru_ioctl(struct ctlr_info *h, void __user *argp) status = -EFAULT; goto cleanup0; } - if (ioc->Request.Type.Direction == XFER_READ && ioc->buf_size > 0) { + if ((ioc->Request.Type.Direction & XFER_READ) && ioc->buf_size > 0) { /* Copy the data out of the buffer we created */ BYTE __user *ptr = ioc->buf; for (i = 0; i < sg_used; i++) { @@ -5459,13 +5446,12 @@ static void __iomem *remap_pci_mem(ulong base, ulong size) /* Takes cmds off the submission queue and sends them to the hardware, * then puts them on the queue of cmds waiting for completion. + * Assumes h->lock is held */ -static void start_io(struct ctlr_info *h) +static void start_io(struct ctlr_info *h, unsigned long *flags) { struct CommandList *c; - unsigned long flags; - spin_lock_irqsave(&h->lock, flags); while (!list_empty(&h->reqQ)) { c = list_entry(h->reqQ.next, struct CommandList, list); /* can't do anything if fifo is full */ @@ -5488,14 +5474,20 @@ static void start_io(struct ctlr_info *h) * condition. 
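
The Direction tests above move from == to & because a passthru command may be bidirectional: XFER_RDWR carries both the read and the write bit, so an equality test misses it. A small sketch, with values mirroring the CISS definitions in hpsa_cmd.h:

    #include <linux/types.h>

    #define XFER_NONE       0x00
    #define XFER_WRITE      0x01
    #define XFER_READ       0x02
    #define XFER_RDWR       (XFER_READ | XFER_WRITE)

    static bool direction_has_write(u8 direction)
    {
            /* True for XFER_WRITE and XFER_RDWR; '== XFER_WRITE' would
             * miss the bidirectional case. */
            return direction & XFER_WRITE;
    }
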
*/ h->commands_outstanding++; - if (h->commands_outstanding > h->max_outstanding) - h->max_outstanding = h->commands_outstanding; /* Tell the controller execute command */ - spin_unlock_irqrestore(&h->lock, flags); + spin_unlock_irqrestore(&h->lock, *flags); h->access.submit_command(h, c); - spin_lock_irqsave(&h->lock, flags); + spin_lock_irqsave(&h->lock, *flags); } +} + +static void lock_and_start_io(struct ctlr_info *h) +{ + unsigned long flags; + + spin_lock_irqsave(&h->lock, flags); + start_io(h, &flags); spin_unlock_irqrestore(&h->lock, flags); } @@ -5563,7 +5555,7 @@ static inline void finish_cmd(struct CommandList *c) else if (c->cmd_type == CMD_IOCTL_PEND) complete(c->waiting); if (unlikely(io_may_be_stalled)) - start_io(h); + lock_and_start_io(h); } static inline u32 hpsa_tag_contains_index(u32 tag) @@ -5840,12 +5832,12 @@ static int hpsa_controller_hard_reset(struct pci_dev *pdev, dev_info(&pdev->dev, "using doorbell to reset controller\n"); writel(use_doorbell, vaddr + SA5_DOORBELL); - /* PMC hardware guys tell us we need a 5 second delay after + /* PMC hardware guys tell us we need a 10 second delay after * doorbell reset and before any attempt to talk to the board * at all to ensure that this actually works and doesn't fall * over in some weird corner cases. */ - msleep(5000); + msleep(10000); } else { /* Try to do it the PCI power state way */ /* Quoting from the Open CISS Specification: "The Power @@ -6166,6 +6158,8 @@ static void hpsa_interrupt_mode(struct ctlr_info *h) if (pci_find_capability(h->pdev, PCI_CAP_ID_MSIX)) { dev_info(&h->pdev->dev, "MSIX\n"); h->msix_vector = MAX_REPLY_QUEUES; + if (h->msix_vector > num_online_cpus()) + h->msix_vector = num_online_cpus(); err = pci_enable_msix(h->pdev, hpsa_msix_entries, h->msix_vector); if (err > 0) { @@ -6615,6 +6609,17 @@ static void hpsa_free_cmd_pool(struct ctlr_info *h) h->ioaccel_cmd_pool, h->ioaccel_cmd_pool_dhandle); } +static void hpsa_irq_affinity_hints(struct ctlr_info *h) +{ + int i, cpu, rc; + + cpu = cpumask_first(cpu_online_mask); + for (i = 0; i < h->msix_vector; i++) { + rc = irq_set_affinity_hint(h->intr[i], get_cpu_mask(cpu)); + cpu = cpumask_next(cpu, cpu_online_mask); + } +} + static int hpsa_request_irq(struct ctlr_info *h, irqreturn_t (*msixhandler)(int, void *), irqreturn_t (*intxhandler)(int, void *)) @@ -6634,6 +6639,7 @@ static int hpsa_request_irq(struct ctlr_info *h, rc = request_irq(h->intr[i], msixhandler, 0, h->devname, &h->q[i]); + hpsa_irq_affinity_hints(h); } else { /* Use single reply pool */ if (h->msix_vector > 0 || h->msi_vector) { @@ -6685,12 +6691,15 @@ static void free_irqs(struct ctlr_info *h) if (!h->msix_vector || h->intr_mode != PERF_MODE_INT) { /* Single reply queue, only one irq to free */ i = h->intr_mode; + irq_set_affinity_hint(h->intr[i], NULL); free_irq(h->intr[i], &h->q[i]); return; } - for (i = 0; i < h->msix_vector; i++) + for (i = 0; i < h->msix_vector; i++) { + irq_set_affinity_hint(h->intr[i], NULL); free_irq(h->intr[i], &h->q[i]); + } } static void hpsa_free_irqs_and_disable_msix(struct ctlr_info *h) @@ -6707,6 +6716,20 @@ static void hpsa_free_irqs_and_disable_msix(struct ctlr_info *h) #endif /* CONFIG_PCI_MSI */ } +static void hpsa_free_reply_queues(struct ctlr_info *h) +{ + int i; + + for (i = 0; i < h->nreply_queues; i++) { + if (!h->reply_queue[i].head) + continue; + pci_free_consistent(h->pdev, h->reply_queue_size, + h->reply_queue[i].head, h->reply_queue[i].busaddr); + h->reply_queue[i].head = NULL; + h->reply_queue[i].busaddr = 0; + } +} + static void 
hpsa_undo_allocations_after_kdump_soft_reset(struct ctlr_info *h) { hpsa_free_irqs_and_disable_msix(h); @@ -6714,8 +6737,7 @@ static void hpsa_undo_allocations_after_kdump_soft_reset(struct ctlr_info *h) hpsa_free_cmd_pool(h); kfree(h->ioaccel1_blockFetchTable); kfree(h->blockFetchTable); - pci_free_consistent(h->pdev, h->reply_pool_size, - h->reply_pool, h->reply_pool_dhandle); + hpsa_free_reply_queues(h); if (h->vaddr) iounmap(h->vaddr); if (h->transtable) @@ -6740,16 +6762,38 @@ static void fail_all_cmds_on_list(struct ctlr_info *h, struct list_head *list) } } +static void set_lockup_detected_for_all_cpus(struct ctlr_info *h, u32 value) +{ + int i, cpu; + + cpu = cpumask_first(cpu_online_mask); + for (i = 0; i < num_online_cpus(); i++) { + u32 *lockup_detected; + lockup_detected = per_cpu_ptr(h->lockup_detected, cpu); + *lockup_detected = value; + cpu = cpumask_next(cpu, cpu_online_mask); + } + wmb(); /* be sure the per-cpu variables are out to memory */ +} + static void controller_lockup_detected(struct ctlr_info *h) { unsigned long flags; + u32 lockup_detected; h->access.set_intr_mask(h, HPSA_INTR_OFF); spin_lock_irqsave(&h->lock, flags); - h->lockup_detected = readl(h->vaddr + SA5_SCRATCHPAD_OFFSET); + lockup_detected = readl(h->vaddr + SA5_SCRATCHPAD_OFFSET); + if (!lockup_detected) { + /* no heartbeat, but controller gave us a zero. */ + dev_warn(&h->pdev->dev, + "lockup detected but scratchpad register is zero\n"); + lockup_detected = 0xffffffff; + } + set_lockup_detected_for_all_cpus(h, lockup_detected); spin_unlock_irqrestore(&h->lock, flags); dev_warn(&h->pdev->dev, "Controller lockup detected: 0x%08x\n", - h->lockup_detected); + lockup_detected); pci_disable_device(h->pdev); spin_lock_irqsave(&h->lock, flags); fail_all_cmds_on_list(h, &h->cmpQ); @@ -6884,7 +6928,7 @@ static void hpsa_monitor_ctlr_worker(struct work_struct *work) struct ctlr_info *h = container_of(to_delayed_work(work), struct ctlr_info, monitor_ctlr_work); detect_controller_lockup(h); - if (h->lockup_detected) + if (lockup_detected(h)) return; if (hpsa_ctlr_needs_rescan(h) || hpsa_offline_devices_ready(h)) { @@ -6934,7 +6978,6 @@ reinit_after_soft_reset: * the 5 lower bits of the address are used by the hardware. and by * the driver. See comments in hpsa.h for more info. 
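
The lockup_detected conversion replaces a spinlock-protected u32 with a per-cpu copy: the hot path reads only its own CPU's cache line, while the rare writer updates every online CPU. A condensed sketch of the pattern, with hypothetical names:

    #include <linux/cpumask.h>
    #include <linux/percpu.h>
    #include <linux/smp.h>

    static u32 __percpu *lockup_flag;         /* hypothetical */

    static int lockup_flag_init(void)
    {
            lockup_flag = alloc_percpu(u32);
            return lockup_flag ? 0 : -ENOMEM;
    }

    /* Hot path: no lock, just this CPU's copy. */
    static u32 lockup_flag_read(void)
    {
            int cpu = get_cpu();              /* disables preemption */
            u32 v = *per_cpu_ptr(lockup_flag, cpu);

            put_cpu();
            return v;
    }

    /* Cold path: propagate the value to every online CPU. */
    static void lockup_flag_set_all(u32 value)
    {
            int cpu;

            for_each_online_cpu(cpu)
                    *per_cpu_ptr(lockup_flag, cpu) = value;
            wmb();  /* push the stores out before continuing */
    }
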
*/ -#define COMMANDLIST_ALIGNMENT 128 BUILD_BUG_ON(sizeof(struct CommandList) % COMMANDLIST_ALIGNMENT); h = kzalloc(sizeof(*h), GFP_KERNEL); if (!h) @@ -6949,6 +6992,13 @@ reinit_after_soft_reset: spin_lock_init(&h->offline_device_lock); spin_lock_init(&h->scan_lock); spin_lock_init(&h->passthru_count_lock); + + /* Allocate and clear per-cpu variable lockup_detected */ + h->lockup_detected = alloc_percpu(u32); + if (!h->lockup_detected) + goto clean1; + set_lockup_detected_for_all_cpus(h, 0); + rc = hpsa_pci_init(h); if (rc != 0) goto clean1; @@ -7072,6 +7122,8 @@ clean4: free_irqs(h); clean2: clean1: + if (h->lockup_detected) + free_percpu(h->lockup_detected); kfree(h); return rc; } @@ -7080,16 +7132,10 @@ static void hpsa_flush_cache(struct ctlr_info *h) { char *flush_buf; struct CommandList *c; - unsigned long flags; /* Don't bother trying to flush the cache if locked up */ - spin_lock_irqsave(&h->lock, flags); - if (unlikely(h->lockup_detected)) { - spin_unlock_irqrestore(&h->lock, flags); + if (unlikely(lockup_detected(h))) return; - } - spin_unlock_irqrestore(&h->lock, flags); - flush_buf = kzalloc(4, GFP_KERNEL); if (!flush_buf) return; @@ -7165,8 +7211,7 @@ static void hpsa_remove_one(struct pci_dev *pdev) pci_free_consistent(h->pdev, h->nr_cmds * sizeof(struct ErrorInfo), h->errinfo_pool, h->errinfo_pool_dhandle); - pci_free_consistent(h->pdev, h->reply_pool_size, - h->reply_pool, h->reply_pool_dhandle); + hpsa_free_reply_queues(h); kfree(h->cmd_pool_bits); kfree(h->blockFetchTable); kfree(h->ioaccel1_blockFetchTable); @@ -7174,6 +7219,7 @@ static void hpsa_remove_one(struct pci_dev *pdev) kfree(h->hba_inquiry_data); pci_disable_device(pdev); pci_release_regions(pdev); + free_percpu(h->lockup_detected); kfree(h); } @@ -7278,8 +7324,16 @@ static void hpsa_enter_performant_mode(struct ctlr_info *h, u32 trans_support) * 10 = 6 s/g entry or 24k */ + /* If the controller supports either ioaccel method then + * we can also use the RAID stack submit path that does not + * perform the superfluous readl() after each command submission. + */ + if (trans_support & (CFGTBL_Trans_io_accel1 | CFGTBL_Trans_io_accel2)) + access = SA5_performant_access_no_read; + /* Controller spec: zero out this buffer. */ - memset(h->reply_pool, 0, h->reply_pool_size); + for (i = 0; i < h->nreply_queues; i++) + memset(h->reply_queue[i].head, 0, h->reply_queue_size); bft[7] = SG_ENTRIES_IN_CMD + 4; calc_bucket_map(bft, ARRAY_SIZE(bft), @@ -7295,8 +7349,7 @@ static void hpsa_enter_performant_mode(struct ctlr_info *h, u32 trans_support) for (i = 0; i < h->nreply_queues; i++) { writel(0, &h->transtable->RepQAddr[i].upper); - writel(h->reply_pool_dhandle + - (h->max_commands * sizeof(u64) * i), + writel(h->reply_queue[i].busaddr, &h->transtable->RepQAddr[i].lower); } @@ -7344,8 +7397,10 @@ static void hpsa_enter_performant_mode(struct ctlr_info *h, u32 trans_support) h->ioaccel1_blockFetchTable); /* initialize all reply queue entries to unused */ - memset(h->reply_pool, (u8) IOACCEL_MODE1_REPLY_UNUSED, - h->reply_pool_size); + for (i = 0; i < h->nreply_queues; i++) + memset(h->reply_queue[i].head, + (u8) IOACCEL_MODE1_REPLY_UNUSED, + h->reply_queue_size); /* set all the constant fields in the accelerator command * frames once at init time to save CPU cycles later. @@ -7407,7 +7462,6 @@ static int hpsa_alloc_ioaccel_cmd_and_bft(struct ctlr_info *h) * because the 7 lower bits of the address are used by the * hardware. 
*/ -#define IOACCEL1_COMMANDLIST_ALIGNMENT 128 BUILD_BUG_ON(sizeof(struct io_accel1_cmd) % IOACCEL1_COMMANDLIST_ALIGNMENT); h->ioaccel_cmd_pool = @@ -7445,7 +7499,6 @@ static int ioaccel2_alloc_cmds_and_bft(struct ctlr_info *h) if (h->ioaccel_maxsg > IOACCEL2_MAXSGENTRIES) h->ioaccel_maxsg = IOACCEL2_MAXSGENTRIES; -#define IOACCEL2_COMMANDLIST_ALIGNMENT 128 BUILD_BUG_ON(sizeof(struct io_accel2_cmd) % IOACCEL2_COMMANDLIST_ALIGNMENT); h->ioaccel2_cmd_pool = @@ -7503,16 +7556,17 @@ static void hpsa_put_ctlr_into_performant_mode(struct ctlr_info *h) } } - /* TODO, check that this next line h->nreply_queues is correct */ h->nreply_queues = h->msix_vector > 0 ? h->msix_vector : 1; hpsa_get_max_perf_mode_cmds(h); /* Performant mode ring buffer and supporting data structures */ - h->reply_pool_size = h->max_commands * sizeof(u64) * h->nreply_queues; - h->reply_pool = pci_alloc_consistent(h->pdev, h->reply_pool_size, - &(h->reply_pool_dhandle)); + h->reply_queue_size = h->max_commands * sizeof(u64); for (i = 0; i < h->nreply_queues; i++) { - h->reply_queue[i].head = &h->reply_pool[h->max_commands * i]; + h->reply_queue[i].head = pci_alloc_consistent(h->pdev, + h->reply_queue_size, + &(h->reply_queue[i].busaddr)); + if (!h->reply_queue[i].head) + goto clean_up; h->reply_queue[i].size = h->max_commands; h->reply_queue[i].wraparound = 1; /* spec: init to 1 */ h->reply_queue[i].current_entry = 0; @@ -7521,18 +7575,14 @@ static void hpsa_put_ctlr_into_performant_mode(struct ctlr_info *h) /* Need a block fetch table for performant mode */ h->blockFetchTable = kmalloc(((SG_ENTRIES_IN_CMD + 1) * sizeof(u32)), GFP_KERNEL); - - if ((h->reply_pool == NULL) - || (h->blockFetchTable == NULL)) + if (!h->blockFetchTable) goto clean_up; hpsa_enter_performant_mode(h, trans_support); return; clean_up: - if (h->reply_pool) - pci_free_consistent(h->pdev, h->reply_pool_size, - h->reply_pool, h->reply_pool_dhandle); + hpsa_free_reply_queues(h); kfree(h->blockFetchTable); } diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h index 1e3cf33a82c..24472cec7de 100644 --- a/drivers/scsi/hpsa.h +++ b/drivers/scsi/hpsa.h @@ -57,11 +57,12 @@ struct hpsa_scsi_dev_t { }; -struct reply_pool { +struct reply_queue_buffer { u64 *head; size_t size; u8 wraparound; u32 current_entry; + dma_addr_t busaddr; }; #pragma pack(1) @@ -116,11 +117,8 @@ struct ctlr_info { int nr_cmds; /* Number of commands allowed on this controller */ struct CfgTable __iomem *cfgtable; int interrupts_enabled; - int major; int max_commands; int commands_outstanding; - int max_outstanding; /* Debug */ - int usage_count; /* number of opens all all minor devices */ # define PERF_MODE_INT 0 # define DOORBELL_INT 1 # define SIMPLE_MODE_INT 2 @@ -177,11 +175,9 @@ struct ctlr_info { /* * Performant mode completion buffers */ - u64 *reply_pool; - size_t reply_pool_size; - struct reply_pool reply_queue[MAX_REPLY_QUEUES]; + size_t reply_queue_size; + struct reply_queue_buffer reply_queue[MAX_REPLY_QUEUES]; u8 nreply_queues; - dma_addr_t reply_pool_dhandle; u32 *blockFetchTable; u32 *ioaccel1_blockFetchTable; u32 *ioaccel2_blockFetchTable; @@ -196,7 +192,7 @@ struct ctlr_info { u64 last_heartbeat_timestamp; u32 heartbeat_sample_interval; atomic_t firmware_flash_in_progress; - u32 lockup_detected; + u32 *lockup_detected; struct delayed_work monitor_ctlr_work; int remove_in_progress; u32 fifo_recently_full; @@ -233,11 +229,9 @@ struct ctlr_info { #define CTLR_STATE_CHANGE_EVENT_AIO_CONFIG_CHANGE (1 << 31) #define RESCAN_REQUIRED_EVENT_BITS \ - (CTLR_STATE_CHANGE_EVENT | \ - 
CTLR_ENCLOSURE_HOT_PLUG_EVENT | \ + (CTLR_ENCLOSURE_HOT_PLUG_EVENT | \ CTLR_STATE_CHANGE_EVENT_PHYSICAL_DRV | \ CTLR_STATE_CHANGE_EVENT_LOGICAL_DRV | \ - CTLR_STATE_CHANGE_EVENT_REDUNDANT_CNTRL | \ CTLR_STATE_CHANGE_EVENT_AIO_ENABLED_DISABLED | \ CTLR_STATE_CHANGE_EVENT_AIO_CONFIG_CHANGE) spinlock_t offline_device_lock; @@ -346,22 +340,23 @@ struct offline_device_entry { static void SA5_submit_command(struct ctlr_info *h, struct CommandList *c) { - dev_dbg(&h->pdev->dev, "Sending %x, tag = %x\n", c->busaddr, - c->Header.Tag.lower); writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET); (void) readl(h->vaddr + SA5_SCRATCHPAD_OFFSET); } +static void SA5_submit_command_no_read(struct ctlr_info *h, + struct CommandList *c) +{ + writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET); +} + static void SA5_submit_command_ioaccel2(struct ctlr_info *h, struct CommandList *c) { - dev_dbg(&h->pdev->dev, "Sending %x, tag = %x\n", c->busaddr, - c->Header.Tag.lower); if (c->cmd_type == CMD_IOACCEL2) writel(c->busaddr, h->vaddr + IOACCEL2_INBOUND_POSTQ_32); else writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET); - (void) readl(h->vaddr + SA5_SCRATCHPAD_OFFSET); } /* @@ -399,7 +394,7 @@ static void SA5_performant_intr_mask(struct ctlr_info *h, unsigned long val) static unsigned long SA5_performant_completed(struct ctlr_info *h, u8 q) { - struct reply_pool *rq = &h->reply_queue[q]; + struct reply_queue_buffer *rq = &h->reply_queue[q]; unsigned long flags, register_value = FIFO_EMPTY; /* msi auto clears the interrupt pending bit. */ @@ -478,7 +473,6 @@ static bool SA5_intr_pending(struct ctlr_info *h) { unsigned long register_value = readl(h->vaddr + SA5_INTR_STATUS); - dev_dbg(&h->pdev->dev, "intr_pending %lx\n", register_value); return register_value & SA5_INTR_PENDING; } @@ -515,7 +509,7 @@ static bool SA5_ioaccel_mode1_intr_pending(struct ctlr_info *h) static unsigned long SA5_ioaccel_mode1_completed(struct ctlr_info *h, u8 q) { u64 register_value; - struct reply_pool *rq = &h->reply_queue[q]; + struct reply_queue_buffer *rq = &h->reply_queue[q]; unsigned long flags; BUG_ON(q >= h->nreply_queues); @@ -573,6 +567,14 @@ static struct access_method SA5_performant_access = { SA5_performant_completed, }; +static struct access_method SA5_performant_access_no_read = { + SA5_submit_command_no_read, + SA5_performant_intr_mask, + SA5_fifo_full, + SA5_performant_intr_pending, + SA5_performant_completed, +}; + struct board_type { u32 board_id; char *product_name; diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h index b5cc7052339..b5125dc3143 100644 --- a/drivers/scsi/hpsa_cmd.h +++ b/drivers/scsi/hpsa_cmd.h @@ -151,7 +151,7 @@ #define HPSA_VPD_HEADER_SZ 4 /* Logical volume states */ -#define HPSA_VPD_LV_STATUS_UNSUPPORTED -1 +#define HPSA_VPD_LV_STATUS_UNSUPPORTED 0xff #define HPSA_LV_OK 0x0 #define HPSA_LV_UNDERGOING_ERASE 0x0F #define HPSA_LV_UNDERGOING_RPI 0x12 @@ -238,11 +238,21 @@ struct ReportLUNdata { u8 LUN[HPSA_MAX_LUN][8]; }; +struct ext_report_lun_entry { + u8 lunid[8]; + u8 wwid[8]; + u8 device_type; + u8 device_flags; + u8 lun_count; /* multi-lun device, how many luns */ + u8 redundant_paths; + u32 ioaccel_handle; /* ioaccel1 only uses lower 16 bits */ +}; + struct ReportExtendedLUNdata { u8 LUNListLength[4]; u8 extended_response_flag; u8 reserved[3]; - u8 LUN[HPSA_MAX_LUN][24]; + struct ext_report_lun_entry LUN[HPSA_MAX_LUN]; }; struct SenseSubsystem_info { @@ -375,6 +385,7 @@ struct ctlr_info; /* defined in hpsa.h */ * or a bus address. 
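
The readl() dropped by SA5_submit_command_no_read() exists only to flush the posted PCI write out to the adapter, a flush the ioaccel-capable controllers do not need on submission. An illustrative sketch of the two behaviours, not the driver's code:

    #include <linux/io.h>
    #include <linux/types.h>

    static void submit_tag(void __iomem *request_port,
                           void __iomem *scratchpad, u32 tag, bool flush)
    {
            writel(tag, request_port);
            if (flush)
                    (void)readl(scratchpad);  /* flush the posted write */
    }
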
*/ +#define COMMANDLIST_ALIGNMENT 128 struct CommandList { struct CommandListHeader Header; struct RequestBlock Request; @@ -389,21 +400,7 @@ struct CommandList { struct list_head list; struct completion *waiting; void *scsi_cmd; - -/* on 64 bit architectures, to get this to be 32-byte-aligned - * it so happens we need PAD_64 bytes of padding, on 32 bit systems, - * we need PAD_32 bytes of padding (see below). This does that. - * If it happens that 64 bit and 32 bit systems need different - * padding, PAD_32 and PAD_64 can be set independently, and. - * the code below will do the right thing. - */ -#define IS_32_BIT ((8 - sizeof(long))/4) -#define IS_64_BIT (!IS_32_BIT) -#define PAD_32 (40) -#define PAD_64 (12) -#define COMMANDLIST_PAD (IS_32_BIT * PAD_32 + IS_64_BIT * PAD_64) - u8 pad[COMMANDLIST_PAD]; -}; +} __aligned(COMMANDLIST_ALIGNMENT); /* Max S/G elements in I/O accelerator command */ #define IOACCEL1_MAXSGENTRIES 24 @@ -413,6 +410,7 @@ struct CommandList { * Structure for I/O accelerator (mode 1) commands. * Note that this structure must be 128-byte aligned in size. */ +#define IOACCEL1_COMMANDLIST_ALIGNMENT 128 struct io_accel1_cmd { u16 dev_handle; /* 0x00 - 0x01 */ u8 reserved1; /* 0x02 */ @@ -440,12 +438,7 @@ struct io_accel1_cmd { struct vals32 host_addr; /* 0x70 - 0x77 */ u8 CISS_LUN[8]; /* 0x78 - 0x7F */ struct SGDescriptor SG[IOACCEL1_MAXSGENTRIES]; -#define IOACCEL1_PAD_64 0 -#define IOACCEL1_PAD_32 0 -#define IOACCEL1_PAD (IS_32_BIT * IOACCEL1_PAD_32 + \ - IS_64_BIT * IOACCEL1_PAD_64) - u8 pad[IOACCEL1_PAD]; -}; +} __aligned(IOACCEL1_COMMANDLIST_ALIGNMENT); #define IOACCEL1_FUNCTION_SCSIIO 0x00 #define IOACCEL1_SGLOFFSET 32 @@ -510,14 +503,11 @@ struct io_accel2_scsi_response { u8 sense_data_buff[32]; /* sense/response data buffer */ }; -#define IOACCEL2_64_PAD 76 -#define IOACCEL2_32_PAD 76 -#define IOACCEL2_PAD (IS_32_BIT * IOACCEL2_32_PAD + \ - IS_64_BIT * IOACCEL2_64_PAD) /* * Structure for I/O accelerator (mode 2 or m2) commands. * Note that this structure must be 128-byte aligned in size. 
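
The padding removal relies on a C guarantee: __aligned(N) on a struct type raises its alignment to N, and sizeof() is always a multiple of the alignment, so the compiler inserts the tail padding that the removed IS_32_BIT/PAD_* arithmetic computed by hand. A sketch:

    #include <linux/bug.h>
    #include <linux/compiler.h>
    #include <linux/types.h>

    struct cmd_sketch {
            u8 payload[200];                  /* arbitrary illustrative size */
    } __aligned(128);

    static void cmd_sketch_check(void)
    {
            /* sizeof(struct cmd_sketch) is 256 on every architecture, so
             * no per-arch PAD_32/PAD_64 byte counts are needed. */
            BUILD_BUG_ON(sizeof(struct cmd_sketch) % 128);
    }
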
*/ +#define IOACCEL2_COMMANDLIST_ALIGNMENT 128 struct io_accel2_cmd { u8 IU_type; /* IU Type */ u8 direction; /* direction, memtype, and encryption */ @@ -544,8 +534,7 @@ struct io_accel2_cmd { u32 tweak_upper; /* Encryption tweak, upper 4 bytes */ struct ioaccel2_sg_element sg[IOACCEL2_MAXSGENTRIES]; struct io_accel2_scsi_response error_data; - u8 pad[IOACCEL2_PAD]; -}; +} __aligned(IOACCEL2_COMMANDLIST_ALIGNMENT); /* * defines for Mode 2 command struct @@ -636,7 +625,7 @@ struct TransTable_struct { u32 RepQCount; u32 RepQCtrAddrLow32; u32 RepQCtrAddrHigh32; -#define MAX_REPLY_QUEUES 8 +#define MAX_REPLY_QUEUES 64 struct vals32 RepQAddr[MAX_REPLY_QUEUES]; }; diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index ecd7bd304ef..3d1bc67bac9 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -338,7 +338,7 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) struct iscsi_session *session = conn->session; struct scsi_cmnd *sc = task->sc; struct iscsi_scsi_req *hdr; - unsigned hdrlength, cmd_len; + unsigned hdrlength, cmd_len, transfer_length; itt_t itt; int rc; @@ -391,11 +391,11 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) if (scsi_get_prot_op(sc) != SCSI_PROT_NORMAL) task->protected = true; + transfer_length = scsi_transfer_length(sc); + hdr->data_length = cpu_to_be32(transfer_length); if (sc->sc_data_direction == DMA_TO_DEVICE) { - unsigned out_len = scsi_out(sc)->length; struct iscsi_r2t_info *r2t = &task->unsol_r2t; - hdr->data_length = cpu_to_be32(out_len); hdr->flags |= ISCSI_FLAG_CMD_WRITE; /* * Write counters: @@ -414,18 +414,19 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) memset(r2t, 0, sizeof(*r2t)); if (session->imm_data_en) { - if (out_len >= session->first_burst) + if (transfer_length >= session->first_burst) task->imm_count = min(session->first_burst, conn->max_xmit_dlength); else - task->imm_count = min(out_len, - conn->max_xmit_dlength); + task->imm_count = min(transfer_length, + conn->max_xmit_dlength); hton24(hdr->dlength, task->imm_count); } else zero_data(hdr->dlength); if (!session->initial_r2t_en) { - r2t->data_length = min(session->first_burst, out_len) - + r2t->data_length = min(session->first_burst, + transfer_length) - task->imm_count; r2t->data_offset = task->imm_count; r2t->ttt = cpu_to_be32(ISCSI_RESERVED_TAG); @@ -438,7 +439,6 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) } else { hdr->flags |= ISCSI_FLAG_CMD_FINAL; zero_data(hdr->dlength); - hdr->data_length = cpu_to_be32(scsi_in(sc)->length); if (sc->sc_data_direction == DMA_FROM_DEVICE) hdr->flags |= ISCSI_FLAG_CMD_READ; @@ -466,7 +466,7 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) scsi_bidi_cmnd(sc) ? "bidirectional" : sc->sc_data_direction == DMA_TO_DEVICE ? "write" : "read", conn->id, sc, sc->cmnd[0], - task->itt, scsi_bufflen(sc), + task->itt, transfer_length, scsi_bidi_cmnd(sc) ? scsi_in(sc)->length : 0, session->cmdsn, session->max_cmdsn - session->exp_cmdsn + 1); diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 94a3cafe719..434e9037908 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2013 Emulex. All rights reserved. * + * Copyright (C) 2004-2014 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. 
* * www.emulex.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * @@ -640,6 +640,7 @@ struct lpfc_hba { #define HBA_DEVLOSS_TMO 0x2000 /* HBA in devloss timeout */ #define HBA_RRQ_ACTIVE 0x4000 /* process the rrq active list */ #define HBA_FCP_IOQ_FLUSH 0x8000 /* FCP I/O queues being flushed */ +#define HBA_FW_DUMP_OP 0x10000 /* Skips fn reset before FW dump */ uint32_t fcp_ring_in_use; /* When polling test if intr-hndlr active*/ struct lpfc_dmabuf slim2p; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 8d5b6ceec9c..1d7a5c34ee8 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2013 Emulex. All rights reserved. * + * Copyright (C) 2004-2014 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * @@ -919,10 +919,15 @@ lpfc_sli4_pdev_reg_request(struct lpfc_hba *phba, uint32_t opcode) phba->cfg_sriov_nr_virtfn = 0; } + if (opcode == LPFC_FW_DUMP) + phba->hba_flag |= HBA_FW_DUMP_OP; + status = lpfc_do_offline(phba, LPFC_EVT_OFFLINE); - if (status != 0) + if (status != 0) { + phba->hba_flag &= ~HBA_FW_DUMP_OP; return status; + } /* wait for the device to be quiesced before firmware reset */ msleep(100); @@ -2364,7 +2369,7 @@ lpfc_oas_tgt_store(struct device *dev, struct device_attribute *attr, uint8_t wwpn[WWN_SZ]; int rc; - if (!phba->cfg_EnableXLane) + if (!phba->cfg_fof) return -EPERM; /* count may include a LF at end of string */ @@ -2432,7 +2437,7 @@ lpfc_oas_vpt_store(struct device *dev, struct device_attribute *attr, uint8_t wwpn[WWN_SZ]; int rc; - if (!phba->cfg_EnableXLane) + if (!phba->cfg_fof) return -EPERM; /* count may include a LF at end of string */ @@ -2499,7 +2504,7 @@ lpfc_oas_lun_state_store(struct device *dev, struct device_attribute *attr, struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba; int val = 0; - if (!phba->cfg_EnableXLane) + if (!phba->cfg_fof) return -EPERM; if (!isdigit(buf[0])) @@ -2565,7 +2570,7 @@ lpfc_oas_lun_state_set(struct lpfc_hba *phba, uint8_t vpt_wwpn[], int rc = 0; - if (!phba->cfg_EnableXLane) + if (!phba->cfg_fof) return -EPERM; if (oas_state) { @@ -2670,7 +2675,7 @@ lpfc_oas_lun_show(struct device *dev, struct device_attribute *attr, uint64_t oas_lun; int len = 0; - if (!phba->cfg_EnableXLane) + if (!phba->cfg_fof) return -EPERM; if (wwn_to_u64(phba->cfg_oas_vpt_wwpn) == 0) @@ -2716,7 +2721,7 @@ lpfc_oas_lun_store(struct device *dev, struct device_attribute *attr, uint64_t scsi_lun; ssize_t rc; - if (!phba->cfg_EnableXLane) + if (!phba->cfg_fof) return -EPERM; if (wwn_to_u64(phba->cfg_oas_vpt_wwpn) == 0) @@ -4655,7 +4660,7 @@ LPFC_ATTR_R(EnableXLane, 0, 0, 1, "Enable Express Lane Feature."); # 0x0 - 0x7f = CS_CTL field in FC header (high 7 bits) # Value range is [0x0,0x7f]. 
Default value is 0 */ -LPFC_ATTR_R(XLanePriority, 0, 0x0, 0x7f, "CS_CTL for Express Lane Feature."); +LPFC_ATTR_RW(XLanePriority, 0, 0x0, 0x7f, "CS_CTL for Express Lane Feature."); /* # lpfc_enable_bg: Enable BlockGuard (Emulex's Implementation of T10-DIF) diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index ca2f4ea7cde..5b5c825d957 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2009-2013 Emulex. All rights reserved. * + * Copyright (C) 2009-2014 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * * diff --git a/drivers/scsi/lpfc/lpfc_bsg.h b/drivers/scsi/lpfc/lpfc_bsg.h index a94d4c9dfaa..928ef609f36 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.h +++ b/drivers/scsi/lpfc/lpfc_bsg.h @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2010-2012 Emulex. All rights reserved. * + * Copyright (C) 2010-2014 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * * diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index adda0bf7a24..db5604f01a1 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2013 Emulex. All rights reserved. * + * Copyright (C) 2004-2014 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * * @@ -289,6 +289,7 @@ int lpfc_sli_issue_iocb(struct lpfc_hba *, uint32_t, void lpfc_sli_pcimem_bcopy(void *, void *, uint32_t); void lpfc_sli_bemem_bcopy(void *, void *, uint32_t); void lpfc_sli_abort_iocb_ring(struct lpfc_hba *, struct lpfc_sli_ring *); +void lpfc_sli_abort_fcp_rings(struct lpfc_hba *phba); void lpfc_sli_hba_iocb_abort(struct lpfc_hba *); void lpfc_sli_flush_fcp_rings(struct lpfc_hba *); int lpfc_sli_ringpostbuf_put(struct lpfc_hba *, struct lpfc_sli_ring *, @@ -310,6 +311,9 @@ int lpfc_sli_issue_abort_iotag(struct lpfc_hba *, struct lpfc_sli_ring *, int lpfc_sli_sum_iocb(struct lpfc_vport *, uint16_t, uint64_t, lpfc_ctx_cmd); int lpfc_sli_abort_iocb(struct lpfc_vport *, struct lpfc_sli_ring *, uint16_t, uint64_t, lpfc_ctx_cmd); +int +lpfc_sli_abort_taskmgmt(struct lpfc_vport *, struct lpfc_sli_ring *, + uint16_t, uint64_t, lpfc_ctx_cmd); void lpfc_mbox_timeout(unsigned long); void lpfc_mbox_timeout_handler(struct lpfc_hba *); diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 828c08e9389..b0aedce3f54 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2007-2012 Emulex. All rights reserved. * + * Copyright (C) 2007-2014 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. 
* * www.emulex.com * * * @@ -2314,7 +2314,7 @@ proc_cq: goto too_big; } - if (phba->cfg_EnableXLane) { + if (phba->cfg_fof) { /* OAS CQ */ qp = phba->sli4_hba.oas_cq; diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 624fe0b3cc0..7a5d81a65be 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2013 Emulex. All rights reserved. * + * Copyright (C) 2004-2014 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 294c072e908..2a17e31265b 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2013 Emulex. All rights reserved. * + * Copyright (C) 2004-2014 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * @@ -5634,6 +5634,9 @@ lpfc_nlp_init(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ndlp->active_rrqs_xri_bitmap = mempool_alloc(vport->phba->active_rrq_pool, GFP_KERNEL); + if (ndlp->active_rrqs_xri_bitmap) + memset(ndlp->active_rrqs_xri_bitmap, 0, + ndlp->phba->cfg_rrq_xri_bitmap_sz); } diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h index 3d9438ce59a..23625925237 100644 --- a/drivers/scsi/lpfc/lpfc_hw.h +++ b/drivers/scsi/lpfc/lpfc_hw.h @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2013 Emulex. All rights reserved. * + * Copyright (C) 2004-2014 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * * diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index fd79f7de766..f432ec180cf 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2009-2013 Emulex. All rights reserved. * + * Copyright (C) 2009-2014 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * * diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 635eeb3d698..06f9a5b79e6 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2013 Emulex. All rights reserved. * + * Copyright (C) 2004-2014 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. 
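The memset added after mempool_alloc() in lpfc_nlp_init() above matters because mempool allocations are recycled, not zeroed, so a new login could otherwise inherit stale RRQ XRI bits. A runnable toy showing the hazard and the fix; pool_get()/pool_put() are illustrative stand-ins, not driver or kernel APIs.

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/* Toy one-slot "pool": returns recycled objects without zeroing them. */
struct rrq_bitmap { unsigned long bits[4]; };

static struct rrq_bitmap *pool_slot;

static struct rrq_bitmap *pool_get(void)
{
    struct rrq_bitmap *p = pool_slot ? pool_slot : malloc(sizeof(*p));
    pool_slot = NULL;
    return p;                        /* NOT zeroed, by design */
}

static void pool_put(struct rrq_bitmap *p) { pool_slot = p; }

int main(void)
{
    struct rrq_bitmap *a = pool_get();
    memset(a, 0xff, sizeof(*a));     /* first owner dirties the object */
    pool_put(a);

    struct rrq_bitmap *b = pool_get();   /* recycled object comes back */
    memset(b, 0, sizeof(*b));        /* the fix: explicit zeroing */
    printf("bits[0]=%lx\n", b->bits[0]);
    free(b);
    return 0;
}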
* * www.emulex.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * @@ -820,57 +820,153 @@ lpfc_hba_down_prep(struct lpfc_hba *phba) } /** - * lpfc_hba_down_post_s3 - Perform lpfc uninitialization after HBA reset + * lpfc_sli4_free_sp_events - Cleanup sp_queue_events to free + * rspiocb which got deferred + * * @phba: pointer to lpfc HBA data structure. * - * This routine will do uninitialization after the HBA is reset when bring - * down the SLI Layer. + * This routine will cleanup completed slow path events after HBA is reset + * when bringing down the SLI Layer. + * * * Return codes - * 0 - success. - * Any other value - error. + * void. **/ -static int -lpfc_hba_down_post_s3(struct lpfc_hba *phba) +static void +lpfc_sli4_free_sp_events(struct lpfc_hba *phba) +{ + struct lpfc_iocbq *rspiocbq; + struct hbq_dmabuf *dmabuf; + struct lpfc_cq_event *cq_event; + + spin_lock_irq(&phba->hbalock); + phba->hba_flag &= ~HBA_SP_QUEUE_EVT; + spin_unlock_irq(&phba->hbalock); + + while (!list_empty(&phba->sli4_hba.sp_queue_event)) { + /* Get the response iocb from the head of work queue */ + spin_lock_irq(&phba->hbalock); + list_remove_head(&phba->sli4_hba.sp_queue_event, + cq_event, struct lpfc_cq_event, list); + spin_unlock_irq(&phba->hbalock); + + switch (bf_get(lpfc_wcqe_c_code, &cq_event->cqe.wcqe_cmpl)) { + case CQE_CODE_COMPL_WQE: + rspiocbq = container_of(cq_event, struct lpfc_iocbq, + cq_event); + lpfc_sli_release_iocbq(phba, rspiocbq); + break; + case CQE_CODE_RECEIVE: + case CQE_CODE_RECEIVE_V1: + dmabuf = container_of(cq_event, struct hbq_dmabuf, + cq_event); + lpfc_in_buf_free(phba, &dmabuf->dbuf); + } + } +} + +/** + * lpfc_hba_free_post_buf - Perform lpfc uninitialization after HBA reset + * @phba: pointer to lpfc HBA data structure. + * + * This routine will cleanup posted ELS buffers after the HBA is reset + * when bringing down the SLI Layer. + * + * + * Return codes + * void. + **/ +static void +lpfc_hba_free_post_buf(struct lpfc_hba *phba) { struct lpfc_sli *psli = &phba->sli; struct lpfc_sli_ring *pring; struct lpfc_dmabuf *mp, *next_mp; - LIST_HEAD(completions); - int i; + LIST_HEAD(buflist); + int count; if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) lpfc_sli_hbqbuf_free_all(phba); else { /* Cleanup preposted buffers on the ELS ring */ pring = &psli->ring[LPFC_ELS_RING]; - list_for_each_entry_safe(mp, next_mp, &pring->postbufq, list) { + spin_lock_irq(&phba->hbalock); + list_splice_init(&pring->postbufq, &buflist); + spin_unlock_irq(&phba->hbalock); + + count = 0; + list_for_each_entry_safe(mp, next_mp, &buflist, list) { list_del(&mp->list); - pring->postbufq_cnt--; + count++; lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); } + + spin_lock_irq(&phba->hbalock); + pring->postbufq_cnt -= count; + spin_unlock_irq(&phba->hbalock); } +} + +/** + * lpfc_hba_clean_txcmplq - Perform lpfc uninitialization after HBA reset + * @phba: pointer to lpfc HBA data structure. + * + * This routine will cleanup the txcmplq after the HBA is reset when bringing + * down the SLI Layer. + * + * Return codes + * void + **/ +static void +lpfc_hba_clean_txcmplq(struct lpfc_hba *phba) +{ + struct lpfc_sli *psli = &phba->sli; + struct lpfc_sli_ring *pring; + LIST_HEAD(completions); + int i; - spin_lock_irq(&phba->hbalock); for (i = 0; i < psli->num_rings; i++) { pring = &psli->ring[i]; - + if (phba->sli_rev >= LPFC_SLI_REV4) + spin_lock_irq(&pring->ring_lock); + else + spin_lock_irq(&phba->hbalock); /* At this point in time the HBA is either reset or DOA. 
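lpfc_hba_free_post_buf() above uses a common lock-scoping idiom: splice the whole postbufq off the ring while holding hbalock, then walk and free the buffers with the lock dropped, adjusting postbufq_cnt once at the end. A self-contained userspace sketch of that idiom; a toy singly linked list and a pthread mutex stand in for list_head and hbalock.

#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>

struct node { struct node *next; };

static pthread_mutex_t qlock = PTHREAD_MUTEX_INITIALIZER;
static struct node *queue;               /* protected by qlock */

static int drain_queue(void)
{
    struct node *local, *n;
    int count = 0;

    pthread_mutex_lock(&qlock);
    local = queue;                   /* take the whole list... */
    queue = NULL;
    pthread_mutex_unlock(&qlock);    /* ...and drop the lock early */

    while ((n = local) != NULL) {    /* free without holding qlock */
        local = n->next;
        free(n);
        count++;
    }
    return count;                    /* adjust any counter once, at the end */
}

int main(void)
{
    for (int i = 0; i < 3; i++) {
        struct node *n = calloc(1, sizeof(*n));
        n->next = queue;
        queue = n;
    }
    printf("freed %d buffers\n", drain_queue());
    return 0;
}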
Either * way, nothing should be on txcmplq as it will NEVER complete. */ list_splice_init(&pring->txcmplq, &completions); - spin_unlock_irq(&phba->hbalock); + pring->txcmplq_cnt = 0; + + if (phba->sli_rev >= LPFC_SLI_REV4) + spin_unlock_irq(&pring->ring_lock); + else + spin_unlock_irq(&phba->hbalock); /* Cancel all the IOCBs from the completions list */ lpfc_sli_cancel_iocbs(phba, &completions, IOSTAT_LOCAL_REJECT, IOERR_SLI_ABORTED); - lpfc_sli_abort_iocb_ring(phba, pring); - spin_lock_irq(&phba->hbalock); } - spin_unlock_irq(&phba->hbalock); +} +/** + * lpfc_hba_down_post_s3 - Perform lpfc uninitialization after HBA reset + * @phba: pointer to lpfc HBA data structure. + * + * This routine will do uninitialization after the HBA is reset when bringing + * down the SLI Layer. + * + * Return codes + * 0 - success. + * Any other value - error. + **/ +static int +lpfc_hba_down_post_s3(struct lpfc_hba *phba) +{ + lpfc_hba_free_post_buf(phba); + lpfc_hba_clean_txcmplq(phba); return 0; } @@ -890,13 +986,12 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba) { struct lpfc_scsi_buf *psb, *psb_next; LIST_HEAD(aborts); - int ret; unsigned long iflag = 0; struct lpfc_sglq *sglq_entry = NULL; - ret = lpfc_hba_down_post_s3(phba); - if (ret) - return ret; + lpfc_hba_free_post_buf(phba); + lpfc_hba_clean_txcmplq(phba); + /* At this point in time the HBA is either reset or DOA. Either * way, nothing should be on lpfc_abts_els_sgl_list, it needs to be * on the lpfc_sgl_list so that it can either be freed if the @@ -932,6 +1027,8 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba) spin_lock_irqsave(&phba->scsi_buf_list_put_lock, iflag); list_splice(&aborts, &phba->lpfc_scsi_buf_list_put); spin_unlock_irqrestore(&phba->scsi_buf_list_put_lock, iflag); + + lpfc_sli4_free_sp_events(phba); return 0; } @@ -1250,7 +1347,6 @@ static void lpfc_handle_deferred_eratt(struct lpfc_hba *phba) { uint32_t old_host_status = phba->work_hs; - struct lpfc_sli_ring *pring; struct lpfc_sli *psli = &phba->sli; /* If the pci channel is offline, ignore possible errors, @@ -1279,8 +1375,7 @@ lpfc_handle_deferred_eratt(struct lpfc_hba *phba) * dropped by the firmware. Error iocb (I/O) on txcmplq and let the * SCSI layer retry it after re-establishing link. */ - pring = &psli->ring[psli->fcp_ring]; - lpfc_sli_abort_iocb_ring(phba, pring); + lpfc_sli_abort_fcp_rings(phba); /* * There was a firmware error. Take the hba offline and then @@ -1348,7 +1443,6 @@ lpfc_handle_eratt_s3(struct lpfc_hba *phba) { struct lpfc_vport *vport = phba->pport; struct lpfc_sli *psli = &phba->sli; - struct lpfc_sli_ring *pring; uint32_t event_data; unsigned long temperature; struct temp_event temp_event_data; @@ -1400,8 +1494,7 @@ lpfc_handle_eratt_s3(struct lpfc_hba *phba) * Error iocb (I/O) on txcmplq and let the SCSI layer * retry it after re-establishing link. */ - pring = &psli->ring[psli->fcp_ring]; - lpfc_sli_abort_iocb_ring(phba, pring); + lpfc_sli_abort_fcp_rings(phba); /* * There was a firmware error.
Take the hba offline and then @@ -1940,78 +2033,81 @@ lpfc_get_hba_model_desc(struct lpfc_hba *phba, uint8_t *mdp, uint8_t *descp) switch (dev_id) { case PCI_DEVICE_ID_FIREFLY: - m = (typeof(m)){"LP6000", "PCI", "Fibre Channel Adapter"}; + m = (typeof(m)){"LP6000", "PCI", + "Obsolete, Unsupported Fibre Channel Adapter"}; break; case PCI_DEVICE_ID_SUPERFLY: if (vp->rev.biuRev >= 1 && vp->rev.biuRev <= 3) - m = (typeof(m)){"LP7000", "PCI", - "Fibre Channel Adapter"}; + m = (typeof(m)){"LP7000", "PCI", ""}; else - m = (typeof(m)){"LP7000E", "PCI", - "Fibre Channel Adapter"}; + m = (typeof(m)){"LP7000E", "PCI", ""}; + m.function = "Obsolete, Unsupported Fibre Channel Adapter"; break; case PCI_DEVICE_ID_DRAGONFLY: m = (typeof(m)){"LP8000", "PCI", - "Fibre Channel Adapter"}; + "Obsolete, Unsupported Fibre Channel Adapter"}; break; case PCI_DEVICE_ID_CENTAUR: if (FC_JEDEC_ID(vp->rev.biuRev) == CENTAUR_2G_JEDEC_ID) - m = (typeof(m)){"LP9002", "PCI", - "Fibre Channel Adapter"}; + m = (typeof(m)){"LP9002", "PCI", ""}; else - m = (typeof(m)){"LP9000", "PCI", - "Fibre Channel Adapter"}; + m = (typeof(m)){"LP9000", "PCI", ""}; + m.function = "Obsolete, Unsupported Fibre Channel Adapter"; break; case PCI_DEVICE_ID_RFLY: m = (typeof(m)){"LP952", "PCI", - "Fibre Channel Adapter"}; + "Obsolete, Unsupported Fibre Channel Adapter"}; break; case PCI_DEVICE_ID_PEGASUS: m = (typeof(m)){"LP9802", "PCI-X", - "Fibre Channel Adapter"}; + "Obsolete, Unsupported Fibre Channel Adapter"}; break; case PCI_DEVICE_ID_THOR: m = (typeof(m)){"LP10000", "PCI-X", - "Fibre Channel Adapter"}; + "Obsolete, Unsupported Fibre Channel Adapter"}; break; case PCI_DEVICE_ID_VIPER: m = (typeof(m)){"LPX1000", "PCI-X", - "Fibre Channel Adapter"}; + "Obsolete, Unsupported Fibre Channel Adapter"}; break; case PCI_DEVICE_ID_PFLY: m = (typeof(m)){"LP982", "PCI-X", - "Fibre Channel Adapter"}; + "Obsolete, Unsupported Fibre Channel Adapter"}; break; case PCI_DEVICE_ID_TFLY: m = (typeof(m)){"LP1050", "PCI-X", - "Fibre Channel Adapter"}; + "Obsolete, Unsupported Fibre Channel Adapter"}; break; case PCI_DEVICE_ID_HELIOS: m = (typeof(m)){"LP11000", "PCI-X2", - "Fibre Channel Adapter"}; + "Obsolete, Unsupported Fibre Channel Adapter"}; break; case PCI_DEVICE_ID_HELIOS_SCSP: m = (typeof(m)){"LP11000-SP", "PCI-X2", - "Fibre Channel Adapter"}; + "Obsolete, Unsupported Fibre Channel Adapter"}; break; case PCI_DEVICE_ID_HELIOS_DCSP: m = (typeof(m)){"LP11002-SP", "PCI-X2", - "Fibre Channel Adapter"}; + "Obsolete, Unsupported Fibre Channel Adapter"}; break; case PCI_DEVICE_ID_NEPTUNE: - m = (typeof(m)){"LPe1000", "PCIe", "Fibre Channel Adapter"}; + m = (typeof(m)){"LPe1000", "PCIe", + "Obsolete, Unsupported Fibre Channel Adapter"}; break; case PCI_DEVICE_ID_NEPTUNE_SCSP: - m = (typeof(m)){"LPe1000-SP", "PCIe", "Fibre Channel Adapter"}; + m = (typeof(m)){"LPe1000-SP", "PCIe", + "Obsolete, Unsupported Fibre Channel Adapter"}; break; case PCI_DEVICE_ID_NEPTUNE_DCSP: - m = (typeof(m)){"LPe1002-SP", "PCIe", "Fibre Channel Adapter"}; + m = (typeof(m)){"LPe1002-SP", "PCIe", + "Obsolete, Unsupported Fibre Channel Adapter"}; break; case PCI_DEVICE_ID_BMID: m = (typeof(m)){"LP1150", "PCI-X2", "Fibre Channel Adapter"}; break; case PCI_DEVICE_ID_BSMB: - m = (typeof(m)){"LP111", "PCI-X2", "Fibre Channel Adapter"}; + m = (typeof(m)){"LP111", "PCI-X2", + "Obsolete, Unsupported Fibre Channel Adapter"}; break; case PCI_DEVICE_ID_ZEPHYR: m = (typeof(m)){"LPe11000", "PCIe", "Fibre Channel Adapter"}; @@ -2030,16 +2126,20 @@ lpfc_get_hba_model_desc(struct lpfc_hba *phba, 
uint8_t *mdp, uint8_t *descp) m = (typeof(m)){"LPe111", "PCIe", "Fibre Channel Adapter"}; break; case PCI_DEVICE_ID_LP101: - m = (typeof(m)){"LP101", "PCI-X", "Fibre Channel Adapter"}; + m = (typeof(m)){"LP101", "PCI-X", + "Obsolete, Unsupported Fibre Channel Adapter"}; break; case PCI_DEVICE_ID_LP10000S: - m = (typeof(m)){"LP10000-S", "PCI", "Fibre Channel Adapter"}; + m = (typeof(m)){"LP10000-S", "PCI", + "Obsolete, Unsupported Fibre Channel Adapter"}; break; case PCI_DEVICE_ID_LP11000S: - m = (typeof(m)){"LP11000-S", "PCI-X2", "Fibre Channel Adapter"}; + m = (typeof(m)){"LP11000-S", "PCI-X2", + "Obsolete, Unsupported Fibre Channel Adapter"}; break; case PCI_DEVICE_ID_LPE11000S: - m = (typeof(m)){"LPe11000-S", "PCIe", "Fibre Channel Adapter"}; + m = (typeof(m)){"LPe11000-S", "PCIe", + "Obsolete, Unsupported Fibre Channel Adapter"}; break; case PCI_DEVICE_ID_SAT: m = (typeof(m)){"LPe12000", "PCIe", "Fibre Channel Adapter"}; @@ -2060,20 +2160,21 @@ lpfc_get_hba_model_desc(struct lpfc_hba *phba, uint8_t *mdp, uint8_t *descp) m = (typeof(m)){"LPe12000-S", "PCIe", "Fibre Channel Adapter"}; break; case PCI_DEVICE_ID_HORNET: - m = (typeof(m)){"LP21000", "PCIe", "FCoE Adapter"}; + m = (typeof(m)){"LP21000", "PCIe", + "Obsolete, Unsupported FCoE Adapter"}; GE = 1; break; case PCI_DEVICE_ID_PROTEUS_VF: m = (typeof(m)){"LPev12000", "PCIe IOV", - "Fibre Channel Adapter"}; + "Obsolete, Unsupported Fibre Channel Adapter"}; break; case PCI_DEVICE_ID_PROTEUS_PF: m = (typeof(m)){"LPev12000", "PCIe IOV", - "Fibre Channel Adapter"}; + "Obsolete, Unsupported Fibre Channel Adapter"}; break; case PCI_DEVICE_ID_PROTEUS_S: m = (typeof(m)){"LPemv12002-S", "PCIe IOV", - "Fibre Channel Adapter"}; + "Obsolete, Unsupported Fibre Channel Adapter"}; break; case PCI_DEVICE_ID_TIGERSHARK: oneConnect = 1; @@ -2089,17 +2190,24 @@ lpfc_get_hba_model_desc(struct lpfc_hba *phba, uint8_t *mdp, uint8_t *descp) break; case PCI_DEVICE_ID_BALIUS: m = (typeof(m)){"LPVe12002", "PCIe Shared I/O", - "Fibre Channel Adapter"}; + "Obsolete, Unsupported Fibre Channel Adapter"}; break; case PCI_DEVICE_ID_LANCER_FC: - case PCI_DEVICE_ID_LANCER_FC_VF: m = (typeof(m)){"LPe16000", "PCIe", "Fibre Channel Adapter"}; break; + case PCI_DEVICE_ID_LANCER_FC_VF: + m = (typeof(m)){"LPe16000", "PCIe", + "Obsolete, Unsupported Fibre Channel Adapter"}; + break; case PCI_DEVICE_ID_LANCER_FCOE: - case PCI_DEVICE_ID_LANCER_FCOE_VF: oneConnect = 1; m = (typeof(m)){"OCe15100", "PCIe", "FCoE"}; break; + case PCI_DEVICE_ID_LANCER_FCOE_VF: + oneConnect = 1; + m = (typeof(m)){"OCe15100", "PCIe", + "Obsolete, Unsupported FCoE"}; + break; case PCI_DEVICE_ID_SKYHAWK: case PCI_DEVICE_ID_SKYHAWK_VF: oneConnect = 1; @@ -4614,7 +4722,10 @@ lpfc_reset_hba(struct lpfc_hba *phba) phba->link_state = LPFC_HBA_ERROR; return; } - lpfc_offline_prep(phba, LPFC_MBX_WAIT); + if (phba->sli.sli_flag & LPFC_SLI_ACTIVE) + lpfc_offline_prep(phba, LPFC_MBX_WAIT); + else + lpfc_offline_prep(phba, LPFC_MBX_NO_WAIT); lpfc_offline(phba); lpfc_sli_brdrestart(phba); lpfc_online(phba); @@ -9663,9 +9774,6 @@ lpfc_pci_resume_one_s3(struct pci_dev *pdev) static void lpfc_sli_prep_dev_for_recover(struct lpfc_hba *phba) { - struct lpfc_sli *psli = &phba->sli; - struct lpfc_sli_ring *pring; - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "2723 PCI channel I/O abort preparing for recovery\n"); @@ -9673,8 +9781,7 @@ lpfc_sli_prep_dev_for_recover(struct lpfc_hba *phba) * There may be errored I/Os through HBA, abort all I/Os on txcmplq * and let the SCSI mid-layer to retry them to recover. 
*/ - pring = &psli->ring[psli->fcp_ring]; - lpfc_sli_abort_iocb_ring(phba, pring); + lpfc_sli_abort_fcp_rings(phba); } /** @@ -10417,17 +10524,13 @@ lpfc_pci_resume_one_s4(struct pci_dev *pdev) static void lpfc_sli4_prep_dev_for_recover(struct lpfc_hba *phba) { - struct lpfc_sli *psli = &phba->sli; - struct lpfc_sli_ring *pring; - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "2828 PCI channel I/O abort preparing for recovery\n"); /* * There may be errored I/Os through HBA, abort all I/Os on txcmplq * and let the SCSI mid-layer to retry them to recover. */ - pring = &psli->ring[psli->fcp_ring]; - lpfc_sli_abort_iocb_ring(phba, pring); + lpfc_sli_abort_fcp_rings(phba); } /** @@ -10898,7 +11001,7 @@ lpfc_sli4_oas_verify(struct lpfc_hba *phba) if (phba->sli4_hba.pc_sli4_params.oas_supported) { phba->cfg_fof = 1; } else { - phba->cfg_EnableXLane = 0; + phba->cfg_fof = 0; if (phba->device_data_mem_pool) mempool_destroy(phba->device_data_mem_pool); phba->device_data_mem_pool = NULL; @@ -10928,7 +11031,7 @@ lpfc_fof_queue_setup(struct lpfc_hba *phba) if (rc) return -ENOMEM; - if (phba->cfg_EnableXLane) { + if (phba->cfg_fof) { rc = lpfc_cq_create(phba, phba->sli4_hba.oas_cq, phba->sli4_hba.fof_eq, LPFC_WCQ, LPFC_FCP); @@ -10947,8 +11050,7 @@ lpfc_fof_queue_setup(struct lpfc_hba *phba) return 0; out_oas_wq: - if (phba->cfg_EnableXLane) - lpfc_cq_destroy(phba, phba->sli4_hba.oas_cq); + lpfc_cq_destroy(phba, phba->sli4_hba.oas_cq); out_oas_cq: lpfc_eq_destroy(phba, phba->sli4_hba.fof_eq); return rc; @@ -10982,7 +11084,7 @@ lpfc_fof_queue_create(struct lpfc_hba *phba) phba->sli4_hba.fof_eq = qdesc; - if (phba->cfg_EnableXLane) { + if (phba->cfg_fof) { /* Create OAS CQ */ qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.cq_esize, diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c index ed419aad2b1..3fa65338d3f 100644 --- a/drivers/scsi/lpfc/lpfc_mem.c +++ b/drivers/scsi/lpfc/lpfc_mem.c @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2012 Emulex. All rights reserved. * + * Copyright (C) 2004-2014 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 462453ee0bd..2df11daad85 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2013 Emulex. All rights reserved. * + * Copyright (C) 2004-2014 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * @@ -73,7 +73,7 @@ lpfc_rport_data_from_scsi_device(struct scsi_device *sdev) { struct lpfc_vport *vport = (struct lpfc_vport *)sdev->host->hostdata; - if (vport->phba->cfg_EnableXLane) + if (vport->phba->cfg_fof) return ((struct lpfc_device_data *)sdev->hostdata)->rport_data; else return (struct lpfc_rport_data *)sdev->hostdata; @@ -3462,7 +3462,7 @@ lpfc_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd) * If the OAS driver feature is enabled and the lun is enabled for * OAS, set the oas iocb related flags. 
*/ - if ((phba->cfg_EnableXLane) && ((struct lpfc_device_data *) + if ((phba->cfg_fof) && ((struct lpfc_device_data *) scsi_cmnd->device->hostdata)->oas_enabled) lpfc_cmd->cur_iocbq.iocb_flag |= LPFC_IO_OAS; return 0; @@ -4314,6 +4314,7 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd, fcp_cmnd->fcpCntl1 = SIMPLE_Q; sli4 = (phba->sli_rev == LPFC_SLI_REV4); + piocbq->iocb.un.fcpi.fcpi_XRdy = 0; /* * There are three possibilities here - use scatter-gather segment, use @@ -4782,7 +4783,9 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) struct lpfc_scsi_buf *lpfc_cmd; IOCB_t *cmd, *icmd; int ret = SUCCESS, status = 0; - unsigned long flags; + struct lpfc_sli_ring *pring_s4; + int ring_number, ret_val; + unsigned long flags, iflags; DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waitq); status = fc_block_scsi_eh(cmnd); @@ -4833,6 +4836,14 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) BUG_ON(iocb->context1 != lpfc_cmd); + /* abort issued in recovery is still in progress */ + if (iocb->iocb_flag & LPFC_DRIVER_ABORTED) { + lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP, + "3389 SCSI Layer I/O Abort Request is pending\n"); + spin_unlock_irqrestore(&phba->hbalock, flags); + goto wait_for_cmpl; + } + abtsiocb = __lpfc_sli_get_iocbq(phba); if (abtsiocb == NULL) { ret = FAILED; @@ -4871,11 +4882,23 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) abtsiocb->iocb_cmpl = lpfc_sli_abort_fcp_cmpl; abtsiocb->vport = vport; + if (phba->sli_rev == LPFC_SLI_REV4) { + ring_number = MAX_SLI3_CONFIGURED_RINGS + iocb->fcp_wqidx; + pring_s4 = &phba->sli.ring[ring_number]; + /* Note: both hbalock and ring_lock must be set here */ + spin_lock_irqsave(&pring_s4->ring_lock, iflags); + ret_val = __lpfc_sli_issue_iocb(phba, pring_s4->ringno, + abtsiocb, 0); + spin_unlock_irqrestore(&pring_s4->ring_lock, iflags); + } else { + ret_val = __lpfc_sli_issue_iocb(phba, LPFC_FCP_RING, + abtsiocb, 0); + } /* no longer need the lock after this point */ spin_unlock_irqrestore(&phba->hbalock, flags); - if (lpfc_sli_issue_iocb(phba, LPFC_FCP_RING, abtsiocb, 0) == - IOCB_ERROR) { + + if (ret_val == IOCB_ERROR) { lpfc_sli_release_iocbq(phba, abtsiocb); ret = FAILED; goto out; @@ -4885,12 +4908,16 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) lpfc_sli_handle_fast_ring_event(phba, &phba->sli.ring[LPFC_FCP_RING], HA_R0RE_REQ); +wait_for_cmpl: lpfc_cmd->waitq = &waitq; /* Wait for abort to complete */ wait_event_timeout(waitq, (lpfc_cmd->pCmd != cmnd), msecs_to_jiffies(2*vport->cfg_devloss_tmo*1000)); + + spin_lock_irqsave(shost->host_lock, flags); lpfc_cmd->waitq = NULL; + spin_unlock_irqrestore(shost->host_lock, flags); if (lpfc_cmd->pCmd == cmnd) { ret = FAILED; @@ -5172,8 +5199,9 @@ lpfc_reset_flush_io_context(struct lpfc_vport *vport, uint16_t tgt_id, cnt = lpfc_sli_sum_iocb(vport, tgt_id, lun_id, context); if (cnt) - lpfc_sli_abort_iocb(vport, &phba->sli.ring[phba->sli.fcp_ring], - tgt_id, lun_id, context); + lpfc_sli_abort_taskmgmt(vport, + &phba->sli.ring[phba->sli.fcp_ring], + tgt_id, lun_id, context); later = msecs_to_jiffies(2 * vport->cfg_devloss_tmo * 1000) + jiffies; while (time_after(later, jiffies) && cnt) { schedule_timeout_uninterruptible(msecs_to_jiffies(20)); @@ -5491,7 +5519,7 @@ lpfc_slave_alloc(struct scsi_device *sdev) if (!rport || fc_remote_port_chkready(rport)) return -ENXIO; - if (phba->cfg_EnableXLane) { + if (phba->cfg_fof) { /* * Check to see if the device data structure for the lun @@ -5616,7 +5644,7 @@ lpfc_slave_destroy(struct scsi_device *sdev) struct lpfc_device_data *device_data = 
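The SLI-4 branch added to lpfc_abort_handler() above must issue the ABTS on the same work-queue ring as the command it aborts, and the per-WQ rings are laid out after the fixed SLI-3 rings. A small runnable sketch of that index math; the value 4 for MAX_SLI3_CONFIGURED_RINGS is an assumption for illustration, not taken from the headers.

#include <assert.h>
#include <stdio.h>

#define MAX_SLI3_CONFIGURED_RINGS 4   /* illustrative value */

/* Per-WQ FCP rings start right after the fixed SLI-3 rings. */
static int fcp_ring_index(int fcp_wqidx)
{
    return MAX_SLI3_CONFIGURED_RINGS + fcp_wqidx;
}

int main(void)
{
    assert(fcp_ring_index(0) == 4);
    printf("wq 2 -> ring %d\n", fcp_ring_index(2));
    return 0;
}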
sdev->hostdata; atomic_dec(&phba->sdev_cnt); - if ((phba->cfg_EnableXLane) && (device_data)) { + if ((phba->cfg_fof) && (device_data)) { spin_lock_irqsave(&phba->devicelock, flags); device_data->available = false; if (!device_data->oas_enabled) @@ -5655,7 +5683,7 @@ lpfc_create_device_data(struct lpfc_hba *phba, struct lpfc_name *vport_wwpn, int memory_flags; if (unlikely(!phba) || !vport_wwpn || !target_wwpn || - !(phba->cfg_EnableXLane)) + !(phba->cfg_fof)) return NULL; /* Attempt to create the device data to contain lun info */ @@ -5693,7 +5721,7 @@ lpfc_delete_device_data(struct lpfc_hba *phba, { if (unlikely(!phba) || !lun_info || - !(phba->cfg_EnableXLane)) + !(phba->cfg_fof)) return; if (!list_empty(&lun_info->listentry)) @@ -5727,7 +5755,7 @@ __lpfc_get_device_data(struct lpfc_hba *phba, struct list_head *list, struct lpfc_device_data *lun_info; if (unlikely(!phba) || !list || !vport_wwpn || !target_wwpn || - !phba->cfg_EnableXLane) + !phba->cfg_fof) return NULL; /* Check to see if the lun is already enabled for OAS. */ @@ -5789,7 +5817,7 @@ lpfc_find_next_oas_lun(struct lpfc_hba *phba, struct lpfc_name *vport_wwpn, !starting_lun || !found_vport_wwpn || !found_target_wwpn || !found_lun || !found_lun_status || (*starting_lun == NO_MORE_OAS_LUN) || - !phba->cfg_EnableXLane) + !phba->cfg_fof) return false; lun = *starting_lun; @@ -5873,7 +5901,7 @@ lpfc_enable_oas_lun(struct lpfc_hba *phba, struct lpfc_name *vport_wwpn, unsigned long flags; if (unlikely(!phba) || !vport_wwpn || !target_wwpn || - !phba->cfg_EnableXLane) + !phba->cfg_fof) return false; spin_lock_irqsave(&phba->devicelock, flags); @@ -5930,7 +5958,7 @@ lpfc_disable_oas_lun(struct lpfc_hba *phba, struct lpfc_name *vport_wwpn, unsigned long flags; if (unlikely(!phba) || !vport_wwpn || !target_wwpn || - !phba->cfg_EnableXLane) + !phba->cfg_fof) return false; spin_lock_irqsave(&phba->devicelock, flags); diff --git a/drivers/scsi/lpfc/lpfc_scsi.h b/drivers/scsi/lpfc/lpfc_scsi.h index 0120bfccf50..0389ac1e7b8 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.h +++ b/drivers/scsi/lpfc/lpfc_scsi.h @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2013 Emulex. All rights reserved. * + * Copyright (C) 2004-2014 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * * diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 393662c24df..32ada050557 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2013 Emulex. All rights reserved. * + * Copyright (C) 2004-2014 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * @@ -3532,14 +3532,27 @@ lpfc_sli_abort_iocb_ring(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) /* Error everything on txq and txcmplq * First do the txq. 
*/ - spin_lock_irq(&phba->hbalock); - list_splice_init(&pring->txq, &completions); + if (phba->sli_rev >= LPFC_SLI_REV4) { + spin_lock_irq(&pring->ring_lock); + list_splice_init(&pring->txq, &completions); + pring->txq_cnt = 0; + spin_unlock_irq(&pring->ring_lock); - /* Next issue ABTS for everything on the txcmplq */ - list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) - lpfc_sli_issue_abort_iotag(phba, pring, iocb); + spin_lock_irq(&phba->hbalock); + /* Next issue ABTS for everything on the txcmplq */ + list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) + lpfc_sli_issue_abort_iotag(phba, pring, iocb); + spin_unlock_irq(&phba->hbalock); + } else { + spin_lock_irq(&phba->hbalock); + list_splice_init(&pring->txq, &completions); + pring->txq_cnt = 0; - spin_unlock_irq(&phba->hbalock); + /* Next issue ABTS for everything on the txcmplq */ + list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) + lpfc_sli_issue_abort_iotag(phba, pring, iocb); + spin_unlock_irq(&phba->hbalock); + } /* Cancel all the IOCBs from the completions list */ lpfc_sli_cancel_iocbs(phba, &completions, IOSTAT_LOCAL_REJECT, @@ -3547,6 +3560,36 @@ lpfc_sli_abort_iocb_ring(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) } /** + * lpfc_sli_abort_fcp_rings - Abort all iocbs in all FCP rings + * @phba: Pointer to HBA context object. + * + * This function aborts all iocbs in FCP rings and frees all the iocb + * objects in txq. This function issues an abort iocb for all the iocb commands + * in txcmplq. The iocbs in the txcmplq are not guaranteed to complete before + * the return of this function. The caller is not required to hold any locks. + **/ +void +lpfc_sli_abort_fcp_rings(struct lpfc_hba *phba) +{ + struct lpfc_sli *psli = &phba->sli; + struct lpfc_sli_ring *pring; + uint32_t i; + + /* Look on all the FCP Rings for the iotag */ + if (phba->sli_rev >= LPFC_SLI_REV4) { + for (i = 0; i < phba->cfg_fcp_io_channel; i++) { + pring = &psli->ring[i + MAX_SLI3_CONFIGURED_RINGS]; + lpfc_sli_abort_iocb_ring(phba, pring); + } + } else { + pring = &psli->ring[psli->fcp_ring]; + lpfc_sli_abort_iocb_ring(phba, pring); + } +} + + +/** * lpfc_sli_flush_fcp_rings - flush all iocbs in the fcp ring * @phba: Pointer to HBA context object.
* @@ -3563,28 +3606,55 @@ lpfc_sli_flush_fcp_rings(struct lpfc_hba *phba) LIST_HEAD(txcmplq); struct lpfc_sli *psli = &phba->sli; struct lpfc_sli_ring *pring; - - /* Currently, only one fcp ring */ - pring = &psli->ring[psli->fcp_ring]; + uint32_t i; spin_lock_irq(&phba->hbalock); - /* Retrieve everything on txq */ - list_splice_init(&pring->txq, &txq); - - /* Retrieve everything on the txcmplq */ - list_splice_init(&pring->txcmplq, &txcmplq); - /* Indicate the I/O queues are flushed */ phba->hba_flag |= HBA_FCP_IOQ_FLUSH; spin_unlock_irq(&phba->hbalock); - /* Flush the txq */ - lpfc_sli_cancel_iocbs(phba, &txq, IOSTAT_LOCAL_REJECT, - IOERR_SLI_DOWN); + /* Look on all the FCP Rings for the iotag */ + if (phba->sli_rev >= LPFC_SLI_REV4) { + for (i = 0; i < phba->cfg_fcp_io_channel; i++) { + pring = &psli->ring[i + MAX_SLI3_CONFIGURED_RINGS]; + + spin_lock_irq(&pring->ring_lock); + /* Retrieve everything on txq */ + list_splice_init(&pring->txq, &txq); + /* Retrieve everything on the txcmplq */ + list_splice_init(&pring->txcmplq, &txcmplq); + pring->txq_cnt = 0; + pring->txcmplq_cnt = 0; + spin_unlock_irq(&pring->ring_lock); + + /* Flush the txq */ + lpfc_sli_cancel_iocbs(phba, &txq, + IOSTAT_LOCAL_REJECT, + IOERR_SLI_DOWN); + /* Flush the txcmpq */ + lpfc_sli_cancel_iocbs(phba, &txcmplq, + IOSTAT_LOCAL_REJECT, + IOERR_SLI_DOWN); + } + } else { + pring = &psli->ring[psli->fcp_ring]; - /* Flush the txcmpq */ - lpfc_sli_cancel_iocbs(phba, &txcmplq, IOSTAT_LOCAL_REJECT, - IOERR_SLI_DOWN); + spin_lock_irq(&phba->hbalock); + /* Retrieve everything on txq */ + list_splice_init(&pring->txq, &txq); + /* Retrieve everything on the txcmplq */ + list_splice_init(&pring->txcmplq, &txcmplq); + pring->txq_cnt = 0; + pring->txcmplq_cnt = 0; + spin_unlock_irq(&phba->hbalock); + + /* Flush the txq */ + lpfc_sli_cancel_iocbs(phba, &txq, IOSTAT_LOCAL_REJECT, + IOERR_SLI_DOWN); + /* Flush the txcmpq */ + lpfc_sli_cancel_iocbs(phba, &txcmplq, IOSTAT_LOCAL_REJECT, + IOERR_SLI_DOWN); + } } /** @@ -3987,12 +4057,13 @@ lpfc_sli4_brdreset(struct lpfc_hba *phba) { struct lpfc_sli *psli = &phba->sli; uint16_t cfg_value; - int rc; + int rc = 0; /* Reset HBA */ lpfc_printf_log(phba, KERN_INFO, LOG_SLI, - "0295 Reset HBA Data: x%x x%x\n", - phba->pport->port_state, psli->sli_flag); + "0295 Reset HBA Data: x%x x%x x%x\n", + phba->pport->port_state, psli->sli_flag, + phba->hba_flag); /* perform board reset */ phba->fc_eventTag = 0; @@ -4005,6 +4076,12 @@ lpfc_sli4_brdreset(struct lpfc_hba *phba) phba->fcf.fcf_flag = 0; spin_unlock_irq(&phba->hbalock); + /* SLI4 INTF 2: if FW dump is being taken skip INIT_PORT */ + if (phba->hba_flag & HBA_FW_DUMP_OP) { + phba->hba_flag &= ~HBA_FW_DUMP_OP; + return rc; + } + /* Now physically reset the device */ lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "0389 Performing PCI function reset!\n"); @@ -5002,7 +5079,7 @@ lpfc_sli4_arm_cqeq_intr(struct lpfc_hba *phba) } while (++fcp_eqidx < phba->cfg_fcp_io_channel); } - if (phba->cfg_EnableXLane) + if (phba->cfg_fof) lpfc_sli4_cq_release(phba->sli4_hba.oas_cq, LPFC_QUEUE_REARM); if (phba->sli4_hba.hba_eq) { @@ -6722,7 +6799,6 @@ lpfc_mbox_timeout_handler(struct lpfc_hba *phba) LPFC_MBOXQ_t *pmbox = phba->sli.mbox_active; MAILBOX_t *mb = &pmbox->u.mb; struct lpfc_sli *psli = &phba->sli; - struct lpfc_sli_ring *pring; /* If the mailbox completed, process the completion and return */ if (lpfc_sli4_process_missed_mbox_completions(phba)) @@ -6764,8 +6840,7 @@ lpfc_mbox_timeout_handler(struct lpfc_hba *phba) psli->sli_flag &= ~LPFC_SLI_ACTIVE; 
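For contrast, the flush path rewritten above cancels everything without issuing any ABTS and completes the iocbs with a different reject reason. A comment-only summary of the two teardown routines; this is an editorial gloss of the code, not driver text.

/*
 * lpfc_sli_abort_fcp_rings()  - best-effort recovery: txq entries are
 *     cancelled with IOSTAT_LOCAL_REJECT/IOERR_SLI_ABORTED and an ABTS
 *     is issued for every command still on txcmplq.
 * lpfc_sli_flush_fcp_rings()  - the port is going down: no ABTS at all;
 *     txq and txcmplq are both emptied and completed with
 *     IOSTAT_LOCAL_REJECT/IOERR_SLI_DOWN, and HBA_FCP_IOQ_FLUSH is set
 *     so concurrent abort paths back off.
 */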
spin_unlock_irq(&phba->hbalock); - pring = &psli->ring[psli->fcp_ring]; - lpfc_sli_abort_iocb_ring(phba, pring); + lpfc_sli_abort_fcp_rings(phba); lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, "0345 Resetting board due to mailbox timeout\n"); @@ -8133,6 +8208,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq, abort_tag = (uint32_t) iocbq->iotag; xritag = iocbq->sli4_xritag; wqe->generic.wqe_com.word7 = 0; /* The ct field has moved so reset */ + wqe->generic.wqe_com.word10 = 0; /* words0-2 bpl convert bde */ if (iocbq->iocb.un.genreq64.bdl.bdeFlags == BUFF_TYPE_BLP_64) { numBdes = iocbq->iocb.un.genreq64.bdl.bdeSize / @@ -8639,8 +8715,7 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number, if ((piocb->iocb_flag & LPFC_IO_FCP) || (piocb->iocb_flag & LPFC_USE_FCPWQIDX)) { - if (!phba->cfg_EnableXLane || (!(piocb->iocb_flag & - LPFC_IO_OAS))) { + if (!phba->cfg_fof || (!(piocb->iocb_flag & LPFC_IO_OAS))) { wq = phba->sli4_hba.fcp_wq[piocb->fcp_wqidx]; } else { wq = phba->sli4_hba.oas_wq; @@ -8735,7 +8810,7 @@ lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number, if (phba->sli_rev == LPFC_SLI_REV4) { if (piocb->iocb_flag & LPFC_IO_FCP) { - if (!phba->cfg_EnableXLane || (!(piocb->iocb_flag & + if (!phba->cfg_fof || (!(piocb->iocb_flag & LPFC_IO_OAS))) { if (unlikely(!phba->sli4_hba.fcp_wq)) return IOCB_ERROR; @@ -9170,6 +9245,7 @@ lpfc_sli_queue_setup(struct lpfc_hba *phba) pring->sli.sli3.next_cmdidx = 0; pring->sli.sli3.local_getidx = 0; pring->sli.sli3.cmdidx = 0; + pring->flag = 0; INIT_LIST_HEAD(&pring->txq); INIT_LIST_HEAD(&pring->txcmplq); INIT_LIST_HEAD(&pring->iocb_continueq); @@ -9805,43 +9881,6 @@ abort_iotag_exit: } /** - * lpfc_sli_iocb_ring_abort - Unconditionally abort all iocbs on an iocb ring - * @phba: Pointer to HBA context object. - * @pring: Pointer to driver SLI ring object. - * - * This function aborts all iocbs in the given ring and frees all the iocb - * objects in txq. This function issues abort iocbs unconditionally for all - * the iocb commands in txcmplq. The iocbs in the txcmplq is not guaranteed - * to complete before the return of this function. The caller is not required - * to hold any locks. - **/ -static void -lpfc_sli_iocb_ring_abort(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) -{ - LIST_HEAD(completions); - struct lpfc_iocbq *iocb, *next_iocb; - - if (pring->ringno == LPFC_ELS_RING) - lpfc_fabric_abort_hba(phba); - - spin_lock_irq(&phba->hbalock); - - /* Take off all the iocbs on txq for cancelling */ - list_splice_init(&pring->txq, &completions); - pring->txq_cnt = 0; - - /* Next issue ABTS for everything on the txcmplq */ - list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) - lpfc_sli_abort_iotag_issue(phba, pring, iocb); - - spin_unlock_irq(&phba->hbalock); - - /* Cancel all the IOCBs from the completions list */ - lpfc_sli_cancel_iocbs(phba, &completions, IOSTAT_LOCAL_REJECT, - IOERR_SLI_ABORTED); -} - -/** * lpfc_sli_hba_iocb_abort - Abort all iocbs to an hba. * @phba: pointer to lpfc HBA data structure. * @@ -9856,7 +9895,7 @@ lpfc_sli_hba_iocb_abort(struct lpfc_hba *phba) for (i = 0; i < psli->num_rings; i++) { pring = &psli->ring[i]; - lpfc_sli_iocb_ring_abort(phba, pring); + lpfc_sli_abort_iocb_ring(phba, pring); } } @@ -10081,6 +10120,124 @@ lpfc_sli_abort_iocb(struct lpfc_vport *vport, struct lpfc_sli_ring *pring, } /** + * lpfc_sli_abort_taskmgmt - issue abort for all commands on a host/target/LUN + * @vport: Pointer to virtual port. 
+ * @pring: Pointer to driver SLI ring object. + * @tgt_id: SCSI ID of the target. + * @lun_id: LUN ID of the scsi device. + * @taskmgmt_cmd: LPFC_CTX_LUN/LPFC_CTX_TGT/LPFC_CTX_HOST. + * + * This function sends an abort command for every SCSI command + * associated with the given virtual port pending on the ring + * filtered by lpfc_sli_validate_fcp_iocb function. + * When taskmgmt_cmd == LPFC_CTX_LUN, the function sends abort only to the + * FCP iocbs associated with lun specified by tgt_id and lun_id + * parameters + * When taskmgmt_cmd == LPFC_CTX_TGT, the function sends abort only to the + * FCP iocbs associated with SCSI target specified by tgt_id parameter. + * When taskmgmt_cmd == LPFC_CTX_HOST, the function sends abort to all + * FCP iocbs associated with virtual port. + * This function returns number of iocbs it aborted . + * This function is called with no locks held right after a taskmgmt + * command is sent. + **/ +int +lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring, + uint16_t tgt_id, uint64_t lun_id, lpfc_ctx_cmd cmd) +{ + struct lpfc_hba *phba = vport->phba; + struct lpfc_iocbq *abtsiocbq; + struct lpfc_iocbq *iocbq; + IOCB_t *icmd; + int sum, i, ret_val; + unsigned long iflags; + struct lpfc_sli_ring *pring_s4; + uint32_t ring_number; + + spin_lock_irq(&phba->hbalock); + + /* all I/Os are in process of being flushed */ + if (phba->hba_flag & HBA_FCP_IOQ_FLUSH) { + spin_unlock_irq(&phba->hbalock); + return 0; + } + sum = 0; + + for (i = 1; i <= phba->sli.last_iotag; i++) { + iocbq = phba->sli.iocbq_lookup[i]; + + if (lpfc_sli_validate_fcp_iocb(iocbq, vport, tgt_id, lun_id, + cmd) != 0) + continue; + + /* + * If the iocbq is already being aborted, don't take a second + * action, but do count it. + */ + if (iocbq->iocb_flag & LPFC_DRIVER_ABORTED) + continue; + + /* issue ABTS for this IOCB based on iotag */ + abtsiocbq = __lpfc_sli_get_iocbq(phba); + if (abtsiocbq == NULL) + continue; + + icmd = &iocbq->iocb; + abtsiocbq->iocb.un.acxri.abortType = ABORT_TYPE_ABTS; + abtsiocbq->iocb.un.acxri.abortContextTag = icmd->ulpContext; + if (phba->sli_rev == LPFC_SLI_REV4) + abtsiocbq->iocb.un.acxri.abortIoTag = + iocbq->sli4_xritag; + else + abtsiocbq->iocb.un.acxri.abortIoTag = icmd->ulpIoTag; + abtsiocbq->iocb.ulpLe = 1; + abtsiocbq->iocb.ulpClass = icmd->ulpClass; + abtsiocbq->vport = vport; + + /* ABTS WQE must go to the same WQ as the WQE to be aborted */ + abtsiocbq->fcp_wqidx = iocbq->fcp_wqidx; + if (iocbq->iocb_flag & LPFC_IO_FCP) + abtsiocbq->iocb_flag |= LPFC_USE_FCPWQIDX; + + if (lpfc_is_link_up(phba)) + abtsiocbq->iocb.ulpCommand = CMD_ABORT_XRI_CN; + else + abtsiocbq->iocb.ulpCommand = CMD_CLOSE_XRI_CN; + + /* Setup callback routine and issue the command. */ + abtsiocbq->iocb_cmpl = lpfc_sli_abort_fcp_cmpl; + + /* + * Indicate the IO is being aborted by the driver and set + * the caller's flag into the aborted IO. 
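Note the accounting in lpfc_sli_abort_taskmgmt() above: commands already flagged LPFC_DRIVER_ABORTED are skipped so a second ABTS is never sent, and only aborts actually issued are counted into the caller's drain loop. A runnable toy of that skip-then-mark pattern; the table and flag value are made up, only the control flow mirrors the driver.

#include <stdio.h>

#define DRIVER_ABORTED 0x1

struct io { int match; int flags; };

/* Issue one abort per matching, not-yet-aborted command. */
static int abort_matching(struct io *tbl, int n)
{
    int issued = 0;

    for (int i = 0; i < n; i++) {
        if (!tbl[i].match)
            continue;                    /* wrong target/LUN */
        if (tbl[i].flags & DRIVER_ABORTED)
            continue;                    /* never abort twice */
        tbl[i].flags |= DRIVER_ABORTED;  /* mark, then issue ABTS */
        issued++;
    }
    return issued;
}

int main(void)
{
    struct io tbl[] = { {1, 0}, {1, DRIVER_ABORTED}, {0, 0}, {1, 0} };
    printf("aborts issued: %d\n", abort_matching(tbl, 4));  /* 2 */
    return 0;
}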
+ */ + iocbq->iocb_flag |= LPFC_DRIVER_ABORTED; + + if (phba->sli_rev == LPFC_SLI_REV4) { + ring_number = MAX_SLI3_CONFIGURED_RINGS + + iocbq->fcp_wqidx; + pring_s4 = &phba->sli.ring[ring_number]; + /* Note: both hbalock and ring_lock must be set here */ + spin_lock_irqsave(&pring_s4->ring_lock, iflags); + ret_val = __lpfc_sli_issue_iocb(phba, pring_s4->ringno, + abtsiocbq, 0); + spin_unlock_irqrestore(&pring_s4->ring_lock, iflags); + } else { + ret_val = __lpfc_sli_issue_iocb(phba, pring->ringno, + abtsiocbq, 0); + } + + + if (ret_val == IOCB_ERROR) + __lpfc_sli_release_iocbq(phba, abtsiocbq); + else + sum++; + } + spin_unlock_irq(&phba->hbalock); + return sum; +} + +/** * lpfc_sli_wake_iocb_wait - lpfc_sli_issue_iocb_wait's completion handler * @phba: Pointer to HBA context object. * @cmdiocbq: Pointer to command iocb. diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h index 6f04080f4ea..edb48832c39 100644 --- a/drivers/scsi/lpfc/lpfc_sli.h +++ b/drivers/scsi/lpfc/lpfc_sli.h @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2013 Emulex. All rights reserved. * + * Copyright (C) 2004-2014 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * * diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 9b8cda86617..7f50aa04d66 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2009-2013 Emulex. All rights reserved. * + * Copyright (C) 2009-2014 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * * diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index e32cbec7032..41675c1193e 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2004-2013 Emulex. All rights reserved. * + * Copyright (C) 2004-2014 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * * www.emulex.com * * * @@ -18,7 +18,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "8.3.45" +#define LPFC_DRIVER_VERSION "10.2.8001.0." #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ @@ -30,4 +30,4 @@ #define LPFC_MODULE_DESC "Emulex LightPulse Fibre Channel SCSI driver " \ LPFC_DRIVER_VERSION -#define LPFC_COPYRIGHT "Copyright(c) 2004-2013 Emulex. All rights reserved." +#define LPFC_COPYRIGHT "Copyright(c) 2004-2014 Emulex. All rights reserved." diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 1fa01044866..de5d0ae19d8 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -1648,16 +1648,16 @@ typedef struct { */ struct crc_context { uint32_t handle; /* System handle. 
*/ - uint32_t ref_tag; - uint16_t app_tag; + __le32 ref_tag; + __le16 app_tag; uint8_t ref_tag_mask[4]; /* Validation/Replacement Mask*/ uint8_t app_tag_mask[2]; /* Validation/Replacement Mask*/ - uint16_t guard_seed; /* Initial Guard Seed */ - uint16_t prot_opts; /* Requested Data Protection Mode */ - uint16_t blk_size; /* Data size in bytes */ + __le16 guard_seed; /* Initial Guard Seed */ + __le16 prot_opts; /* Requested Data Protection Mode */ + __le16 blk_size; /* Data size in bytes */ uint16_t runt_blk_guard; /* Guard value for runt block (tape * only) */ - uint32_t byte_count; /* Total byte count/ total data + __le32 byte_count; /* Total byte count/ total data * transfer count */ union { struct { @@ -1671,10 +1671,10 @@ struct crc_context { uint32_t reserved_6; } nobundling; struct { - uint32_t dif_byte_count; /* Total DIF byte + __le32 dif_byte_count; /* Total DIF byte * count */ uint16_t reserved_1; - uint16_t dseg_count; /* Data segment count */ + __le16 dseg_count; /* Data segment count */ uint32_t reserved_2; uint32_t data_address[2]; uint32_t data_length; diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index b1d10f9935c..4b188b0164e 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -104,7 +104,6 @@ static void qlt_reject_free_srr_imm(struct scsi_qla_host *ha, /* * Global Variables */ -static struct kmem_cache *qla_tgt_cmd_cachep; static struct kmem_cache *qla_tgt_mgmt_cmd_cachep; static mempool_t *qla_tgt_mgmt_cmd_mempool; static struct workqueue_struct *qla_tgt_wq; @@ -1997,7 +1996,7 @@ qlt_set_t10dif_tags(struct se_cmd *se_cmd, struct crc_context *ctx) * have been immplemented by TCM, before AppTag is avail. * Look for modesense_handlers[] */ - ctx->app_tag = __constant_cpu_to_le16(0); + ctx->app_tag = 0; ctx->app_tag_mask[0] = 0x0; ctx->app_tag_mask[1] = 0x0; @@ -2079,6 +2078,7 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) struct se_cmd *se_cmd = &cmd->se_cmd; uint32_t h; struct atio_from_isp *atio = &prm->cmd->atio; + uint16_t t16; sgc = 0; ha = vha->hw; @@ -2175,8 +2175,13 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) pkt->initiator_id[1] = atio->u.isp24.fcp_hdr.s_id[1]; pkt->initiator_id[2] = atio->u.isp24.fcp_hdr.s_id[0]; pkt->exchange_addr = atio->u.isp24.exchange_addr; - pkt->ox_id = swab16(atio->u.isp24.fcp_hdr.ox_id); - pkt->flags |= (atio->u.isp24.attr << 9); + + /* silence compile warning */ + t16 = be16_to_cpu(atio->u.isp24.fcp_hdr.ox_id); + pkt->ox_id = cpu_to_le16(t16); + + t16 = (atio->u.isp24.attr << 9); + pkt->flags |= cpu_to_le16(t16); pkt->relative_offset = cpu_to_le32(prm->cmd->offset); /* Set transfer direction */ @@ -2251,8 +2256,7 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) if (bundling && prm->prot_seg_cnt) { /* Walks dif segments */ - pkt->add_flags |= - __constant_cpu_to_le16(CTIO_CRC2_AF_DIF_DSD_ENA); + pkt->add_flags |= CTIO_CRC2_AF_DIF_DSD_ENA; cur_dsd = (uint32_t *) &crc_ctx_pkt->u.bundling.dif_address; if (qla24xx_walk_and_build_prot_sglist(ha, NULL, cur_dsd, @@ -2705,6 +2709,8 @@ done: void qlt_free_cmd(struct qla_tgt_cmd *cmd) { + struct qla_tgt_sess *sess = cmd->sess; + ql_dbg(ql_dbg_tgt, cmd->vha, 0xe074, "%s: se_cmd[%p] ox_id %04x\n", __func__, &cmd->se_cmd, @@ -2713,7 +2719,12 @@ void qlt_free_cmd(struct qla_tgt_cmd *cmd) BUG_ON(cmd->sg_mapped); if (unlikely(cmd->free_sg)) kfree(cmd->sg); - kmem_cache_free(qla_tgt_cmd_cachep, cmd); + + if (!sess || !sess->se_sess) { + WARN_ON(1); + 
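The qla2xxx changes above replace the blind swab16() on ox_id with an explicit big-endian-to-CPU read followed by a CPU-to-little-endian store through a temporary, which is exactly what the new __be16/__le16 annotations let sparse verify. A runnable userspace sketch of the same conversion; ntohs() stands in for be16_to_cpu(), and the no-op cpu_to_le16_sketch() assumes a little-endian host.

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>   /* ntohs()/htons() as be16 helpers */

static uint16_t cpu_to_le16_sketch(uint16_t v)
{
    /* No-op on a little-endian host; a kernel build would use
     * cpu_to_le16() and let the endianness config decide. */
    return v;
}

int main(void)
{
    uint16_t wire_be = htons(0x1234);   /* ox_id as seen on the wire */
    uint16_t t16 = ntohs(wire_be);      /* be16_to_cpu(): CPU order */
    uint16_t ctio = cpu_to_le16_sketch(t16);

    printf("cpu order 0x%04x -> ctio 0x%04x\n",
           (unsigned)t16, (unsigned)ctio);
    return 0;
}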
return; + } + percpu_ida_free(&sess->se_sess->sess_tag_pool, cmd->se_cmd.map_tag); } EXPORT_SYMBOL(qlt_free_cmd); @@ -3075,13 +3086,12 @@ static struct qla_tgt_sess *qlt_make_local_sess(struct scsi_qla_host *, /* * Process context for I/O path into tcm_qla2xxx code */ -static void qlt_do_work(struct work_struct *work) +static void __qlt_do_work(struct qla_tgt_cmd *cmd) { - struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work); scsi_qla_host_t *vha = cmd->vha; struct qla_hw_data *ha = vha->hw; struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; - struct qla_tgt_sess *sess = NULL; + struct qla_tgt_sess *sess = cmd->sess; struct atio_from_isp *atio = &cmd->atio; unsigned char *cdb; unsigned long flags; @@ -3091,41 +3101,6 @@ static void qlt_do_work(struct work_struct *work) if (tgt->tgt_stop) goto out_term; - spin_lock_irqsave(&ha->hardware_lock, flags); - sess = ha->tgt.tgt_ops->find_sess_by_s_id(vha, - atio->u.isp24.fcp_hdr.s_id); - /* Do kref_get() before dropping qla_hw_data->hardware_lock. */ - if (sess) - kref_get(&sess->se_sess->sess_kref); - spin_unlock_irqrestore(&ha->hardware_lock, flags); - - if (unlikely(!sess)) { - uint8_t *s_id = atio->u.isp24.fcp_hdr.s_id; - - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf022, - "qla_target(%d): Unable to find wwn login" - " (s_id %x:%x:%x), trying to create it manually\n", - vha->vp_idx, s_id[0], s_id[1], s_id[2]); - - if (atio->u.raw.entry_count > 1) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf023, - "Dropping multy entry cmd %p\n", cmd); - goto out_term; - } - - mutex_lock(&vha->vha_tgt.tgt_mutex); - sess = qlt_make_local_sess(vha, s_id); - /* sess has an extra creation ref. */ - mutex_unlock(&vha->vha_tgt.tgt_mutex); - - if (!sess) - goto out_term; - } - - cmd->sess = sess; - cmd->loop_id = sess->loop_id; - cmd->conf_compl_supported = sess->conf_compl_supported; - cdb = &atio->u.isp24.fcp_cmnd.cdb[0]; cmd->tag = atio->u.isp24.exchange_addr; cmd->unpacked_lun = scsilun_to_int( @@ -3153,8 +3128,8 @@ static void qlt_do_work(struct work_struct *work) cmd, &cmd->se_cmd, cmd->unpacked_lun, cmd->tag, data_length, cmd->atio.u.isp24.fcp_hdr.ox_id); - ret = vha->hw->tgt.tgt_ops->handle_cmd(vha, cmd, cdb, data_length, - fcp_task_attr, data_dir, bidi); + ret = ha->tgt.tgt_ops->handle_cmd(vha, cmd, cdb, data_length, + fcp_task_attr, data_dir, bidi); if (ret != 0) goto out_term; /* @@ -3173,17 +3148,114 @@ out_term: */ spin_lock_irqsave(&ha->hardware_lock, flags); qlt_send_term_exchange(vha, NULL, &cmd->atio, 1); - kmem_cache_free(qla_tgt_cmd_cachep, cmd); - if (sess) + percpu_ida_free(&sess->se_sess->sess_tag_pool, cmd->se_cmd.map_tag); + ha->tgt.tgt_ops->put_sess(sess); + spin_unlock_irqrestore(&ha->hardware_lock, flags); +} + +static void qlt_do_work(struct work_struct *work) +{ + struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work); + + __qlt_do_work(cmd); +} + +static struct qla_tgt_cmd *qlt_get_tag(scsi_qla_host_t *vha, + struct qla_tgt_sess *sess, + struct atio_from_isp *atio) +{ + struct se_session *se_sess = sess->se_sess; + struct qla_tgt_cmd *cmd; + int tag; + + tag = percpu_ida_alloc(&se_sess->sess_tag_pool, TASK_RUNNING); + if (tag < 0) + return NULL; + + cmd = &((struct qla_tgt_cmd *)se_sess->sess_cmd_map)[tag]; + memset(cmd, 0, sizeof(struct qla_tgt_cmd)); + + memcpy(&cmd->atio, atio, sizeof(*atio)); + cmd->state = QLA_TGT_STATE_NEW; + cmd->tgt = vha->vha_tgt.qla_tgt; + cmd->vha = vha; + cmd->se_cmd.map_tag = tag; + cmd->sess = sess; + cmd->loop_id = sess->loop_id; + cmd->conf_compl_supported = sess->conf_compl_supported; + + return 
cmd; +} + +static void qlt_send_busy(struct scsi_qla_host *, struct atio_from_isp *, + uint16_t); + +static void qlt_create_sess_from_atio(struct work_struct *work) +{ + struct qla_tgt_sess_op *op = container_of(work, + struct qla_tgt_sess_op, work); + scsi_qla_host_t *vha = op->vha; + struct qla_hw_data *ha = vha->hw; + struct qla_tgt_sess *sess; + struct qla_tgt_cmd *cmd; + unsigned long flags; + uint8_t *s_id = op->atio.u.isp24.fcp_hdr.s_id; + + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf022, + "qla_target(%d): Unable to find wwn login" + " (s_id %x:%x:%x), trying to create it manually\n", + vha->vp_idx, s_id[0], s_id[1], s_id[2]); + + if (op->atio.u.raw.entry_count > 1) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf023, + "Dropping multy entry atio %p\n", &op->atio); + goto out_term; + } + + mutex_lock(&vha->vha_tgt.tgt_mutex); + sess = qlt_make_local_sess(vha, s_id); + /* sess has an extra creation ref. */ + mutex_unlock(&vha->vha_tgt.tgt_mutex); + + if (!sess) + goto out_term; + /* + * Now obtain a pre-allocated session tag using the original op->atio + * packet header, and dispatch into __qlt_do_work() using the existing + * process context. + */ + cmd = qlt_get_tag(vha, sess, &op->atio); + if (!cmd) { + spin_lock_irqsave(&ha->hardware_lock, flags); + qlt_send_busy(vha, &op->atio, SAM_STAT_BUSY); ha->tgt.tgt_ops->put_sess(sess); + spin_unlock_irqrestore(&ha->hardware_lock, flags); + kfree(op); + return; + } + /* + * __qlt_do_work() will call ha->tgt.tgt_ops->put_sess() to release + * the extra reference taken above by qlt_make_local_sess() + */ + __qlt_do_work(cmd); + kfree(op); + return; + +out_term: + spin_lock_irqsave(&ha->hardware_lock, flags); + qlt_send_term_exchange(vha, NULL, &op->atio, 1); spin_unlock_irqrestore(&ha->hardware_lock, flags); + kfree(op); + } /* ha->hardware_lock supposed to be held on entry */ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, struct atio_from_isp *atio) { + struct qla_hw_data *ha = vha->hw; struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; + struct qla_tgt_sess *sess; struct qla_tgt_cmd *cmd; if (unlikely(tgt->tgt_stop)) { @@ -3192,18 +3264,31 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, return -EFAULT; } - cmd = kmem_cache_zalloc(qla_tgt_cmd_cachep, GFP_ATOMIC); + sess = ha->tgt.tgt_ops->find_sess_by_s_id(vha, atio->u.isp24.fcp_hdr.s_id); + if (unlikely(!sess)) { + struct qla_tgt_sess_op *op = kzalloc(sizeof(struct qla_tgt_sess_op), + GFP_ATOMIC); + if (!op) + return -ENOMEM; + + memcpy(&op->atio, atio, sizeof(*atio)); + INIT_WORK(&op->work, qlt_create_sess_from_atio); + queue_work(qla_tgt_wq, &op->work); + return 0; + } + /* + * Do kref_get() before returning + dropping qla_hw_data->hardware_lock. 
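qlt_get_tag() above replaces the per-command kmem_cache with a per-session pool: a tag from percpu_ida indexes a preallocated slot in sess_cmd_map, and the slot must be zeroed because it is recycled. A self-contained toy analogue; a free-tag stack stands in for percpu_ida, and the sizes and names are made up.

#include <stdio.h>
#include <string.h>

#define NUM_TAGS 4
struct cmd { int tag; char payload[32]; };

static struct cmd cmd_map[NUM_TAGS];                 /* preallocated */
static int free_tags[NUM_TAGS] = { 0, 1, 2, 3 };
static int free_top = NUM_TAGS;

static struct cmd *get_tagged_cmd(void)
{
    if (free_top == 0)
        return NULL;                  /* pool exhausted, caller backs off */
    int tag = free_tags[--free_top];
    struct cmd *c = &cmd_map[tag];
    memset(c, 0, sizeof(*c));         /* recycled slot: zero it */
    c->tag = tag;
    return c;
}

static void put_tagged_cmd(struct cmd *c)
{
    free_tags[free_top++] = c->tag;   /* like percpu_ida_free() */
}

int main(void)
{
    struct cmd *c = get_tagged_cmd();
    printf("got tag %d\n", c->tag);
    put_tagged_cmd(c);
    return 0;
}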
+ */ + kref_get(&sess->se_sess->sess_kref); + + cmd = qlt_get_tag(vha, sess, atio); if (!cmd) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05e, "qla_target(%d): Allocation of cmd failed\n", vha->vp_idx); + ha->tgt.tgt_ops->put_sess(sess); return -ENOMEM; } - memcpy(&cmd->atio, atio, sizeof(*atio)); - cmd->state = QLA_TGT_STATE_NEW; - cmd->tgt = vha->vha_tgt.qla_tgt; - cmd->vha = vha; - INIT_WORK(&cmd->work, qlt_do_work); queue_work(qla_tgt_wq, &cmd->work); return 0; @@ -5501,23 +5586,13 @@ int __init qlt_init(void) if (!QLA_TGT_MODE_ENABLED()) return 0; - qla_tgt_cmd_cachep = kmem_cache_create("qla_tgt_cmd_cachep", - sizeof(struct qla_tgt_cmd), __alignof__(struct qla_tgt_cmd), 0, - NULL); - if (!qla_tgt_cmd_cachep) { - ql_log(ql_log_fatal, NULL, 0xe06c, - "kmem_cache_create for qla_tgt_cmd_cachep failed\n"); - return -ENOMEM; - } - qla_tgt_mgmt_cmd_cachep = kmem_cache_create("qla_tgt_mgmt_cmd_cachep", sizeof(struct qla_tgt_mgmt_cmd), __alignof__(struct qla_tgt_mgmt_cmd), 0, NULL); if (!qla_tgt_mgmt_cmd_cachep) { ql_log(ql_log_fatal, NULL, 0xe06d, "kmem_cache_create for qla_tgt_mgmt_cmd_cachep failed\n"); - ret = -ENOMEM; - goto out; + return -ENOMEM; } qla_tgt_mgmt_cmd_mempool = mempool_create(25, mempool_alloc_slab, @@ -5545,8 +5620,6 @@ out_cmd_mempool: mempool_destroy(qla_tgt_mgmt_cmd_mempool); out_mgmt_cmd_cachep: kmem_cache_destroy(qla_tgt_mgmt_cmd_cachep); -out: - kmem_cache_destroy(qla_tgt_cmd_cachep); return ret; } @@ -5558,5 +5631,4 @@ void qlt_exit(void) destroy_workqueue(qla_tgt_wq); mempool_destroy(qla_tgt_mgmt_cmd_mempool); kmem_cache_destroy(qla_tgt_mgmt_cmd_cachep); - kmem_cache_destroy(qla_tgt_cmd_cachep); } diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index f873e10451d..e0a58fd13f6 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -316,7 +316,7 @@ struct fcp_hdr { uint8_t seq_id; uint8_t df_ctl; uint16_t seq_cnt; - uint16_t ox_id; + __be16 ox_id; uint16_t rx_id; uint32_t parameter; } __packed; @@ -441,7 +441,7 @@ struct ctio7_to_24xx { union { struct { uint16_t reserved1; - uint16_t flags; + __le16 flags; uint32_t residual; uint16_t ox_id; uint16_t scsi_status; @@ -527,7 +527,7 @@ struct ctio_crc2_to_fw { uint32_t handle; /* System handle. */ uint16_t nport_handle; /* N_PORT handle. */ - uint16_t timeout; /* Command timeout. */ + __le16 timeout; /* Command timeout. */ uint16_t dseg_count; /* Data segment count. */ uint8_t vp_index; @@ -538,15 +538,15 @@ struct ctio_crc2_to_fw { uint8_t reserved1; uint32_t exchange_addr; /* rcv exchange address */ uint16_t reserved2; - uint16_t flags; /* refer to CTIO7 flags values */ + __le16 flags; /* refer to CTIO7 flags values */ uint32_t residual; - uint16_t ox_id; + __le16 ox_id; uint16_t scsi_status; - uint32_t relative_offset; + __le32 relative_offset; uint32_t reserved5; - uint32_t transfer_length; /* total fc transfer length */ + __le32 transfer_length; /* total fc transfer length */ uint32_t reserved6; - uint32_t crc_context_address[2];/* Data segment address. */ + __le32 crc_context_address[2];/* Data segment address. */ uint16_t crc_context_len; /* Data segment length. */ uint16_t reserved_1; /* MUST be set to 0. 
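The unknown-session path in qlt_handle_cmd_for_atio() above can no longer create the session inline, since session setup may block while hardware_lock is held; instead the ATIO is copied into a qla_tgt_sess_op and deferred. A kernel-style restatement of that deferral, not standalone code; it condenses the logic above using the patch's own names rather than introducing new API.

/* Atomic context: copy the request and push the blocking work to
 * process context on qla_tgt_wq. */
struct qla_tgt_sess_op *op = kzalloc(sizeof(*op), GFP_ATOMIC);
if (!op)
    return -ENOMEM;                      /* no blocking alloc here */
memcpy(&op->atio, atio, sizeof(*atio));
INIT_WORK(&op->work, qlt_create_sess_from_atio);
queue_work(qla_tgt_wq, &op->work);       /* worker may sleep in tgt_mutex */
return 0;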
*/ } __packed; @@ -870,6 +870,12 @@ struct qla_tgt { struct list_head tgt_list_entry; }; +struct qla_tgt_sess_op { + struct scsi_qla_host *vha; + struct atio_from_isp atio; + struct work_struct work; +}; + /* * Equivilant to IT Nexus (Initiator-Target) */ diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 896cb23adb7..e2beab96209 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -1501,6 +1501,8 @@ static int tcm_qla2xxx_check_initiator_node_acl( struct qla_tgt_sess *sess = qla_tgt_sess; unsigned char port_name[36]; unsigned long flags; + int num_tags = (ha->fw_xcb_count) ? ha->fw_xcb_count : + TCM_QLA2XXX_DEFAULT_TAGS; lport = vha->vha_tgt.target_lport_ptr; if (!lport) { @@ -1518,7 +1520,9 @@ static int tcm_qla2xxx_check_initiator_node_acl( } se_tpg = &tpg->se_tpg; - se_sess = transport_init_session(TARGET_PROT_NORMAL); + se_sess = transport_init_session_tags(num_tags, + sizeof(struct qla_tgt_cmd), + TARGET_PROT_NORMAL); if (IS_ERR(se_sess)) { pr_err("Unable to initialize struct se_session\n"); return PTR_ERR(se_sess); diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h b/drivers/scsi/qla2xxx/tcm_qla2xxx.h index 33aaac8c7d5..10c00214564 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h @@ -4,6 +4,11 @@ #define TCM_QLA2XXX_VERSION "v0.1" /* length of ASCII WWPNs including pad */ #define TCM_QLA2XXX_NAMELEN 32 +/* + * Number of pre-allocated per-session tags, based upon the worst-case + * per port number of iocbs + */ +#define TCM_QLA2XXX_DEFAULT_TAGS 2088 #include "qla_target.h" diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 99fdb940394..89ee5929eb6 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -23,6 +23,7 @@ #include <linux/virtio_config.h> #include <linux/virtio_scsi.h> #include <linux/cpu.h> +#include <linux/blkdev.h> #include <scsi/scsi_host.h> #include <scsi/scsi_device.h> #include <scsi/scsi_cmnd.h> @@ -37,6 +38,7 @@ struct virtio_scsi_cmd { struct completion *comp; union { struct virtio_scsi_cmd_req cmd; + struct virtio_scsi_cmd_req_pi cmd_pi; struct virtio_scsi_ctrl_tmf_req tmf; struct virtio_scsi_ctrl_an_req an; } req; @@ -399,7 +401,7 @@ static int virtscsi_add_cmd(struct virtqueue *vq, size_t req_size, size_t resp_size) { struct scsi_cmnd *sc = cmd->sc; - struct scatterlist *sgs[4], req, resp; + struct scatterlist *sgs[6], req, resp; struct sg_table *out, *in; unsigned out_num = 0, in_num = 0; @@ -417,16 +419,24 @@ static int virtscsi_add_cmd(struct virtqueue *vq, sgs[out_num++] = &req; /* Data-out buffer. */ - if (out) + if (out) { + /* Place WRITE protection SGLs before Data OUT payload */ + if (scsi_prot_sg_count(sc)) + sgs[out_num++] = scsi_prot_sglist(sc); sgs[out_num++] = out->sgl; + } /* Response header. 
*/ sg_init_one(&resp, &cmd->resp, resp_size); sgs[out_num + in_num++] = &resp; /* Data-in buffer */ - if (in) + if (in) { + /* Place READ protection SGLs before Data IN payload */ + if (scsi_prot_sg_count(sc)) + sgs[out_num + in_num++] = scsi_prot_sglist(sc); sgs[out_num + in_num++] = in->sgl; + } return virtqueue_add_sgs(vq, sgs, out_num, in_num, cmd, GFP_ATOMIC); } @@ -451,12 +461,45 @@ static int virtscsi_kick_cmd(struct virtio_scsi_vq *vq, return err; } +static void virtio_scsi_init_hdr(struct virtio_scsi_cmd_req *cmd, + struct scsi_cmnd *sc) +{ + cmd->lun[0] = 1; + cmd->lun[1] = sc->device->id; + cmd->lun[2] = (sc->device->lun >> 8) | 0x40; + cmd->lun[3] = sc->device->lun & 0xff; + cmd->tag = (unsigned long)sc; + cmd->task_attr = VIRTIO_SCSI_S_SIMPLE; + cmd->prio = 0; + cmd->crn = 0; +} + +static void virtio_scsi_init_hdr_pi(struct virtio_scsi_cmd_req_pi *cmd_pi, + struct scsi_cmnd *sc) +{ + struct request *rq = sc->request; + struct blk_integrity *bi; + + virtio_scsi_init_hdr((struct virtio_scsi_cmd_req *)cmd_pi, sc); + + if (!rq || !scsi_prot_sg_count(sc)) + return; + + bi = blk_get_integrity(rq->rq_disk); + + if (sc->sc_data_direction == DMA_TO_DEVICE) + cmd_pi->pi_bytesout = blk_rq_sectors(rq) * bi->tuple_size; + else if (sc->sc_data_direction == DMA_FROM_DEVICE) + cmd_pi->pi_bytesin = blk_rq_sectors(rq) * bi->tuple_size; +} + static int virtscsi_queuecommand(struct virtio_scsi *vscsi, struct virtio_scsi_vq *req_vq, struct scsi_cmnd *sc) { struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev); struct virtio_scsi_cmd *cmd = scsi_cmd_priv(sc); + int req_size; BUG_ON(scsi_sg_count(sc) > shost->sg_tablesize); @@ -468,22 +511,20 @@ static int virtscsi_queuecommand(struct virtio_scsi *vscsi, memset(cmd, 0, sizeof(*cmd)); cmd->sc = sc; - cmd->req.cmd = (struct virtio_scsi_cmd_req){ - .lun[0] = 1, - .lun[1] = sc->device->id, - .lun[2] = (sc->device->lun >> 8) | 0x40, - .lun[3] = sc->device->lun & 0xff, - .tag = (unsigned long)sc, - .task_attr = VIRTIO_SCSI_S_SIMPLE, - .prio = 0, - .crn = 0, - }; BUG_ON(sc->cmd_len > VIRTIO_SCSI_CDB_SIZE); - memcpy(cmd->req.cmd.cdb, sc->cmnd, sc->cmd_len); - if (virtscsi_kick_cmd(req_vq, cmd, - sizeof cmd->req.cmd, sizeof cmd->resp.cmd) != 0) + if (virtio_has_feature(vscsi->vdev, VIRTIO_SCSI_F_T10_PI)) { + virtio_scsi_init_hdr_pi(&cmd->req.cmd_pi, sc); + memcpy(cmd->req.cmd_pi.cdb, sc->cmnd, sc->cmd_len); + req_size = sizeof(cmd->req.cmd_pi); + } else { + virtio_scsi_init_hdr(&cmd->req.cmd, sc); + memcpy(cmd->req.cmd.cdb, sc->cmnd, sc->cmd_len); + req_size = sizeof(cmd->req.cmd); + } + + if (virtscsi_kick_cmd(req_vq, cmd, req_size, sizeof(cmd->resp.cmd)) != 0) return SCSI_MLQUEUE_HOST_BUSY; return 0; } @@ -820,7 +861,7 @@ static int virtscsi_probe(struct virtio_device *vdev) { struct Scsi_Host *shost; struct virtio_scsi *vscsi; - int err; + int err, host_prot; u32 sg_elems, num_targets; u32 cmd_per_lun; u32 num_queues; @@ -870,6 +911,16 @@ static int virtscsi_probe(struct virtio_device *vdev) shost->max_id = num_targets; shost->max_channel = 0; shost->max_cmd_len = VIRTIO_SCSI_CDB_SIZE; + + if (virtio_has_feature(vdev, VIRTIO_SCSI_F_T10_PI)) { + host_prot = SHOST_DIF_TYPE1_PROTECTION | SHOST_DIF_TYPE2_PROTECTION | + SHOST_DIF_TYPE3_PROTECTION | SHOST_DIX_TYPE1_PROTECTION | + SHOST_DIX_TYPE2_PROTECTION | SHOST_DIX_TYPE3_PROTECTION; + + scsi_host_set_prot(shost, host_prot); + scsi_host_set_guard(shost, SHOST_DIX_GUARD_CRC); + } + err = scsi_add_host(shost, &vdev->dev); if (err) goto scsi_add_host_failed; @@ -939,6 +990,7 @@ static struct virtio_device_id 
id_table[] = { static unsigned int features[] = { VIRTIO_SCSI_F_HOTPLUG, VIRTIO_SCSI_F_CHANGE, + VIRTIO_SCSI_F_T10_PI, }; static struct virtio_driver virtio_scsi_driver = { diff --git a/drivers/staging/media/omap4iss/iss_video.c b/drivers/staging/media/omap4iss/iss_video.c index ded31ea6bd3..cbf455d66f7 100644 --- a/drivers/staging/media/omap4iss/iss_video.c +++ b/drivers/staging/media/omap4iss/iss_video.c @@ -396,7 +396,7 @@ static void iss_video_buf_queue(struct vb2_buffer *vb) } } -static struct vb2_ops iss_video_vb2ops = { +static const struct vb2_ops iss_video_vb2ops = { .queue_setup = iss_video_queue_setup, .buf_prepare = iss_video_buf_prepare, .buf_queue = iss_video_buf_queue, diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 9189bc0a87a..5663f4d19d0 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -300,7 +300,7 @@ bool iscsit_check_np_match( port = ntohs(sock_in->sin_port); } - if ((ip_match == true) && (np->np_port == port) && + if (ip_match && (np->np_port == port) && (np->np_network_transport == network_transport)) return true; @@ -325,7 +325,7 @@ static struct iscsi_np *iscsit_get_np( } match = iscsit_check_np_match(sockaddr, np, network_transport); - if (match == true) { + if (match) { /* * Increment the np_exports reference count now to * prevent iscsit_del_np() below from being called @@ -1121,7 +1121,7 @@ iscsit_get_immediate_data(struct iscsi_cmd *cmd, struct iscsi_scsi_req *hdr, /* * Special case for Unsupported SAM WRITE Opcodes and ImmediateData=Yes. */ - if (dump_payload == true) + if (dump_payload) goto after_immediate_data; immed_ret = iscsit_handle_immediate_data(cmd, hdr, @@ -3390,7 +3390,9 @@ static bool iscsit_check_inaddr_any(struct iscsi_np *np) #define SENDTARGETS_BUF_LIMIT 32768U -static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd) +static int +iscsit_build_sendtargets_response(struct iscsi_cmd *cmd, + enum iscsit_transport_type network_transport) { char *payload = NULL; struct iscsi_conn *conn = cmd->conn; @@ -3467,6 +3469,9 @@ static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd) struct iscsi_np *np = tpg_np->tpg_np; bool inaddr_any = iscsit_check_inaddr_any(np); + if (np->np_network_transport != network_transport) + continue; + if (!target_name_printed) { len = sprintf(buf, "TargetName=%s", tiqn->tiqn); @@ -3485,10 +3490,8 @@ static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd) len = sprintf(buf, "TargetAddress=" "%s:%hu,%hu", - (inaddr_any == false) ? - np->np_ip : conn->local_ip, - (inaddr_any == false) ? - np->np_port : conn->local_port, + inaddr_any ? conn->local_ip : np->np_ip, + inaddr_any ? 
conn->local_port : np->np_port, tpg->tpgt); len += 1; @@ -3520,11 +3523,12 @@ eob: int iscsit_build_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn, - struct iscsi_text_rsp *hdr) + struct iscsi_text_rsp *hdr, + enum iscsit_transport_type network_transport) { int text_length, padding; - text_length = iscsit_build_sendtargets_response(cmd); + text_length = iscsit_build_sendtargets_response(cmd, network_transport); if (text_length < 0) return text_length; @@ -3562,7 +3566,7 @@ static int iscsit_send_text_rsp( u32 tx_size = 0; int text_length, iov_count = 0, rc; - rc = iscsit_build_text_rsp(cmd, conn, hdr); + rc = iscsit_build_text_rsp(cmd, conn, hdr, ISCSI_TCP); if (rc < 0) return rc; @@ -4234,8 +4238,6 @@ int iscsit_close_connection( if (conn->conn_transport->iscsit_wait_conn) conn->conn_transport->iscsit_wait_conn(conn); - iscsit_free_queue_reqs_for_conn(conn); - /* * During Connection recovery drop unacknowledged out of order * commands for this connection, and prepare the other commands @@ -4252,6 +4254,7 @@ int iscsit_close_connection( iscsit_clear_ooo_cmdsns_for_conn(conn); iscsit_release_commands_from_conn(conn); } + iscsit_free_queue_reqs_for_conn(conn); /* * Handle decrementing session or connection usage count if diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c index de77d9aa22c..19b842c3e0b 100644 --- a/drivers/target/iscsi/iscsi_target_auth.c +++ b/drivers/target/iscsi/iscsi_target_auth.c @@ -71,6 +71,40 @@ static void chap_gen_challenge( challenge_asciihex); } +static int chap_check_algorithm(const char *a_str) +{ + char *tmp, *orig, *token; + + tmp = kstrdup(a_str, GFP_KERNEL); + if (!tmp) { + pr_err("Memory allocation failed for CHAP_A temporary buffer\n"); + return CHAP_DIGEST_UNKNOWN; + } + orig = tmp; + + token = strsep(&tmp, "="); + if (!token) + goto out; + + if (strcmp(token, "CHAP_A")) { + pr_err("Unable to locate CHAP_A key\n"); + goto out; + } + while (token) { + token = strsep(&tmp, ","); + if (!token) + goto out; + + if (!strncmp(token, "5", 1)) { + pr_debug("Selected MD5 Algorithm\n"); + kfree(orig); + return CHAP_DIGEST_MD5; + } + } +out: + kfree(orig); + return CHAP_DIGEST_UNKNOWN; +} static struct iscsi_chap *chap_server_open( struct iscsi_conn *conn, @@ -79,6 +113,7 @@ static struct iscsi_chap *chap_server_open( char *aic_str, unsigned int *aic_len) { + int ret; struct iscsi_chap *chap; if (!(auth->naf_flags & NAF_USERID_SET) || @@ -93,21 +128,24 @@ static struct iscsi_chap *chap_server_open( return NULL; chap = conn->auth_protocol; - /* - * We only support MD5 MDA presently. - */ - if (strncmp(a_str, "CHAP_A=5", 8)) { - pr_err("CHAP_A is not MD5.\n"); + ret = chap_check_algorithm(a_str); + switch (ret) { + case CHAP_DIGEST_MD5: + pr_debug("[server] Got CHAP_A=5\n"); + /* + * Send back CHAP_A set to MD5. + */ + *aic_len = sprintf(aic_str, "CHAP_A=5"); + *aic_len += 1; + chap->digest_type = CHAP_DIGEST_MD5; + pr_debug("[server] Sending CHAP_A=%d\n", chap->digest_type); + break; + case CHAP_DIGEST_UNKNOWN: + default: + pr_err("Unsupported CHAP_A value\n"); return NULL; } - pr_debug("[server] Got CHAP_A=5\n"); - /* - * Send back CHAP_A set to MD5. - */ - *aic_len = sprintf(aic_str, "CHAP_A=5"); - *aic_len += 1; - chap->digest_type = CHAP_DIGEST_MD5; - pr_debug("[server] Sending CHAP_A=%d\n", chap->digest_type); + /* * Set Identifier. 
*/ @@ -314,6 +352,16 @@ static int chap_server_compute_md5( goto out; } /* + * During mutual authentication, the CHAP_C generated by the + * initiator must not match the original CHAP_C generated by + * the target. + */ + if (!memcmp(challenge_binhex, chap->challenge, CHAP_CHALLENGE_LENGTH)) { + pr_err("initiator CHAP_C matches target CHAP_C, failing" + " login attempt\n"); + goto out; + } + /* * Generate CHAP_N and CHAP_R for mutual authentication. */ tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); diff --git a/drivers/target/iscsi/iscsi_target_auth.h b/drivers/target/iscsi/iscsi_target_auth.h index 2f463c09626..d22f7b96a06 100644 --- a/drivers/target/iscsi/iscsi_target_auth.h +++ b/drivers/target/iscsi/iscsi_target_auth.h @@ -1,6 +1,7 @@ #ifndef _ISCSI_CHAP_H_ #define _ISCSI_CHAP_H_ +#define CHAP_DIGEST_UNKNOWN 0 #define CHAP_DIGEST_MD5 5 #define CHAP_DIGEST_SHA 6 diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index d9b1d88e1ad..fecb69535a1 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -1145,7 +1145,7 @@ iscsit_conn_set_transport(struct iscsi_conn *conn, struct iscsit_transport *t) void iscsi_target_login_sess_out(struct iscsi_conn *conn, struct iscsi_np *np, bool zero_tsih, bool new_sess) { - if (new_sess == false) + if (!new_sess) goto old_sess_out; pr_err("iSCSI Login negotiation failed.\n"); diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 75b685960e8..62a095f36bf 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -404,7 +404,7 @@ static void iscsi_target_sk_data_ready(struct sock *sk) } rc = schedule_delayed_work(&conn->login_work, 0); - if (rc == false) { + if (!rc) { pr_debug("iscsi_target_sk_data_ready, schedule_delayed_work" " got false\n"); } @@ -513,7 +513,7 @@ static void iscsi_target_do_login_rx(struct work_struct *work) state = (tpg->tpg_state == TPG_STATE_ACTIVE); spin_unlock(&tpg->tpg_state_lock); - if (state == false) { + if (!state) { pr_debug("iscsi_target_do_login_rx: tpg_state != TPG_STATE_ACTIVE\n"); iscsi_target_restore_sock_callbacks(conn); iscsi_target_login_drop(conn, login); @@ -528,7 +528,7 @@ static void iscsi_target_do_login_rx(struct work_struct *work) state = iscsi_target_sk_state_check(sk); read_unlock_bh(&sk->sk_callback_lock); - if (state == false) { + if (!state) { pr_debug("iscsi_target_do_login_rx, TCP state CLOSE\n"); iscsi_target_restore_sock_callbacks(conn); iscsi_target_login_drop(conn, login); @@ -773,6 +773,12 @@ static int iscsi_target_handle_csg_zero( } goto do_auth; + } else if (!payload_length) { + pr_err("Initiator sent zero length security payload," + " login failed\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_AUTH_FAILED); + return -1; } if (login->first_request) diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c index 4d2e23fc76f..02f9de26f38 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.c +++ b/drivers/target/iscsi/iscsi_target_parameters.c @@ -474,10 +474,10 @@ int iscsi_set_keys_to_negotiate( if (!strcmp(param->name, AUTHMETHOD)) { SET_PSTATE_NEGOTIATE(param); } else if (!strcmp(param->name, HEADERDIGEST)) { - if (iser == false) + if (!iser) SET_PSTATE_NEGOTIATE(param); } else if (!strcmp(param->name, DATADIGEST)) { - if (iser == false) + if (!iser) SET_PSTATE_NEGOTIATE(param); } else if 
(!strcmp(param->name, MAXCONNECTIONS)) { SET_PSTATE_NEGOTIATE(param); @@ -497,7 +497,7 @@ int iscsi_set_keys_to_negotiate( } else if (!strcmp(param->name, IMMEDIATEDATA)) { SET_PSTATE_NEGOTIATE(param); } else if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH)) { - if (iser == false) + if (!iser) SET_PSTATE_NEGOTIATE(param); } else if (!strcmp(param->name, MAXXMITDATASEGMENTLENGTH)) { continue; @@ -528,13 +528,13 @@ int iscsi_set_keys_to_negotiate( } else if (!strcmp(param->name, OFMARKINT)) { SET_PSTATE_NEGOTIATE(param); } else if (!strcmp(param->name, RDMAEXTENSIONS)) { - if (iser == true) + if (iser) SET_PSTATE_NEGOTIATE(param); } else if (!strcmp(param->name, INITIATORRECVDATASEGMENTLENGTH)) { - if (iser == true) + if (iser) SET_PSTATE_NEGOTIATE(param); } else if (!strcmp(param->name, TARGETRECVDATASEGMENTLENGTH)) { - if (iser == true) + if (iser) SET_PSTATE_NEGOTIATE(param); } } @@ -1605,7 +1605,7 @@ int iscsi_decode_text_input( tmpbuf = kzalloc(length + 1, GFP_KERNEL); if (!tmpbuf) { - pr_err("Unable to allocate memory for tmpbuf.\n"); + pr_err("Unable to allocate %u + 1 bytes for tmpbuf.\n", length); return -1; } diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 1431e8400d2..c3cb5c15efd 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -189,7 +189,7 @@ static void iscsit_clear_tpg_np_login_thread( iscsit_reset_np_thread(tpg_np->tpg_np, tpg_np, tpg, shutdown); } -void iscsit_clear_tpg_np_login_threads( +static void iscsit_clear_tpg_np_login_threads( struct iscsi_portal_group *tpg, bool shutdown) { @@ -276,8 +276,6 @@ int iscsit_tpg_del_portal_group( tpg->tpg_state = TPG_STATE_INACTIVE; spin_unlock(&tpg->tpg_state_lock); - iscsit_clear_tpg_np_login_threads(tpg, true); - if (iscsit_release_sessions_for_tpg(tpg, force) < 0) { pr_err("Unable to delete iSCSI Target Portal Group:" " %hu while active sessions exist, and force=0\n", @@ -453,7 +451,7 @@ static bool iscsit_tpg_check_network_portal( match = iscsit_check_np_match(sockaddr, np, network_transport); - if (match == true) + if (match) break; } spin_unlock(&tpg->tpg_np_lock); @@ -475,7 +473,7 @@ struct iscsi_tpg_np *iscsit_tpg_add_network_portal( if (!tpg_np_parent) { if (iscsit_tpg_check_network_portal(tpg->tpg_tiqn, sockaddr, - network_transport) == true) { + network_transport)) { pr_err("Network Portal: %s already exists on a" " different TPG on %s\n", ip_str, tpg->tpg_tiqn->tiqn); diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h index 0a182f2aa8a..e7265337bc4 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.h +++ b/drivers/target/iscsi/iscsi_target_tpg.h @@ -8,7 +8,6 @@ extern struct iscsi_portal_group *iscsit_get_tpg_from_np(struct iscsi_tiqn *, struct iscsi_np *, struct iscsi_tpg_np **); extern int iscsit_get_tpg(struct iscsi_portal_group *); extern void iscsit_put_tpg(struct iscsi_portal_group *); -extern void iscsit_clear_tpg_np_login_threads(struct iscsi_portal_group *, bool); extern void iscsit_tpg_dump_params(struct iscsi_portal_group *); extern int iscsit_tpg_add_portal_group(struct iscsi_tiqn *, struct iscsi_portal_group *); extern int iscsit_tpg_del_portal_group(struct iscsi_tiqn *, struct iscsi_portal_group *, diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 73ab75ddaf4..6d2f37578b2 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -179,7 +179,7 @@ static void 
tcm_loop_submission_work(struct work_struct *work) struct tcm_loop_hba *tl_hba; struct tcm_loop_tpg *tl_tpg; struct scatterlist *sgl_bidi = NULL; - u32 sgl_bidi_count = 0; + u32 sgl_bidi_count = 0, transfer_length; int rc; tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host); @@ -213,12 +213,21 @@ static void tcm_loop_submission_work(struct work_struct *work) } - if (!scsi_prot_sg_count(sc) && scsi_get_prot_op(sc) != SCSI_PROT_NORMAL) + transfer_length = scsi_transfer_length(sc); + if (!scsi_prot_sg_count(sc) && + scsi_get_prot_op(sc) != SCSI_PROT_NORMAL) { se_cmd->prot_pto = true; + /* + * loopback transport doesn't support + * WRITE_GENERATE, READ_STRIP protection + * information operations, go ahead unprotected. + */ + transfer_length = scsi_bufflen(sc); + } rc = target_submit_cmd_map_sgls(se_cmd, tl_nexus->se_sess, sc->cmnd, &tl_cmd->tl_sense_buf[0], tl_cmd->sc->device->lun, - scsi_bufflen(sc), tcm_loop_sam_attr(sc), + transfer_length, tcm_loop_sam_attr(sc), sc->sc_data_direction, 0, scsi_sglist(sc), scsi_sg_count(sc), sgl_bidi, sgl_bidi_count, diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index e0229592ec5..bd78d9235ac 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -81,7 +81,7 @@ sbc_emulate_readcapacity(struct se_cmd *cmd) transport_kunmap_data_sg(cmd); } - target_complete_cmd(cmd, GOOD); + target_complete_cmd_with_length(cmd, GOOD, 8); return 0; } @@ -137,7 +137,7 @@ sbc_emulate_readcapacity_16(struct se_cmd *cmd) transport_kunmap_data_sg(cmd); } - target_complete_cmd(cmd, GOOD); + target_complete_cmd_with_length(cmd, GOOD, 32); return 0; } @@ -176,24 +176,6 @@ static inline u32 sbc_get_size(struct se_cmd *cmd, u32 sectors) return cmd->se_dev->dev_attrib.block_size * sectors; } -static int sbc_check_valid_sectors(struct se_cmd *cmd) -{ - struct se_device *dev = cmd->se_dev; - unsigned long long end_lba; - u32 sectors; - - sectors = cmd->data_length / dev->dev_attrib.block_size; - end_lba = dev->transport->get_blocks(dev) + 1; - - if (cmd->t_task_lba + sectors > end_lba) { - pr_err("target: lba %llu, sectors %u exceeds end lba %llu\n", - cmd->t_task_lba, sectors, end_lba); - return -EINVAL; - } - - return 0; -} - static inline u32 transport_get_sectors_6(unsigned char *cdb) { /* @@ -665,8 +647,19 @@ sbc_check_prot(struct se_device *dev, struct se_cmd *cmd, unsigned char *cdb, cmd->prot_type = dev->dev_attrib.pi_prot_type; cmd->prot_length = dev->prot_length * sectors; - pr_debug("%s: prot_type=%d, prot_length=%d prot_op=%d prot_checks=%d\n", - __func__, cmd->prot_type, cmd->prot_length, + + /** + * In case protection information exists over the wire + * we modify command data length to describe pure data. 
+ * The actual transfer length is data length + protection + * length + **/ + if (protect) + cmd->data_length = sectors * dev->dev_attrib.block_size; + + pr_debug("%s: prot_type=%d, data_length=%d, prot_length=%d " + "prot_op=%d prot_checks=%d\n", + __func__, cmd->prot_type, cmd->data_length, cmd->prot_length, cmd->prot_op, cmd->prot_checks); return true; @@ -877,15 +870,6 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) break; case SYNCHRONIZE_CACHE: case SYNCHRONIZE_CACHE_16: - if (!ops->execute_sync_cache) { - size = 0; - cmd->execute_cmd = sbc_emulate_noop; - break; - } - - /* - * Extract LBA and range to be flushed for emulated SYNCHRONIZE_CACHE - */ if (cdb[0] == SYNCHRONIZE_CACHE) { sectors = transport_get_sectors_10(cdb); cmd->t_task_lba = transport_lba_32(cdb); @@ -893,18 +877,12 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) sectors = transport_get_sectors_16(cdb); cmd->t_task_lba = transport_lba_64(cdb); } - - size = sbc_get_size(cmd, sectors); - - /* - * Check to ensure that LBA + Range does not exceed past end of - * device for IBLOCK and FILEIO ->do_sync_cache() backend calls - */ - if (cmd->t_task_lba || sectors) { - if (sbc_check_valid_sectors(cmd) < 0) - return TCM_ADDRESS_OUT_OF_RANGE; + if (ops->execute_sync_cache) { + cmd->execute_cmd = ops->execute_sync_cache; + goto check_lba; } - cmd->execute_cmd = ops->execute_sync_cache; + size = 0; + cmd->execute_cmd = sbc_emulate_noop; break; case UNMAP: if (!ops->execute_unmap) @@ -947,8 +925,10 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) break; case VERIFY: size = 0; + sectors = transport_get_sectors_10(cdb); + cmd->t_task_lba = transport_lba_32(cdb); cmd->execute_cmd = sbc_emulate_noop; - break; + goto check_lba; case REZERO_UNIT: case SEEK_6: case SEEK_10: @@ -988,7 +968,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) dev->dev_attrib.hw_max_sectors); return TCM_INVALID_CDB_FIELD; } - +check_lba: end_lba = dev->transport->get_blocks(dev) + 1; if (cmd->t_task_lba + sectors > end_lba) { pr_err("cmd exceeds last lba %llu " diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index 8653666612a..6cd7222738f 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -129,15 +129,10 @@ static sense_reason_t spc_emulate_evpd_80(struct se_cmd *cmd, unsigned char *buf) { struct se_device *dev = cmd->se_dev; - u16 len = 0; + u16 len; if (dev->dev_flags & DF_EMULATED_VPD_UNIT_SERIAL) { - u32 unit_serial_len; - - unit_serial_len = strlen(dev->t10_wwn.unit_serial); - unit_serial_len++; /* For NULL Terminator */ - - len += sprintf(&buf[4], "%s", dev->t10_wwn.unit_serial); + len = sprintf(&buf[4], "%s", dev->t10_wwn.unit_serial); len++; /* Extra Byte for NULL Terminator */ buf[3] = len; } @@ -721,6 +716,7 @@ spc_emulate_inquiry(struct se_cmd *cmd) unsigned char *buf; sense_reason_t ret; int p; + int len = 0; buf = kzalloc(SE_INQUIRY_BUF, GFP_KERNEL); if (!buf) { @@ -742,6 +738,7 @@ spc_emulate_inquiry(struct se_cmd *cmd) } ret = spc_emulate_inquiry_std(cmd, buf); + len = buf[4] + 5; goto out; } @@ -749,6 +746,7 @@ spc_emulate_inquiry(struct se_cmd *cmd) if (cdb[2] == evpd_handlers[p].page) { buf[1] = cdb[2]; ret = evpd_handlers[p].emulate(cmd, buf); + len = get_unaligned_be16(&buf[2]) + 4; goto out; } } @@ -765,7 +763,7 @@ out: kfree(buf); if (!ret) - target_complete_cmd(cmd, GOOD); + target_complete_cmd_with_length(cmd, GOOD, len); return ret; } @@ -1103,7 +1101,7 @@ set_length: transport_kunmap_data_sg(cmd); } - target_complete_cmd(cmd, 
GOOD); + target_complete_cmd_with_length(cmd, GOOD, length); return 0; } @@ -1279,7 +1277,7 @@ done: buf[3] = (lun_count & 0xff); transport_kunmap_data_sg(cmd); - target_complete_cmd(cmd, GOOD); + target_complete_cmd_with_length(cmd, GOOD, 8 + lun_count * 8); return 0; } EXPORT_SYMBOL(spc_emulate_report_luns); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 2179feed0d6..7fa62fc93e0 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -504,7 +504,7 @@ void transport_deregister_session(struct se_session *se_sess) * ->acl_free_comp caller to wakeup configfs se_node_acl->acl_group * removal context. */ - if (se_nacl && comp_nacl == true) + if (se_nacl && comp_nacl) target_put_nacl(se_nacl); transport_free_session(se_sess); @@ -562,7 +562,7 @@ static int transport_cmd_check_stop(struct se_cmd *cmd, bool remove_from_lists, spin_unlock_irqrestore(&cmd->t_state_lock, flags); - complete(&cmd->t_transport_stop_comp); + complete_all(&cmd->t_transport_stop_comp); return 1; } @@ -687,7 +687,7 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status) if (cmd->transport_state & CMD_T_ABORTED && cmd->transport_state & CMD_T_STOP) { spin_unlock_irqrestore(&cmd->t_state_lock, flags); - complete(&cmd->t_transport_stop_comp); + complete_all(&cmd->t_transport_stop_comp); return; } else if (!success) { INIT_WORK(&cmd->work, target_complete_failure_work); @@ -703,6 +703,23 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status) } EXPORT_SYMBOL(target_complete_cmd); +void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int length) +{ + if (scsi_status == SAM_STAT_GOOD && length < cmd->data_length) { + if (cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) { + cmd->residual_count += cmd->data_length - length; + } else { + cmd->se_cmd_flags |= SCF_UNDERFLOW_BIT; + cmd->residual_count = cmd->data_length - length; + } + + cmd->data_length = length; + } + + target_complete_cmd(cmd, scsi_status); +} +EXPORT_SYMBOL(target_complete_cmd_with_length); + static void target_add_to_state_list(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; @@ -1761,7 +1778,7 @@ void target_execute_cmd(struct se_cmd *cmd) cmd->se_tfo->get_task_tag(cmd)); spin_unlock_irq(&cmd->t_state_lock); - complete(&cmd->t_transport_stop_comp); + complete_all(&cmd->t_transport_stop_comp); return; } @@ -2363,7 +2380,7 @@ int target_get_sess_cmd(struct se_session *se_sess, struct se_cmd *se_cmd, * fabric acknowledgement that requires two target_put_sess_cmd() * invocations before se_cmd descriptor release. 
*/ - if (ack_kref == true) { + if (ack_kref) { kref_get(&se_cmd->cmd_kref); se_cmd->se_cmd_flags |= SCF_ACK_KREF; } @@ -2407,6 +2424,10 @@ static void target_release_cmd_kref(struct kref *kref) */ int target_put_sess_cmd(struct se_session *se_sess, struct se_cmd *se_cmd) { + if (!se_sess) { + se_cmd->se_tfo->release_cmd(se_cmd); + return 1; + } return kref_put_spinlock_irqsave(&se_cmd->cmd_kref, target_release_cmd_kref, &se_sess->sess_cmd_lock); } @@ -2934,6 +2955,12 @@ static void target_tmr_work(struct work_struct *work) int transport_generic_handle_tmr( struct se_cmd *cmd) { + unsigned long flags; + + spin_lock_irqsave(&cmd->t_state_lock, flags); + cmd->transport_state |= CMD_T_ACTIVE; + spin_unlock_irqrestore(&cmd->t_state_lock, flags); + INIT_WORK(&cmd->work, target_tmr_work); queue_work(cmd->se_dev->tmr_wq, &cmd->work); return 0; diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 669c536fd95..e9186cdf35e 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -70,7 +70,7 @@ static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN], *dev_wwn; int rc; - if (src == true) + if (src) dev_wwn = &xop->dst_tid_wwn[0]; else dev_wwn = &xop->src_tid_wwn[0]; @@ -88,7 +88,7 @@ static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op if (rc != 0) continue; - if (src == true) { + if (src) { xop->dst_dev = se_dev; pr_debug("XCOPY 0xe4: Setting xop->dst_dev: %p from located" " se_dev\n", xop->dst_dev); @@ -166,7 +166,7 @@ static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op return -EINVAL; } - if (src == true) { + if (src) { memcpy(&xop->src_tid_wwn[0], &desc[8], XCOPY_NAA_IEEE_REGEX_LEN); /* * Determine if the source designator matches the local device @@ -236,7 +236,7 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd, /* * Assume target descriptors are in source -> destination order.. */ - if (src == true) + if (src) src = false; else src = true; @@ -560,7 +560,7 @@ static int target_xcopy_init_pt_lun( * reservations. The pt_cmd->se_lun pointer will be setup from within * target_xcopy_setup_pt_port() */ - if (remote_port == false) { + if (!remote_port) { pt_cmd->se_cmd_flags |= SCF_SE_LUN_CMD | SCF_CMD_XCOPY_PASSTHROUGH; return 0; } diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c index f5fd515b2be..be0c0d08c56 100644 --- a/drivers/target/tcm_fc/tfc_cmd.c +++ b/drivers/target/tcm_fc/tfc_cmd.c @@ -128,6 +128,7 @@ int ft_queue_status(struct se_cmd *se_cmd) struct fc_lport *lport; struct fc_exch *ep; size_t len; + int rc; if (cmd->aborted) return 0; @@ -137,9 +138,10 @@ int ft_queue_status(struct se_cmd *se_cmd) len = sizeof(*fcp) + se_cmd->scsi_sense_length; fp = fc_frame_alloc(lport, len); if (!fp) { - /* XXX shouldn't just drop it - requeue and retry? 
*/ - return 0; + se_cmd->scsi_status = SAM_STAT_TASK_SET_FULL; + return -ENOMEM; } + fcp = fc_frame_payload_get(fp, len); memset(fcp, 0, len); fcp->resp.fr_status = se_cmd->scsi_status; @@ -170,7 +172,18 @@ fc_fill_fc_hdr(fp, FC_RCTL_DD_CMD_STATUS, ep->did, ep->sid, FC_TYPE_FCP, FC_FC_EX_CTX | FC_FC_LAST_SEQ | FC_FC_END_SEQ, 0); - lport->tt.seq_send(lport, cmd->seq, fp); + rc = lport->tt.seq_send(lport, cmd->seq, fp); + if (rc) { + pr_info_ratelimited("%s: Failed to send response frame %p, " + "xid <0x%x>\n", __func__, fp, ep->xid); + /* + * Generate a TASK_SET_FULL status to notify the initiator + * to reduce its queue_depth after the se_cmd response has + * been re-queued by target-core. + */ + se_cmd->scsi_status = SAM_STAT_TASK_SET_FULL; + return -ENOMEM; + } lport->tt.exch_done(cmd->seq); return 0; } diff --git a/drivers/target/tcm_fc/tfc_io.c b/drivers/target/tcm_fc/tfc_io.c index e415af32115..97b486c3dda 100644 --- a/drivers/target/tcm_fc/tfc_io.c +++ b/drivers/target/tcm_fc/tfc_io.c @@ -82,6 +82,10 @@ int ft_queue_data_in(struct se_cmd *se_cmd) if (cmd->aborted) return 0; + + if (se_cmd->scsi_status == SAM_STAT_TASK_SET_FULL) + goto queue_status; + ep = fc_seq_exch(cmd->seq); lport = ep->lp; cmd->seq = lport->tt.seq_start_next(cmd->seq); @@ -178,14 +182,23 @@ int ft_queue_data_in(struct se_cmd *se_cmd) FC_TYPE_FCP, f_ctl, fh_off); error = lport->tt.seq_send(lport, seq, fp); if (error) { - /* XXX For now, initiator will retry */ - pr_err_ratelimited("%s: Failed to send frame %p, " + pr_info_ratelimited("%s: Failed to send frame %p, " "xid <0x%x>, remaining %zu, " "lso_max <0x%x>\n", __func__, fp, ep->xid, remaining, lport->lso_max); + /* + * Go ahead and set TASK_SET_FULL status ignoring the + * rest of the DataIN, and immediately attempt to + * send the response via ft_queue_status() in order + * to notify the initiator that it should reduce its + * per LUN queue_depth.
+ */ + se_cmd->scsi_status = SAM_STAT_TASK_SET_FULL; + break; } } +queue_status: return ft_queue_status(se_cmd); } diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index e9c280f5581..4f4ffa4c604 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -57,7 +57,8 @@ #define TCM_VHOST_MAX_CDB_SIZE 32 #define TCM_VHOST_DEFAULT_TAGS 256 #define TCM_VHOST_PREALLOC_SGLS 2048 -#define TCM_VHOST_PREALLOC_PAGES 2048 +#define TCM_VHOST_PREALLOC_UPAGES 2048 +#define TCM_VHOST_PREALLOC_PROT_SGLS 512 struct vhost_scsi_inflight { /* Wait for the flush operation to finish */ @@ -79,10 +80,12 @@ struct tcm_vhost_cmd { u64 tvc_tag; /* The number of scatterlists associated with this cmd */ u32 tvc_sgl_count; + u32 tvc_prot_sgl_count; /* Saved unpacked SCSI LUN for tcm_vhost_submission_work() */ u32 tvc_lun; /* Pointer to the SGL formatted memory from virtio-scsi */ struct scatterlist *tvc_sgl; + struct scatterlist *tvc_prot_sgl; struct page **tvc_upages; /* Pointer to response */ struct virtio_scsi_cmd_resp __user *tvc_resp; @@ -166,7 +169,8 @@ enum { }; enum { - VHOST_SCSI_FEATURES = VHOST_FEATURES | (1ULL << VIRTIO_SCSI_F_HOTPLUG) + VHOST_SCSI_FEATURES = VHOST_FEATURES | (1ULL << VIRTIO_SCSI_F_HOTPLUG) | + (1ULL << VIRTIO_SCSI_F_T10_PI) }; #define VHOST_SCSI_MAX_TARGET 256 @@ -456,12 +460,16 @@ static void tcm_vhost_release_cmd(struct se_cmd *se_cmd) struct tcm_vhost_cmd *tv_cmd = container_of(se_cmd, struct tcm_vhost_cmd, tvc_se_cmd); struct se_session *se_sess = se_cmd->se_sess; + int i; if (tv_cmd->tvc_sgl_count) { - u32 i; for (i = 0; i < tv_cmd->tvc_sgl_count; i++) put_page(sg_page(&tv_cmd->tvc_sgl[i])); } + if (tv_cmd->tvc_prot_sgl_count) { + for (i = 0; i < tv_cmd->tvc_prot_sgl_count; i++) + put_page(sg_page(&tv_cmd->tvc_prot_sgl[i])); + } tcm_vhost_put_inflight(tv_cmd->inflight); percpu_ida_free(&se_sess->sess_tag_pool, se_cmd->map_tag); @@ -713,16 +721,14 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) } static struct tcm_vhost_cmd * -vhost_scsi_get_tag(struct vhost_virtqueue *vq, - struct tcm_vhost_tpg *tpg, - struct virtio_scsi_cmd_req *v_req, - u32 exp_data_len, - int data_direction) +vhost_scsi_get_tag(struct vhost_virtqueue *vq, struct tcm_vhost_tpg *tpg, + unsigned char *cdb, u64 scsi_tag, u16 lun, u8 task_attr, + u32 exp_data_len, int data_direction) { struct tcm_vhost_cmd *cmd; struct tcm_vhost_nexus *tv_nexus; struct se_session *se_sess; - struct scatterlist *sg; + struct scatterlist *sg, *prot_sg; struct page **pages; int tag; @@ -741,19 +747,24 @@ vhost_scsi_get_tag(struct vhost_virtqueue *vq, cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[tag]; sg = cmd->tvc_sgl; + prot_sg = cmd->tvc_prot_sgl; pages = cmd->tvc_upages; memset(cmd, 0, sizeof(struct tcm_vhost_cmd)); cmd->tvc_sgl = sg; + cmd->tvc_prot_sgl = prot_sg; cmd->tvc_upages = pages; cmd->tvc_se_cmd.map_tag = tag; - cmd->tvc_tag = v_req->tag; - cmd->tvc_task_attr = v_req->task_attr; + cmd->tvc_tag = scsi_tag; + cmd->tvc_lun = lun; + cmd->tvc_task_attr = task_attr; cmd->tvc_exp_data_len = exp_data_len; cmd->tvc_data_direction = data_direction; cmd->tvc_nexus = tv_nexus; cmd->inflight = tcm_vhost_get_inflight(vq); + memcpy(cmd->tvc_cdb, cdb, TCM_VHOST_MAX_CDB_SIZE); + return cmd; } @@ -767,35 +778,28 @@ vhost_scsi_map_to_sgl(struct tcm_vhost_cmd *tv_cmd, struct scatterlist *sgl, unsigned int sgl_count, struct iovec *iov, - int write) + struct page **pages, + bool write) { unsigned int npages = 0, pages_nr, offset, nbytes; struct scatterlist *sg = sgl; void __user *ptr = iov->iov_base; 
size_t len = iov->iov_len; - struct page **pages; int ret, i; - if (sgl_count > TCM_VHOST_PREALLOC_SGLS) { - pr_err("vhost_scsi_map_to_sgl() psgl_count: %u greater than" - " preallocated TCM_VHOST_PREALLOC_SGLS: %u\n", - sgl_count, TCM_VHOST_PREALLOC_SGLS); - return -ENOBUFS; - } - pages_nr = iov_num_pages(iov); - if (pages_nr > sgl_count) + if (pages_nr > sgl_count) { + pr_err("vhost_scsi_map_to_sgl() pages_nr: %u greater than" + " sgl_count: %u\n", pages_nr, sgl_count); return -ENOBUFS; - - if (pages_nr > TCM_VHOST_PREALLOC_PAGES) { + } + if (pages_nr > TCM_VHOST_PREALLOC_UPAGES) { pr_err("vhost_scsi_map_to_sgl() pages_nr: %u greater than" - " preallocated TCM_VHOST_PREALLOC_PAGES: %u\n", - pages_nr, TCM_VHOST_PREALLOC_PAGES); + " preallocated TCM_VHOST_PREALLOC_UPAGES: %u\n", + pages_nr, TCM_VHOST_PREALLOC_UPAGES); return -ENOBUFS; } - pages = tv_cmd->tvc_upages; - ret = get_user_pages_fast((unsigned long)ptr, pages_nr, write, pages); /* No pages were pinned */ if (ret < 0) @@ -825,33 +829,32 @@ out: static int vhost_scsi_map_iov_to_sgl(struct tcm_vhost_cmd *cmd, struct iovec *iov, - unsigned int niov, - int write) + int niov, + bool write) { - int ret; - unsigned int i; - u32 sgl_count; - struct scatterlist *sg; + struct scatterlist *sg = cmd->tvc_sgl; + unsigned int sgl_count = 0; + int ret, i; - /* - * Find out how long sglist needs to be - */ - sgl_count = 0; for (i = 0; i < niov; i++) sgl_count += iov_num_pages(&iov[i]); - /* TODO overflow checking */ + if (sgl_count > TCM_VHOST_PREALLOC_SGLS) { + pr_err("vhost_scsi_map_iov_to_sgl() sgl_count: %u greater than" + " preallocated TCM_VHOST_PREALLOC_SGLS: %u\n", + sgl_count, TCM_VHOST_PREALLOC_SGLS); + return -ENOBUFS; + } - sg = cmd->tvc_sgl; pr_debug("%s sg %p sgl_count %u\n", __func__, sg, sgl_count); sg_init_table(sg, sgl_count); - cmd->tvc_sgl_count = sgl_count; - pr_debug("Mapping %u iovecs for %u pages\n", niov, sgl_count); + pr_debug("Mapping iovec %p for %u pages\n", &iov[0], sgl_count); + for (i = 0; i < niov; i++) { ret = vhost_scsi_map_to_sgl(cmd, sg, sgl_count, &iov[i], - write); + cmd->tvc_upages, write); if (ret < 0) { for (i = 0; i < cmd->tvc_sgl_count; i++) put_page(sg_page(&cmd->tvc_sgl[i])); @@ -859,31 +862,70 @@ vhost_scsi_map_iov_to_sgl(struct tcm_vhost_cmd *cmd, cmd->tvc_sgl_count = 0; return ret; } - sg += ret; sgl_count -= ret; } return 0; } +static int +vhost_scsi_map_iov_to_prot(struct tcm_vhost_cmd *cmd, + struct iovec *iov, + int niov, + bool write) +{ + struct scatterlist *prot_sg = cmd->tvc_prot_sgl; + unsigned int prot_sgl_count = 0; + int ret, i; + + for (i = 0; i < niov; i++) + prot_sgl_count += iov_num_pages(&iov[i]); + + if (prot_sgl_count > TCM_VHOST_PREALLOC_PROT_SGLS) { + pr_err("vhost_scsi_map_iov_to_prot() sgl_count: %u greater than" + " preallocated TCM_VHOST_PREALLOC_PROT_SGLS: %u\n", + prot_sgl_count, TCM_VHOST_PREALLOC_PROT_SGLS); + return -ENOBUFS; + } + + pr_debug("%s prot_sg %p prot_sgl_count %u\n", __func__, + prot_sg, prot_sgl_count); + sg_init_table(prot_sg, prot_sgl_count); + cmd->tvc_prot_sgl_count = prot_sgl_count; + + for (i = 0; i < niov; i++) { + ret = vhost_scsi_map_to_sgl(cmd, prot_sg, prot_sgl_count, &iov[i], + cmd->tvc_upages, write); + if (ret < 0) { + for (i = 0; i < cmd->tvc_prot_sgl_count; i++) + put_page(sg_page(&cmd->tvc_prot_sgl[i])); + + cmd->tvc_prot_sgl_count = 0; + return ret; + } + prot_sg += ret; + prot_sgl_count -= ret; + } + return 0; +} + static void tcm_vhost_submission_work(struct work_struct *work) { struct tcm_vhost_cmd *cmd = container_of(work, struct 
tcm_vhost_cmd, work); struct tcm_vhost_nexus *tv_nexus; struct se_cmd *se_cmd = &cmd->tvc_se_cmd; - struct scatterlist *sg_ptr, *sg_bidi_ptr = NULL; - int rc, sg_no_bidi = 0; + struct scatterlist *sg_ptr, *sg_prot_ptr = NULL; + int rc; + /* FIXME: BIDI operation */ if (cmd->tvc_sgl_count) { sg_ptr = cmd->tvc_sgl; -/* FIXME: Fix BIDI operation in tcm_vhost_submission_work() */ -#if 0 - if (se_cmd->se_cmd_flags & SCF_BIDI) { - sg_bidi_ptr = NULL; - sg_no_bidi = 0; - } -#endif + + if (cmd->tvc_prot_sgl_count) + sg_prot_ptr = cmd->tvc_prot_sgl; + else + se_cmd->prot_pto = true; } else { sg_ptr = NULL; } @@ -894,7 +936,7 @@ static void tcm_vhost_submission_work(struct work_struct *work) cmd->tvc_lun, cmd->tvc_exp_data_len, cmd->tvc_task_attr, cmd->tvc_data_direction, TARGET_SCF_ACK_KREF, sg_ptr, cmd->tvc_sgl_count, - sg_bidi_ptr, sg_no_bidi, NULL, 0); + NULL, 0, sg_prot_ptr, cmd->tvc_prot_sgl_count); if (rc < 0) { transport_send_check_condition_and_sense(se_cmd, TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE, 0); @@ -926,12 +968,18 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) { struct tcm_vhost_tpg **vs_tpg; struct virtio_scsi_cmd_req v_req; + struct virtio_scsi_cmd_req_pi v_req_pi; struct tcm_vhost_tpg *tpg; struct tcm_vhost_cmd *cmd; - u32 exp_data_len, data_first, data_num, data_direction; + u64 tag; + u32 exp_data_len, data_first, data_num, data_direction, prot_first; unsigned out, in, i; - int head, ret; - u8 target; + int head, ret, data_niov, prot_niov, prot_bytes; + size_t req_size; + u16 lun; + u8 *target, *lunp, task_attr; + bool hdr_pi; + void *req, *cdb; mutex_lock(&vq->mutex); /* @@ -962,7 +1010,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) break; } -/* FIXME: BIDI operation */ + /* FIXME: BIDI operation */ if (out == 1 && in == 1) { data_direction = DMA_NONE; data_first = 0; @@ -992,29 +1040,38 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) break; } - if (unlikely(vq->iov[0].iov_len != sizeof(v_req))) { - vq_err(vq, "Expecting virtio_scsi_cmd_req, got %zu" - " bytes\n", vq->iov[0].iov_len); + if (vhost_has_feature(vq, VIRTIO_SCSI_F_T10_PI)) { + req = &v_req_pi; + lunp = &v_req_pi.lun[0]; + target = &v_req_pi.lun[1]; + req_size = sizeof(v_req_pi); + hdr_pi = true; + } else { + req = &v_req; + lunp = &v_req.lun[0]; + target = &v_req.lun[1]; + req_size = sizeof(v_req); + hdr_pi = false; + } + + if (unlikely(vq->iov[0].iov_len < req_size)) { + pr_err("Expecting virtio-scsi header: %zu, got %zu\n", + req_size, vq->iov[0].iov_len); break; } - pr_debug("Calling __copy_from_user: vq->iov[0].iov_base: %p," - " len: %zu\n", vq->iov[0].iov_base, sizeof(v_req)); - ret = __copy_from_user(&v_req, vq->iov[0].iov_base, - sizeof(v_req)); + ret = memcpy_fromiovecend(req, &vq->iov[0], 0, req_size); if (unlikely(ret)) { vq_err(vq, "Faulted on virtio_scsi_cmd_req\n"); break; } /* virtio-scsi spec requires byte 0 of the lun to be 1 */ - if (unlikely(v_req.lun[0] != 1)) { + if (unlikely(*lunp != 1)) { vhost_scsi_send_bad_target(vs, vq, head, out); continue; } - /* Extract the tpgt */ - target = v_req.lun[1]; - tpg = ACCESS_ONCE(vs_tpg[target]); + tpg = ACCESS_ONCE(vs_tpg[*target]); /* Target does not exist, fail the request */ if (unlikely(!tpg)) { @@ -1022,17 +1079,79 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) continue; } + data_niov = data_num; + prot_niov = prot_first = prot_bytes = 0; + /* + * Determine if any protection information iovecs are preceding + * the actual data payload, and
adjust data_first + data_niov + * values accordingly for vhost_scsi_map_iov_to_sgl() below. + * + * Also extract virtio_scsi header bits for vhost_scsi_get_tag() + */ + if (hdr_pi) { + if (v_req_pi.pi_bytesout) { + if (data_direction != DMA_TO_DEVICE) { + vq_err(vq, "Received non zero do_pi_niov" + ", but wrong data_direction\n"); + goto err_cmd; + } + prot_bytes = v_req_pi.pi_bytesout; + } else if (v_req_pi.pi_bytesin) { + if (data_direction != DMA_FROM_DEVICE) { + vq_err(vq, "Received non zero di_pi_niov" + ", but wrong data_direction\n"); + goto err_cmd; + } + prot_bytes = v_req_pi.pi_bytesin; + } + if (prot_bytes) { + int tmp = 0; + + for (i = 0; i < data_num; i++) { + tmp += vq->iov[data_first + i].iov_len; + prot_niov++; + if (tmp >= prot_bytes) + break; + } + prot_first = data_first; + data_first += prot_niov; + data_niov = data_num - prot_niov; + } + tag = v_req_pi.tag; + task_attr = v_req_pi.task_attr; + cdb = &v_req_pi.cdb[0]; + lun = ((v_req_pi.lun[2] << 8) | v_req_pi.lun[3]) & 0x3FFF; + } else { + tag = v_req.tag; + task_attr = v_req.task_attr; + cdb = &v_req.cdb[0]; + lun = ((v_req.lun[2] << 8) | v_req.lun[3]) & 0x3FFF; + } exp_data_len = 0; - for (i = 0; i < data_num; i++) + for (i = 0; i < data_niov; i++) exp_data_len += vq->iov[data_first + i].iov_len; + /* + * Check that the received CDB size does not exceed our + * hardcoded max for vhost-scsi + * + * TODO what if cdb was too small for varlen cdb header? + */ + if (unlikely(scsi_command_size(cdb) > TCM_VHOST_MAX_CDB_SIZE)) { + vq_err(vq, "Received SCSI CDB with command_size: %d that" + " exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n", + scsi_command_size(cdb), TCM_VHOST_MAX_CDB_SIZE); + goto err_cmd; + } - cmd = vhost_scsi_get_tag(vq, tpg, &v_req, - exp_data_len, data_direction); + cmd = vhost_scsi_get_tag(vq, tpg, cdb, tag, lun, task_attr, + exp_data_len + prot_bytes, + data_direction); if (IS_ERR(cmd)) { vq_err(vq, "vhost_scsi_get_tag failed %ld\n", PTR_ERR(cmd)); goto err_cmd; } + pr_debug("Allocated tv_cmd: %p exp_data_len: %d, data_direction" ": %d\n", cmd, exp_data_len, data_direction); @@ -1040,40 +1159,28 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) cmd->tvc_vq = vq; cmd->tvc_resp = vq->iov[out].iov_base; - /* - * Copy in the recieved CDB descriptor into cmd->tvc_cdb - * that will be used by tcm_vhost_new_cmd_map() and down into - * target_setup_cmd_from_cdb() - */ - memcpy(cmd->tvc_cdb, v_req.cdb, TCM_VHOST_MAX_CDB_SIZE); - /* - * Check that the recieved CDB size does not exceeded our - * hardcoded max for tcm_vhost - */ - /* TODO what if cdb was too small for varlen cdb header?
*/ - if (unlikely(scsi_command_size(cmd->tvc_cdb) > - TCM_VHOST_MAX_CDB_SIZE)) { - vq_err(vq, "Received SCSI CDB with command_size: %d that" - " exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n", - scsi_command_size(cmd->tvc_cdb), - TCM_VHOST_MAX_CDB_SIZE); - goto err_free; - } - cmd->tvc_lun = ((v_req.lun[2] << 8) | v_req.lun[3]) & 0x3FFF; - pr_debug("vhost_scsi got command opcode: %#02x, lun: %d\n", cmd->tvc_cdb[0], cmd->tvc_lun); + if (prot_niov) { + ret = vhost_scsi_map_iov_to_prot(cmd, + &vq->iov[prot_first], prot_niov, + data_direction == DMA_FROM_DEVICE); + if (unlikely(ret)) { + vq_err(vq, "Failed to map iov to" + " prot_sgl\n"); + goto err_free; + } + } if (data_direction != DMA_NONE) { ret = vhost_scsi_map_iov_to_sgl(cmd, - &vq->iov[data_first], data_num, + &vq->iov[data_first], data_niov, data_direction == DMA_FROM_DEVICE); if (unlikely(ret)) { vq_err(vq, "Failed to map iov to sgl\n"); goto err_free; } } - /* * Save the descriptor from vhost_get_vq_desc() to be used to * complete the virtio-scsi request in TCM callback context via @@ -1716,6 +1823,7 @@ static void tcm_vhost_free_cmd_map_res(struct tcm_vhost_nexus *nexus, tv_cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[i]; kfree(tv_cmd->tvc_sgl); + kfree(tv_cmd->tvc_prot_sgl); kfree(tv_cmd->tvc_upages); } } @@ -1750,7 +1858,7 @@ static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg, tv_nexus->tvn_se_sess = transport_init_session_tags( TCM_VHOST_DEFAULT_TAGS, sizeof(struct tcm_vhost_cmd), - TARGET_PROT_NORMAL); + TARGET_PROT_DIN_PASS | TARGET_PROT_DOUT_PASS); if (IS_ERR(tv_nexus->tvn_se_sess)) { mutex_unlock(&tpg->tv_tpg_mutex); kfree(tv_nexus); @@ -1769,12 +1877,20 @@ static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg, } tv_cmd->tvc_upages = kzalloc(sizeof(struct page *) * - TCM_VHOST_PREALLOC_PAGES, GFP_KERNEL); + TCM_VHOST_PREALLOC_UPAGES, GFP_KERNEL); if (!tv_cmd->tvc_upages) { mutex_unlock(&tpg->tv_tpg_mutex); pr_err("Unable to allocate tv_cmd->tvc_upages\n"); goto out; } + + tv_cmd->tvc_prot_sgl = kzalloc(sizeof(struct scatterlist) * + TCM_VHOST_PREALLOC_PROT_SGLS, GFP_KERNEL); + if (!tv_cmd->tvc_prot_sgl) { + mutex_unlock(&tpg->tv_tpg_mutex); + pr_err("Unable to allocate tv_cmd->tvc_prot_sgl\n"); + goto out; + } } /* * Since we are running in 'demo mode' this call with generate a diff --git a/firmware/Makefile b/firmware/Makefile index cbb09ce9730..5747417069c 100644 --- a/firmware/Makefile +++ b/firmware/Makefile @@ -4,10 +4,10 @@ # Create $(fwabs) from $(CONFIG_EXTRA_FIRMWARE_DIR) -- if it doesn't have a # leading /, it's relative to $(srctree). -fwdir := $(subst ",,$(CONFIG_EXTRA_FIRMWARE_DIR)) +fwdir := $(subst $(quote),,$(CONFIG_EXTRA_FIRMWARE_DIR)) fwabs := $(addprefix $(srctree)/,$(filter-out /%,$(fwdir)))$(filter /%,$(fwdir)) -fw-external-y := $(subst ",,$(CONFIG_EXTRA_FIRMWARE)) +fw-external-y := $(subst $(quote),,$(CONFIG_EXTRA_FIRMWARE)) # There are three cases to care about: # 1. Building kernel with CONFIG_FIRMWARE_IN_KERNEL=y -- $(fw-shipped-y) should @@ -138,12 +138,6 @@ fw-shipped-$(CONFIG_YAM) += yam/1200.bin yam/9600.bin fw-shipped-all := $(fw-shipped-y) $(fw-shipped-m) $(fw-shipped-) -# Directories which we _might_ need to create, so we have a rule for them. 
-firmware-dirs := $(sort $(addprefix $(objtree)/$(obj)/,$(dir $(fw-external-y) $(fw-shipped-all)))) - -quiet_cmd_mkdir = MKDIR $(patsubst $(objtree)/%,%,$@) - cmd_mkdir = mkdir -p $@ - quiet_cmd_ihex = IHEX $@ cmd_ihex = $(OBJCOPY) -Iihex -Obinary $< $@ @@ -184,21 +178,10 @@ wordsize_deps := $(wildcard include/config/64bit.h include/config/32bit.h \ include/config/superh32.h include/config/superh64.h \ include/config/x86_32.h include/config/x86_64.h) -# Workaround for make < 3.81, where .SECONDEXPANSION doesn't work. -# It'll end up depending on these targets, so make them a PHONY rule which -# depends on _all_ the directories in $(firmware-dirs), and it'll work out OK. -PHONY += $(objtree)/$$(%) $(objtree)/$(obj)/$$(%) -$(objtree)/$$(%) $(objtree)/$(obj)/$$(%): $(firmware-dirs) - @true - -# For the $$(dir %) trick, where we need % to be expanded first. -.SECONDEXPANSION: - -$(patsubst %,$(obj)/%.gen.S, $(fw-shipped-y)): %: $(wordsize_deps) \ - | $(objtree)/$$(dir %) +$(patsubst %,$(obj)/%.gen.S, $(fw-shipped-y)): %: $(wordsize_deps) $(call cmd,fwbin,$(patsubst %.gen.S,%,$@)) $(patsubst %,$(obj)/%.gen.S, $(fw-external-y)): %: $(wordsize_deps) \ - include/config/extra/firmware/dir.h | $(objtree)/$$(dir %) + include/config/extra/firmware/dir.h $(call cmd,fwbin,$(fwabs)/$(patsubst $(obj)/%.gen.S,%,$@)) # The .o files depend on the binaries directly; the .S files don't. @@ -207,7 +190,7 @@ $(patsubst %,$(obj)/%.gen.o, $(fw-external-y)): $(obj)/%.gen.o: $(fwdir)/% # .ihex is used just as a simple way to hold binary files in a source tree # where binaries are frowned upon. They are directly converted with objcopy. -$(obj)/%: $(obj)/%.ihex | $(objtree)/$(obj)/$$(dir %) +$(obj)/%: $(obj)/%.ihex $(call cmd,ihex) # Don't depend on ihex2fw if we're installing and it already exists. @@ -226,16 +209,13 @@ endif # is actually meaningful, because the firmware has to be loaded in a certain # order rather than as a single binary blob. Thus, we convert them into our # more compact binary representation of ihex records (<linux/ihex.h>) -$(obj)/%.fw: $(obj)/%.HEX $(ihex2fw_dep) | $(objtree)/$(obj)/$$(dir %) +$(obj)/%.fw: $(obj)/%.HEX $(ihex2fw_dep) $(call cmd,ihex2fw) # .H16 is our own modified form of Intel HEX, with 16-bit length for records. -$(obj)/%.fw: $(obj)/%.H16 $(ihex2fw_dep) | $(objtree)/$(obj)/$$(dir %) +$(obj)/%.fw: $(obj)/%.H16 $(ihex2fw_dep) $(call cmd,h16tofw) -$(firmware-dirs): - $(call cmd,mkdir) - obj-y += $(patsubst %,%.gen.o, $(fw-external-y)) obj-$(CONFIG_FIRMWARE_IN_KERNEL) += $(patsubst %,%.gen.o, $(fw-shipped-y)) @@ -477,7 +477,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) } EXPORT_SYMBOL(kiocb_set_cancel_fn); -static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb) +static int kiocb_cancel(struct kiocb *kiocb) { kiocb_cancel_fn *old, *cancel; @@ -538,7 +538,7 @@ static void free_ioctx_users(struct percpu_ref *ref) struct kiocb, ki_list); list_del_init(&req->ki_list); - kiocb_cancel(ctx, req); + kiocb_cancel(req); } spin_unlock_irq(&ctx->ctx_lock); @@ -727,42 +727,42 @@ err: * when the processes owning a context have all exited to encourage * the rapid destruction of the kioctx. 
*/ -static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, +static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, struct completion *requests_done) { - if (!atomic_xchg(&ctx->dead, 1)) { - struct kioctx_table *table; + struct kioctx_table *table; - spin_lock(&mm->ioctx_lock); - rcu_read_lock(); - table = rcu_dereference(mm->ioctx_table); + if (atomic_xchg(&ctx->dead, 1)) + return -EINVAL; - WARN_ON(ctx != table->table[ctx->id]); - table->table[ctx->id] = NULL; - rcu_read_unlock(); - spin_unlock(&mm->ioctx_lock); - /* percpu_ref_kill() will do the necessary call_rcu() */ - wake_up_all(&ctx->wait); + spin_lock(&mm->ioctx_lock); + rcu_read_lock(); + table = rcu_dereference(mm->ioctx_table); - /* - * It'd be more correct to do this in free_ioctx(), after all - * the outstanding kiocbs have finished - but by then io_destroy - * has already returned, so io_setup() could potentially return - * -EAGAIN with no ioctxs actually in use (as far as userspace - * could tell). - */ - aio_nr_sub(ctx->max_reqs); + WARN_ON(ctx != table->table[ctx->id]); + table->table[ctx->id] = NULL; + rcu_read_unlock(); + spin_unlock(&mm->ioctx_lock); - if (ctx->mmap_size) - vm_munmap(ctx->mmap_base, ctx->mmap_size); + /* percpu_ref_kill() will do the necessary call_rcu() */ + wake_up_all(&ctx->wait); - ctx->requests_done = requests_done; - percpu_ref_kill(&ctx->users); - } else { - if (requests_done) - complete(requests_done); - } + /* + * It'd be more correct to do this in free_ioctx(), after all + * the outstanding kiocbs have finished - but by then io_destroy + * has already returned, so io_setup() could potentially return + * -EAGAIN with no ioctxs actually in use (as far as userspace + * could tell). + */ + aio_nr_sub(ctx->max_reqs); + + if (ctx->mmap_size) + vm_munmap(ctx->mmap_base, ctx->mmap_size); + + ctx->requests_done = requests_done; + percpu_ref_kill(&ctx->users); + return 0; } /* wait_on_sync_kiocb: @@ -1219,21 +1219,23 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) if (likely(NULL != ioctx)) { struct completion requests_done = COMPLETION_INITIALIZER_ONSTACK(requests_done); + int ret; /* Pass requests_done to kill_ioctx() where it can be set * in a thread-safe way. If we try to set it here then we have * a race condition if two io_destroy() called simultaneously. */ - kill_ioctx(current->mm, ioctx, &requests_done); + ret = kill_ioctx(current->mm, ioctx, &requests_done); percpu_ref_put(&ioctx->users); /* Wait until all IO for the context are done. Otherwise kernel * keep using user-space buffers even if user thinks the context * is destroyed. 
*/ - wait_for_completion(&requests_done); + if (!ret) + wait_for_completion(&requests_done); - return 0; + return ret; } pr_debug("EINVAL: io_destroy: invalid context id\n"); return -EINVAL; @@ -1595,7 +1597,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, kiocb = lookup_kiocb(ctx, iocb, key); if (kiocb) - ret = kiocb_cancel(ctx, kiocb); + ret = kiocb_cancel(kiocb); else ret = -EINVAL; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f25a9092b94..a389820d158 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2354,7 +2354,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end) { int uptodate = (err == 0); struct extent_io_tree *tree; - int ret; + int ret = 0; tree = &BTRFS_I(page->mapping->host)->io_tree; @@ -5068,6 +5068,43 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, } } +int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv, + unsigned long start, + unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char __user *dst = (char __user *)dstv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + int ret = 0; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); + + while (len > 0) { + page = extent_buffer_page(eb, i); + + cur = min(len, (PAGE_CACHE_SIZE - offset)); + kaddr = page_address(page); + if (copy_to_user(dst, kaddr + offset, cur)) { + ret = -EFAULT; + break; + } + + dst += cur; + len -= cur; + offset = 0; + i++; + } + + return ret; +} + int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, unsigned long min_len, char **map, unsigned long *map_start, diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 8b63f2d4651..15ce5f2a2b6 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -304,6 +304,9 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, void read_extent_buffer(struct extent_buffer *eb, void *dst, unsigned long start, unsigned long len); +int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dst, + unsigned long start, + unsigned long len); void write_extent_buffer(struct extent_buffer *eb, const void *src, unsigned long start, unsigned long len); void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 82c18ba12e3..0d321c23069 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1957,7 +1957,8 @@ static noinline int copy_to_sk(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *key, struct btrfs_ioctl_search_key *sk, - char *buf, + size_t *buf_size, + char __user *ubuf, unsigned long *sk_offset, int *num_found) { @@ -1989,13 +1990,25 @@ static noinline int copy_to_sk(struct btrfs_root *root, if (!key_in_sk(key, sk)) continue; - if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE) + if (sizeof(sh) + item_len > *buf_size) { + if (*num_found) { + ret = 1; + goto out; + } + + /* + * return one empty item back for v1, which does not + * handle -EOVERFLOW + */ + + *buf_size = sizeof(sh) + item_len; item_len = 0; + ret = -EOVERFLOW; + } - if (sizeof(sh) + item_len + *sk_offset > - BTRFS_SEARCH_ARGS_BUFSIZE) { + if (sizeof(sh) + item_len + *sk_offset > *buf_size) { ret = 1; - goto overflow; + goto out; } sh.objectid = key->objectid; @@ -2005,20 +2018,33 @@ static noinline int 
copy_to_sk(struct btrfs_root *root, sh.transid = found_transid; /* copy search result header */ - memcpy(buf + *sk_offset, &sh, sizeof(sh)); + if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) { + ret = -EFAULT; + goto out; + } + *sk_offset += sizeof(sh); if (item_len) { - char *p = buf + *sk_offset; + char __user *up = ubuf + *sk_offset; /* copy the item */ - read_extent_buffer(leaf, p, - item_off, item_len); + if (read_extent_buffer_to_user(leaf, up, + item_off, item_len)) { + ret = -EFAULT; + goto out; + } + *sk_offset += item_len; } (*num_found)++; - if (*num_found >= sk->nr_items) - break; + if (ret) /* -EOVERFLOW from above */ + goto out; + + if (*num_found >= sk->nr_items) { + ret = 1; + goto out; + } } advance_key: ret = 0; @@ -2033,22 +2059,37 @@ advance_key: key->objectid++; } else ret = 1; -overflow: +out: + /* + * 0: all items from this leaf copied, continue with next + * 1: * more items can be copied, but unused buffer is too small + * * all items were found + * Either way, it will stops the loop which iterates to the next + * leaf + * -EOVERFLOW: item was to large for buffer + * -EFAULT: could not copy extent buffer back to userspace + */ return ret; } static noinline int search_ioctl(struct inode *inode, - struct btrfs_ioctl_search_args *args) + struct btrfs_ioctl_search_key *sk, + size_t *buf_size, + char __user *ubuf) { struct btrfs_root *root; struct btrfs_key key; struct btrfs_path *path; - struct btrfs_ioctl_search_key *sk = &args->key; struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info; int ret; int num_found = 0; unsigned long sk_offset = 0; + if (*buf_size < sizeof(struct btrfs_ioctl_search_header)) { + *buf_size = sizeof(struct btrfs_ioctl_search_header); + return -EOVERFLOW; + } + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -2082,14 +2123,15 @@ static noinline int search_ioctl(struct inode *inode, ret = 0; goto err; } - ret = copy_to_sk(root, path, &key, sk, args->buf, + ret = copy_to_sk(root, path, &key, sk, buf_size, ubuf, &sk_offset, &num_found); btrfs_release_path(path); - if (ret || num_found >= sk->nr_items) + if (ret) break; } - ret = 0; + if (ret > 0) + ret = 0; err: sk->nr_items = num_found; btrfs_free_path(path); @@ -2099,22 +2141,73 @@ err: static noinline int btrfs_ioctl_tree_search(struct file *file, void __user *argp) { - struct btrfs_ioctl_search_args *args; - struct inode *inode; - int ret; + struct btrfs_ioctl_search_args __user *uargs; + struct btrfs_ioctl_search_key sk; + struct inode *inode; + int ret; + size_t buf_size; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - args = memdup_user(argp, sizeof(*args)); - if (IS_ERR(args)) - return PTR_ERR(args); + uargs = (struct btrfs_ioctl_search_args __user *)argp; + + if (copy_from_user(&sk, &uargs->key, sizeof(sk))) + return -EFAULT; + + buf_size = sizeof(uargs->buf); inode = file_inode(file); - ret = search_ioctl(inode, args); - if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) + ret = search_ioctl(inode, &sk, &buf_size, uargs->buf); + + /* + * In the origin implementation an overflow is handled by returning a + * search header with a len of zero, so reset ret. 
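 *
 * A userspace sketch of how the v2 ioctl added below can be driven;
 * the loop is illustrative, but the writeback of the required size
 * into buf_size on -EOVERFLOW is what the new code implements:
 *
 *	args->buf_size = 4096;
 *	while (ioctl(fd, BTRFS_IOC_TREE_SEARCH_V2, args) < 0) {
 *		if (errno != EOVERFLOW)
 *			break;
 *		args = realloc(args, sizeof(*args) + args->buf_size);
 *	}
 *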
+ */ + if (ret == -EOVERFLOW) + ret = 0; + + if (ret == 0 && copy_to_user(&uargs->key, &sk, sizeof(sk))) ret = -EFAULT; - kfree(args); + return ret; +} + +static noinline int btrfs_ioctl_tree_search_v2(struct file *file, + void __user *argp) +{ + struct btrfs_ioctl_search_args_v2 __user *uarg; + struct btrfs_ioctl_search_args_v2 args; + struct inode *inode; + int ret; + size_t buf_size; + const size_t buf_limit = 16 * 1024 * 1024; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + /* copy search header and buffer size */ + uarg = (struct btrfs_ioctl_search_args_v2 __user *)argp; + if (copy_from_user(&args, uarg, sizeof(args))) + return -EFAULT; + + buf_size = args.buf_size; + + if (buf_size < sizeof(struct btrfs_ioctl_search_header)) + return -EOVERFLOW; + + /* limit result size to 16MB */ + if (buf_size > buf_limit) + buf_size = buf_limit; + + inode = file_inode(file); + ret = search_ioctl(inode, &args.key, &buf_size, + (char *)(&uarg->buf[0])); + if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key))) + ret = -EFAULT; + else if (ret == -EOVERFLOW && + copy_to_user(&uarg->buf_size, &buf_size, sizeof(buf_size))) + ret = -EFAULT; + return ret; } @@ -5198,6 +5291,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_trans_end(file); case BTRFS_IOC_TREE_SEARCH: return btrfs_ioctl_tree_search(file, argp); + case BTRFS_IOC_TREE_SEARCH_V2: + return btrfs_ioctl_tree_search_v2(file, argp); case BTRFS_IOC_INO_LOOKUP: return btrfs_ioctl_ino_lookup(file, argp); case BTRFS_IOC_INO_PATHS: diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index cf5aead95a7..98cb6b2630f 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1798,8 +1798,10 @@ static int qgroup_shared_accounting(struct btrfs_trans_handle *trans, return -ENOMEM; tmp = ulist_alloc(GFP_NOFS); - if (!tmp) + if (!tmp) { + ulist_free(qgroups); return -ENOMEM; + } btrfs_get_tree_mod_seq(fs_info, &elem); ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq, diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 30947f92362..09230cf3a24 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -428,8 +428,13 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, continue; } if (!dev->bdev) { - /* cannot read ahead on missing device */ - continue; + /* + * cannot read ahead on missing device, but for RAID5/6, + * REQ_GET_READ_MIRRORS return 1. So don't skip missing + * device for such case. 
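 *
 * Concretely: RAID1 reports nzones == 2, so a missing device can be
 * skipped because another mirror still holds the data; for RAID5/6
 * only one zone is reported, which may be the missing device itself,
 * and skipping it would leave the readahead extent with no zone at
 * all.
 *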
+ */ + if (nzones > 1) + continue; } if (dev_replace_is_ongoing && dev == fs_info->dev_replace.tgtdev) { diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index a5dcacb5df9..9626252ee6b 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -135,7 +135,7 @@ restart: radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) { struct extent_buffer *eb; - eb = radix_tree_deref_slot(slot); + eb = radix_tree_deref_slot_protected(slot, &fs_info->buffer_lock); if (!eb) continue; /* Shouldn't happen but that kind of thinking creates CVE's */ diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index fa691b754aa..ec3dcb20235 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -415,6 +415,8 @@ int btrfs_test_qgroups(void) ret = -ENOMEM; goto out; } + btrfs_set_header_level(root->node, 0); + btrfs_set_header_nritems(root->node, 0); root->alloc_bytenr += 8192; tmp_root = btrfs_alloc_dummy_root(); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 9630f10f8e1..511839c04f1 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1284,11 +1284,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, goto fail; } - pending->error = btrfs_qgroup_inherit(trans, fs_info, - root->root_key.objectid, - objectid, pending->inherit); - if (pending->error) - goto no_free_objectid; + ret = btrfs_qgroup_inherit(trans, fs_info, + root->root_key.objectid, + objectid, pending->inherit); + if (ret) { + btrfs_abort_transaction(trans, root, ret); + goto fail; + } /* see comments in should_cow_block() */ set_bit(BTRFS_ROOT_FORCE_COW, &root->state); diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 21887d63dad..469f2e8657e 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -104,12 +104,6 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type) umode_t new_mode = inode->i_mode, old_mode = inode->i_mode; struct dentry *dentry; - if (acl) { - ret = posix_acl_valid(acl); - if (ret < 0) - goto out; - } - switch (type) { case ACL_TYPE_ACCESS: name = POSIX_ACL_XATTR_ACCESS; diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 4f3f69079f3..90b3954d48e 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -211,18 +211,15 @@ static int readpage_nounlock(struct file *filp, struct page *page) SetPageError(page); ceph_fscache_readpage_cancel(inode, page); goto out; - } else { - if (err < PAGE_CACHE_SIZE) { - /* zero fill remainder of page */ - zero_user_segment(page, err, PAGE_CACHE_SIZE); - } else { - flush_dcache_page(page); - } } - SetPageUptodate(page); + if (err < PAGE_CACHE_SIZE) + /* zero fill remainder of page */ + zero_user_segment(page, err, PAGE_CACHE_SIZE); + else + flush_dcache_page(page); - if (err >= 0) - ceph_readpage_to_fscache(inode, page); + SetPageUptodate(page); + ceph_readpage_to_fscache(inode, page); out: return err < 0 ? err : 0; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index c561b628ebc..1fde164b74b 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -221,8 +221,8 @@ int ceph_unreserve_caps(struct ceph_mds_client *mdsc, return 0; } -static struct ceph_cap *get_cap(struct ceph_mds_client *mdsc, - struct ceph_cap_reservation *ctx) +struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, + struct ceph_cap_reservation *ctx) { struct ceph_cap *cap = NULL; @@ -508,15 +508,14 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap, * it is < 0. 
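 *
 * Caller-side shape of the new preallocation contract, as the hunks
 * below use it (a sketch, not additional patch code): allocate
 * outside the spinlock, let ceph_add_cap() consume the object via
 * the double pointer, and release it only if it went unused.
 *
 *	new_cap = ceph_get_cap(mdsc, NULL);
 *	spin_lock(&ci->i_ceph_lock);
 *	ceph_add_cap(inode, session, cap_id, fmode, issued, wanted,
 *		     seq, mseq, realmino, flags, &new_cap);
 *	spin_unlock(&ci->i_ceph_lock);
 *	if (new_cap)
 *		ceph_put_cap(mdsc, new_cap);
 *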
(This is so we can atomically add the cap and add an * open file reference to it.) */ -int ceph_add_cap(struct inode *inode, - struct ceph_mds_session *session, u64 cap_id, - int fmode, unsigned issued, unsigned wanted, - unsigned seq, unsigned mseq, u64 realmino, int flags, - struct ceph_cap_reservation *caps_reservation) +void ceph_add_cap(struct inode *inode, + struct ceph_mds_session *session, u64 cap_id, + int fmode, unsigned issued, unsigned wanted, + unsigned seq, unsigned mseq, u64 realmino, int flags, + struct ceph_cap **new_cap) { struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_cap *new_cap = NULL; struct ceph_cap *cap; int mds = session->s_mds; int actual_wanted; @@ -531,20 +530,10 @@ int ceph_add_cap(struct inode *inode, if (fmode >= 0) wanted |= ceph_caps_for_mode(fmode); -retry: - spin_lock(&ci->i_ceph_lock); cap = __get_cap_for_mds(ci, mds); if (!cap) { - if (new_cap) { - cap = new_cap; - new_cap = NULL; - } else { - spin_unlock(&ci->i_ceph_lock); - new_cap = get_cap(mdsc, caps_reservation); - if (new_cap == NULL) - return -ENOMEM; - goto retry; - } + cap = *new_cap; + *new_cap = NULL; cap->issued = 0; cap->implemented = 0; @@ -562,9 +551,6 @@ retry: session->s_nr_caps++; spin_unlock(&session->s_cap_lock); } else { - if (new_cap) - ceph_put_cap(mdsc, new_cap); - /* * auth mds of the inode changed. we received the cap export * message, but still haven't received the cap import message. @@ -626,7 +612,6 @@ retry: ci->i_auth_cap = cap; cap->mds_wanted = wanted; } - ci->i_cap_exporting_issued = 0; } else { WARN_ON(ci->i_auth_cap == cap); } @@ -648,9 +633,6 @@ retry: if (fmode >= 0) __ceph_get_fmode(ci, fmode); - spin_unlock(&ci->i_ceph_lock); - wake_up_all(&ci->i_cap_wq); - return 0; } /* @@ -685,7 +667,7 @@ static int __cap_is_valid(struct ceph_cap *cap) */ int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented) { - int have = ci->i_snap_caps | ci->i_cap_exporting_issued; + int have = ci->i_snap_caps; struct ceph_cap *cap; struct rb_node *p; @@ -900,7 +882,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci) */ static int __ceph_is_any_caps(struct ceph_inode_info *ci) { - return !RB_EMPTY_ROOT(&ci->i_caps) || ci->i_cap_exporting_issued; + return !RB_EMPTY_ROOT(&ci->i_caps); } int ceph_is_any_caps(struct inode *inode) @@ -2397,32 +2379,30 @@ static void invalidate_aliases(struct inode *inode) * actually be a revocation if it specifies a smaller cap set.) * * caller holds s_mutex and i_ceph_lock, we drop both. 
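 *
 * The __releases(ci->i_ceph_lock) annotation kept on the new
 * prototype is for sparse context checking: a function that exits
 * holding fewer locks than it entered with is flagged unless
 * annotated.  A minimal illustration, with made-up names:
 *
 *	static void finish_and_unlock(struct foo *f)
 *		__releases(f->lock)
 *	{
 *		do_work(f);
 *		spin_unlock(&f->lock);
 *	}
 *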
- * - * return value: - * 0 - ok - * 1 - check_caps on auth cap only (writeback) - * 2 - check_caps (ack revoke) */ -static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, +static void handle_cap_grant(struct ceph_mds_client *mdsc, + struct inode *inode, struct ceph_mds_caps *grant, + void *snaptrace, int snaptrace_len, + struct ceph_buffer *xattr_buf, struct ceph_mds_session *session, - struct ceph_cap *cap, - struct ceph_buffer *xattr_buf) - __releases(ci->i_ceph_lock) + struct ceph_cap *cap, int issued) + __releases(ci->i_ceph_lock) { struct ceph_inode_info *ci = ceph_inode(inode); int mds = session->s_mds; int seq = le32_to_cpu(grant->seq); int newcaps = le32_to_cpu(grant->caps); - int issued, implemented, used, wanted, dirty; + int used, wanted, dirty; u64 size = le64_to_cpu(grant->size); u64 max_size = le64_to_cpu(grant->max_size); struct timespec mtime, atime, ctime; int check_caps = 0; - int wake = 0; - int writeback = 0; - int queue_invalidate = 0; - int deleted_inode = 0; - int queue_revalidate = 0; + bool wake = 0; + bool writeback = 0; + bool queue_trunc = 0; + bool queue_invalidate = 0; + bool queue_revalidate = 0; + bool deleted_inode = 0; dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", inode, cap, mds, seq, ceph_cap_string(newcaps)); @@ -2466,16 +2446,13 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, } /* side effects now are allowed */ - - issued = __ceph_caps_issued(ci, &implemented); - issued |= implemented | __ceph_caps_dirty(ci); - cap->cap_gen = session->s_cap_gen; cap->seq = seq; __check_cap_issue(ci, cap, newcaps); - if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { + if ((newcaps & CEPH_CAP_AUTH_SHARED) && + (issued & CEPH_CAP_AUTH_EXCL) == 0) { inode->i_mode = le32_to_cpu(grant->mode); inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid)); inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid)); @@ -2484,7 +2461,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, from_kgid(&init_user_ns, inode->i_gid)); } - if ((issued & CEPH_CAP_LINK_EXCL) == 0) { + if ((newcaps & CEPH_CAP_AUTH_SHARED) && + (issued & CEPH_CAP_LINK_EXCL) == 0) { set_nlink(inode, le32_to_cpu(grant->nlink)); if (inode->i_nlink == 0 && (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL))) @@ -2511,30 +2489,35 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1) queue_revalidate = 1; - /* size/ctime/mtime/atime? */ - ceph_fill_file_size(inode, issued, - le32_to_cpu(grant->truncate_seq), - le64_to_cpu(grant->truncate_size), size); - ceph_decode_timespec(&mtime, &grant->mtime); - ceph_decode_timespec(&atime, &grant->atime); - ceph_decode_timespec(&ctime, &grant->ctime); - ceph_fill_file_time(inode, issued, - le32_to_cpu(grant->time_warp_seq), &ctime, &mtime, - &atime); - - - /* file layout may have changed */ - ci->i_layout = grant->layout; - - /* max size increase? */ - if (ci->i_auth_cap == cap && max_size != ci->i_max_size) { - dout("max_size %lld -> %llu\n", ci->i_max_size, max_size); - ci->i_max_size = max_size; - if (max_size >= ci->i_wanted_max_size) { - ci->i_wanted_max_size = 0; /* reset */ - ci->i_requested_max_size = 0; + if (newcaps & CEPH_CAP_ANY_RD) { + /* ctime/mtime/atime? 
*/ + ceph_decode_timespec(&mtime, &grant->mtime); + ceph_decode_timespec(&atime, &grant->atime); + ceph_decode_timespec(&ctime, &grant->ctime); + ceph_fill_file_time(inode, issued, + le32_to_cpu(grant->time_warp_seq), + &ctime, &mtime, &atime); + } + + if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) { + /* file layout may have changed */ + ci->i_layout = grant->layout; + /* size/truncate_seq? */ + queue_trunc = ceph_fill_file_size(inode, issued, + le32_to_cpu(grant->truncate_seq), + le64_to_cpu(grant->truncate_size), + size); + /* max size increase? */ + if (ci->i_auth_cap == cap && max_size != ci->i_max_size) { + dout("max_size %lld -> %llu\n", + ci->i_max_size, max_size); + ci->i_max_size = max_size; + if (max_size >= ci->i_wanted_max_size) { + ci->i_wanted_max_size = 0; /* reset */ + ci->i_requested_max_size = 0; + } + wake = 1; } - wake = 1; } /* check cap bits */ @@ -2595,6 +2578,23 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, spin_unlock(&ci->i_ceph_lock); + if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) { + down_write(&mdsc->snap_rwsem); + ceph_update_snap_trace(mdsc, snaptrace, + snaptrace + snaptrace_len, false); + downgrade_write(&mdsc->snap_rwsem); + kick_flushing_inode_caps(mdsc, session, inode); + up_read(&mdsc->snap_rwsem); + if (newcaps & ~issued) + wake = 1; + } + + if (queue_trunc) { + ceph_queue_vmtruncate(inode); + ceph_queue_revalidate(inode); + } else if (queue_revalidate) + ceph_queue_revalidate(inode); + if (writeback) /* * queue inode for writeback: we can't actually call @@ -2606,8 +2606,6 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, ceph_queue_invalidate(inode); if (deleted_inode) invalidate_aliases(inode); - if (queue_revalidate) - ceph_queue_revalidate(inode); if (wake) wake_up_all(&ci->i_cap_wq); @@ -2784,7 +2782,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, { struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; struct ceph_mds_session *tsession = NULL; - struct ceph_cap *cap, *tcap; + struct ceph_cap *cap, *tcap, *new_cap = NULL; struct ceph_inode_info *ci = ceph_inode(inode); u64 t_cap_id; unsigned mseq = le32_to_cpu(ex->migrate_seq); @@ -2807,7 +2805,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, retry: spin_lock(&ci->i_ceph_lock); cap = __get_cap_for_mds(ci, mds); - if (!cap) + if (!cap || cap->cap_id != le64_to_cpu(ex->cap_id)) goto out_unlock; if (target < 0) { @@ -2846,15 +2844,14 @@ retry: } __ceph_remove_cap(cap, false); goto out_unlock; - } - - if (tsession) { - int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0; - spin_unlock(&ci->i_ceph_lock); + } else if (tsession) { /* add placeholder for the export tagert */ + int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0; ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0, - t_seq - 1, t_mseq, (u64)-1, flag, NULL); - goto retry; + t_seq - 1, t_mseq, (u64)-1, flag, &new_cap); + + __ceph_remove_cap(cap, false); + goto out_unlock; } spin_unlock(&ci->i_ceph_lock); @@ -2873,6 +2870,7 @@ retry: SINGLE_DEPTH_NESTING); } ceph_add_cap_releases(mdsc, tsession); + new_cap = ceph_get_cap(mdsc, NULL); } else { WARN_ON(1); tsession = NULL; @@ -2887,24 +2885,27 @@ out_unlock: mutex_unlock(&tsession->s_mutex); ceph_put_mds_session(tsession); } + if (new_cap) + ceph_put_cap(mdsc, new_cap); } /* - * Handle cap IMPORT. If there are temp bits from an older EXPORT, - * clean them up. + * Handle cap IMPORT. * - * caller holds s_mutex. 
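 *
 * The import path below uses the classic drop-the-lock-to-allocate
 * retry loop, sketched schematically here: the cap allocation may
 * sleep, so it cannot run under i_ceph_lock; drop the lock, allocate,
 * retake it and re-check.
 *
 *	retry:
 *		spin_lock(&ci->i_ceph_lock);
 *		cap = __get_cap_for_mds(ci, mds);
 *		if (!cap && !new_cap) {
 *			spin_unlock(&ci->i_ceph_lock);
 *			new_cap = ceph_get_cap(mdsc, NULL);
 *			goto retry;
 *		}
 *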
+ * caller holds s_mutex. acquires i_ceph_lock */ static void handle_cap_import(struct ceph_mds_client *mdsc, struct inode *inode, struct ceph_mds_caps *im, struct ceph_mds_cap_peer *ph, struct ceph_mds_session *session, - void *snaptrace, int snaptrace_len) + struct ceph_cap **target_cap, int *old_issued) + __acquires(ci->i_ceph_lock) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_cap *cap; + struct ceph_cap *cap, *ocap, *new_cap = NULL; int mds = session->s_mds; - unsigned issued = le32_to_cpu(im->caps); + int issued; + unsigned caps = le32_to_cpu(im->caps); unsigned wanted = le32_to_cpu(im->wanted); unsigned seq = le32_to_cpu(im->seq); unsigned mseq = le32_to_cpu(im->migrate_seq); @@ -2924,40 +2925,52 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, dout("handle_cap_import inode %p ci %p mds%d mseq %d peer %d\n", inode, ci, mds, mseq, peer); +retry: spin_lock(&ci->i_ceph_lock); - cap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL; - if (cap && cap->cap_id == p_cap_id) { + cap = __get_cap_for_mds(ci, mds); + if (!cap) { + if (!new_cap) { + spin_unlock(&ci->i_ceph_lock); + new_cap = ceph_get_cap(mdsc, NULL); + goto retry; + } + cap = new_cap; + } else { + if (new_cap) { + ceph_put_cap(mdsc, new_cap); + new_cap = NULL; + } + } + + __ceph_caps_issued(ci, &issued); + issued |= __ceph_caps_dirty(ci); + + ceph_add_cap(inode, session, cap_id, -1, caps, wanted, seq, mseq, + realmino, CEPH_CAP_FLAG_AUTH, &new_cap); + + ocap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL; + if (ocap && ocap->cap_id == p_cap_id) { dout(" remove export cap %p mds%d flags %d\n", - cap, peer, ph->flags); + ocap, peer, ph->flags); if ((ph->flags & CEPH_CAP_FLAG_AUTH) && - (cap->seq != le32_to_cpu(ph->seq) || - cap->mseq != le32_to_cpu(ph->mseq))) { + (ocap->seq != le32_to_cpu(ph->seq) || + ocap->mseq != le32_to_cpu(ph->mseq))) { pr_err("handle_cap_import: mismatched seq/mseq: " "ino (%llx.%llx) mds%d seq %d mseq %d " "importer mds%d has peer seq %d mseq %d\n", - ceph_vinop(inode), peer, cap->seq, - cap->mseq, mds, le32_to_cpu(ph->seq), + ceph_vinop(inode), peer, ocap->seq, + ocap->mseq, mds, le32_to_cpu(ph->seq), le32_to_cpu(ph->mseq)); } - ci->i_cap_exporting_issued = cap->issued; - __ceph_remove_cap(cap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); + __ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); } /* make sure we re-request max_size, if necessary */ ci->i_wanted_max_size = 0; ci->i_requested_max_size = 0; - spin_unlock(&ci->i_ceph_lock); - - down_write(&mdsc->snap_rwsem); - ceph_update_snap_trace(mdsc, snaptrace, snaptrace+snaptrace_len, - false); - downgrade_write(&mdsc->snap_rwsem); - ceph_add_cap(inode, session, cap_id, -1, - issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH, - NULL /* no caps context */); - kick_flushing_inode_caps(mdsc, session, inode); - up_read(&mdsc->snap_rwsem); + *old_issued = issued; + *target_cap = cap; } /* @@ -2977,7 +2990,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, struct ceph_mds_caps *h; struct ceph_mds_cap_peer *peer = NULL; int mds = session->s_mds; - int op; + int op, issued; u32 seq, mseq; struct ceph_vino vino; u64 cap_id; @@ -3069,7 +3082,10 @@ void ceph_handle_caps(struct ceph_mds_session *session, case CEPH_CAP_OP_IMPORT: handle_cap_import(mdsc, inode, h, peer, session, - snaptrace, snaptrace_len); + &cap, &issued); + handle_cap_grant(mdsc, inode, h, snaptrace, snaptrace_len, + msg->middle, session, cap, issued); + goto done_unlocked; } /* the rest require a cap */ @@ -3086,8 +3102,10 @@ void 
ceph_handle_caps(struct ceph_mds_session *session, switch (op) { case CEPH_CAP_OP_REVOKE: case CEPH_CAP_OP_GRANT: - case CEPH_CAP_OP_IMPORT: - handle_cap_grant(inode, h, session, cap, msg->middle); + __ceph_caps_issued(ci, &issued); + issued |= __ceph_caps_dirty(ci); + handle_cap_grant(mdsc, inode, h, NULL, 0, msg->middle, + session, cap, issued); goto done_unlocked; case CEPH_CAP_OP_FLUSH_ACK: diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 00d6af6a32e..8d7d782f438 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -169,7 +169,7 @@ static struct dentry *__get_parent(struct super_block *sb, return dentry; } -struct dentry *ceph_get_parent(struct dentry *child) +static struct dentry *ceph_get_parent(struct dentry *child) { /* don't re-export snaps */ if (ceph_snap(child->d_inode) != CEPH_NOSNAP) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index e4fff9ff1c2..04c89c266ce 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -10,6 +10,7 @@ #include <linux/writeback.h> #include <linux/vmalloc.h> #include <linux/posix_acl.h> +#include <linux/random.h> #include "super.h" #include "mds_client.h" @@ -179,9 +180,8 @@ struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, u32 f) * specified, copy the frag delegation info to the caller if * it is present. */ -u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, - struct ceph_inode_frag *pfrag, - int *found) +static u32 __ceph_choose_frag(struct ceph_inode_info *ci, u32 v, + struct ceph_inode_frag *pfrag, int *found) { u32 t = ceph_frag_make(0, 0); struct ceph_inode_frag *frag; @@ -191,7 +191,6 @@ u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, if (found) *found = 0; - mutex_lock(&ci->i_fragtree_mutex); while (1) { WARN_ON(!ceph_frag_contains_value(t, v)); frag = __ceph_find_frag(ci, t); @@ -220,10 +219,19 @@ u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, } dout("choose_frag(%x) = %x\n", v, t); - mutex_unlock(&ci->i_fragtree_mutex); return t; } +u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, + struct ceph_inode_frag *pfrag, int *found) +{ + u32 ret; + mutex_lock(&ci->i_fragtree_mutex); + ret = __ceph_choose_frag(ci, v, pfrag, found); + mutex_unlock(&ci->i_fragtree_mutex); + return ret; +} + /* * Process dirfrag (delegation) info from the mds. Include leaf * fragment in tree ONLY if ndist > 0. Otherwise, only @@ -237,11 +245,17 @@ static int ceph_fill_dirfrag(struct inode *inode, u32 id = le32_to_cpu(dirinfo->frag); int mds = le32_to_cpu(dirinfo->auth); int ndist = le32_to_cpu(dirinfo->ndist); + int diri_auth = -1; int i; int err = 0; + spin_lock(&ci->i_ceph_lock); + if (ci->i_auth_cap) + diri_auth = ci->i_auth_cap->mds; + spin_unlock(&ci->i_ceph_lock); + mutex_lock(&ci->i_fragtree_mutex); - if (ndist == 0) { + if (ndist == 0 && mds == diri_auth) { /* no delegation info needed. 
*/ frag = __ceph_find_frag(ci, id); if (!frag) @@ -286,6 +300,75 @@ out: return err; } +static int ceph_fill_fragtree(struct inode *inode, + struct ceph_frag_tree_head *fragtree, + struct ceph_mds_reply_dirfrag *dirinfo) +{ + struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_inode_frag *frag; + struct rb_node *rb_node; + int i; + u32 id, nsplits; + bool update = false; + + mutex_lock(&ci->i_fragtree_mutex); + nsplits = le32_to_cpu(fragtree->nsplits); + if (nsplits) { + i = prandom_u32() % nsplits; + id = le32_to_cpu(fragtree->splits[i].frag); + if (!__ceph_find_frag(ci, id)) + update = true; + } else if (!RB_EMPTY_ROOT(&ci->i_fragtree)) { + rb_node = rb_first(&ci->i_fragtree); + frag = rb_entry(rb_node, struct ceph_inode_frag, node); + if (frag->frag != ceph_frag_make(0, 0) || rb_next(rb_node)) + update = true; + } + if (!update && dirinfo) { + id = le32_to_cpu(dirinfo->frag); + if (id != __ceph_choose_frag(ci, id, NULL, NULL)) + update = true; + } + if (!update) + goto out_unlock; + + dout("fill_fragtree %llx.%llx\n", ceph_vinop(inode)); + rb_node = rb_first(&ci->i_fragtree); + for (i = 0; i < nsplits; i++) { + id = le32_to_cpu(fragtree->splits[i].frag); + frag = NULL; + while (rb_node) { + frag = rb_entry(rb_node, struct ceph_inode_frag, node); + if (ceph_frag_compare(frag->frag, id) >= 0) { + if (frag->frag != id) + frag = NULL; + else + rb_node = rb_next(rb_node); + break; + } + rb_node = rb_next(rb_node); + rb_erase(&frag->node, &ci->i_fragtree); + kfree(frag); + frag = NULL; + } + if (!frag) { + frag = __get_or_create_frag(ci, id); + if (IS_ERR(frag)) + continue; + } + frag->split_by = le32_to_cpu(fragtree->splits[i].by); + dout(" frag %x split by %d\n", frag->frag, frag->split_by); + } + while (rb_node) { + frag = rb_entry(rb_node, struct ceph_inode_frag, node); + rb_node = rb_next(rb_node); + rb_erase(&frag->node, &ci->i_fragtree); + kfree(frag); + } +out_unlock: + mutex_unlock(&ci->i_fragtree_mutex); + return 0; +} /* * initialize a newly allocated inode. @@ -341,7 +424,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb) INIT_LIST_HEAD(&ci->i_cap_snaps); ci->i_head_snapc = NULL; ci->i_snap_caps = 0; - ci->i_cap_exporting_issued = 0; for (i = 0; i < CEPH_FILE_MODE_NUM; i++) ci->i_nr_by_mode[i] = 0; @@ -407,7 +489,7 @@ void ceph_destroy_inode(struct inode *inode) /* * we may still have a snap_realm reference if there are stray - * caps in i_cap_exporting_issued or i_snap_caps. + * caps in i_snap_caps. */ if (ci->i_snap_realm) { struct ceph_mds_client *mdsc = @@ -582,22 +664,26 @@ static int fill_inode(struct inode *inode, unsigned long ttl_from, int cap_fmode, struct ceph_cap_reservation *caps_reservation) { + struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; struct ceph_mds_reply_inode *info = iinfo->in; struct ceph_inode_info *ci = ceph_inode(inode); - int i; - int issued = 0, implemented; + int issued = 0, implemented, new_issued; struct timespec mtime, atime, ctime; - u32 nsplits; - struct ceph_inode_frag *frag; - struct rb_node *rb_node; struct ceph_buffer *xattr_blob = NULL; + struct ceph_cap *new_cap = NULL; int err = 0; - int queue_trunc = 0; + bool wake = false; + bool queue_trunc = false; + bool new_version = false; dout("fill_inode %p ino %llx.%llx v %llu had %llu\n", inode, ceph_vinop(inode), le64_to_cpu(info->version), ci->i_version); + /* prealloc new cap struct */ + if (info->cap.caps && ceph_snap(inode) == CEPH_NOSNAP) + new_cap = ceph_get_cap(mdsc, caps_reservation); + /* * prealloc xattr data, if it looks like we'll need it. 
only * if len > 4 (meaning there are actually xattrs; the first 4 @@ -623,19 +709,23 @@ static int fill_inode(struct inode *inode, * 3 2 skip * 3 3 update */ - if (le64_to_cpu(info->version) > 0 && - (ci->i_version & ~1) >= le64_to_cpu(info->version)) - goto no_change; - + if (ci->i_version == 0 || + ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && + le64_to_cpu(info->version) > (ci->i_version & ~1))) + new_version = true; + issued = __ceph_caps_issued(ci, &implemented); issued |= implemented | __ceph_caps_dirty(ci); + new_issued = ~issued & le32_to_cpu(info->cap.caps); /* update inode */ ci->i_version = le64_to_cpu(info->version); inode->i_version++; inode->i_rdev = le32_to_cpu(info->rdev); + inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; - if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { + if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) && + (issued & CEPH_CAP_AUTH_EXCL) == 0) { inode->i_mode = le32_to_cpu(info->mode); inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid)); inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid)); @@ -644,23 +734,35 @@ static int fill_inode(struct inode *inode, from_kgid(&init_user_ns, inode->i_gid)); } - if ((issued & CEPH_CAP_LINK_EXCL) == 0) + if ((new_version || (new_issued & CEPH_CAP_LINK_SHARED)) && + (issued & CEPH_CAP_LINK_EXCL) == 0) set_nlink(inode, le32_to_cpu(info->nlink)); - /* be careful with mtime, atime, size */ - ceph_decode_timespec(&atime, &info->atime); - ceph_decode_timespec(&mtime, &info->mtime); - ceph_decode_timespec(&ctime, &info->ctime); - queue_trunc = ceph_fill_file_size(inode, issued, - le32_to_cpu(info->truncate_seq), - le64_to_cpu(info->truncate_size), - le64_to_cpu(info->size)); - ceph_fill_file_time(inode, issued, - le32_to_cpu(info->time_warp_seq), - &ctime, &mtime, &atime); - - ci->i_layout = info->layout; - inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; + if (new_version || (new_issued & CEPH_CAP_ANY_RD)) { + /* be careful with mtime, atime, size */ + ceph_decode_timespec(&atime, &info->atime); + ceph_decode_timespec(&mtime, &info->mtime); + ceph_decode_timespec(&ctime, &info->ctime); + ceph_fill_file_time(inode, issued, + le32_to_cpu(info->time_warp_seq), + &ctime, &mtime, &atime); + } + + if (new_version || + (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) { + ci->i_layout = info->layout; + queue_trunc = ceph_fill_file_size(inode, issued, + le32_to_cpu(info->truncate_seq), + le64_to_cpu(info->truncate_size), + le64_to_cpu(info->size)); + /* only update max_size on auth cap */ + if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && + ci->i_max_size != le64_to_cpu(info->max_size)) { + dout("max_size %lld -> %llu\n", ci->i_max_size, + le64_to_cpu(info->max_size)); + ci->i_max_size = le64_to_cpu(info->max_size); + } + } /* xattrs */ /* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. 
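 *
 * Worked example for the new_issued gating above: with
 * issued = 0b0110 and info->cap.caps = 0b1100,
 * new_issued = ~issued & caps = 0b1000, so only the cap bit the
 * client did not already hold (combined with the new_version check)
 * triggers a refresh of the corresponding metadata.
 *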
*/ @@ -745,58 +847,6 @@ static int fill_inode(struct inode *inode, dout(" marking %p complete (empty)\n", inode); __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); } -no_change: - /* only update max_size on auth cap */ - if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && - ci->i_max_size != le64_to_cpu(info->max_size)) { - dout("max_size %lld -> %llu\n", ci->i_max_size, - le64_to_cpu(info->max_size)); - ci->i_max_size = le64_to_cpu(info->max_size); - } - - spin_unlock(&ci->i_ceph_lock); - - /* queue truncate if we saw i_size decrease */ - if (queue_trunc) - ceph_queue_vmtruncate(inode); - - /* populate frag tree */ - /* FIXME: move me up, if/when version reflects fragtree changes */ - nsplits = le32_to_cpu(info->fragtree.nsplits); - mutex_lock(&ci->i_fragtree_mutex); - rb_node = rb_first(&ci->i_fragtree); - for (i = 0; i < nsplits; i++) { - u32 id = le32_to_cpu(info->fragtree.splits[i].frag); - frag = NULL; - while (rb_node) { - frag = rb_entry(rb_node, struct ceph_inode_frag, node); - if (ceph_frag_compare(frag->frag, id) >= 0) { - if (frag->frag != id) - frag = NULL; - else - rb_node = rb_next(rb_node); - break; - } - rb_node = rb_next(rb_node); - rb_erase(&frag->node, &ci->i_fragtree); - kfree(frag); - frag = NULL; - } - if (!frag) { - frag = __get_or_create_frag(ci, id); - if (IS_ERR(frag)) - continue; - } - frag->split_by = le32_to_cpu(info->fragtree.splits[i].by); - dout(" frag %x split by %d\n", frag->frag, frag->split_by); - } - while (rb_node) { - frag = rb_entry(rb_node, struct ceph_inode_frag, node); - rb_node = rb_next(rb_node); - rb_erase(&frag->node, &ci->i_fragtree); - kfree(frag); - } - mutex_unlock(&ci->i_fragtree_mutex); /* were we issued a capability? */ if (info->cap.caps) { @@ -809,30 +859,41 @@ no_change: le32_to_cpu(info->cap.seq), le32_to_cpu(info->cap.mseq), le64_to_cpu(info->cap.realm), - info->cap.flags, - caps_reservation); + info->cap.flags, &new_cap); + wake = true; } else { - spin_lock(&ci->i_ceph_lock); dout(" %p got snap_caps %s\n", inode, ceph_cap_string(le32_to_cpu(info->cap.caps))); ci->i_snap_caps |= le32_to_cpu(info->cap.caps); if (cap_fmode >= 0) __ceph_get_fmode(ci, cap_fmode); - spin_unlock(&ci->i_ceph_lock); } } else if (cap_fmode >= 0) { pr_warn("mds issued no caps on %llx.%llx\n", ceph_vinop(inode)); __ceph_get_fmode(ci, cap_fmode); } + spin_unlock(&ci->i_ceph_lock); + + if (wake) + wake_up_all(&ci->i_cap_wq); + + /* queue truncate if we saw i_size decrease */ + if (queue_trunc) + ceph_queue_vmtruncate(inode); + + /* populate frag tree */ + if (S_ISDIR(inode->i_mode)) + ceph_fill_fragtree(inode, &info->fragtree, dirinfo); /* update delegation info? 
*/ if (dirinfo) ceph_fill_dirfrag(inode, dirinfo); err = 0; - out: + if (new_cap) + ceph_put_cap(mdsc, new_cap); if (xattr_blob) ceph_buffer_put(xattr_blob); return err; @@ -1485,7 +1546,7 @@ static void ceph_invalidate_work(struct work_struct *work) orig_gen = ci->i_rdcache_gen; spin_unlock(&ci->i_ceph_lock); - truncate_inode_pages(inode->i_mapping, 0); + truncate_pagecache(inode, 0); spin_lock(&ci->i_ceph_lock); if (orig_gen == ci->i_rdcache_gen && @@ -1588,7 +1649,7 @@ retry: ci->i_truncate_pending, to); spin_unlock(&ci->i_ceph_lock); - truncate_inode_pages(inode->i_mapping, to); + truncate_pagecache(inode, to); spin_lock(&ci->i_ceph_lock); if (to == ci->i_truncate_size) { diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 9a33b98cb00..92a2548278f 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1558,6 +1558,8 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) init_completion(&req->r_safe_completion); INIT_LIST_HEAD(&req->r_unsafe_item); + req->r_stamp = CURRENT_TIME; + req->r_op = op; req->r_direct_mode = mode; return req; @@ -1783,7 +1785,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, } len = sizeof(*head) + - pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)); + pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) + + sizeof(struct timespec); /* calculate (max) length for cap releases */ len += sizeof(struct ceph_mds_request_release) * @@ -1800,6 +1803,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, goto out_free2; } + msg->hdr.version = 2; msg->hdr.tid = cpu_to_le64(req->r_tid); head = msg->front.iov_base; @@ -1836,6 +1840,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, mds, req->r_old_inode_drop, req->r_old_inode_unless, 0); head->num_releases = cpu_to_le16(releases); + /* time stamp */ + ceph_encode_copy(&p, &req->r_stamp, sizeof(req->r_stamp)); + BUG_ON(p > end); msg->front.iov_len = p - msg->front.iov_base; msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index e90cfccf93b..e00737cf523 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -194,6 +194,7 @@ struct ceph_mds_request { int r_fmode; /* file mode, if expecting cap */ kuid_t r_uid; kgid_t r_gid; + struct timespec r_stamp; /* for choosing which mds to send this request to */ int r_direct_mode; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index ead05cc1f44..12b20744e38 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -292,7 +292,6 @@ struct ceph_inode_info { struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or dirty|flushing caps */ unsigned i_snap_caps; /* cap bits for snapped files */ - unsigned i_cap_exporting_issued; int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */ @@ -775,11 +774,13 @@ static inline void ceph_forget_all_cached_acls(struct inode *inode) extern const char *ceph_cap_string(int c); extern void ceph_handle_caps(struct ceph_mds_session *session, struct ceph_msg *msg); -extern int ceph_add_cap(struct inode *inode, - struct ceph_mds_session *session, u64 cap_id, - int fmode, unsigned issued, unsigned wanted, - unsigned cap, unsigned seq, u64 realmino, int flags, - struct ceph_cap_reservation *caps_reservation); +extern struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, + struct ceph_cap_reservation *ctx); +extern void ceph_add_cap(struct inode *inode, + struct ceph_mds_session *session, u64 cap_id, + int fmode, unsigned issued, 
unsigned wanted, + unsigned cap, unsigned seq, u64 realmino, int flags, + struct ceph_cap **new_cap); extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release); extern void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap); diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 1e5b4535950..d08e079ea5d 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -617,6 +617,11 @@ static void retry_failed_sctp_send(struct connection *recv_con, int nodeid = sn_send_failed->ssf_info.sinfo_ppid; log_print("Retry sending %d bytes to node id %d", len, nodeid); + + if (!nodeid) { + log_print("Shouldn't resend data via listening connection."); + return; + } con = nodeid2con(nodeid, 0); if (!con) { diff --git a/fs/exec.c b/fs/exec.c index 238b7aa26f6..a3d33fe592d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1046,13 +1046,13 @@ EXPORT_SYMBOL_GPL(get_task_comm); * so that a new one can be started */ -void set_task_comm(struct task_struct *tsk, const char *buf) +void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec) { task_lock(tsk); trace_task_rename(tsk, buf); strlcpy(tsk->comm, buf, sizeof(tsk->comm)); task_unlock(tsk); - perf_event_comm(tsk); + perf_event_comm(tsk, exec); } int flush_old_exec(struct linux_binprm * bprm) @@ -1110,7 +1110,8 @@ void setup_new_exec(struct linux_binprm * bprm) else set_dumpable(current->mm, suid_dumpable); - set_task_comm(current, kbasename(bprm->filename)); + perf_event_exec(); + __set_task_comm(current, kbasename(bprm->filename), true); /* Set the new mm task size. We have to do that late because it may * depend on TIF_32BIT which is only updated in flush_thread() on diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h new file mode 100644 index 00000000000..6383d54bf98 --- /dev/null +++ b/include/asm-generic/qrwlock.h @@ -0,0 +1,166 @@ +/* + * Queue read/write lock + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P. + * + * Authors: Waiman Long <waiman.long@hp.com> + */ +#ifndef __ASM_GENERIC_QRWLOCK_H +#define __ASM_GENERIC_QRWLOCK_H + +#include <linux/atomic.h> +#include <asm/barrier.h> +#include <asm/processor.h> + +#include <asm-generic/qrwlock_types.h> + +/* + * Writer states & reader shift and bias + */ +#define _QW_WAITING 1 /* A writer is waiting */ +#define _QW_LOCKED 0xff /* A writer holds the lock */ +#define _QW_WMASK 0xff /* Writer mask */ +#define _QR_SHIFT 8 /* Reader count shift */ +#define _QR_BIAS (1U << _QR_SHIFT) + +/* + * External function declarations + */ +extern void queue_read_lock_slowpath(struct qrwlock *lock); +extern void queue_write_lock_slowpath(struct qrwlock *lock); + +/** + * queue_read_can_lock- would read_trylock() succeed? + * @lock: Pointer to queue rwlock structure + */ +static inline int queue_read_can_lock(struct qrwlock *lock) +{ + return !(atomic_read(&lock->cnts) & _QW_WMASK); +} + +/** + * queue_write_can_lock- would write_trylock() succeed? 
+ * @lock: Pointer to queue rwlock structure + */ +static inline int queue_write_can_lock(struct qrwlock *lock) +{ + return !atomic_read(&lock->cnts); +} + +/** + * queue_read_trylock - try to acquire read lock of a queue rwlock + * @lock : Pointer to queue rwlock structure + * Return: 1 if lock acquired, 0 if failed + */ +static inline int queue_read_trylock(struct qrwlock *lock) +{ + u32 cnts; + + cnts = atomic_read(&lock->cnts); + if (likely(!(cnts & _QW_WMASK))) { + cnts = (u32)atomic_add_return(_QR_BIAS, &lock->cnts); + if (likely(!(cnts & _QW_WMASK))) + return 1; + atomic_sub(_QR_BIAS, &lock->cnts); + } + return 0; +} + +/** + * queue_write_trylock - try to acquire write lock of a queue rwlock + * @lock : Pointer to queue rwlock structure + * Return: 1 if lock acquired, 0 if failed + */ +static inline int queue_write_trylock(struct qrwlock *lock) +{ + u32 cnts; + + cnts = atomic_read(&lock->cnts); + if (unlikely(cnts)) + return 0; + + return likely(atomic_cmpxchg(&lock->cnts, + cnts, cnts | _QW_LOCKED) == cnts); +} +/** + * queue_read_lock - acquire read lock of a queue rwlock + * @lock: Pointer to queue rwlock structure + */ +static inline void queue_read_lock(struct qrwlock *lock) +{ + u32 cnts; + + cnts = atomic_add_return(_QR_BIAS, &lock->cnts); + if (likely(!(cnts & _QW_WMASK))) + return; + + /* The slowpath will decrement the reader count, if necessary. */ + queue_read_lock_slowpath(lock); +} + +/** + * queue_write_lock - acquire write lock of a queue rwlock + * @lock : Pointer to queue rwlock structure + */ +static inline void queue_write_lock(struct qrwlock *lock) +{ + /* Optimize for the unfair lock case where the fair flag is 0. */ + if (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0) + return; + + queue_write_lock_slowpath(lock); +} + +/** + * queue_read_unlock - release read lock of a queue rwlock + * @lock : Pointer to queue rwlock structure + */ +static inline void queue_read_unlock(struct qrwlock *lock) +{ + /* + * Atomically decrement the reader count + */ + smp_mb__before_atomic(); + atomic_sub(_QR_BIAS, &lock->cnts); +} + +#ifndef queue_write_unlock +/** + * queue_write_unlock - release write lock of a queue rwlock + * @lock : Pointer to queue rwlock structure + */ +static inline void queue_write_unlock(struct qrwlock *lock) +{ + /* + * If the writer field is atomic, it can be cleared directly. + * Otherwise, an atomic subtraction will be used to clear it. + */ + smp_mb__before_atomic(); + atomic_sub(_QW_LOCKED, &lock->cnts); +} +#endif + +/* + * Remapping rwlock architecture specific functions to the corresponding + * queue rwlock functions. 
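 *
 * Worked example of the cnts layout defined above: readers are
 * counted above _QR_SHIFT and the writer byte sits below it.  Three
 * active readers give cnts = 3 << _QR_SHIFT = 0x300; a held write
 * lock gives cnts = _QW_LOCKED = 0xff; queue_read_trylock() can only
 * succeed while (cnts & _QW_WMASK) == 0.
 *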
+ */ +#define arch_read_can_lock(l) queue_read_can_lock(l) +#define arch_write_can_lock(l) queue_write_can_lock(l) +#define arch_read_lock(l) queue_read_lock(l) +#define arch_write_lock(l) queue_write_lock(l) +#define arch_read_trylock(l) queue_read_trylock(l) +#define arch_write_trylock(l) queue_write_trylock(l) +#define arch_read_unlock(l) queue_read_unlock(l) +#define arch_write_unlock(l) queue_write_unlock(l) + +#endif /* __ASM_GENERIC_QRWLOCK_H */ diff --git a/include/asm-generic/qrwlock_types.h b/include/asm-generic/qrwlock_types.h new file mode 100644 index 00000000000..4d76f24df51 --- /dev/null +++ b/include/asm-generic/qrwlock_types.h @@ -0,0 +1,21 @@ +#ifndef __ASM_GENERIC_QRWLOCK_TYPES_H +#define __ASM_GENERIC_QRWLOCK_TYPES_H + +#include <linux/types.h> +#include <asm/spinlock_types.h> + +/* + * The queue read/write lock data structure + */ + +typedef struct qrwlock { + atomic_t cnts; + arch_spinlock_t lock; +} arch_rwlock_t; + +#define __ARCH_RW_LOCK_UNLOCKED { \ + .cnts = ATOMIC_INIT(0), \ + .lock = __ARCH_SPIN_LOCK_UNLOCKED, \ +} + +#endif /* __ASM_GENERIC_QRWLOCK_TYPES_H */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index d647637cd69..471ba48c7ae 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -109,6 +109,15 @@ #define BRANCH_PROFILE() #endif +#ifdef CONFIG_KPROBES +#define KPROBE_BLACKLIST() . = ALIGN(8); \ + VMLINUX_SYMBOL(__start_kprobe_blacklist) = .; \ + *(_kprobe_blacklist) \ + VMLINUX_SYMBOL(__stop_kprobe_blacklist) = .; +#else +#define KPROBE_BLACKLIST() +#endif + #ifdef CONFIG_EVENT_TRACING #define FTRACE_EVENTS() . = ALIGN(8); \ VMLINUX_SYMBOL(__start_ftrace_events) = .; \ @@ -478,6 +487,7 @@ *(.init.rodata) \ FTRACE_EVENTS() \ TRACE_SYSCALLS() \ + KPROBE_BLACKLIST() \ MEM_DISCARD(init.rodata) \ CLK_OF_TABLES() \ RESERVEDMEM_OF_TABLES() \ diff --git a/include/dt-bindings/clk/ti-dra7-atl.h b/include/dt-bindings/clk/ti-dra7-atl.h new file mode 100644 index 00000000000..42dd4164f6f --- /dev/null +++ b/include/dt-bindings/clk/ti-dra7-atl.h @@ -0,0 +1,40 @@ +/* + * This header provides constants for DRA7 ATL (Audio Tracking Logic) + * + * The constants defined in this header are used in dts files + * + * Copyright (C) 2013 Texas Instruments, Inc. + * + * Peter Ujfalusi <peter.ujfalusi@ti.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#ifndef _DT_BINDINGS_CLK_DRA7_ATL_H +#define _DT_BINDINGS_CLK_DRA7_ATL_H + +#define DRA7_ATL_WS_MCASP1_FSR 0 +#define DRA7_ATL_WS_MCASP1_FSX 1 +#define DRA7_ATL_WS_MCASP2_FSR 2 +#define DRA7_ATL_WS_MCASP2_FSX 3 +#define DRA7_ATL_WS_MCASP3_FSX 4 +#define DRA7_ATL_WS_MCASP4_FSX 5 +#define DRA7_ATL_WS_MCASP5_FSX 6 +#define DRA7_ATL_WS_MCASP6_FSX 7 +#define DRA7_ATL_WS_MCASP7_FSX 8 +#define DRA7_ATL_WS_MCASP8_FSX 9 +#define DRA7_ATL_WS_MCASP8_AHCLKX 10 +#define DRA7_ATL_WS_XREF_CLK3 11 +#define DRA7_ATL_WS_XREF_CLK0 12 +#define DRA7_ATL_WS_XREF_CLK1 13 +#define DRA7_ATL_WS_XREF_CLK2 14 +#define DRA7_ATL_WS_OSC1_X1 15 + +#endif diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 5f6db18d72e..3c97d5e9b95 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -625,6 +625,8 @@ int ceph_flags_to_mode(int flags); CEPH_CAP_LINK_EXCL | \ CEPH_CAP_XATTR_EXCL | \ CEPH_CAP_FILE_EXCL) +#define CEPH_CAP_ANY_FILE_RD (CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE | \ + CEPH_CAP_FILE_SHARED) #define CEPH_CAP_ANY_FILE_WR (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | \ CEPH_CAP_FILE_EXCL) #define CEPH_CAP_ANY_WR (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR) diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h index a486f390dfb..deb47e45ac7 100644 --- a/include/linux/ceph/mon_client.h +++ b/include/linux/ceph/mon_client.h @@ -40,9 +40,9 @@ struct ceph_mon_request { }; /* - * ceph_mon_generic_request is being used for the statfs and poolop requests - * which are bening done a bit differently because we need to get data back - * to the caller + * ceph_mon_generic_request is being used for the statfs, poolop and + * mon_get_version requests which are being done a bit differently + * because we need to get data back to the caller */ struct ceph_mon_generic_request { struct kref kref; @@ -104,10 +104,15 @@ extern int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 have); extern int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 have); extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc); +extern int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, + unsigned long timeout); extern int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf); +extern int ceph_monc_do_get_version(struct ceph_mon_client *monc, + const char *what, u64 *newest); + extern int ceph_monc_open_session(struct ceph_mon_client *monc); extern int ceph_monc_validate_auth(struct ceph_mon_client *monc); diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 4a21a872dbb..e8d8a35034a 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -41,6 +41,8 @@ * @idlest_reg: register containing the DPLL idle status bitfield * @autoidle_mask: mask of the DPLL autoidle mode bitfield in @autoidle_reg * @freqsel_mask: mask of the DPLL jitter correction bitfield in @control_reg + * @dcc_mask: mask of the DPLL DCC correction bitfield @mult_div1_reg + * @dcc_rate: rate atleast which DCC @dcc_mask must be set * @idlest_mask: mask of the DPLL idle status bitfield in @idlest_reg * @lpmode_mask: mask of the DPLL low-power mode bitfield in @control_reg * @m4xen_mask: mask of the DPLL M4X multiplier bitfield in @control_reg @@ -86,6 +88,8 @@ struct dpll_data { u32 idlest_mask; u32 dco_mask; u32 sddiv_mask; + u32 dcc_mask; + unsigned long dcc_rate; u32 lpmode_mask; u32 m4xen_mask; u8 auto_recal_bit; @@ -94,7 +98,26 @@ struct dpll_data { u8 flags; }; -struct clk_hw_omap_ops; +struct clk_hw_omap; + +/** + * struct 
clk_hw_omap_ops - OMAP clk ops + * @find_idlest: find idlest register information for a clock + * @find_companion: find companion clock register information for a clock, + * basically converts CM_ICLKEN* <-> CM_FCLKEN* + * @allow_idle: enables autoidle hardware functionality for a clock + * @deny_idle: prevent autoidle hardware functionality for a clock + */ +struct clk_hw_omap_ops { + void (*find_idlest)(struct clk_hw_omap *oclk, + void __iomem **idlest_reg, + u8 *idlest_bit, u8 *idlest_val); + void (*find_companion)(struct clk_hw_omap *oclk, + void __iomem **other_reg, + u8 *other_bit); + void (*allow_idle)(struct clk_hw_omap *oclk); + void (*deny_idle)(struct clk_hw_omap *oclk); +}; /** * struct clk_hw_omap - OMAP struct clk @@ -259,6 +282,12 @@ int omap2_dflt_clk_enable(struct clk_hw *hw); void omap2_dflt_clk_disable(struct clk_hw *hw); int omap2_dflt_clk_is_enabled(struct clk_hw *hw); void omap3_clk_lock_dpll5(void); +unsigned long omap2_dpllcore_recalc(struct clk_hw *hw, + unsigned long parent_rate); +int omap2_reprogram_dpllcore(struct clk_hw *clk, unsigned long rate, + unsigned long parent_rate); +void omap2xxx_clkt_dpllcore_init(struct clk_hw *hw); +void omap2xxx_clkt_vps_init(void); void __iomem *ti_clk_get_reg_addr(struct device_node *node, int index); void ti_dt_clocks_register(struct ti_dt_clk *oclks); @@ -278,6 +307,8 @@ int omap5xxx_dt_clk_init(void); int dra7xx_dt_clk_init(void); int am33xx_dt_clk_init(void); int am43xx_dt_clk_init(void); +int omap2420_dt_clk_init(void); +int omap2430_dt_clk_init(void); #ifdef CONFIG_OF void of_ti_clk_allow_autoidle_all(void); @@ -287,6 +318,8 @@ static inline void of_ti_clk_allow_autoidle_all(void) { } static inline void of_ti_clk_deny_autoidle_all(void) { } #endif +extern const struct clk_hw_omap_ops clkhwops_omap2xxx_dpll; +extern const struct clk_hw_omap_ops clkhwops_omap2430_i2chs_wait; extern const struct clk_hw_omap_ops clkhwops_omap3_dpll; extern const struct clk_hw_omap_ops clkhwops_omap4_dpllmx; extern const struct clk_hw_omap_ops clkhwops_wait; diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 64fdfe1cfcf..d5ad7b1118f 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -383,7 +383,9 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); /* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */ #ifdef CONFIG_KPROBES # define __kprobes __attribute__((__section__(".kprobes.text"))) +# define nokprobe_inline __always_inline #else # define __kprobes +# define nokprobe_inline inline #endif #endif /* __LINUX_COMPILER_H */ diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 7bd2ad01e39..f7296e57d61 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -205,10 +205,10 @@ struct kretprobe_blackpoint { void *addr; }; -struct kprobe_blackpoint { - const char *name; +struct kprobe_blacklist_entry { + struct list_head list; unsigned long start_addr; - unsigned long range; + unsigned long end_addr; }; #ifdef CONFIG_KPROBES @@ -265,6 +265,7 @@ extern void arch_disarm_kprobe(struct kprobe *p); extern int arch_init_kprobes(void); extern void show_registers(struct pt_regs *regs); extern void kprobes_inc_nmissed_count(struct kprobe *p); +extern bool arch_within_kprobe_blacklist(unsigned long addr); struct kprobe_insn_cache { struct mutex mutex; @@ -476,4 +477,18 @@ static inline int enable_jprobe(struct jprobe *jp) return enable_kprobe(&jp->kp); } +#ifdef CONFIG_KPROBES +/* + * Blacklist ganerating macro. 
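 *
 * Likely usage shape (the handler name is illustrative):
 *
 *	static int my_trap_handler(struct pt_regs *regs)
 *	{
 *		return handle_it(regs);
 *	}
 *	NOKPROBE_SYMBOL(my_trap_handler);
 *
 * which records the function address in the _kprobe_blacklist section
 * gathered by the KPROBE_BLACKLIST() linker-script macro earlier in
 * this diff.
 *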
Specify functions which is not probed + * by using this macro. + */ +#define __NOKPROBE_SYMBOL(fname) \ +static unsigned long __used \ + __attribute__((section("_kprobe_blacklist"))) \ + _kbl_addr_##fname = (unsigned long)fname; +#define NOKPROBE_SYMBOL(fname) __NOKPROBE_SYMBOL(fname) +#else +#define NOKPROBE_SYMBOL(fname) +#endif + #endif /* _LINUX_KPROBES_H */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 970c68197c6..ec4e3bd83d4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -586,7 +586,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn); void kvm_vcpu_block(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); -bool kvm_vcpu_yield_to(struct kvm_vcpu *target); +int kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index a50173ca1d7..2bf403195c0 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1,6 +1,6 @@ /* * Definitions for the NVM Express interface - * Copyright (c) 2011-2013, Intel Corporation. + * Copyright (c) 2011-2014, Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -10,10 +10,6 @@ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ #ifndef _LINUX_NVME_H @@ -66,8 +62,8 @@ enum { #define NVME_VS(major, minor) (major << 16 | minor) -extern unsigned char io_timeout; -#define NVME_IO_TIMEOUT (io_timeout * HZ) +extern unsigned char nvme_io_timeout; +#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) /* * Represents an NVM Express device. Each nvme_dev is a PCI function. 
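 *
 * Note on the NVME_VS() macro above: the major version occupies the
 * upper 16 bits, so NVME_VS(1, 0) == 0x10000, matching the layout of
 * the controller's VS register.
 *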
@@ -94,7 +90,7 @@ struct nvme_dev { struct miscdevice miscdev; work_func_t reset_workfn; struct work_struct reset_work; - struct notifier_block nb; + struct work_struct cpu_work; char name[12]; char serial[20]; char model[40]; @@ -103,6 +99,7 @@ struct nvme_dev { u32 stripe_size; u16 oncs; u16 abort_limit; + u8 vwc; u8 initialized; }; @@ -159,7 +156,6 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, void nvme_unmap_user_pages(struct nvme_dev *dev, int write, struct nvme_iod *iod); int nvme_submit_io_cmd(struct nvme_dev *, struct nvme_command *, u32 *); -int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns); int nvme_submit_admin_cmd(struct nvme_dev *, struct nvme_command *, u32 *result); int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns, diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a9209118d80..707617a8c0f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -167,6 +167,11 @@ struct perf_event; #define PERF_EVENT_TXN 0x1 /** + * pmu::capabilities flags + */ +#define PERF_PMU_CAP_NO_INTERRUPT 0x01 + +/** * struct pmu - generic performance monitoring unit */ struct pmu { @@ -178,6 +183,11 @@ struct pmu { const char *name; int type; + /* + * various common per-pmu feature flags + */ + int capabilities; + int * __percpu pmu_disable_count; struct perf_cpu_context * __percpu pmu_cpu_context; int task_ctx_nr; @@ -696,7 +706,8 @@ extern struct perf_guest_info_callbacks *perf_guest_cbs; extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); -extern void perf_event_comm(struct task_struct *tsk); +extern void perf_event_exec(void); +extern void perf_event_comm(struct task_struct *tsk, bool exec); extern void perf_event_fork(struct task_struct *tsk); /* Callchains */ @@ -773,7 +784,7 @@ extern void perf_event_enable(struct perf_event *event); extern void perf_event_disable(struct perf_event *event); extern int __perf_event_disable(void *info); extern void perf_event_task_tick(void); -#else +#else /* !CONFIG_PERF_EVENTS: */ static inline void perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task) { } @@ -803,7 +814,8 @@ static inline int perf_unregister_guest_info_callbacks (struct perf_guest_info_callbacks *callbacks) { return 0; } static inline void perf_event_mmap(struct vm_area_struct *vma) { } -static inline void perf_event_comm(struct task_struct *tsk) { } +static inline void perf_event_exec(void) { } +static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } static inline int perf_swevent_get_recursion_context(void) { return -1; } diff --git a/include/linux/platform_data/shtc1.h b/include/linux/platform_data/shtc1.h new file mode 100644 index 00000000000..7b8c353f7dc --- /dev/null +++ b/include/linux/platform_data/shtc1.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2014 Sensirion AG, Switzerland + * Author: Johannes Winkelmann <johannes.winkelmann@sensirion.com> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __SHTC1_H_ +#define __SHTC1_H_ + +struct shtc1_platform_data { + bool blocking_io; + bool high_precision; +}; +#endif /* __SHTC1_H_ */ diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index d69cf637a15..49a4d6f5910 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -97,7 +97,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k __ring_buffer_alloc((size), (flags), &__key); \ }) -void ring_buffer_wait(struct ring_buffer *buffer, int cpu); +int ring_buffer_wait(struct ring_buffer *buffer, int cpu); int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table); diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 03f3b05e8ec..8d79708146a 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -16,6 +16,7 @@ #include <linux/atomic.h> +struct optimistic_spin_queue; struct rw_semaphore; #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK @@ -23,9 +24,17 @@ struct rw_semaphore; #else /* All arch specific implementations share the same struct */ struct rw_semaphore { - long count; - raw_spinlock_t wait_lock; - struct list_head wait_list; + long count; + raw_spinlock_t wait_lock; + struct list_head wait_list; +#ifdef CONFIG_SMP + /* + * Write owner. Used as a speculative check to see + * if the owner is running on the cpu. + */ + struct task_struct *owner; + struct optimistic_spin_queue *osq; /* spinner MCS lock */ +#endif #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif @@ -55,11 +64,21 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem) # define __RWSEM_DEP_MAP_INIT(lockname) #endif +#if defined(CONFIG_SMP) && !defined(CONFIG_RWSEM_GENERIC_SPINLOCK) +#define __RWSEM_INITIALIZER(name) \ + { RWSEM_UNLOCKED_VALUE, \ + __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ + LIST_HEAD_INIT((name).wait_list), \ + NULL, /* owner */ \ + NULL /* mcs lock */ \ + __RWSEM_DEP_MAP_INIT(name) } +#else #define __RWSEM_INITIALIZER(name) \ { RWSEM_UNLOCKED_VALUE, \ __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ LIST_HEAD_INIT((name).wait_list) \ __RWSEM_DEP_MAP_INIT(name) } +#endif #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) diff --git a/include/linux/sched.h b/include/linux/sched.h index ea74596014a..306f4f0c987 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -847,10 +847,10 @@ enum cpu_idle_type { }; /* - * Increase resolution of cpu_power calculations + * Increase resolution of cpu_capacity calculations */ -#define SCHED_POWER_SHIFT 10 -#define SCHED_POWER_SCALE (1L << SCHED_POWER_SHIFT) +#define SCHED_CAPACITY_SHIFT 10 +#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) /* * sched-domains (multiprocessor balancing) declarations: @@ -862,7 +862,7 @@ enum cpu_idle_type { #define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ #define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */ #define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ -#define SD_SHARE_CPUPOWER 0x0080 /* Domain members share cpu power */ +#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu power */ #define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */ #define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg 
resources */ #define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ @@ -874,7 +874,7 @@ enum cpu_idle_type { #ifdef CONFIG_SCHED_SMT static inline const int cpu_smt_flags(void) { - return SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES; + return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; } #endif @@ -1006,7 +1006,7 @@ typedef const int (*sched_domain_flags_f)(void); struct sd_data { struct sched_domain **__percpu sd; struct sched_group **__percpu sg; - struct sched_group_power **__percpu sgp; + struct sched_group_capacity **__percpu sgc; }; struct sched_domain_topology_level { @@ -2173,7 +2173,7 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { } static inline void sched_autogroup_exit(struct signal_struct *sig) { } #endif -extern bool yield_to(struct task_struct *p, bool preempt); +extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); /** @@ -2421,7 +2421,11 @@ extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, i struct task_struct *fork_idle(int); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); -extern void set_task_comm(struct task_struct *tsk, const char *from); +extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); +static inline void set_task_comm(struct task_struct *tsk, const char *from) +{ + __set_task_comm(tsk, from, false); +} extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index c52f827ba6c..4f844c6b03e 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -103,6 +103,7 @@ extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, u extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); extern bool __weak is_trap_insn(uprobe_opcode_t *insn); extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); +extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); @@ -133,6 +134,9 @@ extern void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, #else /* !CONFIG_UPROBES */ struct uprobes_state { }; + +#define uprobe_get_trap_addr(regs) instruction_pointer(regs) + static inline int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) { diff --git a/include/linux/virtio_scsi.h b/include/linux/virtio_scsi.h index 4195b97a3de..de429d1f435 100644 --- a/include/linux/virtio_scsi.h +++ b/include/linux/virtio_scsi.h @@ -35,11 +35,23 @@ struct virtio_scsi_cmd_req { u8 lun[8]; /* Logical Unit Number */ u64 tag; /* Command identifier */ u8 task_attr; /* Task attribute */ - u8 prio; + u8 prio; /* SAM command priority field */ u8 crn; u8 cdb[VIRTIO_SCSI_CDB_SIZE]; } __packed; +/* SCSI command request, followed by protection information */ +struct virtio_scsi_cmd_req_pi { + u8 lun[8]; /* Logical Unit Number */ + u64 tag; /* Command identifier */ + u8 task_attr; /* Task attribute */ + u8 prio; /* SAM command priority field */ + u8 crn; + u32 pi_bytesout; /* DataOUT PI Number of bytes */ + u32 pi_bytesin; /* DataIN PI Number of bytes */ + u8 cdb[VIRTIO_SCSI_CDB_SIZE]; +} __packed; + /* Response, 
followed by sense data and data-in */ struct virtio_scsi_cmd_resp { u32 sense_len; /* Sense data length */ @@ -97,6 +109,7 @@ struct virtio_scsi_config { #define VIRTIO_SCSI_F_INOUT 0 #define VIRTIO_SCSI_F_HOTPLUG 1 #define VIRTIO_SCSI_F_CHANGE 2 +#define VIRTIO_SCSI_F_T10_PI 3 /* Response codes */ #define VIRTIO_SCSI_S_OK 0 diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h index bca25dc53f9..8fab6fa0dbf 100644 --- a/include/media/videobuf2-core.h +++ b/include/media/videobuf2-core.h @@ -432,6 +432,7 @@ void *vb2_plane_vaddr(struct vb2_buffer *vb, unsigned int plane_no); void *vb2_plane_cookie(struct vb2_buffer *vb, unsigned int plane_no); void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state); +void vb2_discard_done(struct vb2_queue *q); int vb2_wait_for_all_buffers(struct vb2_queue *q); int vb2_querybuf(struct vb2_queue *q, struct v4l2_buffer *b); diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index e016e2ac38d..42ed789ebaf 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -7,6 +7,7 @@ #include <linux/types.h> #include <linux/timer.h> #include <linux/scatterlist.h> +#include <scsi/scsi_device.h> struct Scsi_Host; struct scsi_device; @@ -315,4 +316,20 @@ static inline void set_driver_byte(struct scsi_cmnd *cmd, char status) cmd->result = (cmd->result & 0x00ffffff) | (status << 24); } +static inline unsigned scsi_transfer_length(struct scsi_cmnd *scmd) +{ + unsigned int xfer_len = blk_rq_bytes(scmd->request); + unsigned int prot_op = scsi_get_prot_op(scmd); + unsigned int sector_size = scmd->device->sector_size; + + switch (prot_op) { + case SCSI_PROT_NORMAL: + case SCSI_PROT_WRITE_STRIP: + case SCSI_PROT_READ_INSERT: + return xfer_len; + } + + return xfer_len + (xfer_len >> ilog2(sector_size)) * 8; +} + #endif /* _SCSI_SCSI_CMND_H */ diff --git a/include/sound/pcm.h b/include/sound/pcm.h index b4d6697085f..d854fb31c00 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -932,7 +932,7 @@ static inline void snd_pcm_gettime(struct snd_pcm_runtime *runtime, struct timespec *tv) { if (runtime->tstamp_type == SNDRV_PCM_TSTAMP_TYPE_MONOTONIC) - do_posix_clock_monotonic_gettime(tv); + ktime_get_ts(tv); else getnstimeofday(tv); } diff --git a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h index 33b487b5da9..daef9daa500 100644 --- a/include/target/iscsi/iscsi_transport.h +++ b/include/target/iscsi/iscsi_transport.h @@ -70,7 +70,8 @@ extern void iscsit_build_nopin_rsp(struct iscsi_cmd *, struct iscsi_conn *, extern void iscsit_build_task_mgt_rsp(struct iscsi_cmd *, struct iscsi_conn *, struct iscsi_tm_rsp *); extern int iscsit_build_text_rsp(struct iscsi_cmd *, struct iscsi_conn *, - struct iscsi_text_rsp *); + struct iscsi_text_rsp *, + enum iscsit_transport_type); extern void iscsit_build_reject(struct iscsi_cmd *, struct iscsi_conn *, struct iscsi_reject *); extern int iscsit_build_logout_rsp(struct iscsi_cmd *, struct iscsi_conn *, diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 3a1c1eea1ff..9adc1bca117 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -59,6 +59,7 @@ int transport_subsystem_register(struct se_subsystem_api *); void transport_subsystem_release(struct se_subsystem_api *); void target_complete_cmd(struct se_cmd *, u8); +void target_complete_cmd_with_length(struct se_cmd *, u8, int); sense_reason_t spc_parse_cdb(struct se_cmd *cmd, unsigned int *size); sense_reason_t 
spc_emulate_report_luns(struct se_cmd *cmd); diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 67e1bbf8369..0a68d5ae584 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -530,6 +530,26 @@ TRACE_EVENT(sched_swap_numa, __entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid, __entry->dst_cpu, __entry->dst_nid) ); + +/* + * Tracepoint for waking a polling cpu without an IPI. + */ +TRACE_EVENT(sched_wake_idle_without_ipi, + + TP_PROTO(int cpu), + + TP_ARGS(cpu), + + TP_STRUCT__entry( + __field( int, cpu ) + ), + + TP_fast_assign( + __entry->cpu = cpu; + ), + + TP_printk("cpu=%d", __entry->cpu) +); #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 7554fd381a5..6f9c38ce45c 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -306,6 +306,14 @@ struct btrfs_ioctl_search_args { char buf[BTRFS_SEARCH_ARGS_BUFSIZE]; }; +struct btrfs_ioctl_search_args_v2 { + struct btrfs_ioctl_search_key key; /* in/out - search parameters */ + __u64 buf_size; /* in - size of buffer + * out - on EOVERFLOW: needed size + * to store item */ + __u64 buf[0]; /* out - found items */ +}; + struct btrfs_ioctl_clone_range_args { __s64 src_fd; __u64 src_offset, src_length; @@ -558,6 +566,8 @@ static inline char *btrfs_err_str(enum btrfs_err_code err_code) struct btrfs_ioctl_defrag_range_args) #define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \ struct btrfs_ioctl_search_args) +#define BTRFS_IOC_TREE_SEARCH_V2 _IOWR(BTRFS_IOCTL_MAGIC, 17, \ + struct btrfs_ioctl_search_args_v2) #define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \ struct btrfs_ioctl_ino_lookup_args) #define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, __u64) diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h index 096fe1c6f83..29a7d8619d8 100644 --- a/include/uapi/linux/nvme.h +++ b/include/uapi/linux/nvme.h @@ -1,6 +1,6 @@ /* * Definitions for the NVM Express interface - * Copyright (c) 2011-2013, Intel Corporation. + * Copyright (c) 2011-2014, Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -10,10 +10,6 @@ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
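A note on the BTRFS_IOC_TREE_SEARCH_V2 ioctl defined above: unlike v1's fixed BTRFS_SEARCH_ARGS_BUFSIZE buffer, the caller supplies buf_size, and when an item does not fit the kernel fails with EOVERFLOW and writes back the size it needed. A hedged userspace sketch (error handling trimmed; fd is assumed to be open on a btrfs filesystem):

    #include <errno.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/btrfs.h>

    static int search_all(int fd)
    {
            struct btrfs_ioctl_search_args_v2 *args;
            __u64 bufsz = 16 * 1024;

            args = malloc(sizeof(*args) + bufsz);
            memset(args, 0, sizeof(*args));     /* zeroed key: min_* = 0 */
            args->key.max_objectid = (__u64)-1; /* match everything below */
            args->key.max_type     = (__u32)-1;
            args->key.max_offset   = (__u64)-1;
            args->key.max_transid  = (__u64)-1;
            args->key.nr_items     = (__u32)-1;
            args->buf_size         = bufsz;

            if (ioctl(fd, BTRFS_IOC_TREE_SEARCH_V2, args) < 0 &&
                errno == EOVERFLOW) {
                    /* args->buf_size holds the needed size: grow, retry */
            }
            return 0;
    }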
*/ #ifndef _UAPI_LINUX_NVME_H @@ -31,7 +27,12 @@ struct nvme_id_power_state { __u8 read_lat; __u8 write_tput; __u8 write_lat; - __u8 rsvd16[16]; + __le16 idle_power; + __u8 idle_scale; + __u8 rsvd19; + __le16 active_power; + __u8 active_work_scale; + __u8 rsvd23[9]; }; enum { @@ -49,7 +50,9 @@ struct nvme_id_ctrl { __u8 ieee[3]; __u8 mic; __u8 mdts; - __u8 rsvd78[178]; + __u16 cntlid; + __u32 ver; + __u8 rsvd84[172]; __le16 oacs; __u8 acl; __u8 aerl; @@ -57,7 +60,11 @@ struct nvme_id_ctrl { __u8 lpa; __u8 elpe; __u8 npss; - __u8 rsvd264[248]; + __u8 avscc; + __u8 apsta; + __le16 wctemp; + __le16 cctemp; + __u8 rsvd270[242]; __u8 sqes; __u8 cqes; __u8 rsvd514[2]; @@ -68,7 +75,12 @@ struct nvme_id_ctrl { __u8 vwc; __le16 awun; __le16 awupf; - __u8 rsvd530[1518]; + __u8 nvscc; + __u8 rsvd531; + __le16 acwu; + __u8 rsvd534[2]; + __le32 sgls; + __u8 rsvd540[1508]; struct nvme_id_power_state psd[32]; __u8 vs[1024]; }; @@ -77,6 +89,7 @@ enum { NVME_CTRL_ONCS_COMPARE = 1 << 0, NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1, NVME_CTRL_ONCS_DSM = 1 << 2, + NVME_CTRL_VWC_PRESENT = 1 << 0, }; struct nvme_lbaf { @@ -95,7 +108,15 @@ struct nvme_id_ns { __u8 mc; __u8 dpc; __u8 dps; - __u8 rsvd30[98]; + __u8 nmic; + __u8 rescap; + __u8 fpi; + __u8 rsvd33; + __le16 nawun; + __le16 nawupf; + __le16 nacwu; + __u8 rsvd40[80]; + __u8 eui64[8]; struct nvme_lbaf lbaf[16]; __u8 rsvd192[192]; __u8 vs[3712]; @@ -126,7 +147,10 @@ struct nvme_smart_log { __u8 unsafe_shutdowns[16]; __u8 media_errors[16]; __u8 num_err_log_entries[16]; - __u8 rsvd192[320]; + __le32 warning_temp_time; + __le32 critical_comp_time; + __le16 temp_sensor[8]; + __u8 rsvd216[296]; }; enum { @@ -282,6 +306,10 @@ enum { NVME_FEAT_WRITE_ATOMIC = 0x0a, NVME_FEAT_ASYNC_EVENT = 0x0b, NVME_FEAT_SW_PROGRESS = 0x0c, + NVME_LOG_ERROR = 0x01, + NVME_LOG_SMART = 0x02, + NVME_LOG_FW_SLOT = 0x03, + NVME_LOG_RESERVATION = 0x80, NVME_FWACT_REPL = (0 << 3), NVME_FWACT_REPL_ACTV = (1 << 3), NVME_FWACT_ACTV = (2 << 3), diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index e3fc8f09d11..5312fae4721 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -163,8 +163,9 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_ABORT_TX = 1U << 7, /* transaction aborts */ PERF_SAMPLE_BRANCH_IN_TX = 1U << 8, /* in transaction */ PERF_SAMPLE_BRANCH_NO_TX = 1U << 9, /* not in transaction */ + PERF_SAMPLE_BRANCH_COND = 1U << 10, /* conditional branches */ - PERF_SAMPLE_BRANCH_MAX = 1U << 10, /* non-ABI */ + PERF_SAMPLE_BRANCH_MAX = 1U << 11, /* non-ABI */ }; #define PERF_SAMPLE_BRANCH_PLM_ALL \ @@ -301,8 +302,8 @@ struct perf_event_attr { exclude_callchain_kernel : 1, /* exclude kernel callchains */ exclude_callchain_user : 1, /* exclude user callchains */ mmap2 : 1, /* include mmap with inode data */ - - __reserved_1 : 40; + comm_exec : 1, /* flag comm events that are due to an exec */ + __reserved_1 : 39; union { __u32 wakeup_events; /* wakeup every n events */ @@ -501,7 +502,12 @@ struct perf_event_mmap_page { #define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0) #define PERF_RECORD_MISC_GUEST_USER (5 << 0) +/* + * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on + * different events so can reuse the same bit position. + */ #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) +#define PERF_RECORD_MISC_COMM_EXEC (1 << 13) /* * Indicates that the content of PERF_SAMPLE_IP points to * the actual instruction that triggered the event. 
See also diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/sound/compress_offload.h index 5759810e1c1..21eed488783 100644 --- a/include/uapi/sound/compress_offload.h +++ b/include/uapi/sound/compress_offload.h @@ -80,7 +80,7 @@ struct snd_compr_tstamp { struct snd_compr_avail { __u64 avail; struct snd_compr_tstamp tstamp; -}; +} __attribute__((packed)); enum snd_compr_direction { SND_COMPRESS_PLAYBACK = 0, diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index d2b32ac27a3..35536d9c096 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -223,3 +223,10 @@ endif config MUTEX_SPIN_ON_OWNER def_bool y depends on SMP && !DEBUG_MUTEXES + +config ARCH_USE_QUEUE_RWLOCK + bool + +config QUEUE_RWLOCK + def_bool y if ARCH_USE_QUEUE_RWLOCK + depends on SMP diff --git a/kernel/events/core.c b/kernel/events/core.c index 24d35cc38e4..5fa58e4cffa 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2974,6 +2974,22 @@ out: local_irq_restore(flags); } +void perf_event_exec(void) +{ + struct perf_event_context *ctx; + int ctxn; + + rcu_read_lock(); + for_each_task_context_nr(ctxn) { + ctx = current->perf_event_ctxp[ctxn]; + if (!ctx) + continue; + + perf_event_enable_on_exec(ctx); + } + rcu_read_unlock(); +} + /* * Cross CPU call to read the hardware event */ @@ -5075,21 +5091,9 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) NULL); } -void perf_event_comm(struct task_struct *task) +void perf_event_comm(struct task_struct *task, bool exec) { struct perf_comm_event comm_event; - struct perf_event_context *ctx; - int ctxn; - - rcu_read_lock(); - for_each_task_context_nr(ctxn) { - ctx = task->perf_event_ctxp[ctxn]; - if (!ctx) - continue; - - perf_event_enable_on_exec(ctx); - } - rcu_read_unlock(); if (!atomic_read(&nr_comm_events)) return; @@ -5101,7 +5105,7 @@ void perf_event_comm(struct task_struct *task) .event_id = { .header = { .type = PERF_RECORD_COMM, - .misc = 0, + .misc = exec ? 
PERF_RECORD_MISC_COMM_EXEC : 0, /* .size */ }, /* .pid */ @@ -7122,6 +7126,13 @@ SYSCALL_DEFINE5(perf_event_open, } } + if (is_sampling_event(event)) { + if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) { + err = -ENOTSUPP; + goto err_alloc; + } + } + account_event(event); /* @@ -7433,7 +7444,7 @@ __perf_event_exit_task(struct perf_event *child_event, static void perf_event_exit_task_context(struct task_struct *child, int ctxn) { - struct perf_event *child_event; + struct perf_event *child_event, *next; struct perf_event_context *child_ctx; unsigned long flags; @@ -7487,7 +7498,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) */ mutex_lock(&child_ctx->mutex); - list_for_each_entry_rcu(child_event, &child_ctx->event_list, event_entry) + list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry) __perf_event_exit_task(child_event, child_ctx, child); mutex_unlock(&child_ctx->mutex); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index adcd76a9683..c445e392e93 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -36,6 +36,7 @@ #include "../../mm/internal.h" /* munlock_vma_page */ #include <linux/percpu-rwsem.h> #include <linux/task_work.h> +#include <linux/shmem_fs.h> #include <linux/uprobes.h> @@ -127,7 +128,7 @@ struct xol_area { */ static bool valid_vma(struct vm_area_struct *vma, bool is_register) { - vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_SHARED; + vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_MAYSHARE; if (is_register) flags |= VM_WRITE; @@ -279,18 +280,13 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t * supported by that architecture then we need to modify is_trap_at_addr and * uprobe_write_opcode accordingly. This would never be a problem for archs * that have fixed length instructions. - */ - -/* + * * uprobe_write_opcode - write the opcode at a given virtual address. * @mm: the probed process address space. * @vaddr: the virtual address to store the opcode. * @opcode: opcode to be written at @vaddr. * - * Called with mm->mmap_sem held (for read and with a reference to - * mm). - * - * For mm @mm, write the opcode at @vaddr. + * Called with mm->mmap_sem held for write. * Return 0 (success) or a negative errno. */ int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, @@ -310,21 +306,25 @@ retry: if (ret <= 0) goto put_old; + ret = anon_vma_prepare(vma); + if (ret) + goto put_old; + ret = -ENOMEM; new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); if (!new_page) goto put_old; - __SetPageUptodate(new_page); + if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) + goto put_new; + __SetPageUptodate(new_page); copy_highpage(new_page, old_page); copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); - ret = anon_vma_prepare(vma); - if (ret) - goto put_new; - ret = __replace_page(vma, vaddr, old_page, new_page); + if (ret) + mem_cgroup_uncharge_page(new_page); put_new: page_cache_release(new_page); @@ -537,14 +537,15 @@ static int __copy_insn(struct address_space *mapping, struct file *filp, void *insn, int nbytes, loff_t offset) { struct page *page; - - if (!mapping->a_ops->readpage) - return -EIO; /* - * Ensure that the page that has the original instruction is - * populated and in page-cache. + * Ensure that the page that has the original instruction is populated + * and in page-cache. If ->readpage == NULL it must be shmem_mapping(), + * see uprobe_register(). 
*/ - page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp); + if (mapping->a_ops->readpage) + page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp); + else + page = shmem_read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT); if (IS_ERR(page)) return PTR_ERR(page); @@ -880,6 +881,9 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer * if (!uc->handler && !uc->ret_handler) return -EINVAL; + /* copy_insn() uses read_mapping_page() or shmem_read_mapping_page() */ + if (!inode->i_mapping->a_ops->readpage && !shmem_mapping(inode->i_mapping)) + return -EIO; /* Racy, just to catch the obvious mistakes */ if (offset > i_size_read(inode)) return -EINVAL; @@ -1361,6 +1365,16 @@ unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs) return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE; } +unsigned long uprobe_get_trap_addr(struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + if (unlikely(utask && utask->active_uprobe)) + return utask->vaddr; + + return instruction_pointer(regs); +} + /* * Called with no locks held. * Called in context of a exiting or a exec-ing thread. diff --git a/kernel/kprobes.c b/kernel/kprobes.c index ceeadfcabb7..3214289df5a 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -86,21 +86,8 @@ static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) return &(kretprobe_table_locks[hash].lock); } -/* - * Normally, functions that we'd want to prohibit kprobes in, are marked - * __kprobes. But, there are cases where such functions already belong to - * a different section (__sched for preempt_schedule) - * - * For such cases, we now have a blacklist - */ -static struct kprobe_blackpoint kprobe_blacklist[] = { - {"preempt_schedule",}, - {"native_get_debugreg",}, - {"irq_entries_start",}, - {"common_interrupt",}, - {"mcount",}, /* mcount can be called from everywhere */ - {NULL} /* Terminator */ -}; +/* Blacklist -- list of struct kprobe_blacklist_entry */ +static LIST_HEAD(kprobe_blacklist); #ifdef __ARCH_WANT_KPROBES_INSN_SLOT /* @@ -151,13 +138,13 @@ struct kprobe_insn_cache kprobe_insn_slots = { .insn_size = MAX_INSN_SIZE, .nr_garbage = 0, }; -static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c); +static int collect_garbage_slots(struct kprobe_insn_cache *c); /** * __get_insn_slot() - Find a slot on an executable page for an instruction. * We allocate an executable page if there's no room on existing ones. */ -kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c) +kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c) { struct kprobe_insn_page *kip; kprobe_opcode_t *slot = NULL; @@ -214,7 +201,7 @@ out: } /* Return 1 if all garbages are collected, otherwise 0. 
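One note on uprobe_get_trap_addr() above: while a thread single-steps the copied instruction in the XOL area, instruction_pointer() points into that area rather than at the probed instruction, so the saved utask->vaddr is returned instead. An architecture's trap code can then report the address the user actually probed; roughly (a sketch modeled on the x86 conversion in this series, with a hypothetical helper name):

    #include <linux/sched.h>
    #include <linux/uprobes.h>

    static void my_send_trap_signal(struct pt_regs *regs)
    {
            siginfo_t info;

            memset(&info, 0, sizeof(info));
            info.si_signo = SIGTRAP;
            info.si_code  = TRAP_BRKPT;
            /* probed address, not the per-thread XOL slot */
            info.si_addr  = (void __user *)uprobe_get_trap_addr(regs);
            force_sig_info(SIGTRAP, &info, current);
    }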
*/ -static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx) +static int collect_one_slot(struct kprobe_insn_page *kip, int idx) { kip->slot_used[idx] = SLOT_CLEAN; kip->nused--; @@ -235,7 +222,7 @@ static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx) return 0; } -static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c) +static int collect_garbage_slots(struct kprobe_insn_cache *c) { struct kprobe_insn_page *kip, *next; @@ -257,8 +244,8 @@ static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c) return 0; } -void __kprobes __free_insn_slot(struct kprobe_insn_cache *c, - kprobe_opcode_t *slot, int dirty) +void __free_insn_slot(struct kprobe_insn_cache *c, + kprobe_opcode_t *slot, int dirty) { struct kprobe_insn_page *kip; @@ -314,7 +301,7 @@ static inline void reset_kprobe_instance(void) * OR * - with preemption disabled - from arch/xxx/kernel/kprobes.c */ -struct kprobe __kprobes *get_kprobe(void *addr) +struct kprobe *get_kprobe(void *addr) { struct hlist_head *head; struct kprobe *p; @@ -327,8 +314,9 @@ struct kprobe __kprobes *get_kprobe(void *addr) return NULL; } +NOKPROBE_SYMBOL(get_kprobe); -static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs); +static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs); /* Return true if the kprobe is an aggregator */ static inline int kprobe_aggrprobe(struct kprobe *p) @@ -360,7 +348,7 @@ static bool kprobes_allow_optimization; * Call all pre_handler on the list, but ignores its return value. * This must be called from arch-dep optimized caller. */ -void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs) +void opt_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe *kp; @@ -372,9 +360,10 @@ void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs) reset_kprobe_instance(); } } +NOKPROBE_SYMBOL(opt_pre_handler); /* Free optimized instructions and optimized_kprobe */ -static __kprobes void free_aggr_kprobe(struct kprobe *p) +static void free_aggr_kprobe(struct kprobe *p) { struct optimized_kprobe *op; @@ -412,7 +401,7 @@ static inline int kprobe_disarmed(struct kprobe *p) } /* Return true(!0) if the probe is queued on (un)optimizing lists */ -static int __kprobes kprobe_queued(struct kprobe *p) +static int kprobe_queued(struct kprobe *p) { struct optimized_kprobe *op; @@ -428,7 +417,7 @@ static int __kprobes kprobe_queued(struct kprobe *p) * Return an optimized kprobe whose optimizing code replaces * instructions including addr (exclude breakpoint). */ -static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr) +static struct kprobe *get_optimized_kprobe(unsigned long addr) { int i; struct kprobe *p = NULL; @@ -460,7 +449,7 @@ static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer); * Optimize (replace a breakpoint with a jump) kprobes listed on * optimizing_list. */ -static __kprobes void do_optimize_kprobes(void) +static void do_optimize_kprobes(void) { /* Optimization never be done when disarmed */ if (kprobes_all_disarmed || !kprobes_allow_optimization || @@ -488,7 +477,7 @@ static __kprobes void do_optimize_kprobes(void) * Unoptimize (replace a jump with a breakpoint and remove the breakpoint * if need) kprobes listed on unoptimizing_list. 
*/ -static __kprobes void do_unoptimize_kprobes(void) +static void do_unoptimize_kprobes(void) { struct optimized_kprobe *op, *tmp; @@ -520,7 +509,7 @@ static __kprobes void do_unoptimize_kprobes(void) } /* Reclaim all kprobes on the free_list */ -static __kprobes void do_free_cleaned_kprobes(void) +static void do_free_cleaned_kprobes(void) { struct optimized_kprobe *op, *tmp; @@ -532,13 +521,13 @@ static __kprobes void do_free_cleaned_kprobes(void) } /* Start optimizer after OPTIMIZE_DELAY passed */ -static __kprobes void kick_kprobe_optimizer(void) +static void kick_kprobe_optimizer(void) { schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY); } /* Kprobe jump optimizer */ -static __kprobes void kprobe_optimizer(struct work_struct *work) +static void kprobe_optimizer(struct work_struct *work) { mutex_lock(&kprobe_mutex); /* Lock modules while optimizing kprobes */ @@ -574,7 +563,7 @@ static __kprobes void kprobe_optimizer(struct work_struct *work) } /* Wait for completing optimization and unoptimization */ -static __kprobes void wait_for_kprobe_optimizer(void) +static void wait_for_kprobe_optimizer(void) { mutex_lock(&kprobe_mutex); @@ -593,7 +582,7 @@ static __kprobes void wait_for_kprobe_optimizer(void) } /* Optimize kprobe if p is ready to be optimized */ -static __kprobes void optimize_kprobe(struct kprobe *p) +static void optimize_kprobe(struct kprobe *p) { struct optimized_kprobe *op; @@ -627,7 +616,7 @@ static __kprobes void optimize_kprobe(struct kprobe *p) } /* Short cut to direct unoptimizing */ -static __kprobes void force_unoptimize_kprobe(struct optimized_kprobe *op) +static void force_unoptimize_kprobe(struct optimized_kprobe *op) { get_online_cpus(); arch_unoptimize_kprobe(op); @@ -637,7 +626,7 @@ static __kprobes void force_unoptimize_kprobe(struct optimized_kprobe *op) } /* Unoptimize a kprobe if p is optimized */ -static __kprobes void unoptimize_kprobe(struct kprobe *p, bool force) +static void unoptimize_kprobe(struct kprobe *p, bool force) { struct optimized_kprobe *op; @@ -697,7 +686,7 @@ static void reuse_unused_kprobe(struct kprobe *ap) } /* Remove optimized instructions */ -static void __kprobes kill_optimized_kprobe(struct kprobe *p) +static void kill_optimized_kprobe(struct kprobe *p) { struct optimized_kprobe *op; @@ -723,7 +712,7 @@ static void __kprobes kill_optimized_kprobe(struct kprobe *p) } /* Try to prepare optimized instructions */ -static __kprobes void prepare_optimized_kprobe(struct kprobe *p) +static void prepare_optimized_kprobe(struct kprobe *p) { struct optimized_kprobe *op; @@ -732,7 +721,7 @@ static __kprobes void prepare_optimized_kprobe(struct kprobe *p) } /* Allocate new optimized_kprobe and try to prepare optimized instructions */ -static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p) +static struct kprobe *alloc_aggr_kprobe(struct kprobe *p) { struct optimized_kprobe *op; @@ -747,13 +736,13 @@ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p) return &op->kp; } -static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p); +static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p); /* * Prepare an optimized_kprobe and optimize it * NOTE: p must be a normal registered kprobe */ -static __kprobes void try_to_optimize_kprobe(struct kprobe *p) +static void try_to_optimize_kprobe(struct kprobe *p) { struct kprobe *ap; struct optimized_kprobe *op; @@ -787,7 +776,7 @@ out: } #ifdef CONFIG_SYSCTL -static void __kprobes optimize_all_kprobes(void) +static void optimize_all_kprobes(void) { 
struct hlist_head *head; struct kprobe *p; @@ -810,7 +799,7 @@ out: mutex_unlock(&kprobe_mutex); } -static void __kprobes unoptimize_all_kprobes(void) +static void unoptimize_all_kprobes(void) { struct hlist_head *head; struct kprobe *p; @@ -861,7 +850,7 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write, #endif /* CONFIG_SYSCTL */ /* Put a breakpoint for a probe. Must be called with text_mutex locked */ -static void __kprobes __arm_kprobe(struct kprobe *p) +static void __arm_kprobe(struct kprobe *p) { struct kprobe *_p; @@ -876,7 +865,7 @@ static void __kprobes __arm_kprobe(struct kprobe *p) } /* Remove the breakpoint of a probe. Must be called with text_mutex locked */ -static void __kprobes __disarm_kprobe(struct kprobe *p, bool reopt) +static void __disarm_kprobe(struct kprobe *p, bool reopt) { struct kprobe *_p; @@ -911,13 +900,13 @@ static void reuse_unused_kprobe(struct kprobe *ap) BUG_ON(kprobe_unused(ap)); } -static __kprobes void free_aggr_kprobe(struct kprobe *p) +static void free_aggr_kprobe(struct kprobe *p) { arch_remove_kprobe(p); kfree(p); } -static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p) +static struct kprobe *alloc_aggr_kprobe(struct kprobe *p) { return kzalloc(sizeof(struct kprobe), GFP_KERNEL); } @@ -931,7 +920,7 @@ static struct ftrace_ops kprobe_ftrace_ops __read_mostly = { static int kprobe_ftrace_enabled; /* Must ensure p->addr is really on ftrace */ -static int __kprobes prepare_kprobe(struct kprobe *p) +static int prepare_kprobe(struct kprobe *p) { if (!kprobe_ftrace(p)) return arch_prepare_kprobe(p); @@ -940,7 +929,7 @@ static int __kprobes prepare_kprobe(struct kprobe *p) } /* Caller must lock kprobe_mutex */ -static void __kprobes arm_kprobe_ftrace(struct kprobe *p) +static void arm_kprobe_ftrace(struct kprobe *p) { int ret; @@ -955,7 +944,7 @@ static void __kprobes arm_kprobe_ftrace(struct kprobe *p) } /* Caller must lock kprobe_mutex */ -static void __kprobes disarm_kprobe_ftrace(struct kprobe *p) +static void disarm_kprobe_ftrace(struct kprobe *p) { int ret; @@ -975,7 +964,7 @@ static void __kprobes disarm_kprobe_ftrace(struct kprobe *p) #endif /* Arm a kprobe with text_mutex */ -static void __kprobes arm_kprobe(struct kprobe *kp) +static void arm_kprobe(struct kprobe *kp) { if (unlikely(kprobe_ftrace(kp))) { arm_kprobe_ftrace(kp); @@ -992,7 +981,7 @@ static void __kprobes arm_kprobe(struct kprobe *kp) } /* Disarm a kprobe with text_mutex */ -static void __kprobes disarm_kprobe(struct kprobe *kp, bool reopt) +static void disarm_kprobe(struct kprobe *kp, bool reopt) { if (unlikely(kprobe_ftrace(kp))) { disarm_kprobe_ftrace(kp); @@ -1008,7 +997,7 @@ static void __kprobes disarm_kprobe(struct kprobe *kp, bool reopt) * Aggregate handlers for multiple kprobes support - these handlers * take care of invoking the individual kprobe handlers on p->list */ -static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) +static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe *kp; @@ -1022,9 +1011,10 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) } return 0; } +NOKPROBE_SYMBOL(aggr_pre_handler); -static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, - unsigned long flags) +static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs, + unsigned long flags) { struct kprobe *kp; @@ -1036,9 +1026,10 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, } } } 
+NOKPROBE_SYMBOL(aggr_post_handler); -static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, - int trapnr) +static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, + int trapnr) { struct kprobe *cur = __this_cpu_read(kprobe_instance); @@ -1052,8 +1043,9 @@ static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, } return 0; } +NOKPROBE_SYMBOL(aggr_fault_handler); -static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs) +static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe *cur = __this_cpu_read(kprobe_instance); int ret = 0; @@ -1065,9 +1057,10 @@ static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs) reset_kprobe_instance(); return ret; } +NOKPROBE_SYMBOL(aggr_break_handler); /* Walks the list and increments nmissed count for multiprobe case */ -void __kprobes kprobes_inc_nmissed_count(struct kprobe *p) +void kprobes_inc_nmissed_count(struct kprobe *p) { struct kprobe *kp; if (!kprobe_aggrprobe(p)) { @@ -1078,9 +1071,10 @@ void __kprobes kprobes_inc_nmissed_count(struct kprobe *p) } return; } +NOKPROBE_SYMBOL(kprobes_inc_nmissed_count); -void __kprobes recycle_rp_inst(struct kretprobe_instance *ri, - struct hlist_head *head) +void recycle_rp_inst(struct kretprobe_instance *ri, + struct hlist_head *head) { struct kretprobe *rp = ri->rp; @@ -1095,8 +1089,9 @@ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri, /* Unregistering */ hlist_add_head(&ri->hlist, head); } +NOKPROBE_SYMBOL(recycle_rp_inst); -void __kprobes kretprobe_hash_lock(struct task_struct *tsk, +void kretprobe_hash_lock(struct task_struct *tsk, struct hlist_head **head, unsigned long *flags) __acquires(hlist_lock) { @@ -1107,17 +1102,19 @@ __acquires(hlist_lock) hlist_lock = kretprobe_table_lock_ptr(hash); raw_spin_lock_irqsave(hlist_lock, *flags); } +NOKPROBE_SYMBOL(kretprobe_hash_lock); -static void __kprobes kretprobe_table_lock(unsigned long hash, - unsigned long *flags) +static void kretprobe_table_lock(unsigned long hash, + unsigned long *flags) __acquires(hlist_lock) { raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); raw_spin_lock_irqsave(hlist_lock, *flags); } +NOKPROBE_SYMBOL(kretprobe_table_lock); -void __kprobes kretprobe_hash_unlock(struct task_struct *tsk, - unsigned long *flags) +void kretprobe_hash_unlock(struct task_struct *tsk, + unsigned long *flags) __releases(hlist_lock) { unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); @@ -1126,14 +1123,16 @@ __releases(hlist_lock) hlist_lock = kretprobe_table_lock_ptr(hash); raw_spin_unlock_irqrestore(hlist_lock, *flags); } +NOKPROBE_SYMBOL(kretprobe_hash_unlock); -static void __kprobes kretprobe_table_unlock(unsigned long hash, - unsigned long *flags) +static void kretprobe_table_unlock(unsigned long hash, + unsigned long *flags) __releases(hlist_lock) { raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); raw_spin_unlock_irqrestore(hlist_lock, *flags); } +NOKPROBE_SYMBOL(kretprobe_table_unlock); /* * This function is called from finish_task_switch when task tk becomes dead, @@ -1141,7 +1140,7 @@ __releases(hlist_lock) * with this task. These left over instances represent probed functions * that have been called but will never return. 
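For context on the kretprobe hash/recycle plumbing above, the consumer-facing API is unchanged by the de-__kprobes conversion; a minimal return-probe user still looks like this (handler and target symbol are illustrative):

    #include <linux/kprobes.h>
    #include <linux/ptrace.h>

    static int my_ret_handler(struct kretprobe_instance *ri,
                              struct pt_regs *regs)
    {
            pr_info("probed function returned %lu\n",
                    regs_return_value(regs));
            return 0;
    }

    static struct kretprobe my_kretprobe = {
            .handler        = my_ret_handler,
            .kp.symbol_name = "do_fork",   /* example target */
            .maxactive      = 20,          /* instances parked in the hash */
    };

    /* register_kretprobe(&my_kretprobe) on load,
     * unregister_kretprobe(&my_kretprobe) on unload. */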
*/ -void __kprobes kprobe_flush_task(struct task_struct *tk) +void kprobe_flush_task(struct task_struct *tk) { struct kretprobe_instance *ri; struct hlist_head *head, empty_rp; @@ -1166,6 +1165,7 @@ void __kprobes kprobe_flush_task(struct task_struct *tk) kfree(ri); } } +NOKPROBE_SYMBOL(kprobe_flush_task); static inline void free_rp_inst(struct kretprobe *rp) { @@ -1178,7 +1178,7 @@ static inline void free_rp_inst(struct kretprobe *rp) } } -static void __kprobes cleanup_rp_inst(struct kretprobe *rp) +static void cleanup_rp_inst(struct kretprobe *rp) { unsigned long flags, hash; struct kretprobe_instance *ri; @@ -1197,12 +1197,13 @@ static void __kprobes cleanup_rp_inst(struct kretprobe *rp) } free_rp_inst(rp); } +NOKPROBE_SYMBOL(cleanup_rp_inst); /* * Add the new probe to ap->list. Fail if this is the * second jprobe at the address - two jprobes can't coexist */ -static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p) +static int add_new_kprobe(struct kprobe *ap, struct kprobe *p) { BUG_ON(kprobe_gone(ap) || kprobe_gone(p)); @@ -1226,7 +1227,7 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p) * Fill in the required fields of the "manager kprobe". Replace the * earlier kprobe in the hlist with the manager kprobe */ -static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p) +static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p) { /* Copy p's insn slot to ap */ copy_kprobe(p, ap); @@ -1252,8 +1253,7 @@ static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p) * This is the second or subsequent kprobe at the address - handle * the intricacies */ -static int __kprobes register_aggr_kprobe(struct kprobe *orig_p, - struct kprobe *p) +static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p) { int ret = 0; struct kprobe *ap = orig_p; @@ -1324,25 +1324,29 @@ out: return ret; } -static int __kprobes in_kprobes_functions(unsigned long addr) +bool __weak arch_within_kprobe_blacklist(unsigned long addr) { - struct kprobe_blackpoint *kb; + /* The __kprobes marked functions and entry code must not be probed */ + return addr >= (unsigned long)__kprobes_text_start && + addr < (unsigned long)__kprobes_text_end; +} - if (addr >= (unsigned long)__kprobes_text_start && - addr < (unsigned long)__kprobes_text_end) - return -EINVAL; +static bool within_kprobe_blacklist(unsigned long addr) +{ + struct kprobe_blacklist_entry *ent; + + if (arch_within_kprobe_blacklist(addr)) + return true; /* * If there exists a kprobe_blacklist, verify and * fail any probe registration in the prohibited area */ - for (kb = kprobe_blacklist; kb->name != NULL; kb++) { - if (kb->start_addr) { - if (addr >= kb->start_addr && - addr < (kb->start_addr + kb->range)) - return -EINVAL; - } + list_for_each_entry(ent, &kprobe_blacklist, list) { + if (addr >= ent->start_addr && addr < ent->end_addr) + return true; } - return 0; + + return false; } /* @@ -1351,7 +1355,7 @@ static int __kprobes in_kprobes_functions(unsigned long addr) * This returns encoded errors if it fails to look up symbol or invalid * combination of parameters. */ -static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p) +static kprobe_opcode_t *kprobe_addr(struct kprobe *p) { kprobe_opcode_t *addr = p->addr; @@ -1374,7 +1378,7 @@ invalid: } /* Check passed kprobe is valid and return kprobe in kprobe_table. 
*/ -static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p) +static struct kprobe *__get_valid_kprobe(struct kprobe *p) { struct kprobe *ap, *list_p; @@ -1406,8 +1410,8 @@ static inline int check_kprobe_rereg(struct kprobe *p) return ret; } -static __kprobes int check_kprobe_address_safe(struct kprobe *p, - struct module **probed_mod) +static int check_kprobe_address_safe(struct kprobe *p, + struct module **probed_mod) { int ret = 0; unsigned long ftrace_addr; @@ -1433,7 +1437,7 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p, /* Ensure it is not in reserved area nor out of text */ if (!kernel_text_address((unsigned long) p->addr) || - in_kprobes_functions((unsigned long) p->addr) || + within_kprobe_blacklist((unsigned long) p->addr) || jump_label_text_reserved(p->addr, p->addr)) { ret = -EINVAL; goto out; @@ -1469,7 +1473,7 @@ out: return ret; } -int __kprobes register_kprobe(struct kprobe *p) +int register_kprobe(struct kprobe *p) { int ret; struct kprobe *old_p; @@ -1531,7 +1535,7 @@ out: EXPORT_SYMBOL_GPL(register_kprobe); /* Check if all probes on the aggrprobe are disabled */ -static int __kprobes aggr_kprobe_disabled(struct kprobe *ap) +static int aggr_kprobe_disabled(struct kprobe *ap) { struct kprobe *kp; @@ -1547,7 +1551,7 @@ static int __kprobes aggr_kprobe_disabled(struct kprobe *ap) } /* Disable one kprobe: Make sure called under kprobe_mutex is locked */ -static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p) +static struct kprobe *__disable_kprobe(struct kprobe *p) { struct kprobe *orig_p; @@ -1574,7 +1578,7 @@ static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p) /* * Unregister a kprobe without a scheduler synchronization. */ -static int __kprobes __unregister_kprobe_top(struct kprobe *p) +static int __unregister_kprobe_top(struct kprobe *p) { struct kprobe *ap, *list_p; @@ -1631,7 +1635,7 @@ disarmed: return 0; } -static void __kprobes __unregister_kprobe_bottom(struct kprobe *p) +static void __unregister_kprobe_bottom(struct kprobe *p) { struct kprobe *ap; @@ -1647,7 +1651,7 @@ static void __kprobes __unregister_kprobe_bottom(struct kprobe *p) /* Otherwise, do nothing. 
*/ } -int __kprobes register_kprobes(struct kprobe **kps, int num) +int register_kprobes(struct kprobe **kps, int num) { int i, ret = 0; @@ -1665,13 +1669,13 @@ int __kprobes register_kprobes(struct kprobe **kps, int num) } EXPORT_SYMBOL_GPL(register_kprobes); -void __kprobes unregister_kprobe(struct kprobe *p) +void unregister_kprobe(struct kprobe *p) { unregister_kprobes(&p, 1); } EXPORT_SYMBOL_GPL(unregister_kprobe); -void __kprobes unregister_kprobes(struct kprobe **kps, int num) +void unregister_kprobes(struct kprobe **kps, int num) { int i; @@ -1700,7 +1704,7 @@ unsigned long __weak arch_deref_entry_point(void *entry) return (unsigned long)entry; } -int __kprobes register_jprobes(struct jprobe **jps, int num) +int register_jprobes(struct jprobe **jps, int num) { struct jprobe *jp; int ret = 0, i; @@ -1731,19 +1735,19 @@ int __kprobes register_jprobes(struct jprobe **jps, int num) } EXPORT_SYMBOL_GPL(register_jprobes); -int __kprobes register_jprobe(struct jprobe *jp) +int register_jprobe(struct jprobe *jp) { return register_jprobes(&jp, 1); } EXPORT_SYMBOL_GPL(register_jprobe); -void __kprobes unregister_jprobe(struct jprobe *jp) +void unregister_jprobe(struct jprobe *jp) { unregister_jprobes(&jp, 1); } EXPORT_SYMBOL_GPL(unregister_jprobe); -void __kprobes unregister_jprobes(struct jprobe **jps, int num) +void unregister_jprobes(struct jprobe **jps, int num) { int i; @@ -1768,8 +1772,7 @@ EXPORT_SYMBOL_GPL(unregister_jprobes); * This kprobe pre_handler is registered with every kretprobe. When probe * hits it will set up the return probe. */ -static int __kprobes pre_handler_kretprobe(struct kprobe *p, - struct pt_regs *regs) +static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) { struct kretprobe *rp = container_of(p, struct kretprobe, kp); unsigned long hash, flags = 0; @@ -1807,8 +1810,9 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, } return 0; } +NOKPROBE_SYMBOL(pre_handler_kretprobe); -int __kprobes register_kretprobe(struct kretprobe *rp) +int register_kretprobe(struct kretprobe *rp) { int ret = 0; struct kretprobe_instance *inst; @@ -1861,7 +1865,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp) } EXPORT_SYMBOL_GPL(register_kretprobe); -int __kprobes register_kretprobes(struct kretprobe **rps, int num) +int register_kretprobes(struct kretprobe **rps, int num) { int ret = 0, i; @@ -1879,13 +1883,13 @@ int __kprobes register_kretprobes(struct kretprobe **rps, int num) } EXPORT_SYMBOL_GPL(register_kretprobes); -void __kprobes unregister_kretprobe(struct kretprobe *rp) +void unregister_kretprobe(struct kretprobe *rp) { unregister_kretprobes(&rp, 1); } EXPORT_SYMBOL_GPL(unregister_kretprobe); -void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) +void unregister_kretprobes(struct kretprobe **rps, int num) { int i; @@ -1908,38 +1912,38 @@ void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) EXPORT_SYMBOL_GPL(unregister_kretprobes); #else /* CONFIG_KRETPROBES */ -int __kprobes register_kretprobe(struct kretprobe *rp) +int register_kretprobe(struct kretprobe *rp) { return -ENOSYS; } EXPORT_SYMBOL_GPL(register_kretprobe); -int __kprobes register_kretprobes(struct kretprobe **rps, int num) +int register_kretprobes(struct kretprobe **rps, int num) { return -ENOSYS; } EXPORT_SYMBOL_GPL(register_kretprobes); -void __kprobes unregister_kretprobe(struct kretprobe *rp) +void unregister_kretprobe(struct kretprobe *rp) { } EXPORT_SYMBOL_GPL(unregister_kretprobe); -void __kprobes unregister_kretprobes(struct 
kretprobe **rps, int num) +void unregister_kretprobes(struct kretprobe **rps, int num) { } EXPORT_SYMBOL_GPL(unregister_kretprobes); -static int __kprobes pre_handler_kretprobe(struct kprobe *p, - struct pt_regs *regs) +static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) { return 0; } +NOKPROBE_SYMBOL(pre_handler_kretprobe); #endif /* CONFIG_KRETPROBES */ /* Set the kprobe gone and remove its instruction buffer. */ -static void __kprobes kill_kprobe(struct kprobe *p) +static void kill_kprobe(struct kprobe *p) { struct kprobe *kp; @@ -1963,7 +1967,7 @@ static void __kprobes kill_kprobe(struct kprobe *p) } /* Disable one kprobe */ -int __kprobes disable_kprobe(struct kprobe *kp) +int disable_kprobe(struct kprobe *kp) { int ret = 0; @@ -1979,7 +1983,7 @@ int __kprobes disable_kprobe(struct kprobe *kp) EXPORT_SYMBOL_GPL(disable_kprobe); /* Enable one kprobe */ -int __kprobes enable_kprobe(struct kprobe *kp) +int enable_kprobe(struct kprobe *kp) { int ret = 0; struct kprobe *p; @@ -2012,16 +2016,49 @@ out: } EXPORT_SYMBOL_GPL(enable_kprobe); -void __kprobes dump_kprobe(struct kprobe *kp) +void dump_kprobe(struct kprobe *kp) { printk(KERN_WARNING "Dumping kprobe:\n"); printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n", kp->symbol_name, kp->addr, kp->offset); } +NOKPROBE_SYMBOL(dump_kprobe); + +/* + * Lookup and populate the kprobe_blacklist. + * + * Unlike the kretprobe blacklist, we'll need to determine + * the range of addresses that belong to the said functions, + * since a kprobe need not necessarily be at the beginning + * of a function. + */ +static int __init populate_kprobe_blacklist(unsigned long *start, + unsigned long *end) +{ + unsigned long *iter; + struct kprobe_blacklist_entry *ent; + unsigned long offset = 0, size = 0; + + for (iter = start; iter < end; iter++) { + if (!kallsyms_lookup_size_offset(*iter, &size, &offset)) { + pr_err("Failed to find blacklist %p\n", (void *)*iter); + continue; + } + + ent = kmalloc(sizeof(*ent), GFP_KERNEL); + if (!ent) + return -ENOMEM; + ent->start_addr = *iter; + ent->end_addr = *iter + size; + INIT_LIST_HEAD(&ent->list); + list_add_tail(&ent->list, &kprobe_blacklist); + } + return 0; +} /* Module notifier call back, checking kprobes on the module */ -static int __kprobes kprobes_module_callback(struct notifier_block *nb, - unsigned long val, void *data) +static int kprobes_module_callback(struct notifier_block *nb, + unsigned long val, void *data) { struct module *mod = data; struct hlist_head *head; @@ -2062,14 +2099,13 @@ static struct notifier_block kprobe_module_nb = { .priority = 0 }; +/* Markers of _kprobe_blacklist section */ +extern unsigned long __start_kprobe_blacklist[]; +extern unsigned long __stop_kprobe_blacklist[]; + static int __init init_kprobes(void) { int i, err = 0; - unsigned long offset = 0, size = 0; - char *modname, namebuf[KSYM_NAME_LEN]; - const char *symbol_name; - void *addr; - struct kprobe_blackpoint *kb; /* FIXME allocate the probe table, currently defined statically */ /* initialize all list heads */ @@ -2079,26 +2115,11 @@ static int __init init_kprobes(void) raw_spin_lock_init(&(kretprobe_table_locks[i].lock)); } - /* - * Lookup and populate the kprobe_blacklist. - * - * Unlike the kretprobe blacklist, we'll need to determine - * the range of addresses that belong to the said functions, - * since a kprobe need not necessarily be at the beginning - * of a function. 
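populate_kprobe_blacklist() above walks the address table that NOKPROBE_SYMBOL() emits into the _kprobe_blacklist section; the __start/__stop markers are provided by the linker script. The corresponding vmlinux.lds.h addition in this series looks approximately like the following (quoted from memory, so treat it as a sketch):

    /* include/asm-generic/vmlinux.lds.h (sketch) */
    #define KPROBE_BLACKLIST()  . = ALIGN(8);                                 \
                                VMLINUX_SYMBOL(__start_kprobe_blacklist) = .; \
                                *(_kprobe_blacklist)                          \
                                VMLINUX_SYMBOL(__stop_kprobe_blacklist) = .;

Each entry is just the unsigned long address stored by NOKPROBE_SYMBOL(); kallsyms_lookup_size_offset() then widens it into the [start_addr, end_addr) range kept on the kprobe_blacklist list.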
- */ - for (kb = kprobe_blacklist; kb->name != NULL; kb++) { - kprobe_lookup_name(kb->name, addr); - if (!addr) - continue; - - kb->start_addr = (unsigned long)addr; - symbol_name = kallsyms_lookup(kb->start_addr, - &size, &offset, &modname, namebuf); - if (!symbol_name) - kb->range = 0; - else - kb->range = size; + err = populate_kprobe_blacklist(__start_kprobe_blacklist, + __stop_kprobe_blacklist); + if (err) { + pr_err("kprobes: failed to populate blacklist: %d\n", err); + pr_err("Please take care of using kprobes.\n"); } if (kretprobe_blacklist_size) { @@ -2138,7 +2159,7 @@ static int __init init_kprobes(void) } #ifdef CONFIG_DEBUG_FS -static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p, +static void report_probe(struct seq_file *pi, struct kprobe *p, const char *sym, int offset, char *modname, struct kprobe *pp) { char *kprobe_type; @@ -2167,12 +2188,12 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p, (kprobe_ftrace(pp) ? "[FTRACE]" : "")); } -static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos) +static void *kprobe_seq_start(struct seq_file *f, loff_t *pos) { return (*pos < KPROBE_TABLE_SIZE) ? pos : NULL; } -static void __kprobes *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos) +static void *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos) { (*pos)++; if (*pos >= KPROBE_TABLE_SIZE) @@ -2180,12 +2201,12 @@ static void __kprobes *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos) return pos; } -static void __kprobes kprobe_seq_stop(struct seq_file *f, void *v) +static void kprobe_seq_stop(struct seq_file *f, void *v) { /* Nothing to do */ } -static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v) +static int show_kprobe_addr(struct seq_file *pi, void *v) { struct hlist_head *head; struct kprobe *p, *kp; @@ -2216,7 +2237,7 @@ static const struct seq_operations kprobes_seq_ops = { .show = show_kprobe_addr }; -static int __kprobes kprobes_open(struct inode *inode, struct file *filp) +static int kprobes_open(struct inode *inode, struct file *filp) { return seq_open(filp, &kprobes_seq_ops); } @@ -2228,7 +2249,47 @@ static const struct file_operations debugfs_kprobes_operations = { .release = seq_release, }; -static void __kprobes arm_all_kprobes(void) +/* kprobes/blacklist -- shows which functions can not be probed */ +static void *kprobe_blacklist_seq_start(struct seq_file *m, loff_t *pos) +{ + return seq_list_start(&kprobe_blacklist, *pos); +} + +static void *kprobe_blacklist_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + return seq_list_next(v, &kprobe_blacklist, pos); +} + +static int kprobe_blacklist_seq_show(struct seq_file *m, void *v) +{ + struct kprobe_blacklist_entry *ent = + list_entry(v, struct kprobe_blacklist_entry, list); + + seq_printf(m, "0x%p-0x%p\t%ps\n", (void *)ent->start_addr, + (void *)ent->end_addr, (void *)ent->start_addr); + return 0; +} + +static const struct seq_operations kprobe_blacklist_seq_ops = { + .start = kprobe_blacklist_seq_start, + .next = kprobe_blacklist_seq_next, + .stop = kprobe_seq_stop, /* Reuse void function */ + .show = kprobe_blacklist_seq_show, +}; + +static int kprobe_blacklist_open(struct inode *inode, struct file *filp) +{ + return seq_open(filp, &kprobe_blacklist_seq_ops); +} + +static const struct file_operations debugfs_kprobe_blacklist_ops = { + .open = kprobe_blacklist_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static void arm_all_kprobes(void) { struct hlist_head *head; struct 
kprobe *p; @@ -2256,7 +2317,7 @@ already_enabled: return; } -static void __kprobes disarm_all_kprobes(void) +static void disarm_all_kprobes(void) { struct hlist_head *head; struct kprobe *p; @@ -2340,7 +2401,7 @@ static const struct file_operations fops_kp = { .llseek = default_llseek, }; -static int __kprobes debugfs_kprobe_init(void) +static int __init debugfs_kprobe_init(void) { struct dentry *dir, *file; unsigned int value = 1; @@ -2351,19 +2412,24 @@ static int __kprobes debugfs_kprobe_init(void) file = debugfs_create_file("list", 0444, dir, NULL, &debugfs_kprobes_operations); - if (!file) { - debugfs_remove(dir); - return -ENOMEM; - } + if (!file) + goto error; file = debugfs_create_file("enabled", 0600, dir, &value, &fops_kp); - if (!file) { - debugfs_remove(dir); - return -ENOMEM; - } + if (!file) + goto error; + + file = debugfs_create_file("blacklist", 0444, dir, NULL, + &debugfs_kprobe_blacklist_ops); + if (!file) + goto error; return 0; + +error: + debugfs_remove(dir); + return -ENOMEM; } late_initcall(debugfs_kprobe_init); diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index b8bdcd4785b..8541bfdfd23 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -24,4 +24,5 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o +obj-$(CONFIG_QUEUE_RWLOCK) += qrwlock.o obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c new file mode 100644 index 00000000000..fb5b8ac411a --- /dev/null +++ b/kernel/locking/qrwlock.c @@ -0,0 +1,133 @@ +/* + * Queue read/write lock + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P. + * + * Authors: Waiman Long <waiman.long@hp.com> + */ +#include <linux/smp.h> +#include <linux/bug.h> +#include <linux/cpumask.h> +#include <linux/percpu.h> +#include <linux/hardirq.h> +#include <linux/mutex.h> +#include <asm/qrwlock.h> + +/** + * rspin_until_writer_unlock - inc reader count & spin until writer is gone + * @lock : Pointer to queue rwlock structure + * @cnts : Current queue rwlock writer status/count value + * + * In interrupt context or at the head of the queue, the reader will just + * increment the reader count & wait until the writer releases the lock. + */ +static __always_inline void +rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts) +{ + while ((cnts & _QW_WMASK) == _QW_LOCKED) { + arch_mutex_cpu_relax(); + cnts = smp_load_acquire((u32 *)&lock->cnts); + } +} + +/** + * queue_read_lock_slowpath - acquire read lock of a queue rwlock + * @lock: Pointer to queue rwlock structure + */ +void queue_read_lock_slowpath(struct qrwlock *lock) +{ + u32 cnts; + + /* + * Readers come here when they cannot get the lock without waiting + */ + if (unlikely(in_interrupt())) { + /* + * Readers in interrupt context will spin until the lock is + * available without waiting in the queue.
+ */ + cnts = smp_load_acquire((u32 *)&lock->cnts); + rspin_until_writer_unlock(lock, cnts); + return; + } + atomic_sub(_QR_BIAS, &lock->cnts); + + /* + * Put the reader into the wait queue + */ + arch_spin_lock(&lock->lock); + + /* + * At the head of the wait queue now, wait until the writer state + * goes to 0 and then try to increment the reader count and get + * the lock. It is possible that an incoming writer may steal the + * lock in the interim, so it is necessary to check the writer byte + * to make sure that the write lock isn't taken. + */ + while (atomic_read(&lock->cnts) & _QW_WMASK) + arch_mutex_cpu_relax(); + + cnts = atomic_add_return(_QR_BIAS, &lock->cnts) - _QR_BIAS; + rspin_until_writer_unlock(lock, cnts); + + /* + * Signal the next one in queue to become queue head + */ + arch_spin_unlock(&lock->lock); +} +EXPORT_SYMBOL(queue_read_lock_slowpath); + +/** + * queue_write_lock_slowpath - acquire write lock of a queue rwlock + * @lock : Pointer to queue rwlock structure + */ +void queue_write_lock_slowpath(struct qrwlock *lock) +{ + u32 cnts; + + /* Put the writer into the wait queue */ + arch_spin_lock(&lock->lock); + + /* Try to acquire the lock directly if no reader is present */ + if (!atomic_read(&lock->cnts) && + (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0)) + goto unlock; + + /* + * Set the waiting flag to notify readers that a writer is pending, + * or wait for a previous writer to go away. + */ + for (;;) { + cnts = atomic_read(&lock->cnts); + if (!(cnts & _QW_WMASK) && + (atomic_cmpxchg(&lock->cnts, cnts, + cnts | _QW_WAITING) == cnts)) + break; + + arch_mutex_cpu_relax(); + } + + /* When no more readers, set the locked flag */ + for (;;) { + cnts = atomic_read(&lock->cnts); + if ((cnts == _QW_WAITING) && + (atomic_cmpxchg(&lock->cnts, _QW_WAITING, + _QW_LOCKED) == _QW_WAITING)) + break; + + arch_mutex_cpu_relax(); + } +unlock: + arch_spin_unlock(&lock->lock); +} +EXPORT_SYMBOL(queue_write_lock_slowpath); diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index b4219ff87b8..dacc32142fc 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -5,11 +5,17 @@ * * Writer lock-stealing by Alex Shi <alex.shi@intel.com> * and Michel Lespinasse <walken@google.com> + * + * Optimistic spinning by Tim Chen <tim.c.chen@intel.com> + * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes. */ #include <linux/rwsem.h> #include <linux/sched.h> #include <linux/init.h> #include <linux/export.h> +#include <linux/sched/rt.h> + +#include "mcs_spinlock.h" /* * Guide to the rw_semaphore's count field for common values. 
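(The "Guide to the rw_semaphore's count field" comment is cut short by the diff context above. For orientation, the common values for the 32-bit xadd implementation look roughly like this -- a sketch reconstructed from the standard RWSEM_* bias definitions, not text carried by this patch; 64-bit builds use wider masks:)

	/*
	 *	0x00000000	unlocked, wait_list empty
	 *	0x0000000X	X readers hold the lock, wait_list empty
	 *	0xffff0000	no lock holders, wait_list non-empty
	 *			(RWSEM_WAITING_BIAS)
	 *	0xffff0001	one writer holds the lock (wait_list empty),
	 *			or one reader holds it with waiters queued
	 *	0xffff000X	X readers hold the lock, wait_list non-empty
	 */

(The new rwsem_try_write_lock() in the next hunk leans on this encoding: observing count == RWSEM_WAITING_BIAS means the lock is free with sleepers queued, which is the only state a queued writer may claim it from.)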
@@ -76,6 +82,10 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, sem->count = RWSEM_UNLOCKED_VALUE; raw_spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); +#ifdef CONFIG_SMP + sem->owner = NULL; + sem->osq = NULL; +#endif } EXPORT_SYMBOL(__init_rwsem); @@ -190,7 +200,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) } /* - * wait for the read lock to be granted + * Wait for the read lock to be granted */ __visible struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) @@ -237,64 +247,221 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) return sem; } +static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) +{ + if (!(count & RWSEM_ACTIVE_MASK)) { + /* try acquiring the write lock */ + if (sem->count == RWSEM_WAITING_BIAS && + cmpxchg(&sem->count, RWSEM_WAITING_BIAS, + RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) { + if (!list_is_singular(&sem->wait_list)) + rwsem_atomic_update(RWSEM_WAITING_BIAS, sem); + return true; + } + } + return false; +} + +#ifdef CONFIG_SMP /* - * wait until we successfully acquire the write lock + * Try to acquire write lock before the writer has been put on wait queue. + */ +static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) +{ + long old, count = ACCESS_ONCE(sem->count); + + while (true) { + if (!(count == 0 || count == RWSEM_WAITING_BIAS)) + return false; + + old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS); + if (old == count) + return true; + + count = old; + } +} + +static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) +{ + struct task_struct *owner; + bool on_cpu = true; + + if (need_resched()) + return false; + + rcu_read_lock(); + owner = ACCESS_ONCE(sem->owner); + if (owner) + on_cpu = owner->on_cpu; + rcu_read_unlock(); + + /* + * If sem->owner is not set, the rwsem owner may have + * just acquired it and not set the owner yet or the rwsem + * has been released. + */ + return on_cpu; +} + +static inline bool owner_running(struct rw_semaphore *sem, + struct task_struct *owner) +{ + if (sem->owner != owner) + return false; + + /* + * Ensure we emit the owner->on_cpu dereference _after_ checking + * that sem->owner still matches owner. If that check fails, owner + * might point to free()d memory; if it still matches, the + * rcu_read_lock() ensures the memory stays valid. + */ + barrier(); + + return owner->on_cpu; +} + +static noinline +bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner) +{ + rcu_read_lock(); + while (owner_running(sem, owner)) { + if (need_resched()) + break; + + arch_mutex_cpu_relax(); + } + rcu_read_unlock(); + + /* + * We break out of the loop above on need_resched() or when the + * owner changed, which is a sign for heavy contention. Return + * success only when sem->owner is NULL.
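(One subtlety in owner_running() above is worth spelling out: barrier() is a compiler-only barrier, and that is all that is required there. A hedged illustration of the hazard it prevents -- not patch text:)

	/*
	 * Without barrier(), the compiler may hoist the ->on_cpu load
	 * above the ->owner check, effectively producing:
	 *
	 *	on_cpu = owner->on_cpu;		// dereferenced unconditionally
	 *	if (sem->owner != owner)
	 *		return false;
	 *	return on_cpu;
	 *
	 * The intended order keeps the dereference conditional on
	 * sem->owner still matching; the caller's rcu_read_lock() then
	 * guarantees the task_struct stays valid while it is inspected.
	 */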
+ */ + return sem->owner == NULL; +} + +static bool rwsem_optimistic_spin(struct rw_semaphore *sem) +{ + struct task_struct *owner; + bool taken = false; + + preempt_disable(); + + /* sem->wait_lock should not be held when doing optimistic spinning */ + if (!rwsem_can_spin_on_owner(sem)) + goto done; + + if (!osq_lock(&sem->osq)) + goto done; + + while (true) { + owner = ACCESS_ONCE(sem->owner); + if (owner && !rwsem_spin_on_owner(sem, owner)) + break; + + /* wait_lock will be acquired if write_lock is obtained */ + if (rwsem_try_write_lock_unqueued(sem)) { + taken = true; + break; + } + + /* + * When there's no owner, we might have preempted the owner + * between it acquiring the lock and setting the owner field. + * If we're an RT task, this will live-lock because we won't + * let the owner complete. + */ + if (!owner && (need_resched() || rt_task(current))) + break; + + /* + * The cpu_relax() call is a compiler barrier which forces + * everything in this loop to be re-loaded. We don't need + * memory barriers as we'll eventually observe the right + * values at the cost of a few extra spins. + */ + arch_mutex_cpu_relax(); + } + osq_unlock(&sem->osq); +done: + preempt_enable(); + return taken; +} + +#else +static bool rwsem_optimistic_spin(struct rw_semaphore *sem) +{ + return false; +} +#endif + +/* + * Wait until we successfully acquire the write lock */ __visible struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) { - long count, adjustment = -RWSEM_ACTIVE_WRITE_BIAS; + long count; + bool waiting = true; /* any queued threads before us */ struct rwsem_waiter waiter; - struct task_struct *tsk = current; - /* set up my own style of waitqueue */ - waiter.task = tsk; + /* undo write bias from down_write operation, stop active locking */ + count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem); + + /* do optimistic spinning and steal lock if possible */ + if (rwsem_optimistic_spin(sem)) + return sem; + + /* + * Optimistic spinning failed, proceed to the slowpath + * and block until we can acquire the sem. + */ + waiter.task = current; waiter.type = RWSEM_WAITING_FOR_WRITE; raw_spin_lock_irq(&sem->wait_lock); + + /* account for this before adding a new element to the list */ if (list_empty(&sem->wait_list)) - adjustment += RWSEM_WAITING_BIAS; + waiting = false; + list_add_tail(&waiter.list, &sem->wait_list); /* we're now waiting on the lock, but no longer actively locking */ - count = rwsem_atomic_update(adjustment, sem); + if (waiting) { + count = ACCESS_ONCE(sem->count); - /* If there were already threads queued before us and there are no - * active writers, the lock must be read owned; so we try to wake - * any read locks that were queued ahead of us. */ - if (count > RWSEM_WAITING_BIAS && - adjustment == -RWSEM_ACTIVE_WRITE_BIAS) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS); + /* + * If there were already threads queued before us and there are + * no active writers, the lock must be read owned; so we try to + * wake any read locks that were queued ahead of us. + */ + if (count > RWSEM_WAITING_BIAS) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS); + + } else + count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem); /* wait until we successfully acquire the lock */ - set_task_state(tsk, TASK_UNINTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); while (true) { - if (!(count & RWSEM_ACTIVE_MASK)) { - /* Try acquiring the write lock.
*/ - count = RWSEM_ACTIVE_WRITE_BIAS; - if (!list_is_singular(&sem->wait_list)) - count += RWSEM_WAITING_BIAS; - - if (sem->count == RWSEM_WAITING_BIAS && - cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) == - RWSEM_WAITING_BIAS) - break; - } - + if (rwsem_try_write_lock(count, sem)) + break; raw_spin_unlock_irq(&sem->wait_lock); /* Block until there are no active lockers. */ do { schedule(); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); } while ((count = sem->count) & RWSEM_ACTIVE_MASK); raw_spin_lock_irq(&sem->wait_lock); } + __set_current_state(TASK_RUNNING); list_del(&waiter.list); raw_spin_unlock_irq(&sem->wait_lock); - tsk->state = TASK_RUNNING; return sem; } diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index cfff1435bdf..42f806de49d 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -12,6 +12,27 @@ #include <linux/atomic.h> +#if defined(CONFIG_SMP) && defined(CONFIG_RWSEM_XCHGADD_ALGORITHM) +static inline void rwsem_set_owner(struct rw_semaphore *sem) +{ + sem->owner = current; +} + +static inline void rwsem_clear_owner(struct rw_semaphore *sem) +{ + sem->owner = NULL; +} + +#else +static inline void rwsem_set_owner(struct rw_semaphore *sem) +{ +} + +static inline void rwsem_clear_owner(struct rw_semaphore *sem) +{ +} +#endif + /* * lock for reading */ @@ -48,6 +69,7 @@ void __sched down_write(struct rw_semaphore *sem) rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); LOCK_CONTENDED(sem, __down_write_trylock, __down_write); + rwsem_set_owner(sem); } EXPORT_SYMBOL(down_write); @@ -59,8 +81,11 @@ int down_write_trylock(struct rw_semaphore *sem) { int ret = __down_write_trylock(sem); - if (ret == 1) + if (ret == 1) { rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_); + rwsem_set_owner(sem); + } + return ret; } @@ -85,6 +110,7 @@ void up_write(struct rw_semaphore *sem) { rwsem_release(&sem->dep_map, 1, _RET_IP_); + rwsem_clear_owner(sem); __up_write(sem); } @@ -99,6 +125,7 @@ void downgrade_write(struct rw_semaphore *sem) * lockdep: a downgraded write will live on as a write * dependency. */ + rwsem_clear_owner(sem); __downgrade_write(sem); } @@ -122,6 +149,7 @@ void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest) rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_); LOCK_CONTENDED(sem, __down_write_trylock, __down_write); + rwsem_set_owner(sem); } EXPORT_SYMBOL(_down_write_nest_lock); @@ -141,6 +169,7 @@ void down_write_nested(struct rw_semaphore *sem, int subclass) rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); LOCK_CONTENDED(sem, __down_write_trylock, __down_write); + rwsem_set_owner(sem); } EXPORT_SYMBOL(down_write_nested); diff --git a/kernel/notifier.c b/kernel/notifier.c index db4c8b08a50..4803da6eab6 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -71,9 +71,9 @@ static int notifier_chain_unregister(struct notifier_block **nl, * @returns: notifier_call_chain returns the value returned by the * last notifier function called. */ -static int __kprobes notifier_call_chain(struct notifier_block **nl, - unsigned long val, void *v, - int nr_to_call, int *nr_calls) +static int notifier_call_chain(struct notifier_block **nl, + unsigned long val, void *v, + int nr_to_call, int *nr_calls) { int ret = NOTIFY_DONE; struct notifier_block *nb, *next_nb; @@ -102,6 +102,7 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl, } return ret; } +NOKPROBE_SYMBOL(notifier_call_chain); /* * Atomic notifier chain routines. 
Registration and unregistration @@ -172,9 +173,9 @@ EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister); * Otherwise the return value is the return value * of the last notifier function called. */ -int __kprobes __atomic_notifier_call_chain(struct atomic_notifier_head *nh, - unsigned long val, void *v, - int nr_to_call, int *nr_calls) +int __atomic_notifier_call_chain(struct atomic_notifier_head *nh, + unsigned long val, void *v, + int nr_to_call, int *nr_calls) { int ret; @@ -184,13 +185,15 @@ int __kprobes __atomic_notifier_call_chain(struct atomic_notifier_head *nh, return ret; } EXPORT_SYMBOL_GPL(__atomic_notifier_call_chain); +NOKPROBE_SYMBOL(__atomic_notifier_call_chain); -int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh, - unsigned long val, void *v) +int atomic_notifier_call_chain(struct atomic_notifier_head *nh, + unsigned long val, void *v) { return __atomic_notifier_call_chain(nh, val, v, -1, NULL); } EXPORT_SYMBOL_GPL(atomic_notifier_call_chain); +NOKPROBE_SYMBOL(atomic_notifier_call_chain); /* * Blocking notifier chain routines. All access to the chain is @@ -527,7 +530,7 @@ EXPORT_SYMBOL_GPL(srcu_init_notifier_head); static ATOMIC_NOTIFIER_HEAD(die_chain); -int notrace __kprobes notify_die(enum die_val val, const char *str, +int notrace notify_die(enum die_val val, const char *str, struct pt_regs *regs, long err, int trap, int sig) { struct die_args args = { @@ -540,6 +543,7 @@ int notrace __kprobes notify_die(enum die_val val, const char *str, }; return atomic_notifier_call_chain(&die_chain, val, &args); } +NOKPROBE_SYMBOL(notify_die); int register_die_notifier(struct notifier_block *nb) { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c6b98793d64..3bdf01b494f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -535,7 +535,7 @@ static inline void init_hrtick(void) __old; \ }) -#ifdef TIF_POLLING_NRFLAG +#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) /* * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, * this avoids any races wrt polling state changes and thereby avoids @@ -546,12 +546,44 @@ static bool set_nr_and_not_polling(struct task_struct *p) struct thread_info *ti = task_thread_info(p); return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG); } + +/* + * Atomically set TIF_NEED_RESCHED if TIF_POLLING_NRFLAG is set. + * + * If this returns true, then the idle task promises to call + * sched_ttwu_pending() and reschedule soon. + */ +static bool set_nr_if_polling(struct task_struct *p) +{ + struct thread_info *ti = task_thread_info(p); + typeof(ti->flags) old, val = ACCESS_ONCE(ti->flags); + + for (;;) { + if (!(val & _TIF_POLLING_NRFLAG)) + return false; + if (val & _TIF_NEED_RESCHED) + return true; + old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED); + if (old == val) + break; + val = old; + } + return true; +} + #else static bool set_nr_and_not_polling(struct task_struct *p) { set_tsk_need_resched(p); return true; } + +#ifdef CONFIG_SMP +static bool set_nr_if_polling(struct task_struct *p) +{ + return false; +} +#endif #endif /* @@ -580,6 +612,8 @@ void resched_task(struct task_struct *p) if (set_nr_and_not_polling(p)) smp_send_reschedule(cpu); + else + trace_sched_wake_idle_without_ipi(cpu); } void resched_cpu(int cpu) @@ -642,27 +676,10 @@ static void wake_up_idle_cpu(int cpu) if (cpu == smp_processor_id()) return; - /* - * This is safe, as this function is called with the timer - * wheel base lock of (cpu) held. 
When the CPU is on the way - * to idle and has not yet set rq->curr to idle then it will - * be serialized on the timer wheel base lock and take the new - * timer into account automatically. - */ - if (rq->curr != rq->idle) - return; - - /* - * We can set TIF_RESCHED on the idle task of the other CPU - * lockless. The worst case is that the other CPU runs the - * idle task through an additional NOOP schedule() - */ - set_tsk_need_resched(rq->idle); - - /* NEED_RESCHED must be visible before we test polling */ - smp_mb(); - if (!tsk_is_polling(rq->idle)) + if (set_nr_and_not_polling(rq->idle)) smp_send_reschedule(cpu); + else + trace_sched_wake_idle_without_ipi(cpu); } static bool wake_up_full_nohz_cpu(int cpu) @@ -888,7 +905,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) rq->clock_task += delta; #if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING) - if ((irq_delta + steal) && sched_feat(NONTASK_POWER)) + if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY)) sched_rt_avg_update(rq, irq_delta + steal); #endif } @@ -1521,13 +1538,17 @@ static int ttwu_remote(struct task_struct *p, int wake_flags) } #ifdef CONFIG_SMP -static void sched_ttwu_pending(void) +void sched_ttwu_pending(void) { struct rq *rq = this_rq(); struct llist_node *llist = llist_del_all(&rq->wake_list); struct task_struct *p; + unsigned long flags; - raw_spin_lock(&rq->lock); + if (!llist) + return; + + raw_spin_lock_irqsave(&rq->lock, flags); while (llist) { p = llist_entry(llist, struct task_struct, wake_entry); @@ -1535,7 +1556,7 @@ static void sched_ttwu_pending(void) ttwu_do_activate(rq, p, 0); } - raw_spin_unlock(&rq->lock); + raw_spin_unlock_irqrestore(&rq->lock, flags); } void scheduler_ipi(void) @@ -1581,8 +1602,14 @@ void scheduler_ipi(void) static void ttwu_queue_remote(struct task_struct *p, int cpu) { - if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) - smp_send_reschedule(cpu); + struct rq *rq = cpu_rq(cpu); + + if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) { + if (!set_nr_if_polling(rq->idle)) + smp_send_reschedule(cpu); + else + trace_sched_wake_idle_without_ipi(cpu); + } } bool cpus_share_cache(int this_cpu, int that_cpu) @@ -2527,7 +2554,7 @@ notrace unsigned long get_parent_ip(unsigned long addr) #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ defined(CONFIG_PREEMPT_TRACER)) -void __kprobes preempt_count_add(int val) +void preempt_count_add(int val) { #ifdef CONFIG_DEBUG_PREEMPT /* @@ -2553,8 +2580,9 @@ void __kprobes preempt_count_add(int val) } } EXPORT_SYMBOL(preempt_count_add); +NOKPROBE_SYMBOL(preempt_count_add); -void __kprobes preempt_count_sub(int val) +void preempt_count_sub(int val) { #ifdef CONFIG_DEBUG_PREEMPT /* @@ -2575,6 +2603,7 @@ void __kprobes preempt_count_sub(int val) __preempt_count_sub(val); } EXPORT_SYMBOL(preempt_count_sub); +NOKPROBE_SYMBOL(preempt_count_sub); #endif @@ -2857,6 +2886,7 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) barrier(); } while (need_resched()); } +NOKPROBE_SYMBOL(preempt_schedule); EXPORT_SYMBOL(preempt_schedule); #endif /* CONFIG_PREEMPT */ @@ -4216,7 +4246,7 @@ EXPORT_SYMBOL(yield); * false (0) if we failed to boost the target. * -ESRCH if there's no task to yield to. 
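(Stepping back to the new set_nr_if_polling() earlier in this core.c hunk: unlike set_nr_and_not_polling(), which may set TIF_NEED_RESCHED unconditionally with one fetch_or() and merely reports whether an IPI is still required, this helper must set the flag only while TIF_POLLING_NRFLAG is still observed, because a true return replaces the IPI entirely. That is what forces the cmpxchg() retry loop. A minimal generic sketch of the same pattern on a plain flags word rather than a thread_info -- illustrative, not patch code:)

	/* Set @bit in *flags only if @guard is currently set. */
	static bool set_bit_if_guarded(unsigned long *flags,
				       unsigned long bit, unsigned long guard)
	{
		unsigned long old, val = ACCESS_ONCE(*flags);

		for (;;) {
			if (!(val & guard))
				return false;	/* guard gone: caller must IPI */
			if (val & bit)
				return true;	/* already set: nothing to do */
			old = cmpxchg(flags, val, val | bit);
			if (old == val)
				return true;	/* set while the guard held */
			val = old;		/* lost a race: re-evaluate */
		}
	}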
*/ -bool __sched yield_to(struct task_struct *p, bool preempt) +int __sched yield_to(struct task_struct *p, bool preempt) { struct task_struct *curr = current; struct rq *rq, *p_rq; @@ -5242,14 +5272,13 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, } /* - * Even though we initialize ->power to something semi-sane, - * we leave power_orig unset. This allows us to detect if + * Even though we initialize ->capacity to something semi-sane, + * we leave capacity_orig unset. This allows us to detect if * domain iteration is still funny without causing /0 traps. */ - if (!group->sgp->power_orig) { + if (!group->sgc->capacity_orig) { printk(KERN_CONT "\n"); - printk(KERN_ERR "ERROR: domain->cpu_power not " - "set\n"); + printk(KERN_ERR "ERROR: domain->cpu_capacity not set\n"); break; } @@ -5271,9 +5300,9 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); printk(KERN_CONT " %s", str); - if (group->sgp->power != SCHED_POWER_SCALE) { - printk(KERN_CONT " (cpu_power = %d)", - group->sgp->power); + if (group->sgc->capacity != SCHED_CAPACITY_SCALE) { + printk(KERN_CONT " (cpu_capacity = %d)", + group->sgc->capacity); } group = group->next; @@ -5331,7 +5360,7 @@ static int sd_degenerate(struct sched_domain *sd) SD_BALANCE_NEWIDLE | SD_BALANCE_FORK | SD_BALANCE_EXEC | - SD_SHARE_CPUPOWER | + SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN)) { if (sd->groups != sd->groups->next) @@ -5362,7 +5391,7 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) SD_BALANCE_NEWIDLE | SD_BALANCE_FORK | SD_BALANCE_EXEC | - SD_SHARE_CPUPOWER | + SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES | SD_PREFER_SIBLING | SD_SHARE_POWERDOMAIN); @@ -5487,7 +5516,7 @@ static struct root_domain *alloc_rootdomain(void) return rd; } -static void free_sched_groups(struct sched_group *sg, int free_sgp) +static void free_sched_groups(struct sched_group *sg, int free_sgc) { struct sched_group *tmp, *first; @@ -5498,8 +5527,8 @@ static void free_sched_groups(struct sched_group *sg, int free_sgp) do { tmp = sg->next; - if (free_sgp && atomic_dec_and_test(&sg->sgp->ref)) - kfree(sg->sgp); + if (free_sgc && atomic_dec_and_test(&sg->sgc->ref)) + kfree(sg->sgc); kfree(sg); sg = tmp; @@ -5517,7 +5546,7 @@ static void free_sched_domain(struct rcu_head *rcu) if (sd->flags & SD_OVERLAP) { free_sched_groups(sd->groups, 1); } else if (atomic_dec_and_test(&sd->groups->ref)) { - kfree(sd->groups->sgp); + kfree(sd->groups->sgc); kfree(sd->groups); } kfree(sd); @@ -5728,17 +5757,17 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) cpumask_or(covered, covered, sg_span); - sg->sgp = *per_cpu_ptr(sdd->sgp, i); - if (atomic_inc_return(&sg->sgp->ref) == 1) + sg->sgc = *per_cpu_ptr(sdd->sgc, i); + if (atomic_inc_return(&sg->sgc->ref) == 1) build_group_mask(sd, sg); /* - * Initialize sgp->power such that even if we mess up the + * Initialize sgc->capacity such that even if we mess up the * domains and no possible iteration will get us here, we won't * die on a /0 trap. 
*/ - sg->sgp->power = SCHED_POWER_SCALE * cpumask_weight(sg_span); - sg->sgp->power_orig = sg->sgp->power; + sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span); + sg->sgc->capacity_orig = sg->sgc->capacity; /* * Make sure the first group of this domain contains the @@ -5776,8 +5805,8 @@ static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg) if (sg) { *sg = *per_cpu_ptr(sdd->sg, cpu); - (*sg)->sgp = *per_cpu_ptr(sdd->sgp, cpu); - atomic_set(&(*sg)->sgp->ref, 1); /* for claim_allocations */ + (*sg)->sgc = *per_cpu_ptr(sdd->sgc, cpu); + atomic_set(&(*sg)->sgc->ref, 1); /* for claim_allocations */ } return cpu; @@ -5786,7 +5815,7 @@ static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg) /* * build_sched_groups will build a circular linked list of the groups * covered by the given span, and will set each group's ->cpumask correctly, - * and ->cpu_power to 0. + * and ->cpu_capacity to 0. * * Assumes the sched_domain tree is fully constructed */ @@ -5840,16 +5869,16 @@ build_sched_groups(struct sched_domain *sd, int cpu) } /* - * Initialize sched groups cpu_power. + * Initialize sched groups cpu_capacity. * - * cpu_power indicates the capacity of sched group, which is used while + * cpu_capacity indicates the capacity of sched group, which is used while * distributing the load between different sched groups in a sched domain. - * Typically cpu_power for all the groups in a sched domain will be same unless - * there are asymmetries in the topology. If there are asymmetries, group - * having more cpu_power will pickup more load compared to the group having - * less cpu_power. + * Typically cpu_capacity for all the groups in a sched domain will be same + * unless there are asymmetries in the topology. If there are asymmetries, + * group having more cpu_capacity will pickup more load compared to the + * group having less cpu_capacity. */ -static void init_sched_groups_power(int cpu, struct sched_domain *sd) +static void init_sched_groups_capacity(int cpu, struct sched_domain *sd) { struct sched_group *sg = sd->groups; @@ -5863,8 +5892,8 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) if (cpu != group_balance_cpu(sg)) return; - update_group_power(sd, cpu); - atomic_set(&sg->sgp->nr_busy_cpus, sg->group_weight); + update_group_capacity(sd, cpu); + atomic_set(&sg->sgc->nr_busy_cpus, sg->group_weight); } /* @@ -5955,8 +5984,8 @@ static void claim_allocations(int cpu, struct sched_domain *sd) if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref)) *per_cpu_ptr(sdd->sg, cpu) = NULL; - if (atomic_read(&(*per_cpu_ptr(sdd->sgp, cpu))->ref)) - *per_cpu_ptr(sdd->sgp, cpu) = NULL; + if (atomic_read(&(*per_cpu_ptr(sdd->sgc, cpu))->ref)) + *per_cpu_ptr(sdd->sgc, cpu) = NULL; } #ifdef CONFIG_NUMA @@ -5969,7 +5998,7 @@ static int sched_domains_curr_level; /* * SD_flags allowed in topology descriptions. 
* - * SD_SHARE_CPUPOWER - describes SMT topologies + * SD_SHARE_CPUCAPACITY - describes SMT topologies * SD_SHARE_PKG_RESOURCES - describes shared caches * SD_NUMA - describes NUMA topologies * SD_SHARE_POWERDOMAIN - describes shared power domain @@ -5978,7 +6007,7 @@ static int sched_domains_curr_level; * SD_ASYM_PACKING - describes SMT quirks */ #define TOPOLOGY_SD_FLAGS \ - (SD_SHARE_CPUPOWER | \ + (SD_SHARE_CPUCAPACITY | \ SD_SHARE_PKG_RESOURCES | \ SD_NUMA | \ SD_ASYM_PACKING | \ @@ -6024,7 +6053,7 @@ sd_init(struct sched_domain_topology_level *tl, int cpu) | 1*SD_BALANCE_FORK | 0*SD_BALANCE_WAKE | 1*SD_WAKE_AFFINE - | 0*SD_SHARE_CPUPOWER + | 0*SD_SHARE_CPUCAPACITY | 0*SD_SHARE_PKG_RESOURCES | 0*SD_SERIALIZE | 0*SD_PREFER_SIBLING @@ -6046,7 +6075,7 @@ sd_init(struct sched_domain_topology_level *tl, int cpu) * Convert topological properties into behaviour. */ - if (sd->flags & SD_SHARE_CPUPOWER) { + if (sd->flags & SD_SHARE_CPUCAPACITY) { sd->imbalance_pct = 110; sd->smt_gain = 1178; /* ~15% */ @@ -6358,14 +6387,14 @@ static int __sdt_alloc(const struct cpumask *cpu_map) if (!sdd->sg) return -ENOMEM; - sdd->sgp = alloc_percpu(struct sched_group_power *); - if (!sdd->sgp) + sdd->sgc = alloc_percpu(struct sched_group_capacity *); + if (!sdd->sgc) return -ENOMEM; for_each_cpu(j, cpu_map) { struct sched_domain *sd; struct sched_group *sg; - struct sched_group_power *sgp; + struct sched_group_capacity *sgc; sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(), GFP_KERNEL, cpu_to_node(j)); @@ -6383,12 +6412,12 @@ static int __sdt_alloc(const struct cpumask *cpu_map) *per_cpu_ptr(sdd->sg, j) = sg; - sgp = kzalloc_node(sizeof(struct sched_group_power) + cpumask_size(), + sgc = kzalloc_node(sizeof(struct sched_group_capacity) + cpumask_size(), GFP_KERNEL, cpu_to_node(j)); - if (!sgp) + if (!sgc) return -ENOMEM; - *per_cpu_ptr(sdd->sgp, j) = sgp; + *per_cpu_ptr(sdd->sgc, j) = sgc; } } @@ -6415,15 +6444,15 @@ static void __sdt_free(const struct cpumask *cpu_map) if (sdd->sg) kfree(*per_cpu_ptr(sdd->sg, j)); - if (sdd->sgp) - kfree(*per_cpu_ptr(sdd->sgp, j)); + if (sdd->sgc) + kfree(*per_cpu_ptr(sdd->sgc, j)); } free_percpu(sdd->sd); sdd->sd = NULL; free_percpu(sdd->sg); sdd->sg = NULL; - free_percpu(sdd->sgp); - sdd->sgp = NULL; + free_percpu(sdd->sgc); + sdd->sgc = NULL; } } @@ -6493,14 +6522,14 @@ static int build_sched_domains(const struct cpumask *cpu_map, } } - /* Calculate CPU power for physical packages and nodes */ + /* Calculate CPU capacity for physical packages and nodes */ for (i = nr_cpumask_bits-1; i >= 0; i--) { if (!cpumask_test_cpu(i, cpu_map)) continue; for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { claim_allocations(i, sd); - init_sched_groups_power(i, sd); + init_sched_groups_capacity(i, sd); } } @@ -6943,7 +6972,7 @@ void __init sched_init(void) #ifdef CONFIG_SMP rq->sd = NULL; rq->rd = NULL; - rq->cpu_power = SCHED_POWER_SCALE; + rq->cpu_capacity = SCHED_CAPACITY_SCALE; rq->post_schedule = 0; rq->active_balance = 0; rq->next_balance = jiffies; diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 2b8cbf09d1a..fc4f98b1258 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -57,8 +57,6 @@ void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime) dl_b->dl_runtime = runtime; } -extern unsigned long to_ratio(u64 period, u64 runtime); - void init_dl_bw(struct dl_bw *dl_b) { raw_spin_lock_init(&dl_b->lock); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 9855e87d671..fea7d3335e1 100644 --- 
a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1017,7 +1017,7 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page, static unsigned long weighted_cpuload(const int cpu); static unsigned long source_load(int cpu, int type); static unsigned long target_load(int cpu, int type); -static unsigned long power_of(int cpu); +static unsigned long capacity_of(int cpu); static long effective_load(struct task_group *tg, int cpu, long wl, long wg); /* Cached statistics for all CPUs within a node */ @@ -1026,11 +1026,11 @@ struct numa_stats { unsigned long load; /* Total compute capacity of CPUs on a node */ - unsigned long power; + unsigned long compute_capacity; /* Approximate capacity in terms of runnable tasks on a node */ - unsigned long capacity; - int has_capacity; + unsigned long task_capacity; + int has_free_capacity; }; /* @@ -1046,7 +1046,7 @@ static void update_numa_stats(struct numa_stats *ns, int nid) ns->nr_running += rq->nr_running; ns->load += weighted_cpuload(cpu); - ns->power += power_of(cpu); + ns->compute_capacity += capacity_of(cpu); cpus++; } @@ -1056,15 +1056,16 @@ static void update_numa_stats(struct numa_stats *ns, int nid) * the @ns structure is NULL'ed and task_numa_compare() will * not find this node attractive. * - * We'll either bail at !has_capacity, or we'll detect a huge imbalance - * and bail there. + * We'll either bail at !has_free_capacity, or we'll detect a huge + * imbalance and bail there. */ if (!cpus) return; - ns->load = (ns->load * SCHED_POWER_SCALE) / ns->power; - ns->capacity = DIV_ROUND_CLOSEST(ns->power, SCHED_POWER_SCALE); - ns->has_capacity = (ns->nr_running < ns->capacity); + ns->load = (ns->load * SCHED_CAPACITY_SCALE) / ns->compute_capacity; + ns->task_capacity = + DIV_ROUND_CLOSEST(ns->compute_capacity, SCHED_CAPACITY_SCALE); + ns->has_free_capacity = (ns->nr_running < ns->task_capacity); } struct task_numa_env { @@ -1195,8 +1196,8 @@ static void task_numa_compare(struct task_numa_env *env, if (!cur) { /* Is there capacity at our destination? */ - if (env->src_stats.has_capacity && - !env->dst_stats.has_capacity) + if (env->src_stats.has_free_capacity && + !env->dst_stats.has_free_capacity) goto unlock; goto balance; @@ -1213,7 +1214,7 @@ balance: orig_dst_load = env->dst_stats.load; orig_src_load = env->src_stats.load; - /* XXX missing power terms */ + /* XXX missing capacity terms */ load = task_h_load(env->p); dst_load = orig_dst_load + load; src_load = orig_src_load - load; @@ -1301,8 +1302,8 @@ static int task_numa_migrate(struct task_struct *p) groupimp = group_weight(p, env.dst_nid) - groupweight; update_numa_stats(&env.dst_stats, env.dst_nid); - /* If the preferred nid has capacity, try to use it. */ - if (env.dst_stats.has_capacity) + /* If the preferred nid has free capacity, try to use it. */ + if (env.dst_stats.has_free_capacity) task_numa_find_cpu(&env, taskimp, groupimp); /* No space available on the preferred nid. Look elsewhere. */ @@ -3225,10 +3226,12 @@ static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq) * has not truly expired. * * Fortunately we can determine whether this is the case by checking - * whether the global deadline has advanced. + * whether the global deadline has advanced. It is valid to compare + * cfs_b->runtime_expires without any locks since we only care about + * exact equality, so a partial write will still work.
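(A note expanding the new locking remark above: a reader that holds no lock may observe a torn value of the 64-bit cfs_b->runtime_expires on 32-bit architectures. With an equality test, any torn observation simply compares unequal and falls into the extend-by-one-tick path, whose cost the comment below bounds at two ticks of drift; under the old signed-difference test, a torn value could swing the s64 result either way and pick either branch. An illustrative torn read, with assumed values:)

	/*
	 * Updater stores 0x0000000200000000 over 0x00000001ffffffff as
	 * two 32-bit halves; a concurrent reader may observe the mix
	 * 0x00000002ffffffff, off by roughly 4 seconds of nanoseconds.
	 *
	 *	old test: (s64)(local - mixed) -- either branch can fire
	 *	new test: local != mixed       -- always the bounded path
	 */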
*/ - if ((s64)(cfs_rq->runtime_expires - cfs_b->runtime_expires) >= 0) { + if (cfs_rq->runtime_expires != cfs_b->runtime_expires) { /* extend local deadline, drift is bounded above by 2 ticks */ cfs_rq->runtime_expires += TICK_NSEC; } else { @@ -3457,21 +3460,21 @@ next: static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) { u64 runtime, runtime_expires; - int idle = 1, throttled; + int throttled; - raw_spin_lock(&cfs_b->lock); /* no need to continue the timer with no bandwidth constraint */ if (cfs_b->quota == RUNTIME_INF) - goto out_unlock; + goto out_deactivate; throttled = !list_empty(&cfs_b->throttled_cfs_rq); - /* idle depends on !throttled (for the case of a large deficit) */ - idle = cfs_b->idle && !throttled; cfs_b->nr_periods += overrun; - /* if we're going inactive then everything else can be deferred */ - if (idle) - goto out_unlock; + /* + * idle depends on !throttled (for the case of a large deficit), and if + * we're going inactive then everything else can be deferred + */ + if (cfs_b->idle && !throttled) + goto out_deactivate; /* * if we have relooped after returning idle once, we need to update our @@ -3485,7 +3488,7 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) if (!throttled) { /* mark as potentially idle for the upcoming period */ cfs_b->idle = 1; - goto out_unlock; + return 0; } /* account preceding periods in which throttling occurred */ @@ -3525,12 +3528,12 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) * timer to remain active while there are any throttled entities.) */ cfs_b->idle = 0; -out_unlock: - if (idle) - cfs_b->timer_active = 0; - raw_spin_unlock(&cfs_b->lock); - return idle; + return 0; + +out_deactivate: + cfs_b->timer_active = 0; + return 1; } /* a cfs_rq won't donate quota below this amount */ @@ -3707,6 +3710,7 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) int overrun; int idle = 0; + raw_spin_lock(&cfs_b->lock); for (;;) { now = hrtimer_cb_get_time(timer); overrun = hrtimer_forward(timer, now, cfs_b->period); @@ -3716,6 +3720,7 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) idle = do_sched_cfs_period_timer(cfs_b, overrun); } + raw_spin_unlock(&cfs_b->lock); return idle ? HRTIMER_NORESTART : HRTIMER_RESTART; } @@ -3775,8 +3780,6 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq) struct cfs_rq *cfs_rq; for_each_leaf_cfs_rq(rq, cfs_rq) { - struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); - if (!cfs_rq->runtime_enabled) continue; @@ -3784,7 +3787,7 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq) * clock_task is not advancing so we just need to make sure * there's some valid quota amount */ - cfs_rq->runtime_remaining = cfs_b->quota; + cfs_rq->runtime_remaining = 1; if (cfs_rq_throttled(cfs_rq)) unthrottle_cfs_rq(cfs_rq); } @@ -4041,9 +4044,9 @@ static unsigned long target_load(int cpu, int type) return max(rq->cpu_load[type-1], total); } -static unsigned long power_of(int cpu) +static unsigned long capacity_of(int cpu) { - return cpu_rq(cpu)->cpu_power; + return cpu_rq(cpu)->cpu_capacity; } static unsigned long cpu_avg_load_per_task(int cpu) @@ -4065,7 +4068,7 @@ static void record_wakee(struct task_struct *p) * about the boundary, really active task won't care * about the loss. 
*/ - if (jiffies > current->wakee_flip_decay_ts + HZ) { + if (time_after(jiffies, current->wakee_flip_decay_ts + HZ)) { current->wakee_flips >>= 1; current->wakee_flip_decay_ts = jiffies; } @@ -4286,12 +4289,12 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) s64 this_eff_load, prev_eff_load; this_eff_load = 100; - this_eff_load *= power_of(prev_cpu); + this_eff_load *= capacity_of(prev_cpu); this_eff_load *= this_load + effective_load(tg, this_cpu, weight, weight); prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2; - prev_eff_load *= power_of(this_cpu); + prev_eff_load *= capacity_of(this_cpu); prev_eff_load *= load + effective_load(tg, prev_cpu, 0, weight); balanced = this_eff_load <= prev_eff_load; @@ -4367,8 +4370,8 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, avg_load += load; } - /* Adjust by relative CPU power of the group */ - avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgp->power; + /* Adjust by relative CPU capacity of the group */ + avg_load = (avg_load * SCHED_CAPACITY_SCALE) / group->sgc->capacity; if (local_group) { this_load = avg_load; @@ -4948,14 +4951,14 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp * * W'_i,n = (2^n - 1) / 2^n * W_i,n + 1 / 2^n * W_i,0 (3) * - * P_i is the cpu power (or compute capacity) of cpu i, typically it is the + * C_i is the compute capacity of cpu i, typically it is the * fraction of 'recent' time available for SCHED_OTHER task execution. But it * can also include other factors [XXX]. * * To achieve this balance we define a measure of imbalance which follows * directly from (1): * - * imb_i,j = max{ avg(W/P), W_i/P_i } - min{ avg(W/P), W_j/P_j } (4) + * imb_i,j = max{ avg(W/C), W_i/C_i } - min{ avg(W/C), W_j/C_j } (4) * * We then move tasks around to minimize the imbalance. In the continuous * function space it is obvious this converges, in the discrete case we get @@ -5530,13 +5533,13 @@ struct sg_lb_stats { unsigned long group_load; /* Total load over the CPUs of the group */ unsigned long sum_weighted_load; /* Weighted load of group's tasks */ unsigned long load_per_task; - unsigned long group_power; + unsigned long group_capacity; unsigned int sum_nr_running; /* Nr tasks running in the group */ - unsigned int group_capacity; + unsigned int group_capacity_factor; unsigned int idle_cpus; unsigned int group_weight; int group_imb; /* Is there an imbalance in the group ? */ - int group_has_capacity; /* Is there extra capacity in the group?
*/ + int group_has_free_capacity; #ifdef CONFIG_NUMA_BALANCING unsigned int nr_numa_running; unsigned int nr_preferred_running; @@ -5551,7 +5554,7 @@ struct sd_lb_stats { struct sched_group *busiest; /* Busiest group in this sd */ struct sched_group *local; /* Local group in this sd */ unsigned long total_load; /* Total load of all groups in sd */ - unsigned long total_pwr; /* Total power of all groups in sd */ + unsigned long total_capacity; /* Total capacity of all groups in sd */ unsigned long avg_load; /* Average load across all groups in sd */ struct sg_lb_stats busiest_stat;/* Statistics of the busiest group */ @@ -5570,7 +5573,7 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds) .busiest = NULL, .local = NULL, .total_load = 0UL, - .total_pwr = 0UL, + .total_capacity = 0UL, .busiest_stat = { .avg_load = 0UL, }, @@ -5605,17 +5608,17 @@ static inline int get_sd_load_idx(struct sched_domain *sd, return load_idx; } -static unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu) +static unsigned long default_scale_capacity(struct sched_domain *sd, int cpu) { - return SCHED_POWER_SCALE; + return SCHED_CAPACITY_SCALE; } -unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu) +unsigned long __weak arch_scale_freq_capacity(struct sched_domain *sd, int cpu) { - return default_scale_freq_power(sd, cpu); + return default_scale_capacity(sd, cpu); } -static unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu) +static unsigned long default_scale_smt_capacity(struct sched_domain *sd, int cpu) { unsigned long weight = sd->span_weight; unsigned long smt_gain = sd->smt_gain; @@ -5625,12 +5628,12 @@ static unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu) return smt_gain; } -unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu) +unsigned long __weak arch_scale_smt_capacity(struct sched_domain *sd, int cpu) { - return default_scale_smt_power(sd, cpu); + return default_scale_smt_capacity(sd, cpu); } -static unsigned long scale_rt_power(int cpu) +static unsigned long scale_rt_capacity(int cpu) { struct rq *rq = cpu_rq(cpu); u64 total, available, age_stamp, avg; @@ -5650,71 +5653,71 @@ static unsigned long scale_rt_power(int cpu) total = sched_avg_period() + delta; if (unlikely(total < avg)) { - /* Ensures that power won't end up being negative */ + /* Ensures that capacity won't end up being negative */ available = 0; } else { available = total - avg; } - if (unlikely((s64)total < SCHED_POWER_SCALE)) - total = SCHED_POWER_SCALE; + if (unlikely((s64)total < SCHED_CAPACITY_SCALE)) + total = SCHED_CAPACITY_SCALE; - total >>= SCHED_POWER_SHIFT; + total >>= SCHED_CAPACITY_SHIFT; return div_u64(available, total); } -static void update_cpu_power(struct sched_domain *sd, int cpu) +static void update_cpu_capacity(struct sched_domain *sd, int cpu) { unsigned long weight = sd->span_weight; - unsigned long power = SCHED_POWER_SCALE; + unsigned long capacity = SCHED_CAPACITY_SCALE; struct sched_group *sdg = sd->groups; - if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) { - if (sched_feat(ARCH_POWER)) - power *= arch_scale_smt_power(sd, cpu); + if ((sd->flags & SD_SHARE_CPUCAPACITY) && weight > 1) { + if (sched_feat(ARCH_CAPACITY)) + capacity *= arch_scale_smt_capacity(sd, cpu); else - power *= default_scale_smt_power(sd, cpu); + capacity *= default_scale_smt_capacity(sd, cpu); - power >>= SCHED_POWER_SHIFT; + capacity >>= SCHED_CAPACITY_SHIFT; } - sdg->sgp->power_orig = power; + sdg->sgc->capacity_orig = 
capacity; - if (sched_feat(ARCH_POWER)) - power *= arch_scale_freq_power(sd, cpu); + if (sched_feat(ARCH_CAPACITY)) + capacity *= arch_scale_freq_capacity(sd, cpu); else - power *= default_scale_freq_power(sd, cpu); + capacity *= default_scale_capacity(sd, cpu); - power >>= SCHED_POWER_SHIFT; + capacity >>= SCHED_CAPACITY_SHIFT; - power *= scale_rt_power(cpu); - power >>= SCHED_POWER_SHIFT; + capacity *= scale_rt_capacity(cpu); + capacity >>= SCHED_CAPACITY_SHIFT; - if (!power) - power = 1; + if (!capacity) + capacity = 1; - cpu_rq(cpu)->cpu_power = power; - sdg->sgp->power = power; + cpu_rq(cpu)->cpu_capacity = capacity; + sdg->sgc->capacity = capacity; } -void update_group_power(struct sched_domain *sd, int cpu) +void update_group_capacity(struct sched_domain *sd, int cpu) { struct sched_domain *child = sd->child; struct sched_group *group, *sdg = sd->groups; - unsigned long power, power_orig; + unsigned long capacity, capacity_orig; unsigned long interval; interval = msecs_to_jiffies(sd->balance_interval); interval = clamp(interval, 1UL, max_load_balance_interval); - sdg->sgp->next_update = jiffies + interval; + sdg->sgc->next_update = jiffies + interval; if (!child) { - update_cpu_power(sd, cpu); + update_cpu_capacity(sd, cpu); return; } - power_orig = power = 0; + capacity_orig = capacity = 0; if (child->flags & SD_OVERLAP) { /* @@ -5723,31 +5726,31 @@ void update_group_power(struct sched_domain *sd, int cpu) */ for_each_cpu(cpu, sched_group_cpus(sdg)) { - struct sched_group_power *sgp; + struct sched_group_capacity *sgc; struct rq *rq = cpu_rq(cpu); /* - * build_sched_domains() -> init_sched_groups_power() + * build_sched_domains() -> init_sched_groups_capacity() * gets here before we've attached the domains to the * runqueues. * - * Use power_of(), which is set irrespective of domains - * in update_cpu_power(). + * Use capacity_of(), which is set irrespective of domains + * in update_cpu_capacity(). * - * This avoids power/power_orig from being 0 and + * This avoids capacity/capacity_orig from being 0 and * causing divide-by-zero issues on boot. * - * Runtime updates will correct power_orig. + * Runtime updates will correct capacity_orig. */ if (unlikely(!rq->sd)) { - power_orig += power_of(cpu); - power += power_of(cpu); + capacity_orig += capacity_of(cpu); + capacity += capacity_of(cpu); continue; } - sgp = rq->sd->groups->sgp; - power_orig += sgp->power_orig; - power += sgp->power; + sgc = rq->sd->groups->sgc; + capacity_orig += sgc->capacity_orig; + capacity += sgc->capacity; } } else { /* @@ -5757,14 +5760,14 @@ void update_group_power(struct sched_domain *sd, int cpu) group = child->groups; do { - power_orig += group->sgp->power_orig; - power += group->sgp->power; + capacity_orig += group->sgc->capacity_orig; + capacity += group->sgc->capacity; group = group->next; } while (group != child->groups); } - sdg->sgp->power_orig = power_orig; - sdg->sgp->power = power; + sdg->sgc->capacity_orig = capacity_orig; + sdg->sgc->capacity = capacity; } /* @@ -5778,15 +5781,15 @@ static inline int fix_small_capacity(struct sched_domain *sd, struct sched_group *group) { /* - * Only siblings can have significantly less than SCHED_POWER_SCALE + * Only siblings can have significantly less than SCHED_CAPACITY_SCALE */ - if (!(sd->flags & SD_SHARE_CPUPOWER)) + if (!(sd->flags & SD_SHARE_CPUCAPACITY)) return 0; /* - * If ~90% of the cpu_power is still there, we're good. + * If ~90% of the cpu_capacity is still there, we're good. 
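(The "~90%" above is exactly 29/32 = 90.625%, chosen so the check needs only integer multiplies and no division. A worked check, taking SCHED_CAPACITY_SCALE to be 1024:)

	/*
	 * capacity * 32 > capacity_orig * 29
	 *	<=>  capacity / capacity_orig > 29/32 (= 90.625%)
	 *
	 * With capacity_orig == 1024:
	 *	capacity == 929:  929 * 32 = 29728 > 1024 * 29 = 29696  -> ok
	 *	capacity == 928:  928 * 32 = 29696, not > 29696  -> reduced
	 */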
*/ - if (group->sgp->power * 32 > group->sgp->power_orig * 29) + if (group->sgc->capacity * 32 > group->sgc->capacity_orig * 29) return 1; return 0; @@ -5823,34 +5826,35 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group) static inline int sg_imbalanced(struct sched_group *group) { - return group->sgp->imbalance; + return group->sgc->imbalance; } /* - * Compute the group capacity. + * Compute the group capacity factor. * - * Avoid the issue where N*frac(smt_power) >= 1 creates 'phantom' cores by + * Avoid the issue where N*frac(smt_capacity) >= 1 creates 'phantom' cores by * first dividing out the smt factor and computing the actual number of cores - * and limit power unit capacity with that. + * and limit unit capacity with that. */ -static inline int sg_capacity(struct lb_env *env, struct sched_group *group) +static inline int sg_capacity_factor(struct lb_env *env, struct sched_group *group) { - unsigned int capacity, smt, cpus; - unsigned int power, power_orig; + unsigned int capacity_factor, smt, cpus; + unsigned int capacity, capacity_orig; - power = group->sgp->power; - power_orig = group->sgp->power_orig; + capacity = group->sgc->capacity; + capacity_orig = group->sgc->capacity_orig; cpus = group->group_weight; - /* smt := ceil(cpus / power), assumes: 1 < smt_power < 2 */ - smt = DIV_ROUND_UP(SCHED_POWER_SCALE * cpus, power_orig); - capacity = cpus / smt; /* cores */ + /* smt := ceil(cpus / capacity), assumes: 1 < smt_capacity < 2 */ + smt = DIV_ROUND_UP(SCHED_CAPACITY_SCALE * cpus, capacity_orig); + capacity_factor = cpus / smt; /* cores */ - capacity = min_t(unsigned, capacity, DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE)); - if (!capacity) - capacity = fix_small_capacity(env->sd, group); + capacity_factor = min_t(unsigned, + capacity_factor, DIV_ROUND_CLOSEST(capacity, SCHED_CAPACITY_SCALE)); + if (!capacity_factor) + capacity_factor = fix_small_capacity(env->sd, group); - return capacity; + return capacity_factor; } /** @@ -5890,9 +5894,9 @@ static inline void update_sg_lb_stats(struct lb_env *env, sgs->idle_cpus++; } - /* Adjust by relative CPU power of the group */ - sgs->group_power = group->sgp->power; - sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / sgs->group_power; + /* Adjust by relative CPU capacity of the group */ + sgs->group_capacity = group->sgc->capacity; + sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity; if (sgs->sum_nr_running) sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running; @@ -5900,10 +5904,10 @@ static inline void update_sg_lb_stats(struct lb_env *env, sgs->group_weight = group->group_weight; sgs->group_imb = sg_imbalanced(group); - sgs->group_capacity = sg_capacity(env, group); + sgs->group_capacity_factor = sg_capacity_factor(env, group); - if (sgs->group_capacity > sgs->sum_nr_running) - sgs->group_has_capacity = 1; + if (sgs->group_capacity_factor > sgs->sum_nr_running) + sgs->group_has_free_capacity = 1; } /** @@ -5927,7 +5931,7 @@ static bool update_sd_pick_busiest(struct lb_env *env, if (sgs->avg_load <= sds->busiest_stat.avg_load) return false; - if (sgs->sum_nr_running > sgs->group_capacity) + if (sgs->sum_nr_running > sgs->group_capacity_factor) return true; if (sgs->group_imb) @@ -6007,8 +6011,8 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd sgs = &sds->local_stat; if (env->idle != CPU_NEWLY_IDLE || - time_after_eq(jiffies, sg->sgp->next_update)) - update_group_power(env->sd, env->dst_cpu); + time_after_eq(jiffies, sg->sgc->next_update)) 
+ update_group_capacity(env->sd, env->dst_cpu); } update_sg_lb_stats(env, sg, load_idx, local_group, sgs); @@ -6018,17 +6022,17 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd /* * In case the child domain prefers tasks to go to siblings - * first, lower the sg capacity to one so that we'll try + * first, lower the sg capacity factor to one so that we'll try * and move all the excess tasks away. We lower the capacity * of a group only if the local group has the capacity to fit - * these excess tasks, i.e. nr_running < group_capacity. The + * these excess tasks, i.e. nr_running < group_capacity_factor. The * extra check prevents the case where you always pull from the * heaviest group when it is already under-utilized (possible * when a large-weight task outweighs the rest of the tasks on * the system). */ if (prefer_sibling && sds->local && - sds->local_stat.group_has_capacity) - sgs->group_capacity = min(sgs->group_capacity, 1U); + sds->local_stat.group_has_free_capacity) + sgs->group_capacity_factor = min(sgs->group_capacity_factor, 1U); if (update_sd_pick_busiest(env, sds, sg, sgs)) { sds->busiest = sg; @@ -6038,7 +6042,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd next_group: /* Now, start updating sd_lb_stats */ sds->total_load += sgs->group_load; - sds->total_pwr += sgs->group_power; + sds->total_capacity += sgs->group_capacity; sg = sg->next; } while (sg != env->sd->groups); @@ -6085,8 +6089,8 @@ static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds) return 0; env->imbalance = DIV_ROUND_CLOSEST( - sds->busiest_stat.avg_load * sds->busiest_stat.group_power, - SCHED_POWER_SCALE); + sds->busiest_stat.avg_load * sds->busiest_stat.group_capacity, + SCHED_CAPACITY_SCALE); return 1; } @@ -6101,7 +6105,7 @@ static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds) static inline void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds) { - unsigned long tmp, pwr_now = 0, pwr_move = 0; + unsigned long tmp, capa_now = 0, capa_move = 0; unsigned int imbn = 2; unsigned long scaled_busy_load_per_task; struct sg_lb_stats *local, *busiest; @@ -6115,8 +6119,8 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds) imbn = 1; scaled_busy_load_per_task = - (busiest->load_per_task * SCHED_POWER_SCALE) / - busiest->group_power; + (busiest->load_per_task * SCHED_CAPACITY_SCALE) / + busiest->group_capacity; if (busiest->avg_load + scaled_busy_load_per_task >= local->avg_load + (scaled_busy_load_per_task * imbn)) { @@ -6126,38 +6130,38 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds) /* * OK, we don't have enough imbalance to justify moving tasks, - * however we may be able to increase total CPU power used by + * however we may be able to increase total CPU capacity used by * moving them.
*/ - pwr_now += busiest->group_power * + capa_now += busiest->group_capacity * min(busiest->load_per_task, busiest->avg_load); - pwr_now += local->group_power * + capa_now += local->group_capacity * min(local->load_per_task, local->avg_load); - pwr_now /= SCHED_POWER_SCALE; + capa_now /= SCHED_CAPACITY_SCALE; /* Amount of load we'd subtract */ if (busiest->avg_load > scaled_busy_load_per_task) { - pwr_move += busiest->group_power * + capa_move += busiest->group_capacity * min(busiest->load_per_task, busiest->avg_load - scaled_busy_load_per_task); } /* Amount of load we'd add */ - if (busiest->avg_load * busiest->group_power < - busiest->load_per_task * SCHED_POWER_SCALE) { - tmp = (busiest->avg_load * busiest->group_power) / - local->group_power; + if (busiest->avg_load * busiest->group_capacity < + busiest->load_per_task * SCHED_CAPACITY_SCALE) { + tmp = (busiest->avg_load * busiest->group_capacity) / + local->group_capacity; } else { - tmp = (busiest->load_per_task * SCHED_POWER_SCALE) / - local->group_power; + tmp = (busiest->load_per_task * SCHED_CAPACITY_SCALE) / + local->group_capacity; } - pwr_move += local->group_power * + capa_move += local->group_capacity * min(local->load_per_task, local->avg_load + tmp); - pwr_move /= SCHED_POWER_SCALE; + capa_move /= SCHED_CAPACITY_SCALE; /* Move if we gain throughput */ - if (pwr_move > pwr_now) + if (capa_move > capa_now) env->imbalance = busiest->load_per_task; } @@ -6187,7 +6191,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s /* * In the presence of smp nice balancing, certain scenarios can have * max load less than avg load(as we skip the groups at or below - * its cpu_power, while calculating max_load..) + * its cpu_capacity, while calculating max_load..) */ if (busiest->avg_load <= sds->avg_load || local->avg_load >= sds->avg_load) { @@ -6202,10 +6206,10 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s * have to drop below capacity to reach cpu-load equilibrium. 
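(To make the scaling in the next few lines concrete, a worked example under assumed values -- SCHED_LOAD_SCALE and SCHED_CAPACITY_SCALE both taken as 1024: a busiest group of two full-capacity CPUs, so group_capacity = 2048 with a capacity factor of 2, running three tasks:)

	/*
	 * load_above_capacity  = 3 - 2        =       1 excess task
	 * load_above_capacity *= 1024 * 1024  = 1048576
	 * load_above_capacity /= 2048         =     512
	 *
	 * Roughly speaking: one excess nice-0 task, spread over two CPUs
	 * worth of capacity, adds half a task's load (512/1024) to the
	 * bound fed into the max_pull calculation below.
	 */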
*/ load_above_capacity = - (busiest->sum_nr_running - busiest->group_capacity); + (busiest->sum_nr_running - busiest->group_capacity_factor); - load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE); - load_above_capacity /= busiest->group_power; + load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_CAPACITY_SCALE); + load_above_capacity /= busiest->group_capacity; } /* @@ -6220,9 +6224,9 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s /* How much load to actually move to equalise the imbalance */ env->imbalance = min( - max_pull * busiest->group_power, - (sds->avg_load - local->avg_load) * local->group_power - ) / SCHED_POWER_SCALE; + max_pull * busiest->group_capacity, + (sds->avg_load - local->avg_load) * local->group_capacity + ) / SCHED_CAPACITY_SCALE; /* * if *imbalance is less than the average load per runnable task @@ -6276,7 +6280,8 @@ static struct sched_group *find_busiest_group(struct lb_env *env) if (!sds.busiest || busiest->sum_nr_running == 0) goto out_balanced; - sds.avg_load = (SCHED_POWER_SCALE * sds.total_load) / sds.total_pwr; + sds.avg_load = (SCHED_CAPACITY_SCALE * sds.total_load) + / sds.total_capacity; /* * If the busiest group is imbalanced the below checks don't @@ -6287,8 +6292,8 @@ static struct sched_group *find_busiest_group(struct lb_env *env) goto force_balance; /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */ - if (env->idle == CPU_NEWLY_IDLE && local->group_has_capacity && - !busiest->group_has_capacity) + if (env->idle == CPU_NEWLY_IDLE && local->group_has_free_capacity && + !busiest->group_has_free_capacity) goto force_balance; /* @@ -6342,11 +6347,11 @@ static struct rq *find_busiest_queue(struct lb_env *env, struct sched_group *group) { struct rq *busiest = NULL, *rq; - unsigned long busiest_load = 0, busiest_power = 1; + unsigned long busiest_load = 0, busiest_capacity = 1; int i; for_each_cpu_and(i, sched_group_cpus(group), env->cpus) { - unsigned long power, capacity, wl; + unsigned long capacity, capacity_factor, wl; enum fbq_type rt; rq = cpu_rq(i); @@ -6374,34 +6379,34 @@ static struct rq *find_busiest_queue(struct lb_env *env, if (rt > env->fbq_type) continue; - power = power_of(i); - capacity = DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE); - if (!capacity) - capacity = fix_small_capacity(env->sd, group); + capacity = capacity_of(i); + capacity_factor = DIV_ROUND_CLOSEST(capacity, SCHED_CAPACITY_SCALE); + if (!capacity_factor) + capacity_factor = fix_small_capacity(env->sd, group); wl = weighted_cpuload(i); /* * When comparing with imbalance, use weighted_cpuload() - * which is not scaled with the cpu power. + * which is not scaled with the cpu capacity. */ - if (capacity && rq->nr_running == 1 && wl > env->imbalance) + if (capacity_factor && rq->nr_running == 1 && wl > env->imbalance) continue; /* * For the load comparisons with the other cpu's, consider - * the weighted_cpuload() scaled with the cpu power, so that - * the load can be moved away from the cpu that is potentially - * running at a lower capacity. + * the weighted_cpuload() scaled with the cpu capacity, so + * that the load can be moved away from the cpu that is + * potentially running at a lower capacity. * - * Thus we're looking for max(wl_i / power_i), crosswise + * Thus we're looking for max(wl_i / capacity_i), crosswise * multiplication to rid ourselves of the division works out - * to: wl_i * power_j > wl_j * power_i; where j is our - * previous maximum. 
+ * to: wl_i * capacity_j > wl_j * capacity_i; where j is + * our previous maximum. */ - if (wl * busiest_power > busiest_load * power) { + if (wl * busiest_capacity > busiest_load * capacity) { busiest_load = wl; - busiest_power = power; + busiest_capacity = capacity; busiest = rq; } } @@ -6609,7 +6614,7 @@ more_balance: * We failed to reach balance because of affinity. */ if (sd_parent) { - int *group_imbalance = &sd_parent->groups->sgp->imbalance; + int *group_imbalance = &sd_parent->groups->sgc->imbalance; if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0) { *group_imbalance = 1; @@ -6996,7 +7001,7 @@ static inline void set_cpu_sd_state_busy(void) goto unlock; sd->nohz_idle = 0; - atomic_inc(&sd->groups->sgp->nr_busy_cpus); + atomic_inc(&sd->groups->sgc->nr_busy_cpus); unlock: rcu_read_unlock(); } @@ -7013,7 +7018,7 @@ void set_cpu_sd_state_idle(void) goto unlock; sd->nohz_idle = 1; - atomic_dec(&sd->groups->sgp->nr_busy_cpus); + atomic_dec(&sd->groups->sgc->nr_busy_cpus); unlock: rcu_read_unlock(); } @@ -7192,12 +7197,17 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) rq = cpu_rq(balance_cpu); - raw_spin_lock_irq(&rq->lock); - update_rq_clock(rq); - update_idle_cpu_load(rq); - raw_spin_unlock_irq(&rq->lock); - - rebalance_domains(rq, CPU_IDLE); + /* + * If time for next balance is due, + * do the balance. + */ + if (time_after_eq(jiffies, rq->next_balance)) { + raw_spin_lock_irq(&rq->lock); + update_rq_clock(rq); + update_idle_cpu_load(rq); + raw_spin_unlock_irq(&rq->lock); + rebalance_domains(rq, CPU_IDLE); + } if (time_after(this_rq->next_balance, rq->next_balance)) this_rq->next_balance = rq->next_balance; @@ -7212,7 +7222,7 @@ end: * of an idle cpu is the system. * - This rq has more than one task. * - At any scheduler domain level, this cpu's scheduler group has multiple - * busy cpu's exceeding the group's power. + * busy cpu's exceeding the group's capacity. * - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler * domain span are idle. 
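
The nohz_idle_balance() change a few hunks back gates the expensive rebalance on time_after_eq(jiffies, rq->next_balance). That comparison stays correct across jiffies wraparound because it is done through signed subtraction; a self-contained sketch of the idiom, with the macro body mirroring include/linux/jiffies.h and illustrative counter values:

#include <stdio.h>

/* Wrap-safe "a is at or after b" for free-running counters. */
#define time_after_eq(a, b)	((long)((a) - (b)) >= 0)

int main(void)
{
	unsigned long jiffies = 5;			/* counter wrapped past 0 */
	unsigned long next_balance = (unsigned long)-10; /* set before the wrap */

	/* (5 - (ULONG_MAX - 9)) mod 2^N == 15: 15 ticks past the deadline. */
	if (time_after_eq(jiffies, next_balance))
		printf("balance is due despite the wraparound\n");
	return 0;
}
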
*/ @@ -7220,7 +7230,7 @@ static inline int nohz_kick_needed(struct rq *rq) { unsigned long now = jiffies; struct sched_domain *sd; - struct sched_group_power *sgp; + struct sched_group_capacity *sgc; int nr_busy, cpu = rq->cpu; if (unlikely(rq->idle_balance)) @@ -7250,8 +7260,8 @@ static inline int nohz_kick_needed(struct rq *rq) sd = rcu_dereference(per_cpu(sd_busy, cpu)); if (sd) { - sgp = sd->groups->sgp; - nr_busy = atomic_read(&sgp->nr_busy_cpus); + sgc = sd->groups->sgc; + nr_busy = atomic_read(&sgc->nr_busy_cpus); if (nr_busy > 1) goto need_kick_unlock; diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 5716929a2e3..90284d117fe 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -37,18 +37,18 @@ SCHED_FEAT(CACHE_HOT_BUDDY, true) SCHED_FEAT(WAKEUP_PREEMPTION, true) /* - * Use arch dependent cpu power functions + * Use arch dependent cpu capacity functions */ -SCHED_FEAT(ARCH_POWER, true) +SCHED_FEAT(ARCH_CAPACITY, true) SCHED_FEAT(HRTICK, false) SCHED_FEAT(DOUBLE_TICK, false) SCHED_FEAT(LB_BIAS, true) /* - * Decrement CPU power based on time not spent running tasks + * Decrement CPU capacity based on time not spent running tasks */ -SCHED_FEAT(NONTASK_POWER, true) +SCHED_FEAT(NONTASK_CAPACITY, true) /* * Queue remote wakeups on the target CPU and process them diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 25b9423abce..cf009fb0bc2 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -12,6 +12,8 @@ #include <trace/events/power.h> +#include "sched.h" + static int __read_mostly cpu_idle_force_poll; void cpu_idle_poll_ctrl(bool enable) @@ -67,6 +69,10 @@ void __weak arch_cpu_idle(void) * cpuidle_idle_call - the main idle function * * NOTE: no locks or semaphores should be used here + * + * On archs that support TIF_POLLING_NRFLAG, is called with polling + * set, and it returns with polling set. If it ever stops polling, it + * must clear the polling bit. */ static void cpuidle_idle_call(void) { @@ -175,10 +181,22 @@ exit_idle: /* * Generic idle loop implementation + * + * Called with polling cleared. */ static void cpu_idle_loop(void) { while (1) { + /* + * If the arch has a polling bit, we maintain an invariant: + * + * Our polling bit is clear if we're not scheduled (i.e. if + * rq->curr != rq->idle). This means that, if rq->idle has + * the polling bit set, then setting need_resched is + * guaranteed to cause the cpu to reschedule. + */ + + __current_set_polling(); tick_nohz_idle_enter(); while (!need_resched()) { @@ -218,6 +236,17 @@ static void cpu_idle_loop(void) */ preempt_set_need_resched(); tick_nohz_idle_exit(); + __current_clr_polling(); + + /* + * We promise to call sched_ttwu_pending and reschedule + * if need_resched is set while polling is set. That + * means that clearing polling needs to be visible + * before doing these things. 
+ */ + smp_mb__after_atomic(); + + sched_ttwu_pending(); schedule_preempt_disabled(); } } @@ -239,7 +268,6 @@ void cpu_startup_entry(enum cpuhp_state state) */ boot_init_stack_canary(); #endif - __current_set_polling(); arch_cpu_idle_prepare(); cpu_idle_loop(); } diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index b3512f1afce..a49083192c6 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -918,7 +918,6 @@ static void update_curr_rt(struct rq *rq) { struct task_struct *curr = rq->curr; struct sched_rt_entity *rt_se = &curr->rt; - struct rt_rq *rt_rq = rt_rq_of_se(rt_se); u64 delta_exec; if (curr->sched_class != &rt_sched_class) @@ -943,7 +942,7 @@ static void update_curr_rt(struct rq *rq) return; for_each_sched_rt_entity(rt_se) { - rt_rq = rt_rq_of_se(rt_se); + struct rt_rq *rt_rq = rt_rq_of_se(rt_se); if (sched_rt_runtime(rt_rq) != RUNTIME_INF) { raw_spin_lock(&rt_rq->rt_runtime_lock); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index e47679b04d1..31cc02ebc54 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -567,7 +567,7 @@ struct rq { struct root_domain *rd; struct sched_domain *sd; - unsigned long cpu_power; + unsigned long cpu_capacity; unsigned char idle_balance; /* For active balancing */ @@ -670,6 +670,8 @@ extern int migrate_swap(struct task_struct *, struct task_struct *); #ifdef CONFIG_SMP +extern void sched_ttwu_pending(void); + #define rcu_dereference_check_sched_domain(p) \ rcu_dereference_check((p), \ lockdep_is_held(&sched_domains_mutex)) @@ -728,15 +730,15 @@ DECLARE_PER_CPU(struct sched_domain *, sd_numa); DECLARE_PER_CPU(struct sched_domain *, sd_busy); DECLARE_PER_CPU(struct sched_domain *, sd_asym); -struct sched_group_power { +struct sched_group_capacity { atomic_t ref; /* - * CPU power of this group, SCHED_LOAD_SCALE being max power for a - * single CPU. + * CPU capacity of this group, SCHED_LOAD_SCALE being max capacity + * for a single CPU. */ - unsigned int power, power_orig; + unsigned int capacity, capacity_orig; unsigned long next_update; - int imbalance; /* XXX unrelated to power but shared group state */ + int imbalance; /* XXX unrelated to capacity but shared group state */ /* * Number of busy cpus in this group. */ @@ -750,7 +752,7 @@ struct sched_group { atomic_t ref; unsigned int group_weight; - struct sched_group_power *sgp; + struct sched_group_capacity *sgc; /* * The CPUs this group covers. @@ -773,7 +775,7 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg) */ static inline struct cpumask *sched_group_mask(struct sched_group *sg) { - return to_cpumask(sg->sgp->cpumask); + return to_cpumask(sg->sgc->cpumask); } /** @@ -787,6 +789,10 @@ static inline unsigned int group_first_cpu(struct sched_group *group) extern int group_balance_cpu(struct sched_group *sg); +#else + +static inline void sched_ttwu_pending(void) { } + #endif /* CONFIG_SMP */ #include "stats.h" @@ -1167,7 +1173,7 @@ extern const struct sched_class idle_sched_class; #ifdef CONFIG_SMP -extern void update_group_power(struct sched_domain *sd, int cpu); +extern void update_group_capacity(struct sched_domain *sd, int cpu); extern void trigger_load_balance(struct rq *rq); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index c634868c292..7c56c3d0694 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -543,7 +543,7 @@ static void rb_wake_up_waiters(struct irq_work *work) * as data is added to any of the @buffer's cpu buffers. 
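
The ordering promise spelled out in the idle-loop comment above is the classic flag-then-check pattern: the waker checks the polling bit after queuing work, so the idle CPU must make its cleared bit globally visible before it looks for queued work, or both sides can miss each other. A userspace analogue with C11 atomics; a seq_cst fence plays the role of smp_mb__after_atomic(), and all names here are illustrative stand-ins, not kernel APIs:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool polling;	 /* stands in for TIF_POLLING_NRFLAG */
static atomic_bool work_pending; /* stands in for the ttwu pending list */

/* Idle side: stop polling, then look for work queued meanwhile. */
static void idle_stop_polling(void)
{
	atomic_store_explicit(&polling, false, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst); /* smp_mb__after_atomic() */
	if (atomic_load_explicit(&work_pending, memory_order_relaxed))
		; /* process it: the kernel calls sched_ttwu_pending() here */
}

/* Waker side: queue work, then decide whether an IPI is needed. */
static bool waker_needs_ipi(void)
{
	atomic_store_explicit(&work_pending, true, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);
	/* if the target still polls, setting need_resched is enough */
	return !atomic_load_explicit(&polling, memory_order_relaxed);
}

int main(void)
{
	idle_stop_polling();
	return waker_needs_ipi() ? 0 : 1;
}
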
Otherwise * it will wait for data to be added to a specific cpu buffer. */ -void ring_buffer_wait(struct ring_buffer *buffer, int cpu) +int ring_buffer_wait(struct ring_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; DEFINE_WAIT(wait); @@ -557,6 +557,8 @@ void ring_buffer_wait(struct ring_buffer *buffer, int cpu) if (cpu == RING_BUFFER_ALL_CPUS) work = &buffer->irq_work; else { + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + return -ENODEV; cpu_buffer = buffer->buffers[cpu]; work = &cpu_buffer->irq_work; } @@ -591,6 +593,7 @@ void ring_buffer_wait(struct ring_buffer *buffer, int cpu) schedule(); finish_wait(&work->waiters, &wait); + return 0; } /** diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 16f7038d1f4..384ede31171 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1085,13 +1085,13 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) } #endif /* CONFIG_TRACER_MAX_TRACE */ -static void wait_on_pipe(struct trace_iterator *iter) +static int wait_on_pipe(struct trace_iterator *iter) { /* Iterators are static, they should be filled or empty */ if (trace_buffer_iter(iter, iter->cpu_file)) - return; + return 0; - ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file); + return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file); } #ifdef CONFIG_FTRACE_STARTUP_TEST @@ -1338,7 +1338,7 @@ static int trace_create_savedcmd(void) { int ret; - savedcmd = kmalloc(sizeof(struct saved_cmdlines_buffer), GFP_KERNEL); + savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL); if (!savedcmd) return -ENOMEM; @@ -3840,7 +3840,7 @@ tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf, int r; arch_spin_lock(&trace_cmdline_lock); - r = sprintf(buf, "%u\n", savedcmd->cmdline_num); + r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num); arch_spin_unlock(&trace_cmdline_lock); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); @@ -3857,7 +3857,7 @@ static int tracing_resize_saved_cmdlines(unsigned int val) { struct saved_cmdlines_buffer *s, *savedcmd_temp; - s = kmalloc(sizeof(struct saved_cmdlines_buffer), GFP_KERNEL); + s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) return -ENOMEM; @@ -4378,6 +4378,7 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table) static int tracing_wait_pipe(struct file *filp) { struct trace_iterator *iter = filp->private_data; + int ret; while (trace_empty(iter)) { @@ -4399,10 +4400,13 @@ static int tracing_wait_pipe(struct file *filp) mutex_unlock(&iter->mutex); - wait_on_pipe(iter); + ret = wait_on_pipe(iter); mutex_lock(&iter->mutex); + if (ret) + return ret; + if (signal_pending(current)) return -EINTR; } @@ -5327,8 +5331,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, goto out_unlock; } mutex_unlock(&trace_types_lock); - wait_on_pipe(iter); + ret = wait_on_pipe(iter); mutex_lock(&trace_types_lock); + if (ret) { + size = ret; + goto out_unlock; + } if (signal_pending(current)) { size = -EINTR; goto out_unlock; @@ -5538,8 +5546,10 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, goto out; } mutex_unlock(&trace_types_lock); - wait_on_pipe(iter); + ret = wait_on_pipe(iter); mutex_lock(&trace_types_lock); + if (ret) + goto out; if (signal_pending(current)) { ret = -EINTR; goto out; @@ -6232,22 +6242,25 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) return 0; } +static void free_trace_buffer(struct trace_buffer *buf) +{ + if (buf->buffer) { + ring_buffer_free(buf->buffer); + buf->buffer = NULL; + 
free_percpu(buf->data); + buf->data = NULL; + } +} + static void free_trace_buffers(struct trace_array *tr) { if (!tr) return; - if (tr->trace_buffer.buffer) { - ring_buffer_free(tr->trace_buffer.buffer); - tr->trace_buffer.buffer = NULL; - free_percpu(tr->trace_buffer.data); - } + free_trace_buffer(&tr->trace_buffer); #ifdef CONFIG_TRACER_MAX_TRACE - if (tr->max_buffer.buffer) { - ring_buffer_free(tr->max_buffer.buffer); - tr->max_buffer.buffer = NULL; - } + free_trace_buffer(&tr->max_buffer); #endif } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 9e82551dd56..9258f5a815d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -252,7 +252,7 @@ static inline struct trace_array *top_trace_array(void) { struct trace_array *tr; - if (list_empty(ftrace_trace_arrays.prev)) + if (list_empty(&ftrace_trace_arrays)) return NULL; tr = list_entry(ftrace_trace_arrays.prev, diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index c894614de14..5d12bb407b4 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -248,8 +248,8 @@ void perf_trace_del(struct perf_event *p_event, int flags) tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event); } -__kprobes void *perf_trace_buf_prepare(int size, unsigned short type, - struct pt_regs *regs, int *rctxp) +void *perf_trace_buf_prepare(int size, unsigned short type, + struct pt_regs *regs, int *rctxp) { struct trace_entry *entry; unsigned long flags; @@ -281,6 +281,7 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, return raw_data; } EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); +NOKPROBE_SYMBOL(perf_trace_buf_prepare); #ifdef CONFIG_FUNCTION_TRACER static void diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index ef2fba1f46b..282f6e4e553 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -40,27 +40,27 @@ struct trace_kprobe { (sizeof(struct probe_arg) * (n))) -static __kprobes bool trace_kprobe_is_return(struct trace_kprobe *tk) +static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk) { return tk->rp.handler != NULL; } -static __kprobes const char *trace_kprobe_symbol(struct trace_kprobe *tk) +static nokprobe_inline const char *trace_kprobe_symbol(struct trace_kprobe *tk) { return tk->symbol ? 
tk->symbol : "unknown"; } -static __kprobes unsigned long trace_kprobe_offset(struct trace_kprobe *tk) +static nokprobe_inline unsigned long trace_kprobe_offset(struct trace_kprobe *tk) { return tk->rp.kp.offset; } -static __kprobes bool trace_kprobe_has_gone(struct trace_kprobe *tk) +static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk) { return !!(kprobe_gone(&tk->rp.kp)); } -static __kprobes bool trace_kprobe_within_module(struct trace_kprobe *tk, +static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk, struct module *mod) { int len = strlen(mod->name); @@ -68,7 +68,7 @@ static __kprobes bool trace_kprobe_within_module(struct trace_kprobe *tk, return strncmp(mod->name, name, len) == 0 && name[len] == ':'; } -static __kprobes bool trace_kprobe_is_on_module(struct trace_kprobe *tk) +static nokprobe_inline bool trace_kprobe_is_on_module(struct trace_kprobe *tk) { return !!strchr(trace_kprobe_symbol(tk), ':'); } @@ -132,19 +132,21 @@ struct symbol_cache *alloc_symbol_cache(const char *sym, long offset) * Kprobes-specific fetch functions */ #define DEFINE_FETCH_stack(type) \ -static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\ +static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs, \ void *offset, void *dest) \ { \ *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \ (unsigned int)((unsigned long)offset)); \ -} +} \ +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(stack, type)); + DEFINE_BASIC_FETCH_FUNCS(stack) /* No string on the stack entry */ #define fetch_stack_string NULL #define fetch_stack_string_size NULL #define DEFINE_FETCH_memory(type) \ -static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\ +static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs, \ void *addr, void *dest) \ { \ type retval; \ @@ -152,14 +154,16 @@ static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\ *(type *)dest = 0; \ else \ *(type *)dest = retval; \ -} +} \ +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, type)); + DEFINE_BASIC_FETCH_FUNCS(memory) /* * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max * length and relative data location. 
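
The string fetchers in this file traffic in a packed 32-bit "relative data location": the upper 16 bits carry the length and the lower 16 bits the offset of the dynamic data relative to the trace entry, which is why the printer later extracts the length with a plain >> 16. A small sketch of the packing; the macro names follow trace_probe.h, the values are made up:

#include <stdint.h>
#include <stdio.h>

#define make_data_rloc(len, offs) \
	(((uint32_t)(len) << 16) | ((uint32_t)(offs) & 0xffff))
#define get_rloc_len(dl)	((uint32_t)(dl) >> 16)
#define get_rloc_offs(dl)	((uint32_t)(dl) & 0xffff)

int main(void)
{
	uint32_t dl = make_data_rloc(5, 40); /* 5 bytes at entry offset 40 */

	printf("len=%u offs=%u\n", get_rloc_len(dl), get_rloc_offs(dl));
	return 0;
}
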
*/ -static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, - void *addr, void *dest) +static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, + void *addr, void *dest) { long ret; int maxlen = get_rloc_len(*(u32 *)dest); @@ -193,10 +197,11 @@ static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, get_rloc_offs(*(u32 *)dest)); } } +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string)); /* Return the length of string -- including null terminal byte */ -static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, - void *addr, void *dest) +static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, + void *addr, void *dest) { mm_segment_t old_fs; int ret, len = 0; @@ -219,17 +224,19 @@ static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, else *(u32 *)dest = len; } +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string_size)); #define DEFINE_FETCH_symbol(type) \ -__kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs, \ - void *data, void *dest) \ +void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs, void *data, void *dest)\ { \ struct symbol_cache *sc = data; \ if (sc->addr) \ fetch_memory_##type(regs, (void *)sc->addr, dest); \ else \ *(type *)dest = 0; \ -} +} \ +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(symbol, type)); + DEFINE_BASIC_FETCH_FUNCS(symbol) DEFINE_FETCH_symbol(string) DEFINE_FETCH_symbol(string_size) @@ -907,7 +914,7 @@ static const struct file_operations kprobe_profile_ops = { }; /* Kprobe handler */ -static __kprobes void +static nokprobe_inline void __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, struct ftrace_event_file *ftrace_file) { @@ -943,7 +950,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, entry, irq_flags, pc, regs); } -static __kprobes void +static void kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs) { struct event_file_link *link; @@ -951,9 +958,10 @@ kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs) list_for_each_entry_rcu(link, &tk->tp.files, list) __kprobe_trace_func(tk, regs, link->file); } +NOKPROBE_SYMBOL(kprobe_trace_func); /* Kretprobe handler */ -static __kprobes void +static nokprobe_inline void __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, struct pt_regs *regs, struct ftrace_event_file *ftrace_file) @@ -991,7 +999,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, entry, irq_flags, pc, regs); } -static __kprobes void +static void kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, struct pt_regs *regs) { @@ -1000,6 +1008,7 @@ kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, list_for_each_entry_rcu(link, &tk->tp.files, list) __kretprobe_trace_func(tk, ri, regs, link->file); } +NOKPROBE_SYMBOL(kretprobe_trace_func); /* Event entry printers */ static enum print_line_t @@ -1131,7 +1140,7 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) #ifdef CONFIG_PERF_EVENTS /* Kprobe profile handler */ -static __kprobes void +static void kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) { struct ftrace_event_call *call = &tk->tp.call; @@ -1158,9 +1167,10 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); } +NOKPROBE_SYMBOL(kprobe_perf_func); /* Kretprobe profile handler */ 
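
The conversion pattern throughout this file is mechanical: the old __kprobes storage-class marker on each definition becomes a NOKPROBE_SYMBOL() declaration after it (or nokprobe_inline for inlined helpers), which records the function in the kprobe blacklist so the handlers that run under kprobes cannot themselves be probed. A userspace stand-in for the shape of that mechanism; the macro body below is only modelled loosely on include/linux/kprobes.h, which collects the addresses in a dedicated _kprobe_blacklist section:

#define NOKPROBE_SYMBOL(fname)					\
	static unsigned long _kbl_addr_##fname			\
	__attribute__((section("_kprobe_blacklist"), used)) =	\
		(unsigned long)fname	/* stand-in, not the kernel macro */

static int my_handler(void)	/* imagine this runs from a probe handler */
{
	return 0;
}
NOKPROBE_SYMBOL(my_handler);

int main(void)
{
	return my_handler();
}
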
-static __kprobes void +static void kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, struct pt_regs *regs) { @@ -1188,6 +1198,7 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); } +NOKPROBE_SYMBOL(kretprobe_perf_func); #endif /* CONFIG_PERF_EVENTS */ /* @@ -1196,9 +1207,8 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe * lockless, but we can't race with this __init function. */ -static __kprobes -int kprobe_register(struct ftrace_event_call *event, - enum trace_reg type, void *data) +static int kprobe_register(struct ftrace_event_call *event, + enum trace_reg type, void *data) { struct trace_kprobe *tk = (struct trace_kprobe *)event->data; struct ftrace_event_file *file = data; @@ -1224,8 +1234,7 @@ int kprobe_register(struct ftrace_event_call *event, return 0; } -static __kprobes -int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) +static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) { struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp); @@ -1239,9 +1248,10 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) #endif return 0; /* We don't tweek kernel, so just return 0 */ } +NOKPROBE_SYMBOL(kprobe_dispatcher); -static __kprobes -int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) +static int +kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) { struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp); @@ -1255,6 +1265,7 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) #endif return 0; /* We don't tweek kernel, so just return 0 */ } +NOKPROBE_SYMBOL(kretprobe_dispatcher); static struct trace_event_functions kretprobe_funcs = { .trace = print_kretprobe_event diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 8364a421b4d..d4b9fc22cd2 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -37,13 +37,13 @@ const char *reserved_field_names[] = { /* Printing in basic type function template */ #define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt) \ -__kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \ - const char *name, \ - void *data, void *ent) \ +int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name, \ + void *data, void *ent) \ { \ return trace_seq_printf(s, " %s=" fmt, name, *(type *)data); \ } \ -const char PRINT_TYPE_FMT_NAME(type)[] = fmt; +const char PRINT_TYPE_FMT_NAME(type)[] = fmt; \ +NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(type)); DEFINE_BASIC_PRINT_TYPE_FUNC(u8 , "0x%x") DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "0x%x") @@ -55,9 +55,8 @@ DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%d") DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%Ld") /* Print type function for string type */ -__kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, - const char *name, - void *data, void *ent) +int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, const char *name, + void *data, void *ent) { int len = *(u32 *)data >> 16; @@ -67,6 +66,7 @@ __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, return trace_seq_printf(s, " %s=\"%s\"", name, (const char *)get_loc_data(data, ent)); } +NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(string)); const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\""; @@ -81,23 
+81,24 @@ const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\""; /* Data fetch function templates */ #define DEFINE_FETCH_reg(type) \ -__kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \ - void *offset, void *dest) \ +void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, void *offset, void *dest) \ { \ *(type *)dest = (type)regs_get_register(regs, \ (unsigned int)((unsigned long)offset)); \ -} +} \ +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(reg, type)); DEFINE_BASIC_FETCH_FUNCS(reg) /* No string on the register */ #define fetch_reg_string NULL #define fetch_reg_string_size NULL #define DEFINE_FETCH_retval(type) \ -__kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs, \ - void *dummy, void *dest) \ +void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs, \ + void *dummy, void *dest) \ { \ *(type *)dest = (type)regs_return_value(regs); \ -} +} \ +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(retval, type)); DEFINE_BASIC_FETCH_FUNCS(retval) /* No string on the retval */ #define fetch_retval_string NULL @@ -112,8 +113,8 @@ struct deref_fetch_param { }; #define DEFINE_FETCH_deref(type) \ -__kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs, \ - void *data, void *dest) \ +void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs, \ + void *data, void *dest) \ { \ struct deref_fetch_param *dprm = data; \ unsigned long addr; \ @@ -123,12 +124,13 @@ __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs, \ dprm->fetch(regs, (void *)addr, dest); \ } else \ *(type *)dest = 0; \ -} +} \ +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, type)); DEFINE_BASIC_FETCH_FUNCS(deref) DEFINE_FETCH_deref(string) -__kprobes void FETCH_FUNC_NAME(deref, string_size)(struct pt_regs *regs, - void *data, void *dest) +void FETCH_FUNC_NAME(deref, string_size)(struct pt_regs *regs, + void *data, void *dest) { struct deref_fetch_param *dprm = data; unsigned long addr; @@ -140,16 +142,18 @@ __kprobes void FETCH_FUNC_NAME(deref, string_size)(struct pt_regs *regs, } else *(string_size *)dest = 0; } +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, string_size)); -static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data) +static void update_deref_fetch_param(struct deref_fetch_param *data) { if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) update_deref_fetch_param(data->orig.data); else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) update_symbol_cache(data->orig.data); } +NOKPROBE_SYMBOL(update_deref_fetch_param); -static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data) +static void free_deref_fetch_param(struct deref_fetch_param *data) { if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) free_deref_fetch_param(data->orig.data); @@ -157,6 +161,7 @@ static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data) free_symbol_cache(data->orig.data); kfree(data); } +NOKPROBE_SYMBOL(free_deref_fetch_param); /* Bitfield fetch function */ struct bitfield_fetch_param { @@ -166,8 +171,8 @@ struct bitfield_fetch_param { }; #define DEFINE_FETCH_bitfield(type) \ -__kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs, \ - void *data, void *dest) \ +void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs, \ + void *data, void *dest) \ { \ struct bitfield_fetch_param *bprm = data; \ type buf = 0; \ @@ -177,13 +182,13 @@ __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs, \ buf >>= bprm->low_shift; \ } \ *(type *)dest = buf; \ -} - +} \ +NOKPROBE_SYMBOL(FETCH_FUNC_NAME(bitfield, type)); DEFINE_BASIC_FETCH_FUNCS(bitfield) #define fetch_bitfield_string NULL 
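
DEFINE_FETCH_bitfield above isolates a field with two shifts rather than a mask: shift left to discard the bits above the field, then right to drop the bits below it and re-align. A standalone sketch of that extraction; the helper name and example values are illustrative:

#include <stdint.h>
#include <stdio.h>

/*
 * For a field `bits` wide starting `low` bits from the LSB of a
 * 32-bit word: hi_shift = 32 - (low + bits), low_shift = 32 - bits.
 */
static uint32_t fetch_bitfield32(uint32_t buf, unsigned int hi_shift,
				 unsigned int low_shift)
{
	buf <<= hi_shift;	/* drop everything above the field */
	buf >>= low_shift;	/* drop everything below, re-align */
	return buf;
}

int main(void)
{
	/* bits 4..7 of 0xabcd1234 are 0x3 */
	printf("0x%x\n", fetch_bitfield32(0xabcd1234u, 32 - (4 + 4), 32 - 4));
	return 0;
}
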
#define fetch_bitfield_string_size NULL -static __kprobes void +static void update_bitfield_fetch_param(struct bitfield_fetch_param *data) { /* @@ -196,7 +201,7 @@ update_bitfield_fetch_param(struct bitfield_fetch_param *data) update_symbol_cache(data->orig.data); } -static __kprobes void +static void free_bitfield_fetch_param(struct bitfield_fetch_param *data) { /* @@ -255,17 +260,17 @@ fail: } /* Special function : only accept unsigned long */ -static __kprobes void fetch_kernel_stack_address(struct pt_regs *regs, - void *dummy, void *dest) +static void fetch_kernel_stack_address(struct pt_regs *regs, void *dummy, void *dest) { *(unsigned long *)dest = kernel_stack_pointer(regs); } +NOKPROBE_SYMBOL(fetch_kernel_stack_address); -static __kprobes void fetch_user_stack_address(struct pt_regs *regs, - void *dummy, void *dest) +static void fetch_user_stack_address(struct pt_regs *regs, void *dummy, void *dest) { *(unsigned long *)dest = user_stack_pointer(regs); } +NOKPROBE_SYMBOL(fetch_user_stack_address); static fetch_func_t get_fetch_size_function(const struct fetch_type *type, fetch_func_t orig_fn, diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index fb1ab5dfbd4..4f815fbce16 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -81,13 +81,13 @@ */ #define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs)) -static inline void *get_rloc_data(u32 *dl) +static nokprobe_inline void *get_rloc_data(u32 *dl) { return (u8 *)dl + get_rloc_offs(*dl); } /* For data_loc conversion */ -static inline void *get_loc_data(u32 *dl, void *ent) +static nokprobe_inline void *get_loc_data(u32 *dl, void *ent) { return (u8 *)ent + get_rloc_offs(*dl); } @@ -136,9 +136,8 @@ typedef u32 string_size; /* Printing in basic type function template */ #define DECLARE_BASIC_PRINT_TYPE_FUNC(type) \ -__kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \ - const char *name, \ - void *data, void *ent); \ +int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name, \ + void *data, void *ent); \ extern const char PRINT_TYPE_FMT_NAME(type)[] DECLARE_BASIC_PRINT_TYPE_FUNC(u8); @@ -303,7 +302,7 @@ static inline bool trace_probe_is_registered(struct trace_probe *tp) return !!(tp->flags & TP_FLAG_REGISTERED); } -static inline __kprobes void call_fetch(struct fetch_param *fprm, +static nokprobe_inline void call_fetch(struct fetch_param *fprm, struct pt_regs *regs, void *dest) { return fprm->fn(regs, fprm->data, dest); @@ -351,7 +350,7 @@ extern ssize_t traceprobe_probes_write(struct file *file, extern int traceprobe_command(const char *buf, int (*createfn)(int, char**)); /* Sum up total data length for dynamic arraies (strings) */ -static inline __kprobes int +static nokprobe_inline int __get_data_size(struct trace_probe *tp, struct pt_regs *regs) { int i, ret = 0; @@ -367,7 +366,7 @@ __get_data_size(struct trace_probe *tp, struct pt_regs *regs) } /* Store the value of each argument */ -static inline __kprobes void +static nokprobe_inline void store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs, u8 *data, int maxlen) { diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index c082a744134..04fdb5de823 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -108,8 +108,8 @@ static unsigned long get_user_stack_nth(struct pt_regs *regs, unsigned int n) * Uprobes-specific fetch functions */ #define DEFINE_FETCH_stack(type) \ -static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\ - void 
*offset, void *dest) \ +static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs, \ + void *offset, void *dest) \ { \ *(type *)dest = (type)get_user_stack_nth(regs, \ ((unsigned long)offset)); \ @@ -120,8 +120,8 @@ DEFINE_BASIC_FETCH_FUNCS(stack) #define fetch_stack_string_size NULL #define DEFINE_FETCH_memory(type) \ -static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\ - void *addr, void *dest) \ +static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs, \ + void *addr, void *dest) \ { \ type retval; \ void __user *vaddr = (void __force __user *) addr; \ @@ -136,8 +136,8 @@ DEFINE_BASIC_FETCH_FUNCS(memory) * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max * length and relative data location. */ -static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, - void *addr, void *dest) +static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, + void *addr, void *dest) { long ret; u32 rloc = *(u32 *)dest; @@ -158,8 +158,8 @@ static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, } } -static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, - void *addr, void *dest) +static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, + void *addr, void *dest) { int len; void __user *vaddr = (void __force __user *) addr; @@ -184,8 +184,8 @@ static unsigned long translate_user_vaddr(void *file_offset) } #define DEFINE_FETCH_file_offset(type) \ -static __kprobes void FETCH_FUNC_NAME(file_offset, type)(struct pt_regs *regs,\ - void *offset, void *dest) \ +static void FETCH_FUNC_NAME(file_offset, type)(struct pt_regs *regs, \ + void *offset, void *dest)\ { \ void *vaddr = (void *)translate_user_vaddr(offset); \ \ @@ -1009,56 +1009,60 @@ uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event) return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm); } -static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) +static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) { bool done; write_lock(&tu->filter.rwlock); if (event->hw.tp_target) { - /* - * event->parent != NULL means copy_process(), we can avoid - * uprobe_apply(). current->mm must be probed and we can rely - * on dup_mmap() which preserves the already installed bp's. - * - * attr.enable_on_exec means that exec/mmap will install the - * breakpoints we need. - */ + list_del(&event->hw.tp_list); done = tu->filter.nr_systemwide || - event->parent || event->attr.enable_on_exec || + (event->hw.tp_target->flags & PF_EXITING) || uprobe_filter_event(tu, event); - list_add(&event->hw.tp_list, &tu->filter.perf_events); } else { + tu->filter.nr_systemwide--; done = tu->filter.nr_systemwide; - tu->filter.nr_systemwide++; } write_unlock(&tu->filter.rwlock); if (!done) - uprobe_apply(tu->inode, tu->offset, &tu->consumer, true); + return uprobe_apply(tu->inode, tu->offset, &tu->consumer, false); return 0; } -static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) +static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) { bool done; + int err; write_lock(&tu->filter.rwlock); if (event->hw.tp_target) { - list_del(&event->hw.tp_list); + /* + * event->parent != NULL means copy_process(), we can avoid + * uprobe_apply(). current->mm must be probed and we can rely + * on dup_mmap() which preserves the already installed bp's. + * + * attr.enable_on_exec means that exec/mmap will install the + * breakpoints we need. 
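
uprobe_perf_close() is hoisted above uprobe_perf_open() for a reason visible at the end of this hunk: open now checks the result of uprobe_apply() and unwinds through close on failure instead of ignoring the error. The shape of that publish-then-roll-back pattern, with hypothetical stand-ins for the filter bookkeeping done under filter.rwlock:

#include <stdio.h>

static int list_len;				/* plays the tp_list/counter state */
static int apply(int add) { (void)add; return 0; /* imagine -ENOMEM here */ }

static int close_filter(void)
{
	list_len--;
	return apply(0);
}

static int open_filter(void)
{
	int err;

	list_len++;		/* publish intent first, as the hunk does */
	err = apply(1);
	if (err)
		close_filter();	/* roll back so bookkeeping matches reality */
	return err;
}

int main(void)
{
	printf("open_filter() = %d\n", open_filter());
	return 0;
}
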
+ */ done = tu->filter.nr_systemwide || - (event->hw.tp_target->flags & PF_EXITING) || + event->parent || event->attr.enable_on_exec || uprobe_filter_event(tu, event); + list_add(&event->hw.tp_list, &tu->filter.perf_events); } else { - tu->filter.nr_systemwide--; done = tu->filter.nr_systemwide; + tu->filter.nr_systemwide++; } write_unlock(&tu->filter.rwlock); - if (!done) - uprobe_apply(tu->inode, tu->offset, &tu->consumer, false); - - return 0; + err = 0; + if (!done) { + err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true); + if (err) + uprobe_perf_close(tu, event); + } + return err; } static bool uprobe_perf_filter(struct uprobe_consumer *uc, diff --git a/mm/page_io.c b/mm/page_io.c index 243a9b76e5c..955db8b0d49 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -274,8 +274,8 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, .count = PAGE_SIZE, .iov_offset = 0, .nr_segs = 1, - .bvec = &bv }; + from.bvec = &bv; /* older gcc versions are broken */ init_sync_kiocb(&kiocb, swap_file); kiocb.ki_pos = page_file_offset(page); diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 67d7721d237..1675021d8c1 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -72,6 +72,8 @@ const char *ceph_msg_type_name(int type) case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack"; case CEPH_MSG_STATFS: return "statfs"; case CEPH_MSG_STATFS_REPLY: return "statfs_reply"; + case CEPH_MSG_MON_GET_VERSION: return "mon_get_version"; + case CEPH_MSG_MON_GET_VERSION_REPLY: return "mon_get_version_reply"; case CEPH_MSG_MDS_MAP: return "mds_map"; case CEPH_MSG_CLIENT_SESSION: return "client_session"; case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect"; diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 10421a4b76f..d1a62c69a9f 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -126,9 +126,13 @@ static int monc_show(struct seq_file *s, void *p) req = rb_entry(rp, struct ceph_mon_generic_request, node); op = le16_to_cpu(req->request->hdr.type); if (op == CEPH_MSG_STATFS) - seq_printf(s, "%lld statfs\n", req->tid); + seq_printf(s, "%llu statfs\n", req->tid); + else if (op == CEPH_MSG_POOLOP) + seq_printf(s, "%llu poolop\n", req->tid); + else if (op == CEPH_MSG_MON_GET_VERSION) + seq_printf(s, "%llu mon_get_version", req->tid); else - seq_printf(s, "%lld unknown\n", req->tid); + seq_printf(s, "%llu unknown\n", req->tid); } mutex_unlock(&monc->mutex); diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 2ac9ef35110..067d3af2eaf 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -296,6 +296,33 @@ void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc) __send_subscribe(monc); mutex_unlock(&monc->mutex); } +EXPORT_SYMBOL(ceph_monc_request_next_osdmap); + +int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch, + unsigned long timeout) +{ + unsigned long started = jiffies; + int ret; + + mutex_lock(&monc->mutex); + while (monc->have_osdmap < epoch) { + mutex_unlock(&monc->mutex); + + if (timeout != 0 && time_after_eq(jiffies, started + timeout)) + return -ETIMEDOUT; + + ret = wait_event_interruptible_timeout(monc->client->auth_wq, + monc->have_osdmap >= epoch, timeout); + if (ret < 0) + return ret; + + mutex_lock(&monc->mutex); + } + + mutex_unlock(&monc->mutex); + return 0; +} +EXPORT_SYMBOL(ceph_monc_wait_osdmap); /* * @@ -477,14 +504,13 @@ static struct ceph_msg *get_generic_reply(struct ceph_connection *con, return m; } -static int do_generic_request(struct ceph_mon_client *monc, - struct 
ceph_mon_generic_request *req) +static int __do_generic_request(struct ceph_mon_client *monc, u64 tid, + struct ceph_mon_generic_request *req) { int err; /* register request */ - mutex_lock(&monc->mutex); - req->tid = ++monc->last_tid; + req->tid = tid != 0 ? tid : ++monc->last_tid; req->request->hdr.tid = cpu_to_le64(req->tid); __insert_generic_request(monc, req); monc->num_generic_requests++; @@ -496,13 +522,24 @@ static int do_generic_request(struct ceph_mon_client *monc, mutex_lock(&monc->mutex); rb_erase(&req->node, &monc->generic_request_tree); monc->num_generic_requests--; - mutex_unlock(&monc->mutex); if (!err) err = req->result; return err; } +static int do_generic_request(struct ceph_mon_client *monc, + struct ceph_mon_generic_request *req) +{ + int err; + + mutex_lock(&monc->mutex); + err = __do_generic_request(monc, 0, req); + mutex_unlock(&monc->mutex); + + return err; +} + /* * statfs */ @@ -579,6 +616,96 @@ out: } EXPORT_SYMBOL(ceph_monc_do_statfs); +static void handle_get_version_reply(struct ceph_mon_client *monc, + struct ceph_msg *msg) +{ + struct ceph_mon_generic_request *req; + u64 tid = le64_to_cpu(msg->hdr.tid); + void *p = msg->front.iov_base; + void *end = p + msg->front_alloc_len; + u64 handle; + + dout("%s %p tid %llu\n", __func__, msg, tid); + + ceph_decode_need(&p, end, 2*sizeof(u64), bad); + handle = ceph_decode_64(&p); + if (tid != 0 && tid != handle) + goto bad; + + mutex_lock(&monc->mutex); + req = __lookup_generic_req(monc, handle); + if (req) { + *(u64 *)req->buf = ceph_decode_64(&p); + req->result = 0; + get_generic_request(req); + } + mutex_unlock(&monc->mutex); + if (req) { + complete_all(&req->completion); + put_generic_request(req); + } + + return; +bad: + pr_err("corrupt mon_get_version reply\n"); + ceph_msg_dump(msg); +} + +/* + * Send MMonGetVersion and wait for the reply. 
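
The refactor above splits do_generic_request() so a caller can reserve the tid while already holding monc->mutex: MMonGetVersion needs the tid both as the lookup key registered in the request tree and as the "handle" encoded into the message payload, and both uses must see the same value. A compressed sketch of that reserve-then-send pattern; the lock and helper names are stand-ins:

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_mutex_t monc_mutex = PTHREAD_MUTEX_INITIALIZER;
static uint64_t last_tid;

static void register_request(uint64_t tid) { (void)tid; /* rbtree insert */ }
static void send_request(uint64_t handle)
{
	printf("handle=%llu\n", (unsigned long long)handle);
}

static void do_get_version(void)
{
	pthread_mutex_lock(&monc_mutex);
	uint64_t tid = ++last_tid;	/* one tid, two consistent uses:  */
	register_request(tid);		/* 1) lookup key for the reply    */
	send_request(tid);		/* 2) wire handle in the payload  */
	pthread_mutex_unlock(&monc_mutex);
}

int main(void)
{
	do_get_version();
	return 0;
}
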
+ * + * @what: one of "mdsmap", "osdmap" or "monmap" + */ +int ceph_monc_do_get_version(struct ceph_mon_client *monc, const char *what, + u64 *newest) +{ + struct ceph_mon_generic_request *req; + void *p, *end; + u64 tid; + int err; + + req = kzalloc(sizeof(*req), GFP_NOFS); + if (!req) + return -ENOMEM; + + kref_init(&req->kref); + req->buf = newest; + req->buf_len = sizeof(*newest); + init_completion(&req->completion); + + req->request = ceph_msg_new(CEPH_MSG_MON_GET_VERSION, + sizeof(u64) + sizeof(u32) + strlen(what), + GFP_NOFS, true); + if (!req->request) { + err = -ENOMEM; + goto out; + } + + req->reply = ceph_msg_new(CEPH_MSG_MON_GET_VERSION_REPLY, 1024, + GFP_NOFS, true); + if (!req->reply) { + err = -ENOMEM; + goto out; + } + + p = req->request->front.iov_base; + end = p + req->request->front_alloc_len; + + /* fill out request */ + mutex_lock(&monc->mutex); + tid = ++monc->last_tid; + ceph_encode_64(&p, tid); /* handle */ + ceph_encode_string(&p, end, what, strlen(what)); + + err = __do_generic_request(monc, tid, req); + + mutex_unlock(&monc->mutex); +out: + kref_put(&req->kref, release_generic_request); + return err; +} +EXPORT_SYMBOL(ceph_monc_do_get_version); + /* * pool ops */ @@ -981,6 +1108,10 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) handle_statfs_reply(monc, msg); break; + case CEPH_MSG_MON_GET_VERSION_REPLY: + handle_get_version_reply(monc, msg); + break; + case CEPH_MSG_POOLOP_REPLY: handle_poolop_reply(monc, msg); break; @@ -1029,6 +1160,15 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, case CEPH_MSG_AUTH_REPLY: m = ceph_msg_get(monc->m_auth_reply); break; + case CEPH_MSG_MON_GET_VERSION_REPLY: + if (le64_to_cpu(hdr->tid) != 0) + return get_generic_reply(con, hdr, skip); + + /* + * Older OSDs don't set reply tid even if the orignal + * request had a non-zero tid. Workaround this weirdness + * by falling through to the allocate case. + */ case CEPH_MSG_MON_MAP: case CEPH_MSG_MDS_MAP: case CEPH_MSG_OSD_MAP: diff --git a/scripts/Makefile b/scripts/Makefile index 1d07860f6c4..890df5c6adf 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -39,4 +39,4 @@ subdir-$(CONFIG_SECURITY_SELINUX) += selinux subdir-$(CONFIG_DTC) += dtc # Let clean descend into subdirs -subdir- += basic kconfig package selinux +subdir- += basic kconfig package diff --git a/scripts/Makefile.asm-generic b/scripts/Makefile.asm-generic index d17e0ea911e..045e0098e96 100644 --- a/scripts/Makefile.asm-generic +++ b/scripts/Makefile.asm-generic @@ -21,4 +21,3 @@ all: $(patsubst %, $(obj)/%, $(generic-y)) $(obj)/%.h: $(call cmd,wrap) - diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 003bc263105..bf3e6778cd7 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -50,67 +50,6 @@ ifeq ($(KBUILD_NOPEDANTIC),) endif endif -# -# make W=... settings -# -# W=1 - warnings that may be relevant and does not occur too often -# W=2 - warnings that occur quite often but may still be relevant -# W=3 - the more obscure warnings, can most likely be ignored -# -# $(call cc-option, -W...) handles gcc -W.. 
options which -# are not supported by all versions of the compiler -ifdef KBUILD_ENABLE_EXTRA_GCC_CHECKS -warning- := $(empty) - -warning-1 := -Wextra -Wunused -Wno-unused-parameter -warning-1 += -Wmissing-declarations -warning-1 += -Wmissing-format-attribute -warning-1 += $(call cc-option, -Wmissing-prototypes) -warning-1 += -Wold-style-definition -warning-1 += $(call cc-option, -Wmissing-include-dirs) -warning-1 += $(call cc-option, -Wunused-but-set-variable) -warning-1 += $(call cc-disable-warning, missing-field-initializers) - -# Clang -warning-1 += $(call cc-disable-warning, initializer-overrides) -warning-1 += $(call cc-disable-warning, unused-value) -warning-1 += $(call cc-disable-warning, format) -warning-1 += $(call cc-disable-warning, unknown-warning-option) -warning-1 += $(call cc-disable-warning, sign-compare) -warning-1 += $(call cc-disable-warning, format-zero-length) -warning-1 += $(call cc-disable-warning, uninitialized) -warning-1 += $(call cc-option, -fcatch-undefined-behavior) - -warning-2 := -Waggregate-return -warning-2 += -Wcast-align -warning-2 += -Wdisabled-optimization -warning-2 += -Wnested-externs -warning-2 += -Wshadow -warning-2 += $(call cc-option, -Wlogical-op) -warning-2 += $(call cc-option, -Wmissing-field-initializers) - -warning-3 := -Wbad-function-cast -warning-3 += -Wcast-qual -warning-3 += -Wconversion -warning-3 += -Wpacked -warning-3 += -Wpadded -warning-3 += -Wpointer-arith -warning-3 += -Wredundant-decls -warning-3 += -Wswitch-default -warning-3 += $(call cc-option, -Wpacked-bitfield-compat) -warning-3 += $(call cc-option, -Wvla) - -warning := $(warning-$(findstring 1, $(KBUILD_ENABLE_EXTRA_GCC_CHECKS))) -warning += $(warning-$(findstring 2, $(KBUILD_ENABLE_EXTRA_GCC_CHECKS))) -warning += $(warning-$(findstring 3, $(KBUILD_ENABLE_EXTRA_GCC_CHECKS))) - -ifeq ("$(strip $(warning))","") - $(error W=$(KBUILD_ENABLE_EXTRA_GCC_CHECKS) is unknown) -endif - -KBUILD_CFLAGS += $(warning) -endif - include scripts/Makefile.lib ifdef host-progs @@ -342,7 +281,7 @@ $(real-objs-m) : modkern_aflags := $(KBUILD_AFLAGS_MODULE) $(AFLAGS_MODULE) $(real-objs-m:.o=.s): modkern_aflags := $(KBUILD_AFLAGS_MODULE) $(AFLAGS_MODULE) quiet_cmd_as_s_S = CPP $(quiet_modtag) $@ -cmd_as_s_S = $(CPP) $(a_flags) -o $@ $< +cmd_as_s_S = $(CPP) $(a_flags) -o $@ $< $(obj)/%.s: $(src)/%.S FORCE $(call if_changed_dep,as_s_S) @@ -436,7 +375,7 @@ link_multi_deps = \ $(filter $(addprefix $(obj)/, \ $($(subst $(obj)/,,$(@:.o=-objs))) \ $($(subst $(obj)/,,$(@:.o=-y)))), $^) - + quiet_cmd_link_multi-y = LD $@ cmd_link_multi-y = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps) $(cmd_secanalysis) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn new file mode 100644 index 00000000000..65643506c71 --- /dev/null +++ b/scripts/Makefile.extrawarn @@ -0,0 +1,67 @@ +# ========================================================================== +# +# make W=... settings +# +# W=1 - warnings that may be relevant and does not occur too often +# W=2 - warnings that occur quite often but may still be relevant +# W=3 - the more obscure warnings, can most likely be ignored +# +# $(call cc-option, -W...) handles gcc -W.. 
options which +# are not supported by all versions of the compiler +# ========================================================================== + +ifeq ("$(origin W)", "command line") + export KBUILD_ENABLE_EXTRA_GCC_CHECKS := $(W) +endif + +ifdef KBUILD_ENABLE_EXTRA_GCC_CHECKS +warning- := $(empty) + +warning-1 := -Wextra -Wunused -Wno-unused-parameter +warning-1 += -Wmissing-declarations +warning-1 += -Wmissing-format-attribute +warning-1 += $(call cc-option, -Wmissing-prototypes) +warning-1 += -Wold-style-definition +warning-1 += $(call cc-option, -Wmissing-include-dirs) +warning-1 += $(call cc-option, -Wunused-but-set-variable) +warning-1 += $(call cc-disable-warning, missing-field-initializers) + +# Clang +warning-1 += $(call cc-disable-warning, initializer-overrides) +warning-1 += $(call cc-disable-warning, unused-value) +warning-1 += $(call cc-disable-warning, format) +warning-1 += $(call cc-disable-warning, unknown-warning-option) +warning-1 += $(call cc-disable-warning, sign-compare) +warning-1 += $(call cc-disable-warning, format-zero-length) +warning-1 += $(call cc-disable-warning, uninitialized) +warning-1 += $(call cc-option, -fcatch-undefined-behavior) + +warning-2 := -Waggregate-return +warning-2 += -Wcast-align +warning-2 += -Wdisabled-optimization +warning-2 += -Wnested-externs +warning-2 += -Wshadow +warning-2 += $(call cc-option, -Wlogical-op) +warning-2 += $(call cc-option, -Wmissing-field-initializers) + +warning-3 := -Wbad-function-cast +warning-3 += -Wcast-qual +warning-3 += -Wconversion +warning-3 += -Wpacked +warning-3 += -Wpadded +warning-3 += -Wpointer-arith +warning-3 += -Wredundant-decls +warning-3 += -Wswitch-default +warning-3 += $(call cc-option, -Wpacked-bitfield-compat) +warning-3 += $(call cc-option, -Wvla) + +warning := $(warning-$(findstring 1, $(KBUILD_ENABLE_EXTRA_GCC_CHECKS))) +warning += $(warning-$(findstring 2, $(KBUILD_ENABLE_EXTRA_GCC_CHECKS))) +warning += $(warning-$(findstring 3, $(KBUILD_ENABLE_EXTRA_GCC_CHECKS))) + +ifeq ("$(strip $(warning))","") + $(error W=$(KBUILD_ENABLE_EXTRA_GCC_CHECKS) is unknown) +endif + +KBUILD_CFLAGS += $(warning) +endif diff --git a/scripts/Makefile.fwinst b/scripts/Makefile.fwinst index 4d908d16c03..d8e335eed22 100644 --- a/scripts/Makefile.fwinst +++ b/scripts/Makefile.fwinst @@ -18,31 +18,29 @@ include $(srctree)/$(obj)/Makefile include scripts/Makefile.host mod-fw := $(fw-shipped-m) -# If CONFIG_FIRMWARE_IN_KERNEL isn't set, then install the +# If CONFIG_FIRMWARE_IN_KERNEL isn't set, then install the # firmware for in-kernel drivers too. ifndef CONFIG_FIRMWARE_IN_KERNEL mod-fw += $(fw-shipped-y) endif +ifneq ($(KBUILD_SRC),) +# Create output directory if not already present +_dummy := $(shell [ -d $(obj) ] || mkdir -p $(obj)) + +firmware-dirs := $(sort $(addprefix $(objtree)/$(obj)/,$(dir $(fw-external-y) $(fw-shipped-all)))) +# Create directories for firmware in subdirectories +_dummy := $(foreach d,$(firmware-dirs), $(shell [ -d $(d) ] || mkdir -p $(d))) +endif + installed-mod-fw := $(addprefix $(INSTALL_FW_PATH)/,$(mod-fw)) installed-fw := $(addprefix $(INSTALL_FW_PATH)/,$(fw-shipped-all)) -installed-fw-dirs := $(sort $(dir $(installed-fw))) $(INSTALL_FW_PATH)/./ - -# Workaround for make < 3.81, where .SECONDEXPANSION doesn't work. 
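
The new Makefile.extrawarn carries the W= logic out of Makefile.build unchanged. As a concrete reference point for what the levels mean, the following translation unit is silent with a plain "gcc -c w1.c" but, with roughly what W=1 enables (for example "gcc -Wextra -Wold-style-definition -Wmissing-prototypes -Wunused-but-set-variable -c w1.c"), gcc reports all three annotated problems:

int f(x)		/* -Wold-style-definition: K&R parameter list,  */
int x;			/* and -Wmissing-prototypes: no prior prototype */
{
	int tmp = x * 2; /* -Wunused-but-set-variable: written, never read */

	tmp = 0;
	return x;
}
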
-PHONY += $(INSTALL_FW_PATH)/$$(%) install-all-dirs -$(INSTALL_FW_PATH)/$$(%): install-all-dirs - @true -install-all-dirs: $(installed-fw-dirs) - @true quiet_cmd_install = INSTALL $(subst $(srctree)/,,$@) - cmd_install = $(INSTALL) -m0644 $< $@ - -$(installed-fw-dirs): - $(call cmd,mkdir) + cmd_install = mkdir -p $(@D); $(INSTALL) -m0644 $< $@ -$(installed-fw): $(INSTALL_FW_PATH)/%: $(obj)/% | $(INSTALL_FW_PATH)/$$(dir %) +$(installed-fw): $(INSTALL_FW_PATH)/%: $(obj)/% $(call cmd,install) PHONY += __fw_install __fw_modinst FORCE diff --git a/scripts/Makefile.host b/scripts/Makefile.host index 1ac414fd503..66893643fd7 100644 --- a/scripts/Makefile.host +++ b/scripts/Makefile.host @@ -166,5 +166,4 @@ $(host-cshlib): $(obj)/%: $(host-cshobjs) FORCE $(call if_changed,host-cshlib) targets += $(host-csingle) $(host-cmulti) $(host-cobjs)\ - $(host-cxxmulti) $(host-cxxobjs) $(host-cshlib) $(host-cshobjs) - + $(host-cxxmulti) $(host-cxxobjs) $(host-cshlib) $(host-cshobjs) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 6a5b0decb79..260bf8acfce 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -27,7 +27,7 @@ lib-y := $(filter-out $(obj-y), $(sort $(lib-y) $(lib-m))) # --------------------------------------------------------------------------- # o if we encounter foo/ in $(obj-y), replace it by foo/built-in.o # and add the directory to the list of dirs to descend into: $(subdir-y) -# o if we encounter foo/ in $(obj-m), remove it from $(obj-m) +# o if we encounter foo/ in $(obj-m), remove it from $(obj-m) # and add the directory to the list of dirs to descend into: $(subdir-m) # Determine modorder. @@ -46,7 +46,7 @@ obj-m := $(filter-out %/, $(obj-m)) subdir-ym := $(sort $(subdir-y) $(subdir-m)) -# if $(foo-objs) exists, foo.o is a composite object +# if $(foo-objs) exists, foo.o is a composite object multi-used-y := $(sort $(foreach m,$(obj-y), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y))), $(m)))) multi-used-m := $(sort $(foreach m,$(obj-m), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y))), $(m)))) multi-used := $(multi-used-y) $(multi-used-m) @@ -91,7 +91,7 @@ obj-dirs := $(addprefix $(obj)/,$(obj-dirs)) # These flags are needed for modversions and compiling, so we define them here # already -# $(modname_flags) #defines KBUILD_MODNAME as the name of the module it will +# $(modname_flags) #defines KBUILD_MODNAME as the name of the module it will # end up in (or would, if it gets compiled in) # Note: Files that end up in two or more modules are compiled without the # KBUILD_MODNAME definition. 
The reason is that any made-up name would @@ -212,7 +212,7 @@ $(obj)/%: $(src)/%_shipped # Commands useful for building a boot image # =========================================================================== -# +# # Use as following: # # target: source(s) FORCE @@ -226,7 +226,7 @@ $(obj)/%: $(src)/%_shipped quiet_cmd_ld = LD $@ cmd_ld = $(LD) $(LDFLAGS) $(ldflags-y) $(LDFLAGS_$(@F)) \ - $(filter-out FORCE,$^) -o $@ + $(filter-out FORCE,$^) -o $@ # Objcopy # --------------------------------------------------------------------------- diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c index 078fe1d64e7..b30406860b7 100644 --- a/scripts/basic/fixdep.c +++ b/scripts/basic/fixdep.c @@ -409,10 +409,10 @@ static void print_deps(void) exit(2); } if (fstat(fd, &st) < 0) { - fprintf(stderr, "fixdep: error fstat'ing depfile: "); - perror(depfile); - exit(2); - } + fprintf(stderr, "fixdep: error fstat'ing depfile: "); + perror(depfile); + exit(2); + } if (st.st_size == 0) { fprintf(stderr,"fixdep: %s is empty\n",depfile); close(fd); diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl index 544aa56b620..c05d586b1fe 100755 --- a/scripts/checkstack.pl +++ b/scripts/checkstack.pl @@ -173,4 +173,3 @@ while (my $line = <STDIN>) { # Sort output by size (last field) print sort { ($b =~ /:\t*(\d+)$/)[0] <=> ($a =~ /:\t*(\d+)$/)[0] } @stack; - diff --git a/scripts/coccinelle/misc/of_table.cocci b/scripts/coccinelle/misc/of_table.cocci new file mode 100644 index 00000000000..3c934046a06 --- /dev/null +++ b/scripts/coccinelle/misc/of_table.cocci @@ -0,0 +1,62 @@ +/// Make sure of_device_id tables are NULL terminated +// +// Keywords: of_table +// Confidence: Medium +// Options: --include-headers + +virtual patch +virtual context +virtual org +virtual report + +@depends on context@ +identifier var, arr; +expression E; +@@ +struct of_device_id arr[] = { + ..., + { + .var = E, +* } +}; + +@depends on patch@ +identifier var, arr; +expression E; +@@ +struct of_device_id arr[] = { + ..., + { + .var = E, +- } ++ }, ++ { } +}; + +@r depends on org || report@ +position p1; +identifier var, arr; +expression E; +@@ +struct of_device_id arr[] = { + ..., + { + .var = E, + } + @p1 +}; + +@script:python depends on org@ +p1 << r.p1; +arr << r.arr; +@@ + +cocci.print_main(arr,p1) + +@script:python depends on report@ +p1 << r.p1; +arr << r.arr; +@@ + +msg = "%s is not NULL terminated at line %s" % (arr, p1[0].line) +coccilib.report.print_report(p1[0],msg) diff --git a/scripts/coccinelle/misc/returnvar.cocci b/scripts/coccinelle/misc/returnvar.cocci new file mode 100644 index 00000000000..605955a91c4 --- /dev/null +++ b/scripts/coccinelle/misc/returnvar.cocci @@ -0,0 +1,66 @@ +/// +/// Removes unneeded variable used to store return value. +/// +// Confidence: Moderate +// Copyright: (C) 2012 Peter Senna Tschudin, INRIA/LIP6. GPLv2. +// URL: http://coccinelle.lip6.fr/ +// Comments: Comments on code can be deleted if near code that is removed. +// "when strict" can be removed to get more hits, but adds false +// positives +// Options: --no-includes --include-headers + +virtual patch +virtual report +virtual context +virtual org + +@depends on patch@ +type T; +constant C; +identifier ret; +@@ +- T ret = C; +... when != ret + when strict +return +- ret ++ C +; + +@depends on context@ +type T; +constant C; +identifier ret; +@@ +* T ret = C; +... when != ret + when strict +* return ret; + +@r1 depends on report || org@ +type T; +constant C; +identifier ret; +position p1, p2; +@@ +T ret@p1 = C; +... 
when != ret + when strict +return ret@p2; + +@script:python depends on report@ +p1 << r1.p1; +p2 << r1.p2; +C << r1.C; +ret << r1.ret; +@@ +coccilib.report.print_report(p1[0], "Unneeded variable: \"" + ret + "\". Return \"" + C + "\" on line " + p2[0].line) + +@script:python depends on org@ +p1 << r1.p1; +p2 << r1.p2; +C << r1.C; +ret << r1.ret; +@@ +cocci.print_main("unneeded \"" + ret + "\" variable", p1) +cocci.print_sec("return " + C + " here", p2) diff --git a/scripts/config b/scripts/config index 68041793698..026aeb4f32e 100755 --- a/scripts/config +++ b/scripts/config @@ -223,4 +223,3 @@ while [ "$1" != "" ] ; do ;; esac done - diff --git a/scripts/conmakehash.c b/scripts/conmakehash.c index 263a44d57fa..61bbda54cf1 100644 --- a/scripts/conmakehash.c +++ b/scripts/conmakehash.c @@ -104,7 +104,7 @@ int main(int argc, char *argv[]) } } - /* For now we assume the default font is always 256 characters. */ + /* For now we assume the default font is always 256 characters. */ fontlen = 256; /* Initialize table */ @@ -236,15 +236,15 @@ int main(int argc, char *argv[]) } /* Okay, we hit EOF, now output hash table */ - + fclose(ctbl); - + /* Compute total size of Unicode list */ nuni = 0; for ( i = 0 ; i < fontlen ; i++ ) nuni += unicount[i]; - + printf("\ /*\n\ * Do not edit this file; it was automatically generated by\n\ @@ -268,9 +268,9 @@ u8 dfont_unicount[%d] = \n\ else printf(", "); } - + printf("\nu16 dfont_unitable[%d] = \n{\n\t", nuni); - + fp0 = 0; nent = 0; for ( i = 0 ; i < nuni ; i++ ) diff --git a/scripts/docproc.c b/scripts/docproc.c index 2b69eaf5b64..e267e621431 100644 --- a/scripts/docproc.c +++ b/scripts/docproc.c @@ -154,7 +154,7 @@ int symfilecnt = 0; static void add_new_symbol(struct symfile *sym, char * symname) { sym->symbollist = - realloc(sym->symbollist, (sym->symbolcnt + 1) * sizeof(char *)); + realloc(sym->symbollist, (sym->symbolcnt + 1) * sizeof(char *)); sym->symbollist[sym->symbolcnt++].name = strdup(symname); } @@ -215,7 +215,7 @@ static void find_export_symbols(char * filename) char *p; char *e; if (((p = strstr(line, "EXPORT_SYMBOL_GPL")) != NULL) || - ((p = strstr(line, "EXPORT_SYMBOL")) != NULL)) { + ((p = strstr(line, "EXPORT_SYMBOL")) != NULL)) { /* Skip EXPORT_SYMBOL{_GPL} */ while (isalnum(*p) || *p == '_') p++; @@ -291,28 +291,28 @@ static void extfunc(char * filename) { docfunctions(filename, FUNCTION); } static void singfunc(char * filename, char * line) { char *vec[200]; /* Enough for specific functions */ - int i, idx = 0; - int startofsym = 1; + int i, idx = 0; + int startofsym = 1; vec[idx++] = KERNELDOC; vec[idx++] = DOCBOOK; vec[idx++] = SHOWNOTFOUND; - /* Split line up in individual parameters preceded by FUNCTION */ - for (i=0; line[i]; i++) { - if (isspace(line[i])) { - line[i] = '\0'; - startofsym = 1; - continue; - } - if (startofsym) { - startofsym = 0; - vec[idx++] = FUNCTION; - vec[idx++] = &line[i]; - } - } + /* Split line up in individual parameters preceded by FUNCTION */ + for (i=0; line[i]; i++) { + if (isspace(line[i])) { + line[i] = '\0'; + startofsym = 1; + continue; + } + if (startofsym) { + startofsym = 0; + vec[idx++] = FUNCTION; + vec[idx++] = &line[i]; + } + } for (i = 0; i < idx; i++) { - if (strcmp(vec[i], FUNCTION)) - continue; + if (strcmp(vec[i], FUNCTION)) + continue; consume_symbol(vec[i + 1]); } vec[idx++] = filename; @@ -460,14 +460,14 @@ static void parse_file(FILE *infile) break; case 'D': while (*s && !isspace(*s)) s++; - *s = '\0'; - symbolsonly(line+2); - break; + *s = '\0'; + symbolsonly(line+2); + break; 
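
The returnvar.cocci rule added earlier in this chunk mechanizes a common cleanup: when a return variable is initialized to a constant and provably never touched again, the variable is dropped and the constant returned directly. Applied by hand, the patch-mode rule turns the first form below into the second:

/* before: matches "T ret = C; ... return ret;" with ret untouched */
static int probe_old(void)
{
	int ret = 0;	/* never reassigned below */

	/* ... setup that cannot fail ... */
	return ret;
}

/* after: the variable is gone, the constant is returned directly */
static int probe_new(void)
{
	/* ... setup that cannot fail ... */
	return 0;
}

int main(void)
{
	return probe_old() + probe_new();
}
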
case 'F': /* filename */ while (*s && !isspace(*s)) s++; *s++ = '\0'; - /* function names */ + /* function names */ while (isspace(*s)) s++; singlefunctions(line +2, s); @@ -515,11 +515,11 @@ int main(int argc, char *argv[]) } /* Open file, exit on error */ infile = fopen(argv[2], "r"); - if (infile == NULL) { - fprintf(stderr, "docproc: "); - perror(argv[2]); - exit(2); - } + if (infile == NULL) { + fprintf(stderr, "docproc: "); + perror(argv[2]); + exit(2); + } if (strcmp("doc", argv[1]) == 0) { /* Need to do this in two passes. diff --git a/scripts/dtc/.gitignore b/scripts/dtc/.gitignore index 095acb49a37..cdabdc95a6e 100644 --- a/scripts/dtc/.gitignore +++ b/scripts/dtc/.gitignore @@ -2,4 +2,3 @@ dtc dtc-lexer.lex.c dtc-parser.tab.c dtc-parser.tab.h - diff --git a/scripts/dtc/fstree.c b/scripts/dtc/fstree.c index f3774530170..e464727c880 100644 --- a/scripts/dtc/fstree.c +++ b/scripts/dtc/fstree.c @@ -88,4 +88,3 @@ struct boot_info *dt_from_fs(const char *dirname) return build_boot_info(NULL, tree, guess_boot_cpuid(tree)); } - diff --git a/scripts/dtc/libfdt/fdt_empty_tree.c b/scripts/dtc/libfdt/fdt_empty_tree.c index f72d13b1d19..f2ae9b77c28 100644 --- a/scripts/dtc/libfdt/fdt_empty_tree.c +++ b/scripts/dtc/libfdt/fdt_empty_tree.c @@ -81,4 +81,3 @@ int fdt_create_empty_tree(void *buf, int bufsize) return fdt_open_into(buf, buf, bufsize); } - diff --git a/scripts/dtc/treesource.c b/scripts/dtc/treesource.c index 33eeba55fb4..5740e6992d3 100644 --- a/scripts/dtc/treesource.c +++ b/scripts/dtc/treesource.c @@ -281,4 +281,3 @@ void dt_to_source(FILE *f, struct boot_info *bi) write_tree_source_node(f, bi->dt, 0); } - diff --git a/scripts/headers.sh b/scripts/headers.sh index 978b42b3acd..95ece06599a 100755 --- a/scripts/headers.sh +++ b/scripts/headers.sh @@ -28,5 +28,3 @@ for arch in ${archs}; do ;; esac done - - diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 1237dd7fb4c..dc7aa45e80c 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -123,7 +123,7 @@ static int read_symbol(FILE *in, struct sym_entry *s) } if (strlen(str) > KSYM_NAME_LEN) { fprintf(stderr, "Symbol %s too long for kallsyms (%zu vs %d).\n" - "Please increase KSYM_NAME_LEN both in kernel and kallsyms.c\n", + "Please increase KSYM_NAME_LEN both in kernel and kallsyms.c\n", str, strlen(str), KSYM_NAME_LEN); return -1; } diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index 844bc9da08d..9c4d2412fb7 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -33,11 +33,11 @@ oldconfig: $(obj)/conf $< --$@ $(Kconfig) silentoldconfig: $(obj)/conf - $(Q)mkdir -p include/generated + $(Q)mkdir -p include/config include/generated $< --$@ $(Kconfig) localyesconfig localmodconfig: $(obj)/streamline_config.pl $(obj)/conf - $(Q)mkdir -p include/generated + $(Q)mkdir -p include/config include/generated $(Q)perl $< --$@ $(srctree) $(Kconfig) > .tmp.config $(Q)if [ -f .config ]; then \ cmp -s .tmp.config .config || \ @@ -319,4 +319,3 @@ $(obj)/%.moc: $(src)/%.h $(obj)/.tmp_qtcheck $(obj)/gconf.glade.h: $(obj)/gconf.glade $(Q)intltool-extract --type=gettext/glade --srcdir=$(srctree) \ $(obj)/gconf.glade - diff --git a/scripts/kconfig/check.sh b/scripts/kconfig/check.sh index 854d9c7c675..55b79ba1ba2 100755 --- a/scripts/kconfig/check.sh +++ b/scripts/kconfig/check.sh @@ -11,4 +11,3 @@ EOF if [ ! "$?" 
-eq "0" ]; then echo -DKBUILD_NO_NLS; fi - diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c index d19944f9c3a..fef75fc756f 100644 --- a/scripts/kconfig/conf.c +++ b/scripts/kconfig/conf.c @@ -696,7 +696,7 @@ int main(int ac, char **av) } else if (input_mode == savedefconfig) { if (conf_write_defconfig(defconfig_file)) { fprintf(stderr, _("n*** Error while saving defconfig to: %s\n\n"), - defconfig_file); + defconfig_file); return 1; } } else if (input_mode != listnewconfig) { diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index f2bee70e26f..d0a35b21f30 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -1404,7 +1404,7 @@ static void display_tree(struct menu *menu) && (tree == tree2)) continue; /* - if (((menu != &rootmenu) && !(menu->flags & MENU_ROOT)) + if (((menu != &rootmenu) && !(menu->flags & MENU_ROOT)) || (view_mode == FULL_VIEW) || (view_mode == SPLIT_VIEW))*/ diff --git a/scripts/kconfig/lxdialog/checklist.c b/scripts/kconfig/lxdialog/checklist.c index 3b15c08ec1f..8d016faa28d 100644 --- a/scripts/kconfig/lxdialog/checklist.c +++ b/scripts/kconfig/lxdialog/checklist.c @@ -168,13 +168,13 @@ do_resize: /* create new window for the list */ list = subwin(dialog, list_height, list_width, y + box_y + 1, - x + box_x + 1); + x + box_x + 1); keypad(list, TRUE); /* draw a box around the list items */ draw_box(dialog, box_y, box_x, list_height + 2, list_width + 2, - dlg.menubox_border.atr, dlg.menubox.atr); + dlg.menubox_border.atr, dlg.menubox.atr); /* Find length of longest item in order to center checklist */ check_x = 0; diff --git a/scripts/kconfig/lxdialog/inputbox.c b/scripts/kconfig/lxdialog/inputbox.c index 447a582198c..d58de1dc536 100644 --- a/scripts/kconfig/lxdialog/inputbox.c +++ b/scripts/kconfig/lxdialog/inputbox.c @@ -42,7 +42,7 @@ static void print_buttons(WINDOW * dialog, int height, int width, int selected) * Display a dialog box for inputing a string */ int dialog_inputbox(const char *title, const char *prompt, int height, int width, - const char *init) + const char *init) { int i, x, y, box_y, box_x, box_width; int input_x = 0, key = 0, button = -1; diff --git a/scripts/kconfig/lxdialog/menubox.c b/scripts/kconfig/lxdialog/menubox.c index c93de0b2fac..11ae9ad7ac7 100644 --- a/scripts/kconfig/lxdialog/menubox.c +++ b/scripts/kconfig/lxdialog/menubox.c @@ -64,7 +64,7 @@ static int menu_width, item_x; * Print menu item */ static void do_print_item(WINDOW * win, const char *item, int line_y, - int selected, int hotkey) + int selected, int hotkey) { int j; char *menu_item = malloc(menu_width + 1); @@ -182,7 +182,7 @@ static void do_scroll(WINDOW *win, int *scroll, int n) * Display a menu for choosing among a number of options */ int dialog_menu(const char *title, const char *prompt, - const void *selected, int *s_scroll) + const void *selected, int *s_scroll) { int i, j, x, y, box_x, box_y; int height, width, menu_height; diff --git a/scripts/kconfig/lxdialog/util.c b/scripts/kconfig/lxdialog/util.c index 58a8289dd65..f7abdeb92af 100644 --- a/scripts/kconfig/lxdialog/util.c +++ b/scripts/kconfig/lxdialog/util.c @@ -623,7 +623,7 @@ void item_make(const char *fmt, ...) void item_add_str(const char *fmt, ...) 
{ va_list ap; - size_t avail; + size_t avail; avail = sizeof(item_cur->node.str) - strlen(item_cur->node.str); diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c index 59184bb41ef..14cea7463a6 100644 --- a/scripts/kconfig/mconf.c +++ b/scripts/kconfig/mconf.c @@ -299,7 +299,7 @@ static void set_config_filename(const char *config_filename) int size; size = snprintf(menu_backtitle, sizeof(menu_backtitle), - "%s - %s", config_filename, rootmenu.prompt->text); + "%s - %s", config_filename, rootmenu.prompt->text); if (size >= sizeof(menu_backtitle)) menu_backtitle[sizeof(menu_backtitle)-1] = '\0'; set_dialog_backtitle(menu_backtitle); @@ -1034,4 +1034,3 @@ int main(int ac, char **av) return res; } - diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c index 3ac2c9c6e28..a26cc5d2a9b 100644 --- a/scripts/kconfig/menu.c +++ b/scripts/kconfig/menu.c @@ -258,8 +258,8 @@ static void sym_check_prop(struct symbol *sym) "config symbol '%s' uses select, but is " "not boolean or tristate", sym->name); else if (sym2->type != S_UNKNOWN && - sym2->type != S_BOOLEAN && - sym2->type != S_TRISTATE) + sym2->type != S_BOOLEAN && + sym2->type != S_TRISTATE) prop_warn(prop, "'%s' has wrong type. 'select' only " "accept arguments of boolean and " @@ -268,7 +268,7 @@ static void sym_check_prop(struct symbol *sym) case P_RANGE: if (sym->type != S_INT && sym->type != S_HEX) prop_warn(prop, "range is only allowed " - "for int or hex symbols"); + "for int or hex symbols"); if (!menu_validate_number(sym, prop->expr->left.sym) || !menu_validate_number(sym, prop->expr->right.sym)) prop_warn(prop, "range is invalid"); diff --git a/scripts/kconfig/nconf.c b/scripts/kconfig/nconf.c index 4fbecd2473b..984489ef2b4 100644 --- a/scripts/kconfig/nconf.c +++ b/scripts/kconfig/nconf.c @@ -1554,4 +1554,3 @@ int main(int ac, char **av) endwin(); return 0; } - diff --git a/scripts/kconfig/streamline_config.pl b/scripts/kconfig/streamline_config.pl index 31331723e81..9cb8522d8d2 100644 --- a/scripts/kconfig/streamline_config.pl +++ b/scripts/kconfig/streamline_config.pl @@ -589,7 +589,7 @@ while ($repeat) { # Now we need to see if we have to check selects; loop_select; -} +} my %setconfigs; diff --git a/scripts/kconfig/util.c b/scripts/kconfig/util.c index 6e7fbf19680..94f9c83e324 100644 --- a/scripts/kconfig/util.c +++ b/scripts/kconfig/util.c @@ -155,5 +155,3 @@ void *xcalloc(size_t nmemb, size_t size) fprintf(stderr, "Out of memory.\n"); exit(1); } - - diff --git a/scripts/kconfig/zconf.l b/scripts/kconfig/zconf.l index 1a9f53e535c..6c62d93b4ff 100644 --- a/scripts/kconfig/zconf.l +++ b/scripts/kconfig/zconf.l @@ -27,8 +27,8 @@ static char *text; static int text_size, text_asize; struct buffer { - struct buffer *parent; - YY_BUFFER_STATE state; + struct buffer *parent; + YY_BUFFER_STATE state; }; struct buffer *current_buf; diff --git a/scripts/kconfig/zconf.lex.c_shipped b/scripts/kconfig/zconf.lex.c_shipped index a0521aa5974..349a7f24315 100644 --- a/scripts/kconfig/zconf.lex.c_shipped +++ b/scripts/kconfig/zconf.lex.c_shipped @@ -789,8 +789,8 @@ static char *text; static int text_size, text_asize; struct buffer { - struct buffer *parent; - YY_BUFFER_STATE state; + struct buffer *parent; + YY_BUFFER_STATE state; }; struct buffer *current_buf; diff --git a/scripts/kconfig/zconf.tab.c_shipped b/scripts/kconfig/zconf.tab.c_shipped index 25ae16ac75c..de5e84ed3f9 100644 --- a/scripts/kconfig/zconf.tab.c_shipped +++ b/scripts/kconfig/zconf.tab.c_shipped @@ -2314,7 +2314,7 @@ void conf_parse(const char *name) 
for_all_symbols(i, sym) { if (sym_check_deps(sym)) zconfnerrs++; - } + } if (zconfnerrs) exit(1); sym_set_change_count(1); diff --git a/scripts/kconfig/zconf.y b/scripts/kconfig/zconf.y index 0653886fac4..0f683cfa53e 100644 --- a/scripts/kconfig/zconf.y +++ b/scripts/kconfig/zconf.y @@ -510,7 +510,7 @@ void conf_parse(const char *name) for_all_symbols(i, sym) { if (sym_check_deps(sym)) zconfnerrs++; - } + } if (zconfnerrs) exit(1); sym_set_change_count(1); diff --git a/scripts/markup_oops.pl b/scripts/markup_oops.pl index 827896f5650..c21d16328d3 100644 --- a/scripts/markup_oops.pl +++ b/scripts/markup_oops.pl @@ -367,4 +367,3 @@ OPTION: EOT exit; } - diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h index cfb8440cc0b..6fdc97ef602 100755 --- a/scripts/mkcompile_h +++ b/scripts/mkcompile_h @@ -68,7 +68,7 @@ UTS_TRUNCATE="cut -b -$UTS_LEN" ( echo /\* This file is auto generated, version $VERSION \*/ if [ -n "$CONFIG_FLAGS" ] ; then echo "/* $CONFIG_FLAGS */"; fi - + echo \#define UTS_MACHINE \"$ARCH\" echo \#define UTS_VERSION \"`echo $UTS_VERSION | $UTS_TRUNCATE`\" @@ -84,7 +84,7 @@ UTS_TRUNCATE="cut -b -$UTS_LEN" # recompilations. # We don't consider the file changed if only the date/time changed. # A kernel config change will increase the generation number, thus -# causing compile.h to be updated (including date/time) due to the +# causing compile.h to be updated (including date/time) due to the # changed comment in the # first line. diff --git a/scripts/mkmakefile b/scripts/mkmakefile index 0cc04426074..84af27bf0f9 100644 --- a/scripts/mkmakefile +++ b/scripts/mkmakefile @@ -42,18 +42,11 @@ MAKEARGS += O=\$(if \$(patsubst /%,,\$(makedir)),\$(CURDIR)/)\$(patsubst %/,%,\$ MAKEFLAGS += --no-print-directory -.PHONY: all \$(MAKECMDGOALS) +.PHONY: __sub-make \$(MAKECMDGOALS) -all := \$(filter-out all Makefile,\$(MAKECMDGOALS)) +__sub-make: + \$(Q)\$(MAKE) \$(MAKEARGS) \$(MAKECMDGOALS) -all: - \$(Q)\$(MAKE) \$(MAKEARGS) \$(all) - -Makefile:; - -\$(all): all - @: - -%/: all +\$(filter-out __sub-make, \$(MAKECMDGOALS)): __sub-make @: EOF diff --git a/scripts/mksysmap b/scripts/mksysmap index c1b6191ef87..7ada35a0f47 100644 --- a/scripts/mksysmap +++ b/scripts/mksysmap @@ -42,4 +42,3 @@ # (At least sparc64 has __crc_ in the middle). $NM -n $1 | grep -v '\( [aNUw] \)\|\(__crc_\)\|\( \$[adt]\)' > $2 - diff --git a/scripts/mod/.gitignore b/scripts/mod/.gitignore index 33bae0df4de..3bd11b60317 100644 --- a/scripts/mod/.gitignore +++ b/scripts/mod/.gitignore @@ -2,4 +2,3 @@ elfconfig.h mk_elfconfig modpost devicetable-offsets.h - diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 1924990a737..e614ef689ee 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -644,28 +644,26 @@ ADD_TO_DEVTABLE("pcmcia", pcmcia_device_id, do_pcmcia_entry); static int do_of_entry (const char *filename, void *symval, char *alias) { - int len; - char *tmp; - DEF_FIELD_ADDR(symval, of_device_id, name); - DEF_FIELD_ADDR(symval, of_device_id, type); - DEF_FIELD_ADDR(symval, of_device_id, compatible); - - len = sprintf (alias, "of:N%sT%s", - (*name)[0] ? *name : "*", - (*type)[0] ? *type : "*"); - - if (compatible[0]) - sprintf (&alias[len], "%sC%s", - (*type)[0] ? 
"*" : "", - *compatible); - - /* Replace all whitespace with underscores */ - for (tmp = alias; tmp && *tmp; tmp++) - if (isspace (*tmp)) - *tmp = '_'; - - add_wildcard(alias); - return 1; + int len; + char *tmp; + DEF_FIELD_ADDR(symval, of_device_id, name); + DEF_FIELD_ADDR(symval, of_device_id, type); + DEF_FIELD_ADDR(symval, of_device_id, compatible); + + len = sprintf(alias, "of:N%sT%s", (*name)[0] ? *name : "*", + (*type)[0] ? *type : "*"); + + if (compatible[0]) + sprintf(&alias[len], "%sC%s", (*type)[0] ? "*" : "", + *compatible); + + /* Replace all whitespace with underscores */ + for (tmp = alias; tmp && *tmp; tmp++) + if (isspace (*tmp)) + *tmp = '_'; + + add_wildcard(alias); + return 1; } ADD_TO_DEVTABLE("of", of_device_id, do_of_entry); diff --git a/scripts/mod/mk_elfconfig.c b/scripts/mod/mk_elfconfig.c index 639bca7ba55..a4fd71d71d6 100644 --- a/scripts/mod/mk_elfconfig.c +++ b/scripts/mod/mk_elfconfig.c @@ -54,4 +54,3 @@ main(int argc, char **argv) return 0; } - diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 026543ba8d8..9d9c5b905b3 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -862,7 +862,7 @@ static const char *section_white_list[] = * without "ax" / "aw". */ static void check_section(const char *modname, struct elf_info *elf, - Elf_Shdr *sechdr) + Elf_Shdr *sechdr) { const char *sec = sech_name(elf, sechdr); @@ -1296,12 +1296,12 @@ static void print_section_list(const char * const list[20]) */ static void report_sec_mismatch(const char *modname, const struct sectioncheck *mismatch, - const char *fromsec, - unsigned long long fromaddr, - const char *fromsym, - int from_is_func, - const char *tosec, const char *tosym, - int to_is_func) + const char *fromsec, + unsigned long long fromaddr, + const char *fromsym, + int from_is_func, + const char *tosec, const char *tosym, + int to_is_func) { const char *from, *from_p; const char *to, *to_p; @@ -1441,7 +1441,7 @@ static void report_sec_mismatch(const char *modname, } static void check_section_mismatch(const char *modname, struct elf_info *elf, - Elf_Rela *r, Elf_Sym *sym, const char *fromsec) + Elf_Rela *r, Elf_Sym *sym, const char *fromsec) { const char *tosec; const struct sectioncheck *mismatch; @@ -1528,7 +1528,7 @@ static int addend_arm_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r) case R_ARM_ABS32: /* From ARM ABI: (S + A) | T */ r->r_addend = (int)(long) - (elf->symtab_start + ELF_R_SYM(r->r_info)); + (elf->symtab_start + ELF_R_SYM(r->r_info)); break; case R_ARM_PC24: case R_ARM_CALL: @@ -1538,8 +1538,8 @@ static int addend_arm_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r) case R_ARM_THM_JUMP19: /* From ARM ABI: ((S + A) | T) - P */ r->r_addend = (int)(long)(elf->hdr + - sechdr->sh_offset + - (r->r_offset - sechdr->sh_addr)); + sechdr->sh_offset + + (r->r_offset - sechdr->sh_addr)); break; default: return 1; @@ -1571,7 +1571,7 @@ static int addend_mips_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r) } static void section_rela(const char *modname, struct elf_info *elf, - Elf_Shdr *sechdr) + Elf_Shdr *sechdr) { Elf_Sym *sym; Elf_Rela *rela; @@ -1615,7 +1615,7 @@ static void section_rela(const char *modname, struct elf_info *elf, } static void section_rel(const char *modname, struct elf_info *elf, - Elf_Shdr *sechdr) + Elf_Shdr *sechdr) { Elf_Sym *sym; Elf_Rel *rel; @@ -1685,7 +1685,7 @@ static void section_rel(const char *modname, struct elf_info *elf, * be discarded and warns about it. 
**/ static void check_sec_ref(struct module *mod, const char *modname, - struct elf_info *elf) + struct elf_info *elf) { int i; Elf_Shdr *sechdrs = elf->sechdrs; @@ -1945,7 +1945,7 @@ static int add_versions(struct buffer *b, struct module *mod) s->name, mod->name); } else { merror("\"%s\" [%s.ko] undefined!\n", - s->name, mod->name); + s->name, mod->name); err = 1; } } diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c index deb2994b04c..944418da9fe 100644 --- a/scripts/mod/sumversion.c +++ b/scripts/mod/sumversion.c @@ -214,7 +214,7 @@ static void md4_final_ascii(struct md4_ctx *mctx, char *out, unsigned int len) mctx->block[14] = mctx->byte_count << 3; mctx->block[15] = mctx->byte_count >> 29; le32_to_cpu_array(mctx->block, (sizeof(mctx->block) - - sizeof(uint64_t)) / sizeof(uint32_t)); + sizeof(uint64_t)) / sizeof(uint32_t)); md4_transform(mctx->hash, mctx->block); cpu_to_le32_array(mctx->hash, sizeof(mctx->hash) / sizeof(uint32_t)); @@ -367,7 +367,7 @@ static int parse_source_files(const char *objfile, struct md4_ctx *md) break; /* Terminate line at first space, to get rid of final ' \' */ while (*p) { - if (isspace(*p)) { + if (isspace(*p)) { *p = '\0'; break; } diff --git a/scripts/objdiff b/scripts/objdiff index b3e4f10bfc3..62e51dae213 100755 --- a/scripts/objdiff +++ b/scripts/objdiff @@ -25,25 +25,47 @@ # # Note: 'make mrproper' will also remove .tmp_objdiff -GIT_DIR="`git rev-parse --git-dir`" +SRCTREE=$(cd $(git rev-parse --show-toplevel 2>/dev/null); pwd) -if [ -d "$GIT_DIR" ]; then - TMPD="${GIT_DIR%git}tmp_objdiff" - - [ -d "$TMPD" ] || mkdir "$TMPD" -else - echo "ERROR: git directory not found." +if [ -z "$SRCTREE" ]; then + echo >&2 "ERROR: Not a git repository." exit 1 fi +TMPD=$SRCTREE/.tmp_objdiff + usage() { - echo "Usage: $0 <command> <args>" - echo " record <list of object files>" - echo " diff <commitA> <commitB>" - echo " clean all | <commit>" + echo >&2 "Usage: $0 <command> <args>" + echo >&2 " record <list of object files or directories>" + echo >&2 " diff <commitA> <commitB>" + echo >&2 " clean all | <commit>" exit 1 } +get_output_dir() { + dir=${1%/*} + + if [ "$dir" = "$1" ]; then + dir=. + fi + + dir=$(cd $dir; pwd) + + echo $TMPD/$CMT${dir#$SRCTREE} +} + +do_objdump() { + dir=$(get_output_dir $1) + base=${1##*/} + dis=$dir/${base%.o}.dis + + [ ! -d "$dir" ] && mkdir -p $dir + + # remove addresses for a cleaner diff + # http://dummdida.tumblr.com/post/60924060451/binary-diff-between-libc-from-scientificlinux-and + $OBJDUMP -D $1 | sed "s/^[[:space:]]\+[0-9a-f]\+//" > $dis +} + dorecord() { [ $# -eq 0 ] && usage @@ -52,20 +74,16 @@ dorecord() { CMT="`git rev-parse --short HEAD`" OBJDUMP="${CROSS_COMPILE}objdump" - OBJDIFFD="$TMPD/$CMT" - - [ ! -d "$OBJDIFFD" ] && mkdir -p "$OBJDIFFD" - for f in $FILES; do - dn="${f%/*}" - bn="${f##*/}" - - [ ! -d "$OBJDIFFD/$dn" ] && mkdir -p "$OBJDIFFD/$dn" - - # remove addresses for a more clear diff - # http://dummdida.tumblr.com/post/60924060451/binary-diff-between-libc-from-scientificlinux-and - $OBJDUMP -D "$f" | sed "s/^[[:space:]]\+[0-9a-f]\+//" \ - >"$OBJDIFFD/$dn/$bn" + for d in $FILES; do + if [ -d "$d" ]; then + for f in $(find $d -name '*.o') + do + do_objdump $f + done + else + do_objdump $d + fi done } @@ -90,12 +108,12 @@ dodiff() { DSTD="$TMPD/$DST" if [ ! -d "$SRCD" ]; then - echo "ERROR: $SRCD doesn't exist" + echo >&2 "ERROR: $SRCD doesn't exist" exit 1 fi if [ ! 
-d "$DSTD" ]; then - echo "ERROR: $DSTD doesn't exist" + echo >&2 "ERROR: $DSTD doesn't exist" exit 1 fi @@ -114,7 +132,7 @@ doclean() { if [ -d "$TMPD/$CMT" ]; then rm -rf $TMPD/$CMT else - echo "$CMT not found" + echo >&2 "$CMT not found" fi fi } @@ -135,7 +153,7 @@ case "$1" in doclean $* ;; *) - echo "Unrecognized command '$1'" + echo >&2 "Unrecognized command '$1'" exit 1 ;; esac diff --git a/scripts/package/Makefile b/scripts/package/Makefile index c5d47339381..99ca6e76eb0 100644 --- a/scripts/package/Makefile +++ b/scripts/package/Makefile @@ -143,4 +143,3 @@ help: FORCE @echo ' perf-targz-src-pkg - Build $(perf-tar).tar.gz source tarball' @echo ' perf-tarbz2-src-pkg - Build $(perf-tar).tar.bz2 source tarball' @echo ' perf-tarxz-src-pkg - Build $(perf-tar).tar.xz source tarball' - diff --git a/scripts/package/builddeb b/scripts/package/builddeb index f46e4dd0558..b5f08f72786 100644 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -35,13 +35,15 @@ create_package() { sparc*) debarch=sparc ;; s390*) - debarch=s390 ;; + debarch=s390$(grep -q CONFIG_64BIT=y $KCONFIG_CONFIG && echo x || true) ;; ppc*) debarch=powerpc ;; parisc*) debarch=hppa ;; mips*) debarch=mips$(grep -q CPU_LITTLE_ENDIAN=y $KCONFIG_CONFIG && echo el || true) ;; + arm64) + debarch=arm64 ;; arm*) debarch=arm$(grep -q CONFIG_AEABI=y $KCONFIG_CONFIG && echo el || true) ;; *) @@ -130,7 +132,7 @@ if [ "$ARCH" = "um" ] ; then cp System.map "$tmpdir/usr/lib/uml/modules/$version/System.map" cp $KCONFIG_CONFIG "$tmpdir/usr/share/doc/$packagename/config" gzip "$tmpdir/usr/share/doc/$packagename/config" -else +else cp System.map "$tmpdir/boot/System.map-$version" cp $KCONFIG_CONFIG "$tmpdir/boot/config-$version" fi @@ -155,11 +157,11 @@ if grep -q '^CONFIG_MODULES=y' $KCONFIG_CONFIG ; then for module in $(find lib/modules/ -name *.ko); do mkdir -p $(dirname $dbg_dir/usr/lib/debug/$module) # only keep debug symbols in the debug file - objcopy --only-keep-debug $module $dbg_dir/usr/lib/debug/$module + $OBJCOPY --only-keep-debug $module $dbg_dir/usr/lib/debug/$module # strip original module from debug symbols - objcopy --strip-debug $module + $OBJCOPY --strip-debug $module # then add a link to those - objcopy --add-gnu-debuglink=$dbg_dir/usr/lib/debug/$module $module + $OBJCOPY --add-gnu-debuglink=$dbg_dir/usr/lib/debug/$module $module done ) fi diff --git a/scripts/package/buildtar b/scripts/package/buildtar index aa22f9447dd..995c1eafaff 100644 --- a/scripts/package/buildtar +++ b/scripts/package/buildtar @@ -136,4 +136,3 @@ esac echo "Tarball successfully created in ${tarball}${file_ext}" exit 0 - diff --git a/scripts/patch-kernel b/scripts/patch-kernel index d000ea3a41f..49b4241e814 100755 --- a/scripts/patch-kernel +++ b/scripts/patch-kernel @@ -27,7 +27,7 @@ # Nick Holloway <Nick.Holloway@alfie.demon.co.uk>, 2nd January 1995. # # Added support for handling multiple types of compression. What includes -# gzip, bzip, bzip2, zip, compress, and plaintext. +# gzip, bzip, bzip2, zip, compress, and plaintext. # # Adam Sulmicki <adam@cfar.umd.edu>, 1st January 1997. 
# @@ -159,7 +159,7 @@ applyPatch () { fi # Remove backup files find $sourcedir/ '(' -name '*.orig' -o -name '.*.orig' ')' -exec rm -f {} \; - + return 0; } diff --git a/scripts/pnmtologo.c b/scripts/pnmtologo.c index 68bb4efc5af..4718d7895f0 100644 --- a/scripts/pnmtologo.c +++ b/scripts/pnmtologo.c @@ -512,4 +512,3 @@ int main(int argc, char *argv[]) } exit(0); } - diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index e11aa4a156d..650ecc83d7d 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -487,5 +487,3 @@ main(int argc, char *argv[]) } return !!n_error; } - - diff --git a/scripts/rt-tester/check-all.sh b/scripts/rt-tester/check-all.sh index 43098afe743..6b5c83baf14 100644 --- a/scripts/rt-tester/check-all.sh +++ b/scripts/rt-tester/check-all.sh @@ -19,4 +19,3 @@ testit t3-l2-pi.tst testit t4-l2-pi-deboost.tst testit t5-l4-pi-boost-deboost.tst testit t5-l4-pi-boost-deboost-setsched.tst - diff --git a/scripts/rt-tester/rt-tester.py b/scripts/rt-tester/rt-tester.py index 34186cac1d2..6d916c2a45a 100644 --- a/scripts/rt-tester/rt-tester.py +++ b/scripts/rt-tester/rt-tester.py @@ -216,5 +216,3 @@ while 1: # Normal exit pass print "Pass" sys.exit(0) - - diff --git a/scripts/selinux/install_policy.sh b/scripts/selinux/install_policy.sh index 7b9ccf61f8f..f6a0ce71015 100644 --- a/scripts/selinux/install_policy.sh +++ b/scripts/selinux/install_policy.sh @@ -66,4 +66,3 @@ if [ "eq$dodev" != "eq" ]; then $SF file_contexts /dev mount --move /mnt /dev fi - diff --git a/scripts/show_delta b/scripts/show_delta index e25732b5d70..5b365009e6a 100755 --- a/scripts/show_delta +++ b/scripts/show_delta @@ -126,4 +126,3 @@ def main(): print (convert_line(line, base_time),) main() - diff --git a/scripts/tags.sh b/scripts/tags.sh index f2c5b006a3d..e6b011fe1d0 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -25,6 +25,9 @@ else tree=${srctree}/ fi +# ignore userspace tools +ignore="$ignore ( -path ${tree}tools ) -prune -o" + # Find all available archs find_all_archs() { @@ -47,7 +50,8 @@ find_arch_sources() for i in $archincludedir; do prune="$prune -wholename $i -prune -o" done - find ${tree}arch/$1 $ignore $subarchprune $prune -name "$2" -print; + find ${tree}arch/$1 $ignore $subarchprune $prune -name "$2" \ + -not -type l -print; } # find sources in arch/$1/include @@ -57,14 +61,15 @@ find_arch_include_sources() -name include -type d -print); if [ -n "$include" ]; then archincludedir="$archincludedir $include" - find $include $ignore -name "$2" -print; + find $include $ignore -name "$2" -not -type l -print; fi } # find sources in include/ find_include_sources() { - find ${tree}include $ignore -name config -prune -o -name "$1" -print; + find ${tree}include $ignore -name config -prune -o -name "$1" \ + -not -type l -print; } # find sources in rest of tree @@ -73,7 +78,7 @@ find_other_sources() { find ${tree}* $ignore \ \( -name include -o -name arch -o -name '.tmp_*' \) -prune -o \ - -name "$1" -print; + -name "$1" -not -type l -print; } find_sources() @@ -187,6 +192,10 @@ exuberant() --regex-c++='/TESTCLEARFLAG_FALSE\(([^,)]*).*/TestClearPage\1/' \ --regex-c++='/__TESTCLEARFLAG_FALSE\(([^,)]*).*/__TestClearPage\1/' \ --regex-c++='/_PE\(([^,)]*).*/PEVENT_ERRNO__\1/' \ + --regex-c++='/TESTPCGFLAG\(([^,)]*).*/PageCgroup\1/' \ + --regex-c++='/SETPCGFLAG\(([^,)]*).*/SetPageCgroup\1/' \ + --regex-c++='/CLEARPCGFLAG\(([^,)]*).*/ClearPageCgroup\1/' \ + --regex-c++='/TESTCLEARPCGFLAG\(([^,)]*).*/TestClearPageCgroup\1/' \ 
--regex-c='/PCI_OP_READ\((\w*).*[1-4]\)/pci_bus_read_config_\1/' \ --regex-c='/PCI_OP_WRITE\((\w*).*[1-4]\)/pci_bus_write_config_\1/' \ --regex-c='/DEFINE_(MUTEX|SEMAPHORE|SPINLOCK)\((\w*)/\2/v/' \ @@ -201,7 +210,8 @@ exuberant() --regex-c='/DECLARE_(TASKLET|WORK|DELAYED_WORK)\((\w*)/\2/v/' \ --regex-c='/DEFINE_PCI_DEVICE_TABLE\((\w*)/\1/v/' \ --regex-c='/(^\s)OFFSET\((\w*)/\2/v/' \ - --regex-c='/(^\s)DEFINE\((\w*)/\2/v/' + --regex-c='/(^\s)DEFINE\((\w*)/\2/v/' \ + --regex-c='/DEFINE_HASHTABLE\((\w*)/\1/v/' all_kconfigs | xargs $1 -a \ --langdef=kconfig --language-force=kconfig \ @@ -244,9 +254,14 @@ emacs() --regex='/__CLEARPAGEFLAG_NOOP(\([^,)]*\).*/__ClearPage\1/' \ --regex='/TESTCLEARFLAG_FALSE(\([^,)]*\).*/TestClearPage\1/' \ --regex='/__TESTCLEARFLAG_FALSE(\([^,)]*\).*/__TestClearPage\1/' \ + --regex='/TESTPCGFLAG\(([^,)]*).*/PageCgroup\1/' \ + --regex='/SETPCGFLAG\(([^,)]*).*/SetPageCgroup\1/' \ + --regex='/CLEARPCGFLAG\(([^,)]*).*/ClearPageCgroup\1/' \ + --regex='/TESTCLEARPCGFLAG\(([^,)]*).*/TestClearPageCgroup\1/' \ --regex='/_PE(\([^,)]*\).*/PEVENT_ERRNO__\1/' \ --regex='/PCI_OP_READ(\([a-z]*[a-z]\).*[1-4])/pci_bus_read_config_\1/' \ - --regex='/PCI_OP_WRITE(\([a-z]*[a-z]\).*[1-4])/pci_bus_write_config_\1/' + --regex='/PCI_OP_WRITE(\([a-z]*[a-z]\).*[1-4])/pci_bus_write_config_\1/'\ + --regex='/DEFINE_HASHTABLE\((\w*)/\1/v/' all_kconfigs | xargs $1 -a \ --regex='/^[ \t]*\(\(menu\)*config\)[ \t]+\([a-zA-Z0-9_]+\)/\3/' @@ -266,7 +281,7 @@ xtags() emacs $1 else all_target_sources | xargs $1 -a - fi + fi } # Support um (which uses SUBARCH) diff --git a/security/integrity/evm/Kconfig b/security/integrity/evm/Kconfig index d35b4915b00..d606f3d12d6 100644 --- a/security/integrity/evm/Kconfig +++ b/security/integrity/evm/Kconfig @@ -12,15 +12,41 @@ config EVM If you are unsure how to answer this question, answer N. -config EVM_HMAC_VERSION - int "EVM HMAC version" +if EVM + +menu "EVM options" + +config EVM_ATTR_FSUUID + bool "FSUUID (version 2)" + default y depends on EVM - default 2 help - This options adds EVM HMAC version support. - 1 - original version - 2 - add per filesystem unique identifier (UUID) (default) + Include filesystem UUID for HMAC calculation. + + Default value is 'selected', which is former version 2. + if 'not selected', it is former version 1 - WARNING: changing the HMAC calculation method or adding + WARNING: changing the HMAC calculation method or adding additional info to the calculation, requires existing EVM - labeled file systems to be relabeled. + labeled file systems to be relabeled. + +config EVM_EXTRA_SMACK_XATTRS + bool "Additional SMACK xattrs" + depends on EVM && SECURITY_SMACK + default n + help + Include additional SMACK xattrs for HMAC calculation. + + In addition to the original security xattrs (eg. security.selinux, + security.SMACK64, security.capability, and security.ima) included + in the HMAC calculation, enabling this option includes newly defined + Smack xattrs: security.SMACK64EXEC, security.SMACK64TRANSMUTE and + security.SMACK64MMAP. + + WARNING: changing the HMAC calculation method or adding + additional info to the calculation, requires existing EVM + labeled file systems to be relabeled. 
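For orientation, the options above feed an attribute bitmask instead of the old EVM_HMAC_VERSION integer; a condensed sketch of the pattern, mirroring the evm.h and evm_main.c hunks that follow:

	#define EVM_ATTR_FSUUID		0x0001	/* former "HMAC version 2" */

	static int evm_hmac_attrs;	/* bitmask replaces the version integer */

	static void evm_init_config(void)
	{
	#ifdef CONFIG_EVM_ATTR_FSUUID
		evm_hmac_attrs |= EVM_ATTR_FSUUID;
	#endif
		/* future attribute sets OR in their own bits here */
	}

Every selected bit feeds extra data into the HMAC, so flipping either option changes the digest computed over the same file, which is why both help texts insist on relabeling.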
+ +endmenu + +endif diff --git a/security/integrity/evm/evm.h b/security/integrity/evm/evm.h index 37c88ddb3cf..88bfe77efa1 100644 --- a/security/integrity/evm/evm.h +++ b/security/integrity/evm/evm.h @@ -24,7 +24,10 @@ extern int evm_initialized; extern char *evm_hmac; extern char *evm_hash; -extern int evm_hmac_version; + +#define EVM_ATTR_FSUUID 0x0001 + +extern int evm_hmac_attrs; extern struct crypto_shash *hmac_tfm; extern struct crypto_shash *hash_tfm; diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c index 6b540f1822e..5e9687f02e1 100644 --- a/security/integrity/evm/evm_crypto.c +++ b/security/integrity/evm/evm_crypto.c @@ -112,7 +112,7 @@ static void hmac_add_misc(struct shash_desc *desc, struct inode *inode, hmac_misc.gid = from_kgid(&init_user_ns, inode->i_gid); hmac_misc.mode = inode->i_mode; crypto_shash_update(desc, (const u8 *)&hmac_misc, sizeof(hmac_misc)); - if (evm_hmac_version > 1) + if (evm_hmac_attrs & EVM_ATTR_FSUUID) crypto_shash_update(desc, inode->i_sb->s_uuid, sizeof(inode->i_sb->s_uuid)); crypto_shash_final(desc, digest); diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index 6e0bd933b6a..3bcb80df4d0 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -32,7 +32,7 @@ static char *integrity_status_msg[] = { }; char *evm_hmac = "hmac(sha1)"; char *evm_hash = "sha1"; -int evm_hmac_version = CONFIG_EVM_HMAC_VERSION; +int evm_hmac_attrs; char *evm_config_xattrnames[] = { #ifdef CONFIG_SECURITY_SELINUX @@ -40,6 +40,11 @@ char *evm_config_xattrnames[] = { #endif #ifdef CONFIG_SECURITY_SMACK XATTR_NAME_SMACK, +#ifdef CONFIG_EVM_EXTRA_SMACK_XATTRS + XATTR_NAME_SMACKEXEC, + XATTR_NAME_SMACKTRANSMUTE, + XATTR_NAME_SMACKMMAP, +#endif #endif #ifdef CONFIG_IMA_APPRAISE XATTR_NAME_IMA, @@ -57,6 +62,14 @@ static int __init evm_set_fixmode(char *str) } __setup("evm=", evm_set_fixmode); +static void __init evm_init_config(void) +{ +#ifdef CONFIG_EVM_ATTR_FSUUID + evm_hmac_attrs |= EVM_ATTR_FSUUID; +#endif + pr_info("HMAC attrs: 0x%x\n", evm_hmac_attrs); +} + static int evm_find_protected_xattrs(struct dentry *dentry) { struct inode *inode = dentry->d_inode; @@ -287,12 +300,20 @@ out: * @xattr_value: pointer to the new extended attribute value * @xattr_value_len: pointer to the new extended attribute value length * - * Updating 'security.evm' requires CAP_SYS_ADMIN privileges and that - * the current value is valid. + * Before allowing the 'security.evm' protected xattr to be updated, + * verify the existing value is valid. As only the kernel should have + * access to the EVM encrypted key needed to calculate the HMAC, prevent + * userspace from writing the HMAC value. Writing 'security.evm' + * requires CAP_SYS_ADMIN privileges. 
*/ int evm_inode_setxattr(struct dentry *dentry, const char *xattr_name, const void *xattr_value, size_t xattr_value_len) { + const struct evm_ima_xattr_data *xattr_data = xattr_value; + + if ((strcmp(xattr_name, XATTR_NAME_EVM) == 0) + && (xattr_data->type == EVM_XATTR_HMAC)) + return -EPERM; return evm_protect_xattr(dentry, xattr_name, xattr_value, xattr_value_len); } @@ -432,6 +453,8 @@ static int __init init_evm(void) { int error; + evm_init_config(); + error = evm_init_secfs(); if (error < 0) { pr_info("Error registering secfs\n"); diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index 291bf0f3a46..d3113d4aaa3 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -341,7 +341,7 @@ static int ima_protect_xattr(struct dentry *dentry, const char *xattr_name, return 0; } -static void ima_reset_appraise_flags(struct inode *inode) +static void ima_reset_appraise_flags(struct inode *inode, int digsig) { struct integrity_iint_cache *iint; @@ -353,18 +353,22 @@ static void ima_reset_appraise_flags(struct inode *inode) return; iint->flags &= ~IMA_DONE_MASK; + if (digsig) + iint->flags |= IMA_DIGSIG; return; } int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name, const void *xattr_value, size_t xattr_value_len) { + const struct evm_ima_xattr_data *xvalue = xattr_value; int result; result = ima_protect_xattr(dentry, xattr_name, xattr_value, xattr_value_len); if (result == 1) { - ima_reset_appraise_flags(dentry->d_inode); + ima_reset_appraise_flags(dentry->d_inode, + (xvalue->type == EVM_IMA_XATTR_DIGSIG) ? 1 : 0); result = 0; } return result; @@ -376,7 +380,7 @@ int ima_inode_removexattr(struct dentry *dentry, const char *xattr_name) result = ima_protect_xattr(dentry, xattr_name, NULL, 0); if (result == 1) { - ima_reset_appraise_flags(dentry->d_inode); + ima_reset_appraise_flags(dentry->d_inode, 0); result = 0; } return result; } diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index 1bde8e62776..ccd0ac8fa9a 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -27,6 +27,36 @@ static struct crypto_shash *ima_shash_tfm; +/** + * ima_kernel_read - read file content + * + * This is a function for reading file content instead of kernel_read(). + * It does not perform locking checks, so that it cannot be blocked. + * It does not perform security checks, because they are irrelevant for IMA. 
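From userspace, the evm_inode_setxattr() check above surfaces as a plain EPERM; a minimal sketch of the affected call (path and helper name hypothetical):

	#include <sys/xattr.h>

	/* A blob whose leading type byte marks it as a kernel-computed HMAC
	 * is now rejected outright, even with CAP_SYS_ADMIN; signature-type
	 * blobs remain writable.
	 */
	static int write_evm_xattr(const char *path, const void *blob, size_t len)
	{
		return setxattr(path, "security.evm", blob, len, 0);	/* -1/EPERM for the HMAC type */
	}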
+ * + */ +static int ima_kernel_read(struct file *file, loff_t offset, + char *addr, unsigned long count) +{ + mm_segment_t old_fs; + char __user *buf = addr; + ssize_t ret; + + if (!(file->f_mode & FMODE_READ)) + return -EBADF; + if (!file->f_op->read && !file->f_op->aio_read) + return -EINVAL; + + old_fs = get_fs(); + set_fs(get_ds()); + if (file->f_op->read) + ret = file->f_op->read(file, buf, count, &offset); + else + ret = do_sync_read(file, buf, count, &offset); + set_fs(old_fs); + return ret; +} + int ima_init_crypto(void) { long rc; @@ -104,7 +134,7 @@ static int ima_calc_file_hash_tfm(struct file *file, while (offset < i_size) { int rbuf_len; - rbuf_len = kernel_read(file, offset, rbuf, PAGE_SIZE); + rbuf_len = ima_kernel_read(file, offset, rbuf, PAGE_SIZE); if (rbuf_len < 0) { rc = rbuf_len; break; diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index dcc98cf542d..09baa335ebc 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -81,7 +81,6 @@ static void ima_rdwr_violation_check(struct file *file) { struct inode *inode = file_inode(file); fmode_t mode = file->f_mode; - int must_measure; bool send_tomtou = false, send_writers = false; char *pathbuf = NULL; const char *pathname; @@ -92,18 +91,19 @@ static void ima_rdwr_violation_check(struct file *file) mutex_lock(&inode->i_mutex); /* file metadata: permissions, xattr */ if (mode & FMODE_WRITE) { - if (atomic_read(&inode->i_readcount) && IS_IMA(inode)) - send_tomtou = true; - goto out; + if (atomic_read(&inode->i_readcount) && IS_IMA(inode)) { + struct integrity_iint_cache *iint; + iint = integrity_iint_find(inode); + /* IMA_MEASURE is set from reader side */ + if (iint && (iint->flags & IMA_MEASURE)) + send_tomtou = true; + } + } else { + if ((atomic_read(&inode->i_writecount) > 0) && + ima_must_measure(inode, MAY_READ, FILE_CHECK)) + send_writers = true; } - must_measure = ima_must_measure(inode, MAY_READ, FILE_CHECK); - if (!must_measure) - goto out; - - if (atomic_read(&inode->i_writecount) > 0) - send_writers = true; -out: mutex_unlock(&inode->i_mutex); if (!send_tomtou && !send_writers) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 9ca5e647e54..225c73152ee 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -660,7 +660,7 @@ static int deliver_to_subscribers(struct snd_seq_client *client, int atomic, int hop) { struct snd_seq_subscribers *subs; - int err = 0, num_ev = 0; + int err, result = 0, num_ev = 0; struct snd_seq_event event_saved; struct snd_seq_client_port *src_port; struct snd_seq_port_subs_info *grp; @@ -685,8 +685,12 @@ static int deliver_to_subscribers(struct snd_seq_client *client, subs->info.flags & SNDRV_SEQ_PORT_SUBS_TIME_REAL); err = snd_seq_deliver_single_event(client, event, 0, atomic, hop); - if (err < 0) - break; + if (err < 0) { + /* save first error that occurs and continue */ + if (!result) + result = err; + continue; + } num_ev++; /* restore original event record */ *event = event_saved; @@ -697,7 +701,7 @@ static int deliver_to_subscribers(struct snd_seq_client *client, up_read(&grp->list_mutex); *event = event_saved; /* restore */ snd_seq_port_unlock(src_port); - return (err < 0) ? err : num_ev; + return (result < 0) ? 
result : num_ev; } @@ -709,7 +713,7 @@ static int port_broadcast_event(struct snd_seq_client *client, struct snd_seq_event *event, int atomic, int hop) { - int num_ev = 0, err = 0; + int num_ev = 0, err, result = 0; struct snd_seq_client *dest_client; struct snd_seq_client_port *port; @@ -724,14 +728,18 @@ static int port_broadcast_event(struct snd_seq_client *client, err = snd_seq_deliver_single_event(NULL, event, SNDRV_SEQ_FILTER_BROADCAST, atomic, hop); - if (err < 0) - break; + if (err < 0) { + /* save first error that occurs and continue */ + if (!result) + result = err; + continue; + } num_ev++; } read_unlock(&dest_client->ports_lock); snd_seq_client_unlock(dest_client); event->dest.port = SNDRV_SEQ_ADDRESS_BROADCAST; /* restore */ - return (err < 0) ? err : num_ev; + return (result < 0) ? result : num_ev; } /* @@ -741,7 +749,7 @@ static int port_broadcast_event(struct snd_seq_client *client, static int broadcast_event(struct snd_seq_client *client, struct snd_seq_event *event, int atomic, int hop) { - int err = 0, num_ev = 0; + int err, result = 0, num_ev = 0; int dest; struct snd_seq_addr addr; @@ -760,12 +768,16 @@ static int broadcast_event(struct snd_seq_client *client, err = snd_seq_deliver_single_event(NULL, event, SNDRV_SEQ_FILTER_BROADCAST, atomic, hop); - if (err < 0) - break; + if (err < 0) { + /* save first error that occurs and continue */ + if (!result) + result = err; + continue; + } num_ev += err; } event->dest = addr; /* restore */ - return (err < 0) ? err : num_ev; + return (result < 0) ? result : num_ev; } diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c index 559989992be..53a403e17c5 100644 --- a/sound/core/seq/seq_fifo.c +++ b/sound/core/seq/seq_fifo.c @@ -124,7 +124,7 @@ int snd_seq_fifo_event_in(struct snd_seq_fifo *f, snd_use_lock_use(&f->use_lock); err = snd_seq_event_dup(f->pool, event, &cell, 1, NULL); /* always non-blocking */ if (err < 0) { - if (err == -ENOMEM) + if ((err == -ENOMEM) || (err == -EAGAIN)) atomic_inc(&f->overflow); snd_use_lock_free(&f->use_lock); return err; diff --git a/sound/core/timer.c b/sound/core/timer.c index cfd455a8ac1..777a45e08e5 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -390,7 +390,7 @@ static void snd_timer_notify1(struct snd_timer_instance *ti, int event) struct timespec tstamp; if (timer_tstamp_monotonic) - do_posix_clock_monotonic_gettime(&tstamp); + ktime_get_ts(&tstamp); else getnstimeofday(&tstamp); if (snd_BUG_ON(event < SNDRV_TIMER_EVENT_START || @@ -1203,7 +1203,7 @@ static void snd_timer_user_tinterrupt(struct snd_timer_instance *timeri, } if (tu->last_resolution != resolution || ticks > 0) { if (timer_tstamp_monotonic) - do_posix_clock_monotonic_gettime(&tstamp); + ktime_get_ts(&tstamp); else getnstimeofday(&tstamp); } diff --git a/sound/firewire/bebob/bebob.h b/sound/firewire/bebob/bebob.h index d1c93a1e097..e13eef99c27 100644 --- a/sound/firewire/bebob/bebob.h +++ b/sound/firewire/bebob/bebob.h @@ -208,8 +208,6 @@ int snd_bebob_stream_set_rate(struct snd_bebob *bebob, unsigned int rate); int snd_bebob_stream_check_internal_clock(struct snd_bebob *bebob, bool *internal); int snd_bebob_stream_discover(struct snd_bebob *bebob); -int snd_bebob_stream_map(struct snd_bebob *bebob, - struct amdtp_stream *stream); int snd_bebob_stream_init_duplex(struct snd_bebob *bebob); int snd_bebob_stream_start_duplex(struct snd_bebob *bebob, unsigned int rate); void snd_bebob_stream_stop_duplex(struct snd_bebob *bebob); diff --git a/sound/firewire/bebob/bebob_stream.c 
b/sound/firewire/bebob/bebob_stream.c index bc4f82776fd..ef4d0c9f657 100644 --- a/sound/firewire/bebob/bebob_stream.c +++ b/sound/firewire/bebob/bebob_stream.c @@ -655,8 +655,6 @@ void snd_bebob_stream_stop_duplex(struct snd_bebob *bebob) struct amdtp_stream *master, *slave; atomic_t *master_substreams, *slave_substreams; - mutex_lock(&bebob->mutex); - if (bebob->master == &bebob->rx_stream) { slave = &bebob->tx_stream; master = &bebob->rx_stream; @@ -669,6 +667,8 @@ void snd_bebob_stream_stop_duplex(struct snd_bebob *bebob) master_substreams = &bebob->capture_substreams; } + mutex_lock(&bebob->mutex); + if (atomic_read(slave_substreams) == 0) { amdtp_stream_pcm_abort(slave); amdtp_stream_stop(slave); diff --git a/sound/firewire/fireworks/fireworks.c b/sound/firewire/fireworks/fireworks.c index 996fdc44c83..3e2ed8e82cb 100644 --- a/sound/firewire/fireworks/fireworks.c +++ b/sound/firewire/fireworks/fireworks.c @@ -346,7 +346,6 @@ static void __exit snd_efw_exit(void) { snd_efw_transaction_unregister(); driver_unregister(&efw_driver.driver); - mutex_destroy(&devices_mutex); } module_init(snd_efw_init); diff --git a/sound/firewire/fireworks/fireworks.h b/sound/firewire/fireworks/fireworks.h index d2b36be4d2f..4f0201a9522 100644 --- a/sound/firewire/fireworks/fireworks.h +++ b/sound/firewire/fireworks/fireworks.h @@ -162,7 +162,6 @@ enum snd_efw_grp_type { SND_EFW_CH_TYPE_GUITAR = 7, SND_EFW_CH_TYPE_PIEZO_GUITAR = 8, SND_EFW_CH_TYPE_GUITAR_STRING = 9, - SND_EFW_CH_TYPE_VIRTUAL = 0x10000, SND_EFW_CH_TYPE_DUMMY }; struct snd_efw_phys_meters { diff --git a/sound/firewire/fireworks/fireworks_hwdep.c b/sound/firewire/fireworks/fireworks_hwdep.c index 4f8216fb6b6..33df8655fe8 100644 --- a/sound/firewire/fireworks/fireworks_hwdep.c +++ b/sound/firewire/fireworks/fireworks_hwdep.c @@ -58,7 +58,7 @@ hwdep_read_resp_buf(struct snd_efw *efw, char __user *buf, long remained, efw->pull_ptr += till_end; if (efw->pull_ptr >= efw->resp_buf + snd_efw_resp_buf_size) - efw->pull_ptr = efw->resp_buf; + efw->pull_ptr -= snd_efw_resp_buf_size; length -= till_end; buf += till_end; diff --git a/sound/firewire/fireworks/fireworks_stream.c b/sound/firewire/fireworks/fireworks_stream.c index 541569022a7..b985fc5ebdc 100644 --- a/sound/firewire/fireworks/fireworks_stream.c +++ b/sound/firewire/fireworks/fireworks_stream.c @@ -284,8 +284,6 @@ void snd_efw_stream_stop_duplex(struct snd_efw *efw) struct amdtp_stream *master, *slave; atomic_t *master_substreams, *slave_substreams; - mutex_lock(&efw->mutex); - if (efw->master == &efw->rx_stream) { slave = &efw->tx_stream; master = &efw->rx_stream; @@ -298,6 +296,8 @@ void snd_efw_stream_stop_duplex(struct snd_efw *efw) master_substreams = &efw->capture_substreams; } + mutex_lock(&efw->mutex); + if (atomic_read(slave_substreams) == 0) { stop_stream(efw, slave); diff --git a/sound/firewire/fireworks/fireworks_transaction.c b/sound/firewire/fireworks/fireworks_transaction.c index aa56b8ac537..255dabc6fc3 100644 --- a/sound/firewire/fireworks/fireworks_transaction.c +++ b/sound/firewire/fireworks/fireworks_transaction.c @@ -8,19 +8,19 @@ /* * Fireworks have its own transaction. The transaction can be delivered by AV/C - * Vendor Specific command. But at least Windows driver and firmware version 5.5 - * or later don't use it. + * Vendor Specific command frame or usual asynchronous transaction. At least, + * Windows driver and firmware version 5.5 or later don't use AV/C command. * * Transaction substance: - * At first, 6 data exist. 
Following to the 6 data, parameters for each - * commands exists. All of parameters are 32 bit alighed to big endian. + * At first, 6 data exist. Following the data, parameters for each command + * exist. All of the parameters are 32 bit aligned to big endian. * data[0]: Length of transaction substance * data[1]: Transaction version * data[2]: Sequence number. This is incremented by the device - * data[3]: transaction category - * data[4]: transaction command - * data[5]: return value in response. - * data[6-]: parameters + * data[3]: Transaction category + * data[4]: Transaction command + * data[5]: Return value in response. + * data[6-]: Parameters * * Transaction address: * command: 0xecc000000000 @@ -148,7 +148,7 @@ copy_resp_to_buf(struct snd_efw *efw, void *data, size_t length, int *rcode) efw->push_ptr += till_end; if (efw->push_ptr >= efw->resp_buf + snd_efw_resp_buf_size) - efw->push_ptr = efw->resp_buf; + efw->push_ptr -= snd_efw_resp_buf_size; length -= till_end; data += till_end; diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index cd77b9b19b7..bb65a124e00 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -237,6 +237,12 @@ enum { AZX_DCAPS_COUNT_LPIB_DELAY | AZX_DCAPS_PM_RUNTIME | \ AZX_DCAPS_I915_POWERWELL) +/* Broadwell HDMI can't use the position buffer reliably, force it to use LPIB */ +#define AZX_DCAPS_INTEL_BROADWELL \ + (AZX_DCAPS_SCH_SNOOP | AZX_DCAPS_ALIGN_BUFSIZE | \ + AZX_DCAPS_POSFIX_LPIB | AZX_DCAPS_PM_RUNTIME | \ + AZX_DCAPS_I915_POWERWELL) + /* quirks for ATI SB / AMD Hudson */ #define AZX_DCAPS_PRESET_ATI_SB \ (AZX_DCAPS_ATI_SNOOP | AZX_DCAPS_NO_TCSEL | \ @@ -1367,12 +1373,6 @@ static int azx_first_init(struct azx *chip) /* initialize streams */ azx_init_stream(chip); - /* workaround for Broadwell HDMI: the first stream is broken, - * so mask it by keeping it as if opened - */ - if (pci->vendor == 0x8086 && pci->device == 0x160c) - chip->azx_dev[0].opened = 1; - /* initialize chip */ azx_init_pci(chip); azx_init_chip(chip, (probe_only[dev] & 2) == 0); @@ -1769,7 +1769,7 @@ static const struct pci_device_id azx_ids[] = { .driver_data = AZX_DRIVER_HDMI | AZX_DCAPS_INTEL_HASWELL }, /* Broadwell */ { PCI_DEVICE(0x8086, 0x160c), - .driver_data = AZX_DRIVER_HDMI | AZX_DCAPS_INTEL_HASWELL }, + .driver_data = AZX_DRIVER_HDMI | AZX_DCAPS_INTEL_BROADWELL }, /* 5 Series/3400 */ { PCI_DEVICE(0x8086, 0x3b56), .driver_data = AZX_DRIVER_SCH | AZX_DCAPS_INTEL_PCH_NOPM }, diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index be0a9ee0b80..3e4417b0ddb 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1594,10 +1594,18 @@ static bool hdmi_present_sense(struct hdmi_spec_per_pin *per_pin, int repoll) * Re-setup pin and infoframe. This is needed e.g. when * - sink is first plugged-in (infoframe is not set up if !monitor_present) * - transcoder can change during stream playback on Haswell + * and this can make the HW reset the converter selection on a pin. 
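The push_ptr and pull_ptr changes in the two fireworks hunks above are the standard ring-buffer wrap correction; a simplified standalone sketch (names hypothetical) of why subtracting the buffer size is the safer form:

	#include <stddef.h>

	static void ring_advance(char **pos, char *buf, size_t bufsize, size_t n)
	{
		*pos += n;
		if (*pos >= buf + bufsize)
			*pos -= bufsize;	/* was "*pos = buf;", which would
						 * silently drop any offset past
						 * the end if the pointer ever
						 * overshoots the boundary */
	}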
*/ - if (eld->eld_valid && !old_eld_valid && per_pin->setup) + if (eld->eld_valid && !old_eld_valid && per_pin->setup) { + if (is_haswell_plus(codec) || is_valleyview(codec)) { + intel_verify_pin_cvt_connect(codec, per_pin); + intel_not_share_assigned_cvt(codec, pin_nid, + per_pin->mux_idx); + } + hdmi_setup_audio_infoframe(codec, per_pin, per_pin->non_pcm); + } } if (eld_changed) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 12fb411adf7..af76995fa96 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -929,6 +929,7 @@ struct alc_codec_rename_pci_table { }; static struct alc_codec_rename_table rename_tbl[] = { + { 0x10ec0221, 0xf00f, 0x1003, "ALC231" }, { 0x10ec0269, 0xfff0, 0x3010, "ALC277" }, { 0x10ec0269, 0xf0f0, 0x2010, "ALC259" }, { 0x10ec0269, 0xf0f0, 0x3010, "ALC258" }, @@ -937,6 +938,7 @@ static struct alc_codec_rename_table rename_tbl[] = { { 0x10ec0269, 0xffff, 0x6023, "ALC281X" }, { 0x10ec0269, 0x00f0, 0x0020, "ALC269VC" }, { 0x10ec0269, 0x00f0, 0x0030, "ALC269VD" }, + { 0x10ec0662, 0xffff, 0x4020, "ALC656" }, { 0x10ec0887, 0x00f0, 0x0030, "ALC887-VD" }, { 0x10ec0888, 0x00f0, 0x0030, "ALC888-VD" }, { 0x10ec0888, 0xf0f0, 0x3020, "ALC886" }, @@ -956,6 +958,19 @@ static struct alc_codec_rename_pci_table rename_pci_tbl[] = { { 0x10ec0293, 0x1028, 0, "ALC3235" }, { 0x10ec0255, 0x1028, 0, "ALC3234" }, { 0x10ec0668, 0x1028, 0, "ALC3661" }, + { 0x10ec0275, 0x1028, 0, "ALC3260" }, + { 0x10ec0899, 0x1028, 0, "ALC3861" }, + { 0x10ec0670, 0x1025, 0, "ALC669X" }, + { 0x10ec0676, 0x1025, 0, "ALC679X" }, + { 0x10ec0282, 0x1043, 0, "ALC3229" }, + { 0x10ec0233, 0x1043, 0, "ALC3236" }, + { 0x10ec0280, 0x103c, 0, "ALC3228" }, + { 0x10ec0282, 0x103c, 0, "ALC3227" }, + { 0x10ec0286, 0x103c, 0, "ALC3242" }, + { 0x10ec0290, 0x103c, 0, "ALC3241" }, + { 0x10ec0668, 0x103c, 0, "ALC3662" }, + { 0x10ec0283, 0x17aa, 0, "ALC3239" }, + { 0x10ec0292, 0x17aa, 0, "ALC3232" }, { } /* terminator */ }; @@ -1412,6 +1427,7 @@ static const struct snd_pci_quirk alc880_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x10c3, "ASUS W5A", ALC880_FIXUP_ASUS_W5A), SND_PCI_QUIRK(0x1043, 0x1964, "ASUS Z71V", ALC880_FIXUP_Z71V), SND_PCI_QUIRK_VENDOR(0x1043, "ASUS", ALC880_FIXUP_GPIO1), + SND_PCI_QUIRK(0x147b, 0x1045, "ABit AA8XE", ALC880_FIXUP_6ST_AUTOMUTE), SND_PCI_QUIRK(0x1558, 0x5401, "Clevo GPIO2", ALC880_FIXUP_GPIO2), SND_PCI_QUIRK_VENDOR(0x1558, "Clevo", ALC880_FIXUP_EAPD_COEF), SND_PCI_QUIRK(0x1584, 0x9050, "Uniwill", ALC880_FIXUP_UNIWILL_DIG), @@ -4230,6 +4246,7 @@ enum { ALC269_FIXUP_HEADSET_MIC, ALC269_FIXUP_QUANTA_MUTE, ALC269_FIXUP_LIFEBOOK, + ALC269_FIXUP_LIFEBOOK_EXTMIC, ALC269_FIXUP_AMIC, ALC269_FIXUP_DMIC, ALC269VB_FIXUP_AMIC, @@ -4367,6 +4384,13 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_QUANTA_MUTE }, + [ALC269_FIXUP_LIFEBOOK_EXTMIC] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x01a1903c }, /* headset mic, with jack detect */ + { } + }, + }, [ALC269_FIXUP_AMIC] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -4741,18 +4765,12 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0614, "Dell Inspiron 3135", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0615, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK), SND_PCI_QUIRK(0x1028, 0x0616, "Dell Vostro 5470", ALC290_FIXUP_SUBWOOFER_HSJACK), - SND_PCI_QUIRK(0x1028, 0x062c, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0638, "Dell 
Inspiron 5439", ALC290_FIXUP_MONO_SPEAKERS_HSJACK), SND_PCI_QUIRK(0x1028, 0x063f, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x064b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1028, 0x064d, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0668, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0669, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1028, 0x0674, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1028, 0x067e, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1028, 0x067f, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1028, 0x0680, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0684, "Dell", ALC269_FIXUP_DELL2_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x15cc, "Dell X5 Precision", ALC269_FIXUP_DELL2_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x15cd, "Dell X5 Precision", ALC269_FIXUP_DELL2_MIC_NO_PRESENCE), @@ -4764,14 +4782,24 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x1983, "HP Pavilion", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x218b, "HP", ALC269_FIXUP_LIMIT_INT_MIC_BOOST_MUTE_LED), /* ALC282 */ + SND_PCI_QUIRK(0x103c, 0x220d, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x220e, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x220f, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2210, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2211, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2212, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2213, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2214, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2266, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2267, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2268, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2269, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x226a, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x226b, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x226c, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x226d, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x226e, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x226f, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x227a, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x227b, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x229e, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), @@ -4811,6 +4839,10 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x22c8, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x22c3, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x22c4, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2334, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2335, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2336, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK_VENDOR(0x103c, "HP", ALC269_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), @@ -4834,6 
+4866,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x104d, 0x9099, "Sony VAIO S13", ALC275_FIXUP_SONY_DISABLE_AAMIX), SND_PCI_QUIRK_VENDOR(0x104d, "Sony VAIO", ALC269_FIXUP_SONY_VAIO), SND_PCI_QUIRK(0x10cf, 0x1475, "Lifebook", ALC269_FIXUP_LIFEBOOK), + SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC), SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x17aa, 0x21b8, "Thinkpad Edge 14", ALC269_FIXUP_SKU_IGNORE), @@ -4977,6 +5010,26 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { #endif .pins = (const struct hda_pintbl[]) { {0x12, 0x90a60160}, + {0x14, 0x90170120}, + {0x17, 0x90170140}, + {0x18, 0x40000000}, + {0x19, 0x411111f0}, + {0x1a, 0x411111f0}, + {0x1b, 0x411111f0}, + {0x1d, 0x41163b05}, + {0x1e, 0x411111f0}, + {0x21, 0x0321102f}, + }, + .value = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + }, + { + .codec = 0x10ec0255, + .subvendor = 0x1028, +#ifdef CONFIG_SND_DEBUG_VERBOSE + .name = "Dell", +#endif + .pins = (const struct hda_pintbl[]) { + {0x12, 0x90a60160}, {0x14, 0x90170130}, {0x17, 0x40000000}, {0x18, 0x411111f0}, @@ -5129,7 +5182,7 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x1d, 0x40700001}, {0x1e, 0x411111f0}, }, - .value = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, + .value = ALC293_FIXUP_DELL1_MIC_NO_PRESENCE, }, {} }; @@ -6014,6 +6067,27 @@ static const struct snd_hda_pin_quirk alc662_pin_fixup_tbl[] = { .name = "Dell", #endif .pins = (const struct hda_pintbl[]) { + {0x12, 0x99a30140}, + {0x14, 0x90170110}, + {0x15, 0x0321101f}, + {0x16, 0x03011020}, + {0x18, 0x40000008}, + {0x19, 0x411111f0}, + {0x1a, 0x411111f0}, + {0x1b, 0x411111f0}, + {0x1d, 0x41000001}, + {0x1e, 0x411111f0}, + {0x1f, 0x411111f0}, + }, + .value = ALC668_FIXUP_AUTO_MUTE, + }, + { + .codec = 0x10ec0668, + .subvendor = 0x1028, +#ifdef CONFIG_SND_DEBUG_VERBOSE + .name = "Dell", +#endif + .pins = (const struct hda_pintbl[]) { {0x12, 0x99a30150}, {0x14, 0x90170110}, {0x15, 0x0321101f}, @@ -6190,6 +6264,7 @@ static const struct hda_codec_preset snd_hda_preset_realtek[] = { { .id = 0x10ec0221, .name = "ALC221", .patch = patch_alc269 }, { .id = 0x10ec0231, .name = "ALC231", .patch = patch_alc269 }, { .id = 0x10ec0233, .name = "ALC233", .patch = patch_alc269 }, + { .id = 0x10ec0235, .name = "ALC233", .patch = patch_alc269 }, { .id = 0x10ec0255, .name = "ALC255", .patch = patch_alc269 }, { .id = 0x10ec0260, .name = "ALC260", .patch = patch_alc260 }, { .id = 0x10ec0262, .name = "ALC262", .patch = patch_alc262 }, @@ -6223,10 +6298,12 @@ static const struct hda_codec_preset snd_hda_preset_realtek[] = { .patch = patch_alc662 }, { .id = 0x10ec0663, .name = "ALC663", .patch = patch_alc662 }, { .id = 0x10ec0665, .name = "ALC665", .patch = patch_alc662 }, + { .id = 0x10ec0667, .name = "ALC667", .patch = patch_alc662 }, { .id = 0x10ec0668, .name = "ALC668", .patch = patch_alc662 }, { .id = 0x10ec0670, .name = "ALC670", .patch = patch_alc662 }, { .id = 0x10ec0671, .name = "ALC671", .patch = patch_alc662 }, { .id = 0x10ec0680, .name = "ALC680", .patch = patch_alc680 }, + { .id = 0x10ec0867, .name = "ALC891", .patch = patch_alc882 }, { .id = 0x10ec0880, .name = "ALC880", .patch = patch_alc880 }, { .id = 0x10ec0882, .name = "ALC882", .patch = patch_alc882 }, { .id = 0x10ec0883, .name = "ALC883", .patch = patch_alc882 }, diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c index 68340d7df76..c91860e0a28 100644 --- 
a/sound/pci/intel8x0.c +++ b/sound/pci/intel8x0.c @@ -2779,7 +2779,7 @@ static void intel8x0_measure_ac97_clock(struct intel8x0 *chip) unsigned long port; unsigned long pos, pos1, t; int civ, timeout = 1000, attempt = 1; - struct timespec start_time, stop_time; + ktime_t start_time, stop_time; if (chip->ac97_bus->clock != 48000) return; /* specified in module option */ @@ -2813,7 +2813,7 @@ static void intel8x0_measure_ac97_clock(struct intel8x0 *chip) iputbyte(chip, port + ICH_REG_OFF_CR, ICH_IOCE); iputdword(chip, ICHREG(ALI_DMACR), 1 << ichdev->ali_slot); } - do_posix_clock_monotonic_gettime(&start_time); + start_time = ktime_get(); spin_unlock_irq(&chip->reg_lock); msleep(50); spin_lock_irq(&chip->reg_lock); @@ -2837,7 +2837,7 @@ static void intel8x0_measure_ac97_clock(struct intel8x0 *chip) pos += ichdev->position; } chip->in_measurement = 0; - do_posix_clock_monotonic_gettime(&stop_time); + stop_time = ktime_get(); /* stop */ if (chip->device_type == DEVICE_ALI) { iputdword(chip, ICHREG(ALI_DMACR), 1 << (ichdev->ali_slot + 16)); @@ -2865,9 +2865,7 @@ static void intel8x0_measure_ac97_clock(struct intel8x0 *chip) } pos /= 4; - t = stop_time.tv_sec - start_time.tv_sec; - t *= 1000000; - t += (stop_time.tv_nsec - start_time.tv_nsec) / 1000; + t = ktime_us_delta(stop_time, start_time); dev_info(chip->card->dev, "%s: measured %lu usecs (%lu samples)\n", __func__, t, pos); if (t == 0) { diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 5b5eb788996..c1b49c36a95 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -1,8 +1,10 @@ /* TODO merge/factor in debugfs.c here */ +#include <ctype.h> #include <errno.h> #include <stdbool.h> #include <stdio.h> +#include <stdlib.h> #include <string.h> #include <sys/vfs.h> @@ -96,12 +98,51 @@ static bool fs__check_mounts(struct fs *fs) return false; } +static void mem_toupper(char *f, size_t len) +{ + while (len) { + *f = toupper(*f); + f++; + len--; + } +} + +/* + * Check for "NAME_PATH" environment variable to override fs location (for + * testing). This matches the recommendation in Documentation/sysfs-rules.txt + * for SYSFS_PATH. + */ +static bool fs__env_override(struct fs *fs) +{ + char *override_path; + size_t name_len = strlen(fs->name); + /* name + "_PATH" + '\0' */ + char upper_name[name_len + 5 + 1]; + memcpy(upper_name, fs->name, name_len); + mem_toupper(upper_name, name_len); + strcpy(&upper_name[name_len], "_PATH"); + + override_path = getenv(upper_name); + if (!override_path) + return false; + + fs->found = true; + strncpy(fs->path, override_path, sizeof(fs->path)); + return true; +} + static const char *fs__get_mountpoint(struct fs *fs) { + if (fs__env_override(fs)) + return fs->path; + if (fs__check_mounts(fs)) return fs->path; - return fs__read_mounts(fs) ? fs->path : NULL; + if (fs__read_mounts(fs)) + return fs->path; + + return NULL; } static const char *fs__mountpoint(int idx) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index c71b0f36d9e..d460049cae8 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -184,9 +184,10 @@ following filters are defined: - in_tx: only when the target is in a hardware transaction - no_tx: only when the target is not in a hardware transaction - abort_tx: only when the target is a hardware transaction abort + - cond: conditional branches + -The option requires at least one branch type among any, any_call, any_ret, ind_call. 
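The tools/lib/api/fs change above lets an environment variable named after the filesystem (e.g. SYSFS_PATH) override the mount-scanning logic. A standalone userland sketch of the same lookup, using only the C standard library (names here are illustrative, not perf source):

	#include <ctype.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	int main(void)
	{
		const char *name = "sysfs";          /* plays the role of fs->name */
		size_t len = strlen(name);
		char var[len + sizeof("_PATH")];     /* upper-cased name + "_PATH" + NUL */
		size_t i;

		for (i = 0; i < len; i++)
			var[i] = toupper((unsigned char)name[i]);
		strcpy(var + len, "_PATH");

		const char *override = getenv(var);
		printf("%s -> %s\n", var,
		       override ? override : "(unset: fall back to mount scan)");
		return 0;
	}

Run with SYSFS_PATH=/tmp/fake-sysfs to see the override take effect; this is what lets a test suite redirect sysfs/procfs lookups without real mounts.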
+The option requires at least one branch type among any, any_call, any_ret, ind_call, cond. The privilege levels may be omitted, in which case, the privilege levels of the associated event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege levels are subject to permissions. When sampling on multiple events, branch stack sampling diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index a1b5185402d..cefdf430d1b 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -111,7 +111,7 @@ OPTIONS --fields=:: Specify output field - multiple keys can be specified in CSV format. Following fields are available: - overhead, overhead_sys, overhead_us, sample and period. + overhead, overhead_sys, overhead_us, overhead_children, sample and period. Also it can contain any sort key(s). By default, every sort keys not specified in -F will be appended @@ -163,6 +163,11 @@ OPTIONS Default: fractal,0.5,callee,function. +--children:: + Accumulate callchain of children to parent entry so that they can + show up in the output. The output will have a new "Children" column + and will be sorted on the data. It requires that callchains be recorded. + --max-stack:: Set the stack depth limit when parsing the callchain, anything beyond the specified depth will be ignored. This is a trade-off diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index dcfa54c851e..180ae02137a 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -119,7 +119,7 @@ Default is to monitor all CPUS. --fields=:: Specify output field - multiple keys can be specified in CSV format. Following fields are available: - overhead, overhead_sys, overhead_us, sample and period. + overhead, overhead_sys, overhead_us, overhead_children, sample and period. Also it can contain any sort key(s). By default, every sort keys not specified in --field will be appended @@ -161,6 +161,12 @@ Default is to monitor all CPUS. Setup and enable call-graph (stack chain/backtrace) recording, implies -g. +--children:: + Accumulate callchain of children to parent entry so that they can + show up in the output. The output will have a new "Children" column + and will be sorted on the data. It requires the -g/--call-graph option + to be enabled. + --max-stack:: Set the stack depth limit when parsing the callchain, anything beyond the specified depth will be ignored.
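The --children option documented above splits every entry into a self and an accumulated (children) share, and the hists_cumulate test added later in this series encodes the same arithmetic. A toy, self-contained model of the accumulation rule (invented names, not perf source): each sample's period is charged as self to the leaf frame only, and as children to every frame of its callchain, which is why pure leaves show equal Self and Children values while pure callers show a Self of zero.

	#include <stdio.h>
	#include <string.h>

	struct entry { const char *sym; unsigned long self, children; };
	static struct entry tab[16];
	static int nr;

	static struct entry *find(const char *sym)
	{
		int i;
		for (i = 0; i < nr; i++)
			if (!strcmp(tab[i].sym, sym))
				return &tab[i];
		tab[nr].sym = sym;
		return &tab[nr++];
	}

	static void add_sample(const char **chain, int len, unsigned long period)
	{
		int i;
		find(chain[0])->self += period;          /* leaf frame only */
		for (i = 0; i < len; i++)                /* every frame, leaf included */
			find(chain[i])->children += period;
	}

	int main(void)
	{
		const char *chain[] = { "malloc", "cmd_record", "run_command", "main" };
		int i;

		add_sample(chain, 4, 1000);
		for (i = 0; i < nr; i++)
			printf("%-12s self=%-5lu children=%lu\n",
			       tab[i].sym, tab[i].self, tab[i].children);
		return 0;
	}

Note that real perf also caches entries per sample so a recursive chain (like the xmalloc cycle in the tests) is not double-counted; the toy above omits that guard.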
This is a trade-off diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 02f0a4dd1a8..ae20edfcc3f 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -400,6 +400,7 @@ LIB_OBJS += $(OUTPUT)tests/hists_common.o LIB_OBJS += $(OUTPUT)tests/hists_link.o LIB_OBJS += $(OUTPUT)tests/hists_filter.o LIB_OBJS += $(OUTPUT)tests/hists_output.o +LIB_OBJS += $(OUTPUT)tests/hists_cumulate.o LIB_OBJS += $(OUTPUT)tests/python-use.o LIB_OBJS += $(OUTPUT)tests/bp_signal.o LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o @@ -788,8 +789,8 @@ help: @echo '' @echo 'Perf install targets:' @echo ' NOTE: documentation build requires asciidoc, xmlto packages to be installed' - @echo ' HINT: use "make prefix=<path> <install target>" to install to a particular' - @echo ' path like make prefix=/usr/local install install-doc' + @echo ' HINT: use "prefix" or "DESTDIR" to install to a particular' + @echo ' path like "make prefix=/usr/local install install-doc"' @echo ' install - install compiled binaries' @echo ' install-doc - install *all* documentation' @echo ' install-man - install manpage documentation' @@ -814,17 +815,20 @@ INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html $(DOC_TARGETS): $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all) +TAG_FOLDERS= . ../lib/traceevent ../lib/api ../lib/symbol +TAG_FILES= ../../include/uapi/linux/perf_event.h + TAGS: $(RM) TAGS - $(FIND) . -name '*.[hcS]' -print | xargs etags -a + $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs etags -a $(TAG_FILES) tags: $(RM) tags - $(FIND) . -name '*.[hcS]' -print | xargs ctags -a + $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs ctags -a $(TAG_FILES) cscope: $(RM) cscope* - $(FIND) . -name '*.[hcS]' -print | xargs cscope -b + $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs cscope -b $(TAG_FILES) ### Detect prefix changes TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\ diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index d30d2c2e2a7..1ec429fef2b 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -65,12 +65,13 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, return 0; } - he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0); + he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0, + true); if (he == NULL) return -ENOMEM; ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); - hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); + hists__inc_nr_samples(&evsel->hists, true); return ret; } diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 8bff543acaa..9a5a035cb42 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -315,7 +315,7 @@ static int hists__add_entry(struct hists *hists, u64 weight, u64 transaction) { if (__hists__add_entry(hists, al, NULL, NULL, NULL, period, weight, - transaction) != NULL) + transaction, true) != NULL) return 0; return -ENOMEM; } diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index e4c85b8f46c..378b85b731a 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -454,7 +454,11 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) if (done) break; err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1); - if (err < 0 && errno == EINTR) + /* + * Propagate error, only if there's any. Ignore positive + * number of returned events and interrupt error. 
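The rule the new comment above spells out is that poll() returning a positive event count, or failing with EINTR, is not an error for the record loop. A minimal userland illustration of that return-value handling (not perf source):

	#include <errno.h>
	#include <poll.h>
	#include <stdio.h>

	int main(void)
	{
		struct pollfd pfd = { .fd = 0, .events = POLLIN };
		int err = poll(&pfd, 1, 10);    /* 10 ms timeout on stdin */

		/* Events ready (err > 0) and EINTR are both benign here:
		 * clear them so only real failures propagate to the caller. */
		if (err > 0 || (err < 0 && errno == EINTR))
			err = 0;

		printf("err = %d\n", err);
		return 0;
	}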
+ */ + if (err > 0 || (err < 0 && errno == EINTR)) err = 0; waking++; } @@ -544,6 +548,7 @@ static const struct branch_mode branch_modes[] = { BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX), BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX), BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX), + BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND), BRANCH_END }; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index bc0eec1ce4b..21d830bafff 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -72,6 +72,10 @@ static int report__config(const char *var, const char *value, void *cb) rep->min_percent = strtof(value, NULL); return 0; } + if (!strcmp(var, "report.children")) { + symbol_conf.cumulate_callchain = perf_config_bool(var, value); + return 0; + } return perf_default_config(var, value, cb); } @@ -85,156 +89,52 @@ static void report__inc_stats(struct report *rep, struct hist_entry *he) */ if (he->stat.nr_events == 1) rep->nr_entries++; - - /* - * Only counts number of samples at this stage as it's more - * natural to do it here and non-sample events are also - * counted in perf_session_deliver_event(). The dump_trace - * requires this info is ready before going to the output tree. - */ - hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE); - if (!he->filtered) - he->hists->stats.nr_non_filtered_samples++; } -static int report__add_mem_hist_entry(struct report *rep, struct addr_location *al, - struct perf_sample *sample, struct perf_evsel *evsel) +static int hist_iter__report_callback(struct hist_entry_iter *iter, + struct addr_location *al, bool single, + void *arg) { - struct symbol *parent = NULL; - struct hist_entry *he; - struct mem_info *mi, *mx; - uint64_t cost; - int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack); - - if (err) - return err; + int err = 0; + struct report *rep = arg; + struct hist_entry *he = iter->he; + struct perf_evsel *evsel = iter->evsel; + struct mem_info *mi; + struct branch_info *bi; - mi = sample__resolve_mem(sample, al); - if (!mi) - return -ENOMEM; + report__inc_stats(rep, he); - if (rep->hide_unresolved && !al->sym) + if (!ui__has_annotation()) return 0; - cost = sample->weight; - if (!cost) - cost = 1; - - /* - * must pass period=weight in order to get the correct - * sorting from hists__collapse_resort() which is solely - * based on periods. We want sorting be done on nr_events * weight - * and this is indirectly achieved by passing period=weight here - * and the he_stat__add_period() function. 
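The comment being removed above documents a subtle trick: hists__collapse_resort() orders entries by their summed period, so feeding each mem-mode sample's weight in as its period makes the effective sort key sum(weight), i.e. nr_events times the mean weight. A toy calculation with made-up numbers:

	#include <stdio.h>

	int main(void)
	{
		/* two mem-mode samples landing on one hist entry */
		unsigned long weights[] = { 30, 50 };
		unsigned long period = 0;
		int i;

		for (i = 0; i < 2; i++)
			period += weights[i];   /* period := weight, per sample */

		/* sort key is 80 == nr_events * mean weight (2 * 40) */
		printf("sort key = %lu\n", period);
		return 0;
	}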
- */ - he = __hists__add_entry(&evsel->hists, al, parent, NULL, mi, - cost, cost, 0); - if (!he) - return -ENOMEM; - - if (ui__has_annotation()) { - err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); - if (err) - goto out; - - mx = he->mem_info; - err = addr_map_symbol__inc_samples(&mx->daddr, evsel->idx); + if (sort__mode == SORT_MODE__BRANCH) { + bi = he->branch_info; + err = addr_map_symbol__inc_samples(&bi->from, evsel->idx); if (err) goto out; - } - - report__inc_stats(rep, he); - - err = hist_entry__append_callchain(he, sample); -out: - return err; -} - -static int report__add_branch_hist_entry(struct report *rep, struct addr_location *al, - struct perf_sample *sample, struct perf_evsel *evsel) -{ - struct symbol *parent = NULL; - unsigned i; - struct hist_entry *he; - struct branch_info *bi, *bx; - int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack); - if (err) - return err; - - bi = sample__resolve_bstack(sample, al); - if (!bi) - return -ENOMEM; - - for (i = 0; i < sample->branch_stack->nr; i++) { - if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym)) - continue; + err = addr_map_symbol__inc_samples(&bi->to, evsel->idx); - err = -ENOMEM; - - /* overwrite the 'al' to branch-to info */ - al->map = bi[i].to.map; - al->sym = bi[i].to.sym; - al->addr = bi[i].to.addr; - /* - * The report shows the percentage of total branches captured - * and not events sampled. Thus we use a pseudo period of 1. - */ - he = __hists__add_entry(&evsel->hists, al, parent, &bi[i], NULL, - 1, 1, 0); - if (he) { - if (ui__has_annotation()) { - bx = he->branch_info; - err = addr_map_symbol__inc_samples(&bx->from, - evsel->idx); - if (err) - goto out; - - err = addr_map_symbol__inc_samples(&bx->to, - evsel->idx); - if (err) - goto out; - } - report__inc_stats(rep, he); - } else + } else if (rep->mem_mode) { + mi = he->mem_info; + err = addr_map_symbol__inc_samples(&mi->daddr, evsel->idx); + if (err) goto out; - } - err = 0; -out: - free(bi); - return err; -} - -static int report__add_hist_entry(struct report *rep, struct perf_evsel *evsel, - struct addr_location *al, struct perf_sample *sample) -{ - struct symbol *parent = NULL; - struct hist_entry *he; - int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack); - - if (err) - return err; - he = __hists__add_entry(&evsel->hists, al, parent, NULL, NULL, - sample->period, sample->weight, - sample->transaction); - if (he == NULL) - return -ENOMEM; - - err = hist_entry__append_callchain(he, sample); - if (err) - goto out; - - if (ui__has_annotation()) err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); - report__inc_stats(rep, he); + } else if (symbol_conf.cumulate_callchain) { + if (single) + err = hist_entry__inc_addr_samples(he, evsel->idx, + al->addr); + } else { + err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); + } out: return err; } - static int process_sample_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -243,6 +143,10 @@ static int process_sample_event(struct perf_tool *tool, { struct report *rep = container_of(tool, struct report, tool); struct addr_location al; + struct hist_entry_iter iter = { + .hide_unresolved = rep->hide_unresolved, + .add_entry_cb = hist_iter__report_callback, + }; int ret; if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { @@ -257,22 +161,23 @@ static int process_sample_event(struct perf_tool *tool, if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) return 0; - if 
(sort__mode == SORT_MODE__BRANCH) { - ret = report__add_branch_hist_entry(rep, &al, sample, evsel); - if (ret < 0) - pr_debug("problem adding lbr entry, skipping event\n"); - } else if (rep->mem_mode == 1) { - ret = report__add_mem_hist_entry(rep, &al, sample, evsel); - if (ret < 0) - pr_debug("problem adding mem entry, skipping event\n"); - } else { - if (al.map != NULL) - al.map->dso->hit = 1; + if (sort__mode == SORT_MODE__BRANCH) + iter.ops = &hist_iter_branch; + else if (rep->mem_mode) + iter.ops = &hist_iter_mem; + else if (symbol_conf.cumulate_callchain) + iter.ops = &hist_iter_cumulative; + else + iter.ops = &hist_iter_normal; + + if (al.map != NULL) + al.map->dso->hit = 1; + + ret = hist_entry_iter__add(&iter, &al, evsel, sample, rep->max_stack, + rep); + if (ret < 0) + pr_debug("problem adding hist entry, skipping event\n"); - ret = report__add_hist_entry(rep, evsel, &al, sample); - if (ret < 0) - pr_debug("problem incrementing symbol period, skipping event\n"); - } return ret; } @@ -329,6 +234,14 @@ static int report__setup_sample_type(struct report *rep) } } + if (symbol_conf.cumulate_callchain) { + /* Silently ignore if callchain is missing */ + if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) { + symbol_conf.cumulate_callchain = false; + perf_hpp__cancel_cumulate(); + } + } + if (sort__mode == SORT_MODE__BRANCH) { if (!is_pipe && !(sample_type & PERF_SAMPLE_BRANCH_STACK)) { @@ -712,6 +625,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). " "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt), + OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, + "Accumulate callchains of children and show total overhead as well"), OPT_INTEGER(0, "max-stack", &report.max_stack, "Set the maximum stack depth when parsing the callchain, " "anything beyond the specified depth will be ignored. " @@ -804,8 +719,10 @@ repeat: has_br_stack = perf_header__has_feat(&session->header, HEADER_BRANCH_STACK); - if (branch_mode == -1 && has_br_stack) + if (branch_mode == -1 && has_br_stack) { sort__mode = SORT_MODE__BRANCH; + symbol_conf.cumulate_callchain = false; + } if (report.mem_mode) { if (sort__mode == SORT_MODE__BRANCH) { @@ -813,6 +730,7 @@ repeat: goto error; } sort__mode = SORT_MODE__MEMORY; + symbol_conf.cumulate_callchain = false; } if (setup_sorting() < 0) { diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index d7176830b9b..c38d06c0477 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1428,7 +1428,7 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_ int err = 0; evsel->hists.stats.total_period += sample->period; - hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); + hists__inc_nr_samples(&evsel->hists, true); if (evsel->handler != NULL) { tracepoint_handler f = evsel->handler; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 5b389ce4cd1..377971dc89a 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -196,6 +196,12 @@ static void perf_top__record_precise_ip(struct perf_top *top, pthread_mutex_unlock(¬es->lock); + /* + * This function is now called with he->hists->lock held. + * Release it before going to sleep. 
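The unlock/relock pair introduced above applies a standard rule: never sleep while holding a lock that other threads contend on. A self-contained pthread sketch of the pattern (names invented, not perf source):

	#include <pthread.h>
	#include <unistd.h>

	static pthread_mutex_t hists_lock = PTHREAD_MUTEX_INITIALIZER;

	/* Caller holds hists_lock; the warning path may sleep, so drop
	 * the lock around the blocking call and retake it afterwards to
	 * preserve the caller's locking invariant. */
	static void warn_and_sleep_locked(void)
	{
		pthread_mutex_unlock(&hists_lock);
		sleep(1);               /* stands in for the ENOMEM warning path */
		pthread_mutex_lock(&hists_lock);
	}

	int main(void)
	{
		pthread_mutex_lock(&hists_lock);
		warn_and_sleep_locked();
		pthread_mutex_unlock(&hists_lock);
		return 0;
	}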
+ */ + pthread_mutex_unlock(&he->hists->lock); + if (err == -ERANGE && !he->ms.map->erange_warned) ui__warn_map_erange(he->ms.map, sym, ip); else if (err == -ENOMEM) { @@ -203,6 +209,8 @@ static void perf_top__record_precise_ip(struct perf_top *top, sym->name); sleep(1); } + + pthread_mutex_lock(&he->hists->lock); } static void perf_top__show_details(struct perf_top *top) @@ -238,27 +246,6 @@ out_unlock: pthread_mutex_unlock(¬es->lock); } -static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel, - struct addr_location *al, - struct perf_sample *sample) -{ - struct hist_entry *he; - - pthread_mutex_lock(&evsel->hists.lock); - he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, - sample->period, sample->weight, - sample->transaction); - pthread_mutex_unlock(&evsel->hists.lock); - if (he == NULL) - return NULL; - - hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); - if (!he->filtered) - evsel->hists.stats.nr_non_filtered_samples++; - - return he; -} - static void perf_top__print_sym_table(struct perf_top *top) { char bf[160]; @@ -662,6 +649,26 @@ static int symbol_filter(struct map *map __maybe_unused, struct symbol *sym) return 0; } +static int hist_iter__top_callback(struct hist_entry_iter *iter, + struct addr_location *al, bool single, + void *arg) +{ + struct perf_top *top = arg; + struct hist_entry *he = iter->he; + struct perf_evsel *evsel = iter->evsel; + + if (sort__has_sym && single) { + u64 ip = al->addr; + + if (al->map) + ip = al->map->unmap_ip(al->map, ip); + + perf_top__record_precise_ip(top, he, evsel->idx, ip); + } + + return 0; +} + static void perf_event__process_sample(struct perf_tool *tool, const union perf_event *event, struct perf_evsel *evsel, @@ -669,8 +676,6 @@ static void perf_event__process_sample(struct perf_tool *tool, struct machine *machine) { struct perf_top *top = container_of(tool, struct perf_top, tool); - struct symbol *parent = NULL; - u64 ip = sample->ip; struct addr_location al; int err; @@ -745,25 +750,23 @@ static void perf_event__process_sample(struct perf_tool *tool, } if (al.sym == NULL || !al.sym->ignore) { - struct hist_entry *he; + struct hist_entry_iter iter = { + .add_entry_cb = hist_iter__top_callback, + }; - err = sample__resolve_callchain(sample, &parent, evsel, &al, - top->max_stack); - if (err) - return; + if (symbol_conf.cumulate_callchain) + iter.ops = &hist_iter_cumulative; + else + iter.ops = &hist_iter_normal; - he = perf_evsel__add_hist_entry(evsel, &al, sample); - if (he == NULL) { - pr_err("Problem incrementing symbol period, skipping event\n"); - return; - } + pthread_mutex_lock(&evsel->hists.lock); - err = hist_entry__append_callchain(he, sample); - if (err) - return; + err = hist_entry_iter__add(&iter, &al, evsel, sample, + top->max_stack, top); + if (err < 0) + pr_err("Problem incrementing symbol period, skipping event\n"); - if (sort__has_sym) - perf_top__record_precise_ip(top, he, evsel->idx, ip); + pthread_mutex_unlock(&evsel->hists.lock); } return; @@ -1001,6 +1004,10 @@ static int perf_top_config(const char *var, const char *value, void *cb) if (!strcmp(var, "top.call-graph")) return record_parse_callchain(value, &top->record_opts); + if (!strcmp(var, "top.children")) { + symbol_conf.cumulate_callchain = perf_config_bool(var, value); + return 0; + } return perf_default_config(var, value, cb); } @@ -1095,6 +1102,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK(0, "call-graph", &top.record_opts, "mode[,dump_size]", record_callchain_help, 
&parse_callchain_opt), + OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, + "Accumulate callchains of children and show total overhead as well"), OPT_INTEGER(0, "max-stack", &top.max_stack, "Set the maximum stack depth when parsing the callchain. " "Default: " __stringify(PERF_MAX_STACK_DEPTH)), @@ -1200,6 +1209,11 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) top.sym_evsel = perf_evlist__first(top.evlist); + if (!symbol_conf.use_callchain) { + symbol_conf.cumulate_callchain = false; + perf_hpp__cancel_cumulate(); + } + symbol_conf.priv_size = sizeof(struct annotation); symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 729bbdf5cec..4f100b54ba8 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -447,6 +447,7 @@ else ifneq ($(feature-libperl), 1) CFLAGS += -DNO_LIBPERL NO_LIBPERL := 1 + msg := $(warning Missing perl devel files. Disabling perl scripting support, consider installing perl-ExtUtils-Embed); else LDFLAGS += $(PERL_EMBED_LDFLAGS) EXTLIBS += $(PERL_EMBED_LIBADD) @@ -599,7 +600,7 @@ endif # Make the path relative to DESTDIR, not to prefix ifndef DESTDIR -prefix = $(HOME) +prefix ?= $(HOME) endif bindir_relative = bin bindir = $(prefix)/$(bindir_relative) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 431798a4110..78f7b920e54 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -481,14 +481,18 @@ int main(int argc, const char **argv) fprintf(stderr, "cannot handle %s internally", cmd); goto out; } -#ifdef HAVE_LIBAUDIT_SUPPORT if (!prefixcmp(cmd, "trace")) { +#ifdef HAVE_LIBAUDIT_SUPPORT set_buildid_dir(); setup_path(); argv[0] = "trace"; return cmd_trace(argc, argv, NULL); - } +#else + fprintf(stderr, + "trace command not available: missing audit-libs devel package at build time.\n"); + goto out; #endif + } /* Look for flags.. 
*/ argv++; argc--; diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 831f52cae19..802e3cd50f6 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -140,6 +140,10 @@ static struct test { .func = test__hists_output, }, { + .desc = "Test cumulation of child hist entries", + .func = test__hists_cumulate, + }, + { .func = NULL, }, }; diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c index e4e01aadc3b..a62c0913451 100644 --- a/tools/perf/tests/hists_common.c +++ b/tools/perf/tests/hists_common.c @@ -12,9 +12,9 @@ static struct { u32 pid; const char *comm; } fake_threads[] = { - { 100, "perf" }, - { 200, "perf" }, - { 300, "bash" }, + { FAKE_PID_PERF1, "perf" }, + { FAKE_PID_PERF2, "perf" }, + { FAKE_PID_BASH, "bash" }, }; static struct { @@ -22,15 +22,15 @@ static struct { u64 start; const char *filename; } fake_mmap_info[] = { - { 100, 0x40000, "perf" }, - { 100, 0x50000, "libc" }, - { 100, 0xf0000, "[kernel]" }, - { 200, 0x40000, "perf" }, - { 200, 0x50000, "libc" }, - { 200, 0xf0000, "[kernel]" }, - { 300, 0x40000, "bash" }, - { 300, 0x50000, "libc" }, - { 300, 0xf0000, "[kernel]" }, + { FAKE_PID_PERF1, FAKE_MAP_PERF, "perf" }, + { FAKE_PID_PERF1, FAKE_MAP_LIBC, "libc" }, + { FAKE_PID_PERF1, FAKE_MAP_KERNEL, "[kernel]" }, + { FAKE_PID_PERF2, FAKE_MAP_PERF, "perf" }, + { FAKE_PID_PERF2, FAKE_MAP_LIBC, "libc" }, + { FAKE_PID_PERF2, FAKE_MAP_KERNEL, "[kernel]" }, + { FAKE_PID_BASH, FAKE_MAP_BASH, "bash" }, + { FAKE_PID_BASH, FAKE_MAP_LIBC, "libc" }, + { FAKE_PID_BASH, FAKE_MAP_KERNEL, "[kernel]" }, }; struct fake_sym { @@ -40,27 +40,30 @@ struct fake_sym { }; static struct fake_sym perf_syms[] = { - { 700, 100, "main" }, - { 800, 100, "run_command" }, - { 900, 100, "cmd_record" }, + { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" }, + { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "run_command" }, + { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "cmd_record" }, }; static struct fake_sym bash_syms[] = { - { 700, 100, "main" }, - { 800, 100, "xmalloc" }, - { 900, 100, "xfree" }, + { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" }, + { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "xmalloc" }, + { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "xfree" }, }; static struct fake_sym libc_syms[] = { { 700, 100, "malloc" }, { 800, 100, "free" }, { 900, 100, "realloc" }, + { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "malloc" }, + { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "free" }, + { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "realloc" }, }; static struct fake_sym kernel_syms[] = { - { 700, 100, "schedule" }, - { 800, 100, "page_fault" }, - { 900, 100, "sys_perf_event_open" }, + { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "schedule" }, + { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "page_fault" }, + { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "sys_perf_event_open" }, }; static struct { @@ -102,7 +105,7 @@ struct machine *setup_fake_machine(struct machines *machines) .pid = fake_mmap_info[i].pid, .tid = fake_mmap_info[i].pid, .start = fake_mmap_info[i].start, - .len = 0x1000ULL, + .len = FAKE_MAP_LENGTH, .pgoff = 0ULL, }, }; @@ -193,10 +196,11 @@ void print_hists_out(struct hists *hists) he = rb_entry(node, struct hist_entry, rb_node); if (!he->filtered) { - pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"\n", + pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"/%"PRIu64"\n", i, thread__comm_str(he->thread), he->thread->tid, he->ms.map->dso->short_name, - he->ms.sym->name, he->stat.period); + he->ms.sym->name, he->stat.period, + he->stat_acc ? 
he->stat_acc->period : 0); } i++; diff --git a/tools/perf/tests/hists_common.h b/tools/perf/tests/hists_common.h index 1415ae69d7b..888254e8665 100644 --- a/tools/perf/tests/hists_common.h +++ b/tools/perf/tests/hists_common.h @@ -4,6 +4,34 @@ struct machine; struct machines; +#define FAKE_PID_PERF1 100 +#define FAKE_PID_PERF2 200 +#define FAKE_PID_BASH 300 + +#define FAKE_MAP_PERF 0x400000 +#define FAKE_MAP_BASH 0x400000 +#define FAKE_MAP_LIBC 0x500000 +#define FAKE_MAP_KERNEL 0xf00000 +#define FAKE_MAP_LENGTH 0x100000 + +#define FAKE_SYM_OFFSET1 700 +#define FAKE_SYM_OFFSET2 800 +#define FAKE_SYM_OFFSET3 900 +#define FAKE_SYM_LENGTH 100 + +#define FAKE_IP_PERF_MAIN FAKE_MAP_PERF + FAKE_SYM_OFFSET1 +#define FAKE_IP_PERF_RUN_COMMAND FAKE_MAP_PERF + FAKE_SYM_OFFSET2 +#define FAKE_IP_PERF_CMD_RECORD FAKE_MAP_PERF + FAKE_SYM_OFFSET3 +#define FAKE_IP_BASH_MAIN FAKE_MAP_BASH + FAKE_SYM_OFFSET1 +#define FAKE_IP_BASH_XMALLOC FAKE_MAP_BASH + FAKE_SYM_OFFSET2 +#define FAKE_IP_BASH_XFREE FAKE_MAP_BASH + FAKE_SYM_OFFSET3 +#define FAKE_IP_LIBC_MALLOC FAKE_MAP_LIBC + FAKE_SYM_OFFSET1 +#define FAKE_IP_LIBC_FREE FAKE_MAP_LIBC + FAKE_SYM_OFFSET2 +#define FAKE_IP_LIBC_REALLOC FAKE_MAP_LIBC + FAKE_SYM_OFFSET3 +#define FAKE_IP_KERNEL_SCHEDULE FAKE_MAP_KERNEL + FAKE_SYM_OFFSET1 +#define FAKE_IP_KERNEL_PAGE_FAULT FAKE_MAP_KERNEL + FAKE_SYM_OFFSET2 +#define FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN FAKE_MAP_KERNEL + FAKE_SYM_OFFSET3 + /* * The setup_fake_machine() provides a test environment which consists * of 3 processes that have 3 mappings and in turn, have 3 symbols @@ -13,7 +41,7 @@ struct machines; * ............. ............. ................... * perf: 100 perf main * perf: 100 perf run_command - * perf: 100 perf comd_record + * perf: 100 perf cmd_record * perf: 100 libc malloc * perf: 100 libc free * perf: 100 libc realloc @@ -22,7 +50,7 @@ struct machines; * perf: 100 [kernel] sys_perf_event_open * perf: 200 perf main * perf: 200 perf run_command - * perf: 200 perf comd_record + * perf: 200 perf cmd_record * perf: 200 libc malloc * perf: 200 libc free * perf: 200 libc realloc diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c new file mode 100644 index 00000000000..0ac240db2e2 --- /dev/null +++ b/tools/perf/tests/hists_cumulate.c @@ -0,0 +1,726 @@ +#include "perf.h" +#include "util/debug.h" +#include "util/symbol.h" +#include "util/sort.h" +#include "util/evsel.h" +#include "util/evlist.h" +#include "util/machine.h" +#include "util/thread.h" +#include "util/parse-events.h" +#include "tests/tests.h" +#include "tests/hists_common.h" + +struct sample { + u32 pid; + u64 ip; + struct thread *thread; + struct map *map; + struct symbol *sym; +}; + +/* For the numbers, see hists_common.c */ +static struct sample fake_samples[] = { + /* perf [kernel] schedule() */ + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, }, + /* perf [perf] main() */ + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, }, + /* perf [perf] cmd_record() */ + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_CMD_RECORD, }, + /* perf [libc] malloc() */ + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, }, + /* perf [libc] free() */ + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, }, + /* perf [perf] main() */ + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, }, + /* perf [kernel] page_fault() */ + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, + /* bash [bash] main() */ + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, }, + /* bash [bash] xmalloc() */ + { .pid = FAKE_PID_BASH, .ip = 
FAKE_IP_BASH_XMALLOC, }, + /* bash [kernel] page_fault() */ + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, +}; + +/* + * Will be cast to struct ip_callchain which has all 64 bit entries + * of nr and ips[]. + */ +static u64 fake_callchains[][10] = { + /* schedule => run_command => main */ + { 3, FAKE_IP_KERNEL_SCHEDULE, FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, }, + /* main */ + { 1, FAKE_IP_PERF_MAIN, }, + /* cmd_record => run_command => main */ + { 3, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, }, + /* malloc => cmd_record => run_command => main */ + { 4, FAKE_IP_LIBC_MALLOC, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND, + FAKE_IP_PERF_MAIN, }, + /* free => cmd_record => run_command => main */ + { 4, FAKE_IP_LIBC_FREE, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND, + FAKE_IP_PERF_MAIN, }, + /* main */ + { 1, FAKE_IP_PERF_MAIN, }, + /* page_fault => sys_perf_event_open => run_command => main */ + { 4, FAKE_IP_KERNEL_PAGE_FAULT, FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN, + FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, }, + /* main */ + { 1, FAKE_IP_BASH_MAIN, }, + /* xmalloc => malloc => xmalloc => malloc => xmalloc => main */ + { 6, FAKE_IP_BASH_XMALLOC, FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_XMALLOC, + FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_XMALLOC, FAKE_IP_BASH_MAIN, }, + /* page_fault => malloc => main */ + { 3, FAKE_IP_KERNEL_PAGE_FAULT, FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_MAIN, }, +}; + +static int add_hist_entries(struct hists *hists, struct machine *machine) +{ + struct addr_location al; + struct perf_evsel *evsel = hists_to_evsel(hists); + struct perf_sample sample = { .period = 1000, }; + size_t i; + + for (i = 0; i < ARRAY_SIZE(fake_samples); i++) { + const union perf_event event = { + .header = { + .misc = PERF_RECORD_MISC_USER, + }, + }; + struct hist_entry_iter iter = { + .hide_unresolved = false, + }; + + if (symbol_conf.cumulate_callchain) + iter.ops = &hist_iter_cumulative; + else + iter.ops = &hist_iter_normal; + + sample.pid = fake_samples[i].pid; + sample.tid = fake_samples[i].pid; + sample.ip = fake_samples[i].ip; + sample.callchain = (struct ip_callchain *)fake_callchains[i]; + + if (perf_event__preprocess_sample(&event, machine, &al, + &sample) < 0) + goto out; + + if (hist_entry_iter__add(&iter, &al, evsel, &sample, + PERF_MAX_STACK_DEPTH, NULL) < 0) + goto out; + + fake_samples[i].thread = al.thread; + fake_samples[i].map = al.map; + fake_samples[i].sym = al.sym; + } + + return TEST_OK; + +out: + pr_debug("Not enough memory for adding a hist entry\n"); + return TEST_FAIL; +} + +static void del_hist_entries(struct hists *hists) +{ + struct hist_entry *he; + struct rb_root *root_in; + struct rb_root *root_out; + struct rb_node *node; + + if (sort__need_collapse) + root_in = &hists->entries_collapsed; + else + root_in = hists->entries_in; + + root_out = &hists->entries; + + while (!RB_EMPTY_ROOT(root_out)) { + node = rb_first(root_out); + + he = rb_entry(node, struct hist_entry, rb_node); + rb_erase(node, root_out); + rb_erase(&he->rb_node_in, root_in); + hist_entry__free(he); + } +} + +typedef int (*test_fn_t)(struct perf_evsel *, struct machine *); + +#define COMM(he) (thread__comm_str(he->thread)) +#define DSO(he) (he->ms.map->dso->short_name) +#define SYM(he) (he->ms.sym->name) +#define CPU(he) (he->cpu) +#define PID(he) (he->thread->tid) +#define DEPTH(he) (he->callchain->max_depth) +#define CDSO(cl) (cl->ms.map->dso->short_name) +#define CSYM(cl) (cl->ms.sym->name) + +struct result { + u64 children; + u64 self; + const char
*comm; + const char *dso; + const char *sym; +}; + +struct callchain_result { + u64 nr; + struct { + const char *dso; + const char *sym; + } node[10]; +}; + +static int do_test(struct hists *hists, struct result *expected, size_t nr_expected, + struct callchain_result *expected_callchain, size_t nr_callchain) +{ + char buf[32]; + size_t i, c; + struct hist_entry *he; + struct rb_root *root; + struct rb_node *node; + struct callchain_node *cnode; + struct callchain_list *clist; + + /* + * adding and deleting hist entries must be done outside of this + * function since TEST_ASSERT_VAL() returns in case of failure. + */ + hists__collapse_resort(hists, NULL); + hists__output_resort(hists); + + if (verbose > 2) { + pr_info("use callchain: %d, cumulate callchain: %d\n", + symbol_conf.use_callchain, + symbol_conf.cumulate_callchain); + print_hists_out(hists); + } + + root = &hists->entries; + for (node = rb_first(root), i = 0; + node && (he = rb_entry(node, struct hist_entry, rb_node)); + node = rb_next(node), i++) { + scnprintf(buf, sizeof(buf), "Invalid hist entry #%zd", i); + + TEST_ASSERT_VAL("Incorrect number of hist entry", + i < nr_expected); + TEST_ASSERT_VAL(buf, he->stat.period == expected[i].self && + !strcmp(COMM(he), expected[i].comm) && + !strcmp(DSO(he), expected[i].dso) && + !strcmp(SYM(he), expected[i].sym)); + + if (symbol_conf.cumulate_callchain) + TEST_ASSERT_VAL(buf, he->stat_acc->period == expected[i].children); + + if (!symbol_conf.use_callchain) + continue; + + /* check callchain entries */ + root = &he->callchain->node.rb_root; + cnode = rb_entry(rb_first(root), struct callchain_node, rb_node); + + c = 0; + list_for_each_entry(clist, &cnode->val, list) { + scnprintf(buf, sizeof(buf), "Invalid callchain entry #%zd/%zd", i, c); + + TEST_ASSERT_VAL("Incorrect number of callchain entry", + c < expected_callchain[i].nr); + TEST_ASSERT_VAL(buf, + !strcmp(CDSO(clist), expected_callchain[i].node[c].dso) && + !strcmp(CSYM(clist), expected_callchain[i].node[c].sym)); + c++; + } + /* TODO: handle multiple child nodes properly */ + TEST_ASSERT_VAL("Incorrect number of callchain entry", + c <= expected_callchain[i].nr); + } + TEST_ASSERT_VAL("Incorrect number of hist entry", + i == nr_expected); + TEST_ASSERT_VAL("Incorrect number of callchain entry", + !symbol_conf.use_callchain || nr_expected == nr_callchain); + return 0; +} + +/* NO callchain + NO children */ +static int test1(struct perf_evsel *evsel, struct machine *machine) +{ + int err; + struct hists *hists = &evsel->hists; + /* + * expected output: + * + * Overhead Command Shared Object Symbol + * ======== ======= ============= ============== + * 20.00% perf perf [.] main + * 10.00% bash [kernel] [k] page_fault + * 10.00% bash bash [.] main + * 10.00% bash bash [.] xmalloc + * 10.00% perf [kernel] [k] page_fault + * 10.00% perf [kernel] [k] schedule + * 10.00% perf libc [.] free + * 10.00% perf libc [.] malloc + * 10.00% perf perf [.] 
cmd_record + */ + struct result expected[] = { + { 0, 2000, "perf", "perf", "main" }, + { 0, 1000, "bash", "[kernel]", "page_fault" }, + { 0, 1000, "bash", "bash", "main" }, + { 0, 1000, "bash", "bash", "xmalloc" }, + { 0, 1000, "perf", "[kernel]", "page_fault" }, + { 0, 1000, "perf", "[kernel]", "schedule" }, + { 0, 1000, "perf", "libc", "free" }, + { 0, 1000, "perf", "libc", "malloc" }, + { 0, 1000, "perf", "perf", "cmd_record" }, + }; + + symbol_conf.use_callchain = false; + symbol_conf.cumulate_callchain = false; + + setup_sorting(); + callchain_register_param(&callchain_param); + + err = add_hist_entries(hists, machine); + if (err < 0) + goto out; + + err = do_test(hists, expected, ARRAY_SIZE(expected), NULL, 0); + +out: + del_hist_entries(hists); + reset_output_field(); + return err; +} + +/* callchain + NO children */ +static int test2(struct perf_evsel *evsel, struct machine *machine) +{ + int err; + struct hists *hists = &evsel->hists; + /* + * expected output: + * + * Overhead Command Shared Object Symbol + * ======== ======= ============= ============== + * 20.00% perf perf [.] main + * | + * --- main + * + * 10.00% bash [kernel] [k] page_fault + * | + * --- page_fault + * malloc + * main + * + * 10.00% bash bash [.] main + * | + * --- main + * + * 10.00% bash bash [.] xmalloc + * | + * --- xmalloc + * malloc + * xmalloc <--- NOTE: there's a cycle + * malloc + * xmalloc + * main + * + * 10.00% perf [kernel] [k] page_fault + * | + * --- page_fault + * sys_perf_event_open + * run_command + * main + * + * 10.00% perf [kernel] [k] schedule + * | + * --- schedule + * run_command + * main + * + * 10.00% perf libc [.] free + * | + * --- free + * cmd_record + * run_command + * main + * + * 10.00% perf libc [.] malloc + * | + * --- malloc + * cmd_record + * run_command + * main + * + * 10.00% perf perf [.]
cmd_record + * | + * --- cmd_record + * run_command + * main + * + */ + struct result expected[] = { + { 0, 2000, "perf", "perf", "main" }, + { 0, 1000, "bash", "[kernel]", "page_fault" }, + { 0, 1000, "bash", "bash", "main" }, + { 0, 1000, "bash", "bash", "xmalloc" }, + { 0, 1000, "perf", "[kernel]", "page_fault" }, + { 0, 1000, "perf", "[kernel]", "schedule" }, + { 0, 1000, "perf", "libc", "free" }, + { 0, 1000, "perf", "libc", "malloc" }, + { 0, 1000, "perf", "perf", "cmd_record" }, + }; + struct callchain_result expected_callchain[] = { + { + 1, { { "perf", "main" }, }, + }, + { + 3, { { "[kernel]", "page_fault" }, + { "libc", "malloc" }, + { "bash", "main" }, }, + }, + { + 1, { { "bash", "main" }, }, + }, + { + 6, { { "bash", "xmalloc" }, + { "libc", "malloc" }, + { "bash", "xmalloc" }, + { "libc", "malloc" }, + { "bash", "xmalloc" }, + { "bash", "main" }, }, + }, + { + 4, { { "[kernel]", "page_fault" }, + { "[kernel]", "sys_perf_event_open" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 3, { { "[kernel]", "schedule" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 4, { { "libc", "free" }, + { "perf", "cmd_record" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 4, { { "libc", "malloc" }, + { "perf", "cmd_record" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 3, { { "perf", "cmd_record" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + }; + + symbol_conf.use_callchain = true; + symbol_conf.cumulate_callchain = false; + + setup_sorting(); + callchain_register_param(&callchain_param); + + err = add_hist_entries(hists, machine); + if (err < 0) + goto out; + + err = do_test(hists, expected, ARRAY_SIZE(expected), + expected_callchain, ARRAY_SIZE(expected_callchain)); + +out: + del_hist_entries(hists); + reset_output_field(); + return err; +} + +/* NO callchain + children */ +static int test3(struct perf_evsel *evsel, struct machine *machine) +{ + int err; + struct hists *hists = &evsel->hists; + /* + * expected output: + * + * Children Self Command Shared Object Symbol + * ======== ======== ======= ============= ======================= + * 70.00% 20.00% perf perf [.] main + * 50.00% 0.00% perf perf [.] run_command + * 30.00% 10.00% bash bash [.] main + * 30.00% 10.00% perf perf [.] cmd_record + * 20.00% 0.00% bash libc [.] malloc + * 10.00% 10.00% bash [kernel] [k] page_fault + * 10.00% 10.00% perf [kernel] [k] schedule + * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open + * 10.00% 10.00% perf [kernel] [k] page_fault + * 10.00% 10.00% perf libc [.] free + * 10.00% 10.00% perf libc [.] malloc + * 10.00% 10.00% bash bash [.] 
xmalloc + */ + struct result expected[] = { + { 7000, 2000, "perf", "perf", "main" }, + { 5000, 0, "perf", "perf", "run_command" }, + { 3000, 1000, "bash", "bash", "main" }, + { 3000, 1000, "perf", "perf", "cmd_record" }, + { 2000, 0, "bash", "libc", "malloc" }, + { 1000, 1000, "bash", "[kernel]", "page_fault" }, + { 1000, 1000, "perf", "[kernel]", "schedule" }, + { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" }, + { 1000, 1000, "perf", "[kernel]", "page_fault" }, + { 1000, 1000, "perf", "libc", "free" }, + { 1000, 1000, "perf", "libc", "malloc" }, + { 1000, 1000, "bash", "bash", "xmalloc" }, + }; + + symbol_conf.use_callchain = false; + symbol_conf.cumulate_callchain = true; + + setup_sorting(); + callchain_register_param(&callchain_param); + + err = add_hist_entries(hists, machine); + if (err < 0) + goto out; + + err = do_test(hists, expected, ARRAY_SIZE(expected), NULL, 0); + +out: + del_hist_entries(hists); + reset_output_field(); + return err; +} + +/* callchain + children */ +static int test4(struct perf_evsel *evsel, struct machine *machine) +{ + int err; + struct hists *hists = &evsel->hists; + /* + * expected output: + * + * Children Self Command Shared Object Symbol + * ======== ======== ======= ============= ======================= + * 70.00% 20.00% perf perf [.] main + * | + * --- main + * + * 50.00% 0.00% perf perf [.] run_command + * | + * --- run_command + * main + * + * 30.00% 10.00% bash bash [.] main + * | + * --- main + * + * 30.00% 10.00% perf perf [.] cmd_record + * | + * --- cmd_record + * run_command + * main + * + * 20.00% 0.00% bash libc [.] malloc + * | + * --- malloc + * | + * |--50.00%-- xmalloc + * | main + * --50.00%-- main + * + * 10.00% 10.00% bash [kernel] [k] page_fault + * | + * --- page_fault + * malloc + * main + * + * 10.00% 10.00% perf [kernel] [k] schedule + * | + * --- schedule + * run_command + * main + * + * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open + * | + * --- sys_perf_event_open + * run_command + * main + * + * 10.00% 10.00% perf [kernel] [k] page_fault + * | + * --- page_fault + * sys_perf_event_open + * run_command + * main + * + * 10.00% 10.00% perf libc [.] free + * | + * --- free + * cmd_record + * run_command + * main + * + * 10.00% 10.00% perf libc [.] malloc + * | + * --- malloc + * cmd_record + * run_command + * main + * + * 10.00% 10.00% bash bash [.] 
xmalloc + * | + * --- xmalloc + * malloc + * xmalloc <--- NOTE: there's a cycle + * malloc + * xmalloc + * main + * + */ + struct result expected[] = { + { 7000, 2000, "perf", "perf", "main" }, + { 5000, 0, "perf", "perf", "run_command" }, + { 3000, 1000, "bash", "bash", "main" }, + { 3000, 1000, "perf", "perf", "cmd_record" }, + { 2000, 0, "bash", "libc", "malloc" }, + { 1000, 1000, "bash", "[kernel]", "page_fault" }, + { 1000, 1000, "perf", "[kernel]", "schedule" }, + { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" }, + { 1000, 1000, "perf", "[kernel]", "page_fault" }, + { 1000, 1000, "perf", "libc", "free" }, + { 1000, 1000, "perf", "libc", "malloc" }, + { 1000, 1000, "bash", "bash", "xmalloc" }, + }; + struct callchain_result expected_callchain[] = { + { + 1, { { "perf", "main" }, }, + }, + { + 2, { { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 1, { { "bash", "main" }, }, + }, + { + 3, { { "perf", "cmd_record" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 4, { { "libc", "malloc" }, + { "bash", "xmalloc" }, + { "bash", "main" }, + { "bash", "main" }, }, + }, + { + 3, { { "[kernel]", "page_fault" }, + { "libc", "malloc" }, + { "bash", "main" }, }, + }, + { + 3, { { "[kernel]", "schedule" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 3, { { "[kernel]", "sys_perf_event_open" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 4, { { "[kernel]", "page_fault" }, + { "[kernel]", "sys_perf_event_open" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 4, { { "libc", "free" }, + { "perf", "cmd_record" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 4, { { "libc", "malloc" }, + { "perf", "cmd_record" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { + 6, { { "bash", "xmalloc" }, + { "libc", "malloc" }, + { "bash", "xmalloc" }, + { "libc", "malloc" }, + { "bash", "xmalloc" }, + { "bash", "main" }, }, + }, + }; + + symbol_conf.use_callchain = true; + symbol_conf.cumulate_callchain = true; + + setup_sorting(); + callchain_register_param(&callchain_param); + + err = add_hist_entries(hists, machine); + if (err < 0) + goto out; + + err = do_test(hists, expected, ARRAY_SIZE(expected), + expected_callchain, ARRAY_SIZE(expected_callchain)); + +out: + del_hist_entries(hists); + reset_output_field(); + return err; +} + +int test__hists_cumulate(void) +{ + int err = TEST_FAIL; + struct machines machines; + struct machine *machine; + struct perf_evsel *evsel; + struct perf_evlist *evlist = perf_evlist__new(); + size_t i; + test_fn_t testcases[] = { + test1, + test2, + test3, + test4, + }; + + TEST_ASSERT_VAL("No memory", evlist); + + err = parse_events(evlist, "cpu-clock"); + if (err) + goto out; + + machines__init(&machines); + + /* setup threads/dso/map/symbols also */ + machine = setup_fake_machine(&machines); + if (!machine) + goto out; + + if (verbose > 1) + machine__fprintf(machine, stderr); + + evsel = perf_evlist__first(evlist); + + for (i = 0; i < ARRAY_SIZE(testcases); i++) { + err = testcases[i](evsel, machine); + if (err < 0) + break; + } + +out: + /* tear down everything */ + perf_evlist__delete(evlist); + machines__exit(&machines); + + return err; +} diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index c5ba924a358..821f581fd93 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -21,33 +21,33 @@ struct sample { /* For the numbers, see hists_common.c */ static struct sample fake_samples[] = { /* 
perf [kernel] schedule() */ - { .pid = 100, .ip = 0xf0000 + 700, }, + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, }, /* perf [perf] main() */ - { .pid = 100, .ip = 0x40000 + 700, }, + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, }, /* perf [libc] malloc() */ - { .pid = 100, .ip = 0x50000 + 700, }, + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, }, /* perf [perf] main() */ - { .pid = 200, .ip = 0x40000 + 700, }, /* will be merged */ + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, }, /* will be merged */ /* perf [perf] cmd_record() */ - { .pid = 200, .ip = 0x40000 + 900, }, + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, }, /* perf [kernel] page_fault() */ - { .pid = 200, .ip = 0xf0000 + 800, }, + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, /* bash [bash] main() */ - { .pid = 300, .ip = 0x40000 + 700, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, }, /* bash [bash] xmalloc() */ - { .pid = 300, .ip = 0x40000 + 800, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, }, /* bash [libc] malloc() */ - { .pid = 300, .ip = 0x50000 + 700, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, }, /* bash [kernel] page_fault() */ - { .pid = 300, .ip = 0xf0000 + 800, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, }; -static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) +static int add_hist_entries(struct perf_evlist *evlist, + struct machine *machine __maybe_unused) { struct perf_evsel *evsel; struct addr_location al; - struct hist_entry *he; - struct perf_sample sample = { .cpu = 0, }; + struct perf_sample sample = { .period = 100, }; size_t i; /* @@ -62,6 +62,10 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) .misc = PERF_RECORD_MISC_USER, }, }; + struct hist_entry_iter iter = { + .ops = &hist_iter_normal, + .hide_unresolved = false, + }; /* make sure it has no filter at first */ evsel->hists.thread_filter = NULL; @@ -76,18 +80,13 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) &sample) < 0) goto out; - he = __hists__add_entry(&evsel->hists, &al, NULL, - NULL, NULL, 100, 1, 0); - if (he == NULL) + if (hist_entry_iter__add(&iter, &al, evsel, &sample, + PERF_MAX_STACK_DEPTH, NULL) < 0) goto out; fake_samples[i].thread = al.thread; fake_samples[i].map = al.map; fake_samples[i].sym = al.sym; - - hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE); - if (!he->filtered) - he->hists->stats.nr_non_filtered_samples++; } } diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index 5ffa2c3eb77..d4b34b0f50a 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -21,41 +21,41 @@ struct sample { /* For the numbers, see hists_common.c */ static struct sample fake_common_samples[] = { /* perf [kernel] schedule() */ - { .pid = 100, .ip = 0xf0000 + 700, }, + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, }, /* perf [perf] main() */ - { .pid = 200, .ip = 0x40000 + 700, }, + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, }, /* perf [perf] cmd_record() */ - { .pid = 200, .ip = 0x40000 + 900, }, + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, }, /* bash [bash] xmalloc() */ - { .pid = 300, .ip = 0x40000 + 800, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, }, /* bash [libc] malloc() */ - { .pid = 300, .ip = 0x50000 + 700, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, }, }; static struct sample fake_samples[][5] = { { /* perf [perf] run_command() */ - { 
.pid = 100, .ip = 0x40000 + 800, }, + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_RUN_COMMAND, }, /* perf [libc] malloc() */ - { .pid = 100, .ip = 0x50000 + 700, }, + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, }, /* perf [kernel] page_fault() */ - { .pid = 100, .ip = 0xf0000 + 800, }, + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, /* perf [kernel] sys_perf_event_open() */ - { .pid = 200, .ip = 0xf0000 + 900, }, + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN, }, /* bash [libc] free() */ - { .pid = 300, .ip = 0x50000 + 800, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_FREE, }, }, { /* perf [libc] free() */ - { .pid = 200, .ip = 0x50000 + 800, }, + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_LIBC_FREE, }, /* bash [libc] malloc() */ - { .pid = 300, .ip = 0x50000 + 700, }, /* will be merged */ + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, }, /* will be merged */ /* bash [bash] xfee() */ - { .pid = 300, .ip = 0x40000 + 900, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XFREE, }, /* bash [libc] realloc() */ - { .pid = 300, .ip = 0x50000 + 900, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_REALLOC, }, /* bash [kernel] page_fault() */ - { .pid = 300, .ip = 0xf0000 + 800, }, + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, }, }; @@ -64,7 +64,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) struct perf_evsel *evsel; struct addr_location al; struct hist_entry *he; - struct perf_sample sample = { .cpu = 0, }; + struct perf_sample sample = { .period = 1, }; size_t i = 0, k; /* @@ -88,7 +88,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) goto out; he = __hists__add_entry(&evsel->hists, &al, NULL, - NULL, NULL, 1, 1, 0); + NULL, NULL, 1, 1, 0, true); if (he == NULL) goto out; @@ -112,7 +112,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) goto out; he = __hists__add_entry(&evsel->hists, &al, NULL, - NULL, NULL, 1, 1, 0); + NULL, NULL, 1, 1, 0, true); if (he == NULL) goto out; diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index a1685055179..e3bbd6c54c1 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -22,31 +22,31 @@ struct sample { /* For the numbers, see hists_common.c */ static struct sample fake_samples[] = { /* perf [kernel] schedule() */ - { .cpu = 0, .pid = 100, .ip = 0xf0000 + 700, }, + { .cpu = 0, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, }, /* perf [perf] main() */ - { .cpu = 1, .pid = 100, .ip = 0x40000 + 700, }, + { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, }, /* perf [perf] cmd_record() */ - { .cpu = 1, .pid = 100, .ip = 0x40000 + 900, }, + { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_CMD_RECORD, }, /* perf [libc] malloc() */ - { .cpu = 1, .pid = 100, .ip = 0x50000 + 700, }, + { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, }, /* perf [libc] free() */ - { .cpu = 2, .pid = 100, .ip = 0x50000 + 800, }, + { .cpu = 2, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, }, /* perf [perf] main() */ - { .cpu = 2, .pid = 200, .ip = 0x40000 + 700, }, + { .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, }, /* perf [kernel] page_fault() */ - { .cpu = 2, .pid = 200, .ip = 0xf0000 + 800, }, + { .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, /* bash [bash] main() */ - { .cpu = 3, .pid = 300, .ip = 0x40000 + 700, }, + { .cpu = 3, .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, }, /* bash 
[bash] xmalloc() */ - { .cpu = 0, .pid = 300, .ip = 0x40000 + 800, }, + { .cpu = 0, .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, }, /* bash [kernel] page_fault() */ - { .cpu = 1, .pid = 300, .ip = 0xf0000 + 800, }, + { .cpu = 1, .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, }; static int add_hist_entries(struct hists *hists, struct machine *machine) { struct addr_location al; - struct hist_entry *he; + struct perf_evsel *evsel = hists_to_evsel(hists); struct perf_sample sample = { .period = 100, }; size_t i; @@ -56,6 +56,10 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) .misc = PERF_RECORD_MISC_USER, }, }; + struct hist_entry_iter iter = { + .ops = &hist_iter_normal, + .hide_unresolved = false, + }; sample.cpu = fake_samples[i].cpu; sample.pid = fake_samples[i].pid; @@ -66,9 +70,8 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) &sample) < 0) goto out; - he = __hists__add_entry(hists, &al, NULL, NULL, NULL, - sample.period, 1, 0); - if (he == NULL) + if (hist_entry_iter__add(&iter, &al, evsel, &sample, + PERF_MAX_STACK_DEPTH, NULL) < 0) goto out; fake_samples[i].thread = al.thread; diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index d76c0e2e663..022bb68fd9c 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -45,6 +45,7 @@ int test__hists_filter(void); int test__mmap_thread_lookup(void); int test__thread_mg_share(void); int test__hists_output(void); +int test__hists_cumulate(void); #if defined(__x86_64__) || defined(__i386__) || defined(__arm__) #ifdef HAVE_DWARF_UNWIND_SUPPORT diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index d11541d4d7d..3ccf6e14f89 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -194,7 +194,7 @@ int ui_browser__warning(struct ui_browser *browser, int timeout, ui_helpline__vpush(format, args); va_end(args); } else { - while ((key == ui__question_window("Warning!", text, + while ((key = ui__question_window("Warning!", text, "Press any key...", timeout)) == K_RESIZE) ui_browser__handle_resize(browser); diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 1c331b934ff..52c03fbbba1 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -37,7 +37,6 @@ static int hists__browser_title(struct hists *hists, char *bf, size_t size, static void hist_browser__update_nr_entries(struct hist_browser *hb); static struct rb_node *hists__filter_entries(struct rb_node *nd, - struct hists *hists, float min_pcnt); static bool hist_browser__has_filter(struct hist_browser *hb) @@ -319,7 +318,7 @@ __hist_browser__set_folding(struct hist_browser *browser, bool unfold) struct hists *hists = browser->hists; for (nd = rb_first(&hists->entries); - (nd = hists__filter_entries(nd, hists, browser->min_pcnt)) != NULL; + (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL; nd = rb_next(nd)) { struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); hist_entry__set_folding(he, unfold); @@ -651,13 +650,36 @@ hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,\ __hpp__slsmg_color_printf, true); \ } +#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \ +static u64 __hpp_get_acc_##_field(struct hist_entry *he) \ +{ \ + return he->stat_acc->_field; \ +} \ + \ +static int \ +hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,\ + struct perf_hpp *hpp, \ + struct hist_entry *he) \ +{ \ + if (!symbol_conf.cumulate_callchain) { \ + int ret = 
scnprintf(hpp->buf, hpp->size, "%8s", "N/A"); \ + slsmg_printf("%s", hpp->buf); \ + \ + return ret; \ + } \ + return __hpp__fmt(hpp, he, __hpp_get_acc_##_field, " %6.2f%%", \ + __hpp__slsmg_color_printf, true); \ +} + __HPP_COLOR_PERCENT_FN(overhead, period) __HPP_COLOR_PERCENT_FN(overhead_sys, period_sys) __HPP_COLOR_PERCENT_FN(overhead_us, period_us) __HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys) __HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us) +__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period) #undef __HPP_COLOR_PERCENT_FN +#undef __HPP_COLOR_ACC_PERCENT_FN void hist_browser__init_hpp(void) { @@ -671,6 +693,8 @@ void hist_browser__init_hpp(void) hist_browser__hpp_color_overhead_guest_sys; perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color = hist_browser__hpp_color_overhead_guest_us; + perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color = + hist_browser__hpp_color_overhead_acc; } static int hist_browser__show_entry(struct hist_browser *browser, @@ -783,15 +807,12 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser) for (nd = browser->top; nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - u64 total = hists__total_period(h->hists); - float percent = 0.0; + float percent; if (h->filtered) continue; - if (total) - percent = h->stat.period * 100.0 / total; - + percent = hist_entry__get_percent_limit(h); if (percent < hb->min_pcnt) continue; @@ -804,16 +825,11 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser) } static struct rb_node *hists__filter_entries(struct rb_node *nd, - struct hists *hists, float min_pcnt) { while (nd != NULL) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - u64 total = hists__total_period(hists); - float percent = 0.0; - - if (total) - percent = h->stat.period * 100.0 / total; + float percent = hist_entry__get_percent_limit(h); if (!h->filtered && percent >= min_pcnt) return nd; @@ -825,16 +841,11 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd, } static struct rb_node *hists__filter_prev_entries(struct rb_node *nd, - struct hists *hists, float min_pcnt) { while (nd != NULL) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - u64 total = hists__total_period(hists); - float percent = 0.0; - - if (total) - percent = h->stat.period * 100.0 / total; + float percent = hist_entry__get_percent_limit(h); if (!h->filtered && percent >= min_pcnt) return nd; @@ -863,14 +874,14 @@ static void ui_browser__hists_seek(struct ui_browser *browser, switch (whence) { case SEEK_SET: nd = hists__filter_entries(rb_first(browser->entries), - hb->hists, hb->min_pcnt); + hb->min_pcnt); break; case SEEK_CUR: nd = browser->top; goto do_offset; case SEEK_END: nd = hists__filter_prev_entries(rb_last(browser->entries), - hb->hists, hb->min_pcnt); + hb->min_pcnt); first = false; break; default: @@ -913,8 +924,7 @@ do_offset: break; } } - nd = hists__filter_entries(rb_next(nd), hb->hists, - hb->min_pcnt); + nd = hists__filter_entries(rb_next(nd), hb->min_pcnt); if (nd == NULL) break; --offset; @@ -947,7 +957,7 @@ do_offset: } } - nd = hists__filter_prev_entries(rb_prev(nd), hb->hists, + nd = hists__filter_prev_entries(rb_prev(nd), hb->min_pcnt); if (nd == NULL) break; @@ -1126,7 +1136,6 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser, static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp) { struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries), - browser->hists, browser->min_pcnt); int 
printed = 0; @@ -1134,8 +1143,7 @@ static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp) struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); printed += hist_browser__fprintf_entry(browser, h, fp); - nd = hists__filter_entries(rb_next(nd), browser->hists, - browser->min_pcnt); + nd = hists__filter_entries(rb_next(nd), browser->min_pcnt); } return printed; @@ -1372,8 +1380,7 @@ static void hist_browser__update_nr_entries(struct hist_browser *hb) return; } - while ((nd = hists__filter_entries(nd, hb->hists, - hb->min_pcnt)) != NULL) { + while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) { nr_entries++; nd = rb_next(nd); } @@ -1699,14 +1706,14 @@ zoom_dso: zoom_out_dso: ui_helpline__pop(); browser->hists->dso_filter = NULL; - sort_dso.elide = false; + perf_hpp__set_elide(HISTC_DSO, false); } else { if (dso == NULL) continue; ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"", dso->kernel ? "the Kernel" : dso->short_name); browser->hists->dso_filter = dso; - sort_dso.elide = true; + perf_hpp__set_elide(HISTC_DSO, true); pstack__push(fstack, &browser->hists->dso_filter); } hists__filter_by_dso(hists); @@ -1718,13 +1725,13 @@ zoom_thread: zoom_out_thread: ui_helpline__pop(); browser->hists->thread_filter = NULL; - sort_thread.elide = false; + perf_hpp__set_elide(HISTC_THREAD, false); } else { ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"", thread->comm_set ? thread__comm_str(thread) : "", thread->tid); browser->hists->thread_filter = thread; - sort_thread.elide = true; + perf_hpp__set_elide(HISTC_THREAD, true); pstack__push(fstack, &browser->hists->thread_filter); } hists__filter_by_thread(hists);
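
The TUI hunks above, and the GTK and stdio hunks below, all drop their open-coded "h->stat.period * 100.0 / total" computation in favor of hist_entry__get_percent_limit(), which substitutes the accumulated (children) period when cumulation is enabled, so --percentage filtering keeps an entry whose self overhead is tiny but whose children overhead is large. A minimal standalone sketch of that selection logic follows; the struct and names here are illustrative stand-ins, not the real perf definitions.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Stand-ins for he->stat.period and he->stat_acc->period. */
struct fake_entry {
        uint64_t self_period;   /* period attributed to the entry itself */
        uint64_t acc_period;    /* self plus everything accumulated from callees */
};

static bool cumulate_callchain = true;  /* symbol_conf.cumulate_callchain analogue */

/* Mirrors the shape of hist_entry__get_percent_limit(). */
static float entry_percent(const struct fake_entry *e, uint64_t total_period)
{
        uint64_t period = e->self_period;

        if (total_period == 0)
                return 0;

        if (cumulate_callchain)
                period = e->acc_period;   /* filter on "Children", not "Self" */

        return period * 100.0 / total_period;
}

int main(void)
{
        struct fake_entry e = { .self_period = 5, .acc_period = 80 };

        /* With a min_pcnt of 10, this entry survives only because the
         * accumulated period is used: 5% self vs 80% children. */
        printf("self-only: %.1f%%  cumulated: %.1f%%\n",
               e.self_period * 100.0 / 100, entry_percent(&e, 100));
        return 0;
}
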
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 9d90683914d..6ca60e482cd 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -47,11 +47,26 @@ static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, __percent_color_snprintf, true); \ } +#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \ +static u64 he_get_acc_##_field(struct hist_entry *he) \ +{ \ + return he->stat_acc->_field; \ +} \ + \ +static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ + struct perf_hpp *hpp, \ + struct hist_entry *he) \ +{ \ + return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, " %6.2f%%", \ + __percent_color_snprintf, true); \ +} + __HPP_COLOR_PERCENT_FN(overhead, period) __HPP_COLOR_PERCENT_FN(overhead_sys, period_sys) __HPP_COLOR_PERCENT_FN(overhead_us, period_us) __HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys) __HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us) +__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period) #undef __HPP_COLOR_PERCENT_FN @@ -68,6 +83,8 @@ void perf_gtk__init_hpp(void) perf_gtk__hpp_color_overhead_guest_sys; perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color = perf_gtk__hpp_color_overhead_guest_us; + perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color = + perf_gtk__hpp_color_overhead_acc; } static void callchain_list__sym_name(struct callchain_list *cl, @@ -181,6 +198,13 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, if (perf_hpp__should_skip(fmt)) continue; + /* + * XXX no way to determine where the symbol column is.. + * Just use last column for now. + */ + if (perf_hpp__is_sort_entry(fmt)) + sym_col = col_idx; + fmt->header(fmt, &hpp, hists_to_evsel(hists)); gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view), @@ -209,14 +233,12 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); GtkTreeIter iter; u64 total = hists__total_period(h->hists); - float percent = 0.0; + float percent; if (h->filtered) continue; - if (total) - percent = h->stat.period * 100.0 / total; - + percent = hist_entry__get_percent_limit(h); if (percent < min_pcnt) continue; @@ -238,7 +260,8 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, if (symbol_conf.use_callchain && sort__has_sym) { if (callchain_param.mode == CHAIN_GRAPH_REL) - total = h->stat.period; + total = symbol_conf.cumulate_callchain ? + h->stat_acc->period : h->stat.period; perf_gtk__add_callchain(&h->sorted_chain, store, &iter, sym_col, total); diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 4484f5bd1b1..498adb23c02 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -104,6 +104,18 @@ int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, return ret; } +int __hpp__fmt_acc(struct perf_hpp *hpp, struct hist_entry *he, + hpp_field_fn get_field, const char *fmt, + hpp_snprint_fn print_fn, bool fmt_percent) +{ + if (!symbol_conf.cumulate_callchain) { + return snprintf(hpp->buf, hpp->size, "%*s", + fmt_percent ? 8 : 12, "N/A"); + } + + return __hpp__fmt(hpp, he, get_field, fmt, print_fn, fmt_percent); +} + static int field_cmp(u64 field_a, u64 field_b) { if (field_a > field_b) @@ -160,6 +172,24 @@ out: return ret; } +static int64_t __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b, + hpp_field_fn get_field) +{ + s64 ret = 0; + + if (symbol_conf.cumulate_callchain) { + /* + * Put caller above callee when they have equal period. + */ + ret = field_cmp(get_field(a), get_field(b)); + if (ret) + return ret; + + ret = b->callchain->max_depth - a->callchain->max_depth; + } + return ret; +} + #define __HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \ static int hpp__header_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ struct perf_hpp *hpp, \ @@ -242,6 +272,34 @@ static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \ return __hpp__sort(a, b, he_get_##_field); \ } +#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \ +static u64 he_get_acc_##_field(struct hist_entry *he) \ +{ \ + return he->stat_acc->_field; \ +} \ + \ +static int hpp__color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ + struct perf_hpp *hpp, struct hist_entry *he) \ +{ \ + return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, " %6.2f%%", \ + hpp_color_scnprintf, true); \ +} + +#define __HPP_ENTRY_ACC_PERCENT_FN(_type, _field) \ +static int hpp__entry_##_type(struct perf_hpp_fmt *_fmt __maybe_unused, \ + struct perf_hpp *hpp, struct hist_entry *he) \ +{ \ + const char *fmt = symbol_conf.field_sep ?
" %.2f" : " %6.2f%%"; \ + return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, fmt, \ + hpp_entry_scnprintf, true); \ +} + +#define __HPP_SORT_ACC_FN(_type, _field) \ +static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \ +{ \ + return __hpp__sort_acc(a, b, he_get_acc_##_field); \ +} + #define __HPP_ENTRY_RAW_FN(_type, _field) \ static u64 he_get_raw_##_field(struct hist_entry *he) \ { \ @@ -270,18 +328,27 @@ __HPP_COLOR_PERCENT_FN(_type, _field) \ __HPP_ENTRY_PERCENT_FN(_type, _field) \ __HPP_SORT_FN(_type, _field) +#define HPP_PERCENT_ACC_FNS(_type, _str, _field, _min_width, _unit_width)\ +__HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \ +__HPP_WIDTH_FN(_type, _min_width, _unit_width) \ +__HPP_COLOR_ACC_PERCENT_FN(_type, _field) \ +__HPP_ENTRY_ACC_PERCENT_FN(_type, _field) \ +__HPP_SORT_ACC_FN(_type, _field) + #define HPP_RAW_FNS(_type, _str, _field, _min_width, _unit_width) \ __HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \ __HPP_WIDTH_FN(_type, _min_width, _unit_width) \ __HPP_ENTRY_RAW_FN(_type, _field) \ __HPP_SORT_RAW_FN(_type, _field) +__HPP_HEADER_FN(overhead_self, "Self", 8, 8) HPP_PERCENT_FNS(overhead, "Overhead", period, 8, 8) HPP_PERCENT_FNS(overhead_sys, "sys", period_sys, 8, 8) HPP_PERCENT_FNS(overhead_us, "usr", period_us, 8, 8) HPP_PERCENT_FNS(overhead_guest_sys, "guest sys", period_guest_sys, 9, 8) HPP_PERCENT_FNS(overhead_guest_us, "guest usr", period_guest_us, 9, 8) +HPP_PERCENT_ACC_FNS(overhead_acc, "Children", period, 8, 8) HPP_RAW_FNS(samples, "Samples", nr_events, 12, 12) HPP_RAW_FNS(period, "Period", period, 12, 12) @@ -303,6 +370,17 @@ static int64_t hpp__nop_cmp(struct hist_entry *a __maybe_unused, .sort = hpp__sort_ ## _name, \ } +#define HPP__COLOR_ACC_PRINT_FNS(_name) \ + { \ + .header = hpp__header_ ## _name, \ + .width = hpp__width_ ## _name, \ + .color = hpp__color_ ## _name, \ + .entry = hpp__entry_ ## _name, \ + .cmp = hpp__nop_cmp, \ + .collapse = hpp__nop_cmp, \ + .sort = hpp__sort_ ## _name, \ + } + #define HPP__PRINT_FNS(_name) \ { \ .header = hpp__header_ ## _name, \ @@ -319,6 +397,7 @@ struct perf_hpp_fmt perf_hpp__format[] = { HPP__COLOR_PRINT_FNS(overhead_us), HPP__COLOR_PRINT_FNS(overhead_guest_sys), HPP__COLOR_PRINT_FNS(overhead_guest_us), + HPP__COLOR_ACC_PRINT_FNS(overhead_acc), HPP__PRINT_FNS(samples), HPP__PRINT_FNS(period) }; @@ -328,16 +407,23 @@ LIST_HEAD(perf_hpp__sort_list); #undef HPP__COLOR_PRINT_FNS +#undef HPP__COLOR_ACC_PRINT_FNS #undef HPP__PRINT_FNS #undef HPP_PERCENT_FNS +#undef HPP_PERCENT_ACC_FNS #undef HPP_RAW_FNS #undef __HPP_HEADER_FN #undef __HPP_WIDTH_FN #undef __HPP_COLOR_PERCENT_FN #undef __HPP_ENTRY_PERCENT_FN +#undef __HPP_COLOR_ACC_PERCENT_FN +#undef __HPP_ENTRY_ACC_PERCENT_FN #undef __HPP_ENTRY_RAW_FN +#undef __HPP_SORT_FN +#undef __HPP_SORT_ACC_FN +#undef __HPP_SORT_RAW_FN void perf_hpp__init(void) @@ -361,6 +447,13 @@ void perf_hpp__init(void) if (field_order) return; + if (symbol_conf.cumulate_callchain) { + perf_hpp__column_enable(PERF_HPP__OVERHEAD_ACC); + + perf_hpp__format[PERF_HPP__OVERHEAD].header = + hpp__header_overhead_self; + } + perf_hpp__column_enable(PERF_HPP__OVERHEAD); if (symbol_conf.show_cpu_utilization) { @@ -383,6 +476,12 @@ void perf_hpp__init(void) list = &perf_hpp__format[PERF_HPP__OVERHEAD].sort_list; if (list_empty(list)) list_add(list, &perf_hpp__sort_list); + + if (symbol_conf.cumulate_callchain) { + list = &perf_hpp__format[PERF_HPP__OVERHEAD_ACC].sort_list; + if (list_empty(list)) + list_add(list, &perf_hpp__sort_list); + } } void 
perf_hpp__column_register(struct perf_hpp_fmt *format) @@ -390,6 +489,11 @@ void perf_hpp__column_register(struct perf_hpp_fmt *format) list_add_tail(&format->list, &perf_hpp__list); } +void perf_hpp__column_unregister(struct perf_hpp_fmt *format) +{ + list_del(&format->list); +} + void perf_hpp__register_sort_field(struct perf_hpp_fmt *format) { list_add_tail(&format->sort_list, &perf_hpp__sort_list); @@ -401,6 +505,21 @@ void perf_hpp__column_enable(unsigned col) perf_hpp__column_register(&perf_hpp__format[col]); } +void perf_hpp__column_disable(unsigned col) +{ + BUG_ON(col >= PERF_HPP__MAX_INDEX); + perf_hpp__column_unregister(&perf_hpp__format[col]); +} + +void perf_hpp__cancel_cumulate(void) +{ + if (field_order) + return; + + perf_hpp__column_disable(PERF_HPP__OVERHEAD_ACC); + perf_hpp__format[PERF_HPP__OVERHEAD].header = hpp__header_overhead; +} + void perf_hpp__setup_output_field(void) { struct perf_hpp_fmt *fmt; diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 9f57991025a..90122abd372 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -271,7 +271,9 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he, { switch (callchain_param.mode) { case CHAIN_GRAPH_REL: - return callchain__fprintf_graph(fp, &he->sorted_chain, he->stat.period, + return callchain__fprintf_graph(fp, &he->sorted_chain, + symbol_conf.cumulate_callchain ? + he->stat_acc->period : he->stat.period, left_margin); break; case CHAIN_GRAPH_ABS: @@ -461,12 +463,12 @@ print_entries: for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - float percent = h->stat.period * 100.0 / - hists->stats.total_period; + float percent; if (h->filtered) continue; + percent = hist_entry__get_percent_limit(h); if (percent < min_pcnt) continue; diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 9a42382b392..48b6d3f5001 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -616,7 +616,8 @@ int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent if (sample->callchain == NULL) return 0; - if (symbol_conf.use_callchain || sort__has_parent) { + if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain || + sort__has_parent) { return machine__resolve_callchain(al->machine, evsel, al->thread, sample, parent, al, max_stack); } @@ -629,3 +630,45 @@ int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *samp return 0; return callchain_append(he->callchain, &callchain_cursor, sample->period); } + +int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node, + bool hide_unresolved) +{ + al->map = node->map; + al->sym = node->sym; + if (node->map) + al->addr = node->map->map_ip(node->map, node->ip); + else + al->addr = node->ip; + + if (al->sym == NULL) { + if (hide_unresolved) + return 0; + if (al->map == NULL) + goto out; + } + + if (al->map->groups == &al->machine->kmaps) { + if (machine__is_host(al->machine)) { + al->cpumode = PERF_RECORD_MISC_KERNEL; + al->level = 'k'; + } else { + al->cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + al->level = 'g'; + } + } else { + if (machine__is_host(al->machine)) { + al->cpumode = PERF_RECORD_MISC_USER; + al->level = '.'; + } else if (perf_guest) { + al->cpumode = PERF_RECORD_MISC_GUEST_USER; + al->level = 'u'; + } else { + al->cpumode = PERF_RECORD_MISC_HYPERVISOR; + al->level = 'H'; + } + } + +out: + return 1; +} diff --git a/tools/perf/util/callchain.h 
b/tools/perf/util/callchain.h index bde2b0cc24c..8f84423a75d 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -162,7 +162,18 @@ int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent struct perf_evsel *evsel, struct addr_location *al, int max_stack); int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample); +int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node, + bool hide_unresolved); extern const char record_callchain_help[]; int parse_callchain_report_opt(const char *arg); + +static inline void callchain_cursor_snapshot(struct callchain_cursor *dest, + struct callchain_cursor *src) +{ + *dest = *src; + + dest->first = src->curr; + dest->nr -= src->pos; +} #endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index b262b44b7a6..5a0a4b2cadc 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -4,6 +4,7 @@ #include "session.h" #include "sort.h" #include "evsel.h" +#include "annotate.h" #include <math.h> static bool hists__filter_entry_by_dso(struct hists *hists, @@ -231,6 +232,8 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) return true; he_stat__decay(&he->stat); + if (symbol_conf.cumulate_callchain) + he_stat__decay(he->stat_acc); diff = prev_period - he->stat.period; @@ -276,14 +279,31 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel) * histogram, sorted on item, collects periods */ -static struct hist_entry *hist_entry__new(struct hist_entry *template) +static struct hist_entry *hist_entry__new(struct hist_entry *template, + bool sample_self) { - size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0; - struct hist_entry *he = zalloc(sizeof(*he) + callchain_size); + size_t callchain_size = 0; + struct hist_entry *he; + + if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) + callchain_size = sizeof(struct callchain_root); + + he = zalloc(sizeof(*he) + callchain_size); if (he != NULL) { *he = *template; + if (symbol_conf.cumulate_callchain) { + he->stat_acc = malloc(sizeof(he->stat)); + if (he->stat_acc == NULL) { + free(he); + return NULL; + } + memcpy(he->stat_acc, &he->stat, sizeof(he->stat)); + if (!sample_self) + memset(&he->stat, 0, sizeof(he->stat)); + } + if (he->ms.map) he->ms.map->referenced = true; @@ -295,6 +315,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template) */ he->branch_info = malloc(sizeof(*he->branch_info)); if (he->branch_info == NULL) { + free(he->stat_acc); free(he); return NULL; } @@ -333,7 +354,8 @@ static u8 symbol__parent_filter(const struct symbol *parent) static struct hist_entry *add_hist_entry(struct hists *hists, struct hist_entry *entry, - struct addr_location *al) + struct addr_location *al, + bool sample_self) { struct rb_node **p; struct rb_node *parent = NULL; @@ -357,7 +379,10 @@ static struct hist_entry *add_hist_entry(struct hists *hists, cmp = hist_entry__cmp(he, entry); if (!cmp) { - he_stat__add_period(&he->stat, period, weight); + if (sample_self) + he_stat__add_period(&he->stat, period, weight); + if (symbol_conf.cumulate_callchain) + he_stat__add_period(he->stat_acc, period, weight); /* * This mem info was allocated from sample__resolve_mem @@ -385,14 +410,17 @@ static struct hist_entry *add_hist_entry(struct hists *hists, p = &(*p)->rb_right; } - he = hist_entry__new(entry); + he = hist_entry__new(entry, sample_self); if (!he) return NULL; 
rb_link_node(&he->rb_node_in, parent, p); rb_insert_color(&he->rb_node_in, hists->entries_in); out: - he_stat__add_cpumode_period(&he->stat, al->cpumode, period); + if (sample_self) + he_stat__add_cpumode_period(&he->stat, al->cpumode, period); + if (symbol_conf.cumulate_callchain) + he_stat__add_cpumode_period(he->stat_acc, al->cpumode, period); return he; } @@ -401,7 +429,8 @@ struct hist_entry *__hists__add_entry(struct hists *hists, struct symbol *sym_parent, struct branch_info *bi, struct mem_info *mi, - u64 period, u64 weight, u64 transaction) + u64 period, u64 weight, u64 transaction, + bool sample_self) { struct hist_entry entry = { .thread = al->thread, @@ -426,7 +455,429 @@ struct hist_entry *__hists__add_entry(struct hists *hists, .transaction = transaction, }; - return add_hist_entry(hists, &entry, al); + return add_hist_entry(hists, &entry, al, sample_self); +} + +static int +iter_next_nop_entry(struct hist_entry_iter *iter __maybe_unused, + struct addr_location *al __maybe_unused) +{ + return 0; +} + +static int +iter_add_next_nop_entry(struct hist_entry_iter *iter __maybe_unused, + struct addr_location *al __maybe_unused) +{ + return 0; +} + +static int +iter_prepare_mem_entry(struct hist_entry_iter *iter, struct addr_location *al) +{ + struct perf_sample *sample = iter->sample; + struct mem_info *mi; + + mi = sample__resolve_mem(sample, al); + if (mi == NULL) + return -ENOMEM; + + iter->priv = mi; + return 0; +} + +static int +iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al) +{ + u64 cost; + struct mem_info *mi = iter->priv; + struct hist_entry *he; + + if (mi == NULL) + return -EINVAL; + + cost = iter->sample->weight; + if (!cost) + cost = 1; + + /* + * must pass period=weight in order to get the correct + * sorting from hists__collapse_resort() which is solely + * based on periods. We want sorting to be done on nr_events * weight + * and this is indirectly achieved by passing period=weight here + * and by the he_stat__add_period() function. + */ + he = __hists__add_entry(&iter->evsel->hists, al, iter->parent, NULL, mi, + cost, cost, 0, true); + if (!he) + return -ENOMEM; + + iter->he = he; + return 0; +} + +static int +iter_finish_mem_entry(struct hist_entry_iter *iter, + struct addr_location *al __maybe_unused) +{ + struct perf_evsel *evsel = iter->evsel; + struct hist_entry *he = iter->he; + int err = -EINVAL; + + if (he == NULL) + goto out; + + hists__inc_nr_samples(&evsel->hists, he->filtered); + + err = hist_entry__append_callchain(he, iter->sample); + +out: + /* + * We don't need to free iter->priv (mem_info) here since + * the mem info was either already freed in add_hist_entry() or + * passed to a new hist entry by hist_entry__new().
+ */ + iter->priv = NULL; + + iter->he = NULL; + return err; +} + +static int +iter_prepare_branch_entry(struct hist_entry_iter *iter, struct addr_location *al) +{ + struct branch_info *bi; + struct perf_sample *sample = iter->sample; + + bi = sample__resolve_bstack(sample, al); + if (!bi) + return -ENOMEM; + + iter->curr = 0; + iter->total = sample->branch_stack->nr; + + iter->priv = bi; + return 0; +} + +static int +iter_add_single_branch_entry(struct hist_entry_iter *iter __maybe_unused, + struct addr_location *al __maybe_unused) +{ + /* to avoid calling callback function */ + iter->he = NULL; + + return 0; +} + +static int +iter_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al) +{ + struct branch_info *bi = iter->priv; + int i = iter->curr; + + if (bi == NULL) + return 0; + + if (iter->curr >= iter->total) + return 0; + + al->map = bi[i].to.map; + al->sym = bi[i].to.sym; + al->addr = bi[i].to.addr; + return 1; +} + +static int +iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al) +{ + struct branch_info *bi; + struct perf_evsel *evsel = iter->evsel; + struct hist_entry *he = NULL; + int i = iter->curr; + int err = 0; + + bi = iter->priv; + + if (iter->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym)) + goto out; + + /* + * The report shows the percentage of total branches captured + * and not events sampled. Thus we use a pseudo period of 1. + */ + he = __hists__add_entry(&evsel->hists, al, iter->parent, &bi[i], NULL, + 1, 1, 0, true); + if (he == NULL) + return -ENOMEM; + + hists__inc_nr_samples(&evsel->hists, he->filtered); + +out: + iter->he = he; + iter->curr++; + return err; +} + +static int +iter_finish_branch_entry(struct hist_entry_iter *iter, + struct addr_location *al __maybe_unused) +{ + zfree(&iter->priv); + iter->he = NULL; + + return iter->curr >= iter->total ? 0 : -1; +} + +static int +iter_prepare_normal_entry(struct hist_entry_iter *iter __maybe_unused, + struct addr_location *al __maybe_unused) +{ + return 0; +} + +static int +iter_add_single_normal_entry(struct hist_entry_iter *iter, struct addr_location *al) +{ + struct perf_evsel *evsel = iter->evsel; + struct perf_sample *sample = iter->sample; + struct hist_entry *he; + + he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL, + sample->period, sample->weight, + sample->transaction, true); + if (he == NULL) + return -ENOMEM; + + iter->he = he; + return 0; +} + +static int +iter_finish_normal_entry(struct hist_entry_iter *iter, + struct addr_location *al __maybe_unused) +{ + struct hist_entry *he = iter->he; + struct perf_evsel *evsel = iter->evsel; + struct perf_sample *sample = iter->sample; + + if (he == NULL) + return 0; + + iter->he = NULL; + + hists__inc_nr_samples(&evsel->hists, he->filtered); + + return hist_entry__append_callchain(he, sample); +} + +static int +iter_prepare_cumulative_entry(struct hist_entry_iter *iter __maybe_unused, + struct addr_location *al __maybe_unused) +{ + struct hist_entry **he_cache; + + callchain_cursor_commit(&callchain_cursor); + + /* + * This is for detecting cycles or recursions so that they're + * cumulated only one time to prevent entries more than 100% + * overhead. 
+ */ + he_cache = malloc(sizeof(*he_cache) * (PERF_MAX_STACK_DEPTH + 1)); + if (he_cache == NULL) + return -ENOMEM; + + iter->priv = he_cache; + iter->curr = 0; + + return 0; +} + +static int +iter_add_single_cumulative_entry(struct hist_entry_iter *iter, + struct addr_location *al) +{ + struct perf_evsel *evsel = iter->evsel; + struct perf_sample *sample = iter->sample; + struct hist_entry **he_cache = iter->priv; + struct hist_entry *he; + int err = 0; + + he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL, + sample->period, sample->weight, + sample->transaction, true); + if (he == NULL) + return -ENOMEM; + + iter->he = he; + he_cache[iter->curr++] = he; + + callchain_append(he->callchain, &callchain_cursor, sample->period); + + /* + * We need to re-initialize the cursor since callchain_append() + * advanced the cursor to the end. + */ + callchain_cursor_commit(&callchain_cursor); + + hists__inc_nr_samples(&evsel->hists, he->filtered); + + return err; +} + +static int +iter_next_cumulative_entry(struct hist_entry_iter *iter, + struct addr_location *al) +{ + struct callchain_cursor_node *node; + + node = callchain_cursor_current(&callchain_cursor); + if (node == NULL) + return 0; + + return fill_callchain_info(al, node, iter->hide_unresolved); +} + +static int +iter_add_next_cumulative_entry(struct hist_entry_iter *iter, + struct addr_location *al) +{ + struct perf_evsel *evsel = iter->evsel; + struct perf_sample *sample = iter->sample; + struct hist_entry **he_cache = iter->priv; + struct hist_entry *he; + struct hist_entry he_tmp = { + .cpu = al->cpu, + .thread = al->thread, + .comm = thread__comm(al->thread), + .ip = al->addr, + .ms = { + .map = al->map, + .sym = al->sym, + }, + .parent = iter->parent, + }; + int i; + struct callchain_cursor cursor; + + callchain_cursor_snapshot(&cursor, &callchain_cursor); + + callchain_cursor_advance(&callchain_cursor); + + /* + * Check if there's duplicate entries in the callchain. + * It's possible that it has cycles or recursive calls. 
+ */ + for (i = 0; i < iter->curr; i++) { + if (hist_entry__cmp(he_cache[i], &he_tmp) == 0) { + /* to avoid calling callback function */ + iter->he = NULL; + return 0; + } + } + + he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL, + sample->period, sample->weight, + sample->transaction, false); + if (he == NULL) + return -ENOMEM; + + iter->he = he; + he_cache[iter->curr++] = he; + + callchain_append(he->callchain, &cursor, sample->period); + return 0; +} + +static int +iter_finish_cumulative_entry(struct hist_entry_iter *iter, + struct addr_location *al __maybe_unused) +{ + zfree(&iter->priv); + iter->he = NULL; + + return 0; +} + +const struct hist_iter_ops hist_iter_mem = { + .prepare_entry = iter_prepare_mem_entry, + .add_single_entry = iter_add_single_mem_entry, + .next_entry = iter_next_nop_entry, + .add_next_entry = iter_add_next_nop_entry, + .finish_entry = iter_finish_mem_entry, +}; + +const struct hist_iter_ops hist_iter_branch = { + .prepare_entry = iter_prepare_branch_entry, + .add_single_entry = iter_add_single_branch_entry, + .next_entry = iter_next_branch_entry, + .add_next_entry = iter_add_next_branch_entry, + .finish_entry = iter_finish_branch_entry, +}; + +const struct hist_iter_ops hist_iter_normal = { + .prepare_entry = iter_prepare_normal_entry, + .add_single_entry = iter_add_single_normal_entry, + .next_entry = iter_next_nop_entry, + .add_next_entry = iter_add_next_nop_entry, + .finish_entry = iter_finish_normal_entry, +}; + +const struct hist_iter_ops hist_iter_cumulative = { + .prepare_entry = iter_prepare_cumulative_entry, + .add_single_entry = iter_add_single_cumulative_entry, + .next_entry = iter_next_cumulative_entry, + .add_next_entry = iter_add_next_cumulative_entry, + .finish_entry = iter_finish_cumulative_entry, +}; + +int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, + struct perf_evsel *evsel, struct perf_sample *sample, + int max_stack_depth, void *arg) +{ + int err, err2; + + err = sample__resolve_callchain(sample, &iter->parent, evsel, al, + max_stack_depth); + if (err) + return err; + + iter->evsel = evsel; + iter->sample = sample; + + err = iter->ops->prepare_entry(iter, al); + if (err) + goto out; + + err = iter->ops->add_single_entry(iter, al); + if (err) + goto out; + + if (iter->he && iter->add_entry_cb) { + err = iter->add_entry_cb(iter, al, true, arg); + if (err) + goto out; + } + + while (iter->ops->next_entry(iter, al)) { + err = iter->ops->add_next_entry(iter, al); + if (err) + break; + + if (iter->he && iter->add_entry_cb) { + err = iter->add_entry_cb(iter, al, false, arg); + if (err) + goto out; + } + } + +out: + err2 = iter->ops->finish_entry(iter, al); + if (!err) + err = err2; + + return err; } int64_t @@ -469,6 +920,7 @@ void hist_entry__free(struct hist_entry *he) { zfree(&he->branch_info); zfree(&he->mem_info); + zfree(&he->stat_acc); free_srcline(he->srcline); free(he); } @@ -494,6 +946,8 @@ static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, if (!cmp) { he_stat__add_stat(&iter->stat, &he->stat); + if (symbol_conf.cumulate_callchain) + he_stat__add_stat(iter->stat_acc, he->stat_acc); if (symbol_conf.use_callchain) { callchain_cursor_reset(&callchain_cursor); @@ -800,6 +1254,13 @@ void hists__inc_nr_events(struct hists *hists, u32 type) events_stats__inc(&hists->stats, type); } +void hists__inc_nr_samples(struct hists *hists, bool filtered) +{ + events_stats__inc(&hists->stats, PERF_RECORD_SAMPLE); + if (!filtered) + hists->stats.nr_non_filtered_samples++; +} + 
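
hist_entry_iter__add() above is the generic driver behind all four iterators: a sample type supplies the five hist_iter_ops callbacks, the driver adds one "single" entry, then loops next/add_next for any remaining entries (branch pairs, or callchain parents for cumulation), and always runs finish so per-iteration state is released even on error. Below is a stripped-down, self-contained sketch of that control flow; none of the types or names are the real perf ones, and the toy ops simply walk three entries.

#include <stdio.h>

/* Illustrative reduction of struct hist_iter_ops / hist_entry_iter. */
struct iter;
struct iter_ops {
        int (*prepare)(struct iter *);
        int (*add_single)(struct iter *);
        int (*next)(struct iter *);      /* returns nonzero while entries remain */
        int (*add_next)(struct iter *);
        int (*finish)(struct iter *);
};

struct iter {
        const struct iter_ops *ops;
        int curr, total;
};

/* Same shape as hist_entry_iter__add(): prepare, one single entry,
 * then next/add_next until exhausted, and finish unconditionally. */
static int iter_add(struct iter *it)
{
        int err, err2;

        err = it->ops->prepare(it);
        if (err)
                goto out;

        err = it->ops->add_single(it);
        if (err)
                goto out;

        while (it->ops->next(it)) {
                err = it->ops->add_next(it);
                if (err)
                        break;
        }
out:
        err2 = it->ops->finish(it);
        return err ? err : err2;
}

/* A toy "branch"-style implementation that consumes three entries. */
static int toy_prepare(struct iter *it) { it->curr = 0; it->total = 3; return 0; }
static int toy_add_single(struct iter *it) { (void)it; return 0; }
static int toy_next(struct iter *it) { return it->curr < it->total; }
static int toy_add_next(struct iter *it) { return printf("entry %d\n", it->curr++) < 0; }
static int toy_finish(struct iter *it) { return it->curr >= it->total ? 0 : -1; }

static const struct iter_ops toy_ops = {
        .prepare    = toy_prepare,
        .add_single = toy_add_single,
        .next       = toy_next,
        .add_next   = toy_add_next,
        .finish     = toy_finish,
};

int main(void)
{
        struct iter it = { .ops = &toy_ops };
        return iter_add(&it);
}
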
static struct hist_entry *hists__add_dummy_entry(struct hists *hists, struct hist_entry *pair) { @@ -831,7 +1292,7 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists, p = &(*p)->rb_right; } - he = hist_entry__new(pair); + he = hist_entry__new(pair, true); if (he) { memset(&he->stat, 0, sizeof(he->stat)); he->hists = hists; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index a8418d19808..d2bf03575d5 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -96,12 +96,50 @@ struct hists { u16 col_len[HISTC_NR_COLS]; }; +struct hist_entry_iter; + +struct hist_iter_ops { + int (*prepare_entry)(struct hist_entry_iter *, struct addr_location *); + int (*add_single_entry)(struct hist_entry_iter *, struct addr_location *); + int (*next_entry)(struct hist_entry_iter *, struct addr_location *); + int (*add_next_entry)(struct hist_entry_iter *, struct addr_location *); + int (*finish_entry)(struct hist_entry_iter *, struct addr_location *); +}; + +struct hist_entry_iter { + int total; + int curr; + + bool hide_unresolved; + + struct perf_evsel *evsel; + struct perf_sample *sample; + struct hist_entry *he; + struct symbol *parent; + void *priv; + + const struct hist_iter_ops *ops; + /* user-defined callback function (optional) */ + int (*add_entry_cb)(struct hist_entry_iter *iter, + struct addr_location *al, bool single, void *arg); +}; + +extern const struct hist_iter_ops hist_iter_normal; +extern const struct hist_iter_ops hist_iter_branch; +extern const struct hist_iter_ops hist_iter_mem; +extern const struct hist_iter_ops hist_iter_cumulative; + struct hist_entry *__hists__add_entry(struct hists *hists, struct addr_location *al, struct symbol *parent, struct branch_info *bi, struct mem_info *mi, u64 period, - u64 weight, u64 transaction); + u64 weight, u64 transaction, + bool sample_self); +int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, + struct perf_evsel *evsel, struct perf_sample *sample, + int max_stack_depth, void *arg); + int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); int hist_entry__transaction_len(void); @@ -119,6 +157,7 @@ u64 hists__total_period(struct hists *hists); void hists__reset_stats(struct hists *hists); void hists__inc_stats(struct hists *hists, struct hist_entry *h); void hists__inc_nr_events(struct hists *hists, u32 type); +void hists__inc_nr_samples(struct hists *hists, bool filtered); void events_stats__inc(struct events_stats *stats, u32 type); size_t events_stats__fprintf(struct events_stats *stats, FILE *fp); @@ -166,6 +205,7 @@ struct perf_hpp_fmt { struct list_head list; struct list_head sort_list; + bool elide; }; extern struct list_head perf_hpp__list; @@ -192,6 +232,7 @@ enum { PERF_HPP__OVERHEAD_US, PERF_HPP__OVERHEAD_GUEST_SYS, PERF_HPP__OVERHEAD_GUEST_US, + PERF_HPP__OVERHEAD_ACC, PERF_HPP__SAMPLES, PERF_HPP__PERIOD, @@ -200,7 +241,11 @@ enum { void perf_hpp__init(void); void perf_hpp__column_register(struct perf_hpp_fmt *format); +void perf_hpp__column_unregister(struct perf_hpp_fmt *format); void perf_hpp__column_enable(unsigned col); +void perf_hpp__column_disable(unsigned col); +void perf_hpp__cancel_cumulate(void); + void perf_hpp__register_sort_field(struct perf_hpp_fmt *format); void perf_hpp__setup_output_field(void); void perf_hpp__reset_output_field(void); @@ -208,7 +253,12 @@ void perf_hpp__append_sort_keys(void); bool perf_hpp__is_sort_entry(struct perf_hpp_fmt 
*format); bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b); -bool perf_hpp__should_skip(struct perf_hpp_fmt *format); + +static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format) +{ + return format->elide; +} + void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists); typedef u64 (*hpp_field_fn)(struct hist_entry *he); @@ -218,6 +268,9 @@ typedef int (*hpp_snprint_fn)(struct perf_hpp *hpp, const char *fmt, ...); int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, hpp_field_fn get_field, const char *fmt, hpp_snprint_fn print_fn, bool fmt_percent); +int __hpp__fmt_acc(struct perf_hpp *hpp, struct hist_entry *he, + hpp_field_fn get_field, const char *fmt, + hpp_snprint_fn print_fn, bool fmt_percent); static inline void advance_hpp(struct perf_hpp *hpp, int inc) { diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 901b9bece2e..45512baaab6 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1061,6 +1061,7 @@ static struct hpp_dimension hpp_sort_dimensions[] = { DIM(PERF_HPP__OVERHEAD_US, "overhead_us"), DIM(PERF_HPP__OVERHEAD_GUEST_SYS, "overhead_guest_sys"), DIM(PERF_HPP__OVERHEAD_GUEST_US, "overhead_guest_us"), + DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"), DIM(PERF_HPP__SAMPLES, "sample"), DIM(PERF_HPP__PERIOD, "period"), }; @@ -1156,6 +1157,7 @@ __sort_dimension__alloc_hpp(struct sort_dimension *sd) INIT_LIST_HEAD(&hse->hpp.list); INIT_LIST_HEAD(&hse->hpp.sort_list); + hse->hpp.elide = false; return hse; } @@ -1363,27 +1365,64 @@ static int __setup_sorting(void) return ret; } -bool perf_hpp__should_skip(struct perf_hpp_fmt *format) +void perf_hpp__set_elide(int idx, bool elide) { - if (perf_hpp__is_sort_entry(format)) { - struct hpp_sort_entry *hse; + struct perf_hpp_fmt *fmt; + struct hpp_sort_entry *hse; + + perf_hpp__for_each_format(fmt) { + if (!perf_hpp__is_sort_entry(fmt)) + continue; - hse = container_of(format, struct hpp_sort_entry, hpp); - return hse->se->elide; + hse = container_of(fmt, struct hpp_sort_entry, hpp); + if (hse->se->se_width_idx == idx) { + fmt->elide = elide; + break; + } } - return false; } -static void sort_entry__setup_elide(struct sort_entry *se, - struct strlist *list, - const char *list_name, FILE *fp) +static bool __get_elide(struct strlist *list, const char *list_name, FILE *fp) { if (list && strlist__nr_entries(list) == 1) { if (fp != NULL) fprintf(fp, "# %s: %s\n", list_name, strlist__entry(list, 0)->s); - se->elide = true; + return true; } + return false; +} + +static bool get_elide(int idx, FILE *output) +{ + switch (idx) { + case HISTC_SYMBOL: + return __get_elide(symbol_conf.sym_list, "symbol", output); + case HISTC_DSO: + return __get_elide(symbol_conf.dso_list, "dso", output); + case HISTC_COMM: + return __get_elide(symbol_conf.comm_list, "comm", output); + default: + break; + } + + if (sort__mode != SORT_MODE__BRANCH) + return false; + + switch (idx) { + case HISTC_SYMBOL_FROM: + return __get_elide(symbol_conf.sym_from_list, "sym_from", output); + case HISTC_SYMBOL_TO: + return __get_elide(symbol_conf.sym_to_list, "sym_to", output); + case HISTC_DSO_FROM: + return __get_elide(symbol_conf.dso_from_list, "dso_from", output); + case HISTC_DSO_TO: + return __get_elide(symbol_conf.dso_to_list, "dso_to", output); + default: + break; + } + + return false; } void sort__setup_elide(FILE *output) @@ -1391,39 +1430,12 @@ void sort__setup_elide(FILE *output) struct perf_hpp_fmt *fmt; struct hpp_sort_entry *hse; - sort_entry__setup_elide(&sort_dso, 
symbol_conf.dso_list, - "dso", output); - sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, - "comm", output); - sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, - "symbol", output); - - if (sort__mode == SORT_MODE__BRANCH) { - sort_entry__setup_elide(&sort_dso_from, - symbol_conf.dso_from_list, - "dso_from", output); - sort_entry__setup_elide(&sort_dso_to, - symbol_conf.dso_to_list, - "dso_to", output); - sort_entry__setup_elide(&sort_sym_from, - symbol_conf.sym_from_list, - "sym_from", output); - sort_entry__setup_elide(&sort_sym_to, - symbol_conf.sym_to_list, - "sym_to", output); - } else if (sort__mode == SORT_MODE__MEMORY) { - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, - "symbol_daddr", output); - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, - "dso_daddr", output); - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, - "mem", output); - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, - "local_weight", output); - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, - "tlb", output); - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, - "snoop", output); + perf_hpp__for_each_format(fmt) { + if (!perf_hpp__is_sort_entry(fmt)) + continue; + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + fmt->elide = get_elide(hse->se->se_width_idx, output); } /* @@ -1434,8 +1446,7 @@ void sort__setup_elide(FILE *output) if (!perf_hpp__is_sort_entry(fmt)) continue; - hse = container_of(fmt, struct hpp_sort_entry, hpp); - if (!hse->se->elide) + if (!fmt->elide) return; } @@ -1443,8 +1454,7 @@ void sort__setup_elide(FILE *output) if (!perf_hpp__is_sort_entry(fmt)) continue; - hse = container_of(fmt, struct hpp_sort_entry, hpp); - hse->se->elide = false; + fmt->elide = false; } } @@ -1581,6 +1591,9 @@ void reset_output_field(void) sort__has_sym = 0; sort__has_dso = 0; + field_order = NULL; + sort_order = NULL; + reset_dimensions(); perf_hpp__reset_output_field(); } diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 5f38d925e92..5bf0098d6b0 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -20,7 +20,7 @@ #include "parse-options.h" #include "parse-events.h" - +#include "hist.h" #include "thread.h" extern regex_t parent_regex; @@ -82,6 +82,7 @@ struct hist_entry { struct list_head head; } pairs; struct he_stat stat; + struct he_stat *stat_acc; struct map_symbol ms; struct thread *thread; struct comm *comm; @@ -130,6 +131,21 @@ static inline void hist_entry__add_pair(struct hist_entry *pair, list_add_tail(&pair->pairs.node, &he->pairs.head); } +static inline float hist_entry__get_percent_limit(struct hist_entry *he) +{ + u64 period = he->stat.period; + u64 total_period = hists__total_period(he->hists); + + if (unlikely(total_period == 0)) + return 0; + + if (symbol_conf.cumulate_callchain) + period = he->stat_acc->period; + + return period * 100.0 / total_period; +} + + enum sort_mode { SORT_MODE__NORMAL, SORT_MODE__BRANCH, @@ -186,7 +202,6 @@ struct sort_entry { int (*se_snprintf)(struct hist_entry *he, char *bf, size_t size, unsigned int width); u8 se_width_idx; - bool elide; }; extern struct sort_entry sort_thread; @@ -197,6 +212,7 @@ int setup_output_field(void); void reset_output_field(void); extern int sort_dimension__add(const char *); void sort__setup_elide(FILE *fp); +void perf_hpp__set_elide(int idx, bool elide); int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 
95e24977993..7b9096f29cd 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -29,11 +29,12 @@ int vmlinux_path__nr_entries; char **vmlinux_path; struct symbol_conf symbol_conf = { - .use_modules = true, - .try_vmlinux_path = true, - .annotate_src = true, - .demangle = true, - .symfs = "", + .use_modules = true, + .try_vmlinux_path = true, + .annotate_src = true, + .demangle = true, + .cumulate_callchain = true, + .symfs = "", }; static enum dso_binary_type binary_type_symtab[] = { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 33ede53fa6b..615c752dd76 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -109,6 +109,7 @@ struct symbol_conf { show_nr_samples, show_total_period, use_callchain, + cumulate_callchain, exclude_other, show_cpu_utilization, initialized, diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index b3dbe9ef1a4..54833a791a4 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -13,7 +13,7 @@ CFLAGS := -Wall -O2 -flto -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CUR export CC CFLAGS -TARGETS = pmu copyloops mm +TARGETS = pmu copyloops mm tm endif diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c index e80c42a584f..8ebc58a0931 100644 --- a/tools/testing/selftests/powerpc/harness.c +++ b/tools/testing/selftests/powerpc/harness.c @@ -30,12 +30,15 @@ int run_test(int (test_function)(void), char *name) pid = fork(); if (pid == 0) { + setpgid(0, 0); exit(test_function()); } else if (pid == -1) { perror("fork"); return 1; } + setpgid(pid, pid); + /* Wake us up in timeout seconds */ alarm(TIMEOUT); terminated = false; @@ -50,17 +53,20 @@ wait: if (terminated) { printf("!! force killing %s\n", name); - kill(pid, SIGKILL); + kill(-pid, SIGKILL); return 1; } else { printf("!! 
killing %s\n", name); - kill(pid, SIGTERM); + kill(-pid, SIGTERM); terminated = true; alarm(KILL_TIMEOUT); goto wait; } } + /* Kill anything else in the process group that is still running */ + kill(-pid, SIGTERM); + if (WIFEXITED(status)) status = WEXITSTATUS(status); else { @@ -99,7 +105,10 @@ int test_harness(int (test_function)(void), char *name) rc = run_test(test_function, name); - test_finish(name, rc); + if (rc == MAGIC_SKIP_RETURN_VALUE) + test_skip(name); + else + test_finish(name, rc); return rc; } diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile index 7216f009165..b9ff0db42c7 100644 --- a/tools/testing/selftests/powerpc/pmu/Makefile +++ b/tools/testing/selftests/powerpc/pmu/Makefile @@ -4,7 +4,7 @@ noarg: PROGS := count_instructions EXTRA_SOURCES := ../harness.c event.c -all: $(PROGS) +all: $(PROGS) sub_all $(PROGS): $(EXTRA_SOURCES) @@ -12,12 +12,30 @@ $(PROGS): $(EXTRA_SOURCES) count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES) $(CC) $(CFLAGS) -m64 -o $@ $^ -run_tests: all +run_tests: all sub_run_tests @-for PROG in $(PROGS); do \ ./$$PROG; \ done; -clean: +clean: sub_clean rm -f $(PROGS) loop.o -.PHONY: all run_tests clean + +SUB_TARGETS = ebb + +sub_all: + @for TARGET in $(SUB_TARGETS); do \ + $(MAKE) -C $$TARGET all; \ + done; + +sub_run_tests: all + @for TARGET in $(SUB_TARGETS); do \ + $(MAKE) -C $$TARGET run_tests; \ + done; + +sub_clean: + @for TARGET in $(SUB_TARGETS); do \ + $(MAKE) -C $$TARGET clean; \ + done; + +.PHONY: all run_tests clean sub_all sub_run_tests sub_clean diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile new file mode 100644 index 00000000000..edbba2affc2 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile @@ -0,0 +1,32 @@ +noarg: + $(MAKE) -C ../../ + +# The EBB handler is 64-bit code and everything links against it +CFLAGS += -m64 + +PROGS := reg_access_test event_attributes_test cycles_test \ + cycles_with_freeze_test pmc56_overflow_test \ + ebb_vs_cpu_event_test cpu_event_vs_ebb_test \ + cpu_event_pinned_vs_ebb_test task_event_vs_ebb_test \ + task_event_pinned_vs_ebb_test multi_ebb_procs_test \ + multi_counter_test pmae_handling_test \ + close_clears_pmcc_test instruction_count_test \ + fork_cleanup_test ebb_on_child_test \ + ebb_on_willing_child_test back_to_back_ebbs_test \ + lost_exception_test no_handler_test + +all: $(PROGS) + +$(PROGS): ../../harness.c ../event.c ../lib.c ebb.c ebb_handler.S trace.c + +instruction_count_test: ../loop.S + +lost_exception_test: ../lib.c + +run_tests: all + @-for PROG in $(PROGS); do \ + ./$$PROG; \ + done; + +clean: + rm -f $(PROGS) diff --git a/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c new file mode 100644 index 00000000000..66ea765c0e7 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c @@ -0,0 +1,106 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> + +#include "ebb.h" + + +#define NUMBER_OF_EBBS 50 + +/* + * Test that if we overflow the counter while in the EBB handler, we take + * another EBB on exiting from the handler. + * + * We do this by counting with a stupidly low sample period, causing us to + * overflow the PMU while we're still in the EBB handler, leading to another + * EBB. 
+ * + * We get out of what would otherwise be an infinite loop by leaving the + * counter frozen once we've taken enough EBBs. + */ + +static void ebb_callee(void) +{ + uint64_t siar, val; + + val = mfspr(SPRN_BESCR); + if (!(val & BESCR_PMEO)) { + ebb_state.stats.spurious++; + goto out; + } + + ebb_state.stats.ebb_count++; + trace_log_counter(ebb_state.trace, ebb_state.stats.ebb_count); + + /* Resets the PMC */ + count_pmc(1, sample_period); + +out: + if (ebb_state.stats.ebb_count == NUMBER_OF_EBBS) + /* Reset but leave counters frozen */ + reset_ebb_with_clear_mask(MMCR0_PMAO); + else + /* Unfreezes */ + reset_ebb(); + + /* Do some stuff to chew some cycles and pop the counter */ + siar = mfspr(SPRN_SIAR); + trace_log_reg(ebb_state.trace, SPRN_SIAR, siar); + + val = mfspr(SPRN_PMC1); + trace_log_reg(ebb_state.trace, SPRN_PMC1, val); + + val = mfspr(SPRN_MMCR0); + trace_log_reg(ebb_state.trace, SPRN_MMCR0, val); +} + +int back_to_back_ebbs(void) +{ + struct event event; + + event_init_named(&event, 0x1001e, "cycles"); + event_leader_ebb_init(&event); + + event.attr.exclude_kernel = 1; + event.attr.exclude_hv = 1; + event.attr.exclude_idle = 1; + + FAIL_IF(event_open(&event)); + + setup_ebb_handler(ebb_callee); + + FAIL_IF(ebb_event_enable(&event)); + + sample_period = 5; + + ebb_freeze_pmcs(); + mtspr(SPRN_PMC1, pmc_sample_period(sample_period)); + ebb_global_enable(); + ebb_unfreeze_pmcs(); + + while (ebb_state.stats.ebb_count < NUMBER_OF_EBBS) + FAIL_IF(core_busy_loop()); + + ebb_global_disable(); + ebb_freeze_pmcs(); + + count_pmc(1, sample_period); + + dump_ebb_state(); + + event_close(&event); + + FAIL_IF(ebb_state.stats.ebb_count != NUMBER_OF_EBBS); + + return 0; +} + +int main(void) +{ + return test_harness(back_to_back_ebbs, "back_to_back_ebbs"); +} diff --git a/tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c b/tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c new file mode 100644 index 00000000000..0f0423dba18 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c @@ -0,0 +1,59 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <setjmp.h> +#include <signal.h> + +#include "ebb.h" + + +/* + * Test that closing the EBB event clears MMCR0_PMCC, preventing further access + * by userspace to the PMU hardware. + */ + +int close_clears_pmcc(void) +{ + struct event event; + + event_init_named(&event, 0x1001e, "cycles"); + event_leader_ebb_init(&event); + + FAIL_IF(event_open(&event)); + + ebb_enable_pmc_counting(1); + setup_ebb_handler(standard_ebb_callee); + ebb_global_enable(); + FAIL_IF(ebb_event_enable(&event)); + + mtspr(SPRN_PMC1, pmc_sample_period(sample_period)); + + while (ebb_state.stats.ebb_count < 1) + FAIL_IF(core_busy_loop()); + + ebb_global_disable(); + event_close(&event); + + FAIL_IF(ebb_state.stats.ebb_count == 0); + + /* The real test is here, do we take a SIGILL when writing PMU regs now + * that we have closed the event. We expect that we will. 
*/ + + FAIL_IF(catch_sigill(write_pmc1)); + + /* We should still be able to read EBB regs though */ + mfspr(SPRN_EBBHR); + mfspr(SPRN_EBBRR); + mfspr(SPRN_BESCR); + + return 0; +} + +int main(void) +{ + return test_harness(close_clears_pmcc, "close_clears_pmcc"); +} diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c new file mode 100644 index 00000000000..d3ed64d5d6c --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c @@ -0,0 +1,93 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "ebb.h" + + +/* + * Tests a pinned cpu event vs an EBB - in that order. The pinned cpu event + * should remain and the EBB event should fail to enable. + */ + +static int setup_cpu_event(struct event *event, int cpu) +{ + event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL"); + + event->attr.pinned = 1; + + event->attr.exclude_kernel = 1; + event->attr.exclude_hv = 1; + event->attr.exclude_idle = 1; + + SKIP_IF(require_paranoia_below(1)); + FAIL_IF(event_open_with_cpu(event, cpu)); + FAIL_IF(event_enable(event)); + + return 0; +} + +int cpu_event_pinned_vs_ebb(void) +{ + union pipe read_pipe, write_pipe; + struct event event; + int cpu, rc; + pid_t pid; + + cpu = pick_online_cpu(); + FAIL_IF(cpu < 0); + FAIL_IF(bind_to_cpu(cpu)); + + FAIL_IF(pipe(read_pipe.fds) == -1); + FAIL_IF(pipe(write_pipe.fds) == -1); + + pid = fork(); + if (pid == 0) { + /* NB order of pipes looks reversed */ + exit(ebb_child(write_pipe, read_pipe)); + } + + /* We setup the cpu event first */ + rc = setup_cpu_event(&event, cpu); + if (rc) { + kill_child_and_wait(pid); + return rc; + } + + /* Signal the child to install its EBB event and wait */ + if (sync_with_child(read_pipe, write_pipe)) + /* If it fails, wait for it to exit */ + goto wait; + + /* Signal the child to run */ + FAIL_IF(sync_with_child(read_pipe, write_pipe)); + +wait: + /* We expect it to fail to read the event */ + FAIL_IF(wait_for_child(pid) != 2); + + FAIL_IF(event_disable(&event)); + FAIL_IF(event_read(&event)); + + event_report(&event); + + /* The cpu event should have run */ + FAIL_IF(event.result.value == 0); + FAIL_IF(event.result.enabled != event.result.running); + + return 0; +} + +int main(void) +{ + return test_harness(cpu_event_pinned_vs_ebb, "cpu_event_pinned_vs_ebb"); +} diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c new file mode 100644 index 00000000000..8b972c2aa39 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c @@ -0,0 +1,89 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "ebb.h" + + +/* + * Tests a cpu event vs an EBB - in that order. The EBB should force the cpu + * event off the PMU. 
+ */ + +static int setup_cpu_event(struct event *event, int cpu) +{ + event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL"); + + event->attr.exclude_kernel = 1; + event->attr.exclude_hv = 1; + event->attr.exclude_idle = 1; + + SKIP_IF(require_paranoia_below(1)); + FAIL_IF(event_open_with_cpu(event, cpu)); + FAIL_IF(event_enable(event)); + + return 0; +} + +int cpu_event_vs_ebb(void) +{ + union pipe read_pipe, write_pipe; + struct event event; + int cpu, rc; + pid_t pid; + + cpu = pick_online_cpu(); + FAIL_IF(cpu < 0); + FAIL_IF(bind_to_cpu(cpu)); + + FAIL_IF(pipe(read_pipe.fds) == -1); + FAIL_IF(pipe(write_pipe.fds) == -1); + + pid = fork(); + if (pid == 0) { + /* NB order of pipes looks reversed */ + exit(ebb_child(write_pipe, read_pipe)); + } + + /* We setup the cpu event first */ + rc = setup_cpu_event(&event, cpu); + if (rc) { + kill_child_and_wait(pid); + return rc; + } + + /* Signal the child to install its EBB event and wait */ + if (sync_with_child(read_pipe, write_pipe)) + /* If it fails, wait for it to exit */ + goto wait; + + /* Signal the child to run */ + FAIL_IF(sync_with_child(read_pipe, write_pipe)); + +wait: + /* We expect the child to succeed */ + FAIL_IF(wait_for_child(pid)); + + FAIL_IF(event_disable(&event)); + FAIL_IF(event_read(&event)); + + event_report(&event); + + /* The cpu event may have run */ + + return 0; +} + +int main(void) +{ + return test_harness(cpu_event_vs_ebb, "cpu_event_vs_ebb"); +} diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c new file mode 100644 index 00000000000..8590fc1bfc0 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c @@ -0,0 +1,58 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "ebb.h" + + +/* + * Basic test that counts user cycles and takes EBBs. + */ +int cycles(void) +{ + struct event event; + + event_init_named(&event, 0x1001e, "cycles"); + event_leader_ebb_init(&event); + + event.attr.exclude_kernel = 1; + event.attr.exclude_hv = 1; + event.attr.exclude_idle = 1; + + FAIL_IF(event_open(&event)); + + ebb_enable_pmc_counting(1); + setup_ebb_handler(standard_ebb_callee); + ebb_global_enable(); + FAIL_IF(ebb_event_enable(&event)); + + mtspr(SPRN_PMC1, pmc_sample_period(sample_period)); + + while (ebb_state.stats.ebb_count < 10) { + FAIL_IF(core_busy_loop()); + FAIL_IF(ebb_check_mmcr0()); + } + + ebb_global_disable(); + ebb_freeze_pmcs(); + + count_pmc(1, sample_period); + + dump_ebb_state(); + + event_close(&event); + + FAIL_IF(ebb_state.stats.ebb_count == 0); + FAIL_IF(!ebb_check_count(1, sample_period, 100)); + + return 0; +} + +int main(void) +{ + return test_harness(cycles, "cycles"); +} diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c new file mode 100644 index 00000000000..754b3f2008d --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c @@ -0,0 +1,117 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> + +#include "ebb.h" + + +/* + * Test of counting cycles while using MMCR0_FC (freeze counters) to only count + * parts of the code. This is complicated by the fact that FC is set by the + * hardware when the event overflows. 
We may take the EBB after we have set FC, + * so we have to be careful about whether we clear FC at the end of the EBB + * handler or not. + */ + +static bool counters_frozen = false; +static int ebbs_while_frozen = 0; + +static void ebb_callee(void) +{ + uint64_t mask, val; + + mask = MMCR0_PMAO | MMCR0_FC; + + val = mfspr(SPRN_BESCR); + if (!(val & BESCR_PMEO)) { + ebb_state.stats.spurious++; + goto out; + } + + ebb_state.stats.ebb_count++; + trace_log_counter(ebb_state.trace, ebb_state.stats.ebb_count); + + val = mfspr(SPRN_MMCR0); + trace_log_reg(ebb_state.trace, SPRN_MMCR0, val); + + if (counters_frozen) { + trace_log_string(ebb_state.trace, "frozen"); + ebbs_while_frozen++; + mask &= ~MMCR0_FC; + } + + count_pmc(1, sample_period); +out: + reset_ebb_with_clear_mask(mask); +} + +int cycles_with_freeze(void) +{ + struct event event; + uint64_t val; + bool fc_cleared; + + event_init_named(&event, 0x1001e, "cycles"); + event_leader_ebb_init(&event); + + event.attr.exclude_kernel = 1; + event.attr.exclude_hv = 1; + event.attr.exclude_idle = 1; + + FAIL_IF(event_open(&event)); + + setup_ebb_handler(ebb_callee); + ebb_global_enable(); + FAIL_IF(ebb_event_enable(&event)); + + mtspr(SPRN_PMC1, pmc_sample_period(sample_period)); + + fc_cleared = false; + + /* Make sure we loop until we take at least one EBB */ + while ((ebb_state.stats.ebb_count < 20 && !fc_cleared) || + ebb_state.stats.ebb_count < 1) + { + counters_frozen = false; + mb(); + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC); + + FAIL_IF(core_busy_loop()); + + counters_frozen = true; + mb(); + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC); + + val = mfspr(SPRN_MMCR0); + if (! (val & MMCR0_FC)) { + printf("Outside of loop, FC NOT set MMCR0 0x%lx\n", val); + fc_cleared = true; + } + } + + ebb_global_disable(); + ebb_freeze_pmcs(); + + count_pmc(1, sample_period); + + dump_ebb_state(); + + printf("EBBs while frozen %d\n", ebbs_while_frozen); + + event_close(&event); + + FAIL_IF(ebb_state.stats.ebb_count == 0); + FAIL_IF(fc_cleared); + + return 0; +} + +int main(void) +{ + return test_harness(cycles_with_freeze, "cycles_with_freeze"); +} diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c new file mode 100644 index 00000000000..1b46be94b64 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c @@ -0,0 +1,727 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#define _GNU_SOURCE /* For CPU_ZERO etc. 
*/ + +#include <sched.h> +#include <sys/wait.h> +#include <setjmp.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "trace.h" +#include "reg.h" +#include "ebb.h" + + +void (*ebb_user_func)(void); + +void ebb_hook(void) +{ + if (ebb_user_func) + ebb_user_func(); +} + +struct ebb_state ebb_state; + +u64 sample_period = 0x40000000ull; + +void reset_ebb_with_clear_mask(unsigned long mmcr0_clear_mask) +{ + u64 val; + + /* 2) clear MMCR0[PMAO] - docs say BESCR[PMEO] should do this */ + /* 3) set MMCR0[PMAE] - docs say BESCR[PME] should do this */ + val = mfspr(SPRN_MMCR0); + mtspr(SPRN_MMCR0, (val & ~mmcr0_clear_mask) | MMCR0_PMAE); + + /* 4) clear BESCR[PMEO] */ + mtspr(SPRN_BESCRR, BESCR_PMEO); + + /* 5) set BESCR[PME] */ + mtspr(SPRN_BESCRS, BESCR_PME); + + /* 6) rfebb 1 - done in our caller */ +} + +void reset_ebb(void) +{ + reset_ebb_with_clear_mask(MMCR0_PMAO | MMCR0_FC); +} + +/* Called outside of the EBB handler to check MMCR0 is sane */ +int ebb_check_mmcr0(void) +{ + u64 val; + + val = mfspr(SPRN_MMCR0); + if ((val & (MMCR0_FC | MMCR0_PMAO)) == MMCR0_FC) { + /* It's OK if we see FC & PMAO, but not FC by itself */ + printf("Outside of loop, only FC set 0x%llx\n", val); + return 1; + } + + return 0; +} + +bool ebb_check_count(int pmc, u64 sample_period, int fudge) +{ + u64 count, upper, lower; + + count = ebb_state.stats.pmc_count[PMC_INDEX(pmc)]; + + lower = ebb_state.stats.ebb_count * (sample_period - fudge); + + if (count < lower) { + printf("PMC%d count (0x%llx) below lower limit 0x%llx (-0x%llx)\n", + pmc, count, lower, lower - count); + return false; + } + + upper = ebb_state.stats.ebb_count * (sample_period + fudge); + + if (count > upper) { + printf("PMC%d count (0x%llx) above upper limit 0x%llx (+0x%llx)\n", + pmc, count, upper, count - upper); + return false; + } + + printf("PMC%d count (0x%llx) is between 0x%llx and 0x%llx delta +0x%llx/-0x%llx\n", + pmc, count, lower, upper, count - lower, upper - count); + + return true; +} + +void standard_ebb_callee(void) +{ + int found, i; + u64 val; + + val = mfspr(SPRN_BESCR); + if (!(val & BESCR_PMEO)) { + ebb_state.stats.spurious++; + goto out; + } + + ebb_state.stats.ebb_count++; + trace_log_counter(ebb_state.trace, ebb_state.stats.ebb_count); + + val = mfspr(SPRN_MMCR0); + trace_log_reg(ebb_state.trace, SPRN_MMCR0, val); + + found = 0; + for (i = 1; i <= 6; i++) { + if (ebb_state.pmc_enable[PMC_INDEX(i)]) + found += count_pmc(i, sample_period); + } + + if (!found) + ebb_state.stats.no_overflow++; + +out: + reset_ebb(); +} + +extern void ebb_handler(void); + +void setup_ebb_handler(void (*callee)(void)) +{ + u64 entry; + +#if defined(_CALL_ELF) && _CALL_ELF == 2 + entry = (u64)ebb_handler; +#else + struct opd + { + u64 entry; + u64 toc; + } *opd; + + opd = (struct opd *)ebb_handler; + entry = opd->entry; +#endif + printf("EBB Handler is at %#llx\n", entry); + + ebb_user_func = callee; + + /* Ensure ebb_user_func is set before we set the handler */ + mb(); + mtspr(SPRN_EBBHR, entry); + + /* Make sure the handler is set before we return */ + mb(); +} + +void clear_ebb_stats(void) +{ + memset(&ebb_state.stats, 0, sizeof(ebb_state.stats)); +} + +void dump_summary_ebb_state(void) +{ + printf("ebb_state:\n" \ + " ebb_count = %d\n" \ + " spurious = %d\n" \ + " negative = %d\n" \ + " no_overflow = %d\n" \ + " pmc[1] count = 0x%llx\n" \ + " pmc[2] count = 0x%llx\n" \ + " pmc[3] count = 0x%llx\n" \ + " pmc[4] count = 0x%llx\n" \ + " pmc[5] count = 0x%llx\n" \ + " pmc[6] 
count = 0x%llx\n", + ebb_state.stats.ebb_count, ebb_state.stats.spurious, + ebb_state.stats.negative, ebb_state.stats.no_overflow, + ebb_state.stats.pmc_count[0], ebb_state.stats.pmc_count[1], + ebb_state.stats.pmc_count[2], ebb_state.stats.pmc_count[3], + ebb_state.stats.pmc_count[4], ebb_state.stats.pmc_count[5]); +} + +static char *decode_mmcr0(u32 value) +{ + static char buf[16]; + + buf[0] = '\0'; + + if (value & (1 << 31)) + strcat(buf, "FC "); + if (value & (1 << 26)) + strcat(buf, "PMAE "); + if (value & (1 << 7)) + strcat(buf, "PMAO "); + + return buf; +} + +static char *decode_bescr(u64 value) +{ + static char buf[16]; + + buf[0] = '\0'; + + if (value & (1ull << 63)) + strcat(buf, "GE "); + if (value & (1ull << 32)) + strcat(buf, "PMAE "); + if (value & 1) + strcat(buf, "PMAO "); + + return buf; +} + +void dump_ebb_hw_state(void) +{ + u64 bescr; + u32 mmcr0; + + mmcr0 = mfspr(SPRN_MMCR0); + bescr = mfspr(SPRN_BESCR); + + printf("HW state:\n" \ + "MMCR0 0x%016x %s\n" \ + "EBBHR 0x%016lx\n" \ + "BESCR 0x%016llx %s\n" \ + "PMC1 0x%016lx\n" \ + "PMC2 0x%016lx\n" \ + "PMC3 0x%016lx\n" \ + "PMC4 0x%016lx\n" \ + "PMC5 0x%016lx\n" \ + "PMC6 0x%016lx\n" \ + "SIAR 0x%016lx\n", + mmcr0, decode_mmcr0(mmcr0), mfspr(SPRN_EBBHR), bescr, + decode_bescr(bescr), mfspr(SPRN_PMC1), mfspr(SPRN_PMC2), + mfspr(SPRN_PMC3), mfspr(SPRN_PMC4), mfspr(SPRN_PMC5), + mfspr(SPRN_PMC6), mfspr(SPRN_SIAR)); +} + +void dump_ebb_state(void) +{ + dump_summary_ebb_state(); + + dump_ebb_hw_state(); + + trace_buffer_print(ebb_state.trace); +} + +int count_pmc(int pmc, uint32_t sample_period) +{ + uint32_t start_value; + u64 val; + + /* 0) Read PMC */ + start_value = pmc_sample_period(sample_period); + + val = read_pmc(pmc); + if (val < start_value) + ebb_state.stats.negative++; + else + ebb_state.stats.pmc_count[PMC_INDEX(pmc)] += val - start_value; + + trace_log_reg(ebb_state.trace, SPRN_PMC1 + pmc - 1, val); + + /* 1) Reset PMC */ + write_pmc(pmc, start_value); + + /* Report if we overflowed */ + return val >= COUNTER_OVERFLOW; +} + +int ebb_event_enable(struct event *e) +{ + int rc; + + /* Ensure any SPR writes are ordered vs us */ + mb(); + + rc = ioctl(e->fd, PERF_EVENT_IOC_ENABLE); + if (rc) + return rc; + + rc = event_read(e); + + /* Ditto */ + mb(); + + return rc; +} + +void ebb_freeze_pmcs(void) +{ + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC); + mb(); +} + +void ebb_unfreeze_pmcs(void) +{ + /* Unfreeze counters */ + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC); + mb(); +} + +void ebb_global_enable(void) +{ + /* Enable EBBs globally and PMU EBBs */ + mtspr(SPRN_BESCR, 0x8000000100000000ull); + mb(); +} + +void ebb_global_disable(void) +{ + /* Disable EBBs & freeze counters, events are still scheduled */ + mtspr(SPRN_BESCRR, BESCR_PME); + mb(); +} + +void event_ebb_init(struct event *e) +{ + e->attr.config |= (1ull << 63); +} + +void event_bhrb_init(struct event *e, unsigned ifm) +{ + e->attr.config |= (1ull << 62) | ((u64)ifm << 60); +} + +void event_leader_ebb_init(struct event *e) +{ + event_ebb_init(e); + + e->attr.exclusive = 1; + e->attr.pinned = 1; +} + +int core_busy_loop(void) +{ + int rc; + + asm volatile ( + "li 3, 0x3030\n" + "std 3, -96(1)\n" + "li 4, 0x4040\n" + "std 4, -104(1)\n" + "li 5, 0x5050\n" + "std 5, -112(1)\n" + "li 6, 0x6060\n" + "std 6, -120(1)\n" + "li 7, 0x7070\n" + "std 7, -128(1)\n" + "li 8, 0x0808\n" + "std 8, -136(1)\n" + "li 9, 0x0909\n" + "std 9, -144(1)\n" + "li 10, 0x1010\n" + "std 10, -152(1)\n" + "li 11, 0x1111\n" + "std 11, -160(1)\n" + "li 14, 0x1414\n" + "std 14, 
-168(1)\n" + "li 15, 0x1515\n" + "std 15, -176(1)\n" + "li 16, 0x1616\n" + "std 16, -184(1)\n" + "li 17, 0x1717\n" + "std 17, -192(1)\n" + "li 18, 0x1818\n" + "std 18, -200(1)\n" + "li 19, 0x1919\n" + "std 19, -208(1)\n" + "li 20, 0x2020\n" + "std 20, -216(1)\n" + "li 21, 0x2121\n" + "std 21, -224(1)\n" + "li 22, 0x2222\n" + "std 22, -232(1)\n" + "li 23, 0x2323\n" + "std 23, -240(1)\n" + "li 24, 0x2424\n" + "std 24, -248(1)\n" + "li 25, 0x2525\n" + "std 25, -256(1)\n" + "li 26, 0x2626\n" + "std 26, -264(1)\n" + "li 27, 0x2727\n" + "std 27, -272(1)\n" + "li 28, 0x2828\n" + "std 28, -280(1)\n" + "li 29, 0x2929\n" + "std 29, -288(1)\n" + "li 30, 0x3030\n" + "li 31, 0x3131\n" + + "li 3, 0\n" + "0: " + "addi 3, 3, 1\n" + "cmpwi 3, 100\n" + "blt 0b\n" + + /* Return 1 (fail) unless we get through all the checks */ + "li 0, 1\n" + + /* Check none of our registers have been corrupted */ + "cmpwi 4, 0x4040\n" + "bne 1f\n" + "cmpwi 5, 0x5050\n" + "bne 1f\n" + "cmpwi 6, 0x6060\n" + "bne 1f\n" + "cmpwi 7, 0x7070\n" + "bne 1f\n" + "cmpwi 8, 0x0808\n" + "bne 1f\n" + "cmpwi 9, 0x0909\n" + "bne 1f\n" + "cmpwi 10, 0x1010\n" + "bne 1f\n" + "cmpwi 11, 0x1111\n" + "bne 1f\n" + "cmpwi 14, 0x1414\n" + "bne 1f\n" + "cmpwi 15, 0x1515\n" + "bne 1f\n" + "cmpwi 16, 0x1616\n" + "bne 1f\n" + "cmpwi 17, 0x1717\n" + "bne 1f\n" + "cmpwi 18, 0x1818\n" + "bne 1f\n" + "cmpwi 19, 0x1919\n" + "bne 1f\n" + "cmpwi 20, 0x2020\n" + "bne 1f\n" + "cmpwi 21, 0x2121\n" + "bne 1f\n" + "cmpwi 22, 0x2222\n" + "bne 1f\n" + "cmpwi 23, 0x2323\n" + "bne 1f\n" + "cmpwi 24, 0x2424\n" + "bne 1f\n" + "cmpwi 25, 0x2525\n" + "bne 1f\n" + "cmpwi 26, 0x2626\n" + "bne 1f\n" + "cmpwi 27, 0x2727\n" + "bne 1f\n" + "cmpwi 28, 0x2828\n" + "bne 1f\n" + "cmpwi 29, 0x2929\n" + "bne 1f\n" + "cmpwi 30, 0x3030\n" + "bne 1f\n" + "cmpwi 31, 0x3131\n" + "bne 1f\n" + + /* Load junk into all our registers before we reload them from the stack. 
*/ + "li 3, 0xde\n" + "li 4, 0xad\n" + "li 5, 0xbe\n" + "li 6, 0xef\n" + "li 7, 0xde\n" + "li 8, 0xad\n" + "li 9, 0xbe\n" + "li 10, 0xef\n" + "li 11, 0xde\n" + "li 14, 0xad\n" + "li 15, 0xbe\n" + "li 16, 0xef\n" + "li 17, 0xde\n" + "li 18, 0xad\n" + "li 19, 0xbe\n" + "li 20, 0xef\n" + "li 21, 0xde\n" + "li 22, 0xad\n" + "li 23, 0xbe\n" + "li 24, 0xef\n" + "li 25, 0xde\n" + "li 26, 0xad\n" + "li 27, 0xbe\n" + "li 28, 0xef\n" + "li 29, 0xdd\n" + + "ld 3, -96(1)\n" + "cmpwi 3, 0x3030\n" + "bne 1f\n" + "ld 4, -104(1)\n" + "cmpwi 4, 0x4040\n" + "bne 1f\n" + "ld 5, -112(1)\n" + "cmpwi 5, 0x5050\n" + "bne 1f\n" + "ld 6, -120(1)\n" + "cmpwi 6, 0x6060\n" + "bne 1f\n" + "ld 7, -128(1)\n" + "cmpwi 7, 0x7070\n" + "bne 1f\n" + "ld 8, -136(1)\n" + "cmpwi 8, 0x0808\n" + "bne 1f\n" + "ld 9, -144(1)\n" + "cmpwi 9, 0x0909\n" + "bne 1f\n" + "ld 10, -152(1)\n" + "cmpwi 10, 0x1010\n" + "bne 1f\n" + "ld 11, -160(1)\n" + "cmpwi 11, 0x1111\n" + "bne 1f\n" + "ld 14, -168(1)\n" + "cmpwi 14, 0x1414\n" + "bne 1f\n" + "ld 15, -176(1)\n" + "cmpwi 15, 0x1515\n" + "bne 1f\n" + "ld 16, -184(1)\n" + "cmpwi 16, 0x1616\n" + "bne 1f\n" + "ld 17, -192(1)\n" + "cmpwi 17, 0x1717\n" + "bne 1f\n" + "ld 18, -200(1)\n" + "cmpwi 18, 0x1818\n" + "bne 1f\n" + "ld 19, -208(1)\n" + "cmpwi 19, 0x1919\n" + "bne 1f\n" + "ld 20, -216(1)\n" + "cmpwi 20, 0x2020\n" + "bne 1f\n" + "ld 21, -224(1)\n" + "cmpwi 21, 0x2121\n" + "bne 1f\n" + "ld 22, -232(1)\n" + "cmpwi 22, 0x2222\n" + "bne 1f\n" + "ld 23, -240(1)\n" + "cmpwi 23, 0x2323\n" + "bne 1f\n" + "ld 24, -248(1)\n" + "cmpwi 24, 0x2424\n" + "bne 1f\n" + "ld 25, -256(1)\n" + "cmpwi 25, 0x2525\n" + "bne 1f\n" + "ld 26, -264(1)\n" + "cmpwi 26, 0x2626\n" + "bne 1f\n" + "ld 27, -272(1)\n" + "cmpwi 27, 0x2727\n" + "bne 1f\n" + "ld 28, -280(1)\n" + "cmpwi 28, 0x2828\n" + "bne 1f\n" + "ld 29, -288(1)\n" + "cmpwi 29, 0x2929\n" + "bne 1f\n" + + /* Load 0 (success) to return */ + "li 0, 0\n" + + "1: mr %0, 0\n" + + : "=r" (rc) + : /* no inputs */ + : "3", "4", "5", "6", "7", "8", "9", "10", "11", "14", + "15", "16", "17", "18", "19", "20", "21", "22", "23", + "24", "25", "26", "27", "28", "29", "30", "31", + "memory" + ); + + return rc; +} + +int core_busy_loop_with_freeze(void) +{ + int rc; + + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC); + rc = core_busy_loop(); + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC); + + return rc; +} + +int ebb_child(union pipe read_pipe, union pipe write_pipe) +{ + struct event event; + uint64_t val; + + FAIL_IF(wait_for_parent(read_pipe)); + + event_init_named(&event, 0x1001e, "cycles"); + event_leader_ebb_init(&event); + + event.attr.exclude_kernel = 1; + event.attr.exclude_hv = 1; + event.attr.exclude_idle = 1; + + FAIL_IF(event_open(&event)); + + ebb_enable_pmc_counting(1); + setup_ebb_handler(standard_ebb_callee); + ebb_global_enable(); + + FAIL_IF(event_enable(&event)); + + if (event_read(&event)) { + /* + * Some tests expect to fail here, so don't report an error on + * this line, and return a distinguisable error code. Tell the + * parent an error happened. 
+ */ + notify_parent_of_error(write_pipe); + return 2; + } + + mtspr(SPRN_PMC1, pmc_sample_period(sample_period)); + + FAIL_IF(notify_parent(write_pipe)); + FAIL_IF(wait_for_parent(read_pipe)); + FAIL_IF(notify_parent(write_pipe)); + + while (ebb_state.stats.ebb_count < 20) { + FAIL_IF(core_busy_loop()); + + /* To try and hit SIGILL case */ + val = mfspr(SPRN_MMCRA); + val |= mfspr(SPRN_MMCR2); + val |= mfspr(SPRN_MMCR0); + } + + ebb_global_disable(); + ebb_freeze_pmcs(); + + count_pmc(1, sample_period); + + dump_ebb_state(); + + event_close(&event); + + FAIL_IF(ebb_state.stats.ebb_count == 0); + + return 0; +} + +static jmp_buf setjmp_env; + +static void sigill_handler(int signal) +{ + printf("Took sigill\n"); + longjmp(setjmp_env, 1); +} + +static struct sigaction sigill_action = { + .sa_handler = sigill_handler, +}; + +int catch_sigill(void (*func)(void)) +{ + if (sigaction(SIGILL, &sigill_action, NULL)) { + perror("sigaction"); + return 1; + } + + if (setjmp(setjmp_env) == 0) { + func(); + return 1; + } + + return 0; +} + +void write_pmc1(void) +{ + mtspr(SPRN_PMC1, 0); +} + +void write_pmc(int pmc, u64 value) +{ + switch (pmc) { + case 1: mtspr(SPRN_PMC1, value); break; + case 2: mtspr(SPRN_PMC2, value); break; + case 3: mtspr(SPRN_PMC3, value); break; + case 4: mtspr(SPRN_PMC4, value); break; + case 5: mtspr(SPRN_PMC5, value); break; + case 6: mtspr(SPRN_PMC6, value); break; + } +} + +u64 read_pmc(int pmc) +{ + switch (pmc) { + case 1: return mfspr(SPRN_PMC1); + case 2: return mfspr(SPRN_PMC2); + case 3: return mfspr(SPRN_PMC3); + case 4: return mfspr(SPRN_PMC4); + case 5: return mfspr(SPRN_PMC5); + case 6: return mfspr(SPRN_PMC6); + } + + return 0; +} + +static void term_handler(int signal) +{ + dump_summary_ebb_state(); + dump_ebb_hw_state(); + abort(); +} + +struct sigaction term_action = { + .sa_handler = term_handler, +}; + +static void __attribute__((constructor)) ebb_init(void) +{ + clear_ebb_stats(); + + if (sigaction(SIGTERM, &term_action, NULL)) + perror("sigaction"); + + ebb_state.trace = trace_buffer_allocate(1 * 1024 * 1024); +} diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.h b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h new file mode 100644 index 00000000000..e62bde05bf7 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h @@ -0,0 +1,78 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. 
+ */ + +#ifndef _SELFTESTS_POWERPC_PMU_EBB_EBB_H +#define _SELFTESTS_POWERPC_PMU_EBB_EBB_H + +#include "../event.h" +#include "../lib.h" +#include "trace.h" +#include "reg.h" + +#define PMC_INDEX(pmc) ((pmc)-1) + +#define NUM_PMC_VALUES 128 + +struct ebb_state +{ + struct { + u64 pmc_count[6]; + volatile int ebb_count; + int spurious; + int negative; + int no_overflow; + } stats; + + bool pmc_enable[6]; + struct trace_buffer *trace; +}; + +extern struct ebb_state ebb_state; + +#define COUNTER_OVERFLOW 0x80000000ull + +static inline uint32_t pmc_sample_period(uint32_t value) +{ + return COUNTER_OVERFLOW - value; +} + +static inline void ebb_enable_pmc_counting(int pmc) +{ + ebb_state.pmc_enable[PMC_INDEX(pmc)] = true; +} + +bool ebb_check_count(int pmc, u64 sample_period, int fudge); +void event_leader_ebb_init(struct event *e); +void event_ebb_init(struct event *e); +void event_bhrb_init(struct event *e, unsigned ifm); +void setup_ebb_handler(void (*callee)(void)); +void standard_ebb_callee(void); +int ebb_event_enable(struct event *e); +void ebb_global_enable(void); +void ebb_global_disable(void); +void ebb_freeze_pmcs(void); +void ebb_unfreeze_pmcs(void); +void event_ebb_init(struct event *e); +void event_leader_ebb_init(struct event *e); +int count_pmc(int pmc, uint32_t sample_period); +void dump_ebb_state(void); +void dump_summary_ebb_state(void); +void dump_ebb_hw_state(void); +void clear_ebb_stats(void); +void write_pmc(int pmc, u64 value); +u64 read_pmc(int pmc); +void reset_ebb_with_clear_mask(unsigned long mmcr0_clear_mask); +void reset_ebb(void); +int ebb_check_mmcr0(void); + +extern u64 sample_period; + +int core_busy_loop(void); +int core_busy_loop_with_freeze(void); +int ebb_child(union pipe read_pipe, union pipe write_pipe); +int catch_sigill(void (*func)(void)); +void write_pmc1(void); + +#endif /* _SELFTESTS_POWERPC_PMU_EBB_EBB_H */ diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_handler.S b/tools/testing/selftests/powerpc/pmu/ebb/ebb_handler.S new file mode 100644 index 00000000000..14274ea206e --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_handler.S @@ -0,0 +1,365 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#include <ppc-asm.h> +#include "reg.h" + + +/* ppc-asm.h defines most of the reg aliases, but not r1/r2. */ +#define r1 1 +#define r2 2 + +#define RFEBB .long 0x4c000924 + +/* Stack layout: + * + * ^ + * User stack | + * Back chain ------+ <- r1 <-------+ + * ... | + * Red zone / ABI Gap | + * ... | + * vr63 <+ | + * vr0 | | + * VSCR | | + * FSCR | | + * r31 | Save area | + * r0 | | + * XER | | + * CTR | | + * LR | | + * CCR <+ | + * ... 
<+ | + * LR | Caller frame | + * CCR | | + * Back chain <+ <- updated r1 --------+ + * + */ + +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define ABIGAP 512 +#else +#define ABIGAP 288 +#endif + +#define NR_GPR 32 +#define NR_SPR 6 +#define NR_VSR 64 + +#define SAVE_AREA ((NR_GPR + NR_SPR) * 8 + (NR_VSR * 16)) +#define CALLER_FRAME 112 + +#define STACK_FRAME (ABIGAP + SAVE_AREA + CALLER_FRAME) + +#define CCR_SAVE (CALLER_FRAME) +#define LR_SAVE (CCR_SAVE + 8) +#define CTR_SAVE (LR_SAVE + 8) +#define XER_SAVE (CTR_SAVE + 8) +#define GPR_SAVE(n) (XER_SAVE + 8 + (8 * n)) +#define FSCR_SAVE (GPR_SAVE(31) + 8) +#define VSCR_SAVE (FSCR_SAVE + 8) +#define VSR_SAVE(n) (VSCR_SAVE + 8 + (16 * n)) + +#define SAVE_GPR(n) std n,GPR_SAVE(n)(r1) +#define REST_GPR(n) ld n,GPR_SAVE(n)(r1) +#define TRASH_GPR(n) lis n,0xaaaa + +#define SAVE_VSR(n, b) li b, VSR_SAVE(n); stxvd2x n,b,r1 +#define LOAD_VSR(n, b) li b, VSR_SAVE(n); lxvd2x n,b,r1 + +#define LOAD_REG_IMMEDIATE(reg,expr) \ + lis reg,(expr)@highest; \ + ori reg,reg,(expr)@higher; \ + rldicr reg,reg,32,31; \ + oris reg,reg,(expr)@h; \ + ori reg,reg,(expr)@l; + + +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define ENTRY_POINT(name) \ + .type FUNC_NAME(name),@function; \ + .globl FUNC_NAME(name); \ + FUNC_NAME(name): + +#define RESTORE_TOC(name) \ + /* Restore our TOC pointer using our entry point */ \ + LOAD_REG_IMMEDIATE(r12, name) \ +0: addis r2,r12,(.TOC.-0b)@ha; \ + addi r2,r2,(.TOC.-0b)@l; + +#else +#define ENTRY_POINT(name) FUNC_START(name) +#define RESTORE_TOC(name) \ + /* Restore our TOC pointer via our opd entry */ \ + LOAD_REG_IMMEDIATE(r2, name) \ + ld r2,8(r2); +#endif + + .text + +ENTRY_POINT(ebb_handler) + stdu r1,-STACK_FRAME(r1) + SAVE_GPR(0) + mflr r0 + std r0,LR_SAVE(r1) + mfcr r0 + std r0,CCR_SAVE(r1) + mfctr r0 + std r0,CTR_SAVE(r1) + mfxer r0 + std r0,XER_SAVE(r1) + SAVE_GPR(2) + SAVE_GPR(3) + SAVE_GPR(4) + SAVE_GPR(5) + SAVE_GPR(6) + SAVE_GPR(7) + SAVE_GPR(8) + SAVE_GPR(9) + SAVE_GPR(10) + SAVE_GPR(11) + SAVE_GPR(12) + SAVE_GPR(13) + SAVE_GPR(14) + SAVE_GPR(15) + SAVE_GPR(16) + SAVE_GPR(17) + SAVE_GPR(18) + SAVE_GPR(19) + SAVE_GPR(20) + SAVE_GPR(21) + SAVE_GPR(22) + SAVE_GPR(23) + SAVE_GPR(24) + SAVE_GPR(25) + SAVE_GPR(26) + SAVE_GPR(27) + SAVE_GPR(28) + SAVE_GPR(29) + SAVE_GPR(30) + SAVE_GPR(31) + SAVE_VSR(0, r3) + mffs f0 + stfd f0, FSCR_SAVE(r1) + mfvscr f0 + stfd f0, VSCR_SAVE(r1) + SAVE_VSR(1, r3) + SAVE_VSR(2, r3) + SAVE_VSR(3, r3) + SAVE_VSR(4, r3) + SAVE_VSR(5, r3) + SAVE_VSR(6, r3) + SAVE_VSR(7, r3) + SAVE_VSR(8, r3) + SAVE_VSR(9, r3) + SAVE_VSR(10, r3) + SAVE_VSR(11, r3) + SAVE_VSR(12, r3) + SAVE_VSR(13, r3) + SAVE_VSR(14, r3) + SAVE_VSR(15, r3) + SAVE_VSR(16, r3) + SAVE_VSR(17, r3) + SAVE_VSR(18, r3) + SAVE_VSR(19, r3) + SAVE_VSR(20, r3) + SAVE_VSR(21, r3) + SAVE_VSR(22, r3) + SAVE_VSR(23, r3) + SAVE_VSR(24, r3) + SAVE_VSR(25, r3) + SAVE_VSR(26, r3) + SAVE_VSR(27, r3) + SAVE_VSR(28, r3) + SAVE_VSR(29, r3) + SAVE_VSR(30, r3) + SAVE_VSR(31, r3) + SAVE_VSR(32, r3) + SAVE_VSR(33, r3) + SAVE_VSR(34, r3) + SAVE_VSR(35, r3) + SAVE_VSR(36, r3) + SAVE_VSR(37, r3) + SAVE_VSR(38, r3) + SAVE_VSR(39, r3) + SAVE_VSR(40, r3) + SAVE_VSR(41, r3) + SAVE_VSR(42, r3) + SAVE_VSR(43, r3) + SAVE_VSR(44, r3) + SAVE_VSR(45, r3) + SAVE_VSR(46, r3) + SAVE_VSR(47, r3) + SAVE_VSR(48, r3) + SAVE_VSR(49, r3) + SAVE_VSR(50, r3) + SAVE_VSR(51, r3) + SAVE_VSR(52, r3) + SAVE_VSR(53, r3) + SAVE_VSR(54, r3) + SAVE_VSR(55, r3) + SAVE_VSR(56, r3) + SAVE_VSR(57, r3) + SAVE_VSR(58, r3) + SAVE_VSR(59, r3) + SAVE_VSR(60, r3) + SAVE_VSR(61, r3) + SAVE_VSR(62, r3) + 
SAVE_VSR(63, r3) + + TRASH_GPR(2) + TRASH_GPR(3) + TRASH_GPR(4) + TRASH_GPR(5) + TRASH_GPR(6) + TRASH_GPR(7) + TRASH_GPR(8) + TRASH_GPR(9) + TRASH_GPR(10) + TRASH_GPR(11) + TRASH_GPR(12) + TRASH_GPR(14) + TRASH_GPR(15) + TRASH_GPR(16) + TRASH_GPR(17) + TRASH_GPR(18) + TRASH_GPR(19) + TRASH_GPR(20) + TRASH_GPR(21) + TRASH_GPR(22) + TRASH_GPR(23) + TRASH_GPR(24) + TRASH_GPR(25) + TRASH_GPR(26) + TRASH_GPR(27) + TRASH_GPR(28) + TRASH_GPR(29) + TRASH_GPR(30) + TRASH_GPR(31) + + RESTORE_TOC(ebb_handler) + + /* + * r13 is our TLS pointer. We leave whatever value was in there when the + * EBB fired. That seems to be OK because once set the TLS pointer is not + * changed - but presumably that could change in future. + */ + + bl ebb_hook + nop + + /* r2 may be changed here but we don't care */ + + lfd f0, FSCR_SAVE(r1) + mtfsf 0xff,f0 + lfd f0, VSCR_SAVE(r1) + mtvscr f0 + LOAD_VSR(0, r3) + LOAD_VSR(1, r3) + LOAD_VSR(2, r3) + LOAD_VSR(3, r3) + LOAD_VSR(4, r3) + LOAD_VSR(5, r3) + LOAD_VSR(6, r3) + LOAD_VSR(7, r3) + LOAD_VSR(8, r3) + LOAD_VSR(9, r3) + LOAD_VSR(10, r3) + LOAD_VSR(11, r3) + LOAD_VSR(12, r3) + LOAD_VSR(13, r3) + LOAD_VSR(14, r3) + LOAD_VSR(15, r3) + LOAD_VSR(16, r3) + LOAD_VSR(17, r3) + LOAD_VSR(18, r3) + LOAD_VSR(19, r3) + LOAD_VSR(20, r3) + LOAD_VSR(21, r3) + LOAD_VSR(22, r3) + LOAD_VSR(23, r3) + LOAD_VSR(24, r3) + LOAD_VSR(25, r3) + LOAD_VSR(26, r3) + LOAD_VSR(27, r3) + LOAD_VSR(28, r3) + LOAD_VSR(29, r3) + LOAD_VSR(30, r3) + LOAD_VSR(31, r3) + LOAD_VSR(32, r3) + LOAD_VSR(33, r3) + LOAD_VSR(34, r3) + LOAD_VSR(35, r3) + LOAD_VSR(36, r3) + LOAD_VSR(37, r3) + LOAD_VSR(38, r3) + LOAD_VSR(39, r3) + LOAD_VSR(40, r3) + LOAD_VSR(41, r3) + LOAD_VSR(42, r3) + LOAD_VSR(43, r3) + LOAD_VSR(44, r3) + LOAD_VSR(45, r3) + LOAD_VSR(46, r3) + LOAD_VSR(47, r3) + LOAD_VSR(48, r3) + LOAD_VSR(49, r3) + LOAD_VSR(50, r3) + LOAD_VSR(51, r3) + LOAD_VSR(52, r3) + LOAD_VSR(53, r3) + LOAD_VSR(54, r3) + LOAD_VSR(55, r3) + LOAD_VSR(56, r3) + LOAD_VSR(57, r3) + LOAD_VSR(58, r3) + LOAD_VSR(59, r3) + LOAD_VSR(60, r3) + LOAD_VSR(61, r3) + LOAD_VSR(62, r3) + LOAD_VSR(63, r3) + + ld r0,XER_SAVE(r1) + mtxer r0 + ld r0,CTR_SAVE(r1) + mtctr r0 + ld r0,LR_SAVE(r1) + mtlr r0 + ld r0,CCR_SAVE(r1) + mtcr r0 + REST_GPR(0) + REST_GPR(2) + REST_GPR(3) + REST_GPR(4) + REST_GPR(5) + REST_GPR(6) + REST_GPR(7) + REST_GPR(8) + REST_GPR(9) + REST_GPR(10) + REST_GPR(11) + REST_GPR(12) + REST_GPR(13) + REST_GPR(14) + REST_GPR(15) + REST_GPR(16) + REST_GPR(17) + REST_GPR(18) + REST_GPR(19) + REST_GPR(20) + REST_GPR(21) + REST_GPR(22) + REST_GPR(23) + REST_GPR(24) + REST_GPR(25) + REST_GPR(26) + REST_GPR(27) + REST_GPR(28) + REST_GPR(29) + REST_GPR(30) + REST_GPR(31) + addi r1,r1,STACK_FRAME + RFEBB +FUNC_END(ebb_handler) diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c new file mode 100644 index 00000000000..c45f948148e --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c @@ -0,0 +1,86 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "ebb.h" + + +/* + * Tests we can setup an EBB on our child. Nothing interesting happens, because + * even though the event is enabled and running the child hasn't enabled the + * actual delivery of the EBBs. 
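+ *
+ * (Delivery would require the child itself to set BESCR[GE] and BESCR[PME],
+ * e.g. via ebb_global_enable(), which this victim child never does.)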
+ */ + +static int victim_child(union pipe read_pipe, union pipe write_pipe) +{ + int i; + + FAIL_IF(wait_for_parent(read_pipe)); + FAIL_IF(notify_parent(write_pipe)); + + /* Parent creates EBB event */ + + FAIL_IF(wait_for_parent(read_pipe)); + FAIL_IF(notify_parent(write_pipe)); + + /* Check the EBB is enabled by writing PMC1 */ + write_pmc1(); + + /* EBB event is enabled here */ + for (i = 0; i < 1000000; i++) ; + + return 0; +} + +int ebb_on_child(void) +{ + union pipe read_pipe, write_pipe; + struct event event; + pid_t pid; + + FAIL_IF(pipe(read_pipe.fds) == -1); + FAIL_IF(pipe(write_pipe.fds) == -1); + + pid = fork(); + if (pid == 0) { + /* NB order of pipes looks reversed */ + exit(victim_child(write_pipe, read_pipe)); + } + + FAIL_IF(sync_with_child(read_pipe, write_pipe)); + + /* Child is running now */ + + event_init_named(&event, 0x1001e, "cycles"); + event_leader_ebb_init(&event); + + event.attr.exclude_kernel = 1; + event.attr.exclude_hv = 1; + event.attr.exclude_idle = 1; + + FAIL_IF(event_open_with_pid(&event, pid)); + FAIL_IF(ebb_event_enable(&event)); + + FAIL_IF(sync_with_child(read_pipe, write_pipe)); + + /* Child should just exit happily */ + FAIL_IF(wait_for_child(pid)); + + event_close(&event); + + return 0; +} + +int main(void) +{ + return test_harness(ebb_on_child, "ebb_on_child"); +} diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c new file mode 100644 index 00000000000..11acf1d55f8 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c @@ -0,0 +1,92 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "ebb.h" + + +/* + * Tests we can setup an EBB on our child. The child expects this and enables + * EBBs, which are then delivered to the child, even though the event is + * created by the parent. 
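+ *
+ * (The child installs its handler and calls ebb_global_enable() before the
+ * parent opens the event, so EBBs are delivered once the counter overflows.)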
+ */
+
+static int victim_child(union pipe read_pipe, union pipe write_pipe)
+{
+	FAIL_IF(wait_for_parent(read_pipe));
+
+	/* Setup our EBB handler, before the EBB event is created */
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+
+	FAIL_IF(notify_parent(write_pipe));
+
+	while (ebb_state.stats.ebb_count < 20) {
+		FAIL_IF(core_busy_loop());
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	count_pmc(1, sample_period);
+
+	dump_ebb_state();
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	return 0;
+}
+
+/* Tests we can setup an EBB on our child - if it's expecting it */
+int ebb_on_willing_child(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	pid_t pid;
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	if (pid == 0) {
+		/* NB order of pipes looks reversed */
+		exit(victim_child(write_pipe, read_pipe));
+	}
+
+	/* Signal the child to setup its EBB handler */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	/* Child is running now */
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open_with_pid(&event, pid));
+	FAIL_IF(ebb_event_enable(&event));
+
+	/* Child should now take EBBs and then exit */
+	FAIL_IF(wait_for_child(pid));
+
+	event_close(&event);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(ebb_on_willing_child, "ebb_on_willing_child");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c
new file mode 100644
index 00000000000..be4dd5a4e98
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests an EBB vs a cpu event - in that order. The EBB should force the cpu
+ * event off the PMU.
+ */
+
+static int setup_cpu_event(struct event *event, int cpu)
+{
+	event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+	event->attr.exclude_kernel = 1;
+	event->attr.exclude_hv = 1;
+	event->attr.exclude_idle = 1;
+
+	SKIP_IF(require_paranoia_below(1));
+	FAIL_IF(event_open_with_cpu(event, cpu));
+	FAIL_IF(event_enable(event));
+
+	return 0;
+}
+
+int ebb_vs_cpu_event(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	int cpu, rc;
+	pid_t pid;
+
+	cpu = pick_online_cpu();
+	FAIL_IF(cpu < 0);
+	FAIL_IF(bind_to_cpu(cpu));
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	if (pid == 0) {
+		/* NB order of pipes looks reversed */
+		exit(ebb_child(write_pipe, read_pipe));
+	}
+
+	/* Signal the child to install its EBB event and wait */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	/* Now try to install our CPU event */
+	rc = setup_cpu_event(&event, cpu);
+	if (rc) {
+		kill_child_and_wait(pid);
+		return rc;
+	}
+
+	/* Signal the child to run */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	/* ..
and wait for it to complete */ + FAIL_IF(wait_for_child(pid)); + FAIL_IF(event_disable(&event)); + FAIL_IF(event_read(&event)); + + event_report(&event); + + /* The cpu event may have run, but we don't expect 100% */ + FAIL_IF(event.result.enabled >= event.result.running); + + return 0; +} + +int main(void) +{ + return test_harness(ebb_vs_cpu_event, "ebb_vs_cpu_event"); +} diff --git a/tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c b/tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c new file mode 100644 index 00000000000..7e78153f08e --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c @@ -0,0 +1,131 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "ebb.h" + + +/* + * Test various attributes of the EBB event are enforced. + */ +int event_attributes(void) +{ + struct event event, leader; + + event_init(&event, 0x1001e); + event_leader_ebb_init(&event); + /* Expected to succeed */ + FAIL_IF(event_open(&event)); + event_close(&event); + + + event_init(&event, 0x001e); /* CYCLES - no PMC specified */ + event_leader_ebb_init(&event); + /* Expected to fail, no PMC specified */ + FAIL_IF(event_open(&event) == 0); + + + event_init(&event, 0x2001e); + event_leader_ebb_init(&event); + event.attr.exclusive = 0; + /* Expected to fail, not exclusive */ + FAIL_IF(event_open(&event) == 0); + + + event_init(&event, 0x3001e); + event_leader_ebb_init(&event); + event.attr.freq = 1; + /* Expected to fail, sets freq */ + FAIL_IF(event_open(&event) == 0); + + + event_init(&event, 0x4001e); + event_leader_ebb_init(&event); + event.attr.sample_period = 1; + /* Expected to fail, sets sample_period */ + FAIL_IF(event_open(&event) == 0); + + + event_init(&event, 0x1001e); + event_leader_ebb_init(&event); + event.attr.enable_on_exec = 1; + /* Expected to fail, sets enable_on_exec */ + FAIL_IF(event_open(&event) == 0); + + + event_init(&event, 0x1001e); + event_leader_ebb_init(&event); + event.attr.inherit = 1; + /* Expected to fail, sets inherit */ + FAIL_IF(event_open(&event) == 0); + + + event_init(&leader, 0x1001e); + event_leader_ebb_init(&leader); + FAIL_IF(event_open(&leader)); + + event_init(&event, 0x20002); + event_ebb_init(&event); + + /* Expected to succeed */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + event_close(&leader); + event_close(&event); + + + event_init(&leader, 0x1001e); + event_leader_ebb_init(&leader); + FAIL_IF(event_open(&leader)); + + event_init(&event, 0x20002); + + /* Expected to fail, event doesn't request EBB, leader does */ + FAIL_IF(event_open_with_group(&event, leader.fd) == 0); + event_close(&leader); + + + event_init(&leader, 0x1001e); + event_leader_ebb_init(&leader); + /* Clear the EBB flag */ + leader.attr.config &= ~(1ull << 63); + + FAIL_IF(event_open(&leader)); + + event_init(&event, 0x20002); + event_ebb_init(&event); + + /* Expected to fail, leader doesn't request EBB */ + FAIL_IF(event_open_with_group(&event, leader.fd) == 0); + event_close(&leader); + + + event_init(&leader, 0x1001e); + event_leader_ebb_init(&leader); + leader.attr.exclusive = 0; + /* Expected to fail, leader isn't exclusive */ + FAIL_IF(event_open(&leader) == 0); + + + event_init(&leader, 0x1001e); + event_leader_ebb_init(&leader); + leader.attr.pinned = 0; + /* Expected to fail, leader isn't pinned */ + FAIL_IF(event_open(&leader) == 0); + + event_init(&event, 0x1001e); + event_leader_ebb_init(&event); + /* Expected to fail, not a task 
event */ + SKIP_IF(require_paranoia_below(1)); + FAIL_IF(event_open_with_cpu(&event, 0) == 0); + + return 0; +} + +int main(void) +{ + return test_harness(event_attributes, "event_attributes"); +} diff --git a/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S b/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S new file mode 100644 index 00000000000..b866a0581d3 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S @@ -0,0 +1,43 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#include <ppc-asm.h> + + .text + +FUNC_START(thirty_two_instruction_loop) + cmpwi r3,0 + beqlr + addi r4,r3,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 # 28 addi's + subi r3,r3,1 + b FUNC_NAME(thirty_two_instruction_loop) +FUNC_END(thirty_two_instruction_loop) diff --git a/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c b/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c new file mode 100644 index 00000000000..9e7af6e7662 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c @@ -0,0 +1,79 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> +#include <setjmp.h> +#include <signal.h> + +#include "ebb.h" + + +/* + * Test that a fork clears the PMU state of the child. eg. BESCR/EBBHR/EBBRR + * are cleared, and MMCR0_PMCC is reset, preventing the child from accessing + * the PMU. + */ + +static struct event event; + +static int child(void) +{ + /* Even though we have EBE=0 we can still see the EBB regs */ + FAIL_IF(mfspr(SPRN_BESCR) != 0); + FAIL_IF(mfspr(SPRN_EBBHR) != 0); + FAIL_IF(mfspr(SPRN_EBBRR) != 0); + + FAIL_IF(catch_sigill(write_pmc1)); + + /* We can still read from the event, though it is on our parent */ + FAIL_IF(event_read(&event)); + + return 0; +} + +/* Tests that fork clears EBB state */ +int fork_cleanup(void) +{ + pid_t pid; + + event_init_named(&event, 0x1001e, "cycles"); + event_leader_ebb_init(&event); + + FAIL_IF(event_open(&event)); + + ebb_enable_pmc_counting(1); + setup_ebb_handler(standard_ebb_callee); + ebb_global_enable(); + + FAIL_IF(ebb_event_enable(&event)); + + mtspr(SPRN_MMCR0, MMCR0_FC); + mtspr(SPRN_PMC1, pmc_sample_period(sample_period)); + + /* Don't need to actually take any EBBs */ + + pid = fork(); + if (pid == 0) + exit(child()); + + /* Child does the actual testing */ + FAIL_IF(wait_for_child(pid)); + + /* After fork */ + event_close(&event); + + return 0; +} + +int main(void) +{ + return test_harness(fork_cleanup, "fork_cleanup"); +} diff --git a/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c b/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c new file mode 100644 index 00000000000..f8190fa2959 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c @@ -0,0 +1,164 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. 
+ */ + +#define _GNU_SOURCE + +#include <stdio.h> +#include <stdbool.h> +#include <string.h> +#include <sys/prctl.h> + +#include "ebb.h" + + +/* + * Run a calibrated instruction loop and count instructions executed using + * EBBs. Make sure the counts look right. + */ + +extern void thirty_two_instruction_loop(uint64_t loops); + +static bool counters_frozen = true; + +static int do_count_loop(struct event *event, uint64_t instructions, + uint64_t overhead, bool report) +{ + int64_t difference, expected; + double percentage; + + clear_ebb_stats(); + + counters_frozen = false; + mb(); + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC); + + thirty_two_instruction_loop(instructions >> 5); + + counters_frozen = true; + mb(); + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC); + + count_pmc(4, sample_period); + + event->result.value = ebb_state.stats.pmc_count[4-1]; + expected = instructions + overhead; + difference = event->result.value - expected; + percentage = (double)difference / event->result.value * 100; + + if (report) { + printf("Looped for %lu instructions, overhead %lu\n", instructions, overhead); + printf("Expected %lu\n", expected); + printf("Actual %llu\n", event->result.value); + printf("Error %ld, %f%%\n", difference, percentage); + printf("Took %d EBBs\n", ebb_state.stats.ebb_count); + } + + if (difference < 0) + difference = -difference; + + /* Tolerate a difference of up to 0.0001 % */ + difference *= 10000 * 100; + if (difference / event->result.value) + return -1; + + return 0; +} + +/* Count how many instructions it takes to do a null loop */ +static uint64_t determine_overhead(struct event *event) +{ + uint64_t current, overhead; + int i; + + do_count_loop(event, 0, 0, false); + overhead = event->result.value; + + for (i = 0; i < 100; i++) { + do_count_loop(event, 0, 0, false); + current = event->result.value; + if (current < overhead) { + printf("Replacing overhead %lu with %lu\n", overhead, current); + overhead = current; + } + } + + return overhead; +} + +static void pmc4_ebb_callee(void) +{ + uint64_t val; + + val = mfspr(SPRN_BESCR); + if (!(val & BESCR_PMEO)) { + ebb_state.stats.spurious++; + goto out; + } + + ebb_state.stats.ebb_count++; + count_pmc(4, sample_period); +out: + if (counters_frozen) + reset_ebb_with_clear_mask(MMCR0_PMAO); + else + reset_ebb(); +} + +int instruction_count(void) +{ + struct event event; + uint64_t overhead; + + event_init_named(&event, 0x400FA, "PM_RUN_INST_CMPL"); + event_leader_ebb_init(&event); + event.attr.exclude_kernel = 1; + event.attr.exclude_hv = 1; + event.attr.exclude_idle = 1; + + FAIL_IF(event_open(&event)); + FAIL_IF(ebb_event_enable(&event)); + + sample_period = COUNTER_OVERFLOW; + + setup_ebb_handler(pmc4_ebb_callee); + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC); + ebb_global_enable(); + + overhead = determine_overhead(&event); + printf("Overhead of null loop: %lu instructions\n", overhead); + + /* Run for 1M instructions */ + FAIL_IF(do_count_loop(&event, 0x100000, overhead, true)); + + /* Run for 10M instructions */ + FAIL_IF(do_count_loop(&event, 0xa00000, overhead, true)); + + /* Run for 100M instructions */ + FAIL_IF(do_count_loop(&event, 0x6400000, overhead, true)); + + /* Run for 1G instructions */ + FAIL_IF(do_count_loop(&event, 0x40000000, overhead, true)); + + /* Run for 16G instructions */ + FAIL_IF(do_count_loop(&event, 0x400000000, overhead, true)); + + /* Run for 64G instructions */ + FAIL_IF(do_count_loop(&event, 0x1000000000, overhead, true)); + + /* Run for 128G instructions */ + 
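+	/* (The hex counts above are exact binary multiples, e.g.
+	 * 0x2000000000 = 128 * 2^30, so this final step dominates the
+	 * runtime of the test.) */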
FAIL_IF(do_count_loop(&event, 0x2000000000, overhead, true)); + + ebb_global_disable(); + event_close(&event); + + printf("Finished OK\n"); + + return 0; +} + +int main(void) +{ + return test_harness(instruction_count, "instruction_count"); +} diff --git a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c new file mode 100644 index 00000000000..0c9dd9b2e39 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c @@ -0,0 +1,100 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> + +#include "ebb.h" + + +/* + * Test that tries to trigger CPU_FTR_PMAO_BUG. Which is a hardware defect + * where an exception triggers but we context switch before it is delivered and + * lose the exception. + */ + +static int test_body(void) +{ + int i, orig_period, max_period; + struct event event; + + /* We use PMC4 to make sure the kernel switches all counters correctly */ + event_init_named(&event, 0x40002, "instructions"); + event_leader_ebb_init(&event); + + event.attr.exclude_kernel = 1; + event.attr.exclude_hv = 1; + event.attr.exclude_idle = 1; + + FAIL_IF(event_open(&event)); + + ebb_enable_pmc_counting(4); + setup_ebb_handler(standard_ebb_callee); + ebb_global_enable(); + FAIL_IF(ebb_event_enable(&event)); + + /* + * We want a low sample period, but we also want to get out of the EBB + * handler without tripping up again. + * + * This value picked after much experimentation. + */ + orig_period = max_period = sample_period = 400; + + mtspr(SPRN_PMC4, pmc_sample_period(sample_period)); + + while (ebb_state.stats.ebb_count < 1000000) { + /* + * We are trying to get the EBB exception to race exactly with + * us entering the kernel to do the syscall. We then need the + * kernel to decide our timeslice is up and context switch to + * the other thread. When we come back our EBB will have been + * lost and we'll spin in this while loop forever. + */ + + for (i = 0; i < 100000; i++) + sched_yield(); + + /* Change the sample period slightly to try and hit the race */ + if (sample_period >= (orig_period + 200)) + sample_period = orig_period; + else + sample_period++; + + if (sample_period > max_period) + max_period = sample_period; + } + + ebb_freeze_pmcs(); + ebb_global_disable(); + + count_pmc(4, sample_period); + mtspr(SPRN_PMC4, 0xdead); + + dump_summary_ebb_state(); + dump_ebb_hw_state(); + + event_close(&event); + + FAIL_IF(ebb_state.stats.ebb_count == 0); + + /* We vary our sample period so we need extra fudge here */ + FAIL_IF(!ebb_check_count(4, orig_period, 2 * (max_period - orig_period))); + + return 0; +} + +static int lost_exception(void) +{ + return eat_cpu(test_body); +} + +int main(void) +{ + return test_harness(lost_exception, "lost_exception"); +} diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c new file mode 100644 index 00000000000..67d78af3284 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c @@ -0,0 +1,91 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/ioctl.h> + +#include "ebb.h" + + +/* + * Test counting multiple events using EBBs. 
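+ * (All six are opened as a single group behind an EBB leader, so the one
+ * PERF_EVENT_IOC_ENABLE with PERF_IOC_FLAG_GROUP below enables PMC1-6
+ * together.)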
+ */
+int multi_counter(void)
+{
+	struct event events[6];
+	int i, group_fd;
+
+	event_init_named(&events[0], 0x1001C, "PM_CMPLU_STALL_THRD");
+	event_init_named(&events[1], 0x2D016, "PM_CMPLU_STALL_FXU");
+	event_init_named(&events[2], 0x30006, "PM_CMPLU_STALL_OTHER_CMPL");
+	event_init_named(&events[3], 0x4000A, "PM_CMPLU_STALL");
+	event_init_named(&events[4], 0x600f4, "PM_RUN_CYC");
+	event_init_named(&events[5], 0x500fa, "PM_RUN_INST_CMPL");
+
+	event_leader_ebb_init(&events[0]);
+	for (i = 1; i < 6; i++)
+		event_ebb_init(&events[i]);
+
+	group_fd = -1;
+	for (i = 0; i < 6; i++) {
+		events[i].attr.exclude_kernel = 1;
+		events[i].attr.exclude_hv = 1;
+		events[i].attr.exclude_idle = 1;
+
+		FAIL_IF(event_open_with_group(&events[i], group_fd));
+		if (group_fd == -1)
+			group_fd = events[0].fd;
+	}
+
+	ebb_enable_pmc_counting(1);
+	ebb_enable_pmc_counting(2);
+	ebb_enable_pmc_counting(3);
+	ebb_enable_pmc_counting(4);
+	ebb_enable_pmc_counting(5);
+	ebb_enable_pmc_counting(6);
+	setup_ebb_handler(standard_ebb_callee);
+
+	FAIL_IF(ioctl(events[0].fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP));
+	FAIL_IF(event_read(&events[0]));
+
+	ebb_global_enable();
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+	mtspr(SPRN_PMC2, pmc_sample_period(sample_period));
+	mtspr(SPRN_PMC3, pmc_sample_period(sample_period));
+	mtspr(SPRN_PMC4, pmc_sample_period(sample_period));
+	mtspr(SPRN_PMC5, pmc_sample_period(sample_period));
+	mtspr(SPRN_PMC6, pmc_sample_period(sample_period));
+
+	while (ebb_state.stats.ebb_count < 50) {
+		FAIL_IF(core_busy_loop());
+		FAIL_IF(ebb_check_mmcr0());
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	count_pmc(1, sample_period);
+	count_pmc(2, sample_period);
+	count_pmc(3, sample_period);
+	count_pmc(4, sample_period);
+	count_pmc(5, sample_period);
+	count_pmc(6, sample_period);
+
+	dump_ebb_state();
+
+	for (i = 0; i < 6; i++)
+		event_close(&events[i]);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(multi_counter, "multi_counter");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
new file mode 100644
index 00000000000..b8dc371f933
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test running multiple EBB-using processes at once on a single CPU. They
+ * should all run happily without interfering with each other.
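+ * (This exercises the kernel's per-task switching of EBB state - BESCR,
+ * EBBHR and the PMCs - since any leakage between children would likely
+ * show up as spurious EBBs or corrupted counts.)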
+ */ + +static bool child_should_exit; + +static void sigint_handler(int signal) +{ + child_should_exit = true; +} + +struct sigaction sigint_action = { + .sa_handler = sigint_handler, +}; + +static int cycles_child(void) +{ + struct event event; + + if (sigaction(SIGINT, &sigint_action, NULL)) { + perror("sigaction"); + return 1; + } + + event_init_named(&event, 0x1001e, "cycles"); + event_leader_ebb_init(&event); + + event.attr.exclude_kernel = 1; + event.attr.exclude_hv = 1; + event.attr.exclude_idle = 1; + + FAIL_IF(event_open(&event)); + + ebb_enable_pmc_counting(1); + setup_ebb_handler(standard_ebb_callee); + ebb_global_enable(); + + FAIL_IF(ebb_event_enable(&event)); + + mtspr(SPRN_PMC1, pmc_sample_period(sample_period)); + + while (!child_should_exit) { + FAIL_IF(core_busy_loop()); + FAIL_IF(ebb_check_mmcr0()); + } + + ebb_global_disable(); + ebb_freeze_pmcs(); + + count_pmc(1, sample_period); + + dump_summary_ebb_state(); + + event_close(&event); + + FAIL_IF(ebb_state.stats.ebb_count == 0); + + return 0; +} + +#define NR_CHILDREN 4 + +int multi_ebb_procs(void) +{ + pid_t pids[NR_CHILDREN]; + int cpu, rc, i; + + cpu = pick_online_cpu(); + FAIL_IF(cpu < 0); + FAIL_IF(bind_to_cpu(cpu)); + + for (i = 0; i < NR_CHILDREN; i++) { + pids[i] = fork(); + if (pids[i] == 0) + exit(cycles_child()); + } + + /* Have them all run for "a while" */ + sleep(10); + + rc = 0; + for (i = 0; i < NR_CHILDREN; i++) { + /* Tell them to stop */ + kill(pids[i], SIGINT); + /* And wait */ + rc |= wait_for_child(pids[i]); + } + + return rc; +} + +int main(void) +{ + return test_harness(multi_ebb_procs, "multi_ebb_procs"); +} diff --git a/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c new file mode 100644 index 00000000000..2f9bf8edfa6 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c @@ -0,0 +1,61 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <setjmp.h> +#include <signal.h> + +#include "ebb.h" + + +/* Test that things work sanely if we have no handler */ + +static int no_handler_test(void) +{ + struct event event; + u64 val; + int i; + + event_init_named(&event, 0x1001e, "cycles"); + event_leader_ebb_init(&event); + + event.attr.exclude_kernel = 1; + event.attr.exclude_hv = 1; + event.attr.exclude_idle = 1; + + FAIL_IF(event_open(&event)); + FAIL_IF(ebb_event_enable(&event)); + + val = mfspr(SPRN_EBBHR); + FAIL_IF(val != 0); + + /* Make sure it overflows quickly */ + sample_period = 1000; + mtspr(SPRN_PMC1, pmc_sample_period(sample_period)); + + /* Spin to make sure the event has time to overflow */ + for (i = 0; i < 1000; i++) + mb(); + + dump_ebb_state(); + + /* We expect to see the PMU frozen & PMAO set */ + val = mfspr(SPRN_MMCR0); + FAIL_IF(val != 0x0000000080000080); + + event_close(&event); + + dump_ebb_state(); + + /* The real test is that we never took an EBB at 0x0 */ + + return 0; +} + +int main(void) +{ + return test_harness(no_handler_test,"no_handler_test"); +} diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c new file mode 100644 index 00000000000..986500fd213 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c @@ -0,0 +1,106 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. 
+ */ + +#include <sched.h> +#include <signal.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> + +#include "ebb.h" + + +/* + * Test that the kernel properly handles PMAE across context switches. + * + * We test this by calling into the kernel inside our EBB handler, where PMAE + * is clear. A cpu eater companion thread is running on the same CPU as us to + * encourage the scheduler to switch us. + * + * The kernel must make sure that when it context switches us back in, it + * honours the fact that we had PMAE clear. + * + * Observed to hit the failing case on the first EBB with a broken kernel. + */ + +static bool mmcr0_mismatch; +static uint64_t before, after; + +static void syscall_ebb_callee(void) +{ + uint64_t val; + + val = mfspr(SPRN_BESCR); + if (!(val & BESCR_PMEO)) { + ebb_state.stats.spurious++; + goto out; + } + + ebb_state.stats.ebb_count++; + count_pmc(1, sample_period); + + before = mfspr(SPRN_MMCR0); + + /* Try and get ourselves scheduled, to force a PMU context switch */ + sched_yield(); + + after = mfspr(SPRN_MMCR0); + if (before != after) + mmcr0_mismatch = true; + +out: + reset_ebb(); +} + +static int test_body(void) +{ + struct event event; + + event_init_named(&event, 0x1001e, "cycles"); + event_leader_ebb_init(&event); + + event.attr.exclude_kernel = 1; + event.attr.exclude_hv = 1; + event.attr.exclude_idle = 1; + + FAIL_IF(event_open(&event)); + + setup_ebb_handler(syscall_ebb_callee); + ebb_global_enable(); + + FAIL_IF(ebb_event_enable(&event)); + + mtspr(SPRN_PMC1, pmc_sample_period(sample_period)); + + while (ebb_state.stats.ebb_count < 20 && !mmcr0_mismatch) + FAIL_IF(core_busy_loop()); + + ebb_global_disable(); + ebb_freeze_pmcs(); + + count_pmc(1, sample_period); + + dump_ebb_state(); + + if (mmcr0_mismatch) + printf("Saw MMCR0 before 0x%lx after 0x%lx\n", before, after); + + event_close(&event); + + FAIL_IF(ebb_state.stats.ebb_count == 0); + FAIL_IF(mmcr0_mismatch); + + return 0; +} + +int pmae_handling(void) +{ + return eat_cpu(test_body); +} + +int main(void) +{ + return test_harness(pmae_handling, "pmae_handling"); +} diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c new file mode 100644 index 00000000000..a503fa70c95 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c @@ -0,0 +1,93 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "ebb.h" + + +/* + * Test that PMC5 & 6 are frozen (ie. don't overflow) when they are not being + * used. Tests the MMCR0_FC56 logic in the kernel. 
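+ *
+ * (PMC5/6 count run instructions/cycles without being programmed, so unless
+ * the kernel freezes them with MMCR0_FC56 they will eventually overflow -
+ * the handler below fails the test if either reaches COUNTER_OVERFLOW.)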
+ */ + +static int pmc56_overflowed; + +static void ebb_callee(void) +{ + uint64_t val; + + val = mfspr(SPRN_BESCR); + if (!(val & BESCR_PMEO)) { + ebb_state.stats.spurious++; + goto out; + } + + ebb_state.stats.ebb_count++; + count_pmc(2, sample_period); + + val = mfspr(SPRN_PMC5); + if (val >= COUNTER_OVERFLOW) + pmc56_overflowed++; + + count_pmc(5, COUNTER_OVERFLOW); + + val = mfspr(SPRN_PMC6); + if (val >= COUNTER_OVERFLOW) + pmc56_overflowed++; + + count_pmc(6, COUNTER_OVERFLOW); + +out: + reset_ebb(); +} + +int pmc56_overflow(void) +{ + struct event event; + + /* Use PMC2 so we set PMCjCE, which enables PMC5/6 */ + event_init(&event, 0x2001e); + event_leader_ebb_init(&event); + + event.attr.exclude_kernel = 1; + event.attr.exclude_hv = 1; + event.attr.exclude_idle = 1; + + FAIL_IF(event_open(&event)); + + setup_ebb_handler(ebb_callee); + ebb_global_enable(); + + FAIL_IF(ebb_event_enable(&event)); + + mtspr(SPRN_PMC1, pmc_sample_period(sample_period)); + mtspr(SPRN_PMC5, 0); + mtspr(SPRN_PMC6, 0); + + while (ebb_state.stats.ebb_count < 10) + FAIL_IF(core_busy_loop()); + + ebb_global_disable(); + ebb_freeze_pmcs(); + + count_pmc(2, sample_period); + + dump_ebb_state(); + + printf("PMC5/6 overflow %d\n", pmc56_overflowed); + + event_close(&event); + + FAIL_IF(ebb_state.stats.ebb_count == 0 || pmc56_overflowed != 0); + + return 0; +} + +int main(void) +{ + return test_harness(pmc56_overflow, "pmc56_overflow"); +} diff --git a/tools/testing/selftests/powerpc/pmu/ebb/reg.h b/tools/testing/selftests/powerpc/pmu/ebb/reg.h new file mode 100644 index 00000000000..5921b0dfe2e --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/reg.h @@ -0,0 +1,49 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#ifndef _SELFTESTS_POWERPC_REG_H +#define _SELFTESTS_POWERPC_REG_H + +#define __stringify_1(x) #x +#define __stringify(x) __stringify_1(x) + +#define mfspr(rn) ({unsigned long rval; \ + asm volatile("mfspr %0," __stringify(rn) \ + : "=r" (rval)); rval; }) +#define mtspr(rn, v) asm volatile("mtspr " __stringify(rn) ",%0" : \ + : "r" ((unsigned long)(v)) \ + : "memory") + +#define mb() asm volatile("sync" : : : "memory"); + +#define SPRN_MMCR2 769 +#define SPRN_MMCRA 770 +#define SPRN_MMCR0 779 +#define MMCR0_PMAO 0x00000080 +#define MMCR0_PMAE 0x04000000 +#define MMCR0_FC 0x80000000 +#define SPRN_EBBHR 804 +#define SPRN_EBBRR 805 +#define SPRN_BESCR 806 /* Branch event status & control register */ +#define SPRN_BESCRS 800 /* Branch event status & control set (1 bits set to 1) */ +#define SPRN_BESCRSU 801 /* Branch event status & control set upper */ +#define SPRN_BESCRR 802 /* Branch event status & control REset (1 bits set to 0) */ +#define SPRN_BESCRRU 803 /* Branch event status & control REset upper */ + +#define BESCR_PMEO 0x1 /* PMU Event-based exception Occurred */ +#define BESCR_PME (0x1ul << 32) /* PMU Event-based exception Enable */ + +#define SPRN_PMC1 771 +#define SPRN_PMC2 772 +#define SPRN_PMC3 773 +#define SPRN_PMC4 774 +#define SPRN_PMC5 775 +#define SPRN_PMC6 776 + +#define SPRN_SIAR 780 +#define SPRN_SDAR 781 +#define SPRN_SIER 768 + +#endif /* _SELFTESTS_POWERPC_REG_H */ diff --git a/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c b/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c new file mode 100644 index 00000000000..0cae66f659a --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c @@ -0,0 +1,39 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. 
+ */ + +#include <stdio.h> +#include <stdlib.h> + +#include "ebb.h" +#include "reg.h" + + +/* + * Test basic access to the EBB regs, they should be user accessible with no + * kernel interaction required. + */ +int reg_access(void) +{ + uint64_t val, expected; + + expected = 0x8000000100000000ull; + mtspr(SPRN_BESCR, expected); + val = mfspr(SPRN_BESCR); + + FAIL_IF(val != expected); + + expected = 0x0000000001000000ull; + mtspr(SPRN_EBBHR, expected); + val = mfspr(SPRN_EBBHR); + + FAIL_IF(val != expected); + + return 0; +} + +int main(void) +{ + return test_harness(reg_access, "reg_access"); +} diff --git a/tools/testing/selftests/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c b/tools/testing/selftests/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c new file mode 100644 index 00000000000..d56607e4ffa --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c @@ -0,0 +1,91 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "ebb.h" + + +/* + * Tests a pinned per-task event vs an EBB - in that order. The pinned per-task + * event should prevent the EBB event from being enabled. + */ + +static int setup_child_event(struct event *event, pid_t child_pid) +{ + event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL"); + + event->attr.pinned = 1; + + event->attr.exclude_kernel = 1; + event->attr.exclude_hv = 1; + event->attr.exclude_idle = 1; + + FAIL_IF(event_open_with_pid(event, child_pid)); + FAIL_IF(event_enable(event)); + + return 0; +} + +int task_event_pinned_vs_ebb(void) +{ + union pipe read_pipe, write_pipe; + struct event event; + pid_t pid; + int rc; + + FAIL_IF(pipe(read_pipe.fds) == -1); + FAIL_IF(pipe(write_pipe.fds) == -1); + + pid = fork(); + if (pid == 0) { + /* NB order of pipes looks reversed */ + exit(ebb_child(write_pipe, read_pipe)); + } + + /* We setup the task event first */ + rc = setup_child_event(&event, pid); + if (rc) { + kill_child_and_wait(pid); + return rc; + } + + /* Signal the child to install its EBB event and wait */ + if (sync_with_child(read_pipe, write_pipe)) + /* If it fails, wait for it to exit */ + goto wait; + + /* Signal the child to run */ + FAIL_IF(sync_with_child(read_pipe, write_pipe)); + +wait: + /* We expect it to fail to read the event */ + FAIL_IF(wait_for_child(pid) != 2); + FAIL_IF(event_disable(&event)); + FAIL_IF(event_read(&event)); + + event_report(&event); + + FAIL_IF(event.result.value == 0); + /* + * For reasons I don't understand enabled is usually just slightly + * lower than running. Would be good to confirm why. + */ + FAIL_IF(event.result.enabled == 0); + FAIL_IF(event.result.running == 0); + + return 0; +} + +int main(void) +{ + return test_harness(task_event_pinned_vs_ebb, "task_event_pinned_vs_ebb"); +} diff --git a/tools/testing/selftests/powerpc/pmu/ebb/task_event_vs_ebb_test.c b/tools/testing/selftests/powerpc/pmu/ebb/task_event_vs_ebb_test.c new file mode 100644 index 00000000000..eba32196dbb --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/task_event_vs_ebb_test.c @@ -0,0 +1,83 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. 
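+ */
+
+/*
+ * Outline of the flow below: fork a child, open a per-task event on it,
+ * then use the pipe pair to tell the child first to install its EBB event
+ * and then to run. Finally reap the child and read the task event, which
+ * may or may not have counted by then.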
+ */ + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "ebb.h" + + +/* + * Tests a per-task event vs an EBB - in that order. The EBB should push the + * per-task event off the PMU. + */ + +static int setup_child_event(struct event *event, pid_t child_pid) +{ + event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL"); + + event->attr.exclude_kernel = 1; + event->attr.exclude_hv = 1; + event->attr.exclude_idle = 1; + + FAIL_IF(event_open_with_pid(event, child_pid)); + FAIL_IF(event_enable(event)); + + return 0; +} + +int task_event_vs_ebb(void) +{ + union pipe read_pipe, write_pipe; + struct event event; + pid_t pid; + int rc; + + FAIL_IF(pipe(read_pipe.fds) == -1); + FAIL_IF(pipe(write_pipe.fds) == -1); + + pid = fork(); + if (pid == 0) { + /* NB order of pipes looks reversed */ + exit(ebb_child(write_pipe, read_pipe)); + } + + /* We setup the task event first */ + rc = setup_child_event(&event, pid); + if (rc) { + kill_child_and_wait(pid); + return rc; + } + + /* Signal the child to install its EBB event and wait */ + if (sync_with_child(read_pipe, write_pipe)) + /* If it fails, wait for it to exit */ + goto wait; + + /* Signal the child to run */ + FAIL_IF(sync_with_child(read_pipe, write_pipe)); + +wait: + /* The EBB event should push the task event off so the child should succeed */ + FAIL_IF(wait_for_child(pid)); + FAIL_IF(event_disable(&event)); + FAIL_IF(event_read(&event)); + + event_report(&event); + + /* The task event may have run, or not so we can't assert anything about it */ + + return 0; +} + +int main(void) +{ + return test_harness(task_event_vs_ebb, "task_event_vs_ebb"); +} diff --git a/tools/testing/selftests/powerpc/pmu/ebb/trace.c b/tools/testing/selftests/powerpc/pmu/ebb/trace.c new file mode 100644 index 00000000000..251e66ab2aa --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/trace.c @@ -0,0 +1,300 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> + +#include "trace.h" + + +struct trace_buffer *trace_buffer_allocate(u64 size) +{ + struct trace_buffer *tb; + + if (size < sizeof(*tb)) { + fprintf(stderr, "Error: trace buffer too small\n"); + return NULL; + } + + tb = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (tb == MAP_FAILED) { + perror("mmap"); + return NULL; + } + + tb->size = size; + tb->tail = tb->data; + tb->overflow = false; + + return tb; +} + +static bool trace_check_bounds(struct trace_buffer *tb, void *p) +{ + return p < ((void *)tb + tb->size); +} + +static bool trace_check_alloc(struct trace_buffer *tb, void *p) +{ + /* + * If we ever overflowed don't allow any more input. This prevents us + * from dropping a large item and then later logging a small one. The + * buffer should just stop when overflow happened, not be patchy. If + * you're overflowing, make your buffer bigger. 
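+	 *
+	 * For example, if a 64-byte entry fails to fit but a later 8-byte
+	 * entry would, logging the small one anyway would leave a silent
+	 * hole in the trace; refusing everything after the first failure
+	 * keeps the record honest.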
+ */ + if (tb->overflow) + return false; + + if (!trace_check_bounds(tb, p)) { + tb->overflow = true; + return false; + } + + return true; +} + +static void *trace_alloc(struct trace_buffer *tb, int bytes) +{ + void *p, *newtail; + + p = tb->tail; + newtail = tb->tail + bytes; + if (!trace_check_alloc(tb, newtail)) + return NULL; + + tb->tail = newtail; + + return p; +} + +static struct trace_entry *trace_alloc_entry(struct trace_buffer *tb, int payload_size) +{ + struct trace_entry *e; + + e = trace_alloc(tb, sizeof(*e) + payload_size); + if (e) + e->length = payload_size; + + return e; +} + +int trace_log_reg(struct trace_buffer *tb, u64 reg, u64 value) +{ + struct trace_entry *e; + u64 *p; + + e = trace_alloc_entry(tb, sizeof(reg) + sizeof(value)); + if (!e) + return -ENOSPC; + + e->type = TRACE_TYPE_REG; + p = (u64 *)e->data; + *p++ = reg; + *p++ = value; + + return 0; +} + +int trace_log_counter(struct trace_buffer *tb, u64 value) +{ + struct trace_entry *e; + u64 *p; + + e = trace_alloc_entry(tb, sizeof(value)); + if (!e) + return -ENOSPC; + + e->type = TRACE_TYPE_COUNTER; + p = (u64 *)e->data; + *p++ = value; + + return 0; +} + +int trace_log_string(struct trace_buffer *tb, char *str) +{ + struct trace_entry *e; + char *p; + int len; + + len = strlen(str); + + /* We NULL terminate to make printing easier */ + e = trace_alloc_entry(tb, len + 1); + if (!e) + return -ENOSPC; + + e->type = TRACE_TYPE_STRING; + p = (char *)e->data; + memcpy(p, str, len); + p += len; + *p = '\0'; + + return 0; +} + +int trace_log_indent(struct trace_buffer *tb) +{ + struct trace_entry *e; + + e = trace_alloc_entry(tb, 0); + if (!e) + return -ENOSPC; + + e->type = TRACE_TYPE_INDENT; + + return 0; +} + +int trace_log_outdent(struct trace_buffer *tb) +{ + struct trace_entry *e; + + e = trace_alloc_entry(tb, 0); + if (!e) + return -ENOSPC; + + e->type = TRACE_TYPE_OUTDENT; + + return 0; +} + +static void trace_print_header(int seq, int prefix) +{ + printf("%*s[%d]: ", prefix, "", seq); +} + +static char *trace_decode_reg(int reg) +{ + switch (reg) { + case 769: return "SPRN_MMCR2"; break; + case 770: return "SPRN_MMCRA"; break; + case 779: return "SPRN_MMCR0"; break; + case 804: return "SPRN_EBBHR"; break; + case 805: return "SPRN_EBBRR"; break; + case 806: return "SPRN_BESCR"; break; + case 800: return "SPRN_BESCRS"; break; + case 801: return "SPRN_BESCRSU"; break; + case 802: return "SPRN_BESCRR"; break; + case 803: return "SPRN_BESCRRU"; break; + case 771: return "SPRN_PMC1"; break; + case 772: return "SPRN_PMC2"; break; + case 773: return "SPRN_PMC3"; break; + case 774: return "SPRN_PMC4"; break; + case 775: return "SPRN_PMC5"; break; + case 776: return "SPRN_PMC6"; break; + case 780: return "SPRN_SIAR"; break; + case 781: return "SPRN_SDAR"; break; + case 768: return "SPRN_SIER"; break; + } + + return NULL; +} + +static void trace_print_reg(struct trace_entry *e) +{ + u64 *p, *reg, *value; + char *name; + + p = (u64 *)e->data; + reg = p++; + value = p; + + name = trace_decode_reg(*reg); + if (name) + printf("register %-10s = 0x%016llx\n", name, *value); + else + printf("register %lld = 0x%016llx\n", *reg, *value); +} + +static void trace_print_counter(struct trace_entry *e) +{ + u64 *value; + + value = (u64 *)e->data; + printf("counter = %lld\n", *value); +} + +static void trace_print_string(struct trace_entry *e) +{ + char *str; + + str = (char *)e->data; + puts(str); +} + +#define BASE_PREFIX 2 +#define PREFIX_DELTA 8 + +static void trace_print_entry(struct trace_entry *e, int seq, int *prefix) +{ + 
switch (e->type) { + case TRACE_TYPE_REG: + trace_print_header(seq, *prefix); + trace_print_reg(e); + break; + case TRACE_TYPE_COUNTER: + trace_print_header(seq, *prefix); + trace_print_counter(e); + break; + case TRACE_TYPE_STRING: + trace_print_header(seq, *prefix); + trace_print_string(e); + break; + case TRACE_TYPE_INDENT: + trace_print_header(seq, *prefix); + puts("{"); + *prefix += PREFIX_DELTA; + break; + case TRACE_TYPE_OUTDENT: + *prefix -= PREFIX_DELTA; + if (*prefix < BASE_PREFIX) + *prefix = BASE_PREFIX; + trace_print_header(seq, *prefix); + puts("}"); + break; + default: + trace_print_header(seq, *prefix); + printf("entry @ %p type %d\n", e, e->type); + break; + } +} + +void trace_buffer_print(struct trace_buffer *tb) +{ + struct trace_entry *e; + int i, prefix; + void *p; + + printf("Trace buffer dump:\n"); + printf(" address %p \n", tb); + printf(" tail %p\n", tb->tail); + printf(" size %llu\n", tb->size); + printf(" overflow %s\n", tb->overflow ? "TRUE" : "false"); + printf(" Content:\n"); + + p = tb->data; + + i = 0; + prefix = BASE_PREFIX; + + while (trace_check_bounds(tb, p) && p < tb->tail) { + e = p; + + trace_print_entry(e, i, &prefix); + + i++; + p = (void *)e + sizeof(*e) + e->length; + } +} + +void trace_print_location(struct trace_buffer *tb) +{ + printf("Trace buffer 0x%llx bytes @ %p\n", tb->size, tb); +} diff --git a/tools/testing/selftests/powerpc/pmu/ebb/trace.h b/tools/testing/selftests/powerpc/pmu/ebb/trace.h new file mode 100644 index 00000000000..926458e28c8 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/trace.h @@ -0,0 +1,41 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#ifndef _SELFTESTS_POWERPC_PMU_EBB_TRACE_H +#define _SELFTESTS_POWERPC_PMU_EBB_TRACE_H + +#include "utils.h" + +#define TRACE_TYPE_REG 1 +#define TRACE_TYPE_COUNTER 2 +#define TRACE_TYPE_STRING 3 +#define TRACE_TYPE_INDENT 4 +#define TRACE_TYPE_OUTDENT 5 + +struct trace_entry +{ + u8 type; + u8 length; + u8 data[0]; +}; + +struct trace_buffer +{ + u64 size; + bool overflow; + void *tail; + u8 data[0]; +}; + +struct trace_buffer *trace_buffer_allocate(u64 size); +int trace_log_reg(struct trace_buffer *tb, u64 reg, u64 value); +int trace_log_counter(struct trace_buffer *tb, u64 value); +int trace_log_string(struct trace_buffer *tb, char *str); +int trace_log_indent(struct trace_buffer *tb); +int trace_log_outdent(struct trace_buffer *tb); +void trace_buffer_print(struct trace_buffer *tb); +void trace_print_location(struct trace_buffer *tb); + +#endif /* _SELFTESTS_POWERPC_PMU_EBB_TRACE_H */ diff --git a/tools/testing/selftests/powerpc/pmu/event.c b/tools/testing/selftests/powerpc/pmu/event.c index 2b2d11df245..184b36807d4 100644 --- a/tools/testing/selftests/powerpc/pmu/event.c +++ b/tools/testing/selftests/powerpc/pmu/event.c @@ -39,7 +39,13 @@ void event_init_named(struct event *e, u64 config, char *name) event_init_opts(e, config, PERF_TYPE_RAW, name); } +void event_init(struct event *e, u64 config) +{ + event_init_opts(e, config, PERF_TYPE_RAW, "event"); +} + #define PERF_CURRENT_PID 0 +#define PERF_NO_PID -1 #define PERF_NO_CPU -1 #define PERF_NO_GROUP -1 @@ -59,6 +65,16 @@ int event_open_with_group(struct event *e, int group_fd) return event_open_with_options(e, PERF_CURRENT_PID, PERF_NO_CPU, group_fd); } +int event_open_with_pid(struct event *e, pid_t pid) +{ + return event_open_with_options(e, pid, PERF_NO_CPU, PERF_NO_GROUP); +} + +int event_open_with_cpu(struct event *e, int cpu) +{ + return event_open_with_options(e, 
PERF_NO_PID, cpu, PERF_NO_GROUP); +} + int event_open(struct event *e) { return event_open_with_options(e, PERF_CURRENT_PID, PERF_NO_CPU, PERF_NO_GROUP); @@ -69,6 +85,16 @@ void event_close(struct event *e) close(e->fd); } +int event_enable(struct event *e) +{ + return ioctl(e->fd, PERF_EVENT_IOC_ENABLE); +} + +int event_disable(struct event *e) +{ + return ioctl(e->fd, PERF_EVENT_IOC_DISABLE); +} + int event_reset(struct event *e) { return ioctl(e->fd, PERF_EVENT_IOC_RESET); diff --git a/tools/testing/selftests/powerpc/pmu/event.h b/tools/testing/selftests/powerpc/pmu/event.h index e6993192ff3..a0ea6b1eef7 100644 --- a/tools/testing/selftests/powerpc/pmu/event.h +++ b/tools/testing/selftests/powerpc/pmu/event.h @@ -29,8 +29,12 @@ void event_init_named(struct event *e, u64 config, char *name); void event_init_opts(struct event *e, u64 config, int type, char *name); int event_open_with_options(struct event *e, pid_t pid, int cpu, int group_fd); int event_open_with_group(struct event *e, int group_fd); +int event_open_with_pid(struct event *e, pid_t pid); +int event_open_with_cpu(struct event *e, int cpu); int event_open(struct event *e); void event_close(struct event *e); +int event_enable(struct event *e); +int event_disable(struct event *e); int event_reset(struct event *e); int event_read(struct event *e); void event_report_justified(struct event *e, int name_width, int result_width); diff --git a/tools/testing/selftests/powerpc/pmu/lib.c b/tools/testing/selftests/powerpc/pmu/lib.c new file mode 100644 index 00000000000..0f6a4731d54 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/lib.c @@ -0,0 +1,252 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#define _GNU_SOURCE /* For CPU_ZERO etc. */ + +#include <errno.h> +#include <sched.h> +#include <setjmp.h> +#include <stdlib.h> +#include <sys/wait.h> + +#include "utils.h" +#include "lib.h" + + +int pick_online_cpu(void) +{ + cpu_set_t mask; + int cpu; + + CPU_ZERO(&mask); + + if (sched_getaffinity(0, sizeof(mask), &mask)) { + perror("sched_getaffinity"); + return -1; + } + + /* We prefer a primary thread, but skip 0 */ + for (cpu = 8; cpu < CPU_SETSIZE; cpu += 8) + if (CPU_ISSET(cpu, &mask)) + return cpu; + + /* Search for anything, but in reverse */ + for (cpu = CPU_SETSIZE - 1; cpu >= 0; cpu--) + if (CPU_ISSET(cpu, &mask)) + return cpu; + + printf("No cpus in affinity mask?!\n"); + return -1; +} + +int bind_to_cpu(int cpu) +{ + cpu_set_t mask; + + printf("Binding to cpu %d\n", cpu); + + CPU_ZERO(&mask); + CPU_SET(cpu, &mask); + + return sched_setaffinity(0, sizeof(mask), &mask); +} + +#define PARENT_TOKEN 0xAA +#define CHILD_TOKEN 0x55 + +int sync_with_child(union pipe read_pipe, union pipe write_pipe) +{ + char c = PARENT_TOKEN; + + FAIL_IF(write(write_pipe.write_fd, &c, 1) != 1); + FAIL_IF(read(read_pipe.read_fd, &c, 1) != 1); + if (c != CHILD_TOKEN) /* sometimes expected */ + return 1; + + return 0; +} + +int wait_for_parent(union pipe read_pipe) +{ + char c; + + FAIL_IF(read(read_pipe.read_fd, &c, 1) != 1); + FAIL_IF(c != PARENT_TOKEN); + + return 0; +} + +int notify_parent(union pipe write_pipe) +{ + char c = CHILD_TOKEN; + + FAIL_IF(write(write_pipe.write_fd, &c, 1) != 1); + + return 0; +} + +int notify_parent_of_error(union pipe write_pipe) +{ + char c = ~CHILD_TOKEN; + + FAIL_IF(write(write_pipe.write_fd, &c, 1) != 1); + + return 0; +} + +int wait_for_child(pid_t child_pid) +{ + int rc; + + if (waitpid(child_pid, &rc, 0) == -1) { + perror("waitpid"); + return 1; + } + + if 
(WIFEXITED(rc)) + rc = WEXITSTATUS(rc); + else + rc = 1; /* Signal or other */ + + return rc; +} + +int kill_child_and_wait(pid_t child_pid) +{ + kill(child_pid, SIGTERM); + + return wait_for_child(child_pid); +} + +static int eat_cpu_child(union pipe read_pipe, union pipe write_pipe) +{ + volatile int i = 0; + + /* + * We are just here to eat cpu and die. So make sure we can be killed, + * and also don't do any custom SIGTERM handling. + */ + signal(SIGTERM, SIG_DFL); + + notify_parent(write_pipe); + wait_for_parent(read_pipe); + + /* Soak up cpu forever */ + while (1) i++; + + return 0; +} + +pid_t eat_cpu(int (test_function)(void)) +{ + union pipe read_pipe, write_pipe; + int cpu, rc; + pid_t pid; + + cpu = pick_online_cpu(); + FAIL_IF(cpu < 0); + FAIL_IF(bind_to_cpu(cpu)); + + if (pipe(read_pipe.fds) == -1) + return -1; + + if (pipe(write_pipe.fds) == -1) + return -1; + + pid = fork(); + if (pid == 0) + exit(eat_cpu_child(write_pipe, read_pipe)); + + if (sync_with_child(read_pipe, write_pipe)) { + rc = -1; + goto out; + } + + printf("main test running as pid %d\n", getpid()); + + rc = test_function(); +out: + kill(pid, SIGKILL); + + return rc; +} + +struct addr_range libc, vdso; + +int parse_proc_maps(void) +{ + char execute, name[128]; + uint64_t start, end; + FILE *f; + int rc; + + f = fopen("/proc/self/maps", "r"); + if (!f) { + perror("fopen"); + return -1; + } + + do { + /* This skips line with no executable which is what we want */ + rc = fscanf(f, "%lx-%lx %*c%*c%c%*c %*x %*d:%*d %*d %127s\n", + &start, &end, &execute, name); + if (rc <= 0) + break; + + if (execute != 'x') + continue; + + if (strstr(name, "libc")) { + libc.first = start; + libc.last = end - 1; + } else if (strstr(name, "[vdso]")) { + vdso.first = start; + vdso.last = end - 1; + } + } while(1); + + fclose(f); + + return 0; +} + +#define PARANOID_PATH "/proc/sys/kernel/perf_event_paranoid" + +bool require_paranoia_below(int level) +{ + unsigned long current; + char *end, buf[16]; + FILE *f; + int rc; + + rc = -1; + + f = fopen(PARANOID_PATH, "r"); + if (!f) { + perror("fopen"); + goto out; + } + + if (!fgets(buf, sizeof(buf), f)) { + printf("Couldn't read " PARANOID_PATH "?\n"); + goto out_close; + } + + current = strtoul(buf, &end, 10); + + if (end == buf) { + printf("Couldn't parse " PARANOID_PATH "?\n"); + goto out_close; + } + + if (current >= level) + goto out; + + rc = 0; +out_close: + fclose(f); +out: + return rc; +} diff --git a/tools/testing/selftests/powerpc/pmu/lib.h b/tools/testing/selftests/powerpc/pmu/lib.h new file mode 100644 index 00000000000..ca5d72ae3be --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/lib.h @@ -0,0 +1,41 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. 
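+ */
+
+/*
+ * Usage sketch (illustrative only; my_test_body is a hypothetical test,
+ * not part of this patch):
+ *
+ *	static int my_test_body(void) { return 0; }	(0 means success)
+ *
+ *	int main(void) { return eat_cpu(my_test_body); }
+ *
+ * eat_cpu() binds to an online cpu, forks a spinning child there, runs
+ * the body, then kills the child and returns the body's result.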
+ */ + +#ifndef __SELFTESTS_POWERPC_PMU_LIB_H +#define __SELFTESTS_POWERPC_PMU_LIB_H + +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <unistd.h> + +union pipe { + struct { + int read_fd; + int write_fd; + }; + int fds[2]; +}; + +extern int pick_online_cpu(void); +extern int bind_to_cpu(int cpu); +extern int kill_child_and_wait(pid_t child_pid); +extern int wait_for_child(pid_t child_pid); +extern int sync_with_child(union pipe read_pipe, union pipe write_pipe); +extern int wait_for_parent(union pipe read_pipe); +extern int notify_parent(union pipe write_pipe); +extern int notify_parent_of_error(union pipe write_pipe); +extern pid_t eat_cpu(int (test_function)(void)); +extern bool require_paranoia_below(int level); + +struct addr_range { + uint64_t first, last; +}; + +extern struct addr_range libc, vdso; + +int parse_proc_maps(void); + +#endif /* __SELFTESTS_POWERPC_PMU_LIB_H */ diff --git a/tools/testing/selftests/powerpc/pmu/loop.S b/tools/testing/selftests/powerpc/pmu/loop.S index 8820e3df144..20c1f0876c4 100644 --- a/tools/testing/selftests/powerpc/pmu/loop.S +++ b/tools/testing/selftests/powerpc/pmu/loop.S @@ -3,44 +3,41 @@ * Licensed under GPLv2. */ +#include <ppc-asm.h> + .text - .global thirty_two_instruction_loop - .type .thirty_two_instruction_loop,@function - .section ".opd","aw",@progbits -thirty_two_instruction_loop: - .quad .thirty_two_instruction_loop, .TOC.@tocbase, 0 - .previous -.thirty_two_instruction_loop: - cmpwi %r3,0 +FUNC_START(thirty_two_instruction_loop) + cmpdi r3,0 beqlr - addi %r4,%r3,1 - addi %r4,%r4,1 - addi %r4,%r4,1 - addi %r4,%r4,1 - addi %r4,%r4,1 - addi %r4,%r4,1 - addi %r4,%r4,1 - addi %r4,%r4,1 - addi %r4,%r4,1 - addi %r4,%r4,1 - addi %r4,%r4,1 - addi %r4,%r4,1 - addi %r4,%r4,1 - addi %r4,%r4,1 - addi %r4,%r4,1 - addi %r4,%r4,1 - addi %r4,%r4,1 - addi %r4,%r4,1 - addi %r4,%r4,1 - addi %r4,%r4,1 - addi %r4,%r4,1 - addi %r4,%r4,1 - addi %r4,%r4,1 - addi %r4,%r4,1 - addi %r4,%r4,1 - addi %r4,%r4,1 - addi %r4,%r4,1 - addi %r4,%r4,1 # 28 addi's - subi %r3,%r3,1 - b .thirty_two_instruction_loop + addi r4,r3,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 + addi r4,r4,1 # 28 addi's + subi r3,r3,1 + b FUNC_NAME(thirty_two_instruction_loop) +FUNC_END(thirty_two_instruction_loop) diff --git a/tools/testing/selftests/powerpc/subunit.h b/tools/testing/selftests/powerpc/subunit.h index 98a22920792..9c6c4e901ab 100644 --- a/tools/testing/selftests/powerpc/subunit.h +++ b/tools/testing/selftests/powerpc/subunit.h @@ -26,6 +26,11 @@ static inline void test_error(char *name) printf("error: %s\n", name); } +static inline void test_skip(char *name) +{ + printf("skip: %s\n", name); +} + static inline void test_success(char *name) { printf("success: %s\n", name); diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile new file mode 100644 index 00000000000..51267f4184a --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -0,0 +1,15 @@ +PROGS := tm-resched-dscr + +all: $(PROGS) + +$(PROGS): + +run_tests: all + @-for PROG in $(PROGS); do \ + ./$$PROG; \ + done; + +clean: + rm -f $(PROGS) *.o + +.PHONY: all run_tests clean diff --git 
a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c new file mode 100644 index 00000000000..ee98e3886af --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c @@ -0,0 +1,90 @@ +/* Test context switching to see if the DSCR SPR is correctly preserved + * when within a transaction. + * + * Note: We assume that the DSCR has been left at the default value (0) + * for all CPUs. + * + * Method: + * + * Set a value into the DSCR. + * + * Start a transaction, and suspend it (*). + * + * Hard loop checking to see if the transaction has become doomed. + * + * Now that we *may* have been preempted, record the DSCR and TEXASR SPRS. + * + * If the abort was because of a context switch, check the DSCR value. + * Otherwise, try again. + * + * (*) If the transaction is not suspended we can't see the problem because + * the transaction abort handler will restore the DSCR to it's checkpointed + * value before we regain control. + */ + +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include <asm/tm.h> + +#define TBEGIN ".long 0x7C00051D ;" +#define TEND ".long 0x7C00055D ;" +#define TCHECK ".long 0x7C00059C ;" +#define TSUSPEND ".long 0x7C0005DD ;" +#define TRESUME ".long 0x7C2005DD ;" +#define SPRN_TEXASR 0x82 +#define SPRN_DSCR 0x03 + +int main(void) { + uint64_t rv, dscr1 = 1, dscr2, texasr; + + printf("Check DSCR TM context switch: "); + fflush(stdout); + for (;;) { + rv = 1; + asm __volatile__ ( + /* set a known value into the DSCR */ + "ld 3, %[dscr1];" + "mtspr %[sprn_dscr], 3;" + + /* start and suspend a transaction */ + TBEGIN + "beq 1f;" + TSUSPEND + + /* hard loop until the transaction becomes doomed */ + "2: ;" + TCHECK + "bc 4, 0, 2b;" + + /* record DSCR and TEXASR */ + "mfspr 3, %[sprn_dscr];" + "std 3, %[dscr2];" + "mfspr 3, %[sprn_texasr];" + "std 3, %[texasr];" + + TRESUME + TEND + "li %[rv], 0;" + "1: ;" + : [rv]"=r"(rv), [dscr2]"=m"(dscr2), [texasr]"=m"(texasr) + : [dscr1]"m"(dscr1) + , [sprn_dscr]"i"(SPRN_DSCR), [sprn_texasr]"i"(SPRN_TEXASR) + : "memory", "r3" + ); + assert(rv); /* make sure the transaction aborted */ + if ((texasr >> 56) != TM_CAUSE_RESCHED) { + putchar('.'); + fflush(stdout); + continue; + } + if (dscr2 != dscr1) { + printf(" FAIL\n"); + exit(EXIT_FAILURE); + } else { + printf(" OK\n"); + exit(EXIT_SUCCESS); + } + } +} diff --git a/tools/testing/selftests/powerpc/utils.h b/tools/testing/selftests/powerpc/utils.h index 0de064406da..a93777ae068 100644 --- a/tools/testing/selftests/powerpc/utils.h +++ b/tools/testing/selftests/powerpc/utils.h @@ -31,6 +31,18 @@ do { \ } \ } while (0) +/* The test harness uses this, yes it's gross */ +#define MAGIC_SKIP_RETURN_VALUE 99 + +#define SKIP_IF(x) \ +do { \ + if ((x)) { \ + fprintf(stderr, \ + "[SKIP] Test skipped on line %d\n", __LINE__); \ + return MAGIC_SKIP_RETURN_VALUE; \ + } \ +} while (0) + #define _str(s) #s #define str(s) _str(s) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c86be0f983d..4b6c01b477f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1714,11 +1714,11 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) EXPORT_SYMBOL_GPL(kvm_vcpu_kick); #endif /* !CONFIG_S390 */ -bool kvm_vcpu_yield_to(struct kvm_vcpu *target) +int kvm_vcpu_yield_to(struct kvm_vcpu *target) { struct pid *pid; struct task_struct *task = NULL; - bool ret = false; + int ret = 0; rcu_read_lock(); pid = rcu_dereference(target->pid); |